Automatically exported from code.google.com/p/planningalerts
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

453 lines
14 KiB

  1. <?php
  2. //Includes
  3. require_once('config.php');
  4. require_once('application.php');
  5. require_once ("PEAR/HTTP/Request.php");
  6. require_once('phpcoord.php');
  7. //Generic scrapers
  8. function scrape_applications_publicaccess ($search_url, $info_url_base, $comment_url_base){
  9. $applications = array();
  10. //$application_pattern = "/<tr><th>([0-9]*)<\/th>([^;]*)([^<]*)/";
  11. $application_pattern = "/<tr><th>([0-9]*)<\/th>.*(?=<\/tr)/U";
  12. //grab the page
  13. $html = safe_scrape_page($search_url);
  14. //clean html
  15. $html = str_replace("\r\n","", $html);
  16. preg_match_all($application_pattern, $html, $application_matches, PREG_PATTERN_ORDER);
  17. foreach ($application_matches[0] as $application_match){
  18. //START Duncan's debug
  19. //print_r($application_match);
  20. //print_r("END");
  21. // END Duncan's debug
  22. $detail_pattern = "/<td>([^<])*/";
  23. preg_match_all($detail_pattern, $application_match, $detail_matches, PREG_PATTERN_ORDER);
  24. $application = new Application();
  25. //match the basic details
  26. $application->council_reference = str_replace("<td>", "", $detail_matches[0][0]);
  27. $application->date_received = str_replace("<td>", "", $detail_matches[0][1]);
  28. $application->address = str_replace("<td>", "", $detail_matches[0][2]);
  29. //$application->status = str_replace("<td>", "", $detail_matches[0][4]);
  30. //match case number
  31. $casenumber_pattern = "/caseno=([^&]*)/";
  32. preg_match($casenumber_pattern, $application_match, $casenumber_matches);
  33. //START Duncan's debug
  34. //print_r($application_match);
  35. //var_dump($casenumber_matches);
  36. //END Duncan's debug
  37. $case_number ="";
  38. if(sizeof($casenumber_matches)>0){
  39. $case_number = str_replace("caseno=","", $casenumber_matches[0]);
  40. }
  41. //if weve found a caase number, then get the details
  42. if($case_number !=""){
  43. //Comment and info urls
  44. $application->info_url = $info_url_base . $case_number;
  45. $application->comment_url = $comment_url_base . $case_number;
  46. //Get the postcode
  47. $postcode_pattern = "/[A-Z][A-Z]?[0-9][A-Z0-9]? ?[0-9][ABDEFGHJLNPQRSTUWXYZ]{2}/";
  48. preg_match($postcode_pattern, $application->address, $postcode_matches);
  49. if(isset($postcode_matches[0])){
  50. $application->postcode = $postcode_matches[0];
  51. }
  52. //get full details
  53. $details_html = "";
  54. $details_html = safe_scrape_page($info_url_base . $case_number);
  55. //regular expresion and clean
  56. $full_detail_pattern = '/id="desc" rows="[1-9]" cols="80" class="cDetailInput">([^<]*)/';
  57. preg_match($full_detail_pattern, $details_html, $full_detail_matches);
  58. if (isset($full_detail_matches[0])){
  59. $application->description = substr($full_detail_matches[0], strpos($full_detail_matches[0], ">") + 1);
  60. }
  61. //only add it if we have a postcode (bit useless otherwise)
  62. if(is_postcode($application->postcode)){
  63. array_push($applications, $application);
  64. }
  65. }else{
  66. error_log("Unable to find case number for an application at " . $search_url);
  67. }
  68. }
  69. //return
  70. return $applications;
  71. }
  72. function scrape_applications_wam ($search_url, $info_url_base, $comment_url_base, $detail_mode = 1){
  73. $applications = array();
  74. $application_pattern = '/<tr><td class=[^>]*>([^<]*)<\/td><td class=[^>]*><a href="[^"]*">([^<]*)<\/a><\/td><td class=[^>]*>([^<]*)<\/td><td class=[^>]*>([^<]*)<\/td>/';
  75. //grab the page
  76. $html = safe_scrape_page($search_url);
  77. //clean html
  78. $html = str_replace("\r\n","", $html);
  79. preg_match_all($application_pattern, $html, $application_matches, PREG_SET_ORDER);
  80. foreach ($application_matches as $application_match){
  81. if ($application_match[4] != 'Current') { continue; }
  82. $application = new Application();
  83. //match the basic details
  84. $application->council_reference = $application_match[2];
  85. $case_number = $application_match[2];
  86. $application->date_received = $application_match[1];
  87. $application->address = $application_match[3];
  88. //$application->status = $application_match[4];
  89. //if weve found a caase number, then get the details
  90. if($case_number !=""){
  91. //Comment and info urls
  92. $application->info_url = $info_url_base . $case_number;
  93. $application->comment_url = $comment_url_base . $case_number;
  94. //Get the postcode
  95. $postcode_pattern = "/[A-Z][A-Z]?[0-9][A-Z0-9]? ?[0-9][ABDEFGHJLNPQRSTUWXYZ]{2}/";
  96. preg_match($postcode_pattern, $application->address, $postcode_matches);
  97. if(isset($postcode_matches[0])){
  98. $application->postcode = $postcode_matches[0];
  99. }
  100. //get full details
  101. $details_html = "";
  102. $details_html = safe_scrape_page($info_url_base . $case_number);
  103. $details_html = str_replace("\r\n","",$details_html);
  104. //regular expresion and clean. SItes vary a tiny bit in their html, so there's a bit of a hack here
  105. if ($detail_mode == 1){
  106. $full_detail_pattern = '/Development:<.*<td colspan="3">([^<]*)<\/td>/';
  107. }
  108. if ($detail_mode == 2){
  109. $full_detail_pattern = '/Development:<\/td><td>([^<]*)/';
  110. }
  111. preg_match($full_detail_pattern, $details_html, $full_detail_matches);
  112. if (isset($full_detail_matches[1])){
  113. $application->description = $full_detail_matches[1];
  114. }
  115. //only add it if we have a postcode (bit useless otherwise)
  116. if(is_postcode($application->postcode)){
  117. //removed the xy for the moment. It is slowing down the scrape and will be added when the app is parsed anyway (Richard)
  118. /* $xy = postcode_to_location($application->postcode);
  119. $application->x = $xy[0];
  120. $application->y = $xy[1];
  121. $os = new OSRef($xy[0],$xy[1]);
  122. $latlon = $os->toLatLng();
  123. $application->lat = $latlon->lat;
  124. $application->lon = $latlon->lng;
  125. */
  126. array_push($applications, $application);
  127. }
  128. }else{
  129. error_log("Unable to find case number for an application at " . $search_url);
  130. }
  131. }
  132. //return
  133. return $applications;
  134. }
  135. // Council-specific scrapers
  136. function scrape_applications_islington ($search_url, $info_url_base, $comment_url_base){
  137. $applications = array();
  138. $application_pattern = '/<TR>([^<]*)<TD class="lg" valign="top" >([^<]*)<a href([^<]*)<a href=wphappcriteria.display>Search Criteria(.*)([^<]*)<(.*)>([^<]*)<TD class="lg" >([^<]*)<\/TD>([^<]*)<TD class="lg" >([^<]*)<INPUT TYPE=HIDDEN NAME([^>]*)([^<]*)/';
  139. //grab the page
  140. $html = safe_scrape_page($search_url);
  141. preg_match_all($application_pattern, $html, $application_matches, PREG_PATTERN_ORDER);
  142. foreach ($application_matches[0] as $application_match){
  143. $application_string = str_replace("\n","", $application_match);
  144. $reference_pattern = '/Search Results<\/a>">([^<]*)/';
  145. preg_match_all($reference_pattern, $application_string, $reference_matches, PREG_PATTERN_ORDER);
  146. $application = new Application();
  147. //match the applicaiton number
  148. $application->council_reference = str_replace('Search Results</a>">', "", $reference_matches[0][0]);
  149. //Comment and info urls
  150. $application->info_url = $info_url_base . $application->council_reference;
  151. $application->comment_url = $comment_url_base . $application->council_reference;
  152. //get full details
  153. $details_html = "";
  154. $details_html = safe_scrape_page($info_url_base . $application->council_reference);
  155. $details_html = str_replace("\r\n","",$details_html);
  156. //Details
  157. $full_detail_pattern = '/Proposal:<\/label><\/td>([^<]*)<td colspan="3">([^<]*)/';
  158. preg_match($full_detail_pattern, $details_html, $full_detail_matches);
  159. if (isset($full_detail_matches[2])){
  160. $application->description = $full_detail_matches[2];
  161. }
  162. //Address
  163. $address_pattern = '/Main location:<\/label><\/td>([^<]*)<td colspan="3">([^<]*)/';
  164. $address = "";
  165. preg_match($address_pattern, $details_html, $address_matches);
  166. if(isset($address_matches[2])){
  167. $application->address = $address_matches[2];
  168. }
  169. //postcode
  170. $postcode_pattern = "/[A-Z][A-Z]?[0-9][A-Z0-9]? ?[0-9][ABDEFGHJLNPQRSTUWXYZ]{2}/";
  171. preg_match($postcode_pattern, $application->address, $postcode_matches);
  172. if(isset($postcode_matches[0])){
  173. $application->postcode = $postcode_matches[0];
  174. }
  175. //only add it if we have a postcode (bit useless otherwise)
  176. if(is_postcode($application->postcode)){
  177. array_push($applications, $application);
  178. }
  179. }
  180. //return
  181. return $applications;
  182. }
  183. //validate postcode
  184. function is_postcode ($postcode){
  185. $valid = false;
  186. $postcode=str_replace(" ","",$postcode);
  187. if(ereg ('^[a-zA-Z]{1,2}[0-9]{1,2}[a-zA-Z]{0,1}[0-9]{1}[a-zA-Z]{2}$', $postcode)){
  188. $valid = true;
  189. }
  190. return $valid;
  191. }
  192. function clean_postcode ($postcode, $upper = true) {
  193. $reg = array();
  194. $postcode = trim($postcode);
  195. preg_match('/^(.+?)([0-9][a-z]{2})$/',$postcode, $reg);
  196. $clean_postcode = trim($reg[1]) . ' ' . trim($reg[2]);
  197. if($upper){
  198. $clean_postcode = strtoupper($clean_postcode);
  199. }
  200. return $clean_postcode;
  201. }
  202. //Tiny url
  203. function tiny_url($url,$length=30){
  204. // make nasty big url all small
  205. if (strlen($url) >= $length){
  206. $tinyurl = @file ("http://tinyurl.com/api-create.php?url=$url");
  207. if (is_array($tinyurl)){
  208. $tinyurl = join ('', $tinyurl);
  209. } else {
  210. $tinyurl = $url;
  211. }
  212. } else {
  213. $tinyurl = $url;
  214. }
  215. return $tinyurl;
  216. }
  217. //Google maps url
  218. function googlemap_url_from_postcode($postcode, $zoom = 15){
  219. $postcode = strtolower(str_replace(" ", "+", $postcode));
  220. return "http://maps.google.co.uk/maps?q=$postcode&z=$zoom";
  221. }
  222. //postcode to location
  223. function postcode_to_location($postcode){
  224. // We don't actually need to fetch the page, we
  225. // can get everything we need from the url we are
  226. // redirected to.
  227. $clean_postcode = strtolower($postcode);
  228. $clean_postcode = str_replace(" ","+", $clean_postcode);
  229. $url = "http://ernestmarples.com/?p=sw98jx&f=csv";
  230. $result = file_get_contents($url);
  231. $result = split(",", $result);
  232. if(count($result) != 2){
  233. trigger_error("No lat/long could be found");
  234. }
  235. $lat = $result[0];
  236. $lng = $result[1];
  237. $LatLng = new LatLng($lat, $lng);
  238. $OSBG = $LatLng->toOSRef();
  239. $return = array($OSBG->easting, $OSBG->northing);
  240. }
  241. function location_to_postcode($easting, $northing) {
  242. $url = sprintf(
  243. "http://streetmap.co.uk/streetmap.dll?GridConvert?name=%d,%d&type=OSGrid",
  244. $easting, $northing);
  245. $resp = @file($url);
  246. if (is_array($resp)) $resp = join("\n", $resp);
  247. $resp = strip_tags($resp);
  248. // Kinda ghetto. Would be nice to have a nicer regex for postcodes.
  249. if (preg_match('/Nearest\s+Post\s+Code\s+(\S+\s+\S+)/i', $resp, $mat))
  250. return $mat[1];
  251. return NULL;
  252. }
  253. function valid_email ($string) {
  254. $valid = false;
  255. if (!ereg('^[-!#$%&\'*+\\./0-9=?A-Z^_`a-z{|}~]+'.
  256. '@'.
  257. '[-!#$%&\'*+\\/0-9=?A-Z^_`a-z{|}~]+\.'.
  258. '[-!#$%&\'*+\\./0-9=?A-Z^_`a-z{|}~]+$', $string)) {
  259. $valid = false;
  260. } else {
  261. $valid = true;
  262. }
  263. return $valid;
  264. }
  265. function alert_size_to_meters($alert_area_size){
  266. $area_size_meters = 0;
  267. if ($alert_area_size == "s"){
  268. $area_size_meters = SMALL_ZONE_SIZE;
  269. }elseif ($alert_area_size == "m"){
  270. $area_size_meters = MEDIUM_ZONE_SIZE;
  271. }elseif ($alert_area_size == "l"){
  272. $area_size_meters = LARGE_ZONE_SIZE;
  273. }
  274. return $area_size_meters;
  275. }
  276. //Send a text email
  277. function send_text_email($to, $from_name, $from_email, $subject, $body){
  278. $headers = 'MIME-Version: 1.0' . "\r\n";
  279. $headers .= 'Content-type: text/plain; charset=iso-8859-1' . "\r\n";
  280. $headers .= 'From: ' . $from_name. ' <' . $from_email . ">\r\n";
  281. mail($to, $subject, $body, $headers);
  282. }
  283. // Format a date to mysql format
  284. function mysql_date($date){
  285. return date("Y-m-d H::i:s", $date);
  286. }
  287. function safe_scrape_page($url, $method = "GET"){
  288. $page = "";
  289. for ($i=0; $i < 3; $i++){
  290. if($page == false){
  291. if (SCRAPE_METHOD == "PEAR"){
  292. $page = scrape_page_pear($url, $method);
  293. }else{
  294. $page = scrape_page_curl($url, $method);
  295. }
  296. }
  297. }
  298. return $page;
  299. }
  300. function scrape_page_pear($url, $method = "GET"){
  301. $page = "";
  302. $request = new HTTP_Request($url, array("method" => $method));
  303. $request->sendRequest();
  304. $page = $request->getResponseBody();
  305. return $page;
  306. }
  307. function scrape_page_curl($url) {
  308. $ch = curl_init($url);
  309. curl_setopt($ch,CURLOPT_RETURNTRANSFER,TRUE);
  310. curl_setopt($ch,CURLOPT_FOLLOWLOCATION,TRUE);
  311. return curl_exec($ch);
  312. }
  313. function display_applications($applications, $authority_name, $authority_short_name){
  314. //smarty
  315. $smarty = new Smarty;
  316. $smarty->force_compile = true;
  317. $smarty->compile_dir = SMARTY_COMPILE_DIRECTORY;
  318. $smarty->template_dir = "../templates";
  319. $smarty->assign("authority_name", $authority_name);
  320. $smarty->assign("authority_short_name", $authority_short_name);
  321. if (sizeof($applications) > 0){
  322. $smarty->assign("applications", $applications);
  323. }
  324. $smarty->display("xml.tpl");
  325. }
  326. function get_time_from_get(){
  327. //if any get params were passed, overwrite the default date
  328. if (isset($_GET['day'])){
  329. $day = $_GET['day'];
  330. }else{
  331. throw_error("No day set in get string");
  332. }
  333. if (isset($_GET['month'])){
  334. $month = $_GET['month'];
  335. }else{
  336. throw_error("No year set in get string");
  337. }
  338. if (isset($_GET['year'])){
  339. $year = $_GET['year'];
  340. }else{
  341. throw_error("No year set in get string");
  342. }
  343. return mktime(0,0,0,$month,$day,$year);
  344. }
  345. function throw_error($message){
  346. throw new exception($message);
  347. }
  348. function redirect ($url){
  349. header("Location: $url");
  350. }
  351. ?>