Automatically exported from code.google.com/p/planningalerts
Você não pode selecionar mais de 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.
 
 
 
 
 
 

414 linhas
13 KiB

  1. <?php
  2. //Includes
  3. require_once('config.php');
  4. require_once('application.php');
  5. require_once ("PEAR/HTTP/Request.php");
  6. require_once('phpcoord.php');
  7. //Generic scrapers
  8. function scrape_applications_publicaccess ($search_url, $info_url_base, $comment_url_base){
  9. $applications = array();
  10. $application_pattern = "/<tr><th>([0-9]*)<\/th>([^;]*)([^<]*)/";
  11. //grab the page
  12. $html = safe_scrape_page($search_url);
  13. //clean html
  14. $html = str_replace("\r\n","", $html);
  15. preg_match_all($application_pattern, $html, $application_matches, PREG_PATTERN_ORDER);
  16. foreach ($application_matches[0] as $application_match){
  17. $detail_pattern = "/<td>([^<])*/";
  18. preg_match_all($detail_pattern, $application_match, $detail_matches, PREG_PATTERN_ORDER);
  19. $application = new Application();
  20. //match the basic details
  21. $application->council_reference = str_replace("<td>", "", $detail_matches[0][0]);
  22. $application->date_received = str_replace("<td>", "", $detail_matches[0][1]);
  23. $application->address = str_replace("<td>", "", $detail_matches[0][2]);
  24. //$application->status = str_replace("<td>", "", $detail_matches[0][4]);
  25. //match case number
  26. $casenumber_pattern = "/caseno=([^&]*)/";
  27. preg_match($casenumber_pattern, $application_match, $casenumber_matches);
  28. $case_number ="";
  29. if(sizeof($casenumber_matches)>0){
  30. $case_number = str_replace("caseno=","", $casenumber_matches[0]);
  31. }
  32. //if weve found a caase number, then get the details
  33. if($case_number !=""){
  34. //Comment and info urls
  35. $application->info_url = $info_url_base . $case_number;
  36. $application->comment_url = $comment_url_base . $case_number;
  37. //Get the postcode
  38. $postcode_pattern = "/[A-Z][A-Z]?[0-9][A-Z0-9]? ?[0-9][ABDEFGHJLNPQRSTUWXYZ]{2}/";
  39. preg_match($postcode_pattern, $application->address, $postcode_matches);
  40. if(isset($postcode_matches[0])){
  41. $application->postcode = $postcode_matches[0];
  42. }
  43. //get full details
  44. $details_html = "";
  45. $details_html = safe_scrape_page($info_url_base . $case_number);
  46. //regular expresion and clean
  47. $full_detail_pattern = '/id="desc" rows="[1-9]" cols="80" class="cDetailInput">([^<]*)/';
  48. preg_match($full_detail_pattern, $details_html, $full_detail_matches);
  49. if (isset($full_detail_matches[0])){
  50. $application->description = substr($full_detail_matches[0], strpos($full_detail_matches[0], ">") + 1);
  51. }
  52. //only add it if we have a postcode (bit useless otherwise)
  53. if(is_postcode($application->postcode)){
  54. array_push($applications, $application);
  55. }
  56. }else{
  57. error_log("Unable to find case number for an application at " . $search_url);
  58. }
  59. }
  60. //return
  61. return $applications;
  62. }
  63. function scrape_applications_wam ($search_url, $info_url_base, $comment_url_base){
  64. $applications = array();
  65. $application_pattern = '/<tr><td class=[^>]*>([^<]*)<\/td><td class=[^>]*><a href="[^"]*">([^<]*)<\/a><\/td><td class=[^>]*>([^<]*)<\/td><td class=[^>]*>([^<]*)<\/td>/';
  66. //grab the page
  67. $html = safe_scrape_page($search_url);
  68. //clean html
  69. $html = str_replace("\r\n","", $html);
  70. preg_match_all($application_pattern, $html, $application_matches, PREG_SET_ORDER);
  71. foreach ($application_matches as $application_match){
  72. if ($application_match[4] != 'Current') { continue; }
  73. $application = new Application();
  74. //match the basic details
  75. $application->council_reference = $application_match[2];
  76. $case_number = $application_match[2];
  77. $application->date_received = $application_match[1];
  78. $application->address = $application_match[3];
  79. //$application->status = $application_match[4];
  80. //if weve found a caase number, then get the details
  81. if($case_number !=""){
  82. //Comment and info urls
  83. $application->info_url = $info_url_base . $case_number;
  84. $application->comment_url = $comment_url_base . $case_number;
  85. //Get the postcode
  86. $postcode_pattern = "/[A-Z][A-Z]?[0-9][A-Z0-9]? ?[0-9][ABDEFGHJLNPQRSTUWXYZ]{2}/";
  87. preg_match($postcode_pattern, $application->address, $postcode_matches);
  88. if(isset($postcode_matches[0])){
  89. $application->postcode = $postcode_matches[0];
  90. }
  91. //get full details
  92. $details_html = "";
  93. $details_html = safe_scrape_page($info_url_base . $case_number);
  94. $details_html = str_replace("\r\n","",$details_html);
  95. //regular expresion and clean
  96. $full_detail_pattern = '/Development:<.*<td colspan="3">([^<]*)<\/td>/';
  97. preg_match($full_detail_pattern, $details_html, $full_detail_matches);
  98. if (isset($full_detail_matches[1])){
  99. $application->description = $full_detail_matches[1];
  100. }
  101. //only add it if we have a postcode (bit useless otherwise)
  102. if(is_postcode($application->postcode)){
  103. //removed the xy for the moment. It is slowing down the scrape and will be added when the app is parsed anyway (Richard)
  104. /* $xy = postcode_to_location($application->postcode);
  105. $application->x = $xy[0];
  106. $application->y = $xy[1];
  107. $os = new OSRef($xy[0],$xy[1]);
  108. $latlon = $os->toLatLng();
  109. $application->lat = $latlon->lat;
  110. $application->lon = $latlon->lng;
  111. */
  112. array_push($applications, $application);
  113. }
  114. }else{
  115. error_log("Unable to find case number for an application at " . $search_url);
  116. }
  117. }
  118. //return
  119. return $applications;
  120. }
  121. // Council-specific scrapers
  122. function scrape_applications_islington ($search_url, $info_url_base, $comment_url_base){
  123. $applications = array();
  124. $application_pattern = '/<TR>([^<]*)<TD class="lg" valign="top" >([^<]*)<a href([^<]*)<a href=wphappcriteria.display>Search Criteria(.*)([^<]*)<(.*)>([^<]*)<TD class="lg" >([^<]*)<\/TD>([^<]*)<TD class="lg" >([^<]*)<INPUT TYPE=HIDDEN NAME([^>]*)([^<]*)/';
  125. //grab the page
  126. $html = safe_scrape_page($search_url);
  127. preg_match_all($application_pattern, $html, $application_matches, PREG_PATTERN_ORDER);
  128. foreach ($application_matches[0] as $application_match){
  129. $application_string = str_replace("\n","", $application_match);
  130. $reference_pattern = '/Search Results<\/a>">([^<]*)/';
  131. preg_match_all($reference_pattern, $application_string, $reference_matches, PREG_PATTERN_ORDER);
  132. $application = new Application();
  133. //match the applicaiton number
  134. $application->council_reference = str_replace('Search Results</a>">', "", $reference_matches[0][0]);
  135. //Comment and info urls
  136. $application->info_url = $info_url_base . $application->council_reference;
  137. //$application->comment_url = $comment_url_base . $case_number;
  138. //get full details
  139. $details_html = "";
  140. $details_html = safe_scrape_page($info_url_base . $application->council_reference);
  141. // $details_html = str_replace("\n","", $details_html);
  142. // $details_html = str_replace("\t","", $details_html);
  143. //Details
  144. print $details_html;exit;
  145. //Address
  146. $address_pattern = '/Main location:<\/label><\/td>([^<]*)<td colspan="3">([^<]*)/';
  147. $address = "";
  148. preg_match($address_pattern, $details_html, $address_matches);
  149. if(isset($address_matches[2])){
  150. $application->address = $address_matches[2];
  151. }
  152. //postcode
  153. $postcode_pattern = "/[A-Z][A-Z]?[0-9][A-Z0-9]? ?[0-9][ABDEFGHJLNPQRSTUWXYZ]{2}/";
  154. preg_match($postcode_pattern, $application->address, $postcode_matches);
  155. if(isset($postcode_matches[0])){
  156. $application->postcode = $postcode_matches[0];
  157. }
  158. //only add it if we have a postcode (bit useless otherwise)
  159. if(is_postcode($application->postcode)){
  160. array_push($applications, $application);
  161. }
  162. }
  163. //return
  164. return $applications;
  165. }
  166. //validate postcode
  167. function is_postcode ($postcode){
  168. $valid = false;
  169. $postcode=str_replace(" ","",$postcode);
  170. if(ereg ('^[a-zA-Z]{1,2}[0-9]{1,2}[a-zA-Z]{0,1}[0-9]{1}[a-zA-Z]{2}$', $postcode)){
  171. $valid = true;
  172. }
  173. return $valid;
  174. }
  175. //Tiny url
  176. function tiny_url($url,$length=30){
  177. // make nasty big url all small
  178. if (strlen($url) >= $length){
  179. $tinyurl = @file ("http://tinyurl.com/api-create.php?url=$url");
  180. if (is_array($tinyurl)){
  181. $tinyurl = join ('', $tinyurl);
  182. } else {
  183. $tinyurl = $url;
  184. }
  185. } else {
  186. $tinyurl = $url;
  187. }
  188. return $tinyurl;
  189. }
  190. //Google maps url
  191. function googlemap_url_from_postcode($postcode, $zoom = 15){
  192. $postcode = strtolower(str_replace(" ", "+", $postcode));
  193. return "http://maps.google.com/maps?q=$postcode&z=$zoom";
  194. }
  195. //postcode to location
  196. function postcode_to_location($postcode){
  197. $x = 0;
  198. $y = 0;
  199. $clean_postcode = strtolower($postcode);
  200. $clean_postcode = str_replace(" ","+", $clean_postcode);
  201. $url = "http://www.streetmap.co.uk/newsearch.srf?type=Postcode&name=" . $clean_postcode;
  202. $html = file_get_contents($url);
  203. $x_pattern = "/var _LocationX=\d*;/";
  204. $y_pattern = "/var _LocationY=\d*;/";
  205. //X
  206. preg_match($x_pattern, $html, $matches);
  207. if(sizeof($matches) >0){
  208. $x = $matches[0];
  209. $x = str_replace('var _LocationX=',"", $x);
  210. $x = str_replace(";","", $x);
  211. }
  212. //Y
  213. preg_match($y_pattern, $html, $matches);
  214. if(sizeof($matches) >0){
  215. $y = str_replace("var _LocationY=","", $matches[0]);
  216. $y = str_replace(";","", $y);
  217. }
  218. $return = array();
  219. $return[0] = $x;
  220. $return[1] = $y;
  221. return $return;
  222. }
  223. function valid_email ($string) {
  224. $valid = false;
  225. if (!ereg('^[-!#$%&\'*+\\./0-9=?A-Z^_`a-z{|}~]+'.
  226. '@'.
  227. '[-!#$%&\'*+\\/0-9=?A-Z^_`a-z{|}~]+\.'.
  228. '[-!#$%&\'*+\\./0-9=?A-Z^_`a-z{|}~]+$', $string)) {
  229. $valid = false;
  230. } else {
  231. $valid = true;
  232. }
  233. return $valid;
  234. }
  235. function alert_size_to_meters($alert_area_size){
  236. $area_size_meters = 0;
  237. if ($alert_area_size == "s"){
  238. $area_size_meters = SMALL_ZONE_SIZE;
  239. }elseif ($alert_area_size == "m"){
  240. $area_size_meters = MEDIUM_ZONE_SIZE;
  241. }elseif ($alert_area_size == "l"){
  242. $area_size_meters = LARGE_ZONE_SIZE;
  243. }
  244. return $area_size_meters;
  245. }
  246. //Send a text email
  247. function send_text_email($to, $from_name, $from_email, $subject, $body){
  248. $headers = 'MIME-Version: 1.0' . "\r\n";
  249. $headers .= 'Content-type: text/plain; charset=iso-8859-1' . "\r\n";
  250. $headers .= 'From: ' . $from_name. ' <' . $from_email . ">\r\n";
  251. mail($to, $subject, $body, $headers);
  252. }
  253. // Format a date to mysql format
  254. function mysql_date($date){
  255. return date("Y-m-d H::i:s", $date);
  256. }
  257. function safe_scrape_page($url, $method = "GET"){
  258. error_log(print_r($url, true));
  259. $page = "";
  260. for ($i=0; $i < 3; $i++){
  261. if($page == false){
  262. if (SCRAPE_METHOD == "PEAR"){
  263. $page = scrape_page_pear($url, $method);
  264. }else{
  265. $page = scrape_page_curl($url, $method);
  266. }
  267. }
  268. }
  269. return $page;
  270. }
  271. function scrape_page_pear($url, $method = "GET"){
  272. $page = "";
  273. $request = new HTTP_Request($url, array("method" => $method));
  274. $request->sendRequest();
  275. $page = $request->getResponseBody();
  276. return $page;
  277. }
  278. function scrape_page_curl($url) {
  279. $ch = curl_init($url);
  280. curl_setopt($ch,CURLOPT_RETURNTRANSFER,TRUE);
  281. curl_setopt($ch,CURLOPT_FOLLOWLOCATION,TRUE);
  282. return curl_exec($ch);
  283. }
  284. function display_applications($applications, $authority_name, $authority_short_name){
  285. //smarty
  286. $smarty = new Smarty;
  287. $smarty->force_compile = true;
  288. $smarty->compile_dir = SMARTY_COMPILE_DIRECTORY;
  289. $smarty->template_dir = "../templates";
  290. $smarty->assign("authority_name", $authority_name);
  291. $smarty->assign("authority_short_name", $authority_short_name);
  292. if (sizeof($applications) > 0){
  293. $smarty->assign("applications", $applications);
  294. }
  295. $smarty->display("xml.tpl");
  296. }
  297. function get_time_from_get(){
  298. //if any get params were passed, overwrite the default date
  299. if (isset($_GET['day'])){
  300. $day = $_GET['day'];
  301. }else{
  302. throw_error("No day set in get string");
  303. }
  304. if (isset($_GET['month'])){
  305. $month = $_GET['month'];
  306. }else{
  307. throw_error("No year set in get string");
  308. }
  309. if (isset($_GET['year'])){
  310. $year = $_GET['year'];
  311. }else{
  312. throw_error("No year set in get string");
  313. }
  314. return mktime(0,0,0,$month,$day,$year);
  315. }
  316. function throw_error($message){
  317. throw new exception($message);
  318. }
  319. ?>