Automatically exported from code.google.com/p/planningalerts
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

154 lines
7.0 KiB

  1. <?php
  2. //
  3. // Scraper for Wealden District Council Planning Website
  4. // Created by Matt Ford on Sunday 24th August 2008
  5. //
  6. // The script works according to requirements of PlanningAlerts.com
  7. // The default output is according to PlanningAlerts requirements,
  8. // to get all the data add 'all=true' to the end of the query string
  9. //
  // Location of the cookie jar file handed to cURL. It must be set to a path
  // the PHP process can write to for the scraper to work.
  $cookiejar = '/tmp/wealden_cookies.txt';

  /**
   * Read a whole-number parameter from an array of request values.
   *
   * Only strings made up entirely of ASCII digits are accepted, so nothing
   * other than a plain integer can end up in the URLs built from the result.
   *
   * @param array  $params  Request values, e.g. $_GET.
   * @param string $name    Key to look up in $params.
   * @param int    $min     Smallest accepted value (inclusive).
   * @param int    $max     Largest accepted value (inclusive).
   * @param int    $default Returned when the value is missing, is not a string
   *                        of digits, or lies outside $min..$max.
   * @return int
   */
  function get_int_param($params, $name, $min, $max, $default) {
      if (!isset($params[$name]) || !is_string($params[$name])) {
          return $default;
      }
      // \z (not $) so that a trailing newline is rejected as well.
      if (!preg_match('/^[0-9]+\z/', $params[$name])) {
          return $default;
      }
      $value = (int) $params[$name];
      return ($value >= $min && $value <= $max) ? $value : $default;
  }

  // Day of the month (1-31); falls back to 1.
  $day = get_int_param($_GET, 'day', 1, 31, 1);
  // Month (1-12); falls back to 1.
  $month = get_int_param($_GET, 'month', 1, 12, 1);
  // Year from 2004 up to the current year; falls back to the current year.
  $year = get_int_param($_GET, 'year', 2004, (int) gmdate('Y'), (int) gmdate('Y'));
  // Output every scraped field, or only the common ones? Any non-empty,
  // non-"0" value of 'all' switches the full output on.
  $all = !empty($_GET['all']);
  // Date in the d/m/yyyy form placed into the search URL.
  $date = $day.'/'.$month.'/'.$year;
  // Scraped applications, keyed by application number.
  $applications = array();
  /**
   * Fetch a URL with cURL and return the response body.
   *
   * Redirects are followed, the requested URL is also sent as the referer,
   * and cookies are read from and written to the file named by the
   * file-level $cookiejar variable.
   *
   * @param string      $url         Address to request.
   * @param string|bool $post_string URL-encoded request body; only used when
   *                                 $post_count is greater than zero.
   * @param int|bool    $post_count  A value greater than zero turns the
   *                                 request into a POST of $post_string.
   * @return string|bool The response body, or false when curl_exec() fails.
   */
  function fetch_page($url,$post_string = false,$post_count = false) {
      // The jar path is defined at file level; without this declaration it
      // is undefined inside the function and no cookies would be kept.
      global $cookiejar;

      $ch = curl_init();
      curl_setopt($ch, CURLOPT_URL, $url);
      curl_setopt($ch, CURLOPT_HEADER, 0);
      curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
      curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
      curl_setopt($ch, CURLOPT_REFERER, $url);
      if (!empty($cookiejar)) {
          curl_setopt($ch, CURLOPT_COOKIEJAR, $cookiejar);
          curl_setopt($ch, CURLOPT_COOKIEFILE, $cookiejar);
      }
      if ($post_count > 0) {
          // CURLOPT_POST is an on/off switch, not a field count.
          curl_setopt($ch, CURLOPT_POST, true);
          curl_setopt($ch, CURLOPT_POSTFIELDS, $post_string);
      }

      // $ch is local to this call, so the handle is released on return.
      return curl_exec($ch);
  }
  /**
   * Extract the plain-text value that follows an <h5> label in a fragment.
   *
   * The fragment is split on '</h5>' and the piece directly after the first
   * closing tag is returned with markup stripped and whitespace trimmed.
   *
   * @param string|null $string HTML fragment.
   * @return string The cleaned value, or '' when the fragment contains no
   *                '</h5>' (including null or empty input).
   */
  function extract_data($string) {
      $parts = explode('</h5>', (string) $string);
      if (!isset($parts[1])) {
          // No label terminator: there is nothing to extract.
          return '';
      }
      return trim(strip_tags($parts[1]));
  }
  /**
   * Scrape one page of search results for a date and follow the "Next" link.
   *
   * Every application found is stored in the global $applications array under
   * its application number (keys AppNo, Address, PostCode, Info) and then
   * completed by parse_detail(). When the copyright/disclaimer page is served
   * instead of results, its form is submitted first. Pages are followed
   * recursively while a "Next" link is present and the page number is below 6.
   *
   * @param string $date    Date used as both StartDate and EndDate.
   * @param int    $page_no Results page to request.
   * @param string $AppRef  Value placed in the AppRef query parameter.
   * @return void
   */
  function parse_search($date,$page_no=1,$AppRef='') {
      global $applications;

      // The results page and the copyright page take the same query string.
      $base = 'http://www.planning.wealden.gov.uk/aspxpages/';
      $query = '?pageno='.$page_no.'&QueryType=9&WeekNo=&WeekStart=&WeekEnd=&CaseNo=&Add=&ShowInd=&DocId=&AppRef='.$AppRef.'&Category=DC&DateType=R&StartDate='.$date.'&EndDate='.$date.'&Agent=&ParishCode=&WardCode=&Parish=&Ward=&AdvAppNo=&AdvAdd=&AdvProposal=&DecisionCode=&Det=';

      $data = fetch_page($base.'SearchResults.aspx'.$query);
      if (!is_string($data)) {
          // Transfer failed; nothing to parse.
          return;
      }

      if (strpos($data, "<title>Wealden District Council's applications online - Copyright, disclaimer & personal data</title>") !== false) {
          // Accept their terms: post the form back with its __VIEWSTATE value.
          $parts = explode('<input type="hidden" name="__VIEWSTATE" value="', $data, 2);
          $viewstate = isset($parts[1]) ? $parts[1] : '';
          $parts = explode('" />', $viewstate, 2);
          $viewstate = $parts[0];

          $data = fetch_page($base.'Copyright.aspx'.$query, 'btnCopyrightAccept=Accept&__VIEWSTATE='.urlencode($viewstate), 2);
          if (!is_string($data)) {
              return;
          }
      }

      // Keep only the markup between the results span and the pager.
      $parts = explode('<span id="lblSearchResults">', $data);
      if (!isset($parts[1])) {
          // No results section on this page.
          return;
      }
      $parts = explode('<div id="pagenumbers">', $parts[1]);
      $results = $parts[0];
      $next_page = isset($parts[1]) ? $parts[1] : '';

      $chunks = explode('</ul>', $results);
      // The chunk at index 10 is discarded; presumably it is the markup that
      // trails the tenth result on a full page (TODO confirm).
      unset($chunks[10]);

      $AppNo = '';
      foreach ($chunks as $chunk) {
          $fields = explode('</li>', $chunk);
          $AppNo = extract_data($fields[0]);
          if (empty($AppNo)) {
              continue;
          }

          $Loc = isset($fields[1]) ? extract_data($fields[1]) : '';
          $applications[$AppNo]['AppNo'] = $AppNo;
          $applications[$AppNo]['Address'] = $Loc;

          // Pick a UK-style postcode out of the address, if there is one.
          if (preg_match("/([A-Z]{1,2}[0-9][0-9A-Z]?\s?[0-9][A-Z]{2})/", $Loc, $PostCode)) {
              $applications[$AppNo]['PostCode'] = $PostCode[1];
          } else {
              $applications[$AppNo]['PostCode'] = false;
          }

          $applications[$AppNo]['Info'] = isset($fields[2]) ? extract_data($fields[2]) : '';
          parse_detail($AppNo);
      }

      if (strpos($next_page, 'Next</a></div></span> <br />') !== false) {
          $page_no++;
          if ($page_no < 6) {
              // NOTE(review): the last application number seen on this page is
              // sent as AppRef for the next page, not the $AppRef this call
              // received. Kept as-is; confirm this is what the site expects.
              parse_search($date, $page_no, $AppNo);
          }
      }
  }
  /**
   * Fetch an application's detail page and add its fields to $applications.
   *
   * The list between the details span and the link border is split on
   * '</li>' and selected items are stored under $applications[$AppNo].
   * Every key is always set; a missing page or missing item yields ''.
   *
   * @param string $AppNo Application number; used in the URL and as the key
   *                      into the global $applications array.
   * @return void
   */
  function parse_detail($AppNo) {
      global $applications;

      $url = 'http://www.planning.wealden.gov.uk/aspxpages/ResultsDetail.aspx?appref='.$AppNo.'&Category=DC';
      $page = fetch_page($url);

      // List items of the details section; stays empty if it cannot be found.
      $items = array();
      if (is_string($page)) {
          $parts = explode('<span id="lblSearchDetails">', $page, 2);
          if (isset($parts[1])) {
              $parts = explode('<div class="linkborder">', $parts[1], 2);
              $items = explode('</li>', $parts[0]);
          }
      }

      // Output key => position of the matching list item on the page.
      $positions = array(
          'AppType'     => 1,
          'DateRec'     => 2,
          'DateExp'     => 3,
          'Parish'      => 6,
          'GridRef'     => 7,
          'UPRN'        => 8,
          'Status'      => 9,
          'DateConExp'  => 10,
          'DateComDel'  => 11,
          'Decision'    => 12,
          'DateDec'     => 13,
          'CaseOfficer' => 14,
      );
      foreach ($positions as $key => $index) {
          $applications[$AppNo][$key] = isset($items[$index]) ? extract_data($items[$index]) : '';
      }

      // Only the text before the first ' - ' of the status item is kept.
      $status = explode(' - ', $applications[$AppNo]['Status'], 2);
      $applications[$AppNo]['Status'] = trim($status[0]);
  }
  // Scrape everything for the requested date, then print the collected
  // applications as the PlanningAlerts XML document.
  parse_search($date);

  header("Content-Type: text/xml");

  // Element name => key in each application record. The first table is
  // always printed; the second only when all fields were asked for.
  // NOTE(review): values are written into the XML exactly as scraped, with
  // no escaping applied here.
  $leading_tags = array(
      'council_reference' => 'AppNo',
      'address'           => 'Address',
      'postcode'          => 'PostCode',
      'description'       => 'Info',
  );
  $extended_tags = array(
      'application_type'         => 'AppType',
      'date_expires'             => 'DateExp',
      'parish'                   => 'Parish',
      'grid_reference'           => 'GridRef',
      'uprn'                     => 'UPRN',
      'status'                   => 'Status',
      'consultation_expiry_date' => 'DateConExp',
      'committee_delegated_date' => 'DateComDel',
      'decision'                 => 'Decision',
      'decision_date'            => 'DateDec',
      'case_officer'             => 'CaseOfficer',
  );

  echo "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
      ."<planning>\n"
      ."\t<authority_name>Wealden District Council</authority_name>\n"
      ."\t<authority_short_name>Wealden</authority_short_name>\n"
      ."\t<applications>\n";

  foreach ($applications as $application) {
      echo "\t\t<application>\n";

      foreach ($leading_tags as $tag => $key) {
          echo "\t\t\t<".$tag.">".$application[$key]."</".$tag.">\n";
      }

      echo "\t\t\t<info_url><![CDATA[http://www.planning.wealden.gov.uk/aspxpages/ResultsDetail.aspx?appref=".$application['AppNo']."&Category=DC]]></info_url>\n";
      echo "\t\t\t<comment_url>planning@wealden.gov.uk</comment_url>\n";
      echo "\t\t\t<date_received>".$application['DateRec']."</date_received>\n";

      if ($all) {
          foreach ($extended_tags as $tag => $key) {
              echo "\t\t\t<".$tag.">".$application[$key]."</".$tag.">\n";
          }
      }

      echo "\t\t</application>\n";
  }

  echo "\t</applications>\n"
      ."</planning>";
  139. ?>