<?php
//
// Scraper for Wealden District Council Planning Website
// Created by Matt Ford on Sunday 24th August 2008
//
// The script works according to the requirements of PlanningAlerts.com.
// The default output follows the PlanningAlerts requirements;
// to get all the data, add 'all=true' to the end of the query string.
//
// You need to set the location of the 'cookie jar' for the scraper to work
$cookiejar = '/tmp/wealden_cookies.txt';

// Returns $params[$name] as an integer when it consists solely of decimal
// digits and lies within [$min, $max]; otherwise returns $default.
// Loose comparisons on the raw query-string value would accept input such as
// '1abc' or '5&x=1', and that text would then be pasted into the search URL.
function bounded_int_param($params, $name, $min, $max, $default) {
	if (!isset($params[$name])) {
		return $default;
	}
	$raw = $params[$name];
	if (is_int($raw)) {
		$value = $raw;
	} elseif (is_string($raw) && preg_match('/^[0-9]+$/D', $raw)) {
		// An over-long digit string saturates at PHP_INT_MAX and is then
		// rejected by the range check below.
		$value = (int) $raw;
	} else {
		return $default;
	}
	return ($value >= $min && $value <= $max) ? $value : $default;
}

//Check a day is set and is valid (defaults to 1)
$day = bounded_int_param($_GET, 'day', 1, 31, 1);
//Check a month is set and is valid (defaults to 1)
$month = bounded_int_param($_GET, 'month', 1, 12, 1);
//Check a year is set and is valid: 2004 up to the current year (defaults to the current year)
$current_year = (int) gmdate('Y');
$year = bounded_int_param($_GET, 'year', 2004, $current_year, $current_year);
//Do you want all information or only the common stuff?
//Any value PHP does not loosely treat as false enables it, so 'all=0' or an
//empty value keeps the default output.
$all = (isset($_GET['all']) && $_GET['all'] != false);
//Date in day/month/year form (not zero padded), as used in the search URL
$date = $day.'/'.$month.'/'.$year;
//Filled in by parse_search() and parse_detail(), keyed by application number
$applications = array();
// Fetches $url with cURL and returns the response body as a string, or false
// if the transfer fails (curl_exec() with CURLOPT_RETURNTRANSFER enabled).
//
// $url         Address to request; it is also sent as the Referer header.
// $post_string URL-encoded body to send when the request is a POST.
// $post_count  When greater than zero the request is sent as a POST carrying
//              $post_string; otherwise a plain GET is made.
function fetch_page($url,$post_string = false,$post_count = false) {
	// The cookie jar path is configured at file level. It has to be imported
	// explicitly: without this the variable is undefined inside the function,
	// no jar is set, and cookies are lost between requests.
	global $cookiejar;

	$ch = curl_init();
	curl_setopt($ch, CURLOPT_URL, $url);
	curl_setopt($ch, CURLOPT_HEADER, 0);
	curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
	curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
	curl_setopt($ch, CURLOPT_REFERER, $url);
	if(!empty($cookiejar)) {
		// Same file for reading and writing so cookies carry over to later calls.
		curl_setopt($ch, CURLOPT_COOKIEJAR, $cookiejar);
		curl_setopt($ch, CURLOPT_COOKIEFILE, $cookiejar);
	}
	if($post_count > 0) {
		// CURLOPT_POST is an on/off switch, not a field count.
		curl_setopt($ch, CURLOPT_POST, true);
		curl_setopt($ch, CURLOPT_POSTFIELDS, $post_string);
	}
	// The handle is local, so it is released when the function returns.
	return curl_exec($ch);
}
// Returns the text that follows the first '</h5>' in $string (up to the next
// '</h5>', if there is one), with HTML tags stripped and surrounding
// whitespace trimmed.
// Returns an empty string when $string is not a string or contains no
// '</h5>', so callers may pass a missing list item without raising
// undefined-offset or null-argument warnings.
function extract_data($string) {
	if (!is_string($string)) {
		return '';
	}
	$parts = explode('</h5>', $string);
	if (!isset($parts[1])) {
		return '';
	}
	return trim(strip_tags($parts[1]));
}
// Loads one page of search results for applications received on $date and
// records each application found in the global $applications array (keyed by
// application number), calling parse_detail() for every one. Follows the
// "Next" link recursively, stopping after page 5.
//
// $date    Date string placed in both the StartDate and EndDate parameters.
// $page_no Results page to load.
// $AppRef  Value for the AppRef query parameter.
//
// Returns nothing. If the page cannot be fetched or the results container is
// missing, the function returns without adding anything.
function parse_search($date,$page_no=1,$AppRef='') {
	global $applications;

	$base = 'http://www.planning.wealden.gov.uk/aspxpages/';
	// The search page and the copyright page take exactly the same query string.
	$query = '?pageno='.$page_no.'&QueryType=9&WeekNo=&WeekStart=&WeekEnd=&CaseNo=&Add=&ShowInd=&DocId=&AppRef='.$AppRef.'&Category=DC&DateType=R&StartDate='.$date.'&EndDate='.$date.'&Agent=&ParishCode=&WardCode=&Parish=&Ward=&AdvAppNo=&AdvAdd=&AdvProposal=&DecisionCode=&Det=';

	$html = fetch_page($base.'SearchResults.aspx'.$query);
	if (!is_string($html)) {
		return;
	}

	if (strpos($html, "<title>Wealden District Council's applications online - Copyright, disclaimer & personal data</title>") !== false) {
		//Accept their terms: post the form back with its __VIEWSTATE value
		$viewstate = '';
		$parts = explode('<input type="hidden" name="__VIEWSTATE" value="', $html, 2);
		if (isset($parts[1])) {
			$parts = explode('" />', $parts[1], 2);
			$viewstate = $parts[0];
		}
		$html = fetch_page($base.'Copyright.aspx'.$query, 'btnCopyrightAccept=Accept&__VIEWSTATE='.urlencode($viewstate).'', 2);
		if (!is_string($html)) {
			return;
		}
	}

	// Keep only the markup between the results container and the pager.
	$parts = explode('<span id="lblSearchResults">', $html);
	if (!isset($parts[1])) {
		return;
	}
	$parts = explode('<div id="pagenumbers">', $parts[1]);
	$next_page = isset($parts[1]) ? $parts[1] : '';

	$chunks = explode('</ul>', $parts[0]);
	// Drop the 11th chunk - presumably the remainder after the tenth result
	// on a full page (TODO confirm page size against the live site).
	unset($chunks[10]);

	foreach ($chunks as $chunk) {
		$items = explode('</li>', $chunk);
		$AppNo = extract_data($items[0]);
		if (empty($AppNo)) {
			continue;
		}
		$Loc = extract_data(isset($items[1]) ? $items[1] : '');
		$applications[$AppNo]['AppNo'] = $AppNo;
		$applications[$AppNo]['Address'] = $Loc;
		// Pull a UK-style postcode out of the address when one is present.
		if (preg_match("/([A-Z]{1,2}[0-9][0-9A-Z]?\s?[0-9][A-Z]{2})/", $Loc, $PostCode)) {
			$applications[$AppNo]['PostCode'] = $PostCode[1];
		} else {
			$applications[$AppNo]['PostCode'] = false;
		}
		$applications[$AppNo]['Info'] = extract_data(isset($items[2]) ? $items[2] : '');
		parse_detail($AppNo);
	}

	if (strpos($next_page, 'Next</a></div></span> <br />') !== false) {
		$page_no++;
		if ($page_no < 6) {
			// NOTE(review): the reference extracted from the last chunk (possibly
			// empty) is passed as AppRef for the next page, as the original code
			// did - confirm this is what the site's paging expects.
			parse_search($date, $page_no, $AppNo);
		}
	}
}
// Fetches the detail page for application $AppNo and stores its fields in
// the global $applications[$AppNo] entry.
//
// Each field is read from a fixed position in the '</li>'-separated list
// found between '<span id="lblSearchDetails">' and '<div class="linkborder">'.
// If the page cannot be fetched, or a marker or list item is missing, the
// affected fields are stored as empty strings, so every key is always set.
//
// $AppNo Application reference; inserted into the detail URL as-is.
function parse_detail($AppNo) {
	global $applications;

	$url = 'http://www.planning.wealden.gov.uk/aspxpages/ResultsDetail.aspx?appref='.$AppNo.'&Category=DC';
	$page = fetch_page($url);

	$items = array();
	if (is_string($page)) {
		$parts = explode('<span id="lblSearchDetails">', $page, 2);
		if (isset($parts[1])) {
			$parts = explode('<div class="linkborder">', $parts[1], 2);
			$items = explode('</li>', $parts[0]);
		}
	}

	// Output key => position of the corresponding list item on the page.
	$fields = array(
		'AppType'     => 1,
		'DateRec'     => 2,
		'DateExp'     => 3,
		'Parish'      => 6,
		'GridRef'     => 7,
		'UPRN'        => 8,
		'Status'      => 9,
		'DateConExp'  => 10,
		'DateComDel'  => 11,
		'Decision'    => 12,
		'DateDec'     => 13,
		'CaseOfficer' => 14,
	);

	foreach ($fields as $key => $index) {
		$value = isset($items[$index]) ? extract_data($items[$index]) : '';
		if ($key === 'Status') {
			// Keep only the part of the status before the first ' - '.
			$pieces = explode(' - ', $value);
			$value = trim($pieces[0]);
		}
		$applications[$AppNo][$key] = $value;
	}
}
// Prepares a scraped value for use as XML element content.
// HTML entities left in the text are decoded first, so that entities XML
// does not define (such as &nbsp;) and already-escaped ampersands are not
// emitted verbatim or escaped twice. The result is then escaped for XML.
// Byte sequences that are not valid UTF-8 are replaced, not emitted.
// A value of false (used for a missing postcode) becomes an empty string.
function xml_text($value) {
	$decoded = html_entity_decode((string) $value, ENT_QUOTES, 'UTF-8');
	return htmlspecialchars($decoded, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
}

// Scrape everything first, then emit the XML document.
parse_search($date);

header("Content-Type: text/xml");
echo "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
echo "<planning>\n";
echo "\t<authority_name>Wealden</authority_name>\n";
echo "\t<authority_short_name>Wealden</authority_short_name>\n";
echo "\t<applications>\n";

// Extra elements written only when 'all' output was requested: tag => data key.
$extra_fields = array(
	'application_type'         => 'AppType',
	'date_expires'             => 'DateExp',
	'parish'                   => 'Parish',
	'grid_reference'           => 'GridRef',
	'uprn'                     => 'UPRN',
	'status'                   => 'Status',
	'consultation_expiry_date' => 'DateConExp',
	'committee_delegated_date' => 'DateComDel',
	'decision'                 => 'Decision',
	'decision_date'            => 'DateDec',
	'case_officer'             => 'CaseOfficer',
);

foreach($applications as $application) {
	echo "\t\t<application>\n";
	echo "\t\t\t<council_reference>".xml_text($application['AppNo'])."</council_reference>\n";
	echo "\t\t\t<address>".xml_text($application['Address'])."</address>\n";
	echo "\t\t\t<postcode>".xml_text($application['PostCode'])."</postcode>\n";
	echo "\t\t\t<description>".xml_text($application['Info'])."</description>\n";
	// Inside CDATA the reference is written as-is, so the '&' needs no escaping.
	echo "\t\t\t<info_url><![CDATA[http://www.planning.wealden.gov.uk/aspxpages/ResultsDetail.aspx?appref=".$application['AppNo']."&Category=DC]]></info_url>\n";
	echo "\t\t\t<comment_url>planning@wealden.gov.uk</comment_url>\n";
	echo "\t\t\t<date_received>".xml_text($application['DateRec'])."</date_received>\n";
	if($all) {
		foreach($extra_fields as $tag => $key) {
			echo "\t\t\t<".$tag.">".xml_text($application[$key])."</".$tag.">\n";
		}
	}
	echo "\t\t</application>\n";
}
echo "\t</applications>\n";
echo "</planning>";