| @@ -0,0 +1,153 @@ | |||||
| <?php | |||||
| // | |||||
| // Scraper for Wealden District Council Planning Website | |||||
| // Created by Matt Ford on Sunday 24th August 2008 | |||||
| // | |||||
| // The script works according to requirements of PlanningAlerts.com | |||||
| // The default output is according to PlanningAlerts requirements, | |||||
| // to get all the data add 'all=true' to the end of the query string | |||||
| // | |||||
| // You need to set the location of the 'cookie jar' for the scraper to work | |||||
| $cookiejar = '/tmp/wealden_cookies.txt'; | |||||
| //Check a day is set and is valid | |||||
| $day = (isset($_GET['day']) && !empty($_GET['day']) && $_GET['day'] > 0 && $_GET['day'] < 32) ? $_GET['day'] : 1; | |||||
| //Check a month is set and is valid | |||||
| $month = (isset($_GET['month']) && !empty($_GET['month']) && $_GET['month'] > 0 && $_GET['month'] < 13) ? $_GET['month'] : 1; | |||||
| //Check a year is set and is valid | |||||
| $year = (isset($_GET['year']) && !empty($_GET['year']) && $_GET['year'] > 2003 && $_GET['year'] <= gmdate('Y')) ? $_GET['year'] : gmdate('Y'); | |||||
| //Do you want all information or only the common stuff? | |||||
| $all = (isset($_GET['all']) && $_GET['all'] != false) ? true : false; | |||||
| $date = $day.'/'.$month.'/'.$year; | |||||
| $applications = array(); | |||||
| function fetch_page($url,$post_string = false,$post_count = false) { | |||||
| if(!isset($ch)) { | |||||
| $ch = curl_init(); | |||||
| } | |||||
| curl_setopt($ch, CURLOPT_URL, $url); | |||||
| curl_setopt($ch, CURLOPT_HEADER, 0); | |||||
| curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); | |||||
| curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); | |||||
| curl_setopt($ch, CURLOPT_REFERER, $url); | |||||
| curl_setopt($ch, CURLOPT_COOKIEJAR, $cookiejar); | |||||
| curl_setopt($ch, CURLOPT_COOKIEFILE, $cookiejar); | |||||
| if($post_count > 0) { | |||||
| curl_setopt($ch, CURLOPT_POST, $post_count); | |||||
| curl_setopt($ch, CURLOPT_POSTFIELDS,$post_string); | |||||
| } | |||||
| $data = curl_exec($ch); | |||||
| return $data; | |||||
| } | |||||
| function extract_data($string) { | |||||
| list($junk,$return) = explode('</h5>',$string); | |||||
| return trim(strip_tags($return)); | |||||
| } | |||||
| function parse_search($date,$page_no=1,$AppRef='') { | |||||
| global $applications; | |||||
| $url = 'http://www.planning.wealden.gov.uk/aspxpages/SearchResults.aspx?pageno='.$page_no.'&QueryType=9&WeekNo=&WeekStart=&WeekEnd=&CaseNo=&Add=&ShowInd=&DocId=&AppRef='.$AppRef.'&Category=DC&DateType=R&StartDate='.$date.'&EndDate='.$date.'&Agent=&ParishCode=&WardCode=&Parish=&Ward=&AdvAppNo=&AdvAdd=&AdvProposal=&DecisionCode=&Det='; | |||||
| //echo 'Loading page '.$page_no.' of data for '.$date.' URL:'.$url.'<br />'; | |||||
| $data = fetch_page($url); | |||||
| if(strpos($data,"<title>Wealden District Council's applications online - Copyright, disclaimer & personal data</title>")) { | |||||
| //Accept their terms | |||||
| list($junk,$viewstate) = explode('<input type="hidden" name="__VIEWSTATE" value="',$data,2); | |||||
| list($viewstate,$junk) = explode('" />',$viewstate,2); | |||||
| //echo 'Attempting to bypass copyright page...<br />'; | |||||
| $url = 'http://www.planning.wealden.gov.uk/aspxpages/Copyright.aspx?pageno='.$page_no.'&QueryType=9&WeekNo=&WeekStart=&WeekEnd=&CaseNo=&Add=&ShowInd=&DocId=&AppRef='.$AppRef.'&Category=DC&DateType=R&StartDate='.$date.'&EndDate='.$date.'&Agent=&ParishCode=&WardCode=&Parish=&Ward=&AdvAppNo=&AdvAdd=&AdvProposal=&DecisionCode=&Det='; | |||||
| $data = fetch_page($url,'btnCopyrightAccept=Accept&__VIEWSTATE='.urlencode($viewstate).'',2); | |||||
| } | |||||
| list($junk,$data) = explode('<span id="lblSearchResults">', $data); | |||||
| list($data,$next_page) = explode('<div id="pagenumbers">',$data); | |||||
| $data = explode('</ul>',$data); | |||||
| unset($data[10]); | |||||
| foreach($data as $application) { | |||||
| $application = explode('</li>',$application); | |||||
| $AppNo = extract_data($application[0]); | |||||
| if(!empty($AppNo)) { | |||||
| $applications[$AppNo]['AppNo'] = $AppNo; | |||||
| $Loc = extract_data($application[1]); | |||||
| $applications[$AppNo]['Address'] = $Loc; | |||||
| preg_match("/([A-Z]{1,2}[0-9][0-9A-Z]?\s?[0-9][A-Z]{2})/",$Loc,$PostCode); | |||||
| if(isset($PostCode[1])) { | |||||
| $applications[$AppNo]['PostCode'] = $PostCode[1]; | |||||
| } else { | |||||
| $applications[$AppNo]['PostCode'] = false; | |||||
| } | |||||
| $applications[$AppNo]['Info'] = extract_data($application[2]); | |||||
| parse_detail($AppNo); | |||||
| } | |||||
| } | |||||
| if(strpos($next_page,'Next</a></div></span> <br />')) { | |||||
| $page_no++; | |||||
| //echo "Loading next page..."; | |||||
| if($page_no < 6) { | |||||
| parse_search($date,$page_no,$AppNo); | |||||
| } | |||||
| } | |||||
| } | |||||
| function parse_detail($AppNo) { | |||||
| global $applications; | |||||
| $url = 'http://www.planning.wealden.gov.uk/aspxpages/ResultsDetail.aspx?appref='.$AppNo.'&Category=DC'; | |||||
| list($junk,$data) = explode('<span id="lblSearchDetails">',fetch_page($url),2); | |||||
| list($data,$junk) = explode('<div class="linkborder">',$data,2); | |||||
| $data = explode('</li>',$data); | |||||
| $applications[$AppNo]['AppType'] = extract_data($data[1]); | |||||
| $applications[$AppNo]['DateRec'] = extract_data($data[2]); | |||||
| $applications[$AppNo]['DateExp'] = extract_data($data[3]); | |||||
| $applications[$AppNo]['Parish'] = extract_data($data[6]); | |||||
| $applications[$AppNo]['GridRef'] = extract_data($data[7]); | |||||
| $applications[$AppNo]['UPRN'] = extract_data($data[8]); | |||||
| list($status,$junk) = explode(' - ',extract_data($data[9])); | |||||
| $applications[$AppNo]['Status'] = trim($status); | |||||
| $applications[$AppNo]['DateConExp'] = extract_data($data[10]); | |||||
| $applications[$AppNo]['DateComDel'] = extract_data($data[11]); | |||||
| $applications[$AppNo]['Decision'] = extract_data($data[12]); | |||||
| $applications[$AppNo]['DateDec'] = extract_data($data[13]); | |||||
| $applications[$AppNo]['CaseOfficer'] = extract_data($data[14]); | |||||
| } | |||||
| parse_search($date); | |||||
| header("Content-Type: text/xml"); | |||||
| echo "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; | |||||
| echo "<planning>\n"; | |||||
| echo "\t<authority_name>Wealden</authority_name>\n"; | |||||
| echo "\t<authority_short_name>Wealden</authority_short_name>\n"; | |||||
| echo "\t<applications>\n"; | |||||
| foreach($applications as $application) { | |||||
| echo "\t\t<application>\n"; | |||||
| echo "\t\t\t<council_reference>".$application['AppNo']."</council_reference>\n"; | |||||
| echo "\t\t\t<address>".$application['Address']."</address>\n"; | |||||
| echo "\t\t\t<postcode>".$application['PostCode']."</postcode>\n"; | |||||
| echo "\t\t\t<description>".$application['Info']."</description>\n"; | |||||
| echo "\t\t\t<info_url><![CDATA[http://www.planning.wealden.gov.uk/aspxpages/ResultsDetail.aspx?appref=".$application['AppNo']."&Category=DC]]></info_url>\n"; | |||||
| echo "\t\t\t<comment_url>planning@wealden.gov.uk</comment_url>\n"; | |||||
| echo "\t\t\t<date_received>".$application['DateRec']."</date_received>\n"; | |||||
| if($all) { | |||||
| echo "\t\t\t<application_type>".$application['AppType']."</application_type>\n"; | |||||
| echo "\t\t\t<date_expires>".$application['DateExp']."</date_expires>\n"; | |||||
| echo "\t\t\t<parish>".$application['Parish']."</parish>\n"; | |||||
| echo "\t\t\t<grid_reference>".$application['GridRef']."</grid_reference>\n"; | |||||
| echo "\t\t\t<uprn>".$application['UPRN']."</uprn>\n"; | |||||
| echo "\t\t\t<status>".$application['Status']."</status>\n"; | |||||
| echo "\t\t\t<consultation_expiry_date>".$application['DateConExp']."</consultation_expiry_date>\n"; | |||||
| echo "\t\t\t<committee_delegated_date>".$application['DateComDel']."</committee_delegated_date>\n"; | |||||
| echo "\t\t\t<decision>".$application['Decision']."</decision>\n"; | |||||
| echo "\t\t\t<decision_date>".$application['DateDec']."</decision_date>\n"; | |||||
| echo "\t\t\t<case_officer>".$application['CaseOfficer']."</case_officer>\n"; | |||||
| } | |||||
| echo "\t\t</application>\n"; | |||||
| } | |||||
| echo "\t</applications>\n"; | |||||
| echo "</planning>"; | |||||
| ?> | |||||