|
|
@@ -0,0 +1,153 @@ |
|
|
|
<?php |
|
|
|
// |
|
|
|
// Scraper for Wealden District Council Planning Website |
|
|
|
// Created by Matt Ford on Sunday 24th August 2008 |
|
|
|
// |
|
|
|
// The script works according to requirements of PlanningAlerts.com |
|
|
|
// The default output is according to PlanningAlerts requirements, |
|
|
|
// to get all the data add 'all=true' to the end of the query string |
|
|
|
// |
|
|
|
// You need to set the location of the 'cookie jar' for the scraper to work.
// fetch_page() hands this path to cURL for both reading and writing cookies.
$cookiejar = '/tmp/wealden_cookies.txt';

/**
 * Read a whole-number parameter from the query string.
 *
 * Only a plain run of 1-4 ASCII digits is accepted, so values such as
 * "5&EndDate=..." or array parameters can never reach the URLs that are
 * built from the result. Accepted values are normalised to an int.
 *
 * @param string $name    Key to look up in $_GET.
 * @param int    $min     Smallest accepted value (inclusive).
 * @param int    $max     Largest accepted value (inclusive).
 * @param int    $default Value returned when the parameter is missing or invalid.
 * @return int The validated value, or $default.
 */
function get_int_param($name, $min, $max, $default) {
    if (!isset($_GET[$name]) || !is_string($_GET[$name])) {
        return $default;
    }
    if (!preg_match('/\A[0-9]{1,4}\z/', $_GET[$name])) {
        return $default;
    }
    $value = (int) $_GET[$name];
    if ($value < $min || $value > $max) {
        return $default;
    }
    return $value;
}

// Check a day is set and is valid (1-31), otherwise fall back to the 1st
$day = get_int_param('day', 1, 31, 1);

// Check a month is set and is valid (1-12), otherwise fall back to January
$month = get_int_param('month', 1, 12, 1);

// Check a year is set and is valid (2004 up to the current year),
// otherwise fall back to the current year
$current_year = (int) gmdate('Y');
$year = get_int_param('year', 2004, $current_year, $current_year);

// Do you want all information or only the common stuff?
// Any non-empty value switches it on, except the literal string 'false'
// (previously 'all=false' turned the extra fields ON because the string is truthy).
$all = (isset($_GET['all']) && $_GET['all'] != false && $_GET['all'] !== 'false') ? true : false;

// Date in the day/month/year form that is placed into the search URL
$date = $day.'/'.$month.'/'.$year;

// Scraped applications, keyed by application number; filled in by
// parse_search() and parse_detail()
$applications = array();
|
|
|
|
|
|
|
/**
 * Fetch a URL with cURL and return the response body.
 *
 * Redirects are followed, the requested URL is sent as its own referer, and
 * cookies are read from / saved to the file named by the file-scope
 * $cookiejar variable so that a session survives between calls.
 *
 * @param string       $url         Address to request.
 * @param string|false $post_string URL-encoded POST body; only sent when $post_count > 0.
 * @param int|false    $post_count  Number of POST fields; any value above zero turns the request into a POST.
 * @return string|false The response body, or false when curl_exec() fails.
 */
function fetch_page($url,$post_string = false,$post_count = false) {
    // $cookiejar lives at file scope. Without this declaration the cookie
    // options below received an undefined variable, so no cookie file was
    // ever used and nothing carried over from one request to the next.
    global $cookiejar;

    // A fresh handle per call (the old isset($ch) guard could never be false
    // because $ch is local to this function).
    $ch = curl_init();

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_REFERER, $url);
    curl_setopt($ch, CURLOPT_COOKIEJAR, $cookiejar);
    curl_setopt($ch, CURLOPT_COOKIEFILE, $cookiejar);

    if ($post_count > 0) {
        // CURLOPT_POST is an on/off switch, not a field count
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $post_string);
    }

    // The handle is released when this function returns, which is when cURL
    // writes the cookie jar file.
    return curl_exec($ch);
}
|
|
|
|
|
|
|
/**
 * Pull the value that follows an <h5> label out of an HTML fragment.
 *
 * The fragment is split on '</h5>'; the piece after the first closing tag
 * (up to the next '</h5>', if there is one) has its tags stripped and its
 * surrounding whitespace trimmed.
 *
 * @param string|null $string HTML fragment, e.g. one <li> of a result.
 * @return string The cleaned value, or '' when the fragment has no '</h5>'.
 */
function extract_data($string) {
    $parts = explode('</h5>', (string) $string);

    // No label terminator: nothing to extract. Returning early avoids the
    // undefined-offset warning and the null passed to strip_tags().
    if (!isset($parts[1])) {
        return '';
    }

    return trim(strip_tags($parts[1]));
}
|
|
|
|
|
|
|
/**
 * Scrape one page of search results for a date and store every application
 * found in the global $applications array, then follow the "Next" link.
 *
 * For each result the application number, address, postcode (when one can be
 * matched in the address) and description are recorded, and parse_detail()
 * is called to add the remaining fields. If the site answers with its
 * copyright/disclaimer page, the terms are accepted by posting the page's
 * __VIEWSTATE back before the results are read.
 *
 * At most five pages are followed (the recursion stops once $page_no
 * reaches 6).
 *
 * @param string $date    Date in day/month/year form, used as both start and end date.
 * @param int    $page_no Result page to load.
 * @param string $AppRef  Value for the AppRef query parameter.
 * @return void
 */
function parse_search($date,$page_no=1,$AppRef='') {
    global $applications;

    // The results page and the copyright page are called with the same query string
    $base = 'http://www.planning.wealden.gov.uk/aspxpages/';
    $query = '?pageno='.$page_no.'&QueryType=9&WeekNo=&WeekStart=&WeekEnd=&CaseNo=&Add=&ShowInd=&DocId=&AppRef='.$AppRef.'&Category=DC&DateType=R&StartDate='.$date.'&EndDate='.$date.'&Agent=&ParishCode=&WardCode=&Parish=&Ward=&AdvAppNo=&AdvAdd=&AdvProposal=&DecisionCode=&Det=';

    $data = fetch_page($base.'SearchResults.aspx'.$query);
    if (!is_string($data)) {
        // Transfer failed: nothing to parse
        return;
    }

    if (strpos($data, "<title>Wealden District Council's applications online - Copyright, disclaimer & personal data</title>") !== false) {
        // Accept their terms: send the hidden __VIEWSTATE value back with the Accept button
        $viewstate = '';
        $pieces = explode('<input type="hidden" name="__VIEWSTATE" value="', $data, 2);
        if (isset($pieces[1])) {
            $pieces = explode('" />', $pieces[1], 2);
            $viewstate = $pieces[0];
        }

        $data = fetch_page($base.'Copyright.aspx'.$query, 'btnCopyrightAccept=Accept&__VIEWSTATE='.urlencode($viewstate), 2);
        if (!is_string($data)) {
            return;
        }
    }

    // Keep only what follows the results marker; without it there are no results
    $pieces = explode('<span id="lblSearchResults">', $data);
    if (!isset($pieces[1])) {
        return;
    }

    // Results come before the pager, the pager markup (if any) after it
    $pieces = explode('<div id="pagenumbers">', $pieces[1]);
    $next_page = isset($pieces[1]) ? $pieces[1] : '';

    // One <ul> per application; the piece at index 10 is discarded
    $rows = explode('</ul>', $pieces[0]);
    unset($rows[10]);

    $AppNo = '';
    foreach ($rows as $row) {
        $fields = explode('</li>', $row);

        $AppNo = extract_data($fields[0]);
        if (empty($AppNo)) {
            // Not an application block (e.g. trailing markup)
            continue;
        }

        $Loc = extract_data(isset($fields[1]) ? $fields[1] : '');

        $applications[$AppNo]['AppNo'] = $AppNo;
        $applications[$AppNo]['Address'] = $Loc;

        // Look for something shaped like a UK postcode inside the address
        $PostCode = array();
        preg_match("/([A-Z]{1,2}[0-9][0-9A-Z]?\s?[0-9][A-Z]{2})/", $Loc, $PostCode);
        $applications[$AppNo]['PostCode'] = isset($PostCode[1]) ? $PostCode[1] : false;

        $applications[$AppNo]['Info'] = extract_data(isset($fields[2]) ? $fields[2] : '');

        parse_detail($AppNo);
    }

    if (strpos($next_page, 'Next</a></div></span> <br />') !== false) {
        $page_no++;

        if ($page_no < 6) {
            // NOTE(review): the last application number seen on this page is
            // passed on as $AppRef for the next page. That is the existing
            // behaviour and is kept; confirm against the site whether the
            // original $AppRef was intended instead.
            parse_search($date, $page_no, $AppNo);
        }
    }
}
|
|
|
|
|
|
|
/**
 * Load the detail page of one application and add its fields to the global
 * $applications array under the given application number.
 *
 * The detail block is split into its <li> items and each field is read from
 * a fixed item position. Every key is always set; a field whose item is
 * missing (or a page that could not be fetched/recognised) yields ''.
 *
 * @param string $AppNo Application number, used in the URL and as the array key.
 * @return void
 */
function parse_detail($AppNo) {
    global $applications;

    $url = 'http://www.planning.wealden.gov.uk/aspxpages/ResultsDetail.aspx?appref='.$AppNo.'&Category=DC';

    // Isolate the markup between the details marker and the link border,
    // then cut it into list items. $items stays empty if anything is missing.
    $items = array();
    $page = fetch_page($url);
    if (is_string($page)) {
        $pieces = explode('<span id="lblSearchDetails">', $page, 2);
        if (isset($pieces[1])) {
            $pieces = explode('<div class="linkborder">', $pieces[1], 2);
            $items = explode('</li>', $pieces[0]);
        }
    }

    // Field name => position of its <li> on the detail page
    $positions = array(
        'AppType'     => 1,
        'DateRec'     => 2,
        'DateExp'     => 3,
        'Parish'      => 6,
        'GridRef'     => 7,
        'UPRN'        => 8,
        'Status'      => 9,
        'DateConExp'  => 10,
        'DateComDel'  => 11,
        'Decision'    => 12,
        'DateDec'     => 13,
        'CaseOfficer' => 14,
    );

    foreach ($positions as $field => $position) {
        $applications[$AppNo][$field] = isset($items[$position]) ? extract_data($items[$position]) : '';
    }

    // Only the text before the first ' - ' of the status line is kept
    $status = explode(' - ', $applications[$AppNo]['Status'], 2);
    $applications[$AppNo]['Status'] = trim($status[0]);
}
|
|
|
// Run the scrape for the requested date; this fills $applications
parse_search($date);

/**
 * Prepare scraped text for use inside an XML element.
 *
 * The values come from HTML with the tags stripped, so they may still hold
 * HTML entities (&nbsp;, &pound;, ...) that XML does not define, or a bare
 * '&'. Entities are decoded first and then only &, < and > are re-escaped.
 * The escape step uses a single-byte charset on purpose: it touches nothing
 * but those ASCII characters and never rejects the string.
 *
 * @param mixed $value Scraped value (false/null become '').
 * @return string Text that is safe as XML character data.
 */
function xml_escape_text($value) {
    $decoded = html_entity_decode((string) $value, ENT_QUOTES, 'UTF-8');
    return htmlspecialchars($decoded, ENT_NOQUOTES, 'ISO-8859-1');
}

header("Content-Type: text/xml");

echo "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
echo "<planning>\n";
echo "\t<authority_name>Wealden</authority_name>\n";
echo "\t<authority_short_name>Wealden</authority_short_name>\n";
echo "\t<applications>\n";

// Extra elements that are only written when 'all' was requested:
// element name => key in the application record
$extra_fields = array(
    'application_type'         => 'AppType',
    'date_expires'             => 'DateExp',
    'parish'                   => 'Parish',
    'grid_reference'           => 'GridRef',
    'uprn'                     => 'UPRN',
    'status'                   => 'Status',
    'consultation_expiry_date' => 'DateConExp',
    'committee_delegated_date' => 'DateComDel',
    'decision'                 => 'Decision',
    'decision_date'            => 'DateDec',
    'case_officer'             => 'CaseOfficer',
);

foreach ($applications as $application) {
    echo "\t\t<application>\n";
    echo "\t\t\t<council_reference>".xml_escape_text($application['AppNo'])."</council_reference>\n";
    echo "\t\t\t<address>".xml_escape_text($application['Address'])."</address>\n";
    echo "\t\t\t<postcode>".xml_escape_text($application['PostCode'])."</postcode>\n";
    echo "\t\t\t<description>".xml_escape_text($application['Info'])."</description>\n";
    // The URL sits in a CDATA section, so its '&' needs no escaping
    echo "\t\t\t<info_url><![CDATA[http://www.planning.wealden.gov.uk/aspxpages/ResultsDetail.aspx?appref=".$application['AppNo']."&Category=DC]]></info_url>\n";
    echo "\t\t\t<comment_url>planning@wealden.gov.uk</comment_url>\n";
    echo "\t\t\t<date_received>".xml_escape_text($application['DateRec'])."</date_received>\n";

    if ($all) {
        foreach ($extra_fields as $element => $key) {
            echo "\t\t\t<".$element.">".xml_escape_text($application[$key])."</".$element.">\n";
        }
    }

    echo "\t\t</application>\n";
}

echo "\t</applications>\n";
echo "</planning>";

?>
|
|
|
|