diff --git a/trunk/docs/scrapers/richmond.php b/trunk/docs/scrapers/richmond.php new file mode 100644 index 0000000..0457d23 --- /dev/null +++ b/trunk/docs/scrapers/richmond.php @@ -0,0 +1,104 @@ + 0 && $_GET['day'] < 32) ? $_GET['day'] : 1; + +//Check a month is set and is valid +$month = (isset($_GET['month']) && !empty($_GET['month']) && $_GET['month'] > 0 && $_GET['month'] < 13) ? $_GET['month'] : 1; + +//Check a year is set and is valid +$year = (isset($_GET['year']) && !empty($_GET['year']) && $_GET['year'] > 2003 && $_GET['year'] <= gmdate('Y')) ? $_GET['year'] : gmdate('Y'); + +$xml = array( 'name' => 'Richmond', + 'full_name' => 'London Borough of Richmond upon Thames', + 'url' => 'http://www2.richmond.gov.uk/PlanData2/planning_summary.aspx', + 'detail_url' => 'http://www2.richmond.gov.uk/PlanData2/Planning_CaseNo.aspx?strCASENO=', + 'comments' => 'http://forms.richmond.gov.uk/AF3/an/default.aspx/RenderForm/?F.Name=C5aG_poZZFP'); + +$date = $day.'/'.$month.'/'.$year; + +$applications = array(); + +function fetch_page($url) { + if(!isset($ch)) { + $ch = curl_init(); + } + curl_setopt($ch, CURLOPT_URL, $url); + curl_setopt($ch, CURLOPT_HEADER, 0); + curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + curl_setopt($ch, CURLOPT_REFERER, $url); + $data = curl_exec($ch); + return $data; +} + +function parse_search($date) { + global $applications; + + $url = 'http://www2.richmond.gov.uk/PlanData2/planning_summary.aspx?strRecTo='.$date.'&strRecFrom='.$date; + //echo 'Loading page '.$page_no.' of data for '.$date.' URL:'.$url.'
'; + + $data = fetch_page($url); + $data = explode('',$data); + $data = explode('',$data[1]); + foreach($data as $application) { + $application = explode('',$application); + $detail = explode('',$application[3]); + list($AppNo,$junk) = explode('
',$detail[0]); + $AppNo = trim(strip_tags($AppNo)); + $applications[$AppNo]['AppNo'] = $AppNo; + $applications[$AppNo]['DateRec'] = trim(strip_tags($detail[2])); + $applications[$AppNo]['Info'] = trim(strip_tags($detail[4])); + $Loc = trim(strip_tags($application[1])); + if(substr($Loc,-29) == 'Click here for a location map') { + $Loc = substr($Loc,0,-29); + } + $applications[$AppNo]['Address'] = $Loc; + preg_match("/([A-Z]{1,2}[0-9][0-9A-Z]?\s?[0-9][A-Z]{2})/",$Loc,$PostCode); + if(isset($PostCode[1])) { + $applications[$AppNo]['PostCode'] = $PostCode[1]; + } else { + $applications[$AppNo]['PostCode'] = false; + } + if(empty($AppNo)) { + unset($applications[$AppNo]); + } + } +} + +parse_search($date); + +header("Content-Type: text/xml"); +echo "\n"; +echo "\n"; +echo "\t".$xml['full_name']."\n"; +echo "\t".$xml['name']."\n"; +echo "\t\n"; +foreach($applications as $application) { + echo "\t\t\n"; + echo "\t\t\t".$application['AppNo']."\n"; + echo "\t\t\t
".$application['Address']."
\n"; + echo "\t\t\t".$application['PostCode']."\n"; + echo "\t\t\t\n"; + echo "\t\t\t".$xml['detail_url'].$application['AppNo']."\n"; + echo "\t\t\t".$xml['comments']."\n"; + echo "\t\t\t".$application['DateRec']."\n"; + echo "\t\t
\n"; +} +echo "\t
\n"; +echo "
"; +?> + + + + + + + + diff --git a/trunk/docs/scrapers/wiltshire.php b/trunk/docs/scrapers/wiltshire.php new file mode 100644 index 0000000..91dd752 --- /dev/null +++ b/trunk/docs/scrapers/wiltshire.php @@ -0,0 +1,100 @@ + 0 && $_GET['day'] < 32) ? $_GET['day'] : 1; + +//Check a month is set and is valid +$month = (isset($_GET['month']) && !empty($_GET['month']) && $_GET['month'] > 0 && $_GET['month'] < 13) ? $_GET['month'] : 1; + +//Check a year is set and is valid +$year = (isset($_GET['year']) && !empty($_GET['year']) && $_GET['year'] > 2003 && $_GET['year'] <= gmdate('Y')) ? $_GET['year'] : gmdate('Y'); + +$authority = array( 'name' => 'Wiltshire', + 'full_name' => 'Wiltshire County Council', + 'url' => 'http://www.wiltshireplanningapplications.co.uk/planning_summary.aspx', + 'detail_url' => 'http://www.wiltshireplanningapplications.co.uk/Planning_DETAIL.aspx?strCASENO=', + 'comments' => 'planningcontrol@wiltshire.gov.uk'); + +// There is a comment url available on some of the info pages, but we would +// have to download the info page to get it (it has a case officer parameter). +// The page looks like it might work without the case officer parameter, but +// the email address is probably a better bet - Duncan + +$date = $day.'/'.$month.'/'.$year; + +$applications = array(); + +function fetch_page($url) { + if(!isset($ch)) { + $ch = curl_init(); + } + curl_setopt($ch, CURLOPT_URL, $url); + curl_setopt($ch, CURLOPT_HEADER, 0); + curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + curl_setopt($ch, CURLOPT_REFERER, $url); + $data = curl_exec($ch); + return $data; +} + +function parse_search($date) { + global $applications,$authority; + + $url = $authority['url'].'?strRecTo='.$date.'&strRecFrom='.$date; + //echo 'Loading page '.$page_no.' of data for '.$date.' URL:'.$url.'
'; + + $data = fetch_page($url); + $data = explode('',$data); + $data = explode('',$data[2]); + foreach($data as $application) { + $application = explode('',$application); + $detail = explode('',$application[1]); + list($AppNo,$junk) = explode('
',$detail[0]); + $AppNo = trim(strip_tags($AppNo)); + $applications[$AppNo]['AppNo'] = $AppNo; + $applications[$AppNo]['Info'] = trim(strip_tags($application[2])); + list($Loc,$junk) = explode('',$detail[2]); + $Loc = trim(strip_tags($Loc)); + $applications[$AppNo]['Address'] = $Loc; + list($junk,$DateRec) = explode('',$detail[2]); + $applications[$AppNo]['DateRec'] = trim(strip_tags($DateRec)); + preg_match("/([A-Z]{1,2}[0-9][0-9A-Z]?\s?[0-9][A-Z]{2})/",$Loc,$PostCode); + if(isset($PostCode[1])) { + $applications[$AppNo]['PostCode'] = $PostCode[1]; + } else { + $applications[$AppNo]['PostCode'] = false; + } + if(empty($AppNo)) { + unset($applications[$AppNo]); + } + } +} + +parse_search($date); + +header("Content-Type: text/xml"); +echo "\n"; +echo "\n"; +echo "\t".$authority['full_name']."\n"; +echo "\t".$authority['name']."\n"; +echo "\t\n"; +foreach($applications as $application) { + echo "\t\t\n"; + echo "\t\t\t".$application['AppNo']."\n"; + echo "\t\t\t
".$application['Address']."
\n"; + echo "\t\t\t".$application['PostCode']."\n"; + echo "\t\t\t\n"; + echo "\t\t\t".$authority['detail_url'].$application['AppNo']."\n"; + echo "\t\t\t".$authority['comments']."\n"; + echo "\t\t\t".$application['DateRec']."\n"; + echo "\t\t
\n"; +} +echo "\t
\n"; +echo "
"; +?>