@@ -433,6 +433,14 @@ class SuffolkCoastalParser(AcolnetParser): | |||
comments_email_address = "d.c.admin@suffolkcoastal.gov.uk" | |||
class GuildfordParser(AcolnetParser): | |||
case_number_tr = 1 | |||
reg_date_tr = 7 | |||
location_tr = 2 | |||
proposal_tr = 3 | |||
#http://www.guildford.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&Root=PgeSearch | |||
class SurreyHeathParser(AcolnetParser): | |||
# This is not working yet. | |||
# _getSearchResponse is an attempt to work around | |||
@@ -476,9 +484,9 @@ class SurreyHeathParser(AcolnetParser): | |||
if __name__ == '__main__': | |||
day = 31 | |||
month = 8 | |||
year = 2007 | |||
day = 22 | |||
month = 2 | |||
year = 2005 | |||
# returns error 400 - bad request | |||
#parser = BridgenorthParser() | |||
@@ -489,7 +497,8 @@ if __name__ == '__main__': | |||
# canterbury | |||
# results as columns of one table | |||
parser = SurreyHeathParser("Surrey Heath", "Surrey Heath", "https://www.public.surreyheath-online.gov.uk/whalecom60b1ef305f59f921/whalecom0/Scripts/PlanningPagesOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
#parser = SurreyHeathParser("Surrey Heath", "Surrey Heath", "https://www.public.surreyheath-online.gov.uk/whalecom60b1ef305f59f921/whalecom0/Scripts/PlanningPagesOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
parser = GuildfordParser("Guildford", "Guildford", "http://www.guildford.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&Root=PgeSearch") | |||
print parser.getResults(day, month, year) | |||
@@ -10,7 +10,8 @@ require_once('phpcoord.php'); | |||
function scrape_applications_publicaccess ($search_url, $info_url_base, $comment_url_base){ | |||
$applications = array(); | |||
$application_pattern = "/<tr><th>([0-9]*)<\/th>([^;]*)([^<]*)/"; | |||
//$application_pattern = "/<tr><th>([0-9]*)<\/th>([^;]*)([^<]*)/"; | |||
$application_pattern = "/<tr><th>([0-9]*)<\/th>.*(?=<\/tr)/U"; | |||
//grab the page | |||
$html = safe_scrape_page($search_url); | |||
@@ -21,6 +22,10 @@ function scrape_applications_publicaccess ($search_url, $info_url_base, $comment | |||
preg_match_all($application_pattern, $html, $application_matches, PREG_PATTERN_ORDER); | |||
foreach ($application_matches[0] as $application_match){ | |||
//START Duncan's debug | |||
//print_r($application_match); | |||
//print_r("END"); | |||
// END Duncan's debug | |||
$detail_pattern = "/<td>([^<])*/"; | |||
preg_match_all($detail_pattern, $application_match, $detail_matches, PREG_PATTERN_ORDER); | |||
@@ -36,12 +41,16 @@ function scrape_applications_publicaccess ($search_url, $info_url_base, $comment | |||
//match case number | |||
$casenumber_pattern = "/caseno=([^&]*)/"; | |||
preg_match($casenumber_pattern, $application_match, $casenumber_matches); | |||
//START Duncan's debug | |||
//print_r($application_match); | |||
//var_dump($casenumber_matches); | |||
//END Duncan's debug | |||
$case_number =""; | |||
if(sizeof($casenumber_matches)>0){ | |||
$case_number = str_replace("caseno=","", $casenumber_matches[0]); | |||
} | |||
//if weve found a caase number, then get the details | |||
if($case_number !=""){ | |||
//Comment and info urls | |||
@@ -47,4 +47,4 @@ if (isset($_GET['year'])){ | |||
$smarty->display("xml.tpl"); | |||
?> | |||
?> |
@@ -433,6 +433,14 @@ class SuffolkCoastalParser(AcolnetParser): | |||
comments_email_address = "d.c.admin@suffolkcoastal.gov.uk" | |||
class GuildfordParser(AcolnetParser): | |||
case_number_tr = 1 | |||
reg_date_tr = 7 | |||
location_tr = 2 | |||
proposal_tr = 3 | |||
#http://www.guildford.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&Root=PgeSearch | |||
class SurreyHeathParser(AcolnetParser): | |||
# This is not working yet. | |||
# _getSearchResponse is an attempt to work around | |||
@@ -476,9 +484,9 @@ class SurreyHeathParser(AcolnetParser): | |||
if __name__ == '__main__': | |||
day = 31 | |||
month = 8 | |||
year = 2007 | |||
day = 22 | |||
month = 2 | |||
year = 2005 | |||
# returns error 400 - bad request | |||
#parser = BridgenorthParser() | |||
@@ -489,7 +497,8 @@ if __name__ == '__main__': | |||
# canterbury | |||
# results as columns of one table | |||
parser = SurreyHeathParser("Surrey Heath", "Surrey Heath", "https://www.public.surreyheath-online.gov.uk/whalecom60b1ef305f59f921/whalecom0/Scripts/PlanningPagesOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
#parser = SurreyHeathParser("Surrey Heath", "Surrey Heath", "https://www.public.surreyheath-online.gov.uk/whalecom60b1ef305f59f921/whalecom0/Scripts/PlanningPagesOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
parser = GuildfordParser("Guildford", "Guildford", "http://www.guildford.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&Root=PgeSearch") | |||
print parser.getResults(day, month, year) | |||
@@ -131,4 +131,6 @@ | |||
"Bracknell Forest Borough Council", "Bracknell Forest", "https://my.bracknell-forest.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" | |||
"Chiltern District Council", "Chiltern", "https://isa.chiltern.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" | |||
"Hinkley and Bosworth Borough Council", "Hinkley and Bosworth", "https://cx.hinckley-bosworth.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser" | |||
"Tendring District Council", "Tendring", "http://195.99.151.54/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" | |||
"Tendring District Council", "Tendring", "http://195.99.151.54/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" | |||
"Argyl And Bute Council", "Argyl and Bute", "http://www.argyll-bute.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser" | |||
"Oxford City Council", "Oxford", "http://uniformpublicaccess.oxford.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" |