@@ -433,6 +433,14 @@ class SuffolkCoastalParser(AcolnetParser): | |||||
comments_email_address = "d.c.admin@suffolkcoastal.gov.uk" | comments_email_address = "d.c.admin@suffolkcoastal.gov.uk" | ||||
class GuildfordParser(AcolnetParser): | |||||
case_number_tr = 1 | |||||
reg_date_tr = 7 | |||||
location_tr = 2 | |||||
proposal_tr = 3 | |||||
#http://www.guildford.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&Root=PgeSearch | |||||
class SurreyHeathParser(AcolnetParser): | class SurreyHeathParser(AcolnetParser): | ||||
# This is not working yet. | # This is not working yet. | ||||
# _getSearchResponse is an attempt to work around | # _getSearchResponse is an attempt to work around | ||||
@@ -476,9 +484,9 @@ class SurreyHeathParser(AcolnetParser): | |||||
if __name__ == '__main__': | if __name__ == '__main__': | ||||
day = 31 | |||||
month = 8 | |||||
year = 2007 | |||||
day = 22 | |||||
month = 2 | |||||
year = 2005 | |||||
# returns error 400 - bad request | # returns error 400 - bad request | ||||
#parser = BridgenorthParser() | #parser = BridgenorthParser() | ||||
@@ -489,7 +497,8 @@ if __name__ == '__main__': | |||||
# canterbury | # canterbury | ||||
# results as columns of one table | # results as columns of one table | ||||
parser = SurreyHeathParser("Surrey Heath", "Surrey Heath", "https://www.public.surreyheath-online.gov.uk/whalecom60b1ef305f59f921/whalecom0/Scripts/PlanningPagesOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||||
#parser = SurreyHeathParser("Surrey Heath", "Surrey Heath", "https://www.public.surreyheath-online.gov.uk/whalecom60b1ef305f59f921/whalecom0/Scripts/PlanningPagesOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||||
parser = GuildfordParser("Guildford", "Guildford", "http://www.guildford.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&Root=PgeSearch") | |||||
print parser.getResults(day, month, year) | print parser.getResults(day, month, year) | ||||
@@ -10,7 +10,8 @@ require_once('phpcoord.php'); | |||||
function scrape_applications_publicaccess ($search_url, $info_url_base, $comment_url_base){ | function scrape_applications_publicaccess ($search_url, $info_url_base, $comment_url_base){ | ||||
$applications = array(); | $applications = array(); | ||||
$application_pattern = "/<tr><th>([0-9]*)<\/th>([^;]*)([^<]*)/"; | |||||
//$application_pattern = "/<tr><th>([0-9]*)<\/th>([^;]*)([^<]*)/"; | |||||
$application_pattern = "/<tr><th>([0-9]*)<\/th>.*(?=<\/tr)/U"; | |||||
//grab the page | //grab the page | ||||
$html = safe_scrape_page($search_url); | $html = safe_scrape_page($search_url); | ||||
@@ -21,6 +22,10 @@ function scrape_applications_publicaccess ($search_url, $info_url_base, $comment | |||||
preg_match_all($application_pattern, $html, $application_matches, PREG_PATTERN_ORDER); | preg_match_all($application_pattern, $html, $application_matches, PREG_PATTERN_ORDER); | ||||
foreach ($application_matches[0] as $application_match){ | foreach ($application_matches[0] as $application_match){ | ||||
//START Duncan's debug | |||||
//print_r($application_match); | |||||
//print_r("END"); | |||||
// END Duncan's debug | |||||
$detail_pattern = "/<td>([^<])*/"; | $detail_pattern = "/<td>([^<])*/"; | ||||
preg_match_all($detail_pattern, $application_match, $detail_matches, PREG_PATTERN_ORDER); | preg_match_all($detail_pattern, $application_match, $detail_matches, PREG_PATTERN_ORDER); | ||||
@@ -36,12 +41,16 @@ function scrape_applications_publicaccess ($search_url, $info_url_base, $comment | |||||
//match case number | //match case number | ||||
$casenumber_pattern = "/caseno=([^&]*)/"; | $casenumber_pattern = "/caseno=([^&]*)/"; | ||||
preg_match($casenumber_pattern, $application_match, $casenumber_matches); | preg_match($casenumber_pattern, $application_match, $casenumber_matches); | ||||
//START Duncan's debug | |||||
//print_r($application_match); | |||||
//var_dump($casenumber_matches); | |||||
//END Duncan's debug | |||||
$case_number =""; | $case_number =""; | ||||
if(sizeof($casenumber_matches)>0){ | if(sizeof($casenumber_matches)>0){ | ||||
$case_number = str_replace("caseno=","", $casenumber_matches[0]); | $case_number = str_replace("caseno=","", $casenumber_matches[0]); | ||||
} | } | ||||
//if weve found a caase number, then get the details | //if weve found a caase number, then get the details | ||||
if($case_number !=""){ | if($case_number !=""){ | ||||
//Comment and info urls | //Comment and info urls | ||||
@@ -47,4 +47,4 @@ if (isset($_GET['year'])){ | |||||
$smarty->display("xml.tpl"); | $smarty->display("xml.tpl"); | ||||
?> | |||||
?> |
@@ -433,6 +433,14 @@ class SuffolkCoastalParser(AcolnetParser): | |||||
comments_email_address = "d.c.admin@suffolkcoastal.gov.uk" | comments_email_address = "d.c.admin@suffolkcoastal.gov.uk" | ||||
class GuildfordParser(AcolnetParser): | |||||
case_number_tr = 1 | |||||
reg_date_tr = 7 | |||||
location_tr = 2 | |||||
proposal_tr = 3 | |||||
#http://www.guildford.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&Root=PgeSearch | |||||
class SurreyHeathParser(AcolnetParser): | class SurreyHeathParser(AcolnetParser): | ||||
# This is not working yet. | # This is not working yet. | ||||
# _getSearchResponse is an attempt to work around | # _getSearchResponse is an attempt to work around | ||||
@@ -476,9 +484,9 @@ class SurreyHeathParser(AcolnetParser): | |||||
if __name__ == '__main__': | if __name__ == '__main__': | ||||
day = 31 | |||||
month = 8 | |||||
year = 2007 | |||||
day = 22 | |||||
month = 2 | |||||
year = 2005 | |||||
# returns error 400 - bad request | # returns error 400 - bad request | ||||
#parser = BridgenorthParser() | #parser = BridgenorthParser() | ||||
@@ -489,7 +497,8 @@ if __name__ == '__main__': | |||||
# canterbury | # canterbury | ||||
# results as columns of one table | # results as columns of one table | ||||
parser = SurreyHeathParser("Surrey Heath", "Surrey Heath", "https://www.public.surreyheath-online.gov.uk/whalecom60b1ef305f59f921/whalecom0/Scripts/PlanningPagesOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||||
#parser = SurreyHeathParser("Surrey Heath", "Surrey Heath", "https://www.public.surreyheath-online.gov.uk/whalecom60b1ef305f59f921/whalecom0/Scripts/PlanningPagesOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||||
parser = GuildfordParser("Guildford", "Guildford", "http://www.guildford.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&Root=PgeSearch") | |||||
print parser.getResults(day, month, year) | print parser.getResults(day, month, year) | ||||
@@ -131,4 +131,6 @@ | |||||
"Bracknell Forest Borough Council", "Bracknell Forest", "https://my.bracknell-forest.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" | "Bracknell Forest Borough Council", "Bracknell Forest", "https://my.bracknell-forest.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" | ||||
"Chiltern District Council", "Chiltern", "https://isa.chiltern.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" | "Chiltern District Council", "Chiltern", "https://isa.chiltern.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" | ||||
"Hinkley and Bosworth Borough Council", "Hinkley and Bosworth", "https://cx.hinckley-bosworth.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser" | "Hinkley and Bosworth Borough Council", "Hinkley and Bosworth", "https://cx.hinckley-bosworth.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser" | ||||
"Tendring District Council", "Tendring", "http://195.99.151.54/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" | |||||
"Tendring District Council", "Tendring", "http://195.99.151.54/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" | |||||
"Argyl And Bute Council", "Argyl and Bute", "http://www.argyll-bute.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser" | |||||
"Oxford City Council", "Oxford", "http://uniformpublicaccess.oxford.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" |