| @@ -433,6 +433,14 @@ class SuffolkCoastalParser(AcolnetParser): | |||
| comments_email_address = "d.c.admin@suffolkcoastal.gov.uk" | |||
| class GuildfordParser(AcolnetParser): | |||
| case_number_tr = 1 | |||
| reg_date_tr = 7 | |||
| location_tr = 2 | |||
| proposal_tr = 3 | |||
| #http://www.guildford.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&Root=PgeSearch | |||
| class SurreyHeathParser(AcolnetParser): | |||
| # This is not working yet. | |||
| # _getSearchResponse is an attempt to work around | |||
| @@ -476,9 +484,9 @@ class SurreyHeathParser(AcolnetParser): | |||
| if __name__ == '__main__': | |||
| day = 31 | |||
| month = 8 | |||
| year = 2007 | |||
| day = 22 | |||
| month = 2 | |||
| year = 2005 | |||
| # returns error 400 - bad request | |||
| #parser = BridgenorthParser() | |||
| @@ -489,7 +497,8 @@ if __name__ == '__main__': | |||
| # canterbury | |||
| # results as columns of one table | |||
| parser = SurreyHeathParser("Surrey Heath", "Surrey Heath", "https://www.public.surreyheath-online.gov.uk/whalecom60b1ef305f59f921/whalecom0/Scripts/PlanningPagesOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
| #parser = SurreyHeathParser("Surrey Heath", "Surrey Heath", "https://www.public.surreyheath-online.gov.uk/whalecom60b1ef305f59f921/whalecom0/Scripts/PlanningPagesOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
| parser = GuildfordParser("Guildford", "Guildford", "http://www.guildford.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&Root=PgeSearch") | |||
| print parser.getResults(day, month, year) | |||
| @@ -10,7 +10,8 @@ require_once('phpcoord.php'); | |||
| function scrape_applications_publicaccess ($search_url, $info_url_base, $comment_url_base){ | |||
| $applications = array(); | |||
| $application_pattern = "/<tr><th>([0-9]*)<\/th>([^;]*)([^<]*)/"; | |||
| //$application_pattern = "/<tr><th>([0-9]*)<\/th>([^;]*)([^<]*)/"; | |||
| $application_pattern = "/<tr><th>([0-9]*)<\/th>.*(?=<\/tr)/U"; | |||
| //grab the page | |||
| $html = safe_scrape_page($search_url); | |||
| @@ -21,6 +22,10 @@ function scrape_applications_publicaccess ($search_url, $info_url_base, $comment | |||
| preg_match_all($application_pattern, $html, $application_matches, PREG_PATTERN_ORDER); | |||
| foreach ($application_matches[0] as $application_match){ | |||
| //START Duncan's debug | |||
| //print_r($application_match); | |||
| //print_r("END"); | |||
| // END Duncan's debug | |||
| $detail_pattern = "/<td>([^<])*/"; | |||
| preg_match_all($detail_pattern, $application_match, $detail_matches, PREG_PATTERN_ORDER); | |||
| @@ -36,12 +41,16 @@ function scrape_applications_publicaccess ($search_url, $info_url_base, $comment | |||
| //match case number | |||
| $casenumber_pattern = "/caseno=([^&]*)/"; | |||
| preg_match($casenumber_pattern, $application_match, $casenumber_matches); | |||
| //START Duncan's debug | |||
| //print_r($application_match); | |||
| //var_dump($casenumber_matches); | |||
| //END Duncan's debug | |||
| $case_number =""; | |||
| if(sizeof($casenumber_matches)>0){ | |||
| $case_number = str_replace("caseno=","", $casenumber_matches[0]); | |||
| } | |||
| //if we've found a case number, then get the details | |||
| if($case_number !=""){ | |||
| //Comment and info urls | |||
| @@ -47,4 +47,4 @@ if (isset($_GET['year'])){ | |||
| $smarty->display("xml.tpl"); | |||
| ?> | |||
| ?> | |||
| @@ -433,6 +433,14 @@ class SuffolkCoastalParser(AcolnetParser): | |||
| comments_email_address = "d.c.admin@suffolkcoastal.gov.uk" | |||
| class GuildfordParser(AcolnetParser): | |||
| case_number_tr = 1 | |||
| reg_date_tr = 7 | |||
| location_tr = 2 | |||
| proposal_tr = 3 | |||
| #http://www.guildford.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&Root=PgeSearch | |||
| class SurreyHeathParser(AcolnetParser): | |||
| # This is not working yet. | |||
| # _getSearchResponse is an attempt to work around | |||
| @@ -476,9 +484,9 @@ class SurreyHeathParser(AcolnetParser): | |||
| if __name__ == '__main__': | |||
| day = 31 | |||
| month = 8 | |||
| year = 2007 | |||
| day = 22 | |||
| month = 2 | |||
| year = 2005 | |||
| # returns error 400 - bad request | |||
| #parser = BridgenorthParser() | |||
| @@ -489,7 +497,8 @@ if __name__ == '__main__': | |||
| # canterbury | |||
| # results as columns of one table | |||
| parser = SurreyHeathParser("Surrey Heath", "Surrey Heath", "https://www.public.surreyheath-online.gov.uk/whalecom60b1ef305f59f921/whalecom0/Scripts/PlanningPagesOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
| #parser = SurreyHeathParser("Surrey Heath", "Surrey Heath", "https://www.public.surreyheath-online.gov.uk/whalecom60b1ef305f59f921/whalecom0/Scripts/PlanningPagesOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
| parser = GuildfordParser("Guildford", "Guildford", "http://www.guildford.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&Root=PgeSearch") | |||
| print parser.getResults(day, month, year) | |||
| @@ -131,4 +131,6 @@ | |||
| "Bracknell Forest Borough Council", "Bracknell Forest", "https://my.bracknell-forest.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" | |||
| "Chiltern District Council", "Chiltern", "https://isa.chiltern.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" | |||
| "Hinkley and Bosworth Borough Council", "Hinkley and Bosworth", "https://cx.hinckley-bosworth.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser" | |||
| "Tendring District Council", "Tendring", "http://195.99.151.54/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" | |||
| "Tendring District Council", "Tendring", "http://195.99.151.54/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" | |||
| "Argyl And Bute Council", "Argyl and Bute", "http://www.argyll-bute.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser" | |||
| "Oxford City Council", "Oxford", "http://uniformpublicaccess.oxford.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" | |||