| @@ -433,6 +433,14 @@ class SuffolkCoastalParser(AcolnetParser): | |||||
| comments_email_address = "d.c.admin@suffolkcoastal.gov.uk" | comments_email_address = "d.c.admin@suffolkcoastal.gov.uk" | ||||
| class GuildfordParser(AcolnetParser): | |||||
| case_number_tr = 1 | |||||
| reg_date_tr = 7 | |||||
| location_tr = 2 | |||||
| proposal_tr = 3 | |||||
| #http://www.guildford.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&Root=PgeSearch | |||||
| class SurreyHeathParser(AcolnetParser): | class SurreyHeathParser(AcolnetParser): | ||||
| # This is not working yet. | # This is not working yet. | ||||
| # _getSearchResponse is an attempt to work around | # _getSearchResponse is an attempt to work around | ||||
| @@ -476,9 +484,9 @@ class SurreyHeathParser(AcolnetParser): | |||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||
| day = 31 | |||||
| month = 8 | |||||
| year = 2007 | |||||
| day = 22 | |||||
| month = 2 | |||||
| year = 2005 | |||||
| # returns error 400 - bad request | # returns error 400 - bad request | ||||
| #parser = BridgenorthParser() | #parser = BridgenorthParser() | ||||
| @@ -489,7 +497,8 @@ if __name__ == '__main__': | |||||
| # canterbury | # canterbury | ||||
| # results as columns of one table | # results as columns of one table | ||||
| parser = SurreyHeathParser("Surrey Heath", "Surrey Heath", "https://www.public.surreyheath-online.gov.uk/whalecom60b1ef305f59f921/whalecom0/Scripts/PlanningPagesOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||||
| #parser = SurreyHeathParser("Surrey Heath", "Surrey Heath", "https://www.public.surreyheath-online.gov.uk/whalecom60b1ef305f59f921/whalecom0/Scripts/PlanningPagesOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||||
| parser = GuildfordParser("Guildford", "Guildford", "http://www.guildford.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&Root=PgeSearch") | |||||
| print parser.getResults(day, month, year) | print parser.getResults(day, month, year) | ||||
| @@ -10,7 +10,8 @@ require_once('phpcoord.php'); | |||||
| function scrape_applications_publicaccess ($search_url, $info_url_base, $comment_url_base){ | function scrape_applications_publicaccess ($search_url, $info_url_base, $comment_url_base){ | ||||
| $applications = array(); | $applications = array(); | ||||
| $application_pattern = "/<tr><th>([0-9]*)<\/th>([^;]*)([^<]*)/"; | |||||
| //$application_pattern = "/<tr><th>([0-9]*)<\/th>([^;]*)([^<]*)/"; | |||||
| $application_pattern = "/<tr><th>([0-9]*)<\/th>.*(?=<\/tr)/U"; | |||||
| //grab the page | //grab the page | ||||
| $html = safe_scrape_page($search_url); | $html = safe_scrape_page($search_url); | ||||
| @@ -21,6 +22,10 @@ function scrape_applications_publicaccess ($search_url, $info_url_base, $comment | |||||
| preg_match_all($application_pattern, $html, $application_matches, PREG_PATTERN_ORDER); | preg_match_all($application_pattern, $html, $application_matches, PREG_PATTERN_ORDER); | ||||
| foreach ($application_matches[0] as $application_match){ | foreach ($application_matches[0] as $application_match){ | ||||
| //START Duncan's debug | |||||
| //print_r($application_match); | |||||
| //print_r("END"); | |||||
| // END Duncan's debug | |||||
| $detail_pattern = "/<td>([^<])*/"; | $detail_pattern = "/<td>([^<])*/"; | ||||
| preg_match_all($detail_pattern, $application_match, $detail_matches, PREG_PATTERN_ORDER); | preg_match_all($detail_pattern, $application_match, $detail_matches, PREG_PATTERN_ORDER); | ||||
| @@ -36,12 +41,16 @@ function scrape_applications_publicaccess ($search_url, $info_url_base, $comment | |||||
| //match case number | //match case number | ||||
| $casenumber_pattern = "/caseno=([^&]*)/"; | $casenumber_pattern = "/caseno=([^&]*)/"; | ||||
| preg_match($casenumber_pattern, $application_match, $casenumber_matches); | preg_match($casenumber_pattern, $application_match, $casenumber_matches); | ||||
| //START Duncan's debug | |||||
| //print_r($application_match); | |||||
| //var_dump($casenumber_matches); | |||||
| //END Duncan's debug | |||||
| $case_number =""; | $case_number =""; | ||||
| if(sizeof($casenumber_matches)>0){ | if(sizeof($casenumber_matches)>0){ | ||||
| $case_number = str_replace("caseno=","", $casenumber_matches[0]); | $case_number = str_replace("caseno=","", $casenumber_matches[0]); | ||||
| } | } | ||||
| //if we've found a case number, then get the details | //if we've found a case number, then get the details | ||||
| if($case_number !=""){ | if($case_number !=""){ | ||||
| //Comment and info urls | //Comment and info urls | ||||
| @@ -47,4 +47,4 @@ if (isset($_GET['year'])){ | |||||
| $smarty->display("xml.tpl"); | $smarty->display("xml.tpl"); | ||||
| ?> | |||||
| ?> | |||||
| @@ -433,6 +433,14 @@ class SuffolkCoastalParser(AcolnetParser): | |||||
| comments_email_address = "d.c.admin@suffolkcoastal.gov.uk" | comments_email_address = "d.c.admin@suffolkcoastal.gov.uk" | ||||
| class GuildfordParser(AcolnetParser): | |||||
| case_number_tr = 1 | |||||
| reg_date_tr = 7 | |||||
| location_tr = 2 | |||||
| proposal_tr = 3 | |||||
| #http://www.guildford.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&Root=PgeSearch | |||||
| class SurreyHeathParser(AcolnetParser): | class SurreyHeathParser(AcolnetParser): | ||||
| # This is not working yet. | # This is not working yet. | ||||
| # _getSearchResponse is an attempt to work around | # _getSearchResponse is an attempt to work around | ||||
| @@ -476,9 +484,9 @@ class SurreyHeathParser(AcolnetParser): | |||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||
| day = 31 | |||||
| month = 8 | |||||
| year = 2007 | |||||
| day = 22 | |||||
| month = 2 | |||||
| year = 2005 | |||||
| # returns error 400 - bad request | # returns error 400 - bad request | ||||
| #parser = BridgenorthParser() | #parser = BridgenorthParser() | ||||
| @@ -489,7 +497,8 @@ if __name__ == '__main__': | |||||
| # canterbury | # canterbury | ||||
| # results as columns of one table | # results as columns of one table | ||||
| parser = SurreyHeathParser("Surrey Heath", "Surrey Heath", "https://www.public.surreyheath-online.gov.uk/whalecom60b1ef305f59f921/whalecom0/Scripts/PlanningPagesOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||||
| #parser = SurreyHeathParser("Surrey Heath", "Surrey Heath", "https://www.public.surreyheath-online.gov.uk/whalecom60b1ef305f59f921/whalecom0/Scripts/PlanningPagesOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||||
| parser = GuildfordParser("Guildford", "Guildford", "http://www.guildford.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&Root=PgeSearch") | |||||
| print parser.getResults(day, month, year) | print parser.getResults(day, month, year) | ||||
| @@ -131,4 +131,6 @@ | |||||
| "Bracknell Forest Borough Council", "Bracknell Forest", "https://my.bracknell-forest.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" | "Bracknell Forest Borough Council", "Bracknell Forest", "https://my.bracknell-forest.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" | ||||
| "Chiltern District Council", "Chiltern", "https://isa.chiltern.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" | "Chiltern District Council", "Chiltern", "https://isa.chiltern.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" | ||||
| "Hinkley and Bosworth Borough Council", "Hinkley and Bosworth", "https://cx.hinckley-bosworth.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser" | "Hinkley and Bosworth Borough Council", "Hinkley and Bosworth", "https://cx.hinckley-bosworth.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser" | ||||
| "Tendring District Council", "Tendring", "http://195.99.151.54/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" | |||||
| "Tendring District Council", "Tendring", "http://195.99.151.54/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" | |||||
| "Argyl And Bute Council", "Argyl and Bute", "http://www.argyll-bute.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser" | |||||
| "Oxford City Council", "Oxford", "http://uniformpublicaccess.oxford.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" | |||||