diff --git a/trunk/cgi-bin/AcolnetParser.py b/trunk/cgi-bin/AcolnetParser.py
index a700188..40dcba1 100644
--- a/trunk/cgi-bin/AcolnetParser.py
+++ b/trunk/cgi-bin/AcolnetParser.py
@@ -433,6 +433,14 @@ class SuffolkCoastalParser(AcolnetParser):
comments_email_address = "d.c.admin@suffolkcoastal.gov.uk"
+class GuildfordParser(AcolnetParser):
+ case_number_tr = 1
+ reg_date_tr = 7
+ location_tr = 2
+ proposal_tr = 3
+
+ # Guildford search page (the URL passed to GuildfordParser in __main__): http://www.guildford.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&Root=PgeSearch
+
class SurreyHeathParser(AcolnetParser):
# This is not working yet.
# _getSearchResponse is an attempt to work around
@@ -476,9 +484,9 @@ class SurreyHeathParser(AcolnetParser):
if __name__ == '__main__':
- day = 31
- month = 8
- year = 2007
+ day = 22
+ month = 2
+ year = 2005
# returns error 400 - bad request
#parser = BridgenorthParser()
@@ -489,7 +497,8 @@ if __name__ == '__main__':
# canterbury
# results as columns of one table
- parser = SurreyHeathParser("Surrey Heath", "Surrey Heath", "https://www.public.surreyheath-online.gov.uk/whalecom60b1ef305f59f921/whalecom0/Scripts/PlanningPagesOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
+ #parser = SurreyHeathParser("Surrey Heath", "Surrey Heath", "https://www.public.surreyheath-online.gov.uk/whalecom60b1ef305f59f921/whalecom0/Scripts/PlanningPagesOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
+ parser = GuildfordParser("Guildford", "Guildford", "http://www.guildford.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&Root=PgeSearch")
print parser.getResults(day, month, year)
diff --git a/trunk/docs/include/scraper_support.php b/trunk/docs/include/scraper_support.php
index f98ea34..45d229e 100644
--- a/trunk/docs/include/scraper_support.php
+++ b/trunk/docs/include/scraper_support.php
@@ -10,7 +10,8 @@ require_once('phpcoord.php');
function scrape_applications_publicaccess ($search_url, $info_url_base, $comment_url_base){
$applications = array();
- $application_pattern = "/
([0-9]*)<\/th>.*(?=<\/tr)/U";
//grab the page
$html = safe_scrape_page($search_url);
@@ -21,6 +22,10 @@ function scrape_applications_publicaccess ($search_url, $info_url_base, $comment
preg_match_all($application_pattern, $html, $application_matches, PREG_PATTERN_ORDER);
foreach ($application_matches[0] as $application_match){
+ //START Duncan's debug
+ //print_r($application_match);
+ //print_r("END");
+ // END Duncan's debug
$detail_pattern = "/ | ([^<])*/";
preg_match_all($detail_pattern, $application_match, $detail_matches, PREG_PATTERN_ORDER);
@@ -36,12 +41,16 @@ function scrape_applications_publicaccess ($search_url, $info_url_base, $comment
//match case number
$casenumber_pattern = "/caseno=([^&]*)/";
preg_match($casenumber_pattern, $application_match, $casenumber_matches);
-
+ //START Duncan's debug
+ //print_r($application_match);
+ //var_dump($casenumber_matches);
+ //END Duncan's debug
+
$case_number ="";
if(sizeof($casenumber_matches)>0){
$case_number = str_replace("caseno=","", $casenumber_matches[0]);
}
-
+
//if weve found a caase number, then get the details
if($case_number !=""){
//Comment and info urls
diff --git a/trunk/docs/scrapers/stafford.php b/trunk/docs/scrapers/stafford.php
index 8ed896c..46ad0b7 100644
--- a/trunk/docs/scrapers/stafford.php
+++ b/trunk/docs/scrapers/stafford.php
@@ -47,4 +47,4 @@ if (isset($_GET['year'])){
$smarty->display("xml.tpl");
-?>
\ No newline at end of file
+?>
diff --git a/trunk/python_scrapers/AcolnetParser.py b/trunk/python_scrapers/AcolnetParser.py
index a700188..40dcba1 100644
--- a/trunk/python_scrapers/AcolnetParser.py
+++ b/trunk/python_scrapers/AcolnetParser.py
@@ -433,6 +433,14 @@ class SuffolkCoastalParser(AcolnetParser):
comments_email_address = "d.c.admin@suffolkcoastal.gov.uk"
+class GuildfordParser(AcolnetParser):
+ case_number_tr = 1
+ reg_date_tr = 7
+ location_tr = 2
+ proposal_tr = 3
+
+ # Guildford search page (the URL passed to GuildfordParser in __main__): http://www.guildford.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&Root=PgeSearch
+
class SurreyHeathParser(AcolnetParser):
# This is not working yet.
# _getSearchResponse is an attempt to work around
@@ -476,9 +484,9 @@ class SurreyHeathParser(AcolnetParser):
if __name__ == '__main__':
- day = 31
- month = 8
- year = 2007
+ day = 22
+ month = 2
+ year = 2005
# returns error 400 - bad request
#parser = BridgenorthParser()
@@ -489,7 +497,8 @@ if __name__ == '__main__':
# canterbury
# results as columns of one table
- parser = SurreyHeathParser("Surrey Heath", "Surrey Heath", "https://www.public.surreyheath-online.gov.uk/whalecom60b1ef305f59f921/whalecom0/Scripts/PlanningPagesOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
+ #parser = SurreyHeathParser("Surrey Heath", "Surrey Heath", "https://www.public.surreyheath-online.gov.uk/whalecom60b1ef305f59f921/whalecom0/Scripts/PlanningPagesOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
+ parser = GuildfordParser("Guildford", "Guildford", "http://www.guildford.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&Root=PgeSearch")
print parser.getResults(day, month, year)
diff --git a/trunk/python_scrapers/SitesToGenerate.csv b/trunk/python_scrapers/SitesToGenerate.csv
index f8eef74..59250d3 100644
--- a/trunk/python_scrapers/SitesToGenerate.csv
+++ b/trunk/python_scrapers/SitesToGenerate.csv
@@ -131,4 +131,6 @@
"Bracknell Forest Borough Council", "Bracknell Forest", "https://my.bracknell-forest.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser"
"Chiltern District Council", "Chiltern", "https://isa.chiltern.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser"
"Hinkley and Bosworth Borough Council", "Hinkley and Bosworth", "https://cx.hinckley-bosworth.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser"
-"Tendring District Council", "Tendring", "http://195.99.151.54/publicaccess/tdc/", "PublicAccess", "PublicAccessParser"
\ No newline at end of file
+"Tendring District Council", "Tendring", "http://195.99.151.54/publicaccess/tdc/", "PublicAccess", "PublicAccessParser"
+"Argyll and Bute Council", "Argyll and Bute", "http://www.argyll-bute.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser"
+"Oxford City Council", "Oxford", "http://uniformpublicaccess.oxford.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser"
\ No newline at end of file
|
---|