From 45bcc3539f29c54ea4d08174bc6e510fa70c6b82 Mon Sep 17 00:00:00 2001 From: "duncan.parkes" Date: Mon, 6 Aug 2007 16:09:18 +0000 Subject: [PATCH] add New Forest DC --- cgi-bin/AcolnetParser.py | 18 ++++++++---- cgi-bin/New Forest DC.cgi | 29 +++++++++++++++++++ cgi-bin/{New Forest.cgi => New Forest NP.cgi} | 4 +-- python_scrapers/AcolnetParser.py | 18 ++++++++---- python_scrapers/PublicAccessSites.csv | 3 +- 5 files changed, 59 insertions(+), 13 deletions(-) create mode 100755 cgi-bin/New Forest DC.cgi rename cgi-bin/{New Forest.cgi => New Forest NP.cgi} (83%) diff --git a/cgi-bin/AcolnetParser.py b/cgi-bin/AcolnetParser.py index 78ab7de..d5d3121 100644 --- a/cgi-bin/AcolnetParser.py +++ b/cgi-bin/AcolnetParser.py @@ -159,8 +159,8 @@ class AcolnetParser(HTMLParser.HTMLParser): search_form_response = urllib2.urlopen(self.base_url) search_form_contents = search_form_response.read() - outfile = open("tmpfile", "w") - outfile.write(search_form_contents) + #outfile = open("tmpfile", "w") + #outfile.write(search_form_contents) # This sometimes causes a problem in HTMLParser, so let's just get the link # out with a regex... @@ -360,7 +360,7 @@ class LewishamParser(AcolnetParser): ## comments_email_address = "planning@lewisham.gov.uk" ## #action_regex = re.compile("
]*>", re.IGNORECASE) -class NewForestParser(AcolnetParser): +class NewForestNPParser(AcolnetParser): # In this case there is an online comment facility at the # bottom of each view app page... case_number_tr = 1 # this one can be got by the td class attribute @@ -368,6 +368,14 @@ class NewForestParser(AcolnetParser): location_tr = 4 proposal_tr = 5 +class NewForestDCParser(AcolnetParser): + # In this case there is an online comment facility at the + # bottom of each view app page... + case_number_tr = 1 # this one can be got by the td class attribute + reg_date_tr = 2 + location_tr = 5 + proposal_tr = 6 + class NorthWiltshireParser(AcolnetParser): case_number_tr = 1 # this one can be got by the td class attribute reg_date_tr = 3 @@ -379,8 +387,8 @@ class OldhamParser(AcolnetParser): reg_date_tr = 3 location_tr = 6 proposal_tr = 7 - - def _cleanupHTML(self, html): + +def _cleanupHTML(self, html): """There is a bad table end tag in this one. Fix it before we start""" diff --git a/cgi-bin/New Forest DC.cgi b/cgi-bin/New Forest DC.cgi new file mode 100755 index 0000000..29e555b --- /dev/null +++ b/cgi-bin/New Forest DC.cgi @@ -0,0 +1,29 @@ +#!/usr/local/bin/python + +# This is the parser for New Forest District Council. +# it is generated from the file CGITemplate + +import cgi +import cgitb +#cgitb.enable(display=0, logdir="/tmp") + + +form = cgi.FieldStorage() +day = form.getfirst('day') +month = form.getfirst('month') +year = form.getfirst('year') + + +authority_name = "New Forest District Council" +authority_short_name = "New Forest DC" +base_url = "http://web3.newforest.gov.uk/planningonline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch" + +import AcolnetParser + +parser = AcolnetParser.NewForestDCParser(authority_name, authority_short_name, base_url) + +xml = parser.getResults(day, month, year) + +print "Content-Type: text/xml" # XML is following +print +print xml # print the xml diff --git a/cgi-bin/New Forest.cgi b/cgi-bin/New Forest NP.cgi similarity index 83% rename from cgi-bin/New Forest.cgi rename to cgi-bin/New Forest NP.cgi index 39e34b2..67305fa 100755 --- a/cgi-bin/New Forest.cgi +++ b/cgi-bin/New Forest NP.cgi @@ -15,12 +15,12 @@ year = form.getfirst('year') authority_name = "New Forest National Park" -authority_short_name = "New Forest" +authority_short_name = "New Forest NP" base_url = "http://web01.newforestnpa.gov.uk/planningpages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch" import AcolnetParser -parser = AcolnetParser.NewForestParser(authority_name, authority_short_name, base_url) +parser = AcolnetParser.NewForestNPParser(authority_name, authority_short_name, base_url) xml = parser.getResults(day, month, year) diff --git a/python_scrapers/AcolnetParser.py b/python_scrapers/AcolnetParser.py index 78ab7de..d5d3121 100644 --- a/python_scrapers/AcolnetParser.py +++ b/python_scrapers/AcolnetParser.py @@ -159,8 +159,8 @@ class AcolnetParser(HTMLParser.HTMLParser): search_form_response = urllib2.urlopen(self.base_url) search_form_contents = search_form_response.read() - outfile = open("tmpfile", "w") - outfile.write(search_form_contents) + #outfile = open("tmpfile", "w") + #outfile.write(search_form_contents) # This sometimes causes a problem in HTMLParser, so let's just get the link # out with a regex... @@ -360,7 +360,7 @@ class LewishamParser(AcolnetParser): ## comments_email_address = "planning@lewisham.gov.uk" ## #action_regex = re.compile("]*>", re.IGNORECASE) -class NewForestParser(AcolnetParser): +class NewForestNPParser(AcolnetParser): # In this case there is an online comment facility at the # bottom of each view app page... case_number_tr = 1 # this one can be got by the td class attribute @@ -368,6 +368,14 @@ class NewForestParser(AcolnetParser): location_tr = 4 proposal_tr = 5 +class NewForestDCParser(AcolnetParser): + # In this case there is an online comment facility at the + # bottom of each view app page... + case_number_tr = 1 # this one can be got by the td class attribute + reg_date_tr = 2 + location_tr = 5 + proposal_tr = 6 + class NorthWiltshireParser(AcolnetParser): case_number_tr = 1 # this one can be got by the td class attribute reg_date_tr = 3 @@ -379,8 +387,8 @@ class OldhamParser(AcolnetParser): reg_date_tr = 3 location_tr = 6 proposal_tr = 7 - - def _cleanupHTML(self, html): + +def _cleanupHTML(self, html): """There is a bad table end tag in this one. Fix it before we start""" diff --git a/python_scrapers/PublicAccessSites.csv b/python_scrapers/PublicAccessSites.csv index e5e6eb8..cfbba04 100644 --- a/python_scrapers/PublicAccessSites.csv +++ b/python_scrapers/PublicAccessSites.csv @@ -101,7 +101,7 @@ "Hertsmere Borough Council", "Hertsmere", "http://www2.hertsmere.gov.uk/ACOLNET/DCOnline//acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "HertsmereParser" "London Borough of Lewisham", "Lewisham", "http://acolnet.lewisham.gov.uk/lewis-xslpagesdc/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "LewishamParser" "North Hertfordshire District Council", "North Hertfordshire", "http://www.north-herts.gov.uk/dcdataonline/Pages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "NorthHertfordshireParser" -"New Forest National Park", "New Forest", "http://web01.newforestnpa.gov.uk/planningpages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "NewForestParser" +"New Forest National Park", "New Forest NP", "http://web01.newforestnpa.gov.uk/planningpages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "NewForestNPParser" "Bridgnorth District Council", "Bridgnorth", "http://www2.bridgnorth-dc.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "BridgnorthParser" "Carlisle City Council", "Carlisle", "http://planning.carlisle.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "CarlisleParser" "Newcastle City Council", "Newcastle", "http://gispublic.newcastle.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" @@ -112,3 +112,4 @@ "South Bedfordshire District Council", "South Bedfordshire", "http://planning.southbeds.gov.uk/plantech/DCWebPages/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "SouthBedfordshireParser" "Suffolk Coastal District Council", "Suffolk Coastal", "https://apps3.suffolkcoastal.gov.uk/planningonline/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "SuffolkCoastalParser" "Surrey Heath Borough Council", "Surrey Heath", "https://www.public.surreyheath-online.gov.uk/whalecom60b1ef305f59f921/whalecom0/Scripts/PlanningPagesOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "SurreyHeathParser" +"New Forest District Council", "New Forest DC", "http://web3.newforest.gov.uk/planningonline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "NewForestDCParser" \ No newline at end of file