From 0b3199fa8527f938b5a5deaa8b004d22ed2bdae8 Mon Sep 17 00:00:00 2001 From: "duncan.parkes" Date: Thu, 31 Jan 2008 09:39:49 +0000 Subject: [PATCH] Fix a bug in almost all of the Acolnet scrapers. Repeat to self: haggis is on python 2.4, haggis is on python 2.4, ... --- python_scrapers/AcolnetParser.py | 34 ++++++++++++++++++++++------- python_scrapers/PlanningExplorer.py | 2 +- python_scrapers/SitesToGenerate.csv | 2 +- 3 files changed, 28 insertions(+), 10 deletions(-) diff --git a/python_scrapers/AcolnetParser.py b/python_scrapers/AcolnetParser.py index ee47b16..eaa2f96 100644 --- a/python_scrapers/AcolnetParser.py +++ b/python_scrapers/AcolnetParser.py @@ -4,7 +4,9 @@ import urllib2 import urlparse from datetime import date -import datetime + +# Use this when we have python 2.5 +#import datetime import re @@ -54,8 +56,11 @@ class AcolnetParser: def _getDateReceived(self, app_table): date_str = ''.join(app_table.find(text="Registration Date:").findNext("td").string.strip().split()) - - return datetime.datetime.strptime(date_str, self.received_date_format) + day, month, year = date_str.split('/') + return date(int(year), int(month), int(day)) + + # This will be better from python 2.5 + #return datetime.datetime.strptime(date_str, self.received_date_format) def _getAddress(self, app_table): return app_table.find(text="Location:").findNext("td").string.strip() @@ -226,7 +231,11 @@ class CanterburyParser(AcolnetParser): def _getDateReceived(self, app_table): date_str = app_table.findAll("td")[3].string.strip() - return datetime.datetime.strptime(date_str, self.received_date_format) + day, month, year = date_str.split('/') + return date(int(year), int(month), int(day)) + + # This will be better once we have python 2.5 + #return datetime.datetime.strptime(date_str, self.received_date_format) def _getAddress(self, app_table): return app_table.findAll("td")[1].string.strip() @@ -253,7 +262,11 @@ class SouthwarkParser(AcolnetParser): def _getDateReceived(self, app_table): date_str = ''.join(app_table.find(text="Statutory start date:").findNext("td").string.strip().split()) - return datetime.datetime.strptime(date_str, self.received_date_format) + day, month, year = date_str.split('/') + return date(int(year), int(month), int(day)) + + # Use this once we have python 2.5 + #return datetime.datetime.strptime(date_str, self.received_date_format) class SurreyHeathParser(AcolnetParser): # This is not working yet. @@ -311,6 +324,11 @@ class BoltonParser(AcolnetParser): def _getCouncilReference(self, app_table): return app_table.findAll("a")[1].string.strip() +class CarlisleParser(AcolnetParser): + def _getCouncilReference(self, app_table): + return app_table.findAll("a")[1].string.strip() + + class LewishamParser(AcolnetParser): def _getCouncilReference(self, app_table): return app_table.findAll("a")[1].string.strip() @@ -327,7 +345,7 @@ if __name__ == '__main__': #parser = BridgnorthParser("Bridgnorth", "Bridgnorth", "http://www2.bridgnorth-dc.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch") #parser = AcolnetParser("Bury", "Bury", "http://e-planning.bury.gov.uk/DCWebPages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") #parser = CanterburyParser("Canterbury", "Canterbury", "http://planning.canterbury.gov.uk/scripts/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch") - #parser = AcolnetParser("Carlisle", "Carlisle", "http://planning.carlisle.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") + #parser = CarlisleParser("Carlisle", "Carlisle", "http://planning.carlisle.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") #parser = AcolnetParser("Croydon", "Croydon", "http://planning.croydon.gov.uk/DCWebPages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") #parser = AcolnetParser("Derby", "Derby", "http://eplanning.derby.gov.uk/acolnet/planningpages02/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch") #parser = AcolnetParser("East Lindsey", "East Lindsey", "http://www.e-lindsey.gov.uk/planning/AcolnetCGI.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser") @@ -335,9 +353,9 @@ if __name__ == '__main__': #parser = AcolnetParser("Fylde", "Fylde", "http://www2.fylde.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") #parser = AcolnetParser("Guildford", "Guildford", "http://www.guildford.gov.uk/DLDC_Version_2/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch") #parser = AcolnetParser("Harlow", "Harlow", "http://planning.harlow.gov.uk/PlanningSearch/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch") - #parser = AcolnetParser("Havant", "Havant", "http://www3.havant.gov.uk/scripts/planningpages/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch") + parser = AcolnetParser("Havant", "Havant", "http://www3.havant.gov.uk/scripts/planningpages/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch") #parser = AcolnetParser("Hertsmere", "Hertsmere", "http://www2.hertsmere.gov.uk/ACOLNET/DCOnline//acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") - parser = LewishamParser("Lewisham", "Lewisham", "http://acolnet.lewisham.gov.uk/lewis-xslpagesdc/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.PgeSearch") + #parser = LewishamParser("Lewisham", "Lewisham", "http://acolnet.lewisham.gov.uk/lewis-xslpagesdc/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.PgeSearch") #parser = AcolnetParser("Mid Suffolk", "Mid Suffolk", "http://planning.midsuffolk.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") #parser = NewForestDCParser("New Forest District Council", "New Forest DC", "http://web3.newforest.gov.uk/planningonline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") #parser = NewForestNPAParser("New Forest National Park Authority", "New Forest NPA", "http://web01.newforestnpa.gov.uk/planningpages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") diff --git a/python_scrapers/PlanningExplorer.py b/python_scrapers/PlanningExplorer.py index 2ab9230..8a5a7b6 100644 --- a/python_scrapers/PlanningExplorer.py +++ b/python_scrapers/PlanningExplorer.py @@ -564,7 +564,7 @@ if __name__ == '__main__': #parser = TamworthParser("Tamworth Borough Council", "Tamworth", "http://80.1.64.77/") #parser = TraffordParser("Trafford Council", "Trafford", "http://planning.trafford.gov.uk/") #parser = WestOxfordshireParser("West Oxfordshire District Council", "West Oxfordshire", "http://planning.westoxon.gov.uk/") - print parser.getResults(1, 11, 2007) + print parser.getResults(22, 1, 2008) # To Do diff --git a/python_scrapers/SitesToGenerate.csv b/python_scrapers/SitesToGenerate.csv index 04dd055..7715390 100644 --- a/python_scrapers/SitesToGenerate.csv +++ b/python_scrapers/SitesToGenerate.csv @@ -102,7 +102,7 @@ "North Hertfordshire District Council", "North Hertfordshire", "http://www.north-herts.gov.uk/dcdataonline/Pages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "AcolnetParser" "New Forest National Park", "New Forest NP", "http://web01.newforestnpa.gov.uk/planningpages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "NewForestNPAParser" "Bridgnorth District Council", "Bridgnorth", "http://www2.bridgnorth-dc.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "BridgnorthParser" -"Carlisle City Council", "Carlisle", "http://planning.carlisle.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser" +"Carlisle City Council", "Carlisle", "http://planning.carlisle.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "CarlisleParser" "Newcastle City Council", "Newcastle", "http://gispublic.newcastle.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" "North Wiltshire District Council", "North Wiltshire", "http://planning.northwilts.gov.uk/DCOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser" "Oldham Metropolitan Borough Council", "Oldham", "http://planning.oldham.gov.uk/planning/AcolNetCGI.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "OldhamParser"