From 3cc4d48397b61e4974ac6e315078998d3a255b33 Mon Sep 17 00:00:00 2001 From: "duncan.parkes" Date: Sat, 23 Aug 2008 15:20:27 +0000 Subject: [PATCH] Some debug (mostly for westminster). --- trunk/python_scrapers/AcolnetParser.py | 4 ++-- trunk/python_scrapers/PublicAccess.py | 7 ++++--- trunk/python_scrapers/SwiftLG.py | 4 ++-- trunk/python_scrapers/Westminster.py | 11 +++++++++++ 4 files changed, 19 insertions(+), 7 deletions(-) diff --git a/trunk/python_scrapers/AcolnetParser.py b/trunk/python_scrapers/AcolnetParser.py index 2ddf105..527f207 100644 --- a/trunk/python_scrapers/AcolnetParser.py +++ b/trunk/python_scrapers/AcolnetParser.py @@ -345,14 +345,14 @@ if __name__ == '__main__': year = 2008 #parser = AcolnetParser("Babergh", "Babergh", "http://planning.babergh.gov.uk/dcdatav2//acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") -# parser = AcolnetParser("Barnet", "Barnet", "http://194.75.183.100/planning-cases/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch") + parser = AcolnetParser("Barnet", "Barnet", "http://194.75.183.100/planning-cases/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch") #parser = AcolnetParser("Basingstoke", "Basingstoke", "http://planning.basingstoke.gov.uk/DCOnline2/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch") #parser = BassetlawParser("Bassetlaw", "Bassetlaw", "http://www.bassetlaw.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") #parser = AcolnetParser("Bolton", "Bolton", "http://www.planning.bolton.gov.uk/DCOnlineV2/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch") #parser = BridgnorthParser("Bridgnorth", "Bridgnorth", "http://www2.bridgnorth-dc.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch") #parser = AcolnetParser("Bury", "Bury", "http://e-planning.bury.gov.uk/DCWebPages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") #parser = CanterburyParser("Canterbury", "Canterbury", "http://planning.canterbury.gov.uk/scripts/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch") - parser = AcolnetParser("Carlisle", "Carlisle", "http://planning.carlisle.gov.uk/PlanData/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") +# parser = AcolnetParser("Carlisle", "Carlisle", "http://planning.carlisle.gov.uk/PlanData/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") #parser = AcolnetParser("Croydon", "Croydon", "http://planning.croydon.gov.uk/DCWebPages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") #parser = AcolnetParser("Derby", "Derby", "http://eplanning.derby.gov.uk/acolnet/planningpages02/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch") #parser = AcolnetParser("East Lindsey", "East Lindsey", "http://www.e-lindsey.gov.uk/planning/AcolnetCGI.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser") diff --git a/trunk/python_scrapers/PublicAccess.py b/trunk/python_scrapers/PublicAccess.py index 876b9f9..323b41b 100644 --- a/trunk/python_scrapers/PublicAccess.py +++ b/trunk/python_scrapers/PublicAccess.py @@ -350,8 +350,8 @@ class PublicAccessPropertyPageParser(HTMLParser.HTMLParser): if __name__ == '__main__': - day = 20 - month = 5 + day = 1 + month = 8 year = 2008 #parser = PublicAccessParser("East Northants", "East Northants", "http://publicaccesssrv.east-northamptonshire.gov.uk/PublicAccess/tdc/", True) @@ -359,6 +359,7 @@ if __name__ == '__main__': #parser = PublicAccessParser("Hambleton District Council", "Hambleton", "http://planning.hambleton.gov.uk/publicaccess/tdc/", True) #parser = PublicAccessParser("Durham City Council", "Durham", "http://publicaccess.durhamcity.gov.uk/publicaccess/tdc/", True) #parser = PublicAccessParser("Moray Council", "Moray", "http://public.moray.gov.uk/publicaccess/tdc/", True) - parser = PublicAccessParser("Sheffield City Council", "Sheffield", "http://planning.sheffield.gov.uk/publicaccess/tdc/") +# parser = PublicAccessParser("Sheffield City Council", "Sheffield", "http://planning.sheffield.gov.uk/publicaccess/tdc/") + parser = PublicAccessParser("London Borough of Barking and Dagenham", "Barking and Dagenham", "http://paweb.barking-dagenham.gov.uk/PublicAccess/tdc/") print parser.getResults(day, month, year) diff --git a/trunk/python_scrapers/SwiftLG.py b/trunk/python_scrapers/SwiftLG.py index b084fd6..dbf4e44 100644 --- a/trunk/python_scrapers/SwiftLG.py +++ b/trunk/python_scrapers/SwiftLG.py @@ -211,10 +211,10 @@ if __name__ == '__main__': # parser = SwiftLGParser("Pembrokeshire County Council", "Pembrokeshire", "http://planning.pembrokeshire.gov.uk/swiftlg/apas/run/") # parser = SwiftLGParser("Rochdale Metropolitan Borough Council", "Rochdale", "http://www.rochdale.gov.uk/swiftlg/apas/run/") # parser = SloughParser("Slough", "Slough", "http://www2.slough.gov.uk/swiftlg/apas/run/") -# parser = SwiftLGParser("Snowdonia National Park", "Snowdonia", "http://www.snowdonia-npa.gov.uk/swiftlg/apas/run/") + parser = SwiftLGParser("Snowdonia National Park", "Snowdonia", "http://www.snowdonia-npa.gov.uk/swiftlg/apas/run/") # parser = SwiftLGParser("St Edmundsbury", "Bury St Edmunds", "http://www.stedmundsbury.gov.uk/swiftlg/apas/run/") # parser = MacclesfieldParser("Macclesfield", "Macclesfield", "http://www.planportal.macclesfield.gov.uk/swiftlg/apas/run/") - parser = SwiftLGParser("Daventry District Council", "Daventry", "http://62.231.149.150/swiftlg/apas/run/wphappcriteria.display") +# parser = SwiftLGParser("Daventry District Council", "Daventry", "http://62.231.149.150/swiftlg/apas/run/wphappcriteria.display") # parser = SwiftLGParser("Warrington Borough Council", "Warrington", "http://212.248.237.123:8080/swiftlg/apas/run/wphappcriteria.display") # parser = SwiftLGParser("Cannock Chase District Council", "Cannock Chase", "http://planning.cannockchasedc.com/swiftlg/apas/run/wphappcriteria.display") # parser = SwiftLGParser("London Borough of Enfield", "Enfield", "http://forms.enfield.gov.uk/swiftlg/apas/run/wphappcriteria.display") diff --git a/trunk/python_scrapers/Westminster.py b/trunk/python_scrapers/Westminster.py index 743e721..c653d9a 100644 --- a/trunk/python_scrapers/Westminster.py +++ b/trunk/python_scrapers/Westminster.py @@ -16,6 +16,8 @@ import urlparse import datetime, time import cgi +import sys + from BeautifulSoup import BeautifulSoup from PlanningUtils import PlanningApplication, \ @@ -58,7 +60,13 @@ class WestminsterParser: while post_data: # Now get the search page + + sys.stderr.write("Fetching: %s" %self.base_url) + sys.stderr.write("post data: %s" %post_data) + response = urllib2.urlopen(self.base_url, post_data) + + sys.stderr.write("Got it") soup = BeautifulSoup(response.read()) results_form = soup.find("form", {"name": "currentsearchresultsNext"}) @@ -87,7 +95,10 @@ class WestminsterParser: # To get the comment url, we're going to have to go to each info url :-( + sys.stderr.write("Fetching: %s" %application.info_url) info_response = urllib2.urlopen(application.info_url) + sys.stderr.write("Got it") + info_soup = BeautifulSoup(info_response) comment_nav_string = info_soup.find(text="Comment on this case")