From cd9dd408a85387dcb9af3b6db0c8c9243d237602 Mon Sep 17 00:00:00 2001 From: "duncan.parkes" Date: Tue, 3 Apr 2007 17:51:03 +0000 Subject: [PATCH] Update python scrapers to cope with publicaccess sites with dc in the path. --- trunk/python_scrapers/PublicAccess.py | 23 +++++++- trunk/python_scrapers/PublicAccessSites.csv | 61 +++++++++++---------- 2 files changed, 53 insertions(+), 31 deletions(-) diff --git a/trunk/python_scrapers/PublicAccess.py b/trunk/python_scrapers/PublicAccess.py index db635be..b7873ac 100644 --- a/trunk/python_scrapers/PublicAccess.py +++ b/trunk/python_scrapers/PublicAccess.py @@ -13,9 +13,9 @@ cookie_jar = cookielib.CookieJar() from PlanningUtils import fixNewlines, PlanningAuthorityResults, PlanningApplication -search_form_url_end = "tdc/DcApplication/application_searchform.aspx" -search_results_url_end = "tdc/DcApplication/application_searchresults.aspx" -comments_url_end = "tdc/DcApplication/application_comments_entryform.aspx" +search_form_url_end = "DcApplication/application_searchform.aspx" +search_results_url_end = "DcApplication/application_searchresults.aspx" +comments_url_end = "DcApplication/application_comments_entryform.aspx" class PublicAccessParser(HTMLParser.HTMLParser): """This is the class which parses the PublicAccess search results page. @@ -339,3 +339,20 @@ class PublicAccessPropertyPageParser(HTMLParser.HTMLParser): if attr == "value": self.postcode = value + +# These still don't work: + +# Perthshire +#http://193.63.61.22/publicaccess/tdc/DcApplication/application_searchform.aspx +#"Perth and Kinross Council", "Perthshire", "http://193.63.61.22/publicaccess/tdc/" + +# Hambleton +#http://planning.hambleton.gov.uk/publicaccess/tdc/DcApplication/application_searchform.aspx +#"Hambleton District Council", "Hambleton", "http://planning.hambleton.gov.uk/publicaccess/tdc/" + +# These use https: + +# Chiltern +#https://isa.chiltern.gov.uk/publicaccess/tdc/tdc_home.aspx +# Hinckley-Bosworth +#https://cx.hinckley-bosworth.gov.uk/PublicAccess/dc/DcApplication/application_searchform.aspx diff --git a/trunk/python_scrapers/PublicAccessSites.csv b/trunk/python_scrapers/PublicAccessSites.csv index a604ba3..d304f37 100644 --- a/trunk/python_scrapers/PublicAccessSites.csv +++ b/trunk/python_scrapers/PublicAccessSites.csv @@ -1,29 +1,34 @@ "authority_name", "authority_short_name", "base_url" -"City of York Council", "York", "http://planning.york.gov.uk/PublicAccess/" -"Cherwell District Council", "Cherwell", "http://cherweb.cherwell-dc.gov.uk/publicaccess/" -"Angus Council", "Angus", "http://planning.angus.gov.uk/PublicAccess/" -"Huntingdonshire District Council", "Huntingdonshire", "http://planning.huntsdc.gov.uk/publicaccess/" -"South Staffordshire Council", "South Staffs", "https://services.sstaffs.gov.uk/PublicAccess/" -"Bexley Council", "Bexley", "http://publicaccess.bexley.gov.uk/publicaccess/" -"Lancaster City Council", "Lancaster", "http://planapps.lancaster.gov.uk/PublicAccess/" -"Bristol City Council", "Bristol", "http://e2eweb.bristol-city.gov.uk/publicaccess/" -"Portsmouth City Council", "Portsmouth", "http://planning.portsmouth.gov.uk/PublicAccess/" -"The Borough of Oadby and Wigston", "Oadby and Wigston", "http://web.owbc.net/PublicAccess/" -"Test Valley Borough Council", "Test Valley", "http://publicaccess.testvalley.gov.uk/publicaccess/" -"Kings Lynn and West Norfolk Borough Council", "West Norfolk", "http://online.west-norfolk.gov.uk/publicaccess/" -"Sunderland City Council", "Sunderland", "http://www.sunderland.gov.uk/publicaccess/" -"Southampton City Council", "Southampton", "http://publicaccess.southampton.gov.uk/publicaccess/" -"Bath and North East Somerset", "Bath", "http://planning.bathnes.gov.uk/publicaccess/" -"Buckinghamshire County Council", "Buckinghamshire", "http://www.bucksplanning.gov.uk/PublicAccess/" -"Spelthorne Borough Council", "Spelthorne", "http://phoenix.spelthorne.gov.uk/PublicAccess/" -"Stevenage Borough Council", "Stevenage", "http://publicaccess.stevenage.gov.uk/publicaccess/" -"Tonbridge and Malling Borough Council", "Tonbridge", "http://publicaccess.tmbc.gov.uk/publicaccess/" -"Hart District Council", "Hart", "http://publicaccess.hart.gov.uk/publicaccess/" -"Luton Borough Council", "Luton", "http://www.eplan.luton.gov.uk/PublicAccess/" -"Rushmoor Borough Council", "Rushmoor", "http://pa-dc.rushmoor.gov.uk/publicaccess/" -"Blaby District Council", "Blaby", "http://www.blaby.gov.uk/PublicAccess/" -"East Devon District Council", "East Devon", "http://planning.eastdevon.gov.uk/PublicAccess/" -"Mid Devon District Council", "Mid Devon", "http://planning.middevon.gov.uk/publicaccess/" -"Sevenoaks District Council", "Sevenoaks", "http://publicaccess.sevenoaks.gov.uk/publicaccess/" -"Woking Borough Council", "Woking", "http://caps.woking.gov.uk/publicaccess/" -"Basildon District Council", "Basildon", "http://planning.basildon.gov.uk/publicaccess/" \ No newline at end of file +"City of York Council", "York", "http://planning.york.gov.uk/PublicAccess/tdc/" +"Cherwell District Council", "Cherwell", "http://cherweb.cherwell-dc.gov.uk/publicaccess/tdc/" +"Angus Council", "Angus", "http://planning.angus.gov.uk/PublicAccess/tdc/" +"Huntingdonshire District Council", "Huntingdonshire", "http://planning.huntsdc.gov.uk/publicaccess/tdc/" +"South Staffordshire Council", "South Staffs", "https://services.sstaffs.gov.uk/PublicAccess/tdc/" +"Bexley Council", "Bexley", "http://publicaccess.bexley.gov.uk/publicaccess/tdc/" +"Lancaster City Council", "Lancaster", "http://planapps.lancaster.gov.uk/PublicAccess/tdc/" +"Bristol City Council", "Bristol", "http://e2eweb.bristol-city.gov.uk/publicaccess/tdc/" +"Portsmouth City Council", "Portsmouth", "http://planning.portsmouth.gov.uk/PublicAccess/tdc/" +"The Borough of Oadby and Wigston", "Oadby and Wigston", "http://web.owbc.net/PublicAccess/tdc/" +"Test Valley Borough Council", "Test Valley", "http://publicaccess.testvalley.gov.uk/publicaccess/tdc/" +"Kings Lynn and West Norfolk Borough Council", "West Norfolk", "http://online.west-norfolk.gov.uk/publicaccess/tdc/" +"Sunderland City Council", "Sunderland", "http://www.sunderland.gov.uk/publicaccess/tdc/" +"Southampton City Council", "Southampton", "http://publicaccess.southampton.gov.uk/publicaccess/tdc/" +"Bath and North East Somerset", "Bath", "http://planning.bathnes.gov.uk/publicaccess/tdc/" +"Buckinghamshire County Council", "Buckinghamshire", "http://www.bucksplanning.gov.uk/PublicAccess/tdc/" +"Spelthorne Borough Council", "Spelthorne", "http://phoenix.spelthorne.gov.uk/PublicAccess/tdc/" +"Stevenage Borough Council", "Stevenage", "http://publicaccess.stevenage.gov.uk/publicaccess/tdc/" +"Tonbridge and Malling Borough Council", "Tonbridge", "http://publicaccess.tmbc.gov.uk/publicaccess/tdc/" +"Hart District Council", "Hart", "http://publicaccess.hart.gov.uk/publicaccess/tdc/" +"Luton Borough Council", "Luton", "http://www.eplan.luton.gov.uk/PublicAccess/tdc/" +"Rushmoor Borough Council", "Rushmoor", "http://pa-dc.rushmoor.gov.uk/publicaccess/tdc/" +"Blaby District Council", "Blaby", "http://www.blaby.gov.uk/PublicAccess/tdc/" +"East Devon District Council", "East Devon", "http://planning.eastdevon.gov.uk/PublicAccess/tdc/" +"Mid Devon District Council", "Mid Devon", "http://planning.middevon.gov.uk/publicaccess/tdc/" +"Sevenoaks District Council", "Sevenoaks", "http://publicaccess.sevenoaks.gov.uk/publicaccess/tdc/" +"Woking Borough Council", "Woking", "http://caps.woking.gov.uk/publicaccess/tdc/" +"Basildon District Council", "Basildon", "http://planning.basildon.gov.uk/publicaccess/tdc/" +"The City of Edinburgh Council", "Edinburgh", "http://citydev-portal.edinburgh.gov.uk/publicaccess/dc/" +"Fenland District Council", "Fenland", "http://www.fenland.gov.uk/publicaccess/dc/" +"Scarborough Borough Council", "Scarborough", "http://planning.scarborough.gov.uk/publicaccess/dc/" + +