From f3a7a9f49b39550fa1664e04d115a401e780a2b2 Mon Sep 17 00:00:00 2001 From: "duncan.parkes" Date: Wed, 21 Nov 2007 10:36:52 +0000 Subject: [PATCH] Fix problems with the following scrapers: Rochford - not sure of problem - works with python scraper Chester-le-Street - not sure of problem - works with python scraper Vale of the White Horse - not sure of problem - works with python scraper Corby - changed to https, so using python scraper North East Derbyshire - now tdc not dc Haringey - this can never have worked as it was... the url was wrong. Gravesham - new url Suffolk Coastal - new url (old server still up so no error...) --- docs/scrapers/northeastderbyshire.php | 2 +- python_scrapers/AcolnetParser.py | 15 +++++++++++---- python_scrapers/SitesToGenerate.csv | 10 +++++++--- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/docs/scrapers/northeastderbyshire.php b/docs/scrapers/northeastderbyshire.php index ad184ea..62c7a92 100644 --- a/docs/scrapers/northeastderbyshire.php +++ b/docs/scrapers/northeastderbyshire.php @@ -21,7 +21,7 @@ if (isset($_GET['year'])){ $year = $_GET['year']; } //search url - $search_url = "http://planapps-online.ne-derbyshire.gov.uk/publicaccess/dc/DcApplication/application_searchresults.aspx?searchtype=WEEKLY&selWeeklyListRange=#daterange&weektype=VAL"; + $search_url = "http://planapps-online.ne-derbyshire.gov.uk/publicaccess/tdc/DcApplication/application_searchresults.aspx?searchtype=WEEKLY&selWeeklyListRange=#daterange&weektype=VAL"; $date_range = "{$day}%2F{$month}%2F{$year}%7C{$day}%2F{$month}%2F{$year}"; $search_url = str_replace("#daterange", $date_range, $search_url); diff --git a/python_scrapers/AcolnetParser.py b/python_scrapers/AcolnetParser.py index 8d400dc..7dcbaca 100644 --- a/python_scrapers/AcolnetParser.py +++ b/python_scrapers/AcolnetParser.py @@ -436,10 +436,17 @@ class SouthBedfordshireParser(AcolnetParser): proposal_tr = 6 class SuffolkCoastalParser(AcolnetParser): - case_number_tr = 1 # this one can be got 
by the td class attribute - reg_date_tr = 2 - location_tr = 4 - proposal_tr = 5 +# case_number_tr = 1 # this one can be got by the td class attribute +# reg_date_tr = 2 +# location_tr = 4 +# proposal_tr = 5 + +# New URL with different layout + case_number_tr = 1 + reg_date_tr = 3 + location_tr = 5 + proposal_tr = 6 + comments_email_address = "d.c.admin@suffolkcoastal.gov.uk" diff --git a/python_scrapers/SitesToGenerate.csv b/python_scrapers/SitesToGenerate.csv index e7dee4e..c9a019b 100644 --- a/python_scrapers/SitesToGenerate.csv +++ b/python_scrapers/SitesToGenerate.csv @@ -56,7 +56,7 @@ "Milton Keynes Council", "Milton Keynes", "http://publicaccess.milton-keynes.gov.uk/tdc/", "PublicAccess", "PublicAccessParser" "Coventry City Council", "Coventry", "http://planning.coventry.gov.uk/", "ApplicationSearchServletParser", "CoventrySearchParser" "Alnwick District Council", "Alnwick", "http://services.castlemorpeth.gov.uk:7777/", "ApplicationSearchServletParser", "AlnwickSearchParser" -"Haringey Council", "Haringey", "http://www.planningservices.haringey.gov.uk/", "ApplicationSearchServletParser", "HaringeySearchParser" +"Haringey Council", "Haringey", "http://www.planningservices.haringey.gov.uk/portal/servlets/ApplicationSearchServlet", "ApplicationSearchServletParser", "HaringeySearchParser" "Hartlepool Borough Council", "Hartlepool", "http://eforms.hartlepool.gov.uk:7777/", "ApplicationSearchServletParser", "HartlepoolSearchParser" "North Warwickshire Borough Council", "North Warwickshire", "http://planning.northwarks.gov.uk/", "ApplicationSearchServletParser", "NorthWarksSearchParser" "St Helens Council", "St Helens", "http://212.248.225.150:8080/", "ApplicationSearchServletParser", "StHelensSearchParser" @@ -69,7 +69,7 @@ "Denbighshire County Council", "Denbighshire", "http://planning.denbighshire.gov.uk/", "ApplicationSearchServletParser", "DenbighshireSearchParser" "Wear Valley District Council", "Wear Valley", "http://planning.wearvalley.gov.uk/", 
"ApplicationSearchServletParser", "WearValleySearchParser" "Chorley Borough Council", "Chorley", "http://planning.chorley.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser" -"Gravesham Borough Council", "Gravesham", "http://195.102.67.4/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser" +"Gravesham Borough Council", "Gravesham", "http://plan.gravesham.gov.uk/PublicAccess/TDC/", "PublicAccess", "PublicAccessParser" "London Borough Of Newham", "Newham", "http://pacaps.newham.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" "North West Leicestershire District Council", "NW Leicestershire", "http://paccess.nwleics.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser" "Redditch Borough Council", "Redditch", "http://access.redditchbc.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" @@ -108,7 +108,7 @@ "Renfrewshire Council", "Renfrewshire", "http://planning.renfrewshire.gov.uk/acolnetDCpages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "RenfrewshireParser" "Selby District Council", "Selby", "http://publicaccess.selby.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" "South Bedfordshire District Council", "South Bedfordshire", "http://planning.southbeds.gov.uk/plantech/DCWebPages/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "SouthBedfordshireParser" -"Suffolk Coastal District Council", "Suffolk Coastal", "https://apps3.suffolkcoastal.gov.uk/planningonline/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "SuffolkCoastalParser" +"Suffolk Coastal District Council", "Suffolk Coastal", "http://apps3.suffolkcoastal.gov.uk/DCDataV2/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "SuffolkCoastalParser" "Surrey Heath Borough Council", "Surrey Heath", "https://www.public.surreyheath-online.gov.uk/whalecom60b1ef305f59f921/whalecom0/Scripts/PlanningPagesOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", 
"AcolnetParser", "SurreyHeathParser" "New Forest District Council", "New Forest DC", "http://web3.newforest.gov.uk/planningonline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "NewForestDCParser" "Craven District Council", "Craven", "http://www.planning.cravendc.gov.uk/fastweb/", "FastWeb", "FastWeb" @@ -133,3 +133,7 @@ "Argyl And Bute Council", "Argyl and Bute", "http://www.argyll-bute.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser" "Oxford City Council", "Oxford", "http://uniformpublicaccess.oxford.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" "Havant Borough Council", "Havant", "http://www3.havant.gov.uk/scripts/planningpages/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "HavantParser" +"Rochford District Council", "Rochford", "http://www.rochford.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser" +"Chester-le-Street District Council", "Chester-le-Street", "http://planning.chester-le-street.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" +"Vale of the White Horse District Council", "Vale of the White Horse", "http://planning.whitehorsedc.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" +"Corby Borough Council", "Corby", "https://publicaccess.corby.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser"