From c9c35aa262c81f741e4fb2e7a405df1a0a2aa86f Mon Sep 17 00:00:00 2001 From: "duncan.parkes" Date: Wed, 4 Apr 2007 00:46:36 +0000 Subject: [PATCH] adding new publicaccess scrapers --- CGI/Cornwall.cgi | 31 +++++++++++++++++++++++++++ CGI/Malvern Hills.cgi | 31 +++++++++++++++++++++++++++ CGI/PublicAccess.py | 21 +++++++++++++++++- CGI/South Bucks.cgi | 31 +++++++++++++++++++++++++++ CGI/West Lancashire.cgi | 31 +++++++++++++++++++++++++++ python_scrapers/PublicAccess.py | 21 +++++++++++++++++- python_scrapers/PublicAccessSites.csv | 4 ++++ 7 files changed, 168 insertions(+), 2 deletions(-) create mode 100755 CGI/Cornwall.cgi create mode 100755 CGI/Malvern Hills.cgi create mode 100755 CGI/South Bucks.cgi create mode 100755 CGI/West Lancashire.cgi diff --git a/CGI/Cornwall.cgi b/CGI/Cornwall.cgi new file mode 100755 index 0000000..dad269e --- /dev/null +++ b/CGI/Cornwall.cgi @@ -0,0 +1,31 @@ +#!/usr/bin/python + +# This is the parser for Cornwall County Council. +# it is generated from the file CGITemplate + +import cgi +import cgitb +#cgitb.enable(display=0, logdir="/tmp") + + +form = cgi.FieldStorage() +day = form.getfirst('day') +month = form.getfirst('month') +year = form.getfirst('year') + + +authority_name = "Cornwall County Council" +authority_short_name = "Cornwall" +base_url = "http://planapps.cornwall.gov.uk/publicaccess/tdc/" + +import PublicAccess + +parser = PublicAccess.PublicAccessParser(authority_name, + authority_short_name, + base_url) + +xml = parser.getResults(day, month, year) + +print "Content-Type: text/xml" # XML is following +print +print xml # print the xml diff --git a/CGI/Malvern Hills.cgi b/CGI/Malvern Hills.cgi new file mode 100755 index 0000000..1664f31 --- /dev/null +++ b/CGI/Malvern Hills.cgi @@ -0,0 +1,31 @@ +#!/usr/bin/python + +# This is the parser for Malvern Hills District Council. +# it is generated from the file CGITemplate + +import cgi +import cgitb +#cgitb.enable(display=0, logdir="/tmp") + + +form = cgi.FieldStorage() +day = form.getfirst('day') +month = form.getfirst('month') +year = form.getfirst('year') + + +authority_name = "Malvern Hills District Council" +authority_short_name = "Malvern Hills" +base_url = "http://public.malvernhills.gov.uk/publicaccess/tdc/" + +import PublicAccess + +parser = PublicAccess.PublicAccessParser(authority_name, + authority_short_name, + base_url) + +xml = parser.getResults(day, month, year) + +print "Content-Type: text/xml" # XML is following +print +print xml # print the xml diff --git a/CGI/PublicAccess.py b/CGI/PublicAccess.py index a264689..eaa8083 100644 --- a/CGI/PublicAccess.py +++ b/CGI/PublicAccess.py @@ -350,7 +350,10 @@ class PublicAccessPropertyPageParser(HTMLParser.HTMLParser): # but are always empty for my script... #http://planning.hambleton.gov.uk/publicaccess/tdc/DcApplication/application_searchform.aspx #"Hambleton District Council", "Hambleton", "http://planning.hambleton.gov.uk/publicaccess/tdc/" -"Newcastle City Council", "Newcastle", "http://gispublic.newcastle.gov.uk/publicaccess/dc/" +#"Newcastle City Council", "Newcastle", "http://gispublic.newcastle.gov.uk/publicaccess/dc/" +#"City of Durham Council", "Durham", "http://publicaccess.durhamcity.gov.uk/publicaccess/dc/" +#"Selby District Council", "Selby", "http://publicaccess.selby.gov.uk/publicaccess/dc/" + # Bromley # http://83.244.199.114/publicaccess/ @@ -379,6 +382,22 @@ class PublicAccessPropertyPageParser(HTMLParser.HTMLParser): # Gravesham seems to be broken #"Gravesham Borough Council", "Gravesham", "http://195.102.67.4/PublicAccess/tdc/" + +# Manchester +# Missing the main menu on the left. +#http://www.publicaccess.manchester.gov.uk/publicaccess/tdc/tdc_home.aspx + +#Bracknell Forest - has an error +#https://my.bracknell-forest.gov.uk/publicaccess/tdc/tdc_home.aspx + +# Redditch +# Gives an error +# http://access.redditchbc.gov.uk/publicaccess/tdc/tdc_home.aspx + +# Stirling +# Gives an error +#http://planpub.stirling.gov.uk/publicaccess/tdc/tdc_home.aspx + # These use https: # Chiltern diff --git a/CGI/South Bucks.cgi b/CGI/South Bucks.cgi new file mode 100755 index 0000000..b03fc1f --- /dev/null +++ b/CGI/South Bucks.cgi @@ -0,0 +1,31 @@ +#!/usr/bin/python + +# This is the parser for South Bucks District Council. +# it is generated from the file CGITemplate + +import cgi +import cgitb +#cgitb.enable(display=0, logdir="/tmp") + + +form = cgi.FieldStorage() +day = form.getfirst('day') +month = form.getfirst('month') +year = form.getfirst('year') + + +authority_name = "South Bucks District Council" +authority_short_name = "South Bucks" +base_url = "http://sbdc-paweb.southbucks.gov.uk/publicaccess/tdc/" + +import PublicAccess + +parser = PublicAccess.PublicAccessParser(authority_name, + authority_short_name, + base_url) + +xml = parser.getResults(day, month, year) + +print "Content-Type: text/xml" # XML is following +print +print xml # print the xml diff --git a/CGI/West Lancashire.cgi b/CGI/West Lancashire.cgi new file mode 100755 index 0000000..63b9572 --- /dev/null +++ b/CGI/West Lancashire.cgi @@ -0,0 +1,31 @@ +#!/usr/bin/python + +# This is the parser for West Lancashire District Council. +# it is generated from the file CGITemplate + +import cgi +import cgitb +#cgitb.enable(display=0, logdir="/tmp") + + +form = cgi.FieldStorage() +day = form.getfirst('day') +month = form.getfirst('month') +year = form.getfirst('year') + + +authority_name = "West Lancashire District Council" +authority_short_name = "West Lancashire" +base_url = "http://publicaccess.westlancsdc.gov.uk/PublicAccess/tdc/" + +import PublicAccess + +parser = PublicAccess.PublicAccessParser(authority_name, + authority_short_name, + base_url) + +xml = parser.getResults(day, month, year) + +print "Content-Type: text/xml" # XML is following +print +print xml # print the xml diff --git a/python_scrapers/PublicAccess.py b/python_scrapers/PublicAccess.py index a264689..eaa8083 100644 --- a/python_scrapers/PublicAccess.py +++ b/python_scrapers/PublicAccess.py @@ -350,7 +350,10 @@ class PublicAccessPropertyPageParser(HTMLParser.HTMLParser): # but are always empty for my script... #http://planning.hambleton.gov.uk/publicaccess/tdc/DcApplication/application_searchform.aspx #"Hambleton District Council", "Hambleton", "http://planning.hambleton.gov.uk/publicaccess/tdc/" -"Newcastle City Council", "Newcastle", "http://gispublic.newcastle.gov.uk/publicaccess/dc/" +#"Newcastle City Council", "Newcastle", "http://gispublic.newcastle.gov.uk/publicaccess/dc/" +#"City of Durham Council", "Durham", "http://publicaccess.durhamcity.gov.uk/publicaccess/dc/" +#"Selby District Council", "Selby", "http://publicaccess.selby.gov.uk/publicaccess/dc/" + # Bromley # http://83.244.199.114/publicaccess/ @@ -379,6 +382,22 @@ class PublicAccessPropertyPageParser(HTMLParser.HTMLParser): # Gravesham seems to be broken #"Gravesham Borough Council", "Gravesham", "http://195.102.67.4/PublicAccess/tdc/" + +# Manchester +# Missing the main menu on the left. +#http://www.publicaccess.manchester.gov.uk/publicaccess/tdc/tdc_home.aspx + +#Bracknell Forest - has an error +#https://my.bracknell-forest.gov.uk/publicaccess/tdc/tdc_home.aspx + +# Redditch +# Gives an error +# http://access.redditchbc.gov.uk/publicaccess/tdc/tdc_home.aspx + +# Stirling +# Gives an error +#http://planpub.stirling.gov.uk/publicaccess/tdc/tdc_home.aspx + # These use https: # Chiltern diff --git a/python_scrapers/PublicAccessSites.csv b/python_scrapers/PublicAccessSites.csv index 8a54dd6..3e3daf4 100644 --- a/python_scrapers/PublicAccessSites.csv +++ b/python_scrapers/PublicAccessSites.csv @@ -44,3 +44,7 @@ "Aylesbury Vale District Council", "Aylesbury Vale", "http://eplanning.aylesburyvaledc.gov.uk/tdc/" "Epsom and Ewell Borough Council", "Epsom and Ewell", "http://eplanning.epsom-ewell.gov.uk/publicaccess/tdc/" "Gedling Borough Council", "Gedling", "http://publicaccess.gedling.gov.uk/publicaccess/tdc/" +"Cornwall County Council", "Cornwall", "http://planapps.cornwall.gov.uk/publicaccess/tdc/" +"South Bucks District Council", "South Bucks", "http://sbdc-paweb.southbucks.gov.uk/publicaccess/tdc/" +"Malvern Hills District Council", "Malvern Hills", "http://public.malvernhills.gov.uk/publicaccess/tdc/" +"West Lancashire District Council", "West Lancashire", "http://publicaccess.westlancsdc.gov.uk/PublicAccess/tdc/"