Add Blackpool and Greenwich (acolnet sites - patch from Peter Collingbourne). Fixes to Ocella parser (also from Peter); adds Bridgend, Castle Point, Great Yarmouth, Havering. Fix typo and escaping in getinvolved (patch from David Sheldon). import/raw
@@ -7,13 +7,13 @@ | |||
You can help by writing a <a href="http://en.wikipedia.org/wiki/Screen_scraping">screen scraper</a> for your local authority that we can import into planningalerts.com. There are only 2 criteria for the screen scraper: | |||
</p> | |||
<ol> | |||
<li>That it can output data in the following format: <a href="http://www.planningalerts.com/lambeth.xml">http://www.planningalerts.com/lambeth.xml</a></li> | |||
<li>That it can accept a query sting in the format day=X&month=Y&year=Z</li> | |||
<li>That it can output data in the following format: <a href="/lambeth.xml">http://www.planningalerts.com/lambeth.xml</a></li> | |||
<li>That it can accept a query string in the format day=X&amp;month=Y&amp;year=Z</li> | |||
</ol> | |||
<p> | |||
Other than that it's up to you. It can be in any language. You can host them yourself or we can host it for you. | |||
</p> | |||
<p><span class="highlight">You can grab the code for this site and view some developent tickets <a href="http://code.google.com/p/planningalerts/">here</a> and join our developer mailing list <a href="http://groups.google.com/group/planningalerts-dev"> here</a>.</span> | |||
<p><span class="highlight">You can grab the code for this site and view some development tickets <a href="http://code.google.com/p/planningalerts/">here</a> and join our developer mailing list <a href="http://groups.google.com/group/planningalerts-dev"> here</a>.</span> | |||
</p> | |||
<h3>I work for a local authority and would like to make our data available</h3> | |||
<p> | |||
@@ -34,6 +34,7 @@ end_head_regex = re.compile("</head>?", re.IGNORECASE) | |||
class AcolnetParser: | |||
received_date_label = "Registration Date:" | |||
received_date_format = "%d/%m/%Y" | |||
comment_qs_template = "ACTION=UNWRAP&RIPNAME=Root.PgeCommentForm&TheSystemkey=%s" | |||
@@ -55,7 +56,7 @@ class AcolnetParser: | |||
return app_table.a.string.strip() | |||
def _getDateReceived(self, app_table): | |||
date_str = ''.join(app_table.find(text="Registration Date:").findNext("td").string.strip().split()) | |||
date_str = ''.join(app_table.find(text=self.received_date_label).findNext("td").string.strip().split()) | |||
day, month, year = date_str.split('/') | |||
return date(int(year), int(month), int(day)) | |||
@@ -205,6 +206,15 @@ class BridgnorthParser(AcolnetParser): | |||
#http://www2.bridgnorth-dc.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeCommentForm&TheSystemkey=46958 | |||
return self._current_application.info_url.replace("NewPages", "PgeCommentForm") | |||
class BlackpoolParser(AcolnetParser):
    """Acolnet parser variant for Blackpool.

    Differs from the base parser in three ways: the received date is
    labelled "Application Date:", the results are held in tables with the
    CSS class "acolnet-results-table", and the comment link points at a
    separate neighbour response form keyed by the application reference.
    """

    # Label searched for when locating the received date cell.
    received_date_label = "Application Date:"

    def _getResultsSections(self, soup):
        """Return every results table found in the parsed page *soup*."""
        table_attrs = {"class": "acolnet-results-table"}
        return soup.findAll("table", table_attrs)

    def _getCommentUrl(self, app_table):
        """Build the comment form URL for the application in *app_table*."""
        council_reference = self._getCouncilReference(app_table)
        # Slashes in the reference are percent-encoded so that it can be
        # passed as a single query string value.
        encoded_reference = council_reference.replace('/', '%2F')
        form_url = "https://www.blackpool.gov.uk/Services/M-R/PlanningApplications/Forms/PlanningNeighbourResponseForm.htm?Application_No="
        return form_url + encoded_reference
class CanterburyParser(AcolnetParser): | |||
"""Here the apps are one row each in a big table.""" | |||
@@ -227,14 +237,17 @@ class CanterburyParser(AcolnetParser): | |||
def _getDescription(self, app_table): | |||
return app_table.findAll("td")[2].string.strip() | |||
#Kensington and chelsea is sufficiently different, it may as well be handled separately | |||
class GreenwichParser(AcolnetParser):
    """Acolnet parser variant for Greenwich.

    Overrides the received date label (note the lower-case "date"), the
    query string template used for the comment form, and the way the info
    URL is derived.
    """

    # Label searched for when locating the received date cell.
    received_date_label = "Registration date:"
    # Query string template for the comment form; %s is the system key.
    comment_qs_template = "ACTION=UNWRAP&RIPNAME=Root.PgeCommentNeighbourForm&TheSystemkey=%s"

    def _getInfoUrl(self, app_table):
        """Return the base class info URL with the CGI script name inserted.

        Only the first occurrence of '/?' is rewritten to
        '/acolnetcgi.gov?'.
        """
        # NOTE(review): presumably the links on this site omit the script
        # name before the query string - confirm against the live pages.
        base_info_url = AcolnetParser._getInfoUrl(self, app_table)
        return base_info_url.replace('/?', '/acolnetcgi.gov?', 1)

# Mid Bedfordshire - there is an acolnet here, but you have to have a username
# and password to access it!
#Kensington and chelsea is sufficiently different, it may as well be handled separately | |||
class MidBedsParser(AcolnetParser):
    """Acolnet parser variant for Mid Bedfordshire."""

    def _getCouncilReference(self, app_table):
        """Return the stripped text of the second link in *app_table*."""
        links = app_table.findAll("a")
        reference_link = links[1]
        return reference_link.string.strip()
class OldhamParser(AcolnetParser): | |||
@@ -361,11 +374,11 @@ if __name__ == '__main__': | |||
#parser = AcolnetParser("Suffolk Coastal", "Suffolk Coastal", "http://apps3.suffolkcoastal.gov.uk/DCDataV2/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
# parser = AcolnetParser("Surrey Heath", "Surrey Heath", "https://www.public.surreyheath-online.gov.uk/whalecom60b1ef305f59f921/whalecom0/Scripts/PlanningPagesOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
#parser = AcolnetParser("Stockport Metropolitan Borough Council", "Stockport", "http://planning.stockport.gov.uk/PlanningData/AcolNetCGI.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
# parser = MidBedsParser("Mid Bedfordshire District Council", "Mid Beds", "http://www.midbeds.gov.uk/acolnetDC/DCpages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
# parser = AcolnetParser("Cambridgeshire County Council", "Cambridgeshire", "http://planapps2.cambridgeshire.gov.uk/DCWebPages/AcolNetCGI.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
parser = AcolnetParser("East Hampshire District Council", "East Hampshire", "http://planningdevelopment.easthants.gov.uk/dconline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
# parser = AcolnetParser("East Hampshire District Council", "East Hampshire", "http://planningdevelopment.easthants.gov.uk/dconline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
# parser = AcolnetParser("Stockport Metropolitan Borough Council", "Stockport", "http://planning.stockport.gov.uk/PlanningData/AcolNetCGI.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
# parser = BlackpoolParser("Blackpool Borough Council", "Blackpool", "http://www2.blackpool.gov.uk/PlanningApplications/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
parser = GreenwichParser("London Borough of Greenwich", "Greenwich", "http://onlineplanning.greenwich.gov.uk/acolnet/planningpages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
print parser.getResults(day, month, year) | |||
@@ -50,6 +50,7 @@ class OcellaParser: | |||
# These will be used to store the column numbers of the appropriate items in the results table | |||
self.reference_col = None | |||
self.address_col = None | |||
self.applicant_col = None | |||
self.description_col = None | |||
self.received_date_col = None | |||
self.accepted_date_col = None | |||
@@ -59,6 +60,7 @@ class OcellaParser: | |||
# First get the search page | |||
get_request = urllib2.Request(self.base_url) | |||
get_request.add_header('Accept', 'text/html') | |||
get_response = urllib2.urlopen(get_request) | |||
cookie_jar.extract_cookies(get_response, get_request) | |||
@@ -75,6 +77,14 @@ class OcellaParser: | |||
# but it seems we don't need it... | |||
session_id = None | |||
# Unless we retrieve the correct form name, we will simply get the last week's applications | |||
submit_tag = get_soup.find('input', {'value': 'Search'}) or get_soup.find('input', {'value': 'Search for Applications'}) or get_soup.find('input', {'value': 'Submit'}) | |||
try: | |||
submit_name = submit_tag['name'] | |||
form_name = submit_name.split('.')[0] | |||
except TypeError: | |||
form_name = 'FRM_PLANNING_LIST' | |||
# # From Breckland | |||
# p_object_name=FRM_WEEKLY_LIST.DEFAULT.SUBMIT_TOP.01 | |||
@@ -88,21 +98,23 @@ class OcellaParser: | |||
# FRM_WEEKLY_LIST.DEFAULT.PARISH.01= | |||
post_data = urllib.urlencode( | |||
[('p_object_name', 'FRM_WEEKLY_LIST.DEFAULT.SUBMIT_TOP.01'), | |||
[('p_object_name', form_name + '.DEFAULT.SUBMIT_TOP.01'), | |||
('p_instance', '1'), | |||
('p_event_type', 'ON_CLICK'), | |||
('p_user_args', ''), | |||
('p_session_id', session_id), | |||
('p_page_url', self.base_url), | |||
('FRM_WEEKLY_LIST.DEFAULT.START_DATE.01', search_date.strftime(search_date_format)), | |||
('FRM_WEEKLY_LIST.DEFAULT.END_DATE.01', search_date.strftime(search_date_format)), | |||
('FRM_WEEKLY_LIST.DEFAULT.PARISH.01', ''), | |||
(form_name + '.DEFAULT.AGENT.01', ''), | |||
(form_name + '.DEFAULT.START_DATE.01', search_date.strftime(search_date_format)), | |||
(form_name + '.DEFAULT.END_DATE.01', search_date.strftime(search_date_format)), | |||
(form_name + '.DEFAULT.PARISH.01', ''), | |||
] | |||
) | |||
post_request = urllib2.Request(action, post_data) | |||
cookie_jar.add_cookie_header(post_request) | |||
post_request.add_header('Accept', 'text/html') | |||
post_request.add_header('Referer', self.base_url) | |||
post_response = cookie_handling_opener.open(post_request) | |||
@@ -119,10 +131,12 @@ class OcellaParser: | |||
th_index = 0 | |||
for th in ths: | |||
th_content = th.font.string.strip() | |||
if th_content == 'Reference' or th_content == 'Application Ref': | |||
if th_content == 'Reference' or th_content == 'Application Ref' or th_content == 'Application Number': | |||
self.reference_col = th_index | |||
elif th_content == 'Location': | |||
self.address_col = th_index | |||
elif th_content == 'Applicant Details': | |||
self.applicant_col = th_index | |||
elif th_content == 'Proposal': | |||
self.description_col = th_index | |||
elif th_content == 'Development Description': | |||
@@ -159,8 +173,12 @@ class OcellaParser: | |||
self._current_application.address = tds[self.address_col].font.string.strip() | |||
self._current_application.postcode = getPostcodeFromText(self._current_application.address) | |||
if self._current_application.postcode is None and self.applicant_col is not None: | |||
# won't always be accurate to do this but better than nothing (needed for Havering) | |||
self._current_application.postcode = getPostcodeFromText(tds[self.applicant_col].font.string.strip()) | |||
self._current_application.description = tds[self.description_col].font.string.strip() | |||
self._current_application.info_url = tds[self.reference_col].a['href'] | |||
# seems to be dependent on the implementation whether the URL is encoded (e.g. Great Yarmouth does this), so we cannot do anything more "standard" | |||
self._current_application.info_url = urlparse.urljoin(post_response.geturl(), tds[self.reference_col].a['href'].replace('&amp;','&')) | |||
# This is what a comment url looks like | |||
# It seems to be no problem to remove the sessionid (which is in any case blank...) | |||
@@ -184,20 +202,13 @@ if __name__ == '__main__': | |||
# parser = OcellaParser("Ellesmere Port", "Ellesmere Port", "http://ocella.epnbc.gov.uk/portal/page?_pageid=33,38205&_dad=portal&_schema=PORTAL") | |||
# parser = OcellaParser("Fareham", "Fareham", "http://eocella.fareham.gov.uk/portal/page?_pageid=33,31754&_dad=portal&_schema=PORTAL") | |||
# parser = OcellaParser("Hillingdon", "Hillingdon", "http://w09.hillingdon.gov.uk/portal/page?_pageid=33,82093&_dad=portal&_schema=PORTAL") | |||
# parser = OcellaParser("Middlesbrough", "Middlesbrough", "http://planserv.middlesbrough.gov.uk/portal/page?_pageid=33,4166&_dad=portal&_schema=PORTAL") | |||
# parser = OcellaParser("North East Lincolnshire", "North East Lincolnshire", "http://planning.nelincs.gov.uk/portal/page?_pageid=33,68034&_dad=portal&_schema=PORTAL") | |||
# parser = OcellaParser("Uttlesford", "Uttlesford", "http://planning.uttlesford.gov.uk/portal/page?_pageid=33,35447&_dad=portal&_schema=PORTAL") | |||
# Bad status line? Try changing browser id string? | |||
# parser = OcellaParser("Middlesbrough", "Middlesbrough", "http://planserv.middlesbrough.gov.uk/portal/page?_pageid=33,4178&_dad=portal&_schema=PORTAL") | |||
# parser = OcellaParser("North East Lincolnshire", "North East Lincolnshire", "http://planning.nelincs.gov.uk/portal/page?_pageid=33,64104&_dad=portal&_schema=PORTAL") | |||
# parser = OcellaParser("Uttlesford", "Uttlesford", "http://planning.uttlesford.gov.uk/portal/page/portal/plan/weekly") | |||
# parser = OcellaParser("Bridgend", "Bridgend", "http://eplan.bridgend.gov.uk:7778/portal/page?_pageid=55,31779&_dad=portal&_schema=PORTAL") | |||
# Post never comes back | |||
# parser = OcellaParser("Havering", "Havering", "http://planning.havering.gov.uk/portal/page?_pageid=33,1026&_dad=portal&_schema=PORTAL") | |||
# Can't find the URL similar to the others, even though it is clearly Ocella | |||
# We get a 406 at the moment. Try browser id string? | |||
parser = OcellaParser("Great Yarmouth", "Great Yarmouth", "http://planning.great-yarmouth.gov.uk/portal/page/portal/plan/search") | |||
parser = OcellaParser("Havering", "Havering", "http://planning.havering.gov.uk/portal/page?_pageid=33,1026&_dad=portal&_schema=PORTAL") | |||
# parser = OcellaParser("Castle Point", "Castle Point", "http://planning.castlepoint.gov.uk/portal/page?_pageid=35,38205&_dad=portal&_schema=PORTAL") | |||
# parser = OcellaParser("Great Yarmouth", "Great Yarmouth", "http://planning.great-yarmouth.gov.uk/portal/page/portal/plan/weekly") | |||
print parser.getResults(21,5,2008) | |||
@@ -192,7 +192,6 @@ | |||
"Chichester District Council", "Chichester", "http://pa.chichester.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser" | |||
"London Borough of Barking and Dagenham", "Barking and Dagenham", "http://idoxwam.lbbd.gov.uk:8081/WAM/pas/searchApplications.do", "WAM", "WAMParser" | |||
"Braintree District Council", "Braintree", "http://planningapp.braintree.gov.uk/WAM1/weeklyApplications.do", "WAM", "BraintreeParser" | |||
"Castle Point Borough Council", "Castle Point", "http://wam.castlepoint.gov.uk/WAM/pas/searchApplications.do", "WAM", "WAMParser" | |||
"Colchester Borough Council", "Colchester", "http://www.planning.colchester.gov.uk/WAM/weeklyApplications.do", "WAM", "BraintreeParser" | |||
"East Lothian Council", "East Lothian", "http://www.planning.eastlothian.gov.uk/WAM/pas/searchApplications.do", "WAM", "WAMParser" | |||
"North Somerset Council", "North Somerset", "http://wam.n-somerset.gov.uk/MULTIWAM/weeklyApplications.do", "WAM", "BraintreeParser" | |||
@@ -213,9 +212,9 @@ | |||
"Ellesmere Port and Neston Borough Council", "Ellesmere Port", "http://ocella.epnbc.gov.uk/portal/page?_pageid=33,38205&_dad=portal&_schema=PORTAL", "Ocella", "OcellaParser" | |||
"Fareham Borough Council", "Fareham", "http://eocella.fareham.gov.uk/portal/page?_pageid=33,31754&_dad=portal&_schema=PORTAL", "Ocella", "OcellaParser" | |||
"London Borough of Hillingdon", "Hillingdon", "http://w09.hillingdon.gov.uk/portal/page?_pageid=33,82093&_dad=portal&_schema=PORTAL", "Ocella", "OcellaParser" | |||
"North East Lincolnshire Council", "North East Lincolnshire", "http://planning.nelincs.gov.uk/portal/page?_pageid=33,68034&_dad=portal&_schema=PORTAL", "Ocella", "OcellaParser" | |||
"Middlesbrough Borough Council", "Middlesbrough", "http://planserv.middlesbrough.gov.uk/portal/page?_pageid=33,4166&_dad=portal&_schema=PORTAL", "Ocella", "OcellaParser" | |||
"Uttlesford District Council", "Uttlesford", "http://planning.uttlesford.gov.uk/portal/page?_pageid=33,35447&_dad=portal&_schema=PORTAL", "Ocella", "OcellaParser" | |||
"North East Lincolnshire Council", "North East Lincolnshire", "http://planning.nelincs.gov.uk/portal/page?_pageid=33,64104&_dad=portal&_schema=PORTAL", "Ocella", "OcellaParser" | |||
"Middlesbrough Borough Council", "Middlesbrough", "http://planserv.middlesbrough.gov.uk/portal/page?_pageid=33,4178&_dad=portal&_schema=PORTAL", "Ocella", "OcellaParser" | |||
"Uttlesford District Council", "Uttlesford", "http://planning.uttlesford.gov.uk/portal/page/portal/plan/weekly", "Ocella", "OcellaParser" | |||
"Mid Bedfordshire District Council", "Mid Beds", "http://www.midbeds.gov.uk/acolnetDC/DCpages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "MidBedsParser" | |||
"Cambridgeshire County Council", "Cambridgeshire", "http://planapps2.cambridgeshire.gov.uk/DCWebPages/AcolNetCGI.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser" | |||
"Isle of Wight Council", "Isle of Wight", "", "IsleOfWight", "IsleOfWightParser" | |||
@@ -237,3 +236,10 @@ | |||
"Lewes District Council", "Lewes", "http://planning.lewes.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser" | |||
"Warrington Borough Council", "Warrington", "http://212.248.237.123:8080/swiftlg/apas/run/wphappcriteria.display", "SwiftLG", "SwiftLGParser" | |||
"Olympic Delivery Authority", "Olympics", "http://planning.london2012.com/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" | |||
"Cannock Chase District Council", "Cannock Chase", "http://planning.cannockchasedc.com/swiftlg/apas/run/wphappcriteria.display", "SwiftLG", "SwiftLGParser" | |||
"Blackpool Borough Council", "Blackpool", "http://www2.blackpool.gov.uk/PlanningApplications/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "BlackpoolParser" | |||
"London Borough of Greenwich", "Greenwich", "http://onlineplanning.greenwich.gov.uk/acolnet/planningpages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "GreenwichParser" | |||
"Bridgend County Borough Council", "Bridgend", "http://eplan.bridgend.gov.uk:7778/portal/page?_pageid=55,31779&_dad=portal&_schema=PORTAL", "Ocella", "OcellaParser" | |||
"London Borough of Havering", "Havering", "http://planning.havering.gov.uk/portal/page?_pageid=33,1026&_dad=portal&_schema=PORTAL", "Ocella", "OcellaParser" | |||
"Castle Point Borough Council", "Castle Point", "http://planning.castlepoint.gov.uk/portal/page?_pageid=35,38205&_dad=portal&_schema=PORTAL", "Ocella", "OcellaParser" | |||
"Great Yarmouth Borough Council", "Great Yarmouth", "http://planning.great-yarmouth.gov.uk/portal/page/portal/plan/weekly", "Ocella", "OcellaParser" |
@@ -215,7 +215,8 @@ if __name__ == '__main__': | |||
# parser = SwiftLGParser("St Edmundsbury", "Bury St Edmunds", "http://www.stedmundsbury.gov.uk/swiftlg/apas/run/") | |||
# parser = MacclesfieldParser("Macclesfield", "Macclesfield", "http://www.planportal.macclesfield.gov.uk/swiftlg/apas/run/") | |||
# parser = SwiftLGParser("Daventry District Council", "Daventry", "http://www.daventrydc.gov.uk/swiftlg/apas/run/wphappcriteria.display") | |||
parser = SwiftLGParser("Warrington Borough Council", "Warrington", "http://212.248.237.123:8080/swiftlg/apas/run/wphappcriteria.display") | |||
# parser = SwiftLGParser("Warrington Borough Council", "Warrington", "http://212.248.237.123:8080/swiftlg/apas/run/wphappcriteria.display") | |||
parser = SwiftLGParser("Cannock Chase District Council", "Cannock Chase", "http://planning.cannockchasedc.com/swiftlg/apas/run/wphappcriteria.display") | |||
print parser.getResults(26,6,2008) | |||