diff --git a/python_scrapers/AcolnetParser.py b/python_scrapers/AcolnetParser.py index 56eaaa5..768e5cd 100644 --- a/python_scrapers/AcolnetParser.py +++ b/python_scrapers/AcolnetParser.py @@ -1,41 +1,40 @@ #!/usr/local/bin/python -import urllib, urllib2 -import HTMLParser -#from BeautifulSoup import BeautifulSoup +import urllib2 +import urlparse + +from datetime import date +import datetime + +import re + +from BeautifulSoup import BeautifulSoup # Adding this to try to help Surrey Heath - Duncan 14/9/2007 import cookielib cookie_jar = cookielib.CookieJar() ################ -import urlparse - -import re - -end_head_regex = re.compile(" for Bridgnorth, which doesn't have broken html +end_head_regex = re.compile("?", re.IGNORECASE) + + class AcolnetParser(HTMLParser.HTMLParser): - case_number_tr = None # this one can be got by the td class attribute - reg_date_tr = None - location_tr = None - proposal_tr = None + received_date_format = "%d/%m/%Y" + + comment_qs_template = "ACTION=UNWRAP&RIPNAME=Root.PgeCommentForm&TheSystemkey=%s" # There is no online comment facility in these, so we provide an # appropriate email address instead @@ -44,32 +43,60 @@ class AcolnetParser(HTMLParser.HTMLParser): # The optional amp; is to cope with Oldham, which seems to have started # quoting this url. 
action_regex = re.compile("]*action=\"([^\"]*ACTION=UNWRAP&(?:amp;)?RIPSESSION=[^\"]*)\"[^>]*>", re.IGNORECASE) + + def _getResultsSections(self, soup): + """In most cases, there is a table per app.""" + return soup.findAll("table", {"class": "results-table"}) + + def _getCouncilReference(self, app_table): + return app_table.a.string.strip() + + def _getDateReceived(self, app_table): + date_str = ''.join(app_table.find(text="Registration Date:").findNext("td").string.strip().split()) + + return datetime.datetime.strptime(date_str, self.received_date_format) + + def _getAddress(self, app_table): + return app_table.find(text="Location:").findNext("td").string.strip() + def _getDescription(self, app_table): + return app_table.find(text="Proposal:").findNext("td").string.strip() + + def _getInfoUrl(self, app_table): + """Returns the info url for this app. + + We also set the system key on self._current_application, + as we'll need that for the comment url. + + """ + url = app_table.a['href'] + self._current_application.system_key = system_key_regex.search(url).groups()[0] + return urlparse.urljoin(self.base_url, url) + + def _getCommentUrl(self, app_table): + """This must be run after _getInfoUrl""" + + if self.comments_email_address: + return self.comments_email_address + + split_info_url = urlparse.urlsplit(self._current_application.info_url) + + comment_qs = self.comment_qs_template %self._current_application.system_key + + return urlparse.urlunsplit(split_info_url[:3] + (comment_qs,) + split_info_url[4:]) + + def __init__(self, authority_name, authority_short_name, base_url, debug=False): - - - HTMLParser.HTMLParser.__init__(self) - self.authority_name = authority_name self.authority_short_name = authority_short_name self.base_url = base_url self.debug = debug - self._tr_number = 0 - - # This will be used to track the subtable depth - # when we are in a results-table, in order to - # avoid adding an application before we have got to - # the end of the results-table 
- self._subtable_depth = None - - self._in_td = False - # This in where we store the results self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name) @@ -82,86 +109,6 @@ class AcolnetParser(HTMLParser.HTMLParser): HTML cleanup.""" return html - def handle_starttag(self, tag, attrs): - #print tag, attrs - - if tag == "table": - if self._current_application is None: - # Each application is in a separate table with class "results-table" - for key, value in attrs: - if key == "class" and value == "results-table": - #print "found results-table" - self._current_application = PlanningApplication() - self._tr_number = 0 - self._subtable_depth = 0 - self._current_application.comment_url = self.comments_email_address - break - else: - # We are already in a results-table, and this is the start of a subtable, - # so increment the subtable depth. - self._subtable_depth += 1 - - elif self._current_application is not None: - if tag == "tr" and self._subtable_depth == 0: - self._tr_number += 1 - if tag == "td": - self._in_td = True - if tag == "a" and self._tr_number == self.case_number_tr: - # this is where we get the info link and the case number - for key, value in attrs: - if key == "href": - self._current_application.info_url = value - - system_key = system_key_regex.search(value).groups()[0] - - if self.comments_email_address is not None: - self._current_application.comment_url = self.comments_email_address - else: - self._current_application.comment_url = value.replace("PgeResultDetail", "PgeCommentForm") - - def handle_data(self, data): - # If we are in the tr which contains the case number, - # then data is the council reference, so - # add it to self._current_application. 
- if self._in_td: - if self._tr_number == self.case_number_tr: - self._current_application.council_reference = data.strip() - elif self._tr_number == self.reg_date_tr: - # we need to make a date object out of data - date_as_str = ''.join(data.strip().split()) - received_date = date(*strptime(date_as_str, date_format)[0:3]) - - #print received_date - - self._current_application.date_received = received_date - - elif self._tr_number == self.location_tr: - location = data.strip() - - self._current_application.address = location - self._current_application.postcode = getPostcodeFromText(location) - elif self._tr_number == self.proposal_tr: - self._current_application.description = data.strip() - - - def handle_endtag(self, tag): - #print "ending: ", tag - if tag == "table" and self._current_application is not None: - if self._subtable_depth > 0: - self._subtable_depth -= 1 - else: - # We need to add the last application in the table - if self._current_application is not None: - #print "adding application" - self._results.addApplication(self._current_application) - #print self._current_application - self._current_application = None - self._tr_number = None - self._subtable_depth = None - elif tag == "td": - self._in_td = False - - def _getSearchResponse(self): # It looks like we sometimes need to do some stuff to get around a # javascript redirect and cookies. 
@@ -202,9 +149,6 @@ class AcolnetParser(HTMLParser.HTMLParser): response = opener.open(action_url, search_data) results_html = response.read() - #outfile = open("tmpfile", "w") - #outfile.write(results_html) - # This is for doing site specific html cleanup results_html = self._cleanupHTML(results_html) @@ -212,53 +156,40 @@ class AcolnetParser(HTMLParser.HTMLParser): #so we'll just have the body just_body = "" + end_head_regex.split(results_html)[-1] - #outfile = open(self.authority_short_name + ".debug", "w") - #outfile.write(just_body) - - self.feed(just_body) + #self.feed(just_body) - return self._results + soup = BeautifulSoup(just_body) + # Each app is in a table of it's own. + results_tables = self._getResultsSections(soup) - def getResults(self, day, month, year): - return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML() + for app_table in results_tables: + self._current_application = PlanningApplication() -## # Babergh up to 21/06/2007 -## class BaberghParser(AcolnetParser): -## case_number_tr = 1 # this one can be got by the td class attribute -## reg_date_tr = 2 -## location_tr = 4 -## proposal_tr = 5 + self._current_application.council_reference = self._getCouncilReference(app_table) + self._current_application.address = self._getAddress(app_table) + + # Get the postcode from the address + self._current_application.postcode = getPostcodeFromText(self._current_application.address) + + self._current_application.description = self._getDescription(app_table) + self._current_application.info_url = self._getInfoUrl(app_table) + self._current_application.comment_url = self._getCommentUrl(app_table) + self._current_application.date_received = self._getDateReceived(app_table) -## # It would be nice to scrape this... 
-## comments_email_address = "planning.reception@babergh.gov.uk" + self._results.addApplication(self._current_application) -# Site changes to here from 22/06/2007 -class BaberghParser(AcolnetParser): - case_number_tr = 1 # this one can be got by the td class attribute - reg_date_tr = 3 - location_tr = 5 - proposal_tr = 6 - # It would be nice to scrape this... - comments_email_address = "planning.reception@babergh.gov.uk" + return self._results -class BasingstokeParser(AcolnetParser): - case_number_tr = 1 # this one can be got by the td class attribute - reg_date_tr = 3 - location_tr = 6 - proposal_tr = 8 - # It would be nice to scrape this... - comments_email_address = "development.control@basingstoke.gov.uk" -class BassetlawParser(AcolnetParser): - case_number_tr = 1 # this one can be got by the td class attribute - reg_date_tr = 2 - location_tr = 4 - proposal_tr = 5 + def getResults(self, day, month, year): + return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML() + +class BassetlawParser(AcolnetParser): comments_email_address = "planning@bassetlaw.gov.uk" def _cleanupHTML(self, html): @@ -270,214 +201,58 @@ class BassetlawParser(AcolnetParser): class BridgnorthParser(AcolnetParser): - # This site is currently down... 
- #search_url = "http://www2.bridgnorth-dc.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch" - #authority_name = "Bridgenorth District Council" - #authority_short_name = "Bridgenorth" - - case_number_tr = 1 # this one can be got by the td class attribute - reg_date_tr = 2 - location_tr = 4 - proposal_tr = 5 - - comments_email_address = "contactus@bridgnorth-dc.gov.uk" - -class BuryParser(AcolnetParser): - case_number_tr = 1 # this one can be got by the td class attribute - reg_date_tr = 3 - location_tr = 5 - proposal_tr = 6 - - #comments_email_address = "development.control@bury.gov.uk" - -## class CanterburyParser(AcolnetParser): -## search_url = "http://planning.canterbury.gov.uk/scripts/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch" - -## case_number_tr = 1 # this one can be got by the td class attribute -## reg_date_tr = 2 -## location_tr = 4 -## proposal_tr = 5 - -## authority_name = "Canterbury City Council" -## authority_short_name = "Canterbury" + def _getResultsSections(self, soup): + return soup.findAll("table", {"class": "app"}) -## comments_email_address = "" + def _getCouncilReference(self, app_table): + return app_table.a.string.split()[-1] -class CarlisleParser(AcolnetParser): - case_number_tr = 1 # this one can be got by the td class attribute - reg_date_tr = 2 - location_tr = 5 - proposal_tr = 6 - - comments_email_address = "dc@carlisle.gov.uk" - -class DerbyParser(AcolnetParser): - case_number_tr = 1 # this one can be got by the td class attribute - reg_date_tr = 3 - location_tr = 4 - proposal_tr = 5 - - comments_email_address = "developmentcontrol@derby.gov.uk" - -class CroydonParser(AcolnetParser): - case_number_tr = 1 # this one can be got by the td class attribute - reg_date_tr = 3 - location_tr = 5 - proposal_tr = 6 - - comments_email_address = "planning.control@croydon.gov.uk" - -class EastLindseyParser(AcolnetParser): - case_number_tr = 1 # this one can be got by the td class attribute - reg_date_tr = 3 - 
location_tr = 5 - proposal_tr = 6 - comments_email_address = "development.control@e-lindsey.gov.uk" - -class FyldeParser(AcolnetParser): - case_number_tr = 1 # this one can be got by the td class attribute - reg_date_tr = 2 - location_tr = 4 - proposal_tr = 5 + def _getCommentUrl(self, app_table): + """This must be run after _getInfoUrl""" +#http://www2.bridgnorth-dc.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeCommentForm&TheSystemkey=46958 + return self._current_application.info_url.replace("NewPages", "PgeCommentForm") - comments_email_address = "planning@fylde.gov.uk" +# Cambridgeshire, although an Acolnet site, is so different that it +# may as well be handled completely separately. -class HarlowParser(AcolnetParser): - case_number_tr = 1 # this one can be got by the td class attribute - reg_date_tr = 2 - location_tr = 4 - proposal_tr = 5 +class CanterburyParser(AcolnetParser): + """Here the apps are one row each in a big table.""" - comments_email_address = "Planning.services@harlow.gov.uk" + def _getResultsSections(self, soup): + return soup.find("table", {"class": "results-table"}).findAll("tr")[1:] -class HavantParser(AcolnetParser): - case_number_tr = 1 # this one can be got by the td class attribute - reg_date_tr = 3 - location_tr = 6 - proposal_tr = 8 + def _getDateReceived(self, app_table): + date_str = app_table.findAll("td")[3].string.strip() - comments_email_address = "representations@havant.gov.uk" + return datetime.datetime.strptime(date_str, self.received_date_format) -class HertsmereParser(AcolnetParser): - case_number_tr = 1 # this one can be got by the td class attribute - reg_date_tr = 2 - location_tr = 4 - proposal_tr = 5 + def _getAddress(self, app_table): + return app_table.findAll("td")[1].string.strip() - comments_email_address = "planning@hertsmere.gov.uk" + def _getDescription(self, app_table): + return app_table.findAll("td")[2].string.strip() -class LewishamParser(AcolnetParser): - case_number_tr = 1 # this one can be 
got by the td class attribute - reg_date_tr = 2 - location_tr = 4 - proposal_tr = 5 +#Kensington and chelsea is sufficiently different, it may as well be handled separately - comments_email_address = "planning@lewisham.gov.uk" +# Mid Bedfordshire - there is an acolnet here, but you have to have a username +# and password to access it! -class NorthHertfordshireParser(AcolnetParser): - case_number_tr = 1 # this one can be got by the td class attribute - reg_date_tr = 2 - location_tr = 4 - proposal_tr = 5 - -## class MidSuffolkParser(AcolnetParser): -## case_number_tr = 1 # this one can be got by the td class attribute -## reg_date_tr = 2 -## location_tr = 4 -## proposal_tr = 5 - -## comments_email_address = "planning@lewisham.gov.uk" -## #action_regex = re.compile("
]*>", re.IGNORECASE) - -class NewForestNPParser(AcolnetParser): - # In this case there is an online comment facility at the - # bottom of each view app page... - case_number_tr = 1 # this one can be got by the td class attribute - reg_date_tr = 2 - location_tr = 4 - proposal_tr = 5 - -class NewForestDCParser(AcolnetParser): - # In this case there is an online comment facility at the - # bottom of each view app page... - case_number_tr = 1 # this one can be got by the td class attribute - reg_date_tr = 3 - location_tr = 6 - proposal_tr = 7 - -class NorthWiltshireParser(AcolnetParser): - case_number_tr = 1 # this one can be got by the td class attribute - reg_date_tr = 3 - location_tr = 6 - proposal_tr = 7 - class OldhamParser(AcolnetParser): - case_number_tr = 1 # this one can be got by the td class attribute - reg_date_tr = 3 - location_tr = 6 - proposal_tr = 7 - def _cleanupHTML(self, html): """There is a bad table end tag in this one. Fix it before we start""" bad_table_end = '' good_table_end = '' + return html.replace(bad_table_end, good_table_end) +class SouthwarkParser(AcolnetParser): + def _getDateReceived(self, app_table): + date_str = ''.join(app_table.find(text="Statutory start date:").findNext("td").string.strip().split()) -class RenfrewshireParser(AcolnetParser): - case_number_tr = 1 # this one can be got by the td class attribute - reg_date_tr = 2 - location_tr = 4 - proposal_tr = 5 - - comments_email_address = "pt@renfrewshire.gov.uk" - -class SouthBedfordshireParser(AcolnetParser): - case_number_tr = 1 # this one can be got by the td class attribute - reg_date_tr = 3 - location_tr = 5 - proposal_tr = 6 - -class SuffolkCoastalParser(AcolnetParser): -# case_number_tr = 1 # this one can be got by the td class attribute -# reg_date_tr = 2 -# location_tr = 4 -# proposal_tr = 5 - -# New URL with different layout - case_number_tr = 1 - reg_date_tr = 3 - location_tr = 5 - proposal_tr = 6 - - - comments_email_address = "d.c.admin@suffolkcoastal.gov.uk" - 
-class GuildfordParser(AcolnetParser): - case_number_tr = 1 - reg_date_tr = 7 - location_tr = 2 - proposal_tr = 3 - - #http://www.guildford.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&Root=PgeSearch - -class BoltonParser(AcolnetParser): - case_number_tr = 1 - reg_date_tr = 2 - location_tr = 4 - proposal_tr = 5 - comments_email_address = "Planning.control@bolton.gov.uk" - - -class ExeterParser(AcolnetParser): - case_number_tr = 1 - reg_date_tr = 3 - location_tr = 5 - proposal_tr = 6 - + return datetime.datetime.strptime(date_str, self.received_date_format) class SurreyHeathParser(AcolnetParser): # This is not working yet. @@ -520,23 +295,42 @@ class SurreyHeathParser(AcolnetParser): # return javascript_redirect_response +# Wychavon is rather different, and will need some thought. There is no +# advanced search page if __name__ == '__main__': - day = 20 + day = 30 month = 11 year = 2007 - # returns error 400 - bad request - #parser = BridgenorthParser() - - # cambridgeshire is a bit different... 
- # no advanced search page - - # canterbury - # results as columns of one table - - #parser = SurreyHeathParser("Surrey Heath", "Surrey Heath", "https://www.public.surreyheath-online.gov.uk/whalecom60b1ef305f59f921/whalecom0/Scripts/PlanningPagesOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") - - parser = OldhamParser("Oldham", "Oldham", "http://planning.oldham.gov.uk/planning/AcolNetCGI.gov?ACTION=UNWRAP&Root=PgeSearch") + #parser = AcolnetParser("Babergh", "Babergh", "http://planning.babergh.gov.uk/dcdatav2//acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") + #parser = AcolnetParser("Basingstoke", "Basingstoke", "http://planning.basingstoke.gov.uk/DCOnline2/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch") + #parser = BassetlawParser("Bassetlaw", "Bassetlaw", "http://www.bassetlaw.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") + #parser = AcolnetParser("Bolton", "Bolton", "http://www.planning.bolton.gov.uk/PlanningSearch/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") +# parser = BridgnorthParser("Bridgnorth", "Bridgnorth", "http://www2.bridgnorth-dc.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch") + #parser = AcolnetParser("Bury", "Bury", "http://e-planning.bury.gov.uk/DCWebPages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") + #parser = CanterburyParser("Canterbury", "Canterbury", "http://planning.canterbury.gov.uk/scripts/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch") + #parser = AcolnetParser("Carlisle", "Carlisle", "http://planning.carlisle.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") + #parser = AcolnetParser("Croydon", "Croydon", "http://planning.croydon.gov.uk/DCWebPages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") + #parser = AcolnetParser("Derby", "Derby", "http://eplanning.derby.gov.uk/acolnet/planningpages02/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch") + #parser = AcolnetParser("East Lindsey", "East Lindsey", 
"http://www.e-lindsey.gov.uk/planning/AcolnetCGI.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser") + #parser = AcolnetParser("Exeter City Council", "Exeter", "http://pub.exeter.gov.uk/scripts/Acolnet/dataonlineplanning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") + #parser = AcolnetParser("Fylde", "Fylde", "http://www2.fylde.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") + #parser = AcolnetParser("Guildford", "Guildford", "http://www.guildford.gov.uk/DLDC_Version_2/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch") + #parser = AcolnetParser("Harlow", "Harlow", "http://planning.harlow.gov.uk/PlanningSearch/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch") + #parser = AcolnetParser("Havant", "Havant", "http://www3.havant.gov.uk/scripts/planningpages/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch") + #parser = AcolnetParser("Hertsmere", "Hertsmere", "http://www2.hertsmere.gov.uk/ACOLNET/DCOnline//acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") + #parser = AcolnetParser("Lewisham", "Lewisham", "http://acolnet.lewisham.gov.uk/lewis-xslpagesdc/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.PgeSearch") + #parser = AcolnetParser("Mid Suffolk", "Mid Suffolk", "http://planning.midsuffolk.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") + #parser = AcolnetParser("New Forest District Council", "New Forest DC", "http://web3.newforest.gov.uk/planningonline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") + #parser = AcolnetParser("New Forest National Park Authority", "New Forest NPA", "http://web01.newforestnpa.gov.uk/planningpages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") + #parser = AcolnetParser("North Hertfordshire", "North Herts", "http://www.north-herts.gov.uk/dcdataonline/Pages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch") + #parser = AcolnetParser("North Wiltshire", "North Wilts", 
"http://planning.northwilts.gov.uk/DCOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") + #parser = OldhamParser("Oldham", "Oldham", "http://planning.oldham.gov.uk/planning/AcolNetCGI.gov?ACTION=UNWRAP&Root=PgeSearch") + #parser = AcolnetParser("Renfrewshire", "Renfrewshire", "http://planning.renfrewshire.gov.uk/acolnetDCpages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch") + #parser = AcolnetParser("South Bedfordshire", "South Bedfordshire", "http://planning.southbeds.gov.uk/plantech/DCWebPages/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.PgeSearch") + #parser = SouthwarkParser("London Borough of Southwark", "Southwark", "http://planningonline.southwarksites.com/planningonline2/AcolNetCGI.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch") + #parser = AcolnetParser("Suffolk Coastal", "Suffolk Coastal", "http://apps3.suffolkcoastal.gov.uk/DCDataV2/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") + #parser = AcolnetParser("Surrey Heath", "Surrey Heath", "https://www.public.surreyheath-online.gov.uk/whalecom60b1ef305f59f921/whalecom0/Scripts/PlanningPagesOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") print parser.getResults(day, month, year) diff --git a/python_scrapers/AcolnetParser_HTMLParser.py b/python_scrapers/AcolnetParser_HTMLParser.py new file mode 100644 index 0000000..6d993ca --- /dev/null +++ b/python_scrapers/AcolnetParser_HTMLParser.py @@ -0,0 +1,542 @@ +#!/usr/local/bin/python + +import urllib, urllib2 +import HTMLParser +#from BeautifulSoup import BeautifulSoup + +# Adding this to try to help Surrey Heath - Duncan 14/9/2007 +import cookielib +cookie_jar = cookielib.CookieJar() +################ + +import urlparse + +import re + +# We allow the optional > for Bridgenorth, which doesn't have broken html +end_head_regex = re.compile("?", re.IGNORECASE) + +import MultipartPostHandler +# this is not mine, or part of standard python (though it should be!) 
+# it comes from http://pipe.scs.fsu.edu/PostHandler/MultipartPostHandler.py + +from PlanningUtils import getPostcodeFromText, PlanningAuthorityResults, PlanningApplication + +from datetime import date +from time import strptime + + +date_format = "%d/%m/%Y" +our_date = date(2007,4,25) + +#This is to get the system key out of the info url +system_key_regex = re.compile("TheSystemkey=(\d*)", re.IGNORECASE) + +class AcolnetParser(HTMLParser.HTMLParser): + case_number_tr = None # this one can be got by the td class attribute + reg_date_tr = None + location_tr = None + proposal_tr = None + + # There is no online comment facility in these, so we provide an + # appropriate email address instead + comments_email_address = None + + # The optional amp; is to cope with Oldham, which seems to have started + # quoting this url. + action_regex = re.compile("]*action=\"([^\"]*ACTION=UNWRAP&(?:amp;)?RIPSESSION=[^\"]*)\"[^>]*>", re.IGNORECASE) + + def __init__(self, + authority_name, + authority_short_name, + base_url, + debug=False): + + + HTMLParser.HTMLParser.__init__(self) + + self.authority_name = authority_name + self.authority_short_name = authority_short_name + self.base_url = base_url + + self.debug = debug + + self._tr_number = 0 + + # This will be used to track the subtable depth + # when we are in a results-table, in order to + # avoid adding an application before we have got to + # the end of the results-table + self._subtable_depth = None + + self._in_td = False + + # This in where we store the results + self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name) + + # This will store the planning application we are currently working on. 
+ self._current_application = None + + + def _cleanupHTML(self, html): + """This method should be overridden in subclasses to perform site specific + HTML cleanup.""" + return html + + def handle_starttag(self, tag, attrs): + #print tag, attrs + + if tag == "table": + if self._current_application is None: + # Each application is in a separate table with class "results-table" + for key, value in attrs: + if key == "class" and value == "results-table": + #print "found results-table" + self._current_application = PlanningApplication() + self._tr_number = 0 + self._subtable_depth = 0 + self._current_application.comment_url = self.comments_email_address + break + else: + # We are already in a results-table, and this is the start of a subtable, + # so increment the subtable depth. + self._subtable_depth += 1 + + elif self._current_application is not None: + if tag == "tr" and self._subtable_depth == 0: + self._tr_number += 1 + if tag == "td": + self._in_td = True + if tag == "a" and self._tr_number == self.case_number_tr: + # this is where we get the info link and the case number + for key, value in attrs: + if key == "href": + self._current_application.info_url = value + + system_key = system_key_regex.search(value).groups()[0] + + if self.comments_email_address is not None: + self._current_application.comment_url = self.comments_email_address + else: + self._current_application.comment_url = value.replace("PgeResultDetail", "PgeCommentForm") + + def handle_data(self, data): + # If we are in the tr which contains the case number, + # then data is the council reference, so + # add it to self._current_application. 
+ if self._in_td: + if self._tr_number == self.case_number_tr: + self._current_application.council_reference = data.strip() + elif self._tr_number == self.reg_date_tr: + # we need to make a date object out of data + date_as_str = ''.join(data.strip().split()) + received_date = date(*strptime(date_as_str, date_format)[0:3]) + + #print received_date + + self._current_application.date_received = received_date + + elif self._tr_number == self.location_tr: + location = data.strip() + + self._current_application.address = location + self._current_application.postcode = getPostcodeFromText(location) + elif self._tr_number == self.proposal_tr: + self._current_application.description = data.strip() + + + def handle_endtag(self, tag): + #print "ending: ", tag + if tag == "table" and self._current_application is not None: + if self._subtable_depth > 0: + self._subtable_depth -= 1 + else: + # We need to add the last application in the table + if self._current_application is not None: + #print "adding application" + self._results.addApplication(self._current_application) + #print self._current_application + self._current_application = None + self._tr_number = None + self._subtable_depth = None + elif tag == "td": + self._in_td = False + + + def _getSearchResponse(self): + # It looks like we sometimes need to do some stuff to get around a + # javascript redirect and cookies. + search_form_request = urllib2.Request(self.base_url) + search_form_response = urllib2.urlopen(search_form_request) + + return search_form_response + + + def getResultsByDayMonthYear(self, day, month, year): + # first we fetch the search page to get ourselves some session info... + search_form_response = self._getSearchResponse() + + search_form_contents = search_form_response.read() + + # This sometimes causes a problem in HTMLParser, so let's just get the link + # out with a regex... 
+ + groups = self.action_regex.search(search_form_contents).groups() + + action = groups[0] + #print action + + # This is to handle the amp; which seems to have appeared in this + # url on the Oldham site + action = ''.join(action.split('amp;')) + + action_url = urlparse.urljoin(self.base_url, action) + #print action_url + + our_date = date(year, month, day) + + search_data = {"regdate1": our_date.strftime(date_format), + "regdate2": our_date.strftime(date_format), + } + + opener = urllib2.build_opener(MultipartPostHandler.MultipartPostHandler) + response = opener.open(action_url, search_data) + results_html = response.read() + + # This is for doing site specific html cleanup + results_html = self._cleanupHTML(results_html) + + #some javascript garbage in the header upsets HTMLParser, + #so we'll just have the body + just_body = "" + end_head_regex.split(results_html)[-1] + + #outfile = open(self.authority_short_name + ".debug", "w") + #outfile.write(just_body) + + #print just_body + + self.feed(just_body) + + return self._results + + + + def getResults(self, day, month, year): + return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML() + +## # Babergh up to 21/06/2007 +## class BaberghParser(AcolnetParser): +## case_number_tr = 1 # this one can be got by the td class attribute +## reg_date_tr = 2 +## location_tr = 4 +## proposal_tr = 5 + +## # It would be nice to scrape this... +## comments_email_address = "planning.reception@babergh.gov.uk" + +# Site changes to here from 22/06/2007 +class BaberghParser(AcolnetParser): + case_number_tr = 1 # this one can be got by the td class attribute + reg_date_tr = 3 + location_tr = 5 + proposal_tr = 6 + + # It would be nice to scrape this... 
+ comments_email_address = "planning.reception@babergh.gov.uk" + +class BasingstokeParser(AcolnetParser): + case_number_tr = 1 # this one can be got by the td class attribute + reg_date_tr = 3 + location_tr = 6 + proposal_tr = 8 + + # It would be nice to scrape this... + comments_email_address = "development.control@basingstoke.gov.uk" + +class BassetlawParser(AcolnetParser): + case_number_tr = 1 # this one can be got by the td class attribute + reg_date_tr = 2 + location_tr = 4 + proposal_tr = 5 + + comments_email_address = "planning@bassetlaw.gov.uk" + + def _cleanupHTML(self, html): + """There is a broken div in this page. We don't need any divs, so + let's get rid of them all.""" + + div_regex = re.compile("]*>", re.IGNORECASE) + return div_regex.sub('', html) + + +class BridgnorthParser(AcolnetParser): + # This site is currently down... + #search_url = "http://www2.bridgnorth-dc.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch" + #authority_name = "Bridgenorth District Council" + #authority_short_name = "Bridgenorth" + + case_number_tr = 1 # this one can be got by the td class attribute + reg_date_tr = 2 + location_tr = 4 + proposal_tr = 5 + + comments_email_address = "contactus@bridgnorth-dc.gov.uk" + +class BuryParser(AcolnetParser): + case_number_tr = 1 # this one can be got by the td class attribute + reg_date_tr = 3 + location_tr = 5 + proposal_tr = 6 + + #comments_email_address = "development.control@bury.gov.uk" + +## class CanterburyParser(AcolnetParser): +## search_url = "http://planning.canterbury.gov.uk/scripts/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch" + +## case_number_tr = 1 # this one can be got by the td class attribute +## reg_date_tr = 2 +## location_tr = 4 +## proposal_tr = 5 + +## authority_name = "Canterbury City Council" +## authority_short_name = "Canterbury" + +## comments_email_address = "" + +class CarlisleParser(AcolnetParser): + case_number_tr = 1 # this one can be got by the td class attribute + 
reg_date_tr = 2 + location_tr = 5 + proposal_tr = 6 + + comments_email_address = "dc@carlisle.gov.uk" + +class DerbyParser(AcolnetParser): + case_number_tr = 1 # this one can be got by the td class attribute + reg_date_tr = 3 + location_tr = 4 + proposal_tr = 5 + + comments_email_address = "developmentcontrol@derby.gov.uk" + +class CroydonParser(AcolnetParser): + case_number_tr = 1 # this one can be got by the td class attribute + reg_date_tr = 3 + location_tr = 5 + proposal_tr = 6 + + comments_email_address = "planning.control@croydon.gov.uk" + +class EastLindseyParser(AcolnetParser): + case_number_tr = 1 # this one can be got by the td class attribute + reg_date_tr = 3 + location_tr = 5 + proposal_tr = 6 + + comments_email_address = "development.control@e-lindsey.gov.uk" + +class FyldeParser(AcolnetParser): + case_number_tr = 1 # this one can be got by the td class attribute + reg_date_tr = 2 + location_tr = 4 + proposal_tr = 5 + + comments_email_address = "planning@fylde.gov.uk" + +class HarlowParser(AcolnetParser): + case_number_tr = 1 # this one can be got by the td class attribute + reg_date_tr = 2 + location_tr = 4 + proposal_tr = 5 + + comments_email_address = "Planning.services@harlow.gov.uk" + +class HavantParser(AcolnetParser): + case_number_tr = 1 # this one can be got by the td class attribute + reg_date_tr = 3 + location_tr = 6 + proposal_tr = 8 + + comments_email_address = "representations@havant.gov.uk" + +class HertsmereParser(AcolnetParser): + case_number_tr = 1 # this one can be got by the td class attribute + reg_date_tr = 2 + location_tr = 4 + proposal_tr = 5 + + comments_email_address = "planning@hertsmere.gov.uk" + +class LewishamParser(AcolnetParser): + case_number_tr = 1 # this one can be got by the td class attribute + reg_date_tr = 2 + location_tr = 4 + proposal_tr = 5 + + comments_email_address = "planning@lewisham.gov.uk" + +class NorthHertfordshireParser(AcolnetParser): + case_number_tr = 1 # this one can be got by the td class 
attribute + reg_date_tr = 2 + location_tr = 4 + proposal_tr = 5 + +## class MidSuffolkParser(AcolnetParser): +## case_number_tr = 1 # this one can be got by the td class attribute +## reg_date_tr = 2 +## location_tr = 4 +## proposal_tr = 5 + +## comments_email_address = "planning@lewisham.gov.uk" +## #action_regex = re.compile("]*>", re.IGNORECASE) + +class NewForestNPParser(AcolnetParser): + # In this case there is an online comment facility at the + # bottom of each view app page... + case_number_tr = 1 # this one can be got by the td class attribute + reg_date_tr = 2 + location_tr = 4 + proposal_tr = 5 + +class NewForestDCParser(AcolnetParser): + # In this case there is an online comment facility at the + # bottom of each view app page... + case_number_tr = 1 # this one can be got by the td class attribute + reg_date_tr = 3 + location_tr = 6 + proposal_tr = 7 + +class NorthWiltshireParser(AcolnetParser): + case_number_tr = 1 # this one can be got by the td class attribute + reg_date_tr = 3 + location_tr = 6 + proposal_tr = 7 + +class OldhamParser(AcolnetParser): + case_number_tr = 1 # this one can be got by the td class attribute + reg_date_tr = 3 + location_tr = 6 + proposal_tr = 7 + + def _cleanupHTML(self, html): + """There is a bad table end tag in this one. 
+ Fix it before we start""" + + bad_table_end = '' + good_table_end = '' + return html.replace(bad_table_end, good_table_end) + + +class RenfrewshireParser(AcolnetParser): + case_number_tr = 1 # this one can be got by the td class attribute + reg_date_tr = 2 + location_tr = 4 + proposal_tr = 5 + + comments_email_address = "pt@renfrewshire.gov.uk" + +class SouthBedfordshireParser(AcolnetParser): + case_number_tr = 1 # this one can be got by the td class attribute + reg_date_tr = 3 + location_tr = 5 + proposal_tr = 6 + +class SuffolkCoastalParser(AcolnetParser): +# case_number_tr = 1 # this one can be got by the td class attribute +# reg_date_tr = 2 +# location_tr = 4 +# proposal_tr = 5 + +# New URL with different layout + case_number_tr = 1 + reg_date_tr = 3 + location_tr = 5 + proposal_tr = 6 + + + comments_email_address = "d.c.admin@suffolkcoastal.gov.uk" + +class GuildfordParser(AcolnetParser): + case_number_tr = 1 + reg_date_tr = 7 + location_tr = 2 + proposal_tr = 3 + + #http://www.guildford.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&Root=PgeSearch + +class BoltonParser(AcolnetParser): + case_number_tr = 1 + reg_date_tr = 2 + location_tr = 4 + proposal_tr = 5 + comments_email_address = "Planning.control@bolton.gov.uk" + + +class ExeterParser(AcolnetParser): + case_number_tr = 1 + reg_date_tr = 3 + location_tr = 5 + proposal_tr = 6 + + +class SurreyHeathParser(AcolnetParser): + # This is not working yet. + # _getSearchResponse is an attempt to work around + # cookies and a javascript redirect. + # I may have a bit more of a go at this at some point if I have time. + case_number_tr = 1 # this one can be got by the td class attribute + reg_date_tr = 2 + location_tr = 4 + proposal_tr = 5 + + comments_email_address = "development-control@surreyheath.gov.uk" + + def _getSearchResponse(self): + # It looks like we sometimes need to do some stuff to get around a + # javascript redirect and cookies. 
+ search_form_request = urllib2.Request(self.base_url) + + # Lying about the user-agent doesn't seem to help. + #search_form_request.add_header("user-agent", "Mozilla/5.0 (compatible; Konqu...L/3.5.6 (like Gecko) (Kubuntu)") + + search_form_response = urllib2.urlopen(search_form_request) + + cookie_jar.extract_cookies(search_form_response, search_form_request) + + + print search_form_response.geturl() + print search_form_response.info() + + print search_form_response.read() +# validate_url = "https://www.public.surreyheath-online.gov.uk/whalecom7cace3215643e22bb7b0b8cc97a7/whalecom0/InternalSite/Validate.asp" +# javascript_redirect_url = urlparse.urljoin(self.base_url, "/whalecom7cace3215643e22bb7b0b8cc97a7/whalecom0/InternalSite/RedirectToOrigURL.asp?site_name=public&secure=1") + +# javascript_redirect_request = urllib2.Request(javascript_redirect_url) +# javascript_redirect_request.add_header('Referer', validate_url) + +# cookie_jar.add_cookie_header(javascript_redirect_request) + +# javascript_redirect_response = urllib2.urlopen(javascript_redirect_request) + +# return javascript_redirect_response + + +if __name__ == '__main__': + day = 4 + month = 12 + year = 2007 + + # returns error 400 - bad request + #parser = BridgnorthParser() + + # cambridgeshire is a bit different...
+ # no advanced search page + + # canterbury + # results as columns of one table + + #parser = SurreyHeathParser("Surrey Heath", "Surrey Heath", "https://www.public.surreyheath-online.gov.uk/whalecom60b1ef305f59f921/whalecom0/Scripts/PlanningPagesOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") + parser = BridgnorthParser("Bridgnorth", "Bridgnorth", "http://www2.bridgnorth-dc.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch") +# parser = OldhamParser("Oldham", "Oldham", "http://planning.oldham.gov.uk/planning/AcolNetCGI.gov?ACTION=UNWRAP&Root=PgeSearch") + print parser.getResults(day, month, year) + diff --git a/python_scrapers/SitesToGenerate.csv b/python_scrapers/SitesToGenerate.csv index 7915e90..783e349 100644 --- a/python_scrapers/SitesToGenerate.csv +++ b/python_scrapers/SitesToGenerate.csv @@ -87,30 +87,31 @@ "Vale Royal Borough Council", "Vale Royal", "http://pa.valeroyal.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" "Winchester City Council", "Winchester", "http://win2padmz.winchester.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" "Wolverhampton City Council", "Wolverhampton", "http://planningonline.wolverhampton.gov.uk/PublicAccess/dc/", "PublicAccess", "PublicAccessParser" -"Babergh District Council", "Babergh", "http://planning.babergh.gov.uk/dcdatav2//acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "BaberghParser" -"Basingstoke and Deane Borough Council", "Basingstoke and Deane", "http://planning.basingstoke.gov.uk/DCOnline2/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "BasingstokeParser" +"Babergh District Council", "Babergh", "http://planning.babergh.gov.uk/dcdatav2//acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser" +"Basingstoke and Deane Borough Council", "Basingstoke and Deane", "http://planning.basingstoke.gov.uk/DCOnline2/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", 
"AcolnetParser" "Bassetlaw District Council", "Bassetlaw","http://www.bassetlaw.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "BassetlawParser" -"Bury Metropolitan Borough Council", "Bury", "http://e-planning.bury.gov.uk/DCWebPages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "BuryParser" -"Derby City Council", "Derby", "http://195.224.106.204/scripts/planningpages02%5CXSLPagesDC_DERBY%5CDCWebPages/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "DerbyParser" -"London Borough of Croydon", "Croydon", "http://planning.croydon.gov.uk/DCWebPages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "CroydonParser" -"East Lindsey District Council", "East Lindsey", "http://www.e-lindsey.gov.uk/planning/AcolnetCGI.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "EastLindseyParser" -"Fylde Borough Council", "Fylde", "http://www2.fylde.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "FyldeParser" -"Harlow Council", "Harlow", "http://planning.harlow.gov.uk/PlanningSearch/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "HarlowParser" -"Hertsmere Borough Council", "Hertsmere", "http://www2.hertsmere.gov.uk/ACOLNET/DCOnline//acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "HertsmereParser" -"London Borough of Lewisham", "Lewisham", "http://acolnet.lewisham.gov.uk/lewis-xslpagesdc/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "LewishamParser" -"North Hertfordshire District Council", "North Hertfordshire", "http://www.north-herts.gov.uk/dcdataonline/Pages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "NorthHertfordshireParser" -"New Forest National Park", "New Forest NP", "http://web01.newforestnpa.gov.uk/planningpages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "NewForestNPParser" +"Bury Metropolitan 
Borough Council", "Bury", "http://e-planning.bury.gov.uk/DCWebPages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser" +"Derby City Council", "Derby", "http://eplanning.derby.gov.uk/acolnet/planningpages02/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser" +"London Borough of Croydon", "Croydon", "http://planning.croydon.gov.uk/DCWebPages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser" +"East Lindsey District Council", "East Lindsey", "http://www.e-lindsey.gov.uk/planning/AcolnetCGI.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser" +"Fylde Borough Council", "Fylde", "http://www2.fylde.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser" +"Guildford Borough Council", "Guildford", "http://www.guildford.gov.uk/DLDC_Version_2/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser" +"Harlow Council", "Harlow", "http://planning.harlow.gov.uk/PlanningSearch/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser" +"Hertsmere Borough Council", "Hertsmere", "http://www2.hertsmere.gov.uk/ACOLNET/DCOnline//acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser" +"London Borough of Lewisham", "Lewisham", "http://acolnet.lewisham.gov.uk/lewis-xslpagesdc/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "AcolnetParser" +"North Hertfordshire District Council", "North Hertfordshire", "http://www.north-herts.gov.uk/dcdataonline/Pages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "AcolnetParser" +"New Forest National Park", "New Forest NP", "http://web01.newforestnpa.gov.uk/planningpages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser" "Bridgnorth District Council", "Bridgnorth", 
"http://www2.bridgnorth-dc.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "BridgnorthParser" -"Carlisle City Council", "Carlisle", "http://planning.carlisle.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "CarlisleParser" +"Carlisle City Council", "Carlisle", "http://planning.carlisle.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser" "Newcastle City Council", "Newcastle", "http://gispublic.newcastle.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" -"North Wiltshire District Council", "North Wiltshire", "http://planning.northwilts.gov.uk/DCOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "NorthWiltshireParser" +"North Wiltshire District Council", "North Wiltshire", "http://planning.northwilts.gov.uk/DCOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser" "Oldham Metropolitan Borough Council", "Oldham", "http://planning.oldham.gov.uk/planning/AcolNetCGI.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "OldhamParser" -"Renfrewshire Council", "Renfrewshire", "http://planning.renfrewshire.gov.uk/acolnetDCpages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "RenfrewshireParser" +"Renfrewshire Council", "Renfrewshire", "http://planning.renfrewshire.gov.uk/acolnetDCpages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "AcolnetParser" "Selby District Council", "Selby", "http://publicaccess.selby.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" -"South Bedfordshire District Council", "South Bedfordshire", "http://planning.southbeds.gov.uk/plantech/DCWebPages/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "SouthBedfordshireParser" -"Suffolk Coastal District Council", "Suffolk Coastal", 
"http://apps3.suffolkcoastal.gov.uk/DCDataV2/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "SuffolkCoastalParser" +"South Bedfordshire District Council", "South Bedfordshire", "http://planning.southbeds.gov.uk/plantech/DCWebPages/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "AcolnetParser" +"Suffolk Coastal District Council", "Suffolk Coastal", "http://apps3.suffolkcoastal.gov.uk/DCDataV2/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser" "Surrey Heath Borough Council", "Surrey Heath", "https://www.public.surreyheath-online.gov.uk/whalecom60b1ef305f59f921/whalecom0/Scripts/PlanningPagesOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "SurreyHeathParser" -"New Forest District Council", "New Forest DC", "http://web3.newforest.gov.uk/planningonline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "NewForestDCParser" +"New Forest District Council", "New Forest DC", "http://web3.newforest.gov.uk/planningonline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser" "Craven District Council", "Craven", "http://www.planning.cravendc.gov.uk/fastweb/", "FastWeb", "FastWeb" "Eastleigh Borough Council", "Eastleigh", "http://www.eastleigh.gov.uk/FastWEB/", "FastWeb", "FastWeb" "Eden District Council", "Eden", "http://eforms.eden.gov.uk/fastweb/", "FastWeb", "FastWeb" @@ -132,7 +133,7 @@ "Tendring District Council", "Tendring", "http://195.99.151.54/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" "Argyl And Bute Council", "Argyl and Bute", "http://www.argyll-bute.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser" "Oxford City Council", "Oxford", "http://uniformpublicaccess.oxford.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" -"Havant Borough Council", "Havant", "http://www3.havant.gov.uk/scripts/planningpages/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", 
"AcolnetParser", "HavantParser" +"Havant Borough Council", "Havant", "http://www3.havant.gov.uk/scripts/planningpages/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser" "Rochford District Council", "Rochford", "http://www.rochford.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser" "Chester-le-Street District Council", "Chester-le-Street", "http://planning.chester-le-street.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" "Vale of the White Horse District Council", "Vale of the White Horse", "http://planning.whitehorsedc.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" @@ -144,8 +145,8 @@ "Cumbria County Council", "Cumbria", "http://217.114.50.149:7778/ePlanningOPS/loadResults.do", "AtriumePlanning", "AtriumePlanningParser" "Lincolnshire County Council", "Lincolnshire", "http://apps1.lincolnshire.gov.uk/ePlanning/loadResults.do", "AtriumePlanning", "AtriumePlanningParser" "West Sussex County Council", "West Sussex", "http://eplanning.westsussex.gov.uk/ePlanningOPS/loadResults.do", "AtriumePlanning", "AtriumePlanningParser" -"Bolton Metropolitan Borough Council", "Bolton", "http://www.planning.bolton.gov.uk/PlanningSearch/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "BoltonParser" -"Exeter City Council", "Exeter", "http://pub.exeter.gov.uk/scripts/Acolnet/dataonlineplanning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "ExeterParser" +"Bolton Metropolitan Borough Council", "Bolton", "http://www.planning.bolton.gov.uk/PlanningSearch/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser" +"Exeter City Council", "Exeter", "http://pub.exeter.gov.uk/scripts/Acolnet/dataonlineplanning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser" "Suffolk County Council", "Suffolk", "http://atrium.suffolkcc.gov.uk/ePlanning/loadResults.do", "AtriumePlanning", "AtriumePlanningParser" 
"Blackburn with Darwen Borough Council", "Blackburn", "http://195.8.175.6/", "PlanningExplorer", "BlackburnParser" "Broadland District Council", "Broadland", "http://www.broadland.gov.uk/", "PlanningExplorer", "BroadlandParser" @@ -184,3 +185,6 @@ "Shrewsbury and Atcham Borough Council", "Shrewsbury", "http://www2.shrewsbury.gov.uk/", "PlanningExplorer", "ShrewsburyParser" "Dorset County Council", "Dorset", "http://www.dorsetforyou.com/ePlanning/loadResults.do", "AtriumePlanning", "AtriumePlanningParser" "Somerset County Council", "Somerset", "http://webapp1.somerset.gov.uk/ePlanning/loadResults.do", "AtriumePlanning", "AtriumePlanningParser" +"Mid Suffolk District Council", "Mid Suffolk", "http://planning.midsuffolk.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser" +"London Borough of Southwark", "Southwark", "http://planningonline.southwarksites.com/planningonline2/AcolNetCGI.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "SouthwarkParser" +"Canterbury City Council", "Canterbury", "http://planning.canterbury.gov.uk/scripts/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "CanterburyParser"