This allows us to do lots of sites without custom scrapers. We also get a few that we didn't have before: Bridgnorth, Canterbury, Guildford, Mid Suffolk (no postcodes), and Southwark.
master
| @@ -1,41 +1,40 @@ | |||
| #!/usr/local/bin/python | |||
| import urllib, urllib2 | |||
| import HTMLParser | |||
| #from BeautifulSoup import BeautifulSoup | |||
| import urllib2 | |||
| import urlparse | |||
| from datetime import date | |||
| import datetime | |||
| import re | |||
| from BeautifulSoup import BeautifulSoup | |||
| # Adding this to try to help Surrey Heath - Duncan 14/9/2007 | |||
| import cookielib | |||
| cookie_jar = cookielib.CookieJar() | |||
| ################ | |||
| import urlparse | |||
| import re | |||
| end_head_regex = re.compile("</head", re.IGNORECASE) | |||
| import MultipartPostHandler | |||
| # this is not mine, or part of standard python (though it should be!) | |||
| # it comes from http://pipe.scs.fsu.edu/PostHandler/MultipartPostHandler.py | |||
| from PlanningUtils import getPostcodeFromText, PlanningAuthorityResults, PlanningApplication | |||
| from datetime import date | |||
| from time import strptime | |||
| date_format = "%d/%m/%Y" | |||
| our_date = date(2007,4,25) | |||
| #This is to get the system key out of the info url | |||
| system_key_regex = re.compile("TheSystemkey=(\d*)", re.IGNORECASE) | |||
| # We allow the optional > for Bridgnorth, which doesn't have broken html | |||
| end_head_regex = re.compile("</head>?", re.IGNORECASE) | |||
| class AcolnetParser(HTMLParser.HTMLParser): | |||
| case_number_tr = None # this one can be got by the td class attribute | |||
| reg_date_tr = None | |||
| location_tr = None | |||
| proposal_tr = None | |||
| received_date_format = "%d/%m/%Y" | |||
| comment_qs_template = "ACTION=UNWRAP&RIPNAME=Root.PgeCommentForm&TheSystemkey=%s" | |||
| # There is no online comment facility in these, so we provide an | |||
| # appropriate email address instead | |||
| @@ -44,32 +43,60 @@ class AcolnetParser(HTMLParser.HTMLParser): | |||
| # The optional amp; is to cope with Oldham, which seems to have started | |||
| # quoting this url. | |||
| action_regex = re.compile("<form[^>]*action=\"([^\"]*ACTION=UNWRAP&(?:amp;)?RIPSESSION=[^\"]*)\"[^>]*>", re.IGNORECASE) | |||
| def _getResultsSections(self, soup): | |||
| """In most cases, there is a table per app.""" | |||
| return soup.findAll("table", {"class": "results-table"}) | |||
| def _getCouncilReference(self, app_table): | |||
| return app_table.a.string.strip() | |||
| def _getDateReceived(self, app_table): | |||
| date_str = ''.join(app_table.find(text="Registration Date:").findNext("td").string.strip().split()) | |||
| return datetime.datetime.strptime(date_str, self.received_date_format) | |||
| def _getAddress(self, app_table): | |||
| return app_table.find(text="Location:").findNext("td").string.strip() | |||
| def _getDescription(self, app_table): | |||
| return app_table.find(text="Proposal:").findNext("td").string.strip() | |||
def _getInfoUrl(self, app_table):
    """Return the absolute info url for this application.

    As a side effect the system key found in the link is stored on
    self._current_application, because the comment url is built from
    it afterwards.
    """
    href = app_table.a['href']
    key_match = system_key_regex.search(href)
    self._current_application.system_key = key_match.groups()[0]
    # The link may be relative, so resolve it against the search page url.
    return urlparse.urljoin(self.base_url, href)
| def _getCommentUrl(self, app_table): | |||
| """This must be run after _getInfoUrl""" | |||
| if self.comments_email_address: | |||
| return self.comments_email_address | |||
| split_info_url = urlparse.urlsplit(self._current_application.info_url) | |||
| comment_qs = self.comment_qs_template %self._current_application.system_key | |||
| return urlparse.urlunsplit(split_info_url[:3] + (comment_qs,) + split_info_url[4:]) | |||
| def __init__(self, | |||
| authority_name, | |||
| authority_short_name, | |||
| base_url, | |||
| debug=False): | |||
| HTMLParser.HTMLParser.__init__(self) | |||
| self.authority_name = authority_name | |||
| self.authority_short_name = authority_short_name | |||
| self.base_url = base_url | |||
| self.debug = debug | |||
| self._tr_number = 0 | |||
| # This will be used to track the subtable depth | |||
| # when we are in a results-table, in order to | |||
| # avoid adding an application before we have got to | |||
| # the end of the results-table | |||
| self._subtable_depth = None | |||
| self._in_td = False | |||
| # This is where we store the results | |||
| self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name) | |||
| @@ -82,86 +109,6 @@ class AcolnetParser(HTMLParser.HTMLParser): | |||
| HTML cleanup.""" | |||
| return html | |||
| def handle_starttag(self, tag, attrs): | |||
| #print tag, attrs | |||
| if tag == "table": | |||
| if self._current_application is None: | |||
| # Each application is in a separate table with class "results-table" | |||
| for key, value in attrs: | |||
| if key == "class" and value == "results-table": | |||
| #print "found results-table" | |||
| self._current_application = PlanningApplication() | |||
| self._tr_number = 0 | |||
| self._subtable_depth = 0 | |||
| self._current_application.comment_url = self.comments_email_address | |||
| break | |||
| else: | |||
| # We are already in a results-table, and this is the start of a subtable, | |||
| # so increment the subtable depth. | |||
| self._subtable_depth += 1 | |||
| elif self._current_application is not None: | |||
| if tag == "tr" and self._subtable_depth == 0: | |||
| self._tr_number += 1 | |||
| if tag == "td": | |||
| self._in_td = True | |||
| if tag == "a" and self._tr_number == self.case_number_tr: | |||
| # this is where we get the info link and the case number | |||
| for key, value in attrs: | |||
| if key == "href": | |||
| self._current_application.info_url = value | |||
| system_key = system_key_regex.search(value).groups()[0] | |||
| if self.comments_email_address is not None: | |||
| self._current_application.comment_url = self.comments_email_address | |||
| else: | |||
| self._current_application.comment_url = value.replace("PgeResultDetail", "PgeCommentForm") | |||
| def handle_data(self, data): | |||
| # If we are in the tr which contains the case number, | |||
| # then data is the council reference, so | |||
| # add it to self._current_application. | |||
| if self._in_td: | |||
| if self._tr_number == self.case_number_tr: | |||
| self._current_application.council_reference = data.strip() | |||
| elif self._tr_number == self.reg_date_tr: | |||
| # we need to make a date object out of data | |||
| date_as_str = ''.join(data.strip().split()) | |||
| received_date = date(*strptime(date_as_str, date_format)[0:3]) | |||
| #print received_date | |||
| self._current_application.date_received = received_date | |||
| elif self._tr_number == self.location_tr: | |||
| location = data.strip() | |||
| self._current_application.address = location | |||
| self._current_application.postcode = getPostcodeFromText(location) | |||
| elif self._tr_number == self.proposal_tr: | |||
| self._current_application.description = data.strip() | |||
| def handle_endtag(self, tag): | |||
| #print "ending: ", tag | |||
| if tag == "table" and self._current_application is not None: | |||
| if self._subtable_depth > 0: | |||
| self._subtable_depth -= 1 | |||
| else: | |||
| # We need to add the last application in the table | |||
| if self._current_application is not None: | |||
| #print "adding application" | |||
| self._results.addApplication(self._current_application) | |||
| #print self._current_application | |||
| self._current_application = None | |||
| self._tr_number = None | |||
| self._subtable_depth = None | |||
| elif tag == "td": | |||
| self._in_td = False | |||
| def _getSearchResponse(self): | |||
| # It looks like we sometimes need to do some stuff to get around a | |||
| # javascript redirect and cookies. | |||
| @@ -202,9 +149,6 @@ class AcolnetParser(HTMLParser.HTMLParser): | |||
| response = opener.open(action_url, search_data) | |||
| results_html = response.read() | |||
| #outfile = open("tmpfile", "w") | |||
| #outfile.write(results_html) | |||
| # This is for doing site specific html cleanup | |||
| results_html = self._cleanupHTML(results_html) | |||
| @@ -212,53 +156,40 @@ class AcolnetParser(HTMLParser.HTMLParser): | |||
| #so we'll just have the body | |||
| just_body = "<html>" + end_head_regex.split(results_html)[-1] | |||
| #outfile = open(self.authority_short_name + ".debug", "w") | |||
| #outfile.write(just_body) | |||
| self.feed(just_body) | |||
| #self.feed(just_body) | |||
| return self._results | |||
| soup = BeautifulSoup(just_body) | |||
| # Each app is in a table of its own. | |||
| results_tables = self._getResultsSections(soup) | |||
| def getResults(self, day, month, year): | |||
| return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML() | |||
| for app_table in results_tables: | |||
| self._current_application = PlanningApplication() | |||
| ## # Babergh up to 21/06/2007 | |||
| ## class BaberghParser(AcolnetParser): | |||
| ## case_number_tr = 1 # this one can be got by the td class attribute | |||
| ## reg_date_tr = 2 | |||
| ## location_tr = 4 | |||
| ## proposal_tr = 5 | |||
| self._current_application.council_reference = self._getCouncilReference(app_table) | |||
| self._current_application.address = self._getAddress(app_table) | |||
| # Get the postcode from the address | |||
| self._current_application.postcode = getPostcodeFromText(self._current_application.address) | |||
| self._current_application.description = self._getDescription(app_table) | |||
| self._current_application.info_url = self._getInfoUrl(app_table) | |||
| self._current_application.comment_url = self._getCommentUrl(app_table) | |||
| self._current_application.date_received = self._getDateReceived(app_table) | |||
| ## # It would be nice to scrape this... | |||
| ## comments_email_address = "planning.reception@babergh.gov.uk" | |||
| self._results.addApplication(self._current_application) | |||
| # Site changes to here from 22/06/2007 | |||
| class BaberghParser(AcolnetParser): | |||
| case_number_tr = 1 # this one can be got by the td class attribute | |||
| reg_date_tr = 3 | |||
| location_tr = 5 | |||
| proposal_tr = 6 | |||
| # It would be nice to scrape this... | |||
| comments_email_address = "planning.reception@babergh.gov.uk" | |||
| return self._results | |||
| class BasingstokeParser(AcolnetParser): | |||
| case_number_tr = 1 # this one can be got by the td class attribute | |||
| reg_date_tr = 3 | |||
| location_tr = 6 | |||
| proposal_tr = 8 | |||
| # It would be nice to scrape this... | |||
| comments_email_address = "development.control@basingstoke.gov.uk" | |||
| class BassetlawParser(AcolnetParser): | |||
| case_number_tr = 1 # this one can be got by the td class attribute | |||
| reg_date_tr = 2 | |||
| location_tr = 4 | |||
| proposal_tr = 5 | |||
| def getResults(self, day, month, year): | |||
| return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML() | |||
| class BassetlawParser(AcolnetParser): | |||
| comments_email_address = "planning@bassetlaw.gov.uk" | |||
| def _cleanupHTML(self, html): | |||
| @@ -270,214 +201,58 @@ class BassetlawParser(AcolnetParser): | |||
| class BridgnorthParser(AcolnetParser): | |||
| # This site is currently down... | |||
| #search_url = "http://www2.bridgnorth-dc.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch" | |||
| #authority_name = "Bridgenorth District Council" | |||
| #authority_short_name = "Bridgenorth" | |||
| case_number_tr = 1 # this one can be got by the td class attribute | |||
| reg_date_tr = 2 | |||
| location_tr = 4 | |||
| proposal_tr = 5 | |||
| comments_email_address = "contactus@bridgnorth-dc.gov.uk" | |||
| class BuryParser(AcolnetParser): | |||
| case_number_tr = 1 # this one can be got by the td class attribute | |||
| reg_date_tr = 3 | |||
| location_tr = 5 | |||
| proposal_tr = 6 | |||
| #comments_email_address = "development.control@bury.gov.uk" | |||
| ## class CanterburyParser(AcolnetParser): | |||
| ## search_url = "http://planning.canterbury.gov.uk/scripts/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch" | |||
| ## case_number_tr = 1 # this one can be got by the td class attribute | |||
| ## reg_date_tr = 2 | |||
| ## location_tr = 4 | |||
| ## proposal_tr = 5 | |||
| ## authority_name = "Canterbury City Council" | |||
| ## authority_short_name = "Canterbury" | |||
| def _getResultsSections(self, soup): | |||
| return soup.findAll("table", {"class": "app"}) | |||
| ## comments_email_address = "" | |||
| def _getCouncilReference(self, app_table): | |||
| return app_table.a.string.split()[-1] | |||
| class CarlisleParser(AcolnetParser): | |||
| case_number_tr = 1 # this one can be got by the td class attribute | |||
| reg_date_tr = 2 | |||
| location_tr = 5 | |||
| proposal_tr = 6 | |||
| comments_email_address = "dc@carlisle.gov.uk" | |||
| class DerbyParser(AcolnetParser): | |||
| case_number_tr = 1 # this one can be got by the td class attribute | |||
| reg_date_tr = 3 | |||
| location_tr = 4 | |||
| proposal_tr = 5 | |||
| comments_email_address = "developmentcontrol@derby.gov.uk" | |||
| class CroydonParser(AcolnetParser): | |||
| case_number_tr = 1 # this one can be got by the td class attribute | |||
| reg_date_tr = 3 | |||
| location_tr = 5 | |||
| proposal_tr = 6 | |||
| comments_email_address = "planning.control@croydon.gov.uk" | |||
| class EastLindseyParser(AcolnetParser): | |||
| case_number_tr = 1 # this one can be got by the td class attribute | |||
| reg_date_tr = 3 | |||
| location_tr = 5 | |||
| proposal_tr = 6 | |||
| comments_email_address = "development.control@e-lindsey.gov.uk" | |||
| class FyldeParser(AcolnetParser): | |||
| case_number_tr = 1 # this one can be got by the td class attribute | |||
| reg_date_tr = 2 | |||
| location_tr = 4 | |||
| proposal_tr = 5 | |||
| def _getCommentUrl(self, app_table): | |||
| """This must be run after _getInfoUrl""" | |||
| #http://www2.bridgnorth-dc.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeCommentForm&TheSystemkey=46958 | |||
| return self._current_application.info_url.replace("NewPages", "PgeCommentForm") | |||
| comments_email_address = "planning@fylde.gov.uk" | |||
| # Cambridgeshire, although an Acolnet site, is so different that it | |||
| # may as well be handled completely separately. | |||
| class HarlowParser(AcolnetParser): | |||
| case_number_tr = 1 # this one can be got by the td class attribute | |||
| reg_date_tr = 2 | |||
| location_tr = 4 | |||
| proposal_tr = 5 | |||
| class CanterburyParser(AcolnetParser): | |||
| """Here the apps are one row each in a big table.""" | |||
| comments_email_address = "Planning.services@harlow.gov.uk" | |||
| def _getResultsSections(self, soup): | |||
| return soup.find("table", {"class": "results-table"}).findAll("tr")[1:] | |||
| class HavantParser(AcolnetParser): | |||
| case_number_tr = 1 # this one can be got by the td class attribute | |||
| reg_date_tr = 3 | |||
| location_tr = 6 | |||
| proposal_tr = 8 | |||
| def _getDateReceived(self, app_table): | |||
| date_str = app_table.findAll("td")[3].string.strip() | |||
| comments_email_address = "representations@havant.gov.uk" | |||
| return datetime.datetime.strptime(date_str, self.received_date_format) | |||
| class HertsmereParser(AcolnetParser): | |||
| case_number_tr = 1 # this one can be got by the td class attribute | |||
| reg_date_tr = 2 | |||
| location_tr = 4 | |||
| proposal_tr = 5 | |||
| def _getAddress(self, app_table): | |||
| return app_table.findAll("td")[1].string.strip() | |||
| comments_email_address = "planning@hertsmere.gov.uk" | |||
| def _getDescription(self, app_table): | |||
| return app_table.findAll("td")[2].string.strip() | |||
| class LewishamParser(AcolnetParser): | |||
| case_number_tr = 1 # this one can be got by the td class attribute | |||
| reg_date_tr = 2 | |||
| location_tr = 4 | |||
| proposal_tr = 5 | |||
| #Kensington and chelsea is sufficiently different, it may as well be handled separately | |||
| comments_email_address = "planning@lewisham.gov.uk" | |||
| # Mid Bedfordshire - there is an acolnet here, but you have to have a username | |||
| # and password to access it! | |||
| class NorthHertfordshireParser(AcolnetParser): | |||
| case_number_tr = 1 # this one can be got by the td class attribute | |||
| reg_date_tr = 2 | |||
| location_tr = 4 | |||
| proposal_tr = 5 | |||
| ## class MidSuffolkParser(AcolnetParser): | |||
| ## case_number_tr = 1 # this one can be got by the td class attribute | |||
| ## reg_date_tr = 2 | |||
| ## location_tr = 4 | |||
| ## proposal_tr = 5 | |||
| ## comments_email_address = "planning@lewisham.gov.uk" | |||
| ## #action_regex = re.compile("<FORM .*action=\"(.*ACTION=UNWRAP&RIPSESSION=[^\"]*)\"[^>]*>", re.IGNORECASE) | |||
| class NewForestNPParser(AcolnetParser): | |||
| # In this case there is an online comment facility at the | |||
| # bottom of each view app page... | |||
| case_number_tr = 1 # this one can be got by the td class attribute | |||
| reg_date_tr = 2 | |||
| location_tr = 4 | |||
| proposal_tr = 5 | |||
| class NewForestDCParser(AcolnetParser): | |||
| # In this case there is an online comment facility at the | |||
| # bottom of each view app page... | |||
| case_number_tr = 1 # this one can be got by the td class attribute | |||
| reg_date_tr = 3 | |||
| location_tr = 6 | |||
| proposal_tr = 7 | |||
| class NorthWiltshireParser(AcolnetParser): | |||
| case_number_tr = 1 # this one can be got by the td class attribute | |||
| reg_date_tr = 3 | |||
| location_tr = 6 | |||
| proposal_tr = 7 | |||
| class OldhamParser(AcolnetParser): | |||
| case_number_tr = 1 # this one can be got by the td class attribute | |||
| reg_date_tr = 3 | |||
| location_tr = 6 | |||
| proposal_tr = 7 | |||
| def _cleanupHTML(self, html): | |||
| """There is a bad table end tag in this one. | |||
| Fix it before we start""" | |||
| bad_table_end = '</table summary="Copyright">' | |||
| good_table_end = '</table>' | |||
| return html.replace(bad_table_end, good_table_end) | |||
| class SouthwarkParser(AcolnetParser): | |||
| def _getDateReceived(self, app_table): | |||
| date_str = ''.join(app_table.find(text="Statutory start date:").findNext("td").string.strip().split()) | |||
| class RenfrewshireParser(AcolnetParser): | |||
| case_number_tr = 1 # this one can be got by the td class attribute | |||
| reg_date_tr = 2 | |||
| location_tr = 4 | |||
| proposal_tr = 5 | |||
| comments_email_address = "pt@renfrewshire.gov.uk" | |||
| class SouthBedfordshireParser(AcolnetParser): | |||
| case_number_tr = 1 # this one can be got by the td class attribute | |||
| reg_date_tr = 3 | |||
| location_tr = 5 | |||
| proposal_tr = 6 | |||
| class SuffolkCoastalParser(AcolnetParser): | |||
| # case_number_tr = 1 # this one can be got by the td class attribute | |||
| # reg_date_tr = 2 | |||
| # location_tr = 4 | |||
| # proposal_tr = 5 | |||
| # New URL with different layout | |||
| case_number_tr = 1 | |||
| reg_date_tr = 3 | |||
| location_tr = 5 | |||
| proposal_tr = 6 | |||
| comments_email_address = "d.c.admin@suffolkcoastal.gov.uk" | |||
| class GuildfordParser(AcolnetParser): | |||
| case_number_tr = 1 | |||
| reg_date_tr = 7 | |||
| location_tr = 2 | |||
| proposal_tr = 3 | |||
| #http://www.guildford.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&Root=PgeSearch | |||
| class BoltonParser(AcolnetParser): | |||
| case_number_tr = 1 | |||
| reg_date_tr = 2 | |||
| location_tr = 4 | |||
| proposal_tr = 5 | |||
| comments_email_address = "Planning.control@bolton.gov.uk" | |||
| class ExeterParser(AcolnetParser): | |||
| case_number_tr = 1 | |||
| reg_date_tr = 3 | |||
| location_tr = 5 | |||
| proposal_tr = 6 | |||
| return datetime.datetime.strptime(date_str, self.received_date_format) | |||
| class SurreyHeathParser(AcolnetParser): | |||
| # This is not working yet. | |||
| @@ -520,23 +295,42 @@ class SurreyHeathParser(AcolnetParser): | |||
| # return javascript_redirect_response | |||
| # Wychavon is rather different, and will need some thought. There is no | |||
| # advanced search page | |||
| if __name__ == '__main__': | |||
| day = 20 | |||
| day = 30 | |||
| month = 11 | |||
| year = 2007 | |||
| # returns error 400 - bad request | |||
| #parser = BridgenorthParser() | |||
| # cambridgeshire is a bit different... | |||
| # no advanced search page | |||
| # canterbury | |||
| # results as columns of one table | |||
| #parser = SurreyHeathParser("Surrey Heath", "Surrey Heath", "https://www.public.surreyheath-online.gov.uk/whalecom60b1ef305f59f921/whalecom0/Scripts/PlanningPagesOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
| parser = OldhamParser("Oldham", "Oldham", "http://planning.oldham.gov.uk/planning/AcolNetCGI.gov?ACTION=UNWRAP&Root=PgeSearch") | |||
| #parser = AcolnetParser("Babergh", "Babergh", "http://planning.babergh.gov.uk/dcdatav2//acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
| #parser = AcolnetParser("Basingstoke", "Basingstoke", "http://planning.basingstoke.gov.uk/DCOnline2/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
| #parser = BassetlawParser("Bassetlaw", "Bassetlaw", "http://www.bassetlaw.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
| #parser = AcolnetParser("Bolton", "Bolton", "http://www.planning.bolton.gov.uk/PlanningSearch/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
| # parser = BridgnorthParser("Bridgnorth", "Bridgnorth", "http://www2.bridgnorth-dc.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch") | |||
| #parser = AcolnetParser("Bury", "Bury", "http://e-planning.bury.gov.uk/DCWebPages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
| #parser = CanterburyParser("Canterbury", "Canterbury", "http://planning.canterbury.gov.uk/scripts/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
| #parser = AcolnetParser("Carlisle", "Carlisle", "http://planning.carlisle.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
| #parser = AcolnetParser("Croydon", "Croydon", "http://planning.croydon.gov.uk/DCWebPages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
| #parser = AcolnetParser("Derby", "Derby", "http://eplanning.derby.gov.uk/acolnet/planningpages02/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
| #parser = AcolnetParser("East Lindsey", "East Lindsey", "http://www.e-lindsey.gov.uk/planning/AcolnetCGI.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser") | |||
| #parser = AcolnetParser("Exeter City Council", "Exeter", "http://pub.exeter.gov.uk/scripts/Acolnet/dataonlineplanning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
| #parser = AcolnetParser("Fylde", "Fylde", "http://www2.fylde.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
| #parser = AcolnetParser("Guildford", "Guildford", "http://www.guildford.gov.uk/DLDC_Version_2/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
| #parser = AcolnetParser("Harlow", "Harlow", "http://planning.harlow.gov.uk/PlanningSearch/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
| #parser = AcolnetParser("Havant", "Havant", "http://www3.havant.gov.uk/scripts/planningpages/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
| #parser = AcolnetParser("Hertsmere", "Hertsmere", "http://www2.hertsmere.gov.uk/ACOLNET/DCOnline//acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
| #parser = AcolnetParser("Lewisham", "Lewisham", "http://acolnet.lewisham.gov.uk/lewis-xslpagesdc/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.PgeSearch") | |||
| #parser = AcolnetParser("Mid Suffolk", "Mid Suffolk", "http://planning.midsuffolk.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
| #parser = AcolnetParser("New Forest District Council", "New Forest DC", "http://web3.newforest.gov.uk/planningonline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
| #parser = AcolnetParser("New Forest National Park Authority", "New Forest NPA", "http://web01.newforestnpa.gov.uk/planningpages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
| #parser = AcolnetParser("North Hertfordshire", "North Herts", "http://www.north-herts.gov.uk/dcdataonline/Pages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch") | |||
| #parser = AcolnetParser("North Wiltshire", "North Wilts", "http://planning.northwilts.gov.uk/DCOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
| #parser = OldhamParser("Oldham", "Oldham", "http://planning.oldham.gov.uk/planning/AcolNetCGI.gov?ACTION=UNWRAP&Root=PgeSearch") | |||
| #parser = AcolnetParser("Renfrewshire", "Renfrewshire", "http://planning.renfrewshire.gov.uk/acolnetDCpages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch") | |||
| #parser = AcolnetParser("South Bedfordshire", "South Bedfordshire", "http://planning.southbeds.gov.uk/plantech/DCWebPages/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.PgeSearch") | |||
| #parser = SouthwarkParser("London Borough of Southwark", "Southwark", "http://planningonline.southwarksites.com/planningonline2/AcolNetCGI.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
| #parser = AcolnetParser("Suffolk Coastal", "Suffolk Coastal", "http://apps3.suffolkcoastal.gov.uk/DCDataV2/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
| #parser = AcolnetParser("Surrey Heath", "Surrey Heath", "https://www.public.surreyheath-online.gov.uk/whalecom60b1ef305f59f921/whalecom0/Scripts/PlanningPagesOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") | |||
| print parser.getResults(day, month, year) | |||
| @@ -0,0 +1,542 @@ | |||
| #!/usr/local/bin/python | |||
| import urllib, urllib2 | |||
| import HTMLParser | |||
| #from BeautifulSoup import BeautifulSoup | |||
| # Adding this to try to help Surrey Heath - Duncan 14/9/2007 | |||
| import cookielib | |||
| cookie_jar = cookielib.CookieJar() | |||
| ################ | |||
| import urlparse | |||
| import re | |||
| # We allow the optional > for Bridgnorth, which doesn't have broken html | |||
| end_head_regex = re.compile("</head>?", re.IGNORECASE) | |||
| import MultipartPostHandler | |||
| # this is not mine, or part of standard python (though it should be!) | |||
| # it comes from http://pipe.scs.fsu.edu/PostHandler/MultipartPostHandler.py | |||
| from PlanningUtils import getPostcodeFromText, PlanningAuthorityResults, PlanningApplication | |||
| from datetime import date | |||
| from time import strptime | |||
# Date format shared by the search form fields and the dates scraped
# from the results pages.
date_format = "%d/%m/%Y"

# NOTE(review): nothing visible in this file reads this module-level
# value (getResultsByDayMonthYear builds its own date) - confirm before
# removing it.
our_date = date(2007, 4, 25)

# This is to get the system key out of the info url.
# Written as a raw string so that \d is passed to the regex engine
# untouched rather than relying on Python keeping an unknown string escape.
system_key_regex = re.compile(r"TheSystemkey=(\d*)", re.IGNORECASE)
| class AcolnetParser(HTMLParser.HTMLParser): | |||
| case_number_tr = None # this one can be got by the td class attribute | |||
| reg_date_tr = None | |||
| location_tr = None | |||
| proposal_tr = None | |||
| # There is no online comment facility in these, so we provide an | |||
| # appropriate email address instead | |||
| comments_email_address = None | |||
| # The optional amp; is to cope with Oldham, which seems to have started | |||
| # quoting this url. | |||
| action_regex = re.compile("<form[^>]*action=\"([^\"]*ACTION=UNWRAP&(?:amp;)?RIPSESSION=[^\"]*)\"[^>]*>", re.IGNORECASE) | |||
| def __init__(self, | |||
| authority_name, | |||
| authority_short_name, | |||
| base_url, | |||
| debug=False): | |||
| HTMLParser.HTMLParser.__init__(self) | |||
| self.authority_name = authority_name | |||
| self.authority_short_name = authority_short_name | |||
| self.base_url = base_url | |||
| self.debug = debug | |||
| self._tr_number = 0 | |||
| # This will be used to track the subtable depth | |||
| # when we are in a results-table, in order to | |||
| # avoid adding an application before we have got to | |||
| # the end of the results-table | |||
| self._subtable_depth = None | |||
| self._in_td = False | |||
| # This in where we store the results | |||
| self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name) | |||
| # This will store the planning application we are currently working on. | |||
| self._current_application = None | |||
| def _cleanupHTML(self, html): | |||
| """This method should be overridden in subclasses to perform site specific | |||
| HTML cleanup.""" | |||
| return html | |||
| def handle_starttag(self, tag, attrs): | |||
| #print tag, attrs | |||
| if tag == "table": | |||
| if self._current_application is None: | |||
| # Each application is in a separate table with class "results-table" | |||
| for key, value in attrs: | |||
| if key == "class" and value == "results-table": | |||
| #print "found results-table" | |||
| self._current_application = PlanningApplication() | |||
| self._tr_number = 0 | |||
| self._subtable_depth = 0 | |||
| self._current_application.comment_url = self.comments_email_address | |||
| break | |||
| else: | |||
| # We are already in a results-table, and this is the start of a subtable, | |||
| # so increment the subtable depth. | |||
| self._subtable_depth += 1 | |||
| elif self._current_application is not None: | |||
| if tag == "tr" and self._subtable_depth == 0: | |||
| self._tr_number += 1 | |||
| if tag == "td": | |||
| self._in_td = True | |||
| if tag == "a" and self._tr_number == self.case_number_tr: | |||
| # this is where we get the info link and the case number | |||
| for key, value in attrs: | |||
| if key == "href": | |||
| self._current_application.info_url = value | |||
| system_key = system_key_regex.search(value).groups()[0] | |||
| if self.comments_email_address is not None: | |||
| self._current_application.comment_url = self.comments_email_address | |||
| else: | |||
| self._current_application.comment_url = value.replace("PgeResultDetail", "PgeCommentForm") | |||
| def handle_data(self, data): | |||
| # If we are in the tr which contains the case number, | |||
| # then data is the council reference, so | |||
| # add it to self._current_application. | |||
| if self._in_td: | |||
| if self._tr_number == self.case_number_tr: | |||
| self._current_application.council_reference = data.strip() | |||
| elif self._tr_number == self.reg_date_tr: | |||
| # we need to make a date object out of data | |||
| date_as_str = ''.join(data.strip().split()) | |||
| received_date = date(*strptime(date_as_str, date_format)[0:3]) | |||
| #print received_date | |||
| self._current_application.date_received = received_date | |||
| elif self._tr_number == self.location_tr: | |||
| location = data.strip() | |||
| self._current_application.address = location | |||
| self._current_application.postcode = getPostcodeFromText(location) | |||
| elif self._tr_number == self.proposal_tr: | |||
| self._current_application.description = data.strip() | |||
| def handle_endtag(self, tag): | |||
| #print "ending: ", tag | |||
| if tag == "table" and self._current_application is not None: | |||
| if self._subtable_depth > 0: | |||
| self._subtable_depth -= 1 | |||
| else: | |||
| # We need to add the last application in the table | |||
| if self._current_application is not None: | |||
| #print "adding application" | |||
| self._results.addApplication(self._current_application) | |||
| #print self._current_application | |||
| self._current_application = None | |||
| self._tr_number = None | |||
| self._subtable_depth = None | |||
| elif tag == "td": | |||
| self._in_td = False | |||
| def _getSearchResponse(self): | |||
| # It looks like we sometimes need to do some stuff to get around a | |||
| # javascript redirect and cookies. | |||
| search_form_request = urllib2.Request(self.base_url) | |||
| search_form_response = urllib2.urlopen(search_form_request) | |||
| return search_form_response | |||
def getResultsByDayMonthYear(self, day, month, year):
    """Fetch and parse the search results for a single date.

    The search form is loaded first, its action url is extracted with
    self.action_regex, and the date is posted to that url as both
    regdate1 and regdate2. The response is cleaned up, stripped of its
    head section and fed to this parser.

    day, month and year are integers. Returns self._results.
    """
    # The search page is fetched first to get ourselves some session info.
    form_html = self._getSearchResponse().read()

    # Parsing this page sometimes causes a problem in HTMLParser, so the
    # form's action link is just pulled out with a regex.
    action = self.action_regex.search(form_html).groups()[0]

    # Drop the "amp;" which seems to have appeared in this url on the
    # Oldham site.
    action = action.replace('amp;', '')
    post_url = urlparse.urljoin(self.base_url, action)

    # The same date is used for both ends of the registration date range.
    search_date = date(year, month, day)
    form_fields = {
        "regdate1": search_date.strftime(date_format),
        "regdate2": search_date.strftime(date_format),
    }

    opener = urllib2.build_opener(MultipartPostHandler.MultipartPostHandler)
    raw_html = opener.open(post_url, form_fields).read()

    # Site specific html cleanup.
    cleaned_html = self._cleanupHTML(raw_html)

    # Some javascript garbage in the header upsets HTMLParser, so only
    # what follows the end of the head is parsed.
    body_html = "<html>" + end_head_regex.split(cleaned_html)[-1]

    self.feed(body_html)
    return self._results
def getResults(self, day, month, year):
    """Return displayXML() of the results for the given date.

    day, month and year are converted with int() before being passed
    to getResultsByDayMonthYear, so numeric strings are accepted.
    """
    results = self.getResultsByDayMonthYear(int(day), int(month), int(year))
    return results.displayXML()
| ## # Babergh up to 21/06/2007 | |||
| ## class BaberghParser(AcolnetParser): | |||
| ## case_number_tr = 1 # this one can be got by the td class attribute | |||
| ## reg_date_tr = 2 | |||
| ## location_tr = 4 | |||
| ## proposal_tr = 5 | |||
| ## # It would be nice to scrape this... | |||
| ## comments_email_address = "planning.reception@babergh.gov.uk" | |||
| # Site changes to here from 22/06/2007 | |||
class BaberghParser(AcolnetParser):
    """Acolnet settings for Babergh (site layout from 22/06/2007)."""

    # Email address supplied for comments on applications.
    # It would be nice to scrape this...
    comments_email_address = "planning.reception@babergh.gov.uk"

    # tr numbers that the base parser compares against to pick out the
    # case number, registration date, location and proposal.
    # The case number one can be got by the td class attribute.
    case_number_tr, reg_date_tr, location_tr, proposal_tr = 1, 3, 5, 6
class BasingstokeParser(AcolnetParser):
    """Acolnet settings for the Basingstoke site."""

    # Email address supplied for comments on applications.
    # It would be nice to scrape this...
    comments_email_address = "development.control@basingstoke.gov.uk"

    # tr numbers that the base parser compares against to pick out the
    # case number, registration date, location and proposal.
    # The case number one can be got by the td class attribute.
    case_number_tr, reg_date_tr, location_tr, proposal_tr = 1, 3, 6, 8
class BassetlawParser(AcolnetParser):
    """Acolnet settings for the Bassetlaw site, plus a div-stripping fix."""

    # Email address supplied for comments on applications.
    comments_email_address = "planning@bassetlaw.gov.uk"

    # tr numbers that the base parser compares against to pick out the
    # case number, registration date, location and proposal.
    # The case number one can be got by the td class attribute.
    case_number_tr, reg_date_tr, location_tr, proposal_tr = 1, 2, 4, 5

    def _cleanupHTML(self, html):
        """Return html with every opening and closing div tag removed.

        There is a broken div in this page. We don't need any divs, so
        they are all dropped before parsing.
        """
        any_div_tag = re.compile("</?div[^>]*>", re.IGNORECASE)
        return any_div_tag.sub('', html)
class BridgnorthParser(AcolnetParser):
    """Acolnet settings for the Bridgnorth site."""

    # This site is currently down...
    #search_url = "http://www2.bridgnorth-dc.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch"
    #authority_name = "Bridgenorth District Council"
    #authority_short_name = "Bridgenorth"

    # Email address supplied for comments on applications.
    comments_email_address = "contactus@bridgnorth-dc.gov.uk"

    # tr numbers that the base parser compares against to pick out the
    # case number, registration date, location and proposal.
    # The case number one can be got by the td class attribute.
    case_number_tr, reg_date_tr, location_tr, proposal_tr = 1, 2, 4, 5
class BuryParser(AcolnetParser):
    """Acolnet settings for the Bury site."""

    # No comments address is set for this site; the old one is kept
    # here for reference.
    #comments_email_address = "development.control@bury.gov.uk"

    # tr numbers that the base parser compares against to pick out the
    # case number, registration date, location and proposal.
    # The case number one can be got by the td class attribute.
    case_number_tr, reg_date_tr, location_tr, proposal_tr = 1, 3, 5, 6
| ## class CanterburyParser(AcolnetParser): | |||
| ## search_url = "http://planning.canterbury.gov.uk/scripts/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch" | |||
| ## case_number_tr = 1 # this one can be got by the td class attribute | |||
| ## reg_date_tr = 2 | |||
| ## location_tr = 4 | |||
| ## proposal_tr = 5 | |||
| ## authority_name = "Canterbury City Council" | |||
| ## authority_short_name = "Canterbury" | |||
| ## comments_email_address = "" | |||
class CarlisleParser(AcolnetParser):
    """Acolnet settings for the Carlisle site."""

    # Email address supplied for comments on applications.
    comments_email_address = "dc@carlisle.gov.uk"

    # tr numbers that the base parser compares against to pick out the
    # case number, registration date, location and proposal.
    # The case number one can be got by the td class attribute.
    case_number_tr, reg_date_tr, location_tr, proposal_tr = 1, 2, 5, 6
class DerbyParser(AcolnetParser):
    """Acolnet settings for the Derby site."""

    # Email address supplied for comments on applications.
    comments_email_address = "developmentcontrol@derby.gov.uk"

    # tr numbers that the base parser compares against to pick out the
    # case number, registration date, location and proposal.
    # The case number one can be got by the td class attribute.
    case_number_tr, reg_date_tr, location_tr, proposal_tr = 1, 3, 4, 5
class CroydonParser(AcolnetParser):
    """Acolnet settings for the Croydon site."""

    # Email address supplied for comments on applications.
    comments_email_address = "planning.control@croydon.gov.uk"

    # tr numbers that the base parser compares against to pick out the
    # case number, registration date, location and proposal.
    # The case number one can be got by the td class attribute.
    case_number_tr, reg_date_tr, location_tr, proposal_tr = 1, 3, 5, 6
class EastLindseyParser(AcolnetParser):
    """Acolnet settings for the East Lindsey site."""

    # Email address supplied for comments on applications.
    comments_email_address = "development.control@e-lindsey.gov.uk"

    # tr numbers that the base parser compares against to pick out the
    # case number, registration date, location and proposal.
    # The case number one can be got by the td class attribute.
    case_number_tr, reg_date_tr, location_tr, proposal_tr = 1, 3, 5, 6
class FyldeParser(AcolnetParser):
    """Acolnet settings for the Fylde site."""

    # Email address supplied for comments on applications.
    comments_email_address = "planning@fylde.gov.uk"

    # tr numbers that the base parser compares against to pick out the
    # case number, registration date, location and proposal.
    # The case number one can be got by the td class attribute.
    case_number_tr, reg_date_tr, location_tr, proposal_tr = 1, 2, 4, 5
class HarlowParser(AcolnetParser):
    """Acolnet settings for the Harlow site."""

    # Email address supplied for comments on applications.
    comments_email_address = "Planning.services@harlow.gov.uk"

    # tr numbers that the base parser compares against to pick out the
    # case number, registration date, location and proposal.
    # The case number one can be got by the td class attribute.
    case_number_tr, reg_date_tr, location_tr, proposal_tr = 1, 2, 4, 5
class HavantParser(AcolnetParser):
    """Acolnet settings for the Havant site."""

    # Email address supplied for comments on applications.
    comments_email_address = "representations@havant.gov.uk"

    # tr numbers that the base parser compares against to pick out the
    # case number, registration date, location and proposal.
    # The case number one can be got by the td class attribute.
    case_number_tr, reg_date_tr, location_tr, proposal_tr = 1, 3, 6, 8
class HertsmereParser(AcolnetParser):
    """Acolnet settings for the Hertsmere site."""

    # Email address supplied for comments on applications.
    comments_email_address = "planning@hertsmere.gov.uk"

    # tr numbers that the base parser compares against to pick out the
    # case number, registration date, location and proposal.
    # The case number one can be got by the td class attribute.
    case_number_tr, reg_date_tr, location_tr, proposal_tr = 1, 2, 4, 5
class LewishamParser(AcolnetParser):
    """Acolnet settings for the Lewisham site."""

    # Email address supplied for comments on applications.
    comments_email_address = "planning@lewisham.gov.uk"

    # tr numbers that the base parser compares against to pick out the
    # case number, registration date, location and proposal.
    # The case number one can be got by the td class attribute.
    case_number_tr, reg_date_tr, location_tr, proposal_tr = 1, 2, 4, 5
class NorthHertfordshireParser(AcolnetParser):
    """Acolnet settings for the North Hertfordshire site."""

    # tr numbers that the base parser compares against to pick out the
    # case number, registration date, location and proposal.
    # The case number one can be got by the td class attribute.
    case_number_tr, reg_date_tr, location_tr, proposal_tr = 1, 2, 4, 5
| ## class MidSuffolkParser(AcolnetParser): | |||
| ## case_number_tr = 1 # this one can be got by the td class attribute | |||
| ## reg_date_tr = 2 | |||
| ## location_tr = 4 | |||
| ## proposal_tr = 5 | |||
| ## comments_email_address = "planning@lewisham.gov.uk" | |||
| ## #action_regex = re.compile("<FORM .*action=\"(.*ACTION=UNWRAP&RIPSESSION=[^\"]*)\"[^>]*>", re.IGNORECASE) | |||
class NewForestNPParser(AcolnetParser):
    """Acolnet settings for the New Forest National Park site.

    In this case there is an online comment facility at the bottom of
    each view app page, so no comments email address is set here.
    """

    # tr numbers that the base parser compares against to pick out the
    # case number, registration date, location and proposal.
    # The case number one can be got by the td class attribute.
    case_number_tr, reg_date_tr, location_tr, proposal_tr = 1, 2, 4, 5
class NewForestDCParser(AcolnetParser):
    """Acolnet settings for the New Forest District Council site.

    In this case there is an online comment facility at the bottom of
    each view app page, so no comments email address is set here.
    """

    # tr numbers that the base parser compares against to pick out the
    # case number, registration date, location and proposal.
    # The case number one can be got by the td class attribute.
    case_number_tr, reg_date_tr, location_tr, proposal_tr = 1, 3, 6, 7
class NorthWiltshireParser(AcolnetParser):
    """Acolnet settings for the North Wiltshire site."""

    # tr numbers that the base parser compares against to pick out the
    # case number, registration date, location and proposal.
    # The case number one can be got by the td class attribute.
    case_number_tr, reg_date_tr, location_tr, proposal_tr = 1, 3, 6, 7
class OldhamParser(AcolnetParser):
    """Acolnet settings for the Oldham site, plus a table end tag fix."""

    # tr numbers that the base parser compares against to pick out the
    # case number, registration date, location and proposal.
    # The case number one can be got by the td class attribute.
    case_number_tr, reg_date_tr, location_tr, proposal_tr = 1, 3, 6, 7

    def _cleanupHTML(self, html):
        """Return html with the bad table end tag replaced by a good one.

        There is a table end tag carrying a summary attribute in this
        page; it is fixed before we start parsing.
        """
        return html.replace('</table summary="Copyright">', '</table>')
class RenfrewshireParser(AcolnetParser):
    """Acolnet settings for the Renfrewshire site."""

    # Email address supplied for comments on applications.
    comments_email_address = "pt@renfrewshire.gov.uk"

    # tr numbers that the base parser compares against to pick out the
    # case number, registration date, location and proposal.
    # The case number one can be got by the td class attribute.
    case_number_tr, reg_date_tr, location_tr, proposal_tr = 1, 2, 4, 5
class SouthBedfordshireParser(AcolnetParser):
    """Acolnet settings for the South Bedfordshire site."""

    # tr numbers that the base parser compares against to pick out the
    # case number, registration date, location and proposal.
    # The case number one can be got by the td class attribute.
    case_number_tr, reg_date_tr, location_tr, proposal_tr = 1, 3, 5, 6
class SuffolkCoastalParser(AcolnetParser):
    """Acolnet settings for the Suffolk Coastal site."""

    # Email address supplied for comments on applications.
    comments_email_address = "d.c.admin@suffolkcoastal.gov.uk"

    # tr numbers that the base parser compares against to pick out the
    # case number, registration date, location and proposal.
    # These are for the new URL, which has a different layout; the
    # previous values were 1, 2, 4, 5.
    case_number_tr, reg_date_tr, location_tr, proposal_tr = 1, 3, 5, 6
class GuildfordParser(AcolnetParser):
    """Acolnet settings for the Guildford site."""

    #http://www.guildford.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&Root=PgeSearch

    # tr numbers that the base parser compares against to pick out the
    # case number, registration date, location and proposal. Note that
    # the registration date value is larger than the location and
    # proposal ones here.
    case_number_tr, reg_date_tr, location_tr, proposal_tr = 1, 7, 2, 3
class BoltonParser(AcolnetParser):
    """Acolnet settings for the Bolton site."""

    # Email address supplied for comments on applications.
    comments_email_address = "Planning.control@bolton.gov.uk"

    # tr numbers that the base parser compares against to pick out the
    # case number, registration date, location and proposal.
    case_number_tr, reg_date_tr, location_tr, proposal_tr = 1, 2, 4, 5
class ExeterParser(AcolnetParser):
    """Acolnet settings for the Exeter site."""

    # tr numbers that the base parser compares against to pick out the
    # case number, registration date, location and proposal.
    case_number_tr, reg_date_tr, location_tr, proposal_tr = 1, 3, 5, 6
class SurreyHeathParser(AcolnetParser):
    """Acolnet settings for the Surrey Heath site.

    This is not working yet. _getSearchResponse is an attempt to work
    around cookies and a javascript redirect.
    I may have a bit more of a go at this at some point if I have time.
    """

    case_number_tr = 1 # this one can be got by the td class attribute
    reg_date_tr = 2
    location_tr = 4
    proposal_tr = 5

    # Email address supplied for comments on applications.
    comments_email_address = "development-control@surreyheath.gov.uk"

    def _getSearchResponse(self):
        """Open self.base_url, record its cookies and return the response.

        The response is returned unread, because the caller reads the
        search form from it. Previously this method returned nothing
        (and printed, and so used up, the response body), which made
        the caller fail when it tried to read the form.
        """
        # It looks like we sometimes need to do some stuff to get around a
        # javascript redirect and cookies.
        search_form_request = urllib2.Request(self.base_url)

        # Lying about the user-agent doesn't seem to help.
        #search_form_request.add_header("user-agent", "Mozilla/5.0 (compatible; Konqu...L/3.5.6 (like Gecko) (Kubuntu)")

        search_form_response = urllib2.urlopen(search_form_request)

        # Keep any cookies the site sets in the module level cookie jar.
        cookie_jar.extract_cookies(search_form_response, search_form_request)

        # Debugging output while this scraper is not working. The body is
        # not printed, as reading it here would leave nothing for the caller.
        print(search_form_response.geturl())
        print(search_form_response.info())

        # Earlier attempt at following the javascript redirect by hand:
        # validate_url = "https://www.public.surreyheath-online.gov.uk/whalecom7cace3215643e22bb7b0b8cc97a7/whalecom0/InternalSite/Validate.asp"
        # javascript_redirect_url = urlparse.urljoin(self.base_url, "/whalecom7cace3215643e22bb7b0b8cc97a7/whalecom0/InternalSite/RedirectToOrigURL.asp?site_name=public&secure=1")
        # javascript_redirect_request = urllib2.Request(javascript_redirect_url)
        # javascript_redirect_request.add_header('Referer', validate_url)
        # cookie_jar.add_cookie_header(javascript_redirect_request)
        # javascript_redirect_response = urllib2.urlopen(javascript_redirect_request)
        # return javascript_redirect_response

        return search_form_response
if __name__ == '__main__':
    # Manual check: scrape one site for one fixed date and print the
    # result. Swap in one of the commented-out parsers to try another site.
    day, month, year = 4, 12, 2007

    # returns error 400 - bad request
    #parser = BridgenorthParser()

    # cambridgeshire is a bit different...
    # no advanced search page

    # canterbury
    # results as columns of one table

    #parser = SurreyHeathParser("Surrey Heath", "Surrey Heath", "https://www.public.surreyheath-online.gov.uk/whalecom60b1ef305f59f921/whalecom0/Scripts/PlanningPagesOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
    # parser = OldhamParser("Oldham", "Oldham", "http://planning.oldham.gov.uk/planning/AcolNetCGI.gov?ACTION=UNWRAP&Root=PgeSearch")
    parser = BridgnorthParser(
        "Bridgnorth",
        "Bridgnorth",
        "http://www2.bridgnorth-dc.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch",
    )

    print(parser.getResults(day, month, year))
| @@ -87,30 +87,31 @@ | |||
| "Vale Royal Borough Council", "Vale Royal", "http://pa.valeroyal.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" | |||
| "Winchester City Council", "Winchester", "http://win2padmz.winchester.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" | |||
| "Wolverhampton City Council", "Wolverhampton", "http://planningonline.wolverhampton.gov.uk/PublicAccess/dc/", "PublicAccess", "PublicAccessParser" | |||
| "Babergh District Council", "Babergh", "http://planning.babergh.gov.uk/dcdatav2//acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "BaberghParser" | |||
| "Basingstoke and Deane Borough Council", "Basingstoke and Deane", "http://planning.basingstoke.gov.uk/DCOnline2/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "BasingstokeParser" | |||
| "Babergh District Council", "Babergh", "http://planning.babergh.gov.uk/dcdatav2//acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser" | |||
| "Basingstoke and Deane Borough Council", "Basingstoke and Deane", "http://planning.basingstoke.gov.uk/DCOnline2/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser" | |||
| "Bassetlaw District Council", "Bassetlaw","http://www.bassetlaw.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "BassetlawParser" | |||
| "Bury Metropolitan Borough Council", "Bury", "http://e-planning.bury.gov.uk/DCWebPages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "BuryParser" | |||
| "Derby City Council", "Derby", "http://195.224.106.204/scripts/planningpages02%5CXSLPagesDC_DERBY%5CDCWebPages/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "DerbyParser" | |||
| "London Borough of Croydon", "Croydon", "http://planning.croydon.gov.uk/DCWebPages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "CroydonParser" | |||
| "East Lindsey District Council", "East Lindsey", "http://www.e-lindsey.gov.uk/planning/AcolnetCGI.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "EastLindseyParser" | |||
| "Fylde Borough Council", "Fylde", "http://www2.fylde.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "FyldeParser" | |||
| "Harlow Council", "Harlow", "http://planning.harlow.gov.uk/PlanningSearch/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "HarlowParser" | |||
| "Hertsmere Borough Council", "Hertsmere", "http://www2.hertsmere.gov.uk/ACOLNET/DCOnline//acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "HertsmereParser" | |||
| "London Borough of Lewisham", "Lewisham", "http://acolnet.lewisham.gov.uk/lewis-xslpagesdc/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "LewishamParser" | |||
| "North Hertfordshire District Council", "North Hertfordshire", "http://www.north-herts.gov.uk/dcdataonline/Pages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "NorthHertfordshireParser" | |||
| "New Forest National Park", "New Forest NP", "http://web01.newforestnpa.gov.uk/planningpages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "NewForestNPParser" | |||
| "Bury Metropolitan Borough Council", "Bury", "http://e-planning.bury.gov.uk/DCWebPages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser" | |||
| "Derby City Council", "Derby", "http://eplanning.derby.gov.uk/acolnet/planningpages02/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser" | |||
| "London Borough of Croydon", "Croydon", "http://planning.croydon.gov.uk/DCWebPages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser" | |||
| "East Lindsey District Council", "East Lindsey", "http://www.e-lindsey.gov.uk/planning/AcolnetCGI.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser" | |||
| "Fylde Borough Council", "Fylde", "http://www2.fylde.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser" | |||
| "Guildford Borough Council", "Guildford", "http://www.guildford.gov.uk/DLDC_Version_2/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser" | |||
| "Harlow Council", "Harlow", "http://planning.harlow.gov.uk/PlanningSearch/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser" | |||
| "Hertsmere Borough Council", "Hertsmere", "http://www2.hertsmere.gov.uk/ACOLNET/DCOnline//acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser" | |||
| "London Borough of Lewisham", "Lewisham", "http://acolnet.lewisham.gov.uk/lewis-xslpagesdc/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "AcolnetParser" | |||
| "North Hertfordshire District Council", "North Hertfordshire", "http://www.north-herts.gov.uk/dcdataonline/Pages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "AcolnetParser" | |||
| "New Forest National Park", "New Forest NP", "http://web01.newforestnpa.gov.uk/planningpages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser" | |||
| "Bridgnorth District Council", "Bridgnorth", "http://www2.bridgnorth-dc.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "BridgnorthParser" | |||
| "Carlisle City Council", "Carlisle", "http://planning.carlisle.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "CarlisleParser" | |||
| "Carlisle City Council", "Carlisle", "http://planning.carlisle.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser" | |||
| "Newcastle City Council", "Newcastle", "http://gispublic.newcastle.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" | |||
| "North Wiltshire District Council", "North Wiltshire", "http://planning.northwilts.gov.uk/DCOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "NorthWiltshireParser" | |||
| "North Wiltshire District Council", "North Wiltshire", "http://planning.northwilts.gov.uk/DCOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser" | |||
| "Oldham Metropolitan Borough Council", "Oldham", "http://planning.oldham.gov.uk/planning/AcolNetCGI.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "OldhamParser" | |||
| "Renfrewshire Council", "Renfrewshire", "http://planning.renfrewshire.gov.uk/acolnetDCpages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "RenfrewshireParser" | |||
| "Renfrewshire Council", "Renfrewshire", "http://planning.renfrewshire.gov.uk/acolnetDCpages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "AcolnetParser" | |||
| "Selby District Council", "Selby", "http://publicaccess.selby.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" | |||
| "South Bedfordshire District Council", "South Bedfordshire", "http://planning.southbeds.gov.uk/plantech/DCWebPages/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "SouthBedfordshireParser" | |||
| "Suffolk Coastal District Council", "Suffolk Coastal", "http://apps3.suffolkcoastal.gov.uk/DCDataV2/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "SuffolkCoastalParser" | |||
| "South Bedfordshire District Council", "South Bedfordshire", "http://planning.southbeds.gov.uk/plantech/DCWebPages/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "AcolnetParser" | |||
| "Suffolk Coastal District Council", "Suffolk Coastal", "http://apps3.suffolkcoastal.gov.uk/DCDataV2/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser" | |||
| "Surrey Heath Borough Council", "Surrey Heath", "https://www.public.surreyheath-online.gov.uk/whalecom60b1ef305f59f921/whalecom0/Scripts/PlanningPagesOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "SurreyHeathParser" | |||
| "New Forest District Council", "New Forest DC", "http://web3.newforest.gov.uk/planningonline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "NewForestDCParser" | |||
| "New Forest District Council", "New Forest DC", "http://web3.newforest.gov.uk/planningonline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser" | |||
| "Craven District Council", "Craven", "http://www.planning.cravendc.gov.uk/fastweb/", "FastWeb", "FastWeb" | |||
| "Eastleigh Borough Council", "Eastleigh", "http://www.eastleigh.gov.uk/FastWEB/", "FastWeb", "FastWeb" | |||
| "Eden District Council", "Eden", "http://eforms.eden.gov.uk/fastweb/", "FastWeb", "FastWeb" | |||
| @@ -132,7 +133,7 @@ | |||
| "Tendring District Council", "Tendring", "http://195.99.151.54/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" | |||
| "Argyl And Bute Council", "Argyl and Bute", "http://www.argyll-bute.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser" | |||
| "Oxford City Council", "Oxford", "http://uniformpublicaccess.oxford.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" | |||
| "Havant Borough Council", "Havant", "http://www3.havant.gov.uk/scripts/planningpages/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "HavantParser" | |||
| "Havant Borough Council", "Havant", "http://www3.havant.gov.uk/scripts/planningpages/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser" | |||
| "Rochford District Council", "Rochford", "http://www.rochford.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser" | |||
| "Chester-le-Street District Council", "Chester-le-Street", "http://planning.chester-le-street.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" | |||
| "Vale of the White Horse District Council", "Vale of the White Horse", "http://planning.whitehorsedc.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" | |||
| @@ -144,8 +145,8 @@ | |||
| "Cumbria County Council", "Cumbria", "http://217.114.50.149:7778/ePlanningOPS/loadResults.do", "AtriumePlanning", "AtriumePlanningParser" | |||
| "Lincolnshire County Council", "Lincolnshire", "http://apps1.lincolnshire.gov.uk/ePlanning/loadResults.do", "AtriumePlanning", "AtriumePlanningParser" | |||
| "West Sussex County Council", "West Sussex", "http://eplanning.westsussex.gov.uk/ePlanningOPS/loadResults.do", "AtriumePlanning", "AtriumePlanningParser" | |||
| "Bolton Metropolitan Borough Council", "Bolton", "http://www.planning.bolton.gov.uk/PlanningSearch/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "BoltonParser" | |||
| "Exeter City Council", "Exeter", "http://pub.exeter.gov.uk/scripts/Acolnet/dataonlineplanning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "ExeterParser" | |||
| "Bolton Metropolitan Borough Council", "Bolton", "http://www.planning.bolton.gov.uk/PlanningSearch/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser" | |||
| "Exeter City Council", "Exeter", "http://pub.exeter.gov.uk/scripts/Acolnet/dataonlineplanning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser" | |||
| "Suffolk County Council", "Suffolk", "http://atrium.suffolkcc.gov.uk/ePlanning/loadResults.do", "AtriumePlanning", "AtriumePlanningParser" | |||
| "Blackburn with Darwen Borough Council", "Blackburn", "http://195.8.175.6/", "PlanningExplorer", "BlackburnParser" | |||
| "Broadland District Council", "Broadland", "http://www.broadland.gov.uk/", "PlanningExplorer", "BroadlandParser" | |||
| @@ -184,3 +185,6 @@ | |||
| "Shrewsbury and Atcham Borough Council", "Shrewsbury", "http://www2.shrewsbury.gov.uk/", "PlanningExplorer", "ShrewsburyParser" | |||
| "Dorset County Council", "Dorset", "http://www.dorsetforyou.com/ePlanning/loadResults.do", "AtriumePlanning", "AtriumePlanningParser" | |||
| "Somerset County Council", "Somerset", "http://webapp1.somerset.gov.uk/ePlanning/loadResults.do", "AtriumePlanning", "AtriumePlanningParser" | |||
| "Mid Suffolk District Council", "Mid Suffolk", "http://planning.midsuffolk.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser" | |||
| "London Borough of Southwark", "Southwark", "http://planningonline.southwarksites.com/planningonline2/AcolNetCGI.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "SouthwarkParser" | |||
| "Canterbury City Council", "Canterbury", "http://planning.canterbury.gov.uk/scripts/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "CanterburyParser" | |||