Add new version of the Acolnet scraper using BeautifulSoup instead of HTMLParser.

This allows us to do lots of sites without custom scrapers. We also get a few that we didn't have before: Bridgnorth, Canterbury, Guildford, Mid Suffolk (no postcodes), and Southwark.
18 anos atrás · 5366dcea29
--- a/python_scrapers/AcolnetParser.py
+++ b/python_scrapers/AcolnetParser.py
@@ -1,41 +1,40 @@
 #!/usr/local/bin/python

 import urllib, urllib2
 import HTMLParser
 #from BeautifulSoup import BeautifulSoup
 import urllib2
 import urlparse

 from datetime import date
 import datetime

 import re

 from BeautifulSoup import BeautifulSoup

 # Adding this to try to help Surrey Heath - Duncan 14/9/2007
 import cookielib
 cookie_jar = cookielib.CookieJar()
 ################

 import urlparse

 import re

 end_head_regex = re.compile("</head", re.IGNORECASE)

 import MultipartPostHandler
 # this is not mine, or part of standard python (though it should be!)
 # it comes from http://pipe.scs.fsu.edu/PostHandler/MultipartPostHandler.py

 from PlanningUtils import getPostcodeFromText, PlanningAuthorityResults, PlanningApplication

 from datetime import date
 from time import strptime


 date_format = "%d/%m/%Y"
 our_date = date(2007,4,25)

 #This is to get the system key out of the info url
 system_key_regex = re.compile("TheSystemkey=(\d*)", re.IGNORECASE)

 # We allow the optional > for Bridgnorth, which doesn't have broken html
 end_head_regex = re.compile("</head>?", re.IGNORECASE)


 class AcolnetParser(HTMLParser.HTMLParser):
    case_number_tr = None # this one can be got by the td class attribute
    reg_date_tr = None
    location_tr = None
    proposal_tr = None
    received_date_format = "%d/%m/%Y"

    comment_qs_template = "ACTION=UNWRAP&RIPNAME=Root.PgeCommentForm&TheSystemkey=%s"

    # There is no online comment facility in these, so we provide an
    # appropriate email address instead
@@ -44,32 +43,60 @@ class AcolnetParser(HTMLParser.HTMLParser):
    # The optional amp; is to cope with Oldham, which seems to have started
    # quoting this url.
    action_regex = re.compile("<form[^>]*action=\"([^\"]*ACTION=UNWRAP&(?:amp;)?RIPSESSION=[^\"]*)\"[^>]*>", re.IGNORECASE)    
  
    def _getResultsSections(self, soup):
        """In most cases, there is a table per app."""
        return soup.findAll("table", {"class": "results-table"})
  
    def _getCouncilReference(self, app_table):
        return app_table.a.string.strip()

    def _getDateReceived(self, app_table):
        date_str = ''.join(app_table.find(text="Registration Date:").findNext("td").string.strip().split())
        
        return datetime.datetime.strptime(date_str, self.received_date_format)

    def _getAddress(self, app_table):
        return app_table.find(text="Location:").findNext("td").string.strip()
    
    def _getDescription(self, app_table):
        return app_table.find(text="Proposal:").findNext("td").string.strip()

    def _getInfoUrl(self, app_table):
        """Build the absolute info url for this application.

        As a side effect, the system key found in the link is stored on
        self._current_application, because the comment url is built
        from it later.

        """
        href = app_table.a['href']
        key_match = system_key_regex.search(href)
        self._current_application.system_key = key_match.groups()[0]
        return urlparse.urljoin(self.base_url, href)

    def _getCommentUrl(self, app_table):
        """This must be run after _getInfoUrl"""

        if self.comments_email_address:
            return self.comments_email_address

        split_info_url = urlparse.urlsplit(self._current_application.info_url)

        comment_qs = self.comment_qs_template %self._current_application.system_key

        return urlparse.urlunsplit(split_info_url[:3] + (comment_qs,) + split_info_url[4:])


    def __init__(self,
                 authority_name,
                 authority_short_name,
                 base_url,
                 debug=False):


        HTMLParser.HTMLParser.__init__(self)

        self.authority_name = authority_name
        self.authority_short_name = authority_short_name
        self.base_url = base_url

        self.debug = debug

        self._tr_number = 0

        # This will be used to track the subtable depth
        # when we are in a results-table, in order to
        # avoid adding an application before we have got to
        # the end of the results-table
        self._subtable_depth = None

        self._in_td = False

        # This is where we store the results
        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

@@ -82,86 +109,6 @@ class AcolnetParser(HTMLParser.HTMLParser):
        HTML cleanup."""
        return html

    def handle_starttag(self, tag, attrs):
        #print tag, attrs
                    
        if tag == "table":
            if self._current_application is None:
                # Each application is in a separate table with class "results-table"
                for key, value in attrs:
                    if key == "class" and value == "results-table":
                        #print "found results-table"
                        self._current_application = PlanningApplication()
                        self._tr_number = 0
                        self._subtable_depth = 0
                        self._current_application.comment_url = self.comments_email_address
                        break
            else:
                # We are already in a results-table, and this is the start of a subtable,
                # so increment the subtable depth.
                self._subtable_depth += 1

        elif self._current_application is not None:
            if tag == "tr" and self._subtable_depth == 0:
                self._tr_number += 1
            if tag == "td":
                self._in_td = True
            if tag == "a" and self._tr_number == self.case_number_tr:
                # this is where we get the info link and the case number
                for key, value in attrs:
                    if key == "href":
                        self._current_application.info_url = value

                        system_key = system_key_regex.search(value).groups()[0]

                        if self.comments_email_address is not None:
                            self._current_application.comment_url = self.comments_email_address
                        else:
                            self._current_application.comment_url = value.replace("PgeResultDetail", "PgeCommentForm")
                        
    def handle_data(self, data):
        # If we are in the tr which contains the case number,
        # then data is the council reference, so
        # add it to self._current_application.
        if self._in_td:
            if self._tr_number == self.case_number_tr:
                self._current_application.council_reference = data.strip()
            elif self._tr_number == self.reg_date_tr:
                # we need to make a date object out of data
                date_as_str = ''.join(data.strip().split())
                received_date = date(*strptime(date_as_str, date_format)[0:3])

                #print received_date

                self._current_application.date_received = received_date

            elif self._tr_number == self.location_tr:
                location = data.strip()

                self._current_application.address = location
                self._current_application.postcode = getPostcodeFromText(location)
            elif self._tr_number == self.proposal_tr:
                self._current_application.description = data.strip()


    def handle_endtag(self, tag):
        #print "ending: ", tag
        if tag == "table" and self._current_application is not None:
            if self._subtable_depth > 0:
                self._subtable_depth -= 1
            else:
                # We need to add the last application in the table
                if self._current_application is not None:
                    #print "adding application"
                    self._results.addApplication(self._current_application)
                    #print self._current_application
                    self._current_application = None
                    self._tr_number = None
                    self._subtable_depth = None
        elif tag == "td":
            self._in_td = False


    def _getSearchResponse(self):
        # It looks like we sometimes need to do some stuff to get around a
        # javascript redirect and cookies.
@@ -202,9 +149,6 @@ class AcolnetParser(HTMLParser.HTMLParser):
        response = opener.open(action_url, search_data)
        results_html = response.read()

        #outfile = open("tmpfile", "w")
        #outfile.write(results_html)

        # This is for doing site specific html cleanup
        results_html = self._cleanupHTML(results_html)

@@ -212,53 +156,40 @@ class AcolnetParser(HTMLParser.HTMLParser):
        #so we'll just have the body
        just_body = "<html>" + end_head_regex.split(results_html)[-1]

        #outfile = open(self.authority_short_name + ".debug", "w")
        #outfile.write(just_body)        

        self.feed(just_body)
        #self.feed(just_body)
        
        return self._results
        soup = BeautifulSoup(just_body)

        # Each app is in a table of its own.
        results_tables = self._getResultsSections(soup)


    def getResults(self, day, month, year):
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
        for app_table in results_tables:
            self._current_application = PlanningApplication()

 ## # Babergh up to 21/06/2007
 ## class BaberghParser(AcolnetParser):
 ##     case_number_tr = 1 # this one can be got by the td class attribute
 ##     reg_date_tr = 2
 ##     location_tr = 4
 ##     proposal_tr = 5
            self._current_application.council_reference = self._getCouncilReference(app_table)
            self._current_application.address = self._getAddress(app_table)
            
            # Get the postcode from the address
            self._current_application.postcode = getPostcodeFromText(self._current_application.address)
            
            self._current_application.description = self._getDescription(app_table)
            self._current_application.info_url = self._getInfoUrl(app_table)
            self._current_application.comment_url = self._getCommentUrl(app_table)
            self._current_application.date_received = self._getDateReceived(app_table)

 ##     # It would be nice to scrape this...
 ##     comments_email_address = "planning.reception@babergh.gov.uk"
            self._results.addApplication(self._current_application)

 # Site changes to here from 22/06/2007
 class BaberghParser(AcolnetParser):
    case_number_tr = 1 # this one can be got by the td class attribute
    reg_date_tr = 3
    location_tr = 5
    proposal_tr = 6

    # It would be nice to scrape this...
    comments_email_address = "planning.reception@babergh.gov.uk"
        return self._results

 class BasingstokeParser(AcolnetParser):
    case_number_tr = 1 # this one can be got by the td class attribute
    reg_date_tr = 3
    location_tr = 6
    proposal_tr = 8

    # It would be nice to scrape this...
    comments_email_address = "development.control@basingstoke.gov.uk"

 class BassetlawParser(AcolnetParser):
    case_number_tr = 1 # this one can be got by the td class attribute
    reg_date_tr = 2
    location_tr = 4
    proposal_tr = 5    
    def getResults(self, day, month, year):
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()


 class BassetlawParser(AcolnetParser):
    comments_email_address = "planning@bassetlaw.gov.uk"

    def _cleanupHTML(self, html):
@@ -270,214 +201,58 @@ class BassetlawParser(AcolnetParser):


 class BridgnorthParser(AcolnetParser):
    # This site is currently down...
    #search_url = "http://www2.bridgnorth-dc.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch"
    #authority_name = "Bridgenorth District Council"
    #authority_short_name = "Bridgenorth"
    
    case_number_tr = 1 # this one can be got by the td class attribute
    reg_date_tr = 2
    location_tr = 4
    proposal_tr = 5    

    comments_email_address = "contactus@bridgnorth-dc.gov.uk"

 class BuryParser(AcolnetParser):
    case_number_tr = 1 # this one can be got by the td class attribute
    reg_date_tr = 3
    location_tr = 5
    proposal_tr = 6    

    #comments_email_address = "development.control@bury.gov.uk"

 ## class CanterburyParser(AcolnetParser):
 ##     search_url = "http://planning.canterbury.gov.uk/scripts/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch"

 ##     case_number_tr = 1 # this one can be got by the td class attribute
 ##     reg_date_tr = 2
 ##     location_tr = 4
 ##     proposal_tr = 5    

 ##     authority_name = "Canterbury City Council"
 ##     authority_short_name = "Canterbury"
    def _getResultsSections(self, soup):
        return soup.findAll("table", {"class": "app"})

 ##     comments_email_address = ""
    def _getCouncilReference(self, app_table):
        return app_table.a.string.split()[-1]

 class CarlisleParser(AcolnetParser):
    case_number_tr = 1 # this one can be got by the td class attribute
    reg_date_tr = 2
    location_tr = 5
    proposal_tr = 6    

    comments_email_address = "dc@carlisle.gov.uk"

 class DerbyParser(AcolnetParser):
    case_number_tr = 1 # this one can be got by the td class attribute
    reg_date_tr = 3
    location_tr = 4
    proposal_tr = 5    

    comments_email_address = "developmentcontrol@derby.gov.uk"

 class CroydonParser(AcolnetParser):
    case_number_tr = 1 # this one can be got by the td class attribute
    reg_date_tr = 3
    location_tr = 5
    proposal_tr = 6    

    comments_email_address = "planning.control@croydon.gov.uk"

 class EastLindseyParser(AcolnetParser):
    case_number_tr = 1 # this one can be got by the td class attribute
    reg_date_tr = 3
    location_tr = 5
    proposal_tr = 6    

    comments_email_address = "development.control@e-lindsey.gov.uk"

 class FyldeParser(AcolnetParser):
    case_number_tr = 1 # this one can be got by the td class attribute
    reg_date_tr = 2
    location_tr = 4
    proposal_tr = 5    
    def _getCommentUrl(self, app_table):
        """This must be run after _getInfoUrl"""
 #http://www2.bridgnorth-dc.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeCommentForm&TheSystemkey=46958
        return self._current_application.info_url.replace("NewPages", "PgeCommentForm")

    comments_email_address = "planning@fylde.gov.uk"
 # Cambridgeshire, although an Acolnet site, is so different that it
 # may as well be handled completely separately.

 class HarlowParser(AcolnetParser):
    case_number_tr = 1 # this one can be got by the td class attribute
    reg_date_tr = 2
    location_tr = 4
    proposal_tr = 5    
 class CanterburyParser(AcolnetParser):
    """Here the apps are one row each in a big table."""

    comments_email_address = "Planning.services@harlow.gov.uk"
    def _getResultsSections(self, soup):
        return soup.find("table", {"class": "results-table"}).findAll("tr")[1:]

 class HavantParser(AcolnetParser):
    case_number_tr = 1 # this one can be got by the td class attribute
    reg_date_tr = 3
    location_tr = 6
    proposal_tr = 8    
    def _getDateReceived(self, app_table):
        date_str = app_table.findAll("td")[3].string.strip()

    comments_email_address = "representations@havant.gov.uk"
        return datetime.datetime.strptime(date_str, self.received_date_format)

 class HertsmereParser(AcolnetParser):
    case_number_tr = 1 # this one can be got by the td class attribute
    reg_date_tr = 2
    location_tr = 4
    proposal_tr = 5    
    def _getAddress(self, app_table):
        return app_table.findAll("td")[1].string.strip()

    comments_email_address = "planning@hertsmere.gov.uk"
    def _getDescription(self, app_table):
        return app_table.findAll("td")[2].string.strip()        

 class LewishamParser(AcolnetParser):
    case_number_tr = 1 # this one can be got by the td class attribute
    reg_date_tr = 2
    location_tr = 4
    proposal_tr = 5    
 #Kensington and chelsea is sufficiently different, it may as well be handled separately    

    comments_email_address = "planning@lewisham.gov.uk"
 # Mid Bedfordshire - there is an acolnet here, but you have to have a username
 # and password to access it!
    
 class NorthHertfordshireParser(AcolnetParser):
    case_number_tr = 1 # this one can be got by the td class attribute
    reg_date_tr = 2
    location_tr = 4
    proposal_tr = 5    

 ## class MidSuffolkParser(AcolnetParser):
 ##     case_number_tr = 1 # this one can be got by the td class attribute
 ##     reg_date_tr = 2
 ##     location_tr = 4
 ##     proposal_tr = 5    

 ##     comments_email_address = "planning@lewisham.gov.uk"
 ##     #action_regex = re.compile("<FORM .*action=\"(.*ACTION=UNWRAP&RIPSESSION=[^\"]*)\"[^>]*>", re.IGNORECASE)    

 class NewForestNPParser(AcolnetParser):
    # In this case there is an online comment facility at the
    # bottom of each view app page...
    case_number_tr = 1 # this one can be got by the td class attribute
    reg_date_tr = 2
    location_tr = 4
    proposal_tr = 5

 class NewForestDCParser(AcolnetParser):
    # In this case there is an online comment facility at the
    # bottom of each view app page...
    case_number_tr = 1 # this one can be got by the td class attribute
    reg_date_tr = 3
    location_tr = 6
    proposal_tr = 7

 class NorthWiltshireParser(AcolnetParser):
    case_number_tr = 1 # this one can be got by the td class attribute
    reg_date_tr = 3
    location_tr = 6
    proposal_tr = 7

 class OldhamParser(AcolnetParser):
    case_number_tr = 1 # this one can be got by the td class attribute
    reg_date_tr = 3
    location_tr = 6
    proposal_tr = 7
        
    def _cleanupHTML(self, html):
        """There is a bad table end tag in this one.
        Fix it before we start"""
        
        bad_table_end = '</table summary="Copyright">'
        good_table_end = '</table>'

        return html.replace(bad_table_end, good_table_end)

 class SouthwarkParser(AcolnetParser):
    def _getDateReceived(self, app_table):
        date_str = ''.join(app_table.find(text="Statutory start date:").findNext("td").string.strip().split())
        
 class RenfrewshireParser(AcolnetParser):
    case_number_tr = 1 # this one can be got by the td class attribute
    reg_date_tr = 2
    location_tr = 4
    proposal_tr = 5

    comments_email_address = "pt@renfrewshire.gov.uk"

 class SouthBedfordshireParser(AcolnetParser):
    case_number_tr = 1 # this one can be got by the td class attribute
    reg_date_tr = 3
    location_tr = 5
    proposal_tr = 6

 class SuffolkCoastalParser(AcolnetParser):
 #    case_number_tr = 1 # this one can be got by the td class attribute
 #    reg_date_tr = 2
 #    location_tr = 4
 #    proposal_tr = 5

 # New URL with different layout
    case_number_tr = 1
    reg_date_tr = 3
    location_tr = 5
    proposal_tr = 6


    comments_email_address = "d.c.admin@suffolkcoastal.gov.uk"

 class GuildfordParser(AcolnetParser):
    case_number_tr = 1
    reg_date_tr = 7
    location_tr = 2
    proposal_tr = 3
    
    #http://www.guildford.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&Root=PgeSearch

 class BoltonParser(AcolnetParser):
    case_number_tr = 1
    reg_date_tr = 2
    location_tr = 4
    proposal_tr = 5
    comments_email_address = "Planning.control@bolton.gov.uk"


 class ExeterParser(AcolnetParser):
    case_number_tr = 1
    reg_date_tr = 3
    location_tr = 5
    proposal_tr = 6
    
        return datetime.datetime.strptime(date_str, self.received_date_format)

 class SurreyHeathParser(AcolnetParser):
    # This is not working yet.
@@ -520,23 +295,42 @@ class SurreyHeathParser(AcolnetParser):
        
 #        return javascript_redirect_response
    
 # Wychavon is rather different, and will need some thought. There is no
 # advanced search page
        
 if __name__ == '__main__':
    day = 20
    day = 30
    month = 11
    year = 2007

    # returns error 400 - bad request
    #parser = BridgenorthParser()

    # cambridgeshire is a bit different...
    # no advanced search page

    # canterbury
    # results as columns of one table

    #parser = SurreyHeathParser("Surrey Heath", "Surrey Heath", "https://www.public.surreyheath-online.gov.uk/whalecom60b1ef305f59f921/whalecom0/Scripts/PlanningPagesOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch")

    parser = OldhamParser("Oldham", "Oldham", "http://planning.oldham.gov.uk/planning/AcolNetCGI.gov?ACTION=UNWRAP&Root=PgeSearch")
    #parser = AcolnetParser("Babergh", "Babergh", "http://planning.babergh.gov.uk/dcdatav2//acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
    #parser = AcolnetParser("Basingstoke", "Basingstoke", "http://planning.basingstoke.gov.uk/DCOnline2/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
    #parser = BassetlawParser("Bassetlaw", "Bassetlaw", "http://www.bassetlaw.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
    #parser = AcolnetParser("Bolton", "Bolton", "http://www.planning.bolton.gov.uk/PlanningSearch/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
 #    parser = BridgnorthParser("Bridgnorth", "Bridgnorth", "http://www2.bridgnorth-dc.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch")
    #parser = AcolnetParser("Bury", "Bury", "http://e-planning.bury.gov.uk/DCWebPages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
    #parser = CanterburyParser("Canterbury", "Canterbury", "http://planning.canterbury.gov.uk/scripts/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
    #parser = AcolnetParser("Carlisle", "Carlisle", "http://planning.carlisle.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
    #parser = AcolnetParser("Croydon", "Croydon", "http://planning.croydon.gov.uk/DCWebPages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
    #parser = AcolnetParser("Derby", "Derby", "http://eplanning.derby.gov.uk/acolnet/planningpages02/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
    #parser = AcolnetParser("East Lindsey", "East Lindsey", "http://www.e-lindsey.gov.uk/planning/AcolnetCGI.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser")
    #parser = AcolnetParser("Exeter City Council", "Exeter", "http://pub.exeter.gov.uk/scripts/Acolnet/dataonlineplanning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
    #parser = AcolnetParser("Fylde", "Fylde", "http://www2.fylde.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
    #parser = AcolnetParser("Guildford", "Guildford", "http://www.guildford.gov.uk/DLDC_Version_2/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
    #parser = AcolnetParser("Harlow", "Harlow", "http://planning.harlow.gov.uk/PlanningSearch/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
    #parser = AcolnetParser("Havant", "Havant", "http://www3.havant.gov.uk/scripts/planningpages/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
    #parser = AcolnetParser("Hertsmere", "Hertsmere", "http://www2.hertsmere.gov.uk/ACOLNET/DCOnline//acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
    #parser = AcolnetParser("Lewisham", "Lewisham", "http://acolnet.lewisham.gov.uk/lewis-xslpagesdc/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.PgeSearch")
    #parser = AcolnetParser("Mid Suffolk", "Mid Suffolk", "http://planning.midsuffolk.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
    #parser = AcolnetParser("New Forest District Council", "New Forest DC", "http://web3.newforest.gov.uk/planningonline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
    #parser = AcolnetParser("New Forest National Park Authority", "New Forest NPA", "http://web01.newforestnpa.gov.uk/planningpages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
    #parser = AcolnetParser("North Hertfordshire", "North Herts", "http://www.north-herts.gov.uk/dcdataonline/Pages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch")
    #parser = AcolnetParser("North Wiltshire", "North Wilts", "http://planning.northwilts.gov.uk/DCOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
    #parser = OldhamParser("Oldham", "Oldham", "http://planning.oldham.gov.uk/planning/AcolNetCGI.gov?ACTION=UNWRAP&Root=PgeSearch")
    #parser = AcolnetParser("Renfrewshire", "Renfrewshire", "http://planning.renfrewshire.gov.uk/acolnetDCpages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch")
    #parser = AcolnetParser("South Bedfordshire", "South Bedfordshire", "http://planning.southbeds.gov.uk/plantech/DCWebPages/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.PgeSearch")
    #parser = SouthwarkParser("London Borough of Southwark", "Southwark", "http://planningonline.southwarksites.com/planningonline2/AcolNetCGI.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
    #parser = AcolnetParser("Suffolk Coastal", "Suffolk Coastal", "http://apps3.suffolkcoastal.gov.uk/DCDataV2/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
    #parser = AcolnetParser("Surrey Heath", "Surrey Heath", "https://www.public.surreyheath-online.gov.uk/whalecom60b1ef305f59f921/whalecom0/Scripts/PlanningPagesOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
    print parser.getResults(day, month, year)
    
--- a/python_scrapers/AcolnetParser_HTMLParser.py
+++ b/python_scrapers/AcolnetParser_HTMLParser.py
@@ -0,0 +1,542 @@
 #!/usr/local/bin/python

 import urllib, urllib2
 import HTMLParser
 #from BeautifulSoup import BeautifulSoup

 # Adding this to try to help Surrey Heath - Duncan 14/9/2007
 import cookielib
 cookie_jar = cookielib.CookieJar()
 ################

 import urlparse

 import re

 # We allow the optional > for Bridgnorth, which doesn't have broken html
 end_head_regex = re.compile("</head>?", re.IGNORECASE)

 import MultipartPostHandler
 # this is not mine, or part of standard python (though it should be!)
 # it comes from http://pipe.scs.fsu.edu/PostHandler/MultipartPostHandler.py

 from PlanningUtils import getPostcodeFromText, PlanningAuthorityResults, PlanningApplication

 from datetime import date
 from time import strptime


 date_format = "%d/%m/%Y"
 our_date = date(2007,4,25)

 #This is to get the system key out of the info url
 system_key_regex = re.compile("TheSystemkey=(\d*)", re.IGNORECASE)

 class AcolnetParser(HTMLParser.HTMLParser):
    case_number_tr = None # this one can be got by the td class attribute
    reg_date_tr = None
    location_tr = None
    proposal_tr = None

    # There is no online comment facility in these, so we provide an
    # appropriate email address instead
    comments_email_address = None

    # The optional amp; is to cope with Oldham, which seems to have started
    # quoting this url.
    action_regex = re.compile("<form[^>]*action=\"([^\"]*ACTION=UNWRAP&(?:amp;)?RIPSESSION=[^\"]*)\"[^>]*>", re.IGNORECASE)    
    
    def __init__(self,
                 authority_name,
                 authority_short_name,
                 base_url,
                 debug=False):
        """Set up a parser for one authority.

        authority_name and authority_short_name are stored and also
        passed to the PlanningAuthorityResults that collects the
        applications; base_url and debug are stored on the instance.
        """
        HTMLParser.HTMLParser.__init__(self)

        self.authority_name = authority_name
        self.authority_short_name = authority_short_name
        self.base_url = base_url
        self.debug = debug

        # The planning application we are currently working on, if any.
        self._current_application = None

        # Row counter within the current results-table.
        self._tr_number = 0

        # Tracks the subtable depth while we are inside a results-table,
        # so that an application is not added before we reach the end
        # of the results-table itself.
        self._subtable_depth = None

        self._in_td = False

        # This is where we store the results
        self._results = PlanningAuthorityResults(self.authority_name,
                                                 self.authority_short_name)


    def _cleanupHTML(self, html):
        """This method should be overridden in subclasses to perform site specific
        HTML cleanup."""
        return html

    def handle_starttag(self, tag, attrs):
        """Update the parsing state for an opening tag.

        tag is the tag name and attrs a list of (name, value) pairs.

        * A table with class "results-table" seen while no application is
          in progress starts a new PlanningApplication; any table seen
          while one is in progress counts as a nested subtable.
        * Inside an application, a tr at subtable depth 0 advances the row
          counter and a td marks that we are inside a cell.
        * An a tag in the case number row supplies the info url and, when
          no comments email address is configured, the comment url.
        """
        if tag == "table":
            if self._current_application is None:
                # Each application is in a separate table with class "results-table"
                is_results_table = any(key == "class" and value == "results-table"
                                       for key, value in attrs)
                if is_results_table:
                    self._current_application = PlanningApplication()
                    self._tr_number = 0
                    self._subtable_depth = 0
                    self._current_application.comment_url = self.comments_email_address
            else:
                # We are already in a results-table, and this is the start of a subtable,
                # so increment the subtable depth.
                self._subtable_depth += 1
            return

        if self._current_application is None:
            return

        if tag == "tr" and self._subtable_depth == 0:
            self._tr_number += 1
        elif tag == "td":
            self._in_td = True
        elif tag == "a" and self._tr_number == self.case_number_tr:
            # this is where we get the info link and the case number
            for key, value in attrs:
                if key != "href":
                    continue

                # The href is stored as-is. The system key used to be
                # extracted here but was never used, and the extraction
                # raised AttributeError for any link without a
                # TheSystemkey parameter, so it has been dropped.
                self._current_application.info_url = value

                if self.comments_email_address is not None:
                    self._current_application.comment_url = self.comments_email_address
                else:
                    self._current_application.comment_url = value.replace("PgeResultDetail", "PgeCommentForm")
                        
    def handle_data(self, data):
        """Store cell text on the current application according to its row.

        Text is only used while inside a td. The current row number is
        compared in turn with case_number_tr, reg_date_tr, location_tr
        and proposal_tr to decide which field the text belongs to.
        """
        if not self._in_td:
            return

        row = self._tr_number
        text = data.strip()

        if row == self.case_number_tr:
            # The data in the case number row is the council reference.
            self._current_application.council_reference = text
        elif row == self.reg_date_tr:
            # Remove all whitespace, then make a date object out of the text.
            compact_date = ''.join(text.split())
            self._current_application.date_received = date(*strptime(compact_date, date_format)[0:3])
        elif row == self.location_tr:
            self._current_application.address = text
            self._current_application.postcode = getPostcodeFromText(text)
        elif row == self.proposal_tr:
            self._current_application.description = text


    def handle_endtag(self, tag):
        """Update the parsing state for a closing tag.

        A closing td clears the in-cell flag. A closing table while an
        application is in progress either leaves one level of subtable
        or, at depth 0, adds the application to the results and resets
        the per-application state.
        """
        if tag == "td":
            self._in_td = False
            return

        if tag != "table" or self._current_application is None:
            return

        if self._subtable_depth > 0:
            # Only a nested table ended; the results-table is still open.
            self._subtable_depth -= 1
            return

        # The results-table itself ended, so the application is complete.
        self._results.addApplication(self._current_application)
        self._current_application = None
        self._tr_number = None
        self._subtable_depth = None


    def _getSearchResponse(self):
        """Request the page at self.base_url and return the open response."""
        # It looks like we sometimes need to do some stuff to get around a
        # javascript redirect and cookies.
        return urllib2.urlopen(urllib2.Request(self.base_url))
        

    def getResultsByDayMonthYear(self, day, month, year):
        """Scrape the applications registered on a single day.

        The search form is fetched first so that its (session specific)
        action url can be pulled out with self.action_regex.  A search with
        regdate1 and regdate2 both set to the requested date is then posted
        to that url and the resulting page is fed to this parser.

        day, month, year -- integers giving the registration date.

        Returns self._results, which the parser callbacks fill in.

        If self.action_regex does not match the search form, the failed
        match (None) makes the .groups() call raise AttributeError.
        """
        # first we fetch the search page to get ourselves some session info...
        search_form_response = self._getSearchResponse()
        try:
            search_form_contents = search_form_response.read()
        finally:
            # Don't leave the connection open once we have the page.
            search_form_response.close()

        # This sometimes causes a problem in HTMLParser, so let's just get the
        # link out with a regex...
        action = self.action_regex.search(search_form_contents).groups()[0]

        # This is to handle the amp; which seems to have appeared in this
        # url on the Oldham site
        action = action.replace('amp;', '')

        action_url = urlparse.urljoin(self.base_url, action)

        # Search for a one day range.  (Named search_date so that it does
        # not hide the module level our_date.)
        search_date = date(year, month, day).strftime(date_format)
        search_data = {"regdate1": search_date,
                       "regdate2": search_date,
                       }

        opener = urllib2.build_opener(MultipartPostHandler.MultipartPostHandler)
        response = opener.open(action_url, search_data)
        try:
            results_html = response.read()
        finally:
            response.close()

        # This is for doing site specific html cleanup
        results_html = self._cleanupHTML(results_html)

        # Some javascript garbage in the header upsets HTMLParser,
        # so we'll just have the body: keep whatever follows the last
        # match of end_head_regex.
        just_body = "<html>" + end_head_regex.split(results_html)[-1]

        self.feed(just_body)

        return self._results



    def getResults(self, day, month, year):
        """Return the day's results as whatever displayXML() produces.

        day, month and year may be given as anything int() accepts; they
        are converted before being passed to getResultsByDayMonthYear.
        """
        results = self.getResultsByDayMonthYear(int(day), int(month), int(year))
        return results.displayXML()

 ## # Babergh up to 21/06/2007
 ## class BaberghParser(AcolnetParser):
 ##     case_number_tr = 1 # this one can be got by the td class attribute
 ##     reg_date_tr = 2
 ##     location_tr = 4
 ##     proposal_tr = 5

 ##     # It would be nice to scrape this...
 ##     comments_email_address = "planning.reception@babergh.gov.uk"

 # Site changes to here from 22/06/2007
 class BaberghParser(AcolnetParser):
    """Result-table row numbers and comments contact for Babergh.

    Each *_tr value is the row number that handle_data compares with the
    running row counter to pick a field out of an application's table.
    """
    case_number_tr = 1  # council reference (could also be found via the td class attribute)
    reg_date_tr = 3     # date received
    location_tr = 5     # address (also searched for a postcode)
    proposal_tr = 6     # description

    # Hard coded for now; it would be nice to scrape this...
    comments_email_address = "planning.reception@babergh.gov.uk"

 class BasingstokeParser(AcolnetParser):
    """Result-table row numbers and comments contact for Basingstoke.

    Each *_tr value is the row number that handle_data compares with the
    running row counter to pick a field out of an application's table.
    """
    case_number_tr = 1  # council reference (could also be found via the td class attribute)
    reg_date_tr = 3     # date received
    location_tr = 6     # address (also searched for a postcode)
    proposal_tr = 8     # description

    # Hard coded for now; it would be nice to scrape this...
    comments_email_address = "development.control@basingstoke.gov.uk"

 class BassetlawParser(AcolnetParser):
    """Result-table row numbers, comments contact and html fix-up for Bassetlaw.

    Each *_tr value is the row number that handle_data compares with the
    running row counter to pick a field out of an application's table.
    """
    case_number_tr = 1  # council reference (could also be found via the td class attribute)
    reg_date_tr = 2     # date received
    location_tr = 4     # address (also searched for a postcode)
    proposal_tr = 5     # description

    comments_email_address = "planning@bassetlaw.gov.uk"

    def _cleanupHTML(self, html):
        """Return html with every opening and closing div tag removed.

        The page contains a broken div, and nothing we scrape needs the
        divs, so all of them are stripped before parsing.
        """
        any_div_tag = re.compile("</?div[^>]*>", re.IGNORECASE)
        without_divs = any_div_tag.sub('', html)
        return without_divs


 class BridgnorthParser(AcolnetParser):
    """Result-table row numbers and comments contact for Bridgnorth.

    Each *_tr value is the row number that handle_data compares with the
    running row counter to pick a field out of an application's table.
    """
    # Old per-class settings, left over from when the site was down:
    #search_url = "http://www2.bridgnorth-dc.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch"
    #authority_name = "Bridgenorth District Council"
    #authority_short_name = "Bridgenorth"

    case_number_tr = 1  # council reference (could also be found via the td class attribute)
    reg_date_tr = 2     # date received
    location_tr = 4     # address (also searched for a postcode)
    proposal_tr = 5     # description

    comments_email_address = "contactus@bridgnorth-dc.gov.uk"

 class BuryParser(AcolnetParser):
    """Result-table row numbers for Bury.

    Each *_tr value is the row number that handle_data compares with the
    running row counter to pick a field out of an application's table.
    """
    case_number_tr = 1  # council reference (could also be found via the td class attribute)
    reg_date_tr = 3     # date received
    location_tr = 5     # address (also searched for a postcode)
    proposal_tr = 6     # description

    # No comments address is set here; the old one is kept for reference.
    #comments_email_address = "development.control@bury.gov.uk"

 ## class CanterburyParser(AcolnetParser):
 ##     search_url = "http://planning.canterbury.gov.uk/scripts/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch"

 ##     case_number_tr = 1 # this one can be got by the td class attribute
 ##     reg_date_tr = 2
 ##     location_tr = 4
 ##     proposal_tr = 5    

 ##     authority_name = "Canterbury City Council"
 ##     authority_short_name = "Canterbury"

 ##     comments_email_address = ""

 class CarlisleParser(AcolnetParser):
    """Result-table row numbers and comments contact for Carlisle.

    Each *_tr value is the row number that handle_data compares with the
    running row counter to pick a field out of an application's table.
    """
    case_number_tr = 1  # council reference (could also be found via the td class attribute)
    reg_date_tr = 2     # date received
    location_tr = 5     # address (also searched for a postcode)
    proposal_tr = 6     # description

    comments_email_address = "dc@carlisle.gov.uk"

 class DerbyParser(AcolnetParser):
    """Result-table row numbers and comments contact for Derby.

    Each *_tr value is the row number that handle_data compares with the
    running row counter to pick a field out of an application's table.
    """
    case_number_tr = 1  # council reference (could also be found via the td class attribute)
    reg_date_tr = 3     # date received
    location_tr = 4     # address (also searched for a postcode)
    proposal_tr = 5     # description

    comments_email_address = "developmentcontrol@derby.gov.uk"

 class CroydonParser(AcolnetParser):
    """Result-table row numbers and comments contact for Croydon.

    Each *_tr value is the row number that handle_data compares with the
    running row counter to pick a field out of an application's table.
    """
    case_number_tr = 1  # council reference (could also be found via the td class attribute)
    reg_date_tr = 3     # date received
    location_tr = 5     # address (also searched for a postcode)
    proposal_tr = 6     # description

    comments_email_address = "planning.control@croydon.gov.uk"

 class EastLindseyParser(AcolnetParser):
    """Result-table row numbers and comments contact for East Lindsey.

    Each *_tr value is the row number that handle_data compares with the
    running row counter to pick a field out of an application's table.
    """
    case_number_tr = 1  # council reference (could also be found via the td class attribute)
    reg_date_tr = 3     # date received
    location_tr = 5     # address (also searched for a postcode)
    proposal_tr = 6     # description

    comments_email_address = "development.control@e-lindsey.gov.uk"

 class FyldeParser(AcolnetParser):
    """Result-table row numbers and comments contact for Fylde.

    Each *_tr value is the row number that handle_data compares with the
    running row counter to pick a field out of an application's table.
    """
    case_number_tr = 1  # council reference (could also be found via the td class attribute)
    reg_date_tr = 2     # date received
    location_tr = 4     # address (also searched for a postcode)
    proposal_tr = 5     # description

    comments_email_address = "planning@fylde.gov.uk"

 class HarlowParser(AcolnetParser):
    """Result-table row numbers and comments contact for Harlow.

    Each *_tr value is the row number that handle_data compares with the
    running row counter to pick a field out of an application's table.
    """
    case_number_tr = 1  # council reference (could also be found via the td class attribute)
    reg_date_tr = 2     # date received
    location_tr = 4     # address (also searched for a postcode)
    proposal_tr = 5     # description

    comments_email_address = "Planning.services@harlow.gov.uk"

 class HavantParser(AcolnetParser):
    """Result-table row numbers and comments contact for Havant.

    Each *_tr value is the row number that handle_data compares with the
    running row counter to pick a field out of an application's table.
    """
    case_number_tr = 1  # council reference (could also be found via the td class attribute)
    reg_date_tr = 3     # date received
    location_tr = 6     # address (also searched for a postcode)
    proposal_tr = 8     # description

    comments_email_address = "representations@havant.gov.uk"

 class HertsmereParser(AcolnetParser):
    """Result-table row numbers and comments contact for Hertsmere.

    Each *_tr value is the row number that handle_data compares with the
    running row counter to pick a field out of an application's table.
    """
    case_number_tr = 1  # council reference (could also be found via the td class attribute)
    reg_date_tr = 2     # date received
    location_tr = 4     # address (also searched for a postcode)
    proposal_tr = 5     # description

    comments_email_address = "planning@hertsmere.gov.uk"

 class LewishamParser(AcolnetParser):
    """Result-table row numbers and comments contact for Lewisham.

    Each *_tr value is the row number that handle_data compares with the
    running row counter to pick a field out of an application's table.
    """
    case_number_tr = 1  # council reference (could also be found via the td class attribute)
    reg_date_tr = 2     # date received
    location_tr = 4     # address (also searched for a postcode)
    proposal_tr = 5     # description

    comments_email_address = "planning@lewisham.gov.uk"
    
 class NorthHertfordshireParser(AcolnetParser):
    """Result-table row numbers for North Hertfordshire.

    Each *_tr value is the row number that handle_data compares with the
    running row counter to pick a field out of an application's table.
    """
    case_number_tr = 1  # council reference (could also be found via the td class attribute)
    reg_date_tr = 2     # date received
    location_tr = 4     # address (also searched for a postcode)
    proposal_tr = 5     # description

 ## class MidSuffolkParser(AcolnetParser):
 ##     case_number_tr = 1 # this one can be got by the td class attribute
 ##     reg_date_tr = 2
 ##     location_tr = 4
 ##     proposal_tr = 5    

 ##     comments_email_address = "planning@lewisham.gov.uk"
 ##     #action_regex = re.compile("<FORM .*action=\"(.*ACTION=UNWRAP&RIPSESSION=[^\"]*)\"[^>]*>", re.IGNORECASE)    

 class NewForestNPParser(AcolnetParser):
    """Result-table row numbers for the New Forest National Park.

    No comments email address is given: this site has an online comment
    facility at the bottom of each view-application page.

    Each *_tr value is the row number that handle_data compares with the
    running row counter to pick a field out of an application's table.
    """
    case_number_tr = 1  # council reference (could also be found via the td class attribute)
    reg_date_tr = 2     # date received
    location_tr = 4     # address (also searched for a postcode)
    proposal_tr = 5     # description

 class NewForestDCParser(AcolnetParser):
    """Result-table row numbers for New Forest District Council.

    No comments email address is given: this site has an online comment
    facility at the bottom of each view-application page.

    Each *_tr value is the row number that handle_data compares with the
    running row counter to pick a field out of an application's table.
    """
    case_number_tr = 1  # council reference (could also be found via the td class attribute)
    reg_date_tr = 3     # date received
    location_tr = 6     # address (also searched for a postcode)
    proposal_tr = 7     # description

 class NorthWiltshireParser(AcolnetParser):
    """Result-table row numbers for North Wiltshire.

    Each *_tr value is the row number that handle_data compares with the
    running row counter to pick a field out of an application's table.
    """
    case_number_tr = 1  # council reference (could also be found via the td class attribute)
    reg_date_tr = 3     # date received
    location_tr = 6     # address (also searched for a postcode)
    proposal_tr = 7     # description

 class OldhamParser(AcolnetParser):
    """Result-table row numbers and html fix-up for Oldham.

    Each *_tr value is the row number that handle_data compares with the
    running row counter to pick a field out of an application's table.
    """
    case_number_tr = 1  # council reference (could also be found via the td class attribute)
    reg_date_tr = 3     # date received
    location_tr = 6     # address (also searched for a postcode)
    proposal_tr = 7     # description

    def _cleanupHTML(self, html):
        """Return html with the malformed closing table tag repaired.

        The page closes one table with a tag that carries a summary
        attribute; it is swapped for a plain closing tag before parsing.
        """
        return html.replace('</table summary="Copyright">', '</table>')

        
 class RenfrewshireParser(AcolnetParser):
    """Result-table row numbers and comments contact for Renfrewshire.

    Each *_tr value is the row number that handle_data compares with the
    running row counter to pick a field out of an application's table.
    """
    case_number_tr = 1  # council reference (could also be found via the td class attribute)
    reg_date_tr = 2     # date received
    location_tr = 4     # address (also searched for a postcode)
    proposal_tr = 5     # description

    comments_email_address = "pt@renfrewshire.gov.uk"

 class SouthBedfordshireParser(AcolnetParser):
    """Result-table row numbers for South Bedfordshire.

    Each *_tr value is the row number that handle_data compares with the
    running row counter to pick a field out of an application's table.
    """
    case_number_tr = 1  # council reference (could also be found via the td class attribute)
    reg_date_tr = 3     # date received
    location_tr = 5     # address (also searched for a postcode)
    proposal_tr = 6     # description

 class SuffolkCoastalParser(AcolnetParser):
    """Result-table row numbers and comments contact for Suffolk Coastal.

    Each *_tr value is the row number that handle_data compares with the
    running row counter to pick a field out of an application's table.
    """
    # The site moved to a new URL with a different layout.  The previous
    # layout used rows 1, 2, 4 and 5 for the same four fields.
    case_number_tr = 1  # council reference
    reg_date_tr = 3     # date received
    location_tr = 5     # address (also searched for a postcode)
    proposal_tr = 6     # description

    comments_email_address = "d.c.admin@suffolkcoastal.gov.uk"

 class GuildfordParser(AcolnetParser):
    """Result-table row numbers for Guildford.

    Each *_tr value is the row number that handle_data compares with the
    running row counter to pick a field out of an application's table.
    Note that here the date row comes after the address and proposal rows.
    """
    case_number_tr = 1  # council reference
    reg_date_tr = 7     # date received
    location_tr = 2     # address (also searched for a postcode)
    proposal_tr = 3     # description

    # Search page noted for this site:
    #http://www.guildford.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&Root=PgeSearch

 class BoltonParser(AcolnetParser):
    """Result-table row numbers and comments contact for Bolton.

    Each *_tr value is the row number that handle_data compares with the
    running row counter to pick a field out of an application's table.
    """
    case_number_tr = 1  # council reference
    reg_date_tr = 2     # date received
    location_tr = 4     # address (also searched for a postcode)
    proposal_tr = 5     # description

    comments_email_address = "Planning.control@bolton.gov.uk"


 class ExeterParser(AcolnetParser):
    """Result-table row numbers for Exeter.

    Each *_tr value is the row number that handle_data compares with the
    running row counter to pick a field out of an application's table.
    """
    case_number_tr = 1  # council reference
    reg_date_tr = 3     # date received
    location_tr = 5     # address (also searched for a postcode)
    proposal_tr = 6     # description
    

 class SurreyHeathParser(AcolnetParser):
    # This is not working yet.
    # _getSearchResponse is an attempt to work around
    # cookies and a javascript redirect.
    # I may have a bit more of a go at this at some point if I have time.
    case_number_tr = 1 # this one can be got by the td class attribute
    reg_date_tr = 2
    location_tr = 4
    proposal_tr = 5

    comments_email_address = "development-control@surreyheath.gov.uk"

    def _getSearchResponse(self):
        # It looks like we sometimes need to do some stuff to get around a
        # javascript redirect and cookies.
        search_form_request = urllib2.Request(self.base_url)

        # Lying about the user-agent doesn't seem to help.
        #search_form_request.add_header("user-agent", "Mozilla/5.0 (compatible; Konqu...L/3.5.6 (like Gecko) (Kubuntu)")
        
        search_form_response = urllib2.urlopen(search_form_request)
        
        cookie_jar.extract_cookies(search_form_response, search_form_request)


        print search_form_response.geturl()
        print search_form_response.info()

        print search_form_response.read()
 #        validate_url = "https://www.public.surreyheath-online.gov.uk/whalecom7cace3215643e22bb7b0b8cc97a7/whalecom0/InternalSite/Validate.asp"
 #        javascript_redirect_url = urlparse.urljoin(self.base_url, "/whalecom7cace3215643e22bb7b0b8cc97a7/whalecom0/InternalSite/RedirectToOrigURL.asp?site_name=public&secure=1")

 #        javascript_redirect_request = urllib2.Request(javascript_redirect_url)
 #        javascript_redirect_request.add_header('Referer', validate_url)
        
 #        cookie_jar.add_cookie_header(javascript_redirect_request)

 #        javascript_redirect_response = urllib2.urlopen(javascript_redirect_request)
        
 #        return javascript_redirect_response
    
        
 if __name__ == '__main__':
    day = 4
    month = 12
    year = 2007

    # returns error 400 - bad request
    #parser = BridgenorthParser()

    # cambridgeshire is a bit different...
    # no advanced search page

    # canterbury
    # results as columns of one table

    #parser = SurreyHeathParser("Surrey Heath", "Surrey Heath", "https://www.public.surreyheath-online.gov.uk/whalecom60b1ef305f59f921/whalecom0/Scripts/PlanningPagesOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch")
    parser = BridgnorthParser("Bridgnorth", "Bridgnorth", "http://www2.bridgnorth-dc.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch")
 #    parser = OldhamParser("Oldham", "Oldham", "http://planning.oldham.gov.uk/planning/AcolNetCGI.gov?ACTION=UNWRAP&Root=PgeSearch")
    print parser.getResults(day, month, year)
    
--- a/python_scrapers/SitesToGenerate.csv
+++ b/python_scrapers/SitesToGenerate.csv
@@ -87,30 +87,31 @@
 "Vale Royal Borough Council", "Vale Royal", "http://pa.valeroyal.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser"
 "Winchester City Council", "Winchester", "http://win2padmz.winchester.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser"
 "Wolverhampton City Council", "Wolverhampton", "http://planningonline.wolverhampton.gov.uk/PublicAccess/dc/", "PublicAccess", "PublicAccessParser"
 "Babergh District Council", "Babergh", "http://planning.babergh.gov.uk/dcdatav2//acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "BaberghParser"
 "Basingstoke and Deane Borough Council", "Basingstoke and Deane", "http://planning.basingstoke.gov.uk/DCOnline2/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "BasingstokeParser"
 "Babergh District Council", "Babergh", "http://planning.babergh.gov.uk/dcdatav2//acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser"
 "Basingstoke and Deane Borough Council", "Basingstoke and Deane", "http://planning.basingstoke.gov.uk/DCOnline2/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser"
 "Bassetlaw District Council", "Bassetlaw","http://www.bassetlaw.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "BassetlawParser"
 "Bury Metropolitan Borough Council", "Bury", "http://e-planning.bury.gov.uk/DCWebPages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser",  "BuryParser"
 "Derby City Council", "Derby", "http://195.224.106.204/scripts/planningpages02%5CXSLPagesDC_DERBY%5CDCWebPages/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "DerbyParser"
 "London Borough of Croydon", "Croydon", "http://planning.croydon.gov.uk/DCWebPages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "CroydonParser"
 "East Lindsey District Council", "East Lindsey", "http://www.e-lindsey.gov.uk/planning/AcolnetCGI.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "EastLindseyParser"
 "Fylde Borough Council", "Fylde", "http://www2.fylde.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "FyldeParser"
 "Harlow Council", "Harlow", "http://planning.harlow.gov.uk/PlanningSearch/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "HarlowParser"
 "Hertsmere Borough Council", "Hertsmere", "http://www2.hertsmere.gov.uk/ACOLNET/DCOnline//acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "HertsmereParser"
 "London Borough of Lewisham", "Lewisham", "http://acolnet.lewisham.gov.uk/lewis-xslpagesdc/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "LewishamParser"
 "North Hertfordshire District Council", "North Hertfordshire", "http://www.north-herts.gov.uk/dcdataonline/Pages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "NorthHertfordshireParser"
 "New Forest National Park", "New Forest NP", "http://web01.newforestnpa.gov.uk/planningpages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "NewForestNPParser"
 "Bury Metropolitan Borough Council", "Bury", "http://e-planning.bury.gov.uk/DCWebPages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser",  "AcolnetParser"
 "Derby City Council", "Derby", "http://eplanning.derby.gov.uk/acolnet/planningpages02/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser"
 "London Borough of Croydon", "Croydon", "http://planning.croydon.gov.uk/DCWebPages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser"
 "East Lindsey District Council", "East Lindsey", "http://www.e-lindsey.gov.uk/planning/AcolnetCGI.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser"
 "Fylde Borough Council", "Fylde", "http://www2.fylde.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser"
 "Guildford Borough Council", "Guildford", "http://www.guildford.gov.uk/DLDC_Version_2/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser"
 "Harlow Council", "Harlow", "http://planning.harlow.gov.uk/PlanningSearch/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser"
 "Hertsmere Borough Council", "Hertsmere", "http://www2.hertsmere.gov.uk/ACOLNET/DCOnline//acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser"
 "London Borough of Lewisham", "Lewisham", "http://acolnet.lewisham.gov.uk/lewis-xslpagesdc/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "AcolnetParser"
 "North Hertfordshire District Council", "North Hertfordshire", "http://www.north-herts.gov.uk/dcdataonline/Pages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "AcolnetParser"
 "New Forest National Park", "New Forest NP", "http://web01.newforestnpa.gov.uk/planningpages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser"
 "Bridgnorth District Council", "Bridgnorth", "http://www2.bridgnorth-dc.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "BridgnorthParser"
 "Carlisle City Council", "Carlisle", "http://planning.carlisle.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "CarlisleParser"
 "Carlisle City Council", "Carlisle", "http://planning.carlisle.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser"
 "Newcastle City Council", "Newcastle",  "http://gispublic.newcastle.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser"
 "North Wiltshire District Council", "North Wiltshire", "http://planning.northwilts.gov.uk/DCOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "NorthWiltshireParser"
 "North Wiltshire District Council", "North Wiltshire", "http://planning.northwilts.gov.uk/DCOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser"
 "Oldham Metropolitan Borough Council", "Oldham", "http://planning.oldham.gov.uk/planning/AcolNetCGI.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "OldhamParser"
 "Renfrewshire Council", "Renfrewshire", "http://planning.renfrewshire.gov.uk/acolnetDCpages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "RenfrewshireParser"
 "Renfrewshire Council", "Renfrewshire", "http://planning.renfrewshire.gov.uk/acolnetDCpages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "AcolnetParser"
 "Selby District Council", "Selby", "http://publicaccess.selby.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser"
 "South Bedfordshire District Council", "South Bedfordshire", "http://planning.southbeds.gov.uk/plantech/DCWebPages/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "SouthBedfordshireParser"
 "Suffolk Coastal District Council", "Suffolk Coastal", "http://apps3.suffolkcoastal.gov.uk/DCDataV2/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "SuffolkCoastalParser"
 "South Bedfordshire District Council", "South Bedfordshire", "http://planning.southbeds.gov.uk/plantech/DCWebPages/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "AcolnetParser"
 "Suffolk Coastal District Council", "Suffolk Coastal", "http://apps3.suffolkcoastal.gov.uk/DCDataV2/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser"
 "Surrey Heath Borough Council", "Surrey Heath", "https://www.public.surreyheath-online.gov.uk/whalecom60b1ef305f59f921/whalecom0/Scripts/PlanningPagesOnline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "SurreyHeathParser"
 "New Forest District Council", "New Forest DC", "http://web3.newforest.gov.uk/planningonline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "NewForestDCParser"
 "New Forest District Council", "New Forest DC", "http://web3.newforest.gov.uk/planningonline/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser"
 "Craven District Council", "Craven", "http://www.planning.cravendc.gov.uk/fastweb/", "FastWeb", "FastWeb"
 "Eastleigh Borough Council", "Eastleigh", "http://www.eastleigh.gov.uk/FastWEB/", "FastWeb", "FastWeb"
 "Eden District Council", "Eden", "http://eforms.eden.gov.uk/fastweb/", "FastWeb", "FastWeb"
@@ -132,7 +133,7 @@
 "Tendring District Council", "Tendring", "http://195.99.151.54/publicaccess/tdc/", "PublicAccess", "PublicAccessParser"
 "Argyl And Bute Council", "Argyl and Bute", "http://www.argyll-bute.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser"
 "Oxford City Council", "Oxford", "http://uniformpublicaccess.oxford.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser"
 "Havant Borough Council", "Havant", "http://www3.havant.gov.uk/scripts/planningpages/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "HavantParser"
 "Havant Borough Council", "Havant", "http://www3.havant.gov.uk/scripts/planningpages/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser"
 "Rochford District Council", "Rochford", "http://www.rochford.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser"
 "Chester-le-Street District Council", "Chester-le-Street", "http://planning.chester-le-street.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser"
 "Vale of the White Horse District Council", "Vale of the White Horse", "http://planning.whitehorsedc.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser"
@@ -144,8 +145,8 @@
 "Cumbria County Council", "Cumbria", "http://217.114.50.149:7778/ePlanningOPS/loadResults.do", "AtriumePlanning", "AtriumePlanningParser"
 "Lincolnshire County Council", "Lincolnshire", "http://apps1.lincolnshire.gov.uk/ePlanning/loadResults.do", "AtriumePlanning", "AtriumePlanningParser"
 "West Sussex County Council", "West Sussex", "http://eplanning.westsussex.gov.uk/ePlanningOPS/loadResults.do", "AtriumePlanning", "AtriumePlanningParser"
 "Bolton Metropolitan Borough Council", "Bolton", "http://www.planning.bolton.gov.uk/PlanningSearch/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser",  "BoltonParser"
 "Exeter City Council", "Exeter", "http://pub.exeter.gov.uk/scripts/Acolnet/dataonlineplanning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "ExeterParser"
 "Bolton Metropolitan Borough Council", "Bolton", "http://www.planning.bolton.gov.uk/PlanningSearch/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser",  "AcolnetParser"
 "Exeter City Council", "Exeter", "http://pub.exeter.gov.uk/scripts/Acolnet/dataonlineplanning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser"
 "Suffolk County Council", "Suffolk", "http://atrium.suffolkcc.gov.uk/ePlanning/loadResults.do", "AtriumePlanning", "AtriumePlanningParser"
 "Blackburn with Darwen Borough Council", "Blackburn", "http://195.8.175.6/", "PlanningExplorer", "BlackburnParser"
 "Broadland District Council", "Broadland", "http://www.broadland.gov.uk/", "PlanningExplorer", "BroadlandParser"
@@ -184,3 +185,6 @@
 "Shrewsbury and Atcham Borough Council", "Shrewsbury", "http://www2.shrewsbury.gov.uk/", "PlanningExplorer", "ShrewsburyParser"
 "Dorset County Council", "Dorset", "http://www.dorsetforyou.com/ePlanning/loadResults.do", "AtriumePlanning", "AtriumePlanningParser"
 "Somerset County Council", "Somerset", "http://webapp1.somerset.gov.uk/ePlanning/loadResults.do", "AtriumePlanning", "AtriumePlanningParser"
 "Mid Suffolk District Council", "Mid Suffolk", "http://planning.midsuffolk.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser"
 "London Borough of Southwark", "Southwark", "http://planningonline.southwarksites.com/planningonline2/AcolNetCGI.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "SouthwarkParser"
 "Canterbury City Council", "Canterbury", "http://planning.canterbury.gov.uk/scripts/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "CanterburyParser"