From 43dceae0c60b3215ebf61db94bd89e528a24a136 Mon Sep 17 00:00:00 2001 From: "duncan.parkes" Date: Tue, 8 May 2007 09:30:40 +0000 Subject: [PATCH] add some Acolnet sites: Babergh Basingstoke Bassetlaw Bury Derby --- cgi-bin/AcolnetParser.py | 364 ++++++++++++++++++++++++ cgi-bin/Allerdale.cgi | 0 cgi-bin/Alnwick.cgi | 0 cgi-bin/Angus.cgi | 0 cgi-bin/Aylesbury Vale.cgi | 0 cgi-bin/Babergh.cgi | 29 ++ cgi-bin/Barrow.cgi | 0 cgi-bin/Basildon.cgi | 0 cgi-bin/Basingstoke and Deane.cgi | 29 ++ cgi-bin/Bassetlaw.cgi | 29 ++ cgi-bin/Bath.cgi | 0 cgi-bin/Bexley.cgi | 0 cgi-bin/Blaby.cgi | 0 cgi-bin/Bolsover.cgi | 0 cgi-bin/Bristol.cgi | 0 cgi-bin/Buckinghamshire.cgi | 0 cgi-bin/Bury.cgi | 29 ++ cgi-bin/Chelmsford.cgi | 0 cgi-bin/Cherwell.cgi | 0 cgi-bin/Chorley.cgi | 0 cgi-bin/City of London.cgi | 0 cgi-bin/Cornwall.cgi | 0 cgi-bin/Coventry.cgi | 0 cgi-bin/Dacorum.cgi | 108 ------- cgi-bin/Denbighshire.cgi | 0 cgi-bin/Derby.cgi | 29 ++ cgi-bin/Doncaster.cgi | 0 cgi-bin/Dundee.cgi | 0 cgi-bin/Durham.cgi | 0 cgi-bin/Ealing.cgi | 0 cgi-bin/Easington.cgi | 0 cgi-bin/East Devon.cgi | 0 cgi-bin/East Dorset.cgi | 0 cgi-bin/EastHerts.cgi | 122 -------- cgi-bin/Edinburgh.cgi | 0 cgi-bin/Enfield.cgi | 121 -------- cgi-bin/Epsom and Ewell.cgi | 0 cgi-bin/Fenland.cgi | 0 cgi-bin/Gateshead.cgi | 0 cgi-bin/Gedling.cgi | 0 cgi-bin/Gloucestershire.cgi | 0 cgi-bin/Gravesham.cgi | 0 cgi-bin/Hammersmith and Fulham.cgi | 0 cgi-bin/Haringey.cgi | 0 cgi-bin/Harrogate.cgi | 0 cgi-bin/Hart.cgi | 0 cgi-bin/Hartlepool.cgi | 0 cgi-bin/High Peak.cgi | 0 cgi-bin/Huntingdonshire.cgi | 0 cgi-bin/Kerrier.cgi | 0 cgi-bin/Knowsley.cgi | 0 cgi-bin/Lancaster.cgi | 0 cgi-bin/Luton.cgi | 0 cgi-bin/Malvern Hills.cgi | 0 cgi-bin/Mid Devon.cgi | 0 cgi-bin/Milton Keynes.cgi | 0 cgi-bin/MultipartPostHandler.py | 133 +++++++++ cgi-bin/NW Leicestershire.cgi | 0 cgi-bin/Newcastle-under-Lyme.cgi | 0 cgi-bin/Newham.cgi | 0 cgi-bin/North Tyneside.cgi | 0 cgi-bin/North Warwickshire.cgi | 0 cgi-bin/Northumberland.cgi | 0 cgi-bin/Oadby and Wigston.cgi | 0 cgi-bin/Oswestry.cgi | 0 cgi-bin/Peterborough.cgi | 0 cgi-bin/Portsmouth.cgi | 0 cgi-bin/Redditch.cgi | 0 cgi-bin/Rushmoor.cgi | 0 cgi-bin/Scarborough.cgi | 0 cgi-bin/Sevenoaks.cgi | 0 cgi-bin/South Bucks.cgi | 0 cgi-bin/South Ribble.cgi | 0 cgi-bin/South Staffordshire.cgi | 0 cgi-bin/SouthOxfordshire.cgi | 0 cgi-bin/Southampton.cgi | 0 cgi-bin/Spelthorne.cgi | 0 cgi-bin/St Helens.cgi | 0 cgi-bin/Stevenage.cgi | 0 cgi-bin/Stirling.cgi | 0 cgi-bin/Stockton-On-Tees.cgi | 0 cgi-bin/Stratford.cgi | 0 cgi-bin/Sunderland.cgi | 0 cgi-bin/Teignbridge.cgi | 0 cgi-bin/Test Valley.cgi | 0 cgi-bin/Tonbridge.cgi | 0 cgi-bin/Torbay.cgi | 0 cgi-bin/Vale Royal.cgi | 0 cgi-bin/Waveney.cgi | 0 cgi-bin/Wear Valley.cgi | 0 cgi-bin/Wellingborough.cgi | 0 cgi-bin/West Berkshire.cgi | 0 cgi-bin/West Lancashire.cgi | 0 cgi-bin/West Norfolk.cgi | 0 cgi-bin/Winchester.cgi | 0 cgi-bin/Woking.cgi | 0 cgi-bin/Wolverhampton.cgi | 0 cgi-bin/York.cgi | 0 cgi-bin/broxbourne.cgi | 163 ----------- python_scrapers/AcolnetParser.py | 364 ++++++++++++++++++++++++ python_scrapers/MultipartPostHandler.py | 133 +++++++++ python_scrapers/OtherFilesToCopy.csv | 3 +- python_scrapers/PublicAccessSites.csv | 7 +- 103 files changed, 1146 insertions(+), 517 deletions(-) create mode 100644 cgi-bin/AcolnetParser.py mode change 100644 => 100755 cgi-bin/Allerdale.cgi mode change 100644 => 100755 cgi-bin/Alnwick.cgi mode change 100644 => 100755 cgi-bin/Angus.cgi mode change 100644 => 100755 cgi-bin/Aylesbury Vale.cgi create mode 100755 
cgi-bin/Babergh.cgi mode change 100644 => 100755 cgi-bin/Barrow.cgi mode change 100644 => 100755 cgi-bin/Basildon.cgi create mode 100755 cgi-bin/Basingstoke and Deane.cgi create mode 100755 cgi-bin/Bassetlaw.cgi mode change 100644 => 100755 cgi-bin/Bath.cgi mode change 100644 => 100755 cgi-bin/Bexley.cgi mode change 100644 => 100755 cgi-bin/Blaby.cgi mode change 100644 => 100755 cgi-bin/Bolsover.cgi mode change 100644 => 100755 cgi-bin/Bristol.cgi mode change 100644 => 100755 cgi-bin/Buckinghamshire.cgi create mode 100755 cgi-bin/Bury.cgi mode change 100644 => 100755 cgi-bin/Chelmsford.cgi mode change 100644 => 100755 cgi-bin/Cherwell.cgi mode change 100644 => 100755 cgi-bin/Chorley.cgi mode change 100644 => 100755 cgi-bin/City of London.cgi mode change 100644 => 100755 cgi-bin/Cornwall.cgi mode change 100644 => 100755 cgi-bin/Coventry.cgi delete mode 100644 cgi-bin/Dacorum.cgi mode change 100644 => 100755 cgi-bin/Denbighshire.cgi create mode 100755 cgi-bin/Derby.cgi mode change 100644 => 100755 cgi-bin/Doncaster.cgi mode change 100644 => 100755 cgi-bin/Dundee.cgi mode change 100644 => 100755 cgi-bin/Durham.cgi mode change 100644 => 100755 cgi-bin/Ealing.cgi mode change 100644 => 100755 cgi-bin/Easington.cgi mode change 100644 => 100755 cgi-bin/East Devon.cgi mode change 100755 => 100644 cgi-bin/East Dorset.cgi delete mode 100644 cgi-bin/EastHerts.cgi mode change 100644 => 100755 cgi-bin/Edinburgh.cgi delete mode 100644 cgi-bin/Enfield.cgi mode change 100644 => 100755 cgi-bin/Epsom and Ewell.cgi mode change 100644 => 100755 cgi-bin/Fenland.cgi mode change 100755 => 100644 cgi-bin/Gateshead.cgi mode change 100644 => 100755 cgi-bin/Gedling.cgi mode change 100755 => 100644 cgi-bin/Gloucestershire.cgi mode change 100644 => 100755 cgi-bin/Gravesham.cgi mode change 100644 => 100755 cgi-bin/Hammersmith and Fulham.cgi mode change 100644 => 100755 cgi-bin/Haringey.cgi mode change 100644 => 100755 cgi-bin/Harrogate.cgi mode change 100644 => 100755 cgi-bin/Hart.cgi mode change 100644 => 100755 cgi-bin/Hartlepool.cgi mode change 100644 => 100755 cgi-bin/High Peak.cgi mode change 100644 => 100755 cgi-bin/Huntingdonshire.cgi mode change 100644 => 100755 cgi-bin/Kerrier.cgi mode change 100644 => 100755 cgi-bin/Knowsley.cgi mode change 100644 => 100755 cgi-bin/Lancaster.cgi mode change 100644 => 100755 cgi-bin/Luton.cgi mode change 100644 => 100755 cgi-bin/Malvern Hills.cgi mode change 100644 => 100755 cgi-bin/Mid Devon.cgi mode change 100644 => 100755 cgi-bin/Milton Keynes.cgi create mode 100644 cgi-bin/MultipartPostHandler.py mode change 100644 => 100755 cgi-bin/NW Leicestershire.cgi mode change 100755 => 100644 cgi-bin/Newcastle-under-Lyme.cgi mode change 100644 => 100755 cgi-bin/Newham.cgi mode change 100644 => 100755 cgi-bin/North Tyneside.cgi mode change 100644 => 100755 cgi-bin/North Warwickshire.cgi mode change 100644 => 100755 cgi-bin/Northumberland.cgi mode change 100644 => 100755 cgi-bin/Oadby and Wigston.cgi mode change 100644 => 100755 cgi-bin/Oswestry.cgi mode change 100644 => 100755 cgi-bin/Peterborough.cgi mode change 100644 => 100755 cgi-bin/Portsmouth.cgi mode change 100644 => 100755 cgi-bin/Redditch.cgi mode change 100644 => 100755 cgi-bin/Rushmoor.cgi mode change 100644 => 100755 cgi-bin/Scarborough.cgi mode change 100644 => 100755 cgi-bin/Sevenoaks.cgi mode change 100644 => 100755 cgi-bin/South Bucks.cgi mode change 100644 => 100755 cgi-bin/South Ribble.cgi mode change 100644 => 100755 cgi-bin/South Staffordshire.cgi mode change 100644 => 100755 cgi-bin/SouthOxfordshire.cgi mode 
change 100644 => 100755 cgi-bin/Southampton.cgi mode change 100644 => 100755 cgi-bin/Spelthorne.cgi mode change 100644 => 100755 cgi-bin/St Helens.cgi mode change 100644 => 100755 cgi-bin/Stevenage.cgi mode change 100644 => 100755 cgi-bin/Stirling.cgi mode change 100644 => 100755 cgi-bin/Stockton-On-Tees.cgi mode change 100644 => 100755 cgi-bin/Stratford.cgi mode change 100644 => 100755 cgi-bin/Sunderland.cgi mode change 100644 => 100755 cgi-bin/Teignbridge.cgi mode change 100644 => 100755 cgi-bin/Test Valley.cgi mode change 100644 => 100755 cgi-bin/Tonbridge.cgi mode change 100644 => 100755 cgi-bin/Torbay.cgi mode change 100755 => 100644 cgi-bin/Vale Royal.cgi mode change 100644 => 100755 cgi-bin/Waveney.cgi mode change 100644 => 100755 cgi-bin/Wear Valley.cgi mode change 100644 => 100755 cgi-bin/Wellingborough.cgi mode change 100644 => 100755 cgi-bin/West Berkshire.cgi mode change 100644 => 100755 cgi-bin/West Lancashire.cgi mode change 100644 => 100755 cgi-bin/West Norfolk.cgi mode change 100755 => 100644 cgi-bin/Winchester.cgi mode change 100644 => 100755 cgi-bin/Woking.cgi mode change 100755 => 100644 cgi-bin/Wolverhampton.cgi mode change 100644 => 100755 cgi-bin/York.cgi delete mode 100755 cgi-bin/broxbourne.cgi create mode 100644 python_scrapers/AcolnetParser.py create mode 100644 python_scrapers/MultipartPostHandler.py diff --git a/cgi-bin/AcolnetParser.py b/cgi-bin/AcolnetParser.py new file mode 100644 index 0000000..55e2796 --- /dev/null +++ b/cgi-bin/AcolnetParser.py @@ -0,0 +1,364 @@ +#!/usr/local/bin/python + +import urllib, urllib2 +import HTMLParser +#from BeautifulSoup import BeautifulSoup + +import urlparse + +import re + +end_head_regex = re.compile(" 0: + self._subtable_depth -= 1 + else: + # We need to add the last application in the table + if self._current_application is not None: + #print "adding application" + self._results.addApplication(self._current_application) + #print self._current_application + self._current_application = None + self._tr_number = None + self._subtable_depth = None + elif tag == "td": + self._in_td = False + + def getResultsByDayMonthYear(self, day, month, year): + # first we fetch the search page to get ourselves some session info... + search_form_response = urllib2.urlopen(self.base_url) + search_form_contents = search_form_response.read() + + # This sometimes causes a problem in HTMLParser, so let's just get the link + # out with a regex... 
+ + groups = self.action_regex.search(search_form_contents).groups() + + action = groups[0] + #print action + + action_url = urlparse.urljoin(self.base_url, action) + #print action_url + + our_date = date(year, month, day) + + search_data = {"regdate1": our_date.strftime(date_format), + "regdate2": our_date.strftime(date_format), + } + + opener = urllib2.build_opener(MultipartPostHandler.MultipartPostHandler) + response = opener.open(action_url, search_data) + results_html = response.read() + + # This is for doing site specific html cleanup + results_html = self._cleanupHTML(results_html) + + #some javascript garbage in the header upsets HTMLParser, + #so we'll just have the body + just_body = "" + end_head_regex.split(results_html)[-1] + + #outfile = open(self.authority_short_name + ".debug", "w") + #outfile.write(just_body) + + self.feed(just_body) + + return self._results + + + + def getResults(self, day, month, year): + return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML() + + +class BaberghParser(AcolnetParser): + #search_url = "http://planning.babergh.gov.uk/dataOnlinePlanning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch" + + case_number_tr = 1 # this one can be got by the td class attribute + reg_date_tr = 2 + location_tr = 4 + proposal_tr = 5 + + #authority_name = "Babergh District Council" + #authority_short_name = "Babergh" + + # It would be nice to scrape this... + comments_email_address = "planning.reception@babergh.gov.uk" + + action_regex = re.compile("
") + +class BasingstokeParser(AcolnetParser): + #search_url = "http://planning.basingstoke.gov.uk/DCOnline2/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch" + + case_number_tr = 1 # this one can be got by the td class attribute + reg_date_tr = 3 + location_tr = 6 + proposal_tr = 8 + + #authority_name = "Basingstoke and Deane Borough Council" + #authority_short_name = "Basingstoke and Deane" + + # It would be nice to scrape this... + comments_email_address = "development.control@basingstoke.gov.uk" + + action_regex = re.compile("") + +class BassetlawParser(AcolnetParser): + #search_url = "http://www.bassetlaw.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch" + + case_number_tr = 1 # this one can be got by the td class attribute + reg_date_tr = 2 + location_tr = 5 + proposal_tr = 6 + + #authority_name = "Bassetlaw District Council" + #authority_short_name = "Bassetlaw" + + comments_email_address = "planning@bassetlaw.gov.uk" + + action_regex = re.compile("", re.IGNORECASE) + + def _cleanupHTML(self, html): + """There is a broken div in this page. We don't need any divs, so + let's get rid of them all.""" + + div_regex = re.compile("]*>", re.IGNORECASE) + return div_regex.sub('', html) + + +class BridgenorthParser(AcolnetParser): + #search_url = "http://www2.bridgnorth-dc.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch" + + case_number_tr = 1 # this one can be got by the td class attribute + reg_date_tr = 2 + location_tr = 4 + proposal_tr = 5 + + #authority_name = "Bridgenorth District Council" + #authority_short_name = "Bridgenorth" + + comments_email_address = "contactus@bridgnorth-dc.gov.uk" + + action_regex = re.compile("") + +class BuryParser(AcolnetParser): + #search_url = "http://e-planning.bury.gov.uk/ePlanning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch" + + case_number_tr = 1 # this one can be got by the td class attribute + reg_date_tr = 2 + location_tr = 4 + proposal_tr = 5 + + #authority_name = "Bury Metropolitan Borough Council" + #authority_short_name = "Bury" + + comments_email_address = "development.control@bury.gov.uk" + action_regex = re.compile("") + +## class CanterburyParser(AcolnetParser): +## search_url = "http://planning.canterbury.gov.uk/scripts/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch" + +## case_number_tr = 1 # this one can be got by the td class attribute +## reg_date_tr = 2 +## location_tr = 4 +## proposal_tr = 5 + +## authority_name = "Canterbury City Council" +## authority_short_name = "Canterbury" + +## comments_email_address = "" +## action_regex = re.compile("") + +class CarlisleParser(AcolnetParser): + #search_url = "http://planning.carlisle.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch" + + case_number_tr = 1 # this one can be got by the td class attribute + reg_date_tr = 2 + location_tr = 5 + proposal_tr = 6 + + #authority_name = "Carlisle City Council" + #authority_short_name = "Carlisle" + + comments_email_address = "dc@carlisle.gov.uk" + action_regex = re.compile("") + + +class DerbyParser(AcolnetParser): + #search_url = "http://195.224.106.204/scripts/planningpages02%5CXSLPagesDC_DERBY%5CDCWebPages/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch" + + case_number_tr = 1 # this one can be got by the td class attribute + reg_date_tr = 3 + location_tr = 4 + proposal_tr = 5 + + #authority_name = "Derby City Council" + #authority_short_name = "Derby" + + comments_email_address = "developmentcontrol@derby.gov.uk" + action_regex = re.compile("") + + +if __name__ == 
'__main__': + day = 15 + month = 3 + year = 2007 + + # working + # parser = BasingstokeParser() + parser = BaberghParser("Babergh District Council", "Babergh", "http://planning.babergh.gov.uk/dataOnlinePlanning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") + + # works with the divs stripped out + #parser = BassetlawParser() + + # returns error 400 - bad request + #parser = BridgenorthParser() + + # working + #parser = BuryParser() + + # cambridgeshire is a bit different... + # no advanced search page + + # canterbury + # results as columns of one table + + # returns error 400 - bad request + #parser = CarlisleParser() + + # working + #parser = DerbyParser() + + print parser.getResults(day, month, year) + diff --git a/cgi-bin/Allerdale.cgi b/cgi-bin/Allerdale.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Alnwick.cgi b/cgi-bin/Alnwick.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Angus.cgi b/cgi-bin/Angus.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Aylesbury Vale.cgi b/cgi-bin/Aylesbury Vale.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Babergh.cgi b/cgi-bin/Babergh.cgi new file mode 100755 index 0000000..28ecb9e --- /dev/null +++ b/cgi-bin/Babergh.cgi @@ -0,0 +1,29 @@ +#!/usr/local/bin/python + +# This is the parser for Babergh District Council. +# it is generated from the file CGITemplate + +import cgi +import cgitb +#cgitb.enable(display=0, logdir="/tmp") + + +form = cgi.FieldStorage() +day = form.getfirst('day') +month = form.getfirst('month') +year = form.getfirst('year') + + +authority_name = "Babergh District Council" +authority_short_name = "Babergh" +base_url = "http://planning.babergh.gov.uk/dataOnlinePlanning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch" + +import AcolnetParser + +parser = AcolnetParser.BaberghParser(authority_name, authority_short_name, base_url) + +xml = parser.getResults(day, month, year) + +print "Content-Type: text/xml" # XML is following +print +print xml # print the xml diff --git a/cgi-bin/Barrow.cgi b/cgi-bin/Barrow.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Basildon.cgi b/cgi-bin/Basildon.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Basingstoke and Deane.cgi b/cgi-bin/Basingstoke and Deane.cgi new file mode 100755 index 0000000..d4be3d1 --- /dev/null +++ b/cgi-bin/Basingstoke and Deane.cgi @@ -0,0 +1,29 @@ +#!/usr/local/bin/python + +# This is the parser for Basingstoke and Deane Borough Council. +# it is generated from the file CGITemplate + +import cgi +import cgitb +#cgitb.enable(display=0, logdir="/tmp") + + +form = cgi.FieldStorage() +day = form.getfirst('day') +month = form.getfirst('month') +year = form.getfirst('year') + + +authority_name = "Basingstoke and Deane Borough Council" +authority_short_name = "Basingstoke and Deane" +base_url = "http://planning.basingstoke.gov.uk/DCOnline2/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch" + +import AcolnetParser + +parser = AcolnetParser.BasingstokeParser(authority_name, authority_short_name, base_url) + +xml = parser.getResults(day, month, year) + +print "Content-Type: text/xml" # XML is following +print +print xml # print the xml diff --git a/cgi-bin/Bassetlaw.cgi b/cgi-bin/Bassetlaw.cgi new file mode 100755 index 0000000..409f7a8 --- /dev/null +++ b/cgi-bin/Bassetlaw.cgi @@ -0,0 +1,29 @@ +#!/usr/local/bin/python + +# This is the parser for Bassetlaw District Council. 
+# it is generated from the file CGITemplate + +import cgi +import cgitb +#cgitb.enable(display=0, logdir="/tmp") + + +form = cgi.FieldStorage() +day = form.getfirst('day') +month = form.getfirst('month') +year = form.getfirst('year') + + +authority_name = "Bassetlaw District Council" +authority_short_name = "Bassetlaw" +base_url = "http://www.bassetlaw.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch" + +import AcolnetParser + +parser = AcolnetParser.BassetlawParser(authority_name, authority_short_name, base_url) + +xml = parser.getResults(day, month, year) + +print "Content-Type: text/xml" # XML is following +print +print xml # print the xml diff --git a/cgi-bin/Bath.cgi b/cgi-bin/Bath.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Bexley.cgi b/cgi-bin/Bexley.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Blaby.cgi b/cgi-bin/Blaby.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Bolsover.cgi b/cgi-bin/Bolsover.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Bristol.cgi b/cgi-bin/Bristol.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Buckinghamshire.cgi b/cgi-bin/Buckinghamshire.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Bury.cgi b/cgi-bin/Bury.cgi new file mode 100755 index 0000000..f6454f3 --- /dev/null +++ b/cgi-bin/Bury.cgi @@ -0,0 +1,29 @@ +#!/usr/local/bin/python + +# This is the parser for Bury Metropolitan Borough Council. +# it is generated from the file CGITemplate + +import cgi +import cgitb +#cgitb.enable(display=0, logdir="/tmp") + + +form = cgi.FieldStorage() +day = form.getfirst('day') +month = form.getfirst('month') +year = form.getfirst('year') + + +authority_name = "Bury Metropolitan Borough Council" +authority_short_name = "Bury" +base_url = "http://e-planning.bury.gov.uk/ePlanning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch" + +import AcolnetParser + +parser = AcolnetParser.BuryParser(authority_name, authority_short_name, base_url) + +xml = parser.getResults(day, month, year) + +print "Content-Type: text/xml" # XML is following +print +print xml # print the xml diff --git a/cgi-bin/Chelmsford.cgi b/cgi-bin/Chelmsford.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Cherwell.cgi b/cgi-bin/Cherwell.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Chorley.cgi b/cgi-bin/Chorley.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/City of London.cgi b/cgi-bin/City of London.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Cornwall.cgi b/cgi-bin/Cornwall.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Coventry.cgi b/cgi-bin/Coventry.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Dacorum.cgi b/cgi-bin/Dacorum.cgi deleted file mode 100644 index 1a27715..0000000 --- a/cgi-bin/Dacorum.cgi +++ /dev/null @@ -1,108 +0,0 @@ -#!/usr/bin/perl - -use strict; -use warnings; - -use CGI qw(:cgi); -use HTML::TreeBuilder; -use LWP::UserAgent; -use XML::Writer; - -# The master URLs for the Dacorum planning search -our $SearchURL = "http://www.dacorum.gov.uk/default.aspx?page=1495"; -our $InfoURL = "http://www.dacorum.gov.uk/Default.aspx?page=1497&ID="; -our $CommentURL = "http://www.dacorum.gov.uk/Default.aspx?page=2847&ID="; - -# We're a CGI script... 
-my $query = CGI->new(); - -# Construct an LWP user agent -our $UA = LWP::UserAgent->new(env_proxy => 1, - cookie_jar => {}, - requests_redirectable => [ 'GET', 'HEAD', 'POST' ]); - -# Post the URL to get an initial blank form -my $state = get_state(do_post()); - -# Do the search -my $page = do_post({"__VIEWSTATE" => $state, - "Template:_ctl10:_ctl0:btnSearch" => "Search", - "Template:_ctl10:_ctl0:tbRegistrationFromDay" => $query->param("day"), - "Template:_ctl10:_ctl0:tbRegistrationFromMon" => $query->param("month"), - "Template:_ctl10:_ctl0:tbRegistrationFromYear" => $query->param("year"), - "Template:_ctl10:_ctl0:tbRegistrationToDay" => $query->param("day"), - "Template:_ctl10:_ctl0:tbRegistrationToMon" => $query->param("month"), - "Template:_ctl10:_ctl0:tbRegistrationToYear" => $query->param("year")}); - -# Output an HTTP response header -print $query->header(-type => "text/xml"); - -# Create an XML output stream -my $Writer = XML::Writer->new(DATA_MODE => 1); - -# Output the XML header data -$Writer->xmlDecl("UTF-8"); -$Writer->startTag("planning"); -$Writer->dataElement("authority_name", "Dacorum Borough Council"); -$Writer->dataElement("authority_short_name", "Dacorum"); -$Writer->startTag("applications"); - -# Find the result table -my $table = $page->look_down("_tag" => "table", "class" => "FormDataGrid"); - -# Process each row of the results -foreach my $row ($table->look_down("_tag" => "tr")) -{ - my @cells = $row->look_down("_tag" => "td"); - - if ($cells[0]->attr("class") eq "FormGridDataItem" || - $cells[0]->attr("class") eq "FormGridAlternatingDataItem") - { - my $reference = $cells[0]->as_trimmed_text; - my $address = $cells[1]->as_trimmed_text; - my $description = $cells[2]->as_trimmed_text; - my $date = $cells[3]->as_trimmed_text; - my $postcode; - - if ($address =~ /\s+([A-Z]+\d+\s+\d+[A-Z]+)$/) - { - $postcode = $1; - } - - $Writer->startTag("application"); - $Writer->dataElement("council_reference", $reference); - $Writer->dataElement("address", $address); - $Writer->dataElement("postcode", $postcode); - $Writer->dataElement("description", $description); - $Writer->dataElement("info_url", $InfoURL . $reference); - $Writer->dataElement("comment_url", $CommentURL . $reference); - $Writer->dataElement("date_received", $date); - $Writer->endTag("application"); - } -} - -# Finish off XML output -$Writer->endTag("applications"); -$Writer->endTag("planning"); -$Writer->end(); - -exit 0; - -# Extract the state from a page so we can repost it -sub get_state -{ - my $page = shift; - my $viewstate = $page->look_down("_tag" => "input", "name" => "__VIEWSTATE"); - - return $viewstate->attr("value"); -} - -# Post to the planning search page -sub do_post -{ - my $response = $UA->post($SearchURL, @_); - - die $response->status_line unless $response->is_success; - - return HTML::TreeBuilder->new_from_content($response->content); -} diff --git a/cgi-bin/Denbighshire.cgi b/cgi-bin/Denbighshire.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Derby.cgi b/cgi-bin/Derby.cgi new file mode 100755 index 0000000..1ae552e --- /dev/null +++ b/cgi-bin/Derby.cgi @@ -0,0 +1,29 @@ +#!/usr/local/bin/python + +# This is the parser for Derby City Council. 
+# it is generated from the file CGITemplate + +import cgi +import cgitb +#cgitb.enable(display=0, logdir="/tmp") + + +form = cgi.FieldStorage() +day = form.getfirst('day') +month = form.getfirst('month') +year = form.getfirst('year') + + +authority_name = "Derby City Council" +authority_short_name = "Derby" +base_url = "http://195.224.106.204/scripts/planningpages02%5CXSLPagesDC_DERBY%5CDCWebPages/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch" + +import AcolnetParser + +parser = AcolnetParser.DerbyParser(authority_name, authority_short_name, base_url) + +xml = parser.getResults(day, month, year) + +print "Content-Type: text/xml" # XML is following +print +print xml # print the xml diff --git a/cgi-bin/Doncaster.cgi b/cgi-bin/Doncaster.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Dundee.cgi b/cgi-bin/Dundee.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Durham.cgi b/cgi-bin/Durham.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Ealing.cgi b/cgi-bin/Ealing.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Easington.cgi b/cgi-bin/Easington.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/East Devon.cgi b/cgi-bin/East Devon.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/East Dorset.cgi b/cgi-bin/East Dorset.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/EastHerts.cgi b/cgi-bin/EastHerts.cgi deleted file mode 100644 index 34dec27..0000000 --- a/cgi-bin/EastHerts.cgi +++ /dev/null @@ -1,122 +0,0 @@ -#!/usr/bin/perl - -use strict; -use warnings; - -use CGI qw(:cgi); -use HTML::TreeBuilder; -use LWP::UserAgent; -use XML::Writer; - -# The master URLs for the East Herts planning search -our $SearchURL = "http://e-services.eastherts.gov.uk/swiftlg/apas/run/WPHAPPCRITERIA"; -our $InfoURL = "http://e-services.eastherts.gov.uk/swiftlg/apas/run/WPHAPPDETAIL.DisplayUrl?theApnID="; -our $CommentURL = "http://e-services.eastherts.gov.uk/swiftlg/apas/run/wphmakerep.displayURL?ApnID="; - -# We're a CGI script... -my $query = CGI->new(); - -# Get the date to fetch -my $date = $query->param("day") . "/" . $query->param("month") . "/" . $query->param("year"); - -# Construct an LWP user agent -our $UA = LWP::UserAgent->new(env_proxy => 1); - -# Do the search -my $page = do_post($SearchURL, - {"REGFROMDATE.MAINBODY.WPACIS.1." => $date, - "REGTODATE.MAINBODY.WPACIS.1." => $date, - "SEARCHBUTTON.MAINBODY.WPACIS.1." => "Search"}); - -# Output an HTTP response header -print $query->header(-type => "text/xml"); - -# Create an XML output stream -my $Writer = XML::Writer->new(DATA_MODE => 1); - -# Output the XML header data -$Writer->xmlDecl("UTF-8"); -$Writer->startTag("planning"); -$Writer->dataElement("authority_name", "East Herts Council"); -$Writer->dataElement("authority_short_name", "East Herts"); -$Writer->startTag("applications"); - -# Output any applications on the first page -output_applications($page); - -# Loop over any additional results pages -foreach my $link ($page->look_down("_tag" => "a", "href" => qr/^WPHAPPSEARCHRES\.displayResultsURL/)) -{ - # Fetch this page... 
- $page = do_get(URI->new_abs($link->attr("href"), $SearchURL)); - - # ...and output the applications from it - output_applications($page); -} - -# Finish off XML output -$Writer->endTag("applications"); -$Writer->endTag("planning"); -$Writer->end(); - -exit 0; - -# Make a GET request -sub do_get -{ - my $response = $UA->get(@_); - - die $response->status_line unless $response->is_success; - - return HTML::TreeBuilder->new_from_content($response->content); -} - -# Make a POST request -sub do_post -{ - my $response = $UA->post(@_); - - die $response->status_line unless $response->is_success; - - return HTML::TreeBuilder->new_from_content($response->content); -} - -# Output applications from a results page -sub output_applications -{ - my $page = shift; - - # Find the result table - my $table = $page->look_down("_tag" => "table", "cellspacing" => "2", "cellpadding" => "2"); - - # Process each row of the results - foreach my $row ($table->look_down("_tag" => "tr")) - { - my @cells = $row->look_down("_tag" => "td"); - - if (@cells >= 3) - { - my $reference = $cells[0]->as_trimmed_text; - my $description = $cells[1]->as_trimmed_text; - my $address = $cells[2]->as_trimmed_text; - my $postcode; - - if ($address =~ /\s+([A-Z]+\d+\s+\d+[A-Z]+)$/) - { - $postcode = $1; - } - - $Writer->startTag("application"); - $Writer->dataElement("council_reference", $reference); - $Writer->dataElement("address", $address); - $Writer->dataElement("postcode", $postcode); - $Writer->dataElement("description", $description); - $Writer->dataElement("info_url", $InfoURL . $reference); - $Writer->dataElement("comment_url", $CommentURL . $reference); - $Writer->dataElement("date_received", $date); - $Writer->endTag("application"); - } - } - - return; -} diff --git a/cgi-bin/Edinburgh.cgi b/cgi-bin/Edinburgh.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Enfield.cgi b/cgi-bin/Enfield.cgi deleted file mode 100644 index 7462ebd..0000000 --- a/cgi-bin/Enfield.cgi +++ /dev/null @@ -1,121 +0,0 @@ -#!/usr/bin/perl - -use strict; -use warnings; - -use CGI qw(:cgi); -use HTML::TreeBuilder; -use LWP::UserAgent; -use XML::Writer; - - -# The master URLs for the Enfield planning search -our $SearchURL = "http://forms.enfield.gov.uk/swiftlg/apas/run/WPHAPPCRITERIA"; -our $InfoURL = "http://forms.enfield.gov.uk/swiftlg/apas/run/WPHAPPDETAIL.DisplayUrl?theApnID="; - -# We're a CGI script... -my $query = CGI->new(); - -# Get the date to fetch -my $date = $query->param("day") . "/" . $query->param("month") . "/" . $query->param("year"); - -# Construct an LWP user agent -our $UA = LWP::UserAgent->new(env_proxy => 1); - -# Do the search -my $page = do_post($SearchURL, - {"REGFROMDATE.MAINBODY.WPACIS.1." => $date, - "REGTODATE.MAINBODY.WPACIS.1." => $date, - "SEARCHBUTTON.MAINBODY.WPACIS.1." => "Search"}); - -# Output an HTTP response header -print $query->header(-type => "text/xml"); - -# Create an XML output stream -my $Writer = XML::Writer->new(DATA_MODE => 1); - -# Output the XML header data -$Writer->xmlDecl("UTF-8"); -$Writer->startTag("planning"); -$Writer->dataElement("authority_name", "Enfield Council"); -$Writer->dataElement("authority_short_name", "Enfield"); -$Writer->startTag("applications"); - -# Output any applications on the first page -output_applications($page); - -# Loop over any additional results pages -foreach my $link ($page->look_down("_tag" => "a", "href" => qr/^WPHAPPSEARCHRES\.displayResultsURL/)) -{ - # Fetch this page... 
- $page = do_get(URI->new_abs($link->attr("href"), $SearchURL)); - - # ...and output the applications from it - output_applications($page); -} - -# Finish off XML output -$Writer->endTag("applications"); -$Writer->endTag("planning"); -$Writer->end(); - -exit 0; - -# Make a GET request -sub do_get -{ - my $response = $UA->get(@_); - - die $response->status_line unless $response->is_success; - - return HTML::TreeBuilder->new_from_content($response->content); -} - -# Make a POST request -sub do_post -{ - my $response = $UA->post(@_); - - die $response->status_line unless $response->is_success; - - return HTML::TreeBuilder->new_from_content($response->content); -} - -# Output applications from a results page -sub output_applications -{ - my $page = shift; - - # Find the result table - my $table = $page->look_down("_tag" => "table", "class" => "apas_tbl"); - - # Process each row of the results - foreach my $row ($table->look_down("_tag" => "tr")) - { - my @cells = $row->look_down("_tag" => "td"); - - if (@cells >= 3) - { - my $reference = $cells[0]->as_trimmed_text; - my $description = $cells[1]->as_trimmed_text; - my $address = $cells[2]->as_trimmed_text; - my $postcode; - - if ($address =~ /\s+([A-Z]+\d+\s+\d+[A-Z]+)$/) - { - $postcode = $1; - } - - $Writer->startTag("application"); - $Writer->dataElement("council_reference", $reference); - $Writer->dataElement("address", $address); - $Writer->dataElement("postcode", $postcode); - $Writer->dataElement("description", $description); - $Writer->dataElement("info_url", $InfoURL . $reference); - $Writer->dataElement("date_received", $date); - $Writer->endTag("application"); - } - } - - return; -} diff --git a/cgi-bin/Epsom and Ewell.cgi b/cgi-bin/Epsom and Ewell.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Fenland.cgi b/cgi-bin/Fenland.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Gateshead.cgi b/cgi-bin/Gateshead.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Gedling.cgi b/cgi-bin/Gedling.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Gloucestershire.cgi b/cgi-bin/Gloucestershire.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Gravesham.cgi b/cgi-bin/Gravesham.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Hammersmith and Fulham.cgi b/cgi-bin/Hammersmith and Fulham.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Haringey.cgi b/cgi-bin/Haringey.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Harrogate.cgi b/cgi-bin/Harrogate.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Hart.cgi b/cgi-bin/Hart.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Hartlepool.cgi b/cgi-bin/Hartlepool.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/High Peak.cgi b/cgi-bin/High Peak.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Huntingdonshire.cgi b/cgi-bin/Huntingdonshire.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Kerrier.cgi b/cgi-bin/Kerrier.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Knowsley.cgi b/cgi-bin/Knowsley.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Lancaster.cgi b/cgi-bin/Lancaster.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Luton.cgi b/cgi-bin/Luton.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Malvern Hills.cgi b/cgi-bin/Malvern Hills.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Mid Devon.cgi b/cgi-bin/Mid Devon.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Milton Keynes.cgi b/cgi-bin/Milton Keynes.cgi old mode 100644 new mode 100755 diff --git 
a/cgi-bin/MultipartPostHandler.py b/cgi-bin/MultipartPostHandler.py
new file mode 100644
index 0000000..c427613
--- /dev/null
+++ b/cgi-bin/MultipartPostHandler.py
@@ -0,0 +1,134 @@
+####
+# 02/2006 Will Holcomb
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+
+# I have edited out a bit in the middle of this which reverts to a normal
+# post with "application/x-www-form-urlencoded" content-type when there are
+# no files.
+# Duncan 5/5/2007
+
+"""
+Usage:
+  Enables the use of multipart/form-data for posting forms
+
+Inspirations:
+  Upload files in python:
+    http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/146306
+  urllib2_file:
+    Fabien Seisen:
+
+Example:
+  import MultipartPostHandler, urllib2, cookielib
+
+  cookies = cookielib.CookieJar()
+  opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookies),
+                                MultipartPostHandler.MultipartPostHandler)
+  params = { "username" : "bob", "password" : "riviera",
+             "file" : open("filename", "rb") }
+  opener.open("http://www.bobsite.com/upload/", params)
+
+Further Example:
+  The main function of this file is a sample which downloads a page and
+  then uploads it to the W3C validator.
+"""
+
+import urllib
+import urllib2
+import sys    # used by the sys.exc_info() call in MultipartPostHandler.http_request
+import mimetools, mimetypes
+import os, stat
+
+class Callable:
+    def __init__(self, anycallable):
+        self.__call__ = anycallable
+
+# Controls how sequences are encoded. If true, elements may be given multiple values by
+# assigning a sequence.
+doseq = 1 + +class MultipartPostHandler(urllib2.BaseHandler): + handler_order = urllib2.HTTPHandler.handler_order - 10 # needs to run first + + def http_request(self, request): + data = request.get_data() + if data is not None and type(data) != str: + v_files = [] + v_vars = [] + try: + for(key, value) in data.items(): + if type(value) == file: + v_files.append((key, value)) + else: + v_vars.append((key, value)) + except TypeError: + systype, value, traceback = sys.exc_info() + raise TypeError, "not a valid non-string sequence or mapping object", traceback + + boundary, data = self.multipart_encode(v_vars, v_files) + contenttype = 'multipart/form-data; boundary=%s' % boundary + if(request.has_header('Content-Type') + and request.get_header('Content-Type').find('multipart/form-data') != 0): + print "Replacing %s with %s" % (request.get_header('content-type'), 'multipart/form-data') + request.add_unredirected_header('Content-Type', contenttype) + + request.add_data(data) + return request + + def multipart_encode(vars, files, boundary = None, buffer = None): + if boundary is None: + boundary = mimetools.choose_boundary() + if buffer is None: + buffer = '' + for(key, value) in vars: + buffer += '--%s\r\n' % boundary + buffer += 'Content-Disposition: form-data; name="%s"' % key + buffer += '\r\n\r\n' + value + '\r\n' + for(key, fd) in files: + file_size = os.fstat(fd.fileno())[stat.ST_SIZE] + filename = fd.name.split('/')[-1] + contenttype = mimetypes.guess_type(filename)[0] or 'application/octet-stream' + buffer += '--%s\r\n' % boundary + buffer += 'Content-Disposition: form-data; name="%s"; filename="%s"\r\n' % (key, filename) + buffer += 'Content-Type: %s\r\n' % contenttype + # buffer += 'Content-Length: %s\r\n' % file_size + fd.seek(0) + buffer += '\r\n' + fd.read() + '\r\n' + buffer += '--%s--\r\n\r\n' % boundary + return boundary, buffer + multipart_encode = Callable(multipart_encode) + + https_request = http_request + +## def main(): +## import tempfile, sys + +## validatorURL = "http://validator.w3.org/check" +## opener = urllib2.build_opener(MultipartPostHandler) + +## def validateFile(url): +## temp = tempfile.mkstemp(suffix=".html") +## os.write(temp[0], opener.open(url).read()) +## params = { "ss" : "0", # show source +## "doctype" : "Inline", +## "uploaded_file" : open(temp[1], "rb") } +## print opener.open(validatorURL, params).read() +## os.remove(temp[1]) + +## if len(sys.argv[1:]) > 0: +## for arg in sys.argv[1:]: +## validateFile(arg) +## else: +## validateFile("http://www.google.com") + +## if __name__=="__main__": +## main() diff --git a/cgi-bin/NW Leicestershire.cgi b/cgi-bin/NW Leicestershire.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Newcastle-under-Lyme.cgi b/cgi-bin/Newcastle-under-Lyme.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Newham.cgi b/cgi-bin/Newham.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/North Tyneside.cgi b/cgi-bin/North Tyneside.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/North Warwickshire.cgi b/cgi-bin/North Warwickshire.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Northumberland.cgi b/cgi-bin/Northumberland.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Oadby and Wigston.cgi b/cgi-bin/Oadby and Wigston.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Oswestry.cgi b/cgi-bin/Oswestry.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Peterborough.cgi b/cgi-bin/Peterborough.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Portsmouth.cgi 
b/cgi-bin/Portsmouth.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Redditch.cgi b/cgi-bin/Redditch.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Rushmoor.cgi b/cgi-bin/Rushmoor.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Scarborough.cgi b/cgi-bin/Scarborough.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Sevenoaks.cgi b/cgi-bin/Sevenoaks.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/South Bucks.cgi b/cgi-bin/South Bucks.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/South Ribble.cgi b/cgi-bin/South Ribble.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/South Staffordshire.cgi b/cgi-bin/South Staffordshire.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/SouthOxfordshire.cgi b/cgi-bin/SouthOxfordshire.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Southampton.cgi b/cgi-bin/Southampton.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Spelthorne.cgi b/cgi-bin/Spelthorne.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/St Helens.cgi b/cgi-bin/St Helens.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Stevenage.cgi b/cgi-bin/Stevenage.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Stirling.cgi b/cgi-bin/Stirling.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Stockton-On-Tees.cgi b/cgi-bin/Stockton-On-Tees.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Stratford.cgi b/cgi-bin/Stratford.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Sunderland.cgi b/cgi-bin/Sunderland.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Teignbridge.cgi b/cgi-bin/Teignbridge.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Test Valley.cgi b/cgi-bin/Test Valley.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Tonbridge.cgi b/cgi-bin/Tonbridge.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Torbay.cgi b/cgi-bin/Torbay.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Vale Royal.cgi b/cgi-bin/Vale Royal.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Waveney.cgi b/cgi-bin/Waveney.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Wear Valley.cgi b/cgi-bin/Wear Valley.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Wellingborough.cgi b/cgi-bin/Wellingborough.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/West Berkshire.cgi b/cgi-bin/West Berkshire.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/West Lancashire.cgi b/cgi-bin/West Lancashire.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/West Norfolk.cgi b/cgi-bin/West Norfolk.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Winchester.cgi b/cgi-bin/Winchester.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/Woking.cgi b/cgi-bin/Woking.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/Wolverhampton.cgi b/cgi-bin/Wolverhampton.cgi old mode 100755 new mode 100644 diff --git a/cgi-bin/York.cgi b/cgi-bin/York.cgi old mode 100644 new mode 100755 diff --git a/cgi-bin/broxbourne.cgi b/cgi-bin/broxbourne.cgi deleted file mode 100755 index ffe3063..0000000 --- a/cgi-bin/broxbourne.cgi +++ /dev/null @@ -1,163 +0,0 @@ -#!/usr/bin/perl - -use strict; -use warnings; - -use CGI qw(:cgi); -use DateTime; -#use DateTime::Format::DateParse; -use HTML::TreeBuilder; -use LWP::UserAgent; -use XML::Writer; - -# The master URL for the Broxbourne planning search -our $SearchURL = "http://www2.broxbourne.gov.uk/planningsearch/webform1.aspx"; - -# We're a CGI script... 
-my $query = CGI->new(); - -# Get the date as an offset from 2000-01-01 -my $epoch = DateTime->new(year => 2000, month => 1, day => 1); -my $querydate = DateTime->new(year => $query->param("year"), - month => $query->param("month"), - day => $query->param("day")); -$querydate = $querydate->delta_days($epoch)->delta_days; - -# Construct an LWP user agent -our $UA = LWP::UserAgent->new(env_proxy => 1); - -# Post the URL to get an initial blank form -my $state = get_state(do_post()); - -# Post each date in turn to build up the state - you can thank -# Microsoft and ASP.NET for the horrible way we have to do this -# by posting each argument in turn to build up the state -$state = get_state(do_post_back($state, 'DateSelector1$Calendar1', $querydate)); -$state = get_state(do_post_back($state, 'DateSelector2$Calendar1', $querydate)); - -# Output an HTTP response header -print $query->header(-type => "text/xml"); - -# Create an XML output stream -my $Writer = XML::Writer->new(DATA_MODE => 1); - -# Output the XML header data -$Writer->xmlDecl("UTF-8"); -$Writer->startTag("planning"); -$Writer->dataElement("authority_name", "Borough of Broxbourne"); -$Writer->dataElement("authority_short_name", "Broxbourne"); -$Writer->startTag("applications"); - -# Get the arguments for the search... -my $args = { - "Srch" => "rb1", - "__VIEWSTATE" => $state, - "btnSearch" => "Search", - "tbReference" => "", - "tbRef2" => "" -}; - -# ...and then (at last) we can do the search! -my $page = do_post($args); - -# Loop processing pages of results -while ($page) -{ - my $table = $page->look_down("_tag" => "table", "id" => "DataGrid1"); - - # Remember the state - $state = get_state($page); - - # Clear the page for now - this will be reinitialised if we - # find another page of results to make us go round the loop - # all over again - undef $page; - - # Check that we found a table - searches that find no results - # produce a page with no table in it - if ($table) - { - # Process each row of the results - foreach my $row ($table->look_down("_tag" => "tr")) - { - my @cells = $row->look_down("_tag" => "td"); - - if ($cells[0]->look_down("_tag" => "input")) - { - my $reference = $cells[1]->as_trimmed_text; - my $date = $cells[2]->as_trimmed_text; - my $address = $cells[3]->as_trimmed_text; - my $description = $cells[4]->as_trimmed_text; - my $postcode; - - if ($address =~ /\s+([A-Z]+\d+\s+\d+[A-Z]+)$/) - { - $postcode = $1; - } - - $Writer->startTag("application"); - $Writer->dataElement("council_reference", $reference); - $Writer->dataElement("address", $address); - $Writer->dataElement("postcode", $postcode); - $Writer->dataElement("description", $description); - $Writer->dataElement("date_received", $date); - $Writer->endTag("application"); - } - elsif ($cells[0]->attr("colspan") && $cells[0]->attr("colspan") eq "5") - { - foreach my $link ($cells[0]->look_down("_tag" => "a")) - { - if ($link->as_trimmed_text eq ">" && - $link->attr("href") =~ /^javascript:__doPostBack\('([^\']*)','([^\']*)'\)$/) - { - $page = do_post_back($state, $1, $2); - } - } - } - } - } -} - -# Finish off XML output -$Writer->endTag("applications"); -$Writer->endTag("planning"); -$Writer->end(); - -exit 0; - -# Extract the state from a page so we can repost it -sub get_state -{ - my $page = shift; - my $viewstate = $page->look_down("_tag" => "input", "name" => "__VIEWSTATE"); - - return $viewstate->attr("value"); -} - -# Fake up what the doPostBack javascript function in the page does... 
-sub do_post_back -{ - my $state = shift; - my $target = shift; - my $argument = shift; - - $target =~ s/\$/:/g; - - my $args = { - "__EVENTTARGET" => $target, - "__EVENTARGUMENT" => $argument, - "__VIEWSTATE" => $state - }; - - return do_post($args); -} - -# Post to the planning search page -sub do_post -{ - my $response = $UA->post($SearchURL, @_); - - die $response->status_line unless $response->is_success; - - return HTML::TreeBuilder->new_from_content($response->content); -} diff --git a/python_scrapers/AcolnetParser.py b/python_scrapers/AcolnetParser.py new file mode 100644 index 0000000..55e2796 --- /dev/null +++ b/python_scrapers/AcolnetParser.py @@ -0,0 +1,364 @@ +#!/usr/local/bin/python + +import urllib, urllib2 +import HTMLParser +#from BeautifulSoup import BeautifulSoup + +import urlparse + +import re + +end_head_regex = re.compile(" 0: + self._subtable_depth -= 1 + else: + # We need to add the last application in the table + if self._current_application is not None: + #print "adding application" + self._results.addApplication(self._current_application) + #print self._current_application + self._current_application = None + self._tr_number = None + self._subtable_depth = None + elif tag == "td": + self._in_td = False + + def getResultsByDayMonthYear(self, day, month, year): + # first we fetch the search page to get ourselves some session info... + search_form_response = urllib2.urlopen(self.base_url) + search_form_contents = search_form_response.read() + + # This sometimes causes a problem in HTMLParser, so let's just get the link + # out with a regex... + + groups = self.action_regex.search(search_form_contents).groups() + + action = groups[0] + #print action + + action_url = urlparse.urljoin(self.base_url, action) + #print action_url + + our_date = date(year, month, day) + + search_data = {"regdate1": our_date.strftime(date_format), + "regdate2": our_date.strftime(date_format), + } + + opener = urllib2.build_opener(MultipartPostHandler.MultipartPostHandler) + response = opener.open(action_url, search_data) + results_html = response.read() + + # This is for doing site specific html cleanup + results_html = self._cleanupHTML(results_html) + + #some javascript garbage in the header upsets HTMLParser, + #so we'll just have the body + just_body = "" + end_head_regex.split(results_html)[-1] + + #outfile = open(self.authority_short_name + ".debug", "w") + #outfile.write(just_body) + + self.feed(just_body) + + return self._results + + + + def getResults(self, day, month, year): + return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML() + + +class BaberghParser(AcolnetParser): + #search_url = "http://planning.babergh.gov.uk/dataOnlinePlanning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch" + + case_number_tr = 1 # this one can be got by the td class attribute + reg_date_tr = 2 + location_tr = 4 + proposal_tr = 5 + + #authority_name = "Babergh District Council" + #authority_short_name = "Babergh" + + # It would be nice to scrape this... 
+ comments_email_address = "planning.reception@babergh.gov.uk" + + action_regex = re.compile("") + +class BasingstokeParser(AcolnetParser): + #search_url = "http://planning.basingstoke.gov.uk/DCOnline2/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch" + + case_number_tr = 1 # this one can be got by the td class attribute + reg_date_tr = 3 + location_tr = 6 + proposal_tr = 8 + + #authority_name = "Basingstoke and Deane Borough Council" + #authority_short_name = "Basingstoke and Deane" + + # It would be nice to scrape this... + comments_email_address = "development.control@basingstoke.gov.uk" + + action_regex = re.compile("") + +class BassetlawParser(AcolnetParser): + #search_url = "http://www.bassetlaw.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch" + + case_number_tr = 1 # this one can be got by the td class attribute + reg_date_tr = 2 + location_tr = 5 + proposal_tr = 6 + + #authority_name = "Bassetlaw District Council" + #authority_short_name = "Bassetlaw" + + comments_email_address = "planning@bassetlaw.gov.uk" + + action_regex = re.compile("", re.IGNORECASE) + + def _cleanupHTML(self, html): + """There is a broken div in this page. We don't need any divs, so + let's get rid of them all.""" + + div_regex = re.compile("]*>", re.IGNORECASE) + return div_regex.sub('', html) + + +class BridgenorthParser(AcolnetParser): + #search_url = "http://www2.bridgnorth-dc.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch" + + case_number_tr = 1 # this one can be got by the td class attribute + reg_date_tr = 2 + location_tr = 4 + proposal_tr = 5 + + #authority_name = "Bridgenorth District Council" + #authority_short_name = "Bridgenorth" + + comments_email_address = "contactus@bridgnorth-dc.gov.uk" + + action_regex = re.compile("") + +class BuryParser(AcolnetParser): + #search_url = "http://e-planning.bury.gov.uk/ePlanning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch" + + case_number_tr = 1 # this one can be got by the td class attribute + reg_date_tr = 2 + location_tr = 4 + proposal_tr = 5 + + #authority_name = "Bury Metropolitan Borough Council" + #authority_short_name = "Bury" + + comments_email_address = "development.control@bury.gov.uk" + action_regex = re.compile("") + +## class CanterburyParser(AcolnetParser): +## search_url = "http://planning.canterbury.gov.uk/scripts/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch" + +## case_number_tr = 1 # this one can be got by the td class attribute +## reg_date_tr = 2 +## location_tr = 4 +## proposal_tr = 5 + +## authority_name = "Canterbury City Council" +## authority_short_name = "Canterbury" + +## comments_email_address = "" +## action_regex = re.compile("") + +class CarlisleParser(AcolnetParser): + #search_url = "http://planning.carlisle.gov.uk/acolnet/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch" + + case_number_tr = 1 # this one can be got by the td class attribute + reg_date_tr = 2 + location_tr = 5 + proposal_tr = 6 + + #authority_name = "Carlisle City Council" + #authority_short_name = "Carlisle" + + comments_email_address = "dc@carlisle.gov.uk" + action_regex = re.compile("") + + +class DerbyParser(AcolnetParser): + #search_url = "http://195.224.106.204/scripts/planningpages02%5CXSLPagesDC_DERBY%5CDCWebPages/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch" + + case_number_tr = 1 # this one can be got by the td class attribute + reg_date_tr = 3 + location_tr = 4 + proposal_tr = 5 + + #authority_name = "Derby City Council" + #authority_short_name = "Derby" + + comments_email_address = 
"developmentcontrol@derby.gov.uk" + action_regex = re.compile("") + + +if __name__ == '__main__': + day = 15 + month = 3 + year = 2007 + + # working + # parser = BasingstokeParser() + parser = BaberghParser("Babergh District Council", "Babergh", "http://planning.babergh.gov.uk/dataOnlinePlanning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch") + + # works with the divs stripped out + #parser = BassetlawParser() + + # returns error 400 - bad request + #parser = BridgenorthParser() + + # working + #parser = BuryParser() + + # cambridgeshire is a bit different... + # no advanced search page + + # canterbury + # results as columns of one table + + # returns error 400 - bad request + #parser = CarlisleParser() + + # working + #parser = DerbyParser() + + print parser.getResults(day, month, year) + diff --git a/python_scrapers/MultipartPostHandler.py b/python_scrapers/MultipartPostHandler.py new file mode 100644 index 0000000..c427613 --- /dev/null +++ b/python_scrapers/MultipartPostHandler.py @@ -0,0 +1,133 @@ +#### +# 02/2006 Will Holcomb +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# + +# I have edited out a bit in the middle of this which reverts to a normal +# post with "application/x-www-form-urlencoded" content-type when there are +# no files. +# Duncan 5/5/2007 + +""" +Usage: + Enables the use of multipart/form-data for posting forms + +Inspirations: + Upload files in python: + http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/146306 + urllib2_file: + Fabien Seisen: + +Example: + import MultipartPostHandler, urllib2, cookielib + + cookies = cookielib.CookieJar() + opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookies), + MultipartPostHandler.MultipartPostHandler) + params = { "username" : "bob", "password" : "riviera", + "file" : open("filename", "rb") } + opener.open("http://wwww.bobsite.com/upload/", params) + +Further Example: + The main function of this file is a sample which downloads a page and + then uploads it to the W3C validator. +""" + +import urllib +import urllib2 +import mimetools, mimetypes +import os, stat + +class Callable: + def __init__(self, anycallable): + self.__call__ = anycallable + +# Controls how sequences are uncoded. If true, elements may be given multiple values by +# assigning a sequence. 
+doseq = 1 + +class MultipartPostHandler(urllib2.BaseHandler): + handler_order = urllib2.HTTPHandler.handler_order - 10 # needs to run first + + def http_request(self, request): + data = request.get_data() + if data is not None and type(data) != str: + v_files = [] + v_vars = [] + try: + for(key, value) in data.items(): + if type(value) == file: + v_files.append((key, value)) + else: + v_vars.append((key, value)) + except TypeError: + systype, value, traceback = sys.exc_info() + raise TypeError, "not a valid non-string sequence or mapping object", traceback + + boundary, data = self.multipart_encode(v_vars, v_files) + contenttype = 'multipart/form-data; boundary=%s' % boundary + if(request.has_header('Content-Type') + and request.get_header('Content-Type').find('multipart/form-data') != 0): + print "Replacing %s with %s" % (request.get_header('content-type'), 'multipart/form-data') + request.add_unredirected_header('Content-Type', contenttype) + + request.add_data(data) + return request + + def multipart_encode(vars, files, boundary = None, buffer = None): + if boundary is None: + boundary = mimetools.choose_boundary() + if buffer is None: + buffer = '' + for(key, value) in vars: + buffer += '--%s\r\n' % boundary + buffer += 'Content-Disposition: form-data; name="%s"' % key + buffer += '\r\n\r\n' + value + '\r\n' + for(key, fd) in files: + file_size = os.fstat(fd.fileno())[stat.ST_SIZE] + filename = fd.name.split('/')[-1] + contenttype = mimetypes.guess_type(filename)[0] or 'application/octet-stream' + buffer += '--%s\r\n' % boundary + buffer += 'Content-Disposition: form-data; name="%s"; filename="%s"\r\n' % (key, filename) + buffer += 'Content-Type: %s\r\n' % contenttype + # buffer += 'Content-Length: %s\r\n' % file_size + fd.seek(0) + buffer += '\r\n' + fd.read() + '\r\n' + buffer += '--%s--\r\n\r\n' % boundary + return boundary, buffer + multipart_encode = Callable(multipart_encode) + + https_request = http_request + +## def main(): +## import tempfile, sys + +## validatorURL = "http://validator.w3.org/check" +## opener = urllib2.build_opener(MultipartPostHandler) + +## def validateFile(url): +## temp = tempfile.mkstemp(suffix=".html") +## os.write(temp[0], opener.open(url).read()) +## params = { "ss" : "0", # show source +## "doctype" : "Inline", +## "uploaded_file" : open(temp[1], "rb") } +## print opener.open(validatorURL, params).read() +## os.remove(temp[1]) + +## if len(sys.argv[1:]) > 0: +## for arg in sys.argv[1:]: +## validateFile(arg) +## else: +## validateFile("http://www.google.com") + +## if __name__=="__main__": +## main() diff --git a/python_scrapers/OtherFilesToCopy.csv b/python_scrapers/OtherFilesToCopy.csv index 09922e0..f60c5db 100644 --- a/python_scrapers/OtherFilesToCopy.csv +++ b/python_scrapers/OtherFilesToCopy.csv @@ -4,4 +4,5 @@ "SouthOxfordshireParser.py", "420" "SouthOxfordshire.cgi", "493" "ApplicationSearchServletParser.py", "420" - +"AcolnetParser.py", "420" +"MultipartPostHandler.py", "420" diff --git a/python_scrapers/PublicAccessSites.csv b/python_scrapers/PublicAccessSites.csv index 5db8d69..70c20f3 100644 --- a/python_scrapers/PublicAccessSites.csv +++ b/python_scrapers/PublicAccessSites.csv @@ -82,11 +82,14 @@ "Dundee City Council", "Dundee", "http://bwarrant.dundeecity.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" "Durham City Council", "Durham", "http://publicaccess.durhamcity.gov.uk/publicaccess/dc/", "PublicAccess", "PublicAccessParser" "East Dorset District Council", "East Dorset", "http://193.243.228.16/PublicAccess/dc/", 
"PublicAccess", "PublicAccessParser" -"Epsom and Ewell Borough Council", "Epsom and Ewell", "http://eplanning.epsom-ewell.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" "Gateshead Metropolitan Borough Council", "Gateshead", "http://planning.gateshead.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" -"Gedling Borough Council", "Gedling", "http://publicaccess.gedling.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" "Gloucestershire County Council", "Gloucestershire", "http://planning.gloucestershire.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser" "Newcastle-under-Lyme Borough Council", "Newcastle-under-Lyme", "http://publicaccess.newcastle-staffs.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser" "Vale Royal Borough Council", "Vale Royal", "http://pa.valeroyal.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" "Winchester City Council", "Winchester", "http://win2padmz.winchester.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser" "Wolverhampton City Council", "Wolverhampton", "http://planningonline.wolverhampton.gov.uk/PublicAccess/dc/", "PublicAccess", "PublicAccessParser" +"Babergh District Council", "Babergh", "http://planning.babergh.gov.uk/dataOnlinePlanning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "BaberghParser" +"Basingstoke and Deane Borough Council", "Basingstoke and Deane", "http://planning.basingstoke.gov.uk/DCOnline2/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "BasingstokeParser" +"Bassetlaw District Council", "Bassetlaw","http://www.bassetlaw.gov.uk/planning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "BassetlawParser" +"Bury Metropolitan Borough Council", "Bury", "http://e-planning.bury.gov.uk/ePlanning/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.PgeSearch", "AcolnetParser", "BuryParser" +"Derby City Council", "Derby", "http://195.224.106.204/scripts/planningpages02%5CXSLPagesDC_DERBY%5CDCWebPages/acolnetcgi.exe?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "DerbyParser" \ No newline at end of file