diff --git a/trunk/python_scrapers/PlanningExplorer.py b/trunk/python_scrapers/PlanningExplorer.py
new file mode 100644
index 0000000..b96bd6e
--- /dev/null
+++ b/trunk/python_scrapers/PlanningExplorer.py
@@ -0,0 +1,568 @@
+import urllib2
+import urllib
+import urlparse
+import cgi
+import re
+import datetime
+
+from BeautifulSoup import BeautifulSoup
+
+from PlanningUtils import PlanningApplication, \
+    PlanningAuthorityResults, \
+    getPostcodeFromText
+
+# Date format to enter into search boxes
+date_format = "%d/%m/%Y"
+
+# Regex for getting the application code
+# (needed for the comments url, when it exists)
+app_code_regex = re.compile("PARAM0=(\d*)")
+
+
+class PlanningExplorerParser:
+    # If this authority doesn't have a comments page,
+    # then set comments_email_address to an address for the
+    # planning department, and it will be used in lieu of
+    # a comments url.
+    comments_email_address = None
+
+    # These are the directories where the info urls and search urls
+    # usually live underneath the base_url.
+    # If these are different for a particular authority,
+    # they can be overridden in a subclass.
+    info_url_path = "MVM/Online/Generic/"
+    search_url_path = "MVM/Online/PL/GeneralSearch.aspx"
+
+    # This is the most common place for comments urls to live.
+    # The %s will be filled in with an application code.
+    comments_path = "MVM/Online/PL/PLComments.aspx?pk=%s"
+
+    # Most authorities don't need the referer header on the post
+    # request. If one does, override this in the subclass.
+    use_referer = False
+
+    # Some authorities won't give us anything back if we use the
+    # python urllib2 user agent string. In that case, override this
+    # in a subclass to pretend to be firefox.
+    use_firefox_user_agent = False
+
+    # This is the most common css class of the table containing the
+    # search results. If it is different for a particular authority,
+    # it can be overridden in a subclass.
+    results_table_attrs = {"class": "ResultsTable"}
+
+    # These are the most common column positions for the
+    # council reference, the address, and the description
+    # in the results table.
+    # They should be overridden in subclasses if they are different
+    # for a particular authority.
+    reference_td_no = 0
+    address_td_no = 1
+    description_td_no = 2
+
+    def _modify_response(self, response):
+        """For most sites, we have managed to get all the apps on a
+        single page by choosing the right parameters.
+        If that hasn't been possible, override this method to get a
+        new response object which has all the apps in one page.
+        (See, for example, Hackney.)
+        """
+        return response
+
+    def _find_trs(self, results_table):
+        """Normally, we just want a list of all the trs except the first one
+        (which is usually a header).
+        If the authority requires a different list of trs, override this method.
+ """ + return results_table.findAll("tr")[1:] + + def _sanitisePostHtml(self, html): + """This method can be overriden in subclasses if the + html that comes back from the post request is bad, and + needs tidying up before giving it to BeautifulSoup.""" + return html + + def _sanitiseInfoUrl(self, url): + """If an authority has info urls which are for some reason full + of crap (like Broadland does), then this method should be overridden + in order to tidy them up.""" + return url + + def _getHeaders(self): + """If the authority requires any headers for the post request, + override this method returning a dictionary of header key to + header value.""" + headers = {} + + if self.use_firefox_user_agent: + headers["User-Agent"] = "Mozilla/5.0 (X11; U; Linux i686; en-GB; rv:1.8.1.10) Gecko/20071126 Ubuntu/7.10 (gutsy) Firefox/2.0.0.10" + + if self.use_referer: + headers["Referer"] = self.search_url + + return headers + + def _getPostData(self, asp_args, search_date): + """Accepts asp_args (a tuple of key value pairs of the pesky ASP + parameters, and search_date, a datetime.date object for the day + we are searching for. + + This seems to be the most common set of post data which is needed + for PlanningExplorer sites. It won't work for all of them, so + will sometimes need to be overridden in a subclass. + + The parameter edrDateSelection is often not needed. + It is needed by Charnwood though, so I've left it in + to keep things simple. + """ + year_month_day = search_date.timetuple()[:3] + + post_data = urllib.urlencode(asp_args + ( + ("_ctl0", "DATE_RECEIVED"), + ("rbGroup", "_ctl5"), + ("_ctl7_hidden", urllib.quote('' %year_month_day)), + ("_ctl8_hidden", urllib.quote('' %year_month_day)), + ("edrDateSelection", "1"), + ("csbtnSearch", "Search"), + ("cboNumRecs", "99999"), + )) + + return post_data + + def _getPostCode(self): + """In most cases, the postcode can be got from the address in + the results table. Some councils put the address there without the + postcode. In this case we will have to go to the info page to get + the postcode. This should be done by overriding this method with + one that parses the info page.""" + + return getPostcodeFromText(self._current_application.address) + + def __init__(self, + authority_name, + authority_short_name, + base_url, + debug=False): + + self.authority_name = authority_name + self.authority_short_name = authority_short_name + self.base_url = base_url + + self.search_url = urlparse.urljoin(base_url, self.search_url_path) + self.info_url_base = urlparse.urljoin(self.base_url, self.info_url_path) + + self.debug = debug + + self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name) + + def getResultsByDayMonthYear(self, day, month, year): + search_date = datetime.date(year, month, day) + + # First do a get, to get some state + get_request = urllib2.Request(self.search_url) + get_response = urllib2.urlopen(get_request) + + html = get_response.read() + + # We need to find those ASP parameters such as __VIEWSTATE + # so we can use them in the next POST + asp_args_regex = re.compile(']*name=\"(__[A-Z]*)\"[^>]*value=\"([^\"]*)\"[^>]*>') + + # re.findall gets us a list of key value pairs. + # We want to concatenate it with a tuple, so we must + # make it a tuple + asp_args = tuple(re.findall(asp_args_regex, html)) + + # The post data needs to be different for different councils + # so we have a method on each council's scraper to make it. 
+        post_data = self._getPostData(asp_args, search_date)
+
+        headers = self._getHeaders()
+
+        request = urllib2.Request(self.search_url, post_data, headers)
+        post_response = urllib2.urlopen(request)
+
+        # The server answers the POST with an HTTP 302 (Object Moved)
+        # redirect, so post_response is really the response to the
+        # follow-up GET.
+
+        # In some cases, we can't get the page size set high
+        # until now. In that case, override _modify_response
+        # so that we get back a response with all the apps on one page.
+        # (The override can reuse self._getHeaders() if it needs to make
+        # another request - see Hackney.)
+        post_response = self._modify_response(post_response)
+
+        html = self._sanitisePostHtml(post_response.read())
+
+        soup = BeautifulSoup(html)
+
+        results_table = soup.find("table", attrs=self.results_table_attrs)
+
+        # If there is no results table, then there were no apps on that day.
+        if results_table:
+            trs = self._find_trs(results_table)
+
+            self._current_application = None
+
+            # _find_trs has already dropped the header row,
+            # so everything left is an application.
+            for tr in trs:
+                self._current_application = PlanningApplication()
+
+                # There is no need to search for the date_received, it's what
+                # we searched for
+                self._current_application.date_received = search_date
+
+                tds = tr.findAll("td")
+
+                for td_no in range(len(tds)):
+                    if td_no == self.reference_td_no:
+                        # This td contains the reference number and a link to details
+                        self._current_application.council_reference = tds[td_no].a.string
+
+                        relative_info_url = self._sanitiseInfoUrl(tds[td_no].a['href'])
+
+                        self._current_application.info_url = urlparse.urljoin(self.info_url_base, relative_info_url)
+
+                        # Now the comment url. If this authority has no web
+                        # form for comments, fall back to the planning
+                        # department's email address.
+                        if self.comments_email_address is not None:
+                            self._current_application.comment_url = self.comments_email_address
+                        else:
+                            # The info link contains a code which we need for
+                            # the comments url (on those sites that use it)
+                            application_code = app_code_regex.search(relative_info_url).groups()[0]
+
+                            relative_comments_url = self.comments_path %(application_code)
+                            self._current_application.comment_url = urlparse.urljoin(self.base_url, relative_comments_url)
+
+                    elif td_no == self.address_td_no:
+                        # If this td contains a div, then the address is the
+                        # string in there - otherwise, use the string in the td.
+                        if tds[td_no].div is not None:
+                            address = tds[td_no].div.string
+                        else:
+                            address = tds[td_no].string
+
+                        self._current_application.address = address
+                        self._current_application.postcode = self._getPostCode()
+
+                    elif td_no == self.description_td_no:
+                        if tds[td_no].div is not None:
+                            # Mostly the description is in a div.
+                            # Use the empty string if it is missing.
+                            description = tds[td_no].div.string or ""
+                        else:
+                            # But sometimes (eg Crewe) it is directly in the td.
+                            # Use the empty string if it is missing.
+                            description = tds[td_no].string or ""
+
+                        self._current_application.description = description
+
+                self._results.addApplication(self._current_application)
+
+        return self._results
+
+
+    def getResults(self, day, month, year):
+        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
+
+
+class BroadlandLike:
+    # FIXME - BroadlandLike authorities don't have postcodes on their site,
+    # but they do have grid references. We should use these.
+
+    results_table_attrs = {"class": "display_table"}
+
+    info_url_path = "Northgate/PlanningExplorer/Generic/"
+    search_url_path = "Northgate/PlanningExplorer/GeneralSearch.aspx"
+
+    use_firefox_user_agent = True
+    use_referer = True
+
+    def _getPostData(self, asp_args, search_date):
+        post_data = urllib.urlencode(asp_args + (
+            ("cboSelectDateValue", "DATE_RECEIVED"),
+            ("rbGroup", "rbRange"),
+            ("dateStart", search_date.strftime(date_format)),
+            ("dateEnd", search_date.strftime(date_format)),
+            ("cboNumRecs", "99999"),
+            ("csbtnSearch", "Search"),
+        ))
+
+        return post_data
+
+    def _sanitiseInfoUrl(self, url):
+        """The broadland info urls arrive full of rubbish. This method tidies
+        them up."""
+
+        # We need to
+        # 1) Remove whitespace
+        # 2) Remove the &#xA; and &#xD; character entities
+        ws_re = re.compile("(?:(?:\s)|(?:&#x\w;))*")
+
+        return ''.join(ws_re.split(url))
+
+
+class BlackburnParser(PlanningExplorerParser):
+    use_firefox_user_agent = True
+
+class BroadlandParser(BroadlandLike, PlanningExplorerParser):
+    # FIXME - is http://secure.broadland.gov.uk/mvm/Online/PL/GeneralSearch.aspx
+    # a better url for Broadland?
+
+    def _sanitisePostHtml(self, html):
+        """The page that comes back from the post for the broadland site
+        has a broken doctype declaration. We need to tidy that up before
+        giving it to BeautifulSoup."""
+
+        # This is what it looks like - note the missing close doublequote:
+        #
+
+        # Split on the broken doctype and join with the doctype with
+        # the closing quote.
+        html = ''.join(html.split(''))
+
+        return html
+
+class CamdenParser(BroadlandLike, PlanningExplorerParser):
+    comments_path = "Northgate/PlanningExplorer/PLComments.aspx?pk=%s"
+
+class CharnwoodParser(PlanningExplorerParser):
+    use_firefox_user_agent = True
+
+class CreweParser(PlanningExplorerParser):
+    use_firefox_user_agent = True
+    address_td_no = 4
+
+    def _getPostData(self, asp_args, search_date):
+        year_month_day = search_date.timetuple()[:3]
+
+        post_data = urllib.urlencode(asp_args + (
+            ("drDateReceived:_ctl0_hidden", urllib.quote('' %year_month_day)),
+            ("drDateReceived:_ctl0_input", search_date.strftime(date_format)),
+            ("drDateReceived:_ctl1_hidden", urllib.quote('' %year_month_day)),
+            ("drDateReceived:_ctl1_input", search_date.strftime(date_format)),
+            ("cboNumRecs", "99999"),
+            ("csbtnSearch", "Search"),
+        ))
+
+        return post_data
+
+
+class EastStaffsParser(PlanningExplorerParser):
+    use_firefox_user_agent = True
+
+    address_td_no = 4
+    description_td_no = 1
+
+
+class EppingForestParser(PlanningExplorerParser):
+    use_firefox_user_agent = True
+
+    address_td_no = 3
+    description_td_no = 1
+
+class ForestHeathParser(BroadlandLike, PlanningExplorerParser):
+    pass
+
+class HackneyParser(PlanningExplorerParser):
+    # FIXME - This will only get the first ten records on this
+    # day. Need to deal with paging.
+
+    use_firefox_user_agent = True
+
+    address_td_no = 6
+    description_td_no = 5
+
+    def _modify_response(self, response):
+        # In order to make sure we don't have to worry about any paging,
+        # we'll fetch this url again with PS=99999.
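+        # For illustration only (url invented), the rewrite below turns
+        #     .../results.aspx?FT=...&PS=10&PG=1
+        # into
+        #     .../results.aspx?FT=...&PS=99999&PG=1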
+        real_url_tuple = urlparse.urlsplit(response.geturl())
+
+        query_string = real_url_tuple[3]
+
+        # Get the query as a list of key, value pairs
+        parsed_query_list = list(cgi.parse_qsl(query_string))
+
+        # Go through the query string replacing any PS parameter
+        # with PS=99999
+        for i in range(len(parsed_query_list)):
+            key, value = parsed_query_list[i]
+
+            if key == "PS":
+                value = "99999"
+                parsed_query_list[i] = (key, value)
+
+        new_query_string = urllib.urlencode(parsed_query_list)
+
+        new_url_tuple = real_url_tuple[:3] + (new_query_string,) + real_url_tuple[4:]
+
+        new_url = urlparse.urlunsplit(new_url_tuple)
+        new_request = urllib2.Request(new_url, None, self._getHeaders())
+        new_response = urllib2.urlopen(new_request)
+
+        return new_response
+
+    def _getPostData(self, asp_args, search_date):
+        post_data = urllib.urlencode(asp_args + (
+            ("ctl00", "DATE_RECEIVED"),
+            ("rbGroup", "ctl05"),
+            ("ctl07_input", search_date.strftime(date_format)),
+            ("ctl08_input", search_date.strftime(date_format)),
+            ("edrDateSelection", "1"),
+            ("csbtnSearch", "Search"),
+        ))
+
+        return post_data
+
+class KennetParser(PlanningExplorerParser):
+    use_firefox_user_agent = True
+
+    address_td_no = 3
+
+class LincolnParser(PlanningExplorerParser):
+    use_firefox_user_agent = True
+
+class LiverpoolParser(PlanningExplorerParser):
+    comments_email_address = "planningandbuildingcontrol@liverpool.gov.uk"
+    use_firefox_user_agent = True
+    use_referer = True
+
+    results_table_attrs = {"xmlns:mvm": "http://www.mvm.co.uk"}
+
+    info_url_path = "mvm/"
+    search_url_path = "mvm/planningsearch.aspx"
+
+    def _find_trs(self, results_table):
+        """In this case we want the trs which have a class attribute of
+        row0 or row1, skipping the first three (which are headers)."""
+        return results_table.findAll("tr", {"class": ["row0", "row1"]})[3:]
+
+    def _getPostData(self, asp_args, search_date):
+        post_data = urllib.urlencode(asp_args + (
+            ("dummy", "dummy field\tused for custom\tvalidator"),
+            ("drReceived$txtStart", search_date.strftime(date_format)),
+            ("drReceived$txtEnd", search_date.strftime(date_format)),
+            ("cboNumRecs", "99999"),
+            ("cmdSearch", "Search"),
+        ))
+
+        return post_data
+
+    def _sanitiseInfoUrl(self, url):
+        """The liverpool info urls arrive full of rubbish. This method tidies
+        them up."""
+
+        # We need to
+        # 1) Remove whitespace
+        # 2) Remove the &#xA; and &#xD; character entities
+        ws_re = re.compile("(?:(?:\s)|(?:&#x\w;))*")
+
+        return ''.join(ws_re.split(url))
+
+# FIXME - Merton, Shrewsbury, and South Norfolk need to be done here.
+# All are down today...
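+
+# A minimal sketch of what a new authority usually needs (the class below
+# is hypothetical, for illustration only - it is not a supported council):
+# pretend to be firefox if the site rejects urllib2's user agent, and
+# override the column numbers if the results table is laid out differently.
+#
+#class ExampleParser(PlanningExplorerParser):
+#    use_firefox_user_agent = True
+#    address_td_no = 3
+#    description_td_no = 1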
+
+class SouthNorfolkParser(PlanningExplorerParser):
+    use_firefox_user_agent = True
+
+class SouthShropshireParser(PlanningExplorerParser):
+    comments_email_address = "planning@southshropshire.gov.uk"
+    use_firefox_user_agent = True
+    info_url_path = "MVM/Online/PL/"
+
+    def _getPostData(self, asp_args, search_date):
+        local_date_format = "%d-%m-%Y"
+        year, month, day = search_date.timetuple()[:3]
+
+        post_data = urllib.urlencode(asp_args + (
+            ("edrDateSelection:htxtRange", "radRangeBetween"),
+            ("cboDateList", "DATE_RECEIVED"),
+            ("edrDateSelection:txtStart", search_date.strftime(local_date_format)),
+            ("edrDateSelection:txtEnd", search_date.strftime(local_date_format)),
+            ("edrDateSelection:txtDateReceived", "%(day)d-%(month)d-%(year)d~%(day)d-%(month)d-%(year)d" %({"day": day, "month": month, "year": year})),
+            ("cboNumRecs", "99999"),
+            ("csbtnSearch", "Search"),
+        ))
+
+        return post_data
+
+class SouthTynesideParser(BroadlandLike, PlanningExplorerParser):
+    # Unlike the other BroadlandLike sites, there are postcodes :-)
+    pass
+
+
+class StockportParser(PlanningExplorerParser):
+    comments_email_address = "admin.dc@stockport.gov.uk"
+    info_url_path = "MVM/Online/PL/"
+
+    def _getPostData(self, asp_args, search_date):
+        post_data = urllib.urlencode(asp_args + (
+            ("drDateReceived:txtStart", search_date.strftime(date_format)),
+            ("drDateReceived:txtEnd", search_date.strftime(date_format)),
+            ("cboNumRecs", "99999"),
+            ("csbtnSearch", "Search"),
+        ))
+
+        return post_data
+
+# FIXME - should add Swansea, but it is currently down
+
+class TamworthParser(PlanningExplorerParser):
+    comments_email_address = "planningadmin@tamworth.gov.uk"
+    use_firefox_user_agent = True
+    info_url_path = "MVM/Online/PL/"
+
+class TraffordParser(PlanningExplorerParser):
+    # There are no postcodes on the Trafford site.
+    use_firefox_user_agent = True
+    address_td_no = 3
+
+class WestOxfordshireParser(PlanningExplorerParser):
+    address_td_no = 3
+    description_td_no = 1
+
+    use_firefox_user_agent = True
+
+if __name__ == '__main__':
+    # NOTE - 04/11/2007 is a Sunday
+    # I'm using it to test that the scrapers behave on days with no apps.
+
+    #parser = BlackburnParser("Blackburn With Darwen Borough Council", "Blackburn", "http://195.8.175.6/")
+    #parser = BroadlandParser("Broadland Council", "Broadland", "http://www.broadland.gov.uk/")
+    #parser = CamdenParser("London Borough of Camden", "Camden", "http://planningrecords.camden.gov.uk/")
+    #parser = CharnwoodParser("Charnwood Borough Council", "Charnwood", "http://portal.charnwoodbc.gov.uk/")
+    #parser = CreweParser("Crewe and Nantwich Borough Council", "Crewe and Nantwich", "http://portal.crewe-nantwich.gov.uk/")
+    #parser = EastStaffsParser("East Staffordshire Borough Council", "East Staffs", "http://www2.eaststaffsbc.gov.uk/")
+    #parser = EppingForestParser("Epping Forest District Council", "Epping Forest", "http://plan1.eppingforestdc.gov.uk/")
+    #parser = ForestHeathParser("Forest Heath District Council", "Forest Heath", "http://195.171.177.73/")
+    parser = HackneyParser("London Borough of Hackney", "Hackney", "http://www.hackney.gov.uk/servapps/")
+    #parser = KennetParser("Kennet District Council", "Kennet", "http://mvm-planning.kennet.gov.uk/")
+    #parser = LincolnParser("Lincoln City Council", "Lincoln", "http://online.lincoln.gov.uk/")
+    #parser = LiverpoolParser("Liverpool City Council", "Liverpool", "http://www.liverpool.gov.uk/")
+    #parser = SouthNorfolkParser("South Norfolk Council", "South Norfolk", "http://planning.south-norfolk.gov.uk/")
+    #parser = SouthShropshireParser("South Shropshire District Council", "South Shropshire", "http://194.201.44.102/")
+    #parser = SouthTynesideParser("South Tyneside Council", "South Tyneside", "http://poppy.southtyneside.gov.uk/")
+    #parser = StockportParser("Stockport Metropolitan District Council", "Stockport", "http://s1.stockport.gov.uk/council/eed/dc/planning/")
+    #parser = TamworthParser("Tamworth Borough Council", "Tamworth", "http://80.1.64.77/")
+    #parser = TraffordParser("Trafford Council", "Trafford", "http://planning.trafford.gov.uk/")
+    #parser = WestOxfordshireParser("West Oxfordshire District Council", "West Oxfordshire", "http://planning.westoxon.gov.uk/")
+
+    print parser.getResults(1, 11, 2007)
+
+# To Do
+
+# Sort out paging:
+# South Shropshire - pages on 6
+
+# Investigate catching unavailable message:
+# Charnwood
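+
+# There are two entry points: getResultsByDayMonthYear returns the
+# PlanningAuthorityResults object itself, while getResults stringifies it
+# via displayXML. A minimal sketch of the former (using Charnwood's
+# details from the list above):
+#
+#    parser = CharnwoodParser("Charnwood Borough Council", "Charnwood", "http://portal.charnwoodbc.gov.uk/")
+#    results = parser.getResultsByDayMonthYear(1, 11, 2007)
+#    print results.displayXML()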