diff --git a/trunk/CGI/CGITemplate b/trunk/CGI/CGITemplate
new file mode 100644
index 0000000..e72f31e
--- /dev/null
+++ b/trunk/CGI/CGITemplate
@@ -0,0 +1,29 @@
+# This is the parser for %(authority_name)s.
+# It is generated from the file CGITemplate.
+
+import cgi
+import cgitb
+#cgitb.enable(display=0, logdir="/tmp")
+
+
+form = cgi.FieldStorage()
+day = form.getfirst('day')
+month = form.getfirst('month')
+year = form.getfirst('year')
+
+
+authority_name = "%(authority_name)s"
+authority_short_name = "%(authority_short_name)s"
+base_url = "%(base_url)s"
+
+import PublicAccess
+
+parser = PublicAccess.PublicAccessParser(authority_name,
+                                         authority_short_name,
+                                         base_url)
+
+xml = parser.getResults(day, month, year)
+
+print "Content-Type: text/xml"  # XML is following
+print
+print xml  # print the xml
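+
+# Note (an added comment, not in the original template): generateCGIScripts.py
+# substitutes the three placeholders above from a row of PublicAccessSites.csv,
+# so the generated York.cgi would contain, e.g.:
+#   authority_name = "City of York Council"
+#   authority_short_name = "York"
+#   base_url = "http://planning.york.gov.uk/PublicAccess/"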
diff --git a/trunk/CGI/PlanningUtils.py b/trunk/CGI/PlanningUtils.py
new file mode 100644
index 0000000..3430576
--- /dev/null
+++ b/trunk/CGI/PlanningUtils.py
@@ -0,0 +1,101 @@
+__auth__ = None
+
+import re
+
+date_format = "%d/%m/%Y"
+
+
+def xmlQuote(text):
+    # Change &s to &amp;s.
+    # I suspect there is probably some standard python
+    # function I should be using for this...
+    return text.replace('&', '&amp;')
+
+def fixNewlines(text):
+    # This can be used to sort out windows newlines
+    return text.replace("\r\n", "\n")
+
+# So what can a postcode look like then?
+# This list of formats comes from
+# http://www.mailsorttechnical.com/frequentlyaskedquestions.cfm
+#   AN NAA      M1 1AA
+#   ANN NAA     M60 1NW
+#   AAN NAA     CR2 6XH
+#   AANN NAA    DN55 1PT
+#   ANA NAA     W1A 1HP
+#   AANA NAA    EC1A 1BB
+
+postcode_regex = re.compile("[A-Z][A-Z]?\d(\d|[A-Z])? ?\d[A-Z][A-Z]")
+
+def getPostcodeFromText(text):
+    """This function takes a piece of text and returns the first
+    bit of it that looks like a postcode."""
+
+    postcode_match = postcode_regex.search(text)
+
+    if postcode_match is not None:
+        return postcode_match.group()
+
+
+class PlanningAuthorityResults:
+    """This class represents a set of results of a planning search.
+
+    This should probably be separated out so that it can be used for
+    authorities other than Cherwell.
+    """
+
+    def __init__(self, authority_name, authority_short_name):
+        self.authority_name = authority_name
+        self.authority_short_name = authority_short_name
+
+        # this will be a list of PlanningApplication objects
+        self.planning_applications = []
+
+
+    def addApplication(self, application):
+        self.planning_applications.append(application)
+
+    def __repr__(self):
+        return self.displayXML()
+
+    def displayXML(self):
+        """This should display the contents of this object in the planningalerts format,
+        i.e. in the same format as this one:
+        http://www.planningalerts.com/lambeth.xml
+        """
+
+        applications_bit = "".join([x.displayXML() for x in self.planning_applications])
+
+        return "<planning>\n" +\
+               "<authority_name>%s</authority_name>\n" %self.authority_name +\
+               "<authority_short_name>%s</authority_short_name>\n" %self.authority_short_name +\
+               "<applications>\n" + applications_bit +\
+               "</applications>\n" +\
+               "</planning>\n"
+
+
+class PlanningApplication:
+    def __init__(self, no_postcode_default='No postcode'):
+        self.council_reference = None
+        self.address = None
+        self.postcode = no_postcode_default
+        self.description = None
+        self.info_url = None
+        self.comment_url = None
+
+        # expecting this as a datetime.date object
+        self.date_received = None
+
+    def __repr__(self):
+        return self.displayXML()
+
+    def displayXML(self):
+        return "<application>\n" +\
+               "<council_reference>%s</council_reference>\n" %xmlQuote(self.council_reference) +\
+               "<address>%s</address>\n" %xmlQuote(self.address) +\
+               "<postcode>%s</postcode>\n" %self.postcode +\
+               "<description>%s</description>\n" %xmlQuote(self.description) +\
+               "<info_url>%s</info_url>\n" %xmlQuote(self.info_url) +\
+               "<comment_url>%s</comment_url>\n" %xmlQuote(self.comment_url) +\
+               "<date_received>%s</date_received>\n" %self.date_received.strftime(date_format) +\
+               "</application>\n"
\n" diff --git a/trunk/CGI/PublicAccess.py b/trunk/CGI/PublicAccess.py new file mode 100644 index 0000000..db635be --- /dev/null +++ b/trunk/CGI/PublicAccess.py @@ -0,0 +1,341 @@ +#!/usr/bin/python + +import urllib, urllib2 +import HTMLParser +import urlparse +import datetime, time + +import cookielib + +cookie_jar = cookielib.CookieJar() + + +from PlanningUtils import fixNewlines, PlanningAuthorityResults, PlanningApplication + + +search_form_url_end = "tdc/DcApplication/application_searchform.aspx" +search_results_url_end = "tdc/DcApplication/application_searchresults.aspx" +comments_url_end = "tdc/DcApplication/application_comments_entryform.aspx" + +class PublicAccessParser(HTMLParser.HTMLParser): + """This is the class which parses the PublicAccess search results page. + """ + + def __init__(self, + authority_name, + authority_short_name, + base_url, + debug=False): + + HTMLParser.HTMLParser.__init__(self) + + self.authority_name = authority_name + self.authority_short_name = authority_short_name + self.base_url = base_url + + self.debug = debug + + # this will change to True when we enter the table of results + self._in_results_table = False + + # this will be set to True when we have passed the header row + # in the results table + self._past_header_row = False + + # this will be true when we are in a in the results table + self._in_td = False + + # For each row, this will say how many tds we have seen so far + self._td_count = 0 + + # The object which stores our set of planning application results + self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name) + + # This will store the planning application we are currently working on. + self._current_application = None + + def handle_starttag(self, tag, attrs): + if tag == "table": + self.handle_start_table(attrs) + # we are only interested in tr tags if we are in the results table + elif self._in_results_table and tag == "tr": + self.handle_start_tr(attrs) + # we are only interested in td tags if we are in the results table + elif self._in_results_table and tag == "td": + self.handle_start_td(attrs) + # we are only interested in tags if we are in the 6th td in + # the results table. + elif self._in_td and self._td_count == 6 and tag == "a": + self.handle_start_a(attrs) + # If the tag is not one of these then we aren't interested + + def handle_endtag(self, tag): + # we only need to consider end tags if we are in the results table + if self._in_results_table: + if tag == "table": + self.handle_end_table() + if tag == "tr": + self.handle_end_tr() + if tag == "td": + self.handle_end_td() + + def handle_start_table(self, attrs): + for attr,value in attrs: + if attr == "class": + if value == "cResultsForm": + self._in_results_table = True + break + + def handle_end_table(self): + # If we see an end table tag, then note that we have left the + # results table. This method is only called if we are in that table. + self._in_results_table = False + + + def handle_start_tr(self, attrs): + # The first tr we meet in the results table is just headers + # We will set a flag at the end of that tr to avoid creating + # a blank PlanningApplication + if self._past_header_row: + # Create a candidate result object + self._current_application = PlanningApplication() + self._td_count = 0 + + def handle_end_tr(self): + # If we are in the results table, and not finishing the header row + # append the current result to the results list. 
+ if self._past_header_row: + self._results.addApplication(self._current_application) + else: + # The first row of the results table is headers + # We want to do nothing until after it + self._past_header_row = True + + def handle_start_td(self, attrs): + # increase the td count by one + self._td_count += 1 + + # note that we are now in a td + self._in_td = True + + def handle_end_td(self): + # note that we are now not in a td + self._in_td = False + + def handle_start_a(self, attrs): + # this method is only getting called if we are in the + # 6th td of a non-header row of the results table. + + # go through the attributes of the looking for one + # named 'href' + for attr,value in attrs: + if attr == "href": + # the value of this tag is a relative url. + # parse it so we can get the query string from it + parsed_info_url = urlparse.urlparse(value) + + # the 4th part of the tuple is the query string + query_string = parsed_info_url[4] + + # join this query string to the search URL, and store this as + # the info URL of the current planning application + self._current_application.info_url = urlparse.urljoin(self.base_url, value) + + # Join this query string to the comments URL, and store this as + # the comments URL of the current planning application + comments_url = urlparse.urljoin(self.base_url, comments_url_end) + self._current_application.comment_url = urlparse.urljoin(comments_url, query_string) + + # while we're here, let's follow some links to find the postcode... + # the postcode is in an input tag in the property page. This page + # can be found by following the info url. + # The newlines in the info page need fixing. + info_file_contents = fixNewlines(urllib2.urlopen(self._current_application.info_url).read()) + + info_file_parser = PublicAccessInfoPageParser() + info_file_parser.feed(info_file_contents) + + property_page_url = urlparse.urljoin(self._current_application.info_url, info_file_parser.property_page_url) + + # the newlines in this page need fixing + property_file_contents = fixNewlines(urllib2.urlopen(property_page_url).read()) + + property_file_parser = PublicAccessPropertyPageParser() + property_file_parser.feed(property_file_contents) + + # Set the postcode on the current planning application from the + # one found on the property page + if property_file_parser.postcode is not None: + self._current_application.postcode = property_file_parser.postcode + + # There is no need for us to look at any more attributes. + break + + + def handle_data(self, data): + if self._in_td: + # The first td contains the reference + if self._td_count == 1: + self._current_application.council_reference = data + + # The second td contains the date the application was received + elif self._td_count == 2: + year, month, day = time.strptime(data, "%d/%m/%Y")[:3] + received_date = datetime.date(year, month, day) + + self._current_application.date_received = received_date + + # The third td contains the address + elif self._td_count == 3: + #data = data.replace("^M","\n") + self._current_application.address = data + + # The fourth td contains the description + elif self._td_count == 4: + self._current_application.description = data + # 5 is status - we don't need it. + # 6 is a button - this is where we will get our postcode, + # comment_url, and info_url from (when handling the tag). 
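+
+    # For orientation, a hypothetical results row (an illustrative sketch,
+    # not markup copied from the site) would look something like:
+    #
+    #   <tr><td>07/00123/FUL</td><td>22/02/2007</td><td>1 High Street</td>
+    #   <td>Erection of a porch</td><td>PENDING</td>
+    #   <td><a href="application_detailview.aspx?caseno=XYZ">View</a></td></tr>
+    #
+    # which is why handle_data keys off _td_count values 1-4 and
+    # handle_start_a only fires in the 6th td.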
+
+
+    def getResultsByDayMonthYear(self, day, month, year):
+        # First download the search form (in order to get a session cookie).
+        search_form_request = urllib2.Request(urlparse.urljoin(self.base_url, search_form_url_end))
+        search_form_response = urllib2.urlopen(search_form_request)
+
+        cookie_jar.extract_cookies(search_form_response, search_form_request)
+
+
+        # We are only doing this first search in order to get a cookie.
+        # The paging on the site doesn't work with cookies turned off.
+        search_data1 = urllib.urlencode(
+            {"searchType": "ADV",
+             "caseNo": "",
+             "PPReference": "",
+             "AltReference": "",
+             "srchtype": "",
+             "srchstatus": "",
+             "srchdecision": "",
+             "srchapstatus": "",
+             "srchappealdecision": "",
+             "srchwardcode": "",
+             "srchparishcode": "",
+             "srchagentdetails": "",
+             "srchDateReceivedStart": "%(day)02d/%(month)02d/%(year)d" %{"day": day, "month": month, "year": year},
+             "srchDateReceivedEnd": "%(day)02d/%(month)02d/%(year)d" %{"day": day, "month": month, "year": year}})
+
+        if self.debug:
+            print search_data1
+
+
+        search_url = urlparse.urljoin(self.base_url, search_results_url_end)
+        request1 = urllib2.Request(search_url, search_data1)
+        cookie_jar.add_cookie_header(request1)
+        response1 = urllib2.urlopen(request1)
+
+        # This search is the one we will actually use.
+        # A maximum of 100 results are returned on this site,
+        # hence setting "pagesize" to 100. I doubt there will ever
+        # be more than 100 in one day in PublicAccess...
+        # "currentpage" = 1 gets us to the first page of results
+        # (there will only be one anyway, as we are asking for 100 results...)
+        # An example of the URL format this builds:
+#http://planning.york.gov.uk/PublicAccess/tdc/DcApplication/application_searchresults.aspx?szSearchDescription=Applications%20received%20between%2022/02/2007%20and%2022/02/2007&searchType=ADV&bccaseno=&currentpage=2&pagesize=10&module=P3
+
+        search_data2 = urllib.urlencode(
+            (("szSearchDescription", "Applications received between %(day)02d/%(month)02d/%(year)d and %(day)02d/%(month)02d/%(year)d" %{"day": day, "month": month, "year": year}),
+             ("searchType", "ADV"),
+             ("bccaseno", ""),
+             ("currentpage", "1"),
+             ("pagesize", "100"),
+             ("module", "P3")))
+
+        if self.debug:
+            print search_data2
+
+        # This time we want to do a GET request, so add the search data into the url.
+        request2_url = urlparse.urljoin(self.base_url, search_results_url_end + "?" + search_data2)
+
+        request2 = urllib2.Request(request2_url)
+
+        # add the cookie we stored from our first search
+        cookie_jar.add_cookie_header(request2)
+
+        response2 = urllib2.urlopen(request2)
+
+        contents = fixNewlines(response2.read())
+
+        if self.debug:
+            print contents
+
+        self.feed(contents)
+
+        return self._results
+
+
+    def getResults(self, day, month, year):
+        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
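+
+
+# Illustrative usage (a sketch, not part of the original module) - the
+# generated CGI scripts drive this class in essentially this way, with
+# values taken from a row of PublicAccessSites.csv:
+#
+#   parser = PublicAccessParser("City of York Council", "York",
+#                               "http://planning.york.gov.uk/PublicAccess/")
+#   print parser.getResults(22, 2, 2007)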
+
+
+class PublicAccessInfoPageParser(HTMLParser.HTMLParser):
+    """A parser to get the URL for the property details page out of the
+    info page (this url is needed in order to get the postcode of the
+    application).
+    """
+
+    def __init__(self):
+        HTMLParser.HTMLParser.__init__(self)
+
+        self.property_page_url = None
+
+    def handle_starttag(self, tag, attrs):
+        """The URL of the property details page is contained in an <a> tag in
+        an attribute with key 'id' and value 'A_btnPropertyDetails'. There is
+        some garbage on either side of it which we will have to clear up before
+        storing it...
+
+        We go through the <a> tags looking for one with such an attribute.
+        When we find it we go through its attributes looking for one with
+        key 'href' - the value of this attribute contains the URL we want,
+        after a bit of tidying up.
+
+        Once we have got the URL, there is no need for us to look at any more <a> tags.
+        """
+        if tag == "a" and self.property_page_url is None:
+            if attrs.count(("id", "A_btnPropertyDetails")) > 0:
+                for attr,value in attrs:
+                    if attr == "href":
+                        the_link = value
+
+                        # this has some garbage on either side of it...
+                        # let's strip that off.
+                        # the garbage on the left is separated by whitespace;
+                        # the garbage on the right is separated by a "'".
+                        self.property_page_url = the_link.split()[1].split("'")[0]
+
+
+class PublicAccessPropertyPageParser(HTMLParser.HTMLParser):
+    """A parser to get the postcode out of the property details page."""
+
+    def __init__(self):
+        HTMLParser.HTMLParser.__init__(self)
+
+        self.postcode = None
+
+    def handle_starttag(self, tag, attrs):
+        """The postcode is contained in an <input> tag.
+        This tag has an attribute 'name' with value 'postcode'.
+        It also has an attribute 'value' whose value is the postcode of this application.
+
+        We go through the input tags looking for one with an attribute with
+        key 'name' and value 'postcode'. When we find one,
+        we look through its attributes for one with key 'value' - we store
+        the value of this attribute as self.postcode.
+
+        Once we have the postcode, there is no need to look at any more input tags.
+        """
+        if tag == "input" and self.postcode is None:
+            if attrs.count(("name", "postcode")) > 0:
+                for attr,value in attrs:
+                    if attr == "value":
+                        self.postcode = value
diff --git a/trunk/CGI/PublicAccessSites.csv b/trunk/CGI/PublicAccessSites.csv
new file mode 100644
index 0000000..18886bd
--- /dev/null
+++ b/trunk/CGI/PublicAccessSites.csv
@@ -0,0 +1,5 @@
+"authority_name", "authority_short_name", "base_url"
+"City of York Council", "York", "http://planning.york.gov.uk/PublicAccess/"
+"Cherwell District Council", "Cherwell", "http://cherweb.cherwell-dc.gov.uk/publicaccess/"
+"Angus Council", "Angus", "http://planning.angus.gov.uk/PublicAccess/"
+"Huntingdonshire District Council", "Huntingdonshire", "http://planning.huntsdc.gov.uk/publicaccess/"
diff --git a/trunk/CGI/SouthOxfordshire.cgi b/trunk/CGI/SouthOxfordshire.cgi
new file mode 100755
index 0000000..6e0322a
--- /dev/null
+++ b/trunk/CGI/SouthOxfordshire.cgi
@@ -0,0 +1,20 @@
+#!/usr/bin/python
+
+import cgi
+import cgitb; cgitb.enable(display=0, logdir="/tmp")
+
+
+form = cgi.FieldStorage()
+day = form.getfirst('day')
+month = form.getfirst('month')
+year = form.getfirst('year')
+
+from SouthOxfordshireParser import SouthOxfordshireParser
+
+parser = SouthOxfordshireParser()
+
+xml = parser.getResults(day, month, year)
+
+print "Content-Type: text/xml"  # XML is following
+print
+print xml  # print the xml
"South Oxfordshire" + + +from PlanningUtils import fixNewlines, \ + getPostcodeFromText, \ + PlanningAuthorityResults, \ + PlanningApplication + +class SouthOxfordshireParser(HTMLParser.HTMLParser): + """In this case we'll take the date, so that we can avoid doing dowloads for + the other days in this week's file. This date should be a datetime.date object. + """ + def __init__(self): + HTMLParser.HTMLParser.__init__(self) + + self._requested_date = None + + # We'll keep a count of the number of tables we have seen. + # All the interesting stuff is in table 3 + self._table_count = 0 + + # While inside table 3, we'll keep a count of the number of + # s we have seen. What is in which numbered is detailed below. + # 1 reference + # 3 place and description + # 5 date received + # 2 and 4 are just padding + self._td_count = 0 + + # This is just a flag to say that we are now ready to get the reference + # from the next bit of data + self._get_reference = False + + self._data = '' + + # this will hold the application we are currently working on. + self._current_application = None + + # The object which stores our set of planning application results + self._results = PlanningAuthorityResults(authority_name, authority_short_name) + + def handle_starttag(self, tag, attrs): + # if we see a table tag, increment the table count. + if tag == 'table': + self._table_count += 1 + + # we are only interested in other tags if we are in table 3. + if self._table_count == 3: + + # If we are starting a , create a new PlanningApplication object + # for the application currently being processed + if tag == 'tr': + self._current_application = PlanningApplication() + + # if we see a td, increment the count. + if tag == 'td': + self._td_count += 1 + + # if we are in the first , and we see a link, + # then it is to the info page for this applicaion. + if tag == 'a' and self._td_count == 1: + for key, value in attrs: + if key == 'href': + url_end = value + self._current_application.info_url = urlparse.urljoin(search_url,url_end) + + # We now know that the next bit of data is the reference + self._get_reference = True + + # href is the only attribute we are interested in. + break + + def handle_endtag(self, tag): + # There is no need to do anything unless we are in table 3. + if self._table_count == 3: + + # The end indicates that the current application is finished. + # Now we can fetch the info_page to get the address, postcode, + # and description. + # If we don't have a reference, then we are in the header row, + # which we don't want. + # There is no point in doing this if the date is not the requested one. + + if tag == 'tr' and \ + self._current_application.council_reference is not None and \ + self._current_application.date_received == self._requested_date: + + info_page_parser = SouthOxfordshireInfoURLParser() + info_page_parser.feed(urllib2.urlopen(self._current_application.info_url).read()) + + self._current_application.address = info_page_parser.address + self._current_application.postcode = getPostcodeFromText(info_page_parser.address) + self._current_application.description = info_page_parser.description + + # Add the current application to the results set + self._results.addApplication(self._current_application) + + # At the end of the 5th , self._data should contain + # the received date of the application. 
+ if tag == 'td' and self._td_count == 5: + app_year, app_month, app_day = tuple(time.strptime(self._data, "%d %B %Y")[:3]) + self._current_application.date_received = datetime.date(app_year, app_month, app_day) + + self._data = '' + self._td_count = 0 + + def handle_data(self, data): + # There is no need to do anything if we aren't in table 3. + if self._table_count == 3: + # If we are in the first , and the get_reference flag is set, + # then the next data is the reference. + if self._td_count == 1 and self._get_reference: + self._current_application.council_reference = data + + # The comment url can now be made, as it depends only on the reference. + # On this site, the link to the comment page is only displayed once + # the planning authority has decided who is handling this application + # and has opened consultations. The link below works straight away, + # and also works for apps for which the consultation period is over. + # I have no idea if anything is actually done with these comments if + # it is followed too early... + self._current_application.comment_url = comment_url %{'reference': self._current_application.council_reference} + + # Set the get_reference flag back to False. + self._get_reference = False + + # If we are in the 5th , then we need to collect all the data together + # before we can use it. This is actually processed in handle_endtag. + if self._td_count == 5: + self._data += data + + def handle_entityref( self, ref ): + # We might have some entity_refs to clear up. + # there is no need to bother with this if we aren't in the results table. + if self._table_count == 3 and self._td_count == 5: + if ref == 'nbsp': + self._data += ' ' + + + def getResultsByDayMonthYear(self, day, month, year): + """This will return an ApplicationResults object containg the + applications for the date passed in.""" + + today = datetime.date.today() + self.requested_date = datetime.date(year, month, day) + delta = today - self.requested_date + + # to get the correct page, we need + # page ((days mod 7) + 1) + page_number = delta.days/7 + 1 + + response = urllib2.urlopen(search_url %page_number) + + self.feed(response.read()) + + return self._results + + + def getResults(self, day, month, year): + return getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML() + +class SouthOxfordshireInfoURLParser(HTMLParser.HTMLParser): + """This parser is to get the description and address out of the info page + for a South Oxfordshire application.""" + + def __init__(self): + HTMLParser.HTMLParser.__init__(self) + + self.address = None + self.description = None + + # These two states will be set to: + # 0 - if we haven't yet got that bit + # 1 - if we are currently working on it + # 2 - if we have finished + self._address_state = 0 + self._description_state = 0 + + # We well need to know whether or not we are in a + self._in_td = False + + # This is used for collecting together date which comes in several bits. + self._data = '' + + def handle_starttag(self, tag, attrs): + # If we see the start of a and we are still interested in some data + # then set the td flag to true, and blank the data + if tag == 'td' and (self._address_state < 2 or self._description_state < 2): + self._in_td = True + self._data = '' + + def handle_endtag(self, tag): + if tag == 'td' and (self._address_state < 2 or self._description_state < 2): + # If we are working on the description, + # set description from _data and note that we need to work on it no more. 
+ if self._description_state == 1: + self.description = self._data + self._description_state = 2 + + + # If we are working on the address, + # set address from _data and note that we need to work on it no more. + elif self._address_state == 1: + self.address = self._data + self._address_state = 2 + + # If we see data which says 'Descripton', + # then set the description state to working. + elif self._data.strip() == 'Description': + self._description_state = 1 + + # If we see data which says 'Location', + # then set the addresss state to working. + elif self._data.strip() == 'Location': + self._address_state = 1 + + # Note that we are leaving the + self._in_td = False + + def handle_data(self, data): + # if we are in a td, and we are still interested in the data for something, + # append the current bit to self._data + if self._in_td and (self._address_state < 2 or self._description_state < 2): + self._data += data + + +# TODO + +# find out what time of day this is run - does it matter that +# we aren't being careful with daylight saving time etc. + +# Can we check that scraped email address really is +# an email address? + +if __name__ == "__main__": + form = cgi.FieldStorage() + day = form.getfirst('day') + month = form.getfirst('month') + year = form.getfirst('year') + + parser = SouthOxfordshireParser() + + + print "Content-Type: text/xml" # XML is following + print + print xml # print the xml diff --git a/trunk/CGI/SouthOxfordshireParser.py b/trunk/CGI/SouthOxfordshireParser.py new file mode 100644 index 0000000..0097ee5 --- /dev/null +++ b/trunk/CGI/SouthOxfordshireParser.py @@ -0,0 +1,248 @@ + +import urllib, urllib2 + +import HTMLParser +import urlparse +import datetime, time + +# This needs a page number inserting +search_url = "http://www.southoxon.gov.uk/ccm/planning/ApplicationList.jsp?PAGE=%d" + +# This needs the council reference +comment_url = "https://forms.southoxon.gov.uk/ufs/ufsmain?formid=PLANNINGCOMMENT&PLNGAPPL_REFERENCE=%(reference)s" + +authority_name = "South Oxfordshire District Council" +authority_short_name = "South Oxfordshire" + + +from PlanningUtils import fixNewlines, \ + getPostcodeFromText, \ + PlanningAuthorityResults, \ + PlanningApplication + +class SouthOxfordshireParser(HTMLParser.HTMLParser): + """In this case we'll take the date, so that we can avoid doing dowloads for + the other days in this week's file. This date should be a datetime.date object. + """ + def __init__(self): + HTMLParser.HTMLParser.__init__(self) + + self._requested_date = None + + # We'll keep a count of the number of tables we have seen. + # All the interesting stuff is in table 3 + self._table_count = 0 + + # While inside table 3, we'll keep a count of the number of + # s we have seen. What is in which numbered is detailed below. + # 1 reference + # 3 place and description + # 5 date received + # 2 and 4 are just padding + self._td_count = 0 + + # This is just a flag to say that we are now ready to get the reference + # from the next bit of data + self._get_reference = False + + self._data = '' + + # this will hold the application we are currently working on. + self._current_application = None + + # The object which stores our set of planning application results + self._results = PlanningAuthorityResults(authority_name, authority_short_name) + + def handle_starttag(self, tag, attrs): + # if we see a table tag, increment the table count. + if tag == 'table': + self._table_count += 1 + + # we are only interested in other tags if we are in table 3. 
diff --git a/trunk/CGI/SouthOxfordshireParser.py b/trunk/CGI/SouthOxfordshireParser.py
new file mode 100644
index 0000000..0097ee5
--- /dev/null
+++ b/trunk/CGI/SouthOxfordshireParser.py
@@ -0,0 +1,248 @@
+
+import urllib, urllib2
+
+import HTMLParser
+import urlparse
+import datetime, time
+
+# This needs a page number inserting
+search_url = "http://www.southoxon.gov.uk/ccm/planning/ApplicationList.jsp?PAGE=%d"
+
+# This needs the council reference
+comment_url = "https://forms.southoxon.gov.uk/ufs/ufsmain?formid=PLANNINGCOMMENT&PLNGAPPL_REFERENCE=%(reference)s"
+
+authority_name = "South Oxfordshire District Council"
+authority_short_name = "South Oxfordshire"
+
+
+from PlanningUtils import fixNewlines, \
+     getPostcodeFromText, \
+     PlanningAuthorityResults, \
+     PlanningApplication
+
+class SouthOxfordshireParser(HTMLParser.HTMLParser):
+    """In this case we'll take the date, so that we can avoid doing downloads for
+    the other days in this week's file. This date should be a datetime.date object.
+    """
+    def __init__(self):
+        HTMLParser.HTMLParser.__init__(self)
+
+        self._requested_date = None
+
+        # We'll keep a count of the number of tables we have seen.
+        # All the interesting stuff is in table 3.
+        self._table_count = 0
+
+        # While inside table 3, we'll keep a count of the number of
+        # <td>s we have seen. What is in which numbered <td> is detailed below.
+        # 1 reference
+        # 3 place and description
+        # 5 date received
+        # 2 and 4 are just padding
+        self._td_count = 0
+
+        # This is just a flag to say that we are now ready to get the reference
+        # from the next bit of data
+        self._get_reference = False
+
+        self._data = ''
+
+        # this will hold the application we are currently working on.
+        self._current_application = None
+
+        # The object which stores our set of planning application results
+        self._results = PlanningAuthorityResults(authority_name, authority_short_name)
+
+    def handle_starttag(self, tag, attrs):
+        # if we see a table tag, increment the table count.
+        if tag == 'table':
+            self._table_count += 1
+
+        # we are only interested in other tags if we are in table 3.
+        if self._table_count == 3:
+
+            # If we are starting a <tr>, create a new PlanningApplication object
+            # for the application currently being processed
+            if tag == 'tr':
+                self._current_application = PlanningApplication()
+
+            # if we see a td, increment the count.
+            if tag == 'td':
+                self._td_count += 1
+
+            # if we are in the first <td>, and we see a link,
+            # then it is to the info page for this application.
+            if tag == 'a' and self._td_count == 1:
+                for key, value in attrs:
+                    if key == 'href':
+                        url_end = value
+                        self._current_application.info_url = urlparse.urljoin(search_url, url_end)
+
+                        # We now know that the next bit of data is the reference
+                        self._get_reference = True
+
+                        # href is the only attribute we are interested in.
+                        break
+
+    def handle_endtag(self, tag):
+        # There is no need to do anything unless we are in table 3.
+        if self._table_count == 3:
+
+            # The end </tr> indicates that the current application is finished.
+            # Now we can fetch the info page to get the address, postcode,
+            # and description.
+            # If we don't have a reference, then we are in the header row,
+            # which we don't want.
+            # There is no point in doing this if the date is not the requested one.
+            if tag == 'tr' and \
+                   self._current_application.council_reference is not None and \
+                   self._current_application.date_received == self._requested_date:
+
+                info_page_parser = SouthOxfordshireInfoURLParser()
+                info_page_parser.feed(urllib2.urlopen(self._current_application.info_url).read())
+
+                self._current_application.address = info_page_parser.address
+                self._current_application.postcode = getPostcodeFromText(info_page_parser.address)
+                self._current_application.description = info_page_parser.description
+
+                # Add the current application to the results set
+                self._results.addApplication(self._current_application)
+
+            # At the end of the 5th <td>, self._data should contain
+            # the received date of the application.
+            if tag == 'td' and self._td_count == 5:
+                app_year, app_month, app_day = tuple(time.strptime(self._data, "%d %B %Y")[:3])
+                self._current_application.date_received = datetime.date(app_year, app_month, app_day)
+                self._data = ''
+                self._td_count = 0
+
+    def handle_data(self, data):
+        # There is no need to do anything if we aren't in table 3.
+        if self._table_count == 3:
+            # If we are in the first <td>, and the get_reference flag is set,
+            # then the next data is the reference.
+            if self._td_count == 1 and self._get_reference:
+                self._current_application.council_reference = data
+
+                # The comment url can now be made, as it depends only on the reference.
+                # On this site, the link to the comment page is only displayed once
+                # the planning authority has decided who is handling this application
+                # and has opened consultations. The link below works straight away,
+                # and also works for apps for which the consultation period is over.
+                # I have no idea if anything is actually done with these comments if
+                # the link is followed too early...
+                self._current_application.comment_url = comment_url %{'reference': self._current_application.council_reference}
+
+                # Set the get_reference flag back to False.
+                self._get_reference = False
+
+            # If we are in the 5th <td>, then we need to collect all the data together
+            # before we can use it. This is actually processed in handle_endtag.
+            if self._td_count == 5:
+                self._data += data
+
+    def handle_entityref(self, ref):
+        # We might have some entity refs to clear up.
+        # There is no need to bother with this if we aren't in the results table.
+        if self._table_count == 3 and self._td_count == 5:
+            if ref == 'nbsp':
+                self._data += ' '
+
+
+    def getResultsByDayMonthYear(self, day, month, year):
+        """This will return an ApplicationResults object containing the
+        applications for the date passed in."""
+
+        today = datetime.date.today()
+        self._requested_date = datetime.date(year, month, day)
+        delta = today - self._requested_date
+
+        # To get the correct page, we need page ((days div 7) + 1):
+        # e.g. a date ten days ago is on page 10/7 + 1 = 2.
+        page_number = delta.days/7 + 1
+
+        response = urllib2.urlopen(search_url %page_number)
+
+        contents = response.read()
+
+        self.feed(contents)
+
+        return self._results
+
+
+    def getResults(self, day, month, year):
+        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
+
+class SouthOxfordshireInfoURLParser(HTMLParser.HTMLParser):
+    """This parser is to get the description and address out of the info page
+    for a South Oxfordshire application."""
+
+    def __init__(self):
+        HTMLParser.HTMLParser.__init__(self)
+
+        self.address = None
+        self.description = None
+
+        # These two states will be set to:
+        # 0 - if we haven't yet got that bit
+        # 1 - if we are currently working on it
+        # 2 - if we have finished
+        self._address_state = 0
+        self._description_state = 0
+
+        # We will need to know whether or not we are in a <td>
+        self._in_td = False
+
+        # This is used for collecting together data which comes in several bits.
+        self._data = ''
+
+    def handle_starttag(self, tag, attrs):
+        # If we see the start of a <td> and we are still interested in some data,
+        # then set the td flag to True, and blank the data
+        if tag == 'td' and (self._address_state < 2 or self._description_state < 2):
+            self._in_td = True
+            self._data = ''
+
+    def handle_endtag(self, tag):
+        if tag == 'td' and (self._address_state < 2 or self._description_state < 2):
+            # If we are working on the description,
+            # set description from _data and note that we need to work on it no more.
+            if self._description_state == 1:
+                self.description = self._data
+                self._description_state = 2
+
+            # If we are working on the address,
+            # set address from _data and note that we need to work on it no more.
+            elif self._address_state == 1:
+                self.address = self._data
+                self._address_state = 2
+
+            # If we see data which says 'Description',
+            # then set the description state to working.
+            elif self._data.strip() == 'Description':
+                self._description_state = 1
+
+            # If we see data which says 'Location',
+            # then set the address state to working.
+            elif self._data.strip() == 'Location':
+                self._address_state = 1
+
+            # Note that we are leaving the <td>
+            self._in_td = False
+
+    def handle_data(self, data):
+        # if we are in a td, and we are still interested in the data for something,
+        # append the current bit to self._data
+        if self._in_td and (self._address_state < 2 or self._description_state < 2):
+            self._data += data
+
+
+# TODO
+
+# find out what time of day this is run - does it matter that
+# we aren't being careful with daylight saving time etc.?
+
+# Can we check that the scraped email address really is
+# an email address?
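+
+# A minimal usage sketch (not part of the original module; it mirrors what
+# SouthOxfordshire.cgi does):
+#
+#   from SouthOxfordshireParser import SouthOxfordshireParser
+#   parser = SouthOxfordshireParser()
+#   xml = parser.getResults('22', '2', '2007')   # returns the XML feed as a string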
diff --git a/trunk/CGI/generateCGIScripts.py b/trunk/CGI/generateCGIScripts.py
new file mode 100755
index 0000000..41b2ab1
--- /dev/null
+++ b/trunk/CGI/generateCGIScripts.py
@@ -0,0 +1,29 @@
+#!/usr/bin/python
+
+list_of_sites_filename = "PublicAccessSites.csv"
+template_filename = "CGITemplate"
+python_location = "/usr/bin/python"
+
+import csv
+from os import chmod
+
+list_of_sites_file = open(list_of_sites_filename)
+csv_reader = csv.DictReader(list_of_sites_file, quoting=csv.QUOTE_ALL, skipinitialspace=True)
+
+template_contents = open(template_filename).read()
+
+template = "#!" + python_location + "\n\n" + template_contents
+
+for site_dict in csv_reader:
+    filename = "%s.cgi" %site_dict["authority_short_name"]
+    contents = template %site_dict
+
+    this_file = open(filename, "w")
+    print "Writing %s" %filename
+    this_file.write(contents)
+    this_file.close()
+
+    chmod(filename, 0755)
+
+# need to look at:
+# "Perth and Kinross Council", "Perthshire", "http://193.63.61.22/publicaccess/"
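+
+# Expected behaviour (a sketch, assuming PublicAccessSites.csv as above):
+# running ./generateCGIScripts.py prints one line per CSV row -
+#   Writing York.cgi
+#   Writing Cherwell.cgi
+#   Writing Angus.cgi
+#   Writing Huntingdonshire.cgi
+# - and leaves each generated script executable (mode 0755) beside this file.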