diff --git a/trunk/python_scrapers/Herefordshire.py b/trunk/python_scrapers/Herefordshire.py
new file mode 100644
index 0000000..19cfbfa
--- /dev/null
+++ b/trunk/python_scrapers/Herefordshire.py
@@ -0,0 +1,87 @@
+
+import urllib2
+import urlparse
+
+import datetime
+import re
+
+from BeautifulSoup import BeautifulSoup
+
+from PlanningUtils import PlanningApplication, \
+    PlanningAuthorityResults, \
+    getPostcodeFromText
+
+date_format = "%d/%m/%Y"
+
+class HerefordshireParser:
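+    # NB - this looks copied from the Barnsley scraper; the comment url
+    # picked up from each info page below is what actually gets used.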
+    comments_email_address = "Developmentcontrol@barnsley.gov.uk"
+
+    def __init__(self, *args):
+
+        self.authority_name = "Herefordshire County Council"
+        self.authority_short_name = "Herefordshire"
+        self.base_url = "http://www.herefordshire.gov.uk/gis/planListResults.aspx?pc=&address=&querytype=current&startdate=%(date)s&enddate=%(date)s&startrecord=0"
+        # As we are going to the info page, we may as well pick up the comment url from there.
+#        self.comment_url = "http://www.herefordshire.gov.uk/gis/planDetailCommentAddress.aspx?ApplicationId=%s" # This needs the reference inserting
+
+        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
+
+
+    def getResultsByDayMonthYear(self, day, month, year):
+        search_day = datetime.date(year, month, day)
+
+        # Now get the search page. A plain GET with the date in the
+        # query string is all that is needed.
+        response = urllib2.urlopen(self.base_url % {"date": search_day.strftime(date_format)})
+
+        soup = BeautifulSoup(response.read())
+
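+        # The results page shows this message in place of the table when
+        # there are no applications for the requested day.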
+        if not soup.find(text=re.compile("Sorry, no matches found")):
+            # There were apps for this date
+
+            # Slice off the first two rows - they are table headers.
+            trs = soup.find("table", {"class": "gis_table"}).findAll("tr")[2:]
+
+            for tr in trs:
+                application = PlanningApplication()
+                application.date_received = search_day
+
+                application.info_url = urlparse.urljoin(self.base_url, tr.a['href'])
+                application.council_reference = tr.a.string
+                # application.comment_url = self.comment_url %(application.council_reference)
+
+                tds = tr.findAll("td")
+
+                application.address = tds[1].string
+                application.postcode = getPostcodeFromText(application.address)
+
+                # This just gets us an initial segment of the description.
+                # We are going to have to download the info page...
+                #application.description = tds[2].string.strip()
+
+                info_response = urllib2.urlopen(application.info_url)
+
+                info_soup = BeautifulSoup(info_response.read())
+
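+                # We rely on the "Proposal:" label and the comment link's
+                # title attribute here - brittle if the page layout changes.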
+                application.description = info_soup.find(text="Proposal:").findNext("td").string.strip()
+                application.comment_url = urlparse.urljoin(self.base_url, info_soup.find("a", title="Link to Planning Application Comment page")['href'])
+
+                self._results.addApplication(application)
+
+        return self._results
+
+    def getResults(self, day, month, year):
+        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
+
+if __name__ == '__main__':
+    parser = HerefordshireParser()
+    print parser.getResults(31, 8, 2008)
+
diff --git a/trunk/python_scrapers/OtherFilesToCopy.csv b/trunk/python_scrapers/OtherFilesToCopy.csv
index dafa174..1bf9904 100644
--- a/trunk/python_scrapers/OtherFilesToCopy.csv
+++ b/trunk/python_scrapers/OtherFilesToCopy.csv
@@ -55,3 +55,4 @@
"Halton.py", "420"
"Hampshire.py", "420"
"Hastings.py", "420"
+"Herefordshire.py", "420"
diff --git a/trunk/python_scrapers/PlanningExplorer.py b/trunk/python_scrapers/PlanningExplorer.py
index 2da3961..5c3e2ec 100644
--- a/trunk/python_scrapers/PlanningExplorer.py
+++ b/trunk/python_scrapers/PlanningExplorer.py
@@ -631,7 +631,7 @@ if __name__ == '__main__':
     # NOTE - 04/11/2007 is a sunday
     # I'm using it to test that the scrapers behave on days with no apps.
-    parser = BlackburnParser("Blackburn With Darwen Borough Council", "Blackburn", "http://195.8.175.6/")
+#    parser = BlackburnParser("Blackburn With Darwen Borough Council", "Blackburn", "http://195.8.175.6/")
 #    parser = BroadlandParser("Broadland Council", "Broadland", "http://www.broadland.gov.uk/")
 #    parser = CamdenParser("London Borough of Camden", "Camden", "http://planningrecords.camden.gov.uk/")
 #    parser = CharnwoodParser("Charnwood Borough Council", "Charnwood", "http://portal.charnwoodbc.gov.uk/")
@@ -639,7 +639,7 @@ if __name__ == '__main__':
 #    parser = EastStaffsParser("East Staffordshire Borough Council", "East Staffs", "http://www2.eaststaffsbc.gov.uk/")
 #    parser = EppingForestParser("Epping Forest District Council", "Epping Forest", "http://plan1.eppingforestdc.gov.uk/")
 #    parser = ForestHeathParser("Forest Heath District Council", "Forest Heath", "http://195.171.177.73/")
-#    parser = HackneyParser("London Borough of Hackney", "Hackney", "http://www.hackney.gov.uk/servapps/")
+    parser = HackneyParser("London Borough of Hackney", "Hackney", "http://www.hackney.gov.uk/servapps/")
 #    parser = KennetParser("Kennet District Council", "Kennet", "http://mvm-planning.kennet.gov.uk/")
 #    parser = LincolnParser("Lincoln City Council", "Lincoln", "http://online.lincoln.gov.uk/")
 #    parser = LiverpoolParser("Liverpool City Council", "Liverpool", "http://www.liverpool.gov.uk/")
@@ -655,7 +655,7 @@ if __name__ == '__main__':
 #    parser = WalthamForestParser("Waltham Forest", "Waltham Forest", "http://planning.walthamforest.gov.uk/")
 #    parser = ConwyParser("Conwy County Borough Council", "Conwy", "http://www.conwy.gov.uk/")
 #    parser = MertonParser("London Borough of Merton", "Merton", "http://planning.merton.gov.uk")
-    print parser.getResults(3, 7, 2008)
+    print parser.getResults(4, 9, 2008)
# To Do
diff --git a/trunk/python_scrapers/PlanningUtils.py b/trunk/python_scrapers/PlanningUtils.py
index 569d892..6d854e0 100644
--- a/trunk/python_scrapers/PlanningUtils.py
+++ b/trunk/python_scrapers/PlanningUtils.py
@@ -4,13 +4,8 @@ import re
date_format = "%d/%m/%Y"
-
-def xmlQuote(text):
-    # Change &s to &amp;s
-    # I suspect there is probably some standard python
-    # function I should be using for this...
-    return text.replace('&', '&amp;')
-
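+# (If XML-quoting is needed again, xml.sax.saxutils.escape from the
+# standard library covers what xmlQuote did - it escapes &, < and >.)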
def fixNewlines(text):
# This can be used to sort out windows newlines
return text.replace("\r\n","\n")
@@ -112,13 +105,13 @@ class PlanningApplication:
         #print self.council_reference, self.address, self.postcode, self.description, self.info_url, self.comment_url, self.date_received
         contents = [
- u"