From 33a1ee02abc7e671c07425cde1093c5eb2d1f108 Mon Sep 17 00:00:00 2001
From: "duncan.parkes"
Date: Fri, 5 Sep 2008 14:14:10 +0000
Subject: [PATCH] Add Hampshire scraper.

---
 python_scrapers/Hampshire.py         | 78 ++++++++++++++++++++++++++++
 python_scrapers/OtherFilesToCopy.csv |  1 +
 python_scrapers/SitesToGenerate.csv  |  1 +
 3 files changed, 80 insertions(+)
 create mode 100644 python_scrapers/Hampshire.py

diff --git a/python_scrapers/Hampshire.py b/python_scrapers/Hampshire.py
new file mode 100644
index 0000000..d29e052
--- /dev/null
+++ b/python_scrapers/Hampshire.py
@@ -0,0 +1,78 @@
+"""
+This is the scraper for Hampshire.
+
+There appears to be no way to search by date received, so what we'll do is
+go to the currently open for consultation page and just use that.
+
+I don't think we need to worry about pagination, as there are hardly any.
+
+"""
+
+import urllib2
+import urllib
+import urlparse
+
+import datetime, time
+import cgi
+
+import re
+
+from BeautifulSoup import BeautifulSoup
+
+from PlanningUtils import PlanningApplication, \
+    PlanningAuthorityResults, \
+    getPostcodeFromText
+
+date_format = "%d/%m/%Y"
+
+class HampshireParser:
+    def __init__(self, *args):
+
+        self.authority_name = "Hampshire County Council"
+        self.authority_short_name = "Hampshire"
+        self.base_url = "http://www3.hants.gov.uk/planning/mineralsandwaste/planning-applications/applications/applications-open.htm"
+
+        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
+
+
+    def getResultsByDayMonthYear(self, day, month, year):
+        # Now get the search page
+        response = urllib2.urlopen(self.base_url)
+        soup = BeautifulSoup(response.read())
+
+        trs = soup.table.table.findAll("tr", {"class": re.compile("(?:odd)|(?:even)")})
+
+
+        for tr in trs:
+            application = PlanningApplication()
+
+            tds = tr.findAll("td")
+
+            application.council_reference = tds[0].a.string.strip()
+            application.info_url = urlparse.urljoin(self.base_url, tds[0].a['href'])
+            application.address = tds[2].string.strip()
+            application.postcode = getPostcodeFromText(application.address)
+            application.description = tds[3].string.strip()
+
+            # Fetch the info url in order to get the date received and the comment url
+
+            info_response = urllib2.urlopen(application.info_url)
+
+            info_soup = BeautifulSoup(info_response.read())
+
+            application.date_received = datetime.datetime.strptime(info_soup.find(text=re.compile("\s*Received:\s*")).findNext("td").string.strip(), date_format).date()
+
+            application.comment_url = urlparse.urljoin(self.base_url, info_soup.find("input", value="Comment on this application").parent['action'])
+
+
+            self._results.addApplication(application)
+
+        return self._results
+
+    def getResults(self, day, month, year):
+        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
+
+if __name__ == '__main__':
+    parser = HampshireParser()
+    print parser.getResults(21,5,2008)
+
diff --git a/python_scrapers/OtherFilesToCopy.csv b/python_scrapers/OtherFilesToCopy.csv
index 8aec0ed..4930687 100644
--- a/python_scrapers/OtherFilesToCopy.csv
+++ b/python_scrapers/OtherFilesToCopy.csv
@@ -53,3 +53,4 @@
 "Harrow.py", "420"
 "Westminster.py", "420"
 "Halton.py", "420"
+"Hampshire.py", "420"
diff --git a/python_scrapers/SitesToGenerate.csv b/python_scrapers/SitesToGenerate.csv
index 311c2ed..079c7a0 100644
--- a/python_scrapers/SitesToGenerate.csv
+++ b/python_scrapers/SitesToGenerate.csv
@@ -257,3 +257,4 @@
 "London Borough of Harrow", "Harrow", "", "Harrow", "HarrowParser"
 "Westminster City Council", "Westminster", "", "Westminster", "WestminsterParser"
 "Halton Borough Council", "Halton", "", "Halton", "HaltonParser"
+"Hampshire County Council", "Hampshire", "", "Hampshire", "HampshireParser"