From 98f49f172f35faf88da5a870c36efaf27ba02e98 Mon Sep 17 00:00:00 2001 From: "duncan.parkes" Date: Tue, 12 Aug 2008 12:34:20 +0000 Subject: [PATCH] Add scraper for Hounslow. --- python_scrapers/Hounslow.py | 72 ++++++++++++++++++++++++++++ python_scrapers/OtherFilesToCopy.csv | 1 + python_scrapers/SitesToGenerate.csv | 1 + 3 files changed, 74 insertions(+) create mode 100644 python_scrapers/Hounslow.py diff --git a/python_scrapers/Hounslow.py b/python_scrapers/Hounslow.py new file mode 100644 index 0000000..2a42076 --- /dev/null +++ b/python_scrapers/Hounslow.py @@ -0,0 +1,72 @@ +import urllib2 +import urllib +import urlparse + +import datetime, time +import cgi + +from BeautifulSoup import BeautifulSoup + +from PlanningUtils import PlanningApplication, \ + PlanningAuthorityResults, \ + getPostcodeFromText + +date_format = "%d/%m/%Y" + +class HounslowParser: + def __init__(self, *args): + + self.authority_name = "London Borough of Hounslow" + self.authority_short_name = "Hounslow" + self.base_url = "http://planning.hounslow.gov.uk/planningv2/planning_summary.aspx?strWeekListType=SRCH&strRecTo=%(date)s&strRecFrom=%(date)s&strWard=ALL&strAppTyp=ALL&strWardTxt=All%%20Wards&strAppTypTxt=All%%20Application%%20Types&strArea=ALL&strAreaTxt=All%%20Areas&strStreet=ALL&strStreetTxt=All%%20Streets&strPC=&strLimit=500" + # Limited to 500 cases - putting 1000 causes a default value of 50 to be used. 500 should be plenty. + + self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name) + + + def getResultsByDayMonthYear(self, day, month, year): + search_day = datetime.date(year, month, day) + + # Now get the search page + response = urllib2.urlopen(self.base_url %{"date": search_day.strftime(date_format)}) + soup = BeautifulSoup(response.read()) + + # Results are shown in a table each. The tables don't have any nice + # attributes, but they do all contain a NavString "Application", + # and nothing else does... 
+ nav_strings = soup.findAll(text="Application") + + for nav_string in nav_strings: + result_table = nav_string.findPrevious("table") + + application = PlanningApplication() + application.date_received = search_day + + links = result_table.findAll("a") + + # We can get OSGB coordinates from the link to streetmap + map_qs_dict = cgi.parse_qs(urlparse.urlsplit(links[0]['href'])[3]) + + application.osgb_x = map_qs_dict.get("x")[0] + application.osgb_y = map_qs_dict.get("y")[0] + + application.council_reference = links[1].string.strip() + application.info_url = urlparse.urljoin(self.base_url, links[1]['href']) + application.comment_url = urlparse.urljoin(self.base_url, links[2]['href']) + + application.address = ' '.join(links[0].previous.strip().split()) + application.postcode = getPostcodeFromText(application.address) + + application.description = links[2].previous.strip() + + self._results.addApplication(application) + + return self._results + + def getResults(self, day, month, year): + return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML() + +if __name__ == '__main__': + parser = HounslowParser() + print parser.getResults(1,8,2008) + diff --git a/python_scrapers/OtherFilesToCopy.csv b/python_scrapers/OtherFilesToCopy.csv index 6fa19ef..4c0c81a 100644 --- a/python_scrapers/OtherFilesToCopy.csv +++ b/python_scrapers/OtherFilesToCopy.csv @@ -49,3 +49,4 @@ "Berwick.py", "420" "Birmingham.py", "420" "KingstonUponThames.py", "420" +"Hounslow.py", "420" diff --git a/python_scrapers/SitesToGenerate.csv b/python_scrapers/SitesToGenerate.csv index b6536f5..5ccb92f 100644 --- a/python_scrapers/SitesToGenerate.csv +++ b/python_scrapers/SitesToGenerate.csv @@ -253,3 +253,4 @@ "Berwick-upon-Tweed Borough Council", "Berwick", "", "Berwick", "BerwickParser" "Birmingham City Council", "Birmingham", "", "Birmingham", "BirminghamParser" "Royal Borough of Kingston upon Thames", "Kingston upon Thames", "", "KingstonUponThames", "KingstonParser" +"London 
"London Borough of Hounslow", "Hounslow", "", "Hounslow", "HounslowParser"