From d754878ea33b2ab9332cea428d368b8a02b72ad1 Mon Sep 17 00:00:00 2001 From: "duncan.parkes" Date: Mon, 16 Jun 2008 11:00:20 +0000 Subject: [PATCH] Add Shropshire, North Yorkshire and South Northamptonshire. --- python_scrapers/OtherFilesToCopy.csv | 1 + python_scrapers/Shropshire.py | 160 +++++++++++++++++++++++++++ python_scrapers/SitesToGenerate.csv | 3 + 3 files changed, 164 insertions(+) create mode 100644 python_scrapers/Shropshire.py diff --git a/python_scrapers/OtherFilesToCopy.csv b/python_scrapers/OtherFilesToCopy.csv index bf25769..c4994cd 100644 --- a/python_scrapers/OtherFilesToCopy.csv +++ b/python_scrapers/OtherFilesToCopy.csv @@ -30,3 +30,4 @@ "Flintshire.py", "420" "Maldon.py", "420" "Medway.py", "420" +"Shropshire.py", "420" diff --git a/python_scrapers/Shropshire.py b/python_scrapers/Shropshire.py new file mode 100644 index 0000000..54bdd1e --- /dev/null +++ b/python_scrapers/Shropshire.py @@ -0,0 +1,160 @@ +import urllib2 +import urllib +import urlparse + +import datetime, time +import cgi + +import re + +from BeautifulSoup import BeautifulSoup + +from PlanningUtils import PlanningApplication, \ + PlanningAuthorityResults, \ + getPostcodeFromText + +date_format = "%d/%m/%Y" + +class ShropshireParser: + reference_input_name = "ApplNum" + contact_email_name = "offemail" + + comment_url = None + + use_validated_date = False + + def _get_info_link_list(self, soup): + return [tr.a for tr in soup.find("table", id="tbllist").findAll("tr", recursive=False)[:-1]] + + def _get_postcode(self, info_soup): + return info_soup.find("input", {"name": "Postcode"})['value'] + + def __init__(self, authority_name, authority_short_name, base_url, debug=False): + self.debug = debug + + self.authority_name = authority_name + self.authority_short_name = authority_short_name + self.base_url = base_url + self._split_base_url = urlparse.urlsplit(base_url) + + self._current_application = None + self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name) + + + def getResultsByDayMonthYear(self, day, month, year): + search_date = datetime.date(year, month, day) + search_date_string = search_date.strftime(date_format) + + if self.use_validated_date: + received_search_string = "" + validated_search_string = search_date_string + else: + received_search_string = search_date_string + validated_search_string = "" + + search_data = urllib.urlencode([ + ("txtAppNum", ""), + ("txtAppName", ""), + ("txtAppLocn", ""), + ("txtAppPCode", ""), + ("txtAppRecFrom", received_search_string), + ("txtAppRecTo", received_search_string), + ("txtAppDecFrom", ""), + ("txtAppDecTo", ""), + ("txtAppValFrom", validated_search_string), + ("txtAppValTo", validated_search_string), + ("district_drop", ""), + ("parish_drop", ""), + ("ward_drop", ""), + ("ft", "yes"), + ("submit1", "Submit"), + ]) + + split_search_url = self._split_base_url[:3] + (search_data, '') + search_url = urlparse.urlunsplit(split_search_url) + + response = urllib2.urlopen(search_url) + soup = BeautifulSoup(response.read()) + + # Handle the case where there are no apps + if soup.find(text=re.compile("No applications matched your query")): + return self._results + + + info_link_list = self._get_info_link_list(soup) + + for app_link in info_link_list: + self._current_application = PlanningApplication() + + # We could get this from the info soup, but as we already know it, why bother. 
+ self._current_application.date_received = search_date + + self._current_application.info_url = urlparse.urljoin(self.base_url, app_link['href']) + + # To get the postcode we will need to download each info page + info_response = urllib2.urlopen(self._current_application.info_url) + info_soup = BeautifulSoup(info_response.read()) + + self._current_application.council_reference = info_soup.find("input", {"name": self.reference_input_name})['value'] + self._current_application.address = info_soup.find("textarea", {"name": "Location"}).string.strip() + self._current_application.postcode = self._get_postcode(info_soup) + + self._current_application.description = info_soup.find("textarea", {"name": "Proposal"}).string.strip() + + if self.comment_url: + self._current_application.comment_url = self.comment_url + else: + self._current_application.comment_url = info_soup.find("input", {"name": self.contact_email_name})['value'] + + # There is an OSGB position here :-) + self._current_application.osgb_x = info_soup.find("input", {"name": "Easting"})['value'] + self._current_application.osgb_y = info_soup.find("input", {"name": "Northing"})['value'] + + self._results.addApplication(self._current_application) + + return self._results + + + def getResults(self, day, month, year): + return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML() + + +class NorthYorkshireParser(ShropshireParser): + reference_input_name = "txtAppNum" + contact_email_name = "contactEmail" + + comment_url = None + + # The date we give as the date_received here is actually the validated date. + use_validated_date = True + + def _get_postcode(self, info_soup): + return getPostcodeFromText(self._current_application.address) + + def _get_info_link_list(self, soup): + return [div.a for div in soup.findAll("div", {"class": "listApplNum"})] + + +class SouthNorthamptonshireParser(ShropshireParser): + reference_input_name = "txtAppNum" + + comment_url = "http://www.southnorthants.gov.uk/mandoforms/servlet/com.mandoforms.server.MandoformsServer?MF_XML=ApplicationComments&MF_DEVICE=HTML" + + def _get_postcode(self, info_soup): + return getPostcodeFromText(self._current_application.address) + + def _get_info_link_list(self, soup): + return soup.find("div", {"class": "div-content-class"}).findAll("a") + +if __name__ == '__main__': + parser = ShropshireParser("Shropshire County Council", "Shropshire", "http://planning.shropshire.gov.uk/PlanAppList.asp") + print parser.getResults(6,6,2008) +# parser = NorthYorkshireParser("North Yorkshire County Council", "North Yorkshire", "https://onlineplanningregister.northyorks.gov.uk/Online%20Register/PlanAppList.asp") +# print parser.getResults(10,6,2008) +# parser = SouthNorthamptonshireParser("South Northamptonshire Council", "South Northamptonshire", "http://snc.planning-register.co.uk/PlanAppList.asp") +# print parser.getResults(5,6,2008) + + +# TODO + +#1) Pagination: South Northants paginates at 25. I doubt this is a problem. Should also check out the others. 
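
A note on the pagination TODO above: if paging does turn out to matter, one
possible shape is a follow-the-next-link loop around the existing per-page
scrape. The sketch below is not part of the patch, and it assumes things that
would need checking against each site: that a paginated results page carries
an anchor whose text contains "Next", and that its href is relative to the
current page. Python 2 and BeautifulSoup 3, to match the scrapers:

    import re
    import urllib2
    import urlparse

    from BeautifulSoup import BeautifulSoup

    def scrape_all_pages(first_url, scrape_page):
        # Call scrape_page(soup) for each results page, following any
        # "Next" link until there isn't one. The link text is an
        # assumption, not taken from any of the three sites.
        page_url = first_url
        seen = set([first_url])
        while True:
            soup = BeautifulSoup(urllib2.urlopen(page_url).read())
            scrape_page(soup)
            # BeautifulSoup 3 returns the NavigableString when searching
            # by text, so step up to the enclosing <a> for the href.
            next_text = soup.find(text=re.compile("Next"))
            next_a = next_text.findParent("a") if next_text else None
            if next_a is None:
                break
            page_url = urlparse.urljoin(page_url, next_a['href'])
            if page_url in seen:  # guard against a self-linking last page
                break
            seen.add(page_url)
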
diff --git a/python_scrapers/SitesToGenerate.csv b/python_scrapers/SitesToGenerate.csv index c349638..394dbc4 100644 --- a/python_scrapers/SitesToGenerate.csv +++ b/python_scrapers/SitesToGenerate.csv @@ -231,3 +231,6 @@ "Maldon District Council", "Maldon", "http://forms.maldon.gov.uk:8080/PlanApp/jsp/searchPlanApp-action.do", "Maldon", "MaldonParser" "Medway Council", "Medway", "", "Medway", "MedwayParser" "Pendle Borough Council", "Pendle", "http://bopdoccip.pendle.gov.uk/PlanApp/jsp/searchPlanApp-action.do", "Maldon", "PendleParser" +"Shropshire County Council", "Shropshire", "http://planning.shropshire.gov.uk/PlanAppList.asp", "Shropshire", "ShropshireParser" +"North Yorkshire County Council", "North Yorkshire", "https://onlineplanningregister.northyorks.gov.uk/Online%20Register/PlanAppList.asp", "Shropshire", "NorthYorkshireParser" +"South Northamptonshire Council", "South Northamptonshire", "http://snc.planning-register.co.uk/PlanAppList.asp", "Shropshire", "SouthNorthamptonshireParser"
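
The three new rows in SitesToGenerate.csv all name the single Shropshire.py
scraper file but three different parser classes, which is how one module
serves several councils running the same register software. As a minimal
sketch of how such a row could drive a parser, assuming every row has the
five columns shown above and using an arbitrary example date (the loader
itself is illustrative; the real site-generation code lives elsewhere in the
repository and is not shown in this patch):

    import csv

    import Shropshire

    # skipinitialspace handles the ", " between quoted fields in the CSV.
    reader = csv.reader(open("SitesToGenerate.csv"), skipinitialspace=True)
    for full_name, short_name, base_url, module_name, class_name in reader:
        if module_name != "Shropshire":
            continue  # only the three parsers added by this patch
        # Look the parser class up by the name given in the CSV row.
        parser_class = getattr(Shropshire, class_name)
        parser = parser_class(full_name, short_name, base_url)
        print parser.getResults(16, 6, 2008)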