From e74b8cb63130d71ee302c51efdcbf0b782d9b45a Mon Sep 17 00:00:00 2001
From: "duncan.parkes" <duncan.parkes@cf68f80a-222c-0410-aabe-5f79d0504a29>
Date: Sat, 24 Nov 2007 16:03:45 +0000
Subject: [PATCH] Add a parser for sites with urls ending in searchPageLoad.do
 This includes: Cumbria, Lincolnshire, West Sussex

It should also include:
Dorset
Somerset

but they are both down.

All of these sites belong to county councils, which are only responsible for a small number of applications, many of
which are for sites that have no postcode, so don't expect too much from these scrapers!
---
 python_scrapers/AtriumePlanning.py   | 112 +++++++++++++++++++++++++++
 python_scrapers/OtherFilesToCopy.csv |   1 +
 python_scrapers/PlanningUtils.py     |  13 ++++
 python_scrapers/SitesToGenerate.csv  |   3 +
 4 files changed, 129 insertions(+)
 create mode 100644 python_scrapers/AtriumePlanning.py

diff --git a/python_scrapers/AtriumePlanning.py b/python_scrapers/AtriumePlanning.py
new file mode 100644
index 0000000..dda04a0
--- /dev/null
+++ b/python_scrapers/AtriumePlanning.py
@@ -0,0 +1,112 @@
+import urllib2
+import urllib
+import urlparse
+
+import datetime
+#import re
+
+from BeautifulSoup import BeautifulSoup
+
+from PlanningUtils import PlanningApplication, \
+    PlanningAuthorityResults, \
+    getPostcodeFromText
+
+# Page names of the details and comment pages; joined onto each site's base_url.
+info_path = "loadFullDetails.do"
+comment_path = "loadRepresentation.do"
+
+class AtriumePlanningParser:
+    """Scraper for sites whose search form is posted to base_url."""
+
+    def __init__(self,
+                 authority_name,
+                 authority_short_name,
+                 base_url,
+                 debug=False):
+        self.authority_name = authority_name
+        self.authority_short_name = authority_short_name
+        self.base_url = base_url
+
+        self.info_url = urlparse.urljoin(base_url, info_path)
+        self.comment_url = urlparse.urljoin(base_url, comment_path)
+        self.debug = debug
+        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
+
+    def getResultsByDayMonthYear(self, day, month, year):
+        """Search for one day's applications; returns the accumulated results."""
+        # No application is in progress until a "Date Registered" cell is seen.
+        self._current_application = None
+
+        # The end date for the search needs to be one day after the start
+        # date - presumably the date is used as a timestamp at midnight
+        search_start_date = datetime.date(year, month, day)
+        search_end_date = search_start_date + datetime.timedelta(1)
+
+        search_data = urllib.urlencode({"dayRegStart": search_start_date.strftime("%d"),
+                    "monthRegStart": search_start_date.strftime("%b"),
+                    "yearRegStart": search_start_date.strftime("%Y"),
+                    "dayRegEnd": search_end_date.strftime("%d"),
+                    "monthRegEnd": search_end_date.strftime("%b"),
+                    "yearRegEnd": search_end_date.strftime("%Y"),
+                    "searchType": "current",
+                    "dispatch": "Search"
+                    })
+
+        response = urllib2.urlopen(self.base_url, search_data)
+        soup = BeautifulSoup(response.read())
+
+        # The results table follows the "Results" heading. Look the heading
+        # up once; without it there is nothing to parse.
+        results_heading = soup.find(text="Results")
+        if not results_heading:
+            return self._results
+
+        # Each application is a run of label/value cells that starts with
+        # "Date Registered" and ends with a "View Full Details" link.
+        for td in results_heading.parent.findNext("table").findAll("td"):
+            if td.string:
+                label = td.string.strip()
+                if label == "Date Registered":
+                    # We are starting a new App
+                    self._current_application = PlanningApplication()
+                    self._current_application.date_received = datetime.datetime.strptime(td.findNext("td").string, "%d-%m-%Y")
+                elif self._current_application is None:
+                    # A cell seen before any application was started.
+                    continue
+                elif label == "Application Number":
+                    self._current_application.council_reference = td.findNext("td").string
+                elif label == "Location":
+                    location = td.findNext("td").string
+                    self._current_application.address = location
+                    # A location cell without text gives nothing to search.
+                    postcode = location and getPostcodeFromText(location)
+                    if postcode:
+                        self._current_application.postcode = postcode
+                elif label == "Proposal":
+                    self._current_application.description = td.findNext("td").string
+            elif (self._current_application is not None and td.a and td.a.string
+                  and td.a.string.strip() == "View Full Details"):
+                # Only the query string (presumably the application id) is reused.
+                query_str = urlparse.urlsplit(td.a["href"])[3]
+
+                self._current_application.info_url = self.info_url + "?" + query_str
+                self._current_application.comment_url = self.comment_url + "?" + query_str
+
+                if self._current_application.is_ready():
+                    self._results.addApplication(self._current_application)
+
+        return self._results
+
+    def getResults(self, day, month, year):
+        """Return displayXML() of the day's results; arguments may be numeric strings."""
+        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
+
+
+#if __name__ == '__main__':
+#    cumbria_parser = AtriumePlanningParser("Cumbria County Council", "Cumbria", "http://217.114.50.149:7778/ePlanningOPS/loadResults.do")
+
+#    print cumbria_parser.getResults(22,11,2007)
+#    lincolnshire_parser = AtriumePlanningParser("Lincolnshire County Council", "Lincolnshire", "http://apps1.lincolnshire.gov.uk/ePlanning/loadResults.do")
+
+#    print lincolnshire_parser.getResults(22,11,2007)
+
diff --git a/python_scrapers/OtherFilesToCopy.csv b/python_scrapers/OtherFilesToCopy.csv
index b95ed11..2883758 100644
--- a/python_scrapers/OtherFilesToCopy.csv
+++ b/python_scrapers/OtherFilesToCopy.csv
@@ -12,3 +12,4 @@
 "NorthHerts.cgi", "493"
 "Enfield.cgi", "493"
 "RutlandLike.py", "420"
+"AtriumePlanning.py", "420"
\ No newline at end of file
diff --git a/python_scrapers/PlanningUtils.py b/python_scrapers/PlanningUtils.py
index 9210446..273a652 100644
--- a/python_scrapers/PlanningUtils.py
+++ b/python_scrapers/PlanningUtils.py
@@ -88,6 +88,19 @@ class PlanningApplication:
 
     def __repr__(self):
 	return self.displayXML()
+
+    def is_ready(self):
+        # Tell the caller whether the application is complete: the result
+        # is truthy only if every field below is set (it is the last
+        # operand evaluated, not necessarily a bool). The postcode has a
+        # default, so it can't be checked - set it along with the address.
+        return self.council_reference \
+            and self.address \
+            and self.description \
+            and self.info_url \
+            and self.comment_url \
+            and self.date_received
+    
         
     def displayXML(self):
         #print self.council_reference, self.address, self.postcode, self.description, self.info_url, self.comment_url, self.date_received
diff --git a/python_scrapers/SitesToGenerate.csv b/python_scrapers/SitesToGenerate.csv
index c11b358..12a7f1f 100644
--- a/python_scrapers/SitesToGenerate.csv
+++ b/python_scrapers/SitesToGenerate.csv
@@ -141,3 +141,6 @@
 "Melton Borough Council", "Melton", "http://www.meltononline.co.uk/planning/searchparam.asp", "RutlandLike", "RutlandLikeParser"
 "Harborough District Council", "Harborough", "http://pa.harborough.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser"
 "East Northamptonshire Council", "East Northants", "http://publicaccesssrv.east-northamptonshire.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser"
+"Cumbria County Council", "Cumbria", "http://217.114.50.149:7778/ePlanningOPS/loadResults.do", "AtriumePlanning", "AtriumePlanningParser"
+"Lincolnshire County Council", "Lincolnshire", "http://apps1.lincolnshire.gov.uk/ePlanning/loadResults.do", "AtriumePlanning", "AtriumePlanningParser"
+"West Sussex County Council", "West Sussex", "http://eplanning.westsussex.gov.uk/ePlanningOPS/loadResults.do", "AtriumePlanning", "AtriumePlanningParser"