From 6a2ed311a407bd5e49774cd7f547a905d3af6d4e Mon Sep 17 00:00:00 2001
From: "duncan.parkes" <duncan.parkes@gmail.com>
Date: Sun, 7 Jun 2009 23:07:19 +0000
Subject: [PATCH] Add Wychavon scraper from Thomas.

---
 OtherFilesToCopy.csv        |   3 +-
 SitesToGenerate.csv         |   1 +
 python_scrapers/Wychavon.py | 128 ++++++++++++++++++++++++++++++++++++
 3 files changed, 131 insertions(+), 1 deletion(-)
 create mode 100644 python_scrapers/Wychavon.py

diff --git a/OtherFilesToCopy.csv b/OtherFilesToCopy.csv
index d8e2a79..6f8fb30 100644
--- a/OtherFilesToCopy.csv
+++ b/OtherFilesToCopy.csv
@@ -68,4 +68,5 @@
 "Broxtowe.py", "420"
 "Mendip.py", "420"
 "Weymouth.py", "420"
-"Solihull.py", "420"
\ No newline at end of file
+"Solihull.py", "420"
+"Wychavon.py", "420"
diff --git a/SitesToGenerate.csv b/SitesToGenerate.csv
index 081094b..e6052e7 100644
--- a/SitesToGenerate.csv
+++ b/SitesToGenerate.csv
@@ -317,6 +317,7 @@
 "Wolverhampton City Council","Wolverhampton",,,,,,"http://planningonline.wolverhampton.gov.uk/PublicAccess/tdc/","PublicAccess","PublicAccessParser",
 "Worcester City Council","Worcester",,,,,,"http://www.worcester.gov.uk:8080/planet/ispforms.asp?serviceKey=SysDoc-PlanetApplicationEnquiry","Planet","PlanetParser",
 "Worthing Borough Council","Worthing",,,,,,"http://planning.worthing.gov.uk/publicaccess/tdc/","PublicAccess","PublicAccessParser",
+"Wychavon District Council","Wychavon",,,,,,,"Wychavon","WychavonParser",
 "Wycombe District Council","Wycombe",,,,,,"http://planningpa.wycombe.gov.uk/publicaccess/tdc/","PublicAccess","PublicAccessParser",
 "Wyre Forest District Council","Wyre Forest",,,,,,"http://www.wyreforest.gov.uk/fastweb/","FastWeb","FastWeb",
 "City of York Council","York",,,,,,"http://planning.york.gov.uk/PublicAccess/tdc/","PublicAccess","PublicAccessParser",
diff --git a/python_scrapers/Wychavon.py b/python_scrapers/Wychavon.py
new file mode 100644
index 0000000..43c7693
--- /dev/null
+++ b/python_scrapers/Wychavon.py
@@ -0,0 +1,128 @@
+"""
+This is the screenscraper for planning apps from Wychavon District Council.
+
+This appears to be an Acolnet variant, and is searched by a block of months.
+"""
+
+import urllib
+import urlparse
+
+import datetime
+
+from BeautifulSoup import BeautifulSoup
+
+from PlanningUtils import PlanningApplication, \
+    PlanningAuthorityResults, \
+    getPostcodeFromText
+
+class WychavonParser:
+    
+    def __init__(self, *args):
+        self.authority_name = "Wychavon"
+        self.authority_short_name = "Wychavon"
+        # Currently hard coded--if this address updates, we'll need to scrape
+        # the search form to get it each time.
+        self.base_url = "http://www.e-wychavon.org.uk/scripts/plan2005/\
+acolnetcgi.exe?ACTION=UNWRAP&WhereDescription=General%20Search&\
+Whereclause3=%27%30%31%2F%7BEdtMonthEnd%7D%2F%7BEdtYearEnd%7D%27&\
+RIPNAME=Root%2EPages%2EPgeDC%2EPgeListCases"
+        
+        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
+
+
+    def getResultsByDayMonthYear(self, day, month, year):
+        
+        form_data = "EdtYearNo=&EdtCaseNo=&EdtApplicant=&EdtAgent=&EdtLocation"\
+        + "=&EdtWard=&EdtMonthStart1=" + str(month) + "&EdtYearStart=" \
+        + str(year) + "&EdtMonthEnd=" + str(month) + "&EdtYearEnd="\
+        + str(year) + "&submit=Search"
+
+        # Fetch the results
+        response = urllib.urlopen(self.base_url, form_data)
+        soup = BeautifulSoup(response.read())
+        
+        
+        #Each set of results has its own table
+        results_tables = soup.findAll("table", cellpadding="2", cols="4")
+
+        for table in results_tables:
+            application = PlanningApplication()
+
+            trs = table.findAll("tr")
+            
+            application.council_reference = trs[0].findAll("td")[1].font.font.\
+                                                        font.string.strip()
+            
+            relative_info_url = trs[0].findAll("td")[1].a['href']
+            application.info_url = urlparse.urljoin(self.base_url, relative_info_url)
+            
+            application.address = trs[1].findAll("td")[1].font.string.strip()
+            application.postcode = getPostcodeFromText(application.address)
+            
+            #This code avoids an error if there's no description given.
+            descrip = trs[2].findAll("td")[1].font.string
+            if descrip == None:
+                application.description = ""
+            else:
+                application.description = descrip.strip()
+            
+            rec_m, rec_d, rec_y = trs[1].findAll("td")[3].font.string.strip().\
+                                                                    split("/")
+                                                                    
+            application.date_received = datetime.date(int(rec_y), int(rec_m), \
+                                                                    int(rec_d))
+            
+            apptype = trs[0].findAll("td")[3].font.string
+            # Avoids throwing an error if no apptype is given (this can happen)
+            if apptype != None:
+                apptype = apptype.strip()
+            
+            # Is all this really necessary? I don't know, but I've assumed that
+            # it is. The form will appear without the suffix, I don't know if
+            # the council's backend would accept it or not. Current behaviour
+            # is to degrade silently to no suffix if it can't match an
+            # application type.
+            if apptype == "Telecommunications":
+                # Don't know why it's a naked IP rather than sitting on the
+                # same site, but there it is.
+                application.comment_url = "http://81.171.139.151/WAM/createCom"\
+                +"ment.do?action=CreateApplicationComment&applicationType=PLANNI"\
+                +"NG&appNumber=T3/" + application.council_reference + "/TC"
+            else:
+                comment_url = "http://81.171.139.151/WAM/createComment.do?acti"\
+                +"on=CreateApplicationComment&applicationType=PLANNING&appNumber"\
+                +"=W/" + application.council_reference + "/"
+                suffix = ""
+                if apptype == "Householder planning application":
+                    suffix = "PP"
+                elif apptype == "Non-householder planning application":
+                    suffix = "PN"
+                elif apptype == "Outline applications":
+                    suffix = "OU"
+                elif apptype == "Change of use":
+                    suffix = "CU"
+                elif apptype == "Listed Building consent":
+                    suffix = "LB"
+                elif apptype == "Advertisement application":
+                    suffix = "AA"
+                elif apptype == "Certificate of Lawfulness Existing":
+                    suffix = "LUE"
+                elif apptype == "Approval of reserved matters":
+                    suffix = "VOC"
+                #These are all the ones that I found, except "Advice - Pre-app/
+                #Householder", the suffix for which is inconsistent. The suffix
+                #for this could be obtained by scraping the description page for
+                #each application.
+                application.comment_url = comment_url + suffix
+
+            self._results.addApplication(application)
+
+        return self._results
+
+    def getResults(self, day, month, year):
+        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
+
+if __name__ == '__main__':
+    parser = WychavonParser()
+    #Put this in with constant numbers, copying the Barnsley example. Works for testing, but should it use the arguments for a real run?
+    print parser.getResults(16,3,2009)