From 8d823ca64955ad066f1c0c993d110f47906c3388 Mon Sep 17 00:00:00 2001
From: "duncan.parkes"
Date: Tue, 10 Jun 2008 16:27:30 +0000
Subject: [PATCH] Add scraper for Barnsley.

---
 python_scrapers/Barnsley.py | 83 ++++++++++++++++++++++++++++
 python_scrapers/OtherFilesToCopy.csv | 1 +
 python_scrapers/SitesToGenerate.csv | 1 +
 3 files changed, 85 insertions(+)
 create mode 100644 python_scrapers/Barnsley.py

diff --git a/python_scrapers/Barnsley.py b/python_scrapers/Barnsley.py
new file mode 100644
index 0000000..7bfa236
--- /dev/null
+++ b/python_scrapers/Barnsley.py
@@ -0,0 +1,83 @@
+"""
+This is the screenscraper for planning apps for
+Barnsley Metropolitan Borough Council.
+
+The apps for Barnsley are displayed in HTML pages, one per week, starting on
+Monday. There is no date received, so we'll have to use the date of the
+Monday at the start of that week.
+
+There is no comment URL, so we'll use the email address:
+
+Developmentcontrol@barnsley.gov.uk
+
+"""
+
+import urllib2
+import urllib
+import urlparse
+
+import datetime, time
+import cgi
+
+from BeautifulSoup import BeautifulSoup
+
+from PlanningUtils import PlanningApplication, \
+     PlanningAuthorityResults, \
+     getPostcodeFromText
+
+date_format = "%d/%m/%Y"
+
+class BarnsleyParser:
+    comments_email_address = "Developmentcontrol@barnsley.gov.uk"
+
+    def __init__(self, *args):
+
+        self.authority_name = "Barnsley Metropolitan Borough Council"
+        self.authority_short_name = "Barnsley"
+        self.base_url = "http://applications.barnsley.gov.uk/service/development/week_compact.asp?AppDate=%s"
+
+        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
+
+
+    def getResultsByDayMonthYear(self, day, month, year):
+        search_day = datetime.date(year, month, day)
+
+        # What we actually need is the Monday before the date searched for:
+        monday_before = search_day - datetime.timedelta(search_day.weekday())
+
+        # Now get the search page
+        response = urllib2.urlopen(self.base_url %(monday_before.strftime(date_format)))
+        soup = BeautifulSoup(response.read())
+
+        result_tables = soup.findAll("table", align="Center", cellpadding="3")
+
+        for table in result_tables:
+            application = PlanningApplication()
+
+            # We can set the date received and the comment url straight away.
+            application.comment_url = self.comments_email_address
+
+            trs = table.findAll("tr")
+
+            application.council_reference = trs[0].a.string.strip()
+            relative_info_url = trs[0].a['href']
+
+            application.info_url = urlparse.urljoin(self.base_url, relative_info_url)
+
+            application.date_received = monday_before
+
+            application.address = trs[1].findAll("td")[1].string.strip()
+            application.postcode = getPostcodeFromText(application.address)
+            application.description = trs[2].findAll("td")[1].string.strip()
+
+            self._results.addApplication(application)
+
+        return self._results
+
+    def getResults(self, day, month, year):
+        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
+
+if __name__ == '__main__':
+    parser = BarnsleyParser()
+    print parser.getResults(21,5,2008)
+
diff --git a/python_scrapers/OtherFilesToCopy.csv b/python_scrapers/OtherFilesToCopy.csv
index 3cd7ceb..f35d274 100644
--- a/python_scrapers/OtherFilesToCopy.csv
+++ b/python_scrapers/OtherFilesToCopy.csv
@@ -22,3 +22,4 @@
 "Planet.py", "420"
 "Ocella.py", "420"
 "IsleOfWight.py", "420"
+"Barnsley.py", "420"
diff --git a/python_scrapers/SitesToGenerate.csv b/python_scrapers/SitesToGenerate.csv
index dd89421..efd5c58 100644
--- a/python_scrapers/SitesToGenerate.csv
+++ b/python_scrapers/SitesToGenerate.csv
@@ -219,3 +219,4 @@
 "Mid Bedfordshire District Council", "Mid Beds", "http://www.midbeds.gov.uk/acolnetDC/DCpages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "MidBedsParser"
 "Cambridgeshire County Council", "Cambridgeshire", "http://planapps2.cambridgeshire.gov.uk/DCWebPages/AcolNetCGI.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser"
 "Isle of Wight Council", "Isle of Wight", "", "IsleOfWight", "IsleOfWightParser"
+"Barnsley Metropolitan Borough Council", "Barnsley", "", "Barnsley", "BarnsleyParser"
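
For reference, a minimal sketch (not part of the patch itself) of the week-start arithmetic that getResultsByDayMonthYear relies on; the dates below are purely illustrative. date.weekday() returns 0 for Monday, so subtracting it as a timedelta gives the Monday of the week containing the search date, which is the value substituted into the AppDate parameter of base_url.

import datetime

search_day = datetime.date(2008, 5, 21)                    # a Wednesday (illustrative)
monday_before = search_day - datetime.timedelta(search_day.weekday())
assert monday_before == datetime.date(2008, 5, 19)         # the Monday of that week
assert monday_before.strftime("%d/%m/%Y") == "19/05/2008"  # the format used for AppDate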