From af50d991f3a0961d699a93eb12b5f00537155c7b Mon Sep 17 00:00:00 2001
From: "duncan.parkes"
Date: Fri, 8 Aug 2008 15:54:31 +0000
Subject: [PATCH] Add scraper for Brent.

---
 trunk/python_scrapers/Brent.py             | 129 +++++++++++++++++++++
 trunk/python_scrapers/OtherFilesToCopy.csv |   1 +
 trunk/python_scrapers/SitesToGenerate.csv  |   1 +
 3 files changed, 131 insertions(+)
 create mode 100644 trunk/python_scrapers/Brent.py

diff --git a/trunk/python_scrapers/Brent.py b/trunk/python_scrapers/Brent.py
new file mode 100644
index 0000000..fce1f94
--- /dev/null
+++ b/trunk/python_scrapers/Brent.py
@@ -0,0 +1,129 @@
+
+import urllib2
+import urllib
+import urlparse
+
+import datetime, time
+import cgi
+import re
+
+from BeautifulSoup import BeautifulSoup
+
+from PlanningUtils import PlanningApplication, \
+    PlanningAuthorityResults, \
+    getPostcodeFromText
+
+date_format = "%d/%m/%Y"
+
+class BrentParser:
+    def __init__(self, *args):
+
+        self.authority_name = "London Borough of Brent"
+        self.authority_short_name = "Brent"
+#        self.base_url = "http://www.brent.gov.uk/servlet/ep.ext?extId=101149&byPeriod=Y&st=PL&periodUnits=day&periodMultiples=14"
+        self.base_url = "http://www.brent.gov.uk/servlet/ep.ext"
+
+        self._current_application = None
+
+        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
+
+
+    def getResultsByDayMonthYear(self, day, month, year):
+        search_day = datetime.date(year, month, day)
+
+        post_data = [
+            ("from", search_day.strftime(date_format)),
+            ("until", search_day.strftime(date_format)),
+            ("EXECUTEQUERY", "Query"),
+#            ("auth", "402"),
+            ("st", "PL"),
+            ("periodUnits", "day"),
+            ("periodMultiples", "14"),
+            ("title", "Search+by+Application+Date"),
+            ("instructions", "Enter+a+date+range+to+search+for+existing+applications+by+the+date+of+application.%0D%0A%3Cbr%3E%3Cbr%3E%0D%0A%3Cstrong%3ENote%3A%3C%2Fstrong%3E+Where+%27%28Applicant%27s+Description%29%27+appears+in+the+proposal%2C+the+text+may+subsequently+be+amended+when+the+application+is+checked."),
+            ("byFormat", "N"),
+            ("byOther1", "N"),
+            ("byOther2", "N"),
+            ("byOther3", "N"),
+            ("byOther4", "N"),
+            ("byOther5", "N"),
+            ("byPostcode", "N"),
+            ("byStreet", "N"),
+            ("byHouseNumber", "N"),
+            ("byAddress", "N"),
+            ("byPeriod", "Y"),
+            ("extId", "101149"), # I wonder what this is...
+            ("queried", "Y"),
+            ("other1Label", "Other1"),
+            ("other2Label", "Other2"),
+            ("other3Label", "Other3"),
+            ("other4Label", "Other4"),
+            ("other5Label", "Other5"),
+            ("other1List", ""),
+            ("other2List", ""),
+            ("other3List", ""),
+            ("other4List", ""),
+            ("other5List", ""),
+            ("periodLabel", "From"),
+            ("addressLabel", "Select+Address"),
+            ("print", "")
+            ]
+
+        # Now get the search page.
+        response = urllib2.urlopen(self.base_url, urllib.urlencode(post_data))
+
+        soup = BeautifulSoup(response.read())
+
+        trs = soup.find(text="Search Results").findNext("table").findAll("tr")[:-1]
+
+        # There are six trs per application, ish.
+
+        # The first contains the case no and the application date.
+        # The second contains the address.
+        # The third contains the description.
+        # The fourth contains the info page link.
+        # The fifth contains the comment link (or a note that comments are currently not being accepted).
+        # The sixth is a spacer.
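+
+        # count tracks our position within the current six-row block: it is
+        # reset to 1 on each "Case No:" row, so the modulo tests below map
+        # every following row onto the right field of the application.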
+
+        count = 0
+        for tr in trs:
+            count += 1
+
+            ref = tr.find(text=re.compile("Case No:"))
+
+            if ref:
+                self._current_application = PlanningApplication()
+                count = 1
+
+                self._current_application.council_reference = ref.split(":")[1].strip()
+                self._current_application.date_received = search_day
+
+            if count % 6 == 2:
+                self._current_application.address = tr.td.string.strip()
+                self._current_application.postcode = getPostcodeFromText(self._current_application.address)
+            if count % 6 == 3:
+                self._current_application.description = tr.td.string.strip()
+            if count % 6 == 4:
+                self._current_application.info_url = urlparse.urljoin(self.base_url, tr.a['href'])
+            if count % 6 == 5:
+                try:
+                    self._current_application.comment_url = urlparse.urljoin(self.base_url, tr.a['href'])
+                except (TypeError, KeyError):
+                    # No comment link - comments are not currently being accepted. We'll leave this app for the moment - we'll pick it up later if they start accepting comments.
+                    continue
+            if count % 6 == 0 and self._current_application.is_ready():
+                self._results.addApplication(self._current_application)
+
+        return self._results
+
+    def getResults(self, day, month, year):
+        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
+
+if __name__ == '__main__':
+    parser = BrentParser()
+    print parser.getResults(6, 8, 2008)
+
diff --git a/trunk/python_scrapers/OtherFilesToCopy.csv b/trunk/python_scrapers/OtherFilesToCopy.csv
index c8b1be0..d47dc1d 100644
--- a/trunk/python_scrapers/OtherFilesToCopy.csv
+++ b/trunk/python_scrapers/OtherFilesToCopy.csv
@@ -44,3 +44,4 @@
 "Redbridge.cgi", "493"
 "AmberValley.py", "420"
 "Aberdeenshire.py", "420"
+"Brent.py", "420"
diff --git a/trunk/python_scrapers/SitesToGenerate.csv b/trunk/python_scrapers/SitesToGenerate.csv
index c825531..fdf069f 100644
--- a/trunk/python_scrapers/SitesToGenerate.csv
+++ b/trunk/python_scrapers/SitesToGenerate.csv
@@ -248,3 +248,4 @@
 "London Borough of Enfield", "Enfield", "http://forms.enfield.gov.uk/swiftlg/apas/run/wphappcriteria.display", "SwiftLG", "SwiftLGParser"
 "Amber Valley Borough Council", "Amber Valley", "", "AmberValley", "AmberValleyParser"
 "Aberdeenshire Council", "Aberdeenshire", "", "Aberdeenshire", "AberdeenshireParser"
+"London Borough of Brent", "Brent", "", "Brent", "BrentParser"
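
As a standalone illustration (not part of the patch) of the six-rows-per-application
bookkeeping in getResultsByDayMonthYear(), the sketch below walks invented markup
through the same counter-and-modulo logic. The HTML, case number, and address are
made up for the example; the real rows come from brent.gov.uk and may differ in
detail. It runs under the same Python 2 / BeautifulSoup 3 setup the scraper uses.

import re
from BeautifulSoup import BeautifulSoup

# Invented markup mimicking one six-row application block from the results table.
sample = """<table>
<tr><td>Case No: 08/1234</td></tr>
<tr><td>1 Example Road, Wembley HA9 9ZZ</td></tr>
<tr><td>Single storey rear extension</td></tr>
<tr><td><a href="ep.ext?id=1">Details</a></td></tr>
<tr><td><a href="comment?id=1">Comment</a></td></tr>
<tr><td></td></tr>
</table>"""

labels = {1: "case no/date", 2: "address", 3: "description",
          4: "info link", 5: "comment link", 0: "spacer"}

count = 0
for tr in BeautifulSoup(sample).findAll("tr"):
    count += 1
    if tr.find(text=re.compile("Case No:")):
        count = 1  # a "Case No:" row starts a new application block
    print labels[count % 6], "->", tr.renderContents().strip()

Against real search results the same walk fills in PlanningApplication fields
instead of printing; row six closes the block, at which point the finished
application is added to the results set.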