From 8d823ca64955ad066f1c0c993d110f47906c3388 Mon Sep 17 00:00:00 2001
From: "duncan.parkes"
Date: Tue, 10 Jun 2008 16:27:30 +0000
Subject: [PATCH] Add scraper for Barnsley.

---
 python_scrapers/Barnsley.py | 83 ++++++++++++++++++++++++++++
 python_scrapers/OtherFilesToCopy.csv | 1 +
 python_scrapers/SitesToGenerate.csv | 1 +
 3 files changed, 85 insertions(+)
 create mode 100644 python_scrapers/Barnsley.py

diff --git a/python_scrapers/Barnsley.py b/python_scrapers/Barnsley.py
new file mode 100644
index 0000000..7bfa236
--- /dev/null
+++ b/python_scrapers/Barnsley.py
@@ -0,0 +1,83 @@
+"""
+This is the screenscraper for planning apps for
+Barnsley Metropolitan Borough Council.
+
+The apps for Barnsley are displayed in HTML pages, one per week, starting on
+Monday. There is no date received, so we'll have to use the date of the
+Monday at the start of that week.
+
+There is no comment URL, so we'll use the email address:
+
+Developmentcontrol@barnsley.gov.uk
+
+"""
+
+import urllib2
+import urllib
+import urlparse
+
+import datetime, time
+import cgi
+
+from BeautifulSoup import BeautifulSoup
+
+from PlanningUtils import PlanningApplication, \
+     PlanningAuthorityResults, \
+     getPostcodeFromText
+
+date_format = "%d/%m/%Y"
+
+class BarnsleyParser:
+    comments_email_address = "Developmentcontrol@barnsley.gov.uk"
+
+    def __init__(self, *args):
+
+        self.authority_name = "Barnsley Metropolitan Borough Council"
+        self.authority_short_name = "Barnsley"
+        self.base_url = "http://applications.barnsley.gov.uk/service/development/week_compact.asp?AppDate=%s"
+
+        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
+
+
+    def getResultsByDayMonthYear(self, day, month, year):
+        search_day = datetime.date(year, month, day)
+
+        # What we actually need is the Monday before the date searched for:
+        monday_before = search_day - datetime.timedelta(search_day.weekday())
+
+        # Now get the search page
+        response = urllib2.urlopen(self.base_url %(monday_before.strftime(date_format)))
+        soup = BeautifulSoup(response.read())
+
+        result_tables = soup.findAll("table", align="Center", cellpadding="3")
+
+        for table in result_tables:
+            application = PlanningApplication()
+
+            # We can set the date received and the comment url straight away.
+            application.comment_url = self.comments_email_address
+
+            trs = table.findAll("tr")
+
+            application.council_reference = trs[0].a.string.strip()
+            relative_info_url = trs[0].a['href']
+
+            application.info_url = urlparse.urljoin(self.base_url, relative_info_url)
+
+            application.date_received = monday_before
+
+            application.address = trs[1].findAll("td")[1].string.strip()
+            application.postcode = getPostcodeFromText(application.address)
+            application.description = trs[2].findAll("td")[1].string.strip()
+
+            self._results.addApplication(application)
+
+        return self._results
+
+    def getResults(self, day, month, year):
+        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
+
+if __name__ == '__main__':
+    parser = BarnsleyParser()
+    print parser.getResults(21,5,2008)
+
diff --git a/python_scrapers/OtherFilesToCopy.csv b/python_scrapers/OtherFilesToCopy.csv
index 3cd7ceb..f35d274 100644
--- a/python_scrapers/OtherFilesToCopy.csv
+++ b/python_scrapers/OtherFilesToCopy.csv
@@ -22,3 +22,4 @@
 "Planet.py", "420"
 "Ocella.py", "420"
 "IsleOfWight.py", "420"
+"Barnsley.py", "420"
diff --git a/python_scrapers/SitesToGenerate.csv b/python_scrapers/SitesToGenerate.csv
index dd89421..efd5c58 100644
--- a/python_scrapers/SitesToGenerate.csv
+++ b/python_scrapers/SitesToGenerate.csv
@@ -219,3 +219,4 @@
 "Mid Bedfordshire District Council", "Mid Beds", "http://www.midbeds.gov.uk/acolnetDC/DCpages/acolnetcgi.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "MidBedsParser"
 "Cambridgeshire County Council", "Cambridgeshire", "http://planapps2.cambridgeshire.gov.uk/DCWebPages/AcolNetCGI.gov?ACTION=UNWRAP&RIPNAME=Root.pgesearch", "AcolnetParser", "AcolnetParser"
 "Isle of Wight Council", "Isle of Wight", "", "IsleOfWight", "IsleOfWightParser"
+"Barnsley Metropolitan Borough Council", "Barnsley", "", "Barnsley", "BarnsleyParser"
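
For reference, a minimal sketch (not part of the patch itself) of the week-start arithmetic that getResultsByDayMonthYear relies on; the dates below are purely illustrative. date.weekday() returns 0 for Monday, so subtracting it as a timedelta gives the Monday of the week containing the search date, which is the value substituted into the AppDate parameter of base_url.

import datetime

search_day = datetime.date(2008, 5, 21)                    # a Wednesday (illustrative)
monday_before = search_day - datetime.timedelta(search_day.weekday())
assert monday_before == datetime.date(2008, 5, 19)         # the Monday of that week
assert monday_before.strftime("%d/%m/%Y") == "19/05/2008"  # the format used for AppDate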