From f076ecc3045774d3cfe39c8dc4dbe48400441a39 Mon Sep 17 00:00:00 2001
From: "duncan.parkes"
Date: Wed, 15 Oct 2008 14:43:12 +0000
Subject: [PATCH] Add scraper for Lichfield. Remove another unused import.

---
Notes (review commentary; not part of the commit):
    * This copy of the patch was rebuilt from a version whose newlines and
      indentation had been collapsed.  Every hunk below matches the line
      counts declared in its "@@" header (7/7, 0/67, 3/4, 3/4).
    * Inferred, not recovered: the exact leading whitespace of indented
      lines (4-space steps assumed), and the position of the blank context
      line in the Carmarthenshire.py hunk (placed after "import cgi").
      Verify both against the repository before applying.
    * The author address is absent from the From: header in this copy.
    * Lichfield.py is Python 2 (urllib2, print statement).  It adds
      LichfieldParser, which downloads the council's "planning_list.php"
      page and then one detail page per table row to fill in description,
      date_received and comment_url.
    * getResultsByDayMonthYear() accepts day/month/year but never reads
      them; the module docstring states this is deliberate.
    * NOTE(review): info_url is passed through urlparse.urljoin(), while
      comment_url is stored as the raw href.  Confirm the site emits an
      absolute link there.
    * NOTE(review): each row is parsed with no guard for missing cells or
      labels, so a header row or a layout change would presumably raise
      rather than skip.  Confirm against the live markup.

 trunk/python_scrapers/Carmarthenshire.py   |  2 +-
 trunk/python_scrapers/Lichfield.py         | 67 ++++++++++++++++++++++
 trunk/python_scrapers/OtherFilesToCopy.csv |  1 +
 trunk/python_scrapers/SitesToGenerate.csv  |  1 +
 4 files changed, 70 insertions(+), 1 deletion(-)
 create mode 100644 trunk/python_scrapers/Lichfield.py

diff --git a/trunk/python_scrapers/Carmarthenshire.py b/trunk/python_scrapers/Carmarthenshire.py
index bd08ac5..b0b3d46 100644
--- a/trunk/python_scrapers/Carmarthenshire.py
+++ b/trunk/python_scrapers/Carmarthenshire.py
@@ -2,7 +2,7 @@
 import urllib2
 import urllib
 import urlparse
-import datetime, time
+import datetime
 import cgi
 
 from BeautifulSoup import BeautifulSoup
diff --git a/trunk/python_scrapers/Lichfield.py b/trunk/python_scrapers/Lichfield.py
new file mode 100644
index 0000000..326572b
--- /dev/null
+++ b/trunk/python_scrapers/Lichfield.py
@@ -0,0 +1,67 @@
+"""
+Lichfield District council has no nice search page, but it does have a page
+which has the applications received in the last 7 days, so we'll use this,
+ignoring the date passed in.
+
+"""
+
+import urllib2
+import urlparse
+
+import datetime
+
+import BeautifulSoup
+
+from PlanningUtils import PlanningApplication, \
+    PlanningAuthorityResults, \
+    getPostcodeFromText
+
+date_format = "%d/%m/%Y"
+
+class LichfieldParser:
+    def __init__(self, *args):
+
+        self.authority_name = "Lichfield District Council"
+        self.authority_short_name = "Lichfield"
+        self.base_url = "http://www.lichfielddc.gov.uk/site/scripts/planning_list.php"
+
+        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
+
+
+    def getResultsByDayMonthYear(self, day, month, year):
+        response = urllib2.urlopen(self.base_url)
+        soup = BeautifulSoup.BeautifulSoup(response.read())
+
+        trs = soup.find("table", {"class": "planningtable"}).tbody.findAll("tr")
+
+        for tr in trs:
+            application = PlanningApplication()
+
+            tds = tr.findAll("td")
+
+            application.council_reference = tds[0].a.string.strip()
+            application.info_url = urlparse.urljoin(self.base_url, tds[0].a['href'])
+            application.address = ' '.join(tds[1].contents[1].strip().split()[1:])
+            application.postcode = getPostcodeFromText(application.address)
+
+
+            # We're going to need to download the info page in order to get
+            # the comment link, the date received, and the description.
+
+            info_response = urllib2.urlopen(application.info_url)
+            info_soup = BeautifulSoup.BeautifulSoup(info_response.read())
+
+            application.description = info_soup.find(text="Proposal:").findPrevious("div").contents[1].strip()
+            application.date_received = datetime.datetime.strptime(info_soup.find(text="Date Application Valid:").findNext("span").string.strip(), date_format).date()
+            application.comment_url = info_soup.find("a", title="Comment on this planning application.")['href']
+
+            self._results.addApplication(application)
+
+        return self._results
+
+    def getResults(self, day, month, year):
+        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
+
+if __name__ == '__main__':
+    parser = LichfieldParser()
+    print parser.getResults(12,10,2008)
diff --git a/trunk/python_scrapers/OtherFilesToCopy.csv b/trunk/python_scrapers/OtherFilesToCopy.csv
index 965da04..c3ccf12 100644
--- a/trunk/python_scrapers/OtherFilesToCopy.csv
+++ b/trunk/python_scrapers/OtherFilesToCopy.csv
@@ -61,3 +61,4 @@
 "Gosport.py", "420"
 "WestDorset.py", "420"
 "Kirklees.py", "420"
+"Lichfield.py", "420"
diff --git a/trunk/python_scrapers/SitesToGenerate.csv b/trunk/python_scrapers/SitesToGenerate.csv
index d802505..0f2277c 100644
--- a/trunk/python_scrapers/SitesToGenerate.csv
+++ b/trunk/python_scrapers/SitesToGenerate.csv
@@ -267,3 +267,4 @@
 "Gosport Borough Council", "Gosport", "", "Gosport", "GosportParser"
 "West Dorset District Council", "West Dorset", "", "WestDorset", "WestDorsetParser"
 "Kirklees Council", "Kirklees", "", "Kirklees", "KirkleesParser"
+"Lichfield District Council", "Lichfield", "", "Lichfield", "LichfieldParser"