From fb7ba977ae79fa75c8435ada177c311269e843d8 Mon Sep 17 00:00:00 2001
From: "duncan.parkes"
Date: Fri, 8 Aug 2008 16:50:32 +0000
Subject: [PATCH] Add Carmarthenshire scraper.

---
 python_scrapers/Carmarthenshire.py   | 78 ++++++++++++++++++++++++++++
 python_scrapers/OtherFilesToCopy.csv |  1 +
 python_scrapers/SitesToGenerate.csv  |  1 +
 3 files changed, 80 insertions(+)
 create mode 100644 python_scrapers/Carmarthenshire.py

diff --git a/python_scrapers/Carmarthenshire.py b/python_scrapers/Carmarthenshire.py
new file mode 100644
index 0000000..bd08ac5
--- /dev/null
+++ b/python_scrapers/Carmarthenshire.py
@@ -0,0 +1,78 @@
+import urllib2
+import urllib
+import urlparse
+
+import datetime, time
+import cgi
+
+from BeautifulSoup import BeautifulSoup
+
+from PlanningUtils import PlanningApplication, \
+    PlanningAuthorityResults, \
+    getPostcodeFromText
+
+class CarmarthenshireParser:
+    def __init__(self, *args):
+        self.comments_email_address = "planning@carmarthenshire.gov.uk"
+
+        self.authority_name = "Carmarthenshire County Council"
+        self.authority_short_name = "Carmarthenshire"
+        self.base_url = "http://www.carmarthenshire.gov.uk/CCC_APPS/eng/plannaps/CCC_PlanningApplicationsResults.asp?datemode=range&in_lo_date=%(day)s%%2F%(month)s%%2F%(year)s&in_hi_date=%(day)s%%2F%(month)s%%2F%(year)s&SUBMIT=Search"
+
+        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
+
+
+    def getResultsByDayMonthYear(self, day, month, year):
+        search_day = datetime.date(year, month, day)
+
+        # Now get the search page
+        response = urllib2.urlopen(self.base_url %{"day": day,
+                                                   "month": month,
+                                                   "year": year,
+                                                   })
+        soup = BeautifulSoup(response.read())
+
+        trs = soup.findAll("tr", valign="middle")
+
+        count = 0
+        for tr in trs:
+            # The odd trs are just spacers
+            if count % 2 == 0:
+                application = PlanningApplication()
+
+                tds = tr.findAll("td")
+
+                application.date_received = search_day
+                application.council_reference = tds[1].a.string
+                application.address = tds[3].a.string
+                application.postcode = getPostcodeFromText(application.address)
+
+                # All the links in this go to the same place...
+                application.info_url = urlparse.urljoin(self.base_url, tr.a['href'])
+
+                # Still looking for description and comment url
+
+                # For the description, we'll need the info page
+                info_soup = BeautifulSoup(urllib2.urlopen(application.info_url).read())
+
+                application.description = info_soup.find(text="Description").findNext("td").findNext("td").font.string
+
+                # While we're here, lets get the OSGB grid ref
+                application.osgb_x, application.osgb_y = info_soup.find(text="Grid Reference").findNext("td").font.string.split("-")
+
+                # We'll have to use an email address for comments
+                application.comment_url = self.comments_email_address
+
+                self._results.addApplication(application)
+
+            count += 1
+
+        return self._results
+
+    def getResults(self, day, month, year):
+        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
+
+if __name__ == '__main__':
+    parser = CarmarthenshireParser()
+    print parser.getResults(8,8,2008)
+
diff --git a/python_scrapers/OtherFilesToCopy.csv b/python_scrapers/OtherFilesToCopy.csv
index d47dc1d..e03987c 100644
--- a/python_scrapers/OtherFilesToCopy.csv
+++ b/python_scrapers/OtherFilesToCopy.csv
@@ -45,3 +45,4 @@
 "AmberValley.py", "420"
 "Aberdeenshire.py", "420"
 "Brent.py", "420"
+"Carmarthenshire.py", "420"
diff --git a/python_scrapers/SitesToGenerate.csv b/python_scrapers/SitesToGenerate.csv
index fdf069f..ebc23c4 100644
--- a/python_scrapers/SitesToGenerate.csv
+++ b/python_scrapers/SitesToGenerate.csv
@@ -249,3 +249,4 @@
 "Amber Valley Borough Council", "Amber Valley", "", "AmberValley", "AmberValleyParser"
 "Aberdeenshire Council", "Aberdeenshire", "", "Aberdeenshire", "AberdeenshireParser"
 "London Borough of Brent", "Brent", "", "Brent", "BrentParser"
+"Carmarthenshire County Council", "Carmarthenshire", "", "Carmarthenshire", "CarmarthenshireParser"