diff --git a/python_scrapers/Aberdeenshire.py b/python_scrapers/Aberdeenshire.py index 7716230..abee6a0 100644 --- a/python_scrapers/Aberdeenshire.py +++ b/python_scrapers/Aberdeenshire.py @@ -3,7 +3,7 @@ import urllib2 import urllib import urlparse -import datetime, time +import datetime import cgi import re diff --git a/python_scrapers/Berwick.py b/python_scrapers/Berwick.py index 50c76b9..faff353 100644 --- a/python_scrapers/Berwick.py +++ b/python_scrapers/Berwick.py @@ -3,7 +3,7 @@ import urllib2 import urllib import urlparse -import datetime, time +import datetime import cgi from BeautifulSoup import BeautifulSoup diff --git a/python_scrapers/Kirklees.py b/python_scrapers/Kirklees.py new file mode 100644 index 0000000..370b47f --- /dev/null +++ b/python_scrapers/Kirklees.py @@ -0,0 +1,85 @@ +import urllib2 +import urllib +import urlparse + +import datetime, time +import cgi + +import BeautifulSoup + +from PlanningUtils import PlanningApplication, \ + PlanningAuthorityResults, \ + getPostcodeFromText + +search_date_format = "%d%%2F%m%%2F%Y" +received_date_format = "%d %b %Y" + +class KirkleesParser: + def __init__(self, *args): + + self.authority_name = "Kirklees Council" + self.authority_short_name = "Kirklees" + self.base_url = "http://www.kirklees.gov.uk/business/planning/List.asp?SrchApp=&SrchName=&SrchPostCode=&SrchStreet=&SrchDetails=&SrchLocality=&RorD=A&SrchDteFr=%(date)s&SrchDteTo=%(date)s&Submit=Search&pageNum=%(pagenum)d" + self.comments_email_address = "planning.contactcentre@kirklees.gov.uk" + + self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name) + + + def getResultsByDayMonthYear(self, day, month, year): + search_date = datetime.date(year, month, day) + + pagenum = 1 + + while pagenum: + response = urllib2.urlopen(self.base_url %{"date": search_date.strftime(search_date_format), + "pagenum": pagenum} + ) + soup = BeautifulSoup.BeautifulSoup(response.read()) + + # This is not a nice way to find the results table, but I can't + # see anything good to use, and it works... + + # There are two trs with style attributes per app. This will find all the first ones of the pairs. + trs = soup.find("table", border="0", cellpadding="0", cellspacing="2", width="100%", summary="").findAll("tr", style=True)[::2] + + for tr in trs: + tds = tr.findAll("td") + date_received = datetime.datetime.strptime(tds[3].string.strip(), received_date_format).date() + + # Stop looking through the list if we have found one which is earlier than the date searched for. + if date_received < search_date: + # If we break out, then we won't want the next page + pagenum = None + break + + application = PlanningApplication() + application.date_received = date_received + + application.council_reference = tds[0].small.string.strip() + + # The second