From 0bfe5dace9cb26c8773d6818d6d73634600417d7 Mon Sep 17 00:00:00 2001
From: "duncan.parkes"
Date: Fri, 5 Sep 2008 16:39:07 +0000
Subject: [PATCH] Add scraper for Hastings.

Sadly, no decent info URLs again; we had to use the search page. The real
info URL is only accessible with a Referer header.
---
 python_scrapers/Hastings.py          | 85 ++++++++++++++++++++++++++++
 python_scrapers/OtherFilesToCopy.csv |  1 +
 python_scrapers/SitesToGenerate.csv  |  1 +
 3 files changed, 87 insertions(+)
 create mode 100644 python_scrapers/Hastings.py

diff --git a/python_scrapers/Hastings.py b/python_scrapers/Hastings.py
new file mode 100644
index 0000000..5b59f9f
--- /dev/null
+++ b/python_scrapers/Hastings.py
@@ -0,0 +1,85 @@
+"""
+This is the scraper for Hastings.
+"""
+
+import urllib2
+import urllib
+import urlparse
+
+import datetime, time
+import cgi
+
+from BeautifulSoup import BeautifulSoup
+
+from PlanningUtils import PlanningApplication, \
+    PlanningAuthorityResults, \
+    getPostcodeFromText
+
+date_format = "%d/%m/%Y"
+
+class HastingsParser:
+    def __init__(self, *args):
+
+        self.authority_name = "Hastings Borough Council"
+        self.authority_short_name = "Hastings"
+#        self.base_url = "http://www.hastings.gov.uk/planning/view_applications.aspx"
+        self.base_url = "http://www.hastings.gov.uk/planning/SearchResults.aspx"
+
+        # Due to the idiotic design of the Hastings site, we can't give a proper info url.
+        # There is a sensible URL, but it only works when a Referer header is sent.
+        self.info_url = "http://www.hastings.gov.uk/planning/view_applications.aspx"
+
+        self.comment_url_template = "http://www.hastings.gov.uk/planning/planningapp_comments.aspx?appNumber=%s&syskey=%s"
+
+        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
+
+
+    def getResultsByDayMonthYear(self, day, month, year):
+        search_day = datetime.date(year, month, day)
+
+        post_data = urllib.urlencode((
+                ("type", "app"),
+                ("time", "0"),
+                ))
+
+        # Now get the search page.
+        response = urllib2.urlopen(self.base_url, post_data)
+        soup = BeautifulSoup(response.read())
+
+        caseno_strings = soup.findAll(text="Case No:")
+
+        for caseno_string in caseno_strings:
+            application = PlanningApplication()
+
+            application.council_reference = caseno_string.findNext("a").string.strip()
+            info_url = urlparse.urljoin(self.base_url, caseno_string.findNext("a")['href'])
+
+            # See above for why we can't use the proper info url.
+            application.info_url = self.info_url
+
+            # In order to avoid doing an extra download to find the comment page,
+            # we get the system key (syskey) from this url.
+
+            syskey = cgi.parse_qs(urlparse.urlsplit(info_url)[3])['id'][0]
+
+            application.date_received = datetime.datetime.strptime(caseno_string.findNext(text="Registration Date:").findNext("p").string.strip(), date_format).date()
+
+            application.address = caseno_string.findNext(text="Location:").findNext("p").string.strip()
+            application.postcode = getPostcodeFromText(application.address)
+
+            application.description = caseno_string.findNext(text="Proposal:").findNext("p").string.strip()
+
+            # e.g. http://www.hastings.gov.uk/planning/planningapp_comments.aspx?appNumber=HS/FA/08/00631&syskey=95642
+            application.comment_url = self.comment_url_template % (application.council_reference, syskey)
+
+            self._results.addApplication(application)
+
+        return self._results
+
+    def getResults(self, day, month, year):
+        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
+
+if __name__ == '__main__':
+    parser = HastingsParser()
+    print parser.getResults(2, 9, 2008)
+
diff --git a/python_scrapers/OtherFilesToCopy.csv b/python_scrapers/OtherFilesToCopy.csv
index 4930687..dafa174 100644
--- a/python_scrapers/OtherFilesToCopy.csv
+++ b/python_scrapers/OtherFilesToCopy.csv
@@ -54,3 +54,4 @@
 "Westminster.py", "420"
 "Halton.py", "420"
 "Hampshire.py", "420"
+"Hastings.py", "420"
diff --git a/python_scrapers/SitesToGenerate.csv b/python_scrapers/SitesToGenerate.csv
index 079c7a0..5de335b 100644
--- a/python_scrapers/SitesToGenerate.csv
+++ b/python_scrapers/SitesToGenerate.csv
@@ -258,3 +258,4 @@
 "Westminster City Council", "Westminster", "", "Westminster", "WestminsterParser"
 "Halton Borough Council", "Halton", "", "Halton", "HaltonParser"
 "Hampshire County Council", "Hampshire", "", "Hampshire", "HampshireParser"
+"Hastings Borough Council", "Hastings", "", "Hastings", "HastingsParser"
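
The "sensible URL" mentioned in the comments above is the per-application page,
which Hastings only serves when the request carries a Referer header. A minimal
sketch of how such a request could be made with urllib2, in the same Python 2
style as the scraper, is shown below; it is not part of the patch, and the
assumption that the site accepts view_applications.aspx as the referer value is
untested.

    import urllib2

    def fetch_with_referer(info_url,
                           referer="http://www.hastings.gov.uk/planning/view_applications.aspx"):
        # Hypothetical helper (not part of this patch): fetch the real
        # per-application page by supplying the Referer header the site requires.
        request = urllib2.Request(info_url)
        request.add_header("Referer", referer)
        return urllib2.urlopen(request).read()

    # Usage sketch, with info_url built as in getResultsByDayMonthYear:
    # html = fetch_with_referer(info_url)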