Browse Source

Add scraper for Hastings. Sadly, no decent info URLs again. Had to use the search page. The real info URL is only accessible with a Referer header.
master
duncan.parkes 16 years ago
parent
commit
0bfe5dace9
3 changed files with 87 additions and 0 deletions
  1. +85
    -0
      python_scrapers/Hastings.py
  2. +1
    -0
      python_scrapers/OtherFilesToCopy.csv
  3. +1
    -0
      python_scrapers/SitesToGenerate.csv

+ 85
- 0
python_scrapers/Hastings.py View File

@@ -0,0 +1,85 @@
"""
This is the scraper for Hastings.
"""

import urllib2
import urllib
import urlparse

import datetime, time
import cgi

from BeautifulSoup import BeautifulSoup

from PlanningUtils import PlanningApplication, \
PlanningAuthorityResults, \
getPostcodeFromText

# strptime pattern for dates as rendered on the Hastings site, e.g. "02/09/2008".
date_format = "%d/%m/%Y"

class HastingsParser:
    """Scraper for planning applications from Hastings Borough Council.

    The council site has no usable per-application info URL: the real one
    only responds when a Referer header is supplied, so the generic
    view_applications page is reported as the info URL for every result.
    """

    def __init__(self, *args):

        self.authority_name = "Hastings Borough Council"
        self.authority_short_name = "Hastings"
        # Search endpoint that lists applications (the friendlier
        # view_applications.aspx page is not usable directly).
        self.base_url = "http://www.hastings.gov.uk/planning/SearchResults.aspx"

        # The per-application page needs a Referer header, so every
        # application gets this generic page as its info URL instead.
        self.info_url = "http://www.hastings.gov.uk/planning/view_applications.aspx"

        self.comment_url_template = "http://www.hastings.gov.uk/planning/planningapp_comments.aspx?appNumber=%s&syskey=%s"

        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)


    def getResultsByDayMonthYear(self, day, month, year):
        """Fetch the search results page and collect every application on it.

        Raises ValueError if day/month/year do not form a real date.
        """
        # Validates the requested date. NOTE(review): the date is never
        # sent to the server -- the POST body below carries no date
        # fields -- so the results may not be restricted to this day;
        # confirm what the site's "time=0" search window returns.
        datetime.date(year, month, day)

        body = urllib.urlencode((
            ("type", "app"),
            ("time", "0"),
        ))
        # POST the search and parse the resulting HTML.
        page = BeautifulSoup(urllib2.urlopen(self.base_url, body).read())

        # Each application on the page is anchored by a "Case No:" label.
        for label in page.findAll(text="Case No:"):
            app = PlanningApplication()

            ref_anchor = label.findNext("a")
            app.council_reference = ref_anchor.string.strip()

            # The link's target is the real (Referer-protected) info page;
            # we only use it to dig the system key out of its query string,
            # which saves downloading that page just to find the comment URL.
            real_info_url = urlparse.urljoin(self.base_url, ref_anchor['href'])
            syskey = cgi.parse_qs(urlparse.urlsplit(real_info_url)[3])['id'][0]

            # See the class docstring for why this is the generic page.
            app.info_url = self.info_url

            received_text = label.findNext(text="Registration Date:").findNext("p").string.strip()
            app.date_received = datetime.datetime.strptime(received_text, date_format).date()

            app.address = label.findNext(text="Location:").findNext("p").string.strip()
            app.postcode = getPostcodeFromText(app.address)

            app.description = label.findNext(text="Proposal:").findNext("p").string.strip()

            # e.g. http://www.hastings.gov.uk/planning/planningapp_comments.aspx?appNumber=HS/FA/08/00631&syskey=95642
            app.comment_url = self.comment_url_template %(app.council_reference, syskey)

            self._results.addApplication(app)

        return self._results

    def getResults(self, day, month, year):
        """Return the applications for the given date serialised as XML."""
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()

if __name__ == '__main__':
parser = HastingsParser()
print parser.getResults(2,9,2008)


+ 1
- 0
python_scrapers/OtherFilesToCopy.csv View File

@@ -54,3 +54,4 @@
"Westminster.py", "420"
"Halton.py", "420"
"Hampshire.py", "420"
"Hastings.py", "420"

+ 1
- 0
python_scrapers/SitesToGenerate.csv View File

@@ -258,3 +258,4 @@
"Westminster City Council", "Westminster", "", "Westminster", "WestminsterParser"
"Halton Borough Council", "Halton", "", "Halton", "HaltonParser"
"Hampshire County Council", "Hampshire", "", "Hampshire", "HampshireParser"
"Hastings Borough Council", "Hastings", "", "Hastings", "HastingsParser"

Loading…
Cancel
Save