Browse Source

Add scraper for Hounslow.

master
duncan.parkes 16 years ago
parent
commit
98f49f172f
3 changed files with 74 additions and 0 deletions
  1. +72
    -0
      python_scrapers/Hounslow.py
  2. +1
    -0
      python_scrapers/OtherFilesToCopy.csv
  3. +1
    -0
      python_scrapers/SitesToGenerate.csv

+ 72
- 0
python_scrapers/Hounslow.py View File

@@ -0,0 +1,72 @@
import urllib2
import urllib
import urlparse

import datetime, time
import cgi

from BeautifulSoup import BeautifulSoup

from PlanningUtils import PlanningApplication, \
PlanningAuthorityResults, \
getPostcodeFromText

# strftime pattern for dates placed in the search URL (UK day/month/year).
date_format = "%d/%m/%Y"

class HounslowParser:
    """Scraper for planning applications published by the London Borough
    of Hounslow (planning.hounslow.gov.uk).

    Applications found for a given day are collected into a
    PlanningAuthorityResults object and serialised to XML by getResults().
    """

    def __init__(self, *args):
        # *args is accepted (and ignored) so all scraper classes share the
        # same construction interface used by the site generator.

        self.authority_name = "London Borough of Hounslow"
        self.authority_short_name = "Hounslow"
        # Single-day search URL template: the same %(date)s value is used
        # for both strRecFrom and strRecTo. The doubled '%%20' sequences
        # survive the %-formatting as URL-encoded spaces.
        self.base_url = "http://planning.hounslow.gov.uk/planningv2/planning_summary.aspx?strWeekListType=SRCH&strRecTo=%(date)s&strRecFrom=%(date)s&strWard=ALL&strAppTyp=ALL&strWardTxt=All%%20Wards&strAppTypTxt=All%%20Application%%20Types&strArea=ALL&strAreaTxt=All%%20Areas&strStreet=ALL&strStreetTxt=All%%20Streets&strPC=&strLimit=500"
        # Limited to 500 cases - putting 1000 causes a default value of 50 to be used. 500 should be plenty.

        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)


    def getResultsByDayMonthYear(self, day, month, year):
        """Fetch and parse all applications received on the given date.

        Returns the accumulated PlanningAuthorityResults instance
        (shared across calls on the same parser object).
        """
        search_day = datetime.date(year, month, day)

        # Now get the search page
        response = urllib2.urlopen(self.base_url %{"date": search_day.strftime(date_format)})
        soup = BeautifulSoup(response.read())

        # Results are shown in a table each. The tables don't have any nice
        # attributes, but they do all contain a NavString "Application",
        # and nothing else does...
        nav_strings = soup.findAll(text="Application")

        for nav_string in nav_strings:
            # The enclosing result table is the nearest <table> that
            # precedes the "Application" text node in document order.
            result_table = nav_string.findPrevious("table")

            application = PlanningApplication()
            application.date_received = search_day

            # Expected link order within a result table:
            #   links[0] -> streetmap.co.uk map link (carries x/y coords)
            #   links[1] -> application detail page (text is the reference)
            #   links[2] -> comment form
            # NOTE(review): this relies on the site always emitting at
            # least three links per table — confirm against a live page.
            links = result_table.findAll("a")

            # We can get OSGB coordinates from the link to streetmap
            # (the x and y query-string parameters of its URL; urlsplit
            # index 3 is the query component).
            map_qs_dict = cgi.parse_qs(urlparse.urlsplit(links[0]['href'])[3])
            # NOTE(review): .get("x") returns None when the parameter is
            # absent, so the [0] subscript would raise TypeError —
            # presumably x/y are always present; verify.
            application.osgb_x = map_qs_dict.get("x")[0]
            application.osgb_y = map_qs_dict.get("y")[0]

            application.council_reference = links[1].string.strip()
            # Detail/comment hrefs are relative; resolve against base_url.
            application.info_url = urlparse.urljoin(self.base_url, links[1]['href'])
            application.comment_url = urlparse.urljoin(self.base_url, links[2]['href'])

            # The address is the text node immediately before the map link;
            # collapse internal runs of whitespace to single spaces.
            application.address = ' '.join(links[0].previous.strip().split())
            application.postcode = getPostcodeFromText(application.address)

            # The description is the text node immediately before the
            # comment link.
            application.description = links[2].previous.strip()

            self._results.addApplication(application)

        return self._results

    def getResults(self, day, month, year):
        """Convenience wrapper: coerce arguments to int and return the
        day's results as an XML string."""
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()

if __name__ == '__main__':
parser = HounslowParser()
print parser.getResults(1,8,2008)


+ 1
- 0
python_scrapers/OtherFilesToCopy.csv View File

@@ -49,3 +49,4 @@
"Berwick.py", "420"
"Birmingham.py", "420"
"KingstonUponThames.py", "420"
"Hounslow.py", "420"

+ 1
- 0
python_scrapers/SitesToGenerate.csv View File

@@ -253,3 +253,4 @@
"Berwick-upon-Tweed Borough Council", "Berwick", "", "Berwick", "BerwickParser"
"Birmingham City Council", "Birmingham", "", "Birmingham", "BirminghamParser"
"Royal Borough of Kingston upon Thames", "Kingston upon Thames", "", "KingstonUponThames", "KingstonParser"
"London Borough of Hounslow", "Hounslow", "", "Hounslow", "HounslowParser"

Loading…
Cancel
Save