Add Berwick scraper.

16 years ago · fed099cffb
--- a/python_scrapers/Berwick.py
+++ b/python_scrapers/Berwick.py
@@ -0,0 +1,82 @@
 import urllib2
 import urllib
 import urlparse
 import datetime, time
 import cgi
 from BeautifulSoup import BeautifulSoup
 from PlanningUtils import PlanningApplication, \
    PlanningAuthorityResults, \
    getPostcodeFromText
 # strftime pattern (day, month, two-digit year, no separators) used to fill
 # in the "%s" placeholder of BerwickParser.base_url.
 search_date_format = "%d%m%y"
 # strptime pattern (DD/MM/YY) used to parse the " Registration Date: " value
 # scraped from each application's table.
 reg_date_format = "%d/%m/%y"
 class BerwickParser:
    """Scraper for the Berwick-upon-Tweed Borough Council planning register.

    Register pages are addressed by a date embedded in the URL (see
    ``base_url``); this parser always requests the page for a Thursday,
    chosen from the requested search date. Every application found on
    that page is added to ``self._results``.

    NOTE(review): ``self._results`` is created once in ``__init__`` and is
    appended to on every call, so calling ``getResultsByDayMonthYear``
    repeatedly on one instance accumulates applications.
    """

    # Used as the comment "URL" for every application; the page scraped here
    # is not searched for a per-application comment link.
    comments_email_address = "planning@berwick-upon-tweed.gov.uk"

    def __init__(self, *args):
        """Set up authority names, the URL template and the results holder.

        Positional arguments are accepted and ignored.
        """
        self.authority_name = "Berwick-upon-Tweed Borough Council"
        self.authority_short_name = "Berwick"
        self.base_url = "http://www.berwick-upon-tweed.gov.uk/planning/register/wl/%s.htm"
        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

    def _register_thursday(self, search_day):
        """Return the Thursday used to build the register URL for search_day.

        Monday to Thursday map to the Thursday of the same week; Friday,
        Saturday and Sunday map to the Thursday of the following week.
        """
        monday_before = search_day - datetime.timedelta(search_day.weekday())
        thursday = monday_before + datetime.timedelta(3)
        if search_day.weekday() > 3: # i.e. It is friday, saturday, or sunday
            # We need to add a week
            thursday = thursday + datetime.timedelta(7)
        return thursday

    def _value_tag(self, result_table, label):
        """Return the first <p> tag following the text node `label` in result_table."""
        return result_table.find(text=label).findNext("p")

    def getResultsByDayMonthYear(self, day, month, year):
        """Fetch the register page for the given date and collect its applications.

        day, month and year are integers. Returns ``self._results`` with one
        PlanningApplication added per " Application Number:" label found on
        the page.

        Raises whatever ``urllib2.urlopen`` raises on a failed fetch, and
        AttributeError if an expected label or value is missing from the page.
        """
        search_day = datetime.date(year, month, day)
        thursday = self._register_thursday(search_day)

        this_url = self.base_url %(thursday.strftime(search_date_format))

        # Now get the search page. Close the response explicitly so the
        # underlying connection is released even if read() fails.
        response = urllib2.urlopen(this_url)
        try:
            page_html = response.read()
        finally:
            response.close()
        soup = BeautifulSoup(page_html)

        # Each app is stored in a table of its own. The tables don't have
        # any useful attributes, so we'll find all the NavigableString objects
        # which look like " Application Number:" and then look at the
        # tables they are in.
        nav_strings = soup.findAll(text=" Application Number:")
        for nav_string in nav_strings:
            application = PlanningApplication()
            application.council_reference = nav_string.findNext("p").string.strip()
            result_table = nav_string.findPrevious("table")

            # Only the first child of the date cell is read (contents[0]),
            # unlike the other fields, which use the cell's .string.
            date_text = self._value_tag(result_table, " Registration Date: ").contents[0].strip()
            application.date_received = datetime.datetime.strptime(date_text, reg_date_format)

            application.osgb_x = self._value_tag(result_table, " Easting:").string.strip()
            application.osgb_y = self._value_tag(result_table, " Northing:").string.strip()
            application.description = self._value_tag(result_table, " Proposed Development:").string.strip()
            application.address = self._value_tag(result_table, " Location:").string.strip()
            application.postcode = getPostcodeFromText(application.address)

            # All applications share one page, so that page is the info URL.
            application.info_url = this_url
            application.comment_url = self.comments_email_address
            self._results.addApplication(application)
        return self._results

    def getResults(self, day, month, year):
        """Return the results for the given date as XML.

        day, month and year may be ints or numeric strings; they are
        converted with int() before the lookup.
        """
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
 if __name__ == '__main__':
    # Manual run: scrape the register for 21 May 2008 and print the XML.
    berwick = BerwickParser()
    xml_output = berwick.getResults(21, 5, 2008)
    print(xml_output)
--- a/python_scrapers/OtherFilesToCopy.csv
+++ b/python_scrapers/OtherFilesToCopy.csv
@@ -46,3 +46,4 @@
 "Aberdeenshire.py", "420"
 "Brent.py", "420"
 "Carmarthenshire.py", "420"
 "Berwick.py", "420"
--- a/python_scrapers/SitesToGenerate.csv
+++ b/python_scrapers/SitesToGenerate.csv
@@ -250,3 +250,4 @@
 "Aberdeenshire Council", "Aberdeenshire", "", "Aberdeenshire", "AberdeenshireParser"
 "London Borough of Brent", "Brent", "", "Brent", "BrentParser"
 "Carmarthenshire County Council", "Carmarthenshire", "", "Carmarthenshire", "CarmarthenshireParser"
 "Berwick-upon-Tweed Borough Council", "Berwick", "", "Berwick", "BerwickParser"