diff --git a/trunk/python_scrapers/Berwick.py b/trunk/python_scrapers/Berwick.py new file mode 100644 index 0000000..50c76b9 --- /dev/null +++ b/trunk/python_scrapers/Berwick.py @@ -0,0 +1,82 @@ + +import urllib2 +import urllib +import urlparse + +import datetime, time +import cgi + +from BeautifulSoup import BeautifulSoup + +from PlanningUtils import PlanningApplication, \ + PlanningAuthorityResults, \ + getPostcodeFromText + +search_date_format = "%d%m%y" +reg_date_format = "%d/%m/%y" + +class BerwickParser: + comments_email_address = "planning@berwick-upon-tweed.gov.uk" + + def __init__(self, *args): + + self.authority_name = "Berwick-upon-Tweed Borough Council" + self.authority_short_name = "Berwick" + self.base_url = "http://www.berwick-upon-tweed.gov.uk/planning/register/wl/%s.htm" + + self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name) + + + def getResultsByDayMonthYear(self, day, month, year): + search_day = datetime.date(year, month, day) + + monday_before = search_day - datetime.timedelta(search_day.weekday()) + + thursday = monday_before + datetime.timedelta(3) + if search_day.weekday() > 3: # i.e. It is friday, saturday, or sunday + # We need to add a week + thursday = thursday + datetime.timedelta(7) + + this_url = self.base_url %(thursday.strftime(search_date_format)) + # Now get the search page + response = urllib2.urlopen(this_url) + soup = BeautifulSoup(response.read()) + + # Each app is stored in a table of its own. The tables don't have + # any useful attributes, so we'll find all the NavigableString objects + # which look like " Application Number:" and then look at the + #tables they are in. + + nav_strings = soup.findAll(text=" Application Number:") + + for nav_string in nav_strings: + application = PlanningApplication() + + application.council_reference = nav_string.findNext("p").string.strip() + + result_table = nav_string.findPrevious("table") + + application.date_received = datetime.datetime.strptime(result_table.find(text=" Registration Date: ").findNext("p").contents[0].strip(), reg_date_format) + + application.osgb_x = result_table.find(text=" Easting:").findNext("p").string.strip() + application.osgb_y = result_table.find(text=" Northing:").findNext("p").string.strip() + + application.description = result_table.find(text=" Proposed Development:").findNext("p").string.strip() + application.address = result_table.find(text=" Location:").findNext("p").string.strip() + application.postcode = getPostcodeFromText(application.address) + + application.info_url = this_url + + application.comment_url = self.comments_email_address + + self._results.addApplication(application) + + return self._results + + def getResults(self, day, month, year): + return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML() + +if __name__ == '__main__': + parser = BerwickParser() + print parser.getResults(21,5,2008) + diff --git a/trunk/python_scrapers/OtherFilesToCopy.csv b/trunk/python_scrapers/OtherFilesToCopy.csv index e03987c..62cb1fb 100644 --- a/trunk/python_scrapers/OtherFilesToCopy.csv +++ b/trunk/python_scrapers/OtherFilesToCopy.csv @@ -46,3 +46,4 @@ "Aberdeenshire.py", "420" "Brent.py", "420" "Carmarthenshire.py", "420" +"Berwick.py", "420" diff --git a/trunk/python_scrapers/SitesToGenerate.csv b/trunk/python_scrapers/SitesToGenerate.csv index ebc23c4..7197adf 100644 --- a/trunk/python_scrapers/SitesToGenerate.csv +++ b/trunk/python_scrapers/SitesToGenerate.csv @@ -250,3 +250,4 @@ "Aberdeenshire Council", "Aberdeenshire", "", "Aberdeenshire", "AberdeenshireParser" "London Borough of Brent", "Brent", "", "Brent", "BrentParser" "Carmarthenshire County Council", "Carmarthenshire", "", "Carmarthenshire", "CarmarthenshireParser" +"Berwick-upon-Tweed Borough Council", "Berwick", "", "Berwick", "BerwickParser" \ No newline at end of file