Browse Source

Add Berwick scraper.

master
duncan.parkes 16 years ago
parent
commit
fed099cffb
3 changed files with 84 additions and 0 deletions
  1. +82
    -0
      python_scrapers/Berwick.py
  2. +1
    -0
      python_scrapers/OtherFilesToCopy.csv
  3. +1
    -0
      python_scrapers/SitesToGenerate.csv

+ 82
- 0
python_scrapers/Berwick.py View File

@@ -0,0 +1,82 @@

import urllib2
import urllib
import urlparse

import datetime, time
import cgi

from BeautifulSoup import BeautifulSoup

from PlanningUtils import PlanningApplication, \
PlanningAuthorityResults, \
getPostcodeFromText

# strftime pattern (day, month, two-digit year, no separators) used to format
# the date that is substituted into the parser's base_url.
search_date_format = "%d%m%y"
# strptime pattern used to parse the " Registration Date: " value found on
# each application.
reg_date_format = "%d/%m/%y"

class BerwickParser:
    """Scraper for the Berwick-upon-Tweed Borough Council planning register.

    A weekly list page is fetched using a URL built from the date of a
    Thursday (see _weekly_list_date). Every application on that page sits in
    a table of its own, and is located through the text labels inside it.
    """

    comments_email_address = "planning@berwick-upon-tweed.gov.uk"

    def __init__(self, *args):
        """Set up the authority details and an empty results container.

        Any positional arguments are accepted and ignored.
        """
        self.authority_name = "Berwick-upon-Tweed Borough Council"
        self.authority_short_name = "Berwick"
        self.base_url = "http://www.berwick-upon-tweed.gov.uk/planning/register/wl/%s.htm"

        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

    @staticmethod
    def _weekly_list_date(search_day):
        """Return the Thursday whose weekly list covers search_day.

        Monday to Thursday map to the Thursday of the same week; Friday,
        Saturday and Sunday map to the Thursday of the following week.
        """
        # date.weekday() numbers Monday as 0, so Thursday is 3. The modulo
        # turns a negative offset (Fri/Sat/Sun) into "days until next Thursday".
        days_until_thursday = (3 - search_day.weekday()) % 7
        return search_day + datetime.timedelta(days_until_thursday)

    @staticmethod
    def _field_text(result_table, label):
        """Return the stripped text of the first <p> following label.

        label must match the label's text node in result_table exactly,
        including its leading/trailing spaces.
        """
        return result_table.find(text=label).findNext("p").string.strip()

    def getResultsByDayMonthYear(self, day, month, year):
        """Fetch the weekly list covering the given date and collect its applications.

        day, month and year are integers. Each application found is added to
        the results object created in __init__, which is then returned; the
        same object is reused on every call to this instance.
        """
        search_day = datetime.date(year, month, day)
        thursday = self._weekly_list_date(search_day)

        this_url = self.base_url % (thursday.strftime(search_date_format))

        # Now get the search page. Close the response explicitly so the
        # connection is released even if reading or parsing fails.
        response = urllib2.urlopen(this_url)
        try:
            soup = BeautifulSoup(response.read())
        finally:
            response.close()

        # Each app is stored in a table of its own. The tables don't have
        # any useful attributes, so we'll find all the NavigableString objects
        # which look like " Application Number:" and then look at the
        # tables they are in.
        nav_strings = soup.findAll(text=" Application Number:")

        for nav_string in nav_strings:
            application = PlanningApplication()

            application.council_reference = nav_string.findNext("p").string.strip()

            result_table = nav_string.findPrevious("table")

            # Only the first child of the date paragraph is used as the date text.
            date_text = result_table.find(text=" Registration Date: ").findNext("p").contents[0].strip()
            application.date_received = datetime.datetime.strptime(date_text, reg_date_format)

            application.osgb_x = self._field_text(result_table, " Easting:")
            application.osgb_y = self._field_text(result_table, " Northing:")

            application.description = self._field_text(result_table, " Proposed Development:")
            application.address = self._field_text(result_table, " Location:")
            application.postcode = getPostcodeFromText(application.address)

            # Every application is given the URL of the weekly list page itself.
            application.info_url = this_url

            # The comment "URL" is the council's planning email address.
            application.comment_url = self.comments_email_address

            self._results.addApplication(application)

        return self._results

    def getResults(self, day, month, year):
        """Return displayXML() of the results for the given date.

        day, month and year may be anything int() accepts (e.g. strings).
        """
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()

if __name__ == '__main__':
    # Manual smoke test: fetch the weekly list covering 21 May 2008 and
    # print the resulting XML.
    parser = BerwickParser()
    xml_output = parser.getResults(21, 5, 2008)
    print(xml_output)


+ 1
- 0
python_scrapers/OtherFilesToCopy.csv View File

@@ -46,3 +46,4 @@
"Aberdeenshire.py", "420" "Aberdeenshire.py", "420"
"Brent.py", "420" "Brent.py", "420"
"Carmarthenshire.py", "420" "Carmarthenshire.py", "420"
"Berwick.py", "420"

+ 1
- 0
python_scrapers/SitesToGenerate.csv View File

@@ -250,3 +250,4 @@
"Aberdeenshire Council", "Aberdeenshire", "", "Aberdeenshire", "AberdeenshireParser" "Aberdeenshire Council", "Aberdeenshire", "", "Aberdeenshire", "AberdeenshireParser"
"London Borough of Brent", "Brent", "", "Brent", "BrentParser" "London Borough of Brent", "Brent", "", "Brent", "BrentParser"
"Carmarthenshire County Council", "Carmarthenshire", "", "Carmarthenshire", "CarmarthenshireParser" "Carmarthenshire County Council", "Carmarthenshire", "", "Carmarthenshire", "CarmarthenshireParser"
"Berwick-upon-Tweed Borough Council", "Berwick", "", "Berwick", "BerwickParser"

Loading…
Cancel
Save