ソースを参照

Add Berwick scraper.

import/raw
duncan.parkes 17年前
コミット
06a293dc26
3個のファイルの変更84行の追加0行の削除
  1. +82
    -0
      trunk/python_scrapers/Berwick.py
  2. +1
    -0
      trunk/python_scrapers/OtherFilesToCopy.csv
  3. +1
    -0
      trunk/python_scrapers/SitesToGenerate.csv

+ 82
- 0
trunk/python_scrapers/Berwick.py ファイルの表示

@@ -0,0 +1,82 @@

import urllib2
import urllib
import urlparse

import datetime, time
import cgi

from BeautifulSoup import BeautifulSoup

from PlanningUtils import PlanningApplication, \
PlanningAuthorityResults, \
getPostcodeFromText

# strftime pattern (day, month, two-digit year, no separators) that
# BerwickParser substitutes into its base_url to pick the page to fetch.
search_date_format = "%d%m%y"
# strptime pattern (day/month/two-digit year) used to parse the
# " Registration Date: " value scraped from each application table.
reg_date_format = "%d/%m/%y"

class BerwickParser:
    """Scraper for the Berwick-upon-Tweed Borough Council planning register.

    The register is published as static pages whose file name is a Thursday
    date formatted with ``search_date_format``. A search for any day is
    mapped onto one of those Thursday pages and every application found on
    that page is returned.
    """

    # Stored as the comment_url of every application this parser produces.
    comments_email_address = "planning@berwick-upon-tweed.gov.uk"

    def __init__(self, *args):
        """Set up authority details and an empty results container.

        Any positional arguments are accepted and ignored.
        """
        self.authority_name = "Berwick-upon-Tweed Borough Council"
        self.authority_short_name = "Berwick"
        self.base_url = "http://www.berwick-upon-tweed.gov.uk/planning/register/wl/%s.htm"

        # NOTE: this container lives for the lifetime of the parser, so
        # repeated searches on one instance keep adding to the same results.
        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

    @staticmethod
    def _register_thursday(search_day):
        """Return the Thursday whose register page covers *search_day*.

        Monday to Thursday map to the Thursday of the same week; Friday,
        Saturday and Sunday map to the Thursday of the following week.
        """
        monday_before = search_day - datetime.timedelta(search_day.weekday())
        thursday = monday_before + datetime.timedelta(3)
        if search_day.weekday() > 3:  # i.e. it is Friday, Saturday or Sunday
            # We need to add a week
            thursday = thursday + datetime.timedelta(7)
        return thursday

    @staticmethod
    def _field_text(table, label):
        """Return the stripped text of the first <p> following *label* in *table*."""
        return table.find(text=label).findNext("p").string.strip()

    def getResultsByDayMonthYear(self, day, month, year):
        """Scrape the register page covering the given date.

        day, month and year are integers. Returns the parser's
        PlanningAuthorityResults object with one PlanningApplication added
        for each application found on the page.

        Errors raised by urllib2.urlopen, or by a page that lacks an
        expected label, propagate to the caller.
        """
        search_day = datetime.date(year, month, day)
        thursday = self._register_thursday(search_day)

        this_url = self.base_url % (thursday.strftime(search_date_format))

        # Now get the search page. Close the response explicitly so the
        # connection is released even if reading or parsing fails.
        response = urllib2.urlopen(this_url)
        try:
            soup = BeautifulSoup(response.read())
        finally:
            response.close()

        # Each app is stored in a table of its own. The tables don't have
        # any useful attributes, so we'll find all the NavigableString objects
        # which look like " Application Number:" and then look at the
        # tables they are in.
        nav_strings = soup.findAll(text=" Application Number:")

        for nav_string in nav_strings:
            application = PlanningApplication()

            application.council_reference = nav_string.findNext("p").string.strip()

            result_table = nav_string.findPrevious("table")

            # Only the first child of the date paragraph is parsed; anything
            # after it inside the same <p> is ignored.
            date_text = result_table.find(text=" Registration Date: ").findNext("p").contents[0].strip()
            application.date_received = datetime.datetime.strptime(date_text, reg_date_format)

            application.osgb_x = self._field_text(result_table, " Easting:")
            application.osgb_y = self._field_text(result_table, " Northing:")

            application.description = self._field_text(result_table, " Proposed Development:")
            application.address = self._field_text(result_table, " Location:")
            application.postcode = getPostcodeFromText(application.address)

            # There is no per-application page linked here, so every
            # application points back at the page it was scraped from.
            application.info_url = this_url

            application.comment_url = self.comments_email_address

            self._results.addApplication(application)

        return self._results

    def getResults(self, day, month, year):
        """Return the results for the given date via displayXML().

        day, month and year may be anything int() accepts.
        """
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()

if __name__ == '__main__':
    # Manual smoke test: fetch the register page covering 21 May 2008
    # and print whatever getResults returns for it.
    berwick = BerwickParser()
    print(berwick.getResults(21, 5, 2008))


+ 1
- 0
trunk/python_scrapers/OtherFilesToCopy.csv ファイルの表示

@@ -46,3 +46,4 @@
"Aberdeenshire.py", "420"
"Brent.py", "420"
"Carmarthenshire.py", "420"
"Berwick.py", "420"

+ 1
- 0
trunk/python_scrapers/SitesToGenerate.csv ファイルの表示

@@ -250,3 +250,4 @@
"Aberdeenshire Council", "Aberdeenshire", "", "Aberdeenshire", "AberdeenshireParser"
"London Borough of Brent", "Brent", "", "Brent", "BrentParser"
"Carmarthenshire County Council", "Carmarthenshire", "", "Carmarthenshire", "CarmarthenshireParser"
"Berwick-upon-Tweed Borough Council", "Berwick", "", "Berwick", "BerwickParser"

読み込み中…
キャンセル
保存