Browse Source

Add scraper for Kingston upon Thames.

import/raw
duncan.parkes 16 years ago
parent
commit
3c85f0d0dd
3 changed files with 72 additions and 0 deletions
  1. +70
    -0
      trunk/python_scrapers/KingstonUponThames.py
  2. +1
    -0
      trunk/python_scrapers/OtherFilesToCopy.csv
  3. +1
    -0
      trunk/python_scrapers/SitesToGenerate.csv

+ 70
- 0
trunk/python_scrapers/KingstonUponThames.py View File

@@ -0,0 +1,70 @@

import urllib2
import urllib
import urlparse

import datetime, time
import cgi

from BeautifulSoup import BeautifulSoup

from PlanningUtils import PlanningApplication, \
PlanningAuthorityResults, \
getPostcodeFromText

date_format = "%d/%b/%Y"

class KingstonParser:
    """Planning application scraper for the Royal Borough of Kingston upon Thames.

    Fetches the council's one-day search results page and parses each
    application table into a PlanningApplication, accumulating them in a
    PlanningAuthorityResults container.
    """

    # No per-application comment page is reliably available, so comments
    # are directed to the council's development-control mailbox.
    comments_email_address = "dc@rbk.kingston.gov.uk"

    def __init__(self, *args):
        self.authority_name = "Royal Borough of Kingston upon Thames"
        self.authority_short_name = "Kingston upon Thames"
        # Single-day search: the same date is used for both the "from" and
        # "to" fields; %% escapes produce literal %20 sequences in the URL.
        self.base_url = "http://maps.kingston.gov.uk/isis_main/planning/planning_summary.aspx?strWeekListType=SRCH&strRecTo=%(date)s&strRecFrom=%(date)s&strWard=ALL&strAppTyp=ALL&strWardTxt=All%%20Wards&strAppTypTxt=All%%20Application%%20Types&strStreets=ALL&strStreetsTxt=All%%20Streets&strLimit=500"

        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

    def getResultsByDayMonthYear(self, day, month, year):
        """Scrape all applications received on the given date.

        Returns the accumulated PlanningAuthorityResults object.
        """
        target_date = datetime.date(year, month, day)

        # Fetch the search results page for this single day.
        page = urllib2.urlopen(self.base_url % {"date": target_date.strftime(date_format)})
        soup = BeautifulSoup(page.read())

        # Each application sits in a table of its own. The tables carry no
        # distinguishing attributes, but every one of them — and nothing
        # else on the page — contains the NavigableString "Application",
        # so those strings serve as anchors back to the enclosing tables.
        for marker in soup.findAll(text="Application"):
            app_table = marker.findPrevious("table")

            application = PlanningApplication()
            application.date_received = target_date

            application.council_reference = app_table.a.string.strip()
            application.info_url = urlparse.urljoin(self.base_url, app_table.a['href'])
            application.address = app_table.findAll("td")[7].a.string.strip()

            application.postcode = getPostcodeFromText(application.address)
            application.description = app_table.findAll("td")[-1].contents[0].strip()

            # A few applications have comment urls (those with a case
            # officer), but most don't, and the missing ones can't be
            # derived — so always fall back to the email address.
            application.comment_url = self.comments_email_address

            self._results.addApplication(application)

        return self._results

    def getResults(self, day, month, year):
        """Scrape the given date and return the results as XML."""
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()

if __name__ == '__main__':
parser = KingstonParser()
print parser.getResults(2,8,2008)


+ 1
- 0
trunk/python_scrapers/OtherFilesToCopy.csv View File

@@ -48,3 +48,4 @@
"Carmarthenshire.py", "420" "Carmarthenshire.py", "420"
"Berwick.py", "420" "Berwick.py", "420"
"Birmingham.py", "420" "Birmingham.py", "420"
"KingstonUponThames.py", "420"

+ 1
- 0
trunk/python_scrapers/SitesToGenerate.csv View File

@@ -252,3 +252,4 @@
"Carmarthenshire County Council", "Carmarthenshire", "", "Carmarthenshire", "CarmarthenshireParser" "Carmarthenshire County Council", "Carmarthenshire", "", "Carmarthenshire", "CarmarthenshireParser"
"Berwick-upon-Tweed Borough Council", "Berwick", "", "Berwick", "BerwickParser" "Berwick-upon-Tweed Borough Council", "Berwick", "", "Berwick", "BerwickParser"
"Birmingham City Council", "Birmingham", "", "Birmingham", "BirminghamParser" "Birmingham City Council", "Birmingham", "", "Birmingham", "BirminghamParser"
"Royal Borough of Kingston upon Thames", "Kingston upon Thames", "", "KingstonUponThames", "KingstonParser"

Loading…
Cancel
Save