瀏覽代碼

Add scraper for Kingston upon Thames.

master
duncan.parkes 17 年之前
父節點
當前提交
cbf14b5169
共有 3 個檔案被更改,包括 72 行新增0 行删除
  1. +70
    -0
      python_scrapers/KingstonUponThames.py
  2. +1
    -0
      python_scrapers/OtherFilesToCopy.csv
  3. +1
    -0
      python_scrapers/SitesToGenerate.csv

+ 70
- 0
python_scrapers/KingstonUponThames.py 查看文件

@@ -0,0 +1,70 @@

import urllib2
import urllib
import urlparse

import datetime, time
import cgi

from BeautifulSoup import BeautifulSoup

from PlanningUtils import PlanningApplication, \
PlanningAuthorityResults, \
getPostcodeFromText

# strftime pattern for the date substituted into the search URL's
# strRecFrom/strRecTo parameters, e.g. "02/Aug/2008" (the %b month
# abbreviation is locale-dependent).
date_format = "%d/%b/%Y"

class KingstonParser:
    """Scraper for planning applications listed by Kingston upon Thames.

    Queries the council's planning summary search page for a single
    received-date and records every application found on it in a
    PlanningAuthorityResults object.
    """

    # Used as the comment "url" for every application, because most
    # applications on the site do not expose a usable comment link.
    comments_email_address = "dc@rbk.kingston.gov.uk"

    def __init__(self, *args):
        """Set up the authority names, search URL template and results holder.

        Any positional arguments are accepted and ignored.
        """
        self.authority_name = "Royal Borough of Kingston upon Thames"
        self.authority_short_name = "Kingston upon Thames"
        # %(date)s is filled in per search; literal percent signs in the
        # query string are doubled so the % formatting leaves them alone.
        self.base_url = "http://maps.kingston.gov.uk/isis_main/planning/planning_summary.aspx?strWeekListType=SRCH&strRecTo=%(date)s&strRecFrom=%(date)s&strWard=ALL&strAppTyp=ALL&strWardTxt=All%%20Wards&strAppTypTxt=All%%20Application%%20Types&strStreets=ALL&strStreetsTxt=All%%20Streets&strLimit=500"

        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

    def getResultsByDayMonthYear(self, day, month, year):
        """Fetch and parse the applications received on the given date.

        day, month and year are integers handed to datetime.date (which
        raises ValueError for an impossible date).  Each application found
        is added to self._results, and that same object is returned; it is
        the one created in __init__, so it is shared between calls.
        """
        search_day = datetime.date(year, month, day)

        # Now get the search page.  The response is closed explicitly as
        # soon as it has been read so the connection is not left open.
        response = urllib2.urlopen(self.base_url % {"date": search_day.strftime(date_format)})
        try:
            page = response.read()
        finally:
            response.close()

        soup = BeautifulSoup(page)

        # Each app is stored in a table on its own.
        # These tables don't have any nice distinguishing features,
        # but they do all contain a NavigableString "Application",
        # and nothing else in the page does.
        for nav_string in soup.findAll(text="Application"):
            results_table = nav_string.findPrevious("table")

            application = PlanningApplication()
            application.date_received = search_day

            # The first link in the table carries the reference as its
            # text and the details page as its (possibly relative) href.
            application.council_reference = results_table.a.string.strip()
            application.info_url = urlparse.urljoin(self.base_url, results_table.a['href'])

            # Look the cells up once: the address link sits in the eighth
            # cell and the description is the start of the last cell.
            cells = results_table.findAll("td")
            application.address = cells[7].a.string.strip()

            application.postcode = getPostcodeFromText(application.address)
            application.description = cells[-1].contents[0].strip()

            # A few applications have comment urls, but most don't.
            # When they do, they have a case officer - I don't think we can
            # work out the other urls - even if they exist.
            # Best to use the email address.
            application.comment_url = self.comments_email_address

            self._results.addApplication(application)

        return self._results

    def getResults(self, day, month, year):
        """Return displayXML() of the results for the given date.

        day, month and year may be anything int() accepts (e.g. strings).
        """
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()

if __name__ == '__main__':
    # Manual smoke run: fetch and print the results for 2 August 2008.
    scraper = KingstonParser()
    print(scraper.getResults(2, 8, 2008))


+ 1
- 0
python_scrapers/OtherFilesToCopy.csv 查看文件

@@ -48,3 +48,4 @@
"Carmarthenshire.py", "420" "Carmarthenshire.py", "420"
"Berwick.py", "420" "Berwick.py", "420"
"Birmingham.py", "420" "Birmingham.py", "420"
"KingstonUponThames.py", "420"

+ 1
- 0
python_scrapers/SitesToGenerate.csv 查看文件

@@ -252,3 +252,4 @@
"Carmarthenshire County Council", "Carmarthenshire", "", "Carmarthenshire", "CarmarthenshireParser" "Carmarthenshire County Council", "Carmarthenshire", "", "Carmarthenshire", "CarmarthenshireParser"
"Berwick-upon-Tweed Borough Council", "Berwick", "", "Berwick", "BerwickParser" "Berwick-upon-Tweed Borough Council", "Berwick", "", "Berwick", "BerwickParser"
"Birmingham City Council", "Birmingham", "", "Birmingham", "BirminghamParser" "Birmingham City Council", "Birmingham", "", "Birmingham", "BirminghamParser"
"Royal Borough of Kingston upon Thames", "Kingston upon Thames", "", "KingstonUponThames", "KingstonParser"

Loading…
取消
儲存