Pārlūkot izejas kodu

Add Kensington and Chelsea Scraper.

import/raw
duncan.parkes pirms 18 gadiem
vecāks
revīzija
2a0fe9d90a
3 mainītis faili ar 74 papildinājumiem un 0 dzēšanām
  1. +72
    -0
      trunk/python_scrapers/Kensington.py
  2. +1
    -0
      trunk/python_scrapers/OtherFilesToCopy.csv
  3. +1
    -0
      trunk/python_scrapers/SitesToGenerate.csv

+ 72
- 0
trunk/python_scrapers/Kensington.py Parādīt failu

@@ -0,0 +1,72 @@

import urllib2
import urllib
import urlparse

import datetime, time
import cgi

from BeautifulSoup import BeautifulSoup

from PlanningUtils import PlanningApplication, \
PlanningAuthorityResults, \
getPostcodeFromText

# strptime/date format used by the council's results table, e.g. "11/06/2008".
date_format = "%d/%m/%Y"

class KensingtonParser:
    """Scraper for planning applications published by the Royal Borough of
    Kensington and Chelsea.

    The council site is searched by week-ending date, so a query for any
    day returns every application received in that week.
    """

    def __init__(self, *args):
        self.authority_name = "The Royal Borough of Kensington and Chelsea"
        self.authority_short_name = "Kensington and Chelsea"
        self.base_url = "http://www.rbkc.gov.uk/Planning/scripts/weeklyresults.asp"

        # Accumulates PlanningApplication objects across calls.
        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

    def getResultsByDayMonthYear(self, day, month, year):
        """Fetch the applications for the week containing the given date.

        Returns the shared PlanningAuthorityResults instance with one
        PlanningApplication added per row of the council's results table.
        """
        search_day = datetime.date(year, month, day)

        # The search form takes a week-ending date, which is the Friday of
        # the week being searched for (weekday(): Monday == 0, so the Monday
        # of the week plus four days is Friday).
        friday = search_day - datetime.timedelta(search_day.weekday()) + datetime.timedelta(4)

        # Not using urllib.urlencode as it insists on turning the "+" into "%2B"
        post_data = "WeekEndDate=%d%%2F%d%%2F%d&order=Received+Date&submit=search" %(friday.day, friday.month, friday.year)

        # POST the search form and parse the response.
        response = urllib2.urlopen(self.base_url, post_data)
        soup = BeautifulSoup(response.read())

        # Skip the header row of the results table.
        trs = soup.find("table", summary="Planning Application search results table").findAll("tr")[1:]

        for tr in trs:
            application = PlanningApplication()

            tds = tr.findAll("td")

            # The reference arrives with "/" written as the HTML entity
            # "&#47;" (unclear why); convert it back to a literal slash.
            application.council_reference = tds[0].a.contents[1].strip().replace("&#47;", "/")
            application.info_url = urlparse.urljoin(self.base_url, tds[0].a['href'])
            # The council has no separate comment page; reuse the info URL.
            application.comment_url = application.info_url

            application.date_received = datetime.datetime(*(time.strptime(tds[1].string.strip(), date_format)[0:6]))

            application.address = tds[2].string.strip()
            application.postcode = getPostcodeFromText(application.address)

            application.description = tds[3].string.strip()

            self._results.addApplication(application)

        return self._results

    def getResults(self, day, month, year):
        """Return the week's applications rendered as XML."""
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()

if __name__ == '__main__':
parser = KensingtonParser()
print parser.getResults(11,6,2008)


+ 1
- 0
trunk/python_scrapers/OtherFilesToCopy.csv Parādīt failu

@@ -24,3 +24,4 @@
"IsleOfWight.py", "420" "IsleOfWight.py", "420"
"Barnsley.py", "420" "Barnsley.py", "420"
"Shetland.py", "420" "Shetland.py", "420"
"Kensington.py", "420"

+ 1
- 0
trunk/python_scrapers/SitesToGenerate.csv Parādīt failu

@@ -222,3 +222,4 @@
"Barnsley Metropolitan Borough Council", "Barnsley", "", "Barnsley", "BarnsleyParser" "Barnsley Metropolitan Borough Council", "Barnsley", "", "Barnsley", "BarnsleyParser"
"Daventry District Council", "Daventry", "http://www.daventrydc.gov.uk/swiftlg/apas/run/wphappcriteria.display", "SwiftLG", "SwiftLGParser" "Daventry District Council", "Daventry", "http://www.daventrydc.gov.uk/swiftlg/apas/run/wphappcriteria.display", "SwiftLG", "SwiftLGParser"
"Shetland Islands Council", "Shetland Islands", "", "Shetland", "ShetlandParser" "Shetland Islands Council", "Shetland Islands", "", "Shetland", "ShetlandParser"
"The Royal Borough of Kensington and Chelsea", "Kensington and Chelsea", "", "Kensington", "KensingtonParser"

Notiek ielāde…
Atcelt
Saglabāt