Add scraper for Lichfield. Remove another unused import.

import/raw
duncan.parkes committed 16 years ago
parent commit f076ecc304
4 changed files with 70 additions and 1 deletion
  1. trunk/python_scrapers/Carmarthenshire.py (+1, -1)
  2. trunk/python_scrapers/Lichfield.py (+67, -0)
  3. trunk/python_scrapers/OtherFilesToCopy.csv (+1, -0)
  4. trunk/python_scrapers/SitesToGenerate.csv (+1, -0)

trunk/python_scrapers/Carmarthenshire.py (+1, -1)

@@ -2,7 +2,7 @@ import urllib2
 import urllib
 import urlparse

-import datetime, time
+import datetime
 import cgi

 from BeautifulSoup import BeautifulSoup

trunk/python_scrapers/Lichfield.py (+67, -0)

@@ -0,0 +1,67 @@
"""
Lichfield District council has no nice search page, but it does have a page
which has the applications received in the last 7 days, so we'll use this,
ignoring the date passed in.

"""

import urllib2
import urlparse

import datetime

import BeautifulSoup

from PlanningUtils import PlanningApplication, \
PlanningAuthorityResults, \
getPostcodeFromText

date_format = "%d/%m/%Y"

class LichfieldParser:
def __init__(self, *args):

self.authority_name = "Lichfield District Council"
self.authority_short_name = "Lichfield"
self.base_url = "http://www.lichfielddc.gov.uk/site/scripts/planning_list.php"

self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)


def getResultsByDayMonthYear(self, day, month, year):
response = urllib2.urlopen(self.base_url)
soup = BeautifulSoup.BeautifulSoup(response.read())

trs = soup.find("table", {"class": "planningtable"}).tbody.findAll("tr")

for tr in trs:
application = PlanningApplication()

tds = tr.findAll("td")

application.council_reference = tds[0].a.string.strip()
application.info_url = urlparse.urljoin(self.base_url, tds[0].a['href'])
application.address = ' '.join(tds[1].contents[1].strip().split()[1:])
application.postcode = getPostcodeFromText(application.address)


# We're going to need to download the info page in order to get
# the comment link, the date received, and the description.

info_response = urllib2.urlopen(application.info_url)
info_soup = BeautifulSoup.BeautifulSoup(info_response.read())

application.description = info_soup.find(text="Proposal:").findPrevious("div").contents[1].strip()
application.date_received = datetime.datetime.strptime(info_soup.find(text="Date Application Valid:").findNext("span").string.strip(), date_format).date()
application.comment_url = info_soup.find("a", title="Comment on this planning application.")['href']

self._results.addApplication(application)

return self._results

def getResults(self, day, month, year):
return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()

if __name__ == '__main__':
parser = LichfieldParser()
print parser.getResults(12,10,2008)
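For readers unfamiliar with BeautifulSoup 3's navigation API, here is a minimal sketch (not part of the commit) of the pattern Lichfield.py uses on the info page: find a label by its text, then walk to the neighbouring node that holds the value. The HTML fragment below is invented for illustration.

import BeautifulSoup

# Invented fragment mimicking the structure the scraper expects.
html = '<div><strong>Date Application Valid:</strong> <span>12/10/2008</span></div>'
soup = BeautifulSoup.BeautifulSoup(html)

# find(text=...) returns the NavigableString that exactly matches the label;
# findNext("span") then walks forward in document order to the value node.
label = soup.find(text="Date Application Valid:")
print label.findNext("span").string  # prints: 12/10/2008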

trunk/python_scrapers/OtherFilesToCopy.csv (+1, -0)

@@ -61,3 +61,4 @@
 "Gosport.py", "420"
 "WestDorset.py", "420"
 "Kirklees.py", "420"
+"Lichfield.py", "420"

trunk/python_scrapers/SitesToGenerate.csv (+1, -0)

@@ -267,3 +267,4 @@
 "Gosport Borough Council", "Gosport", "", "Gosport", "GosportParser"
 "West Dorset District Council", "West Dorset", "", "WestDorset", "WestDorsetParser"
 "Kirklees Council", "Kirklees", "", "Kirklees", "KirkleesParser"
+"Lichfield District Council", "Lichfield", "", "Lichfield", "LichfieldParser"
