Quellcode durchsuchen

Add scraper for Lichfield. Remove another unused import.

master
duncan.parkes vor 16 Jahren
Ursprung
Commit
74b3bedb63
4 geänderte Dateien mit 70 neuen und 1 gelöschten Zeilen
  1. +1
    -1
      python_scrapers/Carmarthenshire.py
  2. +67
    -0
      python_scrapers/Lichfield.py
  3. +1
    -0
      python_scrapers/OtherFilesToCopy.csv
  4. +1
    -0
      python_scrapers/SitesToGenerate.csv

+ 1
- 1
python_scrapers/Carmarthenshire.py Datei anzeigen

@@ -2,7 +2,7 @@ import urllib2
import urllib
import urlparse

import datetime, time
import datetime
import cgi

from BeautifulSoup import BeautifulSoup


+ 67
- 0
python_scrapers/Lichfield.py Datei anzeigen

@@ -0,0 +1,67 @@
"""
Lichfield District council has no nice search page, but it does have a page
which has the applications received in the last 7 days, so we'll use this,
ignoring the date passed in.

"""

import urllib2
import urlparse

import datetime

import BeautifulSoup

from PlanningUtils import PlanningApplication, \
PlanningAuthorityResults, \
getPostcodeFromText

date_format = "%d/%m/%Y"

class LichfieldParser:
def __init__(self, *args):

self.authority_name = "Lichfield District Council"
self.authority_short_name = "Lichfield"
self.base_url = "http://www.lichfielddc.gov.uk/site/scripts/planning_list.php"

self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)


def getResultsByDayMonthYear(self, day, month, year):
response = urllib2.urlopen(self.base_url)
soup = BeautifulSoup.BeautifulSoup(response.read())

trs = soup.find("table", {"class": "planningtable"}).tbody.findAll("tr")

for tr in trs:
application = PlanningApplication()

tds = tr.findAll("td")

application.council_reference = tds[0].a.string.strip()
application.info_url = urlparse.urljoin(self.base_url, tds[0].a['href'])
application.address = ' '.join(tds[1].contents[1].strip().split()[1:])
application.postcode = getPostcodeFromText(application.address)


# We're going to need to download the info page in order to get
# the comment link, the date received, and the description.

info_response = urllib2.urlopen(application.info_url)
info_soup = BeautifulSoup.BeautifulSoup(info_response.read())

application.description = info_soup.find(text="Proposal:").findPrevious("div").contents[1].strip()
application.date_received = datetime.datetime.strptime(info_soup.find(text="Date Application Valid:").findNext("span").string.strip(), date_format).date()
application.comment_url = info_soup.find("a", title="Comment on this planning application.")['href']

self._results.addApplication(application)

return self._results

def getResults(self, day, month, year):
return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()

if __name__ == '__main__':
parser = LichfieldParser()
print parser.getResults(12,10,2008)

+ 1
- 0
python_scrapers/OtherFilesToCopy.csv Datei anzeigen

@@ -61,3 +61,4 @@
"Gosport.py", "420"
"WestDorset.py", "420"
"Kirklees.py", "420"
"Lichfield.py", "420"

+ 1
- 0
python_scrapers/SitesToGenerate.csv Datei anzeigen

@@ -267,3 +267,4 @@
"Gosport Borough Council", "Gosport", "", "Gosport", "GosportParser"
"West Dorset District Council", "West Dorset", "", "WestDorset", "WestDorsetParser"
"Kirklees Council", "Kirklees", "", "Kirklees", "KirkleesParser"
"Lichfield District Council", "Lichfield", "", "Lichfield", "LichfieldParser"

Laden…
Abbrechen
Speichern