Explorar el Código

Add scraper for Lichfield. Remove another unused import.

master
duncan.parkes hace 16 años
padre
commit
74b3bedb63
Se han modificado 4 ficheros con 70 adiciones y 1 borrados
  1. +1
    -1
      python_scrapers/Carmarthenshire.py
  2. +67
    -0
      python_scrapers/Lichfield.py
  3. +1
    -0
      python_scrapers/OtherFilesToCopy.csv
  4. +1
    -0
      python_scrapers/SitesToGenerate.csv

+ 1
- 1
python_scrapers/Carmarthenshire.py Ver fichero

@@ -2,7 +2,7 @@ import urllib2
import urllib
import urlparse

import datetime, time
import datetime
import cgi

from BeautifulSoup import BeautifulSoup


+ 67
- 0
python_scrapers/Lichfield.py Ver fichero

@@ -0,0 +1,67 @@
"""
Lichfield District Council has no nice search page, but it does have a page
which has the applications received in the last 7 days, so we'll use this,
ignoring the date passed in.

"""

import urllib2
import urlparse

import datetime

import BeautifulSoup

from PlanningUtils import PlanningApplication, \
PlanningAuthorityResults, \
getPostcodeFromText

# strptime format used to parse the "Date Application Valid:" value scraped
# from each application's info page (day/month/year).
date_format = "%d/%m/%Y"

class LichfieldParser:
    """Scraper for the planning applications listed by Lichfield District Council.

    The council site only offers a single page listing recently received
    applications, so the date handed to the public methods is accepted for
    interface compatibility but never used to filter anything.
    """

    def __init__(self, *args):
        """Store the authority details and create the results container.

        Any positional arguments are accepted and ignored.
        """
        self.authority_name = "Lichfield District Council"
        self.authority_short_name = "Lichfield"
        self.base_url = "http://www.lichfielddc.gov.uk/site/scripts/planning_list.php"

        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

    def _fetch_soup(self, url):
        """Download ``url`` and return it parsed, always closing the connection."""
        response = urllib2.urlopen(url)
        try:
            return BeautifulSoup.BeautifulSoup(response.read())
        finally:
            # urllib2 responses are not closed automatically; one info page is
            # fetched per application, so leaking them adds up quickly.
            response.close()

    def getResultsByDayMonthYear(self, day, month, year):
        """Scrape the listing page and every application's info page.

        ``day``, ``month`` and ``year`` are ignored (see the class docstring).

        Each application found is added to the results object created in
        ``__init__``, and that same object is returned.
        """
        soup = self._fetch_soup(self.base_url)

        trs = soup.find("table", {"class": "planningtable"}).tbody.findAll("tr")

        for tr in trs:
            tds = tr.findAll("td")

            if not tds:
                # A row without any <td> cells (e.g. a header row made of
                # <th> cells) cannot describe an application.
                continue

            application = PlanningApplication()

            application.council_reference = tds[0].a.string.strip()
            application.info_url = urlparse.urljoin(self.base_url, tds[0].a['href'])
            # Drop the first whitespace-separated token of the cell text and
            # collapse the remaining whitespace into single spaces.
            application.address = ' '.join(tds[1].contents[1].strip().split()[1:])
            application.postcode = getPostcodeFromText(application.address)

            # We're going to need to download the info page in order to get
            # the comment link, the date received, and the description.
            info_soup = self._fetch_soup(application.info_url)

            application.description = info_soup.find(text="Proposal:").findPrevious("div").contents[1].strip()

            date_text = info_soup.find(text="Date Application Valid:").findNext("span").string.strip()
            application.date_received = datetime.datetime.strptime(date_text, date_format).date()

            comment_link = info_soup.find("a", title="Comment on this planning application.")
            # Resolve against the page the link was found on, so a relative
            # href still gives a usable URL; an absolute href is unchanged.
            application.comment_url = urlparse.urljoin(application.info_url, comment_link['href'])

            self._results.addApplication(application)

        return self._results

    def getResults(self, day, month, year):
        """Return ``displayXML()`` of the scraped results.

        The arguments are converted with ``int()`` before being passed on, so
        numeric strings are accepted.
        """
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()

if __name__ == '__main__':
    # Manual run: build the parser and print what getResults returns for a
    # sample date.
    scraper = LichfieldParser()
    print(scraper.getResults(12, 10, 2008))

+ 1
- 0
python_scrapers/OtherFilesToCopy.csv Ver fichero

@@ -61,3 +61,4 @@
"Gosport.py", "420"
"WestDorset.py", "420"
"Kirklees.py", "420"
"Lichfield.py", "420"

+ 1
- 0
python_scrapers/SitesToGenerate.csv Ver fichero

@@ -267,3 +267,4 @@
"Gosport Borough Council", "Gosport", "", "Gosport", "GosportParser"
"West Dorset District Council", "West Dorset", "", "WestDorset", "WestDorsetParser"
"Kirklees Council", "Kirklees", "", "Kirklees", "KirkleesParser"
"Lichfield District Council", "Lichfield", "", "Lichfield", "LichfieldParser"

Cargando…
Cancelar
Guardar