Просмотр исходного кода

Add a RutlandLike scraper

This will handle Rutland, Melton, and any other sites 
we find which look like them.
East Northants and Harborough both also have sites of this sort, but I suspect they are
being replaced by PublicAccess...
master
duncan.parkes 16 лет назад
Родитель
Commit
dec1a6984b
4 измененных файлов: 106 добавлений и 4 удалений
  1. +2
    -1
      python_scrapers/OtherFilesToCopy.csv
  2. +3
    -3
      python_scrapers/PublicAccess.py
  3. +97
    -0
      python_scrapers/RutlandLike.py
  4. +4
    -0
      python_scrapers/SitesToGenerate.csv

+ 2
- 1
python_scrapers/OtherFilesToCopy.csv Просмотреть файл

@@ -10,4 +10,5 @@
"Broxbourne.cgi", "493"
"EastHerts.cgi", "493"
"NorthHerts.cgi", "493"
"Enfield.cgi", "493"
"Enfield.cgi", "493"
"RutlandLike.py", "420"

+ 3
- 3
python_scrapers/PublicAccess.py Просмотреть файл

@@ -350,10 +350,10 @@ class PublicAccessPropertyPageParser(HTMLParser.HTMLParser):


# Manual smoke test: build a PublicAccessParser for one authority and print
# the results for a fixed date.
if __name__ == '__main__':
# NOTE(review): this hunk is shown without +/- markers, so what appear to be the
# removed and added lines sit side by side (the hunk is listed as 3 additions
# and 3 deletions). Read top to bottom, the later assignment of each pair wins:
# day/month end up as 20/11 and parser ends up as the Hambleton instance.
day = 31
month = 8
day = 20
month = 11
year = 2007

parser = PublicAccessParser("Bristol", "Bristol", "http://e2eweb.bristol-city.gov.uk/PublicAccess/tdc/", True)
parser = PublicAccessParser("Hambleton", "Hambleton", "http://planning.hambleton.gov.uk/publicaccess/tdc/", True)
print parser.getResults(day, month, year)

+ 97
- 0
python_scrapers/RutlandLike.py Просмотреть файл

@@ -0,0 +1,97 @@
import urllib2
import urllib
import urlparse

import datetime
#import re

from BeautifulSoup import BeautifulSoup

from PlanningUtils import PlanningApplication, \
PlanningAuthorityResults, \
getPostcodeFromText

# strftime pattern used to render the search date for the dateFrom/dateTo
# fields of the search form.
date_format = "%d/%m/%Y"

# Relative URL of the comment page. The council reference is substituted for
# the %s and the result is resolved against the scraper's base URL.
comment_url_end = "comment.asp?%s"

#comment_regex = re.compile("Comment on this ")


class RutlandLikeParser:
    """Scraper for planning search pages laid out like Rutland's.

    The search form at ``base_url`` is submitted for a single day and the
    applications listed in the results table are collected into a
    PlanningAuthorityResults object.

    Note that the results object is created once, in the constructor, so
    applications found by successive searches on the same instance
    accumulate in it.
    """

    def __init__(self,
                 authority_name,
                 authority_short_name,
                 base_url,
                 debug=False):
        """Remember the authority details and create an empty result set.

        authority_name       -- full name of the planning authority
        authority_short_name -- short name of the planning authority
        base_url             -- URL the search form is submitted to; also the
                                base against which relative links are resolved
        debug                -- stored on the instance; not otherwise used by
                                this class
        """
        self.authority_name = authority_name
        self.authority_short_name = authority_short_name
        self.base_url = base_url

        self.debug = debug

        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

    def getResultsByDayMonthYear(self, day, month, year):
        """Search for applications on the given date and return the results.

        day, month and year are integers. Returns the instance's
        PlanningAuthorityResults object, with one application added for each
        four-cell row found in the results table. If the page contains no
        results table, the results object is returned as it stands.
        """
        search_date = datetime.date(year, month, day)
        date_string = search_date.strftime(date_format)
        search_data = urllib.urlencode({"reference": "",
                                        "undecided": "yes",
                                        "dateFrom": date_string,
                                        "dateTo": date_string,
                                        "Address": "",
                                        "validate": "true",
                                        })

        # Supplying a data argument makes urllib2 send the form as a POST.
        request = urllib2.Request(self.base_url, search_data)
        response = urllib2.urlopen(request)

        # Always release the connection, even if reading the body fails.
        try:
            html = response.read()
        finally:
            response.close()

        soup = BeautifulSoup(html)

        # The results table is picked out by its inline style.
        tables = soup.findAll("table", {"style": "width:auto;"})

        if not tables:
            return self._results

        # We don't want the first or last tr
        trs = tables[0].findAll("tr")[1:-1]

        for tr in trs:
            tds = tr.findAll("td")

            # Only rows with exactly four cells describe an application.
            if len(tds) != 4:
                continue

            app = PlanningApplication()

            # First cell: link to the application, with the council
            # reference as the link text.
            local_info_url = tds[0].a['href']
            app.info_url = urlparse.urljoin(self.base_url, local_info_url)
            app.council_reference = tds[0].a.string

            # Second cell: address. Third cell: description. The fourth
            # cell is not used.
            app.address = tds[1].string
            app.postcode = getPostcodeFromText(app.address)

            app.description = tds[2].string

            app.comment_url = urlparse.urljoin(self.base_url, comment_url_end % app.council_reference)

            # The date is not read from the page: every hit is recorded
            # against the date that was searched for.
            app.date_received = search_date

            self._results.addApplication(app)

        return self._results

    def getResults(self, day, month, year):
        """Run the search for the given date and return displayXML()'s output.

        day, month and year may be anything int() accepts (for example
        strings); they are converted before the search is run.
        """
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()


if __name__ == '__main__':
    # Manual smoke test: scrape a single fixed day and print the result.
    # The authority names say Rutland but the URL is the Melton search page.
    melton_search_url = "http://www.meltononline.co.uk/planning/searchparam.asp"
    parser = RutlandLikeParser("Rutland long", "Rutland", melton_search_url)

    # Parenthesised single-argument form prints the same thing as the
    # Python 2 print statement.
    print(parser.getResults(15, 11, 2007))


+ 4
- 0
python_scrapers/SitesToGenerate.csv Просмотреть файл

@@ -137,3 +137,7 @@
"Chester-le-Street District Council", "Chester-le-Street", "http://planning.chester-le-street.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser"
"Vale of the White Horse District Council", "Vale of the White Horse", "http://planning.whitehorsedc.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser"
"Corby Borough Council", "Corby", "https://publicaccess.corby.gov.uk/publicaccess/tdc/", "PublicAccess", "PublicAccessParser"
"Rutland County Council", "Rutland", "http://www.rutland.gov.uk/wellandplanning/searchparam.asp", "RutlandLike", "RutlandLikeParser"
"Melton Borough Council", "Melton", "http://www.meltononline.co.uk/planning/searchparam.asp", "RutlandLike", "RutlandLikeParser"
"Harborough District Council", "Harborough", "http://pa.harborough.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser"
"East Northamptonshire Council", "East Northants", "http://publicaccesssrv.east-northamptonshire.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser"

Загрузка…
Отмена
Сохранить