Browse Source

Add scraper for Brent.

import/raw
duncan.parkes 16 years ago
parent
commit
af50d991f3
3 changed files with 127 additions and 0 deletions
  1. +125
    -0
      trunk/python_scrapers/Brent.py
  2. +1
    -0
      trunk/python_scrapers/OtherFilesToCopy.csv
  3. +1
    -0
      trunk/python_scrapers/SitesToGenerate.csv

+ 125
- 0
trunk/python_scrapers/Brent.py View File

@@ -0,0 +1,125 @@

import urllib2
import urllib
import urlparse

import datetime, time
import cgi
import re

from BeautifulSoup import BeautifulSoup

from PlanningUtils import PlanningApplication, \
PlanningAuthorityResults, \
getPostcodeFromText

date_format = "%d/%m/%Y"

class BrentParser:
def __init__(self, *args):

self.authority_name = "London Borough of Brent"
self.authority_short_name = "Brent"
# self.base_url = "http://www.brent.gov.uk/servlet/ep.ext?extId=101149&byPeriod=Y&st=PL&periodUnits=day&periodMultiples=14"
self.base_url = "http://www.brent.gov.uk/servlet/ep.ext"

self._current_application = None

self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)


def getResultsByDayMonthYear(self, day, month, year):
search_day = datetime.date(year, month, day)

post_data = [
("from", search_day.strftime(date_format)),
("until", search_day.strftime(date_format)),
("EXECUTEQUERY", "Query"),
# ("auth", "402"),
("st", "PL"),
("periodUnits", "day"),
("periodMultiples", "14"),
("title", "Search+by+Application+Date"),
("instructions", "Enter+a+date+range+to+search+for+existing+applications+by+the+date+of+application.%0D%0A%3Cbr%3E%3Cbr%3E%0D%0A%3Cstrong%3ENote%3A%3C%2Fstrong%3E+Where+%27%28Applicant%27s+Description%29%27+appears+in+the+proposal%2C+the+text+may+subsequently+be+amended+when+the+application+is+checked."),
("byFormat", "N"),
("byOther1", "N"),
("byOther2", "N"),
("byOther3", "N"),
("byOther4", "N"),
("byOther5", "N"),
("byPostcode", "N"),
("byStreet", "N"),
("byHouseNumber", "N"),
("byAddress", "N"),
("byPeriod", "Y"),
("extId", "101149"), # I wonder what this is...
("queried", "Y"),
("other1Label", "Other1"),
("other2Label", "Other2"),
("other3Label", "Other3"),
("other4Label", "Other4"),
("other5Label", "Other5"),
("other1List", ""),
("other2List", ""),
("other3List", ""),
("other4List", ""),
("other5List", ""),
("periodLabel", "From"),
("addressLabel", "Select+Address"),
("print", "")
]

# Now get the search page
response = urllib2.urlopen(self.base_url, urllib.urlencode(post_data))

soup = BeautifulSoup(response.read())

trs = soup.find(text="Search Results").findNext("table").findAll("tr")[:-1]

# There are six trs per application, ish

# The first contains the case no and the application date.
# The second contains the address
# The third contains the description
# The fourth contains the info page link
# The fifth contains the comment link (or a note that comments are currently not being accepted
# The sixth is a spacer.

count = 0
for tr in trs:
count +=1

ref = tr.find(text=re.compile("Case No:"))
if ref:
self._current_application = PlanningApplication()
count = 1

self._current_application.council_reference = ref.split(":")[1].strip()
self._current_application.date_received = search_day

if count % 6 == 2:
self._current_application.address = tr.td.string.strip()
self._current_application.postcode = getPostcodeFromText(self._current_application.address)
if count % 6 == 3:
self._current_application.description = tr.td.string.strip()
if count % 6 == 4:
self._current_application.info_url = urlparse.urljoin(self.base_url, tr.a['href'])
if count % 6 == 5:
try:
self._current_application.comment_url = urlparse.urljoin(self.base_url, tr.a['href'])
except:
# Comments are not currently being accepted. We'll leave this app for the moment - we'll pick it up later if they start accepting comments
continue
if count % 6 == 0 and self._current_application.is_ready():
self._results.addApplication(self._current_application)

return self._results

def getResults(self, day, month, year):
return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()

if __name__ == '__main__':
parser = BrentParser()
print parser.getResults(6,8,2008)


+ 1
- 0
trunk/python_scrapers/OtherFilesToCopy.csv View File

@@ -44,3 +44,4 @@
"Redbridge.cgi", "493" "Redbridge.cgi", "493"
"AmberValley.py", "420" "AmberValley.py", "420"
"Aberdeenshire.py", "420" "Aberdeenshire.py", "420"
"Brent.py", "420"

+ 1
- 0
trunk/python_scrapers/SitesToGenerate.csv View File

@@ -248,3 +248,4 @@
"London Borough of Enfield", "Enfield", "http://forms.enfield.gov.uk/swiftlg/apas/run/wphappcriteria.display", "SwiftLG", "SwiftLGParser" "London Borough of Enfield", "Enfield", "http://forms.enfield.gov.uk/swiftlg/apas/run/wphappcriteria.display", "SwiftLG", "SwiftLGParser"
"Amber Valley Borough Council", "Amber Valley", "", "AmberValley", "AmberValleyParser" "Amber Valley Borough Council", "Amber Valley", "", "AmberValley", "AmberValleyParser"
"Aberdeenshire Council", "Aberdeenshire", "", "Aberdeenshire", "AberdeenshireParser" "Aberdeenshire Council", "Aberdeenshire", "", "Aberdeenshire", "AberdeenshireParser"
"London Borough of Brent", "Brent", "", "Brent", "BrentParser"

Loading…
Cancel
Save