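# Scraper for planning applications published by Broxtowe Borough Council.
# It drives the council's ASP.NET weekly-list search form and parses each
# page of results into PlanningUtils objects.
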
import urllib2
import urllib
import urlparse
import datetime

from BeautifulSoup import BeautifulSoup

from PlanningUtils import PlanningApplication, \
    PlanningAuthorityResults, \
    getPostcodeFromText

date_format = "%d/%m/%Y"

class BroxtoweParser:
    def __init__(self, *args):
        self.authority_name = "Broxtowe Borough Council"
        self.authority_short_name = "Broxtowe"
        self.base_url = "http://planning.broxtowe.gov.uk"
        self.info_url = "http://planning.broxtowe.gov.uk/ApplicationDetail.aspx?RefVal=%s"

        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

    def getResultsByDayMonthYear(self, day, month, year):
        search_day = datetime.date(year, month, day)

        # Now get the search page.
        get_response = urllib2.urlopen(self.base_url)
        get_soup = BeautifulSoup(get_response.read())

        # These are the inputs with a default value; the submit buttons
        # are skipped, as we add the one we want explicitly below.
        inputs_needed = [(x['id'], x['value']) for x in get_soup.form.findAll("input", value=True, type=lambda x: x != "submit")]
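        # The defaults picked up here include ASP.NET's hidden state
        # fields (e.g. __VIEWSTATE), which the server expects to be
        # posted back with the form.
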
        # Add the submit button
        inputs_needed.append(('cmdWeeklyList', 'Search Database'))

        # We also need to add the date we want to search for. This is the
        # Friday at the end of the week containing the search date. At
        # weekends the arithmetic gives the Friday before, but that isn't
        # a problem as there are no apps then.
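        # e.g. Wednesday 2008-10-01 has weekday() == 2, so we add
        # timedelta(4 - 2) == 2 days to get Friday 2008-10-03.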
        friday = search_day + datetime.timedelta(4 - search_day.weekday())

        inputs_needed.append(("ddlWeeklyList", friday.strftime(date_format)))

        # We'd like as many results as we can get away with on one page;
        # 50 is the largest option offered.
        inputs_needed.append(("ddlResultsPerPageWeeklyList", "50"))

        post_data = dict(inputs_needed)
        post_url = get_response.url

        # In case something goes wrong here, let's break out of the loop
        # after at most 10 passes.
        passes = 0

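        # Post the search form once per page of results, following the
        # "next" link until the last page has been parsed.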
        while True:
            passes += 1

            post_response = urllib2.urlopen(post_url, urllib.urlencode(post_data))
            post_soup = BeautifulSoup(post_response.read())

            result_tables = post_soup.table.findAll("table")

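            # Each application is rendered as a small table nested inside
            # the outer results table, with the site address in a
            # preceding <b> tag.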
            for result_table in result_tables:
                application = PlanningApplication()

                application.address = ', '.join(result_table.findPrevious("b").string.strip().split("\r"))
                application.postcode = getPostcodeFromText(application.address)

                trs = result_table.findAll("tr")

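                # One field per row, with the value in the second cell:
                # reference, date received, and description sit in rows
                # 0, 1 and 3 respectively.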
                application.council_reference = trs[0].findAll("td")[1].string.strip()
                application.date_received = datetime.datetime.strptime(trs[1].findAll("td")[1].string.strip(), date_format).date()
                application.description = trs[3].findAll("td")[1].string.strip()

                application.info_url = self.info_url % urllib.quote(application.council_reference)

                # In order to avoid having to do a download for every app,
                # the comment url is set to the same page as the info_url.
                # (There is a comment page, reached via a button on the
                # details page.)
                application.comment_url = application.info_url

                self._results.addApplication(application)

            # Which page are we on, and how many are there?
            page_no = int(post_soup.find("span", id="lblPageNo").b.string)
            total_pages = int(post_soup.find("span", id="lblTotalPages").b.string)

            # Stop on the last page, or once the pass limit is reached.
            if passes > 10 or page_no >= total_pages:
                break

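            # Following the "next page" link is an ASP.NET postback:
            # __EVENTTARGET names the link control, and the hidden state
            # fields from the page just received are echoed back.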
            post_data = [
                ("__EVENTTARGET", "hlbNext"),
                ("__EVENTARGUMENT", ""),
                ("__VIEWSTATE", post_soup.find("input", id="__VIEWSTATE")['value']),
                ("__EVENTVALIDATION", post_soup.find("input", id="__EVENTVALIDATION")['value']),
            ]

            post_url = urlparse.urljoin(post_response.url, post_soup.find("form")['action'])

        return self._results

    def getResults(self, day, month, year):
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()

if __name__ == '__main__':
    parser = BroxtoweParser()
    print parser.getResults(3, 10, 2008)