|
@@ -0,0 +1,122 @@ |
|
|
|
|
|
|
|
|
|
|
|
import urllib2 |
|
|
|
|
|
import urllib |
|
|
|
|
|
import urlparse |
|
|
|
|
|
|
|
|
|
|
|
import datetime, time |
|
|
|
|
|
import cgi |
|
|
|
|
|
import re |
|
|
|
|
|
|
|
|
|
|
|
from BeautifulSoup import BeautifulSoup |
|
|
|
|
|
|
|
|
|
|
|
import cookielib |
|
|
|
|
|
# Module-level cookie jar. BirminghamParser.getResultsByDayMonthYear stores the
# cookies from its preliminary request here and attaches them to the search
# POST. Being module-level, it is shared by every parser instance.
cookie_jar = cookielib.CookieJar()
|
|
|
|
|
|
|
|
|
|
|
from PlanningUtils import PlanningApplication, \ |
|
|
|
|
|
PlanningAuthorityResults, \ |
|
|
|
|
|
getPostcodeFromText |
|
|
|
|
|
|
|
|
|
|
|
# Day/month/year pattern in strftime/strptime notation (e.g. 01/08/2008).
# Not referenced anywhere in the visible part of this file.
date_format = "%d/%m/%Y"
|
|
|
|
|
|
|
|
|
|
|
class BirminghamParser:
    """Scraper for the Birmingham City Council planning application search.

    The search is driven by a form POST that only works once a session
    cookie has been obtained from ``for_cookie_url``.  Each matching
    application is turned into a ``PlanningApplication`` and added to
    ``self._results``.

    Note: ``self._results`` is created once in ``__init__`` and every call to
    ``getResultsByDayMonthYear`` adds to it, so repeated calls on the same
    instance accumulate applications.
    """

    # The site's own comment link gives an Access Denied, so applications
    # carry this email address in place of a comment URL.
    comments_email_address = "planning.enquiries@birmingham.gov.uk"

    def __init__(self, *args):
        """Set up the authority names, the site URLs and an empty result set.

        Any positional arguments are accepted and ignored.
        """
        self.authority_name = "Birmingham City Council"
        self.authority_short_name = "Birmingham"

        # Not requested by any code in this class.
        self.get_url = "http://www.birmingham.gov.uk/GenerateContent?CONTENT_ITEM_ID=67548&CONTENT_ITEM_TYPE=0&MENU_ID=12189"
        # What a lovely intuitive URL it is.
        self.for_cookie_url = "http://www.birmingham.gov.uk/PSR/control/main"
        self.post_url = "http://www.birmingham.gov.uk/PSR/control/searchresults"

        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

    def _build_post_data(self, day, month, year):
        """Return the search form fields, as an ordered list of (name, value) pairs.

        The applications form is filled in to search from the given date to
        the same date; the appeals form fields are sent with empty dates.
        ``PSR_CURRENT_FORM`` appears twice, once per form, so this has to stay
        a list rather than a dict.
        """
        return [
            ("JAVASCRIPT_ENABLED", "FALSE"),
            ("txt_PSR_CurrentSearchPage", "0"),
            ("PSR_CURRENT_FORM", "psr_Application_PSRSearch_Application_Form"),
            ("txt_PSR_Application_ApplicationNumber", ""),
            ("txt_PSR_Application_Status", "awaitingDecision"),
            ("txt_PSR_Application_TypeOfApplication", ""),
            ("txt_PSR_Application_DecisionType", ""),
            ("txt_PSR_Application_District", ""),
            ("txt_PSR_Application_Ward", ""),
            ("txt_PSR_Application_Location", ""),
            ("txt_PSR_Application_Applicant", ""),
            ("txt_PSR_Application_Agent", ""),
            ("txt_PSR_Application_SearchDay", day),
            ("txt_PSR_Application_SearchMonth", month - 1),  # Months are counted from zero...
            ("txt_PSR_Application_SearchYear", year),
            ("txt_PSR_Application_SearchToDay", day),
            ("txt_PSR_Application_SearchToMonth", month - 1),  # Months are counted from zero...
            ("txt_PSR_Application_SearchToYear", year),
            ("txt_PSR_Application_SearchSortOrder", "LatestFirst"),
            ("txt_PSR_Application_ResultsSkipRows", "0"),
            ("txt_PSR_Application_ResultsPerPage", "1000"),  # That should be enough to keep things on one page
            ("btn_PSR_Application_ApplicationSearch", "Search"),
            ("PSR_CURRENT_FORM", "psr_Application_PSRSearch_Appeals_Form"),
            ("txt_PSR_Appeals_ApplicationNumber", ""),
            ("txt_PSR_Appeals_Status", "awaitingDecision"),
            ("txt_PSR_Appeals_TypeOfAppeal", ""),
            ("txt_PSR_Appeals_DecisionType", ""),
            ("txt_PSR_Appeals_District", ""),
            ("txt_PSR_Appeals_Ward", ""),
            ("txt_PSR_Appeals_Location", ""),
            ("txt_PSR_Appeals_Applicant", ""),
            ("txt_PSR_Appeals_Agent", ""),
            ("txt_PSR_Appeals_SearchDay", ""),
            ("txt_PSR_Appeals_SearchMonth", ""),
            ("txt_PSR_Appeals_SearchYear", ""),
            ("txt_PSR_Appeals_SearchToDay", ""),
            ("txt_PSR_Appeals_SearchToMonth", ""),
            ("txt_PSR_Appeals_SearchToYear", ""),
            ("txt_PSR_Appeals_SearchSortOrder", "LatestFirst"),
            ("txt_PSR_Appeals_ResultsSkipRows", "0"),
            ("txt_PSR_Appeals_ResultsPerPage", "10"),
            ]

    def getResultsByDayMonthYear(self, day, month, year):
        """Fetch the applications for one date and add them to the result set.

        ``day``, ``month`` (1-12) and ``year`` are integers.  Raises
        ``ValueError`` (from ``datetime.date``) if they do not form a valid
        date; errors from ``urllib2.urlopen`` are not caught here.

        Returns ``self._results``, which also still holds anything added by
        earlier calls on this instance.
        """
        search_day = datetime.date(year, month, day)

        # We seem to need to get this page in order to get a cookie
        for_cookie_request = urllib2.Request(self.for_cookie_url)
        for_cookie_response = urllib2.urlopen(for_cookie_request)
        try:
            cookie_jar.extract_cookies(for_cookie_response, for_cookie_request)
        finally:
            # Only the headers are needed; release the connection.
            for_cookie_response.close()

        post_data = self._build_post_data(day, month, year)
        post_request = urllib2.Request(self.post_url, urllib.urlencode(post_data))
        cookie_jar.add_cookie_header(post_request)

        post_response = urllib2.urlopen(post_request)
        try:
            soup = BeautifulSoup(post_response.read())
        finally:
            post_response.close()

        # Each application is rendered as its own summary table.
        result_tables = soup.findAll("table", summary=re.compile("Summary of planning application"))

        for result_table in result_tables:
            application = PlanningApplication()

            # The application number cell holds a link whose text is the
            # reference and whose href is the detail page.
            number_link = result_table.find(text="Application number").findNext("a")
            application.info_url = urlparse.urljoin(self.post_url, number_link['href'])
            application.council_reference = number_link.string

            # The search runs from search_day to search_day, so every hit
            # is recorded with that date.
            application.date_received = search_day

            application.address = result_table.find(text="Location").findNext("td").p.string
            application.postcode = getPostcodeFromText(application.address)

            # NOTE(review): the first argument to replace() may originally have
            # been a non-breaking space or an HTML entity -- confirm against
            # the upstream file.
            application.description = result_table.find(text="Proposal").findNext("td").p.string.replace(" ", " ").strip()

            # Comment link gives an Access Denied, so we'll have to use the email
            application.comment_url = self.comments_email_address

            self._results.addApplication(application)

        return self._results

    def getResults(self, day, month, year):
        """Run the search for one date and return ``displayXML()`` of the results.

        ``day``, ``month`` and ``year`` may be ints or numeric strings; they
        are converted with ``int()`` before the search.
        """
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Manual smoke test: scrape the applications for 1 August 2008 and dump
    # the result to stdout.
    print(BirminghamParser().getResults(1, 8, 2008))
|
|
|
|
|
|