import urllib2
import urllib
import urlparse
import datetime
import time
import re

from BeautifulSoup import BeautifulSoup

from PlanningUtils import PlanningApplication, \
                          PlanningAuthorityResults, \
                          getPostcodeFromText


def clean_string(a_string):
    # Strip out "&nbsp;" entities left in the raw HTML and collapse all
    # runs of whitespace into single spaces.
    return ' '.join(' '.join(a_string.split("&nbsp;")).strip().split())
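
# A quick illustration with a made-up input:
#   clean_string("12&nbsp;&nbsp;High   Street ") -> "12 High Street"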


def remove_params(url):
    # Probably a bit naughty to use both urlparse and urlunsplit here,
    # but it does what we want - removing the jsessionid param.
    # (urlparse returns a 6-tuple; dropping element 3 - the params component
    # that carries ";jsessionid=..." - leaves the 5-tuple that urlunsplit
    # expects, with the query string intact.)
    parsed_url = urlparse.urlparse(url)
    params_free_url = urlparse.urlunsplit(parsed_url[:3] + parsed_url[4:])

    return params_free_url
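
# For instance (made-up URL), the jsessionid part is dropped while the query
# string survives:
#   remove_params("/WAM/pas/searchApplications.do;jsessionid=BCC7?x=1")
#     -> "/WAM/pas/searchApplications.do?x=1"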


# Scraper for the WAM-based planning application search sites used by a
# number of UK local authorities.
class WAMParser:
    address_column = 2
    date_format = "%d/%b/%Y"

    def __init__(self,
                 authority_name,
                 authority_short_name,
                 base_url,
                 debug=False):
        self.authority_name = authority_name
        self.authority_short_name = authority_short_name
        self.base_url = base_url

        self.debug = debug

        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

    def _get_search_data(self, year, month, day):
        timestamp = time.mktime((year, month, day, 0, 0, 0, 0, 0, 0))

        # The endDate parameter appears to be a unix timestamp multiplied by
        # 1000, i.e. milliseconds since the epoch.
        time_input = str(int(timestamp*1000))
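        # For example, midnight on 8 Feb 2008 UTC corresponds to
        # "1202428800000"; the exact value produced here depends on the
        # local timezone, since time.mktime works in local time.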

        # http://wam.boroughofpoole.com/WAM/pas/searchApplications.do;jsessionid=BCC7DFD1C42DC210A7BE5BA616683CDE
        # areaCode=%25&sortOrder=1&endDate=1197213359015&applicationType=%25&Button=Search
        search_data = (
            ("areaCode", "%"),
            ("sortOrder", "1"),
            ("endDate", time_input),
            ("applicationType", "%"),
            ("Button", "Search"),
            )

        return search_data

    def getResultsByDayMonthYear(self, day, month, year):
        search_data_tuple = self._get_search_data(year, month, day)
        search_data = urllib.urlencode(search_data_tuple)

        response = urllib2.urlopen(self.base_url, search_data)
        html = response.read()

        soup = BeautifulSoup(html)

        results_table = soup.find(text=re.compile("Your search returned the following")).findNext("table")

        # FIXME - deal with the empty results case
        # FIXME - deal with later pages of results

        # Skip the first row of the table, which holds the column headings.
        trs = results_table.findAll("tr")[1:]

        self._current_application = PlanningApplication()

        for tr in trs:
            try:
                tds = tr.findAll("td")

                date_received_string = tds[0].contents[0].strip()
                self._current_application.date_received = datetime.datetime.strptime(clean_string(date_received_string), self.date_format)

                relative_info_url = tr.a['href']
                info_url_no_params = remove_params(relative_info_url)

                # Now we join on the base url to make it absolute.
                self._current_application.info_url = urlparse.urljoin(self.base_url, info_url_no_params)

                self._current_application.council_reference = tr.a.string

                address = clean_string(tds[self.address_column].string)
                self._current_application.address = address
                self._current_application.postcode = getPostcodeFromText(address)

                # self._current_application.description = clean_string(tds[self.description_column].string)

                # Fetch the info page.
                info_response = urllib2.urlopen(self._current_application.info_url)
                info_html = info_response.read()
                info_soup = BeautifulSoup(info_html)

                try:
                    relative_comment_url = info_soup.find("a", href=re.compile("createComment.do"))['href']
                    comment_url_no_params = remove_params(relative_comment_url)

                    self._current_application.comment_url = urlparse.urljoin(self.base_url, comment_url_no_params)
                except TypeError:
                    # find() returned None - there is no comment link on this page.
                    if self.debug:
                        print "No comment url for %s" % (self._current_application.council_reference)
                    self._current_application.comment_url = "None"

                # Some WAM sites have the description in the results page,
                # but since they don't all have it there, we'll get it from here...
                description_td = info_soup.find(text="Development:").findNext("td")

                # Sometimes the description is in a span in the td, sometimes it is directly there.
                self._current_application.description = (description_td.string or description_td.span.string).strip()

                self._results.addApplication(self._current_application)

            except Exception:
                # It seems a shame to miss out on all the apps from an authority just because one breaks...
                if self._current_application.council_reference:
                    if self.debug:
                        print "Failed to add %s" % (self._current_application.council_reference)
                else:
                    if self.debug:
                        print "Failed to add an application"

            self._current_application = PlanningApplication()

        return self._results

    def getResults(self, day, month, year):
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
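
# A minimal usage sketch, mirroring the examples in the __main__ block below:
#   parser = PooleParser("Poole", "Poole",
#                        "http://wam.boroughofpoole.com/WAM/pas/searchApplications.do")
#   print parser.getResults(8, 2, 2008)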


class PooleParser(WAMParser):
    address_column = 1


class BraintreeParser(WAMParser):
    date_format = "%d %b %Y"

    def _get_search_data(self, year, month, day):
        # Braintree's weekly list needs an extra "action" parameter, e.g.:
        # action=showWeeklyList&areaCode=%25&sortOrder=1&endDate=1203249969656&applicationType=%25&Button=Search
        search_data = WAMParser._get_search_data(self, year, month, day)

        return (("action", "showWeeklyList"),) + search_data


if __name__ == '__main__':
    #parser = WAMParser("Barking and Dagenham", "Barking and Dagenham", "http://idoxwam.lbbd.gov.uk:8081/WAM/pas/searchApplications.do", debug=True)
    #parser = BraintreeParser("Braintree", "Braintree", "http://planningapp.braintree.gov.uk/WAM1/weeklyApplications.do", debug=True)
    # Camden
    #parser = WAMParser("Castle Point", "Castle Point", "http://wam.castlepoint.gov.uk/WAM/pas/searchApplications.do", debug=True)
    # Chichester - done as PublicAccess
    #parser = BraintreeParser("Colchester", "Colchester", "http://www.planning.colchester.gov.uk/WAM/weeklyApplications.do", debug=True)
    #parser = WAMParser("East Lothian", "East Lothian", "http://www.planning.eastlothian.gov.uk/WAM/pas/searchApplications.do", debug=True)
    #parser = BraintreeParser("North Somerset", "North Somerset", "http://wam.n-somerset.gov.uk/MULTIWAM/weeklyApplications.do", debug=True)
    #parser = WAMParser("Nottingham", "Nottingham", "http://plan4.nottinghamcity.gov.uk/WAM/pas/searchApplications.do", debug=True)
    #parser = PooleParser("Poole long", "Poole", "http://wam.boroughofpoole.com/WAM/pas/searchApplications.do", debug=True)
    #parser = WAMParser("Rother long", "Rother", "http://www.planning.rother.gov.uk/WAM/pas/searchApplications.do", debug=True)
    #parser = BraintreeParser("South Gloucestershire", "South Gloucestershire", "http://planning.southglos.gov.uk/WAM/pas/WeeklyApplications.do", debug=True)
#parser = WAMParser("South Norfolk", "South Norfolk", "http://wam.south-norfolk.gov.uk/WAM/pas/searchApplications.do", debug=True) |
|
|
|
parser = BraintreeParser("Tower Hamlets", "Tower Hamlets", "http://194.201.98.213/WAM/weeklyApplications.do", debug=True) |
|
|
|
#parser = WAMParser("Westminster", "Westminster", "http://idocs.westminster.gov.uk:8080/WAM/search/pas/index.htm", debug=True) |
|
|
|
|
|
|
|
print parser.getResults(8,2,2008) |

# Left to fix

# All:
# Paging
# Coping with no apps

# Barking and Dagenham - done
# Braintree - done
# Camden - also has a PlanningExplorer, which is done (so not bothering)
# Castle Point - done
# Chichester - not needed (PublicAccess site done)
# Colchester - done, like Braintree
# East Lothian - done
# North Somerset - done, like Braintree
# Nottingham - done (sometimes no comments)
# Poole - done
# Rother - done
# South Gloucestershire - done, like Braintree
# South Norfolk - works, but no postcodes. Also, the search link there points to PlanningExplorer. I think we should assume this is the current site.
# Tower Hamlets - done, like Braintree
# Westminster - not done: clearly WAM underneath, but with a wrapper.