@@ -0,0 +1,120 @@
import urllib2
import urllib
import urlparse

import datetime

import BeautifulSoup

import cookielib
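
# A single module-level cookie jar: it keeps the cookie recording our
# acceptance of the site's terms and conditions, so it can be re-sent on
# every later request.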
cookie_jar = cookielib.CookieJar()

from PlanningUtils import PlanningApplication, \
                          PlanningAuthorityResults, \
                          getPostcodeFromText
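
# The site's search form expects UK-style dd/mm/yyyy dates.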
date_format = "%d/%m/%Y"


class EastbourneParser:
    def __init__(self, *args):
        self.authority_name = "Eastbourne Borough Council"
        self.authority_short_name = "Eastbourne"
        # self.base_url = "http://www.eastbourne.gov.uk/planningapplications/search.asp"
        self.first_url = "http://www.eastbourne.gov.uk/planningapplications/index.asp"
        self.base_url = "http://www.eastbourne.gov.uk/planningapplications/results.asp"
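
        # Collects the applications scraped for this authority.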
        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

    def getResultsByDayMonthYear(self, day, month, year):
        search_day = datetime.date(year, month, day)

        # There's going to be some faffing around here: we need a cookie to
        # say we have agreed to some T&Cs.

        # First get the search page - we'll be redirected somewhere else for
        # not having the cookie.
        first_request = urllib2.Request(self.first_url)
        first_response = urllib2.urlopen(first_request)
        cookie_jar.extract_cookies(first_response, first_request)
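
        # Parse the page we were redirected to: it holds the form for
        # accepting the terms and conditions.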
        first_page_soup = BeautifulSoup.BeautifulSoup(first_response.read())
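
        # Resolve the form's action against the page URL, and grab its first
        # input - we must post its name/value pair back to accept the terms.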
        first_page_action = urlparse.urljoin(self.first_url, first_page_soup.form['action'])
        the_input = first_page_soup.form.input

        second_page_post_data = urllib.urlencode(
            ((the_input['name'], the_input['value']),)
        )
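
        # Post the acceptance, attaching the cookie we already have and
        # capturing any new ones that come back.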
        second_request = urllib2.Request(first_page_action, second_page_post_data)
        cookie_jar.add_cookie_header(second_request)
        second_response = urllib2.urlopen(second_request)
        cookie_jar.extract_cookies(second_response, second_request)

        # Now (finally) we can run the actual search.
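
        # A captured example of the query string this search submits, kept
        # for reference: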
        #ApplicationNumber=&AddressPrefix=&Postcode=&CaseOfficer=&WardMember=&DateReceivedStart=31%2F08%2F2008&DateReceivedEnd=31%2F08%2F2008&DateDecidedStart=&DateDecidedEnd=&Locality=&AgentName=&ApplicantName=&ShowDecided=&DecisionLevel=&Sort1=FullAddressPrefix&Sort2=DateReceived+DESC&Submit=Search

        post_data = urllib.urlencode(
            (
                ("ApplicationNumber", ""),
                ("AddressPrefix", ""),
                ("Postcode", ""),
                ("CaseOfficer", ""),
                ("WardMember", ""),
                ("DateReceivedStart", search_day.strftime(date_format)),
                ("DateReceivedEnd", search_day.strftime(date_format)),
                ("DateDecidedStart", ""),
                ("DateDecidedEnd", ""),
                ("Locality", ""),
                ("AgentName", ""),
                ("ApplicantName", ""),
                ("ShowDecided", ""),
                ("DecisionLevel", ""),
                ("Sort1", "FullAddressPrefix"),
                ("Sort2", "DateReceived DESC"),
                ("Submit", "Search"),
            )
        )
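
        # POST the search to the results page, again attaching the T&C cookie.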
        search_request = urllib2.Request(self.base_url)
        cookie_jar.add_cookie_header(search_request)
        search_response = urllib2.urlopen(search_request, post_data)

        soup = BeautifulSoup.BeautifulSoup(search_response.read())
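
        # Each result is anchored by an "App. No.:" label; we walk forward
        # from each label to pick out that application's fields.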
        app_no_strings = soup.findAll(text="App. No.:")

        for app_no_string in app_no_strings:
            application = PlanningApplication()
            application.date_received = search_day
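
            # The first link after the label carries the council's reference
            # and points at the application's detail page.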
            application.council_reference = app_no_string.findNext("a").string.strip()
            application.info_url = urlparse.urljoin(self.base_url, app_no_string.findNext("a")['href'])
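
            # The address cell mixes text with markup: keep only the bare
            # text nodes and join them into a single line.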
            address_td = app_no_string.findNext(text="Site Address:").findNext("td")
            application.address = ' '.join(
                x.strip() for x in address_td.contents
                if type(x) == BeautifulSoup.NavigableString
            )
            application.postcode = getPostcodeFromText(application.address)
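
            # "Comment on application" is a text node inside a link; its
            # parent tag holds the comment URL.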
            application.comment_url = urlparse.urljoin(self.base_url, app_no_string.findNext(text="Comment on application").parent['href'])

            application.description = app_no_string.findNext(text="Description:").findNext("td").string.strip()

            self._results.addApplication(application)

        return self._results
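
    # Convenience wrapper: run the search for a given date and return the
    # results serialised as XML.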
    def getResults(self, day, month, year):
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()


if __name__ == '__main__':
    parser = EastbourneParser()
    print parser.getResults(1, 9, 2008)

# TODO - currently paginates at 20: only the first page of results is
# fetched, so days with more than 20 applications will be incomplete.
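#
# A possible approach (untested sketch - the "Next" link text and its markup
# are assumptions, not checked against the site):
#
#     next_text = soup.find(text="Next")
#     while next_text is not None:
#         next_url = urlparse.urljoin(self.base_url, next_text.parent['href'])
#         soup = BeautifulSoup.BeautifulSoup(urllib2.urlopen(next_url).read())
#         ...parse this page's applications as above...
#         next_text = soup.find(text="Next")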