Factor out CookieAddingHTTPRedirectHandler.import/raw
@@ -0,0 +1,100 @@ | |||
import urllib2 | |||
import urllib | |||
import urlparse | |||
import datetime, time | |||
import cgi | |||
import re | |||
import cookielib | |||
cookie_jar = cookielib.CookieJar() | |||
from BeautifulSoup import BeautifulSoup | |||
from PlanningUtils import PlanningApplication, \ | |||
PlanningAuthorityResults, \ | |||
getPostcodeFromText | |||
from HTTPHandlers import CookieAddingHTTPRedirectHandler | |||
# Opener used for the search POST: its redirect handler re-attaches cookies
# from the shared cookie_jar to the request issued after a redirect.
cookie_handling_opener = urllib2.build_opener(
    CookieAddingHTTPRedirectHandler(cookie_jar)
)

# The search form takes US-style (month first) dates...
search_date_format = "%m/%d/%Y"
# ...whereas the info page uses UK-style (day first) dates.
info_page_date_format = "%d/%m/%Y"
class GosportParser:
    """Scraper for the Gosport Borough Council planning application search.

    Applications found by the search are accumulated in a
    PlanningAuthorityResults object held on the instance.
    """

    def __init__(self, *args):
        """Set the authority names and site URLs and create an empty result set.

        Any positional arguments are accepted and ignored.
        """
        self.authority_name = "Gosport Borough Council"
        self.authority_short_name = "Gosport"
        self.base_url = "http://www.gosport.gov.uk/gbcplanning/ApplicationSearch2.aspx"
        self.info_url = "http://www.gosport.gov.uk/gbcplanning/ApplicationDetails.aspx?ID=%s"

        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

    def getResultsByDayMonthYear(self, day, month, year):
        """Search for applications submitted on the given date.

        day, month and year are integers. The search page is first fetched
        with a GET to obtain cookies and the hidden form fields, then the
        search form is POSTed with the requested date.

        Returns the PlanningAuthorityResults object held on this instance,
        with one PlanningApplication added per row of the results table.
        """
        search_date = datetime.date(year, month, day)

        # Initial GET: collect the cookies and the hidden form values that
        # have to be echoed back in the search POST.
        get_request = urllib2.Request(self.base_url)
        get_response = urllib2.urlopen(get_request)
        cookie_jar.extract_cookies(get_response, get_request)
        get_soup = BeautifulSoup(get_response.read())

        # Only the "date submitted" search is used; the form's address,
        # applicant name, agent name and date determined fields are not sent.
        post_data = (
            ("__VIEWSTATE", get_soup.find("input", {"name": "__VIEWSTATE"})["value"]),
            ("pgid", get_soup.find("input", {"name": "pgid"})["value"]),
            ("action", "Search"),
            # Use the requested date (previously a fixed date was sent here,
            # so every call searched the same day).
            ("ApplicationSearch21:tbDateSubmitted", search_date.strftime(search_date_format)),
            ("ApplicationSearch21:btnDateSubmitted", "Search"),
        )

        post_request = urllib2.Request(self.base_url, urllib.urlencode(post_data))
        cookie_jar.add_cookie_header(post_request)
        post_response = cookie_handling_opener.open(post_request)
        post_soup = BeautifulSoup(post_response.read())

        results_table = post_soup.find("table", id="SearchResults1_dgSearchResults")
        if results_table is None:
            # find() returns None when the table is absent (presumably the
            # case for a date with no applications - TODO confirm); there is
            # nothing to add, so return what we have instead of crashing.
            return self._results

        # Discard the first <tr>, which contains headers
        trs = results_table.findAll("tr")[1:]

        for tr in trs:
            application = PlanningApplication()

            tds = tr.findAll("td")

            application.council_reference = tds[0].string.strip()
            application.address = tds[1].string.strip()
            application.postcode = getPostcodeFromText(application.address)
            application.description = tds[2].string.strip()

            # The date column is parsed with the day-first format.
            application.date_received = datetime.datetime(*(time.strptime(tds[3].string.strip(), info_page_date_format)[0:6]))
            application.info_url = self.info_url % (application.council_reference)

            # The comment url must be accessed by a POST, so we'll just use the info url for that as well
            application.comment_url = application.info_url

            self._results.addApplication(application)

        return self._results

    def getResults(self, day, month, year):
        """Return the search results for the given date as displayXML() output.

        day, month and year may be anything int() accepts.
        """
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
if __name__ == '__main__':
    # Manual check: fetch and print the results for 1 October 2008.
    parser = GosportParser()
    results_xml = parser.getResults(1, 10, 2008)
    print(results_xml)
@@ -0,0 +1,18 @@ | |||
from urllib2 import HTTPRedirectHandler | |||
class CookieAddingHTTPRedirectHandler(HTTPRedirectHandler):
    """The standard python HttpRedirectHandler doesn't add a cookie to the new request after a 302. This handler does."""

    def __init__(self, cookie_jar):
        """Remember the cookie jar whose cookies are added to redirected requests.

        cookie_jar must provide an add_cookie_header(request) method.
        """
        self.cookie_jar = cookie_jar

        # This really ought to call the superclasses init method, but there doesn't seem to be one.

    def redirect_request(self, *args):
        """Build the follow-up request for a redirect and attach cookies to it.

        All arguments are passed straight through to
        HTTPRedirectHandler.redirect_request. Returns whatever that method
        returns; exceptions it raises propagate unchanged.
        """
        new_request = HTTPRedirectHandler.redirect_request(self, *args)

        # redirect_request is allowed to return None (meaning "not handled
        # here"), in which case there is no request to add a cookie to.
        if new_request is not None:
            # We need to add a cookie from the cookie_jar
            self.cookie_jar.add_cookie_header(new_request)

        return new_request
@@ -18,16 +18,9 @@ search_date_format = "%d-%m-%Y" # Format used for the accepted date when searchi | |||
possible_date_formats = [search_date_format, "%d/%m/%Y"] | |||
class CookieAddingHTTPRedirectHandler(urllib2.HTTPRedirectHandler): | |||
"""The standard python HttpRedirectHandler doesn't add a cookie to the new request after a 302. This handler does.""" | |||
def redirect_request(self, req, fp, code, msg, headers, newurl): | |||
new_request = urllib2.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl) | |||
# We need to add a cookie from the cookie_jar | |||
cookie_jar.add_cookie_header(new_request) | |||
from HTTPHandlers import CookieAddingHTTPRedirectHandler | |||
return new_request | |||
cookie_handling_opener = urllib2.build_opener(CookieAddingHTTPRedirectHandler()) | |||
cookie_handling_opener = urllib2.build_opener(CookieAddingHTTPRedirectHandler(cookie_jar)) | |||
class OcellaParser: | |||
@@ -206,7 +199,7 @@ if __name__ == '__main__': | |||
# parser = OcellaParser("North East Lincolnshire", "North East Lincolnshire", "http://planning.nelincs.gov.uk/portal/page?_pageid=33,64104&_dad=portal&_schema=PORTAL") | |||
# parser = OcellaParser("Uttlesford", "Uttlesford", "http://planning.uttlesford.gov.uk/portal/page/portal/plan/weekly") | |||
# parser = OcellaParser("Bridgend", "Bridgend", "http://eplan.bridgend.gov.uk:7778/portal/page?_pageid=55,31779&_dad=portal&_schema=PORTAL") | |||
parser = OcellaParser("Havering", "Havering", "http://planning.havering.gov.uk/portal/page?_pageid=33,1026&_dad=portal&_schema=PORTAL") | |||
# parser = OcellaParser("Havering", "Havering", "http://planning.havering.gov.uk/portal/page?_pageid=33,1026&_dad=portal&_schema=PORTAL") | |||
# parser = OcellaParser("Castle Point", "Castle Point", "http://planning.castlepoint.gov.uk/portal/page?_pageid=35,38205&_dad=portal&_schema=PORTAL") | |||
parser = OcellaParser("Great Yarmouth", "Great Yarmouth", "http://planning.great-yarmouth.gov.uk/portal/page/portal/plan/weekly") | |||
@@ -1,6 +1,7 @@ | |||
"filename", "permissions" | |||
"PublicAccess.py", "420" | |||
"PlanningUtils.py", "420" | |||
"HTTPHandlers.py", "420" | |||
"SouthOxfordshireParser.py", "420" | |||
"SouthOxfordshire.cgi", "493" | |||
"ApplicationSearchServletParser.py", "420" | |||
@@ -58,3 +59,4 @@ | |||
"Herefordshire.py", "420" | |||
"Exmoor.py", "420" | |||
"Eastbourne.py", "420" | |||
"Gosport.py", "420" |
@@ -264,3 +264,4 @@ | |||
"Eastbourne Borough Council", "Eastbourne", "", "Eastbourne", "EastbourneParser" | |||
"Waltham Forest Council", "Waltham Forest", "http://planning.walthamforest.gov.uk/", "PlanningExplorer", "WalthamForestParser" | |||
"Caerphilly County Borough Council", "Caerphilly", "http://publicaccess.caerphilly.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser" | |||
"Gosport Borough Council", "Gosport", "", "Gosport", "GosportParser" |