@@ -0,0 +1,100 @@ | |||||
# Standard library.
import urllib2
import urllib
import urlparse
import datetime
import time
import cgi
import re
import cookielib

# HTML parsing.
from BeautifulSoup import BeautifulSoup

# Helpers shared with the other scrapers in this project.
from PlanningUtils import (PlanningApplication,
                           PlanningAuthorityResults,
                           getPostcodeFromText)
from HTTPHandlers import CookieAddingHTTPRedirectHandler

# A single module-level cookie jar, and an opener whose redirect handler
# re-attaches cookies from that jar to the request made after a redirect.
cookie_jar = cookielib.CookieJar()
cookie_handling_opener = urllib2.build_opener(
    CookieAddingHTTPRedirectHandler(cookie_jar)
)

# That's right, the search date is US style...
search_date_format = "%m/%d/%Y"
# ...and the info page is UK style.
info_page_date_format = "%d/%m/%Y"
class GosportParser:
    """Scraper for planning applications listed by Gosport Borough Council.

    The search page is a form that has to be fetched first (to pick up the
    cookie and the hidden ``__VIEWSTATE`` / ``pgid`` fields) and then posted
    back with the submission date being searched for.
    """

    def __init__(self, *args):
        """Set up authority names, URLs and an empty result set.

        Positional arguments are accepted but ignored.
        """
        self.authority_name = "Gosport Borough Council"
        self.authority_short_name = "Gosport"
        self.base_url = "http://www.gosport.gov.uk/gbcplanning/ApplicationSearch2.aspx"
        self.info_url = "http://www.gosport.gov.uk/gbcplanning/ApplicationDetails.aspx?ID=%s"

        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

    def getResultsByDayMonthYear(self, day, month, year):
        """Fetch the applications submitted on the given date.

        day, month and year are integers. Each row of the results table is
        turned into a PlanningApplication and added to this parser's result
        set, which is then returned. If the response has no results table,
        the result set is returned without any additions.
        """
        search_date = datetime.date(year, month, day)

        # Fetch the search form first: we need its cookie and hidden fields.
        get_request = urllib2.Request(self.base_url)
        get_response = urllib2.urlopen(get_request)
        cookie_jar.extract_cookies(get_response, get_request)
        get_soup = BeautifulSoup(get_response.read())

        # Only the "date submitted" search is used; the form's address,
        # applicant name, agent name and date determined fields are not sent.
        post_data = (
            ("__VIEWSTATE", get_soup.find("input", {"name": "__VIEWSTATE"})["value"]),
            ("pgid", get_soup.find("input", {"name": "pgid"})["value"]),
            ("action", "Search"),
            # Send the date that was actually asked for (in the US-style
            # format the search form wants) rather than a fixed date.
            ("ApplicationSearch21:tbDateSubmitted", search_date.strftime(search_date_format)),
            ("ApplicationSearch21:btnDateSubmitted", "Search"),
        )

        post_request = urllib2.Request(self.base_url, urllib.urlencode(post_data))
        cookie_jar.add_cookie_header(post_request)
        # The opener's redirect handler re-adds the cookie after a redirect.
        post_response = cookie_handling_opener.open(post_request)
        post_soup = BeautifulSoup(post_response.read())

        results_table = post_soup.find("table", id="SearchResults1_dgSearchResults")
        if results_table is None:
            # No results table in the response - presumably nothing was
            # submitted on this date. Nothing to add.
            return self._results

        # Discard the first <tr>, which contains headers
        for tr in results_table.findAll("tr")[1:]:
            tds = tr.findAll("td")

            application = PlanningApplication()
            application.council_reference = tds[0].string.strip()
            application.address = tds[1].string.strip()
            application.postcode = getPostcodeFromText(application.address)
            application.description = tds[2].string.strip()
            # Dates in the results table are parsed with the UK-style format.
            application.date_received = datetime.datetime(
                *(time.strptime(tds[3].string.strip(), info_page_date_format)[0:6])
            )
            application.info_url = self.info_url % (application.council_reference)
            # The comment url must be accessed by a POST, so we'll just use
            # the info url for that as well
            application.comment_url = application.info_url

            self._results.addApplication(application)

        return self._results

    def getResults(self, day, month, year):
        """Return displayXML() of the results for the given date.

        day, month and year may be anything int() accepts.
        """
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
if __name__ == '__main__':
    # Manual smoke test: print the results for 1 October 2008.
    parser = GosportParser()
    output = parser.getResults(1, 10, 2008)
    print(output)
@@ -0,0 +1,18 @@ | |||||
from urllib2 import HTTPRedirectHandler | |||||
class CookieAddingHTTPRedirectHandler(HTTPRedirectHandler):
    """Redirect handler that puts cookies on the request made after a 302.

    The standard python HTTPRedirectHandler doesn't add a cookie to the new
    request it builds when following a redirect. This handler does, taking
    the cookies from the jar it was constructed with.
    """

    def __init__(self, cookie_jar):
        """Remember the cookie jar to draw cookies from."""
        # This really ought to call the superclass's __init__ as well, but
        # there doesn't seem to be one.
        self.cookie_jar = cookie_jar

    def redirect_request(self, *args):
        """Build the redirected request as usual, then add the jar's cookies.

        All arguments are passed straight through to
        HTTPRedirectHandler.redirect_request.
        """
        redirected = HTTPRedirectHandler.redirect_request(self, *args)
        self.cookie_jar.add_cookie_header(redirected)
        return redirected
@@ -18,16 +18,9 @@ search_date_format = "%d-%m-%Y" # Format used for the accepted date when searchi | |||||
possible_date_formats = [search_date_format, "%d/%m/%Y"] | possible_date_formats = [search_date_format, "%d/%m/%Y"] | ||||
class CookieAddingHTTPRedirectHandler(urllib2.HTTPRedirectHandler): | |||||
"""The standard python HttpRedirectHandler doesn't add a cookie to the new request after a 302. This handler does.""" | |||||
def redirect_request(self, req, fp, code, msg, headers, newurl): | |||||
new_request = urllib2.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl) | |||||
# We need to add a cookie from the cookie_jar | |||||
cookie_jar.add_cookie_header(new_request) | |||||
from HTTPHandlers import CookieAddingHTTPRedirectHandler | |||||
return new_request | |||||
cookie_handling_opener = urllib2.build_opener(CookieAddingHTTPRedirectHandler()) | |||||
cookie_handling_opener = urllib2.build_opener(CookieAddingHTTPRedirectHandler(cookie_jar)) | |||||
class OcellaParser: | class OcellaParser: | ||||
@@ -206,7 +199,7 @@ if __name__ == '__main__': | |||||
# parser = OcellaParser("North East Lincolnshire", "North East Lincolnshire", "http://planning.nelincs.gov.uk/portal/page?_pageid=33,64104&_dad=portal&_schema=PORTAL") | # parser = OcellaParser("North East Lincolnshire", "North East Lincolnshire", "http://planning.nelincs.gov.uk/portal/page?_pageid=33,64104&_dad=portal&_schema=PORTAL") | ||||
# parser = OcellaParser("Uttlesford", "Uttlesford", "http://planning.uttlesford.gov.uk/portal/page/portal/plan/weekly") | # parser = OcellaParser("Uttlesford", "Uttlesford", "http://planning.uttlesford.gov.uk/portal/page/portal/plan/weekly") | ||||
# parser = OcellaParser("Bridgend", "Bridgend", "http://eplan.bridgend.gov.uk:7778/portal/page?_pageid=55,31779&_dad=portal&_schema=PORTAL") | # parser = OcellaParser("Bridgend", "Bridgend", "http://eplan.bridgend.gov.uk:7778/portal/page?_pageid=55,31779&_dad=portal&_schema=PORTAL") | ||||
parser = OcellaParser("Havering", "Havering", "http://planning.havering.gov.uk/portal/page?_pageid=33,1026&_dad=portal&_schema=PORTAL") | |||||
# parser = OcellaParser("Havering", "Havering", "http://planning.havering.gov.uk/portal/page?_pageid=33,1026&_dad=portal&_schema=PORTAL") | |||||
# parser = OcellaParser("Castle Point", "Castle Point", "http://planning.castlepoint.gov.uk/portal/page?_pageid=35,38205&_dad=portal&_schema=PORTAL") | # parser = OcellaParser("Castle Point", "Castle Point", "http://planning.castlepoint.gov.uk/portal/page?_pageid=35,38205&_dad=portal&_schema=PORTAL") | ||||
parser = OcellaParser("Great Yarmouth", "Great Yarmouth", "http://planning.great-yarmouth.gov.uk/portal/page/portal/plan/weekly") | parser = OcellaParser("Great Yarmouth", "Great Yarmouth", "http://planning.great-yarmouth.gov.uk/portal/page/portal/plan/weekly") | ||||
@@ -1,6 +1,7 @@ | |||||
"filename", "permissions" | "filename", "permissions" | ||||
"PublicAccess.py", "420" | "PublicAccess.py", "420" | ||||
"PlanningUtils.py", "420" | "PlanningUtils.py", "420" | ||||
"HTTPHandlers.py", "420" | |||||
"SouthOxfordshireParser.py", "420" | "SouthOxfordshireParser.py", "420" | ||||
"SouthOxfordshire.cgi", "493" | "SouthOxfordshire.cgi", "493" | ||||
"ApplicationSearchServletParser.py", "420" | "ApplicationSearchServletParser.py", "420" | ||||
@@ -58,3 +59,4 @@ | |||||
"Herefordshire.py", "420" | "Herefordshire.py", "420" | ||||
"Exmoor.py", "420" | "Exmoor.py", "420" | ||||
"Eastbourne.py", "420" | "Eastbourne.py", "420" | ||||
"Gosport.py", "420" |
@@ -264,3 +264,4 @@ | |||||
"Eastbourne Borough Council", "Eastbourne", "", "Eastbourne", "EastbourneParser" | "Eastbourne Borough Council", "Eastbourne", "", "Eastbourne", "EastbourneParser" | ||||
"Waltham Forest Council", "Waltham Forest", "http://planning.walthamforest.gov.uk/", "PlanningExplorer", "WalthamForestParser" | "Waltham Forest Council", "Waltham Forest", "http://planning.walthamforest.gov.uk/", "PlanningExplorer", "WalthamForestParser" | ||||
"Caerphilly County Borough Council", "Caerphilly", "http://publicaccess.caerphilly.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser" | "Caerphilly County Borough Council", "Caerphilly", "http://publicaccess.caerphilly.gov.uk/PublicAccess/tdc/", "PublicAccess", "PublicAccessParser" | ||||
"Gosport Borough Council", "Gosport", "", "Gosport", "GosportParser" |