|
|
@@ -0,0 +1,155 @@ |
|
|
|
import urllib2 |
|
|
|
import urllib |
|
|
|
import urlparse |
|
|
|
|
|
|
|
from cgi import parse_qs |
|
|
|
|
|
|
|
import datetime |
|
|
|
|
|
|
|
import cookielib |
|
|
|
|
|
|
|
cookie_jar = cookielib.CookieJar() |
|
|
|
|
|
|
|
from BeautifulSoup import BeautifulSoup |
|
|
|
|
|
|
|
from PlanningUtils import PlanningApplication, \ |
|
|
|
PlanningAuthorityResults, \ |
|
|
|
getPostcodeFromText |
|
|
|
|
|
|
|
date_format = "%d/%m/%Y" |
|
|
|
|
|
|
|
class PlanetParser: |
|
|
|
def __init__(self, |
|
|
|
authority_name, |
|
|
|
authority_short_name, |
|
|
|
base_url, |
|
|
|
debug=False): |
|
|
|
|
|
|
|
self.authority_name = authority_name |
|
|
|
self.authority_short_name = authority_short_name |
|
|
|
self.base_url = base_url |
|
|
|
|
|
|
|
self.debug = debug |
|
|
|
|
|
|
|
self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name) |
|
|
|
|
|
|
|
def getResultsByDayMonthYear(self, day, month, year): |
|
|
|
# What is the serviceKey for this council? |
|
|
|
# It's in our base url |
|
|
|
query_string = urlparse.urlsplit(self.base_url)[3] |
|
|
|
|
|
|
|
# This query string just contains the servicekey |
|
|
|
query_dict = parse_qs(query_string) |
|
|
|
|
|
|
|
service_key = query_dict['serviceKey'][0] |
|
|
|
|
|
|
|
# First get the form |
|
|
|
get_request = urllib2.Request(self.base_url) |
|
|
|
get_response = urllib2.urlopen(get_request) |
|
|
|
|
|
|
|
cookie_jar.extract_cookies(get_response, get_request) |
|
|
|
|
|
|
|
# We also need to get the security token |
|
|
|
get_soup = BeautifulSoup(get_response.read()) |
|
|
|
|
|
|
|
security_token = get_soup.find('input', {'name': 'securityToken'})['value'] |
|
|
|
|
|
|
|
# Now post to it |
|
|
|
search_date = datetime.date(year, month, day) |
|
|
|
|
|
|
|
search_data = urllib.urlencode( |
|
|
|
{ |
|
|
|
"serviceKey":service_key, |
|
|
|
"securityToken": security_token, |
|
|
|
"STEP":"Planet_SearchCriteria", |
|
|
|
#X.resultCount= |
|
|
|
"X.pageNumber": "0", |
|
|
|
"X.searchCriteria_StartDate": search_date.strftime(date_format), |
|
|
|
"X.searchCriteria_EndDate": search_date.strftime(date_format), |
|
|
|
} |
|
|
|
) |
|
|
|
|
|
|
|
post_request = urllib2.Request(self.base_url, search_data) |
|
|
|
cookie_jar.add_cookie_header(post_request) |
|
|
|
|
|
|
|
post_response = urllib2.urlopen(post_request) |
|
|
|
|
|
|
|
post_soup = BeautifulSoup(post_response.read()) |
|
|
|
|
|
|
|
# Now we need to find the results. We'll do this by searching for the text "Ref No" and then going forward from there. For some reason a search for the table gets the table without contents |
|
|
|
|
|
|
|
ref_no_text = post_soup.find(text="Ref No") |
|
|
|
|
|
|
|
first_tr = ref_no_text.findNext("tr") |
|
|
|
|
|
|
|
other_trs = first_tr.findNextSiblings() |
|
|
|
|
|
|
|
trs = [first_tr] + other_trs |
|
|
|
|
|
|
|
for tr in trs: |
|
|
|
self._current_application = PlanningApplication() |
|
|
|
|
|
|
|
# We don't need to get the date, it's the one we searched for. |
|
|
|
self._current_application.date_received = search_date |
|
|
|
|
|
|
|
tds = tr.findAll("td") |
|
|
|
|
|
|
|
self._current_application.council_reference = tds[0].a.string.strip() |
|
|
|
self._current_application.address = tds[1].string.strip() |
|
|
|
self._current_application.postcode = getPostcodeFromText(self._current_application.address) |
|
|
|
|
|
|
|
self._current_application.description = tds[2].string.strip() |
|
|
|
|
|
|
|
# There is no good info url, so we just give the search page. |
|
|
|
self._current_application.info_url = self.base_url |
|
|
|
|
|
|
|
# Similarly for the comment url |
|
|
|
self._current_application.comment_url = self.base_url |
|
|
|
|
|
|
|
self._results.addApplication(self._current_application) |
|
|
|
|
|
|
|
return self._results |
|
|
|
|
|
|
|
|
|
|
|
# post data for worcester |
|
|
|
# hopefully we can ignore almost all of this... |
|
|
|
|
|
|
|
#ACTION=NEXT |
|
|
|
#serviceKey=SysDoc-PlanetApplicationEnquiry |
|
|
|
#serviceGeneration= |
|
|
|
#securityToken=NTgxMjE3OTExMjA4OQ%3D%3D |
|
|
|
#enquiry= |
|
|
|
#STEP=Planet_SearchCriteria |
|
|
|
#RECEIVED= |
|
|
|
#COMMENTS= |
|
|
|
#LAST_UPDATED= |
|
|
|
#status= |
|
|
|
#X.endEnquiry= |
|
|
|
#X.resultCount= |
|
|
|
#X.applicationNotFound= |
|
|
|
#X.pageNumber=0 |
|
|
|
#X.searchCriteria_ApplicationReference= |
|
|
|
#X.searchCriteria_StartDate=20%2F04%2F2008 |
|
|
|
#X.searchCriteria_EndDate=20%2F04%2F2008 |
|
|
|
#X.searchCriteria_Ward= |
|
|
|
#X.searchCriteria_Parish= |
|
|
|
#X.searchCriteria_Address= |
|
|
|
#X.searchCriteria_Postcode= |
|
|
|
#X.searchCriteria_ApplicantName= |
|
|
|
#X.searchCriteria_AgentName= |
|
|
|
#X.searchCriteria_UndecidedApplications= |
|
|
|
|
|
|
|
return self._results |
|
|
|
|
|
|
|
|
|
|
|
def getResults(self, day, month, year): |
|
|
|
return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML() |
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__': |
|
|
|
# parser = PlanetParser("Worcester City Council", "Worcester", "http://www.worcester.gov.uk:8080/planet/ispforms.asp?serviceKey=SysDoc-PlanetApplicationEnquiry", debug=True) |
|
|
|
# parser = PlanetParser("Elmbridge Borough Council", "Elmbridge", "http://www2.elmbridge.gov.uk/Planet/ispforms.asp?serviceKey=SysDoc-PlanetApplicationEnquiry") |
|
|
|
# parser = PlanetParser("North Lincolnshire Council", "North Lincolnshire", "http://www.planning.northlincs.gov.uk/planet/ispforms.asp?ServiceKey=SysDoc-PlanetApplicationEnquiry") |
|
|
|
# parser = PlanetParser("Rydale District Council", "Rydale", "http://www.ryedale.gov.uk/ispforms.asp?serviceKey=SysDoc-PlanetApplicationEnquiry") |
|
|
|
parser = PlanetParser("Tewkesbury Borough Council", "Tewkesbury", "http://planning.tewkesbury.gov.uk/Planet/ispforms.asp?serviceKey=07WCC04163103430") |
|
|
|
print parser.getResults(21,5,2008) |