|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175 |
- import urllib2
- import urllib
- import urlparse
-
- from cgi import parse_qs
-
- import datetime
-
- import cookielib
-
- cookie_jar = cookielib.CookieJar()
-
- from BeautifulSoup import BeautifulSoup
-
- from PlanningUtils import PlanningApplication, \
- PlanningAuthorityResults, \
- getPostcodeFromText
-
- date_format = "%d/%m/%Y"
-
class PlanetParser:
    """Scraper for councils running the Software AG "Planet" planning system.

    GETs the search form (to pick up the session cookie and the one-shot
    security token), POSTs a single-day date-range search, and parses the
    results table into a PlanningAuthorityResults object.
    """
    def __init__(self,
                 authority_name,
                 authority_short_name,
                 base_url,
                 debug=False):

        self.authority_name = authority_name
        self.authority_short_name = authority_short_name
        self.base_url = base_url

        self.debug = debug

        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

    def get_info_url(self, soup_fragment):
        # Planet sites have no per-application info page, so the search
        # page itself is the best link we can offer. Subclasses override
        # this where a council provides something better.
        return self.base_url

    def get_comment_url(self, soup_fragment):
        # Comments go through the same page as the info url.
        return self.get_info_url(soup_fragment)

    def getResultsByDayMonthYear(self, day, month, year):
        """Fetch all applications received on the given date.

        Returns the accumulated PlanningAuthorityResults.
        """
        # What is the serviceKey for this council?
        # It's in the query string of our base url.
        query_string = urlparse.urlsplit(self.base_url)[3]

        # This query string just contains the servicekey
        query_dict = parse_qs(query_string)

        service_key = query_dict['serviceKey'][0]

        # First get the form - we need its session cookie and the
        # security token embedded in the page.
        get_request = urllib2.Request(self.base_url)
        get_response = urllib2.urlopen(get_request)

        cookie_jar.extract_cookies(get_response, get_request)

        get_soup = BeautifulSoup(get_response.read())

        security_token = get_soup.find('input', {'name': 'securityToken'})['value']

        # Now post the search back to the same url.
        search_date = datetime.date(year, month, day)

        search_data = urllib.urlencode(
            {
            "serviceKey":service_key,
            "securityToken": security_token,
            "STEP":"Planet_SearchCriteria",
            "X.pageNumber": "0",
            "X.searchCriteria_StartDate": search_date.strftime(date_format),
            "X.searchCriteria_EndDate": search_date.strftime(date_format),
            }
            )

        post_request = urllib2.Request(self.base_url, search_data)
        cookie_jar.add_cookie_header(post_request)

        post_response = urllib2.urlopen(post_request)

        post_soup = BeautifulSoup(post_response.read())

        # Now we need to find the results. We'll do this by searching for
        # the text "Ref No" and then going forward from there. For some
        # reason a search for the table gets the table without contents.
        ref_no_text = post_soup.find(text="Ref No")

        first_tr = ref_no_text.findNext("tr")

        other_trs = first_tr.findNextSiblings()

        trs = [first_tr] + other_trs

        for tr in trs:
            self._current_application = PlanningApplication()

            # We don't need to get the date, it's the one we searched for.
            self._current_application.date_received = search_date

            tds = tr.findAll("td")

            self._current_application.council_reference = tds[0].a.string.strip()
            self._current_application.address = tds[1].string.strip()
            self._current_application.postcode = getPostcodeFromText(self._current_application.address)

            self._current_application.description = tds[2].string.strip()

            # There is no good info url, so we just give the search page.
            self._current_application.info_url = self.get_info_url(tr)

            # Similarly for the comment url
            self._current_application.comment_url = self.get_comment_url(tr)

            self._results.addApplication(self._current_application)

        return self._results

    # Reference: full post data observed for Worcester.
    # Hopefully we can ignore almost all of this...
    #
    #ACTION=NEXT
    #serviceKey=SysDoc-PlanetApplicationEnquiry
    #serviceGeneration=
    #securityToken=NTgxMjE3OTExMjA4OQ%3D%3D
    #enquiry=
    #STEP=Planet_SearchCriteria
    #RECEIVED=
    #COMMENTS=
    #LAST_UPDATED=
    #status=
    #X.endEnquiry=
    #X.resultCount=
    #X.applicationNotFound=
    #X.pageNumber=0
    #X.searchCriteria_ApplicationReference=
    #X.searchCriteria_StartDate=20%2F04%2F2008
    #X.searchCriteria_EndDate=20%2F04%2F2008
    #X.searchCriteria_Ward=
    #X.searchCriteria_Parish=
    #X.searchCriteria_Address=
    #X.searchCriteria_Postcode=
    #X.searchCriteria_ApplicantName=
    #X.searchCriteria_AgentName=
    #X.searchCriteria_UndecidedApplications=

    def getResults(self, day, month, year):
        """Entry point: return the given day's results rendered as XML."""
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
-
-
class ElmbridgeParser(PlanetParser):
    """Planet parser for Elmbridge, which has per-application info pages."""

    info_url_template = "http://emaps.elmbridge.gov.uk/LinkToSoftwareAG.aspx?appref=%s"

    def get_info_url(self, soup_fragment):
        # Elmbridge exposes a direct link keyed on the council reference
        # of the application currently being parsed.
        reference = self._current_application.council_reference
        return self.info_url_template % reference
-
-
- if __name__ == '__main__':
- parser = ElmbridgeParser("Elmbridge Borough Council", "Elmbridge", "http://www2.elmbridge.gov.uk/Planet/ispforms.asp?serviceKey=SysDoc-PlanetApplicationEnquiry")
- # parser = PlanetParser("North Lincolnshire Council", "North Lincolnshire", "http://www.planning.northlincs.gov.uk/planet/ispforms.asp?ServiceKey=SysDoc-PlanetApplicationEnquiry")
- # parser = PlanetParser("Rydale District Council", "Rydale", "http://www.ryedale.gov.uk/ispforms.asp?serviceKey=SysDoc-PlanetApplicationEnquiry")
- # parser = PlanetParser("Tewkesbury Borough Council", "Tewkesbury", "http://planning.tewkesbury.gov.uk/Planet/ispforms.asp?serviceKey=07WCC04163103430")
- # parser = PlanetParser("Worcester City Council", "Worcester", "http://www.worcester.gov.uk:8080/planet/ispforms.asp?serviceKey=SysDoc-PlanetApplicationEnquiry", debug=True)
- print parser.getResults(1,5,2009)
-
- # TODO
-
- # 1) Pagination
- # 2) Work OK with no results.
-
- # 3) Use OSGB for Tewkesbury?
|