Automatically exported from code.google.com/p/planningalerts
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

176 lines
5.8 KiB

# HTTP, URL and date handling (Python 2 standard-library module names).
import urllib2
import urllib
import urlparse
from cgi import parse_qs
import datetime

import cookielib
# Module-level cookie jar. getResultsByDayMonthYear stores the cookies set by
# the search form's GET response here and replays them on the follow-up POST.
# Being module-level, it is shared by every parser instance in this module.
cookie_jar = cookielib.CookieJar()

# HTML parsing.
from BeautifulSoup import BeautifulSoup

# Project-local result containers and postcode helper.
from PlanningUtils import PlanningApplication, \
    PlanningAuthorityResults, \
    getPostcodeFromText

# strftime format applied to the search date before it is posted in the
# X.searchCriteria_StartDate / X.searchCriteria_EndDate form fields.
date_format = "%d/%m/%Y"
  13. class PlanetParser:
  14. def __init__(self,
  15. authority_name,
  16. authority_short_name,
  17. base_url,
  18. debug=False):
  19. self.authority_name = authority_name
  20. self.authority_short_name = authority_short_name
  21. self.base_url = base_url
  22. self.debug = debug
  23. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  24. def get_info_url(self, soup_fragment):
  25. return self.base_url
  26. def get_comment_url(self, soup_fragment):
  27. return self.get_info_url(soup_fragment)
  28. def getResultsByDayMonthYear(self, day, month, year):
  29. # What is the serviceKey for this council?
  30. # It's in our base url
  31. query_string = urlparse.urlsplit(self.base_url)[3]
  32. # This query string just contains the servicekey
  33. query_dict = parse_qs(query_string)
  34. service_key = query_dict['serviceKey'][0]
  35. # First get the form
  36. get_request = urllib2.Request(self.base_url)
  37. get_response = urllib2.urlopen(get_request)
  38. cookie_jar.extract_cookies(get_response, get_request)
  39. # We also need to get the security token
  40. get_soup = BeautifulSoup(get_response.read())
  41. security_token = get_soup.find('input', {'name': 'securityToken'})['value']
  42. # Now post to it
  43. search_date = datetime.date(year, month, day)
  44. search_data = urllib.urlencode(
  45. {
  46. "serviceKey":service_key,
  47. "securityToken": security_token,
  48. "STEP":"Planet_SearchCriteria",
  49. #X.resultCount=
  50. "X.pageNumber": "0",
  51. "X.searchCriteria_StartDate": search_date.strftime(date_format),
  52. "X.searchCriteria_EndDate": search_date.strftime(date_format),
  53. }
  54. )
  55. post_request = urllib2.Request(self.base_url, search_data)
  56. cookie_jar.add_cookie_header(post_request)
  57. post_response = urllib2.urlopen(post_request)
  58. post_soup = BeautifulSoup(post_response.read())
  59. # Now we need to find the results. We'll do this by searching for the text "Ref No" and then going forward from there. For some reason a search for the table gets the table without contents
  60. ref_no_text = post_soup.find(text="Ref No")
  61. first_tr = ref_no_text.findNext("tr")
  62. other_trs = first_tr.findNextSiblings()
  63. trs = [first_tr] + other_trs
  64. for tr in trs:
  65. self._current_application = PlanningApplication()
  66. # We don't need to get the date, it's the one we searched for.
  67. self._current_application.date_received = search_date
  68. tds = tr.findAll("td")
  69. self._current_application.council_reference = tds[0].a.string.strip()
  70. self._current_application.address = tds[1].string.strip()
  71. self._current_application.postcode = getPostcodeFromText(self._current_application.address)
  72. self._current_application.description = tds[2].string.strip()
  73. # There is no good info url, so we just give the search page.
  74. self._current_application.info_url = self.get_info_url(tr)
  75. # Similarly for the comment url
  76. self._current_application.comment_url = self.get_comment_url(tr)
  77. self._results.addApplication(self._current_application)
  78. return self._results
  79. # post data for worcester
  80. # hopefully we can ignore almost all of this...
  81. #ACTION=NEXT
  82. #serviceKey=SysDoc-PlanetApplicationEnquiry
  83. #serviceGeneration=
  84. #securityToken=NTgxMjE3OTExMjA4OQ%3D%3D
  85. #enquiry=
  86. #STEP=Planet_SearchCriteria
  87. #RECEIVED=
  88. #COMMENTS=
  89. #LAST_UPDATED=
  90. #status=
  91. #X.endEnquiry=
  92. #X.resultCount=
  93. #X.applicationNotFound=
  94. #X.pageNumber=0
  95. #X.searchCriteria_ApplicationReference=
  96. #X.searchCriteria_StartDate=20%2F04%2F2008
  97. #X.searchCriteria_EndDate=20%2F04%2F2008
  98. #X.searchCriteria_Ward=
  99. #X.searchCriteria_Parish=
  100. #X.searchCriteria_Address=
  101. #X.searchCriteria_Postcode=
  102. #X.searchCriteria_ApplicantName=
  103. #X.searchCriteria_AgentName=
  104. #X.searchCriteria_UndecidedApplications=
  105. return self._results
  106. def getResults(self, day, month, year):
  107. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  108. class ElmbridgeParser(PlanetParser):
  109. info_url_template = "http://emaps.elmbridge.gov.uk/LinkToSoftwareAG.aspx?appref=%s"
  110. def get_info_url(self, soup_fragment):
  111. return self.info_url_template %self._current_application.council_reference
  112. if __name__ == '__main__':
  113. parser = ElmbridgeParser("Elmbridge Borough Council", "Elmbridge", "http://www2.elmbridge.gov.uk/Planet/ispforms.asp?serviceKey=SysDoc-PlanetApplicationEnquiry")
  114. # parser = PlanetParser("North Lincolnshire Council", "North Lincolnshire", "http://www.planning.northlincs.gov.uk/planet/ispforms.asp?ServiceKey=SysDoc-PlanetApplicationEnquiry")
  115. # parser = PlanetParser("Rydale District Council", "Rydale", "http://www.ryedale.gov.uk/ispforms.asp?serviceKey=SysDoc-PlanetApplicationEnquiry")
  116. # parser = PlanetParser("Tewkesbury Borough Council", "Tewkesbury", "http://planning.tewkesbury.gov.uk/Planet/ispforms.asp?serviceKey=07WCC04163103430")
  117. # parser = PlanetParser("Worcester City Council", "Worcester", "http://www.worcester.gov.uk:8080/planet/ispforms.asp?serviceKey=SysDoc-PlanetApplicationEnquiry", debug=True)
  118. print parser.getResults(1,5,2009)
  119. # TODO
  120. # 1) Pagination
  121. # 2) Work OK with no results.
  122. # 3) Use OSGB for Tewkesbury?