Automatically exported from code.google.com/p/planningalerts
 
 
 
 
 
 

208 řádky
8.4 KiB

  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import datetime
  5. import time
  6. import re
  7. from BeautifulSoup import BeautifulSoup
  8. from PlanningUtils import PlanningApplication, \
  9. PlanningAuthorityResults, \
  10. getPostcodeFromText
  11. def clean_string(a_string):
  12. return ' '.join(' '.join(a_string.split(" ")).strip().split())
  13. def remove_params(url):
  14. # Probably a bit naughty to use both urlparse and urlunsplit here,
  15. # but it does what we want - removing the jsessionid param
  16. parsed_url = urlparse.urlparse(url)
  17. params_free_url = urlparse.urlunsplit(parsed_url[:3] + parsed_url[4:])
  18. return params_free_url
  19. class WAMParser:
  20. address_column = 2
  21. date_format = "%d/%b/%Y"
  22. def __init__(self,
  23. authority_name,
  24. authority_short_name,
  25. base_url,
  26. debug=False):
  27. self.authority_name = authority_name
  28. self.authority_short_name = authority_short_name
  29. self.base_url = base_url
  30. self.debug = debug
  31. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  32. def _get_search_data(self, year, month, day):
  33. timestamp = time.mktime((year, month, day, 0,0,0,0,0,0))
  34. # The parameter endDate appears to be 1000*a timestamp
  35. time_input = str(int(timestamp*1000))
  36. #http://wam.boroughofpoole.com/WAM/pas/searchApplications.do;jsessionid=BCC7DFD1C42DC210A7BE5BA616683CDE
  37. # areaCode=%25&sortOrder=1&endDate=1197213359015&applicationType=%25&Button=Search
  38. search_data = (
  39. ("areaCode", "%"),
  40. ("sortOrder", "1"),
  41. ("endDate", time_input),
  42. ("applicationType", "%"),
  43. ("Button", "Search"),
  44. )
  45. return search_data
  46. def getResultsByDayMonthYear(self, day, month, year):
  47. search_data_tuple = self._get_search_data(year, month, day)
  48. search_data = urllib.urlencode(search_data_tuple)
  49. response = urllib2.urlopen(self.base_url, search_data)
  50. html = response.read()
  51. soup = BeautifulSoup(html)
  52. results_table = soup.find(text=re.compile("Your search returned the following")).findNext("table")
  53. # FIXME - deal with the empty results case
  54. # FIXME - deal with later pages of results
  55. trs = results_table.findAll("tr")[1:]
  56. self._current_application = PlanningApplication()
  57. for tr in trs:
  58. try:
  59. tds = tr.findAll("td")
  60. date_received_string = tds[0].contents[0].strip()
  61. # Some day we'll be on python 2.5, and we'll be able to use the nicer version below...
  62. self._current_application.date_received = datetime.datetime(*(time.strptime(clean_string(date_received_string), self.date_format)[0:6]))
  63. #self._current_application.date_received = datetime.datetime.strptime(clean_string(date_received_string), self.date_format)
  64. relative_info_url = tr.a['href']
  65. info_url_no_params = remove_params(relative_info_url)
  66. #Now we join on the base url to make it absolute
  67. self._current_application.info_url = urlparse.urljoin(self.base_url, info_url_no_params)
  68. self._current_application.council_reference = tr.a.string
  69. address = clean_string(tds[self.address_column].string)
  70. self._current_application.address = address
  71. self._current_application.postcode = getPostcodeFromText(address)
  72. # self._current_application.description = clean_string(tds[self.description_column].string)
  73. # Fetch the info page
  74. info_response = urllib2.urlopen(self._current_application.info_url)
  75. info_html = info_response.read()
  76. info_soup = BeautifulSoup(info_html)
  77. try:
  78. relative_comment_url = info_soup.find("a", href=re.compile("createComment.do"))['href']
  79. comment_url_no_params = remove_params(relative_comment_url)
  80. self._current_application.comment_url = urlparse.urljoin(self.base_url, comment_url_no_params)
  81. except: # FIXME - specialize the except
  82. if self.debug:
  83. print "No comment url for %s" %(self._current_application.council_reference)
  84. self._current_application.comment_url = "None"
  85. # Some WAM sites have the description in the results page,
  86. # but since they don't all have it there, we'll get it from here...
  87. description_td = info_soup.find(text="Development:").findNext("td")
  88. # Sometimes the description is in a span in the td, sometimes it is directly there.
  89. self._current_application.description = (description_td.string or description_td.span.string).strip()
  90. self._results.addApplication(self._current_application)
  91. except:
  92. # It seems a shame to miss out on all the apps from an authority just because one breaks...
  93. if self._current_application.council_reference:
  94. if self.debug:
  95. print "Failed to add %s" %(self._current_application.council_reference)
  96. else:
  97. if self.debug:
  98. print "Failed to add an application"
  99. self._current_application = PlanningApplication()
  100. return self._results
  101. def getResults(self, day, month, year):
  102. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  103. class PooleParser(WAMParser):
  104. address_column = 1
  105. class BraintreeParser(WAMParser):
  106. date_format = "%d %b %Y"
  107. def _get_search_data(self, year, month, day):
  108. # Braintree
  109. # action=showWeeklyList&areaCode=%25&sortOrder=1&endDate=1203249969656&applicationType=%25&Button=Search
  110. search_data = WAMParser._get_search_data(self, year, month, day)
  111. return (("action", "showWeeklyList"),) + search_data
  112. if __name__ == '__main__':
  113. #parser = BraintreeParser("Braintree", "Braintree", "http://planningapp.braintree.gov.uk/WAM1/weeklyApplications.do", debug=True)
  114. # Camden
  115. # parser = WAMParser("Castle Point", "Castle Point", "http://wam.castlepoint.gov.uk/WAM/pas/searchApplications.do")#, debug=True)
  116. #Chichester - Done as PublicAccess
  117. #parser = BraintreeParser("Colchester", "Colchester", "http://www.planning.colchester.gov.uk/WAM/weeklyApplications.do", debug=True)
  118. #parser = WAMParser("East Lothian", "East Lothian", "http://www.planning.eastlothian.gov.uk/WAM/pas/searchApplications.do", debug=True)
  119. #parser = BraintreeParser("North Somerset", "North Somerset", "http://wam.n-somerset.gov.uk/MULTIWAM/weeklyApplications.do", debug=True)
  120. parser = WAMParser("Nottingham", "Nottingham", "http://plan4.nottinghamcity.gov.uk/WAM/pas/searchApplications.do", debug=True)
  121. #parser = PooleParser("Poole long", "Poole", "http://wam.boroughofpoole.com/WAM/pas/searchApplications.do", debug=True)
  122. #parser = WAMParser("Rother long", "Rother", "http://www.planning.rother.gov.uk/WAM/pas/searchApplications.do", debug=True)
  123. #parser = BraintreeParser("South Gloucestershire", "South Gloucestershire", "http://planning.southglos.gov.uk/WAM/pas/WeeklyApplications.do", debug=True)
  124. #parser = WAMParser("South Norfolk", "South Norfolk", "http://wam.south-norfolk.gov.uk/WAM/pas/searchApplications.do", debug=True)
  125. #parser = BraintreeParser("Tower Hamlets", "Tower Hamlets", "http://194.201.98.213/WAM/weeklyApplications.do", debug=True)
  126. #parser = WAMParser("Westminster", "Westminster", "http://idocs.westminster.gov.uk:8080/WAM/search/pas/index.htm", debug=True)
  127. print parser.getResults(31,8,2008)
  128. # Left to fix
  129. # All:
  130. # Paging
  131. # Coping with no apps
  132. # Barking and Dagenham - done
  133. # Braintree - done
  134. # Camden - also has a PlanningExplorer, which is done (so not bothering)
  135. # Castle Point - done
  136. # Chichester - not needed (PublicAccess site done)
  137. # Colchester - done. like Braintree
  138. # East Lothian - done
  139. # North Somerset - done. like Braintree
  140. # Nottingham - done (sometimes no comments)
  141. # Poole - done
  142. # Rother - done
  143. # South Gloucestershire - done. like Braintree
  144. # South Norfolk - Works, but no postcodes. Also, the search link here points to PlanningExplorer. I think we should assume this is the current site.
  145. # Tower Hamlets - done. Like Braintree.
  146. # Westminster - not done: clearly WAM underneath, but with a wrapper.