Automatically exported from code.google.com/p/planningalerts
您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符
 
 
 
 
 
 

208 行
8.4 KiB

  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import datetime
  5. import time
  6. import re
  7. from BeautifulSoup import BeautifulSoup
  8. from PlanningUtils import PlanningApplication, \
  9. PlanningAuthorityResults, \
  10. getPostcodeFromText
  11. def clean_string(a_string):
  12. return ' '.join(' '.join(a_string.split(" ")).strip().split())
  13. def remove_params(url):
  14. # Probably a bit naughty to use both urlparse and urlunsplit here,
  15. # but it does what we want - removing the jsessionid param
  16. parsed_url = urlparse.urlparse(url)
  17. params_free_url = urlparse.urlunsplit(parsed_url[:3] + parsed_url[4:])
  18. return params_free_url
  19. class WAMParser:
  20. address_column = 2
  21. date_format = "%d/%b/%Y"
  22. def __init__(self,
  23. authority_name,
  24. authority_short_name,
  25. base_url,
  26. debug=False):
  27. self.authority_name = authority_name
  28. self.authority_short_name = authority_short_name
  29. self.base_url = base_url
  30. self.debug = debug
  31. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  32. def _get_search_data(self, year, month, day):
  33. timestamp = time.mktime((year, month, day, 0,0,0,0,0,0))
  34. # The parameter endDate appears to be 1000*a timestamp
  35. time_input = str(int(timestamp*1000))
  36. #http://wam.boroughofpoole.com/WAM/pas/searchApplications.do;jsessionid=BCC7DFD1C42DC210A7BE5BA616683CDE
  37. # areaCode=%25&sortOrder=1&endDate=1197213359015&applicationType=%25&Button=Search
  38. search_data = (
  39. ("areaCode", "%"),
  40. ("sortOrder", "1"),
  41. ("endDate", time_input),
  42. ("applicationType", "%"),
  43. ("Button", "Search"),
  44. )
  45. return search_data
  46. def getResultsByDayMonthYear(self, day, month, year):
  47. search_data_tuple = self._get_search_data(year, month, day)
  48. search_data = urllib.urlencode(search_data_tuple)
  49. response = urllib2.urlopen(self.base_url, search_data)
  50. html = response.read()
  51. soup = BeautifulSoup(html)
  52. results_table = soup.find(text=re.compile("Your search returned the following")).findNext("table")
  53. # FIXME - deal with the empty results case
  54. # FIXME - deal with later pages of results
  55. trs = results_table.findAll("tr")[1:]
  56. self._current_application = PlanningApplication()
  57. for tr in trs:
  58. try:
  59. tds = tr.findAll("td")
  60. date_received_string = tds[0].contents[0].strip()
  61. # Some day we'll be on python 2.5, and we'll be able to use the nicer version below...
  62. self._current_application.date_received = datetime.datetime(*(time.strptime(clean_string(date_received_string), self.date_format)[0:6]))
  63. #self._current_application.date_received = datetime.datetime.strptime(clean_string(date_received_string), self.date_format)
  64. relative_info_url = tr.a['href']
  65. info_url_no_params = remove_params(relative_info_url)
  66. #Now we join on the base url to make it absolute
  67. self._current_application.info_url = urlparse.urljoin(self.base_url, info_url_no_params)
  68. self._current_application.council_reference = tr.a.string
  69. address = clean_string(tds[self.address_column].string)
  70. self._current_application.address = address
  71. self._current_application.postcode = getPostcodeFromText(address)
  72. # self._current_application.description = clean_string(tds[self.description_column].string)
  73. # Fetch the info page
  74. info_response = urllib2.urlopen(self._current_application.info_url)
  75. info_html = info_response.read()
  76. info_soup = BeautifulSoup(info_html)
  77. try:
  78. relative_comment_url = info_soup.find("a", href=re.compile("createComment.do"))['href']
  79. comment_url_no_params = remove_params(relative_comment_url)
  80. self._current_application.comment_url = urlparse.urljoin(self.base_url, comment_url_no_params)
  81. except: # FIXME - specialize the except
  82. if self.debug:
  83. print "No comment url for %s" %(self._current_application.council_reference)
  84. self._current_application.comment_url = "None"
  85. # Some WAM sites have the description in the results page,
  86. # but since they don't all have it there, we'll get it from here...
  87. description_td = info_soup.find(text="Development:").findNext("td")
  88. # Sometimes the description is in a span in the td, sometimes it is directly there.
  89. self._current_application.description = (description_td.string or description_td.span.string).strip()
  90. self._results.addApplication(self._current_application)
  91. except:
  92. # It seems a shame to miss out on all the apps from an authority just because one breaks...
  93. if self._current_application.council_reference:
  94. if self.debug:
  95. print "Failed to add %s" %(self._current_application.council_reference)
  96. else:
  97. if self.debug:
  98. print "Failed to add an application"
  99. self._current_application = PlanningApplication()
  100. return self._results
  101. def getResults(self, day, month, year):
  102. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  103. class PooleParser(WAMParser):
  104. address_column = 1
  105. class BraintreeParser(WAMParser):
  106. date_format = "%d %b %Y"
  107. def _get_search_data(self, year, month, day):
  108. # Braintree
  109. # action=showWeeklyList&areaCode=%25&sortOrder=1&endDate=1203249969656&applicationType=%25&Button=Search
  110. search_data = WAMParser._get_search_data(self, year, month, day)
  111. return (("action", "showWeeklyList"),) + search_data
  112. if __name__ == '__main__':
  113. #parser = BraintreeParser("Braintree", "Braintree", "http://planningapp.braintree.gov.uk/WAM1/weeklyApplications.do", debug=True)
  114. # Camden
  115. # parser = WAMParser("Castle Point", "Castle Point", "http://wam.castlepoint.gov.uk/WAM/pas/searchApplications.do")#, debug=True)
  116. #Chichester - Done as PublicAccess
  117. #parser = BraintreeParser("Colchester", "Colchester", "http://www.planning.colchester.gov.uk/WAM/weeklyApplications.do", debug=True)
  118. #parser = WAMParser("East Lothian", "East Lothian", "http://www.planning.eastlothian.gov.uk/WAM/pas/searchApplications.do", debug=True)
  119. #parser = BraintreeParser("North Somerset", "North Somerset", "http://wam.n-somerset.gov.uk/MULTIWAM/weeklyApplications.do", debug=True)
  120. parser = WAMParser("Nottingham", "Nottingham", "http://plan4.nottinghamcity.gov.uk/WAM/pas/searchApplications.do", debug=True)
  121. #parser = PooleParser("Poole long", "Poole", "http://wam.boroughofpoole.com/WAM/pas/searchApplications.do", debug=True)
  122. #parser = WAMParser("Rother long", "Rother", "http://www.planning.rother.gov.uk/WAM/pas/searchApplications.do", debug=True)
  123. #parser = BraintreeParser("South Gloucestershire", "South Gloucestershire", "http://planning.southglos.gov.uk/WAM/pas/WeeklyApplications.do", debug=True)
  124. #parser = WAMParser("South Norfolk", "South Norfolk", "http://wam.south-norfolk.gov.uk/WAM/pas/searchApplications.do", debug=True)
  125. #parser = BraintreeParser("Tower Hamlets", "Tower Hamlets", "http://194.201.98.213/WAM/weeklyApplications.do", debug=True)
  126. #parser = WAMParser("Westminster", "Westminster", "http://idocs.westminster.gov.uk:8080/WAM/search/pas/index.htm", debug=True)
  127. print parser.getResults(31,8,2008)
  128. # Left to fix
  129. # All:
  130. # Paging
  131. # Coping with no apps
  132. # Barking and Dagenham - done
  133. # Braintree - done
  134. # Camden - also has a PlanningExplorer, which is done (so not bothering)
  135. # Castle Point - done
  136. # Chichester - not needed (PublicAccess site done)
  137. # Colchester - done. like Braintree
  138. # East Lothian - done
  139. # North Somerset - done. like Braintree
  140. # Nottingham - done (sometimes no comments)
  141. # Poole - done
  142. # Rother - done
  143. # South Gloucestershire - done. like Braintree
  144. # South Norfolk - Works, but no postcodes. Also, the search link here points to PlanningExplorer. I think we should assume this is the current site.
  145. # Tower Hamlets - done. Like Braintree.
  146. # Westminster - not done: clearly WAM underneath, but with a wrapper.