Automatically exported from
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

210 rivejä
8.8 KiB

  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import datetime
  5. import time
  6. import re
  7. from BeautifulSoup import BeautifulSoup
  8. from PlanningUtils import PlanningApplication, \
  9. PlanningAuthorityResults, \
  10. getPostcodeFromText
  11. def clean_string(a_string):
  12. return ' '.join(' '.join(a_string.split(" ")).strip().split())
  13. def remove_params(url):
  14. # Probably a bit naughty to use both urlparse and urlunsplit here,
  15. # but it does what we want - removing the jsessionid param
  16. parsed_url = urlparse.urlparse(url)
  17. params_free_url = urlparse.urlunsplit(parsed_url[:3] + parsed_url[4:])
  18. return params_free_url
  19. class WAMParser:
  20. address_column = 2
  21. date_format = "%d/%b/%Y"
  22. def __init__(self,
  23. authority_name,
  24. authority_short_name,
  25. base_url,
  26. debug=False):
  27. self.authority_name = authority_name
  28. self.authority_short_name = authority_short_name
  29. self.base_url = base_url
  30. self.debug = debug
  31. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  32. def _get_search_data(self, year, month, day):
  33. timestamp = time.mktime((year, month, day, 0,0,0,0,0,0))
  34. # The parameter endDate appears to be 1000*a timestamp
  35. time_input = str(int(timestamp*1000))
  36. #;jsessionid=BCC7DFD1C42DC210A7BE5BA616683CDE
  37. # areaCode=%25&sortOrder=1&endDate=1197213359015&applicationType=%25&Button=Search
  38. search_data = (
  39. ("areaCode", "%"),
  40. ("sortOrder", "1"),
  41. ("endDate", time_input),
  42. ("applicationType", "%"),
  43. ("Button", "Search"),
  44. )
  45. return search_data
  46. def getResultsByDayMonthYear(self, day, month, year):
  47. search_data_tuple = self._get_search_data(year, month, day)
  48. search_data = urllib.urlencode(search_data_tuple)
  49. response = urllib2.urlopen(self.base_url, search_data)
  50. html =
  51. soup = BeautifulSoup(html)
  52. results_table = soup.find(text=re.compile("Your search returned the following")).findNext("table")
  53. # FIXME - deal with the empty results case
  54. # FIXME - deal with later pages of results
  55. trs = results_table.findAll("tr")[1:]
  56. self._current_application = PlanningApplication()
  57. for tr in trs:
  58. try:
  59. tds = tr.findAll("td")
  60. date_received_string = tds[0].contents[0].strip()
  61. # Some day we'll be on python 2.5, and we'll be able to use the nicer version below...
  62. self._current_application.date_received = datetime.datetime(*(time.strptime(clean_string(date_received_string), self.date_format)[0:6]))
  63. #self._current_application.date_received = datetime.datetime.strptime(clean_string(date_received_string), self.date_format)
  64. relative_info_url = tr.a['href']
  65. info_url_no_params = remove_params(relative_info_url)
  66. #Now we join on the base url to make it absolute
  67. self._current_application.info_url = urlparse.urljoin(self.base_url, info_url_no_params)
  68. self._current_application.council_reference = tr.a.string
  69. address = clean_string(tds[self.address_column].string)
  70. self._current_application.address = address
  71. self._current_application.postcode = getPostcodeFromText(address)
  72. # self._current_application.description = clean_string(tds[self.description_column].string)
  73. # Fetch the info page
  74. info_response = urllib2.urlopen(self._current_application.info_url)
  75. info_html =
  76. info_soup = BeautifulSoup(info_html)
  77. try:
  78. relative_comment_url = info_soup.find("a", href=re.compile(""))['href']
  79. comment_url_no_params = remove_params(relative_comment_url)
  80. self._current_application.comment_url = urlparse.urljoin(self.base_url, comment_url_no_params)
  81. except: # FIXME - specialize the except
  82. if self.debug:
  83. print "No comment url for %s" %(self._current_application.council_reference)
  84. self._current_application.comment_url = "None"
  85. # Some WAM sites have the description in the results page,
  86. # but since they don't all have it there, we'll get it from here...
  87. description_td = info_soup.find(text="Development:").findNext("td")
  88. # Sometimes the description is in a span in the td, sometimes it is directly there.
  89. self._current_application.description = (description_td.string or description_td.span.string).strip()
  90. self._results.addApplication(self._current_application)
  91. except SystemExit:
  92. # It seems a shame to miss out on all the apps from an authority just because one breaks...
  93. if self._current_application.council_reference:
  94. if self.debug:
  95. print "Failed to add %s" %(self._current_application.council_reference)
  96. else:
  97. if self.debug:
  98. print "Failed to add an application"
  99. self._current_application = PlanningApplication()
  100. return self._results
  101. def getResults(self, day, month, year):
  102. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  103. class PooleParser(WAMParser):
  104. address_column = 1
  105. class BraintreeParser(WAMParser):
  106. date_format = "%d %b %Y"
  107. def _get_search_data(self, year, month, day):
  108. # Braintree
  109. # action=showWeeklyList&areaCode=%25&sortOrder=1&endDate=1203249969656&applicationType=%25&Button=Search
  110. search_data = WAMParser._get_search_data(self, year, month, day)
  111. return (("action", "showWeeklyList"),) + search_data
  112. if __name__ == '__main__':
  113. #parser = WAMParser("Barking and Dagenham", "Barking and Dagenham", "", debug=True)
  114. #parser = BraintreeParser("Braintree", "Braintree", "", debug=True)
  115. # Camden
  116. #parser = WAMParser("Castle Point", "Castle Point", "", debug=True)
  117. #Chichester - Done as PublicAccess
  118. #parser = BraintreeParser("Colchester", "Colchester", "", debug=True)
  119. #parser = WAMParser("East Lothian", "East Lothian", "", debug=True)
  120. #parser = BraintreeParser("North Somerset", "North Somerset", "", debug=True)
  121. #parser = WAMParser("Nottingham", "Nottingham", "", debug=True)
  122. #parser = PooleParser("Poole long", "Poole", "", debug=True)
  123. #parser = WAMParser("Rother long", "Rother", "", debug=True)
  124. #parser = BraintreeParser("South Gloucestershire", "South Gloucestershire", "", debug=True)
  125. #parser = BraintreeParser("South Gloucestershire", "South Gloucestershire", "", debug=True)
  126. #parser = WAMParser("South Norfolk", "South Norfolk", "", debug=True)
  127. parser = BraintreeParser("Tower Hamlets", "Tower Hamlets", "", debug=True)
  128. #parser = WAMParser("Westminster", "Westminster", "", debug=True)
  129. print parser.getResults(8,2,2008)
# Left to fix

# All:
#  Paging
#  Coping with no apps

# Barking and Dagenham - done
# Braintree - done
# Camden - also has a PlanningExplorer site, which is done (so not bothering)
# Castle Point - done
# Chichester - not needed (PublicAccess site done)
# Colchester - done. Like Braintree.
# East Lothian - done
# North Somerset - done. Like Braintree.
# Nottingham - done (sometimes no comments)
# Poole - done
# Rother - done
# South Gloucestershire - done. Like Braintree.
# South Norfolk - works, but no postcodes. Also, the search link here points to PlanningExplorer. I think we should assume that is the current site.
# Tower Hamlets - done. Like Braintree.
# Westminster - not done: clearly WAM underneath, but with a wrapper.