Automatically exported from
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

519 lines
18 KiB

  1. import urllib, urllib2
  2. import HTMLParser
  3. import urlparse
  4. import datetime, time
  5. from PlanningUtils import PlanningAuthorityResults, \
  6. getPostcodeFromText, \
  7. PlanningApplication
  8. # The search results list will give us reference, location, description,
  9. # and info url of each app.
  10. # The info page gives us the received date,
  11. # and comment_url
  12. class ApplicationSearchServletParser(HTMLParser.HTMLParser):
  13. """Parser for ApplicationSearchServlet sites.
  14. """
  15. # These indicate the column of the main table containing this
  16. # piece of information.
  17. # They should be overridden in subclasses
  18. #self._rows_to_ignore_at_start = None
  19. _reference_col_no = None
  20. _location_col_no = None
  21. _description_col_no = None
  22. def __init__(self,
  23. authority_name,
  24. authority_short_name,
  25. base_url,
  26. debug=False):
  27. HTMLParser.HTMLParser.__init__(self)
  28. self.authority_name = authority_name
  29. self.authority_short_name = authority_short_name
  30. self.base_url = base_url
  31. self.debug = debug
  32. self.search_url = urlparse.urljoin(self.base_url, "portal/servlets/ApplicationSearchServlet")
  33. self._comment_url = urlparse.urljoin(self.base_url, "portal/servlets/PlanningComments?REFNO=%(council_reference)s")
  34. self._requested_date = None
  35. # 0 - no
  36. # 1 - maybe
  37. # 2 - yes
  38. # 3 - finished
  39. self._in_results_table = 0
  40. self._tr_count = 0
  41. self._td_count = 0
  42. self._data_list = []
  43. # this will hold the application we are currently working on.
  44. self._current_application = None
  45. # The object which stores our set of planning application results
  46. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  47. def _checkAttrsForResultsTable(self, attrs):
  48. raise SystemError
  49. def handle_starttag(self, tag, attrs):
  50. if self.debug:
  51. print tag, attrs
  52. if tag == "table" and self._in_results_table == 0:
  53. self._in_results_table = 1
  54. self._checkAttrsForResultsTable(attrs)
  55. elif tag == "tr" and self._in_results_table == 2:
  56. self._tr_count += 1
  57. self._td_count = 0
  58. self._data_list = []
  59. self._current_application = PlanningApplication()
  60. elif tag == "td" and self._in_results_table == 2:
  61. self._td_count += 1
  62. elif tag == "a" and self._in_results_table == 2 and self._td_count == self._reference_col_no:
  63. # The href attribute contains the link to the info page
  64. for (key, value) in attrs:
  65. if key == "href":
  66. self._current_application.info_url = urlparse.urljoin(self.search_url, value)
  67. def handle_endtag(self, tag):
  68. if self.debug:
  69. print "ending: " , tag
  70. if tag == "table" and self._in_results_table == 2:
  71. self._in_results_table = 3
  72. elif tag == "tr" and self._in_results_table == 2:
  73. if self._current_application.council_reference is not None:
  74. # get the received date
  75. #info_response = urllib2.urlopen(self._current_application.info_url)
  76. #info_page_parser = InfoPageParser()
  77. #info_page_parser.feed(
  78. self._current_application.date_received = self._requested_date#info_page_parser.date_received
  79. self._results.addApplication(self._current_application)
  80. elif tag == "td" and self._in_results_table == 2:
  81. if self._td_count == self._location_col_no:
  82. data = ' '.join(self._data_list).strip()
  83. self._current_application.address = data
  84. postcode = getPostcodeFromText(data)
  85. if postcode is not None:
  86. self._current_application.postcode = postcode
  87. self._data_list = []
  88. elif self._td_count == self._description_col_no:
  89. data = ' '.join(self._data_list).strip()
  90. self._current_application.description = data
  91. self._data_list = []
  92. elif tag == 'a' and self._in_results_table == 2 and self._td_count == self._reference_col_no:
  93. data = ''.join(self._data_list).strip()
  94. self._current_application.council_reference = data
  95. self._current_application.comment_url = self._comment_url %{"council_reference": data}
  96. self._data_list = []
  97. def handle_data(self, data):
  98. if self.debug:
  99. print data
  100. if self._in_results_table == 2:
  101. if self._td_count == self._reference_col_no or \
  102. self._td_count == self._location_col_no or \
  103. self._td_count == self._description_col_no:
  104. self._data_list.append(data.strip())
  105. def getResultsByDayMonthYear(self, day, month, year):
  106. """This will return an ApplicationResults object containg the
  107. applications for the date passed in."""
  108. # Were going to need a datetime object for the requested date
  109. self._requested_date =, month, day)
  110. required_format = "%d-%m-%Y"
  111. search_data = urllib.urlencode({"ReceivedDateFrom":self._requested_date.strftime(required_format),
  112. "ReceivedDateTo":self._requested_date.strftime(required_format)})
  113. search_request = urllib2.Request(self.search_url, search_data)
  114. search_response = urllib2.urlopen(search_request)
  115. search_contents =
  116. self.feed(search_contents)
  117. return self._results
  118. def getResults(self, day, month, year):
  119. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  120. class CoventrySearchParser(ApplicationSearchServletParser):
  121. # results table spotter
  122. # width="100%" border="0"
  123. _reference_col_no = 1
  124. _location_col_no = 5
  125. _description_col_no = 8
  126. def _checkAttrsForResultsTable(self, attrs):
  127. got_width = False
  128. got_border = False
  129. for key, value in attrs:
  130. if key == 'width' and value == '100%':
  131. got_width = True
  132. elif key == 'border' and value == '0':
  133. got_border = True
  134. if got_width and got_border:
  135. self._in_results_table = 2
  136. else:
  137. self._in_results_table = 0
  138. class AllerdaleSearchParser(ApplicationSearchServletParser):
  139. # results table spotter
  140. #class="nis_table" summary="Table of planning applications that matched your query, showing reference number, received date, and address"
  141. _reference_col_no = 1
  142. _location_col_no = 3
  143. _description_col_no = 6
  144. def _checkAttrsForResultsTable(self, attrs):
  145. got_class = False
  146. got_summary = False
  147. for key, value in attrs:
  148. if key == 'class' and value == 'nis_table':
  149. got_class = True
  150. elif key == 'summary' and value == 'Table of planning applications that matched your query, showing reference number, received date, and address':
  151. got_summary = True
  152. if got_class and got_summary:
  153. self._in_results_table = 2
  154. else:
  155. self._in_results_table = 0
  156. class AlnwickSearchParser(ApplicationSearchServletParser):
  157. # results table spotter
  158. # width="100%" class="niscontent"
  159. _reference_col_no = 1
  160. _location_col_no = 2
  161. _description_col_no = 7
  162. def _checkAttrsForResultsTable(self, attrs):
  163. got_class = False
  164. for key, value in attrs:
  165. if key == 'class' and value == 'niscontent':
  166. got_class = True
  167. if got_class:
  168. self._in_results_table = 2
  169. else:
  170. self._in_results_table = 0
  171. class BarrowSearchParser(ApplicationSearchServletParser):
  172. # results table spotter
  173. # width="100%" border="0"
  174. _reference_col_no = 1
  175. _location_col_no = 3
  176. _description_col_no = 6
  177. def _checkAttrsForResultsTable(self, attrs):
  178. got_width = False
  179. got_border = False
  180. for key, value in attrs:
  181. if key == 'width' and value == '100%':
  182. got_width = True
  183. elif key == 'border' and value == '0':
  184. got_border = True
  185. if got_width and got_border:
  186. self._in_results_table = 2
  187. else:
  188. self._in_results_table = 0
  189. class HartlepoolSearchParser(ApplicationSearchServletParser):
  190. # results table spotter
  191. # summary="Table of planning applications that matched your query, showing reference number, received date, and address"
  192. _reference_col_no = 1
  193. _location_col_no = 2
  194. _description_col_no = 3
  195. def _checkAttrsForResultsTable(self, attrs):
  196. got_summary = False
  197. for key, value in attrs:
  198. if key == 'summary' and value == "Table of planning applications that matched your query, showing reference number, received date, and address":
  199. got_summary = True
  200. if got_summary:
  201. self._in_results_table = 2
  202. else:
  203. self._in_results_table = 0
  204. class NorthWarksSearchParser(ApplicationSearchServletParser):
  205. # results table spotter
  206. # table width="100%" border="0" cellspacing="0" cellpadding="0"
  207. _reference_col_no = 1
  208. _location_col_no = 3
  209. _description_col_no = 4
  210. def _checkAttrsForResultsTable(self, attrs):
  211. got_width = False
  212. got_border = False
  213. got_cellspacing = False
  214. got_cellpadding = False
  215. for key, value in attrs:
  216. if key == 'width' and value == "100%":
  217. got_width = True
  218. elif key == 'border' and value == '0':
  219. got_border = True
  220. elif key == 'cellspacing' and value == '0':
  221. got_cellspacing = True
  222. elif key == 'cellpadding' and value == '0':
  223. got_cellpadding = True
  224. if got_width and got_border and got_cellspacing and got_cellpadding:
  225. self._in_results_table = 2
  226. else:
  227. self._in_results_table = 0
  228. class StHelensSearchParser(ApplicationSearchServletParser):
  229. # results table spotter
  230. # summary="Search Results List"
  231. _reference_col_no = 1
  232. _location_col_no = 2
  233. _description_col_no = 5
  234. def _checkAttrsForResultsTable(self, attrs):
  235. got_summary = False
  236. for key, value in attrs:
  237. if key == 'summary' and value == "Search Results List":
  238. got_summary = True
  239. if got_summary:
  240. self._in_results_table = 2
  241. else:
  242. self._in_results_table = 0
  243. class EasingtonSearchParser(ApplicationSearchServletParser):
  244. # results table spotter
  245. #table width="100%" border="0" cellspacing="0" cellpadding="0"
  246. _reference_col_no = 1
  247. _location_col_no = 3
  248. _description_col_no = 6
  249. def _checkAttrsForResultsTable(self, attrs):
  250. got_width = False
  251. got_border = False
  252. got_cellspacing = False
  253. got_cellpadding = False
  254. for key, value in attrs:
  255. if key == 'width' and value == "100%":
  256. got_width = True
  257. elif key == 'border' and value == '0':
  258. got_border = True
  259. elif key == 'cellspacing' and value == '0':
  260. got_cellspacing = True
  261. elif key == 'cellpadding' and value == '0':
  262. got_cellpadding = True
  263. if got_width and got_border and got_cellspacing and got_cellpadding:
  264. self._in_results_table = 2
  265. else:
  266. self._in_results_table = 0
  267. class HighPeakSearchParser(ApplicationSearchServletParser):
  268. # results table spotter
  269. # table class="data" width="95%"
  270. _reference_col_no = 1
  271. _location_col_no = 2
  272. _description_col_no = 5
  273. def _checkAttrsForResultsTable(self, attrs):
  274. got_class = False
  275. got_width = False
  276. for key, value in attrs:
  277. if key == 'class' and value == "data":
  278. got_class = True
  279. if key == 'width' and value == "95%":
  280. got_width = True
  281. if got_class and got_width:
  282. self._in_results_table = 2
  283. else:
  284. self._in_results_table = 0
  285. class WearValleySearchParser(ApplicationSearchServletParser):
  286. # results table spotter
  287. # table summary="Table of planning applications that matched your query, showing reference number, received date, and address"
  288. _reference_col_no = 1
  289. _location_col_no = 3
  290. _description_col_no = 4
  291. def _checkAttrsForResultsTable(self, attrs):
  292. got_summary= False
  293. for key, value in attrs:
  294. if key == 'summary' and value == "Table of planning applications that matched your query, showing reference number, received date, and address":
  295. got_summary = True
  296. if got_summary:
  297. self._in_results_table = 2
  298. else:
  299. self._in_results_table = 0
  300. class WellingboroughSearchParser(ApplicationSearchServletParser):
  301. # results table spotter
  302. #table width="100%" border="0"
  303. _reference_col_no = 1
  304. _location_col_no = 3
  305. _description_col_no = 6
  306. def _checkAttrsForResultsTable(self, attrs):
  307. got_width = False
  308. got_border = False
  309. for key, value in attrs:
  310. if key == 'width' and value == "100%":
  311. got_width = True
  312. elif key == 'border' and value == "0":
  313. got_border = True
  314. if got_width and got_border:
  315. self._in_results_table = 2
  316. else:
  317. self._in_results_table = 0
  318. class EalingSearchParser(ApplicationSearchServletParser):
  319. # results table spotter
  320. # table width="100%" cellspacing="0px" border="1px" cellpadding="2px" bordercolor="#FFFFFF"
  321. _reference_col_no = 1
  322. _location_col_no = 3
  323. _description_col_no = 4
  324. def _checkAttrsForResultsTable(self, attrs):
  325. got_width = False
  326. got_cellspacing = False
  327. got_border = False
  328. got_cellpadding = False
  329. got_bordercolor = False
  330. for key, value in attrs:
  331. if key == 'width' and value == "100%":
  332. got_width = True
  333. elif key == 'cellspacing' and value == "0px":
  334. got_cellspacing = True
  335. elif key == 'border' and value == "1px":
  336. got_border = True
  337. elif key == 'cellpadding' and value == "2px":
  338. got_cellpadding = True
  339. elif key == 'bordercolor' and value == "#FFFFFF":
  340. got_bordercolor = True
  341. if got_width and got_cellspacing and got_border and got_cellpadding and got_bordercolor:
  342. self._in_results_table = 2
  343. else:
  344. self._in_results_table = 0
  345. class HaringeySearchParser(ApplicationSearchServletParser):
  346. # results table spotter
  347. # summary="Application Results"
  348. _reference_col_no = 1
  349. _location_col_no = 2
  350. _description_col_no = 5
  351. def _checkAttrsForResultsTable(self, attrs):
  352. got_summary= False
  353. for key, value in attrs:
  354. if key == 'summary' and value == "Application Results":
  355. got_summary = True
  356. if got_summary:
  357. self._in_results_table = 2
  358. else:
  359. self._in_results_table = 0
  360. class DenbighshireSearchParser(ApplicationSearchServletParser):
  361. # results table spotter
  362. #table width="100%" border="0"
  363. _reference_col_no = 1
  364. _location_col_no = 3
  365. _description_col_no = 5
  366. def _checkAttrsForResultsTable(self, attrs):
  367. got_width = False
  368. got_border = False
  369. for key, value in attrs:
  370. if key == 'width' and value == "100%":
  371. got_width = True
  372. elif key == 'border' and value == "0":
  373. got_border = True
  374. if got_width and got_border:
  375. self._in_results_table = 2
  376. else:
  377. self._in_results_table = 0
if __name__ == "__main__":
    # Manual checks, one commented-out pair per authority: build the
    # parser, then print what getResults returns for a single received
    # date (day, month, year).  The base URL argument is an empty
    # string in every example, so the site's real base URL has to be
    # filled in before a pair is uncommented.  Running the module as
    # it stands does nothing.
    #parser = CoventrySearchParser("Coventry", "Coventry", "")
    #print parser.getResults(28,3,2007)
    #parser = AllerdaleSearchParser("Allerdale", "Allerdale", "")
    #print parser.getResults(28,3,2007)
    #parser = AlnwickSearchParser("Alnwick", "Alnwick", "")
    #print parser.getResults(28,3,2007)
    #parser = BarrowSearchParser("Barrow", "Barrow", "")
    #print parser.getResults(28,3,2007)
    #parser = HartlepoolSearchParser("Hartlepool", "Hartlepool", "")
    #print parser.getResults(28,3,2007)
    #parser = NorthWarksSearchParser("North Warwickshire", "North Warks", "")
    #print parser.getResults(28,3,2007)
    #parser = StHelensSearchParser("St Helens", "St Helens", "")
    #print parser.getResults(28,3,2007)
    #parser = EasingtonSearchParser("Easington", "Easington", "")
    #print parser.getResults(28,3,2007)
    #parser = HighPeakSearchParser("High Peak", "High Peak", "")
    #print parser.getResults(20,3,2007)
    #parser = WearValleySearchParser("Wear Valley", "Wear Valley", "")
    #print parser.getResults(20,3,2007)
    #parser = WellingboroughSearchParser("Wellingborough", "Wellingborough", "")
    #print parser.getResults(20,3,2007)
    #parser = EalingSearchParser("Ealing", "Ealing", "")
    #print parser.getResults(20,3,2007)
    #parser = HaringeySearchParser("Haringey", "Haringey", "")
    #print parser.getResults(20,3,2007)
    #parser = DenbighshireSearchParser("Denbighshire", "Denbighshire", "")
    #print parser.getResults(20,3,2007)
    pass