Automatically exported from
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

121 lines
4.8 KiB

  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import datetime
  5. from BeautifulSoup import BeautifulSoup
  6. from PlanningUtils import PlanningApplication, \
  7. PlanningAuthorityResults, \
  8. getPostcodeFromText
# Relative path of the per-application "full details" page. It is joined onto
# an authority's base_url with urlparse.urljoin in
# AtriumePlanningParser.__init__ to build info_url.
# NOTE(review): empty in this copy, so info_url resolves to base_url itself --
# presumably the real path was lost; confirm against the live site.
info_path = ""
# Relative path of the per-application comment page, joined onto base_url the
# same way to build comment_url.
# NOTE(review): also empty in this copy -- confirm the intended value.
comment_path = ""
  11. class AtriumePlanningParser:
  12. def __init__(self,
  13. authority_name,
  14. authority_short_name,
  15. base_url,
  16. debug=False):
  17. self.authority_name = authority_name
  18. self.authority_short_name = authority_short_name
  19. self.base_url = base_url
  20. self.info_url = urlparse.urljoin(base_url, info_path)
  21. self.comment_url = urlparse.urljoin(base_url, comment_path)
  22. self.debug = debug
  23. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  24. def getResultsByDayMonthYear(self, day, month, year):
  25. # The end date for the search needs to be one day after the start
  26. # date - presumably the date is used as a timestamp at midnight
  27. search_start_date =, month, day)
  28. search_end_date = search_start_date + datetime.timedelta(1)
  29. search_data = urllib.urlencode({"dayRegStart": search_start_date.strftime("%d"),
  30. "monthRegStart": search_start_date.strftime("%b"),
  31. "yearRegStart": search_start_date.strftime("%Y"),
  32. "dayRegEnd": search_end_date.strftime("%d"),
  33. "monthRegEnd": search_end_date.strftime("%b"),
  34. "yearRegEnd": search_end_date.strftime("%Y"),
  35. "searchType": "current",
  36. "dispatch": "Search"
  37. })
  38. response = urllib2.urlopen(self.base_url, search_data)
  39. html =
  40. soup = BeautifulSoup(html)
  41. # Get a list of the trs in the results table
  42. if soup.find(text="Results"):
  43. tds = soup.find(text="Results").parent.findNext("table").findAll("td")
  44. for td in tds:
  45. if td.string:
  46. if td.string.strip() == "Date Registered":
  47. # We are starting a new App
  48. self._current_application = PlanningApplication()
  49. #
  50. day, month, year = [int(x) for x in td.findNext("td").string.split("-")]
  51. self._current_application.date_received =, month, day)
  52. # FIXME - when python on haggis is a bit newer,
  53. #we can do the following, which is neater
  54. #(and get rid of the import of time).
  55. #self._current_application.date_received = datetime.datetime.strptime(td.findNext("td").string, "%d-%m-%Y")
  56. elif td.string.strip() == "Application Number":
  57. self._current_application.council_reference = td.findNext("td").string
  58. elif td.string.strip() == "Location":
  59. location = td.findNext("td").string
  60. self._current_application.address = location
  61. postcode = getPostcodeFromText(location)
  62. if postcode:
  63. self._current_application.postcode = postcode
  64. elif td.string.strip() == "Proposal":
  65. self._current_application.description = td.findNext("td").string
  66. elif td.a and td.a.string.strip() == "View Full Details":
  67. # The info url is td.a
  68. messy_info_url = td.a["href"]
  69. # We need to get an id out of this url
  70. query_str = urlparse.urlsplit(messy_info_url)[3]
  71. self._current_application.info_url = self.info_url + "?" + query_str
  72. self._current_application.comment_url = self.comment_url + "?" + query_str
  73. if self._current_application.is_ready():
  74. self._results.addApplication(self._current_application)
  75. return self._results
  76. def getResults(self, day, month, year):
  77. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  78. if __name__ == '__main__':
  79. # cumbria_parser = AtriumePlanningParser("Cumbria County Council", "Cumbria", "")
  80. # print cumbria_parser.getResults(22,11,2007)
  81. # lincolnshire_parser = AtriumePlanningParser("Lincolnshire County Council", "Lincolnshire", "")
  82. # print cumbria_parser.getResults(22,11,2007)
  83. parser = AtriumePlanningParser("Dorset County Council", "Dorset", "")
  84. print parser.getResults(13,11,2007)