Automatically exported from code.google.com/p/planningalerts
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

113 lines
4.2 KiB

  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import datetime
  5. #import re
  6. from BeautifulSoup import BeautifulSoup
  7. from PlanningUtils import PlanningApplication, \
  8. PlanningAuthorityResults, \
  9. getPostcodeFromText
# Page names on the ePlanning site.  Each one is joined onto an authority's
# base URL (with urlparse.urljoin) to build the per-application links.
info_path = "loadFullDetails.do"  # target of an application's info_url
comment_path = "loadRepresentation.do"  # target of an application's comment_url
  12. class AtriumePlanningParser:
  13. def __init__(self,
  14. authority_name,
  15. authority_short_name,
  16. base_url,
  17. debug=False):
  18. self.authority_name = authority_name
  19. self.authority_short_name = authority_short_name
  20. self.base_url = base_url
  21. self.info_url = urlparse.urljoin(base_url, info_path)
  22. self.comment_url = urlparse.urljoin(base_url, comment_path)
  23. self.debug = debug
  24. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  25. def getResultsByDayMonthYear(self, day, month, year):
  26. # The end date for the search needs to be one day after the start
  27. # date - presumably the date is used as a timestamp at midnight
  28. search_start_date = datetime.date(year, month, day)
  29. search_end_date = search_start_date + datetime.timedelta(1)
  30. search_data = urllib.urlencode({"dayRegStart": search_start_date.strftime("%d"),
  31. "monthRegStart": search_start_date.strftime("%b"),
  32. "yearRegStart": search_start_date.strftime("%Y"),
  33. "dayRegEnd": search_end_date.strftime("%d"),
  34. "monthRegEnd": search_end_date.strftime("%b"),
  35. "yearRegEnd": search_end_date.strftime("%Y"),
  36. "searchType": "current",
  37. "dispatch": "Search"
  38. })
  39. response = urllib2.urlopen(self.base_url, search_data)
  40. html = response.read()
  41. soup = BeautifulSoup(html)
  42. # Get a list of the trs in the results table
  43. if soup.find(text="Results"):
  44. tds = soup.find(text="Results").parent.findNext("table").findAll("td")
  45. for td in tds:
  46. if td.string:
  47. if td.string.strip() == "Date Registered":
  48. # We are starting a new App
  49. self._current_application = PlanningApplication()
  50. self._current_application.date_received = datetime.datetime.strptime(td.findNext("td").string, "%d-%m-%Y")
  51. elif td.string.strip() == "Application Number":
  52. self._current_application.council_reference = td.findNext("td").string
  53. elif td.string.strip() == "Location":
  54. location = td.findNext("td").string
  55. self._current_application.address = location
  56. postcode = getPostcodeFromText(location)
  57. if postcode:
  58. self._current_application.postcode = postcode
  59. elif td.string.strip() == "Proposal":
  60. self._current_application.description = td.findNext("td").string
  61. elif td.a and td.a.string.strip() == "View Full Details":
  62. # The info url is td.a
  63. messy_info_url = td.a["href"]
  64. # We need to get an id out of this url
  65. query_str = urlparse.urlsplit(messy_info_url)[3]
  66. self._current_application.info_url = self.info_url + "?" + query_str
  67. self._current_application.comment_url = self.comment_url + "?" + query_str
  68. if self._current_application.is_ready():
  69. self._results.addApplication(self._current_application)
  70. return self._results
  71. def getResults(self, day, month, year):
  72. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  73. #if __name__ == '__main__':
  74. # cumbria_parser = AtriumePlanningParser("Cumbria County Council", "Cumbria", "http://217.114.50.149:7778/ePlanningOPS/loadResults.do")
  75. # print cumbria_parser.getResults(22,11,2007)
  76. # lincolnshire_parser = AtriumePlanningParser("Lincolnshire County Council", "Lincolnshire", "")
#    print lincolnshire_parser.getResults(22,11,2007)