Automatically exported from code.google.com/p/planningalerts
Você não pode selecionar mais de 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

AtriumePlanning.py 4.8 KiB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import datetime
  5. from BeautifulSoup import BeautifulSoup
  6. from PlanningUtils import PlanningApplication, \
  7. PlanningAuthorityResults, \
  8. getPostcodeFromText
  9. info_path = "loadFullDetails.do"
  10. comment_path = "loadRepresentation.do"
  11. class AtriumePlanningParser:
  12. def __init__(self,
  13. authority_name,
  14. authority_short_name,
  15. base_url,
  16. debug=False):
  17. self.authority_name = authority_name
  18. self.authority_short_name = authority_short_name
  19. self.base_url = base_url
  20. self.info_url = urlparse.urljoin(base_url, info_path)
  21. self.comment_url = urlparse.urljoin(base_url, comment_path)
  22. self.debug = debug
  23. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  24. def getResultsByDayMonthYear(self, day, month, year):
  25. # The end date for the search needs to be one day after the start
  26. # date - presumably the date is used as a timestamp at midnight
  27. search_start_date = datetime.date(year, month, day)
  28. search_end_date = search_start_date + datetime.timedelta(1)
  29. search_data = urllib.urlencode({"dayRegStart": search_start_date.strftime("%d"),
  30. "monthRegStart": search_start_date.strftime("%b"),
  31. "yearRegStart": search_start_date.strftime("%Y"),
  32. "dayRegEnd": search_end_date.strftime("%d"),
  33. "monthRegEnd": search_end_date.strftime("%b"),
  34. "yearRegEnd": search_end_date.strftime("%Y"),
  35. "searchType": "current",
  36. "dispatch": "Search"
  37. })
  38. response = urllib2.urlopen(self.base_url, search_data)
  39. html = response.read()
  40. soup = BeautifulSoup(html)
  41. # Get a list of the trs in the results table
  42. if soup.find(text="Results"):
  43. tds = soup.find(text="Results").parent.findNext("table").findAll("td")
  44. for td in tds:
  45. if td.string:
  46. if td.string.strip() == "Date Registered":
  47. # We are starting a new App
  48. self._current_application = PlanningApplication()
  49. #
  50. day, month, year = [int(x) for x in td.findNext("td").string.split("-")]
  51. self._current_application.date_received = datetime.date(year, month, day)
  52. # FIXME - when python on haggis is a bit newer,
  53. #we can do the following, which is neater
  54. #(and get rid of the import of time).
  55. #self._current_application.date_received = datetime.datetime.strptime(td.findNext("td").string, "%d-%m-%Y")
  56. elif td.string.strip() == "Application Number":
  57. self._current_application.council_reference = td.findNext("td").string
  58. elif td.string.strip() == "Location":
  59. location = td.findNext("td").string
  60. self._current_application.address = location
  61. postcode = getPostcodeFromText(location)
  62. if postcode:
  63. self._current_application.postcode = postcode
  64. elif td.string.strip() == "Proposal":
  65. self._current_application.description = td.findNext("td").string
  66. elif td.a and td.a.string.strip() == "View Full Details":
  67. # The info url is td.a
  68. messy_info_url = td.a["href"]
  69. # We need to get an id out of this url
  70. query_str = urlparse.urlsplit(messy_info_url)[3]
  71. self._current_application.info_url = self.info_url + "?" + query_str
  72. self._current_application.comment_url = self.comment_url + "?" + query_str
  73. if self._current_application.is_ready():
  74. self._results.addApplication(self._current_application)
  75. return self._results
  76. def getResults(self, day, month, year):
  77. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  78. if __name__ == '__main__':
  79. # cumbria_parser = AtriumePlanningParser("Cumbria County Council", "Cumbria", "http://217.114.50.149:7778/ePlanningOPS/loadResults.do")
  80. # print cumbria_parser.getResults(22,11,2007)
  81. # lincolnshire_parser = AtriumePlanningParser("Lincolnshire County Council", "Lincolnshire", "")
  82. # print cumbria_parser.getResults(22,11,2007)
  83. parser = AtriumePlanningParser("Dorset County Council", "Dorset", "http://www.dorsetforyou.com/ePlanning/loadResults.do")
  84. print parser.getResults(13,11,2007)