Automatically exported from code.google.com/p/planningalerts
Não pode escolher mais do que 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.
 
 
 
 
 
 

121 linhas
4.8 KiB

  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import datetime
  5. from BeautifulSoup import BeautifulSoup
  6. from PlanningUtils import PlanningApplication, \
  7. PlanningAuthorityResults, \
  8. getPostcodeFromText
  9. info_path = "loadFullDetails.do"
  10. comment_path = "loadRepresentation.do"
  11. class AtriumePlanningParser:
  12. def __init__(self,
  13. authority_name,
  14. authority_short_name,
  15. base_url,
  16. debug=False):
  17. self.authority_name = authority_name
  18. self.authority_short_name = authority_short_name
  19. self.base_url = base_url
  20. self.info_url = urlparse.urljoin(base_url, info_path)
  21. self.comment_url = urlparse.urljoin(base_url, comment_path)
  22. self.debug = debug
  23. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  24. def getResultsByDayMonthYear(self, day, month, year):
  25. # The end date for the search needs to be one day after the start
  26. # date - presumably the date is used as a timestamp at midnight
  27. search_start_date = datetime.date(year, month, day)
  28. search_end_date = search_start_date + datetime.timedelta(1)
  29. search_data = urllib.urlencode({"dayRegStart": search_start_date.strftime("%d"),
  30. "monthRegStart": search_start_date.strftime("%b"),
  31. "yearRegStart": search_start_date.strftime("%Y"),
  32. "dayRegEnd": search_end_date.strftime("%d"),
  33. "monthRegEnd": search_end_date.strftime("%b"),
  34. "yearRegEnd": search_end_date.strftime("%Y"),
  35. "searchType": "current",
  36. "dispatch": "Search"
  37. })
  38. response = urllib2.urlopen(self.base_url, search_data)
  39. html = response.read()
  40. soup = BeautifulSoup(html)
  41. # Get a list of the trs in the results table
  42. if soup.find(text="Results"):
  43. tds = soup.find(text="Results").parent.findNext("table").findAll("td")
  44. for td in tds:
  45. if td.string:
  46. if td.string.strip() == "Date Registered":
  47. # We are starting a new App
  48. self._current_application = PlanningApplication()
  49. #
  50. day, month, year = [int(x) for x in td.findNext("td").string.split("-")]
  51. self._current_application.date_received = datetime.date(year, month, day)
  52. # FIXME - when python on haggis is a bit newer,
  53. #we can do the following, which is neater
  54. #(and get rid of the import of time).
  55. #self._current_application.date_received = datetime.datetime.strptime(td.findNext("td").string, "%d-%m-%Y")
  56. elif td.string.strip() == "Application Number":
  57. self._current_application.council_reference = td.findNext("td").string
  58. elif td.string.strip() == "Location":
  59. location = td.findNext("td").string
  60. self._current_application.address = location
  61. postcode = getPostcodeFromText(location)
  62. if postcode:
  63. self._current_application.postcode = postcode
  64. elif td.string.strip() == "Proposal":
  65. self._current_application.description = td.findNext("td").string
  66. elif td.a and td.a.string.strip() == "View Full Details":
  67. # The info url is td.a
  68. messy_info_url = td.a["href"]
  69. # We need to get an id out of this url
  70. query_str = urlparse.urlsplit(messy_info_url)[3]
  71. self._current_application.info_url = self.info_url + "?" + query_str
  72. self._current_application.comment_url = self.comment_url + "?" + query_str
  73. if self._current_application.is_ready():
  74. self._results.addApplication(self._current_application)
  75. return self._results
  76. def getResults(self, day, month, year):
  77. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  78. if __name__ == '__main__':
  79. # cumbria_parser = AtriumePlanningParser("Cumbria County Council", "Cumbria", "http://217.114.50.149:7778/ePlanningOPS/loadResults.do")
  80. # print cumbria_parser.getResults(22,11,2007)
  81. # lincolnshire_parser = AtriumePlanningParser("Lincolnshire County Council", "Lincolnshire", "")
  82. # print cumbria_parser.getResults(22,11,2007)
  83. parser = AtriumePlanningParser("Dorset County Council", "Dorset", "http://www.dorsetforyou.com/ePlanning/loadResults.do")
  84. print parser.getResults(13,11,2007)