Automatically exported from code.google.com/p/planningalerts
No puede seleccionar más de 25 temas Los temas deben comenzar con una letra o número, pueden incluir guiones ('-') y pueden tener hasta 35 caracteres de largo.
 
 
 
 
 
 

162 líneas
6.2 KiB

  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import datetime, time
  5. import cgi
  6. import re
  7. from BeautifulSoup import BeautifulSoup
  8. from PlanningUtils import PlanningApplication, \
  9. PlanningAuthorityResults, \
  10. getPostcodeFromText
  11. date_format = "%d/%m/%Y"
  class ShropshireParser:
      """Scrape planning applications from a council's online search page.

      A search for a single day is sent to ``base_url`` as a query string,
      the result list is parsed for links to individual applications, and
      each application's info page is then downloaded to read its details.

      Subclasses adapt the scraper to other sites by overriding the class
      attributes below and the ``_get_info_link_list`` / ``_get_postcode``
      hooks.
      """

      # Name of the <input> on the info page that holds the council reference.
      reference_input_name = "ApplNum"
      # Name of the <input> on the info page whose value is used as the
      # comment URL when ``comment_url`` is not set.
      contact_email_name = "offemail"
      # Fixed comment URL used for every application. None means "read it
      # from each info page" (see ``contact_email_name``).
      comment_url = None
      # When True the search date is sent in the "validated" from/to fields
      # instead of the "received" ones.
      use_validated_date = False

      def _get_info_link_list(self, soup):
          """Return the link tags for the applications listed in ``soup``.

          Takes the first <a> inside each direct-child <tr> of the table with
          id "tbllist", skipping the last row.
          """
          # NOTE(review): the last row is dropped unconditionally; presumably
          # it is a footer/paging row rather than an application -- confirm.
          rows = soup.find("table", id="tbllist").findAll("tr", recursive=False)
          return [tr.a for tr in rows[:-1]]

      def _get_postcode(self, info_soup):
          """Return the value of the "Postcode" input on an info page."""
          return info_soup.find("input", {"name": "Postcode"})['value']

      def __init__(self, authority_name, authority_short_name, base_url, debug=False):
          """Store the authority details and create an empty results object.

          ``base_url`` is the address of the search page; its scheme, host and
          path are reused when building the search URL.
          """
          self.debug = debug

          self.authority_name = authority_name
          self.authority_short_name = authority_short_name
          self.base_url = base_url

          self._split_base_url = urlparse.urlsplit(base_url)

          # The application currently being filled in. The _get_postcode hook
          # of some subclasses reads its address from here.
          self._current_application = None
          self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

      def _fetch_soup(self, url):
          """Download ``url`` and return its content parsed by BeautifulSoup.

          The HTTP response is always closed, even if reading or parsing
          fails, so that one connection is not leaked per page fetched.
          """
          response = urllib2.urlopen(url)
          try:
              return BeautifulSoup(response.read())
          finally:
              response.close()

      def _build_search_url(self, search_date):
          """Return the search URL for applications on ``search_date``."""
          search_date_string = search_date.strftime(date_format)

          # Only one of the received/validated date ranges is filled in; the
          # other is sent empty.
          if self.use_validated_date:
              received_search_string = ""
              validated_search_string = search_date_string
          else:
              received_search_string = search_date_string
              validated_search_string = ""

          search_data = urllib.urlencode([
              ("txtAppNum", ""),
              ("txtAppName", ""),
              ("txtAppLocn", ""),
              ("txtAppPCode", ""),
              ("txtAppRecFrom", received_search_string),
              ("txtAppRecTo", received_search_string),
              ("txtAppDecFrom", ""),
              ("txtAppDecTo", ""),
              ("txtAppValFrom", validated_search_string),
              ("txtAppValTo", validated_search_string),
              ("district_drop", ""),
              ("parish_drop", ""),
              ("ward_drop", ""),
              ("ft", "yes"),
              ("submit1", "Submit"),
          ])

          # Keep scheme, host and path of the base URL; replace the query
          # string and drop any fragment.
          split_search_url = self._split_base_url[:3] + (search_data, '')
          return urlparse.urlunsplit(split_search_url)

      def _get_textarea_text(self, info_soup, name):
          """Return the stripped text of the <textarea> called ``name``.

          An empty textarea has no string child, so ``.string`` is None; in
          that case "" is returned instead of failing on ``None.strip()``.
          """
          text = info_soup.find("textarea", {"name": name}).string
          if text is None:
              return ""
          return text.strip()

      def getResultsByDayMonthYear(self, day, month, year):
          """Scrape the applications for one day and return the results object.

          ``day``, ``month`` and ``year`` are integers; ``datetime.date``
          raises ValueError if they do not form a valid date. Each application
          found costs one extra HTTP request for its info page.

          Applications are added to the results object created in
          ``__init__``, so repeated calls on the same instance accumulate.
          """
          search_date = datetime.date(year, month, day)
          soup = self._fetch_soup(self._build_search_url(search_date))

          # Handle the case where there are no apps
          if soup.find(text=re.compile("No applications matched your query")):
              return self._results

          for app_link in self._get_info_link_list(soup):
              application = PlanningApplication()
              # Published on self before the hooks run: subclass versions of
              # _get_postcode read self._current_application.address.
              self._current_application = application

              # We could get this from the info soup, but as we already know
              # it, why bother.
              application.date_received = search_date

              application.info_url = urlparse.urljoin(self.base_url, app_link['href'])

              # To get the postcode we will need to download each info page
              info_soup = self._fetch_soup(application.info_url)

              application.council_reference = info_soup.find("input", {"name": self.reference_input_name})['value']
              # The address must be set before _get_postcode is called.
              application.address = self._get_textarea_text(info_soup, "Location")
              application.postcode = self._get_postcode(info_soup)
              application.description = self._get_textarea_text(info_soup, "Proposal")

              if self.comment_url:
                  application.comment_url = self.comment_url
              else:
                  application.comment_url = info_soup.find("input", {"name": self.contact_email_name})['value']

              # There is an OSGB position here :-)
              application.osgb_x = info_soup.find("input", {"name": "Easting"})['value']
              application.osgb_y = info_soup.find("input", {"name": "Northing"})['value']

              self._results.addApplication(application)

          return self._results

      def getResults(self, day, month, year):
          """Return ``displayXML()`` of the results for the given date.

          The arguments may be ints or numeric strings; they are converted
          with ``int`` before searching.
          """
          return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  class NorthYorkshireParser(ShropshireParser):
      """ShropshireParser variant with different field names and page layout.

      The search is made on the validated date, the postcode is extracted
      from the address text, and application links are found inside
      "listApplNum" divs.
      """

      # The date we give as the date_received here is actually the validated date.
      use_validated_date = True

      comment_url = None
      contact_email_name = "contactEmail"
      reference_input_name = "txtAppNum"

      def _get_info_link_list(self, soup):
          """Return the first <a> of every div with class "listApplNum"."""
          links = []
          for holder in soup.findAll("div", {"class": "listApplNum"}):
              links.append(holder.a)
          return links

      def _get_postcode(self, info_soup):
          """Return the postcode found in the current application's address.

          ``info_soup`` is not used; the address already stored on
          ``self._current_application`` is passed to getPostcodeFromText.
          """
          address = self._current_application.address
          return getPostcodeFromText(address)
  class SouthNorthamptonshireParser(ShropshireParser):
      """ShropshireParser variant with a fixed comment URL.

      The postcode is extracted from the address text, and application links
      are every <a> inside the "div-content-class" div.
      """

      comment_url = "http://www.southnorthants.gov.uk/mandoforms/servlet/com.mandoforms.server.MandoformsServer?MF_XML=ApplicationComments&MF_DEVICE=HTML"
      reference_input_name = "txtAppNum"

      def _get_info_link_list(self, soup):
          """Return all <a> tags inside the first "div-content-class" div."""
          content_div = soup.find("div", {"class": "div-content-class"})
          return content_div.findAll("a")

      def _get_postcode(self, info_soup):
          """Return the postcode found in the current application's address.

          ``info_soup`` is not used; the address already stored on
          ``self._current_application`` is passed to getPostcodeFromText.
          """
          address = self._current_application.address
          return getPostcodeFromText(address)
  if __name__ == '__main__':
      # Manual run: scrape a single day from one authority and print whatever
      # getResults returns. Alternative set-ups are kept below for reference.

      # parser = ShropshireParser("Shropshire County Council", "Shropshire", "http://planning.shropshire.gov.uk/PlanAppList.asp")
      # print parser.getResults(6,6,2008)

      # parser = SouthNorthamptonshireParser("South Northamptonshire Council", "South Northamptonshire", "http://snc.planning-register.co.uk/PlanAppList.asp")
      # print parser.getResults(5,6,2008)

      # parser = ShropshireParser("Purbeck District Council", "Purbeck", "http://www.purbeck.gov.uk/planning/PlanAppList.asp")

      # Previously also run for North Yorkshire with:
      # print parser.getResults(10,6,2008)
      parser = NorthYorkshireParser(
          "North Yorkshire County Council",
          "North Yorkshire",
          "https://onlineplanningregister.northyorks.gov.uk/Online%20Register/PlanAppList.asp",
      )

      # A single parenthesised expression prints the same as the bare
      # Python 2 print statement.
      print(parser.getResults(12, 6, 2009))
  112. # TODO
  113. # 1) Pagination: the South Northamptonshire site paginates at 25 results. This is probably not a problem in practice, but the other sites should be checked too.