Automatically exported from code.google.com/p/planningalerts

import urllib2
import urllib
import urlparse
import datetime, time
import cgi
import re

from BeautifulSoup import BeautifulSoup

from PlanningUtils import PlanningApplication, \
    PlanningAuthorityResults, \
    getPostcodeFromText

date_format = "%d/%m/%Y"
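
# Scraper for planning registers served by PlanAppList.asp: one search-by-date
# request lists the day's applications, then each application's info page is
# fetched to extract the reference, address, postcode, description and an
# OSGB easting/northing.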
class ShropshireParser:
    reference_input_name = "ApplNum"
    contact_email_name = "offemail"
    comment_url = None
    use_validated_date = False

    def _get_info_link_list(self, soup):
        return [tr.a for tr in soup.find("table", id="tbllist").findAll("tr", recursive=False)[:-1]]

    def _get_postcode(self, info_soup):
        return info_soup.find("input", {"name": "Postcode"})['value']

    def __init__(self, authority_name, authority_short_name, base_url, debug=False):
        self.debug = debug
        self.authority_name = authority_name
        self.authority_short_name = authority_short_name
        self.base_url = base_url
        self._split_base_url = urlparse.urlsplit(base_url)
        self._current_application = None
        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
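
    # Query the register for every application on a single day: fill in the
    # received-date (or validated-date) from/to fields, GET the results page,
    # then visit each application's info page for the details.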
    def getResultsByDayMonthYear(self, day, month, year):
        search_date = datetime.date(year, month, day)
        search_date_string = search_date.strftime(date_format)

        if self.use_validated_date:
            received_search_string = ""
            validated_search_string = search_date_string
        else:
            received_search_string = search_date_string
            validated_search_string = ""

        search_data = urllib.urlencode([
            ("txtAppNum", ""),
            ("txtAppName", ""),
            ("txtAppLocn", ""),
            ("txtAppPCode", ""),
            ("txtAppRecFrom", received_search_string),
            ("txtAppRecTo", received_search_string),
            ("txtAppDecFrom", ""),
            ("txtAppDecTo", ""),
            ("txtAppValFrom", validated_search_string),
            ("txtAppValTo", validated_search_string),
            ("district_drop", ""),
            ("parish_drop", ""),
            ("ward_drop", ""),
            ("ft", "yes"),
            ("submit1", "Submit"),
        ])

        split_search_url = self._split_base_url[:3] + (search_data, '')
        search_url = urlparse.urlunsplit(split_search_url)

        response = urllib2.urlopen(search_url)
        soup = BeautifulSoup(response.read())

        # Handle the case where there are no apps
        if soup.find(text=re.compile("No applications matched your query")):
            return self._results

        info_link_list = self._get_info_link_list(soup)

        for app_link in info_link_list:
            self._current_application = PlanningApplication()

            # We could get this from the info soup, but as we already know it, why bother.
            self._current_application.date_received = search_date
            self._current_application.info_url = urlparse.urljoin(self.base_url, app_link['href'])

            # To get the postcode we will need to download each info page
            info_response = urllib2.urlopen(self._current_application.info_url)
            info_soup = BeautifulSoup(info_response.read())

            self._current_application.council_reference = info_soup.find("input", {"name": self.reference_input_name})['value']
            self._current_application.address = info_soup.find("textarea", {"name": "Location"}).string.strip()
            self._current_application.postcode = self._get_postcode(info_soup)
            self._current_application.description = info_soup.find("textarea", {"name": "Proposal"}).string.strip()

            if self.comment_url:
                self._current_application.comment_url = self.comment_url
            else:
                self._current_application.comment_url = info_soup.find("input", {"name": self.contact_email_name})['value']

            # There is an OSGB position here :-)
            self._current_application.osgb_x = info_soup.find("input", {"name": "Easting"})['value']
            self._current_application.osgb_y = info_soup.find("input", {"name": "Northing"})['value']

            self._results.addApplication(self._current_application)

        return self._results
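
    # Convenience entry point: accepts string day/month/year arguments and
    # returns the day's results serialised as XML.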
    def getResults(self, day, month, year):
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
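
# The North Yorkshire and South Northamptonshire registers appear to run the
# same PlanAppList.asp software with slightly different markup, so the
# subclasses below only override form-field names and the HTML-parsing hooks.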
class NorthYorkshireParser(ShropshireParser):
    reference_input_name = "txtAppNum"
    contact_email_name = "contactEmail"
    comment_url = None

    # The date we give as the date_received here is actually the validated date.
    use_validated_date = True

    def _get_postcode(self, info_soup):
        return getPostcodeFromText(self._current_application.address)

    def _get_info_link_list(self, soup):
        return [div.a for div in soup.findAll("div", {"class": "listApplNum"})]
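
# South Northants provides a dedicated comment form, so comment_url is a fixed
# URL rather than an email address scraped from the info page.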
class SouthNorthamptonshireParser(ShropshireParser):
    reference_input_name = "txtAppNum"
    comment_url = "http://www.southnorthants.gov.uk/mandoforms/servlet/com.mandoforms.server.MandoformsServer?MF_XML=ApplicationComments&MF_DEVICE=HTML"

    def _get_postcode(self, info_soup):
        return getPostcodeFromText(self._current_application.address)

    def _get_info_link_list(self, soup):
        return soup.find("div", {"class": "div-content-class"}).findAll("a")
if __name__ == '__main__':
#    parser = ShropshireParser("Shropshire County Council", "Shropshire", "http://planning.shropshire.gov.uk/PlanAppList.asp")
#    print parser.getResults(6,6,2008)
#    parser = NorthYorkshireParser("North Yorkshire County Council", "North Yorkshire", "https://onlineplanningregister.northyorks.gov.uk/Online%20Register/PlanAppList.asp")
#    print parser.getResults(10,6,2008)
#    parser = SouthNorthamptonshireParser("South Northamptonshire Council", "South Northamptonshire", "http://snc.planning-register.co.uk/PlanAppList.asp")
#    print parser.getResults(5,6,2008)
    parser = ShropshireParser("Purbeck District Council", "Purbeck", "http://www.purbeck.gov.uk/planning/PlanAppList.asp")
    print parser.getResults(5,6,2008)

# TODO
# 1) Pagination: South Northants paginates at 25. I doubt this is a problem. Should also check out the others.
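
# Note: PlanningApplication, PlanningAuthorityResults and getPostcodeFromText
# come from PlanningUtils elsewhere in this repository and are not shown in
# this file. As a rough sketch only (the regex and behaviour below are guessed
# from how the helper is used above, not taken from the real module),
# getPostcodeFromText might look something like:
#
#     import re
#
#     POSTCODE_RE = re.compile(r"[A-Z]{1,2}[0-9][A-Z0-9]? ?[0-9][A-Z]{2}")
#
#     def getPostcodeFromText(text):
#         # Return the first UK-postcode-shaped substring, or None.
#         match = POSTCODE_RE.search(text or "")
#         return match.group() if match else None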