Automatically exported from code.google.com/p/planningalerts
您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符
 
 
 
 
 
 

115 行
3.7 KiB

  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import datetime, time
  5. import cgi
  6. import re
  7. from BeautifulSoup import BeautifulSoup
  8. from PlanningUtils import PlanningApplication, \
  9. PlanningAuthorityResults, \
  10. getPostcodeFromText
  11. date_format = "%d/%m/%Y"
  12. class MaldonParser:
  13. comment_email_address = "dc.planning@maldon.gov.uk"
  14. def __init__(self, authority_name, authority_short_name, base_url, debug=False):
  15. self.debug = debug
  16. self.authority_name = authority_name
  17. self.authority_short_name = authority_short_name
  18. self.base_url = base_url
  19. self.info_url = urlparse.urljoin(base_url, "searchPlan.jsp")
  20. self._split_base_url = urlparse.urlsplit(self.base_url)
  21. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  22. def getResultsByDayMonthYear(self, day, month, year):
  23. search_date = datetime.date(year, month, day)
  24. search_date_string = search_date.strftime(date_format)
  25. search_data = urllib.urlencode(
  26. [("RegisteredDateFrom", search_date_string),
  27. ("RegisteredDateTo", search_date_string),
  28. ]
  29. )
  30. split_search_url = self._split_base_url[:3] + (search_data, '')
  31. search_url = urlparse.urlunsplit(split_search_url)
  32. response = urllib2.urlopen(search_url)
  33. soup = BeautifulSoup(response.read())
  34. # First check if we have the no apps found page
  35. if soup.find(text="No Applications Found"):
  36. return self._results
  37. # Not a very good way of finding the table, but it works for the moment.
  38. results_table = soup.find("table", cellpadding="5px")
  39. trs = results_table.findAll("tr")[1:]
  40. tr_counter = 0
  41. while tr_counter < len(trs):
  42. tr = trs[tr_counter]
  43. if tr_counter % 2 == 0:
  44. application = PlanningApplication()
  45. application.date_received = search_date
  46. application.comment_url = self.comment_email_address
  47. tds = tr.findAll("td")
  48. application.council_reference = tds[0].b.string.strip()
  49. application.address = ' '.join(tds[2].string.split())
  50. application.postcode = getPostcodeFromText(application.address)
  51. # This is what it ought to be, but you can't get there without a sodding cookie.
  52. # I guess we'll have to send people to the front page
  53. # application.info_url = urlparse.urljoin(self.base_url, tr.find("a", title="Click here to view application details")['href'])
  54. application.info_url = self.info_url
  55. else:
  56. description = tr.td.string
  57. if tr.td.string is not None:
  58. application.description = tr.td.string.strip()
  59. else:
  60. application.description = "Description Missing"
  61. self._results.addApplication(application)
  62. tr_counter += 1
  63. return self._results
  64. def getResults(self, day, month, year):
  65. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  66. class PendleParser(MaldonParser):
  67. comment_email_address = "planning@pendle.gov.uk"
  68. if __name__ == '__main__':
  69. #parser = MaldonParser("Maldon District Council", "Maldon", "http://forms.maldon.gov.uk:8080/PlanApp/jsp/searchPlanApp-action.do")
  70. parser = PendleParser("Pendle Borough Council", "Pendle", "http://bopdoccip.pendle.gov.uk/PlanApp/jsp/searchPlanApp-action.do")
  71. print parser.getResults(21,5,2008)
  72. # TODO
  73. # 1) Email the council about non-linkable info page.
  74. # 2) Email the council about missing descriptions?