Automatically exported from code.google.com/p/planningalerts
 
 
 
 
 
 

115 rivejä
3.7 KiB

  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import datetime, time
  5. import cgi
  6. import re
  7. from BeautifulSoup import BeautifulSoup
  8. from PlanningUtils import PlanningApplication, \
  9. PlanningAuthorityResults, \
  10. getPostcodeFromText
  11. date_format = "%d/%m/%Y"
  12. class MaldonParser:
  13. comment_email_address = "dc.planning@maldon.gov.uk"
  14. def __init__(self, authority_name, authority_short_name, base_url, debug=False):
  15. self.debug = debug
  16. self.authority_name = authority_name
  17. self.authority_short_name = authority_short_name
  18. self.base_url = base_url
  19. self.info_url = urlparse.urljoin(base_url, "searchPlan.jsp")
  20. self._split_base_url = urlparse.urlsplit(self.base_url)
  21. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  22. def getResultsByDayMonthYear(self, day, month, year):
  23. search_date = datetime.date(year, month, day)
  24. search_date_string = search_date.strftime(date_format)
  25. search_data = urllib.urlencode(
  26. [("RegisteredDateFrom", search_date_string),
  27. ("RegisteredDateTo", search_date_string),
  28. ]
  29. )
  30. split_search_url = self._split_base_url[:3] + (search_data, '')
  31. search_url = urlparse.urlunsplit(split_search_url)
  32. response = urllib2.urlopen(search_url)
  33. soup = BeautifulSoup(response.read())
  34. # First check if we have the no apps found page
  35. if soup.find(text="No Applications Found"):
  36. return self._results
  37. # Not a very good way of finding the table, but it works for the moment.
  38. results_table = soup.find("table", cellpadding="5px")
  39. trs = results_table.findAll("tr")[1:]
  40. tr_counter = 0
  41. while tr_counter < len(trs):
  42. tr = trs[tr_counter]
  43. if tr_counter % 2 == 0:
  44. application = PlanningApplication()
  45. application.date_received = search_date
  46. application.comment_url = self.comment_email_address
  47. tds = tr.findAll("td")
  48. application.council_reference = tds[0].b.string.strip()
  49. application.address = ' '.join(tds[2].string.split())
  50. application.postcode = getPostcodeFromText(application.address)
  51. # This is what it ought to be, but you can't get there without a sodding cookie.
  52. # I guess we'll have to send people to the front page
  53. # application.info_url = urlparse.urljoin(self.base_url, tr.find("a", title="Click here to view application details")['href'])
  54. application.info_url = self.info_url
  55. else:
  56. description = tr.td.string
  57. if tr.td.string is not None:
  58. application.description = tr.td.string.strip()
  59. else:
  60. application.description = "Description Missing"
  61. self._results.addApplication(application)
  62. tr_counter += 1
  63. return self._results
  64. def getResults(self, day, month, year):
  65. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  66. class PendleParser(MaldonParser):
  67. comment_email_address = "planning@pendle.gov.uk"
  68. if __name__ == '__main__':
  69. #parser = MaldonParser("Maldon District Council", "Maldon", "http://forms.maldon.gov.uk:8080/PlanApp/jsp/searchPlanApp-action.do")
  70. parser = PendleParser("Pendle Borough Council", "Pendle", "http://bopdoccip.pendle.gov.uk/PlanApp/jsp/searchPlanApp-action.do")
  71. print parser.getResults(21,5,2008)
  72. # TODO
  73. # 1) Email the council about non-linkable info page.
  74. # 2) Email the council about missing descriptions?