# Automatically exported from code.google.com/p/planningalerts
 
 
 
 
 
 

# 126 lines
# 4.6 KiB

  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import datetime, time
  5. import cgi
  6. import re
  7. from BeautifulSoup import BeautifulSoup
  8. from PlanningUtils import PlanningApplication, \
  9. PlanningAuthorityResults, \
  10. getPostcodeFromText
  11. date_format = "%d/%m/%Y"
  12. class BrentParser:
  13. def __init__(self, *args):
  14. self.authority_name = "London Borough of Brent"
  15. self.authority_short_name = "Brent"
  16. # self.base_url = "http://www.brent.gov.uk/servlet/ep.ext?extId=101149&byPeriod=Y&st=PL&periodUnits=day&periodMultiples=14"
  17. self.base_url = "http://www.brent.gov.uk/servlet/ep.ext"
  18. self._current_application = None
  19. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  20. def getResultsByDayMonthYear(self, day, month, year):
  21. search_day = datetime.date(year, month, day)
  22. post_data = [
  23. ("from", search_day.strftime(date_format)),
  24. ("until", search_day.strftime(date_format)),
  25. ("EXECUTEQUERY", "Query"),
  26. # ("auth", "402"),
  27. ("st", "PL"),
  28. ("periodUnits", "day"),
  29. ("periodMultiples", "14"),
  30. ("title", "Search+by+Application+Date"),
  31. ("instructions", "Enter+a+date+range+to+search+for+existing+applications+by+the+date+of+application.%0D%0A%3Cbr%3E%3Cbr%3E%0D%0A%3Cstrong%3ENote%3A%3C%2Fstrong%3E+Where+%27%28Applicant%27s+Description%29%27+appears+in+the+proposal%2C+the+text+may+subsequently+be+amended+when+the+application+is+checked."),
  32. ("byFormat", "N"),
  33. ("byOther1", "N"),
  34. ("byOther2", "N"),
  35. ("byOther3", "N"),
  36. ("byOther4", "N"),
  37. ("byOther5", "N"),
  38. ("byPostcode", "N"),
  39. ("byStreet", "N"),
  40. ("byHouseNumber", "N"),
  41. ("byAddress", "N"),
  42. ("byPeriod", "Y"),
  43. ("extId", "101149"), # I wonder what this is...
  44. ("queried", "Y"),
  45. ("other1Label", "Other1"),
  46. ("other2Label", "Other2"),
  47. ("other3Label", "Other3"),
  48. ("other4Label", "Other4"),
  49. ("other5Label", "Other5"),
  50. ("other1List", ""),
  51. ("other2List", ""),
  52. ("other3List", ""),
  53. ("other4List", ""),
  54. ("other5List", ""),
  55. ("periodLabel", "From"),
  56. ("addressLabel", "Select+Address"),
  57. ("print", "")
  58. ]
  59. # Now get the search page
  60. response = urllib2.urlopen(self.base_url, urllib.urlencode(post_data))
  61. soup = BeautifulSoup(response.read())
  62. trs = soup.find(text="Search Results").findNext("table").findAll("tr")[:-1]
  63. # There are six trs per application, ish
  64. # The first contains the case no and the application date.
  65. # The second contains the address
  66. # The third contains the description
  67. # The fourth contains the info page link
  68. # The fifth contains the comment link (or a note that comments are currently not being accepted
  69. # The sixth is a spacer.
  70. count = 0
  71. for tr in trs:
  72. count +=1
  73. ref = tr.find(text=re.compile("Case No:"))
  74. if ref:
  75. self._current_application = PlanningApplication()
  76. count = 1
  77. self._current_application.council_reference = ref.split(":")[1].strip()
  78. self._current_application.date_received = search_day
  79. if count % 6 == 2:
  80. self._current_application.address = tr.td.string.strip()
  81. self._current_application.postcode = getPostcodeFromText(self._current_application.address)
  82. if count % 6 == 3:
  83. self._current_application.description = tr.td.string.strip()
  84. if count % 6 == 4:
  85. self._current_application.info_url = urlparse.urljoin(self.base_url, tr.a['href'])
  86. if count % 6 == 5:
  87. try:
  88. self._current_application.comment_url = urlparse.urljoin(self.base_url, tr.a['href'])
  89. except:
  90. # Comments are not currently being accepted. We'll leave this app for the moment - we'll pick it up later if they start accepting comments
  91. continue
  92. if count % 6 == 0 and self._current_application.is_ready():
  93. self._results.addApplication(self._current_application)
  94. return self._results
  95. def getResults(self, day, month, year):
  96. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  97. if __name__ == '__main__':
  98. parser = BrentParser()
  99. print parser.getResults(6,8,2008)