Automatically exported from code.google.com/p/planningalerts
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

195 lines
11 KiB

"""
This is the screenscraper for the planning applications in Amber Valley.

We have to get the initial search page so that we can use the __VIEWSTATE
parameter.

The start and end dates have to be separated by 1 day - I presume they are
interpreting dates as a datetime at midnight...

BeautifulSoup doesn't seem to be able to cope with what comes back from the
post, so we'll use HTMLParser.

The info reference link uses javascript (typical). As far as I can see there
is no way to link directly to the info page for an application, so we'll just
have to link to the search page.

Bizarrely, the comment url is fine. e.g.
http://www.ambervalley.gov.uk/services/environment/landandpremises/planningtownandcountry/planningapplications/planappcommentform.htm?frm_AppNum=AVA-2008-0955&frm_SiteAddress=147+Derby+Road%0dDuffield%0dBelper%0dDerbyshire%0dDE56+4FQ%0d&frm_Proposal=Rear+single+storey+extension+and+loft+conversion
"""
  13. import urllib2
  14. import urllib
  15. import urlparse
  16. import HTMLParser
  17. import datetime
  18. from BeautifulSoup import BeautifulSoup
  19. from PlanningUtils import PlanningApplication, \
  20. PlanningAuthorityResults, \
  21. getPostcodeFromText
  22. #date_format = "%d/%m/%Y"
  23. class AmberValleyParser(HTMLParser.HTMLParser):
  24. def __init__(self, *args):
  25. HTMLParser.HTMLParser.__init__(self)
  26. self._in_result_table = False
  27. self._td_count = None
  28. self._get_ref = False
  29. self._get_description = False
  30. self.authority_name = "Amber Valley Borough Council"
  31. self.authority_short_name = "Amber Valley"
  32. self.base_url = "http://www.ambervalley.gov.uk/AVBC/Core/TemplateHandler.aspx?NRMODE=Published&NRNODEGUID=%7bAF862CF0-5C6D-4115-9979-5956B24D12DF%7d&NRORIGINALURL=%2fservices%2fenvironment%2flandandpremises%2fplanningtownandcountry%2fplanningapplications%2fPlanningApplicationRegister%2ehtm&NRCACHEHINT=Guest#filterbottom"
  33. self.comment_url_template = "http://www.ambervalley.gov.uk/services/environment/landandpremises/planningtownandcountry/planningapplications/planappcommentform.htm?frm_AppNum=%(reference)s&frm_SiteAddress=%(address)s&frm_Proposal=%(description)s"
  34. self._current_application = None
  35. self._search_date = None
  36. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  37. def handle_starttag(self, tag, attrs):
  38. if tag == "table":
  39. for key, value in attrs:
  40. if key == "class" and value == "test":
  41. self._current_application = PlanningApplication()
  42. # We can set the date_received immediately
  43. self._current_application.date_received = self._search_date
  44. self._in_result_table = True
  45. self._td_count = 0
  46. break
  47. elif tag == "td":
  48. if self._in_result_table:
  49. self._td_count += 1
  50. self._get_description = False
  51. elif tag == "a" and self._td_count == 1:
  52. self._get_ref = True
  53. def handle_endtag(self, tag):
  54. if tag == "table" and self._in_result_table:
  55. self._current_application.description = self._current_application.description.strip()
  56. self._current_application.address = ' '.join(self._current_application.address.strip().split())
  57. self._current_application.postcode = getPostcodeFromText(self._current_application.address)
  58. self._current_application.info_url = self.base_url # Can't link to the info page, due to javascript idiocy.
  59. self._current_application.comment_url = self.comment_url_template %{"reference": urllib.quote_plus(self._current_application.council_reference),
  60. "address": urllib.quote_plus(self._current_application.address),
  61. "description": urllib.quote_plus(self._current_application.description),
  62. }
  63. self._results.addApplication(self._current_application)
  64. self._in_result_table = False
  65. self._td_count = None
  66. if tag == "a":
  67. self._get_ref = False
  68. def handle_startendtag(self, tag, attrs):
  69. if tag == "br" and self._td_count == 2:
  70. self._get_description = True
  71. def handle_data(self, data):
  72. if self._get_ref == True:
  73. self._current_application.council_reference = data
  74. elif self._td_count == 2:
  75. # This td contains the address (including postcode)
  76. # and the description
  77. if self._get_description:
  78. # We have passed the <br />, and are looking for the description
  79. if not self._current_application.description:
  80. self._current_application.description = data
  81. else:
  82. self._current_application.description += data
  83. else:
  84. # We have not yet passed the <br /> and are looking for the address and postcode.
  85. if not self._current_application.address:
  86. self._current_application.address = data
  87. else:
  88. self._current_application.address += data
  89. def getResultsByDayMonthYear(self, day, month, year):
  90. self._search_date = search_start_date = datetime.date(year, month, day)
  91. search_end_date = search_start_date + datetime.timedelta(1)
  92. # Now get the search page
  93. get_response = urllib2.urlopen(self.base_url)
  94. soup = BeautifulSoup(get_response.read())
  95. form = soup.find("form", id="__aspnetForm")
  96. # We're going to need __VIEWSTATE for our post
  97. viewstate = form.find("input", {"name":"__VIEWSTATE"})['value']
  98. action = form['action']
  99. # Now we have what we need to do a POST
  100. post_url = urlparse.urljoin(self.base_url, action)
  101. # Example post data without the __VIEWSTATE
  102. # MainControl%3ACustomFunctionality_ZoneMain%3AEmbeddedUserControlPlaceholderControl1%3A_ctl0%3AmyFilter%3AtxbAppNumber=
  103. # MainControl%3ACustomFunctionality_ZoneMain%3AEmbeddedUserControlPlaceholderControl1%3A_ctl0%3AmyFilter%3AtxbAddressKeyword=
  104. # MainControl%3ACustomFunctionality_ZoneMain%3AEmbeddedUserControlPlaceholderControl1%3A_ctl0%3AmyFilter%3AlstDayStart=30
  105. # MainControl%3ACustomFunctionality_ZoneMain%3AEmbeddedUserControlPlaceholderControl1%3A_ctl0%3AmyFilter%3AlstMonthStart=Jul
  106. # MainControl%3ACustomFunctionality_ZoneMain%3AEmbeddedUserControlPlaceholderControl1%3A_ctl0%3AmyFilter%3AlstYearStart=2008
  107. # MainControl%3ACustomFunctionality_ZoneMain%3AEmbeddedUserControlPlaceholderControl1%3A_ctl0%3AmyFilter%3AlstDayEnd=8
  108. # MainControl%3ACustomFunctionality_ZoneMain%3AEmbeddedUserControlPlaceholderControl1%3A_ctl0%3AmyFilter%3AlstMonthEnd=Aug
  109. # MainControl%3ACustomFunctionality_ZoneMain%3AEmbeddedUserControlPlaceholderControl1%3A_ctl0%3AmyFilter%3AlstYearEnd=2008
  110. # MainControl%3ACustomFunctionality_ZoneMain%3AEmbeddedUserControlPlaceholderControl1%3A_ctl0%3AmyFilter%3ArblDateType=0
  111. # MainControl%3ACustomFunctionality_ZoneMain%3AEmbeddedUserControlPlaceholderControl1%3A_ctl0%3AmyFilter%3AlstDistance=
  112. # MainControl%3ACustomFunctionality_ZoneMain%3AEmbeddedUserControlPlaceholderControl1%3A_ctl0%3AmyFilter%3AtxbPostcode=
  113. # MainControl%3ACustomFunctionality_ZoneMain%3AEmbeddedUserControlPlaceholderControl1%3A_ctl0%3AmyFilter%3AlstWards=
  114. # MainControl%3ACustomFunctionality_ZoneMain%3AEmbeddedUserControlPlaceholderControl1%3A_ctl0%3AmyFilter%3AlstParishes=
  115. # MainControl%3ACustomFunctionality_ZoneMain%3AEmbeddedUserControlPlaceholderControl1%3A_ctl0%3AmyFilter%3AlstOrderBy=RegisterDate+DESC
  116. # MainControl%3ACustomFunctionality_ZoneMain%3AEmbeddedUserControlPlaceholderControl1%3A_ctl0%3AmyFilter%3ArblViewType=List
  117. # MainControl%3ACustomFunctionality_ZoneMain%3AEmbeddedUserControlPlaceholderControl1%3A_ctl0%3AmyFilter%3AbtnQueryPlanApps=Lookup
  118. post_data = urllib.urlencode([
  119. ("__VIEWSTATE", viewstate),
  120. ("MainControl:CustomFunctionality_ZoneMain:EmbeddedUserControlPlaceholderControl1:_ctl0:myFilter:txbAppNumber", ""),
  121. ("MainControl:CustomFunctionality_ZoneMain:EmbeddedUserControlPlaceholderControl1:_ctl0:myFilter:txbAddressKeyword", ""),
  122. ("MainControl:CustomFunctionality_ZoneMain:EmbeddedUserControlPlaceholderControl1:_ctl0:myFilter:lstDayStart", search_start_date.day), # Using the attribute directly to avoid the leading 0
  123. ("MainControl:CustomFunctionality_ZoneMain:EmbeddedUserControlPlaceholderControl1:_ctl0:myFilter:lstMonthStart", search_start_date.strftime("%b")),
  124. ("MainControl:CustomFunctionality_ZoneMain:EmbeddedUserControlPlaceholderControl1:_ctl0:myFilter:lstYearStart", search_start_date.strftime("%Y")),
  125. ("MainControl:CustomFunctionality_ZoneMain:EmbeddedUserControlPlaceholderControl1:_ctl0:myFilter:lstDayEnd", search_end_date.day), # Using the attribute directly to avoid the leading 0
  126. ("MainControl:CustomFunctionality_ZoneMain:EmbeddedUserControlPlaceholderControl1:_ctl0:myFilter:lstMonthEnd", search_end_date.strftime("%b")),
  127. ("MainControl:CustomFunctionality_ZoneMain:EmbeddedUserControlPlaceholderControl1:_ctl0:myFilter:lstYearEnd", search_end_date.strftime("%Y")),
  128. ("MainControl:CustomFunctionality_ZoneMain:EmbeddedUserControlPlaceholderControl1:_ctl0:myFilter:rblDateType", "0"),
  129. ("MainControl:CustomFunctionality_ZoneMain:EmbeddedUserControlPlaceholderControl1:_ctl0:myFilter:lstDistance", ""),
  130. ("MainControl:CustomFunctionality_ZoneMain:EmbeddedUserControlPlaceholderControl1:_ctl0:myFilter:txbPostcode", ""),
  131. ("MainControl:CustomFunctionality_ZoneMain:EmbeddedUserControlPlaceholderControl1:_ctl0:myFilter:lstWards", ""),
  132. ("MainControl:CustomFunctionality_ZoneMain:EmbeddedUserControlPlaceholderControl1:_ctl0:myFilter:lstParishes", ""),
  133. ("MainControl:CustomFunctionality_ZoneMain:EmbeddedUserControlPlaceholderControl1:_ctl0:myFilter:lstOrderBy", "RegisterDate DESC"),
  134. ("MainControl:CustomFunctionality_ZoneMain:EmbeddedUserControlPlaceholderControl1:_ctl0:myFilter:rblViewType", "List"),
  135. ("MainControl:CustomFunctionality_ZoneMain:EmbeddedUserControlPlaceholderControl1:_ctl0:myFilter:btnQueryPlanApps", "Lookup"),
  136. ])
  137. post_response = urllib2.urlopen(post_url, post_data)
  138. self.feed(post_response.read())
  139. return self._results
  140. def getResults(self, day, month, year):
  141. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  142. if __name__ == '__main__':
  143. parser = AmberValleyParser()
  144. print parser.getResults(4,8,2008)