Automatically exported from code.google.com/p/planningalerts
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

114 lines
4.2 KiB

  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import datetime, time
  5. import cgi
  6. from BeautifulSoup import BeautifulSoup
  7. from PlanningUtils import PlanningApplication, \
  8. PlanningAuthorityResults, \
  9. getPostcodeFromText
  # Day/month/year layout shared by the scraper: it formats the value posted
  # in the weekly-list drop-down and parses the "received" dates read back
  # from the results page.
  date_format = "%d/%m/%Y"
  class BroxtoweParser:
      """Scraper for the Broxtowe Borough Council planning application lists.

      The search page is fetched once to collect the form's default inputs,
      the weekly list covering the requested day is requested with a POST,
      and further result pages are requested by posting the "hlbNext"
      postback fields (__EVENTTARGET, __VIEWSTATE, __EVENTVALIDATION).

      Results are collected in ``self._results``, which is created once in
      ``__init__``; calling a search method again on the same instance adds
      to the applications already collected.
      """

      # Upper bound on the number of result pages fetched per search, so that
      # a problem with the page counters cannot keep the loop running forever.
      _MAX_PASSES = 10

      def __init__(self, *args):
          """Set the authority details and create the empty result set.

          Any positional arguments are accepted and ignored.
          """
          self.authority_name = "Broxtowe Borough Council"
          self.authority_short_name = "Broxtowe"
          self.base_url = "http://planning.broxtowe.gov.uk"
          self.info_url = "http://planning.broxtowe.gov.uk/ApplicationDetail.aspx?RefVal=%s"
          self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

      @staticmethod
      def _friday_of_week(search_day):
          """Return the Friday of the Monday-to-Sunday week holding search_day.

          For Monday to Friday this is the Friday on or after the date. For a
          Saturday or Sunday it is the Friday just before, which isn't a
          problem as there are no applications at weekends.
          """
          return search_day + datetime.timedelta(4 - search_day.weekday())

      @staticmethod
      def _cell_text(row):
          """Return the stripped text of the second <td> cell of a table row."""
          return row.findAll("td")[1].string.strip()

      def _fetch_soup(self, url, data=None):
          """Open url (as a POST when data is given) and parse the response.

          Returns a (soup, final_url) tuple. The response is always closed,
          even if reading or parsing fails.
          """
          response = urllib2.urlopen(url, data)
          try:
              return BeautifulSoup(response.read()), response.url
          finally:
              response.close()

      def _parse_application(self, result_table):
          """Build a PlanningApplication from one inner results table.

          The address is taken from the closest preceding <b> element; the
          reference, received date and description are read from the second
          cell of rows 0, 1 and 3 of the table.
          """
          application = PlanningApplication()

          # The address lines are separated by carriage returns in the page.
          address_text = result_table.findPrevious("b").string.strip()
          application.address = ', '.join(address_text.split("\r"))
          application.postcode = getPostcodeFromText(application.address)

          rows = result_table.findAll("tr")
          application.council_reference = self._cell_text(rows[0])
          application.date_received = datetime.datetime.strptime(
              self._cell_text(rows[1]), date_format).date()
          application.description = self._cell_text(rows[3])

          application.info_url = self.info_url % (urllib.quote(application.council_reference),)

          # In order to avoid having to do a download for every app, the
          # comment url is set to be the same as the info_url. There is a
          # comment page which can be got to by pressing the button.
          application.comment_url = application.info_url

          return application

      def getResultsByDayMonthYear(self, day, month, year):
          """Collect the applications on the weekly list covering a date.

          Args:
              day, month, year: integers identifying the date searched for.

          Returns:
              ``self._results`` with an application added for every entry
              found, over at most ``_MAX_PASSES`` result pages.

          Network and parsing errors are not caught here.

          NOTE(review): every entry on the weekly list is added, not only
          those received on the requested day - confirm callers expect this.
          """
          search_day = datetime.date(year, month, day)

          # Now get the search page
          search_soup, post_url = self._fetch_soup(self.base_url)

          # These are the inputs with a default value
          post_data = dict(
              (field['id'], field['value'])
              for field in search_soup.form.findAll(
                  "input", value=True, type=lambda kind: kind != "submit")
          )

          # Add the submit button
          post_data['cmdWeeklyList'] = 'Search Database'

          # We also need to add the date we want to search for: the weekly
          # lists are identified by the date of their Friday.
          post_data['ddlWeeklyList'] = self._friday_of_week(search_day).strftime(date_format)

          # We'd like as many results as we can get away with on one page.
          # 50 is the largest option offered.
          post_data['ddlResultsPerPageWeeklyList'] = '50'

          # In case something goes wrong here, stop after at most
          # _MAX_PASSES pages.
          for _ in range(self._MAX_PASSES):
              page_soup, page_url = self._fetch_soup(post_url, urllib.urlencode(post_data))

              for result_table in page_soup.table.findAll("table"):
                  self._results.addApplication(self._parse_application(result_table))

              # Which page are we on?
              page_no = int(page_soup.find("span", id="lblPageNo").b.string)
              total_pages = int(page_soup.find("span", id="lblTotalPages").b.string)
              if page_no >= total_pages:
                  break

              # Ask for the next page by posting the "hlbNext" postback.
              post_data = [
                  ("__EVENTTARGET", "hlbNext"),
                  ("__EVENTARGUMENT", ""),
                  ("__VIEWSTATE", page_soup.find("input", id="__VIEWSTATE")['value']),
                  ("__EVENTVALIDATION", page_soup.find("input", id="__EVENTVALIDATION")['value']),
              ]
              post_url = urlparse.urljoin(page_url, page_soup.find("form")['action'])

          return self._results

      def getResults(self, day, month, year):
          """Run the search for a date and return the result of displayXML().

          day, month and year may be anything int() accepts.
          """
          results = self.getResultsByDayMonthYear(int(day), int(month), int(year))
          return results.displayXML()
  if __name__ == '__main__':
      # Manual check when run as a script: fetch the list covering
      # 3 October 2008 and print the XML returned by getResults().
      parser = BroxtoweParser()
      print(parser.getResults(3, 10, 2008))