Automatically exported from code.google.com/p/planningalerts
Du kannst nicht mehr als 25 Themen auswählen. Themen müssen entweder mit einem Buchstaben oder einer Ziffer beginnen. Sie können Bindestriche („-“) enthalten und bis zu 35 Zeichen lang sein.
 
 
 
 
 
 

114 Zeilen
4.2 KiB

  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import datetime, time
  5. import cgi
  6. from BeautifulSoup import BeautifulSoup
  7. from PlanningUtils import PlanningApplication, \
  8. PlanningAuthorityResults, \
  9. getPostcodeFromText
  10. date_format = "%d/%m/%Y"
  11. class BroxtoweParser:
  12. def __init__(self, *args):
  13. self.authority_name = "Broxtowe Borough Council"
  14. self.authority_short_name = "Broxtowe"
  15. self.base_url = "http://planning.broxtowe.gov.uk"
  16. self.info_url = "http://planning.broxtowe.gov.uk/ApplicationDetail.aspx?RefVal=%s"
  17. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  18. def getResultsByDayMonthYear(self, day, month, year):
  19. search_day = datetime.date(year, month, day)
  20. # Now get the search page
  21. get_response = urllib2.urlopen(self.base_url)
  22. get_soup = BeautifulSoup(get_response.read())
  23. # These are the inputs with a default value
  24. inputs_needed = [(x['id'], x['value']) for x in get_soup.form.findAll("input", value=True, type=lambda x: x != "submit")]
  25. # Add the submit button
  26. inputs_needed.append(('cmdWeeklyList', 'Search Database'))
  27. # We also need to add the date we want to search for.
  28. # This is the friday after the date searched for.
  29. # At weekends this will get you the friday before, but that isn't
  30. # a problem as there are no apps then.
  31. friday = search_day + datetime.timedelta(4 - search_day.weekday())
  32. inputs_needed.append(("ddlWeeklyList", friday.strftime(date_format)))
  33. # We'd like as many results as we can get away with on one page.
  34. # 50 is the largest option offerend
  35. inputs_needed.append(("ddlResultsPerPageWeeklyList", "50"))
  36. post_data = dict(inputs_needed)
  37. post_url = get_response.url
  38. # In case something goes wrong here, let's break out of the loop after at most 10 passes
  39. passes = 0
  40. while True:
  41. passes += 1
  42. post_response = urllib2.urlopen(post_url, urllib.urlencode(post_data))
  43. post_soup = BeautifulSoup(post_response.read())
  44. result_tables = post_soup.table.findAll("table")
  45. for result_table in result_tables:
  46. application = PlanningApplication()
  47. application.address = ', '.join(result_table.findPrevious("b").string.strip().split("\r"))
  48. application.postcode = getPostcodeFromText(application.address)
  49. trs = result_table.findAll("tr")
  50. application.council_reference = trs[0].findAll("td")[1].string.strip()
  51. application.date_received = datetime.datetime.strptime(trs[1].findAll("td")[1].string.strip(), date_format).date()
  52. application.description = trs[3].findAll("td")[1].string.strip()
  53. application.info_url = self.info_url %(urllib.quote(application.council_reference))
  54. # In order to avoid having to do a download for every app,
  55. # I'm setting the comment url to be the same as the info_url.
  56. # There is a comment page which can be got to by pressing the button
  57. application.comment_url = application.info_url
  58. self._results.addApplication(application)
  59. # Which page are we on?
  60. page_no = int(post_soup.find("span", id="lblPageNo").b.string)
  61. total_pages = int(post_soup.find("span", id="lblTotalPages").b.string)
  62. if passes > 10 or not page_no < total_pages:
  63. break
  64. post_data = [
  65. ("__EVENTTARGET", "hlbNext"),
  66. ("__EVENTARGUMENT", ""),
  67. ("__VIEWSTATE", post_soup.find("input", id="__VIEWSTATE")['value']),
  68. ("__EVENTVALIDATION", post_soup.find("input", id="__EVENTVALIDATION")['value']),
  69. ]
  70. post_url = urlparse.urljoin(post_response.url, post_soup.find("form")['action'])
  71. return self._results
  72. def getResults(self, day, month, year):
  73. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  74. if __name__ == '__main__':
  75. parser = BroxtoweParser()
  76. print parser.getResults(3,10,2008)