Automatically exported from code.google.com/p/planningalerts
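
This is the Broxtowe Borough Council scraper from the PlanningAlerts project: a Python 2 screen scraper that submits the council site's weekly-list search form, walks the paginated ASP.NET results with BeautifulSoup, and collects one PlanningApplication per result table.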

import urllib2
import urllib
import urlparse

import datetime, time
import cgi

from BeautifulSoup import BeautifulSoup

from PlanningUtils import PlanningApplication, \
    PlanningAuthorityResults, \
    getPostcodeFromText

date_format = "%d/%m/%Y"

class BroxtoweParser:
    def __init__(self, *args):
        self.authority_name = "Broxtowe Borough Council"
        self.authority_short_name = "Broxtowe"
        self.base_url = "http://planning.broxtowe.gov.uk"
        self.info_url = "http://planning.broxtowe.gov.uk/ApplicationDetail.aspx?RefVal=%s"

        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

    def getResultsByDayMonthYear(self, day, month, year):
        search_day = datetime.date(year, month, day)

        # Now get the search page
        get_response = urllib2.urlopen(self.base_url)
        get_soup = BeautifulSoup(get_response.read())

        # These are the inputs with a default value
        inputs_needed = [(x['id'], x['value']) for x in get_soup.form.findAll("input", value=True, type=lambda x: x != "submit")]
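        # (These will include ASP.NET's hidden state fields such as
        # __VIEWSTATE, which the server expects to be posted back.)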
        # Add the submit button
        inputs_needed.append(('cmdWeeklyList', 'Search Database'))

        # We also need to add the date we want to search for.
        # This is the Friday after the date searched for.
        # At weekends this will get you the Friday before, but that isn't
        # a problem, as there are no apps then.
        friday = search_day + datetime.timedelta(4 - search_day.weekday())
        inputs_needed.append(("ddlWeeklyList", friday.strftime(date_format)))
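        # (Worked example: Wednesday 2008-10-01 has weekday() == 2, so
        # timedelta(4 - 2) lands on Friday 2008-10-03; Saturday 2008-10-04
        # has weekday() == 5, so timedelta(-1) steps back to the same Friday.)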
        # We'd like as many results as we can get away with on one page.
        # 50 is the largest option offered.
        inputs_needed.append(("ddlResultsPerPageWeeklyList", "50"))

        post_data = dict(inputs_needed)
        post_url = get_response.url

        # In case something goes wrong here, let's break out of the loop
        # after at most 10 passes.
        passes = 0
        while True:
            passes += 1

            post_response = urllib2.urlopen(post_url, urllib.urlencode(post_data))
            post_soup = BeautifulSoup(post_response.read())

            result_tables = post_soup.table.findAll("table")

            for result_table in result_tables:
                application = PlanningApplication()

                application.address = ', '.join(result_table.findPrevious("b").string.strip().split("\r"))
                application.postcode = getPostcodeFromText(application.address)

                trs = result_table.findAll("tr")

                application.council_reference = trs[0].findAll("td")[1].string.strip()
                application.date_received = datetime.datetime.strptime(trs[1].findAll("td")[1].string.strip(), date_format).date()
                application.description = trs[3].findAll("td")[1].string.strip()

                application.info_url = self.info_url % urllib.quote(application.council_reference)

                # In order to avoid having to do a download for every app,
                # I'm setting the comment url to be the same as the info_url.
                # (There is a comment page, reachable via a button on the
                # info page.)
                application.comment_url = application.info_url

                self._results.addApplication(application)

            # Which page are we on?
            page_no = int(post_soup.find("span", id="lblPageNo").b.string)
            total_pages = int(post_soup.find("span", id="lblTotalPages").b.string)

            if passes > 10 or not page_no < total_pages:
                break
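            # Fetching the next page means replaying the ASP.NET postback
            # that the "Next" link (hlbNext) fires: the __VIEWSTATE and
            # __EVENTVALIDATION tokens must be echoed back from the page
            # we just received, or the server will refuse the request.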
            post_data = [
                ("__EVENTTARGET", "hlbNext"),
                ("__EVENTARGUMENT", ""),
                ("__VIEWSTATE", post_soup.find("input", id="__VIEWSTATE")['value']),
                ("__EVENTVALIDATION", post_soup.find("input", id="__EVENTVALIDATION")['value']),
                ]
            post_url = urlparse.urljoin(post_response.url, post_soup.find("form")['action'])

        return self._results

    def getResults(self, day, month, year):
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()

if __name__ == '__main__':
    parser = BroxtoweParser()
    print parser.getResults(3, 10, 2008)
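
Run as a script, this prints an XML summary of the applications received in the week covering 3 October 2008 (getResults() converts the collected results via PlanningAuthorityResults.displayXML() from PlanningUtils).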