Automatically exported from code.google.com/p/planningalerts
 
 
 
 
 
 

123 行
5.1 KiB

  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import datetime, time
  5. import cgi
  6. import re
  7. from BeautifulSoup import BeautifulSoup
  8. import cookielib
  9. cookie_jar = cookielib.CookieJar()
  10. from PlanningUtils import PlanningApplication, \
  11. PlanningAuthorityResults, \
  12. getPostcodeFromText
  13. date_format = "%d/%m/%Y"
  14. class BirminghamParser:
  15. comments_email_address = "planning.enquiries@birmingham.gov.uk"
  16. def __init__(self, *args):
  17. self.authority_name = "Birmingham City Council"
  18. self.authority_short_name = "Birmingham"
  19. self.get_url = "http://www.birmingham.gov.uk/GenerateContent?CONTENT_ITEM_ID=67548&CONTENT_ITEM_TYPE=0&MENU_ID=12189"
  20. # What a lovely intuitive URL it is.
  21. self.for_cookie_url = "http://www.birmingham.gov.uk/PSR/control/main"
  22. self.post_url = "http://www.birmingham.gov.uk/PSR/control/searchresults"
  23. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  24. def getResultsByDayMonthYear(self, day, month, year):
  25. search_day = datetime.date(year, month, day)
  26. # We seem to need to get this page in order to get a cookie
  27. for_cookie_request = urllib2.Request(self.for_cookie_url)
  28. for_cookie_response = urllib2.urlopen(for_cookie_request)
  29. cookie_jar.extract_cookies(for_cookie_response, for_cookie_request)
  30. post_data = [
  31. ("JAVASCRIPT_ENABLED", "FALSE"),
  32. ("txt_PSR_CurrentSearchPage", "0"),
  33. ("PSR_CURRENT_FORM", "psr_Application_PSRSearch_Application_Form"),
  34. ("txt_PSR_Application_ApplicationNumber", ""),
  35. ("txt_PSR_Application_Status", "awaitingDecision"),
  36. ("txt_PSR_Application_TypeOfApplication", ""),
  37. ("txt_PSR_Application_DecisionType", ""),
  38. ("txt_PSR_Application_District", ""),
  39. ("txt_PSR_Application_Ward", ""),
  40. ("txt_PSR_Application_Location", ""),
  41. ("txt_PSR_Application_Applicant", ""),
  42. ("txt_PSR_Application_Agent", ""),
  43. ("txt_PSR_Application_SearchDay", day),
  44. ("txt_PSR_Application_SearchMonth", month-1), # Months are counted from zero...
  45. ("txt_PSR_Application_SearchYear", year),
  46. ("txt_PSR_Application_SearchToDay", day),
  47. ("txt_PSR_Application_SearchToMonth", month-1), # Months are counted from zero...
  48. ("txt_PSR_Application_SearchToYear", year),
  49. ("txt_PSR_Application_SearchSortOrder", "LatestFirst"),
  50. ("txt_PSR_Application_ResultsSkipRows", "0"),
  51. ("txt_PSR_Application_ResultsPerPage", "1000"), # That should be enough to keep things on one page
  52. ("btn_PSR_Application_ApplicationSearch", "Search"),
  53. ("PSR_CURRENT_FORM", "psr_Application_PSRSearch_Appeals_Form"),
  54. ("txt_PSR_Appeals_ApplicationNumber", ""),
  55. ("txt_PSR_Appeals_Status", "awaitingDecision"),
  56. ("txt_PSR_Appeals_TypeOfAppeal", ""),
  57. ("txt_PSR_Appeals_DecisionType", ""),
  58. ("txt_PSR_Appeals_District", ""),
  59. ("txt_PSR_Appeals_Ward", ""),
  60. ("txt_PSR_Appeals_Location", ""),
  61. ("txt_PSR_Appeals_Applicant", ""),
  62. ("txt_PSR_Appeals_Agent", ""),
  63. ("txt_PSR_Appeals_SearchDay", ""),
  64. ("txt_PSR_Appeals_SearchMonth", ""),
  65. ("txt_PSR_Appeals_SearchYear", ""),
  66. ("txt_PSR_Appeals_SearchToDay", ""),
  67. ("txt_PSR_Appeals_SearchToMonth", ""),
  68. ("txt_PSR_Appeals_SearchToYear", ""),
  69. ("txt_PSR_Appeals_SearchSortOrder", "LatestFirst"),
  70. ("txt_PSR_Appeals_ResultsSkipRows", "0"),
  71. ("txt_PSR_Appeals_ResultsPerPage", "10"),
  72. ]
  73. post_request = urllib2.Request(self.post_url, urllib.urlencode(post_data))
  74. cookie_jar.add_cookie_header(post_request)
  75. post_response = urllib2.urlopen(post_request)
  76. soup = BeautifulSoup(post_response.read())
  77. result_tables = soup.findAll("table", summary=re.compile("Summary of planning application"))
  78. for result_table in result_tables:
  79. application = PlanningApplication()
  80. application.info_url = urlparse.urljoin(self.post_url, result_table.find(text="Application number").findNext("a")['href'])
  81. application.council_reference = result_table.find(text="Application number").findNext("a").string
  82. application.date_received = search_day
  83. application.address = result_table.find(text="Location").findNext("td").p.string
  84. application.postcode = getPostcodeFromText(application.address)
  85. application.description = result_table.find(text="Proposal").findNext("td").p.string.replace(" ", " ").strip()
  86. # Comment link gives an Access Denied, so we'll have to use the email
  87. application.comment_url = self.comments_email_address
  88. self._results.addApplication(application)
  89. return self._results
  90. def getResults(self, day, month, year):
  91. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  92. if __name__ == '__main__':
  93. parser = BirminghamParser()
  94. print parser.getResults(1,8,2008)