Automatically exported from code.google.com/p/planningalerts

import urllib2
import urllib
import urlparse

import datetime, time
import cgi
import re

from BeautifulSoup import BeautifulSoup

import cookielib

cookie_jar = cookielib.CookieJar()

from PlanningUtils import PlanningApplication, \
    PlanningAuthorityResults, \
    getPostcodeFromText

date_format = "%d/%m/%Y"

class BirminghamParser:
    comments_email_address = "planning.enquiries@birmingham.gov.uk"

    def __init__(self, *args):
        self.authority_name = "Birmingham City Council"
        self.authority_short_name = "Birmingham"
        self.get_url = "http://www.birmingham.gov.uk/GenerateContent?CONTENT_ITEM_ID=67548&CONTENT_ITEM_TYPE=0&MENU_ID=12189"
        # What a lovely intuitive URL it is.

        self.for_cookie_url = "http://www.birmingham.gov.uk/PSR/control/main"
        self.post_url = "http://www.birmingham.gov.uk/PSR/control/searchresults"

        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

    def getResultsByDayMonthYear(self, day, month, year):
        search_day = datetime.date(year, month, day)

        # We seem to need to get this page in order to get a cookie
        for_cookie_request = urllib2.Request(self.for_cookie_url)
        for_cookie_response = urllib2.urlopen(for_cookie_request)
        cookie_jar.extract_cookies(for_cookie_response, for_cookie_request)

        # The search page carries two forms (applications and appeals), so
        # post_data is a list of tuples rather than a dict: PSR_CURRENT_FORM
        # appears twice.
        post_data = [
            ("JAVASCRIPT_ENABLED", "FALSE"),
            ("txt_PSR_CurrentSearchPage", "0"),
            ("PSR_CURRENT_FORM", "psr_Application_PSRSearch_Application_Form"),
            ("txt_PSR_Application_ApplicationNumber", ""),
            ("txt_PSR_Application_Status", "awaitingDecision"),
            ("txt_PSR_Application_TypeOfApplication", ""),
            ("txt_PSR_Application_DecisionType", ""),
            ("txt_PSR_Application_District", ""),
            ("txt_PSR_Application_Ward", ""),
            ("txt_PSR_Application_Location", ""),
            ("txt_PSR_Application_Applicant", ""),
            ("txt_PSR_Application_Agent", ""),
            ("txt_PSR_Application_SearchDay", day),
            ("txt_PSR_Application_SearchMonth", month-1), # Months are counted from zero...
            ("txt_PSR_Application_SearchYear", year),
            ("txt_PSR_Application_SearchToDay", day),
            ("txt_PSR_Application_SearchToMonth", month-1), # Months are counted from zero...
            ("txt_PSR_Application_SearchToYear", year),
            ("txt_PSR_Application_SearchSortOrder", "LatestFirst"),
            ("txt_PSR_Application_ResultsSkipRows", "0"),
            ("txt_PSR_Application_ResultsPerPage", "1000"), # That should be enough to keep things on one page
            ("btn_PSR_Application_ApplicationSearch", "Search"),
            ("PSR_CURRENT_FORM", "psr_Application_PSRSearch_Appeals_Form"),
            ("txt_PSR_Appeals_ApplicationNumber", ""),
            ("txt_PSR_Appeals_Status", "awaitingDecision"),
            ("txt_PSR_Appeals_TypeOfAppeal", ""),
            ("txt_PSR_Appeals_DecisionType", ""),
            ("txt_PSR_Appeals_District", ""),
            ("txt_PSR_Appeals_Ward", ""),
            ("txt_PSR_Appeals_Location", ""),
            ("txt_PSR_Appeals_Applicant", ""),
            ("txt_PSR_Appeals_Agent", ""),
            ("txt_PSR_Appeals_SearchDay", ""),
            ("txt_PSR_Appeals_SearchMonth", ""),
            ("txt_PSR_Appeals_SearchYear", ""),
            ("txt_PSR_Appeals_SearchToDay", ""),
            ("txt_PSR_Appeals_SearchToMonth", ""),
            ("txt_PSR_Appeals_SearchToYear", ""),
            ("txt_PSR_Appeals_SearchSortOrder", "LatestFirst"),
            ("txt_PSR_Appeals_ResultsSkipRows", "0"),
            ("txt_PSR_Appeals_ResultsPerPage", "10"),
            ]

        # POST the search form, attaching the cookie we picked up above.
        post_request = urllib2.Request(self.post_url, urllib.urlencode(post_data))
        cookie_jar.add_cookie_header(post_request)
        post_response = urllib2.urlopen(post_request)

        soup = BeautifulSoup(post_response.read())

        result_tables = soup.findAll("table", summary=re.compile("Summary of planning application"))

        for result_table in result_tables:
            application = PlanningApplication()

            application.info_url = urlparse.urljoin(self.post_url, result_table.find(text="Application number").findNext("a")['href'])
            application.council_reference = result_table.find(text="Application number").findNext("a").string
            application.date_received = search_day
            application.address = result_table.find(text="Location").findNext("td").p.string
            application.postcode = getPostcodeFromText(application.address)
            # The proposal text is padded with &nbsp; entities; strip them out.
            application.description = result_table.find(text="Proposal").findNext("td").p.string.replace("&nbsp;", " ").strip()

            # Comment link gives an Access Denied, so we'll have to use the email
            application.comment_url = self.comments_email_address

            self._results.addApplication(application)

        return self._results

    def getResults(self, day, month, year):
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()

if __name__ == '__main__':
    parser = BirminghamParser()
    print parser.getResults(1, 8, 2008)
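
The scraper's whole trick is the cookie dance: prime a session by fetching for_cookie_url, then replay the search form to post_url with that cookie attached. For readers without the legacy Python 2 stack, here is a minimal, hypothetical sketch of the same flow in modern Python using requests (whose Session object manages the cookie jar automatically) and BeautifulSoup 4. The URLs and field names are taken from the scraper above; search_applications is an illustrative helper, and whether the Birmingham PSR service still accepts this form is an assumption.

    # Hypothetical modern port of the cookie-then-POST flow (Python 3,
    # requests + BeautifulSoup 4). Endpoints and field names come from the
    # 2008 scraper above and may no longer work.
    import re
    import requests
    from bs4 import BeautifulSoup

    FOR_COOKIE_URL = "http://www.birmingham.gov.uk/PSR/control/main"
    POST_URL = "http://www.birmingham.gov.uk/PSR/control/searchresults"

    def search_applications(day, month, year):
        session = requests.Session()   # keeps cookies across requests
        session.get(FOR_COOKIE_URL)    # prime the session cookie

        # A subset of the application-search fields from the form above.
        post_data = {
            "JAVASCRIPT_ENABLED": "FALSE",
            "txt_PSR_Application_Status": "awaitingDecision",
            "txt_PSR_Application_SearchDay": str(day),
            "txt_PSR_Application_SearchMonth": str(month - 1),  # zero-based months
            "txt_PSR_Application_SearchYear": str(year),
            "txt_PSR_Application_SearchToDay": str(day),
            "txt_PSR_Application_SearchToMonth": str(month - 1),
            "txt_PSR_Application_SearchToYear": str(year),
            "txt_PSR_Application_ResultsPerPage": "1000",
            "btn_PSR_Application_ApplicationSearch": "Search",
        }
        response = session.post(POST_URL, data=post_data)

        soup = BeautifulSoup(response.text, "html.parser")
        return soup.find_all("table", summary=re.compile("Summary of planning application"))

Note that the original submits both forms at once and so repeats the PSR_CURRENT_FORM key; a faithful replay would pass data as a list of (key, value) tuples, which requests also accepts, rather than a dict.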