# Automatically exported from code.google.com/p/planningalerts

import urllib2
import urllib
import urlparse
import datetime, time
import cgi
import re

from BeautifulSoup import BeautifulSoup

import cookielib

cookie_jar = cookielib.CookieJar()
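# One jar for the whole module: the warm-up GET in getResultsByDayMonthYear()
# deposits the session cookie here, and the search POST replays it.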

from PlanningUtils import PlanningApplication, \
    PlanningAuthorityResults, \
    getPostcodeFromText

date_format = "%d/%m/%Y"

class BirminghamParser:
    comments_email_address = "planning.enquiries@birmingham.gov.uk"

    def __init__(self, *args):
        self.authority_name = "Birmingham City Council"
        self.authority_short_name = "Birmingham"
        self.get_url = "http://www.birmingham.gov.uk/GenerateContent?CONTENT_ITEM_ID=67548&CONTENT_ITEM_TYPE=0&MENU_ID=12189"
        # What a lovely intuitive URL it is.
        self.for_cookie_url = "http://www.birmingham.gov.uk/PSR/control/main"
        self.post_url = "http://www.birmingham.gov.uk/PSR/control/searchresults"

        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

    def getResultsByDayMonthYear(self, day, month, year):
        search_day = datetime.date(year, month, day)

        # We seem to need to fetch this page first in order to pick up a session cookie.
        for_cookie_request = urllib2.Request(self.for_cookie_url)
        for_cookie_response = urllib2.urlopen(for_cookie_request)
        cookie_jar.extract_cookies(for_cookie_response, for_cookie_request)
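
        # The same fetch-then-post flow could be written with a cookie-aware
        # opener, which attaches and extracts cookies automatically. A sketch
        # (not what this module does):
        #
        #   opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie_jar))
        #   opener.open(self.for_cookie_url)
        #   response = opener.open(self.post_url, urllib.urlencode(post_data))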

        post_data = [
            ("JAVASCRIPT_ENABLED", "FALSE"),
            ("txt_PSR_CurrentSearchPage", "0"),
            ("PSR_CURRENT_FORM", "psr_Application_PSRSearch_Application_Form"),
            ("txt_PSR_Application_ApplicationNumber", ""),
            ("txt_PSR_Application_Status", "awaitingDecision"),
            ("txt_PSR_Application_TypeOfApplication", ""),
            ("txt_PSR_Application_DecisionType", ""),
            ("txt_PSR_Application_District", ""),
            ("txt_PSR_Application_Ward", ""),
            ("txt_PSR_Application_Location", ""),
            ("txt_PSR_Application_Applicant", ""),
            ("txt_PSR_Application_Agent", ""),
            ("txt_PSR_Application_SearchDay", day),
            ("txt_PSR_Application_SearchMonth", month - 1),  # Months are counted from zero...
            ("txt_PSR_Application_SearchYear", year),
            ("txt_PSR_Application_SearchToDay", day),
            ("txt_PSR_Application_SearchToMonth", month - 1),  # Months are counted from zero...
            ("txt_PSR_Application_SearchToYear", year),
            ("txt_PSR_Application_SearchSortOrder", "LatestFirst"),
            ("txt_PSR_Application_ResultsSkipRows", "0"),
            ("txt_PSR_Application_ResultsPerPage", "1000"),  # That should be enough to keep things on one page.
            ("btn_PSR_Application_ApplicationSearch", "Search"),
            ("PSR_CURRENT_FORM", "psr_Application_PSRSearch_Appeals_Form"),
            ("txt_PSR_Appeals_ApplicationNumber", ""),
            ("txt_PSR_Appeals_Status", "awaitingDecision"),
            ("txt_PSR_Appeals_TypeOfAppeal", ""),
            ("txt_PSR_Appeals_DecisionType", ""),
            ("txt_PSR_Appeals_District", ""),
            ("txt_PSR_Appeals_Ward", ""),
            ("txt_PSR_Appeals_Location", ""),
            ("txt_PSR_Appeals_Applicant", ""),
            ("txt_PSR_Appeals_Agent", ""),
            ("txt_PSR_Appeals_SearchDay", ""),
            ("txt_PSR_Appeals_SearchMonth", ""),
            ("txt_PSR_Appeals_SearchYear", ""),
            ("txt_PSR_Appeals_SearchToDay", ""),
            ("txt_PSR_Appeals_SearchToMonth", ""),
            ("txt_PSR_Appeals_SearchToYear", ""),
            ("txt_PSR_Appeals_SearchSortOrder", "LatestFirst"),
            ("txt_PSR_Appeals_ResultsSkipRows", "0"),
            ("txt_PSR_Appeals_ResultsPerPage", "10"),
        ]
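
        # post_data is a list of pairs rather than a dict so that the repeated
        # PSR_CURRENT_FORM key (one per form on the page) survives
        # urllib.urlencode, which encodes the pairs in order.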
        post_request = urllib2.Request(self.post_url, urllib.urlencode(post_data))
        cookie_jar.add_cookie_header(post_request)
        post_response = urllib2.urlopen(post_request)

        soup = BeautifulSoup(post_response.read())

        result_tables = soup.findAll("table", summary=re.compile("Summary of planning application"))
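
        # Each matching table holds one application summary: find the label
        # text ("Application number", "Location", "Proposal") and step to the
        # neighbouring tag for the value.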
        for result_table in result_tables:
            application = PlanningApplication()

            application.info_url = urlparse.urljoin(self.post_url, result_table.find(text="Application number").findNext("a")['href'])
            application.council_reference = result_table.find(text="Application number").findNext("a").string
            application.date_received = search_day
            application.address = result_table.find(text="Location").findNext("td").p.string
            application.postcode = getPostcodeFromText(application.address)
            application.description = result_table.find(text="Proposal").findNext("td").p.string.replace("&nbsp;", " ").strip()

            # The comment link gives an Access Denied, so we'll have to use the email address instead.
            application.comment_url = self.comments_email_address

            self._results.addApplication(application)

        return self._results

    def getResults(self, day, month, year):
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()


if __name__ == '__main__':
    parser = BirminghamParser()
    print parser.getResults(1, 8, 2008)