Automatically exported from code.google.com/p/planningalerts
 
 
 
 
 
 

123 řádky
5.1 KiB

  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import datetime, time
  5. import cgi
  6. import re
  7. from BeautifulSoup import BeautifulSoup
  8. import cookielib
  9. cookie_jar = cookielib.CookieJar()
  10. from PlanningUtils import PlanningApplication, \
  11. PlanningAuthorityResults, \
  12. getPostcodeFromText
  13. date_format = "%d/%m/%Y"
  14. class BirminghamParser:
  15. comments_email_address = "planning.enquiries@birmingham.gov.uk"
  16. def __init__(self, *args):
  17. self.authority_name = "Birmingham City Council"
  18. self.authority_short_name = "Birmingham"
  19. self.get_url = "http://www.birmingham.gov.uk/GenerateContent?CONTENT_ITEM_ID=67548&CONTENT_ITEM_TYPE=0&MENU_ID=12189"
  20. # What a lovely intuitive URL it is.
  21. self.for_cookie_url = "http://www.birmingham.gov.uk/PSR/control/main"
  22. self.post_url = "http://www.birmingham.gov.uk/PSR/control/searchresults"
  23. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  24. def getResultsByDayMonthYear(self, day, month, year):
  25. search_day = datetime.date(year, month, day)
  26. # We seem to need to get this page in order to get a cookie
  27. for_cookie_request = urllib2.Request(self.for_cookie_url)
  28. for_cookie_response = urllib2.urlopen(for_cookie_request)
  29. cookie_jar.extract_cookies(for_cookie_response, for_cookie_request)
  30. post_data = [
  31. ("JAVASCRIPT_ENABLED", "FALSE"),
  32. ("txt_PSR_CurrentSearchPage", "0"),
  33. ("PSR_CURRENT_FORM", "psr_Application_PSRSearch_Application_Form"),
  34. ("txt_PSR_Application_ApplicationNumber", ""),
  35. ("txt_PSR_Application_Status", "awaitingDecision"),
  36. ("txt_PSR_Application_TypeOfApplication", ""),
  37. ("txt_PSR_Application_DecisionType", ""),
  38. ("txt_PSR_Application_District", ""),
  39. ("txt_PSR_Application_Ward", ""),
  40. ("txt_PSR_Application_Location", ""),
  41. ("txt_PSR_Application_Applicant", ""),
  42. ("txt_PSR_Application_Agent", ""),
  43. ("txt_PSR_Application_SearchDay", day),
  44. ("txt_PSR_Application_SearchMonth", month-1), # Months are counted from zero...
  45. ("txt_PSR_Application_SearchYear", year),
  46. ("txt_PSR_Application_SearchToDay", day),
  47. ("txt_PSR_Application_SearchToMonth", month-1), # Months are counted from zero...
  48. ("txt_PSR_Application_SearchToYear", year),
  49. ("txt_PSR_Application_SearchSortOrder", "LatestFirst"),
  50. ("txt_PSR_Application_ResultsSkipRows", "0"),
  51. ("txt_PSR_Application_ResultsPerPage", "1000"), # That should be enough to keep things on one page
  52. ("btn_PSR_Application_ApplicationSearch", "Search"),
  53. ("PSR_CURRENT_FORM", "psr_Application_PSRSearch_Appeals_Form"),
  54. ("txt_PSR_Appeals_ApplicationNumber", ""),
  55. ("txt_PSR_Appeals_Status", "awaitingDecision"),
  56. ("txt_PSR_Appeals_TypeOfAppeal", ""),
  57. ("txt_PSR_Appeals_DecisionType", ""),
  58. ("txt_PSR_Appeals_District", ""),
  59. ("txt_PSR_Appeals_Ward", ""),
  60. ("txt_PSR_Appeals_Location", ""),
  61. ("txt_PSR_Appeals_Applicant", ""),
  62. ("txt_PSR_Appeals_Agent", ""),
  63. ("txt_PSR_Appeals_SearchDay", ""),
  64. ("txt_PSR_Appeals_SearchMonth", ""),
  65. ("txt_PSR_Appeals_SearchYear", ""),
  66. ("txt_PSR_Appeals_SearchToDay", ""),
  67. ("txt_PSR_Appeals_SearchToMonth", ""),
  68. ("txt_PSR_Appeals_SearchToYear", ""),
  69. ("txt_PSR_Appeals_SearchSortOrder", "LatestFirst"),
  70. ("txt_PSR_Appeals_ResultsSkipRows", "0"),
  71. ("txt_PSR_Appeals_ResultsPerPage", "10"),
  72. ]
  73. post_request = urllib2.Request(self.post_url, urllib.urlencode(post_data))
  74. cookie_jar.add_cookie_header(post_request)
  75. post_response = urllib2.urlopen(post_request)
  76. soup = BeautifulSoup(post_response.read())
  77. result_tables = soup.findAll("table", summary=re.compile("Summary of planning application"))
  78. for result_table in result_tables:
  79. application = PlanningApplication()
  80. application.info_url = urlparse.urljoin(self.post_url, result_table.find(text="Application number").findNext("a")['href'])
  81. application.council_reference = result_table.find(text="Application number").findNext("a").string
  82. application.date_received = search_day
  83. application.address = result_table.find(text="Location").findNext("td").p.string
  84. application.postcode = getPostcodeFromText(application.address)
  85. application.description = result_table.find(text="Proposal").findNext("td").p.string.replace(" ", " ").strip()
  86. # Comment link gives an Access Denied, so we'll have to use the email
  87. application.comment_url = self.comments_email_address
  88. self._results.addApplication(application)
  89. return self._results
  90. def getResults(self, day, month, year):
  91. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  92. if __name__ == '__main__':
  93. parser = BirminghamParser()
  94. print parser.getResults(1,8,2008)