Automatically exported from code.google.com/p/planningalerts
No puede seleccionar más de 25 temas Los temas deben comenzar con una letra o número, pueden incluir guiones ('-') y pueden tener hasta 35 caracteres de largo.
 
 
 
 
 
 

180 líneas
8.0 KiB

  1. #!/usr/local/bin/python
  2. import urllib, urllib2
  3. import urlparse
  4. import datetime
  5. import re
  6. import BeautifulSoup
  7. import cookielib
# Module-level cookie jar, shared by every request made through this module.
# The session cookie extracted from the search form response is stored here
# and attached to the later search requests.
cookie_jar = cookielib.CookieJar()
  9. from PlanningUtils import fixNewlines, getPostcodeFromText, PlanningAuthorityResults, PlanningApplication
# Page paths within a PublicAccess site; each is joined onto an authority's
# base_url before use.

# The search form, fetched first in order to obtain a session cookie.
search_form_url_end = "DcApplication/application_searchform.aspx"
# The search results listing, used for both the POST and the GET search.
search_results_url_end = "DcApplication/application_searchresults.aspx"
# The comment entry form; an application's query string is appended to it.
comments_url_end = "DcApplication/application_comments_entryform.aspx"
  13. def index_or_none(a_list, item):
  14. """
  15. Returns the index of item in a_list, or None, if it isn't in the list.
  16. """
  17. return a_list.count(item) and a_list.index(item)
  18. class PublicAccessParser:
  19. """This is the class which parses the PublicAccess search results page.
  20. """
  21. def __init__(self,
  22. authority_name,
  23. authority_short_name,
  24. base_url,
  25. debug=False):
  26. self.authority_name = authority_name
  27. self.authority_short_name = authority_short_name
  28. self.base_url = base_url
  29. self.debug = debug
  30. # The object which stores our set of planning application results
  31. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  32. def getResultsByDayMonthYear(self, day, month, year):
  33. search_date = datetime.date(year, month, day)
  34. # First download the search form (in order to get a session cookie
  35. search_form_request = urllib2.Request(urlparse.urljoin(self.base_url, search_form_url_end))
  36. search_form_response = urllib2.urlopen(search_form_request)
  37. cookie_jar.extract_cookies(search_form_response, search_form_request)
  38. # We are only doing this first search in order to get a cookie
  39. # The paging on the site doesn't work with cookies turned off.
  40. search_data1 = urllib.urlencode({"searchType":"ADV",
  41. "caseNo":"",
  42. "PPReference":"",
  43. "AltReference":"",
  44. "srchtype":"",
  45. "srchstatus":"",
  46. "srchdecision":"",
  47. "srchapstatus":"",
  48. "srchappealdecision":"",
  49. "srchwardcode":"",
  50. "srchparishcode":"",
  51. "srchagentdetails":"",
  52. "srchDateReceivedStart":"%(day)02d/%(month)02d/%(year)d" %{"day":day ,"month": month ,"year": year},
  53. "srchDateReceivedEnd":"%(day)02d/%(month)02d/%(year)d" %{"day":day, "month":month, "year":year} })
  54. if self.debug:
  55. print search_data1
  56. search_url = urlparse.urljoin(self.base_url, search_results_url_end)
  57. request1 = urllib2.Request(search_url, search_data1)
  58. cookie_jar.add_cookie_header(request1)
  59. response1 = urllib2.urlopen(request1)
  60. # This search is the one we will actually use.
  61. # a maximum of 100 results are returned on this site,
  62. # hence setting "pagesize" to 100. I doubt there will ever
  63. # be more than 100 in one day in PublicAccess...
  64. # "currentpage" = 1 gets us to the first page of results
  65. # (there will only be one anyway, as we are asking for 100 results...)
  66. #http://planning.york.gov.uk/PublicAccess/tdc/DcApplication/application_searchresults.aspx?szSearchDescription=Applications%20received%20between%2022/02/2007%20and%2022/02/2007&searchType=ADV&bccaseno=&currentpage=2&pagesize=10&module=P3
  67. search_data2 = urllib.urlencode((("szSearchDescription","Applications received between %(day)02d/%(month)02d/%(year)d and %(day)02d/%(month)02d/%(year)d"%{"day":day ,"month": month ,"year": year}), ("searchType","ADV"), ("bccaseno",""), ("currentpage","1"), ("pagesize","100"), ("module","P3")))
  68. if self.debug:
  69. print search_data2
  70. # This time we want to do a get request, so add the search data into the url
  71. request2_url = urlparse.urljoin(self.base_url, search_results_url_end + "?" + search_data2)
  72. request2 = urllib2.Request(request2_url)
  73. # add the cookie we stored from our first search
  74. cookie_jar.add_cookie_header(request2)
  75. response2 = urllib2.urlopen(request2)
  76. contents = fixNewlines(response2.read())
  77. if self.debug:
  78. print contents
  79. soup = BeautifulSoup.BeautifulSoup(contents)
  80. results_table = soup.find("table", {"class": "cResultsForm"})
  81. # First, we work out what column each thing of interest is in from the headings
  82. headings = [x.string for x in results_table.findAll("th")]
  83. ref_col = index_or_none(headings, "Application Ref.") or \
  84. index_or_none(headings, "Case Number") or \
  85. index_or_none(headings, "Application Number")
  86. address_col = headings.index("Address")
  87. description_col = headings.index("Proposal")
  88. comments_url = urlparse.urljoin(self.base_url, comments_url_end)
  89. for tr in results_table.findAll("tr")[1:]:
  90. application = PlanningApplication()
  91. application.date_received = search_date
  92. tds = tr.findAll(re.compile("t[dh]"))
  93. application.council_reference = tds[ref_col].string.strip()
  94. application.address = tds[address_col].string.strip()
  95. application.description = tds[description_col].string.strip()
  96. application.info_url = urlparse.urljoin(self.base_url, tr.a['href'])
  97. # We need the query string from this url to make the comments_url
  98. query_string = urlparse.urlsplit(application.info_url)[3]
  99. # This is probably slightly naughty, but I'm just going to add the querystring
  100. # on to the end manually
  101. application.comment_url = "%s?%s" %(comments_url, query_string)
  102. self._results.addApplication(application)
  103. return self._results
  104. def getResults(self, day, month, year):
  105. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  106. if __name__ == '__main__':
  107. day = 20
  108. month = 12
  109. year = 2008
  110. #parser = PublicAccessParser("East Northants", "East Northants", "http://publicaccesssrv.east-northamptonshire.gov.uk/PublicAccess/tdc/", True)
  111. parser = PublicAccessParser("Cherwell District Council", "Cherwell", "http://cherweb.cherwell-dc.gov.uk/publicaccess/tdc/", False)
  112. #parser = PublicAccessParser("Hambleton District Council", "Hambleton", "http://planning.hambleton.gov.uk/publicaccess/tdc/", True)
  113. #parser = PublicAccessParser("Durham City Council", "Durham", "http://publicaccess.durhamcity.gov.uk/publicaccess/tdc/", True)
  114. #parser = PublicAccessParser("Moray Council", "Moray", "http://public.moray.gov.uk/publicaccess/tdc/", True)
  115. # parser = PublicAccessParser("Sheffield City Council", "Sheffield", "http://planning.sheffield.gov.uk/publicaccess/tdc/")
  116. # parser = PublicAccessParser("London Borough of Barking and Dagenham", "Barking and Dagenham", "http://paweb.barking-dagenham.gov.uk/PublicAccess/tdc/")
  117. # parser = PublicAccessParser("Reading Borough Council", "Reading", "http://planning.reading.gov.uk/publicaccess/tdc/")
  118. # parser = PublicAccessParser("Lancaster City Council", "Lancaster", "http://planapps.lancaster.gov.uk/PublicAccess/tdc/")
  119. # parser = PublicAccessParser("Harrogate Borough Council", "Harrogate", "http://publicaccess.harrogate.gov.uk/publicaccess/tdc/")
  120. # parser = PublicAccessParser("West Lancashire District Council", "West Lancashire", "http://publicaccess.westlancsdc.gov.uk/PublicAccess/tdc/")
  121. # parser = PublicAccessParser("Torbay Council", "Torbay", "http://www.torbay.gov.uk/publicaccess/tdc/")
  122. # parser = PublicAccessParser("Hambleton District Council", "Hambleton", "http://planning.hambleton.gov.uk/publicaccess/tdc/")
  123. print parser.getResults(day, month, year)