Automatically exported from code.google.com/p/planningalerts
Nevar pievienot vairāk kā 25 tēmas. Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.
 
 
 
 
 
 

121 rinda
4.5 KiB

  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import datetime, time
  5. import cgi
  6. import BeautifulSoup
  7. import cookielib
  8. cookie_jar = cookielib.CookieJar()
  9. from PlanningUtils import PlanningApplication, \
  10. PlanningAuthorityResults, \
  11. getPostcodeFromText
  12. date_format = "%d/%m/%Y"
  class EastbourneParser:
      """Scraper for planning applications listed by Eastbourne Borough Council.

      Applications found by a search are added to a PlanningAuthorityResults
      object that is created once in the constructor; every search returns
      that same object.
      """

      def __init__(self, *args):
          """Store the authority names and site URLs and create the result set.

          Extra positional arguments are accepted and ignored.
          """
          # Page fetched first, to pick up the cookie for the T&Cs agreement.
          self.first_url = "http://www.eastbourne.gov.uk/planningapplications/index.asp"
          # Page the search form is POSTed to.  (A commented-out alternative
          # pointed base_url at .../planningapplications/search.asp.)
          self.base_url = "http://www.eastbourne.gov.uk/planningapplications/results.asp"

          self.authority_short_name = "Eastbourne"
          self.authority_name = "Eastbourne Borough Council"
          self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

      def _agreeToTerms(self):
          """Fetch the first page and post its form back, recording cookies.

          We need a cookie to say we have agreed to some T&Cs; requesting the
          first page without it redirects us to a page whose form is submitted
          here.  Cookies from both responses go into the module-level
          cookie_jar.
          """
          landing_request = urllib2.Request(self.first_url)
          landing_response = urllib2.urlopen(landing_request)
          cookie_jar.extract_cookies(landing_response, landing_request)

          landing_soup = BeautifulSoup.BeautifulSoup(landing_response.read())
          terms_form = landing_soup.form
          form_action = urlparse.urljoin(self.first_url, terms_form['action'])

          # Only the first <input> of the form is sent back.
          first_field = terms_form.input
          form_body = urllib.urlencode([(first_field['name'], first_field['value'])])

          accept_request = urllib2.Request(form_action, form_body)
          cookie_jar.add_cookie_header(accept_request)
          accept_response = urllib2.urlopen(accept_request)
          cookie_jar.extract_cookies(accept_response, accept_request)

      def _readApplication(self, app_no_label, search_day):
          """Build a PlanningApplication from the markup after an "App. No.:" label.

          app_no_label is the text node matched in the results page and
          search_day is the date stored as the application's received date.
          """
          application = PlanningApplication()
          application.date_received = search_day

          # The first link after the label carries both the reference and the
          # URL of the application's detail page.
          reference_link = app_no_label.findNext("a")
          application.council_reference = reference_link.string.strip()
          application.info_url = urlparse.urljoin(self.base_url, reference_link['href'])

          address_cell = app_no_label.findNext(text="Site Address:").findNext("td")
          address_parts = []
          for node in address_cell.contents:
              # Exact type match: tags and NavigableString subclasses are skipped.
              if type(node) is BeautifulSoup.NavigableString:
                  address_parts.append(node.strip())
          application.address = ' '.join(address_parts)
          application.postcode = getPostcodeFromText(application.address)

          comment_link = app_no_label.findNext(text="Comment on application").parent
          application.comment_url = urlparse.urljoin(self.base_url, comment_link['href'])

          description_cell = app_no_label.findNext(text="Description:").findNext("td")
          application.description = description_cell.string.strip()

          return application

      def getResultsByDayMonthYear(self, day, month, year):
          """Search for applications received on one date and collect them.

          day, month and year are integers handed to
          datetime.date(year, month, day).  Each application found is added to
          self._results, which is then returned.
          """
          search_day = datetime.date(year, month, day)

          # There's some faffing around before the search itself can be made.
          self._agreeToTerms()

          # Now (finally) run the search.  Sample encoded form body, kept for
          # reference:
          #ApplicationNumber=&AddressPrefix=&Postcode=&CaseOfficer=&WardMember=&DateReceivedStart=31%2F08%2F2008&DateReceivedEnd=31%2F08%2F2008&DateDecidedStart=&DateDecidedEnd=&Locality=&AgentName=&ApplicantName=&ShowDecided=&DecisionLevel=&Sort1=FullAddressPrefix&Sort2=DateReceived+DESC&Submit=Search
          received_on = search_day.strftime(date_format)
          search_fields = [
              ("ApplicationNumber", ""),
              ("AddressPrefix", ""),
              ("Postcode", ""),
              ("CaseOfficer", ""),
              ("WardMember", ""),
              # Same date at both ends restricts the search to a single day.
              ("DateReceivedStart", received_on),
              ("DateReceivedEnd", received_on),
              ("DateDecidedStart", ""),
              ("DateDecidedEnd", ""),
              ("Locality", ""),
              ("AgentName", ""),
              ("ApplicantName", ""),
              ("ShowDecided", ""),
              ("DecisionLevel", ""),
              ("Sort1", "FullAddressPrefix"),
              ("Sort2", "DateReceived DESC"),
              ("Submit", "Search"),
          ]
          post_data = urllib.urlencode(search_fields)

          search_request = urllib2.Request(self.base_url)
          cookie_jar.add_cookie_header(search_request)
          search_response = urllib2.urlopen(search_request, post_data)

          results_soup = BeautifulSoup.BeautifulSoup(search_response.read())

          # Every application in the results page is introduced by this label.
          for app_no_label in results_soup.findAll(text="App. No.:"):
              application = self._readApplication(app_no_label, search_day)
              self._results.addApplication(application)

          return self._results

      def getResults(self, day, month, year):
          """Return displayXML() of the results for the given date.

          day, month and year are converted with int() before the search.
          """
          results = self.getResultsByDayMonthYear(int(day), int(month), int(year))
          return results.displayXML()
  if __name__ == '__main__':
      # Manual run: print the results XML for 1 September 2008.
      eastbourne_parser = EastbourneParser()
      results_xml = eastbourne_parser.getResults(1, 9, 2008)
      print(results_xml)

  # TODO - currently paginates at 20