Automatically exported from code.google.com/p/planningalerts
Não pode escolher mais do que 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.
 
 
 
 
 
 

101 linhas
3.4 KiB

  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import datetime, time
  5. import cgi
  6. import re
  7. import cookielib
  8. cookie_jar = cookielib.CookieJar()
  9. from BeautifulSoup import BeautifulSoup
  10. from PlanningUtils import PlanningApplication, \
  11. PlanningAuthorityResults, \
  12. getPostcodeFromText
  13. from HTTPHandlers import CookieAddingHTTPRedirectHandler
  14. cookie_handling_opener = urllib2.build_opener(CookieAddingHTTPRedirectHandler(cookie_jar))
  15. search_date_format = "%m/%d/%Y" #That's right, the search date is US style.
  16. info_page_date_format = "%d/%m/%Y" # and the info page is UK style
  17. class GosportParser:
  18. def __init__(self, *args):
  19. self.authority_name = "Gosport Borough Council"
  20. self.authority_short_name = "Gosport"
  21. self.base_url = "http://www.gosport.gov.uk/gbcplanning/ApplicationSearch2.aspx"
  22. self.info_url = "http://www.gosport.gov.uk/gbcplanning/ApplicationDetails.aspx?ID=%s"
  23. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  24. def getResultsByDayMonthYear(self, day, month, year):
  25. search_date = datetime.date(year, month, day)
  26. get_request = urllib2.Request(self.base_url)
  27. get_response = urllib2.urlopen(get_request)
  28. cookie_jar.extract_cookies(get_response, get_request)
  29. get_soup = BeautifulSoup(get_response.read())
  30. post_data = (
  31. ("__VIEWSTATE", get_soup.find("input", {"name": "__VIEWSTATE"})["value"]),
  32. ("pgid", get_soup.find("input", {"name": "pgid"})["value"]),
  33. ("action", "Search"),
  34. # ("ApplicationSearch21%3AtbDevAddress", ""),
  35. # ("ApplicationSearch21%3AtbApplicantName", ""),
  36. # ("ApplicationSearch21%3AtbAgentName", ""),
  37. ("ApplicationSearch21:tbDateSubmitted", "10/01/2008"),
  38. ("ApplicationSearch21:btnDateSubmitted", "Search"),
  39. # ("ApplicationSearch21%3AtbDateDetermined", ""),
  40. )
  41. post_request = urllib2.Request(self.base_url, urllib.urlencode(post_data))
  42. cookie_jar.add_cookie_header(post_request)
  43. post_response = cookie_handling_opener.open(post_request)
  44. post_soup = BeautifulSoup(post_response.read())
  45. # Discard the first <tr>, which contains headers
  46. trs = post_soup.find("table", id="SearchResults1_dgSearchResults").findAll("tr")[1:]
  47. for tr in trs:
  48. application = PlanningApplication()
  49. tds = tr.findAll("td")
  50. application.council_reference = tds[0].string.strip()
  51. application.address = tds[1].string.strip()
  52. application.postcode = getPostcodeFromText(application.address)
  53. application.description = tds[2].string.strip()
  54. application.date_received = datetime.datetime(*(time.strptime(tds[3].string.strip(), info_page_date_format)[0:6]))
  55. application.info_url = self.info_url %(application.council_reference)
  56. # The comment url must be accessed by a POST, so we'll just use the info url for that as well
  57. application.comment_url = application.info_url
  58. self._results.addApplication(application)
  59. return self._results
  60. def getResults(self, day, month, year):
  61. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  62. if __name__ == '__main__':
  63. parser = GosportParser()
  64. print parser.getResults(1,10,2008)