Automatically exported from code.google.com/p/planningalerts
No puede seleccionar más de 25 temas Los temas deben comenzar con una letra o número, pueden incluir guiones ('-') y pueden tener hasta 35 caracteres de largo.
 
 
 
 
 
 

101 líneas
3.4 KiB

  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import datetime, time
  5. import cgi
  6. import re
  7. import cookielib
# Module-wide cookie jar: the parser stores cookies from the initial GET of the
# search page here and attaches them to the follow-up POST.
cookie_jar = cookielib.CookieJar()
  9. from BeautifulSoup import BeautifulSoup
  10. from PlanningUtils import PlanningApplication, \
  11. PlanningAuthorityResults, \
  12. getPostcodeFromText
  13. from HTTPHandlers import CookieAddingHTTPRedirectHandler
  14. cookie_handling_opener = urllib2.build_opener(CookieAddingHTTPRedirectHandler(cookie_jar))
  15. search_date_format = "%m/%d/%Y" #That's right, the search date is US style.
  16. info_page_date_format = "%d/%m/%Y" # and the info page is UK style
  17. class GosportParser:
  18. def __init__(self, *args):
  19. self.authority_name = "Gosport Borough Council"
  20. self.authority_short_name = "Gosport"
  21. self.base_url = "http://www.gosport.gov.uk/gbcplanning/ApplicationSearch2.aspx"
  22. self.info_url = "http://www.gosport.gov.uk/gbcplanning/ApplicationDetails.aspx?ID=%s"
  23. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  24. def getResultsByDayMonthYear(self, day, month, year):
  25. search_date = datetime.date(year, month, day)
  26. get_request = urllib2.Request(self.base_url)
  27. get_response = urllib2.urlopen(get_request)
  28. cookie_jar.extract_cookies(get_response, get_request)
  29. get_soup = BeautifulSoup(get_response.read())
  30. post_data = (
  31. ("__VIEWSTATE", get_soup.find("input", {"name": "__VIEWSTATE"})["value"]),
  32. ("pgid", get_soup.find("input", {"name": "pgid"})["value"]),
  33. ("action", "Search"),
  34. # ("ApplicationSearch21%3AtbDevAddress", ""),
  35. # ("ApplicationSearch21%3AtbApplicantName", ""),
  36. # ("ApplicationSearch21%3AtbAgentName", ""),
  37. ("ApplicationSearch21:tbDateSubmitted", search_date.strftime(search_date_format)),
  38. ("ApplicationSearch21:btnDateSubmitted", "Search"),
  39. # ("ApplicationSearch21%3AtbDateDetermined", ""),
  40. )
  41. post_request = urllib2.Request(self.base_url, urllib.urlencode(post_data))
  42. cookie_jar.add_cookie_header(post_request)
  43. post_response = cookie_handling_opener.open(post_request)
  44. post_soup = BeautifulSoup(post_response.read())
  45. # Discard the first <tr>, which contains headers
  46. trs = post_soup.find("table", id="SearchResults1_dgSearchResults").findAll("tr")[1:]
  47. for tr in trs:
  48. application = PlanningApplication()
  49. tds = tr.findAll("td")
  50. application.council_reference = tds[0].string.strip()
  51. application.address = tds[1].string.strip()
  52. application.postcode = getPostcodeFromText(application.address)
  53. application.description = tds[2].string.strip()
  54. application.date_received = datetime.datetime(*(time.strptime(tds[3].string.strip(), info_page_date_format)[0:6]))
  55. application.info_url = self.info_url %(application.council_reference)
  56. # The comment url must be accessed by a POST, so we'll just use the info url for that as well
  57. application.comment_url = application.info_url
  58. self._results.addApplication(application)
  59. return self._results
  60. def getResults(self, day, month, year):
  61. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  62. if __name__ == '__main__':
  63. parser = GosportParser()
  64. print parser.getResults(12,6,2009)