Automatically exported from code.google.com/p/planningalerts
Вы не можете выбрать более 25 тем Темы должны начинаться с буквы или цифры, могут содержать дефисы(-) и должны содержать не более 35 символов.
 
 
 
 
 
 

101 строка
3.4 KiB

  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import datetime, time
  5. import cgi
  6. import re
  7. import cookielib
  8. cookie_jar = cookielib.CookieJar()
  9. from BeautifulSoup import BeautifulSoup
  10. from PlanningUtils import PlanningApplication, \
  11. PlanningAuthorityResults, \
  12. getPostcodeFromText
  13. from HTTPHandlers import CookieAddingHTTPRedirectHandler
  14. cookie_handling_opener = urllib2.build_opener(CookieAddingHTTPRedirectHandler(cookie_jar))
  15. search_date_format = "%m/%d/%Y" #That's right, the search date is US style.
  16. info_page_date_format = "%d/%m/%Y" # and the info page is UK style
  17. class GosportParser:
  18. def __init__(self, *args):
  19. self.authority_name = "Gosport Borough Council"
  20. self.authority_short_name = "Gosport"
  21. self.base_url = "http://www.gosport.gov.uk/gbcplanning/ApplicationSearch2.aspx"
  22. self.info_url = "http://www.gosport.gov.uk/gbcplanning/ApplicationDetails.aspx?ID=%s"
  23. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  24. def getResultsByDayMonthYear(self, day, month, year):
  25. search_date = datetime.date(year, month, day)
  26. get_request = urllib2.Request(self.base_url)
  27. get_response = urllib2.urlopen(get_request)
  28. cookie_jar.extract_cookies(get_response, get_request)
  29. get_soup = BeautifulSoup(get_response.read())
  30. post_data = (
  31. ("__VIEWSTATE", get_soup.find("input", {"name": "__VIEWSTATE"})["value"]),
  32. ("pgid", get_soup.find("input", {"name": "pgid"})["value"]),
  33. ("action", "Search"),
  34. # ("ApplicationSearch21%3AtbDevAddress", ""),
  35. # ("ApplicationSearch21%3AtbApplicantName", ""),
  36. # ("ApplicationSearch21%3AtbAgentName", ""),
  37. ("ApplicationSearch21:tbDateSubmitted", "10/01/2008"),
  38. ("ApplicationSearch21:btnDateSubmitted", "Search"),
  39. # ("ApplicationSearch21%3AtbDateDetermined", ""),
  40. )
  41. post_request = urllib2.Request(self.base_url, urllib.urlencode(post_data))
  42. cookie_jar.add_cookie_header(post_request)
  43. post_response = cookie_handling_opener.open(post_request)
  44. post_soup = BeautifulSoup(post_response.read())
  45. # Discard the first <tr>, which contains headers
  46. trs = post_soup.find("table", id="SearchResults1_dgSearchResults").findAll("tr")[1:]
  47. for tr in trs:
  48. application = PlanningApplication()
  49. tds = tr.findAll("td")
  50. application.council_reference = tds[0].string.strip()
  51. application.address = tds[1].string.strip()
  52. application.postcode = getPostcodeFromText(application.address)
  53. application.description = tds[2].string.strip()
  54. application.date_received = datetime.datetime(*(time.strptime(tds[3].string.strip(), info_page_date_format)[0:6]))
  55. application.info_url = self.info_url %(application.council_reference)
  56. # The comment url must be accessed by a POST, so we'll just use the info url for that as well
  57. application.comment_url = application.info_url
  58. self._results.addApplication(application)
  59. return self._results
  60. def getResults(self, day, month, year):
  61. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  62. if __name__ == '__main__':
  63. parser = GosportParser()
  64. print parser.getResults(1,10,2008)