Automatically exported from code.google.com/p/planningalerts
您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符
 
 
 
 
 
 

86 行
3.1 KiB

  """
  Scraper for the planning applications published by Hastings Borough Council.
  """
  import urllib2
  import urllib
  import urlparse
  import datetime
  import time
  import cgi

  from BeautifulSoup import BeautifulSoup

  from PlanningUtils import (
      PlanningApplication,
      PlanningAuthorityResults,
      getPostcodeFromText,
  )

  # strptime pattern (day/month/year) used to parse the "Registration Date:"
  # values on the search results page.
  date_format = "%d/%m/%Y"
  class HastingsParser:
      """Scraper for the Hastings Borough Council planning application list.

      The parser posts a fixed search to the council's results page, reads
      every "Case No:" entry from the returned HTML and stores one
      PlanningApplication per entry in a single PlanningAuthorityResults
      object that lives as long as the parser instance.
      """

      def __init__(self, *args):
          """Set up council names, site URLs and an empty result container.

          Positional arguments are accepted and ignored.
          """
          self.authority_name = "Hastings Borough Council"
          self.authority_short_name = "Hastings"

          # Earlier value of base_url, kept for reference:
          # self.base_url = "http://www.hastings.gov.uk/planning/view_applications.aspx"
          self.base_url = "http://www.hastings.gov.uk/planning/SearchResults.aspx"

          # Due to the design of the Hastings site, we can't give a proper
          # info url. There is a sensible URL, but it only works with a
          # referer, so every application gets this generic page instead.
          self.info_url = "http://www.hastings.gov.uk/planning/view_applications.aspx"

          # Filled with (council reference, system key) for each application.
          self.comment_url_template = "http://www.hastings.gov.uk/planning/planningapp_comments.aspx?appNumber=%s&syskey=%s"

          self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

      def getResultsByDayMonthYear(self, day, month, year):
          """Download the results page and record every application on it.

          day, month, year -- integers forming a calendar date. They are
              validated (ValueError for an impossible date) but are NOT sent
              to the site: the request always posts type=app and time=0, so
              the applications returned are whatever that fixed search yields.

          Returns the parser's PlanningAuthorityResults object. The same
          object is reused on every call, so repeated calls keep appending
          to it.
          """
          # Built only for its validation side effect; the value is unused.
          datetime.date(year, month, day)

          post_data = urllib.urlencode((
              ("type", "app"),
              ("time", "0"),
          ))

          # Fetch the search page, making sure the connection is released
          # even if reading or parsing the body fails.
          response = urllib2.urlopen(self.base_url, post_data)
          try:
              soup = BeautifulSoup(response.read())
          finally:
              response.close()

          for caseno_string in soup.findAll(text="Case No:"):
              self._results.addApplication(self._parseApplication(caseno_string))

          return self._results

      def _fieldText(self, caseno_string, label):
          """Return the stripped text of the first <p> after the given label.

          The label is searched for forwards from caseno_string, so the
          value belongs to the same results entry as the case number.
          """
          return caseno_string.findNext(text=label).findNext("p").string.strip()

      def _parseApplication(self, caseno_string):
          """Build a PlanningApplication from one "Case No:" text node."""
          application = PlanningApplication()

          # The first link after the label carries both the council
          # reference (link text) and the detail page URL (href).
          case_link = caseno_string.findNext("a")
          application.council_reference = case_link.string.strip()
          detail_url = urlparse.urljoin(self.base_url, case_link['href'])

          # The detail URL only works with a referer, so it is not published
          # as the info url; the generic page is used instead.
          application.info_url = self.info_url

          # In order to avoid doing a download to find the comment page,
          # take the system key from the "id" query parameter of the detail URL.
          syskey = cgi.parse_qs(urlparse.urlsplit(detail_url).query)['id'][0]

          registered = self._fieldText(caseno_string, "Registration Date:")
          application.date_received = datetime.datetime.strptime(registered, date_format).date()

          application.address = self._fieldText(caseno_string, "Location:")
          application.postcode = getPostcodeFromText(application.address)
          application.description = self._fieldText(caseno_string, "Proposal:")

          # e.g. http://www.hastings.gov.uk/planning/planningapp_comments.aspx?appNumber=HS/FA/08/00631&syskey=95642
          application.comment_url = self.comment_url_template % (application.council_reference, syskey)

          return application

      def getResults(self, day, month, year):
          """Return the scraped results as XML.

          day, month, year may be ints or numeric strings; they are converted
          with int() before being passed to getResultsByDayMonthYear.
          """
          return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  if __name__ == '__main__':
      # Manual smoke run: query the live site and dump the resulting XML.
      scraper = HastingsParser()
      xml_output = scraper.getResults(12, 6, 2009)
      print(xml_output)