Automatically exported from code.google.com/p/planningalerts
Não pode escolher mais do que 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.
 
 
 
 
 
 

86 lines
3.1 KiB

  1. """
  2. This is the scraper for Hastings.
  3. """
  4. import urllib2
  5. import urllib
  6. import urlparse
  7. import datetime, time
  8. import cgi
  9. from BeautifulSoup import BeautifulSoup
  10. from PlanningUtils import PlanningApplication, \
  11. PlanningAuthorityResults, \
  12. getPostcodeFromText
  13. date_format = "%d/%m/%Y"
  14. class HastingsParser:
  15. def __init__(self, *args):
  16. self.authority_name = "Hastings Borough Council"
  17. self.authority_short_name = "Hastings"
  18. # self.base_url = "http://www.hastings.gov.uk/planning/view_applications.aspx"
  19. self.base_url = "http://www.hastings.gov.uk/planning/SearchResults.aspx"
  20. # Due to the idiotic design of the Hastings site, we can't give a proper info url.
  21. # There is a sensible URL, but it only works with a referer.
  22. self.info_url = "http://www.hastings.gov.uk/planning/view_applications.aspx"
  23. self.comment_url_template = "http://www.hastings.gov.uk/planning/planningapp_comments.aspx?appNumber=%s&syskey=%s"
  24. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  25. def getResultsByDayMonthYear(self, day, month, year):
  26. search_day = datetime.date(year, month, day)
  27. post_data = urllib.urlencode((
  28. ("type", "app"),
  29. ("time", "0"),
  30. ))
  31. # Now get the search page
  32. response = urllib2.urlopen(self.base_url, post_data)
  33. soup = BeautifulSoup(response.read())
  34. caseno_strings = soup.findAll(text="Case No:")
  35. for caseno_string in caseno_strings:
  36. application = PlanningApplication()
  37. application.council_reference = caseno_string.findNext("a").string.strip()
  38. info_url = urlparse.urljoin(self.base_url, caseno_string.findNext("a")['href'])
  39. # See above for why we can't use the proper info url.
  40. application.info_url = self.info_url
  41. # In order to avoid doing a download to find the comment page, we'll
  42. # get the system key from this url
  43. syskey = cgi.parse_qs(urlparse.urlsplit(info_url)[3])['id'][0]
  44. application.date_received = datetime.datetime.strptime(caseno_string.findNext(text="Registration Date:").findNext("p").string.strip(), date_format).date()
  45. application.address = caseno_string.findNext(text="Location:").findNext("p").string.strip()
  46. application.postcode = getPostcodeFromText(application.address)
  47. application.description = caseno_string.findNext(text="Proposal:").findNext("p").string.strip()
  48. #http://www.hastings.gov.uk/planning/planningapp_comments.aspx?appNumber=HS/FA/08/00631&syskey=95642
  49. application.comment_url = self.comment_url_template %(application.council_reference, syskey)
  50. self._results.addApplication(application)
  51. return self._results
  52. def getResults(self, day, month, year):
  53. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  54. if __name__ == '__main__':
  55. parser = HastingsParser()
  56. print parser.getResults(12,6,2009)