Automatically exported from code.google.com/p/planningalerts
Nelze vybrat více než 25 témat Téma musí začínat písmenem nebo číslem, může obsahovat pomlčky („-“) a může být dlouhé až 35 znaků.
 
 
 
 
 
 

86 řádky
3.1 KiB

  1. """
  2. This is the scraper for Hastings.
  3. """
  4. import urllib2
  5. import urllib
  6. import urlparse
  7. import datetime, time
  8. import cgi
  9. from BeautifulSoup import BeautifulSoup
  10. from PlanningUtils import PlanningApplication, \
  11. PlanningAuthorityResults, \
  12. getPostcodeFromText
  # strptime pattern (day/month/year) used to parse the "Registration Date:" text scraped from the results page.
  13. date_format = "%d/%m/%Y"
  14. class HastingsParser:
  15. def __init__(self, *args):
  16. self.authority_name = "Hastings Borough Council"
  17. self.authority_short_name = "Hastings"
  18. # self.base_url = "http://www.hastings.gov.uk/planning/view_applications.aspx"
  19. self.base_url = "http://www.hastings.gov.uk/planning/SearchResults.aspx"
  20. # Due to the idiotic design of the Hastings site, we can't give a proper info url.
  21. # There is a sensible URL, but it only works with a referer.
  22. self.info_url = "http://www.hastings.gov.uk/planning/view_applications.aspx"
  23. self.comment_url_template = "http://www.hastings.gov.uk/planning/planningapp_comments.aspx?appNumber=%s&syskey=%s"
  24. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  25. def getResultsByDayMonthYear(self, day, month, year):
  26. search_day = datetime.date(year, month, day)
  27. post_data = urllib.urlencode((
  28. ("type", "app"),
  29. ("time", "0"),
  30. ))
  31. # Now get the search page
  32. response = urllib2.urlopen(self.base_url, post_data)
  33. soup = BeautifulSoup(response.read())
  34. caseno_strings = soup.findAll(text="Case No:")
  35. for caseno_string in caseno_strings:
  36. application = PlanningApplication()
  37. application.council_reference = caseno_string.findNext("a").string.strip()
  38. info_url = urlparse.urljoin(self.base_url, caseno_string.findNext("a")['href'])
  39. # See above for why we can't use the proper info url.
  40. application.info_url = self.info_url
  41. # In order to avoid doing a download to find the comment page, we'll
  42. # get the system key from this url
  43. syskey = cgi.parse_qs(urlparse.urlsplit(info_url)[3])['id'][0]
  44. application.date_received = datetime.datetime.strptime(caseno_string.findNext(text="Registration Date:").findNext("p").string.strip(), date_format).date()
  45. application.address = caseno_string.findNext(text="Location:").findNext("p").string.strip()
  46. application.postcode = getPostcodeFromText(application.address)
  47. application.description = caseno_string.findNext(text="Proposal:").findNext("p").string.strip()
  48. #http://www.hastings.gov.uk/planning/planningapp_comments.aspx?appNumber=HS/FA/08/00631&syskey=95642
  49. application.comment_url = self.comment_url_template %(application.council_reference, syskey)
  50. self._results.addApplication(application)
  51. return self._results
  52. def getResults(self, day, month, year):
  53. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  54. if __name__ == '__main__':
  55. parser = HastingsParser()
  56. print parser.getResults(12,6,2009)