Automatically exported from code.google.com/p/planningalerts
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.
 
 
 
 
 
 

86 lignes
3.1 KiB

  1. """
  2. This is the scraper for Hastings.
  3. """
  4. import urllib2
  5. import urllib
  6. import urlparse
  7. import datetime, time
  8. import cgi
  9. from BeautifulSoup import BeautifulSoup
  10. from PlanningUtils import PlanningApplication, \
  11. PlanningAuthorityResults, \
  12. getPostcodeFromText
  13. date_format = "%d/%m/%Y"
  14. class HastingsParser:
  15. def __init__(self, *args):
  16. self.authority_name = "Hastings Borough Council"
  17. self.authority_short_name = "Hastings"
  18. # self.base_url = "http://www.hastings.gov.uk/planning/view_applications.aspx"
  19. self.base_url = "http://www.hastings.gov.uk/planning/SearchResults.aspx"
  20. # Due to the idiotic design of the Hastings site, we can't give a proper info url.
  21. # There is a sensible URL, but it only works with a referer.
  22. self.info_url = "http://www.hastings.gov.uk/planning/view_applications.aspx"
  23. self.comment_url_template = "http://www.hastings.gov.uk/planning/planningapp_comments.aspx?appNumber=%s&syskey=%s"
  24. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  25. def getResultsByDayMonthYear(self, day, month, year):
  26. search_day = datetime.date(year, month, day)
  27. post_data = urllib.urlencode((
  28. ("type", "app"),
  29. ("time", "0"),
  30. ))
  31. # Now get the search page
  32. response = urllib2.urlopen(self.base_url, post_data)
  33. soup = BeautifulSoup(response.read())
  34. caseno_strings = soup.findAll(text="Case No:")
  35. for caseno_string in caseno_strings:
  36. application = PlanningApplication()
  37. application.council_reference = caseno_string.findNext("a").string.strip()
  38. info_url = urlparse.urljoin(self.base_url, caseno_string.findNext("a")['href'])
  39. # See above for why we can't use the proper info url.
  40. application.info_url = self.info_url
  41. # In order to avoid doing a download to find the comment page, we'll
  42. # get the system key from this url
  43. syskey = cgi.parse_qs(urlparse.urlsplit(info_url)[3])['id'][0]
  44. application.date_received = datetime.datetime.strptime(caseno_string.findNext(text="Registration Date:").findNext("p").string.strip(), date_format).date()
  45. application.address = caseno_string.findNext(text="Location:").findNext("p").string.strip()
  46. application.postcode = getPostcodeFromText(application.address)
  47. application.description = caseno_string.findNext(text="Proposal:").findNext("p").string.strip()
  48. #http://www.hastings.gov.uk/planning/planningapp_comments.aspx?appNumber=HS/FA/08/00631&syskey=95642
  49. application.comment_url = self.comment_url_template %(application.council_reference, syskey)
  50. self._results.addApplication(application)
  51. return self._results
  52. def getResults(self, day, month, year):
  53. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  54. if __name__ == '__main__':
  55. parser = HastingsParser()
  56. print parser.getResults(12,6,2009)