Automatically exported from code.google.com/p/planningalerts
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

86 rivejä
3.1 KiB

  1. """
  2. This is the scraper for Hastings.
  3. """
  4. import urllib2
  5. import urllib
  6. import urlparse
  7. import datetime, time
  8. import cgi
  9. from BeautifulSoup import BeautifulSoup
  10. from PlanningUtils import PlanningApplication, \
  11. PlanningAuthorityResults, \
  12. getPostcodeFromText
  13. date_format = "%d/%m/%Y"
  14. class HastingsParser:
  15. def __init__(self, *args):
  16. self.authority_name = "Hastings Borough Council"
  17. self.authority_short_name = "Hastings"
  18. # self.base_url = "http://www.hastings.gov.uk/planning/view_applications.aspx"
  19. self.base_url = "http://www.hastings.gov.uk/planning/SearchResults.aspx"
  20. # Due to the idiotic design of the Hastings site, we can't give a proper info url.
  21. # There is a sensible URL, but it only works with a referer.
  22. self.info_url = "http://www.hastings.gov.uk/planning/view_applications.aspx"
  23. self.comment_url_template = "http://www.hastings.gov.uk/planning/planningapp_comments.aspx?appNumber=%s&syskey=%s"
  24. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  25. def getResultsByDayMonthYear(self, day, month, year):
  26. search_day = datetime.date(year, month, day)
  27. post_data = urllib.urlencode((
  28. ("type", "app"),
  29. ("time", "0"),
  30. ))
  31. # Now get the search page
  32. response = urllib2.urlopen(self.base_url, post_data)
  33. soup = BeautifulSoup(response.read())
  34. caseno_strings = soup.findAll(text="Case No:")
  35. for caseno_string in caseno_strings:
  36. application = PlanningApplication()
  37. application.council_reference = caseno_string.findNext("a").string.strip()
  38. info_url = urlparse.urljoin(self.base_url, caseno_string.findNext("a")['href'])
  39. # See above for why we can't use the proper info url.
  40. application.info_url = self.info_url
  41. # In order to avoid doing a download to find the comment page, we'll
  42. # get the system key from this url
  43. syskey = cgi.parse_qs(urlparse.urlsplit(info_url)[3])['id'][0]
  44. application.date_received = datetime.datetime.strptime(caseno_string.findNext(text="Registration Date:").findNext("p").string.strip(), date_format).date()
  45. application.address = caseno_string.findNext(text="Location:").findNext("p").string.strip()
  46. application.postcode = getPostcodeFromText(application.address)
  47. application.description = caseno_string.findNext(text="Proposal:").findNext("p").string.strip()
  48. #http://www.hastings.gov.uk/planning/planningapp_comments.aspx?appNumber=HS/FA/08/00631&syskey=95642
  49. application.comment_url = self.comment_url_template %(application.council_reference, syskey)
  50. self._results.addApplication(application)
  51. return self._results
  52. def getResults(self, day, month, year):
  53. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  54. if __name__ == '__main__':
  55. parser = HastingsParser()
  56. print parser.getResults(2,9,2008)