Automatically exported from code.google.com/p/planningalerts
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

Harrow.py 2.4 KiB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374
  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import datetime, time
  5. import cgi
  6. from BeautifulSoup import BeautifulSoup
  7. from PlanningUtils import PlanningApplication, \
  8. PlanningAuthorityResults, \
  9. getPostcodeFromText
  10. import re
  11. location_re = re.compile("Location:")
  12. date_received_re = re.compile("Date first received:")
  13. date_format = "%d %b %Y"
  14. class HarrowParser:
  15. def __init__(self, *args):
  16. self.authority_name = "London Borough of Harrow"
  17. self.authority_short_name = "Harrow"
  18. # This is a link to the last seven days applications
  19. # The day, month, and year arguments will be ignored.
  20. self.base_url = "http://www.harrow.gov.uk/www4/planning/dcweek1.asp"
  21. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  22. def getResultsByDayMonthYear(self, day, month, year):
  23. # Now get the search page
  24. response = urllib2.urlopen(self.base_url)
  25. soup = BeautifulSoup(response.read())
  26. # Each application contains the nav string "Application: "
  27. nav_strings = soup.findAll(text="Application: ")
  28. for nav_string in nav_strings:
  29. application = PlanningApplication()
  30. application.council_reference = nav_string.findPrevious("tr").findAll("td", limit=2)[1].string.strip()
  31. application.address = nav_string.findNext(text=location_re).split(":")[1].strip()
  32. application.postcode = getPostcodeFromText(application.address)
  33. application.description = nav_string.findNext(text="Proposal: ").findNext("td").string.strip()
  34. application.comment_url = urlparse.urljoin(self.base_url, nav_string.findNext(text="Proposal: ").findNext("a")['href'])
  35. application.date_received = datetime.datetime.strptime(nav_string.findNext(text=date_received_re).split(": ")[1], date_format).date()
  36. # FIXME: There is no appropriate info_url for the Harrow apps.
  37. # I'll put the base url for the moment, but as that is
  38. # a list of apps from the last 7 days that will quickly be out of date.
  39. application.info_url = self.base_url
  40. self._results.addApplication(application)
  41. return self._results
  42. def getResults(self, day, month, year):
  43. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  44. if __name__ == '__main__':
  45. parser = HarrowParser()
  46. print parser.getResults(21,5,2008)