Automatically exported from code.google.com/p/planningalerts
Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.
 
 
 
 
 
 

80 rader
2.9 KiB

  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import datetime
  5. import re
  6. from BeautifulSoup import BeautifulSoup
  7. from PlanningUtils import PlanningApplication, \
  8. PlanningAuthorityResults, \
  9. getPostcodeFromText
  # strftime() pattern for the search URL's date parameters. Each "%%" is
  # emitted by strftime() as a literal "%", so a date renders as
  # "dd%2Fmm%2Fyyyy", i.e. dd/mm/yyyy with the slashes already URL-encoded.
  10. date_format = "%d%%2F%m%%2F%Y"
  class CalderdaleParser:
      """Scraper for the Calderdale Council planning application search.

      All applications found are added to one PlanningAuthorityResults
      object created in __init__, so repeated searches on the same instance
      accumulate into the same result set.
      """

      def __init__(self, *args):
          """Set up authority names, URL templates and the result container.

          Any positional arguments are accepted and ignored.
          """
          self.authority_name = "Calderdale Council"
          self.authority_short_name = "Calderdale"

          # Search page; %(date)s is substituted twice so that the date range
          # starts and ends on the same day.
          self.base_url = "http://www.calderdale.gov.uk/environment/planning/search-applications/planapps.jsp?status=0&date1=%(date)s&date2=%(date)s&Search=Search"
          # Page for one application; %s is the URL-quoted council reference.
          self.info_url = "http://www.calderdale.gov.uk/environment/planning/search-applications/planapps.jsp?app=%s&Search=Search"

          self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

      def getResultsByDayMonthYear(self, day, month, year):
          """Collect the applications listed for one date.

          Fetches the search page for the date and keeps following the
          "Next" link until a page has none. Every <h3 class="resultsnavbar">
          on a page is treated as one application.

          Args:
              day, month, year: integers forming a valid calendar date.

          Returns:
              The instance's PlanningAuthorityResults, with the applications
              found added to it.

          Raises:
              ValueError: if the three integers are not a valid date.

          No attempt is made to tolerate a result entry that lacks one of
          the expected labels; such an entry makes the lookup fail.
          """
          search_date = datetime.date(year, month, day)
          next_page_url = self.base_url % {"date": search_date.strftime(date_format)}

          while next_page_url:
              try:
                  response = urllib2.urlopen(next_page_url)
              except urllib2.HTTPError:
                  # This is what seems to happen if there are no apps
                  break

              # Release the connection even if reading or parsing fails.
              try:
                  soup = BeautifulSoup(response.read())
              finally:
                  response.close()

              # Work out the following page before processing this one.
              next_link = soup.find(text="Next")
              if next_link:
                  next_page_url = urlparse.urljoin(self.base_url, next_link.parent['href'])
              else:
                  next_page_url = None

              # There is an <h3> for each app that we can use
              for h3 in soup.findAll("h3", {"class": "resultsnavbar"}):
                  self._results.addApplication(self._parseApplication(h3, search_date))

          return self._results

      def _labelledValue(self, details, label):
          """Return the stripped text node that follows the element labelled *label*."""
          return details.find(text=label).parent.nextSibling.strip()

      def _parseApplication(self, h3, search_date):
          """Build a PlanningApplication from one result heading and its details <div>."""
          # The fields of an application live in the first <div> after its <h3>.
          details = h3.findNext("div")

          application = PlanningApplication()
          application.date_received = search_date
          # The reference is the part of the heading text after ": ".
          application.council_reference = h3.string.split(": ")[1]
          application.description = self._labelledValue(details, "Proposal:")
          # Address lines are separated by carriage returns in the page.
          application.address = ', '.join(self._labelledValue(details, "Address of proposal:").split("\r"))
          application.postcode = getPostcodeFromText(application.address)

          comment_link = details.find(text=re.compile("Comment on Application")).parent
          application.comment_url = urlparse.urljoin(self.base_url, comment_link['href'])
          application.info_url = self.info_url % (urllib.quote(application.council_reference))

          # The grid reference must split into exactly two whitespace-separated parts.
          application.osgb_x, application.osgb_y = self._labelledValue(details, "Grid Reference:").split()
          return application

      def getResults(self, day, month, year):
          """Return displayXML() of the results for the given date.

          day, month and year may be anything int() accepts.
          """
          results = self.getResultsByDayMonthYear(int(day), int(month), int(year))
          return results.displayXML()
  if __name__ == '__main__':
      # Manual check when run as a script: fetch the applications for
      # 1 October 2008 and print whatever getResults() returns.
      day, month, year = 1, 10, 2008
      print(CalderdaleParser().getResults(day, month, year))
  # TODO
  # 1) Find a better way to detect a date with no applications than
  #    catching the HTTPError raised when the search page is requested.