Automatically exported from code.google.com/p/planningalerts
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

80 lines
2.9 KiB

  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import datetime
  5. import re
  6. from BeautifulSoup import BeautifulSoup
  7. from PlanningUtils import PlanningApplication, \
  8. PlanningAuthorityResults, \
  9. getPostcodeFromText
  10. date_format = "%d%%2F%m%%2F%Y"
  11. class CalderdaleParser:
  12. def __init__(self, *args):
  13. self.authority_name = "Calderdale Council"
  14. self.authority_short_name = "Calderdale"
  15. self.base_url = "http://www.calderdale.gov.uk/environment/planning/search-applications/planapps.jsp?status=0&date1=%(date)s&date2=%(date)s&Search=Search"
  16. self.info_url = "http://www.calderdale.gov.uk/environment/planning/search-applications/planapps.jsp?app=%s&Search=Search"
  17. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  18. def getResultsByDayMonthYear(self, day, month, year):
  19. search_date = datetime.date(year, month, day)
  20. next_page_url = self.base_url %{"date": search_date.strftime(date_format)}
  21. while next_page_url:
  22. try:
  23. response = urllib2.urlopen(next_page_url)
  24. except urllib2.HTTPError:
  25. # This is what seems to happen if there are no apps
  26. break
  27. soup = BeautifulSoup(response.read())
  28. next = soup.find(text="Next")
  29. if next:
  30. next_page_url = urlparse.urljoin(self.base_url, next.parent['href'])
  31. else:
  32. next_page_url = None
  33. # There is an <h3> for each app that we can use
  34. for h3 in soup.findAll("h3", {"class": "resultsnavbar"}):
  35. application = PlanningApplication()
  36. application.date_received = search_date
  37. application.council_reference = h3.string.split(": ")[1]
  38. application.description = h3.findNext("div").find(text="Proposal:").parent.nextSibling.strip()
  39. application.address = ', '.join(h3.findNext("div").find(text="Address of proposal:").parent.nextSibling.strip().split("\r"))
  40. application.postcode = getPostcodeFromText(application.address)
  41. application.comment_url = urlparse.urljoin(self.base_url, h3.findNext("div").find(text=re.compile("Comment on Application")).parent['href'])
  42. application.info_url = self.info_url %(urllib.quote(application.council_reference))
  43. application.osgb_x, application.osgb_y = h3.findNext("div").find(text="Grid Reference:").parent.nextSibling.strip().split()
  44. self._results.addApplication(application)
  45. return self._results
  46. def getResults(self, day, month, year):
  47. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  48. if __name__ == '__main__':
  49. parser = CalderdaleParser()
  50. print parser.getResults(1,10,2008)
  51. # TODO
  52. # 1) Find a better way to detect a day with no applications than treating an HTTPError as "no results".