Automatically exported from code.google.com/p/planningalerts
No puede seleccionar más de 25 temas Los temas deben comenzar con una letra o número, pueden incluir guiones ('-') y pueden tener hasta 35 caracteres de largo.
 
 
 
 
 
 

80 líneas
2.9 KiB

  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import datetime
  5. import re
  6. from BeautifulSoup import BeautifulSoup
  7. from PlanningUtils import PlanningApplication, \
  8. PlanningAuthorityResults, \
  9. getPostcodeFromText
# strftime pattern for the date parameters of the search URL.
# "%%2F" comes out of strftime as a literal "%2F" (a URL-encoded "/"),
# so a date is rendered as DD%2FMM%2FYYYY.
date_format = "%d%%2F%m%%2F%Y"
  11. class CalderdaleParser:
  12. def __init__(self, *args):
  13. self.authority_name = "Calderdale Council"
  14. self.authority_short_name = "Calderdale"
  15. self.base_url = "http://www.calderdale.gov.uk/environment/planning/search-applications/planapps.jsp?status=0&date1=%(date)s&date2=%(date)s&Search=Search"
  16. self.info_url = "http://www.calderdale.gov.uk/environment/planning/search-applications/planapps.jsp?app=%s&Search=Search"
  17. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  18. def getResultsByDayMonthYear(self, day, month, year):
  19. search_date = datetime.date(year, month, day)
  20. next_page_url = self.base_url %{"date": search_date.strftime(date_format)}
  21. while next_page_url:
  22. try:
  23. response = urllib2.urlopen(next_page_url)
  24. except urllib2.HTTPError:
  25. # This is what seems to happen if there are no apps
  26. break
  27. soup = BeautifulSoup(response.read())
  28. next = soup.find(text="Next")
  29. if next:
  30. next_page_url = urlparse.urljoin(self.base_url, next.parent['href'])
  31. else:
  32. next_page_url = None
  33. # There is an <h3> for each app that we can use
  34. for h3 in soup.findAll("h3", {"class": "resultsnavbar"}):
  35. application = PlanningApplication()
  36. application.date_received = search_date
  37. application.council_reference = h3.string.split(": ")[1]
  38. application.description = h3.findNext("div").find(text="Proposal:").parent.nextSibling.strip()
  39. application.address = ', '.join(h3.findNext("div").find(text="Address of proposal:").parent.nextSibling.strip().split("\r"))
  40. application.postcode = getPostcodeFromText(application.address)
  41. application.comment_url = urlparse.urljoin(self.base_url, h3.findNext("div").find(text=re.compile("Comment on Application")).parent['href'])
  42. application.info_url = self.info_url %(urllib.quote(application.council_reference))
  43. application.osgb_x, application.osgb_y = h3.findNext("div").find(text="Grid Reference:").parent.nextSibling.strip().split()
  44. self._results.addApplication(application)
  45. return self._results
  46. def getResults(self, day, month, year):
  47. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  48. if __name__ == '__main__':
  49. parser = CalderdaleParser()
  50. print parser.getResults(1,10,2008)
# TODO
# 1) Find a better way to detect the "no applications" case than catching
#    urllib2.HTTPError in getResultsByDayMonthYear.