Automatically exported from code.google.com/p/planningalerts
Você não pode selecionar mais de 25 tópicos. Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.
 
 
 
 
 
 

80 linhas
2.9 KiB

  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import datetime
  5. import re
  6. from BeautifulSoup import BeautifulSoup
  7. from PlanningUtils import PlanningApplication, \
  8. PlanningAuthorityResults, \
  9. getPostcodeFromText
  10. date_format = "%d%%2F%m%%2F%Y"
  11. class CalderdaleParser:
  12. def __init__(self, *args):
  13. self.authority_name = "Calderdale Council"
  14. self.authority_short_name = "Calderdale"
  15. self.base_url = "http://www.calderdale.gov.uk/environment/planning/search-applications/planapps.jsp?status=0&date1=%(date)s&date2=%(date)s&Search=Search"
  16. self.info_url = "http://www.calderdale.gov.uk/environment/planning/search-applications/planapps.jsp?app=%s&Search=Search"
  17. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  18. def getResultsByDayMonthYear(self, day, month, year):
  19. search_date = datetime.date(year, month, day)
  20. next_page_url = self.base_url %{"date": search_date.strftime(date_format)}
  21. while next_page_url:
  22. try:
  23. response = urllib2.urlopen(next_page_url)
  24. except urllib2.HTTPError:
  25. # This is what seems to happen if there are no apps
  26. break
  27. soup = BeautifulSoup(response.read())
  28. next = soup.find(text="Next")
  29. if next:
  30. next_page_url = urlparse.urljoin(self.base_url, next.parent['href'])
  31. else:
  32. next_page_url = None
  33. # There is an <h3> for each app that we can use
  34. for h3 in soup.findAll("h3", {"class": "resultsnavbar"}):
  35. application = PlanningApplication()
  36. application.date_received = search_date
  37. application.council_reference = h3.string.split(": ")[1]
  38. application.description = h3.findNext("div").find(text="Proposal:").parent.nextSibling.strip()
  39. application.address = ', '.join(h3.findNext("div").find(text="Address of proposal:").parent.nextSibling.strip().split("\r"))
  40. application.postcode = getPostcodeFromText(application.address)
  41. application.comment_url = urlparse.urljoin(self.base_url, h3.findNext("div").find(text=re.compile("Comment on Application")).parent['href'])
  42. application.info_url = self.info_url %(urllib.quote(application.council_reference))
  43. application.osgb_x, application.osgb_y = h3.findNext("div").find(text="Grid Reference:").parent.nextSibling.strip().split()
  44. self._results.addApplication(application)
  45. return self._results
  46. def getResults(self, day, month, year):
  47. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  48. if __name__ == '__main__':
  49. parser = CalderdaleParser()
  50. print parser.getResults(1,10,2008)
  51. # TODO
  52. # 1) Find a better way to deal with the no apps situation.