Automatically exported from code.google.com/p/planningalerts
Nelze vybrat více než 25 témat Téma musí začínat písmenem nebo číslem, může obsahovat pomlčky („-“) a může být dlouhé až 35 znaků.
 
 
 
 
 
 

77 řádky
2.8 KiB

  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import datetime
  5. import re
  6. import BeautifulSoup
  7. from PlanningUtils import PlanningApplication, \
  8. PlanningAuthorityResults, \
  9. getPostcodeFromText
  10. search_date_format = "%d%%2F%m%%2F%Y"
  11. class LeicestershireParser:
  12. def __init__(self, *args):
  13. self.authority_name = "Leicestershire County Council"
  14. self.authority_short_name = "Leicestershire"
  15. self.base_url = "http://www.leics.gov.uk/index/environment/community_services_planning/planning_applications/index/environment/community_services_planning/planning_applications/eplanning_searchform/eplanning_resultpage.htm?sd=%(date)s&ed=%(date)s&kw=&map=f"
  16. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  17. def getResultsByDayMonthYear(self, day, month, year):
  18. search_date = datetime.date(year, month, day)
  19. response = urllib2.urlopen(self.base_url %{"date": search_date.strftime(search_date_format)})
  20. soup = BeautifulSoup.BeautifulSoup(response.read())
  21. if not soup.find(text=re.compile("No Results Found")):
  22. trs = soup.findAll("table", {"class": "dataTable"})[1].findAll("tr")[1:]
  23. for tr in trs:
  24. tds = tr.findAll("td")
  25. application = PlanningApplication()
  26. # We can fill in the date received without actually looking at the data
  27. application.date_received = search_date
  28. application.council_reference = tds[0].a.string.strip()
  29. application.info_url = urlparse.urljoin(self.base_url, tds[0].a['href'])
  30. application.address = ', '.join([x for x in tds[1].contents
  31. if isinstance(x, BeautifulSoup.NavigableString)])
  32. application.postcode = getPostcodeFromText(application.address)
  33. application.description = tds[2].string.strip()
  34. # To get the comment link we need to fetch the info page
  35. info_response = urllib2.urlopen(application.info_url)
  36. info_soup = BeautifulSoup.BeautifulSoup(info_response.read())
  37. base = info_soup.base['href']
  38. application.comment_url = urlparse.urljoin(base,
  39. info_soup.find("a", target="Planning Application Consultation Form")['href'])
  40. self._results.addApplication(application)
  41. return self._results
  42. def getResults(self, day, month, year):
  43. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  44. if __name__ == '__main__':
  45. parser = LeicestershireParser()
  46. print parser.getResults(1,9,2008)
# TODO
# I suppose we should think about pagination at some point,
# though I've not managed to find a day with more than 1 app yet...