Automatically exported from code.google.com/p/planningalerts
Nelze vybrat více než 25 témat. Téma musí začínat písmenem nebo číslem, může obsahovat pomlčky („-“) a může být dlouhé až 35 znaků.
 
 
 
 
 
 

72 řádky
2.7 KiB

  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import datetime
  5. from BeautifulSoup import BeautifulSoup
  6. from PlanningUtils import PlanningApplication, \
  7. PlanningAuthorityResults, \
  8. getPostcodeFromText
  9. date_format = "%d%%2F%m%%2F%Y"
  10. class MendipParser:
  11. def __init__(self, *args):
  12. self.authority_name = "Mendip District Council"
  13. self.authority_short_name = "Mendip"
  14. # The site itelf uses a search by validated date, but received date seems
  15. # to be there too, and to work...
  16. # self.base_url = "http://www.mendip.gov.uk/PODS/ApplicationSearchResults.asp?DateRecvFrom=&DateRecvTo=&DateValidFrom=%(date)s&DateValidTo=%(date)s&Search=Search"
  17. self.base_url = "http://www.mendip.gov.uk/PODS/ApplicationSearchResults.asp?DateRecvFrom=%(date)s&DateRecvTo=%(date)s&Search=Search"
  18. self.comment_url = "http://www.mendip.gov.uk/ShowForm.asp?fm_fid=107&AppNo=%(reference)s&SiteAddress=%(address)s"
  19. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  20. def getResultsByDayMonthYear(self, day, month, year):
  21. search_date = datetime.date(year, month, day)
  22. search_url = self.base_url %{"date": search_date.strftime(date_format)}
  23. while search_url:
  24. response = urllib2.urlopen(search_url)
  25. soup = BeautifulSoup(response.read())
  26. if soup.find(text="No applications matched the search criteria"):
  27. break
  28. for tr in soup.find("table", summary="Application Results").tbody.findAll("tr"):
  29. application = PlanningApplication()
  30. application.date_received = search_date
  31. tds = tr.findAll("td")
  32. application.council_reference = tds[0].a.string.strip()
  33. application.info_url = urlparse.urljoin(self.base_url, tds[0].a['href'])
  34. application.description = tds[1].p.string.strip()
  35. application.address = tds[2].p.string.strip()
  36. application.comment_url = self.comment_url %{
  37. "reference": application.council_reference,
  38. "address": urllib.quote_plus(application.address),
  39. }
  40. self._results.addApplication(application)
  41. next_link = soup.find("a", title="Go to the next page")
  42. search_url = urlparse.urljoin(self.base_url, next_link['href']) if next_link else None
  43. return self._results
  44. def getResults(self, day, month, year):
  45. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  46. if __name__ == '__main__':
  47. parser = MendipParser()
  48. print parser.getResults(1,10,2008)