Automatically exported from code.google.com/p/planningalerts
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

72 lines
2.7 KiB

  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import datetime
  5. from BeautifulSoup import BeautifulSoup
  6. from PlanningUtils import PlanningApplication, \
  7. PlanningAuthorityResults, \
  8. getPostcodeFromText
  # strftime() pattern used to put the search date into the query string:
  # day, month and year joined by "%2F", the URL-encoded form of "/".
  # The separator is written "%%2F" because strftime() treats "%%" as a
  # literal "%", so 1 October 2008 is rendered as "01%2F10%2F2008".
  date_format = "%%2F".join(("%d", "%m", "%Y"))
  class MendipParser:
      """Scraper for the Mendip District Council planning application search.

      Applications are looked up by received date, following the site's
      "next page" links, and each result row is turned into a
      PlanningApplication that is added to a PlanningAuthorityResults object.

      The results object is created once in __init__, so every search made
      through the same parser instance adds to the same collection.
      """

      def __init__(self, *args):
          """Set up the authority names, URL templates and results container.

          Any positional arguments are accepted and ignored.
          """
          self.authority_name = "Mendip District Council"
          self.authority_short_name = "Mendip"

          # The site itself uses a search by validated date, but received date
          # seems to be there too, and to work...
          # self.base_url = "http://www.mendip.gov.uk/PODS/ApplicationSearchResults.asp?DateRecvFrom=&DateRecvTo=&DateValidFrom=%(date)s&DateValidTo=%(date)s&Search=Search"
          self.base_url = "http://www.mendip.gov.uk/PODS/ApplicationSearchResults.asp?DateRecvFrom=%(date)s&DateRecvTo=%(date)s&Search=Search"
          self.comment_url = "http://www.mendip.gov.uk/ShowForm.asp?fm_fid=107&AppNo=%(reference)s&SiteAddress=%(address)s"

          self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

      def _fetch_soup(self, url):
          """Download url and return its contents parsed by BeautifulSoup."""
          response = urllib2.urlopen(url)
          try:
              html = response.read()
          finally:
              # Always release the connection, even if the read fails part-way.
              response.close()
          return BeautifulSoup(html)

      def _parse_row(self, tr, search_date):
          """Build a PlanningApplication from one row of the results table.

          The first cell supplies the reference and the link to the detail
          page, the second the description and the third the address.
          search_date is recorded as the date received.
          """
          tds = tr.findAll("td")
          reference_link = tds[0].a

          application = PlanningApplication()
          application.date_received = search_date
          application.council_reference = reference_link.string.strip()
          application.info_url = urlparse.urljoin(self.base_url, reference_link['href'])
          application.description = tds[1].p.string.strip()
          application.address = tds[2].p.string.strip()

          # Python 2's urllib.quote_plus raises KeyError when handed a unicode
          # string containing non-ASCII characters, so encode the scraped
          # address to bytes first.
          # NOTE(review): UTF-8 is assumed to be acceptable to the comment
          # form -- confirm against the live site.
          quoted_address = urllib.quote_plus(application.address.encode("utf-8"))
          application.comment_url = self.comment_url % {
              "reference": application.council_reference,
              "address": quoted_address,
          }
          return application

      def getResultsByDayMonthYear(self, day, month, year):
          """Collect the applications received on the given date.

          day, month and year are integers passed straight to datetime.date,
          which raises ValueError if they do not form a valid date. Errors
          raised while fetching a page are not caught here.

          Returns the parser's PlanningAuthorityResults object, with one
          PlanningApplication added for every row found on every results page.
          """
          search_date = datetime.date(year, month, day)
          search_url = self.base_url % {"date": search_date.strftime(date_format)}

          while search_url:
              soup = self._fetch_soup(search_url)

              # The site reports an empty search with this message rather than
              # with an empty results table.
              if soup.find(text="No applications matched the search criteria"):
                  break

              results_table = soup.find("table", summary="Application Results")
              for tr in results_table.tbody.findAll("tr"):
                  self._results.addApplication(self._parse_row(tr, search_date))

              # Follow the pagination link if there is one; otherwise stop.
              next_link = soup.find("a", title="Go to the next page")
              search_url = urlparse.urljoin(self.base_url, next_link['href']) if next_link else None

          return self._results

      def getResults(self, day, month, year):
          """Return the applications received on the given date as XML.

          day, month and year are converted with int() before the search is
          run; the output is whatever displayXML() on the results object
          returns.
          """
          return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  if __name__ == '__main__':
      # Manual smoke test: print the XML for applications received on
      # 1 October 2008.
      mendip = MendipParser()
      print(mendip.getResults(1, 10, 2008))