Automatically exported from code.google.com/p/planningalerts
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

Solihull.py 3.0 KiB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081
  1. """
  2. This is the screenscraper for planning apps for
  3. Solihull Metropolitan Borough Council.
  4. The apps for Solihull are displayed in html pages one per week, starting on Monday.
  5. """
  6. import urllib2
  7. import urllib
  8. import urlparse
  9. import datetime, time
  10. import cgi
  11. from BeautifulSoup import BeautifulSoup
  12. from PlanningUtils import PlanningApplication, \
  13. PlanningAuthorityResults, \
  14. getPostcodeFromText
  # strftime pattern (day/month/four-digit year) used to format the week-start
  # date that is substituted into the weekly-list URL's SD= query parameter.
  date_format = "%d/%m/%Y"
  class SolihullParser:
      """Screen scraper for Solihull Metropolitan Borough Council planning applications.

      The council publishes one HTML page per week, requested by the Monday
      that starts the week. Applications for a single day are picked out of
      that weekly page by the date in their "received" cell.
      """

      def __init__(self, *args):
          """Set up authority names, the weekly-list URL template and the results container.

          Any positional arguments are accepted and ignored.
          """
          self.authority_name = "Solihull Metropolitan Borough Council"
          self.authority_short_name = "Solihull"
          # The %s placeholder receives the week-start Monday, formatted with date_format.
          self.base_url = "http://www.solihull.gov.uk/planning/dc/weeklist.asp?SD=%s&ward=ALL"
          self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

      @staticmethod
      def _week_start(search_day):
          """Return the Monday on or before search_day (a datetime.date)."""
          return search_day - datetime.timedelta(search_day.weekday())

      @staticmethod
      def _parse_received_date(text):
          """Convert a "dd/mm/yyyy" string (surrounding whitespace allowed) to a datetime.date.

          Raises IndexError if fewer than three "/"-separated parts are present
          and ValueError if a part is not an integer or the date is invalid.
          """
          parts = text.strip().split("/")
          return datetime.date(int(parts[2]), int(parts[1]), int(parts[0]))

      def getResultsByDayMonthYear(self, day, month, year):
          """Scrape the weekly list and collect applications received on one day.

          day, month and year are integers. The page for the week containing
          that date is fetched, and every application table whose received
          date equals the requested date is added to self._results.

          Returns self._results. Applications are added to the same container
          on every call, so repeated calls on one instance accumulate.
          """
          search_day = datetime.date(year, month, day)

          # The site indexes its weekly pages by the Monday that starts the week.
          week_start = self._week_start(search_day)

          response = urllib2.urlopen(self.base_url % week_start.strftime(date_format))
          try:
              soup = BeautifulSoup(response.read())
          finally:
              # Release the connection even if reading or parsing fails.
              response.close()

          for table in soup.findAll("table", width="98%", cellpadding="2"):
              trs = table.findAll("tr")

              # Filter first so no work is spent on applications that will be
              # discarded. Compare the whole date, not just the day of month,
              # so an entry received on the same day number of another month
              # or year is not mistaken for the target day.
              date_received = self._parse_received_date(trs[3].findAll("td")[3].string)
              if date_received != search_day:
                  continue

              application = PlanningApplication()
              application.date_received = date_received
              application.council_reference = trs[0].strong.string.strip()
              application.info_url = urlparse.urljoin(self.base_url, trs[0].a['href'])
              application.address = trs[1].findAll("td")[1].string.strip()
              application.postcode = getPostcodeFromText(application.address)
              application.description = trs[2].findAll("td")[1].string.strip()

              # The comment link is optional: a missing row, cell, anchor or
              # href falls back to a placeholder rather than aborting the run.
              try:
                  relative_comment_url = trs[5].findAll("td")[1].a['href']
              except (AttributeError, IndexError, KeyError, TypeError):
                  application.comment_url = "No Comment URL."
              else:
                  application.comment_url = urlparse.urljoin(self.base_url, relative_comment_url)

              self._results.addApplication(application)

          return self._results

      def getResults(self, day, month, year):
          """Return displayXML() of the results for the given day.

          day, month and year may be ints or numeric strings; they are
          converted with int() before the lookup.
          """
          return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  if __name__ == '__main__':
      # Manual test entry point: scrape one day and print the result.
      import sys

      parser = SolihullParser()

      # With exactly three command-line arguments (day month year) use those;
      # otherwise fall back to the fixed test date copied from the Barnsley example.
      if len(sys.argv) == 4:
          day, month, year = sys.argv[1:4]
      else:
          day, month, year = 27, 10, 2008

      # A single parenthesised argument prints identically under the Python 2
      # print statement and the Python 3 print function.
      print(parser.getResults(day, month, year))