Automatically exported from code.google.com/p/planningalerts
 
 
 
 
 
 

97 linhas
2.8 KiB

  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import datetime, time
  5. #import re
  6. from BeautifulSoup import BeautifulSoup
  7. from PlanningUtils import PlanningApplication, \
  8. PlanningAuthorityResults, \
  9. getPostcodeFromText
  10. date_format = "%d/%m/%Y"
  11. # Where the council reference fills the gap
  12. comment_url_end = "comment.asp?%s"
  13. #comment_regex = re.compile("Comment on this ")
  14. class RutlandLikeParser:
  15. def __init__(self,
  16. authority_name,
  17. authority_short_name,
  18. base_url,
  19. debug=False):
  20. self.authority_name = authority_name
  21. self.authority_short_name = authority_short_name
  22. self.base_url = base_url
  23. self.debug = debug
  24. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  25. def getResultsByDayMonthYear(self, day, month, year):
  26. search_date = datetime.date(year, month, day)
  27. date_string = search_date.strftime(date_format)
  28. search_data = urllib.urlencode({"reference": "",
  29. "undecided": "yes",
  30. "dateFrom": date_string,
  31. "dateTo": date_string,
  32. "Address": "",
  33. "validate": "true",
  34. })
  35. request = urllib2.Request(self.base_url, search_data)
  36. response = urllib2.urlopen(request)
  37. html = response.read()
  38. soup = BeautifulSoup(html)
  39. tables = soup.findAll("table", {"style": "width:auto;"})
  40. if not tables:
  41. return self._results
  42. # We don't want the first or last tr
  43. trs = tables[0].findAll("tr")[1:-1]
  44. for tr in trs:
  45. app = PlanningApplication()
  46. tds = tr.findAll("td")
  47. if len(tds) == 4:
  48. local_info_url = tds[0].a['href']
  49. app.info_url = urlparse.urljoin(self.base_url, local_info_url)
  50. app.council_reference = tds[0].a.string
  51. app.address = tds[1].string
  52. app.postcode = getPostcodeFromText(app.address)
  53. app.description = tds[2].string
  54. app.comment_url = urlparse.urljoin(self.base_url, comment_url_end %app.council_reference)
  55. app.date_received = search_date
  56. self._results.addApplication(app)
  57. return self._results
  58. def getResults(self, day, month, year):
  59. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  60. if __name__ == '__main__':
  61. parser = RutlandLikeParser("Rutland long", "Rutland", "http://www.meltononline.co.uk/planning/searchparam.asp")
  62. print parser.getResults(3,2,2008)