Automatically exported from code.google.com/p/planningalerts
Você não pode selecionar mais de 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

Barnsley.py 2.6 KiB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283
  1. """
  2. This is the screenscraper for planning apps for
  3. Barnsley Metropolitan Borough Council.
  4. The apps for Barnsley are displayed in html pages one per week, starting on
  5. monday. There is no date_received, so we'll have to use the date of the
  6. start of this week.
  7. There is no comment url, so we'll use the email address.
  8. Developmentcontrol@barnsley.gov.uk
  9. """
  10. import urllib2
  11. import urllib
  12. import urlparse
  13. import datetime, time
  14. import cgi
  15. from BeautifulSoup import BeautifulSoup
  16. from PlanningUtils import PlanningApplication, \
  17. PlanningAuthorityResults, \
  18. getPostcodeFromText
  19. date_format = "%d/%m/%Y"
  20. class BarnsleyParser:
  21. comments_email_address = "Developmentcontrol@barnsley.gov.uk"
  22. def __init__(self, *args):
  23. self.authority_name = "Barnsley Metropolitan Borough Council"
  24. self.authority_short_name = "Barnsley"
  25. self.base_url = "http://applications.barnsley.gov.uk/service/development/week_compact.asp?AppDate=%s"
  26. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  27. def getResultsByDayMonthYear(self, day, month, year):
  28. search_day = datetime.date(year, month, day)
  29. # What we actually need is the monday before the date searched for:
  30. monday_before = search_day - datetime.timedelta(search_day.weekday())
  31. # Now get the search page
  32. response = urllib2.urlopen(self.base_url %(monday_before.strftime(date_format)))
  33. soup = BeautifulSoup(response.read())
  34. result_tables = soup.findAll("table", align="Center", cellpadding="3")
  35. for table in result_tables:
  36. application = PlanningApplication()
  37. # We can set the date received and the comment url straight away.
  38. application.comment_url = self.comments_email_address
  39. trs = table.findAll("tr")
  40. application.council_reference = trs[0].a.string.strip()
  41. relative_info_url = trs[0].a['href']
  42. application.info_url = urlparse.urljoin(self.base_url, relative_info_url)
  43. application.date_received = monday_before
  44. application.address = trs[1].findAll("td")[1].string.strip()
  45. application.postcode = getPostcodeFromText(application.address)
  46. application.description = trs[2].findAll("td")[1].string.strip()
  47. self._results.addApplication(application)
  48. return self._results
  49. def getResults(self, day, month, year):
  50. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  51. if __name__ == '__main__':
  52. parser = BarnsleyParser()
  53. print parser.getResults(21,5,2008)