Automatically exported from code.google.com/p/planningalerts
Du kannst nicht mehr als 25 Themen auswählen Themen müssen entweder mit einem Buchstaben oder einer Ziffer beginnen. Sie können Bindestriche („-“) enthalten und bis zu 35 Zeichen lang sein.
 
 
 
 
 
 

92 Zeilen
3.3 KiB

  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import datetime, time
  5. import cgi
  6. import re
  7. comment_re = re.compile("Submit Comment")
  8. mapref_re = re.compile("Map Ref")
  9. import BeautifulSoup
  10. from PlanningUtils import PlanningApplication, \
  11. PlanningAuthorityResults, \
  12. getPostcodeFromText
  13. class AberdeenshireParser:
  14. def __init__(self, *args):
  15. self.authority_name = "Aberdeenshire Council"
  16. self.authority_short_name = "Aberdeenshire"
  17. self.base_url = "http://www.aberdeenshire.gov.uk/planning/apps/search.asp?startDateSearch=%(day)s%%2F%(month)s%%2F%(year)s&endDateSearch=%(day)s%%2F%(month)s%%2F%(year)s&Submit=Search"
  18. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  19. def getResultsByDayMonthYear(self, day, month, year):
  20. search_day = datetime.date(year, month, day)
  21. next = self.base_url %{"day": day,
  22. "month": month,
  23. "year": year,
  24. }
  25. while next:
  26. # Now get the search page
  27. response = urllib2.urlopen(next)
  28. soup = BeautifulSoup.BeautifulSoup(response.read())
  29. trs = soup.table.findAll("tr")[1:] # First one is just headers
  30. for tr in trs:
  31. application = PlanningApplication()
  32. application.date_received = search_day
  33. application.council_reference = tr.a.string
  34. application.info_url = urlparse.urljoin(self.base_url, tr.a['href'])
  35. tds = tr.findAll("td")
  36. application.address = ' '.join([x.replace(" ", " ").strip() for x in tds[2].contents if type(x) == BeautifulSoup.NavigableString and x.strip()])
  37. application.postcode = getPostcodeFromText(application.address)
  38. application.description = tds[4].string.replace(" ", " ").strip()
  39. # Get the info page in order to find the comment url
  40. # we could do this without a download if it wasn't for the
  41. # sector parameter - I wonder what that is?
  42. info_response = urllib2.urlopen(application.info_url)
  43. info_soup = BeautifulSoup.BeautifulSoup(info_response.read())
  44. comment_navstring = info_soup.find(text=comment_re)
  45. if comment_navstring:
  46. application.comment_url = urlparse.urljoin(self.base_url, info_soup.find(text=comment_re).parent['href'])
  47. else:
  48. application.comment_url = "No Comments"
  49. # While we're at it, let's get the OSGB
  50. application.osgb_x, application.osgb_y = [x.strip() for x in info_soup.find(text=mapref_re).findNext("a").string.strip().split(",")]
  51. self._results.addApplication(application)
  52. next_element = soup.find(text="next").parent
  53. if next_element.name == 'a':
  54. next = urlparse.urljoin(self.base_url, next_element['href'])
  55. else:
  56. next = None
  57. return self._results
  58. def getResults(self, day, month, year):
  59. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  60. if __name__ == '__main__':
  61. parser = AberdeenshireParser()
  62. print parser.getResults(7,8,2008)