Automatically exported from code.google.com/p/planningalerts
Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.
 
 
 
 
 
 

92 rader
3.3 KiB

  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import datetime, time
  5. import cgi
  6. import re
# Pre-compiled patterns used on an application's detail page: one locates the
# "Submit Comment" link, the other the "Map Ref" label (for the OSGB grid ref).
comment_re = re.compile("Submit Comment")
mapref_re = re.compile("Map Ref")
  9. import BeautifulSoup
  10. from PlanningUtils import PlanningApplication, \
  11. PlanningAuthorityResults, \
  12. getPostcodeFromText
  13. class AberdeenshireParser:
  14. def __init__(self, *args):
  15. self.authority_name = "Aberdeenshire Council"
  16. self.authority_short_name = "Aberdeenshire"
  17. self.base_url = "http://www.aberdeenshire.gov.uk/planning/apps/search.asp?startDateSearch=%(day)s%%2F%(month)s%%2F%(year)s&endDateSearch=%(day)s%%2F%(month)s%%2F%(year)s&Submit=Search"
  18. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  19. def getResultsByDayMonthYear(self, day, month, year):
  20. search_day = datetime.date(year, month, day)
  21. next = self.base_url %{"day": day,
  22. "month": month,
  23. "year": year,
  24. }
  25. while next:
  26. # Now get the search page
  27. response = urllib2.urlopen(next)
  28. soup = BeautifulSoup.BeautifulSoup(response.read())
  29. trs = soup.table.findAll("tr")[1:] # First one is just headers
  30. for tr in trs:
  31. application = PlanningApplication()
  32. application.date_received = search_day
  33. application.council_reference = tr.a.string
  34. application.info_url = urlparse.urljoin(self.base_url, tr.a['href'])
  35. tds = tr.findAll("td")
  36. application.address = ' '.join([x.replace(" ", " ").strip() for x in tds[2].contents if type(x) == BeautifulSoup.NavigableString and x.strip()])
  37. application.postcode = getPostcodeFromText(application.address)
  38. application.description = tds[4].string.replace(" ", " ").strip()
  39. # Get the info page in order to find the comment url
  40. # we could do this without a download if it wasn't for the
  41. # sector parameter - I wonder what that is?
  42. info_response = urllib2.urlopen(application.info_url)
  43. info_soup = BeautifulSoup.BeautifulSoup(info_response.read())
  44. comment_navstring = info_soup.find(text=comment_re)
  45. if comment_navstring:
  46. application.comment_url = urlparse.urljoin(self.base_url, info_soup.find(text=comment_re).parent['href'])
  47. else:
  48. application.comment_url = "No Comments"
  49. # While we're at it, let's get the OSGB
  50. application.osgb_x, application.osgb_y = [x.strip() for x in info_soup.find(text=mapref_re).findNext("a").string.strip().split(",")]
  51. self._results.addApplication(application)
  52. next_element = soup.find(text="next").parent
  53. if next_element.name == 'a':
  54. next = urlparse.urljoin(self.base_url, next_element['href'])
  55. else:
  56. next = None
  57. return self._results
  58. def getResults(self, day, month, year):
  59. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
if __name__ == '__main__':
    parser = AberdeenshireParser()
    # Fetch applications received on 7 August 2008 and print them as XML.
    # (Python 2 print statement — this module targets Python 2.)
    print parser.getResults(7,8,2008)