Automatically exported from code.google.com/p/planningalerts
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

92 lines
3.3 KiB

  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import datetime
  5. import cgi
  6. import re
  7. comment_re = re.compile("Submit Comment")
  8. mapref_re = re.compile("Map Ref")
  9. import BeautifulSoup
  10. from PlanningUtils import PlanningApplication, \
  11. PlanningAuthorityResults, \
  12. getPostcodeFromText
  13. class AberdeenshireParser:
  14. def __init__(self, *args):
  15. self.authority_name = "Aberdeenshire Council"
  16. self.authority_short_name = "Aberdeenshire"
  17. self.base_url = "http://www.aberdeenshire.gov.uk/planning/apps/search.asp?startDateSearch=%(day)s%%2F%(month)s%%2F%(year)s&endDateSearch=%(day)s%%2F%(month)s%%2F%(year)s&Submit=Search"
  18. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  19. def getResultsByDayMonthYear(self, day, month, year):
  20. search_day = datetime.date(year, month, day)
  21. next = self.base_url %{"day": day,
  22. "month": month,
  23. "year": year,
  24. }
  25. while next:
  26. # Now get the search page
  27. response = urllib2.urlopen(next)
  28. soup = BeautifulSoup.BeautifulSoup(response.read())
  29. trs = soup.table.findAll("tr")[1:] # First one is just headers
  30. for tr in trs:
  31. application = PlanningApplication()
  32. application.date_received = search_day
  33. application.council_reference = tr.a.string
  34. application.info_url = urlparse.urljoin(self.base_url, tr.a['href'])
  35. tds = tr.findAll("td")
  36. application.address = ' '.join([x.replace(" ", " ").strip() for x in tds[2].contents if type(x) == BeautifulSoup.NavigableString and x.strip()])
  37. application.postcode = getPostcodeFromText(application.address)
  38. application.description = tds[4].string.replace(" ", " ").strip()
  39. # Get the info page in order to find the comment url
  40. # we could do this without a download if it wasn't for the
  41. # sector parameter - I wonder what that is?
  42. info_response = urllib2.urlopen(application.info_url)
  43. info_soup = BeautifulSoup.BeautifulSoup(info_response.read())
  44. comment_navstring = info_soup.find(text=comment_re)
  45. if comment_navstring:
  46. application.comment_url = urlparse.urljoin(self.base_url, info_soup.find(text=comment_re).parent['href'])
  47. else:
  48. application.comment_url = "No Comments"
  49. # While we're at it, let's get the OSGB
  50. application.osgb_x, application.osgb_y = [x.strip() for x in info_soup.find(text=mapref_re).findNext("a").string.strip().split(",")]
  51. self._results.addApplication(application)
  52. next_element = soup.find(text="next").parent
  53. if next_element.name == 'a':
  54. next = urlparse.urljoin(self.base_url, next_element['href'])
  55. else:
  56. next = None
  57. return self._results
  58. def getResults(self, day, month, year):
  59. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  60. if __name__ == '__main__':
  61. parser = AberdeenshireParser()
  62. print parser.getResults(7,8,2008)