Automatically exported from code.google.com/p/planningalerts
No puede seleccionar más de 25 temas Los temas deben comenzar con una letra o número, pueden incluir guiones ('-') y pueden tener hasta 35 caracteres de largo.
 
 
 
 
 
 

92 líneas
3.3 KiB

  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import datetime
  5. import cgi
  6. import re
  7. comment_re = re.compile("Submit Comment")
  8. mapref_re = re.compile("Map Ref")
  9. import BeautifulSoup
  10. from PlanningUtils import PlanningApplication, \
  11. PlanningAuthorityResults, \
  12. getPostcodeFromText
  13. class AberdeenshireParser:
  14. def __init__(self, *args):
  15. self.authority_name = "Aberdeenshire Council"
  16. self.authority_short_name = "Aberdeenshire"
  17. self.base_url = "http://www.aberdeenshire.gov.uk/planning/apps/search.asp?startDateSearch=%(day)s%%2F%(month)s%%2F%(year)s&endDateSearch=%(day)s%%2F%(month)s%%2F%(year)s&Submit=Search"
  18. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  19. def getResultsByDayMonthYear(self, day, month, year):
  20. search_day = datetime.date(year, month, day)
  21. next = self.base_url %{"day": day,
  22. "month": month,
  23. "year": year,
  24. }
  25. while next:
  26. # Now get the search page
  27. response = urllib2.urlopen(next)
  28. soup = BeautifulSoup.BeautifulSoup(response.read())
  29. trs = soup.table.findAll("tr")[1:] # First one is just headers
  30. for tr in trs:
  31. application = PlanningApplication()
  32. application.date_received = search_day
  33. application.council_reference = tr.a.string
  34. application.info_url = urlparse.urljoin(self.base_url, tr.a['href'])
  35. tds = tr.findAll("td")
  36. application.address = ' '.join([x.replace(" ", " ").strip() for x in tds[2].contents if type(x) == BeautifulSoup.NavigableString and x.strip()])
  37. application.postcode = getPostcodeFromText(application.address)
  38. application.description = tds[4].string.replace(" ", " ").strip()
  39. # Get the info page in order to find the comment url
  40. # we could do this without a download if it wasn't for the
  41. # sector parameter - I wonder what that is?
  42. info_response = urllib2.urlopen(application.info_url)
  43. info_soup = BeautifulSoup.BeautifulSoup(info_response.read())
  44. comment_navstring = info_soup.find(text=comment_re)
  45. if comment_navstring:
  46. application.comment_url = urlparse.urljoin(self.base_url, info_soup.find(text=comment_re).parent['href'])
  47. else:
  48. application.comment_url = "No Comments"
  49. # While we're at it, let's get the OSGB
  50. application.osgb_x, application.osgb_y = [x.strip() for x in info_soup.find(text=mapref_re).findNext("a").string.strip().split(",")]
  51. self._results.addApplication(application)
  52. next_element = soup.find(text="next").parent
  53. if next_element.name == 'a':
  54. next = urlparse.urljoin(self.base_url, next_element['href'])
  55. else:
  56. next = None
  57. return self._results
  58. def getResults(self, day, month, year):
  59. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  60. if __name__ == '__main__':
  61. parser = AberdeenshireParser()
  62. print parser.getResults(7,8,2008)