Automatically exported from code.google.com/p/planningalerts
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

101 lines
3.6 KiB

  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import datetime, time
  5. import cgi
  6. from BeautifulSoup import BeautifulSoup
  7. from PlanningUtils import PlanningApplication, \
  8. PlanningAuthorityResults, \
  9. getPostcodeFromText
  10. date_format = "%d/%m/%Y"
  11. class ShetlandParser:
  12. def __init__(self, *args):
  13. self.authority_name = "Shetland Islands Council"
  14. self.authority_short_name = "Shetland Islands"
  15. self.base_url = "http://www.shetland.gov.uk/planningcontrol/apps/apps.asp?time=14&Orderby=DESC&parish=All&Pref=&Address=&Applicant=&ApplicantBut=View&sortby=PlanRef&offset=0"
  16. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  17. def getResultsByDayMonthYear(self):
  18. # Note that we don't take the day, month and year parameters here.
  19. # First get the search page
  20. request = urllib2.Request(self.base_url)
  21. response = urllib2.urlopen(request)
  22. soup = BeautifulSoup(response.read())
  23. # The apps are in the 5th table on the page (not a very good way to get it...)
  24. results_table = soup.findAll("table")[5]
  25. # Now we need to find the trs which contain the apps.
  26. # The first TR is just headers.
  27. # After that they alternate between containing an app and just some display graphics
  28. # until the third from last. After that, they contain more rubbish.
  29. trs = results_table.findAll("tr")[1:-2]
  30. for i in range(len(trs)):
  31. # We are only interested in the trs in even positions in the list.
  32. if i % 2 == 0:
  33. tr = trs[i]
  34. application = PlanningApplication()
  35. application.date_received = datetime.datetime(*(time.strptime(comment_url_element.findNext("td").string.strip(), date_format)[0:6]))
  36. application.council_reference = tr.a.string
  37. comment_url_element = tr.find(text="comment on this planning application").parent
  38. application.comment_url = urlparse.urljoin(self.base_url, comment_url_element['href'])
  39. application.info_url = urlparse.urljoin(self.base_url, tr.a['href'])
  40. info_response = urllib2.urlopen(application.info_url)
  41. info_soup = BeautifulSoup(info_response.read())
  42. info_table = info_soup.findAll("table")[2]
  43. application.description = info_table.find(text="Proposal:").findNext("td").contents[0].strip()
  44. application.postcode = info_table.find(text="Postcode:").findNext("td").contents[0].strip()
  45. # Now to get the address. This will be split across several tds.
  46. address_start_td = info_table.find("td", rowspan="4")
  47. # We need the first bit of the address from this tr
  48. address_bits = [address_start_td.findNext("td").string.strip()]
  49. # We will need the first td from the next three trs after this
  50. for address_tr in address_start_td.findAllNext("tr")[:3]:
  51. address_line = address_tr.td.string.strip()
  52. if address_line:
  53. address_bits.append(address_line)
  54. address_bits.append(application.postcode)
  55. application.address = ', '.join(address_bits)
  56. self._results.addApplication(application)
  57. return self._results
  58. def getResults(self, day, month, year):
  59. return self.getResultsByDayMonthYear().displayXML()
  60. if __name__ == '__main__':
  61. parser = ShetlandParser()
  62. print parser.getResults(21,5,2008)
  63. # TODO: Sort out pagination