Automatically exported from code.google.com/p/planningalerts
You can select up to 25 topics. Topics must start with a letter or number, and can include dashes ('-') and be up to 35 characters long.
 
 
 
 
 
 

101 lines
3.6 KiB

  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import datetime, time
  5. import cgi
  6. from BeautifulSoup import BeautifulSoup
  7. from PlanningUtils import PlanningApplication, \
  8. PlanningAuthorityResults, \
  9. getPostcodeFromText
  10. date_format = "%d/%m/%Y"
  11. class ShetlandParser:
  12. def __init__(self, *args):
  13. self.authority_name = "Shetland Islands Council"
  14. self.authority_short_name = "Shetland Islands"
  15. self.base_url = "http://www.shetland.gov.uk/planningcontrol/apps/apps.asp?time=14&Orderby=DESC&parish=All&Pref=&Address=&Applicant=&ApplicantBut=View&sortby=PlanRef&offset=0"
  16. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  17. def getResultsByDayMonthYear(self):
  18. # Note that we don't take the day, month and year parameters here.
  19. # First get the search page
  20. request = urllib2.Request(self.base_url)
  21. response = urllib2.urlopen(request)
  22. soup = BeautifulSoup(response.read())
  23. # The apps are in the 5th table on the page (not a very good way to get it...)
  24. results_table = soup.findAll("table")[5]
  25. # Now we need to find the trs which contain the apps.
  26. # The first TR is just headers.
  27. # After that they alternate between containing an app and just some display graphics
  28. # until the third from last. After that, they contain more rubbish.
  29. trs = results_table.findAll("tr")[1:-2]
  30. for i in range(len(trs)):
  31. # We are only interested in the trs in even positions in the list.
  32. if i % 2 == 0:
  33. tr = trs[i]
  34. application = PlanningApplication()
  35. application.date_received = datetime.datetime(*(time.strptime(comment_url_element.findNext("td").string.strip(), date_format)[0:6]))
  36. application.council_reference = tr.a.string
  37. comment_url_element = tr.find(text="comment on this planning application").parent
  38. application.comment_url = urlparse.urljoin(self.base_url, comment_url_element['href'])
  39. application.info_url = urlparse.urljoin(self.base_url, tr.a['href'])
  40. info_response = urllib2.urlopen(application.info_url)
  41. info_soup = BeautifulSoup(info_response.read())
  42. info_table = info_soup.findAll("table")[2]
  43. application.description = info_table.find(text="Proposal:").findNext("td").contents[0].strip()
  44. application.postcode = info_table.find(text="Postcode:").findNext("td").contents[0].strip()
  45. # Now to get the address. This will be split across several tds.
  46. address_start_td = info_table.find("td", rowspan="4")
  47. # We need the first bit of the address from this tr
  48. address_bits = [address_start_td.findNext("td").string.strip()]
  49. # We will need the first td from the next three trs after this
  50. for address_tr in address_start_td.findAllNext("tr")[:3]:
  51. address_line = address_tr.td.string.strip()
  52. if address_line:
  53. address_bits.append(address_line)
  54. address_bits.append(application.postcode)
  55. application.address = ', '.join(address_bits)
  56. self._results.addApplication(application)
  57. return self._results
  58. def getResults(self, day, month, year):
  59. return self.getResultsByDayMonthYear().displayXML()
  60. if __name__ == '__main__':
  61. parser = ShetlandParser()
  62. print parser.getResults(21,5,2008)
  63. # TODO: Sort out pagination