| @@ -0,0 +1,100 @@ | |||||
| import urllib2 | |||||
| import urllib | |||||
| import urlparse | |||||
| import datetime, time | |||||
| import cgi | |||||
| from BeautifulSoup import BeautifulSoup | |||||
| from PlanningUtils import PlanningApplication, \ | |||||
| PlanningAuthorityResults, \ | |||||
| getPostcodeFromText | |||||
| date_format = "%d/%m/%Y" | |||||
class ShetlandParser:
    """Scraper for planning applications from Shetland Islands Council.

    The council's search page always returns the last 14 days of
    applications, so the date arguments accepted by getResults() are
    ignored.
    """

    def __init__(self, *args):
        self.authority_name = "Shetland Islands Council"
        self.authority_short_name = "Shetland Islands"
        # Search URL listing the most recent applications (time=14 days).
        self.base_url = "http://www.shetland.gov.uk/planningcontrol/apps/apps.asp?time=14&Orderby=DESC&parish=All&Pref=&Address=&Applicant=&ApplicantBut=View&sortby=PlanRef&offset=0"
        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

    def getResultsByDayMonthYear(self):
        """Fetch and parse the current list of planning applications.

        Returns the populated PlanningAuthorityResults object.
        Note that we don't take day, month and year parameters here:
        the search page always covers the last 14 days.
        """
        # First get the search page.
        request = urllib2.Request(self.base_url)
        response = urllib2.urlopen(request)
        soup = BeautifulSoup(response.read())

        # The apps are in the 6th table on the page (index 5).
        # Not a very robust way to find it, but the page has no ids.
        results_table = soup.findAll("table")[5]

        # The first TR is just headers. After that, rows containing an
        # app alternate with display-only rows, until the third from
        # last; the final rows contain more rubbish. Slicing [1:-2:2]
        # therefore picks out exactly the application rows.
        for tr in results_table.findAll("tr")[1:-2:2]:
            application = PlanningApplication()

            application.council_reference = tr.a.string

            # BUGFIX: locate comment_url_element *before* using it to
            # read the received date. The original code dereferenced
            # comment_url_element two lines before assigning it, which
            # raised UnboundLocalError on the first iteration.
            comment_url_element = tr.find(text="comment on this planning application").parent
            application.comment_url = urlparse.urljoin(self.base_url, comment_url_element['href'])

            # The received date is in the td following the comment link.
            application.date_received = datetime.datetime(
                *(time.strptime(comment_url_element.findNext("td").string.strip(), date_format)[0:6]))

            application.info_url = urlparse.urljoin(self.base_url, tr.a['href'])

            # Fetch the detail page for description, postcode and address.
            info_response = urllib2.urlopen(application.info_url)
            info_soup = BeautifulSoup(info_response.read())
            info_table = info_soup.findAll("table")[2]

            application.description = info_table.find(text="Proposal:").findNext("td").contents[0].strip()
            application.postcode = info_table.find(text="Postcode:").findNext("td").contents[0].strip()

            # Now to get the address. It is split across several tds:
            # the first bit is in the td after the rowspan="4" cell,
            # then the first td of each of the next three trs (blank
            # lines are skipped), finishing with the postcode.
            address_start_td = info_table.find("td", rowspan="4")
            address_bits = [address_start_td.findNext("td").string.strip()]
            for address_tr in address_start_td.findAllNext("tr")[:3]:
                address_line = address_tr.td.string.strip()
                if address_line:
                    address_bits.append(address_line)
            address_bits.append(application.postcode)

            application.address = ', '.join(address_bits)

            self._results.addApplication(application)

        return self._results

    def getResults(self, day, month, year):
        """Return the scraped applications rendered as XML.

        The day/month/year arguments are accepted for interface
        compatibility with the other scrapers but are ignored here.
        """
        return self.getResultsByDayMonthYear().displayXML()
| if __name__ == '__main__': | |||||
| parser = ShetlandParser() | |||||
| print parser.getResults(21,5,2008) | |||||
| # TODO: Sort out pagination | |||||