import urllib2
import urllib
import urlparse

import datetime
import time

import cgi

from BeautifulSoup import BeautifulSoup

from PlanningUtils import PlanningApplication, \
    PlanningAuthorityResults, \
    getPostcodeFromText


class FifeParser:
    """Scraper for the planning application search on the Fife Council site.

    Applications found by a search are accumulated in ``self._results``, a
    ``PlanningAuthorityResults`` created once in the constructor.
    """

    def __init__(self, *args):
        """Set up the authority names, the search/comment URLs and the
        results container.  Any positional arguments are accepted and
        ignored."""
        self.authority_name = "Fife Council"
        self.authority_short_name = "Fife"
        self.base_url = "http://www.fifedirect.org.uk/topics/index.cfm"

        # The council reference is substituted for the %s.
        self.comment_url = "http://www.ukplanning.com/ukp/showCaseFile.do?councilName=Fife+Council&appNumber=%s"

        self._results = PlanningAuthorityResults(self.authority_name,
                                                 self.authority_short_name)

    def getResultsByDayMonthYear(self, day, month, year):
        """Fetch the applications listed for a single day.

        ``day``, ``month`` and ``year`` are integers; the same date is used
        as both the start and the end of the search range.

        Each application found is added to ``self._results``, which is also
        the return value.  If the page contains no results table, the
        results object is returned without anything being added.
        """
        search_date = datetime.date(year, month, day)

        search_data = urllib.urlencode(
            [("fuseaction", "planapps.list"),
             ("SUBJECTID", "104CC166-3ED1-4D22-B9F1E2FB8438478A"),
             ("src_fromdayRec", day),
             ("src_frommonthRec", month),
             ("src_fromyearRec", year),
             ("src_todayRec", day),
             ("src_tomonthRec", month),
             ("src_toyearRec", year),
             ("findroadworks", "GO"),
             ]
            )

        search_url = self.base_url + "?" + search_data

        # Always release the connection, even if reading fails.
        response = urllib2.urlopen(search_url)
        try:
            page = response.read()
        finally:
            response.close()

        soup = BeautifulSoup(page)

        results_table = soup.find("table", id="results")
        if results_table is None:
            # No table to parse (presumably a day with no applications -
            # TODO confirm against the live site), so there is nothing to add.
            return self._results

        # Apart from the first tr, which contains headers, the trs come in
        # pairs for each application: a summary row followed by a details row.
        rows = results_table.findAll("tr")[1:]

        # Step over complete pairs only; a trailing unpaired row is skipped.
        for index in range(0, len(rows) - 1, 2):
            summary_row = rows[index]
            details_row = rows[index + 1]

            application = PlanningApplication()
            application.date_received = search_date

            tds = summary_row.findAll("td")
            reference_link = tds[0].a

            application.council_reference = reference_link.string.strip()
            application.comment_url = self.comment_url % (application.council_reference)
            application.info_url = urlparse.urljoin(self.base_url, reference_link['href'])
            application.address = ', '.join([x.strip() for x in tds[1].findAll(text=True)])
            application.postcode = getPostcodeFromText(application.address)

            # Join every text node so that nested markup inside the cell
            # does not leave us with a None ``.string``.
            details_text = ''.join(details_row.td.findAll(text=True)).strip()

            # Get rid of the "Details: " at the beginning.
            application.description = details_text[len("Details: "):]

            self._results.addApplication(application)

        return self._results

    def getResults(self, day, month, year):
        """Run the search for the given date and return the results as XML.

        ``day``, ``month`` and ``year`` may be anything ``int()`` accepts.
        """
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()


if __name__ == '__main__':
    parser = FifeParser()
    print(parser.getResults(21, 5, 2008))

# TODO
# Paginates at 50. Unlikely on a single day, so we'll worry about it later.