"""Scraper for planning-application search pages that share the Rutland layout.

The search form at ``base_url`` is POSTed with a single-day date range and
the first results table in the returned HTML is turned into
``PlanningApplication`` objects.
"""

import urllib2
import urllib
import urlparse

import datetime
import time
#import re

from BeautifulSoup import BeautifulSoup

from PlanningUtils import PlanningApplication, \
    PlanningAuthorityResults, \
    getPostcodeFromText

# Format in which the search form is sent its from/to dates.
date_format = "%d/%m/%Y"

# Where the council reference fills the gap
comment_url_end = "comment.asp?%s"

#comment_regex = re.compile("Comment on this ")


class RutlandLikeParser:
    """Parser for council sites whose search results look like Rutland's.

    NOTE: the results container is created once in ``__init__`` and every
    call to ``getResultsByDayMonthYear`` appends to it, so repeated searches
    on the same instance accumulate applications.
    """

    def __init__(self,
                 authority_name,
                 authority_short_name,
                 base_url,
                 debug=False):
        """Store the authority details and create an empty results container.

        authority_name       -- full name, passed to PlanningAuthorityResults.
        authority_short_name -- short name, passed to PlanningAuthorityResults.
        base_url             -- URL the search is POSTed to; also the base
                                against which relative links are resolved.
        debug                -- stored on the instance; not otherwise used by
                                this class.
        """
        self.authority_name = authority_name
        self.authority_short_name = authority_short_name
        self.base_url = base_url

        self.debug = debug

        self._results = PlanningAuthorityResults(self.authority_name,
                                                 self.authority_short_name)

    def getResultsByDayMonthYear(self, day, month, year):
        """Search for applications on one date and collect them.

        day, month, year are integers; ``datetime.date`` raises ValueError
        if they do not form a valid date.

        Returns the instance's PlanningAuthorityResults, with one
        PlanningApplication added for each four-cell row of the first
        matching results table (none are added if no such table exists).
        """
        search_date = datetime.date(year, month, day)
        date_string = search_date.strftime(date_format)

        # The same day is used for both ends of the range.
        search_data = urllib.urlencode({"reference": "",
                                        "undecided": "yes",
                                        "dateFrom": date_string,
                                        "dateTo": date_string,
                                        "Address": "",
                                        "validate": "true",
                                        })

        # Supplying a data argument makes urllib2 issue a POST.
        request = urllib2.Request(self.base_url, search_data)
        response = urllib2.urlopen(request)
        try:
            html = response.read()
        finally:
            # Release the connection even if the read fails.
            response.close()

        soup = BeautifulSoup(html)

        tables = soup.findAll("table", {"style": "width:auto;"})

        if not tables:
            return self._results

        # We don't want the first or last tr
        trs = tables[0].findAll("tr")[1:-1]

        for tr in trs:
            tds = tr.findAll("td")

            # Only rows with exactly four cells are treated as applications.
            if len(tds) != 4:
                continue

            app = PlanningApplication()

            reference_link = tds[0].a
            app.info_url = urlparse.urljoin(self.base_url,
                                            reference_link['href'])
            app.council_reference = reference_link.string
            app.address = tds[1].string
            app.postcode = getPostcodeFromText(app.address)
            app.description = tds[2].string

            app.comment_url = urlparse.urljoin(
                self.base_url, comment_url_end % app.council_reference)
            # The search covers a single day, so that day is recorded as the
            # date received.
            app.date_received = search_date

            self._results.addApplication(app)

        return self._results

    def getResults(self, day, month, year):
        """Run the search for the given date and return displayXML()'s output.

        day, month and year may be anything ``int()`` accepts (for example
        strings); they are converted before the search.
        """
        results = self.getResultsByDayMonthYear(int(day), int(month), int(year))
        return results.displayXML()


if __name__ == '__main__':
    rutland_parser = RutlandLikeParser(
        "Rutland long",
        "Rutland",
        "http://www.meltononline.co.uk/planning/searchparam.asp")

    print(rutland_parser.getResults(15, 11, 2007))