From 917a6c0c56a87f5ef62bd19b5dd8c615be0f6494 Mon Sep 17 00:00:00 2001 From: "duncan.parkes" Date: Wed, 12 Sep 2007 09:44:05 +0000 Subject: [PATCH] fix Chiltern --- cgi-bin/PlanningUtils.py | 2 +- cgi-bin/PublicAccess.py | 6 ++++-- python_scrapers/PlanningUtils.py | 2 +- python_scrapers/PublicAccess.py | 6 ++++-- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/cgi-bin/PlanningUtils.py b/cgi-bin/PlanningUtils.py index 9210446..e7fc337 100644 --- a/cgi-bin/PlanningUtils.py +++ b/cgi-bin/PlanningUtils.py @@ -90,7 +90,7 @@ class PlanningApplication: return self.displayXML() def displayXML(self): - #print self.council_reference, self.address, self.postcode, self.description, self.info_url, self.comment_url, self.date_received + print self.council_reference, self.address, self.postcode, self.description, self.info_url, self.comment_url, self.date_received return "\n" +\ "%s\n" %xmlQuote(self.council_reference) +\ "
%s
\n" %xmlQuote(self.address) +\ diff --git a/cgi-bin/PublicAccess.py b/cgi-bin/PublicAccess.py index 22621e8..5f82ba9 100644 --- a/cgi-bin/PublicAccess.py +++ b/cgi-bin/PublicAccess.py @@ -65,7 +65,9 @@ class PublicAccessParser(HTMLParser.HTMLParser): self.handle_start_td(attrs) # we are only interested in <a> tags if we are in the 6th td in # the results table. - elif self._in_td and self._td_count == 6 and tag == "a": + # UPDATE: It seems that, in the case of Chiltern, we are interested in + # td 5. + elif self._in_td and (self._td_count == 5 or self._td_count == 6) and tag == "a": self.handle_start_a(attrs) # If the tag is not one of these then we aren't interested @@ -128,7 +130,7 @@ class PublicAccessParser(HTMLParser.HTMLParser): # go through the attributes of the <a> looking for one # named 'href' - for attr,value in attrs: + for attr,value in attrs: if attr == "href": # the value of this tag is a relative url. # parse it so we can get the query string from it diff --git a/python_scrapers/PlanningUtils.py b/python_scrapers/PlanningUtils.py index 9210446..e7fc337 100644 --- a/python_scrapers/PlanningUtils.py +++ b/python_scrapers/PlanningUtils.py @@ -90,7 +90,7 @@ class PlanningApplication: return self.displayXML() def displayXML(self): - #print self.council_reference, self.address, self.postcode, self.description, self.info_url, self.comment_url, self.date_received + print self.council_reference, self.address, self.postcode, self.description, self.info_url, self.comment_url, self.date_received return "\n" +\ "%s\n" %xmlQuote(self.council_reference) +\ "
%s
\n" %xmlQuote(self.address) +\ diff --git a/python_scrapers/PublicAccess.py b/python_scrapers/PublicAccess.py index 22621e8..5f82ba9 100644 --- a/python_scrapers/PublicAccess.py +++ b/python_scrapers/PublicAccess.py @@ -65,7 +65,9 @@ class PublicAccessParser(HTMLParser.HTMLParser): self.handle_start_td(attrs) # we are only interested in
<a> tags if we are in the 6th td in # the results table. - elif self._in_td and self._td_count == 6 and tag == "a": + # UPDATE: It seems that, in the case of Chiltern, we are interested in + # td 5. + elif self._in_td and (self._td_count == 5 or self._td_count == 6) and tag == "a": self.handle_start_a(attrs) # If the tag is not one of these then we aren't interested @@ -128,7 +130,7 @@ class PublicAccessParser(HTMLParser.HTMLParser): # go through the attributes of the <a> looking for one # named 'href' - for attr,value in attrs: + for attr,value in attrs: if attr == "href": # the value of this tag is a relative url. # parse it so we can get the query string from it