Browse Source

fix Chiltern

master
duncan.parkes 17 years ago
parent
commit
917a6c0c56
4 changed files with 10 additions and 6 deletions
  1. +1
    -1
      cgi-bin/PlanningUtils.py
  2. +4
    -2
      cgi-bin/PublicAccess.py
  3. +1
    -1
      python_scrapers/PlanningUtils.py
  4. +4
    -2
      python_scrapers/PublicAccess.py

+ 1
- 1
cgi-bin/PlanningUtils.py View File

@@ -90,7 +90,7 @@ class PlanningApplication:
return self.displayXML()
def displayXML(self):
- #print self.council_reference, self.address, self.postcode, self.description, self.info_url, self.comment_url, self.date_received
+ print self.council_reference, self.address, self.postcode, self.description, self.info_url, self.comment_url, self.date_received
return "<application>\n" +\
"<council_reference>%s</council_reference>\n" %xmlQuote(self.council_reference) +\
"<address>%s</address>\n" %xmlQuote(self.address) +\


+ 4
- 2
cgi-bin/PublicAccess.py View File

@@ -65,7 +65,9 @@ class PublicAccessParser(HTMLParser.HTMLParser):
self.handle_start_td(attrs)
# we are only interested in <a> tags if we are in the 6th td in
# the results table.
- elif self._in_td and self._td_count == 6 and tag == "a":
+ # UPDATE: It seems that, in the case of Chiltern, we are interested in
+ # td 5.
+ elif self._in_td and (self._td_count == 5 or self._td_count == 6) and tag == "a":
self.handle_start_a(attrs)
# If the tag is not one of these then we aren't interested

@@ -128,7 +130,7 @@ class PublicAccessParser(HTMLParser.HTMLParser):

# go through the attributes of the <a> looking for one
# named 'href'
- for attr,value in attrs:
+ for attr,value in attrs:
if attr == "href":
# the value of this tag is a relative url.
# parse it so we can get the query string from it


+ 1
- 1
python_scrapers/PlanningUtils.py View File

@@ -90,7 +90,7 @@ class PlanningApplication:
return self.displayXML()
def displayXML(self):
- #print self.council_reference, self.address, self.postcode, self.description, self.info_url, self.comment_url, self.date_received
+ print self.council_reference, self.address, self.postcode, self.description, self.info_url, self.comment_url, self.date_received
return "<application>\n" +\
"<council_reference>%s</council_reference>\n" %xmlQuote(self.council_reference) +\
"<address>%s</address>\n" %xmlQuote(self.address) +\


+ 4
- 2
python_scrapers/PublicAccess.py View File

@@ -65,7 +65,9 @@ class PublicAccessParser(HTMLParser.HTMLParser):
self.handle_start_td(attrs)
# we are only interested in <a> tags if we are in the 6th td in
# the results table.
- elif self._in_td and self._td_count == 6 and tag == "a":
+ # UPDATE: It seems that, in the case of Chiltern, we are interested in
+ # td 5.
+ elif self._in_td and (self._td_count == 5 or self._td_count == 6) and tag == "a":
self.handle_start_a(attrs)
# If the tag is not one of these then we aren't interested

@@ -128,7 +130,7 @@ class PublicAccessParser(HTMLParser.HTMLParser):

# go through the attributes of the <a> looking for one
# named 'href'
- for attr,value in attrs:
+ for attr,value in attrs:
if attr == "href":
# the value of this tag is a relative url.
# parse it so we can get the query string from it


Loading…
Cancel
Save