Browse Source

Add more debug output.

master
duncan.parkes 16 years ago
parent
commit
240f1b2d84
2 changed files with 19 additions and 4 deletions
  1. +3
    -4
      python_scrapers/CGITemplate
  2. +16
    -0
      python_scrapers/Westminster.py

+ 3
- 4
python_scrapers/CGITemplate View File

@@ -19,12 +19,11 @@ base_url = "%(base_url)s"
#print "Content-Type: text/html" # HTML is following
#print

import %(module)s

parser = %(module)s.%(parser)s(authority_name, authority_short_name, base_url)

print "Content-Type: text/xml; charset=utf-8" # XML is following
print

import %(module)s
parser = %(module)s.%(parser)s(authority_name, authority_short_name, base_url)

xml = parser.getResults(day, month, year)
print xml.encode("utf-8") # print the xml

+ 16
- 0
python_scrapers/Westminster.py View File

@@ -58,6 +58,7 @@ class WestminsterParser:
post_data = "REFNO=&STName=&STNUMB=&ADRSNO=&WARD=AllWards&AGT=&ATCDE=AllApps&DECDE=AllDecs&DTErec=%(date)s&DTErecTo=%(date)s&DTEvalid=&DTEvalidTo=&APDECDE=AllAppDecs&submit=Start+Search" %{"date": search_day.strftime(date_format)}

while post_data:

# Now get the search page

@@ -69,19 +70,30 @@ class WestminsterParser:
sys.stderr.write("Got it\n")
soup = BeautifulSoup(response.read())

sys.stderr.write("Created soup\n")

results_form = soup.find("form", {"name": "currentsearchresultsNext"})

# Sort out the post_data for the next page, if there is one.
# If there is no next page then there will be no inputs in the form.
# In this case, post_data will be '', which is falsy, so the while loop stops after this page.

sys.stderr.write("Found form containing results\n")

post_data = urllib.urlencode([(x['name'], x['value']) for x in results_form.findAll("input")])

sys.stderr.write("Got post data\n")

# Each result has one link, and they are the only links in the form

links = results_form.findAll("a")

sys.stderr.write("Got list of links\n")

for link in links:

sys.stderr.write("Working on link: %s\n" %link['href'])

application = PlanningApplication()

application.date_received = search_day
@@ -111,6 +123,10 @@ class WestminsterParser:

self._results.addApplication(application)

sys.stderr.write("Finished that link\n")


sys.stderr.write("Finished while loop, returning stuff.\n")

return self._results



Loading…
Cancel
Save