From 240f1b2d84b6cb084250b412e12e4957fc4f178c Mon Sep 17 00:00:00 2001 From: "duncan.parkes" Date: Sat, 23 Aug 2008 18:12:27 +0000 Subject: [PATCH] Add more debug. --- python_scrapers/CGITemplate | 7 +++---- python_scrapers/Westminster.py | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/python_scrapers/CGITemplate b/python_scrapers/CGITemplate index 7040631..02c9dfb 100644 --- a/python_scrapers/CGITemplate +++ b/python_scrapers/CGITemplate @@ -19,12 +19,11 @@ base_url = "%(base_url)s" #print "Content-Type: text/html" # HTML is following #print -import %(module)s - -parser = %(module)s.%(parser)s(authority_name, authority_short_name, base_url) - print "Content-Type: text/xml; charset=utf-8" # XML is following print +import %(module)s +parser = %(module)s.%(parser)s(authority_name, authority_short_name, base_url) + xml = parser.getResults(day, month, year) print xml.encode("utf-8") # print the xml diff --git a/python_scrapers/Westminster.py b/python_scrapers/Westminster.py index 1991f17..ad6e9cf 100644 --- a/python_scrapers/Westminster.py +++ b/python_scrapers/Westminster.py @@ -58,6 +58,7 @@ class WestminsterParser: post_data = "REFNO=&STName=&STNUMB=&ADRSNO=&WARD=AllWards&AGT=&ATCDE=AllApps&DECDE=AllDecs&DTErec=%(date)s&DTErecTo=%(date)s&DTEvalid=&DTEvalidTo=&APDECDE=AllAppDecs&submit=Start+Search" %{"date": search_day.strftime(date_format)} while post_data: + # Now get the search page @@ -69,19 +70,30 @@ class WestminsterParser: sys.stderr.write("Got it\n") soup = BeautifulSoup(response.read()) + sys.stderr.write("Created soup\n") + results_form = soup.find("form", {"name": "currentsearchresultsNext"}) # Sort out the post_data for the next page, if there is one # If there is no next page then there will be no inputs in the form. # In this case, post_data will be '', which is false. + sys.stderr.write("Found form containing results\n") + post_data = urllib.urlencode([(x['name'], x['value']) for x in results_form.findAll("input")]) + sys.stderr.write("Got post data\n") + # Each result has one link, and they are the only links in the form links = results_form.findAll("a") + sys.stderr.write("Got list of links\n") + for link in links: + + sys.stderr.write("Working on link: %s\n" %link['href']) + application = PlanningApplication() application.date_received = search_day @@ -111,6 +123,10 @@ class WestminsterParser: self._results.addApplication(application) + sys.stderr.write("Finished that link\n") + + + sys.stderr.write("Finished while loop, returning stuff.\n") return self._results