|
-
- import urllib2
- import HTMLParser
- import urlparse
- import datetime
-
- from PlanningUtils import getPostcodeFromText, PlanningAuthorityResults, PlanningApplication
-
-
-
-
# Relative URL templates; each is joined onto an authority's base URL with
# urlparse.urljoin before use.
#
# The search template is filled from a mapping with the integer keys
# "scroll", "day", "month" and "year".  The same date is used for both the
# start and the end of the "date received" range, and "%%2F" renders as the
# literal text "%2F" (a URL-encoded "/") once the template is formatted.
search_form_url_end = (
    "results.asp?Scroll=%(scroll)d"
    "&DateReceivedStart=%(day)d%%2F%(month)d%%2F%(year)d"
    "&DateReceivedEnd=%(day)d%%2F%(month)d%%2F%(year)d"
)

# Per-application pages; the single %s is filled with the council reference.
info_url_end = "detail.asp?AltRef=%s"
comment_url_end = "comment.asp?AltRef=%s"
-
class FastWeb:
    """Scraper that collects planning applications from a "FastWeb" site.

    One instance is bound to a single planning authority (name, short name
    and base URL).  Applications found by the page parser are added to a
    single PlanningAuthorityResults object created in the constructor; that
    same object is handed to every parser and returned from every call, so
    repeated calls on one instance feed the same results object.
    """

    # The paging loop assumes each results page accounts for this many
    # applications when deciding whether another page must be fetched.
    RESULTS_PER_PAGE = 20

    def __init__(self,
                 authority_name,
                 authority_short_name,
                 base_url,
                 debug=False):
        """Store the authority details and create the results container.

        authority_name / authority_short_name are passed straight through to
        PlanningAuthorityResults.  base_url is the URL that the relative
        search, detail and comment URLs are joined onto.  debug is stored on
        the instance but is not consulted anywhere in this class.
        """
        self.authority_name = authority_name
        self.authority_short_name = authority_short_name
        self.base_url = base_url

        self.debug = debug

        self._results = PlanningAuthorityResults(self.authority_name,
                                                 self.authority_short_name)

    def getResultsByDayMonthYear(self, day, month, year):
        """Fetch every results page for one received-date and parse it.

        day, month and year are integers.  datetime.date raises ValueError
        if they do not form a valid date.  Returns the instance's
        PlanningAuthorityResults object.
        """
        requested_date = datetime.date(year, month, day)

        scroll = 0
        first_time = True
        number_of_results = 0

        # The first page is always fetched; it reports the total number of
        # results, which then decides how many further pages are needed.
        while first_time or scroll * self.RESULTS_PER_PAGE < number_of_results:
            scroll += 1

            this_search_url = search_form_url_end % {"scroll": scroll,
                                                     "day": day,
                                                     "month": month,
                                                     "year": year}
            url = urlparse.urljoin(self.base_url, this_search_url)

            response = urllib2.urlopen(url)
            try:
                contents = response.read()
                returned_url = response.geturl()
            finally:
                # Always release the connection, even if reading fails.
                response.close()

            # NOTE(review): ending up on search.asp presumably means the
            # site bounced us back to the search form because there were no
            # results for this date -- confirm against the live site.
            if first_time and "search.asp" in returned_url:
                break

            results_page_parser = FastWebResultsPageParser(self._results,
                                                           requested_date,
                                                           self.base_url)
            results_page_parser.feed(contents)

            if first_time:
                # The parser leaves number_of_results as None when it did
                # not find a RecCount input; treat that as "no more pages"
                # instead of failing on int + None.
                number_of_results = results_page_parser.number_of_results or 0
                first_time = False

        return self._results

    def getResults(self, day, month, year):
        """Return displayXML() of the results for the given date.

        day, month and year may be anything int() accepts (for example
        strings); they are converted before the search is run.
        """
        results = self.getResultsByDayMonthYear(int(day), int(month), int(year))
        return results.displayXML()
-
-
-
-
-
# States of the FastWebResultsPageParser state machine.
STARTING = 1              # nothing of interest seen yet
GOT_RESULTS_COUNT = 2     # the first <input> tag has been seen
IN_RESULTS_TABLE = 3      # inside the first <table> after that <input>
IN_RESULTS_TABLE_TD = 4   # inside a <td> of that table
IN_INNER_TABLE = 5        # inside a <table> nested in that <td>
FINISHED = -1             # the outer <td> has closed; the rest is ignored


class FastWebResultsPageParser(HTMLParser.HTMLParser):
    """State-machine parser for one FastWeb results page.

    Each table nested inside the first cell of the results table becomes one
    PlanningApplication.  Within such a table the cells are counted from 1:
    cell 2 supplies the council reference, cell 4 the address and cell 7 the
    description.  An application is added to ``results`` when its table
    closes, provided a council reference was found.

    After feeding a page, ``number_of_results`` holds the integer value of
    the ``RecCount`` input if that was the first <input> on the page, and
    None otherwise.
    """

    def __init__(self, results, requested_date, base_url):
        """Prepare a parser.

        results        -- object whose addApplication() receives each
                          completed application.
        requested_date -- stored as date_received on every application.
        base_url       -- base for the per-application info/comment URLs.
        """
        self.results = results

        self.requested_date = requested_date
        self.base_url = base_url

        # Explicit base-class call rather than super(): the base class is
        # invoked the same way the original module did.
        HTMLParser.HTMLParser.__init__(self)

        # Total result count reported by the page, once seen.
        self.number_of_results = None

        self._state = STARTING
        self._td_count = None

        # Text fragments of the cell currently being read.
        self._data_list = []

        # The application being assembled from the current inner table.
        self._current_application = None

    def get_data(self, flush=True):
        """Return the buffered text fragments joined by single spaces.

        The buffer is emptied afterwards unless flush is false.
        """
        data = " ".join(self._data_list)

        if flush:
            self.flush_data()

        return data

    def flush_data(self):
        """Discard any buffered text."""
        self._data_list = []

    def handle_starttag(self, tag, attrs):
        """Advance the state machine on an opening tag."""
        if self._state == STARTING and tag == "input":
            # The state advances on the first <input> whether or not it is
            # the RecCount field.
            self._state = GOT_RESULTS_COUNT

            attr_dict = dict(attrs)

            if attr_dict.get("id") == "RecCount":
                self.number_of_results = int(attr_dict.get("value"))

        elif self._state == GOT_RESULTS_COUNT and tag == "table":
            self._state = IN_RESULTS_TABLE

        elif self._state == IN_RESULTS_TABLE and tag == "td":
            self._state = IN_RESULTS_TABLE_TD

        elif self._state == IN_RESULTS_TABLE_TD and tag == "table":
            # A nested table starts a new application.
            self._state = IN_INNER_TABLE
            self._td_count = 0
            self._current_application = PlanningApplication()
            self._current_application.date_received = self.requested_date

        elif self._state == IN_INNER_TABLE and tag == "td":
            # New cell: count it and start from an empty text buffer.
            self._td_count += 1
            self.flush_data()

    def handle_endtag(self, tag):
        """Finish cells and tables on a closing tag."""
        if self._state == IN_INNER_TABLE and tag == "table":
            # Only keep applications for which a reference was extracted.
            if self._current_application.council_reference is not None:
                self.results.addApplication(self._current_application)
            self._state = IN_RESULTS_TABLE_TD

        elif self._state == IN_RESULTS_TABLE_TD and tag == "td":
            self._state = FINISHED

        elif self._state == IN_INNER_TABLE and tag == "td":
            if self._td_count == 2:
                council_reference = self.get_data().strip()
                self._current_application.council_reference = council_reference

                # Both per-application URLs are derived from the reference.
                self._current_application.info_url = urlparse.urljoin(
                    self.base_url, info_url_end % (council_reference))
                self._current_application.comment_url = urlparse.urljoin(
                    self.base_url, comment_url_end % (council_reference))

            elif self._td_count == 4:
                self._current_application.address = self.get_data().strip()
                self._current_application.postcode = getPostcodeFromText(
                    self._current_application.address)

            elif self._td_count == 7:
                self._current_application.description = self.get_data().strip()

    def handle_data(self, data):
        """Buffer text that belongs to an inner-table cell.

        Text is only ever read back when a cell of an inner table closes,
        and the buffer is flushed whenever such a cell opens, so text seen
        in any other state could never reach an application.  It is dropped
        here instead of accumulating for the whole page.
        """
        if self._state == IN_INNER_TABLE:
            self._data_list.append(data)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
|