|
-
- import urllib, urllib2
-
- import HTMLParser
- import urlparse
- import datetime, time
-
-
- from PlanningUtils import PlanningAuthorityResults, \
- getPostcodeFromText, \
- PlanningApplication
-
-
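- # The search results list gives us the reference, location, description,
- # and info URL of each application.
-
- # The info page would give us the received date and the comment URL, but it
- # is not fetched at present: the requested search date is used as the
- # received date and the comment URL is built from the council reference.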
-
class ApplicationSearchServletParser(HTMLParser.HTMLParser):
    """Parser for ApplicationSearchServlet sites.

    The parser POSTs a one-day received-date search to the site's
    ``ApplicationSearchServlet`` and walks the returned HTML looking for
    the results table.  Every table row that yields a council reference
    becomes one ``PlanningApplication`` stored in the
    ``PlanningAuthorityResults`` object handed back to the caller.

    Subclasses describe a particular site by overriding the three
    ``_*_col_no`` attributes and implementing
    ``_checkAttrsForResultsTable``.

    Parser state and collected results are never reset, so an instance is
    meant to serve a single search.
    """

    # 1-based position, within a results row, of the <td> holding each
    # piece of information.  They must be overridden in subclasses.
    _reference_col_no = None
    _location_col_no = None
    _description_col_no = None

    def __init__(self,
                 authority_name,
                 authority_short_name,
                 base_url,
                 debug=False):
        """Set up URLs and parsing state for one authority.

        authority_name / authority_short_name are passed straight to
        PlanningAuthorityResults.  base_url is the site root that the
        servlet paths are joined onto.  When debug is true every tag and
        text node seen by the parser is printed.
        """
        HTMLParser.HTMLParser.__init__(self)

        self.authority_name = authority_name
        self.authority_short_name = authority_short_name
        self.base_url = base_url
        self.debug = debug

        self.search_url = urlparse.urljoin(self.base_url, "portal/servlets/ApplicationSearchServlet")

        # Template; the council reference is substituted in once it is known.
        self._comment_url = urlparse.urljoin(self.base_url, "portal/servlets/PlanningComments?REFNO=%(council_reference)s")

        self._requested_date = None

        # Are we inside the results table?
        # 0 - no
        # 1 - maybe
        # 2 - yes
        # 3 - finished
        self._in_results_table = 0
        self._tr_count = 0
        self._td_count = 0
        self._data_list = []

        # The application for the row currently being parsed, or None
        # when we are not inside a row.
        self._current_application = None

        # The object which stores our set of planning application results
        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

    def _checkAttrsForResultsTable(self, attrs):
        """Decide from a <table> tag's attrs whether it is the results table.

        Implementations must set self._in_results_table to 2 when the
        table is the results table and back to 0 when it is not.

        Raises NotImplementedError here; subclasses must override.
        """
        raise NotImplementedError(
            "subclasses must implement _checkAttrsForResultsTable")

    def handle_starttag(self, tag, attrs):
        """Track table/row/cell starts and pick up the info page link."""
        if self.debug:
            print("%s %s" % (tag, attrs))

        if tag == "table" and self._in_results_table == 0:
            self._in_results_table = 1
            self._checkAttrsForResultsTable(attrs)
        elif self._in_results_table != 2:
            # Everything below only matters inside the results table.
            return
        elif tag == "tr":
            # A new row means a new candidate application.
            self._tr_count += 1
            self._td_count = 0
            self._data_list = []
            self._current_application = PlanningApplication()
        elif tag == "td":
            self._td_count += 1
        elif tag == "a" and self._td_count == self._reference_col_no:
            # The href attribute contains the link to the info page.
            # Links found outside any row have nowhere to go; skip them.
            if self._current_application is not None:
                for (key, value) in attrs:
                    if key == "href":
                        self._current_application.info_url = urlparse.urljoin(self.search_url, value)

    def handle_endtag(self, tag):
        """Store the text gathered for a cell/link and finish rows."""
        if self.debug:
            print("ending:  %s" % (tag,))

        if self._in_results_table != 2:
            return

        application = self._current_application

        if tag == "table":
            self._in_results_table = 3
        elif tag == "tr":
            # Rows without a reference (e.g. header rows) are dropped.
            if application is not None and application.council_reference is not None:
                # The info page is not fetched, so the date that was
                # searched for stands in for the received date.
                application.date_received = self._requested_date
                self._results.addApplication(application)
            # Leave the row: stray cells or a repeated </tr> must not
            # touch or re-add an application that is already stored.
            self._current_application = None
        elif tag == "td":
            if self._td_count == self._location_col_no:
                text = ' '.join(self._data_list).strip()
                if application is not None:
                    application.address = text
                    postcode = getPostcodeFromText(text)
                    if postcode is not None:
                        application.postcode = postcode
                self._data_list = []
            elif self._td_count == self._description_col_no:
                text = ' '.join(self._data_list).strip()
                if application is not None:
                    application.description = text
                self._data_list = []
        elif tag == 'a' and self._td_count == self._reference_col_no:
            reference = ''.join(self._data_list).strip()
            if application is not None:
                application.council_reference = reference
                application.comment_url = self._comment_url % {"council_reference": reference}
            self._data_list = []

    def handle_data(self, data):
        """Collect text that falls inside one of the interesting columns."""
        if self.debug:
            print(data)

        if self._in_results_table == 2:
            if self._td_count == self._reference_col_no or \
               self._td_count == self._location_col_no or \
               self._td_count == self._description_col_no:
                self._data_list.append(data.strip())

    def getResultsByDayMonthYear(self, day, month, year):
        """Return the PlanningAuthorityResults object containing the
        applications found for the date passed in.

        day, month and year are integers; datetime.date raises ValueError
        if they do not form a valid date.  Errors from urllib2.urlopen
        are not caught here.
        """
        # We're going to need a date object for the requested date
        self._requested_date = datetime.date(year, month, day)

        required_format = "%d-%m-%Y"
        date_string = self._requested_date.strftime(required_format)

        # Search a one-day window: from and to are the same date.
        search_data = urllib.urlencode({"ReceivedDateFrom": date_string,
                                        "ReceivedDateTo": date_string})

        search_request = urllib2.Request(self.search_url, search_data)
        search_response = urllib2.urlopen(search_request)
        try:
            search_contents = search_response.read()
        finally:
            # Release the connection even if reading fails.
            search_response.close()

        self.feed(search_contents)

        return self._results

    def getResults(self, day, month, year):
        """Run the search for the given date and return the output of the
        results object's displayXML().

        day, month and year may be anything int() accepts.
        """
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
-
-
class CoventrySearchParser(ApplicationSearchServletParser):
    """ApplicationSearchServlet parser set up for the Coventry layout.

    Reference, location and description are taken from the 1st, 5th and
    8th cells of each results row.
    """

    _reference_col_no = 1
    _location_col_no = 5
    _description_col_no = 8

    def _checkAttrsForResultsTable(self, attrs):
        """Spot the results table: width="100%" together with border="0".

        Sets _in_results_table to 2 on a match and to 0 otherwise.
        """
        wanted = {"width": "100%", "border": "0"}
        matched = set()

        for name, value in attrs:
            if name in wanted and wanted[name] == value:
                matched.add(name)

        # Every wanted attribute has to have been seen with its value.
        if len(matched) == len(wanted):
            self._in_results_table = 2
        else:
            self._in_results_table = 0
-
-
-
class AllerdaleSearchParser(ApplicationSearchServletParser):
    """ApplicationSearchServlet parser set up for the Allerdale layout.

    Reference, location and description are taken from the 1st, 3rd and
    4th cells of each results row.
    """

    _reference_col_no = 1
    _location_col_no = 3
    _description_col_no = 4

    def _checkAttrsForResultsTable(self, attrs):
        """Spot the results table by its class and summary attributes.

        Sets _in_results_table to 2 when both match and to 0 otherwise.
        """
        wanted = {
            "class": "nis_table",
            "summary": "Table of planning applications that matched your query, showing reference number, received date, and address",
        }
        matched = set()

        for name, value in attrs:
            if name in wanted and wanted[name] == value:
                matched.add(name)

        if len(matched) == len(wanted):
            self._in_results_table = 2
        else:
            self._in_results_table = 0
-
-
-
class AlnwickSearchParser(ApplicationSearchServletParser):
    """ApplicationSearchServlet parser set up for the Alnwick layout.

    Reference, location and description are taken from the 1st, 2nd and
    7th cells of each results row.
    """

    _reference_col_no = 1
    _location_col_no = 2
    _description_col_no = 7

    def _checkAttrsForResultsTable(self, attrs):
        """Spot the results table by class="niscontent".

        The table was noted as also carrying width="100%", but only the
        class is checked.  Sets _in_results_table to 2 on a match and to
        0 otherwise.
        """
        state = 0

        for name, value in attrs:
            if name == "class" and value == "niscontent":
                state = 2

        self._in_results_table = state
-
-
class BarrowSearchParser(ApplicationSearchServletParser):
    """ApplicationSearchServlet parser set up for the Barrow layout.

    Reference, location and description are taken from the 1st, 3rd and
    6th cells of each results row.
    """

    _reference_col_no = 1
    _location_col_no = 3
    _description_col_no = 6

    def _checkAttrsForResultsTable(self, attrs):
        """Spot the results table: width="100%" together with border="0".

        Sets _in_results_table to 2 on a match and to 0 otherwise.
        """
        wanted = {"width": "100%", "border": "0"}
        matched = set()

        for name, value in attrs:
            if name in wanted and wanted[name] == value:
                matched.add(name)

        if len(matched) == len(wanted):
            self._in_results_table = 2
        else:
            self._in_results_table = 0
-
-
class HartlepoolSearchParser(ApplicationSearchServletParser):
    """ApplicationSearchServlet parser set up for the Hartlepool layout.

    Reference, location and description are taken from the 1st, 2nd and
    3rd cells of each results row.
    """

    _reference_col_no = 1
    _location_col_no = 2
    _description_col_no = 3

    def _checkAttrsForResultsTable(self, attrs):
        """Spot the results table by its summary attribute.

        Sets _in_results_table to 2 on a match and to 0 otherwise.
        """
        expected_summary = "Table of planning applications that matched your query, showing reference number, received date, and address"
        state = 0

        for name, value in attrs:
            if name == "summary" and value == expected_summary:
                state = 2

        self._in_results_table = state
-
-
class NorthWarksSearchParser(ApplicationSearchServletParser):
    """ApplicationSearchServlet parser set up for the North Warks layout.

    Reference, location and description are taken from the 1st, 3rd and
    4th cells of each results row.
    """

    _reference_col_no = 1
    _location_col_no = 3
    _description_col_no = 4

    def _checkAttrsForResultsTable(self, attrs):
        """Spot the results table by four layout attributes.

        The table must have width="100%", border="0", cellspacing="0"
        and cellpadding="0".  Sets _in_results_table to 2 on a match and
        to 0 otherwise.
        """
        wanted = {
            "width": "100%",
            "border": "0",
            "cellspacing": "0",
            "cellpadding": "0",
        }
        matched = set()

        for name, value in attrs:
            if name in wanted and wanted[name] == value:
                matched.add(name)

        if len(matched) == len(wanted):
            self._in_results_table = 2
        else:
            self._in_results_table = 0
-
class StHelensSearchParser(ApplicationSearchServletParser):
    """ApplicationSearchServlet parser set up for the St Helens layout.

    Reference, location and description are taken from the 1st, 2nd and
    5th cells of each results row.
    """

    _reference_col_no = 1
    _location_col_no = 2
    _description_col_no = 5

    def _checkAttrsForResultsTable(self, attrs):
        """Spot the results table by summary="Search Results List".

        Sets _in_results_table to 2 on a match and to 0 otherwise.
        """
        state = 0

        for name, value in attrs:
            if name == "summary" and value == "Search Results List":
                state = 2

        self._in_results_table = state
-
class EasingtonSearchParser(ApplicationSearchServletParser):
    """ApplicationSearchServlet parser set up for the Easington layout.

    Reference, location and description are taken from the 1st, 3rd and
    6th cells of each results row.
    """

    _reference_col_no = 1
    _location_col_no = 3
    _description_col_no = 6

    def _checkAttrsForResultsTable(self, attrs):
        """Spot the results table by four layout attributes.

        The table must have width="100%", border="0", cellspacing="0"
        and cellpadding="0".  Sets _in_results_table to 2 on a match and
        to 0 otherwise.
        """
        wanted = {
            "width": "100%",
            "border": "0",
            "cellspacing": "0",
            "cellpadding": "0",
        }
        matched = set()

        for name, value in attrs:
            if name in wanted and wanted[name] == value:
                matched.add(name)

        if len(matched) == len(wanted):
            self._in_results_table = 2
        else:
            self._in_results_table = 0
-
-
class HighPeakSearchParser(ApplicationSearchServletParser):
    """ApplicationSearchServlet parser set up for the High Peak layout.

    Reference, location and description are taken from the 1st, 2nd and
    5th cells of each results row.
    """

    _reference_col_no = 1
    _location_col_no = 2
    _description_col_no = 5

    def _checkAttrsForResultsTable(self, attrs):
        """Spot the results table: class="data" together with width="95%".

        Sets _in_results_table to 2 on a match and to 0 otherwise.
        """
        wanted = {"class": "data", "width": "95%"}
        matched = set()

        for name, value in attrs:
            if name in wanted and wanted[name] == value:
                matched.add(name)

        if len(matched) == len(wanted):
            self._in_results_table = 2
        else:
            self._in_results_table = 0
-
-
class WearValleySearchParser(ApplicationSearchServletParser):
    """ApplicationSearchServlet parser set up for the Wear Valley layout.

    Reference, location and description are taken from the 1st, 3rd and
    4th cells of each results row.
    """

    _reference_col_no = 1
    _location_col_no = 3
    _description_col_no = 4

    def _checkAttrsForResultsTable(self, attrs):
        """Spot the results table by its summary attribute.

        Sets _in_results_table to 2 on a match and to 0 otherwise.
        """
        expected_summary = "Table of planning applications that matched your query, showing reference number, received date, and address"
        state = 0

        for name, value in attrs:
            if name == "summary" and value == expected_summary:
                state = 2

        self._in_results_table = state
-
-
class WellingboroughSearchParser(ApplicationSearchServletParser):
    """ApplicationSearchServlet parser set up for the Wellingborough layout.

    Reference, location and description are taken from the 1st, 3rd and
    6th cells of each results row.
    """

    _reference_col_no = 1
    _location_col_no = 3
    _description_col_no = 6

    def _checkAttrsForResultsTable(self, attrs):
        """Spot the results table: width="100%" together with border="0".

        Sets _in_results_table to 2 on a match and to 0 otherwise.
        """
        wanted = {"width": "100%", "border": "0"}
        matched = set()

        for name, value in attrs:
            if name in wanted and wanted[name] == value:
                matched.add(name)

        if len(matched) == len(wanted):
            self._in_results_table = 2
        else:
            self._in_results_table = 0
-
class EalingSearchParser(ApplicationSearchServletParser):
    """ApplicationSearchServlet parser set up for the Ealing layout.

    Reference, location and description are taken from the 1st, 3rd and
    4th cells of each results row.
    """

    _reference_col_no = 1
    _location_col_no = 3
    _description_col_no = 4

    def _checkAttrsForResultsTable(self, attrs):
        """Spot the results table by five layout attributes.

        The table must have width="100%", cellspacing="0px",
        border="1px", cellpadding="2px" and bordercolor="#FFFFFF".  Sets
        _in_results_table to 2 on a match and to 0 otherwise.
        """
        wanted = {
            "width": "100%",
            "cellspacing": "0px",
            "border": "1px",
            "cellpadding": "2px",
            "bordercolor": "#FFFFFF",
        }
        matched = set()

        for name, value in attrs:
            if name in wanted and wanted[name] == value:
                matched.add(name)

        if len(matched) == len(wanted):
            self._in_results_table = 2
        else:
            self._in_results_table = 0
-
-
class HaringeySearchParser(ApplicationSearchServletParser):
    """ApplicationSearchServlet parser set up for the Haringey layout.

    Reference, location and description are taken from the 1st, 2nd and
    5th cells of each results row.
    """

    _reference_col_no = 1
    _location_col_no = 2
    _description_col_no = 5

    def _checkAttrsForResultsTable(self, attrs):
        """Spot the results table by summary="Application Results".

        Sets _in_results_table to 2 on a match and to 0 otherwise.
        """
        state = 0

        for name, value in attrs:
            if name == "summary" and value == "Application Results":
                state = 2

        self._in_results_table = state
-
-
class DenbighshireSearchParser(ApplicationSearchServletParser):
    """ApplicationSearchServlet parser set up for the Denbighshire layout.

    Reference, location and description are taken from the 1st, 3rd and
    5th cells of each results row.
    """

    _reference_col_no = 1
    _location_col_no = 3
    _description_col_no = 5

    def _checkAttrsForResultsTable(self, attrs):
        """Spot the results table: width="100%" together with border="0".

        Sets _in_results_table to 2 on a match and to 0 otherwise.
        """
        wanted = {"width": "100%", "border": "0"}
        matched = set()

        for name, value in attrs:
            if name in wanted and wanted[name] == value:
                matched.add(name)

        if len(matched) == len(wanted):
            self._in_results_table = 2
        else:
            self._in_results_table = 0
-
-
if __name__ == "__main__":
    # Nothing runs by default.  The lines below are disabled manual checks,
    # one per site; to try one, uncomment its constructor line and follow
    # it with the matching getResults call, e.g.
    #     print parser.getResults(28,3,2007)
    #
    # Tried with getResults(28,3,2007):
    #parser = CoventrySearchParser("Coventry", "Coventry", "http://planning.coventry.gov.uk")
    #parser = AllerdaleSearchParser("Allerdale", "Allerdale", "http://planning.allerdale.gov.uk")
    #parser = AlnwickSearchParser("Alnwick", "Alnwick", "http://services.castlemorpeth.gov.uk:7777")
    #parser = BarrowSearchParser("Barrow", "Barrow", "http://localportal.barrowbc.gov.uk")
    #parser = HartlepoolSearchParser("Hartlepool", "Hartlepool", "http://eforms.hartlepool.gov.uk:7777")
    #parser = NorthWarksSearchParser("North Warwickshire", "North Warks", "http://planning.northwarks.gov.uk")
    #parser = StHelensSearchParser("St Helens", "St Helens", "http://212.248.225.150:8080")
    #parser = EasingtonSearchParser("Easington", "Easington", "http://planning.easington.gov.uk")
    #
    # Tried with getResults(20,3,2007):
    #parser = HighPeakSearchParser("High Peak", "High Peak", "http://planning.highpeak.gov.uk")
    #parser = WearValleySearchParser("Wear Valley", "Wear Valley", "http://planning.wearvalley.gov.uk")
    #parser = WellingboroughSearchParser("Wellingborough", "Wellingborough", "http://planning.wellingborough.gov.uk")
    #parser = EalingSearchParser("Ealing", "Ealing", "http://www.pam.ealing.gov.uk")
    #parser = HaringeySearchParser("Haringey", "Haringey", "http://www.planningservices.haringey.gov.uk")
    #parser = DenbighshireSearchParser("Denbighshire", "Denbighshire", "http://planning.denbighshire.gov.uk")
    pass
|