|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518 |
-
- import urllib, urllib2
-
- import HTMLParser
- import urlparse
- import datetime, time
-
-
- from PlanningUtils import PlanningAuthorityResults, \
- getPostcodeFromText, \
- PlanningApplication
-
-
- # The search results list will give us reference, location, description,
- # and info url of each app.
-
- # The info page gives us the received date,
- # and comment_url
-
class ApplicationSearchServletParser(HTMLParser.HTMLParser):
    """Parser for ApplicationSearchServlet sites.

    A search for a single received date is POSTed to the site's
    ``portal/servlets/ApplicationSearchServlet`` and the HTML that comes
    back is fed through this parser.  Inside the results table every
    ``<tr>`` starts a new ``PlanningApplication``:

    * the text of the ``<a>`` in the reference column becomes the council
      reference (and is substituted into the comment url); the link's
      ``href`` becomes the info url;
    * the text of the location column becomes the address, and a postcode
      is stored when ``getPostcodeFromText`` returns one;
    * the text of the description column becomes the description.

    A row whose council reference is still ``None`` at ``</tr>`` is not
    added to the results.

    Subclasses must set the three ``_*_col_no`` attributes and implement
    ``_checkAttrsForResultsTable``.
    """

    # Position of the <td> (counted from 1 within each <tr>) that holds
    # each piece of information.  These must be overridden in subclasses.
    _reference_col_no = None
    _location_col_no = None
    _description_col_no = None

    def __init__(self,
                 authority_name,
                 authority_short_name,
                 base_url,
                 debug=False):
        """Set up the parser for one authority.

        authority_name, authority_short_name -- passed on to the
            PlanningAuthorityResults object that collects the applications.
        base_url -- root of the site; the search and comment servlet paths
            are joined onto it.
        debug -- when true, every tag and piece of data seen is printed.
        """
        HTMLParser.HTMLParser.__init__(self)

        self.authority_name = authority_name
        self.authority_short_name = authority_short_name
        self.base_url = base_url
        self.debug = debug

        self.search_url = urlparse.urljoin(self.base_url, "portal/servlets/ApplicationSearchServlet")

        self._comment_url = urlparse.urljoin(self.base_url, "portal/servlets/PlanningComments?REFNO=%(council_reference)s")

        self._requested_date = None

        # Where we are relative to the results table:
        # 0 - not in it
        # 1 - maybe (a <table> was just opened and is being checked)
        # 2 - yes
        # 3 - finished
        self._in_results_table = 0
        self._tr_count = 0
        self._td_count = 0

        # Text fragments collected for the cell currently being read.
        self._data_list = []

        # this will hold the application we are currently working on.
        self._current_application = None

        # The object which stores our set of planning application results
        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

    def _checkAttrsForResultsTable(self, attrs):
        """Decide whether a newly opened <table> is the results table.

        Implementations receive the tag's list of (name, value) pairs and
        must set ``self._in_results_table`` to 2 (yes) or 0 (no).
        """
        raise NotImplementedError("subclasses must implement _checkAttrsForResultsTable")

    def handle_starttag(self, tag, attrs):
        """Track table/row/cell openings and pick up the info page link."""
        if self.debug:
            print("%s %s" % (tag, attrs))

        if tag == "table" and self._in_results_table == 0:
            # Flag as "maybe" and let the subclass settle it to 2 or 0.
            self._in_results_table = 1
            self._checkAttrsForResultsTable(attrs)
            return

        if self._in_results_table != 2:
            return

        if tag == "tr":
            # Every row starts a fresh application and a fresh cell count.
            self._tr_count += 1
            self._td_count = 0
            self._data_list = []
            self._current_application = PlanningApplication()

        elif tag == "td":
            self._td_count += 1

        elif tag == "a" and self._td_count == self._reference_col_no:
            if self._current_application is None:
                # A cell was opened without an enclosing <tr>; there is
                # no application to attach the link to.
                return

            # The href attribute contains the link to the info page
            for (key, value) in attrs:
                if key == "href":
                    self._current_application.info_url = urlparse.urljoin(self.search_url, value)

    def handle_endtag(self, tag):
        """Store collected cell text and finish rows and the table."""
        if self.debug:
            print("ending:  %s" % tag)

        if self._in_results_table != 2:
            return

        if tag == "table":
            self._in_results_table = 3
            return

        application = self._current_application
        if application is None:
            # Closing tag seen before any <tr> was opened in the results
            # table: nothing has been collected yet, so ignore it.
            return

        if tag == "tr":
            if application.council_reference is not None:
                # The info page is not fetched; the date that was searched
                # for is used as the received date.
                application.date_received = self._requested_date
                self._results.addApplication(application)

        elif tag == "td":
            if self._td_count == self._location_col_no:
                data = ' '.join(self._data_list).strip()
                application.address = data
                postcode = getPostcodeFromText(data)
                if postcode is not None:
                    application.postcode = postcode
                self._data_list = []
            elif self._td_count == self._description_col_no:
                data = ' '.join(self._data_list).strip()
                application.description = data
                self._data_list = []

        elif tag == 'a' and self._td_count == self._reference_col_no:
            # The link text in the reference column is the reference.
            data = ''.join(self._data_list).strip()
            application.council_reference = data
            application.comment_url = self._comment_url % {"council_reference": data}
            self._data_list = []

    def handle_data(self, data):
        """Collect text that falls inside one of the three wanted columns."""
        if self.debug:
            print(data)

        if self._in_results_table == 2:
            wanted_columns = (self._reference_col_no,
                              self._location_col_no,
                              self._description_col_no)
            if self._td_count in wanted_columns:
                self._data_list.append(data.strip())

    def getResultsByDayMonthYear(self, day, month, year):
        """Return the results object containing the applications found
        for the date passed in.

        day, month, year -- integers identifying the received date to
            search for; it is sent as both ends of the date range.
        """
        # We're going to need a date object for the requested date
        self._requested_date = datetime.date(year, month, day)

        required_format = "%d-%m-%Y"
        date_string = self._requested_date.strftime(required_format)

        search_data = urllib.urlencode({"ReceivedDateFrom": date_string,
                                        "ReceivedDateTo": date_string})

        search_request = urllib2.Request(self.search_url, search_data)
        search_response = urllib2.urlopen(search_request)
        try:
            search_contents = search_response.read()
        finally:
            # Release the connection even if the read fails.
            search_response.close()

        self.feed(search_contents)

        return self._results

    def getResults(self, day, month, year):
        """Run the search for the given date and return the output of the
        results object's displayXML().

        day, month, year -- anything int() accepts.
        """
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
-
-
class CoventrySearchParser(ApplicationSearchServletParser):
    """Column layout and results-table test for Coventry.

    The results table is recognised by width="100%" and border="0".
    """

    _reference_col_no = 1
    _location_col_no = 5
    _description_col_no = 8

    def _checkAttrsForResultsTable(self, attrs):
        """Set _in_results_table to 2 when every required attribute value
        is present in attrs, otherwise to 0."""
        required = {'width': '100%', 'border': '0'}
        matched = set()

        for name, value in attrs:
            if name in required and value == required[name]:
                matched.add(name)

        if len(matched) == len(required):
            self._in_results_table = 2
        else:
            self._in_results_table = 0
-
-
-
class AllerdaleSearchParser(ApplicationSearchServletParser):
    """Column layout and results-table test for Allerdale.

    The results table is recognised by class="nis_table" together with
    the fixed summary text checked below.
    """

    _reference_col_no = 1
    _location_col_no = 3
    _description_col_no = 4

    def _checkAttrsForResultsTable(self, attrs):
        """Set _in_results_table to 2 when every required attribute value
        is present in attrs, otherwise to 0."""
        required = {
            'class': 'nis_table',
            'summary': 'Table of planning applications that matched your query, showing reference number, received date, and address',
        }
        matched = set()

        for name, value in attrs:
            if name in required and value == required[name]:
                matched.add(name)

        if len(matched) == len(required):
            self._in_results_table = 2
        else:
            self._in_results_table = 0
-
-
-
class AlnwickSearchParser(ApplicationSearchServletParser):
    """Column layout and results-table test for Alnwick.

    Only the class attribute (class="niscontent") is checked when
    looking for the results table.
    """

    _reference_col_no = 1
    _location_col_no = 2
    _description_col_no = 7

    def _checkAttrsForResultsTable(self, attrs):
        """Set _in_results_table to 2 when attrs contains the expected
        class value, otherwise to 0."""
        target = ('class', 'niscontent')
        state = 0

        for name, value in attrs:
            if (name, value) == target:
                state = 2

        self._in_results_table = state
-
-
class BarrowSearchParser(ApplicationSearchServletParser):
    """Column layout and results-table test for Barrow.

    The results table is recognised by width="100%" and border="0".
    """

    _reference_col_no = 1
    _location_col_no = 3
    _description_col_no = 6

    def _checkAttrsForResultsTable(self, attrs):
        """Set _in_results_table to 2 when every required attribute value
        is present in attrs, otherwise to 0."""
        required = {'width': '100%', 'border': '0'}
        matched = set()

        for name, value in attrs:
            if name in required and value == required[name]:
                matched.add(name)

        if len(matched) == len(required):
            self._in_results_table = 2
        else:
            self._in_results_table = 0
-
-
class HartlepoolSearchParser(ApplicationSearchServletParser):
    """Column layout and results-table test for Hartlepool.

    The results table is recognised by its fixed summary text.
    """

    _reference_col_no = 1
    _location_col_no = 2
    _description_col_no = 3

    def _checkAttrsForResultsTable(self, attrs):
        """Set _in_results_table to 2 when attrs contains the expected
        summary value, otherwise to 0."""
        target = ('summary', "Table of planning applications that matched your query, showing reference number, received date, and address")
        state = 0

        for name, value in attrs:
            if (name, value) == target:
                state = 2

        self._in_results_table = state
-
-
class NorthWarksSearchParser(ApplicationSearchServletParser):
    """Column layout and results-table test for North Warwickshire.

    The results table is recognised by width="100%", border="0",
    cellspacing="0" and cellpadding="0".
    """

    _reference_col_no = 1
    _location_col_no = 3
    _description_col_no = 4

    def _checkAttrsForResultsTable(self, attrs):
        """Set _in_results_table to 2 when every required attribute value
        is present in attrs, otherwise to 0."""
        required = {
            'width': "100%",
            'border': '0',
            'cellspacing': '0',
            'cellpadding': '0',
        }
        matched = set()

        for name, value in attrs:
            if name in required and value == required[name]:
                matched.add(name)

        if len(matched) == len(required):
            self._in_results_table = 2
        else:
            self._in_results_table = 0
-
class StHelensSearchParser(ApplicationSearchServletParser):
    """Column layout and results-table test for St Helens.

    The results table is recognised by summary="Search Results List".
    """

    _reference_col_no = 1
    _location_col_no = 2
    _description_col_no = 5

    def _checkAttrsForResultsTable(self, attrs):
        """Set _in_results_table to 2 when attrs contains the expected
        summary value, otherwise to 0."""
        target = ('summary', "Search Results List")
        state = 0

        for name, value in attrs:
            if (name, value) == target:
                state = 2

        self._in_results_table = state
-
class EasingtonSearchParser(ApplicationSearchServletParser):
    """Column layout and results-table test for Easington.

    The results table is recognised by width="100%", border="0",
    cellspacing="0" and cellpadding="0".
    """

    _reference_col_no = 1
    _location_col_no = 3
    _description_col_no = 6

    def _checkAttrsForResultsTable(self, attrs):
        """Set _in_results_table to 2 when every required attribute value
        is present in attrs, otherwise to 0."""
        required = {
            'width': "100%",
            'border': '0',
            'cellspacing': '0',
            'cellpadding': '0',
        }
        matched = set()

        for name, value in attrs:
            if name in required and value == required[name]:
                matched.add(name)

        if len(matched) == len(required):
            self._in_results_table = 2
        else:
            self._in_results_table = 0
-
-
class HighPeakSearchParser(ApplicationSearchServletParser):
    """Column layout and results-table test for High Peak.

    The results table is recognised by class="data" and width="95%".
    """

    _reference_col_no = 1
    _location_col_no = 2
    _description_col_no = 5

    def _checkAttrsForResultsTable(self, attrs):
        """Set _in_results_table to 2 when every required attribute value
        is present in attrs, otherwise to 0."""
        required = {'class': "data", 'width': "95%"}
        matched = set()

        for name, value in attrs:
            if name in required and value == required[name]:
                matched.add(name)

        if len(matched) == len(required):
            self._in_results_table = 2
        else:
            self._in_results_table = 0
-
-
class WearValleySearchParser(ApplicationSearchServletParser):
    """Column layout and results-table test for Wear Valley.

    The results table is recognised by its fixed summary text.
    """

    _reference_col_no = 1
    _location_col_no = 3
    _description_col_no = 4

    def _checkAttrsForResultsTable(self, attrs):
        """Set _in_results_table to 2 when attrs contains the expected
        summary value, otherwise to 0."""
        target = ('summary', "Table of planning applications that matched your query, showing reference number, received date, and address")
        state = 0

        for name, value in attrs:
            if (name, value) == target:
                state = 2

        self._in_results_table = state
-
-
class WellingboroughSearchParser(ApplicationSearchServletParser):
    """Column layout and results-table test for Wellingborough.

    The results table is recognised by width="100%" and border="0".
    """

    _reference_col_no = 1
    _location_col_no = 3
    _description_col_no = 6

    def _checkAttrsForResultsTable(self, attrs):
        """Set _in_results_table to 2 when every required attribute value
        is present in attrs, otherwise to 0."""
        required = {'width': "100%", 'border': "0"}
        matched = set()

        for name, value in attrs:
            if name in required and value == required[name]:
                matched.add(name)

        if len(matched) == len(required):
            self._in_results_table = 2
        else:
            self._in_results_table = 0
-
class EalingSearchParser(ApplicationSearchServletParser):
    """Column layout and results-table test for Ealing.

    The results table is recognised by width="100%", cellspacing="0px",
    border="1px", cellpadding="2px" and bordercolor="#FFFFFF".
    """

    _reference_col_no = 1
    _location_col_no = 3
    _description_col_no = 4

    def _checkAttrsForResultsTable(self, attrs):
        """Set _in_results_table to 2 when every required attribute value
        is present in attrs, otherwise to 0."""
        required = {
            'width': "100%",
            'cellspacing': "0px",
            'border': "1px",
            'cellpadding': "2px",
            'bordercolor': "#FFFFFF",
        }
        matched = set()

        for name, value in attrs:
            if name in required and value == required[name]:
                matched.add(name)

        if len(matched) == len(required):
            self._in_results_table = 2
        else:
            self._in_results_table = 0
-
-
class HaringeySearchParser(ApplicationSearchServletParser):
    """Column layout and results-table test for Haringey.

    The results table is recognised by summary="Application Results".
    """

    _reference_col_no = 1
    _location_col_no = 2
    _description_col_no = 5

    def _checkAttrsForResultsTable(self, attrs):
        """Set _in_results_table to 2 when attrs contains the expected
        summary value, otherwise to 0."""
        target = ('summary', "Application Results")
        state = 0

        for name, value in attrs:
            if (name, value) == target:
                state = 2

        self._in_results_table = state
-
-
class DenbighshireSearchParser(ApplicationSearchServletParser):
    """Column layout and results-table test for Denbighshire.

    The results table is recognised by width="100%" and border="0".
    """

    _reference_col_no = 1
    _location_col_no = 3
    _description_col_no = 5

    def _checkAttrsForResultsTable(self, attrs):
        """Set _in_results_table to 2 when every required attribute value
        is present in attrs, otherwise to 0."""
        required = {'width': "100%", 'border': "0"}
        matched = set()

        for name, value in attrs:
            if name in required and value == required[name]:
                matched.add(name)

        if len(matched) == len(required):
            self._in_results_table = 2
        else:
            self._in_results_table = 0
-
-
if __name__ == "__main__":
    # Manual check: run one authority's search against the live site and
    # print what getResults returns.  The invocations for the other
    # authorities are kept commented out for reference; swap one in to
    # try it.
    #parser = CoventrySearchParser("Coventry", "Coventry", "http://planning.coventry.gov.uk")
    #print parser.getResults(28,3,2007)
    #parser = AllerdaleSearchParser("Allerdale", "Allerdale", "http://planning.allerdale.gov.uk")
    #print parser.getResults(28,3,2007)
    #parser = AlnwickSearchParser("Alnwick", "Alnwick", "http://services.castlemorpeth.gov.uk:7777")
    #print parser.getResults(28,3,2007)
    #parser = BarrowSearchParser("Barrow", "Barrow", "http://localportal.barrowbc.gov.uk")
    #print parser.getResults(28,3,2007)
    #parser = HartlepoolSearchParser("Hartlepool", "Hartlepool", "http://eforms.hartlepool.gov.uk:7777")
    #print parser.getResults(28,3,2007)
    #parser = NorthWarksSearchParser("North Warwickshire", "North Warks", "http://planning.northwarks.gov.uk")
    #print parser.getResults(28,3,2007)
    #parser = StHelensSearchParser("St Helens", "St Helens", "http://212.248.225.150:8080")
    #print parser.getResults(28,3,2007)
    #parser = EasingtonSearchParser("Easington", "Easington", "http://planning.easington.gov.uk")
    #print parser.getResults(28,3,2007)
    #parser = HighPeakSearchParser("High Peak", "High Peak", "http://planning.highpeak.gov.uk")
    #print parser.getResults(20,3,2007)
    #parser = WearValleySearchParser("Wear Valley", "Wear Valley", "http://planning.wearvalley.gov.uk")
    #print parser.getResults(20,3,2007)
    #parser = WellingboroughSearchParser("Wellingborough", "Wellingborough", "http://planning.wellingborough.gov.uk")
    #print parser.getResults(20,3,2007)
    #parser = EalingSearchParser("Ealing", "Ealing", "http://www.pam.ealing.gov.uk")
    #print parser.getResults(20,3,2007)
    #parser = DenbighshireSearchParser("Denbighshire", "Denbighshire", "http://planning.denbighshire.gov.uk")
    #print parser.getResults(20,3,2007)

    parser = HaringeySearchParser("Haringey", "Haringey", "http://www.planningservices.haringey.gov.uk")
    print(parser.getResults(3, 1, 2008))
|