|
-
- import urllib, urllib2
-
- import HTMLParser
- import urlparse
- import datetime, time
-
-
# Application list page; %d is replaced with the page number to fetch.
search_url = "http://www.southoxon.gov.uk/ccm/planning/ApplicationList.jsp?PAGE=%d"

# Comment form for one application; formatted with a mapping that supplies
# the council reference under the 'reference' key.
comment_url = "https://forms.southoxon.gov.uk/ufs/ufsmain?formid=PLANNINGCOMMENT&PLNGAPPL_REFERENCE=%(reference)s"

# Names passed to PlanningAuthorityResults when the result set is created.
authority_name = "South Oxfordshire District Council"
authority_short_name = "South Oxfordshire"
-
-
- from PlanningUtils import fixNewlines, \
- getPostcodeFromText, \
- PlanningAuthorityResults, \
- PlanningApplication
-
class SouthOxfordshireParser(HTMLParser.HTMLParser):
    """Parse the South Oxfordshire application list for a single date.

    The list page is fetched by page number, and a page can hold
    applications for several days.  The requested date (a datetime.date) is
    remembered so that the per-application info page is only downloaded for
    rows whose received date matches it.

    Only the third <table> of the page is examined.  Within a row of that
    table, the first <td> holds a link whose href is the info page and whose
    text is the council reference, and the fifth <td> holds the received
    date in "%d %B %Y" form.
    """

    def __init__(self):
        HTMLParser.HTMLParser.__init__(self)

        # Date being searched for; set by getResultsByDayMonthYear().
        self._requested_date = None

        # Number of <table> start tags seen so far.
        self._table_count = 0

        # Number of <td> start tags seen since the last completed date cell.
        self._td_count = 0

        # True between seeing the info link in the first <td> and reading
        # the text that follows it (the council reference).
        self._get_reference = False

        # Text collected from the fifth <td> (the received date).
        self._data = ''

        # Application for the row currently being parsed.
        self._current_application = None

        # Accumulates the applications that match the requested date.
        self._results = PlanningAuthorityResults(authority_name, authority_short_name)

    def _fetch(self, url):
        """Return the body of *url*, always closing the connection."""
        response = urllib2.urlopen(url)
        try:
            return response.read()
        finally:
            response.close()

    def handle_starttag(self, tag, attrs):
        """Track table/cell position and pick up the info page link."""
        if tag == 'table':
            self._table_count += 1

        if self._table_count != 3:
            return

        # Each row gets a fresh application object.
        if tag == 'tr':
            self._current_application = PlanningApplication()

        if tag == 'td':
            self._td_count += 1

        # A link in the first cell points at the info page.  The guard on
        # the application avoids a crash if a cell appears before any <tr>.
        if (tag == 'a' and self._td_count == 1
                and self._current_application is not None):
            for key, value in attrs:
                if key == 'href':
                    self._current_application.info_url = urlparse.urljoin(search_url, value)

                    # The text that follows the link is the reference.
                    self._get_reference = True

                    # href is the only attribute of interest.
                    break

    def handle_endtag(self, tag):
        """Finish a date cell or a whole row of the third table."""
        if self._table_count != 3:
            return

        application = self._current_application

        # End of a row: rows without a reference are skipped, and the info
        # page is only downloaded when the row is for the requested date.
        if (tag == 'tr'
                and application is not None
                and application.council_reference is not None
                and application.date_received == self._requested_date):

            info_page_parser = SouthOxfordshireInfoURLParser()
            info_page_parser.feed(self._fetch(application.info_url))

            application.address = info_page_parser.address
            application.postcode = getPostcodeFromText(info_page_parser.address)
            application.description = info_page_parser.description

            self._results.addApplication(application)

        # End of the fifth cell: the collected text is the received date.
        if tag == 'td' and self._td_count == 5:
            if application is not None:
                # Surrounding whitespace would make strptime reject an
                # otherwise valid date, so it is removed first.
                parsed = time.strptime(self._data.strip(), "%d %B %Y")
                app_year, app_month, app_day = parsed[:3]
                application.date_received = datetime.date(app_year, app_month, app_day)
            self._data = ''
            self._td_count = 0

    def handle_data(self, data):
        """Capture the council reference and the text of the date cell."""
        if self._table_count != 3:
            return

        # First text after the info link in the first cell is the reference.
        if self._td_count == 1 and self._get_reference:
            application = self._current_application
            application.council_reference = data

            # The comment URL depends only on the reference.
            application.comment_url = comment_url % {'reference': data}

            self._get_reference = False

        # The date may arrive in several pieces; it is parsed in
        # handle_endtag once the cell is complete.
        if self._td_count == 5:
            self._data += data

    def handle_entityref(self, ref):
        """Treat &nbsp; inside the date cell as an ordinary space."""
        if self._table_count == 3 and self._td_count == 5:
            if ref == 'nbsp':
                self._data += ' '

    def getResultsByDayMonthYear(self, day, month, year):
        """Return the results object containing the applications for the
        date passed in.

        day, month and year are integers.  The list page is chosen from the
        number of whole weeks between the requested date and today.
        """
        today = datetime.date.today()
        self._requested_date = datetime.date(year, month, day)
        delta = today - self._requested_date

        # Floor division keeps the page number an integer on any Python
        # version; a date in the future would otherwise give page 0 or
        # lower, so the first page is used as a minimum.
        page_number = max(1, delta.days // 7 + 1)

        contents = self._fetch(search_url % page_number)

        self.feed(contents)

        return self._results

    def getResults(self, day, month, year):
        """Like getResultsByDayMonthYear(), but accepts values convertible
        with int() and returns the result of displayXML() on the results."""
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
-
class SouthOxfordshireInfoURLParser(HTMLParser.HTMLParser):
    """Extract the description and address from the info page of a South
    Oxfordshire application.

    The page is scanned for a <td> whose stripped text is 'Location' or
    'Description'; the text of the <td> that follows such a label is stored
    as ``address`` or ``description`` respectively.  Both attributes stay
    None if the corresponding label is never found.
    """

    # Entity references that can be turned into plain ASCII text.  Without
    # this the base parser silently drops them (e.g. the '&' in '&amp;').
    _ENTITY_TEXT = {
        'nbsp': ' ',
        'amp': '&',
        'lt': '<',
        'gt': '>',
        'quot': '"',
        'apos': "'",
    }

    def __init__(self):
        HTMLParser.HTMLParser.__init__(self)

        self.address = None
        self.description = None

        # Per-field progress:
        #   0 - label not seen yet
        #   1 - label seen, the next <td> holds the value
        #   2 - value captured
        self._address_state = 0
        self._description_state = 0

        # True while inside a <td> whose text is being collected.
        self._in_td = False

        # Text of the <td> currently being read.
        self._data = ''

    def _still_searching(self):
        """Return True while at least one field has not been captured."""
        return self._address_state < 2 or self._description_state < 2

    def _append_text(self, text):
        """Add *text* to the current cell if a cell is being collected."""
        if self._in_td and self._still_searching():
            self._data += text

    def handle_starttag(self, tag, attrs):
        """Start collecting text whenever a new <td> opens."""
        if tag == 'td' and self._still_searching():
            self._in_td = True
            self._data = ''

    def handle_endtag(self, tag):
        """Interpret the finished <td> as either a value or a label."""
        if tag == 'td' and self._still_searching():
            # A pending value takes priority over label detection, so a
            # value cell is never mistaken for a label.
            if self._description_state == 1:
                self.description = self._data
                self._description_state = 2
            elif self._address_state == 1:
                self.address = self._data
                self._address_state = 2
            elif self._data.strip() == 'Description':
                self._description_state = 1
            elif self._data.strip() == 'Location':
                self._address_state = 1

            self._in_td = False

    def handle_data(self, data):
        """Collect the text of the current <td>."""
        self._append_text(data)

    def handle_entityref(self, name):
        """Keep common entities (such as &amp;) as text; others are
        ignored."""
        text = self._ENTITY_TEXT.get(name)
        if text is not None:
            self._append_text(text)

    def handle_charref(self, name):
        """Keep numeric character references in the ASCII range as text.

        *name* is the reference without '&#' and ';', e.g. '39' or 'x27'.
        References outside ASCII are ignored.
        """
        try:
            if name[:1] in ('x', 'X'):
                codepoint = int(name[1:], 16)
            else:
                codepoint = int(name)
        except ValueError:
            return
        if 0 <= codepoint < 128:
            self._append_text(chr(codepoint))
-
-
-
-
-
-
-
-
|