|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160 |
- import urllib2
- import urllib
- import urlparse
-
- import datetime, time
- import cgi
-
- import re
-
- from BeautifulSoup import BeautifulSoup
-
- from PlanningUtils import PlanningApplication, \
- PlanningAuthorityResults, \
- getPostcodeFromText
-
- date_format = "%d/%m/%Y"
-
- class ShropshireParser:
- reference_input_name = "ApplNum"
- contact_email_name = "offemail"
-
- comment_url = None
-
- use_validated_date = False
-
- def _get_info_link_list(self, soup):
- return [tr.a for tr in soup.find("table", id="tbllist").findAll("tr", recursive=False)[:-1]]
-
- def _get_postcode(self, info_soup):
- return info_soup.find("input", {"name": "Postcode"})['value']
-
- def __init__(self, authority_name, authority_short_name, base_url, debug=False):
- self.debug = debug
-
- self.authority_name = authority_name
- self.authority_short_name = authority_short_name
- self.base_url = base_url
- self._split_base_url = urlparse.urlsplit(base_url)
-
- self._current_application = None
- self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
-
-
- def getResultsByDayMonthYear(self, day, month, year):
- search_date = datetime.date(year, month, day)
- search_date_string = search_date.strftime(date_format)
-
- if self.use_validated_date:
- received_search_string = ""
- validated_search_string = search_date_string
- else:
- received_search_string = search_date_string
- validated_search_string = ""
-
- search_data = urllib.urlencode([
- ("txtAppNum", ""),
- ("txtAppName", ""),
- ("txtAppLocn", ""),
- ("txtAppPCode", ""),
- ("txtAppRecFrom", received_search_string),
- ("txtAppRecTo", received_search_string),
- ("txtAppDecFrom", ""),
- ("txtAppDecTo", ""),
- ("txtAppValFrom", validated_search_string),
- ("txtAppValTo", validated_search_string),
- ("district_drop", ""),
- ("parish_drop", ""),
- ("ward_drop", ""),
- ("ft", "yes"),
- ("submit1", "Submit"),
- ])
-
- split_search_url = self._split_base_url[:3] + (search_data, '')
- search_url = urlparse.urlunsplit(split_search_url)
-
- response = urllib2.urlopen(search_url)
- soup = BeautifulSoup(response.read())
-
- # Handle the case where there are no apps
- if soup.find(text=re.compile("No applications matched your query")):
- return self._results
-
-
- info_link_list = self._get_info_link_list(soup)
-
- for app_link in info_link_list:
- self._current_application = PlanningApplication()
-
- # We could get this from the info soup, but as we already know it, why bother.
- self._current_application.date_received = search_date
-
- self._current_application.info_url = urlparse.urljoin(self.base_url, app_link['href'])
-
- # To get the postcode we will need to download each info page
- info_response = urllib2.urlopen(self._current_application.info_url)
- info_soup = BeautifulSoup(info_response.read())
-
- self._current_application.council_reference = info_soup.find("input", {"name": self.reference_input_name})['value']
- self._current_application.address = info_soup.find("textarea", {"name": "Location"}).string.strip()
- self._current_application.postcode = self._get_postcode(info_soup)
-
- self._current_application.description = info_soup.find("textarea", {"name": "Proposal"}).string.strip()
-
- if self.comment_url:
- self._current_application.comment_url = self.comment_url
- else:
- self._current_application.comment_url = info_soup.find("input", {"name": self.contact_email_name})['value']
-
- # There is an OSGB position here :-)
- self._current_application.osgb_x = info_soup.find("input", {"name": "Easting"})['value']
- self._current_application.osgb_y = info_soup.find("input", {"name": "Northing"})['value']
-
- self._results.addApplication(self._current_application)
-
- return self._results
-
-
- def getResults(self, day, month, year):
- return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
-
-
- class NorthYorkshireParser(ShropshireParser):
- reference_input_name = "txtAppNum"
- contact_email_name = "contactEmail"
-
- comment_url = None
-
- # The date we give as the date_received here is actually the validated date.
- use_validated_date = True
-
- def _get_postcode(self, info_soup):
- return getPostcodeFromText(self._current_application.address)
-
- def _get_info_link_list(self, soup):
- return [div.a for div in soup.findAll("div", {"class": "listApplNum"})]
-
-
- class SouthNorthamptonshireParser(ShropshireParser):
- reference_input_name = "txtAppNum"
-
- comment_url = "http://www.southnorthants.gov.uk/mandoforms/servlet/com.mandoforms.server.MandoformsServer?MF_XML=ApplicationComments&MF_DEVICE=HTML"
-
- def _get_postcode(self, info_soup):
- return getPostcodeFromText(self._current_application.address)
-
- def _get_info_link_list(self, soup):
- return soup.find("div", {"class": "div-content-class"}).findAll("a")
-
- if __name__ == '__main__':
- parser = ShropshireParser("Shropshire County Council", "Shropshire", "http://planning.shropshire.gov.uk/PlanAppList.asp")
- print parser.getResults(6,6,2008)
- # parser = NorthYorkshireParser("North Yorkshire County Council", "North Yorkshire", "https://onlineplanningregister.northyorks.gov.uk/Online%20Register/PlanAppList.asp")
- # print parser.getResults(10,6,2008)
- # parser = SouthNorthamptonshireParser("South Northamptonshire Council", "South Northamptonshire", "http://snc.planning-register.co.uk/PlanAppList.asp")
- # print parser.getResults(5,6,2008)
-
-
- # TODO
-
- #1) Pagination: South Northants paginates at 25. I doubt this is a problem. Should also check out the others.
|