|
-
- import urllib2
- import urllib
- import urlparse
-
- import datetime, time
- import cgi
-
-
- import cookielib
-
- cookie_jar = cookielib.CookieJar()  # module-level jar: HaltonParser adds its cookies to each request and stores the cookies from each response
-
-
- from BeautifulSoup import BeautifulSoup
-
- from PlanningUtils import PlanningApplication, \
- PlanningAuthorityResults, \
- getPostcodeFromText
-
-
- date_format = "%d/%m/%Y"  # day/month/year; used to format the DateApValFrom / DateApValTo search fields
- received_date_format = "%d %B %Y"  # day, full month name, year; used to parse the "Date Received" cell
-
- import re
-
-
-
# Splits a proposal description on a standalone "of" or "at" (any case, with
# whitespace on both sides).  The alternation is grouped so that the
# surrounding \s applies to both words; ungrouped, the pattern also matches
# "at " at the end of words such as "flat " or "Moat ", and " of" at the
# start of words such as " office".
address_finder_re = re.compile(r"\s(?:of|at)\s", re.I)
-
class HaltonParser:
    """Scraper for the Halton Borough Council planning application search.

    Posts a date-range search to ``base_url``, follows the "formNext" form
    from page to page until it is no longer present, and adds one
    PlanningApplication per "Case No:" entry to ``self._results``.
    """

    def __init__(self, *args):
        """Store the authority details and create the results container.

        Any positional arguments are accepted and ignored.
        """
        self.authority_name = "Halton Borough Council"
        self.authority_short_name = "Halton"
        self.base_url = "http://www.halton.gov.uk/planningapps/index.asp"

        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

    def _fetch_soup(self, request):
        """Send *request* with the module-level cookie jar and parse the reply."""
        cookie_jar.add_cookie_header(request)
        response = urllib2.urlopen(request)
        try:
            cookie_jar.extract_cookies(response, request)
            return BeautifulSoup(response.read())
        finally:
            # Release the connection even if reading or parsing fails.
            response.close()

    def _parse_application(self, caseno_string):
        """Build a PlanningApplication from the cells following a "Case No:" label."""
        application = PlanningApplication()

        application.council_reference = caseno_string.findNext("td").string
        application.description = caseno_string.findNext(text="Details of proposal:").findNext("td").string.strip()

        # strptime rejects surrounding whitespace, so trim the cell text first.
        received_text = caseno_string.findNext(text="Date Received").findNext("td").string
        application.date_received = datetime.datetime.strptime(received_text.strip(), received_date_format).date()

        # The address is taken to be whatever follows the last match of
        # address_finder_re in the description.  split() always returns at
        # least one element, so with no match this is the whole description.
        application.address = address_finder_re.split(application.description)[-1].strip()

        application.postcode = getPostcodeFromText(application.description)

        application.comment_url = urlparse.urljoin(self.base_url, caseno_string.findNext("form")['action'])

        # Every application gets the search page itself as its info URL.
        application.info_url = self.base_url

        return application

    def _next_page_request(self, soup):
        """Return a Request for the next results page, or None on the last page."""
        next_form = soup.find("form", id="formNext")
        if next_form is None:
            return None

        action = next_form['action']

        # The inputs are collected from the first <td> that follows the form
        # tag, not from the form element itself.
        inputs = next_form.findNext("td").findAll("input")
        post_data = urllib.urlencode([(field['name'], field['value']) for field in inputs])

        return urllib2.Request(urlparse.urljoin(self.base_url, action), post_data)

    def getResultsByDayMonthYear(self, day, month, year):
        """Search for applications for the given date and collect them.

        The search range runs from the given day to the following day.
        Applications are added to ``self._results``, which is created once
        in ``__init__``, so repeated calls on one instance accumulate.

        Parameters: day, month, year -- integers passed to datetime.date.
        Returns: the PlanningAuthorityResults object held by this parser.
        Raises: ValueError from datetime.date for an invalid date; errors
        raised by urllib2 or by the page parsing propagate unchanged.
        """
        search_day = datetime.date(year, month, day)
        following_day = search_day + datetime.timedelta(1)

        post_data = urllib.urlencode(
            [
                ("DateApValFrom", search_day.strftime(date_format)),
                ("DateApValTo", following_day.strftime(date_format)),
                ("DropWeekDate", "0"),
                ("DropAppealStatus", "0"),
                ("PageSize", "10"),
                ("Action", "Search"),
            ]
        )

        request = urllib2.Request(self.base_url, post_data)

        # One iteration per results page; stops when no "formNext" form exists.
        while request is not None:
            soup = self._fetch_soup(request)

            for caseno_string in soup.findAll(text="Case No:"):
                self._results.addApplication(self._parse_application(caseno_string))

            request = self._next_page_request(soup)

        return self._results

    def getResults(self, day, month, year):
        """Run the search and return ``displayXML()`` of the results.

        day, month and year may be anything ``int()`` accepts.
        """
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
-
if __name__ == '__main__':
    # Manual run: fetch the results for 4 August 2008 and print them.
    parser = HaltonParser()
    xml_output = parser.getResults(4, 8, 2008)
    print(xml_output)
|