|
- """
- """
-
- import time
-
- import urlparse
- import pycurl
- import StringIO
-
- import datetime
-
-
- from BeautifulSoup import BeautifulSoup
-
- from PlanningUtils import PlanningApplication, \
- PlanningAuthorityResults, \
- getPostcodeFromText
-
class CairngormsParser:
    """Screen scraper for planning applications from the Cairngorms
    National Park e-planning site.

    A search for one date is performed with two requests, mirroring
    what the site's own javascript does:

    1. An AJAX-style urlencoded POST which returns only the number of
       matching applications.
    2. If that count is nonzero, a multipart form POST which returns an
       HTML table of results, parsed here with BeautifulSoup.  Each
       result row's info page is then fetched to pick up the
       description and OSGB grid references.
    """

    def __init__(self, *args):
        # *args is accepted but unused, for constructor compatibility
        # with the other parsers in this project.
        self.authority_name = "Cairngorms National Park"
        self.authority_short_name = "Cairngorms"
        self.referer = "http://www.cairngorms.co.uk/planning/e-planning/index.php"

        self.base_url = "http://www.cairngorms.co.uk/planning/e-planning/holding.php"

        # The timestamp here looks like the number of milliseconds since 1970
        self.first_post_url = "http://www.cairngorms.co.uk/planning/e-planning/search.php?timeStamp=%d"

        # There is no web comment form; comments go by email.
        self.comments_email_address = "planning@cairngorms.co.uk"

        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

    def getResultsByDayMonthYear(self, day, month, year):
        """Fetch all applications received on the given date.

        Returns the accumulated PlanningAuthorityResults object.
        """
        search_date = datetime.date(year, month, day)

        # Fields for the multipart results POST.  "%" is the site's
        # wildcard value for the drop-down fields.
        post_data = [
            ("CNPA_ref", ""),
            ("application_number", ""),
            ("LA_id", "%"),
            ("applicant_type", "%"),
            ("applicant_name", ""),
            ("development_address", ""),
            ("agent_name", ""),
            ("status", "%"),
            ("startDay", "%02d" % day),
            ("startMonth", "%02d" % month),
            ("startYear", "%d" % year),
            ("endDay", "%02d" % day),
            ("endMonth", "%02d" % month),
            ("endYear", "%d" % year),
            ]

        # Urlencoded body for the "how many results?" request.  The
        # literal "%" wildcards are deliberately left unescaped - that
        # is what the site's own javascript sends.
        first_post_data = "CNPA_ref=&application_number=&applicant_name=&development_address=&agent_name=&applicant_type=%%&LA_id=%%&status=%%&startYear=%(year)d&startMonth=%(month)02d&startDay=%(day)02d&endYear=%(year)d&endMonth=%(month)02d&endDay=%(day)02d" %{"day": day, "month": month, "year": year}

        curlobj = pycurl.Curl()
        # Make sure the libcurl handle is always released, even if a
        # request or the parsing below raises (previously it leaked on
        # every call).
        try:
            curlobj.setopt(pycurl.FOLLOWLOCATION, True)
            curlobj.setopt(pycurl.MAXREDIRS, 10)

            # First we do a normal post; this would happen as an AJAX
            # query from the browser and just returns the number of
            # applications found.
            fakefile = StringIO.StringIO()

            curlobj.setopt(pycurl.URL, self.first_post_url % (int(time.time() * 1000)))
            curlobj.setopt(pycurl.POST, True)
            curlobj.setopt(pycurl.WRITEFUNCTION, fakefile.write)
            curlobj.setopt(pycurl.POSTFIELDS, first_post_data)

            curlobj.perform()

            app_count = int(fakefile.getvalue())
            fakefile.close()

            if app_count:
                # Now we do a multipart form post to get the actual
                # results table.
                fakefile = StringIO.StringIO()

                curlobj.setopt(pycurl.URL, self.base_url)
                curlobj.setopt(pycurl.HTTPPOST, post_data)
                curlobj.setopt(pycurl.WRITEFUNCTION, fakefile.write)
                curlobj.setopt(pycurl.REFERER, self.referer)
                curlobj.perform()

                soup = BeautifulSoup(fakefile.getvalue())
                # We may as well free up the memory used by fakefile
                fakefile.close()

                # The first row of the results table is the header.
                for tr in soup.table.findAll("tr")[1:]:
                    application = PlanningApplication()
                    application.date_received = search_date
                    application.comment_url = self.comments_email_address

                    tds = tr.findAll("td")

                    application.council_reference = tds[1].string.strip()
                    application.info_url = urlparse.urljoin(self.base_url, tds[0].a['href'])

                    application.address = tds[2].string.strip()
                    application.postcode = getPostcodeFromText(application.address)

                    # We need the info page in order to get the description.
                    # We can't pass a unicode string to pycurl, so encode it.
                    curlobj.setopt(pycurl.URL, application.info_url.encode())
                    curlobj.setopt(pycurl.HTTPGET, True)

                    # This gives us something to use as the write callback.
                    fakefile = StringIO.StringIO()
                    curlobj.setopt(pycurl.WRITEFUNCTION, fakefile.write)

                    curlobj.perform()
                    info_soup = BeautifulSoup(fakefile.getvalue())
                    fakefile.close()

                    application.description = info_soup.find(text="Development Details").findNext("td").string.strip()
                    application.osgb_x = info_soup.find(text="Grid Ref East").findNext("td").string.strip()
                    application.osgb_y = info_soup.find(text="Grid Ref North").findNext("td").string.strip()

                    self._results.addApplication(application)
        finally:
            curlobj.close()

        return self._results

    def getResults(self, day, month, year):
        """Convenience wrapper: accepts day/month/year as strings and
        returns the day's results rendered as XML."""
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
-
if __name__ == '__main__':
    # Smoke test: fetch one known day's applications and print the XML.
    cairngorms_parser = CairngormsParser()
    print(cairngorms_parser.getResults(3, 10, 2008))
-
-
- # TODO
- # Is there pagination?
|