""" |
|
|
|
""" |

import time
import urlparse
import pycurl
import StringIO
import datetime

from BeautifulSoup import BeautifulSoup

from PlanningUtils import PlanningApplication, \
    PlanningAuthorityResults, \
    getPostcodeFromText


class CairngormsParser:
    def __init__(self, *args):
        self.authority_name = "Cairngorms National Park"
        self.authority_short_name = "Cairngorms"
        self.referer = "http://www.cairngorms.co.uk/planning/e-planning/index.php"

        self.base_url = "http://www.cairngorms.co.uk/planning/e-planning/holding.php"

        # The timestamp here looks like the number of milliseconds since 1970.
        self.first_post_url = "http://www.cairngorms.co.uk/planning/e-planning/search.php?timeStamp=%d"

        self.comments_email_address = "planning@cairngorms.co.uk"

        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

    def getResultsByDayMonthYear(self, day, month, year):
        search_date = datetime.date(year, month, day)

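        # Fields for the search form. The "%" values appear to act as
        # wildcards for the dropdown fields, and the start and end dates are
        # both set to the search date so that only that day's applications
        # are returned.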
        post_data = [
            ("CNPA_ref", ""),
            ("application_number", ""),
            ("LA_id", "%"),
            ("applicant_type", "%"),
            ("applicant_name", ""),
            ("development_address", ""),
            ("agent_name", ""),
            ("status", "%"),
            ("startDay", "%02d" % day),
            ("startMonth", "%02d" % month),
            ("startYear", "%d" % year),
            ("endDay", "%02d" % day),
            ("endMonth", "%02d" % month),
            ("endYear", "%d" % year),
            ]

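        # The same fields as post_data, urlencoded for the initial request
        # (the "%%" pairs are literal "%" signs after string formatting).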
        first_post_data = "CNPA_ref=&application_number=&applicant_name=&development_address=&agent_name=&applicant_type=%%&LA_id=%%&status=%%&startYear=%(year)d&startMonth=%(month)02d&startDay=%(day)02d&endYear=%(year)d&endMonth=%(month)02d&endDay=%(day)02d" % {"day": day, "month": month, "year": year}

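        # A single curl handle is reused for every request this method makes.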
        curlobj = pycurl.Curl()
        curlobj.setopt(pycurl.FOLLOWLOCATION, True)
        curlobj.setopt(pycurl.MAXREDIRS, 10)

        # First we do a normal POST. In the browser this would happen as an
        # AJAX query; the response is just the number of applications found.
        fakefile = StringIO.StringIO()

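        # The timeStamp parameter is presumably a cache-buster, so we pass
        # the current time in milliseconds.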
        curlobj.setopt(pycurl.URL, self.first_post_url % int(time.time() * 1000))
        curlobj.setopt(pycurl.POST, True)
        curlobj.setopt(pycurl.WRITEFUNCTION, fakefile.write)
        curlobj.setopt(pycurl.POSTFIELDS, first_post_data)

        curlobj.perform()

        app_count = int(fakefile.getvalue())
        fakefile.close()

        if app_count:
            # Now we do another multipart form post, this time to fetch the
            # results themselves.
            # This buffer's write method is used as the curl write callback.
            fakefile = StringIO.StringIO()

            curlobj.setopt(pycurl.URL, self.base_url)
            curlobj.setopt(pycurl.HTTPPOST, post_data)
            curlobj.setopt(pycurl.WRITEFUNCTION, fakefile.write)
            curlobj.setopt(pycurl.REFERER, self.referer)
            curlobj.perform()

            soup = BeautifulSoup(fakefile.getvalue())
            # We may as well free up the memory used by fakefile.
            fakefile.close()

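            # The first row of the results table is presumably the header,
            # so skip it.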
            for tr in soup.table.findAll("tr")[1:]:
                application = PlanningApplication()
                application.date_received = search_date
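                # There appears to be no online comment form, so the
                # comments email address is stored in comment_url instead.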
                application.comment_url = self.comments_email_address

                tds = tr.findAll("td")

                application.council_reference = tds[1].string.strip()
                application.info_url = urlparse.urljoin(self.base_url, tds[0].a['href'])

                application.address = tds[2].string.strip()
                application.postcode = getPostcodeFromText(application.address)

                # We're going to need to get the info page in order to get
                # the description and the grid references.
                # We can't pass a unicode string to pycurl, so we'll have to
                # encode it.
                curlobj.setopt(pycurl.URL, application.info_url.encode())
                curlobj.setopt(pycurl.HTTPGET, True)

                # A fresh buffer for the info page; its write method is the
                # new write callback.
                fakefile = StringIO.StringIO()
                curlobj.setopt(pycurl.WRITEFUNCTION, fakefile.write)

                curlobj.perform()
                info_soup = BeautifulSoup(fakefile.getvalue())
                fakefile.close()

                application.description = info_soup.find(text="Development Details").findNext("td").string.strip()
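                # "Grid Ref East" and "Grid Ref North" are presumably OSGB
                # eastings and northings, matching the osgb_x/osgb_y fields.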
                application.osgb_x = info_soup.find(text="Grid Ref East").findNext("td").string.strip()
                application.osgb_y = info_soup.find(text="Grid Ref North").findNext("td").string.strip()

                self._results.addApplication(application)

        return self._results

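    # Convenience wrapper: accepts the date components as strings and
    # returns the day's results as XML.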
    def getResults(self, day, month, year):
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()


if __name__ == '__main__':
    parser = CairngormsParser()
    print parser.getResults(3, 10, 2008)


# TODO
# Is there pagination?