Automatically exported from code.google.com/p/planningalerts
 
 
 
 
 
 

135 regels
4.9 KiB

  1. """
  2. """
  3. import time
  4. import urlparse
  5. import pycurl
  6. import StringIO
  7. import datetime
  8. from BeautifulSoup import BeautifulSoup
  9. from PlanningUtils import PlanningApplication, \
  10. PlanningAuthorityResults, \
  11. getPostcodeFromText
  12. class CairngormsParser:
  13. def __init__(self, *args):
  14. self.authority_name = "Cairngorms National Park"
  15. self.authority_short_name = "Cairngorms"
  16. self.referer = "http://www.cairngorms.co.uk/planning/e-planning/index.php"
  17. self.base_url = "http://www.cairngorms.co.uk/planning/e-planning/holding.php"
  18. # The timestamp here looks like the number of milliseconds since 1970
  19. self.first_post_url = "http://www.cairngorms.co.uk/planning/e-planning/search.php?timeStamp=%d"
  20. self.comments_email_address = "planning@cairngorms.co.uk"
  21. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  22. def getResultsByDayMonthYear(self, day, month, year):
  23. search_date = datetime.date(year, month, day)
  24. post_data = [
  25. ("CNPA_ref", ""),
  26. ("application_number", ""),
  27. ("LA_id", "%"),
  28. ("applicant_type", "%"),
  29. ("applicant_name", ""),
  30. ("development_address", ""),
  31. ("agent_name", ""),
  32. ("status", "%"),
  33. ("startDay", "%02d" %day),
  34. ("startMonth", "%02d" %month),
  35. ("startYear", "%d" %year),
  36. ("endDay", "%02d" %day),
  37. ("endMonth", "%02d" %month),
  38. ("endYear", "%d" %year),
  39. ]
  40. first_post_data = "CNPA_ref=&application_number=&applicant_name=&development_address=&agent_name=&applicant_type=%%&LA_id=%%&status=%%&startYear=%(year)d&startMonth=%(month)02d&startDay=%(day)02d&endYear=%(year)d&endMonth=%(month)02d&endDay=%(day)02d" %{"day": day, "month": month, "year": year}
  41. curlobj = pycurl.Curl()
  42. curlobj.setopt(pycurl.FOLLOWLOCATION, True)
  43. curlobj.setopt(pycurl.MAXREDIRS, 10)
  44. # First we do a normal post, this would happen as an AJAX query
  45. # from the browser and just returns the number of applications found.
  46. fakefile = StringIO.StringIO()
  47. curlobj.setopt(pycurl.URL, self.first_post_url %(int(time.time()*1000)))
  48. curlobj.setopt(pycurl.POST, True)
  49. curlobj.setopt(pycurl.WRITEFUNCTION, fakefile.write)
  50. curlobj.setopt(pycurl.POSTFIELDS, first_post_data)
  51. curlobj.perform()
  52. app_count = int(fakefile.getvalue())
  53. fakefile.close()
  54. if app_count:
  55. # Now we do another multipart form post
  56. # This gives us something to use as the callback
  57. fakefile = StringIO.StringIO()
  58. curlobj.setopt(pycurl.URL, self.base_url)
  59. curlobj.setopt(pycurl.HTTPPOST, post_data)
  60. curlobj.setopt(pycurl.WRITEFUNCTION, fakefile.write)
  61. curlobj.setopt(pycurl.REFERER, self.referer)
  62. curlobj.perform()
  63. soup = BeautifulSoup(fakefile.getvalue())
  64. # We may as well free up the memory used by fakefile
  65. fakefile.close()
  66. for tr in soup.table.findAll("tr")[1:]:
  67. application = PlanningApplication()
  68. application.date_received = search_date
  69. application.comment_url = self.comments_email_address
  70. tds = tr.findAll("td")
  71. application.council_reference = tds[1].string.strip()
  72. application.info_url = urlparse.urljoin(self.base_url, tds[0].a['href'])
  73. application.address = tds[2].string.strip()
  74. application.postcode = getPostcodeFromText(application.address)
  75. # We're going to need to get the info page in order to get the description
  76. # We can't pass a unicode string to pycurl, so we'll have to encode it.
  77. curlobj.setopt(pycurl.URL, application.info_url.encode())
  78. curlobj.setopt(pycurl.HTTPGET, True)
  79. # This gives us something to use as the callback
  80. fakefile = StringIO.StringIO()
  81. curlobj.setopt(pycurl.WRITEFUNCTION, fakefile.write)
  82. curlobj.perform()
  83. info_soup = BeautifulSoup(fakefile.getvalue())
  84. fakefile.close()
  85. application.description = info_soup.find(text="Development Details").findNext("td").string.strip()
  86. application.osgb_x = info_soup.find(text="Grid Ref East").findNext("td").string.strip()
  87. application.osgb_y = info_soup.find(text="Grid Ref North").findNext("td").string.strip()
  88. self._results.addApplication(application)
  89. return self._results
  90. def getResults(self, day, month, year):
  91. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  92. if __name__ == '__main__':
  93. parser = CairngormsParser()
  94. print parser.getResults(3,10,2008)
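# TODO: Find out whether the search results are paginated. Only the single
# results page returned by the search is parsed at the moment.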