Automatically exported from code.google.com/p/planningalerts
Nie możesz wybrać więcej, niż 25 tematów Tematy muszą się zaczynać od litery lub cyfry, mogą zawierać myślniki ('-') i mogą mieć do 35 znaków.

Cairngorms.py 4.9 KiB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134
  1. """
  2. """
  3. import time
  4. import urlparse
  5. import pycurl
  6. import StringIO
  7. import datetime
  8. from BeautifulSoup import BeautifulSoup
  9. from PlanningUtils import PlanningApplication, \
  10. PlanningAuthorityResults, \
  11. getPostcodeFromText
  12. class CairngormsParser:
  13. def __init__(self, *args):
  14. self.authority_name = "Cairngorms National Park"
  15. self.authority_short_name = "Cairngorms"
  16. self.referer = "http://www.cairngorms.co.uk/planning/e-planning/index.php"
  17. self.base_url = "http://www.cairngorms.co.uk/planning/e-planning/holding.php"
  18. # The timestamp here looks like the number of milliseconds since 1970
  19. self.first_post_url = "http://www.cairngorms.co.uk/planning/e-planning/search.php?timeStamp=%d"
  20. self.comments_email_address = "planning@cairngorms.co.uk"
  21. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  22. def getResultsByDayMonthYear(self, day, month, year):
  23. search_date = datetime.date(year, month, day)
  24. post_data = [
  25. ("CNPA_ref", ""),
  26. ("application_number", ""),
  27. ("LA_id", "%"),
  28. ("applicant_type", "%"),
  29. ("applicant_name", ""),
  30. ("development_address", ""),
  31. ("agent_name", ""),
  32. ("status", "%"),
  33. ("startDay", "%02d" %day),
  34. ("startMonth", "%02d" %month),
  35. ("startYear", "%d" %year),
  36. ("endDay", "%02d" %day),
  37. ("endMonth", "%02d" %month),
  38. ("endYear", "%d" %year),
  39. ]
  40. first_post_data = "CNPA_ref=&application_number=&applicant_name=&development_address=&agent_name=&applicant_type=%%&LA_id=%%&status=%%&startYear=%(year)d&startMonth=%(month)02d&startDay=%(day)02d&endYear=%(year)d&endMonth=%(month)02d&endDay=%(day)02d" %{"day": day, "month": month, "year": year}
  41. curlobj = pycurl.Curl()
  42. curlobj.setopt(pycurl.FOLLOWLOCATION, True)
  43. curlobj.setopt(pycurl.MAXREDIRS, 10)
  44. # First we do a normal post, this would happen as an AJAX query
  45. # from the browser and just returns the number of applications found.
  46. fakefile = StringIO.StringIO()
  47. curlobj.setopt(pycurl.URL, self.first_post_url %(int(time.time()*1000)))
  48. curlobj.setopt(pycurl.POST, True)
  49. curlobj.setopt(pycurl.WRITEFUNCTION, fakefile.write)
  50. curlobj.setopt(pycurl.POSTFIELDS, first_post_data)
  51. curlobj.perform()
  52. app_count = int(fakefile.getvalue())
  53. fakefile.close()
  54. if app_count:
  55. # Now we do another multipart form post
  56. # This gives us something to use as the callback
  57. fakefile = StringIO.StringIO()
  58. curlobj.setopt(pycurl.URL, self.base_url)
  59. curlobj.setopt(pycurl.HTTPPOST, post_data)
  60. curlobj.setopt(pycurl.WRITEFUNCTION, fakefile.write)
  61. curlobj.setopt(pycurl.REFERER, self.referer)
  62. curlobj.perform()
  63. soup = BeautifulSoup(fakefile.getvalue())
  64. # We may as well free up the memory used by fakefile
  65. fakefile.close()
  66. for tr in soup.table.findAll("tr")[1:]:
  67. application = PlanningApplication()
  68. application.date_received = search_date
  69. application.comment_url = self.comments_email_address
  70. tds = tr.findAll("td")
  71. application.council_reference = tds[1].string.strip()
  72. application.info_url = urlparse.urljoin(self.base_url, tds[0].a['href'])
  73. application.address = tds[2].string.strip()
  74. application.postcode = getPostcodeFromText(application.address)
  75. # We're going to need to get the info page in order to get the description
  76. # We can't pass a unicode string to pycurl, so we'll have to encode it.
  77. curlobj.setopt(pycurl.URL, application.info_url.encode())
  78. curlobj.setopt(pycurl.HTTPGET, True)
  79. # This gives us something to use as the callback
  80. fakefile = StringIO.StringIO()
  81. curlobj.setopt(pycurl.WRITEFUNCTION, fakefile.write)
  82. curlobj.perform()
  83. info_soup = BeautifulSoup(fakefile.getvalue())
  84. fakefile.close()
  85. application.description = info_soup.find(text="Development Details").findNext("td").string.strip()
  86. application.osgb_x = info_soup.find(text="Grid Ref East").findNext("td").string.strip()
  87. application.osgb_y = info_soup.find(text="Grid Ref North").findNext("td").string.strip()
  88. self._results.addApplication(application)
  89. return self._results
  90. def getResults(self, day, month, year):
  91. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  92. if __name__ == '__main__':
  93. parser = CairngormsParser()
  94. print parser.getResults(3,10,2008)
  95. # TODO
  96. # Is there pagination?