Automatically exported from code.google.com/p/planningalerts
Não pode escolher mais do que 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

SwiftLG.py 11 KiB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224
  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import cgi
  5. import re
  6. import datetime
  7. import BeautifulSoup
  8. from PlanningUtils import getPostcodeFromText, \
  9. PlanningAuthorityResults, \
  10. PlanningApplication
  11. # - Browser request: --------------------------
  12. # {POST http://digitalmaidstone.co.uk/swiftlg/apas/run/WPHAPPCRITERIA HTTP/1.0} {Host: digitalmaidstone.co.uk
  13. # Accept: text/html, text/plain, text/css, text/sgml, */*;q=0.01
  14. # Accept-Encoding: gzip
  15. # Accept-Language: en
  16. # Pragma: no-cache
  17. # Cache-Control: no-cache
  18. # User-Agent: Lynx/2.8.6rel.4 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.6.3
  19. # Content-type: application/x-www-form-urlencoded
  20. # Content-length: 638
  21. # } %25.MAINBODY.WPACIS.1.=&APNID.MAINBODY.WPACIS.1.=&JUSTLOCATION.MAINBODY.WPACIS.1.=&JUSTDEVDESC.MAINBODY.WPACIS.1.=&DEVDESC.MAINBODY.WPACIS.1.=&SURNAME.MAINBODY.WPACIS.1.=&REGFROMDATE.MAINBODY.WPACIS.1.=01%2F11%2F2007&REGTODATE.MAINBODY.WPACIS.1.=02%2F11%2F2007&DECFROMDATE.MAINBODY.WPACIS.1.=&DECTODATE.MAINBODY.WPACIS.1.=&FINALGRANTFROM.MAINBODY.WPACIS.1.=&FINALGRANTTO.MAINBODY.WPACIS.1.=&APELDGDATFROM.MAINBODY.WPACIS.1.=&APELDGDATTO.MAINBODY.WPACIS.1.=&APEDECDATFROM.MAINBODY.WPACIS.1.=&APEDECDATTO.MAINBODY.WPACIS.1.=&AREA.MAINBODY.WPACIS.1.=&WARD.MAINBODY.WPACIS.1.=&PARISH.MAINBODY.WPACIS.1.=&SEARCHBUTTON.MAINBODY.WPACIS.1.=Search
  22. # server=[digitalmaidstone.co.uk] , port=[80], script=[/swiftlg/apas/run/WPHAPPCRITERIA]
  23. # request_line=[POST /swiftlg/apas/run/WPHAPPCRITERIA HTTP/1.0]
  24. # second page
  25. #http://digitalmaidstone.co.uk/swiftlg/apas/run/WPHAPPSEARCHRES.displayResultsURL?ResultID=243941&
  26. #StartIndex=11&
  27. #SortOrder=APNID:asc&
  28. #DispResultsAs=WPHAPPSEARCHRES&
  29. #BackURL=<a%20href=wphappcriteria.display?paSearchKey=147118>Search%20Criteria
  30. # Date format to enter into search boxes
  31. date_format = "%d/%m/%Y"
  32. class SwiftLGParser:
  33. search_path = "WPHAPPCRITERIA"
  34. info_path = "WPHAPPDETAIL.DisplayUrl?theApnID=%s"
  35. comment_path ="wphmakerep.displayURL?ApnID=%s"
  36. def _findResultsTable(self, soup):
  37. """Unless there is just one table in the page, the resuts table,
  38. override this in a subclass."""
  39. return soup.table
  40. def _findTRs(self, results_table):
  41. """The usual situation is for the results table to contain
  42. one row of headers, followed by a row per app.
  43. If this is not the case, override this in a subclass."""
  44. # import pdb;pdb.set_trace()
  45. return results_table.findAll("tr")[1:]
  46. def __init__(self,
  47. authority_name,
  48. authority_short_name,
  49. base_url,
  50. debug=False):
  51. self.authority_name = authority_name
  52. self.authority_short_name = authority_short_name
  53. self.base_url = base_url
  54. self.search_url = urlparse.urljoin(base_url, self.search_path)
  55. self.info_url = urlparse.urljoin(base_url, self.info_path)
  56. self.comment_url = urlparse.urljoin(base_url, self.comment_path)
  57. self.debug = debug
  58. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  59. def getResultsByDayMonthYear(self, day, month, year):
  60. search_date = datetime.date(year, month, day)
  61. post_data = urllib.urlencode((
  62. ("REGFROMDATE.MAINBODY.WPACIS.1.", search_date.strftime(date_format)),
  63. ("REGTODATE.MAINBODY.WPACIS.1.", search_date.strftime(date_format)),
  64. ("SEARCHBUTTON.MAINBODY.WPACIS.1.", "Search"),
  65. ))
  66. response = urllib2.urlopen(self.search_url, post_data)
  67. contents = response.read()
  68. # Check for the no results warning
  69. if not contents.count("No Matching Applications Found"):
  70. soup = BeautifulSoup.BeautifulSoup(contents)
  71. # Get the links to later pages of results.
  72. later_pages = soup.findAll("a", {"href": re.compile("WPHAPPSEARCHRES\.displayResultsURL.*StartIndex=\d*.*")})
  73. for a in ["initial_search"] + later_pages:
  74. if a != "initial_search":
  75. url = a['href']
  76. # Example url
  77. #http://digitalmaidstone.co.uk/swiftlg/apas/run/WPHAPPSEARCHRES.displayResultsURL?ResultID=244037&StartIndex=11&SortOrder=APNID:asc&DispResultsAs=WPHAPPSEARCHRES&BackURL=<a href=wphappcriteria.display?paSearchKey=147170>Search Criteria</a>
  78. # urllib2 doesn't like this url, to make it happy, we'll
  79. # get rid of the BackURL parameter, which we don't need.
  80. split_url = urlparse.urlsplit(url)
  81. qs = split_url[3]
  82. # This gets us a dictionary of key to lists of values
  83. qsl = cgi.parse_qsl(qs)
  84. # Get rid of BackURL
  85. qsl.pop(-1)
  86. # I think this is safe, as there are no repeats of parameters
  87. new_qs = urllib.urlencode(qsl)
  88. url = urlparse.urlunsplit(split_url[:3] + (new_qs,) + split_url[4:])
  89. this_page_url = urlparse.urljoin(self.base_url, url)
  90. response = urllib2.urlopen(this_page_url)
  91. contents = response.read()
  92. soup = BeautifulSoup.BeautifulSoup(contents)
  93. results_table = self._findResultsTable(soup)#.body.find("table", {"class": "apas_tbl"})
  94. trs = self._findTRs(results_table)
  95. for tr in trs:
  96. self._current_application = PlanningApplication()
  97. tds = tr.findAll("td")
  98. # The first td
  99. #<td class="apas_tblContent"><a href="WPHAPPDETAIL.DisplayUrl?theApnID=07/1884&amp;backURL=&lt;a href=wphappcriteria.display?paSearchKey=147125&gt;Search Criteria&lt;/a&gt; &gt; &lt;a href='wphappsearchres.displayResultsURL?ResultID=243950%26StartIndex=1%26SortOrder=APNID:asc%26DispResultsAs=WPHAPPSEARCHRES%26BackURL=&lt;a href=wphappcriteria.display?paSearchKey=147125&gt;Search Criteria&lt;/a&gt;'&gt;Search Results&lt;/a&gt;"></a><a href="wphappcriteria.display?paSearchKey=147125">Search Criteria</a> > <a href="wphappsearchres.displayResultsURL?ResultID=243950%26StartIndex=1%26SortOrder=APNID:asc%26DispResultsAs=WPHAPPSEARCHRES%26BackURL=&lt;a href=wphappcriteria.display?paSearchKey=147125&gt;Search Criteria&lt;/a&gt;"></a><a href="wphappcriteria.display?paSearchKey=147125">Search Criteria</a>'>Search Results">07/1884</td>
  100. # The html here is a bit of a mess, and doesn't all get into
  101. # the soup.
  102. # We can get the reference from the first <a href> in td 0.
  103. first_link = tds[0].a['href']
  104. app_id = cgi.parse_qs(urlparse.urlsplit(first_link)[3])['theApnID'][0]
  105. self._current_application.date_received = search_date
  106. self._current_application.council_reference = app_id
  107. self._current_application.info_url = self.info_url %(app_id)
  108. self._current_application.comment_url = self.comment_url %(app_id)
  109. self._current_application.description = tds[1].string.strip()
  110. # the second td
  111. #<td class="apas_tblContent"><input type="HIDDEN" name="ORDERCOUNTER.PAHEADER.PACIS2.1-1." value="1" class="input-box" size="7" />
  112. #LAND ADJ. BRAMBLING, HAWKENBURY ROAD, HAWKENBURY, TN120EA
  113. #</td>
  114. address = ' '.join([x for x in tds[2].contents if isinstance(x, BeautifulSoup.NavigableString)]).strip()
  115. self._current_application.address = address
  116. self._current_application.postcode = getPostcodeFromText(address)
  117. self._results.addApplication(self._current_application)
  118. return self._results
  119. def getResults(self, day, month, year):
  120. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  121. class EastHertsParser(SwiftLGParser):
  122. def _findResultsTable(self, soup):
  123. return soup.findAll("table")[3]
  124. class GwyneddParser(SwiftLGParser):
  125. def _findResultsTable(self, soup):
  126. return soup.find("table", {"class": "thinBox"})
  127. class IslingtonParser(SwiftLGParser):
  128. def _findResultsTable(self, soup):
  129. return soup.table.table
  130. class MacclesfieldParser(SwiftLGParser):
  131. def _findResultsTable(self, soup):
  132. return soup.findAll("table")[6]
  133. class MoleValleyParser(SwiftLGParser):
  134. def _findResultsTable(self, soup):
  135. # import pdb;pdb.set_trace()
  136. return soup.findAll("table")[2]
  137. class SloughParser(SwiftLGParser):
  138. def _findResultsTable(self, soup):
  139. return soup.findAll("table")[1]
  140. def _findTRs(self, results_table):
  141. return results_table.findAll("tr")[2:]
  142. if __name__ == '__main__':
  143. # parser = SwiftLGParser("Boston Borough Council", "Boston", "http://195.224.121.199/swiftlg/apas/run/")
  144. # parser = SwiftLGParser("Dudley", "Dudley", "http://www2.dudley.gov.uk/swiftlg/apas/run/")
  145. # parser = EastHertsParser("East Hertfordshire", "East Herts", "http://e-services.eastherts.gov.uk/swiftlg/apas/run/")
  146. # parser = GwyneddParser("Gwynedd", "Gwynedd", "http://www.gwynedd.gov.uk/swiftlg/apas/run/")
  147. # parser = IslingtonParser("Islington", "Islington", "https://www.islington.gov.uk/onlineplanning/apas/run/")
  148. parser = SwiftLGParser("Lake District", "Lake District", "http://www.lake-district.gov.uk/swiftlg/apas/run/")
  149. # parser = SwiftLGParser("Maidstone Borough Council", "Maidstone", "http://digitalmaidstone.co.uk/swiftlg/apas/run/")
  150. # parser = MoleValleyParser("Mole Valley", "Mole Valley", "http://www.molevalley.gov.uk/swiftlg/apas/run/")
  151. # parser = SwiftLGParser("Pembrokeshire County Council", "Pembrokeshire", "http://planning.pembrokeshire.gov.uk/swiftlg/apas/run/")
  152. # parser = SwiftLGParser("Rochdale Metropolitan Borough Council", "Rochdale", "http://www.rochdale.gov.uk/swiftlg/apas/run/")
  153. # parser = SloughParser("Slough", "Slough", "http://www2.slough.gov.uk/swiftlg/apas/run/")
  154. # parser = SwiftLGParser("Snowdonia National Park", "Snowdonia", "http://www.snowdonia-npa.gov.uk/swiftlg/apas/run/")
  155. # parser = SwiftLGParser("St Edmundsbury", "Bury St Edmunds", "http://www.stedmundsbury.gov.uk/swiftlg/apas/run/")
  156. # parser = MacclesfieldParser("Macclesfield", "Macclesfield", "http://www.planportal.macclesfield.gov.uk/swiftlg/apas/run/")
  157. # parser = SwiftLGParser("Daventry District Council", "Daventry", "http://62.231.149.150/swiftlg/apas/run/wphappcriteria.display")
  158. # parser = SwiftLGParser("Warrington Borough Council", "Warrington", "http://212.248.237.123:8080/swiftlg/apas/run/wphappcriteria.display")
  159. # parser = SwiftLGParser("Cannock Chase District Council", "Cannock Chase", "http://planning.cannockchasedc.com/swiftlg/apas/run/wphappcriteria.display")
  160. # parser = SwiftLGParser("London Borough of Enfield", "Enfield", "http://forms.enfield.gov.uk/swiftlg/apas/run/wphappcriteria.display")
  161. print parser.getResults(18,3,2009)
  162. # To Do:
  163. #1) Check out comment url on Maidstone
  164. #2) Daventry, when it is back up.
  165. #3) Work out what goes wrong with Gwynedd on 06/11/2007