Automatically exported from code.google.com/p/planningalerts
Nelze vybrat více než 25 témat Téma musí začínat písmenem nebo číslem, může obsahovat pomlčky („-“) a může být dlouhé až 35 znaků.
 
 
 
 
 
 

228 řádky
11 KiB

import cgi
import datetime
import re
import urllib
import urllib2
import urlparse

import BeautifulSoup

from PlanningUtils import getPostcodeFromText, \
    PlanningAuthorityResults, \
    PlanningApplication
  11. # - Browser request: --------------------------
  12. # {POST http://digitalmaidstone.co.uk/swiftlg/apas/run/WPHAPPCRITERIA HTTP/1.0} {Host: digitalmaidstone.co.uk
  13. # Accept: text/html, text/plain, text/css, text/sgml, */*;q=0.01
  14. # Accept-Encoding: gzip
  15. # Accept-Language: en
  16. # Pragma: no-cache
  17. # Cache-Control: no-cache
  18. # User-Agent: Lynx/2.8.6rel.4 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.6.3
  19. # Content-type: application/x-www-form-urlencoded
  20. # Content-length: 638
  21. # } %25.MAINBODY.WPACIS.1.=&APNID.MAINBODY.WPACIS.1.=&JUSTLOCATION.MAINBODY.WPACIS.1.=&JUSTDEVDESC.MAINBODY.WPACIS.1.=&DEVDESC.MAINBODY.WPACIS.1.=&SURNAME.MAINBODY.WPACIS.1.=&REGFROMDATE.MAINBODY.WPACIS.1.=01%2F11%2F2007&REGTODATE.MAINBODY.WPACIS.1.=02%2F11%2F2007&DECFROMDATE.MAINBODY.WPACIS.1.=&DECTODATE.MAINBODY.WPACIS.1.=&FINALGRANTFROM.MAINBODY.WPACIS.1.=&FINALGRANTTO.MAINBODY.WPACIS.1.=&APELDGDATFROM.MAINBODY.WPACIS.1.=&APELDGDATTO.MAINBODY.WPACIS.1.=&APEDECDATFROM.MAINBODY.WPACIS.1.=&APEDECDATTO.MAINBODY.WPACIS.1.=&AREA.MAINBODY.WPACIS.1.=&WARD.MAINBODY.WPACIS.1.=&PARISH.MAINBODY.WPACIS.1.=&SEARCHBUTTON.MAINBODY.WPACIS.1.=Search
  22. # server=[digitalmaidstone.co.uk] , port=[80], script=[/swiftlg/apas/run/WPHAPPCRITERIA]
  23. # request_line=[POST /swiftlg/apas/run/WPHAPPCRITERIA HTTP/1.0]
  24. # second page
  25. #http://digitalmaidstone.co.uk/swiftlg/apas/run/WPHAPPSEARCHRES.displayResultsURL?ResultID=243941&
  26. #StartIndex=11&
  27. #SortOrder=APNID:asc&
  28. #DispResultsAs=WPHAPPSEARCHRES&
  29. #BackURL=<a%20href=wphappcriteria.display?paSearchKey=147118>Search%20Criteria
  30. # Date format to enter into search boxes
  31. date_format = "%d/%m/%Y"
  32. class SwiftLGParser:
  33. search_path = "WPHAPPCRITERIA"
  34. info_path = "WPHAPPDETAIL.DisplayUrl?theApnID=%s"
  35. comment_path ="wphmakerep.displayURL?ApnID=%s"
  36. def _findResultsTable(self, soup):
  37. """Unless there is just one table in the page, the resuts table,
  38. override this in a subclass."""
  39. return soup.table
  40. def _findTRs(self, results_table):
  41. """The usual situation is for the results table to contain
  42. one row of headers, followed by a row per app.
  43. If this is not the case, override this in a subclass."""
  44. return results_table.findAll("tr")[1:]
  45. def __init__(self,
  46. authority_name,
  47. authority_short_name,
  48. base_url,
  49. debug=False):
  50. self.authority_name = authority_name
  51. self.authority_short_name = authority_short_name
  52. self.base_url = base_url
  53. self.search_url = urlparse.urljoin(base_url, self.search_path)
  54. self.info_url = urlparse.urljoin(base_url, self.info_path)
  55. self.comment_url = urlparse.urljoin(base_url, self.comment_path)
  56. self.debug = debug
  57. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  58. def getResultsByDayMonthYear(self, day, month, year):
  59. search_date = datetime.date(year, month, day)
  60. post_data = urllib.urlencode((
  61. ("REGFROMDATE.MAINBODY.WPACIS.1.", search_date.strftime(date_format)),
  62. ("REGTODATE.MAINBODY.WPACIS.1.", search_date.strftime(date_format)),
  63. ("SEARCHBUTTON.MAINBODY.WPACIS.1.", "Search"),
  64. ))
  65. response = urllib2.urlopen(self.search_url, post_data)
  66. contents = response.read()
  67. # Check for the no results warning
  68. if not contents.count("No Matching Applications Found"):
  69. soup = BeautifulSoup.BeautifulSoup(contents)
  70. # Get the links to later pages of results.
  71. later_pages = soup.findAll("a", {"href": re.compile("WPHAPPSEARCHRES\.displayResultsURL.*StartIndex=\d*.*")})
  72. for a in ["initial_search"] + later_pages:
  73. if a != "initial_search":
  74. url = a['href']
  75. # Example url
  76. #http://digitalmaidstone.co.uk/swiftlg/apas/run/WPHAPPSEARCHRES.displayResultsURL?ResultID=244037&StartIndex=11&SortOrder=APNID:asc&DispResultsAs=WPHAPPSEARCHRES&BackURL=<a href=wphappcriteria.display?paSearchKey=147170>Search Criteria</a>
  77. # urllib2 doesn't like this url, to make it happy, we'll
  78. # get rid of the BackURL parameter, which we don't need.
  79. split_url = urlparse.urlsplit(url)
  80. qs = split_url[3]
  81. # This gets us a dictionary of key to lists of values
  82. qsl = cgi.parse_qsl(qs)
  83. # Get rid of BackURL
  84. qsl.pop(-1)
  85. # I think this is safe, as there are no repeats of parameters
  86. new_qs = urllib.urlencode(qsl)
  87. url = urlparse.urlunsplit(split_url[:3] + (new_qs,) + split_url[4:])
  88. this_page_url = urlparse.urljoin(self.base_url, url)
  89. response = urllib2.urlopen(this_page_url)
  90. contents = response.read()
  91. soup = BeautifulSoup.BeautifulSoup(contents)
  92. results_table = self._findResultsTable(soup)#.body.find("table", {"class": "apas_tbl"})
  93. trs = self._findTRs(results_table)
  94. for tr in trs:
  95. self._current_application = PlanningApplication()
  96. tds = tr.findAll("td")
  97. # The first td
  98. #<td class="apas_tblContent"><a href="WPHAPPDETAIL.DisplayUrl?theApnID=07/1884&amp;backURL=&lt;a href=wphappcriteria.display?paSearchKey=147125&gt;Search Criteria&lt;/a&gt; &gt; &lt;a href='wphappsearchres.displayResultsURL?ResultID=243950%26StartIndex=1%26SortOrder=APNID:asc%26DispResultsAs=WPHAPPSEARCHRES%26BackURL=&lt;a href=wphappcriteria.display?paSearchKey=147125&gt;Search Criteria&lt;/a&gt;'&gt;Search Results&lt;/a&gt;"></a><a href="wphappcriteria.display?paSearchKey=147125">Search Criteria</a> > <a href="wphappsearchres.displayResultsURL?ResultID=243950%26StartIndex=1%26SortOrder=APNID:asc%26DispResultsAs=WPHAPPSEARCHRES%26BackURL=&lt;a href=wphappcriteria.display?paSearchKey=147125&gt;Search Criteria&lt;/a&gt;"></a><a href="wphappcriteria.display?paSearchKey=147125">Search Criteria</a>'>Search Results">07/1884</td>
  99. # The html here is a bit of a mess, and doesn't all get into
  100. # the soup.
  101. # We can get the reference from the first <a href> in td 0.
  102. first_link = tds[0].a['href']
  103. app_id = cgi.parse_qs(urlparse.urlsplit(first_link)[3])['theApnID'][0]
  104. self._current_application.date_received = search_date
  105. self._current_application.council_reference = app_id
  106. self._current_application.info_url = self.info_url %(app_id)
  107. self._current_application.comment_url = self.comment_url %(app_id)
  108. self._current_application.description = tds[1].string.strip()
  109. # the second td
  110. #<td class="apas_tblContent"><input type="HIDDEN" name="ORDERCOUNTER.PAHEADER.PACIS2.1-1." value="1" class="input-box" size="7" />
  111. #LAND ADJ. BRAMBLING, HAWKENBURY ROAD, HAWKENBURY, TN120EA
  112. #</td>
  113. address = ' '.join([x for x in tds[2].contents if isinstance(x, BeautifulSoup.NavigableString)]).strip()
  114. self._current_application.address = address
  115. self._current_application.postcode = getPostcodeFromText(address)
  116. self._results.addApplication(self._current_application)
  117. return self._results
  118. def getResults(self, day, month, year):
  119. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  120. class EastHertsParser(SwiftLGParser):
  121. def _findResultsTable(self, soup):
  122. return soup.findAll("table")[3]
  123. class GwyneddParser(SwiftLGParser):
  124. def _findResultsTable(self, soup):
  125. return soup.find("table", {"class": "thinBox"})
  126. class IslingtonParser(SwiftLGParser):
  127. def _findResultsTable(self, soup):
  128. return soup.table.table
  129. class LakeDistrictParser(SwiftLGParser):
  130. def _findResultsTable(self, soup):
  131. return soup.table.table
  132. class MacclesfieldParser(SwiftLGParser):
  133. def _findResultsTable(self, soup):
  134. return soup.findAll("table")[6]
  135. class MoleValleyParser(SwiftLGParser):
  136. def _findResultsTable(self, soup):
  137. # import pdb;pdb.set_trace()
  138. return soup.findAll("table")[2]
  139. class SloughParser(SwiftLGParser):
  140. def _findResultsTable(self, soup):
  141. return soup.findAll("table")[1]
  142. def _findTRs(self, results_table):
  143. return results_table.findAll("tr")[2:]
  144. if __name__ == '__main__':
  145. # parser = SwiftLGParser("Boston Borough Council", "Boston", "http://195.224.121.199/swiftlg/apas/run/")
  146. # parser = SwiftLGParser("Dudley", "Dudley", "http://www2.dudley.gov.uk/swiftlg/apas/run/")
  147. # parser = EastHertsParser("East Hertfordshire", "East Herts", "http://e-services.eastherts.gov.uk/swiftlg/apas/run/")
  148. # parser = GwyneddParser("Gwynedd", "Gwynedd", "http://www.gwynedd.gov.uk/swiftlg/apas/run/")
  149. # parser = IslingtonParser("Islington", "Islington", "https://www.islington.gov.uk/onlineplanning/apas/run/")
  150. # parser = LakeDistrictParser("Lake District", "Lake District", "http://www.lake-district.gov.uk/swiftlg/apas/run/")
  151. # parser = SwiftLGParser("Maidstone Borough Council", "Maidstone", "http://digitalmaidstone.co.uk/swiftlg/apas/run/")
  152. parser = MoleValleyParser("Mole Valley", "Mole Valley", "http://www.molevalley.gov.uk/swiftlg/apas/run/")
  153. # parser = SwiftLGParser("Pembrokeshire County Council", "Pembrokeshire", "http://planning.pembrokeshire.gov.uk/swiftlg/apas/run/")
  154. # parser = SwiftLGParser("Rochdale Metropolitan Borough Council", "Rochdale", "http://www.rochdale.gov.uk/swiftlg/apas/run/")
  155. # parser = SloughParser("Slough", "Slough", "http://www2.slough.gov.uk/swiftlg/apas/run/")
  156. # parser = SwiftLGParser("Snowdonia National Park", "Snowdonia", "http://www.snowdonia-npa.gov.uk/swiftlg/apas/run/")
  157. # parser = SwiftLGParser("St Edmundsbury", "Bury St Edmunds", "http://www.stedmundsbury.gov.uk/swiftlg/apas/run/")
  158. # parser = MacclesfieldParser("Macclesfield", "Macclesfield", "http://www.planportal.macclesfield.gov.uk/swiftlg/apas/run/")
  159. # parser = SwiftLGParser("Daventry District Council", "Daventry", "http://62.231.149.150/swiftlg/apas/run/wphappcriteria.display")
  160. # parser = SwiftLGParser("Warrington Borough Council", "Warrington", "http://212.248.237.123:8080/swiftlg/apas/run/wphappcriteria.display")
  161. # parser = SwiftLGParser("Cannock Chase District Council", "Cannock Chase", "http://planning.cannockchasedc.com/swiftlg/apas/run/wphappcriteria.display")
  162. # parser = SwiftLGParser("London Borough of Enfield", "Enfield", "http://forms.enfield.gov.uk/swiftlg/apas/run/wphappcriteria.display")
  163. print parser.getResults(20,11,2008)
  164. # To Do:
  165. #1) Check out comment url on Maidstone
  166. #2) Daventry, when it is back up.
  167. #3) Work out what goes wrong with Gwynedd on 06/11/2007