Automatically exported from code.google.com/p/planningalerts
Вы не можете выбрать более 25 тем Темы должны начинаться с буквы или цифры, могут содержать дефисы(-) и должны содержать не более 35 символов.
 
 
 
 
 
 

226 строки
10 KiB

  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import cgi
  5. import re
  6. import datetime
  7. from BeautifulSoup import BeautifulSoup
  8. from PlanningUtils import getPostcodeFromText, \
  9. PlanningAuthorityResults, \
  10. PlanningApplication
  11. # - Browser request: --------------------------
  12. # {POST http://digitalmaidstone.co.uk/swiftlg/apas/run/WPHAPPCRITERIA HTTP/1.0} {Host: digitalmaidstone.co.uk
  13. # Accept: text/html, text/plain, text/css, text/sgml, */*;q=0.01
  14. # Accept-Encoding: gzip
  15. # Accept-Language: en
  16. # Pragma: no-cache
  17. # Cache-Control: no-cache
  18. # User-Agent: Lynx/2.8.6rel.4 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.6.3
  19. # Content-type: application/x-www-form-urlencoded
  20. # Content-length: 638
  21. # } %25.MAINBODY.WPACIS.1.=&APNID.MAINBODY.WPACIS.1.=&JUSTLOCATION.MAINBODY.WPACIS.1.=&JUSTDEVDESC.MAINBODY.WPACIS.1.=&DEVDESC.MAINBODY.WPACIS.1.=&SURNAME.MAINBODY.WPACIS.1.=&REGFROMDATE.MAINBODY.WPACIS.1.=01%2F11%2F2007&REGTODATE.MAINBODY.WPACIS.1.=02%2F11%2F2007&DECFROMDATE.MAINBODY.WPACIS.1.=&DECTODATE.MAINBODY.WPACIS.1.=&FINALGRANTFROM.MAINBODY.WPACIS.1.=&FINALGRANTTO.MAINBODY.WPACIS.1.=&APELDGDATFROM.MAINBODY.WPACIS.1.=&APELDGDATTO.MAINBODY.WPACIS.1.=&APEDECDATFROM.MAINBODY.WPACIS.1.=&APEDECDATTO.MAINBODY.WPACIS.1.=&AREA.MAINBODY.WPACIS.1.=&WARD.MAINBODY.WPACIS.1.=&PARISH.MAINBODY.WPACIS.1.=&SEARCHBUTTON.MAINBODY.WPACIS.1.=Search
  22. # server=[digitalmaidstone.co.uk] , port=[80], script=[/swiftlg/apas/run/WPHAPPCRITERIA]
  23. # request_line=[POST /swiftlg/apas/run/WPHAPPCRITERIA HTTP/1.0]
  24. # second page
  25. #http://digitalmaidstone.co.uk/swiftlg/apas/run/WPHAPPSEARCHRES.displayResultsURL?ResultID=243941&
  26. #StartIndex=11&
  27. #SortOrder=APNID:asc&
  28. #DispResultsAs=WPHAPPSEARCHRES&
  29. #BackURL=<a%20href=wphappcriteria.display?paSearchKey=147118>Search%20Criteria
# Date format to enter into the search boxes: a strftime() pattern producing
# DD/MM/YYYY. It is applied to the search date to fill both the
# "registered from" and "registered to" form fields.
date_format = "%d/%m/%Y"
  32. class SwiftLGParser:
  33. search_path = "WPHAPPCRITERIA"
  34. info_path = "WPHAPPDETAIL.DisplayUrl?theApnID=%s"
  35. comment_path ="wphmakerep.displayURL?ApnID=%s"
  36. def _findResultsTable(self, soup):
  37. """Unless there is just one table in the page, the resuts table,
  38. override this in a subclass."""
  39. return soup.table
  40. def _findTRs(self, results_table):
  41. """The usual situation is for the results table to contain
  42. one row of headers, followed by a row per app.
  43. If this is not the case, override this in a subclass."""
  44. return results_table.findAll("tr")[1:]
  45. def __init__(self,
  46. authority_name,
  47. authority_short_name,
  48. base_url,
  49. debug=False):
  50. self.authority_name = authority_name
  51. self.authority_short_name = authority_short_name
  52. self.base_url = base_url
  53. self.search_url = urlparse.urljoin(base_url, self.search_path)
  54. self.info_url = urlparse.urljoin(base_url, self.info_path)
  55. self.comment_url = urlparse.urljoin(base_url, self.comment_path)
  56. self.debug = debug
  57. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  58. def getResultsByDayMonthYear(self, day, month, year):
  59. search_date = datetime.date(year, month, day)
  60. post_data = urllib.urlencode((
  61. ("REGFROMDATE.MAINBODY.WPACIS.1.", search_date.strftime(date_format)),
  62. ("REGTODATE.MAINBODY.WPACIS.1.", search_date.strftime(date_format)),
  63. ("SEARCHBUTTON.MAINBODY.WPACIS.1.", "Search"),
  64. ))
  65. response = urllib2.urlopen(self.search_url, post_data)
  66. contents = response.read()
  67. # Check for the no results warning
  68. if not contents.count("No Matching Applications Found"):
  69. soup = BeautifulSoup(contents)
  70. # Get the links to later pages of results.
  71. later_pages = soup.findAll("a", {"href": re.compile("WPHAPPSEARCHRES\.displayResultsURL.*StartIndex=\d*.*")})
  72. for a in ["initial_search"] + later_pages:
  73. if a != "initial_search":
  74. url = a['href']
  75. # Example url
  76. #http://digitalmaidstone.co.uk/swiftlg/apas/run/WPHAPPSEARCHRES.displayResultsURL?ResultID=244037&StartIndex=11&SortOrder=APNID:asc&DispResultsAs=WPHAPPSEARCHRES&BackURL=<a href=wphappcriteria.display?paSearchKey=147170>Search Criteria</a>
  77. # urllib2 doesn't like this url, to make it happy, we'll
  78. # get rid of the BackURL parameter, which we don't need.
  79. split_url = urlparse.urlsplit(url)
  80. qs = split_url[3]
  81. # This gets us a dictionary of key to lists of values
  82. qsl = cgi.parse_qsl(qs)
  83. # Get rid of BackURL
  84. qsl.pop(-1)
  85. # I think this is safe, as there are no repeats of parameters
  86. new_qs = urllib.urlencode(qsl)
  87. url = urlparse.urlunsplit(split_url[:3] + (new_qs,) + split_url[4:])
  88. this_page_url = urlparse.urljoin(self.base_url, url)
  89. response = urllib2.urlopen(this_page_url)
  90. contents = response.read()
  91. soup = BeautifulSoup(contents)
  92. results_table = self._findResultsTable(soup)#.body.find("table", {"class": "apas_tbl"})
  93. trs = self._findTRs(results_table)
  94. for tr in trs:
  95. self._current_application = PlanningApplication()
  96. tds = tr.findAll("td")
  97. # The first td
  98. #<td class="apas_tblContent"><a href="WPHAPPDETAIL.DisplayUrl?theApnID=07/1884&amp;backURL=&lt;a href=wphappcriteria.display?paSearchKey=147125&gt;Search Criteria&lt;/a&gt; &gt; &lt;a href='wphappsearchres.displayResultsURL?ResultID=243950%26StartIndex=1%26SortOrder=APNID:asc%26DispResultsAs=WPHAPPSEARCHRES%26BackURL=&lt;a href=wphappcriteria.display?paSearchKey=147125&gt;Search Criteria&lt;/a&gt;'&gt;Search Results&lt;/a&gt;"></a><a href="wphappcriteria.display?paSearchKey=147125">Search Criteria</a> > <a href="wphappsearchres.displayResultsURL?ResultID=243950%26StartIndex=1%26SortOrder=APNID:asc%26DispResultsAs=WPHAPPSEARCHRES%26BackURL=&lt;a href=wphappcriteria.display?paSearchKey=147125&gt;Search Criteria&lt;/a&gt;"></a><a href="wphappcriteria.display?paSearchKey=147125">Search Criteria</a>'>Search Results">07/1884</td>
  99. # The html here is a bit of a mess, and doesn't all get into
  100. # the soup.
  101. # We can get the reference from the first <a href> in td 0.
  102. first_link = tds[0].a['href']
  103. app_id = cgi.parse_qs(urlparse.urlsplit(first_link)[3])['theApnID'][0]
  104. self._current_application.date_received = search_date
  105. self._current_application.council_reference = app_id
  106. self._current_application.info_url = self.info_url %(app_id)
  107. self._current_application.comment_url = self.comment_url %(app_id)
  108. self._current_application.description = tds[1].string.strip()
  109. # the second td
  110. #<td class="apas_tblContent"><input type="HIDDEN" name="ORDERCOUNTER.PAHEADER.PACIS2.1-1." value="1" class="input-box" size="7" />
  111. #LAND ADJ. BRAMBLING, HAWKENBURY ROAD, HAWKENBURY, TN120EA
  112. #</td>
  113. # For some reason, this doesn't work:
  114. #address = tds[2].string
  115. # But this does
  116. address = tds[2].input.next.strip()
  117. self._current_application.address = address
  118. self._current_application.postcode = getPostcodeFromText(address)
  119. self._results.addApplication(self._current_application)
  120. return self._results
  121. def getResults(self, day, month, year):
  122. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  123. class EastHertsParser(SwiftLGParser):
  124. def _findResultsTable(self, soup):
  125. return soup.findAll("table")[3]
  126. class GwyneddParser(SwiftLGParser):
  127. def _findResultsTable(self, soup):
  128. return soup.find("table", {"class": "thinBox"})
  129. class IslingtonParser(SwiftLGParser):
  130. def _findResultsTable(self, soup):
  131. return soup.table.table
  132. class LakeDistrictParser(SwiftLGParser):
  133. def _findResultsTable(self, soup):
  134. return soup.table.table
  135. class MacclesfieldParser(SwiftLGParser):
  136. def _findResultsTable(self, soup):
  137. return soup.findAll("table")[6]
  138. class MoleValleyParser(SwiftLGParser):
  139. def _findResultsTable(self, soup):
  140. return soup.findAll("table")[5]
  141. class SloughParser(SwiftLGParser):
  142. def _findResultsTable(self, soup):
  143. return soup.findAll("table")[1]
  144. def _findTRs(self, results_table):
  145. return results_table.findAll("tr")[2:]
  146. if __name__ == '__main__':
  147. # parser = SwiftLGParser("Dudley", "Dudley", "http://www2.dudley.gov.uk/swiftlg/apas/run/")
  148. parser = EastHertsParser("East Hertfordshire", "East Herts", "http://e-services.eastherts.gov.uk/swiftlg/apas/run/")
  149. # parser = GwyneddParser("Gwynedd", "Gwynedd", "http://www.gwynedd.gov.uk/swiftlg/apas/run/")
  150. # parser = IslingtonParser("Islington", "Islington", "https://www.islington.gov.uk/onlineplanning/apas/run/")
  151. # parser = LakeDistrictParser("Lake District", "Lake District", "http://www.lake-district.gov.uk/swiftlg/apas/run/")
  152. # parser = SwiftLGParser("Maidstone Borough Council", "Maidstone", "http://digitalmaidstone.co.uk/swiftlg/apas/run/")
  153. # parser = MoleValleyParser("Mole Valley", "Mole Valley", "http://www.molevalley.gov.uk/swiftlg/apas/run/")
  154. # parser = SwiftLGParser("Pembrokeshire County Council", "Pembrokeshire", "http://planning.pembrokeshire.gov.uk/swiftlg/apas/run/")
  155. # parser = SwiftLGParser("Rochdale Metropolitan Borough Council", "Rochdale", "http://www.rochdale.gov.uk/swiftlg/apas/run/")
  156. # parser = SloughParser("Slough", "Slough", "http://www2.slough.gov.uk/swiftlg/apas/run/")
  157. # parser = SwiftLGParser("Snowdonia National Park", "Snowdonia", "http://www.snowdonia-npa.gov.uk/swiftlg/apas/run/")
  158. # parser = SwiftLGParser("St Edmundsbury", "Bury St Edmunds", "http://www.stedmundsbury.gov.uk/swiftlg/apas/run/")
  159. # parser = MacclesfieldParser("Macclesfield", "Macclesfield", "http://www.planportal.macclesfield.gov.uk/swiftlg/apas/run/")
  160. print parser.getResults(22,11,2007)
  161. # To Do:
  162. #1) Check out comment url on Maidstone
  163. #2) Daventry, when it is back up.
  164. #3) Work out what goes wrong with Gwynedd on 06/11/2007