# Automatically exported from code.google.com/p/planningalerts
 
 
 
 
 
 

# Source viewer metadata: 225 lines, 11 KiB

  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import cgi
  5. import re
  6. import datetime
  7. import BeautifulSoup
  8. from PlanningUtils import getPostcodeFromText, \
  9. PlanningAuthorityResults, \
  10. PlanningApplication
  11. # - Browser request: --------------------------
  12. # {POST http://digitalmaidstone.co.uk/swiftlg/apas/run/WPHAPPCRITERIA HTTP/1.0} {Host: digitalmaidstone.co.uk
  13. # Accept: text/html, text/plain, text/css, text/sgml, */*;q=0.01
  14. # Accept-Encoding: gzip
  15. # Accept-Language: en
  16. # Pragma: no-cache
  17. # Cache-Control: no-cache
  18. # User-Agent: Lynx/2.8.6rel.4 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.6.3
  19. # Content-type: application/x-www-form-urlencoded
  20. # Content-length: 638
  21. # } %25.MAINBODY.WPACIS.1.=&APNID.MAINBODY.WPACIS.1.=&JUSTLOCATION.MAINBODY.WPACIS.1.=&JUSTDEVDESC.MAINBODY.WPACIS.1.=&DEVDESC.MAINBODY.WPACIS.1.=&SURNAME.MAINBODY.WPACIS.1.=&REGFROMDATE.MAINBODY.WPACIS.1.=01%2F11%2F2007&REGTODATE.MAINBODY.WPACIS.1.=02%2F11%2F2007&DECFROMDATE.MAINBODY.WPACIS.1.=&DECTODATE.MAINBODY.WPACIS.1.=&FINALGRANTFROM.MAINBODY.WPACIS.1.=&FINALGRANTTO.MAINBODY.WPACIS.1.=&APELDGDATFROM.MAINBODY.WPACIS.1.=&APELDGDATTO.MAINBODY.WPACIS.1.=&APEDECDATFROM.MAINBODY.WPACIS.1.=&APEDECDATTO.MAINBODY.WPACIS.1.=&AREA.MAINBODY.WPACIS.1.=&WARD.MAINBODY.WPACIS.1.=&PARISH.MAINBODY.WPACIS.1.=&SEARCHBUTTON.MAINBODY.WPACIS.1.=Search
  22. # server=[digitalmaidstone.co.uk] , port=[80], script=[/swiftlg/apas/run/WPHAPPCRITERIA]
  23. # request_line=[POST /swiftlg/apas/run/WPHAPPCRITERIA HTTP/1.0]
  24. # second page
  25. #http://digitalmaidstone.co.uk/swiftlg/apas/run/WPHAPPSEARCHRES.displayResultsURL?ResultID=243941&
  26. #StartIndex=11&
  27. #SortOrder=APNID:asc&
  28. #DispResultsAs=WPHAPPSEARCHRES&
  29. #BackURL=<a%20href=wphappcriteria.display?paSearchKey=147118>Search%20Criteria
  30. # Date format to enter into search boxes
  31. date_format = "%d/%m/%Y"
  32. class SwiftLGParser:
  33. search_path = "WPHAPPCRITERIA"
  34. info_path = "WPHAPPDETAIL.DisplayUrl?theApnID=%s"
  35. comment_path ="wphmakerep.displayURL?ApnID=%s"
  36. def _findResultsTable(self, soup):
  37. """Unless there is just one table in the page, the resuts table,
  38. override this in a subclass."""
  39. return soup.table
  40. def _findTRs(self, results_table):
  41. """The usual situation is for the results table to contain
  42. one row of headers, followed by a row per app.
  43. If this is not the case, override this in a subclass."""
  44. # import pdb;pdb.set_trace()
  45. return results_table.findAll("tr")[1:]
  46. def __init__(self,
  47. authority_name,
  48. authority_short_name,
  49. base_url,
  50. debug=False):
  51. self.authority_name = authority_name
  52. self.authority_short_name = authority_short_name
  53. self.base_url = base_url
  54. self.search_url = urlparse.urljoin(base_url, self.search_path)
  55. self.info_url = urlparse.urljoin(base_url, self.info_path)
  56. self.comment_url = urlparse.urljoin(base_url, self.comment_path)
  57. self.debug = debug
  58. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  59. def getResultsByDayMonthYear(self, day, month, year):
  60. search_date = datetime.date(year, month, day)
  61. post_data = urllib.urlencode((
  62. ("REGFROMDATE.MAINBODY.WPACIS.1.", search_date.strftime(date_format)),
  63. ("REGTODATE.MAINBODY.WPACIS.1.", search_date.strftime(date_format)),
  64. ("SEARCHBUTTON.MAINBODY.WPACIS.1.", "Search"),
  65. ))
  66. response = urllib2.urlopen(self.search_url, post_data)
  67. contents = response.read()
  68. # Check for the no results warning
  69. if not contents.count("No Matching Applications Found"):
  70. soup = BeautifulSoup.BeautifulSoup(contents)
  71. # Get the links to later pages of results.
  72. later_pages = soup.findAll("a", {"href": re.compile("WPHAPPSEARCHRES\.displayResultsURL.*StartIndex=\d*.*")})
  73. for a in ["initial_search"] + later_pages:
  74. if a != "initial_search":
  75. url = a['href']
  76. # Example url
  77. #http://digitalmaidstone.co.uk/swiftlg/apas/run/WPHAPPSEARCHRES.displayResultsURL?ResultID=244037&StartIndex=11&SortOrder=APNID:asc&DispResultsAs=WPHAPPSEARCHRES&BackURL=<a href=wphappcriteria.display?paSearchKey=147170>Search Criteria</a>
  78. # urllib2 doesn't like this url, to make it happy, we'll
  79. # get rid of the BackURL parameter, which we don't need.
  80. split_url = urlparse.urlsplit(url)
  81. qs = split_url[3]
  82. # This gets us a dictionary of key to lists of values
  83. qsl = cgi.parse_qsl(qs)
  84. # Get rid of BackURL
  85. qsl.pop(-1)
  86. # I think this is safe, as there are no repeats of parameters
  87. new_qs = urllib.urlencode(qsl)
  88. url = urlparse.urlunsplit(split_url[:3] + (new_qs,) + split_url[4:])
  89. this_page_url = urlparse.urljoin(self.base_url, url)
  90. response = urllib2.urlopen(this_page_url)
  91. contents = response.read()
  92. soup = BeautifulSoup.BeautifulSoup(contents)
  93. results_table = self._findResultsTable(soup)#.body.find("table", {"class": "apas_tbl"})
  94. trs = self._findTRs(results_table)
  95. for tr in trs:
  96. self._current_application = PlanningApplication()
  97. tds = tr.findAll("td")
  98. # The first td
  99. #<td class="apas_tblContent"><a href="WPHAPPDETAIL.DisplayUrl?theApnID=07/1884&amp;backURL=&lt;a href=wphappcriteria.display?paSearchKey=147125&gt;Search Criteria&lt;/a&gt; &gt; &lt;a href='wphappsearchres.displayResultsURL?ResultID=243950%26StartIndex=1%26SortOrder=APNID:asc%26DispResultsAs=WPHAPPSEARCHRES%26BackURL=&lt;a href=wphappcriteria.display?paSearchKey=147125&gt;Search Criteria&lt;/a&gt;'&gt;Search Results&lt;/a&gt;"></a><a href="wphappcriteria.display?paSearchKey=147125">Search Criteria</a> > <a href="wphappsearchres.displayResultsURL?ResultID=243950%26StartIndex=1%26SortOrder=APNID:asc%26DispResultsAs=WPHAPPSEARCHRES%26BackURL=&lt;a href=wphappcriteria.display?paSearchKey=147125&gt;Search Criteria&lt;/a&gt;"></a><a href="wphappcriteria.display?paSearchKey=147125">Search Criteria</a>'>Search Results">07/1884</td>
  100. # The html here is a bit of a mess, and doesn't all get into
  101. # the soup.
  102. # We can get the reference from the first <a href> in td 0.
  103. first_link = tds[0].a['href']
  104. app_id = cgi.parse_qs(urlparse.urlsplit(first_link)[3])['theApnID'][0]
  105. self._current_application.date_received = search_date
  106. self._current_application.council_reference = app_id
  107. self._current_application.info_url = self.info_url %(app_id)
  108. self._current_application.comment_url = self.comment_url %(app_id)
  109. self._current_application.description = tds[1].string.strip()
  110. # the second td
  111. #<td class="apas_tblContent"><input type="HIDDEN" name="ORDERCOUNTER.PAHEADER.PACIS2.1-1." value="1" class="input-box" size="7" />
  112. #LAND ADJ. BRAMBLING, HAWKENBURY ROAD, HAWKENBURY, TN120EA
  113. #</td>
  114. address = ' '.join([x for x in tds[2].contents if isinstance(x, BeautifulSoup.NavigableString)]).strip()
  115. self._current_application.address = address
  116. self._current_application.postcode = getPostcodeFromText(address)
  117. self._results.addApplication(self._current_application)
  118. return self._results
  119. def getResults(self, day, month, year):
  120. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  121. class EastHertsParser(SwiftLGParser):
  122. def _findResultsTable(self, soup):
  123. return soup.findAll("table")[3]
  124. class GwyneddParser(SwiftLGParser):
  125. def _findResultsTable(self, soup):
  126. return soup.find("table", {"class": "thinBox"})
  127. class IslingtonParser(SwiftLGParser):
  128. def _findResultsTable(self, soup):
  129. return soup.table.table
  130. class MacclesfieldParser(SwiftLGParser):
  131. def _findResultsTable(self, soup):
  132. return soup.findAll("table")[6]
  133. class MoleValleyParser(SwiftLGParser):
  134. def _findResultsTable(self, soup):
  135. # import pdb;pdb.set_trace()
  136. return soup.findAll("table")[2]
  137. class SloughParser(SwiftLGParser):
  138. def _findResultsTable(self, soup):
  139. return soup.findAll("table")[1]
  140. def _findTRs(self, results_table):
  141. return results_table.findAll("tr")[2:]
  142. if __name__ == '__main__':
  143. # parser = SwiftLGParser("Boston Borough Council", "Boston", "http://195.224.121.199/swiftlg/apas/run/")
  144. # parser = SwiftLGParser("Dudley", "Dudley", "http://www2.dudley.gov.uk/swiftlg/apas/run/")
  145. # parser = EastHertsParser("East Hertfordshire", "East Herts", "http://e-services.eastherts.gov.uk/swiftlg/apas/run/")
  146. # parser = GwyneddParser("Gwynedd", "Gwynedd", "http://www.gwynedd.gov.uk/swiftlg/apas/run/")
  147. # parser = IslingtonParser("Islington", "Islington", "https://www.islington.gov.uk/onlineplanning/apas/run/")
  148. parser = SwiftLGParser("Lake District", "Lake District", "http://www.lake-district.gov.uk/swiftlg/apas/run/")
  149. # parser = SwiftLGParser("Maidstone Borough Council", "Maidstone", "http://digitalmaidstone.co.uk/swiftlg/apas/run/")
  150. # parser = MoleValleyParser("Mole Valley", "Mole Valley", "http://www.molevalley.gov.uk/swiftlg/apas/run/")
  151. # parser = SwiftLGParser("Pembrokeshire County Council", "Pembrokeshire", "http://planning.pembrokeshire.gov.uk/swiftlg/apas/run/")
  152. # parser = SwiftLGParser("Rochdale Metropolitan Borough Council", "Rochdale", "http://www.rochdale.gov.uk/swiftlg/apas/run/")
  153. # parser = SloughParser("Slough", "Slough", "http://www2.slough.gov.uk/swiftlg/apas/run/")
  154. # parser = SwiftLGParser("Snowdonia National Park", "Snowdonia", "http://www.snowdonia-npa.gov.uk/swiftlg/apas/run/")
  155. # parser = SwiftLGParser("St Edmundsbury", "Bury St Edmunds", "http://www.stedmundsbury.gov.uk/swiftlg/apas/run/")
  156. # parser = MacclesfieldParser("Macclesfield", "Macclesfield", "http://www.planportal.macclesfield.gov.uk/swiftlg/apas/run/")
  157. # parser = SwiftLGParser("Daventry District Council", "Daventry", "http://62.231.149.150/swiftlg/apas/run/wphappcriteria.display")
  158. # parser = SwiftLGParser("Warrington Borough Council", "Warrington", "http://212.248.237.123:8080/swiftlg/apas/run/wphappcriteria.display")
  159. # parser = SwiftLGParser("Cannock Chase District Council", "Cannock Chase", "http://planning.cannockchasedc.com/swiftlg/apas/run/wphappcriteria.display")
  160. # parser = SwiftLGParser("London Borough of Enfield", "Enfield", "http://forms.enfield.gov.uk/swiftlg/apas/run/wphappcriteria.display")
  161. print parser.getResults(18,3,2009)
  162. # To Do:
  163. #1) Check out comment url on Maidstone
  164. #2) Daventry, when it is back up.
  165. #3) Work out what goes wrong with Gwynedd on 06/11/2007