Automatically exported from code.google.com/p/planningalerts

import urllib2
import urllib
import urlparse
import datetime, time
import cookielib

cookie_jar = cookielib.CookieJar()

from BeautifulSoup import BeautifulSoup

from PlanningUtils import PlanningApplication, \
    PlanningAuthorityResults, \
    getPostcodeFromText

search_date_format = "%d-%m-%Y" # Format used for the accepted date when searching
possible_date_formats = [search_date_format, "%d/%m/%Y"]
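# e.g. "21-05-2008" in search_date_format, or "21/05/2008" in the fallback format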

class CookieAddingHTTPRedirectHandler(urllib2.HTTPRedirectHandler):
    """The standard Python HTTPRedirectHandler doesn't add a cookie to the new request after a 302. This handler does."""
    def redirect_request(self, req, fp, code, msg, headers, newurl):
        new_request = urllib2.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
        # We need to add the cookies from the cookie_jar to the new request
        cookie_jar.add_cookie_header(new_request)
        return new_request

cookie_handling_opener = urllib2.build_opener(CookieAddingHTTPRedirectHandler())
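# Note: building the opener with urllib2.HTTPCookieProcessor(cookie_jar) would
# also attach cookies to redirected requests; the explicit handler above is
# kept as it matches this scraper's original behaviour.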

class OcellaParser:
    received_date_format = search_date_format

    def __init__(self,
                 authority_name,
                 authority_short_name,
                 base_url,
                 debug=False):
        self.authority_name = authority_name
        self.authority_short_name = authority_short_name
        self.base_url = base_url
        self.debug = debug

        self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)

        # These will be used to store the column numbers of the appropriate items in the results table
        self.reference_col = None
        self.address_col = None
        self.applicant_col = None
        self.description_col = None
        self.received_date_col = None
        self.accepted_date_col = None

    def getResultsByDayMonthYear(self, day, month, year):
        search_date = datetime.date(year, month, day)

        # First get the search page
        get_request = urllib2.Request(self.base_url)
        get_request.add_header('Accept', 'text/html')
        get_response = urllib2.urlopen(get_request)
        cookie_jar.extract_cookies(get_response, get_request)
        get_soup = BeautifulSoup(get_response.read())

        # We need to find where the post action goes
        action = get_soup.form['action']

        try:
            session_id = get_soup.find('input', {'name': 'p_session_id'})['value']
        except TypeError:
            # In the case of Middlesbrough there is no session cookie,
            # but it seems we don't need it...
            session_id = None

        # Unless we retrieve the correct form name, we will simply get last week's applications
        submit_tag = get_soup.find('input', {'value': 'Search'}) or get_soup.find('input', {'value': 'Search for Applications'}) or get_soup.find('input', {'value': 'Submit'})
        try:
            submit_name = submit_tag['name']
            form_name = submit_name.split('.')[0]
        except TypeError:
            form_name = 'FRM_PLANNING_LIST'

        # Sample POST data from Breckland:
        #
        # p_object_name=FRM_WEEKLY_LIST.DEFAULT.SUBMIT_TOP.01
        # p_instance=1
        # p_event_type=ON_CLICK
        # p_user_args=
        # p_session_id=53573
        # p_page_url=http%3A%2F%2Fwplan01.intranet.breckland.gov.uk%3A7778%2Fportal%2Fpage%3F_pageid%3D33%2C30988%26_dad%3Dportal%26_schema%3DPORTAL
        # FRM_WEEKLY_LIST.DEFAULT.START_DATE.01=02-06-2008
        # FRM_WEEKLY_LIST.DEFAULT.END_DATE.01=09-06-2008
        # FRM_WEEKLY_LIST.DEFAULT.PARISH.01=

        post_data = urllib.urlencode(
            [('p_object_name', form_name + '.DEFAULT.SUBMIT_TOP.01'),
             ('p_instance', '1'),
             ('p_event_type', 'ON_CLICK'),
             ('p_user_args', ''),
             ('p_session_id', session_id),
             ('p_page_url', self.base_url),
             (form_name + '.DEFAULT.AGENT.01', ''),
             (form_name + '.DEFAULT.START_DATE.01', search_date.strftime(search_date_format)),
             (form_name + '.DEFAULT.END_DATE.01', search_date.strftime(search_date_format)),
             (form_name + '.DEFAULT.PARISH.01', ''),
             ])

        post_request = urllib2.Request(action, post_data)
        cookie_jar.add_cookie_header(post_request)
        post_request.add_header('Accept', 'text/html')
        post_request.add_header('Referer', self.base_url)

        post_response = cookie_handling_opener.open(post_request)
        post_soup = BeautifulSoup(post_response.read())

        results_table = post_soup.find("table", summary="Printing Table Headers")
        trs = results_table.findAll("tr")

        # We'll use the headings in the first tr to find out which columns the
        # address, description, etc. are in.
        ths = trs[0].findAll("th")

        for th_index, th in enumerate(ths):
            th_content = th.font.string.strip()

            if th_content in ('Reference', 'Application Ref', 'Application Number'):
                self.reference_col = th_index
            elif th_content == 'Location':
                self.address_col = th_index
            elif th_content == 'Applicant Details':
                self.applicant_col = th_index
            elif th_content in ('Proposal', 'Development Description'):
                self.description_col = th_index
            elif th_content in ('Received Date', 'Date Received'):
                self.received_date_col = th_index
            elif th_content == 'Accepted Date':
                self.accepted_date_col = th_index

        # If there is a received date column we'll use that; otherwise we'll have
        # to settle for the accepted date. An explicit None check is needed here,
        # since a column index of 0 would be falsy.
        if self.received_date_col is None:
            self.received_date_col = self.accepted_date_col

        # We want all the trs except the first one, which is just headers,
        # and the last one, which is empty.
        trs = trs[1:-1]

        for tr in trs:
            self._current_application = PlanningApplication()

            tds = tr.findAll("td")

            self._current_application.council_reference = (tds[self.reference_col].font.a or tds[self.reference_col].a.font).string.strip()

            date_string = tds[self.received_date_col].font.string.strip()
            for possible_format in possible_date_formats:
                try:
                    self._current_application.date_received = datetime.datetime(*(time.strptime(date_string, possible_format)[0:6]))
                    break
                except ValueError:
                    pass

            self._current_application.address = tds[self.address_col].font.string.strip()
            self._current_application.postcode = getPostcodeFromText(self._current_application.address)
            if self._current_application.postcode is None and self.applicant_col is not None:
                # This won't always be accurate, but it's better than nothing (needed for Havering).
                self._current_application.postcode = getPostcodeFromText(tds[self.applicant_col].font.string.strip())

            self._current_application.description = tds[self.description_col].font.string.strip()

            # Whether the href is entity-encoded seems to depend on the site
            # (e.g. Great Yarmouth encodes it), so we can't do anything more
            # "standard" than undoing the &amp; escaping by hand.
            self._current_application.info_url = urlparse.urljoin(post_response.geturl(), tds[self.reference_col].a['href'].replace('&amp;', '&'))

            # This is what a comment url looks like:
            # http://wplan01.intranet.breckland.gov.uk:7778/pls/portal/PORTAL.wwa_app_module.link?p_arg_names=_moduleid&p_arg_values=8941787057&p_arg_names=_sessionid&p_arg_values=&p_arg_names=APPLICATION_REFERENCE&p_arg_values=3PL%2F2008%2F0877%2FF
            # It seems to be no problem to remove the session id (which is in any case blank),
            # but I can't see a good way to avoid having to go to the info page to find the moduleid.
            # For the moment we'll just use the info url, as that seems to work.
            self._current_application.comment_url = self._current_application.info_url

            self._results.addApplication(self._current_application)

        return self._results

    def getResults(self, day, month, year):
        return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
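    # Note: displayXML() comes from PlanningAuthorityResults in PlanningUtils;
    # presumably it serialises the collected applications as XML.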

if __name__ == '__main__':
#    parser = OcellaParser("Arun", "Arun", "http://www.arun.gov.uk/iplanning/portal/page?_pageid=33,4139&_dad=portal&_schema=PORTAL")
#    parser = OcellaParser("Breckland Council", "Breckland", "http://wplan01.intranet.breckland.gov.uk:7778/portal/page?_pageid=33,30988&_dad=portal&_schema=PORTAL")
#    parser = OcellaParser("Ellesmere Port", "Ellesmere Port", "http://ocella.epnbc.gov.uk/portal/page?_pageid=33,38205&_dad=portal&_schema=PORTAL")
#    parser = OcellaParser("Fareham", "Fareham", "http://eocella.fareham.gov.uk/portal/page?_pageid=33,31754&_dad=portal&_schema=PORTAL")
#    parser = OcellaParser("Hillingdon", "Hillingdon", "http://w09.hillingdon.gov.uk/portal/page?_pageid=33,82093&_dad=portal&_schema=PORTAL")
#    parser = OcellaParser("Middlesbrough", "Middlesbrough", "http://planserv.middlesbrough.gov.uk/portal/page?_pageid=33,4178&_dad=portal&_schema=PORTAL")
#    parser = OcellaParser("North East Lincolnshire", "North East Lincolnshire", "http://planning.nelincs.gov.uk/portal/page?_pageid=33,64104&_dad=portal&_schema=PORTAL")
#    parser = OcellaParser("Uttlesford", "Uttlesford", "http://planning.uttlesford.gov.uk/portal/page/portal/plan/weekly")
#    parser = OcellaParser("Bridgend", "Bridgend", "http://eplan.bridgend.gov.uk:7778/portal/page?_pageid=55,31779&_dad=portal&_schema=PORTAL")
#    parser = OcellaParser("Havering", "Havering", "http://planning.havering.gov.uk/portal/page?_pageid=33,1026&_dad=portal&_schema=PORTAL")
#    parser = OcellaParser("Castle Point", "Castle Point", "http://planning.castlepoint.gov.uk/portal/page?_pageid=35,38205&_dad=portal&_schema=PORTAL")
    parser = OcellaParser("Great Yarmouth", "Great Yarmouth", "http://planning.great-yarmouth.gov.uk/portal/page/portal/plan/weekly")

    print parser.getResults(21, 5, 2008)

# TODO
# 1) Sort out a proper comment url?
# 2) Check for pagination
# 3) Check the no-results case
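
# For reference, a minimal sketch of the equivalent cookie handling on Python 3
# (an assumption, untested against these sites; urllib2/cookielib were renamed
# to urllib.request/http.cookiejar in the standard library):
#
#   import urllib.request, http.cookiejar
#   cookie_jar = http.cookiejar.CookieJar()
#   opener = urllib.request.build_opener(
#       urllib.request.HTTPCookieProcessor(cookie_jar))
#   response = opener.open(base_url)  # cookies are re-sent across 302 redirects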