Automatically exported from code.google.com/p/planningalerts
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

преди 16 години
преди 16 години
преди 16 години
преди 16 години
преди 16 години
преди 16 години
преди 16 години
преди 16 години
преди 16 години
преди 16 години
преди 16 години
преди 16 години
преди 16 години
преди 16 години
преди 16 години
преди 16 години
преди 16 години
преди 16 години
преди 16 години
преди 16 години
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213
  1. import urllib2
  2. import urllib
  3. import urlparse
  4. import datetime, time
  5. import cookielib
  6. cookie_jar = cookielib.CookieJar()
  7. from BeautifulSoup import BeautifulSoup
  8. from PlanningUtils import PlanningApplication, \
  9. PlanningAuthorityResults, \
  10. getPostcodeFromText
  11. search_date_format = "%d-%m-%Y" # Format used for the accepted date when searching
  12. possible_date_formats = [search_date_format, "%d/%m/%Y"]
  13. from HTTPHandlers import CookieAddingHTTPRedirectHandler
  14. cookie_handling_opener = urllib2.build_opener(CookieAddingHTTPRedirectHandler(cookie_jar))
  15. class OcellaParser:
  16. received_date_format = search_date_format
  17. def __init__(self,
  18. authority_name,
  19. authority_short_name,
  20. base_url,
  21. debug=False):
  22. self.authority_name = authority_name
  23. self.authority_short_name = authority_short_name
  24. self.base_url = base_url
  25. self.debug = debug
  26. self._results = PlanningAuthorityResults(self.authority_name, self.authority_short_name)
  27. # These will be used to store the column numbers of the appropriate items in the results table
  28. self.reference_col = None
  29. self.address_col = None
  30. self.applicant_col = None
  31. self.description_col = None
  32. self.received_date_col = None
  33. self.accepted_date_col = None
  34. def getResultsByDayMonthYear(self, day, month, year):
  35. search_date = datetime.date(year, month, day)
  36. # First get the search page
  37. get_request = urllib2.Request(self.base_url)
  38. get_request.add_header('Accept', 'text/html')
  39. get_response = urllib2.urlopen(get_request)
  40. cookie_jar.extract_cookies(get_response, get_request)
  41. get_soup = BeautifulSoup(get_response.read())
  42. # We need to find where the post action goes
  43. action = get_soup.form['action']
  44. try:
  45. session_id = get_soup.find('input', {'name': 'p_session_id'})['value']
  46. except TypeError:
  47. # In the case of Middlesbrough, there is no session cookie,
  48. # but it seems we don't need it...
  49. session_id = None
  50. # Unless we retrieve the correct form name, we will simply get the last week's applications
  51. submit_tag = get_soup.find('input', {'value': 'Search'}) or get_soup.find('input', {'value': 'Search for Applications'}) or get_soup.find('input', {'value': 'Submit'})
  52. try:
  53. submit_name = submit_tag['name']
  54. form_name = submit_name.split('.')[0]
  55. except TypeError:
  56. form_name = 'FRM_PLANNING_LIST'
  57. # # From Breckland
  58. # p_object_name=FRM_WEEKLY_LIST.DEFAULT.SUBMIT_TOP.01
  59. # p_instance=1
  60. # p_event_type=ON_CLICK
  61. # p_user_args=
  62. # p_session_id=53573
  63. # p_page_url=http%3A%2F%2Fwplan01.intranet.breckland.gov.uk%3A7778%2Fportal%2Fpage%3F_pageid%3D33%2C30988%26_dad%3Dportal%26_schema%3DPORTAL
  64. # FRM_WEEKLY_LIST.DEFAULT.START_DATE.01=02-06-2008
  65. # FRM_WEEKLY_LIST.DEFAULT.END_DATE.01=09-06-2008
  66. # FRM_WEEKLY_LIST.DEFAULT.PARISH.01=
  67. post_data = urllib.urlencode(
  68. [('p_object_name', form_name + '.DEFAULT.SUBMIT_TOP.01'),
  69. ('p_instance', '1'),
  70. ('p_event_type', 'ON_CLICK'),
  71. ('p_user_args', ''),
  72. ('p_session_id', session_id),
  73. ('p_page_url', self.base_url),
  74. (form_name + '.DEFAULT.AGENT.01', ''),
  75. (form_name + '.DEFAULT.START_DATE.01', search_date.strftime(search_date_format)),
  76. (form_name + '.DEFAULT.END_DATE.01', search_date.strftime(search_date_format)),
  77. (form_name + '.DEFAULT.PARISH.01', ''),
  78. ]
  79. )
  80. post_request = urllib2.Request(action, post_data)
  81. cookie_jar.add_cookie_header(post_request)
  82. post_request.add_header('Accept', 'text/html')
  83. post_request.add_header('Referer', self.base_url)
  84. post_response = cookie_handling_opener.open(post_request)
  85. post_soup = BeautifulSoup(post_response.read())
  86. results_table = post_soup.find("table", summary="Printing Table Headers")
  87. trs = results_table.findAll("tr")
  88. # We'll use the headings in the first tr to find out what columns the address, description, etc are in.
  89. ths = trs[0].findAll("th")
  90. th_index = 0
  91. for th in ths:
  92. th_content = th.font.string.strip()
  93. if th_content == 'Reference' or th_content == 'Application Ref' or th_content == 'Application Number':
  94. self.reference_col = th_index
  95. elif th_content == 'Location':
  96. self.address_col = th_index
  97. elif th_content == 'Applicant Details':
  98. self.applicant_col = th_index
  99. elif th_content == 'Proposal':
  100. self.description_col = th_index
  101. elif th_content == 'Development Description':
  102. self.description_col = th_index
  103. elif th_content == 'Received Date' or th_content == 'Date Received':
  104. self.received_date_col = th_index
  105. elif th_content == 'Accepted Date':
  106. self.accepted_date_col = th_index
  107. th_index += 1
  108. # If there is a received date, we'll use that, otherwise, we'll have to settle for the accepted date.
  109. self.received_date_col = self.received_date_col or self.accepted_date_col
  110. # We want all the trs except the first one, which is just headers,
  111. # and the last, which is empty
  112. trs = trs[1:-1]
  113. for tr in trs:
  114. self._current_application = PlanningApplication()
  115. tds = tr.findAll("td")
  116. self._current_application.council_reference = (tds[self.reference_col].font.a or tds[self.reference_col].a.font).string.strip()
  117. date_string = tds[self.received_date_col]
  118. for possible_format in possible_date_formats:
  119. try:
  120. self._current_application.date_received = datetime.datetime(*(time.strptime(tds[self.received_date_col].font.string.strip(), possible_format)[0:6]))
  121. except ValueError:
  122. pass
  123. self._current_application.address = tds[self.address_col].font.string.strip()
  124. self._current_application.postcode = getPostcodeFromText(self._current_application.address)
  125. if self._current_application.postcode is None and self.applicant_col is not None:
  126. # won't always be accurate to do this but better than nothing (needed for Havering)
  127. self._current_application.postcode = getPostcodeFromText(tds[self.applicant_col].font.string.strip())
  128. self._current_application.description = tds[self.description_col].font.string.strip()
  129. # seems to be dependent on the implementation whether the URL is encoded (e.g. Great Yarmouth does this), so we cannot do anything more "standard"
  130. self._current_application.info_url = urlparse.urljoin(post_response.geturl(), tds[self.reference_col].a['href'].replace('&','&'))
  131. # This is what a comment url looks like
  132. # It seems to be no problem to remove the sessionid (which is in any case blank...)
  133. # I can't see a good way to avoid having to go to the info page to find the moduleid though.
  134. #http://wplan01.intranet.breckland.gov.uk:7778/pls/portal/PORTAL.wwa_app_module.link?p_arg_names=_moduleid&p_arg_values=8941787057&p_arg_names=_sessionid&p_arg_values=&p_arg_names=APPLICATION_REFERENCE&p_arg_values=3PL%2F2008%2F0877%2FF
  135. # For the moment, we'll just use the info url, as that seems to work.
  136. self._current_application.comment_url = self._current_application.info_url
  137. self._results.addApplication(self._current_application)
  138. return self._results
  139. def getResults(self, day, month, year):
  140. return self.getResultsByDayMonthYear(int(day), int(month), int(year)).displayXML()
  141. if __name__ == '__main__':
  142. # parser = OcellaParser("Arun", "Arun", "http://www.arun.gov.uk/iplanning/portal/page?_pageid=33,4139&_dad=portal&_schema=PORTAL")
  143. # parser = OcellaParser("Breckland Council", "Breckland", "http://wplan01.intranet.breckland.gov.uk:7778/portal/page?_pageid=33,30988&_dad=portal&_schema=PORTAL")
  144. # parser = OcellaParser("Ellesmere Port", "Ellesmere Port", "http://ocella.epnbc.gov.uk/portal/page?_pageid=33,38205&_dad=portal&_schema=PORTAL")
  145. # parser = OcellaParser("Fareham", "Fareham", "http://eocella.fareham.gov.uk/portal/page?_pageid=33,31754&_dad=portal&_schema=PORTAL")
  146. # parser = OcellaParser("Hillingdon", "Hillingdon", "http://w09.hillingdon.gov.uk/portal/page?_pageid=33,82093&_dad=portal&_schema=PORTAL")
  147. # parser = OcellaParser("Middlesbrough", "Middlesbrough", "http://planserv.middlesbrough.gov.uk/portal/page?_pageid=33,4178&_dad=portal&_schema=PORTAL")
  148. # parser = OcellaParser("North East Lincolnshire", "North East Lincolnshire", "http://planning.nelincs.gov.uk/portal/page?_pageid=33,64104&_dad=portal&_schema=PORTAL")
  149. # parser = OcellaParser("Uttlesford", "Uttlesford", "http://planning.uttlesford.gov.uk/portal/page/portal/plan/weekly")
  150. # parser = OcellaParser("Bridgend", "Bridgend", "http://eplan.bridgend.gov.uk:7778/portal/page?_pageid=55,31779&_dad=portal&_schema=PORTAL")
  151. # parser = OcellaParser("Havering", "Havering", "http://planning.havering.gov.uk/portal/page?_pageid=33,1026&_dad=portal&_schema=PORTAL")
  152. # parser = OcellaParser("Castle Point", "Castle Point", "http://planning.castlepoint.gov.uk/portal/page?_pageid=35,38205&_dad=portal&_schema=PORTAL")
  153. parser = OcellaParser("Great Yarmouth", "Great Yarmouth", "http://planning.great-yarmouth.gov.uk/portal/page/portal/plan/weekly")
  154. print parser.getResults(21,5,2008)
  155. #TODO
  156. # 1) Sort out proper comment url?
  157. # 2) Check for pagination
  158. # 3) Check no results case