RB Kingston upon Thames planning applications

require 'bundler'
Bundler.setup
require 'scraperwiki'
require 'mechanize'
require 'pp'
require 'time'
require 'date'

# Use the column names from planningalerts.org.au:
# https://www.planningalerts.org.au/how_to_write_a_scraper
BASEURL = "https://maps.kingston.gov.uk/propertyServices/planning/"
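# Bundler.setup above expects a Gemfile alongside this script. A minimal
# sketch (gem names follow the requires above; sources/versions are
# assumptions, not part of this scraper):
#
#   source "https://rubygems.org"
#   gem "mechanize"
#   gem "scraperwiki"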
  11. # Parse and save a single planning application
  12. def parse(app)
  13. record = {}
  14. record['title'] = app.at("h4").inner_text
  15. matches = record['title'].match(/(\d+\/\d+\/\w+)\s+-\s+(.+)/)
  16. record['council_reference'] = matches[1]
  17. record['type'] = matches[2]
  18. app.search("a").each do |link|
  19. record['info_url'] = BASEURL + link['href'].strip if link['href'].match(/Details/)
  20. record['map_url'] = link['href'].strip if link['href'].match(/\?map=/)
  21. record['images_url'] = BASEURL + link['href'].strip if link['href'].match(/ImageMenu/)
  22. record['comment_url'] = BASEURL + link['href'].strip if link['href'].match(/PlanningComments/)
  23. end
  24. spans = app.search("span")
  25. record['description'] = spans[0].inner_text
  26. record['address'] = spans[1].inner_text
  27. record['ward'] = spans[2].inner_text
  28. # Decision and decision date
  29. if matches = spans[4].inner_text.match(/(.+?)\s+(\d{1,2}\/\d{1,2}\/\d{4})/)
  30. record['decision'] = matches[1]
  31. record['date_decision'] = Date.parse(matches[2])
  32. end
  33. # Comments/consultation - consultation end date can change during lifetime of application
  34. app.search("dd").each do |dd|
  35. if matches = dd.inner_text.match(/The current closing date for comments on this application is (\d{1,2}-[A-Z][a-z]{2}-\d{4})/)
  36. record['on_notice_to'] = Date.parse(matches[1])
  37. end
  38. end
  39. # Date valid
  40. begin
  41. record['date_valid'] = Date.parse(spans[3].inner_text)
  42. record['date_valid_text'] = nil
  43. rescue ArgumentError
  44. record['date_valid'] = nil
  45. record['date_valid_text'] = spans[3].inner_text
  46. end
  47. # Scraper timestamps
  48. record['updated_at'] = Time.now
  49. record['date_scraped'] = Date.today.to_s
  50. ScraperWiki.save_sqlite(['council_reference'], record)
  51. end
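# A hedged sketch (not run as part of the scraper): rows written by
# ScraperWiki.save_sqlite above can be read back for spot checks, assuming the
# scraperwiki gem also exposes ScraperWiki.select and its default "data" table:
#
#   ScraperWiki.select("* from data order by date_scraped desc limit 5").each do |row|
#     pp row
#   end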
agent = Mechanize.new
agent.verify_mode = OpenSSL::SSL::VERIFY_NONE

# Get all valid applications for the last 12 * 30 days
d = Date.today
12.times do
  d_start = (d - 29).strftime("%d/%m/%Y")
  d_end = d.strftime("%d/%m/%Y")
  url = "#{BASEURL}Summary?weekListType=SRCH&recFrom=#{d_start}&recTo=#{d_end}&ward=ALL&appTyp=ALL&wardTxt=All%20Wards&appTypTxt=All%20Application%20Types&limit=500"
  puts url
  page = agent.get(url)
  apps = page.search("#planningApplication")
  puts apps.size, ''
  apps.each { |app| parse(app) }
  d -= 30
  sleep 5
end
# page = Nokogiri::HTML(open("page.html"))
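# A minimal offline-testing sketch along the lines of the commented-out call
# above, assuming a search results page has been saved locally as page.html
# (the filename is only illustrative):
#
#   require 'nokogiri'
#   page = Nokogiri::HTML(File.read("page.html"))
#   page.search("#planningApplication").each { |app| parse(app) }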