RB Kingston upon Thames planning applications

scraper.rb 2.6 KiB

require 'bundler'
Bundler.setup
require 'scraperwiki'
require 'mechanize'
require 'pp'
require 'time'
require 'date'

# Use the column names from planningalerts.org.au:
# https://www.planningalerts.org.au/how_to_write_a_scraper
BASEURL = "https://maps.kingston.gov.uk/propertyServices/planning/"

# Parse and save a single planning application
def parse(app)
  record = {}

  # The <h4> title has the form "<council reference> - <application type>"
  # (this will raise if a title ever deviates from that format)
  record['title'] = app.at("h4").inner_text
  matches = record['title'].match(/(\d+\/\d+\/\w+)\s+-\s+(.+)/)
  record['council_reference'] = matches[1]
  record['type'] = matches[2]

  # Links to the details, map, images and comments pages; all but the
  # map link are relative to BASEURL
  app.search("a").each do |link|
    record['info_url'] = BASEURL + link['href'].strip if link['href'].match(/Details/)
    record['map_url'] = link['href'].strip if link['href'].match(/\?map=/)
    record['images_url'] = BASEURL + link['href'].strip if link['href'].match(/ImageMenu/)
    record['comment_url'] = BASEURL + link['href'].strip if link['href'].match(/PlanningComments/)
  end

  # The <span> elements appear in a fixed order
  spans = app.search("span")
  record['description'] = spans[0].inner_text
  record['address'] = spans[1].inner_text
  record['ward'] = spans[2].inner_text

  # Decision and decision date: free text followed by a d/m/yyyy date
  # (Date.parse reads slashed dates day-first, which suits UK-style dates)
  if matches = spans[4].inner_text.match(/(.+?)\s+(\d{1,2}\/\d{1,2}\/\d{4})/)
    record['decision'] = matches[1]
    record['date_decision'] = Date.parse(matches[2])
  end

  # Comments/consultation - the consultation end date can change during
  # the lifetime of an application
  app.search("dd").each do |dd|
    if matches = dd.inner_text.match(/The current closing date for comments on this application is (\d{1,2}-[A-Z][a-z]{2}-\d{4})/)
      record['on_notice_to'] = Date.parse(matches[1])
    end
  end

  # Date valid - spans[3] sometimes holds text rather than a parseable
  # date, in which case keep the raw text instead
  begin
    record['date_valid'] = Date.parse(spans[3].inner_text)
    record['date_valid_text'] = nil
  rescue ArgumentError
    record['date_valid'] = nil
    record['date_valid_text'] = spans[3].inner_text
  end

  # Scraper timestamps
  record['updated_at'] = Time.now
  record['date_scraped'] = Date.today.to_s

  ScraperWiki.save_sqlite(['council_reference'], record)
end
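# Note: ScraperWiki.save_sqlite upserts on the unique key named in its
# first argument, so re-running the scraper updates existing rows (e.g.
# when a decision or a new consultation closing date appears) rather
# than duplicating them.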
agent = Mechanize.new
agent.verify_mode = OpenSSL::SSL::VERIFY_NONE
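# VERIFY_NONE disables TLS certificate checks for every request, which is
# risky in general; presumably the council's map server presents a
# certificate chain Mechanize can't verify. If a CA bundle is available, a
# safer alternative (an assumption, not part of the original) would be:
#   agent.ca_file = "/path/to/ca-bundle.pem"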
# Get all valid applications for the last 12 * 30 days, fetched in
# 30-day windows (the query caps each result set at limit=500)
d = Date.today
12.times do
  d_start = (d - 29).strftime("%d/%m/%Y")
  d_end = d.strftime("%d/%m/%Y")
  url = "#{BASEURL}Summary?weekListType=SRCH&recFrom=#{d_start}&recTo=#{d_end}&ward=ALL&appTyp=ALL&wardTxt=All%20Wards&appTypTxt=All%20Application%20Types&limit=500"
  puts url
  page = agent.get(url)
  apps = page.search("#planningApplication")
  puts apps.size, ''
  apps.each { |app| parse(app) }
  d -= 30
  sleep 5  # be polite to the server between windows
end
# For local debugging against a saved page:
# page = Nokogiri::HTML(open("page.html"))
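A note on running this: with the scraperwiki gem in local mode (the usual morph.io setup, which the scraperwiki/mechanize pairing suggests), `bundle install` followed by `bundle exec ruby scraper.rb` should write records to a `data.sqlite` file in the working directory, keyed on `council_reference`.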