RB Kingston upon Thames planning applications
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

56 lines
1.6 KiB

  1. # This is a template for a Ruby scraper on morph.io (https://morph.io)
  2. # including some code snippets below that you should find helpful
  3. require 'bundler'
  4. Bundler.setup
  5. require 'scraperwiki'
  6. require 'mechanize'
  7. require 'pp'
  # Scrapes the planning-application search results published under BASEURL
  # and stores one row per application through ScraperWiki.save_sqlite, keyed
  # on the application reference ("id").
  #
  # Date.parse is used below; require it explicitly instead of relying on
  # another library having loaded it.
  require 'date'

  # Root of the council's planning service; relative "Details.aspx" links found
  # on the results page are resolved against it.
  BASEURL = "https://maps.kingston.gov.uk/propertyServices/planning/".freeze

  # Search results page to scrape. The received-date window and the result
  # limit are fixed in the query string.
  SEARCH_URL = BASEURL + "Summary?weekListType=SRCH&recFrom=01/Jan/2017&recTo=01/Feb/2017&ward=ALL&appTyp=ALL&wardTxt=All%20Wards&appTypTxt=All%20Application%20Types&limit=50"

  # Returns the inner text of nodes[index], or nil when no such node exists,
  # so a result block with fewer elements than expected does not raise.
  def node_text(nodes, index)
    node = nodes[index]
    node && node.inner_text
  end

  # Splits a "date valid" cell into [date, raw_text].
  # Returns [Date, nil] when the text parses as a date, and [nil, text] when
  # it does not (text is nil when the cell was missing altogether).
  def parse_valid_date(text)
    [Date.parse(text), nil]
  rescue ArgumentError, TypeError
    # ArgumentError: text is not a date; TypeError: text is nil.
    [nil, text]
  end

  agent = Mechanize.new
  # NOTE(review): TLS certificate verification is disabled here, so the server's
  # identity is not checked. Confirm this is still required for this host.
  agent.verify_mode = OpenSSL::SSL::VERIFY_NONE

  # Read in the search results page.
  # For offline debugging a saved copy can be parsed instead:
  # page = Nokogiri::HTML(open("page.html"))
  page = agent.get(SEARCH_URL)

  apps = page.search("#planningApplication")
  apps.each do |app|
    heading = app.at("h4")
    title = heading && heading.inner_text
    # The application reference is embedded in the heading text.
    id = title && title[%r{\d+/\d+/\w+}]
    if id.nil?
      # "id" is the primary key of the saved row, so a block without one
      # cannot be stored; report it and carry on with the rest of the page.
      warn "Skipping application without a recognisable reference: #{title.inspect}"
      next
    end
    puts id

    # Locals (not top-level instance variables) so that an application lacking
    # a link never inherits the URL of the previously processed application.
    hrefs = app.search("a").map { |link| link['href'] }.compact
    details_href = hrefs.select { |href| href =~ /Details\.aspx/ }.last
    map_href = hrefs.select { |href| href =~ /\?map=/ }.last
    url = details_href && BASEURL + details_href.strip
    map_url = map_href && map_href.strip
    puts url if url

    # The spans are read positionally: description, address, ward, date valid.
    spans = app.search("span")
    date_valid, date_valid_text = parse_valid_date(node_text(spans, 3))

    ScraperWiki.save_sqlite(["id"],
                            'id' => id,
                            'url' => url,
                            'title' => title,
                            'description' => node_text(spans, 0),
                            'address' => node_text(spans, 1),
                            'ward' => node_text(spans, 2),
                            'date_valid' => date_valid,
                            'date_valid_text' => date_valid_text,
                            'map_url' => map_url)
  end