RB Kingston upon Thames planning applications
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

scraper.rb 1.6 KiB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455
  # Scraper for RB Kingston upon Thames planning applications, written to run
  # on morph.io (https://morph.io).
  #
  # Fetches one search-results page from the council's planning site, extracts
  # every application listed on it and saves one row per application with
  # ScraperWiki.save_sqlite, keyed on 'id'.
  require 'bundler'
  Bundler.setup
  require 'date' # Date.parse is used below; do not rely on another gem loading it
  require 'scraperwiki'
  require 'mechanize'
  require 'pp'

  BASEURL = "https://maps.kingston.gov.uk/propertyServices/planning/"

  agent = Mechanize.new
  # NOTE(review): this disables TLS certificate verification, so the scraper
  # cannot detect a man-in-the-middle. Kept as in the original; remove it if
  # the site's certificate chain validates.
  agent.verify_mode = OpenSSL::SSL::VERIFY_NONE

  # Search for all wards and all application types received in January 2017,
  # at most 50 results.
  search_url = BASEURL + "Summary?weekListType=SRCH&recFrom=01/Jan/2017&recTo=01/Feb/2017&ward=ALL&appTyp=ALL&wardTxt=All%20Wards&appTypTxt=All%20Application%20Types&limit=50"
  page = agent.get(search_url)

  # For offline debugging, parse a saved copy instead:
  # page = Nokogiri::HTML(open("page.html"))
  apps = page.search("#planningApplication")

  apps.each do |app|
    # Everything below is a block-local variable, so no value can leak from one
    # application into the next (the original used top-level @ivars, which kept
    # the previous application's url/map_url when a link was missing).
    heading = app.at("h4")
    title = heading ? heading.inner_text : nil

    # The application reference (digits/digits/word) is the primary key; a row
    # without one cannot be saved meaningfully, so skip it.
    id_match = title && title.match(/\d+\/\d+\/\w+/)
    unless id_match
      warn "Skipping application with no recognisable reference: #{title.inspect}"
      next
    end
    id = id_match[0]
    puts id

    url = nil
    map_url = nil
    app.search("a").each do |link|
      href = link['href']
      next if href.nil? # anchors without an href attribute

      url = BASEURL + href.strip if href =~ /Details\.aspx/
      map_url = href.strip if href =~ /\?map=/
    end
    puts url

    # The spans are read by position: description, address, ward, valid date.
    # A missing span yields nil instead of raising.
    spans = app.search("span")
    span_text = lambda { |index| spans[index] ? spans[index].inner_text : nil }

    # Store a parsed Date when the text is a date; otherwise keep the raw text
    # in date_valid_text so nothing is lost.
    date_text = span_text.call(3)
    date_valid = nil
    date_valid_text = nil
    if date_text
      begin
        date_valid = Date.parse(date_text)
      rescue ArgumentError
        date_valid_text = date_text
      end
    end

    ScraperWiki.save_sqlite(["id"],
                            { 'id' => id,
                              'url' => url,
                              'title' => title,
                              'description' => span_text.call(0),
                              'address' => span_text.call(1),
                              'ward' => span_text.call(2),
                              'date_valid' => date_valid,
                              'date_valid_text' => date_valid_text,
                              'map_url' => map_url })
  end