# Ruby scraper for morph.io (https://morph.io).
#
# Fetches one planning-application search result page from
# maps.kingston.gov.uk and stores one row per application in the ScraperWiki
# SQLite store, keyed on the application reference ('id').

require 'bundler'
Bundler.setup
require 'date'
require 'scraperwiki'
require 'mechanize'
require 'pp'

BASEURL = "https://maps.kingston.gov.uk/propertyServices/planning/".freeze

# Search page to scrape. The date range (recFrom/recTo), ward, application
# type and result limit are all fixed in the query string.
SEARCH_URL = (BASEURL +
  "Summary?weekListType=SRCH&recFrom=01/Jan/2017&recTo=01/Feb/2017" \
  "&ward=ALL&appTyp=ALL&wardTxt=All%20Wards" \
  "&appTypTxt=All%20Application%20Types&limit=50").freeze

# Application reference embedded in the entry heading:
# digits "/" digits "/" word characters.
REFERENCE_PATTERN = %r{\d+/\d+/\w+}
# Anchor whose href points at the application details page.
DETAILS_LINK = /Details\.aspx/
# Anchor whose href carries a map query string.
MAP_LINK = /\?map=/

# Returns the inner text of the span at +index+, or nil when the entry has
# fewer spans than expected.
def span_text(spans, index)
  span = spans[index]
  span ? span.inner_text : nil
end

# Parses the date text of an entry.
#
# Returns a two-element array [date_valid, date_valid_text]:
# * [Date, nil]  when +text+ parses as a date,
# * [nil, text]  when it does not, so the raw text is kept for inspection,
# * [nil, nil]   when the entry had no date text at all.
def parse_valid_date(text)
  return [nil, nil] if text.nil?

  [Date.parse(text), nil]
rescue ArgumentError
  [nil, text]
end

# Collects the details URL and the map URL from the anchors of one entry.
#
# Returns [url, map_url]; either element is nil when the entry has no
# matching anchor. When several anchors match, the last one wins.
def extract_links(app)
  url = nil
  map_url = nil

  app.search("a").each do |link|
    href = link['href']
    next if href.nil? # anchor without an href cannot be either link

    href = href.strip
    url = BASEURL + href if href =~ DETAILS_LINK
    map_url = href if href =~ MAP_LINK
  end

  [url, map_url]
end

# Builds the row to store for one application entry.
#
# Returns a Hash with the columns written to SQLite, or nil when the entry
# has no heading or the heading contains no application reference (without
# a reference there is no primary key to save under).
def build_record(app)
  heading = app.at("h4")
  title = heading ? heading.inner_text : nil
  reference = title ? title[REFERENCE_PATTERN] : nil
  return nil if reference.nil?

  # All values are locals built fresh per entry, so nothing carries over
  # from the previously processed application.
  url, map_url = extract_links(app)

  # Spans are read by position: description, address, ward, date.
  spans = app.search("span")
  date_valid, date_valid_text = parse_valid_date(span_text(spans, 3))

  {
    'id' => reference,
    'url' => url,
    'title' => title,
    'description' => span_text(spans, 0),
    'address' => span_text(spans, 1),
    'ward' => span_text(spans, 2),
    'date_valid' => date_valid,
    'date_valid_text' => date_valid_text,
    'map_url' => map_url
  }
end

agent = Mechanize.new
# NOTE(review): certificate verification is switched off, so the connection
# is open to man-in-the-middle tampering. Kept as in the original; re-enable
# once the site's certificate chain validates.
agent.verify_mode = OpenSSL::SSL::VERIFY_NONE

# Read in the search result page.
page = agent.get(SEARCH_URL)

# For offline debugging, parse a saved copy instead:
# page = Nokogiri::HTML(open("page.html"))

# NOTE(review): the page appears to reuse the id "planningApplication" for
# every entry, hence iterating over the result of an id selector - confirm
# against the live markup.
apps = page.search("#planningApplication")

apps.each do |app|
  record = build_record(app)
  if record.nil?
    # One malformed entry should not abort the whole run.
    warn "Skipping entry without a recognisable application reference"
    next
  end

  # Progress output: one reference and one details URL per application.
  puts record['id']
  puts record['url']

  ScraperWiki.save_sqlite(["id"], record)
end