--- a/Gemfile
+++ b/Gemfile
@@ -4,7 +4,7 @@
 source "https://rubygems.org"
-ruby "2.0.0"
+ruby "2.3.1"
+gem "uk_planning_scraper", git: "https://github.com/adrianshort/uk_planning_scraper.git", ref: "1f9d78e"
 gem "scraperwiki", git: "https://github.com/openaustralia/scraperwiki-ruby.git", branch: "morph_defaults"
 gem "mechanize"
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -1,3 +1,12 @@
+GIT
+  remote: https://github.com/adrianshort/uk_planning_scraper.git
+  revision: 1f9d78e1bed6bfd08f0589e6e7595dec86d9673f
+  ref: 1f9d78e
+  specs:
+    uk_planning_scraper (0.2.0)
+      http
+      mechanize (~> 2.7)
 GIT
   remote: https://github.com/openaustralia/scraperwiki-ruby.git
   revision: fc50176812505e463077d5c673d504a6a234aa78
@@ -10,38 +19,58 @@ GIT
 GEM
   remote: https://rubygems.org/
   specs:
-    domain_name (0.5.24)
+    addressable (2.5.2)
+      public_suffix (>= 2.0.2, < 4.0)
+    connection_pool (2.2.2)
+    domain_name (0.5.20180417)
       unf (>= 0.0.5, < 1.0.0)
-    http-cookie (1.0.2)
+    http (3.3.0)
+      addressable (~> 2.3)
+      http-cookie (~> 1.0)
+      http-form_data (~> 2.0)
+      http_parser.rb (~> 0.6.0)
+    http-cookie (1.0.3)
       domain_name (~> 0.5)
+    http-form_data (2.1.1)
+    http_parser.rb (0.6.0)
     httpclient (2.6.0.1)
-    mechanize (2.7.3)
+    mechanize (2.7.6)
       domain_name (~> 0.5, >= 0.5.1)
       http-cookie (~> 1.0)
-      mime-types (~> 2.0)
+      mime-types (>= 1.17.2)
       net-http-digest_auth (~> 1.1, >= 1.1.1)
-      net-http-persistent (~> 2.5, >= 2.5.2)
-      nokogiri (~> 1.4)
+      net-http-persistent (>= 2.5.2)
+      nokogiri (~> 1.6)
       ntlm-http (~> 0.1, >= 0.1.1)
       webrobots (>= 0.0.9, < 0.2)
-    mime-types (2.5)
-    mini_portile (0.6.2)
-    net-http-digest_auth (1.4)
-    net-http-persistent (2.9.4)
-    nokogiri (1.6.6.2)
-      mini_portile (~> 0.6.0)
+    mime-types (3.2.2)
+      mime-types-data (~> 3.2015)
+    mime-types-data (3.2018.0812)
+    mini_portile2 (2.3.0)
+    net-http-digest_auth (1.4.1)
+    net-http-persistent (3.0.0)
+      connection_pool (~> 2.2)
+    nokogiri (1.8.4)
+      mini_portile2 (~> 2.3.0)
     ntlm-http (0.1.1)
+    public_suffix (3.0.3)
     sqlite3 (1.3.10)
     sqlite_magic (0.0.3)
       sqlite3
     unf (0.1.4)
       unf_ext
-    unf_ext (0.0.7.1)
-    webrobots (0.1.1)
+    unf_ext (0.0.7.5)
+    webrobots (0.1.2)
 PLATFORMS
   ruby
 DEPENDENCIES
   mechanize
   scraperwiki!
+  uk_planning_scraper!
+RUBY VERSION
+   ruby 2.3.1p112
+BUNDLED WITH
+   1.16.5
--- a/scraper.rb
+++ b/scraper.rb
@@ -1,25 +1,15 @@
-# This is a template for a Ruby scraper on morph.io (https://morph.io)
-# including some code snippets below that you should find helpful
+require 'uk_planning_scraper'
+require 'scraperwiki'
-# require 'scraperwiki'
-# require 'mechanize'
-#
-# agent = Mechanize.new
-#
-# # Read in a page
-# page = agent.get("http://foo.com")
-#
-# # Find somehing on the page using css selectors
-# p page.at('div.content')
-#
-# # Write out to the sqlite database using scraperwiki library
-# ScraperWiki.save_sqlite(["name"], {"name" => "susan", "occupation" => "software developer"})
-#
-# # An arbitrary query against the database
-# ScraperWiki.select("* from data where 'name'='peter'")
+auths = UKPlanningScraper::Authority.tagged('london')
-# You don't have to do things with the Mechanize or ScraperWiki libraries.
-# You can use whatever gems you want: https://morph.io/documentation/ruby
-# All that matters is that your final data is written to an SQLite database
-# called "data.sqlite" in the current working directory which has at least a table
-# called "data".
+auths.each_with_index do |auth, i|
+  begin
+    puts "#{i + 1} of #{auths.size}: Scraping #{auth.name}"
+    apps = auth.scrape({ decided_days: ENV['MORPH_DAYS'] })
+    ScraperWiki.save_sqlite([:authority_name, :council_reference], apps)
+    puts "#{auth.name}: #{apps.size} application(s) saved."
+  rescue UKPlanningScraper::SystemNotSupportedError => e
+    puts e
+  end
+end
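As a rough way to check what got saved, a hedged sketch of a follow-up query using the ScraperWiki helper already required above; the "data" table is morph.io's default, the authority_name column comes from the unique keys passed to save_sqlite, and 'Westminster' is only an illustrative value:

    require 'scraperwiki'

    # An arbitrary query against data.sqlite, in the spirit of the
    # ScraperWiki.select example in the removed template comments above.
    ScraperWiki.select("* from data where authority_name = 'Westminster'").each do |app|
      p app
    end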