@@ -4,7 +4,7 @@ | |||||
# Gemfile for this Morph scraper: declares the gem source, the Ruby version
# and the gems the scraper depends on.
source "https://rubygems.org" | source "https://rubygems.org" | ||||
# NOTE(review): two `ruby` declarations follow. This text looks like a rendered
# diff (old line, then new line); the lockfile text records ruby 2.3.1p112, so
# "2.3.1" is presumably the intended version — confirm.
ruby "2.0.0" | |||||
ruby "2.3.1" | |||||
# ScraperWiki library, taken from the morph_defaults branch of the
# openaustralia/scraperwiki-ruby repository.
gem "scraperwiki", git: "https://github.com/openaustralia/scraperwiki-ruby.git", branch: "morph_defaults" | gem "scraperwiki", git: "https://github.com/openaustralia/scraperwiki-ruby.git", branch: "morph_defaults" | ||||
# NOTE(review): this may be the removed side of the diff. The lockfile text
# lists mechanize (~> 2.7) as a dependency of uk_planning_scraper — confirm
# whether a direct declaration is still wanted.
gem "mechanize" | |||||
# uk_planning_scraper, pinned to git ref 6d72d25 of adrianshort/uk_planning_scraper.
gem "uk_planning_scraper", git: "https://github.com/adrianshort/uk_planning_scraper.git", ref: "6d72d25" |
@@ -1,3 +1,12 @@ | |||||
GIT | |||||
remote: https://github.com/adrianshort/uk_planning_scraper.git | |||||
revision: 6d72d251665941b56daaefa33d463b4f590b4ace | |||||
ref: 6d72d25 | |||||
specs: | |||||
uk_planning_scraper (0.2.0) | |||||
http | |||||
mechanize (~> 2.7) | |||||
GIT | GIT | ||||
remote: https://github.com/openaustralia/scraperwiki-ruby.git | remote: https://github.com/openaustralia/scraperwiki-ruby.git | ||||
revision: fc50176812505e463077d5c673d504a6a234aa78 | revision: fc50176812505e463077d5c673d504a6a234aa78 | ||||
@@ -10,38 +19,58 @@ GIT | |||||
GEM | GEM | ||||
remote: https://rubygems.org/ | remote: https://rubygems.org/ | ||||
specs: | specs: | ||||
domain_name (0.5.24) | |||||
addressable (2.5.2) | |||||
public_suffix (>= 2.0.2, < 4.0) | |||||
connection_pool (2.2.2) | |||||
domain_name (0.5.20180417) | |||||
unf (>= 0.0.5, < 1.0.0) | unf (>= 0.0.5, < 1.0.0) | ||||
http-cookie (1.0.2) | |||||
http (3.3.0) | |||||
addressable (~> 2.3) | |||||
http-cookie (~> 1.0) | |||||
http-form_data (~> 2.0) | |||||
http_parser.rb (~> 0.6.0) | |||||
http-cookie (1.0.3) | |||||
domain_name (~> 0.5) | domain_name (~> 0.5) | ||||
http-form_data (2.1.1) | |||||
http_parser.rb (0.6.0) | |||||
httpclient (2.6.0.1) | httpclient (2.6.0.1) | ||||
mechanize (2.7.3) | |||||
mechanize (2.7.6) | |||||
domain_name (~> 0.5, >= 0.5.1) | domain_name (~> 0.5, >= 0.5.1) | ||||
http-cookie (~> 1.0) | http-cookie (~> 1.0) | ||||
mime-types (~> 2.0) | |||||
mime-types (>= 1.17.2) | |||||
net-http-digest_auth (~> 1.1, >= 1.1.1) | net-http-digest_auth (~> 1.1, >= 1.1.1) | ||||
net-http-persistent (~> 2.5, >= 2.5.2) | |||||
nokogiri (~> 1.4) | |||||
net-http-persistent (>= 2.5.2) | |||||
nokogiri (~> 1.6) | |||||
ntlm-http (~> 0.1, >= 0.1.1) | ntlm-http (~> 0.1, >= 0.1.1) | ||||
webrobots (>= 0.0.9, < 0.2) | webrobots (>= 0.0.9, < 0.2) | ||||
mime-types (2.5) | |||||
mini_portile (0.6.2) | |||||
net-http-digest_auth (1.4) | |||||
net-http-persistent (2.9.4) | |||||
nokogiri (1.6.6.2) | |||||
mini_portile (~> 0.6.0) | |||||
mime-types (3.2.2) | |||||
mime-types-data (~> 3.2015) | |||||
mime-types-data (3.2018.0812) | |||||
mini_portile2 (2.3.0) | |||||
net-http-digest_auth (1.4.1) | |||||
net-http-persistent (3.0.0) | |||||
connection_pool (~> 2.2) | |||||
nokogiri (1.8.4) | |||||
mini_portile2 (~> 2.3.0) | |||||
ntlm-http (0.1.1) | ntlm-http (0.1.1) | ||||
public_suffix (3.0.3) | |||||
sqlite3 (1.3.10) | sqlite3 (1.3.10) | ||||
sqlite_magic (0.0.3) | sqlite_magic (0.0.3) | ||||
sqlite3 | sqlite3 | ||||
unf (0.1.4) | unf (0.1.4) | ||||
unf_ext | unf_ext | ||||
unf_ext (0.0.7.1) | |||||
webrobots (0.1.1) | |||||
unf_ext (0.0.7.5) | |||||
webrobots (0.1.2) | |||||
PLATFORMS | PLATFORMS | ||||
ruby | ruby | ||||
DEPENDENCIES | DEPENDENCIES | ||||
mechanize | |||||
scraperwiki! | scraperwiki! | ||||
uk_planning_scraper! | |||||
RUBY VERSION | |||||
ruby 2.3.1p112 | |||||
BUNDLED WITH | |||||
1.15.4 |
@@ -1 +1,3 @@ | |||||
This is a scraper that runs on [Morph](https://morph.io). To get started [see the documentation](https://morph.io/documentation) | |||||
This is a scraper that runs on [Morph](https://morph.io). To get started [see the documentation](https://morph.io/documentation). | |||||
This exists to enable [uk_planning_scraper](https://github.com/adrianshort/uk_planning_scraper) to be tested in the Morph environment, giving us some degree of [dev/prod parity](https://12factor.net/dev-prod-parity). |
@@ -1,25 +1,5 @@ | |||||
# This is a template for a Ruby scraper on morph.io (https://morph.io) | |||||
# including some code snippets below that you should find helpful | |||||
require 'uk_planning_scraper' | |||||
require 'scraperwiki' | |||||
# require 'scraperwiki' | |||||
# require 'mechanize' | |||||
# | |||||
# agent = Mechanize.new | |||||
# | |||||
# # Read in a page | |||||
# page = agent.get("http://foo.com") | |||||
# | |||||
# # Find something on the page using CSS selectors | |||||
# p page.at('div.content') | |||||
# | |||||
# # Write out to the sqlite database using scraperwiki library | |||||
# ScraperWiki.save_sqlite(["name"], {"name" => "susan", "occupation" => "software developer"}) | |||||
# | |||||
# # An arbitrary query against the database | |||||
# ScraperWiki.select("* from data where 'name'='peter'") | |||||
# You don't have to do things with the Mechanize or ScraperWiki libraries. | |||||
# You can use whatever gems you want: https://morph.io/documentation/ruby | |||||
# All that matters is that your final data is written to an SQLite database | |||||
# called "data.sqlite" in the current working directory which has at least a table | |||||
# called "data". | |||||
# Scrape planning applications for a single authority and store the results
# in the SQLite database through the ScraperWiki library.
#
# Configuration is read from environment variables:
#   MORPH_AUTHORITY_NAME - name handed to UKPlanningScraper::Authority.named (required)
#   MORPH_DAYS           - integer handed to #scrape as :validated_days (0 when unset)

# Fail fast with a clear KeyError when the authority name is not configured,
# instead of passing nil into the authority lookup.
authority_name = ENV.fetch('MORPH_AUTHORITY_NAME')

# Integer(..., 10) rejects malformed values such as "7days" rather than
# silently coercing them the way String#to_i does. The explicit base keeps a
# leading zero ("010") decimal. An unset variable still yields 0, as before.
validated_days = Integer(ENV.fetch('MORPH_DAYS', '0'), 10)

apps = UKPlanningScraper::Authority.named(authority_name).scrape({ validated_days: validated_days })

# NOTE(review): the first argument is presumably the list of unique-key columns
# for the saved rows — confirm against the ScraperWiki library documentation.
ScraperWiki.save_sqlite([:authority_name, :council_reference], apps)