
Use chained scrape parameters

Adrian Short committed 6 years ago (pull/1/head)
commit 0160286120
3 changed files with 9 additions and 12 deletions:
  1. Gemfile      (+1, -1)
  2. Gemfile.lock (+4, -4)
  3. scraper.rb   (+4, -7)

Gemfile (+1, -1)

@@ -6,5 +6,5 @@ source "https://rubygems.org"
 
 ruby "2.3.1"
 
-gem "uk_planning_scraper", git: "https://github.com/adrianshort/uk_planning_scraper.git", ref: "8a070e1"
+gem "uk_planning_scraper", git: "https://github.com/adrianshort/uk_planning_scraper.git", ref: "cad5fcd"
 gem "scraperwiki", git: "https://github.com/openaustralia/scraperwiki-ruby.git", branch: "morph_defaults"

Gemfile.lock (+4, -4)

@@ -1,9 +1,9 @@
 GIT
   remote: https://github.com/adrianshort/uk_planning_scraper.git
-  revision: 8a070e17732b3304e7e73a81cb7538795fe604c0
-  ref: 8a070e1
+  revision: cad5fcd2b98260572dccf0a4cf1ddfeabc469a43
+  ref: cad5fcd
   specs:
-    uk_planning_scraper (0.2.0)
+    uk_planning_scraper (0.4.0)
       http
       mechanize (~> 2.7)

@@ -24,7 +24,7 @@ GEM
     connection_pool (2.2.2)
     domain_name (0.5.20180417)
       unf (>= 0.0.5, < 1.0.0)
-    http (3.3.0)
+    http (4.0.0)
       addressable (~> 2.3)
       http-cookie (~> 1.0)
       http-form_data (~> 2.0)


scraper.rb (+4, -7)

@@ -3,16 +3,13 @@ require 'scraperwiki'
 
 auths = UKPlanningScraper::Authority.tagged('london')
 
-scrapes = [
-  { validated_days: ENV['MORPH_DAYS'].to_i },
-  { decided_days: ENV['MORPH_DAYS'].to_i }
-]
+params = %w(validated_days decided_days)
 
 auths.each_with_index do |auth, i|
-  scrapes.each_with_index do |scrape, j|
-    puts "Authority #{i + 1} of #{auths.size}: Scrape #{j + 1} of #{scrapes.size} for #{auth.name}."
+  params.each_with_index do |param, j|
+    puts "Authority #{i + 1} of #{auths.size}: Scrape #{j + 1} of #{params.size} for #{auth.name}."
     begin
-      apps = auth.scrape(scrape)
+      apps = auth.send(param, ENV['MORPH_DAYS'].to_i).scrape
       ScraperWiki.save_sqlite([:authority_name, :council_reference], apps)
       puts "#{auth.name}: #{apps.size} application(s) saved."
     rescue StandardError => e
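
This change swaps the old hash-argument call, auth.scrape(scrape), for the chained API named in the commit message: each search parameter is now a method returning an object that responds to .scrape, and send lets the parameter name be chosen at runtime. A minimal sketch of the pattern, using only the calls visible in this diff (the 7-day window is illustrative):

require 'uk_planning_scraper'

# Pick one authority; the scraper itself loops over all London authorities.
auth = UKPlanningScraper::Authority.tagged('london').first

# Static form: the parameter setter chains straight into .scrape.
apps = auth.validated_days(7).scrape

# Dynamic form, as in scraper.rb above: send picks the parameter by name,
# so a single loop covers both validated_days and decided_days.
apps = auth.send('decided_days', 7).scrape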

