From ca0fa0dd8ac5ba9e8bdf9b0050b865956921c127 Mon Sep 17 00:00:00 2001
From: Adrian Short
Date: Wed, 10 Oct 2018 21:46:33 +0100
Subject: [PATCH] Add extra search terms; bump to latest scraper gem

---
 Gemfile      |  2 +-
 Gemfile.lock |  6 +++---
 scraper.rb   | 24 ++++++++++++++++--------
 3 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/Gemfile b/Gemfile
index 4e6d895..b0694b2 100644
--- a/Gemfile
+++ b/Gemfile
@@ -2,5 +2,5 @@ source "https://rubygems.org"
 
 ruby '2.3.1'
 
-gem 'uk_planning_scraper', :git => 'https://github.com/adrianshort/uk_planning_scraper/', :ref => '8a070e1'
+gem 'uk_planning_scraper', :git => 'https://github.com/adrianshort/uk_planning_scraper/', :ref => 'dd8e084'
 gem 'scraperwiki', :git => 'https://github.com/openaustralia/scraperwiki-ruby/', :branch => 'morph_defaults'
diff --git a/Gemfile.lock b/Gemfile.lock
index 65a4d24..db2c2d9 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -1,9 +1,9 @@
 GIT
   remote: https://github.com/adrianshort/uk_planning_scraper/
-  revision: 8a070e17732b3304e7e73a81cb7538795fe604c0
-  ref: 8a070e1
+  revision: dd8e0849e2b96303891b9023692cae0feaa2e153
+  ref: dd8e084
   specs:
-    uk_planning_scraper (0.2.0)
+    uk_planning_scraper (0.3.1)
       http
       mechanize (~> 2.7)
 
diff --git a/scraper.rb b/scraper.rb
index 833a29a..7297b0f 100644
--- a/scraper.rb
+++ b/scraper.rb
@@ -3,17 +3,25 @@ require 'scraperwiki'
 
 auths = UKPlanningScraper::Authority.all
 
+scrapes = [
+  { validated_days: ENV['MORPH_DAYS'].to_i, keywords: 'inlink'},
+  { validated_days: ENV['MORPH_DAYS'].to_i, keywords: 'bt phone kiosk'}
+]
+
 auths.each_with_index do |auth, i|
   puts "#{i + 1} of #{auths.size}: #{auth.name}"
-  begin
-    apps = auth.scrape({ validated_days: ENV['MORPH_DAYS'].to_i, keywords: 'inlink'})
-    apps.each do |app|
-      unless app[:description].match(/chainlink/i) # Backend keyword search is weak
-        ScraperWiki.save_sqlite([:authority_name, :council_reference], app, 'applications')
+  scrapes.each_with_index do |scrape, j|
+    puts "Scrape #{j + 1} of #{scrapes.size}: keywords: #{scrape[:keywords]}"
+    begin
+      apps = auth.scrape(scrape)
+      apps.each do |app|
+        unless app[:description].match(/chainlink/i) # Backend keyword search is weak
+          ScraperWiki.save_sqlite([:authority_name, :council_reference], app, 'applications')
+        end
       end
+      puts "#{auth.name}: #{apps.size} application(s) saved."
+    rescue StandardError => e
+      puts "Error: #{e}"
     end
-    puts "#{auth.name}: #{apps.size} application(s) saved."
-  rescue StandardError => e
-    puts "Error: #{e}"
   end
 end
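
For reference, this is roughly how scraper.rb reads with the patch applied — a
sketch assembled from the hunks above, not a verbatim copy of the file. It
assumes the file opens with require 'uk_planning_scraper' (implied by the
UKPlanningScraper constant and the hunk context at line 2), that the gem at
ref dd8e084 exposes Authority.all and Authority#scrape as used here, and that
MORPH_DAYS is set in the environment, as it would be on morph.io:

    require 'uk_planning_scraper'
    require 'scraperwiki'

    auths = UKPlanningScraper::Authority.all

    # Each hash is one search run against every authority. The second entry
    # is the new search term added by this patch.
    scrapes = [
      { validated_days: ENV['MORPH_DAYS'].to_i, keywords: 'inlink'},
      { validated_days: ENV['MORPH_DAYS'].to_i, keywords: 'bt phone kiosk'}
    ]

    auths.each_with_index do |auth, i|
      puts "#{i + 1} of #{auths.size}: #{auth.name}"
      scrapes.each_with_index do |scrape, j|
        puts "Scrape #{j + 1} of #{scrapes.size}: keywords: #{scrape[:keywords]}"
        begin
          apps = auth.scrape(scrape)
          apps.each do |app|
            # Backend keyword search is weak: filter out false positives
            # such as "chainlink" before saving.
            unless app[:description].match(/chainlink/i)
              # Upsert keyed on authority name + council reference, so re-runs
              # and overlapping searches don't create duplicate rows.
              ScraperWiki.save_sqlite([:authority_name, :council_reference], app, 'applications')
            end
          end
          puts "#{auth.name}: #{apps.size} application(s) saved."
        rescue StandardError => e
          # A failure on one authority/search combination shouldn't
          # abort the rest of the run.
          puts "Error: #{e}"
        end
      end
    end

The nested loop means each added search term costs one extra scrape per
authority per run, so runtime grows linearly with the scrapes array.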