From 5e51a2ea15942a39784374b907e10b9446a2a143 Mon Sep 17 00:00:00 2001 From: Adrian Short Date: Sun, 30 Sep 2018 11:40:08 +0100 Subject: [PATCH] Work around weak backend keyword search --- Gemfile | 2 +- Gemfile.lock | 3 +-- scraper.rb | 9 ++++++--- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/Gemfile b/Gemfile index f7e6a95..1ee14ee 100644 --- a/Gemfile +++ b/Gemfile @@ -2,5 +2,5 @@ source "https://rubygems.org" ruby '2.3.1' -gem 'uk_planning_scraper', :git => 'https://github.com/adrianshort/uk_planning_scraper/', :ref => '7349fd6' +gem 'uk_planning_scraper', :git => 'https://github.com/adrianshort/uk_planning_scraper/' gem 'scraperwiki', :git => 'https://github.com/openaustralia/scraperwiki-ruby/', :branch => 'morph_defaults' diff --git a/Gemfile.lock b/Gemfile.lock index 1ba570a..cd89c8d 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,7 +1,6 @@ GIT remote: https://github.com/adrianshort/uk_planning_scraper/ - revision: 7349fd6470326964451f0405ffc92fc1c955e82a - ref: 7349fd6 + revision: 2cac593c569cd6d0c686ec8e285adf6d28303a43 specs: uk_planning_scraper (0.2.0) http diff --git a/scraper.rb b/scraper.rb index 93b4e91..833a29a 100644 --- a/scraper.rb +++ b/scraper.rb @@ -4,13 +4,16 @@ require 'scraperwiki' auths = UKPlanningScraper::Authority.all auths.each_with_index do |auth, i| - next if auth.name == 'Bolton' # https://github.com/adrianshort/uk_planning_scraper/issues/7 puts "#{i + 1} of #{auths.size}: #{auth.name}" begin apps = auth.scrape({ validated_days: ENV['MORPH_DAYS'].to_i, keywords: 'inlink'}) - ScraperWiki.save_sqlite([:authority_name, :council_reference], apps, 'applications') + apps.each do |app| + unless app[:description].match(/chainlink/i) # Backend keyword search is weak + ScraperWiki.save_sqlite([:authority_name, :council_reference], app, 'applications') + end + end puts "#{auth.name}: #{apps.size} application(s) saved." - rescue UKPlanningScraper::SystemNotSupported => e + rescue StandardError => e puts "Error: #{e}" end end