Browse Source

Work around weak backend keyword search

master
Adrian Short 5 years ago
parent
commit
5e51a2ea15
3 changed files with 8 additions and 6 deletions
  1. +1
    -1
      Gemfile
  2. +1
    -2
      Gemfile.lock
  3. +6
    -3
      scraper.rb

+ 1
- 1
Gemfile View File

@@ -2,5 +2,5 @@ source "https://rubygems.org"

ruby '2.3.1'

-gem 'uk_planning_scraper', :git => 'https://github.com/adrianshort/uk_planning_scraper/', :ref => '7349fd6'
+gem 'uk_planning_scraper', :git => 'https://github.com/adrianshort/uk_planning_scraper/'
gem 'scraperwiki', :git => 'https://github.com/openaustralia/scraperwiki-ruby/', :branch => 'morph_defaults'

+ 1
- 2
Gemfile.lock View File

@@ -1,7 +1,6 @@
GIT
remote: https://github.com/adrianshort/uk_planning_scraper/
-revision: 7349fd6470326964451f0405ffc92fc1c955e82a
-ref: 7349fd6
+revision: 2cac593c569cd6d0c686ec8e285adf6d28303a43
specs:
uk_planning_scraper (0.2.0)
http


+ 6
- 3
scraper.rb View File

@@ -4,13 +4,16 @@ require 'scraperwiki'
auths = UKPlanningScraper::Authority.all

auths.each_with_index do |auth, i|
next if auth.name == 'Bolton' # https://github.com/adrianshort/uk_planning_scraper/issues/7
puts "#{i + 1} of #{auths.size}: #{auth.name}"
begin
apps = auth.scrape({ validated_days: ENV['MORPH_DAYS'].to_i, keywords: 'inlink'})
-ScraperWiki.save_sqlite([:authority_name, :council_reference], apps, 'applications')
+apps.each do |app|
+unless app[:description].match(/chainlink/i) # Backend keyword search is weak
+ScraperWiki.save_sqlite([:authority_name, :council_reference], app, 'applications')
+end
+end
puts "#{auth.name}: #{apps.size} application(s) saved."
-rescue UKPlanningScraper::SystemNotSupported => e
+rescue StandardError => e
puts "Error: #{e}"
end
end

Loading…
Cancel
Save