Browse Source

Add extra search terms; bump to latest scraper gem

master
Adrian Short 5 years ago
parent
commit
ca0fa0dd8a
3 changed files with 20 additions and 12 deletions
  1. +1
    -1
      Gemfile
  2. +3
    -3
      Gemfile.lock
  3. +16
    -8
      scraper.rb

+ 1
- 1
Gemfile View File

@@ -2,5 +2,5 @@ source "https://rubygems.org"

ruby '2.3.1'

-gem 'uk_planning_scraper', :git => 'https://github.com/adrianshort/uk_planning_scraper/', :ref => '8a070e1'
+gem 'uk_planning_scraper', :git => 'https://github.com/adrianshort/uk_planning_scraper/', :ref => 'dd8e084'
gem 'scraperwiki', :git => 'https://github.com/openaustralia/scraperwiki-ruby/', :branch => 'morph_defaults'

+ 3
- 3
Gemfile.lock View File

@@ -1,9 +1,9 @@
GIT
remote: https://github.com/adrianshort/uk_planning_scraper/
-revision: 8a070e17732b3304e7e73a81cb7538795fe604c0
-ref: 8a070e1
+revision: dd8e0849e2b96303891b9023692cae0feaa2e153
+ref: dd8e084
specs:
-uk_planning_scraper (0.2.0)
+uk_planning_scraper (0.3.1)
http
mechanize (~> 2.7)



+ 16
- 8
scraper.rb View File

@@ -3,17 +3,25 @@ require 'scraperwiki'

auths = UKPlanningScraper::Authority.all

+scrapes = [
+{ validated_days: ENV['MORPH_DAYS'].to_i, keywords: 'inlink'},
+{ validated_days: ENV['MORPH_DAYS'].to_i, keywords: 'bt phone kiosk'}
+]
+
auths.each_with_index do |auth, i|
puts "#{i + 1} of #{auths.size}: #{auth.name}"
-begin
-apps = auth.scrape({ validated_days: ENV['MORPH_DAYS'].to_i, keywords: 'inlink'})
-apps.each do |app|
-unless app[:description].match(/chainlink/i) # Backend keyword search is weak
-ScraperWiki.save_sqlite([:authority_name, :council_reference], app, 'applications')
+scrapes.each_with_index do |scrape, j|
+puts "Scrape #{j + 1} of #{scrapes.size}: keywords: #{scrape[:keywords]}"
+begin
+apps = auth.scrape(scrape)
+apps.each do |app|
+unless app[:description].match(/chainlink/i) # Backend keyword search is weak
+ScraperWiki.save_sqlite([:authority_name, :council_reference], app, 'applications')
end
end
+puts "#{auth.name}: #{apps.size} application(s) saved."
+rescue StandardError => e
+puts "Error: #{e}"
+end
-puts "#{auth.name}: #{apps.size} application(s) saved."
-rescue StandardError => e
-puts "Error: #{e}"
end
end

Loading…
Cancel
Save