浏览代码

Merge branch 'chainedparams'

master
Adrian Short 6 年前
父节点
当前提交
ce87cf2b0f
共有 1 个文件被更改,包括 18 次插入 和 15 次删除
  1. +18
    -15
      scraper.rb

+ 18
- 15
scraper.rb 查看文件

@@ -1,25 +1,28 @@
require 'uk_planning_scraper'
require 'scraperwiki'

auths = UKPlanningScraper::Authority.all
keyword_searches = ['inlink', 'bt phone kiosk']
authorities = UKPlanningScraper::Authority.all

scrapes = [
{ validated_days: ENV['MORPH_DAYS'].to_i, keywords: 'inlink'},
{ validated_days: ENV['MORPH_DAYS'].to_i, keywords: 'bt phone kiosk'}
]

auths.each_with_index do |auth, i|
puts "#{i + 1} of #{auths.size}: #{auth.name}"
scrapes.each_with_index do |scrape, j|
puts "Scrape #{j + 1} of #{scrapes.size}: keywords: #{scrape[:keywords]}"
authorities.each_with_index do |authority, i|
puts "#{i + 1} of #{authorities.size}: #{authority.name}"
keyword_searches.each_with_index do |search, j|
puts "Scrape #{j + 1} of #{keyword_searches.size}: keywords: #{search}"
begin
apps = auth.scrape(scrape)
apps.each do |app|
unless app[:description].match(/chainlink/i) # Backend keyword search is weak
ScraperWiki.save_sqlite([:authority_name, :council_reference], app, 'applications')
applications = authority.validated_days(ENV['MORPH_DAYS'].to_i). \
keywords(search).scrape
applications.each do |application|
# Backend keyword search is weak
unless application[:description].match(/chainlink/i)
ScraperWiki.save_sqlite(
[:authority_name, :council_reference],
application,
'applications')
end
end
puts "#{auth.name}: #{apps.size} application(s) saved."
puts "#{authority.name}: #{applications.size} application(s) saved."
rescue StandardError => e
puts "Error: #{e}"
end


正在加载...
取消
保存