|
@@ -1,25 +1,28 @@ |
|
|
require 'uk_planning_scraper' |
|
|
require 'uk_planning_scraper' |
|
|
require 'scraperwiki' |
|
|
require 'scraperwiki' |
|
|
|
|
|
|
|
|
auths = UKPlanningScraper::Authority.all |
|
|
|
|
|
|
|
|
keyword_searches = ['inlink', 'bt phone kiosk'] |
|
|
|
|
|
authorities = UKPlanningScraper::Authority.all |
|
|
|
|
|
|
|
|
scrapes = [ |
|
|
|
|
|
{ validated_days: ENV['MORPH_DAYS'].to_i, keywords: 'inlink'}, |
|
|
|
|
|
{ validated_days: ENV['MORPH_DAYS'].to_i, keywords: 'bt phone kiosk'} |
|
|
|
|
|
] |
|
|
|
|
|
|
|
|
|
|
|
auths.each_with_index do |auth, i| |
|
|
|
|
|
puts "#{i + 1} of #{auths.size}: #{auth.name}" |
|
|
|
|
|
scrapes.each_with_index do |scrape, j| |
|
|
|
|
|
puts "Scrape #{j + 1} of #{scrapes.size}: keywords: #{scrape[:keywords]}" |
|
|
|
|
|
|
|
|
authorities.each_with_index do |authority, i| |
|
|
|
|
|
puts "#{i + 1} of #{authorities.size}: #{authority.name}" |
|
|
|
|
|
keyword_searches.each_with_index do |search, j| |
|
|
|
|
|
puts "Scrape #{j + 1} of #{keyword_searches.size}: keywords: #{search}" |
|
|
|
|
|
|
|
|
begin |
|
|
begin |
|
|
apps = auth.scrape(scrape) |
|
|
|
|
|
apps.each do |app| |
|
|
|
|
|
unless app[:description].match(/chainlink/i) # Backend keyword search is weak |
|
|
|
|
|
ScraperWiki.save_sqlite([:authority_name, :council_reference], app, 'applications') |
|
|
|
|
|
|
|
|
applications = authority.validated_days(ENV['MORPH_DAYS'].to_i). \ |
|
|
|
|
|
keywords(search).scrape |
|
|
|
|
|
|
|
|
|
|
|
applications.each do |application| |
|
|
|
|
|
# Backend keyword search is weak |
|
|
|
|
|
unless application[:description].match(/chainlink/i) |
|
|
|
|
|
ScraperWiki.save_sqlite( |
|
|
|
|
|
[:authority_name, :council_reference], |
|
|
|
|
|
application, |
|
|
|
|
|
'applications') |
|
|
end |
|
|
end |
|
|
end |
|
|
end |
|
|
puts "#{auth.name}: #{apps.size} application(s) saved." |
|
|
|
|
|
|
|
|
puts "#{authority.name}: #{applications.size} application(s) saved." |
|
|
rescue StandardError => e |
|
|
rescue StandardError => e |
|
|
puts "Error: #{e}" |
|
|
puts "Error: #{e}" |
|
|
end |
|
|
end |
|
|