From 1a00f3797d121aaf8b87401d1a9b4c8da5fc1d5c Mon Sep 17 00:00:00 2001
From: Adrian Short
Date: Tue, 18 Sep 2018 22:18:32 +0100
Subject: [PATCH] Add Authority class

UKPlanningScraper#search method is now UKPlanningScraper::Authority.scrape.
---
 lib/uk_planning_scraper.rb           |  40 +---------
 lib/uk_planning_scraper/authority.rb | 115 +++++++++++++++++++++++++++
 2 files changed, 116 insertions(+), 39 deletions(-)
 create mode 100644 lib/uk_planning_scraper/authority.rb

diff --git a/lib/uk_planning_scraper.rb b/lib/uk_planning_scraper.rb
index 93f132e..b7aaa2b 100644
--- a/lib/uk_planning_scraper.rb
+++ b/lib/uk_planning_scraper.rb
@@ -1,46 +1,8 @@
 require "uk_planning_scraper/version"
+require "uk_planning_scraper/authority"
 require 'uk_planning_scraper/idox'
 require 'uk_planning_scraper/northgate'
 require 'logger'
 
 module UKPlanningScraper
-  def self.search(search_url, params, options = {})
-    default_options = {
-      delay: 10,
-    }
-    options = default_options.merge(options) # The user-supplied options override the defaults
-
-    # Validated within the last n days
-    # Assumes that every scraper/system can do a date range search
-    if params[:validated_days]
-      params[:validated_to] = Date.today
-      params[:validated_from] = Date.today - (params[:validated_days] - 1)
-    end
-
-    # Received within the last n days
-    # Assumes that every scraper/system can do a date range search
-    if params[:received_days]
-      params[:received_to] = Date.today
-      params[:received_from] = Date.today - (params[:received_days] - 1)
-    end
-
-    # Decided within the last n days
-    # Assumes that every scraper/system can do a date range search
-    if params[:decided_days]
-      params[:decided_to] = Date.today
-      params[:decided_from] = Date.today - (params[:decided_days] - 1)
-    end
-
-    # Select which scraper to use based on the URL
-    if search_url.match(/search\.do\?action=advanced/i)
-      apps = self.scrape_idox(search_url, params, options)
-    elsif search_url.match(/generalsearch\.aspx/i)
-      apps = self.scrape_northgate(search_url, params, options)
-    else
-      # Not supported
-      raise "Planning system not supported for URL: #{search_url}"
-    end
-
-    apps # Single point of successful exit
-  end
 end
diff --git a/lib/uk_planning_scraper/authority.rb b/lib/uk_planning_scraper/authority.rb
new file mode 100644
index 0000000..2df77a0
--- /dev/null
+++ b/lib/uk_planning_scraper/authority.rb
@@ -0,0 +1,115 @@
+require 'csv'
+require 'date'
+
+module UKPlanningScraper
+  # A planning authority: a name, the URL of its planning search page and a
+  # list of tags. All known authorities are loaded once from authorities.csv
+  # and kept in a class-level registry that the class methods below query.
+  class Authority
+    attr_reader :name, :tags, :url
+    @@authorities = []
+
+    def initialize(name, url, tags)
+      @name = name
+      @url = url
+      @tags = tags
+    end
+
+    # Scrape this authority's planning applications.
+    #
+    # params  - Hash of search parameters. For each of :validated_days,
+    #           :received_days and :decided_days that is set, the matching
+    #           :*_from / :*_to date range ("the last n days", ending today)
+    #           is written into this hash before it is passed to the scraper.
+    # options - Hash of scraper options; defaults to { delay: 10 }.
+    #
+    # Returns the applications from the scraper, each with :authority_name set.
+    # Raises RuntimeError if the URL matches no supported planning system.
+    def scrape(params, options = {})
+      default_options = {
+        delay: 10,
+      }
+      options = default_options.merge(options) # The user-supplied options override the defaults
+
+      # Validated/received/decided within the last n days
+      # Assumes that every scraper/system can do a date range search
+      %i[validated received decided].each do |event|
+        days = params[:"#{event}_days"]
+        next unless days
+
+        params[:"#{event}_to"] = Date.today
+        params[:"#{event}_from"] = Date.today - (days - 1)
+      end
+
+      # Select which scraper to use based on the URL. The scrapers are called on
+      # the UKPlanningScraper module, as the old UKPlanningScraper.search did;
+      # an Authority instance has no scrape_idox/scrape_northgate of its own.
+      if @url.match(/search\.do\?action=advanced/i)
+        apps = UKPlanningScraper.scrape_idox(@url, params, options)
+      elsif @url.match(/generalsearch\.aspx/i)
+        apps = UKPlanningScraper.scrape_northgate(@url, params, options)
+      else
+        # Not supported
+        raise "Planning system not supported for #{@name} at URL: #{@url}"
+      end
+
+      # Post processing
+      apps.each do |app|
+        app[:authority_name] = @name
+      end
+
+      apps # Single point of successful exit
+    end
+
+    # True if this authority has the given tag
+    def tagged?(tag)
+      @tags.include?(tag)
+    end
+
+    # All loaded authorities
+    def self.all
+      @@authorities
+    end
+
+    # List all the tags in use
+    def self.tags
+      @@authorities.flat_map(&:tags).uniq.sort
+    end
+
+    # The authority with this exact name, or nil
+    def self.named(name)
+      @@authorities.find { |a| name == a.name }
+    end
+
+    # Tagged x
+    def self.tagged(tag)
+      @@authorities.select { |a| a.tagged?(tag) }
+    end
+
+    # Not tagged x
+    def self.not_tagged(tag)
+      @@authorities.reject { |a| a.tagged?(tag) }
+    end
+
+    # Authorities with no tags
+    def self.untagged
+      @@authorities.select { |a| a.tags.empty? }
+    end
+
+    # Fill the registry from authorities.csv (name, url, then any tags)
+    def self.load
+      # Don't run this method more than once
+      return unless @@authorities.empty?
+
+      # FIXME hardcoded file path
+      CSV.foreach(File.join(__dir__, 'authorities.csv'), skip_blanks: true) do |line|
+        @@authorities << Authority.new(
+          line[0].strip,
+          line[1].strip,
+          line[2..-1].compact.map(&:strip).sort)
+      end
+    end
+  end
+end
+
+UKPlanningScraper::Authority.load