Browse Source

Add Authority class

UKPlanningScraper#search method is now UKPlanningScraper::Authority.scrape.
tags/v0.4.5
Adrian Short 6 years ago
parent
commit
1a00f3797d
2 changed files with 115 additions and 39 deletions
  1. +1
    -39
      lib/uk_planning_scraper.rb
  2. +114
    -0
      lib/uk_planning_scraper/authority.rb

+ 1
- 39
lib/uk_planning_scraper.rb View File

@@ -1,46 +1,8 @@
require "uk_planning_scraper/version"
require "uk_planning_scraper/authority"
require 'uk_planning_scraper/idox'
require 'uk_planning_scraper/northgate'
require 'logger'

module UKPlanningScraper
  # Runs a planning-application search against +search_url+, choosing the
  # scraper implementation from the shape of the URL.
  #
  # For each of the events "validated", "received" and "decided", a
  # +:<event>_days+ entry in +params+ is expanded into a date range:
  # +:<event>_to+ is today and +:<event>_from+ is (days - 1) days earlier, so
  # the range covers the last n days including today. This assumes that every
  # scraper/system can do a date range search.
  #
  # The caller's +params+ hash is not modified; the expanded values are written
  # to a shallow copy that is handed to the scraper.
  #
  # @param search_url [String] URL of the authority's search page
  # @param params [Hash] search parameters (symbol keys)
  # @param options [Hash] user-supplied options; these override the defaults
  #   (currently only +delay: 10+)
  # @return whatever the selected scraper method returns (scrape_idox and
  #   scrape_northgate are defined outside this file)
  # @raise [RuntimeError] if the URL matches no supported planning system
  def self.search(search_url, params, options = {})
    default_options = {
      delay: 10,
    }
    options = default_options.merge(options) # The user-supplied options override the defaults

    # Work on a copy so the derived date keys never leak into the caller's hash
    params = params.dup

    # Validated/received/decided within the last n days
    %i[validated received decided].each do |event|
      days = params[:"#{event}_days"]
      next unless days

      today = Date.today
      params[:"#{event}_to"] = today
      params[:"#{event}_from"] = today - (days - 1)
    end

    # Select which scraper to use based on the URL
    if search_url.match(/search\.do\?action=advanced/i)
      apps = self.scrape_idox(search_url, params, options)
    elsif search_url.match(/generalsearch\.aspx/i)
      apps = self.scrape_northgate(search_url, params, options)
    else
      # Not supported
      raise "Planning system not supported for URL: #{search_url}"
    end

    apps # Single point of successful exit
  end
end

+ 114
- 0
lib/uk_planning_scraper/authority.rb View File

@@ -0,0 +1,114 @@
require 'csv'

module UKPlanningScraper
  # A planning authority: a name, the URL of its search page and a list of
  # tags. The class also keeps a class-wide list of every authority read from
  # the CSV file by Authority.load, with finder methods over that list.
  class Authority
    attr_reader :name, :tags, :url

    # Every authority loaded so far (filled by Authority.load)
    @@authorities = []

    # @param name [String] authority name
    # @param url [String] URL of the authority's search page
    # @param tags [Array<String>] tags attached to this authority
    def initialize(name, url, tags)
      @name = name
      @url = url
      @tags = tags
    end

    # Scrapes this authority's planning applications, choosing the scraper
    # implementation from the shape of the authority's URL.
    #
    # For each of the events "validated", "received" and "decided", a
    # +:<event>_days+ entry in +params+ is expanded into a date range:
    # +:<event>_to+ is today and +:<event>_from+ is (days - 1) days earlier.
    # This assumes that every scraper/system can do a date range search.
    # The caller's +params+ hash is not modified; a shallow copy is used.
    #
    # NOTE(review): scrape_idox and scrape_northgate are not defined in this
    # file; they are expected to be instance methods available on Authority
    # taking (url, params, options) - confirm against the scraper files.
    #
    # @param params [Hash] search parameters (symbol keys)
    # @param options [Hash] user-supplied options; these override the defaults
    #   (currently only +delay: 10+)
    # @return the collection returned by the scraper, after each element has
    #   had +:authority_name+ set to this authority's name
    # @raise [RuntimeError] if the URL matches no supported planning system
    def scrape(params, options = {})
      default_options = {
        delay: 10,
      }
      options = default_options.merge(options) # The user-supplied options override the defaults

      # Work on a copy so the derived date keys never leak into the caller's hash
      params = params.dup

      # Validated/received/decided within the last n days
      %i[validated received decided].each do |event|
        days = params[:"#{event}_days"]
        next unless days

        today = Date.today
        params[:"#{event}_to"] = today
        params[:"#{event}_from"] = today - (days - 1)
      end

      # Select which scraper to use based on the URL
      if @url.match(/search\.do\?action=advanced/i)
        apps = scrape_idox(@url, params, options)
      elsif @url.match(/generalsearch\.aspx/i)
        apps = scrape_northgate(@url, params, options)
      else
        # Not supported
        raise "Planning system not supported for #{@name} at URL: #{@url}"
      end

      # Post processing
      apps.each do |app|
        app[:authority_name] = @name
      end

      apps # Single point of successful exit
    end

    # @param tag [String]
    # @return [Boolean] true if this authority carries +tag+
    def tagged?(tag)
      @tags.include?(tag)
    end

    # @return [Array<Authority>] every loaded authority
    def self.all
      @@authorities
    end

    # List all the tags in use, de-duplicated and sorted
    def self.tags
      @@authorities.flat_map(&:tags).uniq.sort
    end

    # @return [Authority, nil] the first authority whose name equals +name+
    def self.named(name)
      @@authorities.find { |a| name == a.name }
    end

    # Tagged x
    def self.tagged(tag)
      @@authorities.select { |a| a.tags.include?(tag) }
    end

    # Not tagged x
    def self.not_tagged(tag)
      @@authorities.reject { |a| a.tags.include?(tag) }
    end

    # Authorities with no tags
    def self.untagged
      @@authorities.select { |a| a.tags.empty? }
    end

    # Reads the authorities CSV into the class-wide list. Each row is
    # name, url, then any number of tags; values are stripped of surrounding
    # whitespace and tags are stored sorted. Blank rows and empty tag cells
    # are ignored.
    #
    # @param path [String, nil] CSV file to read; defaults to
    #   uk_planning_scraper/authorities.csv under the parent of this file's
    #   directory
    # @return [nil]
    def self.load(path = nil)
      # Don't run this method more than once
      return unless @@authorities.empty?

      path ||= File.join(File.dirname(__dir__), 'uk_planning_scraper', 'authorities.csv')

      CSV.foreach(path) do |line|
        next if line.compact.empty? # blank row in the file

        name, url, *tags = line
        @@authorities << Authority.new(
          name.strip,
          url.strip,
          tags.compact.map(&:strip).sort
        )
      end

      nil
    end
  end
end

# Fill the authority list from the CSV file as soon as this file is loaded
UKPlanningScraper::Authority.load

Loading…
Cancel
Save