@@ -2,6 +2,7 @@ require "uk_planning_scraper/version" | |||||
require "uk_planning_scraper/authority" | require "uk_planning_scraper/authority" | ||||
require "uk_planning_scraper/authority_scrape_params" | require "uk_planning_scraper/authority_scrape_params" | ||||
require "uk_planning_scraper/application" | require "uk_planning_scraper/application" | ||||
require "uk_planning_scraper/property" | |||||
require 'uk_planning_scraper/idox' | require 'uk_planning_scraper/idox' | ||||
require 'uk_planning_scraper/northgate' | require 'uk_planning_scraper/northgate' | ||||
require 'logger' | require 'logger' | ||||
@@ -16,9 +16,13 @@ module UKPlanningScraper | |||||
attr_accessor :date_decision | attr_accessor :date_decision | ||||
attr_accessor :appeal_status | attr_accessor :appeal_status | ||||
attr_accessor :appeal_decision | attr_accessor :appeal_decision | ||||
attr_accessor :property_count | |||||
attr_accessor :property_url | |||||
attr_accessor :property_detail_urls | |||||
attr_accessor :properties | |||||
def to_hash | def to_hash | ||||
{ | |||||
base = { | |||||
scraped_at: @scraped_at, | scraped_at: @scraped_at, | ||||
authority_name: @authority_name, | authority_name: @authority_name, | ||||
council_reference: @council_reference, | council_reference: @council_reference, | ||||
@@ -34,10 +38,24 @@ module UKPlanningScraper | |||||
documents_url: @documents_url, | documents_url: @documents_url, | ||||
alternative_reference: @alternative_reference, | alternative_reference: @alternative_reference, | ||||
appeal_status: @appeal_status, | appeal_status: @appeal_status, | ||||
appeal_decision: @appeal_decision | |||||
appeal_decision: @appeal_decision, | |||||
property_count: @property_count | |||||
} | } | ||||
@property_detail_urls.each_with_index do |url, idx| | |||||
base["property_detail_url_#{idx + 1}".to_sym] = url | |||||
end if @property_detail_urls | |||||
@properties.each_with_index do |property, idx| | |||||
property_hash = property.to_hash | |||||
property_hash.transform_keys! { |k| "property_#{idx + 1}_#{k}".to_sym } | |||||
base.merge!(property_hash) | |||||
end if @properties | |||||
base | |||||
end | end | ||||
def valid? | def valid? | ||||
return true if @authority_name && @council_reference && @info_url | return true if @authority_name && @council_reference && @info_url | ||||
false | false | ||||
@@ -3,7 +3,7 @@ require 'csv' | |||||
module UKPlanningScraper | module UKPlanningScraper | ||||
class Authority | class Authority | ||||
attr_reader :name, :url | attr_reader :name, :url | ||||
@@authorities = [] | @@authorities = [] | ||||
def initialize(name, url) | def initialize(name, url) | ||||
@@ -31,7 +31,7 @@ module UKPlanningScraper | |||||
raise SystemNotSupported.new("Planning system not supported for \ | raise SystemNotSupported.new("Planning system not supported for \ | ||||
#{@name} at URL: #{@url}") | #{@name} at URL: #{@url}") | ||||
end | end | ||||
# Post processing | # Post processing | ||||
@applications.each do |app| | @applications.each do |app| | ||||
app.authority_name = @name | app.authority_name = @name | ||||
@@ -41,32 +41,32 @@ module UKPlanningScraper | |||||
output = [] | output = [] | ||||
# FIXME - silently ignores invalid apps. How should we handle them? | # FIXME - silently ignores invalid apps. How should we handle them? | ||||
@applications.each { |app| output << app.to_hash if app.valid? } | @applications.each { |app| output << app.to_hash if app.valid? } | ||||
# Reset so that old params don't get used for new scrapes | # Reset so that old params don't get used for new scrapes | ||||
clear_scrape_params | clear_scrape_params | ||||
output # Single point of successful exit | output # Single point of successful exit | ||||
end | end | ||||
# Return this authority's tags in alphabetical order.
def tags
  @tags.sort
end
# Add multiple tags to existing tags. Each one is normalised and
# de-duplicated by #add_tag.
def add_tags(tags)
  tags.each do |tag|
    add_tag(tag)
  end
end
# Add a single tag to existing tags, normalising it first
# (strip surrounding whitespace, downcase, remove internal spaces).
def add_tag(tag)
  normalised = tag.strip.downcase.delete(' ')
  @tags << normalised unless tagged?(normalised) # prevent duplicates
end
# True when +tag+ is already recorded against this authority.
def tagged?(tag)
  @tags.member?(tag)
end
def system | def system | ||||
if @url.match(/search\.do\?action=advanced/i) | if @url.match(/search\.do\?action=advanced/i) | ||||
'idox' | 'idox' | ||||
@@ -84,18 +84,18 @@ module UKPlanningScraper | |||||
# Every authority known to the scraper (populated from authorities.csv).
def self.all
  @@authorities
end
# List all the tags in use across every authority, de-duplicated and
# sorted alphabetically.
def self.tags
  # flat_map replaces the manual accumulate-then-flatten loop.
  @@authorities.flat_map(&:tags).uniq.sort
end
# Look up an authority by its exact name.
# Raises AuthorityNotFound when no authority matches.
def self.named(name)
  found = @@authorities.find { |a| name == a.name }
  raise AuthorityNotFound if found.nil?
  found
end
# Tagged x | # Tagged x | ||||
@@ -125,11 +125,11 @@ module UKPlanningScraper | |||||
CSV.foreach(File.join(File.dirname(__dir__), 'uk_planning_scraper', \ | CSV.foreach(File.join(File.dirname(__dir__), 'uk_planning_scraper', \ | ||||
'authorities.csv'), :headers => true) do |line| | 'authorities.csv'), :headers => true) do |line| | ||||
auth = Authority.new(line['authority_name'], line['url']) | auth = Authority.new(line['authority_name'], line['url']) | ||||
if line['tags'] | if line['tags'] | ||||
auth.add_tags(line['tags'].split(/\s+/)) | auth.add_tags(line['tags'].split(/\s+/)) | ||||
end | end | ||||
auth.add_tag(auth.system) | auth.add_tag(auth.system) | ||||
@@authorities << auth | @@authorities << auth | ||||
end | end | ||||
@@ -4,7 +4,7 @@ module UKPlanningScraper | |||||
class Authority | class Authority | ||||
# Parameter methods for Authority#scrape | # Parameter methods for Authority#scrape | ||||
# Designed to be method chained, eg:
# | |||||
# | |||||
# applications = UKPlanningScraper::Authority.named("Barnet"). \ | # applications = UKPlanningScraper::Authority.named("Barnet"). \ | ||||
# development_type("Q22").keywords("illuminat"). \ | # development_type("Q22").keywords("illuminat"). \ | ||||
# validated_days(30).scrape | # validated_days(30).scrape | ||||
@@ -17,7 +17,7 @@ module UKPlanningScraper | |||||
unless n > 0 | unless n > 0 | ||||
raise ArgumentError.new("validated_days must be greater than 0") | raise ArgumentError.new("validated_days must be greater than 0") | ||||
end | end | ||||
validated_from(Date.today - (n - 1)) | validated_from(Date.today - (n - 1)) | ||||
validated_to(Date.today) | validated_to(Date.today) | ||||
self | self | ||||
@@ -31,7 +31,7 @@ module UKPlanningScraper | |||||
unless n > 0 | unless n > 0 | ||||
raise ArgumentError.new("received_days must be greater than 0") | raise ArgumentError.new("received_days must be greater than 0") | ||||
end | end | ||||
received_from(Date.today - (n - 1)) | received_from(Date.today - (n - 1)) | ||||
received_to(Date.today) | received_to(Date.today) | ||||
self | self | ||||
@@ -45,18 +45,18 @@ module UKPlanningScraper | |||||
unless n > 0 | unless n > 0 | ||||
raise ArgumentError.new("decided_days must be greater than 0") | raise ArgumentError.new("decided_days must be greater than 0") | ||||
end | end | ||||
decided_from(Date.today - (n - 1)) | decided_from(Date.today - (n - 1)) | ||||
decided_to(Date.today) | decided_to(Date.today) | ||||
self | self | ||||
end | end | ||||
def applicant_name(s) | def applicant_name(s) | ||||
unless system == 'idox' | unless system == 'idox' | ||||
raise NoMethodError.new("applicant_name is only implemented for Idox. \ | raise NoMethodError.new("applicant_name is only implemented for Idox. \ | ||||
This authority (#{@name}) is #{system.capitalize}.") | This authority (#{@name}) is #{system.capitalize}.") | ||||
end | end | ||||
check_class(s, String) | check_class(s, String) | ||||
@scrape_params[:applicant_name] = s.strip | @scrape_params[:applicant_name] = s.strip | ||||
self | self | ||||
@@ -67,7 +67,7 @@ module UKPlanningScraper | |||||
raise NoMethodError.new("application_type is only implemented for \ | raise NoMethodError.new("application_type is only implemented for \ | ||||
Idox. This authority (#{@name}) is #{system.capitalize}.") | Idox. This authority (#{@name}) is #{system.capitalize}.") | ||||
end | end | ||||
check_class(s, String) | check_class(s, String) | ||||
@scrape_params[:application_type] = s.strip | @scrape_params[:application_type] = s.strip | ||||
self | self | ||||
@@ -78,14 +78,24 @@ module UKPlanningScraper | |||||
raise NoMethodError.new("development_type is only implemented for \ | raise NoMethodError.new("development_type is only implemented for \ | ||||
Idox. This authority (#{@name}) is #{system.capitalize}.") | Idox. This authority (#{@name}) is #{system.capitalize}.") | ||||
end | end | ||||
check_class(s, String) | check_class(s, String) | ||||
@scrape_params[:development_type] = s.strip | @scrape_params[:development_type] = s.strip | ||||
self | self | ||||
end | end | ||||
# Ask the scraper to also collect property (address) details for each
# application found. Only the Idox back end implements this.
# Returns self so calls can be chained.
def include_property
  unless system == 'idox'
    raise NoMethodError.new("include_property is only implemented for \
Idox. This authority (#{@name}) is #{system.capitalize}.")
  end
  @scrape_params[:include_property] = true
  self
end
private | private | ||||
# Handle the simple params with this | # Handle the simple params with this | ||||
def method_missing(method_name, *args) | def method_missing(method_name, *args) | ||||
sc_params = { | sc_params = { | ||||
@@ -97,18 +107,18 @@ module UKPlanningScraper | |||||
decided_to: Date, | decided_to: Date, | ||||
keywords: String | keywords: String | ||||
} | } | ||||
value = args[0] | value = args[0] | ||||
if sc_params[method_name] | if sc_params[method_name] | ||||
check_class(value, sc_params[method_name], method_name.to_s) | check_class(value, sc_params[method_name], method_name.to_s) | ||||
value.strip! if value.class == String | value.strip! if value.class == String | ||||
if value.class == Date && value > Date.today | if value.class == Date && value > Date.today | ||||
raise ArgumentError.new("#{method_name} can't be a date in the " + \ | raise ArgumentError.new("#{method_name} can't be a date in the " + \ | ||||
"future (#{value.to_s})") | "future (#{value.to_s})") | ||||
end | end | ||||
@scrape_params[method_name] = value | @scrape_params[method_name] = value | ||||
self | self | ||||
else | else | ||||
@@ -119,7 +129,7 @@ module UKPlanningScraper | |||||
def clear_scrape_params | def clear_scrape_params | ||||
@scrape_params = {} | @scrape_params = {} | ||||
end | end | ||||
# https://stackoverflow.com/questions/5100299/how-to-get-the-name-of-the-calling-method | # https://stackoverflow.com/questions/5100299/how-to-get-the-name-of-the-calling-method | ||||
def check_class( | def check_class( | ||||
param_value, | param_value, | ||||
@@ -4,15 +4,32 @@ require 'pp' | |||||
module UKPlanningScraper | module UKPlanningScraper | ||||
class Authority | class Authority | ||||
private | private | ||||
# The scheme-plus-host portion of the authority's search URL,
# eg "https://example.gov.uk". Memoised after the first call.
def base_url
  @base_url ||= @url.match(%r{(https?://.+?)/})[1]
end
# Shared Mechanize HTTP client, created lazily on first use.
def agent
  @agent ||= Mechanize.new
end
# Fetch +url+ with the shared Mechanize agent.
# On HTTP 200: yields the response to the block if one is given,
# otherwise returns the response.
# On any other status: logs the code and returns nil.
# NOTE(review): callers receive nil on non-200 responses — confirm every
# call site handles a nil return before relying on it.
def get(url, &block)
  puts "Getting: #{url}"
  response = agent.get(url)
  unless response.code == '200' # Mechanize reports the status as a String
    puts "Error: HTTP #{response.code}"
    return nil
  end
  block ? block.call(response) : response
end
def scrape_idox(params, options) | def scrape_idox(params, options) | ||||
puts "Using Idox scraper." | puts "Using Idox scraper." | ||||
base_url = @url.match(/(https?:\/\/.+?)\//)[1] | |||||
apps = [] | apps = [] | ||||
agent = Mechanize.new | |||||
puts "Getting: #{@url}" | |||||
page = agent.get(@url) # load the search form page | |||||
page = get(@url) # load the search form page | |||||
# Check that the search form is actually present. | # Check that the search form is actually present. | ||||
# When Idox has an internal error it returns an error page with HTTP 200. | # When Idox has an internal error it returns an error page with HTTP 200. | ||||
@@ -31,7 +48,7 @@ module UKPlanningScraper | |||||
}.each { |f| form.add_field!(f) unless form.has_field?(f) } | }.each { |f| form.add_field!(f) unless form.has_field?(f) } | ||||
date_format = "%d/%m/%Y" | date_format = "%d/%m/%Y" | ||||
form.send(:"date(applicationReceivedStart)", params[:received_from].strftime(date_format)) if params[:received_from] | form.send(:"date(applicationReceivedStart)", params[:received_from].strftime(date_format)) if params[:received_from] | ||||
form.send(:"date(applicationReceivedEnd)", params[:received_to].strftime(date_format)) if params[:received_to] | form.send(:"date(applicationReceivedEnd)", params[:received_to].strftime(date_format)) if params[:received_to] | ||||
@@ -42,12 +59,12 @@ module UKPlanningScraper | |||||
form.send(:"date(applicationDecisionEnd)", params[:decided_to].strftime(date_format)) if params[:decided_to] | form.send(:"date(applicationDecisionEnd)", params[:decided_to].strftime(date_format)) if params[:decided_to] | ||||
form.send(:"searchCriteria\.description", params[:keywords]) | form.send(:"searchCriteria\.description", params[:keywords]) | ||||
# Some councils don't have the applicant name on their form, eg Bexley | # Some councils don't have the applicant name on their form, eg Bexley | ||||
form.send(:"searchCriteria\.applicantName", params[:applicant_name]) if form.has_field? 'searchCriteria.applicantName' | form.send(:"searchCriteria\.applicantName", params[:applicant_name]) if form.has_field? 'searchCriteria.applicantName' | ||||
form.send(:"searchCriteria\.caseType", params[:application_type]) if form.has_field? 'searchCriteria.caseType' | form.send(:"searchCriteria\.caseType", params[:application_type]) if form.has_field? 'searchCriteria.caseType' | ||||
# Only some Idox sites (eg Bolton) have a 'searchCriteria.developmentType' parameter | # Only some Idox sites (eg Bolton) have a 'searchCriteria.developmentType' parameter | ||||
form.send(:"searchCriteria\.developmentType", params[:development_type]) if form.has_field? 'searchCriteria.developmentType' | form.send(:"searchCriteria\.developmentType", params[:development_type]) if form.has_field? 'searchCriteria.developmentType' | ||||
@@ -56,7 +73,7 @@ module UKPlanningScraper | |||||
if page.search('.errors').inner_text.match(/Too many results found/i) | if page.search('.errors').inner_text.match(/Too many results found/i) | ||||
raise TooManySearchResults.new("Scrape in smaller chunks. Use shorter date ranges and/or more search parameters.") | raise TooManySearchResults.new("Scrape in smaller chunks. Use shorter date ranges and/or more search parameters.") | ||||
end | end | ||||
loop do | loop do | ||||
# Parse search results | # Parse search results | ||||
items = page.search('li.searchresult') | items = page.search('li.searchresult') | ||||
@@ -69,7 +86,7 @@ module UKPlanningScraper | |||||
# Parse info line | # Parse info line | ||||
info_line = app.at("p.metaInfo").inner_text.strip | info_line = app.at("p.metaInfo").inner_text.strip | ||||
bits = info_line.split('|').map { |e| e.strip.delete("\r\n") } | bits = info_line.split('|').map { |e| e.strip.delete("\r\n") } | ||||
bits.each do |bit| | bits.each do |bit| | ||||
if matches = bit.match(/Ref\. No:\s+(.+)/) | if matches = bit.match(/Ref\. No:\s+(.+)/) | ||||
data.council_reference = matches[1] | data.council_reference = matches[1] | ||||
@@ -78,7 +95,7 @@ module UKPlanningScraper | |||||
if matches = bit.match(/(Received|Registered):\s+.*(\d{2}\s\w{3}\s\d{2}\d{2}?)/) | if matches = bit.match(/(Received|Registered):\s+.*(\d{2}\s\w{3}\s\d{2}\d{2}?)/) | ||||
data.date_received = Date.parse(matches[2]) | data.date_received = Date.parse(matches[2]) | ||||
end | end | ||||
if matches = bit.match(/Validated:\s+.*(\d{2}\s\w{3}\s\d{2}\d{2}?)/) | if matches = bit.match(/Validated:\s+.*(\d{2}\s\w{3}\s\d{2}\d{2}?)/) | ||||
data.date_validated = Date.parse(matches[1]) | data.date_validated = Date.parse(matches[1]) | ||||
end | end | ||||
@@ -92,91 +109,153 @@ module UKPlanningScraper | |||||
data.info_url = base_url + app.at('a')['href'] | data.info_url = base_url + app.at('a')['href'] | ||||
data.address = app.at('p.address').inner_text.strip | data.address = app.at('p.address').inner_text.strip | ||||
data.description = app.at('a').inner_text.strip | data.description = app.at('a').inner_text.strip | ||||
apps << data | apps << data | ||||
end | end | ||||
# Get the Next button from the pager, if there is one | # Get the Next button from the pager, if there is one | ||||
if next_button = page.at('a.next') | if next_button = page.at('a.next') | ||||
next_url = base_url + next_button[:href]# + '&searchCriteria.resultsPerPage=100' | next_url = base_url + next_button[:href]# + '&searchCriteria.resultsPerPage=100' | ||||
sleep options[:delay] | sleep options[:delay] | ||||
puts "Getting: #{next_url}" | |||||
page = agent.get(next_url) | |||||
page = get(next_url) | |||||
else | else | ||||
break | break | ||||
end | end | ||||
end | end | ||||
# Scrape the summary tab for each app | # Scrape the summary tab for each app | ||||
apps.each_with_index do |app, i| | apps.each_with_index do |app, i| | ||||
sleep options[:delay] | sleep options[:delay] | ||||
puts "#{i + 1} of #{apps.size}: #{app.info_url}" | |||||
res = agent.get(app.info_url) | |||||
if res.code == '200' # That's a String not an Integer, ffs | |||||
# Parse the summary tab for this app | |||||
app.scraped_at = Time.now | |||||
# The Documents tab doesn't show if there are no documents (we get li.nodocuments instead) | |||||
# Bradford has #tab_documents but without the document count on it | |||||
app.documents_count = 0 | |||||
if documents_link = res.at('.associateddocument a') | |||||
if documents_link.inner_text.match(/\d+/) | |||||
app.documents_count = documents_link.inner_text.match(/\d+/)[0].to_i | |||||
app.documents_url = base_url + documents_link[:href] | |||||
end | |||||
elsif documents_link = res.at('#tab_documents') | |||||
if documents_link.inner_text.match(/\d+/) | |||||
app.documents_count = documents_link.inner_text.match(/\d+/)[0].to_i | |||||
app.documents_url = base_url + documents_link[:href] | |||||
end | |||||
puts "#{i + 1} of #{apps.size}" | |||||
parse_info_url(app) if app.info_url | |||||
next unless params[:include_property] | |||||
parse_property_url(app) if app.property_url | |||||
parse_property_detail_urls(app) if app.property_detail_urls | |||||
end # scrape summary tab for apps | |||||
apps | |||||
end # scrape_idox | |||||
# Scrape the Idox summary tab for a single application and populate
# +app+ in place (dates, status, decision, appeal fields, document
# count/URL). Also records the associated-property link, if present,
# for the optional property scrape.
def parse_info_url(app)
  get(app.info_url) do |res|
    app.scraped_at = Time.now

    # The Documents tab doesn't show if there are no documents (we get
    # li.nodocuments instead).
    # Bradford has #tab_documents but without the document count on it.
    app.documents_count = 0

    if documents_link = res.at('.associateddocument a')
      if documents_link.inner_text.match(/\d+/)
        app.documents_count = documents_link.inner_text.match(/\d+/)[0].to_i
        app.documents_url = base_url + documents_link[:href]
      end
    elsif documents_link = res.at('#tab_documents')
      if documents_link.inner_text.match(/\d+/)
        app.documents_count = documents_link.inner_text.match(/\d+/)[0].to_i
        app.documents_url = base_url + documents_link[:href]
      end
    end

    # We need to find values in the table by using the th labels.
    # The row indexes/positions change from site to site (or even app
    # to app) so we can't rely on that.
    res.search('#simpleDetailsTable tr').each do |row|
      # Guard against header-less/value-less rows, which would otherwise
      # raise NoMethodError on nil.
      next unless row.at('th') && row.at('td')
      key = row.at('th').inner_text.strip
      value = row.at('td').inner_text.strip

      case key
      when 'Reference'
        app.council_reference = value
      when 'Alternative Reference'
        app.alternative_reference = value unless value.empty?
      when 'Planning Portal Reference'
        app.alternative_reference = value unless value.empty?
      when 'Application Received'
        app.date_received = Date.parse(value) if value.match(/\d/)
      when 'Application Registered'
        app.date_received = Date.parse(value) if value.match(/\d/)
      when 'Application Validated'
        app.date_validated = Date.parse(value) if value.match(/\d/)
      when 'Address'
        app.address = value unless value.empty?
      when 'Proposal'
        app.description = value unless value.empty?
      when 'Status'
        app.status = value unless value.empty?
      when 'Decision'
        app.decision = value unless value.empty?
      when 'Decision Issued Date'
        app.date_decision = Date.parse(value) if value.match(/\d/)
      when 'Appeal Status'
        app.appeal_status = value unless value.empty?
      when 'Appeal Decision'
        app.appeal_decision = value unless value.empty?
      else
        puts "Error: key '#{key}' not found"
      end # case
    end # each row

    # Find the associated property link, if any.
    if property_association_link = res.at('p.associatedproperty a')
      app.property_url = base_url + property_association_link[:href]
      app.property_count = property_association_link.inner_text.to_i
    end
  end # get
end
# Collect the URLs of the individual property detail pages linked from
# the application's associated-property page.
# Leaves app.property_detail_urls as [] if the page can't be fetched.
def parse_property_url(app)
  app.property_detail_urls = []
  get(app.property_url) do |res|
    # first(10) keeps the original cap of 10 properties per application
    # (was an each_with_index loop with a manual break).
    res.search('#Property li a').first(10).each do |property_link|
      app.property_detail_urls << base_url + property_link[:href]
    end
  end
end
# Fetch each property detail page for +app+ and build a Property object
# from the #propertyAddress table on each page, appending it to
# app.properties.
def parse_property_detail_urls(app)
  app.properties = []
  app.property_detail_urls.each do |property_url|
    get(property_url) do |res|
      property = Property.new
      res.search('#propertyAddress tr').each do |row|
        # Guard against header-less/value-less rows, which would
        # otherwise raise NoMethodError on nil.
        next unless row.at('th') && row.at('td')
        key = row.at('th').inner_text.strip
        value = row.at('td').inner_text.strip

        case key
        when 'UPRN:'
          property.uprn = value
        when 'Full Address:'
          property.address = value unless value.empty?
        when 'Property Number:'
          property.number = value unless value.empty?
        when 'Street:'
          property.street = value unless value.empty?
        when 'Town:'
          property.town = value unless value.empty?
        when 'Postcode:'
          property.postcode = value unless value.empty?
        when 'Ward:'
          property.ward = value unless value.empty?
        when 'Parish:'
          property.parish = value unless value.empty?
        end # case
      end # each row
      app.properties << property
    end # get
  end # each url
end
end # class | end # class | ||||
end | end |
@@ -8,10 +8,10 @@ module UKPlanningScraper | |||||
def scrape_northgate(params, options) | def scrape_northgate(params, options) | ||||
puts "Using Northgate scraper." | puts "Using Northgate scraper." | ||||
base_url = @url.match(/(https?:\/\/.+?)\//)[1] | base_url = @url.match(/(https?:\/\/.+?)\//)[1] | ||||
# Remove 'generalsearch.aspx' from the end and add '/Generic/' - case sensitive? | # Remove 'generalsearch.aspx' from the end and add '/Generic/' - case sensitive? | ||||
generic_url = @url.match(/.+\//)[0] + 'Generic/' | generic_url = @url.match(/.+\//)[0] + 'Generic/' | ||||
apps = [] | apps = [] | ||||
$stdout.sync = true # Flush output buffer after every write so log messages appear immediately. | $stdout.sync = true # Flush output buffer after every write so log messages appear immediately. | ||||
@@ -0,0 +1,30 @@ | |||||
module UKPlanningScraper
  # A single property (address record) associated with a planning
  # application, as scraped from an Idox property details page.
  class Property
    attr_accessor :uprn, :address, :number, :street, :town,
                  :postcode, :ward, :parish

    # Serialise every attribute into a symbol-keyed Hash.
    def to_hash
      {
        uprn: uprn,
        address: address,
        number: number,
        street: street,
        town: town,
        postcode: postcode,
        ward: ward,
        parish: parish
      }
    end

    # A property is only considered valid once it has a UPRN.
    def valid?
      !!@uprn
    end
  end
end
@@ -0,0 +1,25 @@ | |||||
require 'spec_helper'

# Exercises Authority#include_property end-to-end against a recorded
# (VCR) Idox session for Cardiff.
describe UKPlanningScraper::Authority do
  describe '#include_property' do
    let(:scraper) { UKPlanningScraper::Authority.named(authority_name) }

    context 'for 2 days with property details' do
      let(:authority_name) { 'Cardiff' }

      it 'returns apps' do
        apps = VCR.use_cassette("#{self.class.description}") do
          scraper.include_property
            .decided_from(Date.new(2019, 4, 8))
            .decided_to(Date.new(2019, 4, 9))
            .scrape(delay: 0)
        end
        pp apps
      end
    end
  end
end