Currently we only parse:
- application_type
- location_easting
- location_northing

northgate-dates
@@ -74,6 +74,16 @@ module UKPlanningScraper | |||||
# This may change if there are subsequent extensions. | # This may change if there are subsequent extensions. | ||||
attr_accessor :extended_expiry_date | attr_accessor :extended_expiry_date | ||||
# Application type: Full planning permission, advertisement, | |||||
# LDC, prior approval etc. | |||||
# Codes are specific to each local planning authority although there will | |||||
# be a high degree of overlap between LPAs | |||||
attr_accessor :application_type | |||||
attr_accessor :location_easting | |||||
attr_accessor :location_northing | |||||
def to_hash | def to_hash | ||||
{ | { | ||||
scraped_at: @scraped_at, | scraped_at: @scraped_at, | ||||
@@ -94,7 +104,10 @@ module UKPlanningScraper | |||||
appeal_decision: @appeal_decision, | appeal_decision: @appeal_decision, | ||||
consultation_end_date: @consultation_end_date, | consultation_end_date: @consultation_end_date, | ||||
statutory_due_date: @statutory_due_date, | statutory_due_date: @statutory_due_date, | ||||
extended_expiry_date: @extended_expiry_date | |||||
extended_expiry_date: @extended_expiry_date, | |||||
application_type: @application_type, | |||||
location_easting: @location_easting, | |||||
location_northing: @location_northing | |||||
} | } | ||||
end | end | ||||
@@ -73,6 +73,15 @@ module UKPlanningScraper | |||||
self | self | ||||
end | end | ||||
# Requests that the scrape also collect per-application details.
#
# Sets the +:include_details+ flag in +@scrape_params+ and returns the
# receiver so the call can be chained with other scrape options.
# NOTE(review): presumably the Northgate scraper receives these params and
# fetches each application's details page when the flag is set - confirm
# against the scraper.
#
# @return [self] the receiver, to allow method chaining
# @raise [NoMethodError] if this authority's system is not 'northgate'
def include_details
  unless system == 'northgate'
    raise NoMethodError,
          "include_details is only implemented for Northgate. This authority (#{@name}) is #{system.capitalize}."
  end

  @scrape_params[:include_details] = true
  self
end
def include_dates | def include_dates | ||||
unless system == 'northgate' | unless system == 'northgate' | ||||
raise NoMethodError.new("include_dates is only implemented for Northgate. This authority (#{@name}) is #{system.capitalize}.") | raise NoMethodError.new("include_dates is only implemented for Northgate. This authority (#{@name}) is #{system.capitalize}.") | ||||
@@ -10,6 +10,7 @@ module UKPlanningScraper | |||||
logger.level = Logger::DEBUG | logger.level = Logger::DEBUG | ||||
logger.info "Using Northgate scraper." | logger.info "Using Northgate scraper." | ||||
logger.info "Will also scrape details page." if params[:include_details] | |||||
logger.info "Will also scrape dates page." if params[:include_dates] | logger.info "Will also scrape dates page." if params[:include_dates] | ||||
base_url = @url.match(/(https?:\/\/.+?)\//)[1] | base_url = @url.match(/(https?:\/\/.+?)\//)[1] | ||||
@@ -137,6 +138,7 @@ module UKPlanningScraper | |||||
end | end | ||||
end | end | ||||
# Scrape dates page if required | |||||
if params[:include_dates] | if params[:include_dates] | ||||
apps.each do |app| | apps.each do |app| | ||||
sleep options[:delay] | sleep options[:delay] | ||||
@@ -175,7 +177,37 @@ module UKPlanningScraper | |||||
end | end | ||||
end | end | ||||
end | end | ||||
# Scrape details page if required | |||||
if params[:include_details] | |||||
apps.each do |app| | |||||
sleep options[:delay] | |||||
agent = Mechanize.new | |||||
# agent.agent.http.verify_mode = OpenSSL::SSL::VERIFY_NONE | |||||
logger.info "Getting details page for application #{app.council_reference}: #{app.info_url}" | |||||
page = agent.get(app.info_url) # load the search form page | |||||
if page.code == '200' | |||||
page.search(".dataview")[2].search(".list li").each do |element| | |||||
if bits = element.inner_html.match(/<span>(.+)<\/span>(.+)</) | |||||
# Some labels have tab characters (\t) in them | |||||
label = bits[1].downcase.gsub(/[[:space:]]+/, ' ').strip | |||||
value = bits[2].gsub(/[[:space:]]+/, ' ').strip | |||||
case label | |||||
when 'application type' | |||||
app.application_type = value | |||||
when 'location co ordinates' | |||||
coords = value.match(/Easting.+?(\d+).+?(\d+)/) | |||||
app.location_easting = coords[1].to_i | |||||
app.location_northing = coords[2].to_i | |||||
end | |||||
end | |||||
end | |||||
end | |||||
end | |||||
end | |||||
apps | apps | ||||
end | end | ||||
end | end | ||||