瀏覽代碼

Scrape properties details

pull/38/head
Graeme Porteous 5 年之前
父節點
當前提交
452e39a867
共有 4 個文件被更改,包括 105 次插入1 次删除
  1. +1
    -0
      lib/uk_planning_scraper.rb
  2. +8
    -1
      lib/uk_planning_scraper/application.rb
  3. +66
    -0
      lib/uk_planning_scraper/idox.rb
  4. +30
    -0
      lib/uk_planning_scraper/property.rb

+ 1
- 0
lib/uk_planning_scraper.rb 查看文件

@@ -2,6 +2,7 @@ require "uk_planning_scraper/version"
require "uk_planning_scraper/authority"
require "uk_planning_scraper/authority_scrape_params"
require "uk_planning_scraper/application"
require "uk_planning_scraper/property"
require 'uk_planning_scraper/idox'
require 'uk_planning_scraper/northgate'
require 'logger'


+ 8
- 1
lib/uk_planning_scraper/application.rb 查看文件

@@ -16,6 +16,10 @@ module UKPlanningScraper
attr_accessor :date_decision
attr_accessor :appeal_status
attr_accessor :appeal_decision
attr_accessor :property_count
attr_accessor :property_url
attr_accessor :property_detail_urls
attr_accessor :properties

def to_hash
{
@@ -34,7 +38,10 @@ module UKPlanningScraper
documents_url: @documents_url,
alternative_reference: @alternative_reference,
appeal_status: @appeal_status,
appeal_decision: @appeal_decision
appeal_decision: @appeal_decision,
property_count: @property_count,
property_detail_urls: @property_detail_urls,
properties: @properties
}
end



+ 66
- 0
lib/uk_planning_scraper/idox.rb 查看文件

@@ -120,6 +120,8 @@ module UKPlanningScraper
puts "#{i + 1} of #{apps.size}: #{app.info_url}"

parse_info_url(app) if app.info_url
parse_property_url(app) if app.property_url
parse_property_detail_urls(app) if app.property_detail_urls
end # scrape summary tab for apps
apps
end # scrape_idox
@@ -186,9 +188,73 @@ module UKPlanningScraper
puts "Error: key '#{key}' not found"
end # case
end # each row

# find associated property link
property_association_link = res.at('p.associatedproperty a')

if property_association_link
app.property_url = base_url + property_association_link[:href]
app.property_count = property_association_link.inner_text.to_i
end
else
puts "Error: HTTP #{res.code}"
end # if
end

# Fetches the page at app.property_url and records the URL of every
# property link found on it in app.property_detail_urls.
#
# app.property_detail_urls is reset to an empty array before the request,
# so it stays empty when the response is not HTTP 200 (an error line is
# printed in that case).
def parse_property_url(app)
  app.property_detail_urls = []
  listing = agent.get(app.property_url)

  # Anything other than a 200 is reported and leaves the list empty.
  unless listing.code == '200'
    puts "Error: HTTP #{listing.code}"
    return
  end

  # Links are relative to the site, so prefix each with base_url.
  listing.search('#Property li a').each do |anchor|
    app.property_detail_urls << base_url + anchor[:href]
  end
end

# Fetches every URL in app.property_detail_urls and builds one Property per
# successfully fetched page, storing the results in app.properties.
#
# app.properties is reset to an empty array first. Pages that do not return
# HTTP 200 are reported with an error line and contribute no Property.
#
# Within a page, each row of the '#propertyAddress' table is read as a
# label (th) / value (td) pair:
# * rows missing either cell are skipped instead of raising on nil;
# * blank values are skipped for every field, UPRN included, so a page
#   without a UPRN leaves Property#uprn unset rather than set to "";
# * rows whose label is not one of the known fields are ignored.
def parse_property_detail_urls(app)
  app.properties = []

  app.property_detail_urls.each do |property_url|
    res = agent.get(property_url)

    unless res.code == '200'
      puts "Error: HTTP #{res.code}"
      next
    end

    property = Property.new

    res.search('#propertyAddress tr').each do |row|
      label_cell = row.at('th')
      value_cell = row.at('td')
      # Layout/spacer rows may lack a th or td; there is nothing to read.
      next unless label_cell && value_cell

      key = label_cell.inner_text.strip
      value = value_cell.inner_text.strip
      # An empty cell carries no information; leave the attribute unset.
      next if value.empty?

      case key
      when 'UPRN:'
        property.uprn = value
      when 'Full Address:'
        property.address = value
      when 'Property Number:'
        property.number = value
      when 'Street:'
        property.street = value
      when 'Town:'
        property.town = value
      when 'Postcode:'
        property.postcode = value
      when 'Ward:'
        property.ward = value
      when 'Parish:'
        property.parish = value
      end
    end

    app.properties << property
  end
end
end # class
end

+ 30
- 0
lib/uk_planning_scraper/property.rb 查看文件

@@ -0,0 +1,30 @@
module UKPlanningScraper
  # Plain record holding the address fields scraped for a single property.
  class Property
    attr_accessor :uprn, :address, :number, :street,
                  :town, :postcode, :ward, :parish

    # Returns the fields as a Hash keyed by attribute name; attributes that
    # were never assigned appear with a nil value.
    def to_hash
      {
        uprn: uprn,
        address: address,
        number: number,
        street: street,
        town: town,
        postcode: postcode,
        ward: ward,
        parish: parish
      }
    end

    # A property counts as valid when its UPRN is set to a truthy value.
    def valid?
      @uprn ? true : false
    end
  end
end

Loading…
取消
儲存