From 452e39a867f6f311962ea64291619e056945c3cb Mon Sep 17 00:00:00 2001 From: Graeme Porteous Date: Mon, 15 Apr 2019 09:59:22 +0100 Subject: [PATCH] Scrape property details --- lib/uk_planning_scraper.rb | 1 + lib/uk_planning_scraper/application.rb | 9 +++- lib/uk_planning_scraper/idox.rb | 66 ++++++++++++++++++++++++++ lib/uk_planning_scraper/property.rb | 30 ++++++++++++ 4 files changed, 105 insertions(+), 1 deletion(-) create mode 100644 lib/uk_planning_scraper/property.rb diff --git a/lib/uk_planning_scraper.rb b/lib/uk_planning_scraper.rb index 3dd2d0a..71100a9 100644 --- a/lib/uk_planning_scraper.rb +++ b/lib/uk_planning_scraper.rb @@ -2,6 +2,7 @@ require "uk_planning_scraper/version" require "uk_planning_scraper/authority" require "uk_planning_scraper/authority_scrape_params" require "uk_planning_scraper/application" +require "uk_planning_scraper/property" require 'uk_planning_scraper/idox' require 'uk_planning_scraper/northgate' require 'logger' diff --git a/lib/uk_planning_scraper/application.rb b/lib/uk_planning_scraper/application.rb index 5fced94..b8e7508 100644 --- a/lib/uk_planning_scraper/application.rb +++ b/lib/uk_planning_scraper/application.rb @@ -16,6 +16,10 @@ module UKPlanningScraper attr_accessor :date_decision attr_accessor :appeal_status attr_accessor :appeal_decision + attr_accessor :property_count + attr_accessor :property_url + attr_accessor :property_detail_urls + attr_accessor :properties def to_hash { @@ -34,7 +38,10 @@ module UKPlanningScraper documents_url: @documents_url, alternative_reference: @alternative_reference, appeal_status: @appeal_status, - appeal_decision: @appeal_decision + appeal_decision: @appeal_decision, + property_count: @property_count, + property_detail_urls: @property_detail_urls, + properties: @properties } end diff --git a/lib/uk_planning_scraper/idox.rb b/lib/uk_planning_scraper/idox.rb index d483dcf..230799e 100644 --- a/lib/uk_planning_scraper/idox.rb +++ b/lib/uk_planning_scraper/idox.rb @@ -120,6 +120,8 
@@ module UKPlanningScraper puts "#{i + 1} of #{apps.size}: #{app.info_url}" parse_info_url(app) if app.info_url + parse_property_url(app) if app.property_url + parse_property_detail_urls(app) if app.property_detail_urls end # scrape summary tab for apps apps end # scrape_idox @@ -186,9 +188,73 @@ module UKPlanningScraper puts "Error: key '#{key}' not found" end # case end # each row + + # find associated property link + property_association_link = res.at('p.associatedproperty a') + + if property_association_link + app.property_url = base_url + property_association_link[:href] + app.property_count = property_association_link.inner_text.to_i + end else puts "Error: HTTP #{res.code}" end # if end + + def parse_property_url(app) + # get URLs of property pages + app.property_detail_urls = [] + + res = agent.get(app.property_url) + + if res.code == '200' + res.search('#Property li a').each do |property_link| + app.property_detail_urls << base_url + property_link[:href] + end + else + puts "Error: HTTP #{res.code}" + end + end + + def parse_property_detail_urls(app) + # get property details + app.properties = [] + + app.property_detail_urls.each do |property_url| + res = agent.get(property_url) + + if res.code == '200' + property = Property.new + + res.search('#propertyAddress tr').each do |row| + key = row.at('th').inner_text.strip + value = row.at('td').inner_text.strip + + case key + when 'UPRN:' + property.uprn = value + when 'Full Address:' + property.address = value unless value.empty? + when 'Property Number:' + property.number = value unless value.empty? + when 'Street:' + property.street = value unless value.empty? + when 'Town:' + property.town = value unless value.empty? + when 'Postcode:' + property.postcode = value unless value.empty? + when 'Ward:' + property.ward = value unless value.empty? + when 'Parish:' + property.parish = value unless value.empty? 
+ end + end + + app.properties << property + else + puts "Error: HTTP #{res.code}" + end + end + end end # class end diff --git a/lib/uk_planning_scraper/property.rb b/lib/uk_planning_scraper/property.rb new file mode 100644 index 0000000..8154ca8 --- /dev/null +++ b/lib/uk_planning_scraper/property.rb @@ -0,0 +1,30 @@ +module UKPlanningScraper + class Property + attr_accessor :uprn + attr_accessor :address + attr_accessor :number + attr_accessor :street + attr_accessor :town + attr_accessor :postcode + attr_accessor :ward + attr_accessor :parish + + def to_hash + { + uprn: @uprn, + address: @address, + number: @number, + street: @street, + town: @town, + postcode: @postcode, + ward: @ward, + parish: @parish + } + end + + def valid? + return true if @uprn + false + end + end +end