소스 검색

Scrape and parse summary tab for each app

tags/v0.4.5
Adrian Short 6 년 전
부모
커밋
ddc81dd838
1개의 파일이 변경되었으며, 60줄이 추가되고 2줄이 삭제되었습니다
  1. +60
    -2
      lib/uk_planning_scraper.rb

+ 60
- 2
lib/uk_planning_scraper.rb 파일 보기

@@ -90,6 +90,64 @@ module UKPlanningScraper
break
end
end
# Scrape the summary tab for each app.
#
# For every entry in `apps`: sleeps for @options[:delay], fetches
# app[:info_url] through `agent`, and on an HTTP 200 response copies the
# fields found on the page into the app hash. Any other response code is
# reported on stdout and the app is left without the scraped fields. Every
# app is pretty-printed after it has been processed.
apps.each_with_index do |app, i|
  sleep @options[:delay]
  puts "#{i + 1} of #{apps.size}: #{app[:info_url]}"
  res = agent.get(app[:info_url])

  if res.code == '200' # The response code is compared as a String, not an Integer
    # Parse the summary tab for this app
    app[:scraped_at] = Time.now

    # It is not known whether the Documents tab is shown when there are no
    # documents, so every step is guarded: a missing tab, a tab without a
    # number, or a tab without an href leaves the matching key unset instead
    # of raising NoMethodError on nil.
    documents_tab = res.at('#tab_documents')
    if documents_tab
      documents_count = documents_tab.inner_text[/\d+/]
      app[:documents_count] = documents_count.to_i if documents_count

      documents_href = documents_tab[:href]
      app[:documents_url] = @base_url + documents_href if documents_href
    end

    # We need to find values in the table by using the th labels.
    # The row indexes/positions change from site to site (or even app to app)
    # so we can't rely on that.
    res.search('#simpleDetailsTable tr').each do |row|
      label_cell = row.at('th')
      value_cell = row.at('td')
      # Rows without both a label and a value cell cannot be mapped to a field.
      next unless label_cell && value_cell

      key = label_cell.inner_text.strip
      value = value_cell.inner_text.strip

      case key
      when 'Reference'
        app[:council_reference] = value
      when 'Alternative Reference', 'Planning Portal Reference'
        # Both labels feed the same field; if a page has both, the later row wins.
        app[:alternative_reference] = value
      when 'Application Received', 'Application Registered'
        # Both labels feed the same field; if a page has both, the later row wins.
        app[:date_received] = Date.parse(value) unless value.empty?
      when 'Application Validated'
        app[:date_validated] = Date.parse(value) unless value.empty?
      when 'Address'
        app[:address] = value
      when 'Proposal'
        app[:description] = value
      when 'Status'
        app[:status] = value
      when 'Decision'
        app[:decision] = value
      when 'Decision Issued Date'
        app[:date_decision] = Date.parse(value) unless value.empty?
      when 'Appeal Status'
        app[:appeal_status] = value
      when 'Appeal Decision'
        app[:appeal_decision] = value
      else
        # Unknown labels are reported rather than silently dropped.
        puts "Error: key '#{key}' not found"
      end # case
    end # each row
  else
    puts "Error: HTTP #{res.code}"
  end # if

  pp app
end # scrape summary tab for apps
apps
end
end
end # self.search
end # module

불러오는 중...
취소
저장