From a2d6858065ff6521d67c71c8355bc105c4f9557b Mon Sep 17 00:00:00 2001 From: Adrian Short Date: Thu, 20 Sep 2018 00:04:11 +0100 Subject: [PATCH] Update for uk_planning_scraper 0.2.0 --- Gemfile | 2 +- Gemfile.lock | 6 +++--- councils.csv | 35 ----------------------------------- scraper.rb | 36 +++++++++--------------------------- 4 files changed, 13 insertions(+), 66 deletions(-) delete mode 100644 councils.csv diff --git a/Gemfile b/Gemfile index c196d00..e3fa546 100644 --- a/Gemfile +++ b/Gemfile @@ -2,5 +2,5 @@ source "https://rubygems.org" ruby '2.3.1' -gem 'uk_planning_scraper', :git => 'https://github.com/adrianshort/uk_planning_scraper/', :ref => '5e166c3' +gem 'uk_planning_scraper', :git => 'https://github.com/adrianshort/uk_planning_scraper/', :ref => '747bb06' gem 'scraperwiki', :git => 'https://github.com/openaustralia/scraperwiki-ruby/', :branch => 'morph_defaults' diff --git a/Gemfile.lock b/Gemfile.lock index ed56ddb..24d3a0a 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,9 +1,9 @@ GIT remote: https://github.com/adrianshort/uk_planning_scraper/ - revision: 5e166c3d3491ee7819fc9f401d57d44763915efc - ref: 5e166c3 + revision: 747bb06258a4bd7466f778dd0c94bf5ee01149b3 + ref: 747bb06 specs: - uk_planning_scraper (0.1.1) + uk_planning_scraper (0.2.0) http mechanize (~> 2.7) diff --git a/councils.csv b/councils.csv deleted file mode 100644 index 275eec5..0000000 --- a/councils.csv +++ /dev/null @@ -1,35 +0,0 @@ -#City of London,http://www.planning2.cityoflondon.gov.uk/online-applications/search.do?action=advanced -#Barking and Dagenham,http://paplan.lbbd.gov.uk/online-applications/search.do?action=advanced -#Barnet,https://publicaccess.barnet.gov.uk/online-applications/search.do?action=advanced -#Bexley,http://pa.bexley.gov.uk/online-applications/search.do?action=advanced -#Brent,https://pa.brent.gov.uk/online-applications/search.do?action=advanced&searchType=Application -#Bromley,https://searchapplications.bromley.gov.uk/online-applications/search.do?action=advanced -#Croydon,http://publicaccess2.croydon.gov.uk/online-applications/search.do?action=advanced -#Ealing,https://pam.ealing.gov.uk/online-applications/search.do?action=advanced -#Enfield,https://planningandbuildingcontrol.enfield.gov.uk/online-applications/search.do?action=advanced -#Newham,https://pa.newham.gov.uk/online-applications/search.do?action=advanced -#Sutton,https://planningregister.sutton.gov.uk/online-applications/search.do?action=advanced -#Greenwich,https://planning.royalgreenwich.gov.uk/online-applications/search.do?action=advanced -#Hammersmith and Fulham,http://public-access.lbhf.gov.uk/online-applications/search.do?action=advanced -#Lambeth,https://planning.lambeth.gov.uk/online-applications/search.do?action=advanced -#Lewisham,http://planning.lewisham.gov.uk/online-applications/search.do?action=advanced -#Southwark,https://planning.southwark.gov.uk/online-applications/search.do?action=advanced -#Tower Hamlets,https://development.towerhamlets.gov.uk/online-applications/search.do?action=advanced -#Westminster,http://idoxpa.westminster.gov.uk/online-applications/search.do?action=advanced -#Bristol,https://planningonline.bristol.gov.uk/online-applications/search.do?action=advanced -#Salford,http://publicaccess.salford.gov.uk/publicaccess/search.do?action=advanced -#Manchester,https://pa.manchester.gov.uk/online-applications/search.do?action=advanced -#Leeds,https://publicaccess.leeds.gov.uk/online-applications/search.do?action=advanced -#Glasgow,https://publicaccess.glasgow.gov.uk/online-applications/search.do?action=advanced -#Sheffield,https://planningapps.sheffield.gov.uk/online-applications/search.do?action=advanced -#Bradford,https://planning.bradford.gov.uk/online-applications/search.do?action=advanced -#Edinburgh,https://citydev-portal.edinburgh.gov.uk/idoxpa-web/search.do?action=advanced -#Wakefield,https://planning.wakefield.gov.uk/online-applications/search.do?action=advanced -#Cardiff,https://planningonline.cardiff.gov.uk/online-applications/search.do?action=advanced -Birmingham,https://eplanning.birmingham.gov.uk/Northgate/PlanningExplorer/GeneralSearch.aspx -Liverpool,http://northgate.liverpool.gov.uk/PlanningExplorer17/GeneralSearch.aspx -Camden,http://planningrecords.camden.gov.uk/Northgate/PlanningExplorer17/GeneralSearch.aspx -Hackney,http://planning.hackney.gov.uk/Northgate/PlanningExplorer/generalsearch.aspx -Islington,http://planning.islington.gov.uk/northgate/planningexplorer/generalsearch.aspx -Merton,http://planning.merton.gov.uk/Northgate/PlanningExplorerAA/GeneralSearch.aspx -Wandsworth,https://planning1.wandsworth.gov.uk/Northgate/PlanningExplorer/GeneralSearch.aspx diff --git a/scraper.rb b/scraper.rb index c02ba13..b3dc83f 100644 --- a/scraper.rb +++ b/scraper.rb @@ -1,33 +1,15 @@ require 'uk_planning_scraper' require 'scraperwiki' -require 'date' -require 'time' -require 'csv' -councils = [] +auths = UKPlanningScraper::Authority.all -CSV.foreach('councils.csv') do |line| - councils << { name: line[0], url: line[1] } unless line[0][0] == '#' -end - -params = { - validated_from: Date.today - ENV['MORPH_DAYS'].to_i, - validated_to: Date.today, - description: 'inlink', -} - -councils.each do |council| - apps = UKPlanningScraper.search(council[:url], params) - - apps.map! do |app| - app.merge( - { - la_name: council[:name], - updated_at: Time.now - } - ) +auths.each_with_index do |auth, i| + puts "#{i + 1} of #{auths.size}: #{auth.name}" + begin + apps = auth.scrape({ validated_days: ENV['MORPH_DAYS'].to_i, keywords: 'inlink'}) + ScraperWiki.save_sqlite([:authority_name, :council_reference], apps, 'applications') + puts "#{auth.name}: #{apps.size} application(s) saved." + rescue UKPlanningScraper::SystemNotSupportedError => e + puts "Error: #{e}" end - - ScraperWiki.save_sqlite([:council_reference, :la_name], apps, 'applications') - puts "#{council[:name]}: #{apps.size}" end