commit 16656594182e6811199105e8a2205b276f07f046 Author: Adrian Short Date: Fri Sep 14 18:24:44 2018 +0100 First commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e5cdde8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.ruby-* +*.db +*.sqlite +Gemfile.lock diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..1ee14ee --- /dev/null +++ b/Gemfile @@ -0,0 +1,6 @@ +source "https://rubygems.org" + +ruby '2.3.1' + +gem 'uk_planning_scraper', :git => 'https://github.com/adrianshort/uk_planning_scraper/' +gem 'scraperwiki', :git => 'https://github.com/openaustralia/scraperwiki-ruby/', :branch => 'morph_defaults' diff --git a/README.md b/README.md new file mode 100644 index 0000000..8cd9695 --- /dev/null +++ b/README.md @@ -0,0 +1,4 @@ +# BT InLink planning applications scraper + +Scrapes planning applications data for [BT InLink kiosks](https://www.adrianshort.org/tags/inlinkuk/) from UK council websites. + diff --git a/councils.csv b/councils.csv new file mode 100644 index 0000000..8615eb5 --- /dev/null +++ b/councils.csv @@ -0,0 +1,19 @@ +City of London,http://www.planning2.cityoflondon.gov.uk/online-applications/search.do?action=advanced +Barking and Dagenham,http://paplan.lbbd.gov.uk/online-applications/search.do?action=advanced +Barnet,https://publicaccess.barnet.gov.uk/online-applications/search.do?action=advanced +#Bexley,http://pa.bexley.gov.uk/online-applications/search.do?action=advanced +Brent,https://pa.brent.gov.uk/online-applications/search.do?action=advanced&searchType=Application +Bromley,https://searchapplications.bromley.gov.uk/online-applications/search.do?action=advanced +#Croydon,http://publicaccess2.croydon.gov.uk/online-applications/search.do?action=advanced +Ealing,https://pam.ealing.gov.uk/online-applications/search.do?action=advanced +Enfield,https://planningandbuildingcontrol.enfield.gov.uk/online-applications/search.do?action=advanced +#Newham,https://pa.newham.gov.uk/online-applications/search.do?action=advanced +Sutton,https://planningregister.sutton.gov.uk/online-applications/search.do?action=advanced +#Greenwich,https://planning.royalgreenwich.gov.uk/online-applications/search.do?action=advanced +#Hammersmith and Fulham,http://public-access.lbhf.gov.uk/online-applications/search.do?action=advanced +Lambeth,https://planning.lambeth.gov.uk/online-applications/search.do?action=advanced +Lewisham,http://planning.lewisham.gov.uk/online-applications/search.do?action=advanced +Southwark,https://planning.southwark.gov.uk/online-applications/search.do?action=advanced +#Tower Hamlets,https://development.towerhamlets.gov.uk/online-applications/search.do?action=advanced +Westminster,http://idoxpa.westminster.gov.uk/online-applications/search.do?action=advanced +#Bristol,https://planningonline.bristol.gov.uk/online-applications/search.do?action=advanced diff --git a/scraper.rb b/scraper.rb new file mode 100644 index 0000000..c02ba13 --- /dev/null +++ b/scraper.rb @@ -0,0 +1,33 @@ +require 'uk_planning_scraper' +require 'scraperwiki' +require 'date' +require 'time' +require 'csv' + +councils = [] + +CSV.foreach('councils.csv') do |line| + councils << { name: line[0], url: line[1] } unless line[0][0] == '#' +end + +params = { + validated_from: Date.today - ENV['MORPH_DAYS'].to_i, + validated_to: Date.today, + description: 'inlink', +} + +councils.each do |council| + apps = UKPlanningScraper.search(council[:url], params) + + apps.map! do |app| + app.merge( + { + la_name: council[:name], + updated_at: Time.now + } + ) + end + + ScraperWiki.save_sqlite([:council_reference, :la_name], apps, 'applications') + puts "#{council[:name]}: #{apps.size}" +end