@@ -8,4 +8,3 @@ source "https://rubygems.org" | |||||
gem "scraperwiki", git: "https://github.com/openaustralia/scraperwiki-ruby.git", branch: "morph_defaults" | gem "scraperwiki", git: "https://github.com/openaustralia/scraperwiki-ruby.git", branch: "morph_defaults" | ||||
gem "mechanize" | gem "mechanize" | ||||
gem "activesupport" # For date calculations |
@@ -10,18 +10,11 @@ GIT | |||||
GEM | GEM | ||||
remote: https://rubygems.org/ | remote: https://rubygems.org/ | ||||
specs: | specs: | ||||
activesupport (5.1.2) | |||||
concurrent-ruby (~> 1.0, >= 1.0.2) | |||||
i18n (~> 0.7) | |||||
minitest (~> 5.1) | |||||
tzinfo (~> 1.1) | |||||
concurrent-ruby (1.0.5) | |||||
domain_name (0.5.20170404) | domain_name (0.5.20170404) | ||||
unf (>= 0.0.5, < 1.0.0) | unf (>= 0.0.5, < 1.0.0) | ||||
http-cookie (1.0.3) | http-cookie (1.0.3) | ||||
domain_name (~> 0.5) | domain_name (~> 0.5) | ||||
httpclient (2.8.3) | httpclient (2.8.3) | ||||
i18n (0.8.4) | |||||
mechanize (2.7.5) | mechanize (2.7.5) | ||||
domain_name (~> 0.5, >= 0.5.1) | domain_name (~> 0.5, >= 0.5.1) | ||||
http-cookie (~> 1.0) | http-cookie (~> 1.0) | ||||
@@ -35,7 +28,6 @@ GEM | |||||
mime-types-data (~> 3.2015) | mime-types-data (~> 3.2015) | ||||
mime-types-data (3.2016.0521) | mime-types-data (3.2016.0521) | ||||
mini_portile2 (2.2.0) | mini_portile2 (2.2.0) | ||||
minitest (5.10.2) | |||||
net-http-digest_auth (1.4.1) | net-http-digest_auth (1.4.1) | ||||
net-http-persistent (2.9.4) | net-http-persistent (2.9.4) | ||||
nokogiri (1.8.0) | nokogiri (1.8.0) | ||||
@@ -44,9 +36,6 @@ GEM | |||||
sqlite3 (1.3.13) | sqlite3 (1.3.13) | ||||
sqlite_magic (0.0.6) | sqlite_magic (0.0.6) | ||||
sqlite3 | sqlite3 | ||||
thread_safe (0.3.6) | |||||
tzinfo (1.2.3) | |||||
thread_safe (~> 0.1) | |||||
unf (0.1.4) | unf (0.1.4) | ||||
unf_ext | unf_ext | ||||
unf_ext (0.0.7.4) | unf_ext (0.0.7.4) | ||||
@@ -56,7 +45,6 @@ PLATFORMS | |||||
ruby | ruby | ||||
DEPENDENCIES | DEPENDENCIES | ||||
activesupport | |||||
mechanize | mechanize | ||||
scraperwiki! | scraperwiki! | ||||
@@ -5,7 +5,6 @@ require 'mechanize' | |||||
require 'pp' | require 'pp' | ||||
require 'time' | require 'time' | ||||
require 'date' | require 'date' | ||||
require 'active_support/all' | |||||
# Use the column names from planningalerts.org.au: | # Use the column names from planningalerts.org.au: | ||||
# https://www.planningalerts.org.au/how_to_write_a_scraper | # https://www.planningalerts.org.au/how_to_write_a_scraper | ||||
@@ -69,7 +68,7 @@ agent.verify_mode = OpenSSL::SSL::VERIFY_NONE | |||||
d = Date.today | d = Date.today | ||||
12.times do | 12.times do | ||||
d_start = (d - 29.days).strftime("%d/%m/%Y") | |||||
d_start = (d - 29).strftime("%d/%m/%Y") | |||||
d_end = d.strftime("%d/%m/%Y") | d_end = d.strftime("%d/%m/%Y") | ||||
url = "#{BASEURL}Summary?weekListType=SRCH&recFrom=#{d_start}&recTo=#{d_end}&ward=ALL&appTyp=ALL&wardTxt=All%20Wards&appTypTxt=All%20Application%20Types&limit=500" | url = "#{BASEURL}Summary?weekListType=SRCH&recFrom=#{d_start}&recTo=#{d_end}&ward=ALL&appTyp=ALL&wardTxt=All%20Wards&appTypTxt=All%20Application%20Types&limit=500" | ||||
@@ -78,9 +77,9 @@ d = Date.today | |||||
page = agent.get(url) | page = agent.get(url) | ||||
apps = page.search("#planningApplication") | apps = page.search("#planningApplication") | ||||
puts apps.size, '' | puts apps.size, '' | ||||
apps.each { |app| parse(app) } | apps.each { |app| parse(app) } | ||||
d -= 30.days | |||||
d -= 30 | |||||
sleep 5 | sleep 5 | ||||
end | end | ||||