| @@ -8,4 +8,3 @@ source "https://rubygems.org" | |||||
| gem "scraperwiki", git: "https://github.com/openaustralia/scraperwiki-ruby.git", branch: "morph_defaults" | gem "scraperwiki", git: "https://github.com/openaustralia/scraperwiki-ruby.git", branch: "morph_defaults" | ||||
| gem "mechanize" | gem "mechanize" | ||||
| gem "activesupport" # For date calculations | |||||
| @@ -10,18 +10,11 @@ GIT | |||||
| GEM | GEM | ||||
| remote: https://rubygems.org/ | remote: https://rubygems.org/ | ||||
| specs: | specs: | ||||
| activesupport (5.1.2) | |||||
| concurrent-ruby (~> 1.0, >= 1.0.2) | |||||
| i18n (~> 0.7) | |||||
| minitest (~> 5.1) | |||||
| tzinfo (~> 1.1) | |||||
| concurrent-ruby (1.0.5) | |||||
| domain_name (0.5.20170404) | domain_name (0.5.20170404) | ||||
| unf (>= 0.0.5, < 1.0.0) | unf (>= 0.0.5, < 1.0.0) | ||||
| http-cookie (1.0.3) | http-cookie (1.0.3) | ||||
| domain_name (~> 0.5) | domain_name (~> 0.5) | ||||
| httpclient (2.8.3) | httpclient (2.8.3) | ||||
| i18n (0.8.4) | |||||
| mechanize (2.7.5) | mechanize (2.7.5) | ||||
| domain_name (~> 0.5, >= 0.5.1) | domain_name (~> 0.5, >= 0.5.1) | ||||
| http-cookie (~> 1.0) | http-cookie (~> 1.0) | ||||
| @@ -35,7 +28,6 @@ GEM | |||||
| mime-types-data (~> 3.2015) | mime-types-data (~> 3.2015) | ||||
| mime-types-data (3.2016.0521) | mime-types-data (3.2016.0521) | ||||
| mini_portile2 (2.2.0) | mini_portile2 (2.2.0) | ||||
| minitest (5.10.2) | |||||
| net-http-digest_auth (1.4.1) | net-http-digest_auth (1.4.1) | ||||
| net-http-persistent (2.9.4) | net-http-persistent (2.9.4) | ||||
| nokogiri (1.8.0) | nokogiri (1.8.0) | ||||
| @@ -44,9 +36,6 @@ GEM | |||||
| sqlite3 (1.3.13) | sqlite3 (1.3.13) | ||||
| sqlite_magic (0.0.6) | sqlite_magic (0.0.6) | ||||
| sqlite3 | sqlite3 | ||||
| thread_safe (0.3.6) | |||||
| tzinfo (1.2.3) | |||||
| thread_safe (~> 0.1) | |||||
| unf (0.1.4) | unf (0.1.4) | ||||
| unf_ext | unf_ext | ||||
| unf_ext (0.0.7.4) | unf_ext (0.0.7.4) | ||||
| @@ -56,7 +45,6 @@ PLATFORMS | |||||
| ruby | ruby | ||||
| DEPENDENCIES | DEPENDENCIES | ||||
| activesupport | |||||
| mechanize | mechanize | ||||
| scraperwiki! | scraperwiki! | ||||
| @@ -5,7 +5,6 @@ require 'mechanize' | |||||
| require 'pp' | require 'pp' | ||||
| require 'time' | require 'time' | ||||
| require 'date' | require 'date' | ||||
| require 'active_support/all' | |||||
| # Use the column names from planningalerts.org.au: | # Use the column names from planningalerts.org.au: | ||||
| # https://www.planningalerts.org.au/how_to_write_a_scraper | # https://www.planningalerts.org.au/how_to_write_a_scraper | ||||
| @@ -69,7 +68,7 @@ agent.verify_mode = OpenSSL::SSL::VERIFY_NONE | |||||
| d = Date.today | d = Date.today | ||||
| 12.times do | 12.times do | ||||
| d_start = (d - 29.days).strftime("%d/%m/%Y") | |||||
| d_start = (d - 29).strftime("%d/%m/%Y") | |||||
| d_end = d.strftime("%d/%m/%Y") | d_end = d.strftime("%d/%m/%Y") | ||||
| url = "#{BASEURL}Summary?weekListType=SRCH&recFrom=#{d_start}&recTo=#{d_end}&ward=ALL&appTyp=ALL&wardTxt=All%20Wards&appTypTxt=All%20Application%20Types&limit=500" | url = "#{BASEURL}Summary?weekListType=SRCH&recFrom=#{d_start}&recTo=#{d_end}&ward=ALL&appTyp=ALL&wardTxt=All%20Wards&appTypTxt=All%20Application%20Types&limit=500" | ||||
| @@ -78,9 +77,9 @@ d = Date.today | |||||
| page = agent.get(url) | page = agent.get(url) | ||||
| apps = page.search("#planningApplication") | apps = page.search("#planningApplication") | ||||
| puts apps.size, '' | puts apps.size, '' | ||||
| apps.each { |app| parse(app) } | apps.each { |app| parse(app) } | ||||
| d -= 30.days | |||||
| d -= 30 | |||||
| sleep 5 | sleep 5 | ||||
| end | end | ||||