@@ -23,10 +23,14 @@ The schema is based on the core elements from [planningalerts.org.au](https://ww | |||||
According to the principle of _one codebase, many deploys_, this scraper is [configured using environment variables](https://12factor.net/config) rather than by editing constants in the code. | According to the principle of _one codebase, many deploys_, this scraper is [configured using environment variables](https://12factor.net/config) rather than by editing constants in the code. | ||||
| Name | Purpose | Default | Required? | | | Name | Purpose | Default | Required? | | ||||
| --- |--- | --- | | |||||
| --- | --- | --- | --- | | |||||
| MORPH_DELAY | Minimum delay in seconds between HTTP requests to the server. | 10 | No | | | MORPH_DELAY | Minimum delay in seconds between HTTP requests to the server. | 10 | No | | ||||
| MORPH_USER_AGENT | User agent string sent as an HTTP request header. | _None_ | Yes | | | MORPH_USER_AGENT | User agent string sent as an HTTP request header. | _None_ | Yes | | ||||
| MORPH_LOG_LEVEL | Controls the level of detail in the output logs according to [Ruby's `Logger` class](https://ruby-doc.org/stdlib-2.1.0/libdoc/logger/rdoc/Logger.html) constants. | 1 _(Logger::INFO)_ | No | | | MORPH_LOG_LEVEL | Controls the level of detail in the output logs according to [Ruby's `Logger` class](https://ruby-doc.org/stdlib-2.1.0/libdoc/logger/rdoc/Logger.html) constants. | 1 _(Logger::INFO)_ | No | | ||||
| MORPH_DAYS | Number of days to scrape | _None_ | Only if MORPH_MONTHS is unset | | |||||
| MORPH_MONTHS | Number of months to scrape | _None_ | Only if MORPH_DAYS is unset | | |||||
| MORPH_STATUS | Only scrape applications with this status code. | _None_ | No | | |||||
## Running | ## Running | ||||
@@ -33,14 +33,34 @@ logger.info "Log level is: #{logger.level}" | |||||
# Builds the search page address and the form fields for the search, then
# fills in the date range and optional status filter from MORPH_* environment
# variables. SITE_URL and logger are defined outside this section.
# NOTE(review): this section is a side-by-side diff rendering; lines with a
# single column may be additions or removals -- confirm against the real file.
URL = SITE_URL + '/Northgate/PlanningExplorerAA/GeneralSearch.aspx' | URL = SITE_URL + '/Northgate/PlanningExplorerAA/GeneralSearch.aspx' | ||||
# Base form fields. How they are submitted is not shown in this section.
form_vars = { | form_vars = { | ||||
# 'cboStatusCode' => '4', # REGISTERED | |||||
'cboSelectDateValue' => 'DATE_RECEIVED', | 'cboSelectDateValue' => 'DATE_RECEIVED', | ||||
# 'cboMonths' => '12', # 1..12 | |||||
'cboDays' => 1, | |||||
'rbGroup' => 'rbDay', | |||||
'csbtnSearch' => 'Search' # required | 'csbtnSearch' => 'Search' # required | ||||
} | } | ||||
# If both MORPH_DAYS and MORPH_MONTHS are set, MORPH_DAYS should be used. | |||||
# Without either variable there is no date range to search, so log a fatal
# message and stop with exit status 1.
# NOTE: ENV values are strings and an empty string is truthy in Ruby, so a
# variable set to "" still counts as set in the checks below.
unless ENV['MORPH_DAYS'] || ENV['MORPH_MONTHS'] | |||||
logger.fatal "Neither MORPH_MONTHS nor MORPH_DAYS set. Nothing to scrape. Exiting." | |||||
exit 1 | |||||
end | |||||
# Month-based range: set 'cboMonths' and switch 'rbGroup' to 'rbMonth'.
# This runs first so that the MORPH_DAYS block can overwrite 'rbGroup' when
# both variables are set.
if ENV['MORPH_MONTHS'] | |||||
form_vars.merge!({ | |||||
'cboMonths' => ENV['MORPH_MONTHS'], | |||||
'rbGroup' => 'rbMonth' | |||||
}) | |||||
end | |||||
# Day-based range: because this merge happens after the month merge, it resets
# 'cboMonths' to nil and puts 'rbGroup' back to 'rbDay', which is what gives
# MORPH_DAYS priority over MORPH_MONTHS.
if ENV['MORPH_DAYS'] | |||||
form_vars.merge!({ | |||||
'cboMonths' => nil, | |||||
'cboDays' => ENV['MORPH_DAYS'], | |||||
'rbGroup' => 'rbDay' | |||||
}) | |||||
end | |||||
# Optional status filter: 'cboStatusCode' is only added when MORPH_STATUS is set.
form_vars.merge!({ 'cboStatusCode' => ENV['MORPH_STATUS']}) if ENV['MORPH_STATUS'] | |||||
# Log the final set of form fields at INFO level.
logger.info "Form variables: #{form_vars.to_s}" | logger.info "Form variables: #{form_vars.to_s}" | ||||
headers = { | headers = { | ||||