| @@ -23,10 +23,14 @@ The schema is based on the core elements from [planningalerts.org.au](https://ww | |||||
| According to the principle of _one codebase, many deploys_, this scraper is [configured using environment variables](https://12factor.net/config) rather than by editing constants in the code. | According to the principle of _one codebase, many deploys_, this scraper is [configured using environment variables](https://12factor.net/config) rather than by editing constants in the code. | ||||
| | Name | Purpose | Default | Required? | | | Name | Purpose | Default | Required? | | ||||
| | --- |--- | --- | | |||||
| | --- | --- | --- | --- | | |||||
| | MORPH_DELAY | Minimum delay in seconds between HTTP requests to the server. | 10 | No | | | MORPH_DELAY | Minimum delay in seconds between HTTP requests to the server. | 10 | No | | ||||
| | MORPH_USER_AGENT | User agent string sent as an HTTP request header. | _None_ | Yes | | | MORPH_USER_AGENT | User agent string sent as an HTTP request header. | _None_ | Yes | | ||||
| | MORPH_LOG_LEVEL | Controls the level of detail in the output logs according to [Ruby's `Logger` class](https://ruby-doc.org/stdlib-2.1.0/libdoc/logger/rdoc/Logger.html) constants. | 1 _(Logger::INFO)_ | No | | | MORPH_LOG_LEVEL | Controls the level of detail in the output logs according to [Ruby's `Logger` class](https://ruby-doc.org/stdlib-2.1.0/libdoc/logger/rdoc/Logger.html) constants. | 1 _(Logger::INFO)_ | No | | ||||
| | MORPH_DAYS | Number of days to scrape | _None_ | Only if MORPH_MONTHS is unset | | |||||
| | MORPH_MONTHS | Number of months to scrape | _None_ | Only if MORPH_DAYS is unset | | |||||
| | MORPH_STATUS | Only scrape applications with this status code. | _None_ | No | | |||||
| ## Running | ## Running | ||||
| @@ -33,14 +33,34 @@ logger.info "Log level is: #{logger.level}" | |||||
| URL = SITE_URL + '/Northgate/PlanningExplorerAA/GeneralSearch.aspx' | URL = SITE_URL + '/Northgate/PlanningExplorerAA/GeneralSearch.aspx' | ||||
| form_vars = { | form_vars = { | ||||
| # 'cboStatusCode' => '4', # REGISTERED | |||||
| 'cboSelectDateValue' => 'DATE_RECEIVED', | 'cboSelectDateValue' => 'DATE_RECEIVED', | ||||
| # 'cboMonths' => '12', # 1..12 | |||||
| 'cboDays' => 1, | |||||
| 'rbGroup' => 'rbDay', | |||||
| 'csbtnSearch' => 'Search' # required | 'csbtnSearch' => 'Search' # required | ||||
| } | } | ||||
| # If both MORPH_DAYS and MORPH_MONTHS are set, MORPH_DAYS takes precedence. | |||||
| unless ENV['MORPH_DAYS'] || ENV['MORPH_MONTHS'] | |||||
| logger.fatal "Neither MORPH_MONTHS nor MORPH_DAYS set. Nothing to scrape. Exiting." | |||||
| exit 1 | |||||
| end | |||||
| if ENV['MORPH_MONTHS'] | |||||
| form_vars.merge!({ | |||||
| 'cboMonths' => ENV['MORPH_MONTHS'], | |||||
| 'rbGroup' => 'rbMonth' | |||||
| }) | |||||
| end | |||||
| if ENV['MORPH_DAYS'] | |||||
| form_vars.merge!({ | |||||
| 'cboMonths' => nil, | |||||
| 'cboDays' => ENV['MORPH_DAYS'], | |||||
| 'rbGroup' => 'rbDay' | |||||
| }) | |||||
| end | |||||
| form_vars.merge!({ 'cboStatusCode' => ENV['MORPH_STATUS']}) if ENV['MORPH_STATUS'] | |||||
| logger.info "Form variables: #{form_vars.to_s}" | logger.info "Form variables: #{form_vars.to_s}" | ||||
| headers = { | headers = { | ||||