#!/usr/bin/env sh
#
# Download the PDF files for a planning application from the Sutton Council
# planning website.
#
# Running this more than once for the same application only downloads files
# that are not already present locally (wget --no-clobber); the URL list is
# regenerated on every run.
#
# Usage:   get.sh APPLICATION_NUMBER
# Example: get.sh B2015/71962
#
# Files are saved in a directory named after the application number, created
# relative to the current directory (so B2015/71962 becomes B2015/71962/).
#
# Requires curl and wget. Mac users can install them with Homebrew.
# Windows users: try running this in Cygwin or install Linux in a virtual
# machine.
#
# Adrian Short 26 Feb 2016

set -u

COOKIEJAR=cookiejar.txt
URLS=urls.txt
BASEURL=https://fastweb.sutton.gov.uk/FASTWEB

# Print a message to stderr and abort with a non-zero status.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Exactly one non-empty argument is required: an empty value would make
# "cd" below switch to $HOME instead of the application directory.
if [ "$#" -ne 1 ] || [ -z "$1" ]; then
  printf 'Usage: %s APPLICATION_NUMBER, e.g. %s B2015/71962\n' \
    "${0##*/}" "${0##*/}" >&2
  exit 2
fi
app=$1

for tool in curl wget; do
  command -v "$tool" > /dev/null 2>&1 \
    || die "$tool is required but was not found in PATH"
done

mkdir -p -- "$app" || die "Cannot create directory $app"
cd -- "$app" || die "Cannot change to directory $app"

# Use an absolute path so the cleanup trap removes the right file no matter
# what the working directory is when the script exits.
cookiejar=$PWD/$COOKIEJAR
trap 'rm -f -- "$cookiejar"' EXIT

echo "Getting session cookies"
curl -sS -c "$cookiejar" "$BASEURL/welcome.asp" > /dev/null \
  || die "Failed to get session cookies from $BASEURL/welcome.asp"
echo "OK"
echo

echo "Getting list of PDF files. This could take several minutes if there are a large number of documents for this application."
# -b sends the session cookies saved above (-c alone only writes the jar).
# The page is captured first so that a curl failure is not hidden behind the
# exit status of grep.
listing=$(
  curl -sS -b "$cookiejar" -c "$cookiejar" \
    --data "cbxCopyrightStatement=on" \
    --data-urlencode "ApplicationNumber=$app" \
    "$BASEURL/images.asp"
) || die "Failed to fetch the document list for $app"

# ERE has no non-greedy quantifier, so each match is bounded by excluding
# quote and angle-bracket characters instead; this keeps several links on
# one line from being merged into a single bogus URL. Spaces are allowed
# because document file names may contain them.
printf '%s\n' "$listing" \
  | LC_ALL=C grep -E -o "http[^\"'<>]+\\.(pdf|PDF)" > "$URLS"

[ -s "$URLS" ] || die "No PDF links found for application $app"
echo "OK"

echo "Downloading PDFs"
# NOTE(review): --no-check-certificate disables TLS verification for the
# downloads, while the curl calls above do verify the certificate. Confirm
# whether it is still needed.
wget_status=0
wget --no-check-certificate --no-clobber -i "$URLS" || wget_status=$?
if [ "$wget_status" -eq 0 ]; then
  echo "Downloading complete"
else
  printf 'Downloading finished, but wget exited with status %s; some files may be missing\n' \
    "$wget_status" >&2
fi

ls -lht

# Prints the directory the script was started from; it does not change the
# caller's working directory.
cd -