|
|
@@ -8,19 +8,28 @@ |
|
|
|
# Adrian Short 26 Feb 2016 |
|
|
|
|
|
|
|
# --- Configuration ----------------------------------------------------------
# Both file names are relative to the per-application directory entered below.
COOKIEJAR=cookiejar.txt   # curl cookie jar holding the session cookies
URLS=urls.txt             # PDF URLs, one per line, fed to wget -i
BASEURL=https://fastweb.sutton.gov.uk/FASTWEB

# $1 is the application number: it names the download directory and is posted
# as the ApplicationNumber form field. Refuse to run without it, because a
# bare `cd` would move to $HOME and the downloads and the final `rm` would
# then happen there.
if [ -z "${1:-}" ]; then
  echo "Usage: $0 APPLICATION_NUMBER" >&2
  exit 2
fi

# Work inside a directory named after the application; stop if it cannot be
# created or entered so nothing is written to the wrong place.
mkdir -p -- "$1" || exit 1
cd -- "$1" || exit 1

# --- Step 1: obtain session cookies ------------------------------------------
echo "Getting session cookies"
if ! curl -s -c "$COOKIEJAR" "$BASEURL/welcome.asp" > /dev/null; then
  echo "Could not fetch $BASEURL/welcome.asp" >&2
  exit 1
fi
echo "OK"
echo

# --- Step 2: collect the PDF links -------------------------------------------
echo "Getting list of PDF files. This could take several minutes if there are a large number of documents for this application."
# -b sends the cookies saved in step 1; -c alone only writes the jar and never
# reads it, so the session would not be presented to the server.
# NOTE(review): POSIX ERE has no lazy quantifiers, so `.+?` matches greedily;
# if one line of the response holds several PDF links they are captured as a
# single string. Pattern left unchanged until the page markup is checked.
curl -s -b "$COOKIEJAR" -c "$COOKIEJAR" \
  --data "cbxCopyrightStatement=on" \
  --data "ApplicationNumber=$1" \
  "$BASEURL/images.asp" \
  | grep -E -o 'http.+?\.(pdf|PDF)' > "$URLS"

# An empty list means the request failed or the application has no documents;
# there is nothing for wget to do in either case.
if [ ! -s "$URLS" ]; then
  echo "No PDF links found for application $1" >&2
  rm -f -- "$COOKIEJAR"
  exit 1
fi
echo "OK"

# --- Step 3: download ---------------------------------------------------------
echo "Downloading PDFs"
# --no-clobber skips files that already exist, so a re-run resumes the set.
# NOTE(review): --no-check-certificate disables TLS verification; presumably
# needed for this server's certificate -- confirm it is still required.
wget --no-check-certificate --no-clobber -i "$URLS"
echo "Downloading complete"

# The cookie jar is only needed during the run; the URL list is left in place.
rm -f -- "$COOKIEJAR"

ls -lht
|
|
|