Sergey Oboguev (oboguev) wrote,
Sergey Oboguev
oboguev

ГПИБ shpl download script (версия 2)



#!/bin/bash

# load.sh book-url [start-page]

# Print the one-line usage summary on stderr and abort.
usage() {
    echo "usage: load.sh book-url [start-page]" >&2
    exit 1
}

# Accept exactly one or two arguments; the book URL itself must not be empty.
if [ "$#" -lt 1 ] || [ "$#" -gt 2 ] || [ -z "$1" ]; then
    usage
fi

URL="$1"
# ${2-1}: fall back to page 1 only when no second argument was given at all.
# An explicitly empty second argument is kept and rejected just below.
STARTPAGE="${2-1}"

# The start page must be a non-empty string made of decimal digits only.
case "$STARTPAGE" in
    '' | *[!0-9]*)
        echo "Invalid starting page number: $STARTPAGE" >&2
        exit 1
        ;;
esac
echo "Will start at page $STARTPAGE"

# Stop early when a required external tool is missing: jq parses the JSON
# fragments cut out of the book page, wget performs every download.
# `command -v` reports a missing command through its exit status, so the
# check does not depend on what (if anything) a `which` binary prints.
for TOOL in jq wget; do
    if ! command -v "$TOOL" >/dev/null 2>&1; then
        echo "$TOOL is not installed" >&2
        exit 1
    fi
done

#
# download the book's root page into private temporary files
#
# TFR receives the raw download, TF the same text flattened to one line.
# mktemp creates the files atomically with unpredictable names, so another
# user of /tmp cannot pre-create or symlink them.
TFR=$(mktemp /tmp/raw-book.shpl.XXXXXX) || {
    echo "Unable to create a temporary file" >&2
    exit 1
}
TF=$(mktemp /tmp/book.shpl.XXXXXX) || {
    rm -f -- "$TFR"
    echo "Unable to create a temporary file" >&2
    exit 1
}
# Whatever way the script ends, do not leave the temporary files behind.
trap 'rm -f -- "$TFR" "$TF"' EXIT

echo "Loading root page ..."
if ! wget --quiet -O "$TFR" "$URL"; then
    echo "Unable to load the page" >&2
    exit 1
fi

# Replace every CR and LF with a space so the whole page becomes a single
# line (one awk record) for the field splitting done later in the script.
tr '\r\n' '  ' <"$TFR" >"$TF"
rm -f -- "$TFR"

#
# cut the "pages" and "project_zoom_nums" arrays out of the flattened page
#

# json_array_after KEY
#   Read the flattened page in $TF, take the text that follows the first
#   occurrence of "KEY":[ up to (not including) the next ']', and print it
#   wrapped as "[ ... ]". Prints "[  ]" when the key is not found.
json_array_after() {
    awk -v key="$1" 'BEGIN { FS = "\"" key "\":\\[" } { print $2 }' "$TF" \
        | awk 'BEGIN { FS = "\\]" } { print "[ " $1 " ]" }'
}

json_array_after pages >pages.shpl
json_array_after project_zoom_nums >zoomnums.shpl

#
# parse zoom: use the largest zoom level listed in zoomnums.shpl
#

# max_uint WORD...
#   Print the largest argument that consists of decimal digits only.
#   Empty arguments and arguments containing any other character are
#   ignored. Prints nothing when no argument qualifies.
max_uint() {
    local best="" cand
    for cand in "$@"; do
        case "$cand" in
            '' | *[!0-9]*) continue ;;
        esac
        if [ -z "$best" ] || [ "$cand" -gt "$best" ]; then
            best="$cand"
        fi
    done
    printf '%s' "$best"
}

# One array element per output line; a missing or unparsable file simply
# yields no candidates and the default below applies.
ZOOMS=$(jq '.[]' <zoomnums.shpl)
# shellcheck disable=SC2086  # splitting is intended: one candidate per word
MZ=$(max_uint $ZOOMS)

if [ -n "$MZ" ]; then
    echo "Using zoom $MZ"
else
    echo "Invalid zoom, defaulting to zoom 7"
    MZ="7"
fi

#
# handle pages: list the page ids from pages.shpl, then download the image
# of every page from STARTPAGE onwards at zoom level MZ into <id>.jpg
#
# -r prints string ids without their JSON quotes (numeric ids come out the
# same either way), so an id can go verbatim into the URL and the file name.
if ! jq -r '.[] | .id' <pages.shpl >page-ids.shpl; then
    echo "Unable to extract the page ids" >&2
    exit 1
fi

# The arithmetic expansion drops the padding some wc implementations print.
NPAGES=$(( $(wc -l <page-ids.shpl) ))
echo "Number of pages: $NPAGES"
if [ "$NPAGES" -eq 0 ]; then
    # Nothing to download means the page list was not found; do not
    # report that as a successful run.
    echo "No pages found" >&2
    exit 1
fi

PAGE=1
# The id list is read on descriptor 3 so that no command inside the loop
# can consume it through stdin.
while IFS= read -r ID <&3; do
    [ -n "$ID" ] || continue
    # `[ -lt ]` compares as decimal, so a start page such as 08 is accepted.
    if [ "$PAGE" -lt "$STARTPAGE" ]; then
        PAGE=$((PAGE + 1))
        continue
    fi
    echo "Loading page $PAGE (of $NPAGES) ..."
    URL="http://elib.shpl.ru/pages/$ID/zooms/$MZ"
    if ! wget --quiet -O "$ID.jpg" "$URL"; then
        # wget truncates the -O target as soon as it starts, so a failed
        # transfer leaves an empty or partial image; remove it.
        rm -f -- "$ID.jpg"
        echo "Unable to load the page ($URL)" >&2
        exit 1
    fi
    PAGE=$((PAGE + 1))
done 3<page-ids.shpl

# Remove the temporary copies of the root page (-f: they may already be gone).
rm -f -- "$TFR" "$TF"
# Remove only the intermediate files this script writes into the current
# directory, so unrelated *.shpl files that happen to live here survive.
rm -f -- pages.shpl zoomnums.shpl page-ids.shpl

echo "Finished."

Subscribe
  • Post a new comment

    Error

    Anonymous comments are disabled in this journal

    default userpic

    Your reply will be screened

  • 0 comments