#!/bin/bash
#
# The Qubes OS Project, http://www.qubes-os.org
#
# Copyright (C) 2013  Joanna Rutkowska <joanna@invisiblethingslab.com>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
#
# Requires:
# - poppler-utils (pdftocairo, pdfinfo)
# - ImageMagick (convert, identify)

# Scratch files used by the conversion:
#   INPUT_FILE    - copy of the untrusted document received on stdin
#   TEMP_PNG_FILE - the page currently being rendered, as PNG
#   TEMP_RGB_FILE - the same page as raw RGB samples (the ".pdf" suffix is
#                   only part of the temp name; the file is written with an
#                   explicit "rgb:" format prefix)
# They start out empty so the cleanup helper is safe to run at any point,
# even when one of the mktemp calls below fails.
INPUT_FILE=
TEMP_PNG_FILE=
TEMP_RGB_FILE=
# Bits per colour channel, passed to `convert -depth`.
IMG_DEPTH=8

# Remove whichever scratch files have been created so far.
# Always returns 0 so that it never alters the script's exit status.
qpdf_cleanup_tmp_files() {
    local tmp_file
    for tmp_file in "$INPUT_FILE" "$TEMP_PNG_FILE" "$TEMP_RGB_FILE"; do
        if [ -n "$tmp_file" ]; then
            rm -f -- "$tmp_file"
        fi
    done
    return 0
}
# Make sure the scratch files are removed on every exit path, not only
# when the script runs to its last line.
trap qpdf_cleanup_tmp_files EXIT

# Without a usable temp file there is nothing sensible to do, so bail out
# instead of continuing with an empty path.
INPUT_FILE=$(mktemp --tmpdir qpdf-conversion-XXXXXXXX) || exit 1
TEMP_PNG_FILE=$(mktemp --tmpdir qpdf-conversion-XXXXXXXX.png) || exit 1
TEMP_RGB_FILE=$(mktemp --tmpdir qpdf-conversion-XXXXXXXX.pdf) || exit 1

# Get the original (untrusted) PDF file: everything arriving on stdin is
# stored in $INPUT_FILE. If the copy fails (e.g. the temp filesystem is
# full) we would otherwise go on to convert a truncated document, so stop
# here instead.
if ! cat > "$INPUT_FILE"; then
    echo "Failed to store the input document in $INPUT_FILE" >&2
    rm -f -- "$INPUT_FILE" "$TEMP_PNG_FILE" "$TEMP_RGB_FILE"
    exit 1
fi

# now, let's convert it into a simple representation,
# and send back to the client.

# Note that we might be compromised at this point (due to exploitation of PDF
# parsing code) and so what we're sending back might very well be something
# totally different than a decent simple representation -- the client should
# never trust what we're sending back, and should discard anything that doesn't
# look like the simple representation!

# Read `pdfinfo` output on stdin and print the page count: the first run of
# digits on a line starting with "Pages:". Prints nothing when there is no
# such line.
#
# Only the digits are kept, so trailing text on the line can never leak into
# the count. If several lines match, the last one wins.
# NOTE(review): pdfinfo appears to print document metadata (Title, Author,
# ...) before the "Pages:" field, so the last match should be the least
# likely to come from attacker-controlled metadata -- confirm against the
# poppler version in use.
qpdf_parse_page_count() {
    # Byte-wise matching, so that lines containing invalid multibyte
    # sequences are still examined (the original used `grep -a` for this).
    local LC_ALL=C
    local pages_re='^Pages:[^0-9]*([0-9]+)'
    local line count=
    while IFS= read -r line || [ -n "$line" ]; do
        if [[ $line =~ $pages_re ]]; then
            count=${BASH_REMATCH[1]}
        fi
    done
    if [ -n "$count" ]; then
        printf '%s\n' "$count"
    fi
    return 0
}

NO_PAGES=$(pdfinfo "$INPUT_FILE" | qpdf_parse_page_count)
if [ -z "$NO_PAGES" ]; then
    # Perhaps this is not a PDF, only some JPG/PNG/etc? Let's try it anyway...
    NO_PAGES=1
fi
# First line of the reply: the number of pages that will follow.
echo "$NO_PAGES"

# Succeeds only if $1 has the form "<width> <height>" with both values
# written as plain decimal digits. Used to detect a failed `identify`.
qpdf_valid_dimensions() {
    local dims_re='^[0-9]+ [0-9]+$'
    [[ $1 =~ $dims_re ]]
}

# Print $1 on stderr, remove the scratch files and exit with status 1.
qpdf_abort() {
    echo "$1" >&2
    rm -f -- "$INPUT_FILE" "$TEMP_PNG_FILE" "$TEMP_RGB_FILE"
    exit 1
}

# Render every page to PNG, then to raw RGB, and stream it back as
# "<width> <height>\n" followed by the RGB samples.
#
# pdftocairo is given the full path of the PNG (minus ".png", which it
# appends itself with -singlefile) rather than a basename relative to /tmp:
# mktemp --tmpdir honours $TMPDIR, so the file is not necessarily in /tmp.
PAGE=1
# Deliberately the plain `[` test: unlike (( )) and [[ ]] it does not
# evaluate its operands as arithmetic expressions, and NO_PAGES is derived
# from the untrusted document.
while [ "$PAGE" -le "$NO_PAGES" ]; do
    # Empty the PNG first so that a failed render can never be mistaken for
    # success because the previous page's image is still lying around.
    : > "$TEMP_PNG_FILE"
    # if pdftocairo fails, let's try ImageMagick's convert -- perhaps this is just some img file?
    pdftocairo "$INPUT_FILE" -png -f "$PAGE" -l "$PAGE" -singlefile "${TEMP_PNG_FILE%.png}" || \
        convert "$INPUT_FILE" png:"$TEMP_PNG_FILE"
    [ -s "$TEMP_PNG_FILE" ] || qpdf_abort "Failed to render page $PAGE"

    # One identify call yields both values, separated by a single space.
    IMG_DIMENSIONS=$(identify -format "%w %h" "$TEMP_PNG_FILE")
    qpdf_valid_dimensions "$IMG_DIMENSIONS" || \
        qpdf_abort "Failed to read the dimensions of page $PAGE"
    IMG_WIDTH=${IMG_DIMENSIONS% *}
    IMG_HEIGHT=${IMG_DIMENSIONS#* }

    convert "$TEMP_PNG_FILE" -depth "$IMG_DEPTH" rgb:"$TEMP_RGB_FILE" || \
        qpdf_abort "Failed to convert page $PAGE to RGB"
    echo "$IMG_WIDTH $IMG_HEIGHT"
    cat "$TEMP_RGB_FILE"
    PAGE=$((PAGE + 1))
done

# Remove the scratch files, one at a time.
# The DispVM running us may be torn down before this finishes; that is
# harmless, since everything in a DispVM is thrown away anyway.
for leftover in "$INPUT_FILE" "$TEMP_PNG_FILE" "$TEMP_RGB_FILE"; do
    rm -f "$leftover"
done
