|
- #!/bin/bash
- # Uploads megawarcs from the upload queue.
- # (Needs a config.sh in the working directory.)
- #
- # ./upload-one
- #
- # 1. Grabs an item from UPLOAD_QUEUE_DIR
- # 2. Reserves the item by moving the directory to the
- # UPLOADER_WORKING_DIR
- # 3. Uploads the item to s3.us.archive.org
- # 4. Removes the source files from the working directory.
- #    If COMPLETED_DIR is set, the uploaded files are moved there instead.
- #
- # The program exits with 1 on any nontransient error.
- #
-
- # Resolve the directory that holds this script. Nothing below reads it;
- # presumably it is there for config.sh to use -- confirm before removing.
- SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-
- # Load the settings from the current working directory; stop if that fails.
- if ! . ./config.sh
- then
-   exit 1
- fi
-
- # The queue and the working directory must exist before an item is claimed.
- if ! mkdir -p "$UPLOAD_QUEUE_DIR"
- then
-   exit 1
- fi
- if ! mkdir -p "$UPLOADER_WORKING_DIR"
- then
-   exit 1
- fi
-
- # COMPLETED_DIR is optional; it is only created when it is configured.
- if [[ -n "$COMPLETED_DIR" ]]
- then
-   if ! mkdir -p "$COMPLETED_DIR"
-   then
-     exit 1
-   fi
- fi
-
- # Checkpoint called between the stages of the script.
- # As shipped it only prints one empty line. Uncomment the three lines in
- # the body to make every checkpoint ask for confirmation and wait for input.
- function mayicontinue {
-   printf '\n'
-   # echo "May I continue?"
-   # read
-   # echo
- }
-
- mayicontinue
-
-
- # Claim one item from UPLOAD_QUEUE_DIR.
- # Candidates are the entries whose name contains "201"; the first one in
- # sort order is tried. Moving the directory into UPLOADER_WORKING_DIR is
- # the reservation. ITEM stays "none" until a move succeeds.
- ITEM=none
- until [[ $ITEM != none ]]
- do
-   possible_item=$( ls -1 "$UPLOAD_QUEUE_DIR" | grep 201 | sort | head -n 1 )
-
-   if [[ $possible_item != *201* ]]
-   then
-     # Nothing to upload right now: report it, pause, and end successfully.
-     date
-     echo "No current item found!"
-     sleep 30
-     exit 0
-   fi
-
-   echo "Trying to grab $possible_item"
-   if mv "$UPLOAD_QUEUE_DIR/$possible_item" "$UPLOADER_WORKING_DIR/"
-   then
-     ITEM=$possible_item
-   else
-     # The move failed; wait briefly, then list the queue again.
-     echo "Failed to move $possible_item"
-     sleep 5
-   fi
- done
-
-
- echo "$( date ): Start uploading for item $ITEM" >> uploader.log
-
- # upload megawarc
- # Total size of the item directory in bytes; sent as x-archive-size-hint.
- size_hint=$( du --bytes -s "${UPLOADER_WORKING_DIR}/${ITEM}" | grep -oE "^[0-9]+" )
- # (upload the large files first to optimise S3 snowballing)
- for ext in warc.gz tar json.gz
- do
-   # Both names are the same for every attempt, so build them once per file.
-   filename="${FILE_PREFIX}${ITEM}.megawarc.${ext}"
-   source_file="${UPLOADER_WORKING_DIR}/${ITEM}/${filename}"
-
-   # Retrying cannot fix a missing or unreadable file. Exit with 1 instead
-   # of looping forever; the item is left in UPLOADER_WORKING_DIR.
-   if [[ ! -f "$source_file" ]] || [[ ! -r "$source_file" ]]
-   then
-     date
-     echo "Cannot upload $ITEM: $source_file is missing or not readable"
-     exit 1
-   fi
-
-   # Every curl failure is treated as transient: keep retrying this file,
-   # with a 30 second pause, until curl exits with 0.
-   result=1
-   while [[ $result -ne 0 ]]
-   do
-     # --fail turns HTTP error responses into a non-zero exit status.
-     # --speed-limit/--speed-time abort a transfer that stays below
-     # 1 byte/s for 900 seconds, so a stalled upload is retried.
-     # The URL uses https so the authorization header is not sent in clear
-     # text. NOTE(review): IA_AUTH is still visible in the process list
-     # because it is passed on the command line.
-     curl -v --location --fail \
-       --speed-limit 1 --speed-time 900 \
-       --header "x-archive-queue-derive:1" \
-       --header "x-amz-auto-make-bucket:1" \
-       --header "x-archive-meta-collection:${IA_COLLECTION}" \
-       --header "x-archive-meta-mediatype:texts" \
-       --header "x-archive-meta-title:${IA_ITEM_TITLE} ${ITEM}" \
-       --header "x-archive-meta-date:${IA_ITEM_DATE}" \
-       --header "x-archive-meta-language:eng" \
-       --header "x-archive-size-hint:$size_hint" \
-       --header "authorization: LOW ${IA_AUTH}" \
-       --upload-file "$source_file" \
-       "https://s3.us.archive.org/${IA_ITEM_PREFIX}${ITEM}/${filename}" \
-       > /dev/null
-     result=$?
-     if [[ $result -ne 0 ]]
-     then
-       date
-       echo "Error while uploading $ITEM, curl said $result"
-       echo "Will retry in 30 seconds"
-       sleep 30
-     fi
-   done
- done
-
- echo "Uploaded $ITEM"
-
- echo "$( date ): Completed uploading for item $ITEM" >> uploader.log
-
-
- mayicontinue
-
-
- # move or remove megawarc
- # After a successful upload the item leaves UPLOADER_WORKING_DIR: it is
- # deleted when COMPLETED_DIR is empty, otherwise moved into COMPLETED_DIR.
- # Either step failing ends the script with exit status 1.
- if [[ -z "$COMPLETED_DIR" ]]
- then
-   # remove
-   # ${ITEM:?} aborts the script if ITEM is empty or unset, so this can
-   # never expand to the working directory itself.
-   rm -rf -- "${UPLOADER_WORKING_DIR}/${ITEM:?}"
-   result=$?
-
-   if [[ $result -ne 0 ]]
-   then
-     date
-     echo "rm -rf megawarc exited with $result for $ITEM"
-     exit 1
-   fi
- else
-   # move
-   mv -- "${UPLOADER_WORKING_DIR}/${ITEM:?}" "${COMPLETED_DIR}/"
-   result=$?
-
-   if [[ $result -ne 0 ]]
-   then
-     date
-     # Name the command that actually failed (this used to say "rm -rf").
-     echo "mv megawarc exited with $result for $ITEM"
-     exit 1
-   fi
- fi
-
- exit 0
|