You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

upload-one 3.0 KiB

11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134
  1. #!/bin/bash
  2. # Uploads megawarcs from the upload queue.
  3. # (Needs a config.sh in the working directory.)
  4. #
  5. # ./upload-one
  6. #
  7. # 1. Grabs an item from UPLOAD_QUEUE_DIR
  8. # 2. Reserves the item by moving the directory to the
  9. # UPLOADER_WORKING_DIR
  10. # 3. Uploads the item to s3.us.archive.org
  11. # 4. Removes the source files from the working directory
  12. # If COMPLETED_DIR is set, uploaded files are moved there.
  13. #
  14. # The program exits with 1 on any nontransient error.
  15. #
  # Absolute path of the directory holding this script.
  # NOTE(review): not referenced in the visible part of this script;
  # presumably available for config.sh to use -- confirm before removing.
  SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)

  # Settings are read from config.sh in the *current working directory*
  # (not from SCRIPT_DIR); without them nothing below can work.
  . ./config.sh || exit 1

  # Make sure the queue and the working area exist before touching them.
  if ! mkdir -p "$UPLOAD_QUEUE_DIR"; then
    exit 1
  fi
  if ! mkdir -p "$UPLOADER_WORKING_DIR"; then
    exit 1
  fi

  # COMPLETED_DIR is optional: it is only created when it has been set.
  [[ -z "$COMPLETED_DIR" ]] || mkdir -p "$COMPLETED_DIR" || exit 1
  # Checkpoint called between the stages of the script.
  # The interactive confirmation below is disabled, so at the moment this
  # only writes a single empty line to stdout.
  mayicontinue() {
    printf '\n'
    # echo "May I continue?"
    # read
    # echo
  }
  mayicontinue

  # Try to grab an item from UPLOAD_QUEUE_DIR.
  #
  # An item is reserved by moving its entry into UPLOADER_WORKING_DIR. If
  # the move fails (for instance because the entry disappeared in the
  # meantime) the scan is repeated after a short pause. If the queue holds
  # no candidate, the script waits 30 seconds and exits with status 0.
  #
  # On success ITEM holds the name of the reserved entry.
  ITEM=none
  while [[ $ITEM = none ]]; do
    # Take the first candidate in glob (collation) order. Globbing replaces
    # the former `ls | grep | sort | head` pipeline, so names are never
    # re-split on whitespace. ${...:?} aborts instead of scanning "/" should
    # the queue directory ever be empty or unset.
    possible_item=
    for queued_path in "${UPLOAD_QUEUE_DIR:?UPLOAD_QUEUE_DIR must be set}"/*; do
      queued_name=${queued_path##*/}
      # A candidate name must contain "20" followed by a digit. The previous
      # filter was the literal substring "201", which silently skipped names
      # such as 20200101...; every name that matched before still matches.
      # NOTE(review): assumes item names embed a four-digit year -- confirm
      # against whatever fills the queue.
      if [[ $queued_name =~ 20[0-9] ]]; then
        possible_item=$queued_name
        break
      fi
    done

    if [[ -n $possible_item ]]; then
      echo "Trying to grab $possible_item"
      if mv -- "$UPLOAD_QUEUE_DIR/$possible_item" "$UPLOADER_WORKING_DIR/"; then
        ITEM=$possible_item
      else
        echo "Failed to move $possible_item"
        sleep 5
      fi
    else
      date
      echo "No current item found!"
      sleep 30
      exit 0
    fi
  done
  echo "$( date ): Start uploading for item $ITEM" >> uploader.log

  # upload megawarc
  #
  # Reads: ITEM, UPLOADER_WORKING_DIR, FILE_PREFIX, IA_COLLECTION,
  #        IA_ITEM_TITLE, IA_ITEM_DATE, IA_AUTH, IA_ITEM_PREFIX.
  # Appends start/completion lines to uploader.log in the working directory.

  # Size in bytes of the whole item directory, sent as x-archive-size-hint.
  size_hint=$( du --bytes -s "${UPLOADER_WORKING_DIR}/${ITEM}" | grep -oE "^[0-9]+" )

  # (upload the large files first to optimise S3 snowballing)
  for ext in warc.gz tar json.gz; do
    filename="${FILE_PREFIX}${ITEM}.megawarc.${ext}"

    # Keep retrying this file until curl reports success.
    # NOTE(review): every curl failure is retried, with no upper bound, so a
    # permanent failure loops here forever.
    result=1
    while [[ $result -ne 0 ]]; do
      # The request carries the IA_AUTH credentials in the authorization
      # header, so it goes over HTTPS rather than plain HTTP.
      # --speed-limit/--speed-time make curl give up on a stalled transfer
      # (below 1 byte/s for 900 s) so that the retry loop can restart it.
      curl -v --location --fail \
        --speed-limit 1 --speed-time 900 \
        --header "x-archive-queue-derive:1" \
        --header "x-amz-auto-make-bucket:1" \
        --header "x-archive-meta-collection:${IA_COLLECTION}" \
        --header "x-archive-meta-mediatype:web" \
        --header "x-archive-meta-title:${IA_ITEM_TITLE} ${ITEM}" \
        --header "x-archive-meta-date:${IA_ITEM_DATE}" \
        --header "x-archive-meta-language:eng" \
        --header "x-archive-size-hint:$size_hint" \
        --header "authorization: LOW ${IA_AUTH}" \
        --upload-file "${UPLOADER_WORKING_DIR}/${ITEM}/${filename}" \
        "https://s3.us.archive.org/${IA_ITEM_PREFIX}${ITEM}/${filename}" \
        > /dev/null
      result=$?

      if [[ $result -ne 0 ]]; then
        date
        echo "Error while uploading $ITEM, curl said $result"
        echo "Will retry in 30 seconds"
        sleep 30
      fi
    done
  done

  echo "Uploaded $ITEM"
  echo "$( date ): Completed uploading for item $ITEM" >> uploader.log

  mayicontinue
  # move or remove megawarc
  #
  # Once the upload is done the item leaves UPLOADER_WORKING_DIR: it is
  # deleted when COMPLETED_DIR is empty, otherwise it is moved there.
  # Any failure ends the script with status 1; success ends it with 0.
  if [[ -z "$COMPLETED_DIR" ]]; then
    # remove
    # ${...:?} aborts rather than letting an empty variable turn this into
    # "rm -rf /" or into wiping the whole working directory.
    rm -rf -- "${UPLOADER_WORKING_DIR:?}/${ITEM:?}"
    result=$?
    if [[ $result -ne 0 ]]; then
      date
      echo "rm -rf megawarc exited with $result for $ITEM"
      exit 1
    fi
  else
    # move
    mv -- "${UPLOADER_WORKING_DIR}/${ITEM}" "${COMPLETED_DIR}/"
    result=$?
    if [[ $result -ne 0 ]]; then
      date
      # Report the command that actually failed (this branch runs mv).
      echo "mv megawarc exited with $result for $ITEM"
      exit 1
    fi
  fi

  exit 0