You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

upload-one 3.3 KiB

11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
10 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137
#!/bin/bash
# Uploads megawarcs from the upload queue.
# (Needs a config.sh in the working directory.)
#
#   ./upload-one
#
# 1. Grabs an item from UPLOAD_QUEUE_DIR.
# 2. Reserves the item by moving its directory to
#    UPLOADER_WORKING_DIR.
# 3. Uploads the item to s3.us.archive.org.
# 4. Removes the source files from the working directory;
#    if COMPLETED_DIR is set, the uploaded files are moved there instead.
#
# If the queue is empty, the program waits 30 seconds and exits with 0.
# The program exits with 1 on any nontransient error.
#
  16. SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
  17. source ./config.sh || exit 1
  18. mkdir -p "${UPLOAD_QUEUE_DIR}" || exit 1
  19. mkdir -p "${UPLOADER_WORKING_DIR}" || exit 1
  20. if [ ! -z "${COMPLETED_DIR}" ]
  21. then
  22. mkdir -p "${COMPLETED_DIR}" || exit 1
  23. fi
  24. function mayicontinue {
  25. echo
  26. # echo "May I continue?"
  27. # read
  28. # echo
  29. }
  30. mayicontinue
  31. # try to grab an item from UPLOAD_QUEUE_DIR
  32. ITEM=none
  33. while [[ "${ITEM}" = none ]]
  34. do
  35. possible_item=$( ls -1 "${UPLOAD_QUEUE_DIR}" | grep -E '[0-9]{14}_[a-f0-9]{8}$' | sort | head -n 1 )
  36. if test -n "${possible_item}"
  37. then
  38. echo "Trying to grab ${possible_item}"
  39. if mv "${UPLOAD_QUEUE_DIR}/${possible_item}" "${UPLOADER_WORKING_DIR}/"
  40. then
  41. ITEM="${possible_item}"
  42. else
  43. echo "Failed to move ${possible_item}"
  44. sleep 5
  45. fi
  46. else
  47. date
  48. echo "No current item found!"
  49. sleep 30
  50. exit 0
  51. fi
  52. done
  53. echo "$( date ): Start uploading for item ${ITEM}" >> uploader.log
  54. # upload megawarc
  55. size_hint=$( du --bytes -s "${UPLOADER_WORKING_DIR}/${ITEM}" | grep -oE "^[0-9]+" )
  56. # (upload the large files first to optimise S3 snowballing)
  57. find "${UPLOADER_WORKING_DIR}/${ITEM}" -type f -regextype posix-egrep -regex ".+\.megawarc\.(warc\.(gz|zst)|tar|json\.gz)$" -printf "%f\n" \
  58. | while read filename
  59. do
  60. result=1
  61. while [[ "${result}" -ne 0 ]]
  62. do
  63. curl -v --location --fail \
  64. --speed-limit 1 --speed-time 900 \
  65. --header "x-archive-queue-derive:1" \
  66. --header "x-amz-auto-make-bucket:1" \
  67. --header "x-archive-keep-old-version:1" \
  68. --header "x-archive-meta-collection:${IA_COLLECTION}" \
  69. --header "x-archive-meta-mediatype:web" \
  70. --header "x-archive-meta-title:${IA_ITEM_TITLE} ${ITEM}" \
  71. --header "x-archive-meta-date:${IA_ITEM_DATE}" \
  72. --header "x-archive-meta-language:eng" \
  73. --header "x-archive-meta-noarchivetorrent:true" \
  74. --header "x-archive-size-hint:${size_hint}" \
  75. --header "authorization: LOW ${IA_AUTH}" \
  76. --upload-file "${UPLOADER_WORKING_DIR}/${ITEM}/${filename}" \
  77. "https://s3.us.archive.org/${IA_ITEM_PREFIX}${ITEM}/${filename}" \
  78. > /dev/null
  79. result="${?}"
  80. if [[ "${result}" -ne 0 ]]
  81. then
  82. date
  83. echo "Error while uploading ${ITEM}, curl said ${result}"
  84. echo "Will retry in 30 seconds"
  85. sleep 30
  86. fi
  87. done
  88. done
  89. echo "Uploaded ${ITEM}"
  90. echo "$( date ): Completed uploading for item ${ITEM}" >> uploader.log
  91. mayicontinue
  92. # move or remove megawarc
  93. if [ -z "${COMPLETED_DIR}" ]
  94. then
  95. # remove
  96. rm -rf "${UPLOADER_WORKING_DIR}/${ITEM}"
  97. result="${?}"
  98. if [[ "${result}" -ne 0 ]]
  99. then
  100. date
  101. echo "rm -rf megawarc exited with ${result} for ${ITEM}"
  102. exit 1
  103. fi
  104. else
  105. # move
  106. mv "${UPLOADER_WORKING_DIR}/${ITEM}" "${COMPLETED_DIR}/"
  107. result="${?}"
  108. if [[ "${result}" -ne 0 ]]
  109. then
  110. date
  111. echo "rm -rf megawarc exited with ${result} for ${ITEM}"
  112. exit 1
  113. fi
  114. fi
  115. exit 0