Browse Source

Add crude in-progress upload listing

master
JustAnotherArchivist 1 year ago
parent
commit
6a76814ec5
1 changed files with 43 additions and 2 deletions
  1. +43
    -2
      ia-upload-stream

+ 43
- 2
ia-upload-stream View File

@@ -276,6 +276,40 @@ def upload(item, filename, metadata, *, iaConfigFile = None, partSize = 100*1024
logger.info('Done!')


def list_uploads(item, *, tries = 3):
    '''
    Print the in-progress uploads of an item to stdout.

    Requests the item's `?uploads` listing from IA S3, follows the redirect to the
    storage node manually, and prints one line per upload consisting of the
    initiation datetime, the upload ID, and the filename.

    item: identifier of the item whose uploads are listed
    tries: maximum number of attempts before giving up

    Raises UploadError if the last attempt does not yield a 200 response from the
    storage node. Exceptions raised by requests itself are not caught here.
    '''

    # Imported locally so this function does not depend on the module's import block.
    from urllib.parse import urlsplit

    # No auth needed
    url = f'https://s3.us.archive.org/{item}/?uploads'

    # This endpoint redirects to the server storing the item under ia######.s3dns.us.archive.org, but those servers present an invalid TLS certificate for *.us.archive.org.
    # The adapter makes certificate verification check against s3.us.archive.org instead of the hostname actually being contacted.
    class IAS3CertificateFixHTTPAdapter(requests.adapters.HTTPAdapter):
        def init_poolmanager(self, *args, **kwargs):
            kwargs['assert_hostname'] = 's3.us.archive.org'
            return super().init_poolmanager(*args, **kwargs)

    def extract(tag, uploadXml):
        # Return the text content of the first <tag> element, or None if it is absent.
        match = re.search(rf'<{tag}>(.*?)</{tag}>', uploadXml)
        return match.group(1) if match else None

    for attempt in range(1, tries + 1):
        # The redirect is handled by hand because the target URL has to be rewritten before it can be fetched.
        r = requests.get(url, allow_redirects = False)
        # A 307 without a Location header is handled as a failed attempt rather than a KeyError.
        location = r.headers.get('Location', '')
        if r.status_code == 307 and '.s3dns.us.archive.org' in location:
            s3dnsUrl = location.replace('http://', 'https://')
            s3dnsUrl = s3dnsUrl.replace('.s3dns.us.archive.org:80/', '.s3dns.us.archive.org/')
            domain = urlsplit(s3dnsUrl).netloc
            # The session is only needed for this one request; close it again so its connection pool is released.
            with requests.Session() as s:
                s.mount(f'https://{domain}/', IAS3CertificateFixHTTPAdapter())
                r = s.get(s3dnsUrl)
            if r.status_code == 200:
                print(f'In-progress uploads for {item} (initiation datetime, upload ID, filename):')
                # DOTALL so that entries are also found if the XML is spread over several lines.
                for uploadXml in re.findall(r'<Upload>.*?</Upload>', r.text, re.DOTALL):
                    uploadId = extract('UploadId', uploadXml)
                    filename = extract('Key', uploadXml)
                    date = extract('Initiated', uploadXml)
                    if uploadId is None or filename is None or date is None:
                        logger.warning(f'Skipping malformed upload entry: {uploadXml!r}')
                        continue
                    print(f'{date} {uploadId} {filename}')
                break
        # Reached on an unexpected first response as well as on a non-200 response from the storage node; r is whichever response was received last.
        retrying = ', retrying' if attempt < tries else ''
        logger.error(f'Could not list uploads; got status {r.status_code} from IA S3{retrying}')
        if attempt == tries:
            raise UploadError(f'Could not list uploads; got status {r.status_code} from IA S3', r = r)


def abort(item, filename, uploadId, *, iaConfigFile = None, tries = 3):
# Read `ia` config
access, secret = get_ia_access_secret(iaConfigFile)
@@ -338,18 +372,23 @@ def main():
parser.add_argument('--upload-id', dest = 'uploadId', help = 'upload ID when resuming or aborting an upload')
parser.add_argument('--parts', type = parts, help = 'previous parts data for resumption; can only be used with --upload-id')
parser.add_argument('--abort', action = 'store_true', help = 'aborts an upload; can only be used with --upload-id; most other options are ignored when this is used')
parser.add_argument('--list', action = 'store_true', help = 'list in-progress uploads for item; most other options are ignored when this is used')
parser.add_argument('item', help = 'identifier of the target item')
parser.add_argument('filename', help = 'filename to store the data to')
parser.add_argument('filename', nargs = '?', help = 'filename to store the data to')
parser.add_argument('metadata', nargs = '*', type = metadata, help = "metadata for the item in the form 'key:value'; only has an effect if the item doesn't exist yet")
args = parser.parse_args()
if (args.parts or args.abort) and not args.uploadId:
parser.error('--parts and --abort can only be used together with --upload-id')
if args.uploadId and (args.parts is not None) == bool(args.abort):
parser.error('--upload-id requires exactly one of --parts and --abort')
if args.abort and args.list:
parser.error('--abort and --list cannot be used together')
if not args.list and not args.filename:
parser.error('filename is required when not using --list')

logging.basicConfig(level = logging.INFO, format = '{asctime}.{msecs:03.0f} {levelname} {name} {message}', datefmt = '%Y-%m-%d %H:%M:%S', style = '{')
try:
if not args.abort:
if not args.abort and not args.list:
upload(
args.item,
args.filename,
@@ -365,6 +404,8 @@ def main():
parts = args.parts,
progress = args.progress,
)
elif args.list:
list_uploads(args.item, tries = args.tries)
else:
abort(
args.item,


Loading…
Cancel
Save