From 6a76814ec56902d437c65bd3e53e66063cf43a52 Mon Sep 17 00:00:00 2001
From: JustAnotherArchivist
Date: Tue, 20 Dec 2022 06:59:27 +0000
Subject: [PATCH] Add crude in-progress upload listing

---
Reviewer notes (this section is ignored when the patch is applied):
- Line structure was reconstructed from a whitespace-collapsed copy; it
  matches the hunk headers and the diffstat below. Tab indentation and the
  position of the blank context line in the second hunk are assumptions;
  verify against blob 818606e before applying.
- The XML element names in the four regexes (<Upload>, <UploadId>, <Key>,
  <Initiated>) had been stripped from the copy and were restored from the
  S3 ListMultipartUploads response format; confirm against the IA S3
  response.
- list_uploads(item, *, tries = 3): follows the 307 redirect of the
  '?uploads' endpoint manually, so that the s3dns host can be fetched with
  assert_hostname pinned to 's3.us.archive.org', prints one line per
  in-progress upload, and raises UploadError after `tries` failed attempts.

 ia-upload-stream | 45 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 43 insertions(+), 2 deletions(-)

diff --git a/ia-upload-stream b/ia-upload-stream
index 01f5137..818606e 100755
--- a/ia-upload-stream
+++ b/ia-upload-stream
@@ -276,6 +276,40 @@ def upload(item, filename, metadata, *, iaConfigFile = None, partSize = 100*1024
 	logger.info('Done!')
 
 
+def list_uploads(item, *, tries = 3):
+	# No auth needed
+	url = f'https://s3.us.archive.org/{item}/?uploads'
+
+	# This endpoint redirects to the server storing the item under ia######.s3dns.us.archive.org, but those servers present an invalid TLS certificate for *.us.archive.org.
+	class IAS3CertificateFixHTTPAdapter(requests.adapters.HTTPAdapter):
+		def init_poolmanager(self, *args, **kwargs):
+			kwargs['assert_hostname'] = 's3.us.archive.org'
+			return super().init_poolmanager(*args, **kwargs)
+
+	for attempt in range(1, tries + 1):
+		r = requests.get(url, allow_redirects = False)
+		if r.status_code == 307 and '.s3dns.us.archive.org' in r.headers['Location']:
+			s3dnsUrl = r.headers['Location']
+			s3dnsUrl = s3dnsUrl.replace('http://', 'https://')
+			s3dnsUrl = s3dnsUrl.replace('.s3dns.us.archive.org:80/', '.s3dns.us.archive.org/')
+			domain = s3dnsUrl[8:s3dnsUrl.find('/', 9)]
+			s = requests.Session()
+			s.mount(f'https://{domain}/', IAS3CertificateFixHTTPAdapter())
+			r = s.get(s3dnsUrl)
+		if r.status_code == 200:
+			print(f'In-progress uploads for {item} (initiation datetime, upload ID, filename):')
+			for upload in re.findall(r'<Upload>.*?</Upload>', r.text):
+				uploadId = re.search(r'<UploadId>(.*?)</UploadId>', upload).group(1)
+				filename = re.search(r'<Key>(.*?)</Key>', upload).group(1)
+				date = re.search(r'<Initiated>(.*?)</Initiated>', upload).group(1)
+				print(f'{date} {uploadId} {filename}')
+			break
+		retrying = f', retrying' if attempt < tries else ''
+		logger.error(f'Could not list uploads; got status {r.status_code} from IA S3{retrying}')
+		if attempt == tries:
+			raise UploadError(f'Could not list uploads; got status {r.status_code} from IA S3', r = r)
+
+
 def abort(item, filename, uploadId, *, iaConfigFile = None, tries = 3):
 	# Read `ia` config
 	access, secret = get_ia_access_secret(iaConfigFile)
@@ -338,18 +372,23 @@ def main():
 	parser.add_argument('--upload-id', dest = 'uploadId', help = 'upload ID when resuming or aborting an upload')
 	parser.add_argument('--parts', type = parts, help = 'previous parts data for resumption; can only be used with --upload-id')
 	parser.add_argument('--abort', action = 'store_true', help = 'aborts an upload; can only be used with --upload-id; most other options are ignored when this is used')
+	parser.add_argument('--list', action = 'store_true', help = 'list in-progress uploads for item; most other options are ignored when this is used')
 	parser.add_argument('item', help = 'identifier of the target item')
-	parser.add_argument('filename', help = 'filename to store the data to')
+	parser.add_argument('filename', nargs = '?', help = 'filename to store the data to')
 	parser.add_argument('metadata', nargs = '*', type = metadata, help = "metadata for the item in the form 'key:value'; only has an effect if the item doesn't exist yet")
 	args = parser.parse_args()
 	if (args.parts or args.abort) and not args.uploadId:
 		parser.error('--parts and --abort can only be used together with --upload-id')
 	if args.uploadId and (args.parts is not None) == bool(args.abort):
 		parser.error('--upload-id requires exactly one of --parts and --abort')
+	if args.abort and args.list:
+		parser.error('--abort and --list cannot be used together')
+	if not args.list and not args.filename:
+		parser.error('filename is required when not using --list')
 
 	logging.basicConfig(level = logging.INFO, format = '{asctime}.{msecs:03.0f} {levelname} {name} {message}', datefmt = '%Y-%m-%d %H:%M:%S', style = '{')
 	try:
-		if not args.abort:
+		if not args.abort and not args.list:
 			upload(
 				args.item,
 				args.filename,
@@ -365,6 +404,8 @@ def main():
 				parts = args.parts,
 				progress = args.progress,
 			)
+		elif args.list:
+			list_uploads(args.item, tries = args.tries)
 		else:
 			abort(
 				args.item,