Add s3-bucket-list

4 years ago · 9743aa7c35
--- a/+ 91
+++ b/+ 91
@@ -0,0 +1,91 @@
 #!/usr/bin/env python3
 import html
 import http.client
 import os
 import sys
 import urllib.parse


 # Arguments
 i = 1
 withListUrls = False
 listUrlsFD = None
 startMarker = None
 format = '{url}'
 args = []
 while i < len(sys.argv):
 	arg = sys.argv[i]
 	if arg == '--help':
 		print('s3-bucket-list [options] HOSTNAME BUCKETNAME', file = sys.stderr)
 		print('', file = sys.stderr)
 		print('Options:', file = sys.stderr)
 		print(f'  --format FORMAT    Modify the output format; FORMAT defaults to {format!r}; available fields: url, key, size, and all fields returned by S3 (e.g. LastModified)', file = sys.stderr)
 		print( '  --marker KEY       Start after a particular key instead of from the beginning', file = sys.stderr)
 		print( '  --with-list-urls   Enables printing the list URLs retrieved to stderr', file = sys.stderr)
 		sys.exit(1)
 	elif arg == '--with-list-urls':
 		withListUrls = True
 		try:
 			listUrlsFD = os.fdopen(3, 'w')
 		except OSError:
 			print('Error: FD 3 not open', file = sys.stderr)
 			sys.exit(1)
 	elif arg == '--marker':
 		startMarker = sys.argv[i + 1]
 		i += 1
 	elif arg == '--format':
 		format = sys.argv[i + 1]
 		i += 1
 	else:
 		args.append(arg)
 	i += 1
 assert len(args) == 2, 'Need two arguments: hostname and bucketname'
 hostname, bucketname = args


 conn = http.client.HTTPSConnection(hostname)
 params = {}
 if startMarker is not None:
 	params['marker'] = startMarker
 baseUrl = f'https://{hostname}/{urllib.parse.quote(bucketname)}'
 while True:
 	queryString = urllib.parse.urlencode(params)
 	url = f'{baseUrl}{"?" + queryString if queryString else ""}'
 	if withListUrls:
 		print(f'{url}', file = listUrlsFD)
 	conn.request('GET', url[url.index('/', 8):])
 	resp = conn.getresponse()
 	body = resp.read()
 	if not body.startswith(b'<?xml version="1.0" encoding="UTF-8"?>\n<ListBucketResult xmlns="http://s3.amazonaws.com/doc/2006-03-01/">'):
 		raise RuntimeError(f'Invalid body: {body[:200]}...')

 	# No risk, no fun!
 	contents = body.split(b'<Contents>')
 	assert all(content.startswith(b'<Key>') for content in contents[1:])
 	assert all(content.endswith(b'</Contents>') for content in contents[1:-1])
 	assert contents[-1].endswith(b'</Contents></ListBucketResult>')
 	contents[-1] = contents[-1][:-len('</ListBucketResult>')]
 	for content in contents[1:]:
 		key = content[5 : content.index(b'</Key>')].decode('utf-8') # 5 = len(b'<Key>')
 		url = f'{baseUrl}/{urllib.parse.quote(key)}'

 		tags = content.split(b'>')
 		assert len(tags) % 2 == 0
 		assert tags[-1] == b''
 		assert tags[-2] == b'</Contents'
 		assert all(a[1:] == b[b.rindex(b'</') + 2:] for a, b in zip(tags[:-2:2], tags[1:-2:2]))
 		fields = {}
 		for a, b in zip(tags[:-2:2], tags[1:-2:2]):
 			fields[a[1:].decode('utf-8')] = html.unescape(b[:b.rindex(b'</')].decode('utf-8'))

 		size = int(fields['Size']) if 'Size' in fields else None

 		print(format.format(**fields, key = key, url = url, size = size))
 	lastKey = key

 	truncated = True if b'<IsTruncated>true</IsTruncated>' in body else (False if b'<IsTruncated>false</IsTruncated>' in body else None)
 	assert truncated in (True, False)

 	if not truncated:
 		break
 	params['marker'] = lastKey