#!/usr/bin/env python3
import html
import http.client
import os
import sys
import urllib.parse
# Arguments
#
# Hand-rolled parsing of sys.argv into the module-level settings used by the
# rest of the script:
#   withListUrls / listUrlsFD  whether list URLs are echoed, and the file
#                              object wrapping FD 3 they are written to
#   startMarker                key given with --marker, or None
#   format                     output format string (--format)
#   hostname, bucketname       the two required positional arguments
# Every usage error prints a message to stderr and exits with status 1.
i = 1
withListUrls = False
listUrlsFD = None
startMarker = None
format = '{url}'  # Shadows the builtin; the name is kept because it is module-level state of the script
args = []
while i < len(sys.argv):
    arg = sys.argv[i]
    if arg == '--help':
        print('s3-bucket-list [options] HOSTNAME BUCKETNAME', file = sys.stderr)
        print('', file = sys.stderr)
        print('Options:', file = sys.stderr)
        print(f' --format FORMAT Modify the output format; FORMAT defaults to {format!r}; available fields: url, key, size, and all fields returned by S3 (e.g. LastModified)', file = sys.stderr)
        print( ' --marker KEY Start after a particular key instead of from the beginning', file = sys.stderr)
        print( ' --with-list-urls Enables printing the list URLs retrieved to FD 3', file = sys.stderr)
        sys.exit(1)
    elif arg == '--with-list-urls':
        withListUrls = True
        # Wrap FD 3 only once: wrapping it again would drop the first file
        # object, whose finalisation closes the descriptor under the second.
        if listUrlsFD is None:
            try:
                listUrlsFD = os.fdopen(3, 'w')
            except OSError:
                print('Error: FD 3 not open', file = sys.stderr)
                sys.exit(1)
    elif arg in ('--marker', '--format'):
        # Both options consume the next argument as their value; report a
        # missing value as a usage error instead of failing with IndexError.
        if i + 1 >= len(sys.argv):
            print(f'Error: {arg} requires a value', file = sys.stderr)
            sys.exit(1)
        if arg == '--marker':
            startMarker = sys.argv[i + 1]
        else:
            format = sys.argv[i + 1]
        i += 1
    else:
        args.append(arg)
    i += 1
# Explicit check rather than assert: asserts are removed under `python -O`,
# which would turn a usage error into an obscure unpacking ValueError below.
if len(args) != 2:
    print('Error: Need two arguments: hostname and bucketname', file = sys.stderr)
    sys.exit(1)
hostname, bucketname = args
# Listing loop: repeatedly requests the bucket listing from the host, starting
# after startMarker if one was given, and continues with a new marker until
# the response is not flagged as truncated.
#
# NOTE(review): this copy of the script appears to have lost text. Several
# bytes literals below are empty or shortened (presumably markup was stripped
# from them) and the block indentation is missing, so the statements are
# documented as they stand rather than as they were originally written.
# A single HTTPS connection object is created here and reused for every request below.
conn = http.client.HTTPSConnection(hostname)
# Query parameters for the list request; only 'marker' is ever set in the visible code.
params = {}
if startMarker is not None:
params['marker'] = startMarker
# The bucket name is percent-encoded and used as the first path segment.
baseUrl = f'https://{hostname}/{urllib.parse.quote(bucketname)}'
while True:
# Build the list URL, appending the query string only when there are parameters.
queryString = urllib.parse.urlencode(params)
url = f'{baseUrl}{"?" + queryString if queryString else ""}'
if withListUrls:
print(f'{url}', file = listUrlsFD)
# 'https://' is 8 characters long, so the first '/' found from index 8 onwards
# starts the path; only the path and query string are sent as the request target.
conn.request('GET', url[url.index('/', 8):])
resp = conn.getresponse()
# The whole response body is read into memory as bytes.
body = resp.read()
# Sanity check on the start of the body.
# NOTE(review): the expected prefix literal looks truncated to a bare newline — confirm against the original.
if not body.startswith(b'\n'):
raise RuntimeError(f'Invalid body: {body[:200]}...')
# No risk, no fun!
# The body is cut into per-entry chunks by plain bytes splitting and checked
# with asserts, rather than being run through an XML parser.
# NOTE(review): every delimiter literal in the next five lines is empty here.
# As written, bytes.split(b'') raises ValueError (empty separator) and the
# startswith/endswith checks are trivially true; the original delimiters need
# to be restored.
contents = body.split(b'')
assert all(content.startswith(b'') for content in contents[1:])
assert all(content.endswith(b'') for content in contents[1:-1])
assert contents[-1].endswith(b'')
# Strips a trailing suffix from the last chunk; with the empty literal this
# slice is [:-0], i.e. it yields an empty bytes object.
contents[-1] = contents[-1][:-len('')]
# contents[0] (everything before the first delimiter) is skipped.
for content in contents[1:]:
key = content[5 : content.index(b'')].decode('utf-8') # NOTE(review): 5 is presumably the length of an opening tag literal (originally len(b'...')) that is missing from this copy — confirm
# Note that this rebinds `url` (the list URL above) to the URL of the individual key.
url = f'{baseUrl}/{urllib.parse.quote(key)}'
# Split the entry on '>' and check the shape of the result: an even number
# of pieces, the last of which is empty.
tags = content.split(b'>')
assert len(tags) % 2 == 0
assert tags[-1] == b''
# NOTE(review): the next line is not valid Python (an `else` with no matching
# `if`). It looks like the start of an assert on tags[-2] fused with the tail
# of a conditional expression that tests for true/false markers in body,
# presumably the assignment of `truncated`. Whatever stood between the two
# (including any per-entry output and the assignment of `lastKey`, which is
# read below but never assigned in the visible code) is missing here.
assert tags[-2] == b'true' in body else (False if b'false' in body else None)
# The listing must state unambiguously whether it was truncated.
assert truncated in (True, False)
if not truncated:
break
# Continue the listing after the last key seen on this page.
params['marker'] = lastKey