The little things give you away... A collection of various small helper stuff
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

122 lines
4.2 KiB

  1. #!/usr/bin/env python3
  2. import html
  3. import http.client
  4. import os
  5. import shlex
  6. import sys
  7. import urllib.parse
  8. # Arguments
  9. i = 1
  10. withListUrls = False
  11. listUrlsFD = None
  12. startMarker = None
  13. format = '{url}'
  14. args = []
  15. while i < len(sys.argv):
  16. arg = sys.argv[i]
  17. if arg == '--help':
  18. print('s3-bucket-list [options] BUCKETURL', file = sys.stderr)
  19. print('', file = sys.stderr)
  20. print('Options:', file = sys.stderr)
  21. print(f' --format FORMAT Modify the output format; FORMAT defaults to {format!r}; available fields: url, key, size, and all fields returned by S3 (e.g. LastModified)', file = sys.stderr)
  22. print( ' --marker KEY Start after a particular key instead of from the beginning', file = sys.stderr)
  23. print( ' --with-list-urls Enables printing the list URLs retrieved to FD 3', file = sys.stderr)
  24. sys.exit(1)
  25. elif arg == '--with-list-urls':
  26. withListUrls = True
  27. try:
  28. listUrlsFD = os.fdopen(3, 'w')
  29. except OSError:
  30. print('Error: FD 3 not open', file = sys.stderr)
  31. sys.exit(1)
  32. elif arg == '--marker':
  33. startMarker = sys.argv[i + 1]
  34. i += 1
  35. elif arg == '--format':
  36. format = sys.argv[i + 1]
  37. i += 1
  38. else:
  39. args.append(arg)
  40. i += 1
  41. assert len(args) == 1, 'Need one argument: bucket URL'
  42. baseUrl = args[0]
  43. assert baseUrl.startswith('http://') or baseUrl.startswith('https://'), 'Argument does not look like an HTTP URL'
  44. if '/' not in baseUrl.split('://', 1)[1] or not baseUrl.endswith('/'):
  45. baseUrl = f'{baseUrl}/'
  46. hostname = baseUrl.split('://', 1)[1].split('/', 1)[0]
  47. conn = http.client.HTTPSConnection(hostname)
  48. params = {}
  49. if startMarker is not None:
  50. params['marker'] = startMarker
  51. attempt = 1
  52. while True:
  53. queryString = urllib.parse.urlencode(params)
  54. url = f'{baseUrl}{"?" + queryString if queryString else ""}'
  55. if withListUrls:
  56. print(f'{url}', file = listUrlsFD)
  57. conn.request('GET', url[url.index('/', 8):])
  58. resp = conn.getresponse()
  59. body = resp.read()
  60. if b'<Error><Code>InternalError</Code><Message>We encountered an internal error. Please try again.</Message>' in body:
  61. print(f'Got internal error on {url} on attempt {attempt}; {"retrying" if attempt < 10 else "aborting"}', file = sys.stderr)
  62. if attempt >= 10:
  63. if 'marker' in params:
  64. print(f'To retry, use --marker {shlex.quote(params["marker"])}', file = sys.stderr)
  65. break
  66. attempt += 1
  67. continue
  68. if not body.startswith(b'<?xml version="1.0" encoding="UTF-8"?>\n<ListBucketResult xmlns="http://s3.amazonaws.com/doc/2006-03-01/">') and not body.startswith(b"<?xml version='1.0' encoding='UTF-8'?><ListBucketResult xmlns='http://doc.s3.amazonaws.com/2006-03-01'>"):
  69. raise RuntimeError(f'Invalid body: {body[:200]}...')
  70. if b'<Marker></Marker>' in body[:200] and 'marker' in params:
  71. raise RuntimeError('Marker loop (empty marker in response despite providing one)')
  72. # No risk, no fun!
  73. contents = body.split(b'<Contents>')
  74. assert all(content.startswith(b'<Key>') for content in contents[1:])
  75. assert all(content.endswith(b'</Contents>') for content in contents[1:-1])
  76. assert contents[-1].endswith(b'</Contents></ListBucketResult>')
  77. contents[-1] = contents[-1][:-len('</ListBucketResult>')]
  78. for content in contents[1:]:
  79. key = html.unescape(content[5 : content.index(b'</Key>')].decode('utf-8')) # 5 = len(b'<Key>')
  80. url = f'{baseUrl}{urllib.parse.quote(key)}'
  81. tags = content.split(b'>')
  82. assert len(tags) % 2 == 0
  83. assert tags[-1] == b''
  84. assert tags[-2] == b'</Contents'
  85. openTags = [] # Current open tag hierarchy
  86. fields = {}
  87. for tag in tags[:-2]:
  88. if tag.startswith(b'<'):
  89. openTags.append(tag[1:])
  90. continue
  91. assert openTags
  92. if tag.endswith(b'</' + openTags[-1]):
  93. fields[b'>'.join(openTags).decode('utf-8')] = html.unescape(tag[:-(len(openTags[-1]) + 2)].decode('utf-8'))
  94. openTags.pop()
  95. continue
  96. assert False
  97. size = int(fields['Size']) if 'Size' in fields else None
  98. try:
  99. print(format.format(**fields, key = key, url = url, size = size))
  100. except BrokenPipeError:
  101. sys.exit(0)
  102. lastKey = key
  103. truncated = True if b'<IsTruncated>true</IsTruncated>' in body else (False if b'<IsTruncated>false</IsTruncated>' in body else None)
  104. assert truncated in (True, False)
  105. if not truncated:
  106. break
  107. if 'marker' in params and params['marker'] == lastKey:
  108. raise RuntimeError('Marker loop (same last key as previous marker)')
  109. params['marker'] = lastKey
  110. attempt = 1