The little things give you away... A collection of various small helper stuff
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.
 
 
 

139 lignes
4.8 KiB

  1. #!/usr/bin/env python3
  2. import html
  3. import http.client
  4. import json
  5. import os
  6. import shlex
  7. import ssl
  8. import sys
  9. import urllib.parse
  10. # Arguments
  11. i = 1
  12. withListUrls = False
  13. listUrlsFD = None
  14. startMarker = None
  15. format = None
  16. defaultFormat = '{url}'
  17. jsonl = False
  18. args = []
  19. while i < len(sys.argv):
  20. arg = sys.argv[i]
  21. if arg == '--help':
  22. print('s3-bucket-list [options] BUCKETURL', file = sys.stderr)
  23. print('', file = sys.stderr)
  24. print('Options:', file = sys.stderr)
  25. print(f' --format FORMAT Modify the output format; FORMAT defaults to {defaultFormat!r}; available fields: url, key, size, and all fields returned by S3 (e.g. LastModified)', file = sys.stderr)
  26. print( ' --jsonl Enable JSONL output format; cannot be used if --format is present', file = sys.stderr)
  27. print( ' --marker KEY Start after a particular key instead of from the beginning', file = sys.stderr)
  28. print( ' --with-list-urls Enables printing the list URLs retrieved to FD 3', file = sys.stderr)
  29. sys.exit(1)
  30. elif arg == '--with-list-urls':
  31. withListUrls = True
  32. try:
  33. listUrlsFD = os.fdopen(3, 'w')
  34. except OSError:
  35. print('Error: FD 3 not open', file = sys.stderr)
  36. sys.exit(1)
  37. elif arg == '--marker':
  38. startMarker = sys.argv[i + 1]
  39. i += 1
  40. elif arg == '--format':
  41. format = sys.argv[i + 1]
  42. i += 1
  43. elif arg == '--jsonl':
  44. jsonl = True
  45. else:
  46. args.append(arg)
  47. i += 1
  48. assert not jsonl or format is None, '--jsonl and --format options are mutually exclusive'
  49. if format is None:
  50. format = defaultFormat
  51. assert len(args) == 1, 'Need one argument: bucket URL'
  52. baseUrl = args[0]
  53. assert baseUrl.startswith('http://') or baseUrl.startswith('https://'), 'Argument does not look like an HTTP URL'
  54. if '/' not in baseUrl.split('://', 1)[1] or not baseUrl.endswith('/'):
  55. baseUrl = f'{baseUrl}/'
  56. hostname = baseUrl.split('://', 1)[1].split('/', 1)[0]
  57. conn = http.client.HTTPSConnection(hostname, context = ssl._create_unverified_context())
  58. params = {}
  59. if startMarker is not None:
  60. params['marker'] = startMarker
  61. attempt = 1
  62. while True:
  63. queryString = urllib.parse.urlencode(params)
  64. url = f'{baseUrl}{"?" + queryString if queryString else ""}'
  65. if withListUrls:
  66. print(f'{url}', file = listUrlsFD)
  67. conn.request('GET', url[url.index('/', 8):])
  68. resp = conn.getresponse()
  69. body = resp.read()
  70. if b'<Error><Code>InternalError</Code><Message>We encountered an internal error. Please try again.</Message>' in body:
  71. print(f'Got internal error on {url} on attempt {attempt}; {"retrying" if attempt < 10 else "aborting"}', file = sys.stderr)
  72. if attempt >= 10:
  73. if 'marker' in params:
  74. print(f'To retry, use --marker {shlex.quote(params["marker"])}', file = sys.stderr)
  75. break
  76. attempt += 1
  77. continue
  78. if not body.startswith(b'<?xml version="1.0" encoding="UTF-8"?>\n<ListBucketResult xmlns="http://s3.amazonaws.com/doc/2006-03-01/">') and not body.startswith(b"<?xml version='1.0' encoding='UTF-8'?><ListBucketResult xmlns='http://doc.s3.amazonaws.com/2006-03-01'>"):
  79. raise RuntimeError(f'Invalid body: {body[:200]}...')
  80. if b'<Marker></Marker>' in body[:200] and 'marker' in params:
  81. raise RuntimeError('Marker loop (empty marker in response despite providing one)')
  82. # No risk, no fun!
  83. contents = body.split(b'<Contents>')
  84. assert all(content.startswith(b'<Key>') for content in contents[1:])
  85. assert all(content.endswith(b'</Contents>') for content in contents[1:-1])
  86. assert contents[-1].endswith(b'</Contents></ListBucketResult>')
  87. contents[-1] = contents[-1][:-len('</ListBucketResult>')]
  88. for content in contents[1:]:
  89. key = html.unescape(content[5 : content.index(b'</Key>')].decode('utf-8')) # 5 = len(b'<Key>')
  90. fields = {}
  91. url = f'{baseUrl}{urllib.parse.quote(key)}'
  92. fields['URL'] = url
  93. tags = content.split(b'>')
  94. assert len(tags) % 2 == 0
  95. assert tags[-1] == b''
  96. assert tags[-2] == b'</Contents'
  97. openTags = [] # Current open tag hierarchy
  98. for tag in tags[:-2]:
  99. if tag.startswith(b'<'):
  100. openTags.append(tag[1:])
  101. continue
  102. assert openTags
  103. if tag.endswith(b'</' + openTags[-1]):
  104. k = b'>'.join(openTags).decode('utf-8')
  105. assert k not in fields, f'{k!r} encountered twice (previous value: {fields[k]!r})'
  106. fields[k] = html.unescape(tag[:-(len(openTags[-1]) + 2)].decode('utf-8'))
  107. openTags.pop()
  108. continue
  109. assert False
  110. if 'Size' in fields:
  111. fields['Size'] = int(fields['Size'])
  112. try:
  113. if jsonl:
  114. print(json.dumps(fields))
  115. else:
  116. print(format.format(**fields, key = key, url = url, size = fields.get('Size')))
  117. except BrokenPipeError:
  118. sys.exit(0)
  119. lastKey = key
  120. truncated = True if b'<IsTruncated>true</IsTruncated>' in body else (False if b'<IsTruncated>false</IsTruncated>' in body else None)
  121. assert truncated in (True, False)
  122. if not truncated:
  123. break
  124. if 'marker' in params and params['marker'] == lastKey:
  125. raise RuntimeError('Marker loop (same last key as previous marker)')
  126. params['marker'] = lastKey
  127. attempt = 1