The little things give you away... A collection of various small helper stuff
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

100 lines
3.5 KiB

  1. #!/usr/bin/env python3
  2. import html
  3. import http.client
  4. import os
  5. import sys
  6. import urllib.parse
  7. # Arguments
  8. i = 1
  9. withListUrls = False
  10. listUrlsFD = None
  11. startMarker = None
  12. format = '{url}'
  13. args = []
  14. while i < len(sys.argv):
  15. arg = sys.argv[i]
  16. if arg == '--help':
  17. print('s3-bucket-list [options] BUCKETURL', file = sys.stderr)
  18. print('', file = sys.stderr)
  19. print('Options:', file = sys.stderr)
  20. print(f' --format FORMAT Modify the output format; FORMAT defaults to {format!r}; available fields: url, key, size, and all fields returned by S3 (e.g. LastModified)', file = sys.stderr)
  21. print( ' --marker KEY Start after a particular key instead of from the beginning', file = sys.stderr)
  22. print( ' --with-list-urls Enables printing the list URLs retrieved to FD 3', file = sys.stderr)
  23. sys.exit(1)
  24. elif arg == '--with-list-urls':
  25. withListUrls = True
  26. try:
  27. listUrlsFD = os.fdopen(3, 'w')
  28. except OSError:
  29. print('Error: FD 3 not open', file = sys.stderr)
  30. sys.exit(1)
  31. elif arg == '--marker':
  32. startMarker = sys.argv[i + 1]
  33. i += 1
  34. elif arg == '--format':
  35. format = sys.argv[i + 1]
  36. i += 1
  37. else:
  38. args.append(arg)
  39. i += 1
  40. assert len(args) == 1, 'Need one argument: bucket URL'
  41. baseUrl = args[0]
  42. assert baseUrl.startswith('http://') or baseUrl.startswith('https://'), 'Argument does not look like an HTTP URL'
  43. if '/' not in baseUrl.split('://', 1)[1]:
  44. baseUrl = f'{baseUrl}/'
  45. hostname = baseUrl.split('://', 1)[1].split('/', 1)[0]
  46. conn = http.client.HTTPSConnection(hostname)
  47. params = {}
  48. if startMarker is not None:
  49. params['marker'] = startMarker
  50. while True:
  51. queryString = urllib.parse.urlencode(params)
  52. url = f'{baseUrl}{"?" + queryString if queryString else ""}'
  53. if withListUrls:
  54. print(f'{url}', file = listUrlsFD)
  55. conn.request('GET', url[url.index('/', 8):])
  56. resp = conn.getresponse()
  57. body = resp.read()
  58. if not body.startswith(b'<?xml version="1.0" encoding="UTF-8"?>\n<ListBucketResult xmlns="http://s3.amazonaws.com/doc/2006-03-01/">'):
  59. raise RuntimeError(f'Invalid body: {body[:200]}...')
  60. if b'<Marker></Marker>' in body[:200] and 'marker' in params:
  61. raise RuntimeError('Marker loop (empty marker in response despite providing one)')
  62. # No risk, no fun!
  63. contents = body.split(b'<Contents>')
  64. assert all(content.startswith(b'<Key>') for content in contents[1:])
  65. assert all(content.endswith(b'</Contents>') for content in contents[1:-1])
  66. assert contents[-1].endswith(b'</Contents></ListBucketResult>')
  67. contents[-1] = contents[-1][:-len('</ListBucketResult>')]
  68. for content in contents[1:]:
  69. key = content[5 : content.index(b'</Key>')].decode('utf-8') # 5 = len(b'<Key>')
  70. url = f'{baseUrl}/{urllib.parse.quote(key)}'
  71. tags = content.split(b'>')
  72. assert len(tags) % 2 == 0
  73. assert tags[-1] == b''
  74. assert tags[-2] == b'</Contents'
  75. assert all(a[1:] == b[b.rindex(b'</') + 2:] for a, b in zip(tags[:-2:2], tags[1:-2:2]))
  76. fields = {}
  77. for a, b in zip(tags[:-2:2], tags[1:-2:2]):
  78. fields[a[1:].decode('utf-8')] = html.unescape(b[:b.rindex(b'</')].decode('utf-8'))
  79. size = int(fields['Size']) if 'Size' in fields else None
  80. print(format.format(**fields, key = key, url = url, size = size))
  81. lastKey = key
  82. truncated = True if b'<IsTruncated>true</IsTruncated>' in body else (False if b'<IsTruncated>false</IsTruncated>' in body else None)
  83. assert truncated in (True, False)
  84. if not truncated:
  85. break
  86. if 'marker' in params and params['marker'] == lastKey:
  87. raise RuntimeError('Marker loop (same last key as previous marker)')
  88. params['marker'] = lastKey