The little things give you away... A collection of various small helper stuff
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

92 lines
3.0 KiB

  1. #!/usr/bin/env python3
  2. import html
  3. import http.client
  4. import os
  5. import sys
  6. import urllib.parse
  7. # Arguments
  8. i = 1
  9. withListUrls = False
  10. listUrlsFD = None
  11. startMarker = None
  12. format = '{url}'
  13. args = []
  14. while i < len(sys.argv):
  15. arg = sys.argv[i]
  16. if arg == '--help':
  17. print('s3-bucket-list [options] HOSTNAME BUCKETNAME', file = sys.stderr)
  18. print('', file = sys.stderr)
  19. print('Options:', file = sys.stderr)
  20. print(f' --format FORMAT Modify the output format; FORMAT defaults to {format!r}; available fields: url, key, size, and all fields returned by S3 (e.g. LastModified)', file = sys.stderr)
  21. print( ' --marker KEY Start after a particular key instead of from the beginning', file = sys.stderr)
  22. print( ' --with-list-urls Enables printing the list URLs retrieved to FD 3', file = sys.stderr)
  23. sys.exit(1)
  24. elif arg == '--with-list-urls':
  25. withListUrls = True
  26. try:
  27. listUrlsFD = os.fdopen(3, 'w')
  28. except OSError:
  29. print('Error: FD 3 not open', file = sys.stderr)
  30. sys.exit(1)
  31. elif arg == '--marker':
  32. startMarker = sys.argv[i + 1]
  33. i += 1
  34. elif arg == '--format':
  35. format = sys.argv[i + 1]
  36. i += 1
  37. else:
  38. args.append(arg)
  39. i += 1
  40. assert len(args) == 2, 'Need two arguments: hostname and bucketname'
  41. hostname, bucketname = args
  42. conn = http.client.HTTPSConnection(hostname)
  43. params = {}
  44. if startMarker is not None:
  45. params['marker'] = startMarker
  46. baseUrl = f'https://{hostname}/{urllib.parse.quote(bucketname)}'
  47. while True:
  48. queryString = urllib.parse.urlencode(params)
  49. url = f'{baseUrl}{"?" + queryString if queryString else ""}'
  50. if withListUrls:
  51. print(f'{url}', file = listUrlsFD)
  52. conn.request('GET', url[url.index('/', 8):])
  53. resp = conn.getresponse()
  54. body = resp.read()
  55. if not body.startswith(b'<?xml version="1.0" encoding="UTF-8"?>\n<ListBucketResult xmlns="http://s3.amazonaws.com/doc/2006-03-01/">'):
  56. raise RuntimeError(f'Invalid body: {body[:200]}...')
  57. # No risk, no fun!
  58. contents = body.split(b'<Contents>')
  59. assert all(content.startswith(b'<Key>') for content in contents[1:])
  60. assert all(content.endswith(b'</Contents>') for content in contents[1:-1])
  61. assert contents[-1].endswith(b'</Contents></ListBucketResult>')
  62. contents[-1] = contents[-1][:-len('</ListBucketResult>')]
  63. for content in contents[1:]:
  64. key = content[5 : content.index(b'</Key>')].decode('utf-8') # 5 = len(b'<Key>')
  65. url = f'{baseUrl}/{urllib.parse.quote(key)}'
  66. tags = content.split(b'>')
  67. assert len(tags) % 2 == 0
  68. assert tags[-1] == b''
  69. assert tags[-2] == b'</Contents'
  70. assert all(a[1:] == b[b.rindex(b'</') + 2:] for a, b in zip(tags[:-2:2], tags[1:-2:2]))
  71. fields = {}
  72. for a, b in zip(tags[:-2:2], tags[1:-2:2]):
  73. fields[a[1:].decode('utf-8')] = html.unescape(b[:b.rindex(b'</')].decode('utf-8'))
  74. size = int(fields['Size']) if 'Size' in fields else None
  75. print(format.format(**fields, key = key, url = url, size = size))
  76. lastKey = key
  77. truncated = True if b'<IsTruncated>true</IsTruncated>' in body else (False if b'<IsTruncated>false</IsTruncated>' in body else None)
  78. assert truncated in (True, False)
  79. if not truncated:
  80. break
  81. params['marker'] = lastKey