The little things give you away... A collection of various small helper stuff
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

123 lines
4.1 KiB

  1. #!/usr/bin/env python3
  2. import html
  3. import http.client
  4. import json
  5. import os
  6. import shlex
  7. import ssl
  8. import sys
  9. import urllib.parse
  10. # Arguments
  11. i = 1
  12. withListUrls = False
  13. listUrlsFD = None
  14. startMarker = None
  15. format = '{url}'
  16. jsonl = False
  17. args = []
  18. while i < len(sys.argv):
  19. arg = sys.argv[i]
  20. if arg == '--help':
  21. print('azure-storage-list [options] CONTAINERURL', file = sys.stderr)
  22. print('', file = sys.stderr)
  23. print('Options:', file = sys.stderr)
  24. print(f' --format FORMAT Modify the output format; FORMAT defaults to {format!r}; available fields: name, url, and all fields returned by Azure (e.g. Content-Length, Last-Modified)', file = sys.stderr)
  25. print( ' --jsonl Output JSONL instead of formatted lines', file = sys.stderr)
  26. print( ' --marker MARKER Start with a marker instead of from the beginning', file = sys.stderr)
  27. print( ' --with-list-urls Enables printing the list URLs retrieved to FD 3', file = sys.stderr)
  28. sys.exit(1)
  29. elif arg == '--with-list-urls':
  30. withListUrls = True
  31. try:
  32. listUrlsFD = os.fdopen(3, 'w')
  33. except OSError:
  34. print('Error: FD 3 not open', file = sys.stderr)
  35. sys.exit(1)
  36. elif arg == '--marker':
  37. startMarker = sys.argv[i + 1]
  38. i += 1
  39. elif arg == '--format':
  40. format = sys.argv[i + 1]
  41. i += 1
  42. elif arg == '--jsonl':
  43. jsonl = True
  44. else:
  45. args.append(arg)
  46. i += 1
  47. assert len(args) == 1, 'Need one argument: container URL'
  48. baseUrl = args[0]
  49. assert baseUrl.startswith('http://') or baseUrl.startswith('https://'), 'Argument does not look like an HTTP URL'
  50. if '/' not in baseUrl.split('://', 1)[1] or not baseUrl.endswith('/'):
  51. baseUrl = f'{baseUrl}/'
  52. hostname = baseUrl.split('://', 1)[1].split('/', 1)[0]
  53. conn = http.client.HTTPSConnection(hostname, context = ssl._create_unverified_context())
  54. params = {'restype': 'container', 'comp': 'list'}
  55. if startMarker is not None:
  56. params['marker'] = startMarker
  57. while True:
  58. queryString = urllib.parse.urlencode(params)
  59. url = f'{baseUrl}?{queryString}'
  60. if withListUrls:
  61. print(f'{url}', file = listUrlsFD)
  62. conn.request('GET', url[url.index('/', 8):])
  63. resp = conn.getresponse()
  64. body = resp.read()
  65. if not body.startswith(b'\xef\xbb\xbf<?xml version="1.0" encoding="utf-8"?><EnumerationResults ContainerName="'):
  66. raise RuntimeError(f'Invalid body: {body[:200]}...')
  67. if b'<Marker>' not in body[:200] and 'marker' in params:
  68. raise RuntimeError('Marker loop (no marker in response despite providing one)')
  69. # No risk, no fun!
  70. blobs = body.split(b'<Blob>')
  71. assert all(blob.startswith(b'<Name>') for blob in blobs[1:])
  72. assert all(blob.endswith(b'</Blob>') for blob in blobs[1:-1])
  73. assert b'</Blobs>' in blobs[-1] and blobs[-1].endswith(b'</EnumerationResults>')
  74. blobs[-1], ending = blobs[-1].split(b'</Blobs>')
  75. assert b'<NextMarker' in ending
  76. for blob in blobs[1:]:
  77. name = html.unescape(blob[6 : blob.index(b'</Name>')].decode('utf-8')) # 6 = len(b'<Name>')
  78. url = f'{baseUrl}{urllib.parse.quote(name)}'
  79. tags = blob.split(b'>')
  80. assert tags[-1] == b''
  81. assert tags[-2] == b'</Blob'
  82. assert tags[-3] == b'</Properties'
  83. assert b'<Properties' in tags
  84. openTags = [] # Current open tag hierarchy
  85. fields = {}
  86. for tag in tags[:-3]:
  87. if tag == b'<Properties':
  88. continue
  89. if tag.endswith(b' /'): # Self-closing tag without a value
  90. continue
  91. if tag.startswith(b'<'):
  92. openTags.append(tag[1:])
  93. continue
  94. assert openTags
  95. if tag.endswith(b'</' + openTags[-1]):
  96. fields[b'>'.join(openTags).decode('utf-8')] = html.unescape(tag[:-(len(openTags[-1]) + 2)].decode('utf-8'))
  97. openTags.pop()
  98. continue
  99. assert False
  100. try:
  101. if not jsonl:
  102. print(format.format(**fields, name = name, url = url))
  103. else:
  104. print(json.dumps({'name': name, 'url': url, **fields}))
  105. except BrokenPipeError:
  106. sys.exit(0)
  107. if b'<NextMarker />' in ending:
  108. break
  109. nextMarkerStart = ending.index(b'<NextMarker>')
  110. nextMarker = ending[nextMarkerStart + 12 : ending.index(b'</NextMarker', nextMarkerStart)]
  111. if 'marker' in params and params['marker'] == nextMarker:
  112. raise RuntimeError('Marker loop (same NextMarker as previous marker)')
  113. params['marker'] = nextMarker.decode('utf-8')