The little things give you away... A collection of various small helper stuff
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.
 
 
 

111 lignes
3.8 KiB

  1. #!/usr/bin/env python3
  2. import re
  3. import requests
  4. import sys
  5. import urllib3
  6. RESPONSE_PATTERN = re.compile(r'''^<\?xml version=(["'])1\.0\1 encoding=(["'])UTF-8\2\?>''' '\n?' r'''<ListBucketResult xmlns=(["'])http://(?:s3\.amazonaws\.com/doc/2006-03-01/|doc\.s3\.amazonaws\.com/2006-03-01)\3>''')
  7. NAME_PATTERN = re.compile(r'<Name>([^<]*)</Name>')
  8. KEY_PATTERN = re.compile(r'<Key>([^<]*)</Key>')
  9. MTIME_PATTERN = re.compile(r'<LastModified>([^<]*)</LastModified>')
  10. REDIRECT_PATTERN = re.compile(r'''^<\?xml version=(["'])1\.0\1 encoding=(["'])UTF-8\2\?>''' '\n?' r'<Error><Code>PermanentRedirect</Code>')
  11. REDIRECT_TARGET_ENDPOINT_PATTERN = re.compile(r'<Endpoint>([^<]*)</Endpoint>')
  12. REDIRECT_TARGET_BUCKET_PATTERN = re.compile(r'<Bucket>([^<]*)</Bucket>')
  13. PROVIDERS = {
  14. 'amazon': ['https://s3.amazonaws.com/{}/', 'https://{}.s3.amazonaws.com/'],
  15. 'google': ['https://storage.googleapis.com/{}/'],
  16. 'scaleway': ['https://s3.nl-ams.scw.cloud/{}/', 'https://s3.fr-par.scw.cloud/{}/'],
  17. 'wasabi': ['https://s3.wasabisys.com/{}/'],
  18. }
# AWS S3 buckets whose names contain a . are broken because AWS can't be bothered to serve valid TLS certs for them.
# fetch_with_redirect therefore makes its initial request with verify = False; the warnings urllib3 would
# otherwise emit are switched off here, once, at import time.
urllib3.disable_warnings()
  21. def fetch_with_redirect(url):
  22. print(f'Fetching {url}', file = sys.stderr)
  23. r = requests.get(url, verify = False, timeout = 60)
  24. print(f'{r.status_code} {url}', file = sys.stderr)
  25. body = r.text
  26. if r.status_code == 301 and REDIRECT_PATTERN.match(body):
  27. m = REDIRECT_TARGET_ENDPOINT_PATTERN.search(body)
  28. if not m:
  29. raise RuntimeError('Could not get redirect endpoint')
  30. endpoint = m.group(1)
  31. m = REDIRECT_TARGET_BUCKET_PATTERN.search(body)
  32. if not m:
  33. raise RuntimeError('Could not get redirect bucket')
  34. bucket = m.group(1)
  35. print(f'Redirect to endpoint {endpoint!r} bucket {bucket!r}')
  36. url = f'https://{endpoint}/{bucket}/'
  37. print(f'Fetching {url}')
  38. r = requests.get(url, timeout = 60)
  39. print(f'{r.status_code} {url}', file = sys.stderr)
  40. body = r.text
  41. if r.status_code == 200 and not RESPONSE_PATTERN.match(body):
  42. raise RuntimeError(f'Invalid body: {body[:200]}...')
  43. return r, url, body
  44. def find(url, providers):
  45. _, _, body = fetch_with_redirect(url)
  46. # Get bucket name
  47. m = NAME_PATTERN.search(body)
  48. if not m:
  49. raise RuntimeError('Could not find bucket name')
  50. name = m.group(1)
  51. if '&' in name:
  52. raise RuntimeError(f'Unsupported bucket name: {name!r}')
  53. # Get name and mtime of first object
  54. m = KEY_PATTERN.search(body)
  55. if m:
  56. firstKey = m.group(1)
  57. m = MTIME_PATTERN.search(body)
  58. if not m:
  59. raise RuntimeError('Got key but no mtime')
  60. firstMtime = m.group(1)
  61. else:
  62. print('Warning: no key found, cannot verify that it is the same bucket', file = sys.stderr)
  63. firstKey, firstMtime = None, None
  64. # Start searching
  65. for provider in providers:
  66. for testUrlTemplate in PROVIDERS[provider]:
  67. testUrl = testUrlTemplate.format(name)
  68. r, testUrl, body = fetch_with_redirect(testUrl)
  69. if r.status_code != 200:
  70. continue
  71. # Compare first object
  72. if not firstKey:
  73. continue
  74. m = KEY_PATTERN.search(body)
  75. if not m:
  76. print(f'No key in {testUrl}', file = sys.stderr)
  77. continue
  78. testFirstKey = m.group(1)
  79. m = MTIME_PATTERN.search(body)
  80. if not m:
  81. print(f'Got key but no mtime in {testUrl}', file = sys.stderr)
  82. continue
  83. testFirstMtime = m.group(1)
  84. if (firstKey, firstMtime) == (testFirstKey, testFirstMtime):
  85. print(f'Found the bucket: {url} == {testUrl}')
  86. if __name__ == '__main__':
  87. if not 2 <= len(sys.argv) <= 3 or sys.argv[1] in ('--help', '-h'):
  88. print('Usage: s3-bucket-find-direct-url URL [PROVIDER]', file = sys.stderr)
  89. print("Searches for an S3 bucket that's available at URL (e.g. CDN or proxy), optionally filtered by PROVIDER", file = sys.stderr)
  90. print(f'Providers: {", ".join(PROVIDERS)}', file = sys.stderr)
  91. sys.exit(1)
  92. url = sys.argv[1]
  93. providers = (sys.argv[2],) if len(sys.argv) == 3 else tuple(PROVIDERS.keys())
  94. find(url, providers)