The little things give you away... A collection of various small helper stuff
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

111 lines
3.8 KiB

  1. #!/usr/bin/env python3
  2. import re
  3. import requests
  4. import sys
  5. import urllib3
  6. RESPONSE_PATTERN = re.compile(r'''^<\?xml version=(["'])1\.0\1 encoding=(["'])UTF-8\2\?>''' '\n?' r'''<ListBucketResult xmlns=(["'])http://(?:s3\.amazonaws\.com/doc/2006-03-01/|doc\.s3\.amazonaws\.com/2006-03-01)\3>''')
  7. NAME_PATTERN = re.compile(r'<Name>([^<]*)</Name>')
  8. KEY_PATTERN = re.compile(r'<Key>([^<]*)</Key>')
  9. MTIME_PATTERN = re.compile(r'<LastModified>([^<]*)</LastModified>')
  10. REDIRECT_PATTERN = re.compile(r'''^<\?xml version=(["'])1\.0\1 encoding=(["'])UTF-8\2\?>''' '\n?' r'<Error><Code>PermanentRedirect</Code>')
  11. REDIRECT_TARGET_ENDPOINT_PATTERN = re.compile(r'<Endpoint>([^<]*)</Endpoint>')
  12. REDIRECT_TARGET_BUCKET_PATTERN = re.compile(r'<Bucket>([^<]*)</Bucket>')
  13. PROVIDERS = {
  14. 'amazon': ['https://s3.amazonaws.com/{}/', 'https://{}.s3.amazonaws.com/'],
  15. 'google': ['https://storage.googleapis.com/{}/'],
  16. 'scaleway': ['https://s3.nl-ams.scw.cloud/{}/', 'https://s3.fr-par.scw.cloud/{}/'],
  17. 'wasabi': ['https://s3.wasabisys.com/{}/'],
  18. }
  19. # AWS S3 buckets whose names contain a . are broken because AWS can't be bothered to serve valid TLS certs for them.
  20. urllib3.disable_warnings()
  21. def fetch_with_redirect(url):
  22. print(f'Fetching {url}', file = sys.stderr)
  23. r = requests.get(url, verify = False, timeout = 60)
  24. print(f'{r.status_code} {url}', file = sys.stderr)
  25. body = r.text
  26. if r.status_code == 301 and REDIRECT_PATTERN.match(body):
  27. m = REDIRECT_TARGET_ENDPOINT_PATTERN.search(body)
  28. if not m:
  29. raise RuntimeError('Could not get redirect endpoint')
  30. endpoint = m.group(1)
  31. m = REDIRECT_TARGET_BUCKET_PATTERN.search(body)
  32. if not m:
  33. raise RuntimeError('Could not get redirect bucket')
  34. bucket = m.group(1)
  35. print(f'Redirect to endpoint {endpoint!r} bucket {bucket!r}')
  36. url = f'https://{endpoint}/{bucket}/'
  37. print(f'Fetching {url}')
  38. r = requests.get(url, timeout = 60)
  39. print(f'{r.status_code} {url}', file = sys.stderr)
  40. body = r.text
  41. if r.status_code == 200 and not RESPONSE_PATTERN.match(body):
  42. raise RuntimeError(f'Invalid body: {body[:200]}...')
  43. return r, url, body
  44. def find(url, providers):
  45. _, _, body = fetch_with_redirect(url)
  46. # Get bucket name
  47. m = NAME_PATTERN.search(body)
  48. if not m:
  49. raise RuntimeError('Could not find bucket name')
  50. name = m.group(1)
  51. if '&' in name:
  52. raise RuntimeError(f'Unsupported bucket name: {name!r}')
  53. # Get name and mtime of first object
  54. m = KEY_PATTERN.search(body)
  55. if m:
  56. firstKey = m.group(1)
  57. m = MTIME_PATTERN.search(body)
  58. if not m:
  59. raise RuntimeError('Got key but no mtime')
  60. firstMtime = m.group(1)
  61. else:
  62. print('Warning: no key found, cannot verify that it is the same bucket', file = sys.stderr)
  63. firstKey, firstMtime = None, None
  64. # Start searching
  65. for provider in providers:
  66. for testUrlTemplate in PROVIDERS[provider]:
  67. testUrl = testUrlTemplate.format(name)
  68. r, testUrl, body = fetch_with_redirect(testUrl)
  69. if r.status_code != 200:
  70. continue
  71. # Compare first object
  72. if not firstKey:
  73. continue
  74. m = KEY_PATTERN.search(body)
  75. if not m:
  76. print(f'No key in {testUrl}', file = sys.stderr)
  77. continue
  78. testFirstKey = m.group(1)
  79. m = MTIME_PATTERN.search(body)
  80. if not m:
  81. print(f'Got key but no mtime in {testUrl}', file = sys.stderr)
  82. continue
  83. testFirstMtime = m.group(1)
  84. if (firstKey, firstMtime) == (testFirstKey, testFirstMtime):
  85. print(f'Found the bucket: {url} == {testUrl}')
  86. if __name__ == '__main__':
  87. if not 2 <= len(sys.argv) <= 3 or sys.argv[1] in ('--help', '-h'):
  88. print('Usage: s3-bucket-find-direct-url URL [PROVIDER]', file = sys.stderr)
  89. print("Searches for an S3 bucket that's available at URL (e.g. CDN or proxy), optionally filtered by PROVIDER", file = sys.stderr)
  90. print(f'Providers: {", ".join(PROVIDERS)}', file = sys.stderr)
  91. sys.exit(1)
  92. url = sys.argv[1]
  93. providers = (sys.argv[2],) if len(sys.argv) == 3 else tuple(PROVIDERS.keys())
  94. find(url, providers)