Add support for PermanentRedirect error responses

6 months ago · 7e458457d6
--- a/+ 33
+++ b/+ 33
@@ -2,27 +2,55 @@
 import re
 import requests
 import sys
 import urllib3


 # Matches the start of a bucket listing document: an XML 1.0 / UTF-8 declaration (single or double quotes),
 # an optional newline, then a <ListBucketResult> root element using either of the two accepted namespace URLs.
 RESPONSE_PATTERN = re.compile(r'''^<\?xml version=(["'])1\.0\1 encoding=(["'])UTF-8\2\?>''' '\n?' r'''<ListBucketResult xmlns=(["'])http://(?:s3\.amazonaws\.com/doc/2006-03-01/|doc\.s3\.amazonaws\.com/2006-03-01)\3>''')
 # Capture the text content of the first <Name>, <Key> and <LastModified> elements respectively.
 NAME_PATTERN = re.compile(r'<Name>([^<]*)</Name>')
 KEY_PATTERN = re.compile(r'<Key>([^<]*)</Key>')
 MTIME_PATTERN = re.compile(r'<LastModified>([^<]*)</LastModified>')
 # Matches the start of an error document whose <Code> is PermanentRedirect (same XML declaration as above).
 REDIRECT_PATTERN = re.compile(r'''^<\?xml version=(["'])1\.0\1 encoding=(["'])UTF-8\2\?>''' '\n?' r'<Error><Code>PermanentRedirect</Code>')
 # Capture the <Endpoint> and <Bucket> values from a redirect error document.
 REDIRECT_TARGET_ENDPOINT_PATTERN = re.compile(r'<Endpoint>([^<]*)</Endpoint>')
 REDIRECT_TARGET_BUCKET_PATTERN = re.compile(r'<Bucket>([^<]*)</Bucket>')
 # Listing URL templates to probe for each storage provider, keyed by provider name.
 # The '{}' placeholder in each template is filled in with the bucket name via str.format.
 # Each key must appear only once: a repeated key in a dict literal silently discards the earlier entry.
 PROVIDERS = {
 	'amazon': ['https://s3.amazonaws.com/{}/', 'https://{}.s3.amazonaws.com/'],
 	'google': ['https://storage.googleapis.com/{}/'],
 	'scaleway': ['https://s3.nl-ams.scw.cloud/{}/', 'https://s3.fr-par.scw.cloud/{}/'],
 	'wasabi': ['https://s3.wasabisys.com/{}/'],
 }


 def find(url, providers):
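 # AWS S3 buckets whose names contain a '.' cannot be fetched with TLS certificate verification enabled, because AWS does not serve valid TLS certs for them.
 # Requests are therefore made with verify = False; silence the warnings urllib3 would otherwise emit for every such request.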
 urllib3.disable_warnings()


 def fetch_with_redirect(url):
 	'''
 	Fetch a bucket listing URL, following a single PermanentRedirect error response if one is returned.

 	Progress is reported on stderr. TLS certificate verification is disabled for every request made here.

 	url: the listing URL to request.

 	Returns a tuple (response, url, body): the final response object, the URL it was fetched from
 	(the redirect target if a redirect was followed), and the response text.

 	Raises RuntimeError if a PermanentRedirect body lacks an <Endpoint> or <Bucket> element, or if the
 	final response has status 200 but its body does not start like a ListBucketResult document.
 	'''

 	def get(target):
 		# Bucket names containing a '.' do not pass TLS hostname verification, so verification is turned off.
 		# The redirect target goes through this same helper so that it is fetched the same way as the initial URL.
 		print(f'Fetching {target}', file = sys.stderr)
 		response = requests.get(target, verify = False, timeout = 60)
 		print(f'{response.status_code} {target}', file = sys.stderr)
 		return response

 	r = get(url)
 	body = r.text

 	# A 301 carrying a PermanentRedirect error document names the endpoint and bucket to retry against.
 	# Only one such redirect is followed; the second response is returned whatever its status.
 	if r.status_code == 301 and REDIRECT_PATTERN.match(body):
 		endpointMatch = REDIRECT_TARGET_ENDPOINT_PATTERN.search(body)
 		if not endpointMatch:
 			raise RuntimeError('Could not get redirect endpoint')
 		bucketMatch = REDIRECT_TARGET_BUCKET_PATTERN.search(body)
 		if not bucketMatch:
 			raise RuntimeError('Could not get redirect bucket')
 		endpoint = endpointMatch.group(1)
 		bucket = bucketMatch.group(1)
 		# Diagnostics go to stderr like every other progress message in this function.
 		print(f'Redirect to endpoint {endpoint!r} bucket {bucket!r}', file = sys.stderr)
 		url = f'https://{endpoint}/{bucket}/'
 		r = get(url)
 		body = r.text

 	# Non-200 responses are handed back for the caller to inspect; a 200 must look like a bucket listing.
 	if r.status_code == 200 and not RESPONSE_PATTERN.match(body):
 		raise RuntimeError(f'Invalid body: {body[:200]}...')
 	return r, url, body


 def find(url, providers):
 	_, _, body = fetch_with_redirect(url)

 	# Get bucket name
 	m = NAME_PATTERN.search(body)
@@ -48,14 +76,9 @@ def find(url, providers):
 	for provider in providers:
 		for testUrlTemplate in PROVIDERS[provider]:
 			testUrl = testUrlTemplate.format(name)
 			print(f'Fetching {testUrl}', file = sys.stderr)
 			r = requests.get(testUrl, timeout = 60)
 			print(f'{r.status_code} {testUrl}', file = sys.stderr)
 			r, testUrl, body = fetch_with_redirect(testUrl)
 			if r.status_code != 200:
 				continue
 			body = r.text
 			if not RESPONSE_PATTERN.match(body):
 				raise RuntimeError(f'Invalid body: {body[:200]}...')

 			# Compare first object
 			if not firstKey: