@@ -100,6 +100,7 @@ class Item: | |||||
raise ValueError('Incomplete URL and no baseUrl to join it with') | raise ValueError('Incomplete URL and no baseUrl to join it with') | ||||
else: | else: | ||||
url = self.baseUrl.join(url) | url = self.baseUrl.join(url) | ||||
originalUrl = url | |||||
if responseHandler is None: | if responseHandler is None: | ||||
responseHandler = self.defaultResponseHandler | responseHandler = self.defaultResponseHandler | ||||
assert method in ('GET', 'POST'), 'method must be GET or POST' | assert method in ('GET', 'POST'), 'method must be GET or POST' | ||||
@@ -164,6 +165,10 @@ class Item: | |||||
self.logger.error(f'Request for {url} failed {attempt} times') | self.logger.error(f'Request for {url} failed {attempt} times') | ||||
retResponse.qhistory = tuple(history) | retResponse.qhistory = tuple(history) | ||||
return retResponse | return retResponse | ||||
elif action == ACTION_TOO_MANY_REDIRECTS: | |||||
self.logger.error(f'Request for {url} (from {originalUrl}) exceeded redirect limit') | |||||
retResponse.qhistory = tuple(history) | |||||
return retResponse | |||||
elif action == ACTION_RETRY: | elif action == ACTION_RETRY: | ||||
# Nothing to do, just go to the next cycle | # Nothing to do, just go to the next cycle | ||||
pass | pass | ||||
@@ -26,6 +26,9 @@ ACTION_FOLLOW_OR_SUCCESS = 3 | |||||
ACTION_RETRIES_EXCEEDED = 4 | ACTION_RETRIES_EXCEEDED = 4 | ||||
'''This request failed repeatedly and exceeded the retry limit.''' | '''This request failed repeatedly and exceeded the retry limit.''' | ||||
ACTION_TOO_MANY_REDIRECTS = 5 | |||||
'''Too many redirects were encountered.''' | |||||
DEFAULT_HEADERS = [ | DEFAULT_HEADERS = [ | ||||
('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0'), | ('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0'), | ||||
('Accept', '*/*'), | ('Accept', '*/*'), | ||||
@@ -191,6 +191,22 @@ def handle_response_limit_error_retries(maxRetries, handler = handle_response_de | |||||
return _handler | return _handler | ||||
def handle_response_limit_redirect_depth(maxRedirects, handler = handle_response_default):
    '''
    Build a response handler that caps how many redirects get followed.

    The wrapped handler (handle_response_default unless another one is passed) is awaited first with the same keyword arguments. Its (action, writeToWarc) result is returned unchanged, except that an ACTION_FOLLOW_OR_SUCCESS action is replaced by ACTION_TOO_MANY_REDIRECTS once the redirect level has reached the limit.
    The same details as for handle_response_limit_error_retries apply.
    '''

    async def _handler(**kwargs):
        action, writeToWarc = await handler(**kwargs)
        if action == ACTION_FOLLOW_OR_SUCCESS:
            # redirectLevel is zero-based, so a level of maxRedirects - 1 means that exactly maxRedirects redirects have happened already.
            # NOTE(review): the check does not look at the response itself, so a non-redirect response arriving at this level is relabelled as well; confirm that this is intended.
            limitReached = kwargs['redirectLevel'] >= maxRedirects - 1
            if limitReached:
                return ACTION_TOO_MANY_REDIRECTS, writeToWarc
        return action, writeToWarc

    return _handler
def _get_dependency_versions(*pkgs): | def _get_dependency_versions(*pkgs): | ||||
pending = set(pkgs) | pending = set(pkgs) | ||||
have = set(pkgs) | have = set(pkgs) | ||||