From 6fafd326859b494a097d38a72a1a3d766f90058b Mon Sep 17 00:00:00 2001 From: JustAnotherArchivist Date: Wed, 24 Jul 2019 15:39:32 +0000 Subject: [PATCH] Error when the retries are exceeded --- qwarc/__init__.py | 3 +++ qwarc/const.py | 3 +++ qwarc/utils.py | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/qwarc/__init__.py b/qwarc/__init__.py index fc243ec..fd7e5a3 100644 --- a/qwarc/__init__.py +++ b/qwarc/__init__.py @@ -95,6 +95,9 @@ class Item: method = 'GET' data = None attempt = 0 + elif action == ACTION_RETRIES_EXCEEDED: + logging.error(f'Request for {url} failed {attempt} times') + return response, tuple(history) elif action == ACTION_RETRY: # Nothing to do, just go to the next cycle pass diff --git a/qwarc/const.py b/qwarc/const.py index 0ac6cd9..7d8952f 100644 --- a/qwarc/const.py +++ b/qwarc/const.py @@ -21,3 +21,6 @@ ACTION_RETRY = 2 ACTION_FOLLOW_OR_SUCCESS = 3 '''If the response contains a Location or URI header, follow it. Otherwise, treat it as a success.''' #TODO: Rename to ACTION_FOLLOW maybe? However, the current name makes it more clear what qwarc does when there's a redirect without a redirect target... + +ACTION_RETRIES_EXCEEDED = 4 +'''This request failed repeatedly and exceeded the retry limit.''' diff --git a/qwarc/utils.py b/qwarc/utils.py index 4690d9c..af3e980 100644 --- a/qwarc/utils.py +++ b/qwarc/utils.py @@ -178,6 +178,6 @@ def handle_response_limit_error_retries(maxRetries, handler = handle_response_de async def _handler(url, attempt, response, exc): action, writeToWarc = await handler(url, attempt, response, exc) if action == ACTION_RETRY and attempt > maxRetries: - action = ACTION_IGNORE + action = ACTION_RETRIES_EXCEEDED return action, writeToWarc return _handler