@@ -127,7 +127,7 @@ def generate_range_items(start, stop, step):
yield f'{i}-{min(i + step - 1, stop)}'
yield f'{i}-{min(i + step - 1, stop)}'
async def handle_response_default(url, attempt, response, exc):
async def handle_response_default(url, attempt, response, exc, item ):
'''
'''
The default response handler, which behaves as follows:
The default response handler, which behaves as follows:
- If there is no response (e.g. timeout error), retry the retrieval after a delay of 5 seconds.
- If there is no response (e.g. timeout error), retry the retrieval after a delay of 5 seconds.
@@ -141,7 +141,7 @@ async def handle_response_default(url, attempt, response, exc):
Note that this handler does not limit the number of retries on errors.
Note that this handler does not limit the number of retries on errors.
Parameters: url (yarl.URL instance), attempt (int), response (aiohttp.ClientResponse or None), exc (Exception or None)
Parameters: url (yarl.URL instance), attempt (int), response (aiohttp.ClientResponse or None), exc (Exception or None), item (qwarc.Item instance)
At least one of response and exc is not None.
At least one of response and exc is not None.
Returns: (one of the qwarc.RESPONSE_* constants, bool signifying whether to write to WARC or not)
Returns: (one of the qwarc.RESPONSE_* constants, bool signifying whether to write to WARC or not)
The latter is ignored when exc is not None; responses that triggered an exception are never written to WARC.
The latter is ignored when exc is not None; responses that triggered an exception are never written to WARC.
@@ -166,10 +166,10 @@ async def handle_response_default(url, attempt, response, exc):
return ACTION_RETRY, True
return ACTION_RETRY, True
async def handle_response_ignore_redirects(url, attempt, response, exc, item):
    '''
    A response handler that does not follow redirects, i.e. treats them as a success instead.
    It behaves as handle_response_default otherwise.

    Parameters: url, attempt, response, exc, item -- forwarded unchanged to handle_response_default.
    Returns: the (action, writeToWarc) tuple produced by handle_response_default, except that
    an action of ACTION_FOLLOW_OR_SUCCESS is replaced by ACTION_SUCCESS.
    '''

    # Delegate the actual decision; item must be forwarded because the default handler takes it as its fifth argument.
    action, writeToWarc = await handle_response_default(url, attempt, response, exc, item)
    if action == ACTION_FOLLOW_OR_SUCCESS:
        # Never follow the redirect: report the response as a plain success.
        action = ACTION_SUCCESS
    return action, writeToWarc
@@ -183,8 +183,8 @@ def handle_response_limit_error_retries(maxRetries, handler = handle_response_de
If you use the same limit many times, you should keep the return value (the response handler) of this method and reuse it to avoid creating a new function every time.
If you use the same limit many times, you should keep the return value (the response handler) of this method and reuse it to avoid creating a new function every time.
'''
'''
async def _handler(url, attempt, response, exc, item):
    '''
    Call the wrapped handler and cap the number of retries.

    All five arguments are forwarded unchanged to handler. If the wrapped handler asks for
    a retry (ACTION_RETRY) and attempt is greater than maxRetries, the action is replaced by
    ACTION_RETRIES_EXCEEDED. The writeToWarc flag is returned exactly as the wrapped handler
    produced it.
    '''

    # handler and maxRetries come from the enclosing function's scope.
    action, writeToWarc = await handler(url, attempt, response, exc, item)
    if action == ACTION_RETRY and attempt > maxRetries:
        action = ACTION_RETRIES_EXCEEDED
    return action, writeToWarc