Browse Source

Write only successful retrievals (i.e., those that do not cause an exception) to the WARC file

tags/v0.2.2
JustAnotherArchivist 4 years ago
parent
commit
cb0d11284e
2 changed files with 3 additions and 2 deletions
  1. +2
    -2
      qwarc/__init__.py
  2. +1
    -0
      qwarc/utils.py

+ 2
- 2
qwarc/__init__.py View File

@@ -91,6 +91,8 @@ class Item:
exc = e # Pass the exception outward for the history
else:
action, writeToWarc = await responseHandler(url, attempt, response, None)
if response and exc is None and writeToWarc:
self.warc.write_client_response(response)
history.append((response, exc))
if action in (ACTION_SUCCESS, ACTION_IGNORE):
return response, tuple(history)
@@ -111,8 +113,6 @@ class Item:
pass
finally:
if response:
if writeToWarc:
self.warc.write_client_response(response)
await response.release()

async def process(self):


+ 1
- 0
qwarc/utils.py View File

@@ -144,6 +144,7 @@ async def handle_response_default(url, attempt, response, exc):
Parameters: url (yarl.URL instance), attempt (int), response (aiohttp.ClientResponse or None), exc (Exception or None)
At least one of response and exc is not None.
Returns: (one of the qwarc.RESPONSE_* constants, bool signifying whether to write to WARC or not)
The latter is ignored when exc is not None; responses that triggered an exception are never written to WARC.
'''

#TODO: Document that `attempt` is reset on redirects


Loading…
Cancel
Save