Browse Source

Move response/exception history to ClientResponse.qhistory

It is rarely necessary to access the history, and the tuple return value clutters the spec file code.

As a consequence, it's no longer possible to return None if an error occurred without losing the history.
To replace that, this also introduces a DummyClientResponse, which is only loosely ClientResponse-like: it has the same qhistory attribute and evaluates to False when cast to bool (such that the intuitive `if response` works as expected).
master
JustAnotherArchivist 3 years ago
parent
commit
b30ccf8bf8
3 changed files with 34 additions and 8 deletions
  1. +8
    -6
      qwarc/__init__.py
  2. +8
    -2
      qwarc/aiohttp.py
  3. +18
    -0
      qwarc/utils.py

+ 8
- 6
qwarc/__init__.py View File

@@ -74,8 +74,7 @@ class Item:
verify_ssl: bool, whether the SSL/TLS certificate should be validated
timeout: int or float, how long the fetch may take at most in total (sending request until finishing reading the response)

Returns response (a ClientResponse object or None) and history (a tuple of (response, exception) tuples).
response can be None and history can be an empty tuple, depending on the circumstances (e.g. timeouts).
Returns response (a ClientResponse object or a qwarc.utils.DummyClientResponse object)
'''

#TODO: Rewrite using 'async with self.session.get'
@@ -126,13 +125,15 @@ class Item:
if response and exc is None and writeToWarc:
self.warc.write_client_response(response)
history.append((response, exc))
retResponse = response if exc is None else None
retResponse = response if exc is None else qwarc.utils.DummyClientResponse()
if action in (ACTION_SUCCESS, ACTION_IGNORE):
return retResponse, tuple(history)
retResponse.qhistory = tuple(history)
return retResponse
elif action == ACTION_FOLLOW_OR_SUCCESS:
redirectUrl = response.headers.get('Location') or response.headers.get('URI')
if not redirectUrl:
return retResponse, tuple(history)
retResponse.qhistory = tuple(history)
return retResponse
url = url.join(yarl.URL(redirectUrl))
if response.status in (301, 302, 303) and method == 'POST':
method = 'GET'
@@ -140,7 +141,8 @@ class Item:
attempt = 0
elif action == ACTION_RETRIES_EXCEEDED:
self.logger.error(f'Request for {url} failed {attempt} times')
return retResponse, tuple(history)
retResponse.qhistory = tuple(history)
return retResponse
elif action == ACTION_RETRY:
# Nothing to do, just go to the next cycle
pass


+ 8
- 2
qwarc/aiohttp.py View File

@@ -84,6 +84,7 @@ class ClientResponse(aiohttp.client_reqrep.ClientResponse):
super().__init__(*args, **kwargs)
self._rawData = None
self._remoteAddress = None
self._qhistory = None # _history is used by aiohttp internally

async def start(self, connection, readUntilEof):
self._rawData = connection.protocol.rawData
@@ -110,8 +111,13 @@ class ClientResponse(aiohttp.client_reqrep.ClientResponse):
def remoteAddress(self):
return self._remoteAddress

def set_history(self, history):
self._history = history #FIXME: Uses private attribute of aiohttp.client_reqrep.ClientResponse
@property
def qhistory(self):
    '''Return whatever was last stored through the qhistory setter.'''
    return self._qhistory

@qhistory.setter
def qhistory(self, value):
    # Stored under _qhistory because _history is used by aiohttp internally.
    self._qhistory = value

def iter_all(self):
    '''Return an iterator over the entries of self.history followed by this response itself.'''
    own = (self,)
    return itertools.chain(self.history, own)


+ 18
- 0
qwarc/utils.py View File

@@ -260,3 +260,21 @@ class ReadonlyFileView:
if key == 'writable':
return False
return getattr(self._fp, key)


class DummyClientResponse:
    '''
    Stand-in object for when no actual ClientResponse is available.

    It is ClientResponse-like only in that it offers the same qhistory attribute.
    Instances always evaluate to False when cast to a bool, so a plain `if response:` check skips them.
    '''

    def __init__(self):
        # Nothing is recorded until a caller assigns qhistory.
        self._qhistory = None

    def __bool__(self):
        '''Always False, regardless of any stored history.'''
        return False

    @property
    def qhistory(self):
        '''The value most recently assigned to qhistory, or None if nothing was assigned yet.'''
        return self._qhistory

    @qhistory.setter
    def qhistory(self, value):
        self._qhistory = value

Loading…
Cancel
Save