diff --git a/qwarc/__init__.py b/qwarc/__init__.py index 98f4c3f..c501765 100644 --- a/qwarc/__init__.py +++ b/qwarc/__init__.py @@ -74,8 +74,7 @@ class Item: verify_ssl: bool, whether the SSL/TLS certificate should be validated timeout: int or float, how long the fetch may take at most in total (sending request until finishing reading the response) - Returns response (a ClientResponse object or None) and history (a tuple of (response, exception) tuples). - response can be None and history can be an empty tuple, depending on the circumstances (e.g. timeouts). + Returns response (a ClientResponse object or a qwarc.utils.DummyClientResponse object) ''' #TODO: Rewrite using 'async with self.session.get' @@ -126,13 +125,15 @@ class Item: if response and exc is None and writeToWarc: self.warc.write_client_response(response) history.append((response, exc)) - retResponse = response if exc is None else None + retResponse = response if exc is None else qwarc.utils.DummyClientResponse() if action in (ACTION_SUCCESS, ACTION_IGNORE): - return retResponse, tuple(history) + retResponse.qhistory = tuple(history) + return retResponse elif action == ACTION_FOLLOW_OR_SUCCESS: redirectUrl = response.headers.get('Location') or response.headers.get('URI') if not redirectUrl: - return retResponse, tuple(history) + retResponse.qhistory = tuple(history) + return retResponse url = url.join(yarl.URL(redirectUrl)) if response.status in (301, 302, 303) and method == 'POST': method = 'GET' @@ -140,7 +141,8 @@ class Item: attempt = 0 elif action == ACTION_RETRIES_EXCEEDED: self.logger.error(f'Request for {url} failed {attempt} times') - return retResponse, tuple(history) + retResponse.qhistory = tuple(history) + return retResponse elif action == ACTION_RETRY: # Nothing to do, just go to the next cycle pass diff --git a/qwarc/aiohttp.py b/qwarc/aiohttp.py index 931a7f0..92ee5ea 100644 --- a/qwarc/aiohttp.py +++ b/qwarc/aiohttp.py @@ -84,6 +84,7 @@ class ClientResponse(aiohttp.client_reqrep.ClientResponse): super().__init__(*args, **kwargs) self._rawData = None self._remoteAddress = None + self._qhistory = None # _history is used by aiohttp internally async def start(self, connection, readUntilEof): self._rawData = connection.protocol.rawData @@ -110,8 +111,13 @@ class ClientResponse(aiohttp.client_reqrep.ClientResponse): def remoteAddress(self): return self._remoteAddress - def set_history(self, history): - self._history = history #FIXME: Uses private attribute of aiohttp.client_reqrep.ClientResponse + @property + def qhistory(self): + return self._qhistory + + @qhistory.setter + def qhistory(self, history): + self._qhistory = history def iter_all(self): return itertools.chain(self.history, (self,)) diff --git a/qwarc/utils.py b/qwarc/utils.py index 29599f5..93fd624 100644 --- a/qwarc/utils.py +++ b/qwarc/utils.py @@ -260,3 +260,21 @@ class ReadonlyFileView: if key == 'writable': return False return getattr(self._fp, key) + + +class DummyClientResponse: + '''A ClientResponse-like object for when no actual ClientResponse is available. Always evaluates to False when cast to a bool.''' + + def __init__(self): + self._qhistory = None + + @property + def qhistory(self): + return self._qhistory + + @qhistory.setter + def qhistory(self, history): + self._qhistory = history + + def __bool__(self): + return False