diff --git a/qwarc/__init__.py b/qwarc/__init__.py index 0b2b3e4..b52777f 100644 --- a/qwarc/__init__.py +++ b/qwarc/__init__.py @@ -263,14 +263,11 @@ class QWARC: raise async def run(self, loop): - headers = [('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0')] #TODO: Move elsewhere - for i in range(self._concurrency): session = _aiohttp.ClientSession( connector = qwarc.aiohttp.TCPConnector(loop = loop), request_class = qwarc.aiohttp.ClientRequest, response_class = qwarc.aiohttp.ClientResponse, - skip_auto_headers = ['Accept-Encoding'], loop = loop ) self._sessions.append(session) @@ -332,7 +329,7 @@ class QWARC: raise session = self._freeSessions.popleft() - item = self._make_item(itemType, itemValue, session, headers) + item = self._make_item(itemType, itemValue, session, DEFAULT_HEADERS) task = asyncio.ensure_future(item.process()) #TODO: Is there a better way to add custom information to a task/coroutine object? task.taskType = 'process' diff --git a/qwarc/aiohttp.py b/qwarc/aiohttp.py index ab5012a..a1e407f 100644 --- a/qwarc/aiohttp.py +++ b/qwarc/aiohttp.py @@ -72,6 +72,8 @@ class TCPConnector(aiohttp.connector.TCPConnector): class ClientRequest(aiohttp.client_reqrep.ClientRequest): + DEFAULT_HEADERS = {} + def send(self, connection): connection.protocol.reset_raw_data() return super().send(connection) diff --git a/qwarc/const.py b/qwarc/const.py index eb57fd4..1c0db65 100644 --- a/qwarc/const.py +++ b/qwarc/const.py @@ -25,3 +25,9 @@ ACTION_FOLLOW_OR_SUCCESS = 3 ACTION_RETRIES_EXCEEDED = 4 '''This request failed repeatedly and exceeded the retry limit.''' + +DEFAULT_HEADERS = [ + ('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0'), + ('Accept', '*/*'), +] +'''The default HTTP headers sent on every request if not overridden'''