|
|
@@ -30,7 +30,7 @@ class Item: |
|
|
|
|
|
|
|
self.childItems = [] |
|
|
|
|
|
|
|
async def fetch(self, url, responseHandler = qwarc.utils.handle_response_default, method = 'GET', data = None): |
|
|
|
async def fetch(self, url, responseHandler = qwarc.utils.handle_response_default, method = 'GET', data = None, headers = []): |
|
|
|
''' |
|
|
|
HTTP GET or POST a URL |
|
|
|
|
|
|
@@ -38,6 +38,7 @@ class Item: |
|
|
|
responseHandler: a callable that determines how the response is handled. See qwarc.utils.handle_response_default for details. |
|
|
|
method: str, must be 'GET' or 'POST' |
|
|
|
data: dict or list/tuple of lists/tuples of length two or bytes or file-like or None, the data to be sent in the request body |
|
|
|
headers: list of 2-tuples, additional headers for this request only |
|
|
|
|
|
|
|
Returns response (a ClientResponse object or None) and history (a tuple of (response, exception) tuples). |
|
|
|
response can be None and history can be an empty tuple, depending on the circumstances (e.g. timeouts). |
|
|
@@ -47,6 +48,8 @@ class Item: |
|
|
|
|
|
|
|
url = yarl.URL(url) # Explicitly convert for normalisation, percent-encoding, etc. |
|
|
|
assert method in ('GET', 'POST'), 'method must be GET or POST' |
|
|
|
headers = self.headers + headers |
|
|
|
#TODO Deduplicate headers with later values overriding earlier ones |
|
|
|
history = [] |
|
|
|
attempt = 0 |
|
|
|
#TODO redirectLevel |
|
|
@@ -60,7 +63,7 @@ class Item: |
|
|
|
try: |
|
|
|
with _aiohttp.Timeout(60): |
|
|
|
logging.info('Fetching {}'.format(url)) |
|
|
|
response = await self.session.request(method, url, data = data, headers = self.headers, allow_redirects = False) |
|
|
|
response = await self.session.request(method, url, data = data, headers = headers, allow_redirects = False) |
|
|
|
try: |
|
|
|
ret = await response.text(errors = 'surrogateescape') |
|
|
|
except: |
|
|
|