소스 검색

Add fromResponse parameter for URL completion and automatic Referer header

master
JustAnotherArchivist 3 년 전
부모
커밋
59ae1183d2
1개의 변경된 파일, 12줄 추가 및 5줄 삭제
  1. +12
    -5
      qwarc/__init__.py

+ 12
- 5
qwarc/__init__.py 파일 보기

@@ -49,12 +49,15 @@ class Item:
else:
self._baseUrl = yarl.URL(baseUrl)

def _merge_headers(self, headers):
def _merge_headers(self, headers, extraHeaders = []):
d = {} # Preserves order from Python 3.7 (guaranteed) or CPython 3.6 (implementation detail)
keys = {} # casefolded key -> d key
for key, value in self.headers:
d[key] = value
keys[key.casefold()] = key
for key, value in extraHeaders:
d[key] = value
keys[key.casefold()] = key
for key, value in headers:
keyc = key.casefold()
if value is None:
@@ -75,7 +78,7 @@ class Item:
out.append((key, value))
return out

async def fetch(self, url, responseHandler = None, method = 'GET', data = None, headers = [], verify_ssl = True, timeout = 60):
async def fetch(self, url, responseHandler = None, method = 'GET', data = None, headers = [], verify_ssl = True, timeout = 60, fromResponse = None):
'''
HTTP GET or POST a URL

@@ -88,6 +91,7 @@ class Item:
If a header appears multiple times, only the last one is used. To send a header multiple times, pass a tuple of values.
verify_ssl: bool, whether the SSL/TLS certificate should be validated
timeout: int or float, how long the fetch may take at most in total (sending request until finishing reading the response)
fromResponse: ClientResponse or None; if provided, use fromResponse.url for the url completion (instead of self.baseUrl) and add it as a Referer header

Returns response (a ClientResponse object or a qwarc.utils.DummyClientResponse object)
'''
@@ -96,13 +100,16 @@ class Item:

url = yarl.URL(url) # Explicitly convert for normalisation, percent-encoding, etc.
if not url.scheme or not url.host:
if not self.baseUrl:
if fromResponse is not None:
url = fromResponse.url.join(url)
elif not self.baseUrl:
raise ValueError('Incomplete URL and no baseUrl to join it with')
url = self.baseUrl.join(url)
else:
url = self.baseUrl.join(url)
if responseHandler is None:
responseHandler = self.defaultResponseHandler
assert method in ('GET', 'POST'), 'method must be GET or POST'
headers = self._merge_headers(headers)
headers = self._merge_headers(headers, extraHeaders = [('Referer', str(fromResponse.url))] if fromResponse is not None else [])
history = []
attempt = 0
#TODO redirectLevel


불러오는 중...
취소
저장