Преглед на файлове

Add warcinfo record with version information on Python, system, and dependencies

tags/v0.2.0
JustAnotherArchivist преди 4 години
родител
ревизия
85d78cee13
променени са 2 файла, в които са добавени 46 реда и са изтрити 0 реда
  1. +34
    -0
      qwarc/utils.py
  2. +12
    -0
      qwarc/warc.py

+ 34
- 0
qwarc/utils.py Целия файл

@@ -1,7 +1,11 @@
from qwarc.const import *
import aiohttp
import asyncio
import functools
import logging
import os
import pkg_resources
import platform


# Size of one memory page in bytes, queried from the OS once at import time.
# os.sysconf is only available on Unix-like platforms.
PAGESIZE = os.sysconf('SC_PAGE_SIZE')
@@ -181,3 +185,33 @@ def handle_response_limit_error_retries(maxRetries, handler = handle_response_de
action = ACTION_RETRIES_EXCEEDED
return action, writeToWarc
return _handler


def _get_dependency_versions(pkg):
    """Yield ``(key, version)`` for *pkg* and each of its transitive requirements.

    Starting from the distribution named *pkg*, the requirement graph is
    walked via ``pkg_resources``. Every distribution key is visited at most
    once. Because the work list is a set, the order in which distributions
    are yielded is unspecified.

    A distribution that cannot be found is logged as an error and skipped;
    the remaining distributions are still reported.
    """
    pending = {pkg}  # keys still to be resolved
    have = {pkg}  # every key ever queued, so nothing is queued twice
    while pending:
        key = pending.pop()
        try:
            dist = pkg_resources.get_distribution(key)
        except pkg_resources.DistributionNotFound:
            logging.error(f'Unable to get distribution {key}')
            # Without skipping here, `dist` would be unbound on the first
            # iteration (UnboundLocalError) or still refer to the previous
            # distribution, which would then be yielded a second time.
            continue
        yield dist.key, dist.version
        for requirement in dist.requires():
            if requirement.key not in have:
                pending.add(requirement.key)
                have.add(requirement.key)


@functools.lru_cache(maxsize = 1)
def get_software_info():
    """Return a dict describing the platform, the Python interpreter, and package versions.

    The result has three keys: 'platform', 'python' (implementation, version
    and build), and 'self' (a list of package/version entries for this
    package and its dependencies). The function takes no arguments and is
    wrapped in an LRU cache, so the information is gathered on the first call
    and the same dict object is returned afterwards.
    """
    # Taken from crocoite.utils, authored by PromyLOPh in commit 6ccd72ab on 2018-12-08 under MIT licence
    system_description = platform.platform()
    interpreter = {
        'implementation': platform.python_implementation(),
        'version': platform.python_version(),
        'build': platform.python_build(),
    }
    packages = []
    for name, release in _get_dependency_versions(__package__):
        packages.append({"package": name, "version": release})
    return {
        'platform': system_description,
        'python': interpreter,
        'self': packages,
    }

+ 12
- 0
qwarc/warc.py Целия файл

@@ -1,6 +1,8 @@
import fcntl
import io
import json
import logging
import qwarc.utils
import time
import warcio

@@ -46,6 +48,16 @@ class WARC:
self._warcWriter = warcio.warcwriter.WARCWriter(self._file, gzip = True)
self._closed = False
self._counter += 1
self.write_warcinfo_record()

def write_warcinfo_record(self):
    """Write a 'warcinfo' record holding the software information as JSON.

    The payload is the dict from qwarc.utils.get_software_info(), serialised
    as UTF-8 encoded JSON with an indent of 2. The record is created and
    written through this object's WARC writer.
    """
    writer = self._warcWriter
    info_json = json.dumps(qwarc.utils.get_software_info(), indent = 2)
    body = io.BytesIO(info_json.encode('utf-8'))
    headers = {'Content-Type': 'application/json; charset=utf-8'}
    record = writer.create_warc_record(
        'urn:qwarc:warcinfo',
        'warcinfo',
        payload = body,
        warc_headers_dict = headers,
    )
    writer.write_record(record)

def write_client_response(self, response):
'''


Зареждане…
Отказ
Запис