From 2fc496ec4086fc6bd4029eb9da273d506add56b8 Mon Sep 17 00:00:00 2001 From: arkiver Date: Wed, 1 Apr 2020 04:10:57 +0200 Subject: [PATCH] Fix processing of GZ WARCs. --- .gitignore | 1 + megawarc | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b25c15b --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*~ diff --git a/megawarc b/megawarc index 17bf41f..20f0411 100755 --- a/megawarc +++ b/megawarc @@ -435,8 +435,9 @@ class MegawarcPacker(object): elif filename.endswith(".gz"): dict_id = None if "gz" not in self.megawarcs: + base = self.output_basename self.megawarcs["gz"] = { - "warc": {"file": open(base + ".megawarc.warc.zst", "wb")}, + "warc": {"file": open(base + ".megawarc.warc.gz", "wb")}, "json": {"file": gzip.open(base + ".megawarc.json.gz", "wb")}, "tar": { "file": open(base + ".megawarc.tar", "wb"),