Browse Source

Fix processing of GZ WARCs.

master
arkiver 4 years ago
parent
commit
2fc496ec40
2 changed files with 3 additions and 1 deletion
  1. +1
    -0
      .gitignore
  2. +2
    -1
      megawarc

+ 1
- 0
.gitignore View File

@@ -0,0 +1 @@
+*~

+ 2
- 1
megawarc View File

@@ -435,8 +435,9 @@ class MegawarcPacker(object):
elif filename.endswith(".gz"):
dict_id = None
if "gz" not in self.megawarcs:
base = self.output_basename
self.megawarcs["gz"] = {
- "warc": {"file": open(base + ".megawarc.warc.zst", "wb")},
+ "warc": {"file": open(base + ".megawarc.warc.gz", "wb")},
"json": {"file": gzip.open(base + ".megawarc.json.gz", "wb")},
"tar": {
"file": open(base + ".megawarc.tar", "wb"),


Loading…
Cancel
Save