diff --git a/megawarc b/megawarc
index 6162c87..b30c897 100755
--- a/megawarc
+++ b/megawarc
@@ -258,14 +258,20 @@ class MegawarcBuilder(object):
   def process(self):
     with open(self.output_warc_filename, "wb") as warc_out:
       with open(self.output_tar_filename, "wb") as tar_out:
-        with gzip.open(self.output_json_filename, "wb") as json_out:
-          with tarfile.open(self.input_filename, "r") as tar:
+        json_out = gzip.open(self.output_json_filename, "wb")
+        try:
+          tar = tarfile.open(self.input_filename, "r")
+          try:
             for tarinfo in tar:
               self.process_entry(tarinfo, warc_out, tar_out, json_out)
 
             padding = (tarfile.RECORDSIZE - tar_out.tell()) % tarfile.RECORDSIZE
             if padding > 0:
               tar_out.write("\0" * padding)
+          finally:
+            tar.close()
+        finally:
+          json_out.close()
 
   def process_entry(self, entry, warc_out, tar_out, json_out):
     with open(self.input_filename, "r") as tar:
@@ -342,7 +348,8 @@ class MegawarcPacker(object):
   def process(self, filelist):
     with open(self.output_warc_filename, "wb") as warc_out:
       with open(self.output_tar_filename, "wb") as tar_out:
-        with gzip.open(self.output_json_filename, "wb") as json_out:
+        json_out = gzip.open(self.output_json_filename, "wb")
+        try:
           def each_file(arg, dirname, names):
             for n in names:
               n = os.path.join(dirname, n)
@@ -358,6 +365,8 @@ class MegawarcPacker(object):
           padding = (tarfile.RECORDSIZE - tar_out.tell()) % tarfile.RECORDSIZE
           if padding > 0:
             tar_out.write("\0" * padding)
+        finally:
+          json_out.close()
 
   def process_file(self, filename, warc_out, tar_out, json_out):
     # make tar header
@@ -453,7 +462,8 @@ class MegawarcRestorer(object):
     self.input_json_filename = output_filename + ".megawarc.json.gz"
 
   def process(self):
-    with gzip.open(self.input_json_filename, "rb") as json_in:
+    json_in = gzip.open(self.input_json_filename, "rb")
+    try:
       with open(self.output_filename, "wb") as tar_out:
         for line in json_in:
           entry = json.loads(line)
@@ -462,6 +472,8 @@ class MegawarcRestorer(object):
         padding = (tarfile.RECORDSIZE - tar_out.tell()) % tarfile.RECORDSIZE
         if padding > 0:
           tar_out.write("\0" * padding)
+    finally:
+      json_in.close()
 
   def process_entry(self, entry, tar_out):