class ProgressInfo(object):
    """Single-line percentage indicator written to stderr.

    The indicator is only active when stderr is a terminal; otherwise
    every method is a silent no-op.

    maximum -- the value that corresponds to the end of the operation.
               A positive maximum counts up: update() receives values
               going from 0 to maximum.  A negative maximum counts
               down: update() receives minus the amount still
               remaining, going from maximum up to 0.
    """

    # Number of characters produced by the " %3d%%" status format;
    # clear() has to blank exactly this many columns.
    _STATUS_WIDTH = 5

    def __init__(self, maximum):
        # When counting down nothing has been processed yet, which means
        # the full amount (== maximum) is still remaining.
        if maximum < 0:
            self._current = maximum
        else:
            self._current = 0
        self._maximum = maximum
        self._previous_percentage = None
        self._active = sys.stderr.isatty()
        self.print_status()

    def update(self, new_value):
        """Record the new position and refresh the display if needed."""
        self._current = new_value
        self.print_status()

    def print_status(self):
        """Write the current percentage, but only when it has changed."""
        if not self._active:
            return

        if self._maximum == 0:
            # Nothing to do at all: avoid dividing by zero and report
            # the operation as complete.
            percentage = 100
        else:
            percentage = int(
                float(self._current) / float(self._maximum) * 100)
            if self._maximum < 0:
                # count down: the ratio above is the fraction that is
                # still remaining, so invert it to get the part done.
                percentage = 100 - percentage
        percentage = max(0, min(100, percentage))

        if self._previous_percentage != percentage:
            self._previous_percentage = percentage
            sys.stderr.write("\r %3d%%" % percentage)

    def clear(self):
        """Erase the status text and deactivate the indicator."""
        if self._active:
            sys.stderr.write("\r" + " " * self._STATUS_WIDTH + "\r")
            self._active = False


def copy_to_stream(stream, input_filename, offset, size, verbose=False):
    """Copy size bytes of input_filename, starting at offset, to stream.

    stream         -- object with write() and flush() that accepts bytes
    input_filename -- path of the file to read from
    offset         -- byte position at which to start reading
    size           -- number of bytes to copy
    verbose        -- when true and size exceeds 10 MiB, show a
                      ProgressInfo percentage on stderr while copying

    Raises Exception when the file ends before size bytes were read.
    """
    if verbose and size > 10 * 1024 * 1024:
        # Negative maximum: progress is reported as bytes remaining.
        progress = ProgressInfo(-size)
    else:
        progress = None

    try:
        # Binary mode: offset and size are byte counts, and the data
        # must reach the stream without newline translation or decoding.
        with open(input_filename, "rb") as f:
            f.seek(offset)

            to_read = size
            while to_read > 0:
                buf_size = min(to_read, 4096)
                buf = f.read(buf_size)
                l = len(buf)
                if l < buf_size:
                    raise Exception("End of file: %d bytes expected, but %d bytes read." % (buf_size, l))
                stream.write(buf)
                to_read -= l
                if progress is not None:
                    progress.update(-to_read)

            stream.flush()
    finally:
        # Always remove the indicator, also when the copy failed.
        if progress is not None:
            progress.clear()