Browse Source

Progress info for large files.

master
Alard 11 years ago
parent
commit
5b5bcf3c62
1 changed file with 67 additions and 15 deletions
  1. +67
    -15
      megawarc

+ 67
- 15
megawarc View File

@@ -76,23 +76,65 @@ try:
except ImportError:
from ordereddict import OrderedDict

# open input_filename and write the data from offset to
# (offset+size) to stream
def copy_to_stream(stream, input_filename, offset, size):
with open(input_filename, "r") as f:
f.seek(offset)
class ProgressInfo(object):
    """Single-line percentage indicator written to stderr.

    The indicator is active only when ``sys.stderr`` is a terminal; when it
    is not, ``print_status()`` and ``clear()`` write nothing.

    The sign of ``maximum`` selects the counting mode:

    * ``maximum > 0`` -- count up: ``update()`` receives the amount
      processed so far, in the range ``0 .. maximum``.
    * ``maximum < 0`` -- count down: ``update()`` receives the *negated*
      amount still remaining, in the range ``maximum .. 0``.

    In both modes the displayed number is the percentage completed.
    """

    # Visible width of the status text written by print_status():
    # one space, a three-column number and a percent sign.
    _STATUS_WIDTH = 5

    def __init__(self, maximum):
        """Create the indicator and print the initial status."""
        self._maximum = maximum
        # In count-down mode nothing has been processed yet, so the whole
        # amount is still remaining; start there so the first status is 0%.
        self._current = maximum if maximum < 0 else 0
        self._previous_percentage = None
        self._active = sys.stderr.isatty()
        self.print_status()

    def update(self, new_value):
        """Record the new position and refresh the displayed percentage."""
        self._current = new_value
        self.print_status()

    def print_status(self):
        """Write the completed percentage to stderr if it has changed."""
        if not self._active:
            return

        if self._maximum == 0:
            # Nothing to process: report completion instead of dividing
            # by zero.
            percentage = 100
        else:
            percentage = int(float(self._current) / float(self._maximum) * 100)
            if self._maximum < 0:
                # Count down: the ratio above is the share still remaining
                # (negative / negative), so the completed share is its
                # complement.
                percentage = 100 - percentage
        percentage = max(0, min(100, percentage))
        # Only touch the terminal when the visible number changes.
        if self._previous_percentage != percentage:
            self._previous_percentage = percentage
            sys.stderr.write("\r %3d%%" % percentage)

    def clear(self):
        """Erase the status text and deactivate the indicator."""
        if self._active:
            # Overwrite the full width of the status text with blanks and
            # return the cursor to the start of the line.
            sys.stderr.write("\r" + " " * self._STATUS_WIDTH + "\r")
            self._active = False

to_read = size
while to_read > 0:
buf_size = min(to_read, 4096)
buf = f.read(buf_size)
l = len(buf)
if l < buf_size:
raise Exception("End of file: %d bytes expected, but %d bytes read." % (buf_size, l))
stream.write(buf)
to_read -= l

stream.flush()
# open input_filename and write the data from offset to
# (offset+size) to stream
def copy_to_stream(stream, input_filename, offset, size, verbose=False):
    """Copy ``size`` bytes of ``input_filename``, starting at ``offset``.

    The data is read in chunks of at most 4096 bytes and written to
    ``stream``, which is flushed once the copy is complete.

    When ``verbose`` is true and ``size`` exceeds 10 MiB, a ``ProgressInfo``
    indicator is created in count-down mode and updated after every chunk;
    it is always cleared again, also when the copy fails.

    Raises ``Exception`` if the file ends before ``size`` bytes were read.
    """
    if verbose and size > 10 * 1024 * 1024:
        progress = ProgressInfo(-size)
    else:
        progress = None

    try:
        # Binary mode: the byte range must be copied verbatim, without
        # newline translation or text decoding, and so that byte offsets
        # passed to seek() are exact.
        with open(input_filename, "rb") as f:
            f.seek(offset)

            to_read = size
            while to_read > 0:
                buf_size = min(to_read, 4096)
                buf = f.read(buf_size)
                read_len = len(buf)
                if read_len < buf_size:
                    raise Exception("End of file: %d bytes expected, but %d bytes read." % (buf_size, read_len))
                stream.write(buf)
                to_read -= read_len
                if progress:
                    # Count-down mode expects the negated remaining amount.
                    progress.update(-to_read)

        stream.flush()
    finally:
        if progress:
            progress.clear()


# part of a stream as a file
@@ -165,6 +207,10 @@ class CopyReader(object):
def test_gz(filename, offset, size, verbose=False, copy_to_file=None):
with open(filename, "r") as f_stream:
f = RangeFile(f_stream, offset, size)
if verbose and size > 10 * 1024 * 1024:
progress = ProgressInfo(-size)
else:
progress = None
if copy_to_file:
f = CopyReader(f, copy_to_file)
start_pos = copy_to_file.tell()
@@ -178,6 +224,8 @@ def test_gz(filename, offset, size, verbose=False, copy_to_file=None):
while True:
buf = f.read(4096)
size -= len(buf)
if progress:
progress.update(-size)
if len(buf) > 0:
gz.stdin.write(buf)
else:
@@ -186,7 +234,11 @@ def test_gz(filename, offset, size, verbose=False, copy_to_file=None):
ret = gz.wait()
if ret != 0:
raise IOError("Could not decompress warc.gz. gunzip returned %d." % ret)
if progress:
progress.clear()
except (IOError, OSError) as e:
if progress:
progress.clear()
if verbose:
print >>sys.stderr, e
if copy_to_file:


Loading…
Cancel
Save