From 5c654cb16bc7599cf492f4c9e97f8894799ab0ad Mon Sep 17 00:00:00 2001
From: JustAnotherArchivist
Date: Tue, 30 Apr 2019 14:27:58 +0000
Subject: [PATCH] Split out size formatting

---
Review notes, not part of the commit message because they follow the three-dash line:
- format-size adds up every whitespace-separated number read from stdin, when stdin is not a terminal, and from its arguments, then prints one total such as 1.23 KiB.
- Integer totals below 1024, including zero and negative totals, are printed as whole bytes, larger totals use two decimals, and the unit is capped at PiB.
- warc-size keeps its find invocation and now pipes the sizes into the format-size script located in the same directory as warc-size itself.
- NOTE(review): a total just under a unit boundary, for example 1048575 bytes, is shown as 1024.00 KiB rather than 1.00 MiB.
- NOTE(review): in warc-size the cd and pwd -P commands are joined by a semicolon, so a failed cd still yields a path, namely the current working directory.
- NOTE(review): the indentation inside the hunks was reconstructed as tabs, confirm against blob 754960f before applying.

 format-size | 37 +++++++++++++++++++++++++++++++++++++
 warc-size   |  2 +-
 2 files changed, 38 insertions(+), 1 deletion(-)
 create mode 100755 format-size

diff --git a/format-size b/format-size
new file mode 100755
index 0000000..754960f
--- /dev/null
+++ b/format-size
@@ -0,0 +1,37 @@
+#!/bin/bash
+# Format size in bytes into a readable string of the form "1.23 KiB"; reads from stdin and arguments and can take multiple numbers separated by any amount of whitespace including newlines
+{
+	if [ ! -t 0 ]; then cat; fi
+	echo "$@"
+} |
+	tr '\t ' '\n' |
+	awk '
+	BEGIN {
+		units[0] = "B";
+		units[1] = "KiB";
+		units[2] = "MiB";
+		units[3] = "GiB";
+		units[4] = "TiB";
+		units[5] = "PiB";
+	}
+
+	{
+		size += $1;
+	}
+
+	END {
+		if (size > 0) {
+			magnitude = int(log(size) / log(1024));
+			if (magnitude > 5) {
+				magnitude = 5;
+			}
+		} else {
+			magnitude = 0;
+		}
+		if (magnitude > 0) {
+			sizeformat = "%.2f";
+		} else {
+			sizeformat = "%d";
+		}
+		printf sizeformat " %s\n", size / (1024 ^ magnitude), units[magnitude];
+	}'
diff --git a/warc-size b/warc-size
index 8465a73..47c0308 100755
--- a/warc-size
+++ b/warc-size
@@ -1,3 +1,3 @@
 #!/bin/bash
 # Total size of all WARCs in the current directory (or subdirectories)
-find -name '*.warc.gz' -printf '%s\n' | awk 'BEGIN { units[0] = "B"; units[1] = "KiB"; units[2] = "MiB"; units[3] = "GiB"; units[4] = "TiB"; units[5] = "PiB"; } { size += $1 } END { if (size > 0) { magnitude = int(log(size) / log(1024)); if (magnitude > 5) { magnitude = 5; } } else { magnitude = 0; } if (magnitude > 0) { sizeformat = "%.2f"; } else { sizeformat = "%d"; } printf sizeformat " %s\n", size / (1024 ^ magnitude), units[magnitude]; }'
+find -name '*.warc.gz' -printf '%s\n' | "$(cd "$(dirname "$0")"; pwd -P)/format-size"