#!/bin/bash
# Usage: archivebot-high-resources
# Prints the CPU, RAM, and disk usage (log + DB) of all running jobs as a table, with particularly bad values highlighted in red.
# CPU usage is measured between two samples taken a few seconds apart (see `interval` below), so the script takes at least that long to run.
# If stdout is not a TTY, the output is neither tabularised nor coloured.
# Output order is arbitrary.

function get_utime_rss_du {
  # Samples every running wpull job found via ps and prints one line per job:
  #   <tag> <pid> <jobid> <utime> <rss> <du>
  # where utime is field 14 of /proc/<pid>/stat (clock ticks), rss is the value
  # reported by `ps -o rss`, and du is the summed `du -k` size of <jobdir>/wpull.*.
  #
  # Arguments:
  #   $1 - tag copied verbatim into the first output field
  #   $2 - optional; when non-empty, du is skipped and reported as -1
  local tag="${1:-}" skip_du="${2:-}"
  local pid jobid path rss utime disk_kib sample
  local -a samples=()

  # Pass 1: read utime for all jobs back to back, before any (slow) du call, so
  # that every job is sampled as close as possible to the same point in time.
  while read -r pid jobid path rss; do
    # The process may have exited since ps ran; silently skip it rather than
    # emitting a record with a missing field.
    utime="$(cut -d ' ' -f 14 2>/dev/null <"/proc/${pid}/stat")" || continue
    [[ -n "${utime}" ]] || continue
    samples+=("${pid} ${jobid} ${path} ${rss} ${utime}")
  done < <(
    # sed rewrites "<pid> <rss> ... <dir>/<jobid>/wpull.log ..." to
    # "<pid> <jobid> <dir>/<jobid> <rss>"; -n ... p drops every line that does
    # not have this shape. The [w] keeps grep and sed from matching themselves.
    ps -a -o pid,rss,cmd \
      | grep '[w]pull' \
      | sed -n 's,^\s*\([0-9][0-9]*\)\s\s*\([0-9][0-9]*\)\s\s*.*\s\([^[:space:]]*/\([0-9a-z][0-9a-z]*\)\)/[w]pull\.log\s.*$,\1 \4 \3 \2,p'
  )

  # Pass 2: optionally measure disk usage, then emit the records.
  for sample in "${samples[@]}"; do
    read -r pid jobid path rss utime <<<"${sample}"
    if [[ -n "${skip_du}" ]]; then
      disk_kib=-1
    else
      #TODO This is kind of slow; is there a faster way?
      disk_kib="$(du -k -s -- "${path}"/wpull.* | awk '{ total += $1 } END { print total + 0 }')"
    fi
    printf '%s %s %s %s %s %s\n' "${tag}" "${pid}" "${jobid}" "${utime}" "${rss}" "${disk_kib}"
  done
}
|
|
|
|
|
|
|
# Seconds between the two utime samples; also the divisor used for CPU%.
interval=5
# Clock ticks per second, needed to convert utime deltas into seconds.
clk_tck="$(getconf CLK_TCK)"
# 1 = stdout is a TTY, 0 = it is not. Passed to Python (colouring) and used
# below to decide whether the output is piped through column.
if [[ -t 1 ]]; then stdout_tty=1; else stdout_tty=0; fi

# Take a cheap first sample (no disk usage), wait, take a full second sample,
# and let Python turn the two sets of records into the report. The script text
# is passed via -c because stdin carries the sample records.
{
  get_utime_rss_du before no-du
  sleep "${interval}"
  get_utime_rss_du after
} | python3 -c \
  "$(cat <<'EOF'
import sys

CLK_TCK = int(sys.argv[1])
INTERVAL = int(sys.argv[2])
STDOUT_TTY = bool(int(sys.argv[3]))

RED = '\x1b[0;31m' if STDOUT_TTY else ''
RESET = '\x1b[0m' if STDOUT_TTY else ''

# Values at or above these limits are highlighted.
CPU_LIMIT_PERCENT = 75
RSS_LIMIT_KIB = 200_000
DISK_LIMIT_KIB = 10_000_000


def cell(text, bad):
    # RESET is appended unconditionally, RED only for bad values.
    return '{}{}{}'.format(RED if bad else '', text, RESET)


# Input records: <tag> <pid> <jobid> <utime> <rss> <du>, tag being before/after.
stats = {'before': {}, 'after': {}}
for line in sys.stdin:
    fields = line.split()
    try:
        t, pid, jobid, utime, rss, du = fields
        record = (int(pid), jobid, int(utime), int(rss), int(du))
        stats[t][f'{pid}_{jobid}'] = record
    except (ValueError, KeyError):
        # Wrong field count, non-numeric value, or unknown tag: skip the record
        # instead of aborting the whole report.
        print(f'Skipping malformed record: {line!r}', file=sys.stderr)

print('PID', 'JOBID', 'CPU[%]', 'RSS[KiB]', 'DISK[KiB]')
# Only jobs seen in both samples can be reported; set order is arbitrary.
for key in stats['before'].keys() & stats['after'].keys():
    pid, jobid, utime0, _, _ = stats['before'][key]
    _, _, utime1, rss, du = stats['after'][key]
    # ticks -> seconds of CPU time -> share of the interval in percent
    cpup = (utime1 - utime0) / CLK_TCK / INTERVAL * 100
    print(
        pid,
        jobid,
        cell(f'{cpup:.1f}', cpup >= CPU_LIMIT_PERCENT),
        cell(rss, rss >= RSS_LIMIT_KIB),
        cell(du, du >= DISK_LIMIT_KIB),
    )
EOF
  )" \
  "${clk_tck}" \
  "${interval}" \
  "${stdout_tty}" \
  | if [[ "${stdout_tty}" == 1 ]]; then column -t; else cat; fi