#!/bin/bash
# Usage: archivebot-high-resources
#
# Prints the CPU, RAM, and disk usage (log + DB) of all running jobs in a
# table, with particularly bad values highlighted in red.
# If stdout is not a TTY, the output is not tabularised or coloured.
# Output order is arbitrary.

#######################################
# Emit one sample line per running wpull job found via `ps`.
# Arguments:
#   $1 - label copied verbatim into the first output column
#        (the consumer below expects "before" or "after")
#   $2 - optional; when non-empty, the (slow) du measurement is skipped
#        and -1 is reported as the disk usage instead
# Outputs:
#   One line per job on stdout:
#     <label> <pid> <jobid> <utime> <rss> <du>
#   utime is in clock ticks; rss is what `ps -o rss` reports; du is the
#   summed `du -k` size of <job path>/wpull.* (or -1 when disabled).
#######################################
function get_utime_rss_du {
  local label="$1"
  local skip_du="${2:-}"
  local pid jobid path rss stat_line utime du
  local -a stat_fields

  while read -r pid jobid path rss; do
    # The process may have exited since ps ran; silently skip it rather than
    # emitting an incomplete line that the consumer cannot parse.
    { stat_line="$(<"/proc/${pid}/stat")"; } 2>/dev/null || continue

    # Field 2 (comm) is parenthesised and may itself contain spaces, so drop
    # everything up to the last ") " before splitting. What remains starts at
    # field 3, hence utime (field 14, in clock ticks) is at index 11.
    stat_line="${stat_line##*) }"
    read -r -a stat_fields <<<"${stat_line}"
    utime="${stat_fields[11]:-}"
    [[ "${utime}" =~ ^[0-9]+$ ]] || continue

    if [[ -n "${skip_du}" ]]; then
      du=-1
    else
      #TODO This is kind of slow; is there a faster way?
      # Builds "size1+size2+...+0" and lets the shell sum it up.
      du="$(($(du -k -s -- "${path}"/wpull.* | cut -d $'\t' -f 1 | tr '\n' '+'; echo 0)))"
    fi

    printf '%s %s %s %s %s %s\n' \
      "${label}" "${pid}" "${jobid}" "${utime}" "${rss}" "${du}"
  done < <(
    # sed rewrites "PID RSS CMD..." into "PID JOBID PATH RSS". -n/p drops
    # lines that do not look like a wpull job instead of passing them through
    # unchanged, and [^[:space:]] is used because "\s" is not a whitespace
    # class inside a bracket expression.
    ps -a -o pid,rss,cmd \
      | grep '[w]pull' \
      | sed -n 's,^\s*\([0-9][0-9]*\)\s\s*\([0-9][0-9]*\)\s\s*.*\s\([^[:space:]]*/\([0-9a-z][0-9a-z]*\)\)/[w]pull\.log\s.*$,\1 \4 \3 \2,p'
  )
}

# Seconds between the two CPU time samples.
interval=5
clk_tck="$(getconf CLK_TCK)"

# 0 = is not tty, 1 = is tty
if [[ -t 1 ]]; then
  stdout_tty=1
else
  stdout_tty=0
fi

# Take two samples $interval seconds apart and let Python turn the utime
# difference into a CPU percentage. Only the "after" sample's disk usage is
# reported, so the "before" pass skips du; this also keeps the real time
# between the two samples close to $interval.
{
  get_utime_rss_du before no-du
  sleep "${interval}"
  get_utime_rss_du after
} | python3 -c \
  "$(cat <<'EOF'
import sys

CLK_TCK = int(sys.argv[1])
INTERVAL = int(sys.argv[2])
STDOUT_TTY = bool(int(sys.argv[3]))
RED = '\x1b[0;31m' if STDOUT_TTY else ''
RESET = '\x1b[0m' if STDOUT_TTY else ''

# stats[label]["<pid>_<jobid>"] = (pid, jobid, utime, rss, du)
stats = {"before": {}, "after": {}}
for line in sys.stdin:
    fields = line.split()
    # Ignore anything that is not a complete sample line.
    if len(fields) != 6 or fields[0] not in stats:
        continue
    t, pid, jobid, utime, rss, du = fields
    try:
        stats[t][f"{pid}_{jobid}"] = (int(pid), jobid, int(utime), int(rss), int(du))
    except ValueError:
        continue

print('PID', 'JOBID', 'CPU[%]', 'RSS[KiB]', 'DISK[KiB]')
# Only jobs present in both samples can have a CPU percentage computed.
for key in set(stats['before'].keys()) & set(stats['after'].keys()):
    pid, jobid, utime0, _, _ = stats['before'][key]
    _, _, utime1, rss, du = stats['after'][key]
    cpup = (utime1 - utime0) / CLK_TCK / INTERVAL * 100
    print('{} {} {}{:.1f}{} {}{}{} {}{}{}'.format(
        pid, jobid,
        RED if cpup >= 75 else '', cpup, RESET,
        RED if rss >= 200_000 else '', rss, RESET,
        RED if du >= 10_000_000 else '', du, RESET))
EOF
)" \
  "${clk_tck}" \
  "${interval}" \
  "${stdout_tty}" \
  | if [[ "${stdout_tty}" == 1 ]]; then column -t; else cat; fi