The little things give you away... A collection of various small helper stuff
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

59 lines
2.0 KiB

  1. #!/bin/bash
  2. # Usage: archivebot-high-resources
  3. # Prints the CPU, RAM, and disk usage (log + DB) of all running jobs in a table with particularly bad values highlighted in red
  4. # If stdout is not a TTY, the output is not tabularised or coloured.
  5. # Output order is arbitrary.
  6. function get_utime_rss_du {
  7. # Non-empty argument 1 disables du
  8. while read -r pid jobid path rss; do
  9. # utime in clock ticks
  10. utime="$(cut -d ' ' -f 14 <"/proc/${pid}/stat")"
  11. if [[ "$1" ]]; then
  12. #TODO This is kind of slow; is there a faster way?
  13. du="$(($(du -k -s "${path}"/wpull.* | cut -d $'\t' -f 1 | tr '\n' '+'; echo 0)))"
  14. else
  15. du=-1
  16. fi
  17. printf '%s %s %s %s %s %s\n' "$1" "${pid}" "${jobid}" "${utime}" "${rss}" "${du}"
  18. done < <(ps -a -o pid,rss,cmd | grep '[w]pull' | sed 's,^\s*\([0-9][0-9]*\)\s\s*\([0-9][0-9]*\)\s\s*.*\s\([^\s]*/\([0-9a-z][0-9a-z]*\)\)/[w]pull\.log\s.*$,\1 \4 \3 \2,')
  19. }
  20. interval=5
  21. clk_tck="$(getconf CLK_TCK)"
  22. [ ! -t 1 ]; stdout_tty=$? # 0 = is not tty, 1 = is tty
  23. {
  24. get_utime_rss_du before no-du
  25. sleep "${interval}"
  26. get_utime_rss_du after
  27. } | python3 -c \
  28. "$(cat <<'EOF'
  29. import sys
  30. CLK_TCK = int(sys.argv[1])
  31. INTERVAL = int(sys.argv[2])
  32. STDOUT_TTY = bool(int(sys.argv[3]))
  33. RED = '\x1b[0;31m' if STDOUT_TTY else ''
  34. RESET = '\x1b[0m' if STDOUT_TTY else ''
  35. stats = {"before": {}, "after": {}}
  36. for t, pid, jobid, utime, rss, du in map(str.split, map(str.strip, sys.stdin)):
  37. stats[t][f"{pid}_{jobid}"] = (int(pid), jobid, int(utime), int(rss), int(du))
  38. print('PID', 'JOBID', 'CPU[%]', 'RSS[KiB]', 'DISK[KiB]')
  39. for key in set(stats['before'].keys()) & set(stats['after'].keys()):
  40. pid, jobid, utime0, _, _ = stats['before'][key]
  41. _, _, utime1, rss, du = stats['after'][key]
  42. cpup = (utime1 - utime0) / CLK_TCK / INTERVAL * 100
  43. print('{} {} {}{:.1f}{} {}{}{} {}{}{}'.format(pid, jobid, RED if cpup >= 75 else '', cpup, RESET, RED if rss >= 200_000 else '', rss, RESET, RED if du >= 10_000_000 else '', du, RESET))
  44. EOF
  45. )" \
  46. "${clk_tck}" \
  47. "${interval}" \
  48. "${stdout_tty}" \
  49. | if [[ "${stdout_tty}" == 1 ]]; then column -t; else cat; fi