Refactor the Bash/Python abomination into a pure Python script so I get to keep my sanity while editing

4 years ago · 3f7d84ab12
--- a/+ 163
+++ b/+ 163
@@ -1,227 +1,171 @@
 #!/bin/bash
 declare -a columns columndefs columnattributes
 # Three parallel arrays: index i of each one describes the same output column.

 # Column names, in output order.
 columns+=(
 	'jobid'
 	'url'
 	'user'
 	'pipenick'
 	'queued'
 	'started'
 	'last active'
 )

 # Python expression producing each column's value for one job.
 columndefs+=(
 	'job["job_data"]["ident"]'
 	'job["job_data"]["url"]'
 	'job["job_data"]["started_by"]'
 	'pipelines[job["job_data"]["pipeline_id"]] if job["job_data"]["pipeline_id"] in pipelines else "unknown"'
 	'job["job_data"]["queued_at"]'
 	'job["job_data"]["started_at"]'
 	'int(job["ts"])'
 )

 # Comma-separated attribute list per column; empty means no attributes.
 columnattributes+=(
 	''
 	''
 	''
 	''
 	'date'
 	'date'
 	'date,coloured'
 )

 # Check whether a name is a known column.
 # Arguments: $1 - candidate column name (compared as-is; no case folding happens here)
 # Returns:   0 if $1 exactly equals an entry of the global columns array, 1 otherwise
 # NOTE(review): the closing brace of this function is not visible in this view.
 function valid_column {
 	local candidate="$1"
 	local column
 	for column in "${columns[@]}"
 	do
 		# Exact string comparison; the first hit ends the search
 		[[ "${candidate}" == "${column}" ]] && return 0
 	done
 	return 1
 #!/usr/bin/env python3
 import argparse
 import datetime
 import json
 import re
 import sys
 import time
 import urllib.request

 # Column definitions
 # Each entry maps a column name to a pair (extractor, attributes):
 #   extractor(job, pipelines) returns the column's raw value for one job entry,
 #   attributes is a tuple of flags ('date', 'coloured') used for sorting and rendering.
 def _job_data_field(key):
 	"""Return an extractor that reads job["job_data"][key]; its pipelines argument is unused."""
 	return lambda job, pipelines: job["job_data"][key]

 columns = {
 	'jobid': (_job_data_field("ident"), ()),
 	'url': (_job_data_field("url"), ()),
 	'user': (_job_data_field("started_by"), ()),
 	# Pipeline IDs missing from the pipelines mapping are shown as "unknown"
 	'pipenick': (lambda job, pipelines: pipelines.get(job["job_data"]["pipeline_id"], "unknown"), ()),
 	'queued': (_job_data_field("queued_at"), ('date',)),
 	'started': (_job_data_field("started_at"), ('date',)),
 	'last active': (lambda job, pipelines: int(job["ts"]), ('date', 'coloured')),
 }

 # Option state; an empty string means "not set" for all the flag variables below.
 sortcolumns=() # Values given to --sort/-s, in command-line order (may carry a leading '-')
 filter= # Raw FILTER string of the last --filter/--ifilter seen
 filtercaseinsensitive= # Set to 1 when the last filter option was --ifilter/-i
 nocolours=
 notable=
 dates= # Whether to use full dates for the time columns rather than elapsed time strings
 # Hand-rolled option parser: walks the positional parameters left to right.
 # Options that take a value read it from $2 and shift once themselves; the shift
 # at the bottom of the loop then drops the option name.
 while [[ $# -gt 0 ]]
 do
 	if [[ "$1" == "--help" || "$1" == "-h" ]]
 	then
 		# The usage text is written to stderr, and the script exits successfully
 		echo "Usage: archivebot-jobs [options]" >&2
 		echo "Prints a table of current AB jobs" >&2
 		echo "Options:" >&2
 		echo "  --help, -h                  Show this message and exit." >&2
 		echo "  --sort [-]COLUMN, -s        Sort the table by a column (descending if preceded by '-'). This can be used multiple times to refine the sorting." >&2
 		echo "  --filter FILTER, -f         Filter the table for rows where a COLUMN has a certain VALUE. If specified multiple times, only the last value is used." >&2
 		echo "                              The FILTER has this format: COLUMN{=|<|>|^|*|$|~}VALUE" >&2
 		echo "                              = means the value must be exactly as specified; < and > mean it must be less/greater than the specified; ^ and $ mean it must start/end with the specified; * means it must contain the specified; ~ means it must match the specified regex." >&2
 		echo "  --ifilter FILTER, -i        Like --filter, but case-insensitive" >&2
 		echo "  --no-colours, --no-colors   Don't colourise the last activity column if it's been a while." >&2
 		echo "  --no-table                  Raw output without feeding through column(1); columns are separated by tabs." >&2
 		echo "  --dates                     Print dates instead of elapsed times for queued/started/last active columns." >&2
 		echo "The COLUMNs are the names of each column, printed in capital letters in the first line of the output." >&2
 		exit 0
 	elif [[ "$1" == "--sort" || "$1" == "-s" ]]
 	then
 		# Repeatable: every occurrence appends another sort key
 		sortcolumns+=("$2")
 		shift
 	elif [[ "$1" == "--filter" || "$1" == "-f" ]]
 	then
 		# A later --filter/--ifilter replaces an earlier one, including its case-sensitivity
 		filter="$2"
 		filtercaseinsensitive=
 		shift
 	elif [[ "$1" == "--ifilter" || "$1" == "-i" ]]
 	then
 		filter="$2"
 		filtercaseinsensitive=1
 		shift
 	elif [[ "$1" == "--no-colours" || "$1" == "--no-colors" ]]
 	then
 		nocolours=1
 	elif [[ "$1" == "--no-table" ]]
 	then
 		notable=1
 	elif [[ "$1" == "--dates" ]]
 	then
 		dates=1
 	else
 		echo "Unknown option: $1" >&2
 		exit 1
 	fi
 	shift
 done

 # Validate sortcolumns and filter
 # Every failure below prints a message to stderr and exits with status 1.
 if [[ "${filter}" ]]
 then
 	# Reject filters containing a newline character
 	if [[ "${filter}" == *$'\n'* ]]
 	then
 		echo "Invalid filter: newlines not allowed" >&2
 		exit 1
 	fi
 	# The filter must contain at least one of the operator characters = < > ^ * $ ~
 	if [[ ! ( "${filter}" == *'='* || "${filter}" == *'<'* || "${filter}" == *'>'* || "${filter}" == *'^'* || "${filter}" == *'*'* || "${filter}" == *'$'* || "${filter}" == *'~'* ) ]]
 	then
 		echo "Invalid filter: ${filter}" >&2
 		exit 1
 	fi
 	# Column name = everything before the first operator character (%% strips the longest matching suffix)
 	column="${filter%%[=<>^*$~]*}"
 	# ${column,,} lower-cases the name, so column matching is case-insensitive
 	if ! valid_column "${column,,}"
 	then
 		echo "Invalid filter column: ${column}" >&2
 		exit 1
 	fi
 fi
 if [[ ${#sortcolumns[@]} -gt 0 ]]
 then
 	for column in "${sortcolumns[@]}"
 	do
 		# Drop one leading '-' (the descending marker) before checking the name
 		column="${column#-}"
 		if ! valid_column "${column,,}"
 		then
 			echo "Invalid sort column: ${column}" >&2
 			exit 1
 		fi
 	done
 else
 	# Default sort order
 	sortcolumns+=("jobid")
 fi

 # Choose the command the table is piped through, stored as an argv array:
 # column(1) aligning on tabs by default, plain cat when --no-table was given.
 case "${notable}" in
 	'') column=("column" "-t" $'-s\t') ;;
 	*) column=("cat") ;;
 esac

 # Fetch the job list and the pipeline list from the dashboard as JSON.
 # --fail makes curl produce no output on HTTP error responses, so an error page is
 # caught by the emptiness check below instead of being passed on as if it were JSON.
 # stderr is discarded on purpose: the only diagnostic shown is the message below.
 jobdata="$(curl -s --fail -H "Accept: application/json" "http://dashboard.at.ninjawedding.org/logs/recent?count=1" 2>/dev/null)"
 pipelinedata="$(curl -s --fail -H "Accept: application/json" "http://dashboard.at.ninjawedding.org/pipelines" 2>/dev/null)"

 # An empty response from either request is treated as a retrieval failure
 if [[ -z "${jobdata}" || -z "${pipelinedata}" ]]
 then
 	echo "Error retrieving job or pipeline data" >&2
 	exit 1
 fi

 { echo "${jobdata}"; echo "${pipelinedata}"; echo "${filter}"; } | python3 -c \
 '
 if True: # For sensible indentation
 	import datetime
 	import json
 	import sys
 	import time

 	currentTime = time.time()
 	def render_date(ts, coloured = False):
 		global currentTime
 		diff = currentTime - ts
 		colourStr = f"\x1b[{0 if diff < 6 * 3600 else 7};31m" if coloured and diff >= 300 else ""
 		colourEndStr = "\x1b[0m" if colourStr else ""
 		if "'${dates}'":
 			return colourStr + datetime.datetime.fromtimestamp(ts).isoformat(sep = " ") + colourEndStr
 		if diff <= 0:
 			return "now"
 		elif diff < 60:
 			return "<1 min ago"
 		elif diff < 86400:
 			return colourStr + (f"{diff // 3600:.0f}h " if diff >= 3600 else "") + f"{(diff % 3600) // 60:.0f}mn ago" + colourEndStr
 		else:
 			return colourStr + f"{diff // 86400:.0f}d {(diff % 86400) // 3600:.0f}h ago" + colourEndStr

 	jobdata = json.loads(sys.stdin.readline())
 	pipelinedata = json.loads(sys.stdin.readline())
 	filter = sys.stdin.readline().strip()

 	pipelines = {p["id"]: p["nickname"] for p in pipelinedata["pipelines"]}

 	jobs = []
 	for job in jobdata:
 		jobs.append({'"$(for i in ${!columns[@]}; do echo '
 			"'"${columns[$i]}"'": '"${columndefs[$i]}"','; done)"'
 		})

 	columns = ('"$(for column in "${columns[@]}"; do echo '"'"${column}"'", '; done)"')
 	columnAttributes = {'"$(for i in ${!columns[@]}; do echo -n '"'"${columns[$i]}"'": "'"${columnattributes[$i]}"'".split(","), '; done)"'}

 	# Filter
 	if filter:
 		import re
 		match = re.match(r"^(?P<column>[A-Za-z ]+)(?P<op>[=<>^*$~])(?P<value>.*)$", filter)
 defaultSort = 'jobid'

 # Parse arguments
 class FilterAction(argparse.Action):
 	"""argparse action shared by --filter/-f and --ifilter/-i.

 	Parses a FILTER of the form COLUMN{=|<|>|^|*|$|~}VALUE and stores the pair
 	(filterDict, transform) on the namespace under self.dest:
 	  filterDict has the keys "column" (lower-cased), "op" and "value" (still a string),
 	  transform lower-cases strings when invoked through --ifilter/-i and is the identity otherwise.
 	Only parsing and validation happen here; nothing is filtered at this point.

 	Raises argparse.ArgumentError if the filter is malformed or names an unknown column.
 	"""
 	def __call__(self, parser, namespace, values, optionString = None):
 		# values is a one-element list because the options are declared with nargs = 1
 		match = re.match(r"^(?P<column>[A-Za-z ]+)(?P<op>[=<>^*$~])(?P<value>.*)$", values[0])
 		if not match:
 			# ArgumentError takes (argument, message); passing self names the option in the error
 			raise argparse.ArgumentError(self, 'Invalid filter')
 		filterDict = match.groupdict()
 		filterDict["column"] = filterDict["column"].lower()
 		if filterDict["column"] not in columns:
 			# Explicit check instead of assert, which is stripped under python -O
 			raise argparse.ArgumentError(self, 'Invalid filter column')
 		if optionString in ('--ifilter', '-i'):
 			transform = lambda x: x.lower() if isinstance(x, str) else x
 		else:
 			transform = lambda x: x
 		setattr(namespace, self.dest, (filterDict, transform))

 def parse_sort(value):
 	"""Parse one sort specification of the form [-]COLUMN.

 	value: column name, optionally prefixed with '-' for descending order; case is ignored.
 	Returns a tuple (column, descending) with the lower-cased column name and a bool.
 	Raises argparse.ArgumentError if the column is not a key of the module-level columns dict.
 	"""
 	sortDesc = value.startswith('-')
 	if sortDesc:
 		value = value[1:]
 	value = value.lower()
 	if value not in columns:
 		# ArgumentError requires (argument, message); None is passed because no
 		# Action object is available in this helper.
 		raise argparse.ArgumentError(None, 'Invalid column name')
 	return (value, sortDesc)

 class SortAction(argparse.Action):
 	"""argparse action for --sort/-s: appends each parsed sort key to a list on the namespace."""
 	def __call__(self, parser, namespace, values, optionString = None):
 		# values is a one-element list; parse it first so that an invalid column
 		# raises before the namespace is touched
 		sortKey = parse_sort(values[0])
 		collected = getattr(namespace, self.dest, None)
 		if collected is None:
 			# First occurrence (or a None default): start a fresh list
 			collected = []
 			setattr(namespace, self.dest, collected)
 		collected.append(sortKey)

 # Command-line interface. RawTextHelpFormatter keeps the explicit line breaks of the
 # multi-line --filter help text below.
 parser = argparse.ArgumentParser(formatter_class = argparse.RawTextHelpFormatter)
 # nargs = 1 hands the custom actions a one-element list, which they read as values[0]
 parser.add_argument('--sort', '-s', nargs = 1, type = str, action = SortAction, help = "Sort the table by a COLUMN (descending if preceded by '-'). This can be used multiple times to refine the sorting.")
 parser.add_argument('--filter', '-f', nargs = 1, type = str, action = FilterAction, help = '\n'.join([
 	'Filter the table for rows where a COLUMN has a certain VALUE. If specified multiple times, only the last value is used.',
 	'FILTER has the format COLUMN{=|<|>|^|*|$|~}VALUE',
 	'  = means the value must be exactly as specified.',
 	'  < and > mean it must be less/greater than the specified.',
 	'  ^ and $ mean it must start/end with the specified.',
 	'  * means it must contain the specified.',
 	'  ~ means it must match the specified regex.',
  ]))
 # --ifilter writes to the same destination as --filter, so whichever comes last wins
 parser.add_argument('--ifilter', '-i', nargs = 1, type = str, action = FilterAction, dest = 'filter', help = 'Like --filter but case-insensitive')
 # Both spellings are stored as args.no_colours (argparse derives the name from the first long option)
 parser.add_argument('--no-colours', '--no-colors', action = 'store_true', help = "Don't colourise the last activity column if it's been a while.")
 parser.add_argument('--no-table', action = 'store_true', help = 'Raw output without feeding through column(1); columns are separated by tabs.')
 parser.add_argument('--dates', action = 'store_true', help = 'Print dates instead of elapsed times for queued/started/last active columns.')
 args = parser.parse_args()

 # Without any --sort option, fall back to sorting by the default column
 if not args.sort:
 	args.sort = [parse_sort(defaultSort)]

 # Retrieve
 def fetch(url):
 	"""Request url with an 'Accept: application/json' header and return the decoded JSON body.

 	Raises RuntimeError if the response status code is not 200; errors raised by
 	urllib or the JSON decoder propagate unchanged.
 	"""
 	request = urllib.request.Request(url, headers = {'Accept': 'application/json'})
 	with urllib.request.urlopen(request) as response:
 		if response.getcode() == 200:
 			return json.load(response)
 		raise RuntimeError('Could not fetch job data')

 # Download both data sets first, then take the reference time used for elapsed-time rendering
 jobdata = fetch('http://dashboard.at.ninjawedding.org/logs/recent?count=1')
 pipelinedata = fetch('http://dashboard.at.ninjawedding.org/pipelines')
 currentTime = time.time()

 # Process
 # Pipeline ID -> nickname lookup
 pipelines = dict((p["id"], p["nickname"]) for p in pipelinedata["pipelines"])

 # One dict per job entry, holding the raw value of every defined column
 jobs = [
 	{column: columnFunc(job, pipelines) for column, (columnFunc, _) in columns.items()}
 	for job in jobdata
 ]

 if len(jobs) == 0:
 	# Nothing to do
 	sys.exit(0)

 # Filter
 # Keep only the jobs whose filter column satisfies the operator and value given on the command line.
 if args.filter:
 	filterDict, transform = args.filter
 	filterColumn = filterDict["column"]
 	filterOp = filterDict["op"]
 	filterValue = filterDict["value"]
 	# Numeric columns are compared numerically; the type is taken from the first job
 	if isinstance(jobs[0][filterColumn], (int, float)):
 		try:
 			filterValue = float(filterValue)
 		except ValueError:
 			# Report a usage error instead of dying with a traceback
 			print(f'Invalid filter: column {filterColumn!r} requires a numeric value', file = sys.stderr)
 			sys.exit(1)
 	if filterOp == "~":
 		# Lower-casing the pattern for --ifilter would change its meaning (\D would become \d),
 		# so case-insensitivity is applied through the regex flag instead. The transform is
 		# probed because it is the only case-sensitivity information stored in args.filter.
 		caseInsensitive = transform("A") == "a"
 		pattern = re.compile(filterValue, re.IGNORECASE if caseInsensitive else 0)
 		matches = lambda value: pattern.search(value) is not None
 	else:
 		compFunc = {
 			"=": lambda a, b: a == b,
 			"<": lambda a, b: a < b,
 			">": lambda a, b: a > b,
 			"^": lambda a, b: a.startswith(b),
 			"*": lambda a, b: b in a,
 			"$": lambda a, b: a.endswith(b),
 		}[filterOp]
 		# The filter value is the same for every job, so transform it once
 		needle = transform(filterValue)
 		matches = lambda value: compFunc(transform(value), needle)
 	jobs = [job for job in jobs if matches(job[filterColumn])]

 	if not jobs:
 		sys.exit(0)

 	# Sort
 	class reversor: # https://stackoverflow.com/a/56842689
 		def __init__(self, obj):
 			self.obj = obj

 		def __eq__(self, other):
 			return other.obj == self.obj

 		def __lt__(self, other):
 			return other.obj < self.obj

 	sortColumnsRaw = ('"$(printf "'%s', " "${sortcolumns[@]}")"')
 	sortColumns = tuple((column[1:] if column.startswith("-") else column).lower() for column in sortColumnsRaw)
 	sortColumnAsc = tuple(not column.startswith("-") for column in sortColumnsRaw)
 	assert all(column in columns for column in sortColumns)
 	if not "'${dates}'":
 		# Reverse sorting order for columns which have a date attribute since the column will have elapsed time
 		sortColumnAttrs = tuple(columnAttr for column, columnAttr in columnAttributes.items() if column in sortColumns)
 		sortColumnAsc = tuple(not columnAsc if "date" in columnAttr else columnAsc for columnAsc, column, columnAttr in zip(sortColumnAsc, sortColumns, sortColumnAttrs))
 	jobs = sorted(jobs, key = lambda job: tuple(job[column] if columnAsc else reversor(job[column]) for column, columnAsc in zip(sortColumns, sortColumnAsc)))

 	# Renderers
 	renderers = {}
 	for column, columnAttr in columnAttributes.items():
 		if "date" in columnAttr:
 			if "coloured" in columnAttr:
 				renderers[column] = lambda x: render_date(x, coloured = not "'${nocolours}'")
 			else:
 				renderers[column] = render_date

 	# Print
 	print("\t".join(column.upper() for column in columns))
 	for job in jobs:
 		for column in renderers:
 			job[column] = renderers[column](job[column])
 		print("\t".join(job[column] for column in columns))
 ' | "${column[@]}"
 # Sort
 class reversor: # https://stackoverflow.com/a/56842689
 	"""Wrapper whose ordering is the reverse of the wrapped object's ordering.

 	Used inside sort keys so that individual key components can sort descending.
 	"""
 	def __init__(self, obj):
 		self.obj = obj

 	def __eq__(self, rhs):
 		# Equality is unaffected by the reversal
 		return rhs.obj == self.obj

 	def __lt__(self, rhs):
 		# Operands swapped on purpose: self sorts before rhs when rhs.obj is the smaller one
 		return rhs.obj < self.obj

 # Pair every requested sort column with its direction and its definition from columns.
 # Reverse sorting order for columns which have a date attribute unless --dates is given,
 # since those columns will then show elapsed time.
 flipDateColumns = not args.dates
 sortColumns = tuple(
 	(column, (not descending) if flipDateColumns and 'date' in columns[column][1] else descending, columns[column])
 	for column, descending in args.sort
 )

 def _sort_key(job):
 	"""Build the composite sort key of one job; descending components are wrapped in reversor."""
 	return tuple(reversor(job[column]) if descending else job[column] for column, descending, _ in sortColumns)

 jobs = sorted(jobs, key = _sort_key)

 # Renderers
 def render_date(ts, coloured = False):
 	"""Render a Unix timestamp for display, relative to the module-level currentTime.

 	ts: timestamp in seconds.
 	coloured: if true and the timestamp is at least 300 s old, include ANSI colour codes
 	(SGR 0;31 below six hours of age, SGR 7;31 from six hours on).

 	Returns the plain string "now" or "<1 min ago" for ages below one minute (when
 	args.dates is off); otherwise a tuple (colourStart, text, colourEnd) whose colour
 	parts are empty strings when no colour applies. With args.dates the text is the
 	local ISO date and time, else an elapsed-time string.
 	"""
 	elapsed = currentTime - ts
 	if coloured and elapsed >= 300:
 		intensity = 0 if elapsed < 6 * 3600 else 7
 		colourStr = f"\x1b[{intensity};31m"
 		colourEndStr = "\x1b[0m"
 	else:
 		colourStr = colourEndStr = ""
 	if args.dates:
 		return (colourStr, datetime.datetime.fromtimestamp(ts).isoformat(sep = " "), colourEndStr)
 	if elapsed <= 0:
 		return "now"
 	if elapsed < 60:
 		return "<1 min ago"
 	if elapsed < 86400:
 		# Below one day: optional hours, then minutes
 		hours, remainder = divmod(elapsed, 3600)
 		text = f"{remainder // 60:.0f}mn ago"
 		if elapsed >= 3600:
 			text = f"{hours:.0f}h " + text
 		return (colourStr, text, colourEndStr)
 	# One day or more: days and hours
 	days, remainder = divmod(elapsed, 86400)
 	return (colourStr, f"{days:.0f}d {remainder // 3600:.0f}h ago", colourEndStr)

 # Column name -> rendering function, for the columns carrying the "date" attribute only.
 # Columns that are also "coloured" get colour codes unless --no-colours was given.
 renderers = {
 	column: ((lambda x: render_date(x, coloured = not args.no_colours)) if "coloured" in columnAttr else render_date)
 	for column, (_, columnAttr) in columns.items()
 	if "date" in columnAttr
 }

 # Print
 # Build all rows first (header in upper case, then one row per job) so that the
 # column widths can be computed before anything is written.
 output = [tuple(column.upper() for column in columns)]
 for job in jobs:
 	for column, renderer in renderers.items():
 		job[column] = renderer(job[column])
 	output.append(tuple(job[column] for column in columns))

 if args.no_table:
 	# Raw mode: tab-separated, colour codes glued directly onto their text
 	for row in output:
 		print('\t'.join(field if isinstance(field, str) else ''.join(field) for field in row))
 else:
 	# A field is either a plain string or a (colourStart, text, colourEnd) tuple;
 	# only the visible text counts towards the column width.
 	widths = tuple(
 		max(len(field if isinstance(field, str) else field[1]) for field in column)
 		for column in zip(*output)
 	)
 	for row in output:
 		cells = []
 		for value, width in zip(row, widths):
 			if isinstance(value, str):
 				cells.append(value.ljust(width))
 			else:
 				colourStart, text, colourEnd = value
 				# Pad after the colour reset so the padding itself is not coloured
 				cells.append(colourStart + text + colourEnd + ' ' * (width - len(text)))
 		print('  '.join(cells))