67 lines
2.7 KiB
Bash
Executable file
67 lines
2.7 KiB
Bash
Executable file
#!/bin/sh
# fleet-status — one-screen dashboard of every agent node, in the terminal.
#
# For each agent host in mesh-hosts.json (ssh_user != null): read its
# data/agent-status.json (locally for this node, over ssh for the rest — using
# the fleet ssh names the agents themselves maintain) and render a table:
#
#   NODE        LOC   ROUTE   HEAD      AGE     HOSTNAME    DISCOVERED
#   fennel      HOME  en0     af54b67   4s      fennel      apricot=10.0.0.118 ...
#
# AGE is seconds since the agent's last cycle — STALE (>90s) means the agent is
# down or wedged on that node. "no status" = no readable status file: the agent
# is down, the host is unreachable, or the agent is not yet running new code
# (e.g. waiting on its next pull).
#
# Read-only; safe from anywhere on the mesh.

# Abort on any unhandled command failure (-e) or use of an unset variable (-u).
set -eu

# Follow $0 through any chain of symlinks so the repository is located relative
# to the real script file rather than to the link it was invoked through.
self=$0
while [ -L "$self" ]; do
  link=$(readlink "$self")
  case $link in
    /*) self=$link ;;                     # absolute target: use as-is
    *)  self=$(dirname "$self")/$link ;;  # relative target: relative to the link's dir
  esac
done

# Start in the script's own directory and walk upward until a directory that
# contains data/mesh-hosts.json is found (or "/" is reached).
# CDPATH is cleared for the cd: with CDPATH exported, cd may resolve a relative
# directory through it and print the result on stdout, which would end up
# inside $root alongside the output of pwd.
root=$(CDPATH= cd -- "$(dirname "$self")" && pwd)
while [ "$root" != "/" ] && [ ! -f "$root/data/mesh-hosts.json" ]; do
  root=$(dirname "$root")
done
data_file="$root/data/mesh-hosts.json"

# Preflight: the host inventory and jq are both required by everything below.
[ -f "$data_file" ] || { echo "fleet-status: cannot locate data/mesh-hosts.json" >&2; exit 1; }
command -v jq >/dev/null || { echo "fleet-status: jq not installed" >&2; exit 1; }

# First dot-separated label of this machine's hostname; compared against the
# node names and aliases in mesh-hosts.json to decide which node is local.
short=$(hostname 2>/dev/null | cut -d. -f1)
# Current time in epoch seconds; each agent's .ts is subtracted from it for AGE.
now=$(date +%s)

# An agent whose last cycle is older than this many seconds is shown as STALE.
stale_after=90

# is_local_node NODE
# Succeeds when NODE refers to this machine: either NODE equals our short
# hostname ($short), or $short is listed in that host's .aliases in $data_file.
# Any jq failure (missing aliases, unreadable file) counts as "not local".
is_local_node() {
  [ "$1" = "$short" ] && return 0
  jq -e --arg n "$1" --arg h "$short" \
    '.hosts[] | select(.name == $n) | .aliases | index($h)' \
    "$data_file" >/dev/null 2>&1
}

# fetch_status NODE
# Write NODE's agent-status.json to stdout: read from $root/data for the local
# node, fetched over ssh otherwise. Best-effort by design — a missing file or
# an unreachable host yields empty output and a zero status, never an abort.
fetch_status() {
  if is_local_node "$1"; then
    cat "$root/data/agent-status.json" 2>/dev/null || true
  else
    # -n detaches ssh from stdin, which inside the main loop is the host list.
    # BatchMode + ConnectTimeout keep a dead or password-prompting host from
    # hanging the whole dashboard.
    ssh -n -o ConnectTimeout=5 -o BatchMode=yes "$1" \
      'cat ~/net-tools/data/agent-status.json 2>/dev/null' 2>/dev/null || true
  fi
}

# render_status_row NODE
# Read one agent-status JSON document on stdin and print its table row.
# Uses the globals $now (epoch seconds) and $stale_after.
# Returns non-zero and prints nothing when stdin is empty, is not valid JSON,
# or is not a JSON object, so the caller can print its "no status" line.
# Fields absent from the document render as placeholders instead of aborting
# jq: NODE stands in for .self, "?" for an AGE with no numeric .ts, and "-"
# for the remaining columns.
render_status_row() {
  jq -r --arg node "$1" --argjson now "$now" --argjson stale "$stale_after" '
    objects
    | (if (.ts | type) == "number" then $now - .ts else null end) as $age
    | [ (.self // $node),
        (.location // "-"),
        (.lan_route_via // "-"),
        (.head // "-"),
        (if $age == null then "?"
         elif $age > $stale then "STALE"
         else "\($age)s" end),
        ((.hostname // "-") | tostring | split(".")[0]),
        ( ((.discovered | objects) // {})
          | to_entries | map("\(.key)=\(.value)") | join(" ")
          | if . == "" then "-" else . end )
      ] | @tsv
  ' 2>/dev/null |
    awk -F'\t' '
      { printf "%-11s %-5s %-7s %-9s %-7s %-11s %s\n", $1,$2,$3,$4,$5,$6,$7; rows++ }
      END { exit (rows ? 0 : 1) }   # no row printed => report failure to the caller
    '
}

# Header row, then one row per host in mesh-hosts.json that has an ssh_user.
printf '%-11s %-5s %-7s %-9s %-7s %-11s %s\n' NODE LOC ROUTE HEAD AGE HOSTNAME DISCOVERED
jq -r '.hosts[] | select(.ssh_user != null) | .name' "$data_file" | while read -r node; do
  fetch_status "$node" | render_status_row "$node" ||
    printf '%-11s %s\n' "$node" "— no status (agent down, unreachable, or awaiting pull)"
done