#!/bin/sh
# host-probe <host> [port]          — one-shot: print state and exit
# host-probe --watch <host> [port]  — loop, emit only on state change
#
# Distinguishes three states by probing layers independently:
#   up      ICMP + TCP accept + SSH banner exchange all succeed
#   wedged  ICMP + TCP accept succeed, banner exchange times out
#           (kernel networking alive, userspace frozen — classic
#           D-state / OOM / disk hang signature)
#   down    no ICMP or no TCP accept
#
# port defaults to 22.
#
# Suitable both as a standalone check and as the command body for the
# Monitor tool (one stdout line per state change).
#
# Env:
#   HOST_PROBE_INTERVAL  seconds between polls in --watch mode (default 30)
#   HOST_PROBE_TIMEOUT   per-probe timeout in whole seconds (default 3)

set -eu

interval=${HOST_PROBE_INTERVAL:-30}
timeout=${HOST_PROBE_TIMEOUT:-3}

# Print the header comment above (file line 2 through the first blank line)
# with the leading "# " stripped, then exit with status 2.
usage() {
  sed -n '2,/^$/p' "$0" | sed 's/^# \{0,1\}//'
  exit 2
}

watch=false
case "${1:-}" in
  '' | -h | --help | help) usage ;;
  --watch)
    watch=true
    shift
    ;;
esac

# After the optional --watch flag exactly one or two operands must remain.
if [ $# -lt 1 ] || [ $# -gt 2 ]; then
  usage
fi

host=$1
port=${2:-22}

# The timeout is handed to ping and nc as a flag argument; a non-integer
# value would make the probes fail and be misreported as "down", so reject
# it up front instead.
case $timeout in
  '' | *[!0-9]*)
    echo "host-probe: HOST_PROBE_TIMEOUT must be a whole number of seconds" >&2
    exit 2
    ;;
esac

# ping -W takes seconds on Linux (iputils) but milliseconds on macOS and
# FreeBSD; scale once here so probe_icmp waits the same time everywhere.
case $(uname -s 2>/dev/null) in
  Darwin | FreeBSD) ping_wait=${timeout}000 ;;
  *) ping_wait=$timeout ;;
esac

# Succeeds when a single ICMP echo to $host is answered within the timeout.
probe_icmp() {
  ping -c1 -W"$ping_wait" "$host" >/dev/null 2>&1
}

# Succeeds when a TCP connection to $host:$port is accepted within the timeout.
probe_tcp() {
  # -G is the BSD/macOS connect timeout flag; falls back to -w on Linux nc.
  nc -z -G"$timeout" "$host" "$port" >/dev/null 2>&1 \
    || nc -z -w"$timeout" "$host" "$port" >/dev/null 2>&1
}

# Succeeds when the peer sends any data within the timeout after connecting.
# Sets the global $banner to (at most) the first 100 bytes received.
probe_banner() {
  # SSH banner arrives unsolicited within milliseconds on a healthy sshd.
  # Frozen userspace: TCP accepts but no banner ever lands.
  banner=$(
    {
      nc -w"$timeout" "$host" "$port" </dev/null &
      nc_pid=$!
      # Watchdog: the meaning of nc's -w differs between nc variants, so
      # enforce the bound ourselves. Its output goes to /dev/null so a
      # lingering sleep cannot hold the command substitution's pipe open.
      (
        sleep "$timeout"
        kill "$nc_pid"
      ) >/dev/null 2>&1 &
      watchdog_pid=$!
      # "|| :" keeps set -e from aborting before the watchdog is reaped
      # when nc was killed or exited non-zero.
      wait "$nc_pid" || :
      kill "$watchdog_pid" || :
    } 2>/dev/null | head -c 100
  ) || banner=""
  [ -n "$banner" ]
}

# Print exactly one of: up, wedged, down. Probes run from the lowest layer
# upward and stop at the first failure.
classify() {
  if ! probe_icmp; then
    echo down
    return
  fi
  if ! probe_tcp; then
    echo down
    return
  fi
  if ! probe_banner; then
    echo wedged
    return
  fi
  echo up
}

# Current UTC time as HH:MM:SSZ.
stamp() { date -u +%H:%M:%SZ; }

# One-shot mode: print the state and exit.
if [ "$watch" = false ]; then
  classify
  exit 0
fi

# Watch mode: poll forever, printing a timestamped line only when the state
# differs from the previous poll (the first poll always prints).
prev=""
while :; do
  state=$(classify)
  if [ "$state" != "$prev" ]; then
    printf '%s\n' "[$(stamp)] $host:$port $state"
    prev=$state
  fi
  sleep "$interval"
done