diff --git a/README.md b/README.md index aa0ba76..a72a070 100644 --- a/README.md +++ b/README.md @@ -4,10 +4,17 @@ Mesh/LAN tooling for the four-host **wg1 mesh** + home LAN, built around one source of truth ([`data/mesh-hosts.json`](data/mesh-hosts.json)). Components: -- **`bin/net`** — **the one command**: `status · whoami · doctor · sync · up · - down · enroll phone · gui`. Imports the agent as a library, so every surface - shares one implementation. The renderers (`host-apply`, `mesh-hosts-render`, - `wg-dns-sync`, `fleet-status`) remain as internals/direct tools. +- **`bin/net`** — **the one command**: `status · whoami · doctor · issues · + sync · up · down · enroll phone · gui`. Imports the agent as a library, so + every surface shares one implementation. The renderers (`host-apply`, + `mesh-hosts-render`, `wg-dns-sync`, `fleet-status`) remain as internals/direct + tools. +- **[`data/known-issues.json`](data/known-issues.json)** — the **triage + registry**: features that are known-broken or intentionally parked. `net + issues` lists them; `net doctor [host]` annotates each host with its parked + features (`⚠ KNOWN-…`) so a triaged problem is never re-investigated from + scratch. An optional per-issue `probe` (same shape as a host `identity`) + lets `doctor` flag an issue as *maybe-resolved* when it starts passing. - **`gui/`** — Mesh control, the for-dummies window (`net gui`): plain-language status per device; right-click for the power tools (copy address, ssh here, diagnose path, `.wg` address). Every menu item is a `net` verb. @@ -82,6 +89,7 @@ manages (it removes them; its block supersedes them). | `bin/fleet-status` | anywhere | Terminal dashboard: one row per agent node (location, route, repo HEAD, snapshot age, discovered IPs), read from each node's `data/agent-status.json` over the fleet ssh names. `STALE`/`no status` = that agent needs attention. 
| | `bin/wg-dns-sync` | **apricot** | Renders `mesh-hosts.json` → `/etc/dnsmasq.d/wg-mesh.conf` (host `.wg` + `.lan` records on `10.9.0.2:53`, for wg clients with `DNS=10.9.0.2`). Idempotent; `--dry-run`. | | `bin/mesh-hosts-render` | **every host** | Renders the fleet `/etc/hosts` block (bare/`.lan` at current IPs, `.wg`, service vhosts) and splices it at the top of `/etc/hosts`, adopting any loose lines it supersedes. Idempotent. `--print`/`--diff`/`--install`. | +| `bin/forge-dns-render` | **laptop/dev machines** | DX-only: renders cloud Forgejo shortcuts (mcforge, ctforge, ...) from `~/.vault/*_forge_creds` into a managed block at the bottom of `/etc/hosts`. Used by `net sync` and per-project `./run forge:dns`. Adopts loose entries. `--print`/`--diff`/`--install`. | | `smart-lan-router/` | **fennel** | `com.lilith.smart-lan-router.plist` (launchd) + `install-agent.sh` (one installer: launchd or systemd) + `smart-lan-router.service.tmpl`. | | [`tray/`](tray/) | **fennel** (menu bar) | The fleet tray (absorbed from the old `wireguard-vpn-tray` repo). Icon = tunnel state (green/yellow/red); menu = live fleet view from `data/agent-status.json`: agent freshness, HOME/AWAY + route, discovered host IPs, repo HEAD. Connect/disconnect actions. Install: `bash tray/install-tray.sh` (as the user, no sudo). | @@ -121,7 +129,7 @@ its own code changes — fleet updates propagate by pushing to the forge. | add/rename a host, change a MAC, add a service vhost or phone | edit [`data/mesh-hosts.json`](data/mesh-hosts.json), let autocommit push — **every agent pulls, restarts on code change, and converges (incl. 
its OS hostname) within minutes** | | react to a host changing DHCP IP | nothing — agents discover it by MAC and regenerate `/etc/hosts` + ssh automatically | | rename a node's OS hostname | nothing by hand — `fleet.enforce_hostname` makes the node's own agent do it | -| force a regen now | `sudo bin/mesh-hosts-render --install` and `bin/host-apply --ssh-apply` | +| force a regen now | `net sync` (mesh-hosts + forge-dns + ssh) or the individual `sudo ... --install` | | apricot mesh DNS (phones) | `sudo wg-dns-sync` on apricot | | enroll a phone | `wg-phone-add -d ` then add a `class: phone` entry | diff --git a/bin/forge-dns-render b/bin/forge-dns-render new file mode 100755 index 0000000..b09ba3a --- /dev/null +++ b/bin/forge-dns-render @@ -0,0 +1,138 @@ +#!/bin/sh +# forge-dns-render — manage cloud DX forge shortnames (mcforge for magic-civilization, +# ctforge for cocottetech/lilithplatform, ...) in /etc/hosts as part of the shared +# net-tools infra installers for dev DX. +# +# Reads current FORGE_IP from ~/.vault/*_forge_creds (maintained by the per-project +# ./run forge:up / forge:dns in @magic-civilization and @cocottetech). +# +# Emits a marked, idempotently-replaceable block. `net sync` (and manual +# mesh-hosts-render + host-apply) now also converges the DX forge shortcuts. +# Per-project `./run forge:dns` prefers this central renderer when available. +# +# The block lives at the bottom (after the prepended mesh-hosts fleet block). +# Loose hand-managed lines for these names are adopted (removed) on --install. +# +# Usage: +# forge-dns-render # print the block (default) +# forge-dns-render --install # splice/replace in /etc/hosts (sudo) +# forge-dns-render --diff # show what --install would do +# +# Exit codes match mesh-hosts-render (0 success, 2 needs root). 
+
+set -eu
+
+mode=print
+case "${1:-}" in
+  ""|--print) mode=print ;;
+  --install)  mode=install ;;
+  --diff)     mode=diff ;;
+  *) echo "forge-dns-render: unknown arg '$1' (use --print|--install|--diff)" >&2; exit 1 ;;
+esac
+
+BEGIN='# >>> dx-forges (managed by net-tools/bin/forge-dns-render)'
+END='# <<< dx-forges'
+HOSTS_FILE=/etc/hosts
+VAULT_DIR="${HOME}/.vault"
+
+# This script is commonly run through sudo (`sudo forge-dns-render --install`).
+# When sudo resets HOME to root's home the creds are not found and the block
+# would be re-rendered empty, dropping the shortcuts. Prefer the invoking
+# user's vault when it can be resolved; otherwise HOME is used as-is.
+if [ "$(id -u)" -eq 0 ] && [ -n "${SUDO_USER:-}" ] && command -v getent >/dev/null 2>&1; then
+  sudo_home=$(getent passwd "$SUDO_USER" | cut -d: -f6)
+  if [ -n "$sudo_home" ] && [ -d "$sudo_home/.vault" ]; then
+    VAULT_DIR="$sudo_home/.vault"
+  fi
+fi
+
+# Map vault basename -> the short hostname users type for :3000
+map_short() {
+  case "$1" in
+    mc_forge_creds) echo mcforge ;;
+    cocotte_forge_creds) echo ctforge ;;
+    *) echo "" ;;
+  esac
+}
+
+# Print the managed block: BEGIN marker, explanatory comments, one
+# "<ip>\t<shortname>" line per creds file that yields a FORGE_IP, END marker.
+render_block() {
+  printf '%s\n' "$BEGIN"
+  printf '# Cloud DX Forgejo /etc/hosts shortcuts (mcforge, ctforge, ...).\n'
+  printf '# IPs from ~/.vault/*_forge_creds FORGE_IP (refreshed by project ./run forge:up).\n'
+  printf '# Re-run after any forge:up (droplet gets a new IP on restore-from-snapshot).\n'
+  printf '# http://:3000 — also kept fresh by net-tools infra (net sync).\n'
+  had=0
+  for f in "$VAULT_DIR"/mc_forge_creds "$VAULT_DIR"/cocotte_forge_creds; do
+    [ -f "$f" ] || continue
+    base=$(basename "$f")
+    short=$(map_short "$base")
+    [ -n "$short" ] || continue
+    ip=$(grep -E '^FORGE_IP=' "$f" 2>/dev/null | head -1 | cut -d= -f2- | tr -d ' \t\r\n')
+    [ -n "$ip" ] || continue
+    printf '%s\t%s\n' "$ip" "$short"
+    had=1
+  done
+  if [ "$had" -eq 0 ]; then
+    printf '# (no active DX forges — ./run forge:up in the relevant project(s) first)\n'
+  fi
+  printf '%s\n' "$END"
+}
+
+block=$(render_block)
+
+if [ "$mode" = "print" ]; then
+  printf '%s\n' "$block"
+  exit 0
+fi
+
+# All shortnames we own (for adoption of stale loose lines).
+managed=$(printf '%s\n' "$block" | awk '!/^#/ && NF >= 2 { for (i = 2; i <= NF; i++) print $i }' | sort -u | tr '\n' ' ')
+
+current=$(cat "$HOSTS_FILE" 2>/dev/null || true)
+
+# The strip below drops everything from BEGIN through END. If the END marker is
+# missing, that would silently delete the rest of the file, so refuse to go on.
+if ! printf '%s\n' "$current" | awk -v b="$BEGIN" -v e="$END" '
+  $0 == b { unterminated = 1 }
+  $0 == e { unterminated = 0 }
+  END { exit unterminated + 0 }
+'; then
+  echo "forge-dns-render: unterminated dx-forges block in $HOSTS_FILE — fix it by hand first" >&2
+  exit 1
+fi
+
+# Strip any previous copy of our block.
+stripped=$(printf '%s\n' "$current" | awk -v b="$BEGIN" -v e="$END" '
+  $0 == b { skip = 1 }
+  skip != 1 { print }
+  $0 == e { skip = 0 }
+')
+
+# Adopt (remove) any loose lines or name tokens that match our managed forges.
+# (Same policy as mesh-hosts-render: the tool owns these names now.)
+stripped=$(printf '%s\n' "$stripped" | awk -v names="$managed" '
+  BEGIN { n = split(names, a, /[[:space:]]+/); for (i = 1; i <= n; i++) if (a[i] != "") set[a[i]] = 1 }
+  /^[[:space:]]*#/ || NF < 2 { print; next }
+  {
+    kept = ""; removed = 0
+    for (i = 2; i <= NF; i++) {
+      if ($i in set) removed++
+      else kept = kept " " $i
+    }
+    if (removed == 0) { print; next }
+    if (kept == "") next
+    print $1 kept
+  }
+')
+
+# Trim leading/trailing blank lines that accumulate from repeated splices.
+stripped=$(printf '%s\n' "$stripped" | awk 'NF {f=1; p=NR} f {l[NR]=$0} END {for(i=1;i<=p;i++) if (i in l) print l[i]}')
+
+# Append the dx-forges block at the bottom (mesh-hosts block is prepended at top).
+new=$(printf '%s\n\n%s\n' "$stripped" "$block")
+
+if [ "$mode" = "diff" ]; then
+  if command -v diff >/dev/null 2>&1; then
+    printf '%s\n' "$new" | diff -u "$HOSTS_FILE" - || true
+  else
+    printf '%s\n' "$new"
+  fi
+  exit 0
+fi
+
+# --install
+if printf '%s\n' "$new" | cmp -s - "$HOSTS_FILE"; then
+  echo "forge-dns-render: $HOSTS_FILE already up to date"
+  exit 0
+fi
+
+SUDO=""
+if [ "$(id -u)" -ne 0 ]; then
+  if command -v sudo >/dev/null 2>&1; then
+    SUDO="sudo"
+  else
+    echo "forge-dns-render: --install needs root" >&2
+    exit 2
+  fi
+fi
+
+printf '%s\n' "$new" | $SUDO tee "$HOSTS_FILE" >/dev/null
+echo "forge-dns-render: updated $HOSTS_FILE"
diff --git a/bin/net b/bin/net
index d8727e0..bec325c 100755
--- a/bin/net
+++ b/bin/net
@@ -9,7 +9,9 @@ surface can disagree with another.
   net status            fleet table (every agent's last snapshot)
   net whoami            which host this is, roles, vantage
   net doctor [host]     probe lan/wg/identity per path, name the chokepoint
-  net sync              force-converge this node's /etc/hosts + ssh now
+                        (annotates any KNOWN-BROKEN/parked features per host)
+  net issues [host]     list known-broken / parked features (the triage registry)
+  net sync              force-converge this node's /etc/hosts (mesh fleet + dx-forges shortcuts) + ssh now
   net up | net down     bring the wg tunnel up / down
   net enroll phone NAME --os ios|android [--wg 10.9.0.N]
                         wg peer + QR (wg-phone-add) + declared entry
@@ -54,6 +56,31 @@ def overlay() -> dict:
     return {}
 
 
+def issues_for(host: str | None = None) -> list[dict]:
+    """Known-broken/parked features from data/known-issues.json (graceful if absent)."""
+    p = os.path.join(ROOT, "data", "known-issues.json")
+    try:
+        reg = slr.load_json(p).get("issues", [])
+    except (FileNotFoundError, json.JSONDecodeError, OSError):
+        return []
+    return [i for i in reg if host is None or i.get("host") == host]
+
+
+def issue_resolved(issue: dict, ip: str | None) -> bool | None:
+    """Re-run an issue's optional L7 probe (same shape as mesh-hosts identity).
+
+    True  = probe now passes → the issue may be fixed.
+    False = probe still fails → still broken.
+    None  = no probe / no url / no ip → not checkable, declarative only.
+    """
+    probe = issue.get("probe")
+    if not probe or not ip or not probe.get("url"):
+        return None
+    url = probe["url"].replace("{ip}", ip)
+    rc, out, _ = slr._run(["/usr/bin/curl", "-s", "--max-time", "4", url], 6)
+    return rc == 0 and all(m in out for m in probe.get("markers", []))
+
+
 def host_entry(d: dict, name: str) -> dict | None:
     for h in d.get("hosts", []):
         if h["name"] == name or name in (h.get("aliases") or []):
@@ -77,11 +104,13 @@ def ping_ms(ip: str, timeout_s: int = 2) -> float | None:
 def cmd_whoami(_args: list[str]) -> int:
     ctx = slr.build_ctx(slr.find_data_file())
     cfg = slr.load_config(slr.find_data_file())
-    home, gw, gwif = slr.is_home(cfg)
+    home, gw, gwif, note = slr.preview_location(cfg, ctx["roles"])
     print(f"host : {ctx['self_name'] or 'UNKNOWN — not in mesh-hosts.json'}")
     print(f"platform : {slr.PLATFORM}")
     print(f"roles : {', '.join(sorted(ctx['roles']))}")
     print(f"location : {'HOME' if home else 'AWAY'} (gw {gw} on {gwif})")
+    if note:
+        print(f"route : {note}")
     print(f"vantage : {'LAN-capable' if (ctx['self_lan'] is not None or 'route' in ctx['roles']) else 'mesh-only'}")
     return 0
 
@@ -90,10 +119,58 @@ def cmd_status(_args: list[str]) -> int:
     os.execv(os.path.join(ROOT, "bin", "fleet-status"), ["fleet-status"])
 
 
+def cmd_issues(args: list[str]) -> int:
+    """List known-broken / parked features (optionally for one host)."""
+    host = args[0] if args else None
+    if host:
+        d = data()
+        h = host_entry(d, host)
+        if not h:
+            print(f"issues: unknown host '{host}'", file=sys.stderr)
+            return 1
+        host = h["name"]  # normalize alias → canonical
+    items = issues_for(host)
+    if not items:
+        print(f"no known issues{f' for {host}' if host else ''} — clean")
+        return 0
+    sev = {"broken": "✗", "degraded": "~", "parked": "▪"}
+    for i in sorted(items, key=lambda x: (x.get("host", ""), x.get("id", ""))):
+        st = i.get("status", "broken")
+        print(f"\n{sev.get(st, '?')} {i.get('id', '?')} [{st}] @ {i.get('host', 'fleet')}"
+              + (f" · {i['unit']}" if i.get("unit") else ""))
+        print(f" {i.get('title', '')}")
+        if i.get("summary"):
+            print(f" why : {i['summary']}")
+        if i.get("fix"):
+            print(f" fix : {i['fix']}")
+        if i.get("ref"):
+            print(f" ref : {i['ref']}")
+        print(f" since {i.get('since', '?')}")
+    print()
+    return 0
+
+
 def cmd_sync(_args: list[str]) -> int:
+    """Force-converge this node now: mesh /etc/hosts block, ssh, then the dx-forges shortcuts."""
     rc1 = subprocess.run(["sudo", os.path.join(ROOT, "bin", "mesh-hosts-render"), "--install"]).returncode
     rc2 = subprocess.run([os.path.join(ROOT, "bin", "host-apply"), "--ssh-apply"]).returncode
-    return rc1 or rc2
+    rc3 = 0
+    fbin = os.path.join(ROOT, "bin", "forge-dns-render")
+    if os.path.exists(fbin):
+        # Best-effort DX convergence: a failed render is reported but does not fail
+        # the mesh sync. Run it WITHOUT sudo: the script reads the caller's ~/.vault
+        # and escalates on its own only for the final write; under sudo, HOME (and
+        # so the vault it reads) may be root's instead of the user's.
+        try:
+            forge = subprocess.run([fbin, "--install"], check=False)
+        except OSError as err:
+            print(f"sync: cannot run forge-dns-render: {err}", file=sys.stderr)
+            rc3 = 1
+        else:
+            if forge.returncode != 0:
+                print(f"sync: forge-dns-render exited {forge.returncode}; dx-forges not refreshed",
+                      file=sys.stderr)
+    return rc1 or rc2 or rc3
 
 
 def cmd_doctor(args: list[str]) -> int:
@@ -142,6 +219,15 @@ def cmd_doctor(args: list[str]) -> int:
         else:
             print(f" → DOWN on every path — host offline, or this node's tunnel is down")
             worst = max(worst, 2)
+        # known-broken / parked features: triaged already, don't re-investigate
+        for iss in issues_for(name):
+            resolved = issue_resolved(iss, lan_ip)
+            if resolved is True:
+                print(f" ⚠ known-issue {iss.get('id', '?')} may be RESOLVED — re-verify & clear: {iss.get('title', '')}")
+                worst = max(worst, 1)
+            else:
+                st = iss.get("status", "broken").upper()
+                print(f" ⚠ KNOWN-{st}: {iss.get('title', '')} (since {iss.get('since', '?')}) — {iss.get('summary', '')}")
         print()
     return worst
 
@@ -226,7 +312,7 @@ def cmd_gui(_args: list[str]) -> int:
 
 VERBS = {
     "status": cmd_status, "whoami": cmd_whoami, "sync": cmd_sync,
-    "doctor": cmd_doctor, "up": cmd_up, "down": cmd_down,
+    "doctor": cmd_doctor, "issues": cmd_issues, "up": cmd_up, "down": cmd_down,
     "enroll": cmd_enroll, "gui": cmd_gui,
 }