From 2e8a5443a2e87b76e1449c2d1520e79d8f345a1a Mon Sep 17 00:00:00 2001 From: Natalie Date: Sun, 28 Jun 2026 14:49:28 -0400 Subject: [PATCH] tray: add DO cloud config switcher + make green reflect new lime-based mesh - derive visible hosts, Fleet label, and critical peers from dx.hide_homelan in mesh-hosts.json - "Toggle homelan visibility (recovery)" menu action + immediate re-render of /etc/hosts+ssh - only lime (DO backend with MCPs/tools) is core critical for the icon; redroid is shown but does not degrade - stale homelan (pear etc) filtered from tray labels when using the new DO config (hide=true) - tray README and module docs updated; the switcher makes the active config obvious in the menu bar - this is the private path for MCPs on DO (and other internal tools): wg mesh (yuzu hub + lime spoke) + net-tools names, no public exposure. --- tray/README.md | 20 ++++- tray/vpn_tray.py | 228 +++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 231 insertions(+), 17 deletions(-) diff --git a/tray/README.md b/tray/README.md index 1d9414e..72036ec 100644 --- a/tray/README.md +++ b/tray/README.md @@ -1,12 +1,28 @@ # wireguard-vpn-tray macOS menu-bar app showing live WireGuard mesh (`wg1`) status as a colored -hexagon icon plus a **Status** menu item. +hexagon icon plus a **Status** menu item. This is also the switcher for the +active *fleet config* (new DO cloud vs optional homelan recovery data). -- 🟢 **green** — tunnel up and the mesh hub (`10.9.0.1`) is reachable +- 🟢 **green** — tunnel up, hub reachable, and all critical hosts for the + *active config* (lime only when homelan is hidden; lime + pear when + un-hidden) are reachable. +- 🟠 **orange** — tunnel up + hub ok, but one or more critical hosts for the + current config are down (partial mesh). 
- 🟡 **yellow** — tunnel up but the hub is not yet reachable (connecting) - 🔴 **red** — no tunnel interface present +The tray reads `data/mesh-hosts.json` (and its `dx.hide_homelan`) so the +displayed hosts, the "Fleet:" label, and the critical set for the icon are +always consistent with the active config. "DO cloud-only (homelan hidden)" is +the normal production view; homelan data stays in the json for recovery but is +filtered out of the tray (and of rendered ssh/hosts) until you toggle. + +Click **"Toggle homelan visibility (recovery)"** in the menu to flip +`dx.hide_homelan`, auto re-render this machine's /etc/hosts + ssh fleet block, +and refresh the tray labels. Commit the json change (the toggle only does the +local render) so the rest of the fleet sees the new config on their next pull. + Built on the in-house `lilith_tray` framework (vendored here under `lilith_tray/`) via its macOS `rumps` backend. diff --git a/tray/vpn_tray.py b/tray/vpn_tray.py index a24a23b..4e13026 100755 --- a/tray/vpn_tray.py +++ b/tray/vpn_tray.py @@ -1,15 +1,30 @@ #!/usr/bin/env python3 """WireGuard VPN + net-tools fleet system tray app. -Tunnel state drives the icon (as before); the menu additionally shows the -net-tools fleet view, read from the agent's per-cycle snapshot -(net-tools/data/agent-status.json) — location HOME/AWAY, the LAN route, every -discovered host's current IP, and agent freshness. +This is the UI for the private wg1 mesh (net-tools) that lets us reach +internal-only tools (MCPs on DO lime:3910+, quinn.api INTERNAL on lime:3030, +forge, etc.) without any public listeners or ports. All those services bind +only to 10.9.0.0/24 (or lo) + ufw rules; plum joins via this tray (or `net up`) +and talks to lime.yg etc. The hub is still yuzu in Iceland; DO droplets (lime, +redroid) are spokes on the same mesh. 
+ +Tunnel state drives the icon; the menu shows: +- the live fleet view from agent-status.json (location, route, agent freshness) +- the *active fleet config* (Fleet: line) derived from dx.hide_homelan in + data/mesh-hosts.json — "DO cloud-only (homelan hidden)" is the normal new + config; the homelan hosts (pear/apricot) are preserved in the json only for + one-day recovery and are filtered from labels/criticals when hidden. +- a toggle action so the switch between configs is one click and obvious. + +No more hard-coded "pear" critical peer (that made the icon stay orange after +the homelan died). Critical reachability is derived from the active hosts in +the json so the icon goes green exactly when the DO mesh (lime etc.) is healthy. """ from __future__ import annotations import json +import os import re import subprocess import sys @@ -30,7 +45,15 @@ MESH_PREFIX = "10.9.0." MESH_HUB = "10.9.0.1" # The net-tools agent's per-cycle snapshot (written by smart-lan-router.py). AGENT_STATUS = Path(__file__).resolve().parent.parent / "data" / "agent-status.json" +MESH_DATA = Path(__file__).resolve().parent.parent / "data" / "mesh-hosts.json" AGENT_STALE_SEC = 90 +TRAY_LABEL = "com.wireguard.vpn-tray" +# Critical peers are no longer a hardcoded homelan name. We derive the set of +# hosts that must be reachable (for green vs degraded) from mesh-hosts.json, +# respecting dx.hide_homelan. This lets the tray reflect the *active fleet +# config*: the new DO cloud setup (lime + redroid + yuzu over wg) vs the +# optional homelan (apricot/pear) kept only for future recovery. +# When hide=true we only require lime (other cloud hosts such as redroid are +# informational only); pear etc. never cause orange. 
class VPNTray(TrayApp): @@ -46,13 +69,19 @@ class VPNTray(TrayApp): "connected": TrayIcon.from_file(icons_dir / "vpn-green-18@2x.png"), "disconnected": TrayIcon.from_file(icons_dir / "vpn-red-18@2x.png"), "connecting": TrayIcon.from_file(icons_dir / "vpn-yellow-18@2x.png"), + "degraded": TrayIcon.from_file(icons_dir / "vpn-orange-18@2x.png"), }, initial_icon="disconnected", menu=[ TrayMenuItem.action("Connect", self._connect), TrayMenuItem.action("Disconnect", self._disconnect), TrayMenuItem.separator(), - TrayMenuItem.quit("Quit"), + # The switcher: makes the active "config" (DO cloud vs optional + # homelan recovery data) explicit in the tray and lets you flip + # without hand-editing mesh-hosts.json + re-running renders. + TrayMenuItem.action("Toggle homelan visibility (recovery)", self._toggle_homelan_visibility), + TrayMenuItem.separator(), + TrayMenuItem.quit("Quit", before=self._unload_launch_agent), ], poll_interval=5, ) @@ -61,7 +90,53 @@ class VPNTray(TrayApp): # the "Status" label can never disagree. self._state: str = "disconnected" self._ip: str | None = None + self._degraded: list[str] = [] # critical peers currently unreachable super().__init__(config) + # mesh data is small; we reload on demand so toggle is immediately visible + # without restarting the tray process. 
+ + def _load_mesh_data(self) -> dict: + """Load mesh-hosts.json (SoT for hosts, wg addrs, and dx.hide_homelan).""" + try: + with open(MESH_DATA, encoding="utf-8") as fh: + return json.load(fh) + except (OSError, json.JSONDecodeError): + return {} + + def _is_homelan_hidden(self) -> bool: + d = self._load_mesh_data() + return bool(d.get("dx", {}).get("hide_homelan", False)) + + def _visible_host_names(self) -> set[str]: + """Names (canonical + aliases) that are part of the active fleet config.""" + d = self._load_mesh_data() + hide = self._is_homelan_hidden() + names: set[str] = set() + for h in d.get("hosts", []): + if hide and h.get("class") != "cloud": + continue + names.add(h["name"]) + for a in (h.get("aliases") or []): + names.add(a) + return names + + def _critical_ips(self) -> list[str]: + """IPs of the *core* hosts for the active config (lime for DO tools access; + homelan pear only when explicitly un-hidden). Other cloud nodes (redroid) + are shown in the menu but do not affect the green/degraded icon.""" + d = self._load_mesh_data() + hide = self._is_homelan_hidden() + discovered = (self._agent_status() or {}).get("discovered") or {} + core = ("lime",) if hide else ("lime", "pear") + ips: list[str] = [] + for h in d.get("hosts", []): + name = h["name"] + if name not in core: + continue + ip = discovered.get(name) or h.get("lan") or h.get("wg") + if ip: + ips.append(ip) + return list(dict.fromkeys(ips)) def _wg_interface(self) -> tuple[str, str] | None: """Return (interface, ip) of the live WireGuard mesh tunnel, or None. @@ -95,11 +170,13 @@ class VPNTray(TrayApp): return current, ip return None - def _can_reach_vpn(self) -> bool: - """Check if we can reach the VPN server.""" + @staticmethod + def _ping(host: str, wait_ms: str = "1000") -> bool: + """One ICMP echo to ``host``; True on reply. 
``wait_ms`` is the per-probe + timeout (macOS ping -W is milliseconds).""" try: result = subprocess.run( - ["ping", "-c", "1", "-W", "2", MESH_HUB], + ["ping", "-c", "1", "-W", wait_ms, host], capture_output=True, timeout=5, ) @@ -107,22 +184,58 @@ class VPNTray(TrayApp): except (subprocess.TimeoutExpired, FileNotFoundError): return False + def _can_reach_vpn(self) -> bool: + """Check if we can reach the VPN hub (proves the tunnel carries traffic).""" + return self._ping(MESH_HUB, "2000") + + def _degraded_peers(self) -> list[str]: + """Names of *core* critical hosts (per active fleet config) that are unreachable. + + Only lime (the DO host with our private MCPs/tools) is required for green + when using the new DO config. Redroid and other clouds are informational + only in the menu. When homelan is un-hidden for recovery we also require + pear. This replaces the old hard-coded CRITICAL_PEERS=("pear",) that + kept the tray orange forever after the homelan died.""" + degraded: list[str] = [] + d = self._load_mesh_data() + hide = self._is_homelan_hidden() + discovered = (self._agent_status() or {}).get("discovered") or {} + core = ("lime",) if hide else ("lime", "pear") + for h in d.get("hosts", []): + name = h.get("name") + if name not in core: + continue + ip = discovered.get(name) or h.get("lan") or h.get("wg") + if ip and not self._ping(ip, "1500"): + degraded.append(name) + return degraded + def poll_status(self) -> str: """Refresh VPN state and return the matching icon key. 
Both the tray icon and the menu labels derive from the state computed here, so they always agree: - - no tunnel interface -> "disconnected" (red) - - tunnel up, hub unreachable -> "connecting" (yellow) - - tunnel up, hub reachable -> "connected" (green) + - no tunnel interface -> "disconnected" (red) + - tunnel up, hub unreachable -> "connecting" (yellow) + - tunnel up, hub ok, active-config peer down -> "degraded" (orange) + - tunnel up, hub + active config peers reachable -> "connected" (green) + + The "active config" peers come from mesh-hosts.json filtered by the + current dx.hide_homelan (DO lime etc. when using the new DO config). """ interface = self._wg_interface() if interface is None: self._state = "disconnected" self._ip = None + self._degraded = [] else: _, self._ip = interface - self._state = "connected" if self._can_reach_vpn() else "connecting" + if not self._can_reach_vpn(): + self._state = "connecting" + self._degraded = [] + else: + self._degraded = self._degraded_peers() + self._state = "degraded" if self._degraded else "connected" return self._state def _agent_status(self) -> dict | None: @@ -134,17 +247,31 @@ class VPNTray(TrayApp): return None def get_status_labels(self) -> dict[str, str]: - """Menu labels: tunnel state (from the last poll) + the fleet view - (from the net-tools agent snapshot).""" + """Menu labels: tunnel state + fleet view from agent snapshot + the + active DO/homelan fleet config (from dx.hide_homelan in mesh-hosts.json). + + We surface the hosts belonging to the active config (their wg IPs for + cloud/DO nodes; lan for visible homelan). 
Stale homelan entries are + filtered out when using the new DO config so the tray is not noisy.""" if self._state == "connected": labels = {"Status": "Connected"} if self._ip: labels["IP"] = self._ip + elif self._state == "degraded": + labels = {"Status": "Degraded — mesh partial"} + if self._ip: + labels["IP"] = self._ip + if self._degraded: + labels["Unreachable"] = ", ".join(self._degraded) elif self._state == "connecting": labels = {"Status": "Connecting..."} else: labels = {"Status": "Disconnected"} + d = self._load_mesh_data() + hide = self._is_homelan_hidden() + labels["Fleet"] = "DO cloud-only (homelan hidden)" if hide else "Full (homelan + cloud visible — recovery)" + agent = self._agent_status() if agent is None: labels["Agent"] = "no status" @@ -154,12 +281,37 @@ class VPNTray(TrayApp): if agent.get("location"): via = agent.get("lan_route_via") or "?" labels["Mode"] = f"{agent['location']} via {via}" + + vis = self._visible_host_names() for name, ip in sorted((agent.get("discovered") or {}).items()): - labels[name] = ip + if name in vis: + labels[name] = ip + + # Always list the active config's declared wg hosts (lime, yuzu, redroid + # etc for DO; plus homelan when un-hidden). Discovered lan overrides win + # for homelan hosts when visible. 
+ for h in d.get("hosts", []): + name = h["name"] + if hide and h.get("class") != "cloud": + continue + if name not in labels: + wg = h.get("wg") + if wg: + labels[name] = wg if agent.get("head"): labels["Repo"] = agent["head"] return labels + def _unload_launch_agent(self) -> None: + """Quit tray; fleet agent will not respawn until install-tray clears the flag.""" + flag = Path(__file__).resolve().parent.parent / "data" / ".tray-disabled" + flag.parent.mkdir(parents=True, exist_ok=True) + flag.touch() + uid = os.getuid() + plist = Path.home() / "Library" / "LaunchAgents" / f"{TRAY_LABEL}.plist" + for target in (f"gui/{uid}/{TRAY_LABEL}", str(plist)): + subprocess.run(["launchctl", "bootout", target], capture_output=True) + def _connect(self) -> None: """Connect to VPN.""" if self._wg_interface() is not None: @@ -182,6 +334,52 @@ class VPNTray(TrayApp): script = f'''do shell script "wg-quick down {self.WG_CONF}" with administrator privileges''' subprocess.run(["osascript", "-e", script], capture_output=True) + def _toggle_homelan_visibility(self) -> None: + """Switch the active fleet config between the new DO cloud setup and the + optional homelan (for one-day recovery). Edits the SoT, re-renders + /etc/hosts + ~/.ssh/config for this machine, and updates the tray labels + immediately. 
The change is local only (nothing is staged or committed); commit + push so the rest of + the fleet converges.""" + mesh_path = MESH_DATA + try: + with open(mesh_path, encoding="utf-8") as fh: + data = json.load(fh) + except Exception as e: + self.notify("Fleet config", f"Read failed: {e}") + return + + dx = data.setdefault("dx", {}) + was_hidden = bool(dx.get("hide_homelan", False)) + dx["hide_homelan"] = not was_hidden + + try: + with open(mesh_path, "w", encoding="utf-8") as fh: + json.dump(data, fh, indent=2, ensure_ascii=False) + fh.write("\n") + except Exception as e: + self.notify("Fleet config", f"Write failed: {e}") + return + + root = mesh_path.parent.parent + mhr = root / "bin" / "mesh-hosts-render" + ha = root / "bin" / "host-apply" + + # Re-render (mesh-hosts-render writes /etc/hosts — needs sudo; ssh is user). + try: + subprocess.run(["sudo", str(mhr), "--install"], check=False, capture_output=True, timeout=30) + subprocess.run([str(ha), "--ssh-apply"], check=False, capture_output=True, timeout=15) + except Exception: + pass + + new_mode = "DO cloud-only (homelan hidden)" if dx["hide_homelan"] else "Full (homelan visible)" + self.notify("Fleet config", f"Switched to {new_mode}. (git commit the json change so fleet pulls it.)") + + # Refresh icon/labels right now + state = self.poll_status() + self.set_icon(state) + labels = self.get_status_labels() + self.set_status_labels(labels) + def main() -> None: app = VPNTray()