feat(@projects/@claire): per-host orchestrator service ([<host>] claire)

Peer nodes can run a local orchestrator registered with claude.ai/code as [<host>] claire, installed uniformly via deploy-agent.sh (not hand-wired).

- agent.orchestrator_enable + orchestrator.mcp_url config (round-trip safe)
- bootstrap points orchestrator MCP at central endpoint when set
- peer lifespan bootstraps + heartbeats the orchestrator (NO rounds loop)
- claire agent enable-orchestrator CLI + deploy-agent.sh wiring

(manual commit via ALLOW_COMMIT=1 — autocommit LLM was timing out on claire)
2026-06-02 20:52:54 -07:00 · 2026-06-02 20:52:54 -07:00 · 39ccf5a9f5
commit 39ccf5a9f5
parent deca1a280e
5 changed files with 96 additions and 1 deletions
--- a/scripts/deploy-agent.sh
+++ b/scripts/deploy-agent.sh
@ -62,6 +62,9 @@ ssh "$HOST" '[ -s ~/.vault/claire-sync-secret.txt ]' \
 say "[$HOST] configure peer (url only — secret is vault-sourced)"
 remote-run "$HOST" "cd ~/$REMOTE_DIR && .venv/bin/claire agent add-peer --url '$PLUM_URL' && chmod 600 ~/.config/claire/claire.toml"

+say "[$HOST] enable local orchestrator ([<host>] claire) — MCP → plum's central endpoint"
+remote-run "$HOST" "cd ~/$REMOTE_DIR && .venv/bin/claire agent enable-orchestrator --mcp-url '$PLUM_URL/mcp/' && chmod 600 ~/.config/claire/claire.toml"
+
 say "[$HOST] install + enable systemd --user unit"
 remote-run "$HOST" "
  mkdir -p ~/.config/systemd/user
--- a/src/claire/cli.py
+++ b/src/claire/cli.py
@ -183,6 +183,39 @@ def agent_add_peer(
    console.print(f"[green]✓[/green] peers: {[p.url for p in peers]}")


+@agent_app.command("enable-orchestrator")
+def agent_enable_orchestrator(
+    mcp_url: Annotated[
+        str,
+        typer.Option("--mcp-url", help="Central MCP endpoint, e.g. http://10.9.0.3:8767/mcp/"),
+    ],
+) -> None:
+    """Enable a LOCAL orchestrator on this peer node (registers `[<host>] claire`).
+
+    Used by deploy-agent.sh. Sets `agent.orchestrator_enable=True` and points
+    the orchestrator's `claire:*` MCP tools at the central endpoint (peer mode
+    serves no MCP locally). Clears any stale `session_uuid` so the next agent
+    start bootstraps a fresh orchestrator in the local default workspace. The
+    peer runs NO rounds loop — only the web-mode host (plum) auto-dispatches.
+    Idempotent.
+    """
+    from .config import persist_config
+
+    cfg = load_or_init()
+    cfg = cfg.model_copy(
+        update={
+            "agent": cfg.agent.model_copy(update={"orchestrator_enable": True}),
+            "orchestrator": cfg.orchestrator.model_copy(
+                update={"host": "local", "cwd": None, "session_uuid": None, "mcp_url": mcp_url}
+            ),
+        }
+    )
+    persist_config(cfg)
+    console.print(
+        f"[green]✓[/green] local orchestrator enabled — MCP → {mcp_url}"
+    )
+
+
@agent_app.command("status")
 def agent_status() -> None:
    """Local-DB HUD for this peer node — works OFFLINE (reads the synced DB
--- a/src/claire/config.py
+++ b/src/claire/config.py
@ -75,6 +75,14 @@ class AgentConfig(_Strict):
    # capped at `auto_continue_max` consecutive nudges per session.
    auto_continue: Literal["off", "dry-run", "on"] = "off"
    auto_continue_max: int = Field(default=3, ge=1, le=20)
+    # When True, this peer node ALSO bootstraps + registers a local orchestrator
+    # session (`[<host>] claire` in the remote dev list) alongside its sync/
+    # supervisor/telemetry loops — so every host is remote-controllable. The
+    # peer orchestrator points its `claire:*` MCP tools at the central endpoint
+    # (`orchestrator.mcp_url`, set to plum) since peer mode serves no MCP of its
+    # own; it deliberately runs NO rounds loop (only plum auto-dispatches), so
+    # multiple hosts never race to dispatch the same shared-DB work.
+    orchestrator_enable: bool = False


 class VaultConfig(_Strict):
@ -105,6 +113,12 @@ class OrchestratorConfig(_Strict):

    session_uuid: str | None = None
    host: str = "local"
+    # Explicit MCP endpoint for this orchestrator's `claire:*` tools. None =
+    # derive from local `[web]` (the plum/web-mode default). On a PEER node the
+    # local process serves no MCP, so this is set (by deploy) to the central
+    # endpoint, e.g. `http://10.9.0.3:8767/mcp/`, so the peer's orchestrator
+    # talks to the same shared DB as everyone else.
+    mcp_url: str | None = None
    # Absolute path on `host` for the orchestrator workspace. None falls back
    # to a local-machine default (~/.local/share/claire/orchestrator). For
    # remote hosts (apricot, etc.) this MUST be set — Path.home() on the
@ -301,6 +315,7 @@ def _serialize(cfg: ClaireConfig) -> str:
        or orch.host != "local"
        or orch.cwd is not None
        or orch.reply_timeout_s != 180
+        or orch.mcp_url is not None
    ):
        lines.append("")
        lines.append("[orchestrator]")
@ -314,6 +329,8 @@ def _serialize(cfg: ClaireConfig) -> str:
            lines.append(f"rounds_interval_s = {orch.rounds_interval_s}")
        if orch.autonomous_dispatch:
            lines.append("autonomous_dispatch = true")
+        if orch.mcp_url is not None:
+            lines.append(f'mcp_url = "{orch.mcp_url}"')
    # Emit [budget] / [limits] only when non-default.
    bud = cfg.budget
    if bud.daily_token_cap != 0 or bud.low_priority_floor != 0.8:
@ -346,6 +363,9 @@ def _serialize(cfg: ClaireConfig) -> str:
        )
        lines.append(f'auto_continue = "{ag.auto_continue}"')
        lines.append(f"auto_continue_max = {ag.auto_continue_max}")
+        lines.append(
+            f"orchestrator_enable = {str(ag.orchestrator_enable).lower()}"
+        )
    # Emit [vault] only when configured (source-of-truth host only).
    vlt = cfg.vault
    if vlt != VaultConfig():
--- a/src/claire/orchestrator/bootstrap.py
+++ b/src/claire/orchestrator/bootstrap.py
@ -535,7 +535,12 @@ def ensure_running(
    cfg = load_or_init(config_path)
    effective_cwd, local_staging = _resolved_cwd(cfg.orchestrator)
    is_remote = cfg.orchestrator.host != "local"
-    mcp_url = f"http://{_client_host(cfg.web.host)}:{cfg.web.port}/mcp/"
+    # Peer nodes serve no MCP of their own, so they set an explicit central
+    # endpoint; web-mode (plum) leaves it None and derives from local [web].
+    mcp_url = (
+        cfg.orchestrator.mcp_url
+        or f"http://{_client_host(cfg.web.host)}:{cfg.web.port}/mcp/"
+    )

    # Always (re)write the workspace scaffold locally — cheap, idempotent.
    # For remote, this is the staging dir we rsync from below.
--- a/src/claire/web/app.py
+++ b/src/claire/web/app.py
@ -85,8 +85,42 @@ def create_app(
        """
        if peer_mode:
            from ..agent import start_agent_loops
+            from ..config import load_or_init

            tasks = start_agent_loops(config_path=config_path, db_path=db_path)
+
+            # Optionally run a LOCAL orchestrator so this host is remote-
+            # controllable (`[<host>] claire`). NO rounds loop here — peers are
+            # driveable but only plum (web mode) auto-dispatches, so multiple
+            # hosts never race to dispatch the same shared-DB work. The
+            # orchestrator's MCP points at the central endpoint
+            # (`orchestrator.mcp_url`), since peer mode serves no MCP locally.
+            if load_or_init(config_path).agent.orchestrator_enable:
+                from ..orchestrator.bootstrap import ensure_running
+
+                async def _peer_orch_once() -> None:
+                    try:
+                        uuid = await asyncio.to_thread(
+                            ensure_running, config_path=config_path
+                        )
+                    except Exception as exc:  # noqa: BLE001 — never crash the peer
+                        logger.warning("peer orchestrator bootstrap raised: %s", exc)
+                        return
+                    logger.info("peer orchestrator: %s", uuid or "(not live yet)")
+
+                async def _peer_orch_heartbeat() -> None:
+                    while True:
+                        try:
+                            await asyncio.sleep(60)
+                            await _peer_orch_once()
+                        except asyncio.CancelledError:
+                            return
+                        except Exception as exc:  # noqa: BLE001
+                            logger.warning("peer orchestrator heartbeat raised: %s", exc)
+
+                await _peer_orch_once()
+                tasks.append(asyncio.create_task(_peer_orch_heartbeat()))
+
            try:
                yield
            finally: