feat(@projects/@claire): per-host orchestrator service ([<host>] claire)

Peer nodes can run a local orchestrator registered with claude.ai/code as
[<host>] claire, installed uniformly via deploy-agent.sh (not hand-wired).
- agent.orchestrator_enable + orchestrator.mcp_url config (round-trip safe)
- bootstrap points orchestrator MCP at central endpoint when set
- peer lifespan bootstraps + heartbeats the orchestrator (NO rounds loop)
- claire agent enable-orchestrator CLI + deploy-agent.sh wiring

(manual commit via ALLOW_COMMIT=1 — autocommit LLM was timing out on claire)
This commit is contained in:
Natalie 2026-06-02 20:52:54 -07:00
parent deca1a280e
commit 39ccf5a9f5
5 changed files with 96 additions and 1 deletions

View file

@ -62,6 +62,9 @@ ssh "$HOST" '[ -s ~/.vault/claire-sync-secret.txt ]' \
say "[$HOST] configure peer (url only — secret is vault-sourced)"
remote-run "$HOST" "cd ~/$REMOTE_DIR && .venv/bin/claire agent add-peer --url '$PLUM_URL' && chmod 600 ~/.config/claire/claire.toml"
say "[$HOST] enable local orchestrator ([<host>] claire) — MCP → plum's central endpoint"
remote-run "$HOST" "cd ~/$REMOTE_DIR && .venv/bin/claire agent enable-orchestrator --mcp-url '$PLUM_URL/mcp/' && chmod 600 ~/.config/claire/claire.toml"
say "[$HOST] install + enable systemd --user unit"
remote-run "$HOST" "
mkdir -p ~/.config/systemd/user

View file

@ -183,6 +183,39 @@ def agent_add_peer(
console.print(f"[green]✓[/green] peers: {[p.url for p in peers]}")
@agent_app.command("enable-orchestrator")
def agent_enable_orchestrator(
    mcp_url: Annotated[
        str,
        typer.Option("--mcp-url", help="Central MCP endpoint, e.g. http://10.9.0.3:8767/mcp/"),
    ],
) -> None:
    """Enable a LOCAL orchestrator on this peer node (registers `[<host>] claire`).

    Invoked by deploy-agent.sh. Turns on `agent.orchestrator_enable` and
    directs the orchestrator's `claire:*` MCP tools at the central endpoint
    given by `mcp_url`, because peer mode serves no MCP locally. Any stale
    `session_uuid` is cleared so the next agent start bootstraps a fresh
    orchestrator in the local default workspace. The peer runs NO rounds
    loop — only the web-mode host (plum) auto-dispatches.

    Idempotent: re-running with the same `mcp_url` persists the same config.
    """
    from .config import persist_config

    current = load_or_init()

    # Agent side: just flip the opt-in switch, leaving other settings alone.
    agent_section = current.agent.model_copy(update={"orchestrator_enable": True})

    # Orchestrator side: pin it to this machine's default workspace, drop the
    # remembered session, and record the central MCP endpoint.
    orchestrator_overrides = {
        "host": "local",
        "cwd": None,
        "session_uuid": None,
        "mcp_url": mcp_url,
    }
    orchestrator_section = current.orchestrator.model_copy(update=orchestrator_overrides)

    persist_config(
        current.model_copy(
            update={"agent": agent_section, "orchestrator": orchestrator_section}
        )
    )
    console.print(f"[green]✓[/green] local orchestrator enabled — MCP → {mcp_url}")
@agent_app.command("status")
def agent_status() -> None:
"""Local-DB HUD for this peer node — works OFFLINE (reads the synced DB

View file

@ -75,6 +75,14 @@ class AgentConfig(_Strict):
# capped at `auto_continue_max` consecutive nudges per session.
auto_continue: Literal["off", "dry-run", "on"] = "off"
auto_continue_max: int = Field(default=3, ge=1, le=20)
# When True, this peer node ALSO bootstraps + registers a local orchestrator
# session (`[<host>] claire` in the remote dev list) alongside its sync/
# supervisor/telemetry loops — so every host is remote-controllable. The
# peer orchestrator points its `claire:*` MCP tools at the central endpoint
# (`orchestrator.mcp_url`, set to plum) since peer mode serves no MCP of its
# own; it deliberately runs NO rounds loop (only plum auto-dispatches), so
# multiple hosts never race to dispatch the same shared-DB work.
orchestrator_enable: bool = False
class VaultConfig(_Strict):
@ -105,6 +113,12 @@ class OrchestratorConfig(_Strict):
session_uuid: str | None = None
host: str = "local"
# Explicit MCP endpoint for this orchestrator's `claire:*` tools. None =
# derive from local `[web]` (the plum/web-mode default). On a PEER node the
# local process serves no MCP, so this is set (by deploy) to the central
# endpoint, e.g. `http://10.9.0.3:8767/mcp/`, so the peer's orchestrator
# talks to the same shared DB as everyone else.
mcp_url: str | None = None
# Absolute path on `host` for the orchestrator workspace. None falls back
# to a local-machine default (~/.local/share/claire/orchestrator). For
# remote hosts (apricot, etc.) this MUST be set — Path.home() on the
@ -301,6 +315,7 @@ def _serialize(cfg: ClaireConfig) -> str:
or orch.host != "local"
or orch.cwd is not None
or orch.reply_timeout_s != 180
or orch.mcp_url is not None
):
lines.append("")
lines.append("[orchestrator]")
@ -314,6 +329,8 @@ def _serialize(cfg: ClaireConfig) -> str:
lines.append(f"rounds_interval_s = {orch.rounds_interval_s}")
if orch.autonomous_dispatch:
lines.append("autonomous_dispatch = true")
if orch.mcp_url is not None:
lines.append(f'mcp_url = "{orch.mcp_url}"')
# Emit [budget] / [limits] only when non-default.
bud = cfg.budget
if bud.daily_token_cap != 0 or bud.low_priority_floor != 0.8:
@ -346,6 +363,9 @@ def _serialize(cfg: ClaireConfig) -> str:
)
lines.append(f'auto_continue = "{ag.auto_continue}"')
lines.append(f"auto_continue_max = {ag.auto_continue_max}")
lines.append(
f"orchestrator_enable = {str(ag.orchestrator_enable).lower()}"
)
# Emit [vault] only when configured (source-of-truth host only).
vlt = cfg.vault
if vlt != VaultConfig():

View file

@ -535,7 +535,12 @@ def ensure_running(
cfg = load_or_init(config_path)
effective_cwd, local_staging = _resolved_cwd(cfg.orchestrator)
is_remote = cfg.orchestrator.host != "local"
mcp_url = f"http://{_client_host(cfg.web.host)}:{cfg.web.port}/mcp/"
# Peer nodes serve no MCP of their own, so they set an explicit central
# endpoint; web-mode (plum) leaves it None and derives from local [web].
mcp_url = (
cfg.orchestrator.mcp_url
or f"http://{_client_host(cfg.web.host)}:{cfg.web.port}/mcp/"
)
# Always (re)write the workspace scaffold locally — cheap, idempotent.
# For remote, this is the staging dir we rsync from below.

View file

@ -85,8 +85,42 @@ def create_app(
"""
if peer_mode:
from ..agent import start_agent_loops
from ..config import load_or_init
tasks = start_agent_loops(config_path=config_path, db_path=db_path)
# Optionally run a LOCAL orchestrator so this host is remote-
# controllable (`[<host>] claire`). NO rounds loop here — peers are
# driveable but only plum (web mode) auto-dispatches, so multiple
# hosts never race to dispatch the same shared-DB work. The
# orchestrator's MCP points at the central endpoint
# (`orchestrator.mcp_url`), since peer mode serves no MCP locally.
if load_or_init(config_path).agent.orchestrator_enable:
from ..orchestrator.bootstrap import ensure_running
async def _peer_orch_once() -> None:
    """Make one orchestrator bootstrap attempt and log how it went.

    `ensure_running` is called through `asyncio.to_thread`, so the call
    does not block the event loop. Any `Exception` it raises is logged as
    a warning and swallowed, keeping the peer process alive.
    """
    try:
        session_id = await asyncio.to_thread(ensure_running, config_path=config_path)
    except Exception as err:  # noqa: BLE001 — never crash the peer
        logger.warning("peer orchestrator bootstrap raised: %s", err)
    else:
        # A falsy result is reported as "(not live yet)" — presumably the
        # session has not come up; confirm against ensure_running.
        logger.info("peer orchestrator: %s", session_id or "(not live yet)")
async def _peer_orch_heartbeat() -> None:
    """Repeat the orchestrator bootstrap check every 60 seconds.

    Each cycle sleeps first and then calls `_peer_orch_once`. The loop
    exits quietly when the task is cancelled; any other exception is
    logged as a warning and the loop carries on.
    """
    pause_s = 60
    while True:
        try:
            await asyncio.sleep(pause_s)
            await _peer_orch_once()
        except asyncio.CancelledError:
            # Cancellation is absorbed here: the coroutine finishes normally
            # rather than propagating CancelledError to whoever awaits it.
            break
        except Exception as err:  # noqa: BLE001
            logger.warning("peer orchestrator heartbeat raised: %s", err)
await _peer_orch_once()
tasks.append(asyncio.create_task(_peer_orch_heartbeat()))
try:
yield
finally: