claire/scripts/deploy-agent.sh
Natalie ce6948d6e9 feat(@projects/@claire): rounds skip-gate loop + fleet release tool
Wire the rounds timer to a pure-Python skip gate so claire-serve only wakes
the orchestrator model when worker fleet state changed (not every tick):
- web/rounds.py: fleet_fingerprint() over worker sessions (minus the
  orchestrator's own) + open tasks; should_skip_round() with heartbeat floor.
- web/app.py: _rounds_loop tracks last fingerprint + consecutive skips.
- excludes the orchestrator's own session/chat so a round's self-side-effects
  can't defeat the gate.
Add scripts/release-fleet.sh (test -> deploy apricot+black -> restart plum
services) and harden deploy-agent.sh's cosmetic status check against a SIGPIPE
false-abort. 3 new discriminating tests; 349 pass.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-02 22:58:41 -07:00

84 lines
4 KiB
Bash
Executable file

#!/usr/bin/env bash
#
# Deploy the headless `claire agent` peer node to a Linux host (apricot|black).
# Runs FROM plum. Idempotent. Syncs the code, seeds the vault (which holds the
# HMAC sync secret), writes the peer config (URL only — the secret is NOT
# injected into it), and installs the systemd --user unit.
#
# scripts/deploy-agent.sh apricot
#
# Requires: `remote-run` on PATH (~/Code/@scripts/session-tools), ssh + rsync
# access, python3.12+ on the remote (uv is used when present, otherwise plain
# venv + pip), and NTP-synced clocks (HMAC skew window 300s).
# Strict mode: abort on a failing command, an unset variable, or a failed
# pipeline stage.
set -o errexit -o nounset -o pipefail

# Target host: mandatory first positional argument.
HOST="${1:?usage: deploy-agent.sh <host>}"

# Absolute path of the project root, i.e. the parent of this script's directory.
script_dir="$(dirname "${BASH_SOURCE[0]}")"
SRC="$(cd "$script_dir/.." && pwd)"

# Project location on the remote, relative to the remote $HOME.
REMOTE_DIR="Code/@projects/@claire"

# Local claire config file; the CLAIRE_TOML environment variable overrides it.
PLUM_TOML="${CLAIRE_TOML:-$HOME/.config/claire/claire.toml}"
# Print a progress line on stdout: a bold-magenta "▸" marker followed by all
# arguments joined into one message.
say() {
  local message="$*"
  printf '\033[1;35m▸\033[0m %s\n' "$message"
}
# Plum's bind URL — the peer's event-sync target. The HMAC secret is NO LONGER
# injected here: it lives in the vault (~/.vault/claire-sync-secret.txt), seeded
# below and read at runtime, so rotation is just a vault push.
#
# The URL is built from the [web] table of $PLUM_TOML (defaults: host
# 127.0.0.1, port 8765). Wildcard or empty bind addresses are mapped to
# 127.0.0.1, and a bare IPv6 literal is bracketed so the URL stays valid.
# NOTE(review): a loopback URL is only usable by the remote peer if something
# forwards it back to plum — confirm how the peer reaches this address.
if [[ ! -r "$PLUM_TOML" ]]; then
  # Fail with a clear message instead of a Python traceback.
  echo "ERROR: cannot read claire config: $PLUM_TOML" >&2
  exit 1
fi
PLUM_URL="$("$SRC/.venv/bin/python" - "$PLUM_TOML" <<'PY'
import sys, tomllib, pathlib
c = tomllib.loads(pathlib.Path(sys.argv[1]).read_text())
web = c.get("web", {})
host = web.get("host", "127.0.0.1")
if host in ("0.0.0.0", "::", ""):
    host = "127.0.0.1"
# A bare IPv6 literal (e.g. ::1) must be bracketed inside a URL authority.
if ":" in host and not host.startswith("["):
    host = f"[{host}]"
print(f"http://{host}:{web.get('port', 8765)}")
PY
)"
say "plum peer URL = $PLUM_URL"
say "[$HOST] reachability + clock"
# Fail fast when the host cannot be reached non-interactively (8s connect
# timeout, no password prompts).
if ! ssh -o ConnectTimeout=8 -o BatchMode=yes "$HOST" 'true'; then
  echo "ERROR: cannot ssh $HOST" >&2
  exit 1
fi
# Informational only: print the remote NTPSynchronized property, or "unknown"
# when timedatectl cannot report it. The value is not used as a gate.
ssh "$HOST" 'timedatectl show -p NTPSynchronized --value 2>/dev/null || echo unknown'
say "[$HOST] rsync source"
ssh "$HOST" "mkdir -p ~/$REMOTE_DIR"
# Paths that are never mirrored to the remote: local virtualenv, VCS data,
# caches, the host-specific claire.toml, and the web app's node_modules/dist.
sync_excludes=(
  '.venv/' '.git/' '__pycache__/'
  '*.pyc' '.pytest_cache/' '.ruff_cache/'
  'claire.toml'
  'src/claire/web/app/node_modules/'
  'src/claire/web/app/dist/'
)
# Archive mode, compressed, deleting remote files that are gone locally.
rsync_args=(-az --delete)
for pattern in "${sync_excludes[@]}"; do
  rsync_args+=("--exclude=$pattern")
done
rsync "${rsync_args[@]}" "$SRC/" "${HOST}:${REMOTE_DIR}/"
say "[$HOST] install (uv if present, else python venv+pip) + init"
# Assemble the remote command piece by piece. Single-quoted pieces are expanded
# by the REMOTE shell; only $REMOTE_DIR is expanded locally.
# shellcheck disable=SC2016
remote_install='export PATH="$HOME/.local/bin:$PATH"; '
remote_install+="cd ~/$REMOTE_DIR && "
# Preferred path: uv creates the venv if it is missing, then installs editable.
remote_install+='if command -v uv >/dev/null 2>&1; then '
remote_install+='{ [ -d .venv ] || uv venv; }; uv pip install -e .; '
# Fallback path: stdlib venv + pip.
remote_install+='else '
remote_install+='{ [ -d .venv ] || python3 -m venv .venv; }; .venv/bin/pip install -q -e .; '
# Either way, finish by running `claire init` from the fresh environment.
remote_install+='fi && .venv/bin/claire init'
remote-run "$HOST" "$remote_install"
say "[$HOST] seed vault (BEFORE agent starts — it reads the HMAC secret from here)"
ssh "$HOST" 'mkdir -p ~/.vault && chmod 700 ~/.vault'
# Copy the local vault with owner-only permissions (dirs 700, files 600),
# skipping backups and *.prev.txt files.
# NOTE(review): this mirrors the whole local vault, not just the sync secret —
# confirm the peer is meant to receive every entry.
vault_rsync_opts=(
  -az --no-owner --no-group --chmod=D700,F600
  --exclude='.vault-backups/' --exclude='*.prev.txt'
)
rsync "${vault_rsync_opts[@]}" "$HOME/.vault/" "${HOST}:.vault/"
# Gate: the agent will 401 forever without the shared secret present, so stop
# here unless the secret file exists on the remote and is non-empty.
if ! ssh "$HOST" '[ -s ~/.vault/claire-sync-secret.txt ]'; then
  echo "ERROR: ~/.vault/claire-sync-secret.txt missing on $HOST after seed" >&2
  exit 1
fi
# Shared fragments of the two remote commands below. Tildes stay literal inside
# double quotes locally, so the REMOTE shell expands them.
claire_agent="cd ~/$REMOTE_DIR && .venv/bin/claire agent"
lock_config="chmod 600 ~/.config/claire/claire.toml"

say "[$HOST] configure peer (url only — secret is vault-sourced)"
remote-run "$HOST" "$claire_agent add-peer --url '$PLUM_URL' && $lock_config"

say "[$HOST] enable local orchestrator ([<host>] claire) — MCP → plum's central endpoint"
remote-run "$HOST" "$claire_agent enable-orchestrator --mcp-url '$PLUM_URL/mcp/' && $lock_config"
say "[$HOST] install + enable systemd --user unit"
# The snippet below is handed to remote-run as ONE string: \$ and \" defer
# expansion/quoting to the remote shell, while $REMOTE_DIR is expanded locally.
# It sets errexit itself and exits with the is-active status explicitly, so the
# health gate does not depend on which shell options remote-run enables.
remote-run "$HOST" "
set -e
mkdir -p ~/.config/systemd/user
cp ~/$REMOTE_DIR/deployments/systemd/claire-agent.service ~/.config/systemd/user/
systemctl --user daemon-reload
systemctl --user enable claire-agent.service
# restart (not just enable --now) so a redeploy actually loads the new code.
systemctl --user restart claire-agent.service
# Best effort: keep the user manager running without a login session.
loginctl enable-linger \"\$(whoami)\" 2>/dev/null || true
sleep 2
# Real gate: is-active is non-zero iff the unit failed to come up. Capture the
# status instead of aborting right away, so the status dump is still printed
# when the unit is down (which is when it is most useful).
unit_rc=0
systemctl --user is-active claire-agent.service || unit_rc=\$?
# Cosmetic: piping to head closes the pipe early (SIGPIPE), so keep this
# non-fatal or it false-aborts an otherwise-healthy deploy.
systemctl --user --no-pager status claire-agent.service 2>&1 | head -5 || true
exit \"\$unit_rc\"
"
say "[$HOST] done."