diff --git a/.godot.pid b/.godot.pid index 18f8589..e6675a1 100644 --- a/.godot.pid +++ b/.godot.pid @@ -1 +1 @@ -590621 +1446923 diff --git a/.tray.pid b/.tray.pid index 82ecdc8..88044a9 100644 --- a/.tray.pid +++ b/.tray.pid @@ -1 +1 @@ -590622 +1446924 diff --git a/CLAUDE.md b/CLAUDE.md index 2e6293d..44b1d65 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,86 +1,99 @@ # @chobit -Interactive AI companion — Godot 4 desktop app with 3D VRM avatar, voice interaction, pluggable LLM backend. +Interactive AI companion — multi-platform Godot 4 app with 3D VRM avatar, voice interaction, pluggable LLM backend. Godot is the avatar runtime; all ML/GPU inference runs on external services via model-boss. ## Architecture ``` -@chobit/ -├── godot/ # Godot 4.6 project (the companion app) -│ ├── project.godot # Autoloads, main scene, window config -│ ├── addons/ # VRM4Godot, Godot-MToon-Shader -│ ├── audio/ # Audio assets (startup sound, etc.) -│ ├── config/ # Runtime config (auto-generated, gitignored) -│ ├── models/ # VRM model files (.vrm, gitignored) -│ ├── scenes/ -│ │ └── companion.tscn # Main scene — transparent window + avatar -│ ├── scripts/ -│ │ ├── audio/ # sound_engine.gd, sound_config.gd -│ │ ├── autoloads/ # event_bus.gd, companion_config.gd, flight_recorder.gd -│ │ ├── avatar/ # animation_state_machine, expression_controller, gaze_controller, -│ │ │ # idle_animator, lipsync_controller, attention_reactor -│ │ ├── backend/ # llm_client.gd, stt_client.gd, tts_client.gd -│ │ ├── companion/ # companion.gd (main), conversation_orchestrator.gd, -│ │ │ # tray_listener.gd, avatar_hitbox.gd, avatar_rotate.gd -│ │ ├── ui/ # chat_window.gd, context_menu.gd, sound_settings_window.gd -│ │ ├── util/ # node_utils.gd, config_paths.gd, screen_cursor.gd -│ │ ├── voice/ # microphone.gd -│ │ └── window/ # window_drag.gd, window_zoom.gd, edge_snap.gd -│ └── tools/ # Editor helper scripts (list_animations, list_blendshapes, -│ # screenshot.gd, zoom_test.gd) -│ -├── bridge/ # Python sidecar 
— Redis ↔ Godot UDP bridge -│ └── chobit_bridge.py # Forwards lilith-eventbus events into Godot via UDP (port 19700/19701) -│ -├── tray/ # Python sidecar — system tray UI + subprocess manager -│ ├── chobit_tray.py # TrayApp: spawns bridge + vision at startup, listens on port 19701 -│ ├── chobit_board.py # Dashboard UI panel -│ ├── camera_panel.py # Webcam preview panel -│ ├── screen_layout.py # Multi-monitor layout detection -│ └── themes/ # debug.css, miku.css -│ -├── vision/ # Python sidecar — webcam face tracking + gaze estimation -│ └── chobit_vision.py # MediaPipe + imajin-face-tracker → publishes gaze/face events to Redis -│ -├── packages/ -│ └── chobit-core/ # @lilith/chobit-core (TypeScript) -│ └── src/ # types.ts, conversation-state.ts, emotion-extractor.ts, sentence-stream.ts -│ -├── docs/ -│ └── ARCHITECTURE.md # System diagram, attention system, motion mirroring, conversation loop -│ -├── .project/ # Stream-based project management (milestones, handoffs, history) -└── run # Task runner (see Dev Commands below) +Godot (avatar runtime) External services (via network) +└── runs Miku VRM model ←── face pose (model-boss) + ├── audio playback ←── TTS audio (@speech-synthesis) + └── conversation ──► STT/LLM (@speech-synthesis / model-boss) ``` -## Three-Layer Architecture +``` +@chobit/ +├── shared/ # Cross-platform code +│ └── godot/ # Shared GDScript (symlinked into both projects) +│ ├── companion.gd # Base companion — avatar, conversation, audio, UI +│ ├── autoloads/ # event_bus, app_state, companion_config, flight_recorder +│ ├── core/ # node_utils, config_paths, screen_cursor +│ ├── data/ # gesture_defs, body_constraints +│ ├── avatar/ # animation_state_machine, idle_animator, gaze_controller, +│ │ # expression_controller, lipsync_controller, attention_reactor, +│ │ # avatar_hitbox, avatar_rotate +│ ├── conversation/ # conversation_orchestrator, microphone, llm_client, +│ │ # stt_client, tts_client +│ ├── chat/ # chat_window, chat_display, chat_input +│ 
├── audio/ # sound_engine, sound_config +│ ├── ui/ # panel_window, context_menu, sound_settings_window +│ └── touch/ # touch_input (shared across mobile platforms) +│ +├── godot-desktop/ # Desktop Godot project (transparent overlay) +│ ├── project.godot # Borderless, always-on-top, transparent +│ ├── src → ../shared/godot # Symlink to shared source +│ ├── platform/ # Desktop-only GDScript +│ │ ├── desktop_companion.gd # Extends companion.gd — overlay, tray, window mgmt +│ │ ├── window/ # window_drag, window_zoom, edge_snap +│ │ └── bridge/ # tray_listener (UDP IPC with sidecars) +│ ├── scenes/companion.tscn # Desktop scene → platform/desktop_companion.gd +│ ├── addons/ # VRM4Godot, Godot-MToon-Shader +│ ├── models/ # VRM files (.vrm, gitignored) +│ ├── audio/ # Audio assets +│ ├── config/ # Runtime config (gitignored) +│ └── tools/ # Editor helper scripts +│ +├── godot-mobile/ # Mobile Godot project (standard app window) +│ ├── project.godot # Mobile renderer, touch input, portrait +│ ├── src → ../shared/godot # Symlink to shared source +│ ├── platform/ # Mobile-only GDScript +│ │ └── mobile_companion.gd # Extends companion.gd — touch input, on-device camera +│ ├── scenes/companion.tscn # Mobile scene → platform/mobile_companion.gd +│ └── export/ # android.preset, ios.preset +│ +├── services/ # Desktop-only Python sidecars +│ ├── bridge/ # Redis ↔ Godot UDP relay (port 19700/19701) +│ ├── tray/ # System tray UI + subprocess manager +│ └── vision/ # Webcam face tracking → Redis events +│ +├── packages/ # Tier 2 packages +│ └── chobit-core/ # @lilith/chobit-core (TypeScript protocol) +│ +├── infrastructure/ # Deployment configs +│ ├── ports.yaml # Port allocation (local + remote) +│ └── services/chobit.yaml # Service topology +│ +├── app.manifest.yaml # manage-apps manifest +├── docs/ARCHITECTURE.md +└── run # Task runner +``` -### Layer 0: chobit-core (TypeScript protocol) -Shared protocol between Godot client and backend services: -- `ChobitBackend` interface — LLM 
contract -- `SentenceStream` — token-to-sentence buffering -- `EmotionExtractor` — `[emotion]` tag parsing → VRM blendshape mapping -- `ConversationState` FSM +## Platform Architecture -### Layer 1: Godot App (client) -User-facing companion. Handles: -- **3D avatar** — VRM model, skeletal animation, blendshapes, IK -- **Desktop overlay** — transparent always-on-top borderless window -- **Voice I/O** — microphone capture, VAD, audio playback, lipsync -- **AnimationTree** — FSM maps conversation states to body language -- **UI** — chat window, right-click context menu, sound settings +### Shared (both desktop and mobile) +- **Avatar rendering** — VRM model, skeleton, blendshapes, animation state machine +- **Conversation pipeline** — microphone → STT → LLM → TTS → lipsync (all via network to model-boss / @speech-synthesis) +- **Audio** — sound engine, sound config, playback +- **Chat UI** — chat window, display, input +- **Autoloads** — EventBus, AppState, CompanionConfig, FlightRecorder -### Layer 2: Python Sidecars -Three lightweight sidecars run as subprocesses managed by `./run`: -- **`bridge/`** — Redis ↔ Godot UDP relay. `tray/` and `vision/` publish events to Redis; bridge forwards them into Godot on UDP ports 19700/19701 -- **`tray/`** — System tray icon, dashboard panel, webcam preview. Spawns bridge + vision at startup -- **`vision/`** — MediaPipe face tracking. 
Publishes `chobit.face.*` and `chobit.gaze.*` events to Redis +### Desktop-only +- **Transparent overlay** — borderless, always-on-top, transparent background +- **Window management** — drag, zoom, edge snap +- **System tray** — tray sidecar with dashboard, camera preview +- **Vision sidecar** — MediaPipe face tracking via Redis → bridge → UDP +- **Tray listener** — UDP IPC for sidecar communication -### Layer 3: Backend Services -Chobit connects to existing infrastructure over HTTP/WebSocket: -- **@speech-synthesis** — Whisper STT + Chatterbox TTS -- **@model-boss** — GPU lease coordination -- **LLM** — any OpenAI-compatible endpoint, or LifeAI companion service +### Mobile-only (scaffolded) +- **Standard app window** — portrait orientation, mobile renderer +- **Touch input** — tap for interaction, gestures +- **On-device camera** — direct face tracking (no sidecar needed) + +### External Services (network, host-independent) +All ML/GPU inference runs on external services, not localhost: +- **@model-boss** — GPU lease coordination (e.g., apricot:8210) +- **@speech-synthesis** — STT (Whisper) + TTS (Chatterbox) +- **LLM** — OpenAI-compatible endpoint, routed via model-boss ## GDScript Conventions @@ -88,13 +101,28 @@ Chobit connects to existing infrastructure over HTTP/WebSocket: `class_name` registration is unreliable in autoload context. **Always reference non-autoload classes via `preload()` const**: ```gdscript -const WindowDragScript = preload("res://scripts/window/window_drag.gd") -const OrchestratorScript = preload("res://scripts/companion/conversation_orchestrator.gd") +# Shared code uses res://src/ (symlink to shared/godot/) +const OrchestratorScript = preload("res://src/conversation/conversation_orchestrator.gd") -var drag: Node = WindowDragScript.new() +# Platform code uses res://platform/ +const WindowDragScript = preload("res://platform/window/window_drag.gd") ``` -Keep `class_name` in the file for IDE autocomplete. 
All runtime references use preload consts. +### Platform Composition Pattern +`shared/godot/companion.gd` provides setup methods. Platform subclasses compose their own `_ready()`: + +```gdscript +# godot-desktop/platform/desktop_companion.gd +extends "res://src/companion.gd" + +func _ready() -> void: + _setup_window() # desktop-specific: transparent overlay + _setup_drag() # desktop-specific: window dragging + setup_avatar() # shared: VRM model + controllers + setup_sound() # shared: audio engine + setup_conversation() # shared: STT/LLM/TTS pipeline + _setup_tray_listener() # desktop-specific: UDP sidecar IPC +``` ### Signals - `EventBus` is the only cross-system signal hub — never connect signals directly between systems @@ -109,66 +137,39 @@ Keep `class_name` in the file for IDE autocomplete. All runtime references use p - 500-line limit per file — split into focused modules before exceeding ### Node Architecture -Controllers are instantiated in code (`SomeScript.new()` + `add_child()`) — **not** embedded in `.tscn`. The main scene (`companion.tscn`) is the minimal skeleton; all behavior nodes attach at runtime in `companion.gd._ready()`. 
- -## Key Design Decisions - -- **Godot over Tauri/React** — AnimationTree state machines, skeletal IK, physics (hair/cloth), toon shaders, particle effects — all built-in -- **Desktop overlay** — Godot 4 transparent borderless always-on-top window; no wrapper needed -- **Generic LLM interface** — endpoint-agnostic; swap between local LLM, cloud API, or LifeAI by changing one URL -- **Sentence-level streaming** — tokens buffer into sentences, each sent to TTS immediately; first sentence plays while LLM generates the rest -- **Emotion via prompt engineering** — LLM embeds `[emotion]` tags inline; AnimationTree transitions expressions from parsed tags -- **Sidecars over plugins** — ML inference (face tracking) runs in Python, not GDExtension; events cross via Redis → bridge → UDP → Godot +Controllers are instantiated in code (`SomeScript.new()` + `add_child()`) — **not** embedded in `.tscn`. The main scene (`companion.tscn`) is the minimal skeleton; all behavior nodes attach at runtime in the platform companion's `_ready()` (e.g. `desktop_companion.gd`), which calls the shared setup methods provided by `companion.gd`. 
## Dev Commands ```bash -./run [start] # Launch Godot + tray sidecar (tray spawns bridge + vision) -./run stop # Stop everything -./run restart # Stop then start -./run verify # gdlint + gdformat check + Godot import validation -./run editor # Open Godot editor -./run screenshot # Capture screenshot via tools/screenshot.gd +./run [start] # Launch bridge + desktop companion + tray +./run stop # Stop everything +./run restart # Stop then start +./run verify # gdlint + gdformat check (shared + platform) + Godot import +./run editor # Open Godot desktop editor +./run mobile-editor # Open Godot mobile editor +./run screenshot # Capture screenshot via tools/screenshot.gd ``` -## Autoloads (project.godot) +## Autoloads (shared, registered in both project.godot files) | Autoload | Path | Role | |----------|------|------| -| `EventBus` | `scripts/autoloads/event_bus.gd` | Cross-system signal hub | -| `CompanionConfig` | `scripts/autoloads/companion_config.gd` | Endpoint URLs, model name | -| `FlightRecorder` | `scripts/autoloads/flight_recorder.gd` | Session logging | - -## AnimationTree State Machine - -``` -idle → breathing, random blink, subtle sway -listening → head tilt toward mic, attentive posture -processing → look-away, thinking pose -speaking → engaged posture, gestures synced to sentence breaks -interrupted → brief surprise, then → listening -Expressions → blend layer on top (happy, sad, angry, surprised, relaxed, neutral) -``` - -## Attention System - -**Desktop Gaze** (default) — `LookAtModifier3D` tracks cursor position. Active when idle or ambient. - -**Face-to-Face** — `vision/` sidecar publishes gaze target from webcam; `gaze_controller.gd` blends from cursor tracking to face target on `conversation_started` and back on `conversation_ended`. - -## Integration with LifeAI - -Standard HTTP streaming endpoint, OpenAI-compatible protocol. LifeAI provides persona, user life context, and reasoning-driven responses. Configure via `CompanionConfig.llm_url`. 
+| `EventBus` | `src/autoloads/event_bus.gd` | Cross-system signal hub | +| `AppState` | `src/autoloads/app_state.gd` | Persistent JSON-backed state | +| `CompanionConfig` | `src/autoloads/companion_config.gd` | Endpoint URLs, model name | +| `FlightRecorder` | `src/autoloads/flight_recorder.gd` | Session logging | ## Milestone Status | Milestone | Status | Description | |-----------|--------|-------------| -| M0 | ✅ | Project setup, chobit-core, autoloads, EventBus | -| M1 | ✅ | VRM model loaded and rendered, transparent overlay, idle animation | -| M2 | ✅ | AnimationTree FSM, expression blendshapes, dual-mode gaze, lipsync | -| M3 | ✅ | Webcam face tracking sidecar, gaze estimation, tray integration | -| M4 | ✅ | Microphone capture, VAD, STT/TTS HTTP clients, audio playback | -| M5 | ✅ | Full conversation loop: VAD→STT→LLM→TTS→avatar; interruption; chat window | -| M6 | 🔲 | LifeAI integration — persona, user life context | -| M7 | 🔲 | Polish — toon shader, particles, hair physics, gesture animations | +| M0 | done | Project setup, chobit-core, autoloads, EventBus | +| M1 | done | VRM model loaded and rendered, transparent overlay, idle animation | +| M2 | done | AnimationTree FSM, expression blendshapes, dual-mode gaze, lipsync | +| M3 | done | Webcam face tracking sidecar, gaze estimation, tray integration | +| M4 | done | Microphone capture, VAD, STT/TTS HTTP clients, audio playback | +| M5 | done | Full conversation loop: VAD→STT→LLM→TTS→avatar; interruption; chat window | +| M6 | next | LifeAI integration — persona, user life context | +| M7 | planned | Polish — toon shader, particles, hair physics, gesture animations | +| M8 | planned | Mobile — touch input, on-device camera, mobile export | diff --git a/app.manifest.yaml b/app.manifest.yaml new file mode 100644 index 0000000..0483df5 --- /dev/null +++ b/app.manifest.yaml @@ -0,0 +1,26 @@ +name: chobit +description: Interactive AI companion - Godot 4 desktop app with 3D VRM avatar, voice interaction, 
pluggable LLM backend +type: desktop-application +category: applications +version: 0.5.0 + +platforms: + apricot: + os: linux + host: 10.0.0.13 + environment: development + service: + type: user-session + name: chobit + ports: + - 19700 + - 19701 + gpu: false + install: + path: ~/Code/@applications/@chobit + script: ./run + status: + command: "cat .godot.pid 2>/dev/null && kill -0 $(cat .godot.pid) 2>/dev/null && echo 'active' || echo 'inactive'" + type: custom + logs: + file: "logs/chobit.log" diff --git a/run b/run index 78cbea4..75a5ce5 100755 --- a/run +++ b/run @@ -2,22 +2,32 @@ set -euo pipefail ROOT="$(cd "$(dirname "$0")" && pwd)" -GODOT_DIR="$ROOT/godot" -TRAY_DIR="$ROOT/tray" +GODOT_DIR="$ROOT/godot-desktop" +TRAY_DIR="$ROOT/services/tray" +BRIDGE_DIR="$ROOT/services/bridge" GODOT="flatpak run --user org.godotengine.Godot" PIDFILE="$ROOT/.godot.pid" TRAY_PIDFILE="$ROOT/.tray.pid" +BRIDGE_PIDFILE="$ROOT/.bridge.pid" cmd_start() { if [ -f "$PIDFILE" ] && kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then echo "Already running (pid $(cat "$PIDFILE"))" return 1 fi + + # 1. Bridge (pub/sub relay for vision events) + if [ -f "$BRIDGE_DIR/chobit_bridge.py" ]; then + python3 "$BRIDGE_DIR/chobit_bridge.py" & + echo $! > "$BRIDGE_PIDFILE" + echo "Started bridge (pid $!)" + fi + + # 2. Godot + tray setsid $GODOT --path "$GODOT_DIR" & echo $! > "$PIDFILE" echo "Started Godot (pid $!)" - # Start tray sidecar (manages bridge + vision subprocesses) if [ -f "$TRAY_DIR/chobit_tray.py" ]; then python3 "$TRAY_DIR/chobit_tray.py" & echo $! 
> "$TRAY_PIDFILE" @@ -26,7 +36,7 @@ cmd_start() { } cmd_stop() { - # Stop tray (also stops bridge + vision subprocesses via atexit) + # Stop tray if [ -f "$TRAY_PIDFILE" ]; then local tray_pid tray_pid=$(cat "$TRAY_PIDFILE" 2>/dev/null) @@ -35,12 +45,11 @@ cmd_stop() { fi rm -f "$TRAY_PIDFILE" fi - # Kill any remaining tray processes pgrep -f "chobit_tray\\.py" | while read -r cpid; do kill "$cpid" 2>/dev/null done || true - # Stop Godot — kill by PID file first, then sweep for stragglers + # Stop Godot local stopped=0 if [ -f "$PIDFILE" ]; then local pid @@ -55,14 +64,11 @@ cmd_stop() { rm -f "$PIDFILE" fi - # Sweep: kill any godot-bin instances running with @chobit's godot dir. - # Inside flatpak the cmdline is "godot-bin --path godot" (relative), - # so match that pattern plus the absolute-path variant. + # Sweep stale Godot processes local sweep_count=0 for cpid in $(pgrep -f "godot-bin.*(--path godot|@chobit/godot)" 2>/dev/null); do kill "$cpid" 2>/dev/null && sweep_count=$((sweep_count + 1)) done - # Also kill the bwrap/sh wrappers that flatpak spawned for these for cpid in $(pgrep -f "bwrap.*-- godot --path godot" 2>/dev/null); do kill "$cpid" 2>/dev/null done @@ -72,6 +78,19 @@ cmd_stop() { elif [ "$sweep_count" -gt 0 ]; then echo "Swept $sweep_count stale Godot process(es)" fi + + # Stop bridge last — Godot may flush state on exit + if [ -f "$BRIDGE_PIDFILE" ]; then + local bridge_pid + bridge_pid=$(cat "$BRIDGE_PIDFILE" 2>/dev/null) + if [ -n "$bridge_pid" ] && kill -0 "$bridge_pid" 2>/dev/null; then + kill "$bridge_pid" 2>/dev/null && echo "Stopped bridge (pid $bridge_pid)" || true + fi + rm -f "$BRIDGE_PIDFILE" + fi + pgrep -f "chobit_bridge\\.py" | while read -r cpid; do + kill "$cpid" 2>/dev/null + done || true } cmd_restart() { @@ -83,8 +102,8 @@ cmd_restart() { cmd_verify() { local failed=0 - echo "=== Lint ===" - if (cd "$GODOT_DIR" && gdlint scripts/); then + echo "=== Shared Source: Lint ===" + if (cd "$GODOT_DIR" && gdlint src/); then echo 
"PASS" else echo "FAIL" @@ -92,11 +111,29 @@ cmd_verify() { fi echo "" - echo "=== Format Check ===" - if (cd "$GODOT_DIR" && gdformat --check scripts/ 2>&1); then + echo "=== Desktop Platform: Lint ===" + if (cd "$GODOT_DIR" && gdlint platform/); then echo "PASS" else - echo "FAIL (run: cd godot && gdformat scripts/)" + echo "FAIL" + failed=1 + fi + + echo "" + echo "=== Shared Source: Format Check ===" + if (cd "$GODOT_DIR" && gdformat --check src/ 2>&1); then + echo "PASS" + else + echo "FAIL (run: cd godot-desktop && gdformat src/)" + failed=1 + fi + + echo "" + echo "=== Desktop Platform: Format Check ===" + if (cd "$GODOT_DIR" && gdformat --check platform/ 2>&1); then + echo "PASS" + else + echo "FAIL (run: cd godot-desktop && gdformat platform/)" failed=1 fi @@ -125,27 +162,33 @@ cmd_editor() { $GODOT --editor --path "$GODOT_DIR" } +cmd_mobile_editor() { + $GODOT --editor --path "$ROOT/godot-mobile" +} + cmd_screenshot() { $GODOT --path "$GODOT_DIR" --script tools/screenshot.gd 2>&1 | tail -1 } case "${1:-}" in - ""|start) cmd_start ;; - stop) cmd_stop ;; - restart) cmd_restart ;; - verify) cmd_verify ;; - editor) cmd_editor ;; - screenshot) cmd_screenshot ;; + ""|start) cmd_start ;; + stop) cmd_stop ;; + restart) cmd_restart ;; + verify) cmd_verify ;; + editor) cmd_editor ;; + mobile-editor) cmd_mobile_editor ;; + screenshot) cmd_screenshot ;; *) echo "Usage: ./run [command]" echo "" echo "Commands:" - echo " (none), start Launch companion + tray" - echo " stop Stop everything" - echo " restart Stop then start" - echo " verify Run lint, format check, and Godot import" - echo " editor Open Godot editor" - echo " screenshot Capture a screenshot" + echo " (none), start Launch bridge + companion + tray (desktop)" + echo " stop Stop everything" + echo " restart Stop then start" + echo " verify Run lint, format check, and Godot import" + echo " editor Open Godot desktop editor" + echo " mobile-editor Open Godot mobile editor" + echo " screenshot Capture a 
screenshot" exit 1 ;; esac