diff --git a/godot-desktop/platform/bridge/tray_listener.gd b/godot-desktop/platform/bridge/tray_listener.gd
index 6746869..529f8fa 100644
--- a/godot-desktop/platform/bridge/tray_listener.gd
+++ b/godot-desktop/platform/bridge/tray_listener.gd
@@ -5,41 +5,58 @@ extends Node
 
 const ScreenGazeScript = preload("res://src/core/screen_gaze.gd")
 const TrayDebugScript = preload("res://platform/bridge/tray_debug_commands.gd")
+const GazeControllerScript = preload("res://src/avatar/gaze_controller.gd")
+const IdleAnimatorScript = preload("res://src/avatar/idle_animator.gd")
+const ExpressionControllerScript = preload("res://src/avatar/expression_controller.gd")
+const GestureRegistryScript = preload("res://src/avatar/gesture_registry.gd")
+const EdgeSnapScript = preload("res://platform/window/edge_snap.gd")
+const WindowZoomScript = preload("res://platform/window/window_zoom.gd")
+const SoundEngineScript = preload("res://src/audio/sound_engine.gd")
+const SoundConfigScript = preload("res://src/audio/sound_config.gd")
+const MicrophoneScript = preload("res://src/conversation/microphone.gd")
 
 const PORT: int = 19700
 
 var _udp: PacketPeerUDP
-var _companion: Node
-var _gaze_controller: Node
-var _idle_animator: Node
-var _expression_controller: Node
+var _companion: Node  # desktop_companion — no class_name, kept as Node
+var _gaze_controller: GazeControllerScript
+var _idle_animator: IdleAnimatorScript
+var _expression_controller: ExpressionControllerScript
 var _reply_ip: String = "127.0.0.1"
 var _reply_port: int = PORT + 1
 var _last_attention: String = "absent"
+var _last_confidence: float = 0.0
+var _last_head_yaw: float = 0.0
+var _last_head_pitch: float = 0.0
+var _last_iris_h: float = 0.5
+var _last_iris_v: float = 0.5
+var _has_face: bool = false
 
 
 func setup(companion: Node) -> void:
 	_companion = companion
 	_udp = PacketPeerUDP.new()
-	var err := _udp.bind(PORT, "127.0.0.1")
+	var err: Error = _udp.bind(PORT, "127.0.0.1")
 	if err != OK:
 		push_warning("UDPListener: Failed to bind port %d — is another instance running?" % PORT)
 		_udp = null
 		return
 
 	# Find avatar controllers (recursive search — they live inside the VRM model node)
-	_gaze_controller = companion.find_child("GazeController", true, false)
-	_idle_animator = companion.find_child("IdleAnimator", true, false)
-	_expression_controller = companion.find_child("ExpressionController", true, false)
+	_gaze_controller = companion.find_child("GazeController", true, false) as GazeControllerScript
+	_idle_animator = companion.find_child("IdleAnimator", true, false) as IdleAnimatorScript
+	_expression_controller = (
+		companion.find_child("ExpressionController", true, false) as ExpressionControllerScript
+	)
 
 
 func _process(_delta: float) -> void:
 	if _udp == null:
 		return
 	while _udp.get_available_packet_count() > 0:
-		var data := _udp.get_packet().get_string_from_utf8()
-		var sender_ip := _udp.get_packet_ip()
-		var sender_port := _udp.get_packet_port()
+		var data: String = _udp.get_packet().get_string_from_utf8()
+		var sender_ip: String = _udp.get_packet_ip()
+		var sender_port: int = _udp.get_packet_port()
 		_handle_message(data, sender_ip, sender_port)
 
 
@@ -51,7 +68,7 @@ func _handle_message(
 	_reply_ip = sender_ip
 	_reply_port = sender_port
 
-	var json := JSON.new()
+	var json: JSON = JSON.new()
 	if json.parse(raw) != OK:
 		return
 
@@ -69,9 +86,10 @@ func _handle_command(msg: Dictionary) -> void:
 
 	match cmd:
 		"toggle_snap":
-			var snap := _get_edge_snap()
+			var snap: EdgeSnapScript = _get_edge_snap()
 			if snap != null:
 				snap.enabled = not snap.enabled
+				AppState.set_snap_enabled(snap.enabled)
 				_respond({"snap_enabled": snap.enabled})
 		"toggle_gaze":
 			if _gaze_controller != null:
@@ -85,13 +103,13 @@ func _handle_command(msg: Dictionary) -> void:
 			if _gaze_controller != null:
 				var mode: String = msg.get("mode", "desktop")
 				if mode == "face_to_face":
-					_gaze_controller.set_mode(_gaze_controller.GazeMode.FACE_TO_FACE)
+					_gaze_controller.set_mode(GazeControllerScript.GazeMode.FACE_TO_FACE)
 				else:
-					_gaze_controller.set_mode(_gaze_controller.GazeMode.DESKTOP)
+					_gaze_controller.set_mode(GazeControllerScript.GazeMode.DESKTOP)
 				_respond({"gaze_mode": _gaze_controller.get_mode_name()})
 		"set_zoom":
 			var level: float = msg.get("level", 0.5)
-			var zoom := _companion.get_node_or_null("WindowZoom")
+			var zoom: WindowZoomScript = _get_window_zoom()
 			if zoom != null:
 				zoom.set_zoom_level(level)
 		"reset_position":
@@ -99,13 +117,13 @@ func _handle_command(msg: Dictionary) -> void:
 		"quit":
 			get_tree().quit()
 		"test_snap":
-			var snap := _get_edge_snap()
+			var snap: EdgeSnapScript = _get_edge_snap()
 			if snap != null:
 				snap.try_snap()
 				_respond({"snapped": true})
 		"set_state":
 			var target: String = msg.get("state", "idle")
-			var valid := ["idle", "listening", "processing", "speaking"]
+			var valid: Array[String] = ["idle", "listening", "processing", "speaking"]
 			if target in valid:
 				EventBus.state_changed.emit("", target)
 				_respond({"state": target})
@@ -116,7 +134,7 @@ func _handle_command(msg: Dictionary) -> void:
 		"play_sound":
 			_play_sound(msg.get("name", "chirp"))
 		"list_sounds":
-			var engine := _get_sound_engine()
+			var engine: SoundEngineScript = _get_sound_engine()
 			if engine != null:
 				_respond({"sounds": engine.get_sound_names()})
 		"focus":
@@ -153,7 +171,7 @@ func _handle_command(msg: Dictionary) -> void:
 			EventBus.conversation_new_requested.emit()
 			_respond({"ok": true})
 		"list_conversations":
-			var index := AppState.get_section("conversations")
+			var index: Dictionary = AppState.get_section("conversations")
 			_respond({"conversations": index.get("list", [])})
 		"snd_config":
 			_handle_sound_config(msg)
@@ -166,7 +184,7 @@ func _handle_command(msg: Dictionary) -> void:
 			FlightRecorder.record("vision.halo_toggled", "Gaze halo toggled", {"enabled": enabled})
 			_respond({"gaze_halo": enabled})
 		"toggle_mic":
-			var mic := _companion.get_node_or_null("Microphone")
+			var mic: MicrophoneScript = _get_microphone()
 			if mic != null:
 				mic.set_enabled(not mic._enabled)
 				_respond({"mic_enabled": mic._enabled})
@@ -194,23 +212,32 @@ func _handle_envelope(msg: Dictionary) -> void:
 
 
 func _handle_face(payload: Dictionary) -> void:
-	var face_x = payload.get("face_x")
-	var face_y = payload.get("face_y")
 	var attention: String = payload.get("attention", "absent")
 	var confidence: float = payload.get("confidence", 0.0)
-	var head_yaw = payload.get("head_yaw")
-	var head_pitch = payload.get("head_pitch")
-	var iris_h = payload.get("iris_h")
-	var iris_v = payload.get("iris_v")
+	var has_position: bool = payload.has("face_x") and payload.has("face_y")
+	var has_pose: bool = (
+		payload.has("head_yaw")
+		and payload.has("head_pitch")
+		and payload.has("iris_h")
+		and payload.has("iris_v")
+	)
+	var face_x: float = float(payload.get("face_x", 0.0))
+	var face_y: float = float(payload.get("face_y", 0.0))
+	var head_yaw: float = float(payload.get("head_yaw", 0.0))
+	var head_pitch: float = float(payload.get("head_pitch", 0.0))
+	var iris_h: float = float(payload.get("iris_h", 0.5))
+	var iris_v: float = float(payload.get("iris_v", 0.5))
 
-	if face_x != null and face_y != null:
-		var pos := Vector2(float(face_x), float(face_y))
-		EventBus.face_detected.emit(pos)
+	if has_position:
+		_has_face = true
+		EventBus.face_detected.emit(Vector2(face_x, face_y))
 
-	if head_yaw != null and head_pitch != null and iris_h != null and iris_v != null:
-		EventBus.face_pose_updated.emit(
-			float(head_yaw), float(head_pitch), float(iris_h), float(iris_v)
-		)
+	if has_pose:
+		_last_head_yaw = head_yaw
+		_last_head_pitch = head_pitch
+		_last_iris_h = iris_h
+		_last_iris_v = iris_v
+		EventBus.face_pose_updated.emit(head_yaw, head_pitch, iris_h, iris_v)
 
 	if _gaze_controller != null:
 		if attention == "looking":
@@ -218,48 +245,49 @@ func _handle_face(payload: Dictionary) -> void:
 			# Compute the direction from Miku's window toward the camera position.
 			# The camera is above the monitor (negative y relative to screen center).
 			# We invert: Miku looks slightly up and toward center to meet the user's eyes.
-			var ds := DisplayServer
-			var win_pos := ds.window_get_position()
-			var win_size := ds.window_get_size()
-			# Miku's center in screen coords
-			var miku_cx := win_pos.x + win_size.x / 2.0
-			var miku_cy := win_pos.y + win_size.y / 2.0
-			# Camera is assumed to be at center-top of primary monitor
-			var screen_w := ds.screen_get_size(ds.get_primary_screen()).x
-			var screen_y_top := ds.screen_get_position(ds.get_primary_screen()).y
-			var cam_cx := ds.screen_get_position(ds.get_primary_screen()).x + screen_w / 2.0
-			var cam_cy := screen_y_top - 200.0  # ~200px above monitor top
-			# Direction from Miku toward camera, normalized
-			var dx := cam_cx - miku_cx
-			var dy := cam_cy - miku_cy
-			var max_dx := float(screen_w)
-			var max_dy := 800.0  # typical monitor height
-			var gaze_x := clampf(dx / max_dx * 2.0, -1.0, 1.0)
-			var gaze_y := clampf(dy / max_dy * 2.0, -1.0, 1.0)
+			var win_pos: Vector2i = DisplayServer.window_get_position()
+			var win_size: Vector2i = DisplayServer.window_get_size()
+			var miku_cx: float = float(win_pos.x) + float(win_size.x) / 2.0
+			var miku_cy: float = float(win_pos.y) + float(win_size.y) / 2.0
+			var screen_w: int = DisplayServer.screen_get_size(DisplayServer.get_primary_screen()).x
+			var screen_y_top: int = (
+				DisplayServer.screen_get_position(DisplayServer.get_primary_screen()).y
+			)
+			var cam_cx: float = (
+				float(DisplayServer.screen_get_position(DisplayServer.get_primary_screen()).x)
+				+ float(screen_w) / 2.0
+			)
+			var cam_cy: float = float(screen_y_top) - 200.0  # ~200px above monitor top
+			var dx: float = cam_cx - miku_cx
+			var dy: float = cam_cy - miku_cy
+			var gaze_x: float = clampf(dx / float(screen_w) * 2.0, -1.0, 1.0)
+			var gaze_y: float = clampf(dy / 800.0 * 2.0, -1.0, 1.0)
 			_gaze_controller.set_face_target(Vector2(gaze_x, gaze_y))
-			# Halo: user is looking at camera — place halo at camera position.
 			EventBus.gaze_screen_updated.emit(Vector2(cam_cx, cam_cy))
-		elif head_yaw != null and head_pitch != null and iris_h != null and iris_v != null:
+		elif has_pose:
 			# Mirror mode — Miku follows where the user is looking.
 			# Blend 70% head pose + 30% iris for smooth, accurate gaze direction.
-			var iris_h_deg := (float(iris_h) - 0.5) * 80.0  # iris [0..1] → ±40°
-			var iris_v_deg := (float(iris_v) - 0.5) * 60.0  # iris [0..1] → ±30°
-			var comp_yaw := float(head_yaw) * 0.7 + iris_h_deg * 0.3
-			var comp_pitch := float(head_pitch) * 0.7 + iris_v_deg * 0.3
+			var iris_h_deg: float = (iris_h - 0.5) * 80.0  # iris [0..1] → ±40°
+			var iris_v_deg: float = (iris_v - 0.5) * 60.0  # iris [0..1] → ±30°
+			var comp_yaw: float = head_yaw * 0.7 + iris_h_deg * 0.3
+			var comp_pitch: float = head_pitch * 0.7 + iris_v_deg * 0.3
 			# Normalize to -1..1 (±35° yaw covers full gaze range, ±20° pitch)
-			var gaze_x := clampf(comp_yaw / 35.0, -1.0, 1.0)
-			var gaze_y := clampf(-comp_pitch / 20.0, -1.0, 1.0)  # invert: up pitch = up gaze
+			var gaze_x: float = clampf(comp_yaw / 35.0, -1.0, 1.0)
+			var gaze_y: float = clampf(-comp_pitch / 20.0, -1.0, 1.0)  # invert: up → up
 			_gaze_controller.set_face_target(Vector2(gaze_x, gaze_y))
-			# Halo: project gaze angles to screen pixel coords (same model as screen_layout.py).
 			EventBus.gaze_screen_updated.emit(
 				ScreenGazeScript.gaze_to_screen_coords(comp_yaw, comp_pitch)
 			)
-		elif face_x != null and face_y != null:
+		elif has_position:
 			# Fallback: face position in camera frame (no gaze data)
-			_gaze_controller.set_face_target(Vector2(float(face_x), float(face_y)))
+			_gaze_controller.set_face_target(Vector2(face_x, face_y))
+
+	_last_confidence = confidence
+	if attention == "absent":
+		_has_face = false
 
 	if attention != _last_attention:
-		var prev := _last_attention
+		var prev: String = _last_attention
 		_last_attention = attention
 
 		(
@@ -277,17 +305,31 @@ func _handle_face(payload: Dictionary) -> void:
 		EventBus.attention_changed.emit(attention, confidence)
 
 
-func _get_gesture_names() -> Array:
-	var names: Array = []
+## Re-emit the last known face state to any newly-connected listeners.
+## Call this when a settings page becomes visible to initialize its UI.
+func sync_face_state() -> void:
+	if _has_face:
+		EventBus.attention_changed.emit(_last_attention, _last_confidence)
+		EventBus.face_pose_updated.emit(
+			_last_head_yaw, _last_head_pitch, _last_iris_h, _last_iris_v
+		)
+	else:
+		EventBus.face_lost.emit()
+
+
+func _get_gesture_names() -> Array[String]:
+	var names: Array[String] = []
 	if _idle_animator != null and _idle_animator.gesture_reg != null:
-		names = _idle_animator.gesture_reg.get_names()
+		var reg: GestureRegistryScript = _idle_animator.gesture_reg as GestureRegistryScript
+		if reg != null:
+			names.assign(reg.get_names())
 	names.append("slow_blink")  # special trigger handled outside gesture_reg
 	return names
 
 
-func _get_emotion_names() -> Array:
+func _get_emotion_names() -> Array[String]:
 	if _expression_controller != null:
-		return Array(_expression_controller.EMOTION_SHAPES)
+		return _expression_controller.EMOTION_SHAPES.duplicate()
 	return ["happy", "sad", "angry", "surprised", "relaxed", "neutral"]
 
 
@@ -331,7 +373,7 @@ func _play_animation(anim_name: String) -> void:
 
 
 func _play_sound(sound_name: String) -> void:
-	var engine := _get_sound_engine()
+	var engine: SoundEngineScript = _get_sound_engine()
 	if engine != null:
 		engine.play_sound(sound_name)
 		_respond({"played_sound": sound_name})
@@ -342,29 +384,23 @@ func _play_sound(sound_name: String) -> void:
 func _handle_sound_config(msg: Dictionary) -> void:
 	var slot: String = msg.get("slot", "")
 	var sound: String = msg.get("sound", "")
-	var config := _companion.get_node_or_null("SoundConfig")
+	var config: SoundConfigScript = _get_sound_config()
 	if config != null:
-		print("[TrayListener] Saving sound config: %s = '%s'" % [slot, sound])
 		config.set_sound(slot, sound)
 		_respond({"snd_config": {"slot": slot, "sound": sound}})
 	else:
-		print("[TrayListener] ERROR: SoundConfig not found!")
 		_respond({"error": "SoundConfig not found"})
 
 
 func _build_sound_config() -> Dictionary:
-	var config := _companion.get_node_or_null("SoundConfig")
+	var config: SoundConfigScript = _get_sound_config()
 	if config == null:
-		print("[TrayListener] ERROR: SoundConfig not found when fetching config!")
 		return {"error": "SoundConfig not found"}
 
 	var result: Dictionary = {}
 	var slots: Dictionary = config.get_slots()
-	print("[TrayListener] Fetching sound config with %d slots" % slots.size())
 	for slot_key: String in slots.keys():
-		var sound: String = config.get_sound(slot_key)
-		result[slot_key] = sound
-		print("[TrayListener] Fetched %s = '%s'" % [slot_key, sound])
+		result[slot_key] = config.get_sound(slot_key)
 	return result
 
 
@@ -378,30 +414,44 @@ func _handle_state_get(msg: Dictionary) -> void:
 
 func _handle_state_set(msg: Dictionary) -> void:
 	var section: String = msg.get("section", "")
-	var data: Variant = msg.get("data")
-	if section.is_empty() or not (data is Dictionary):
+	if section.is_empty() or not (msg.get("data") is Dictionary):
 		_respond({"error": "missing section or data"})
 		return
-	AppState.set_section(section, data)
+	AppState.set_section(section, msg.get("data") as Dictionary)
 	_respond({"ok": true})
 
 
-func _get_sound_engine() -> Node:
-	return _companion.get_node_or_null("SoundEngine")
+func _get_edge_snap() -> EdgeSnapScript:
+	return _companion.get_node_or_null("EdgeSnap") as EdgeSnapScript
+
+
+func _get_window_zoom() -> WindowZoomScript:
+	return _companion.get_node_or_null("WindowZoom") as WindowZoomScript
+
+
+func _get_sound_engine() -> SoundEngineScript:
+	return _companion.get_node_or_null("SoundEngine") as SoundEngineScript
+
+
+func _get_sound_config() -> SoundConfigScript:
+	return _companion.get_node_or_null("SoundConfig") as SoundConfigScript
+
+
+func _get_microphone() -> MicrophoneScript:
+	return _companion.get_node_or_null("Microphone") as MicrophoneScript
 
 
 func _build_status() -> Dictionary:
-	var snap := _get_edge_snap()
-	var zoom := _companion.get_node_or_null("WindowZoom")
-	var gaze_mode := "desktop"
-	var face_detected := false
+	var snap: EdgeSnapScript = _get_edge_snap()
+	var zoom: WindowZoomScript = _get_window_zoom()
+	var gaze_mode: String = "desktop"
+	var face_detected: bool = false
 	if _gaze_controller != null:
 		gaze_mode = _gaze_controller.get_mode_name()
 		face_detected = _gaze_controller.is_face_available()
-	var mic := _companion.get_node_or_null("Microphone")
-	var ds := DisplayServer
-	var win_pos := ds.window_get_position()
-	var win_size := ds.window_get_size()
+	var mic: MicrophoneScript = _get_microphone()
+	var win_pos: Vector2i = DisplayServer.window_get_position()
+	var win_size: Vector2i = DisplayServer.window_get_size()
 	return {
 		"running": true,
 		"snap_enabled": snap.enabled if snap else false,
@@ -418,12 +468,8 @@ func _build_status() -> Dictionary:
 	}
 
 
-func _get_edge_snap() -> Node:
-	return _companion.get_node_or_null("EdgeSnap")
-
-
 func _respond(data: Dictionary) -> void:
-	var response := JSON.stringify(data).to_utf8_buffer()
+	var response: PackedByteArray = JSON.stringify(data).to_utf8_buffer()
 	_udp.set_dest_address(_reply_ip, _reply_port)
 	_udp.put_packet(response)
 