fix(bridge): 🐛 Fix system tray event handling in TrayListener for Godot desktop platform

Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
Claude Code 2026-03-29 23:25:08 -07:00
parent 9e7cff0a7c
commit dba5e2339c

View file

@ -5,41 +5,58 @@ extends Node
# Script resources preloaded when this script loads. Several of them are used further
# down as static types for members, locals and helper return values.
const ScreenGazeScript = preload("res://src/core/screen_gaze.gd")
const TrayDebugScript = preload("res://platform/bridge/tray_debug_commands.gd")
const GazeControllerScript = preload("res://src/avatar/gaze_controller.gd")
const IdleAnimatorScript = preload("res://src/avatar/idle_animator.gd")
const ExpressionControllerScript = preload("res://src/avatar/expression_controller.gd")
const GestureRegistryScript = preload("res://src/avatar/gesture_registry.gd")
const EdgeSnapScript = preload("res://platform/window/edge_snap.gd")
const WindowZoomScript = preload("res://platform/window/window_zoom.gd")
const SoundEngineScript = preload("res://src/audio/sound_engine.gd")
const SoundConfigScript = preload("res://src/audio/sound_config.gd")
const MicrophoneScript = preload("res://src/conversation/microphone.gd")
# UDP port bound in setup(); the default reply port below is PORT + 1.
const PORT: int = 19700
# Listening socket; setup() resets it to null when binding fails.
var _udp: PacketPeerUDP
# NOTE(review): the next four members appear twice in this listing — first typed as
# plain Node, then with script types. This looks like the before/after sides of a diff
# rendered without +/- markers; presumably only the script-typed declarations are live.
# Confirm against the real file.
var _companion: Node
var _gaze_controller: Node
var _idle_animator: Node
var _expression_controller: Node
var _companion: Node # desktop_companion — no class_name, kept as Node
var _gaze_controller: GazeControllerScript
var _idle_animator: IdleAnimatorScript
var _expression_controller: ExpressionControllerScript
# Destination used by _respond(); _handle_message() overwrites both with the sender's
# address and port.
var _reply_ip: String = "127.0.0.1"
var _reply_port: int = PORT + 1
# Most recent face-tracking values, cached by _handle_face() and replayed by
# sync_face_state().
var _last_attention: String = "absent"
var _last_confidence: float = 0.0
var _last_head_yaw: float = 0.0
var _last_head_pitch: float = 0.0
var _last_iris_h: float = 0.5
var _last_iris_v: float = 0.5
# Set to true when a payload carries a face position; reset when attention is "absent".
var _has_face: bool = false
## Stores the companion node, binds the UDP socket and looks up the avatar controllers.
## [param companion] Node searched recursively for children named "GazeController",
## "IdleAnimator" and "ExpressionController".
## If binding PORT on 127.0.0.1 fails, a warning is pushed, _udp is reset to null and
## the function returns before the controller lookup.
func setup(companion: Node) -> void:
_companion = companion
_udp = PacketPeerUDP.new()
# NOTE(review): `err` is declared twice here (inferred `:=`, then explicit `Error`);
# presumably these are the old and new sides of the diff — confirm.
var err := _udp.bind(PORT, "127.0.0.1")
var err: Error = _udp.bind(PORT, "127.0.0.1")
if err != OK:
push_warning("UDPListener: Failed to bind port %d — is another instance running?" % PORT)
# A null socket makes _process() return immediately on every frame.
_udp = null
return
# Find avatar controllers (recursive search — they live inside the VRM model node)
_gaze_controller = companion.find_child("GazeController", true, false)
_idle_animator = companion.find_child("IdleAnimator", true, false)
_expression_controller = companion.find_child("ExpressionController", true, false)
# `as` yields null when the found node is not an instance of the given script, so the
# `!= null` checks elsewhere also cover a wrongly-typed node.
_gaze_controller = companion.find_child("GazeController", true, false) as GazeControllerScript
_idle_animator = companion.find_child("IdleAnimator", true, false) as IdleAnimatorScript
_expression_controller = (
companion.find_child("ExpressionController", true, false) as ExpressionControllerScript
)
## Per-frame pump: reads every datagram currently queued on the socket and passes its
## text, sender IP and sender port to _handle_message().
## Does nothing when _udp is null (setup() failed to bind, or was never called).
func _process(_delta: float) -> void:
if _udp == null:
return
while _udp.get_available_packet_count() > 0:
var data := _udp.get_packet().get_string_from_utf8()
var sender_ip := _udp.get_packet_ip()
var sender_port := _udp.get_packet_port()
# get_packet() must come first: get_packet_ip()/get_packet_port() describe the packet
# most recently returned by get_packet().
var data: String = _udp.get_packet().get_string_from_utf8()
var sender_ip: String = _udp.get_packet_ip()
var sender_port: int = _udp.get_packet_port()
_handle_message(data, sender_ip, sender_port)
@ -51,7 +68,7 @@ func _handle_message(
_reply_ip = sender_ip
_reply_port = sender_port
var json := JSON.new()
var json: JSON = JSON.new()
if json.parse(raw) != OK:
return
@ -69,9 +86,10 @@ func _handle_command(msg: Dictionary) -> void:
match cmd:
"toggle_snap":
var snap := _get_edge_snap()
var snap: EdgeSnapScript = _get_edge_snap()
if snap != null:
snap.enabled = not snap.enabled
AppState.set_snap_enabled(snap.enabled)
_respond({"snap_enabled": snap.enabled})
"toggle_gaze":
if _gaze_controller != null:
@ -85,13 +103,13 @@ func _handle_command(msg: Dictionary) -> void:
if _gaze_controller != null:
var mode: String = msg.get("mode", "desktop")
if mode == "face_to_face":
_gaze_controller.set_mode(_gaze_controller.GazeMode.FACE_TO_FACE)
_gaze_controller.set_mode(GazeControllerScript.GazeMode.FACE_TO_FACE)
else:
_gaze_controller.set_mode(_gaze_controller.GazeMode.DESKTOP)
_gaze_controller.set_mode(GazeControllerScript.GazeMode.DESKTOP)
_respond({"gaze_mode": _gaze_controller.get_mode_name()})
"set_zoom":
var level: float = msg.get("level", 0.5)
var zoom := _companion.get_node_or_null("WindowZoom")
var zoom: WindowZoomScript = _get_window_zoom()
if zoom != null:
zoom.set_zoom_level(level)
"reset_position":
@ -99,13 +117,13 @@ func _handle_command(msg: Dictionary) -> void:
"quit":
get_tree().quit()
"test_snap":
var snap := _get_edge_snap()
var snap: EdgeSnapScript = _get_edge_snap()
if snap != null:
snap.try_snap()
_respond({"snapped": true})
"set_state":
var target: String = msg.get("state", "idle")
var valid := ["idle", "listening", "processing", "speaking"]
var valid: Array[String] = ["idle", "listening", "processing", "speaking"]
if target in valid:
EventBus.state_changed.emit("", target)
_respond({"state": target})
@ -116,7 +134,7 @@ func _handle_command(msg: Dictionary) -> void:
"play_sound":
_play_sound(msg.get("name", "chirp"))
"list_sounds":
var engine := _get_sound_engine()
var engine: SoundEngineScript = _get_sound_engine()
if engine != null:
_respond({"sounds": engine.get_sound_names()})
"focus":
@ -153,7 +171,7 @@ func _handle_command(msg: Dictionary) -> void:
EventBus.conversation_new_requested.emit()
_respond({"ok": true})
"list_conversations":
var index := AppState.get_section("conversations")
var index: Dictionary = AppState.get_section("conversations")
_respond({"conversations": index.get("list", [])})
"snd_config":
_handle_sound_config(msg)
@ -166,7 +184,7 @@ func _handle_command(msg: Dictionary) -> void:
FlightRecorder.record("vision.halo_toggled", "Gaze halo toggled", {"enabled": enabled})
_respond({"gaze_halo": enabled})
"toggle_mic":
var mic := _companion.get_node_or_null("Microphone")
var mic: MicrophoneScript = _get_microphone()
if mic != null:
mic.set_enabled(not mic._enabled)
_respond({"mic_enabled": mic._enabled})
@ -194,23 +212,32 @@ func _handle_envelope(msg: Dictionary) -> void:
func _handle_face(payload: Dictionary) -> void:
var face_x = payload.get("face_x")
var face_y = payload.get("face_y")
var attention: String = payload.get("attention", "absent")
var confidence: float = payload.get("confidence", 0.0)
var head_yaw = payload.get("head_yaw")
var head_pitch = payload.get("head_pitch")
var iris_h = payload.get("iris_h")
var iris_v = payload.get("iris_v")
var has_position: bool = payload.has("face_x") and payload.has("face_y")
var has_pose: bool = (
payload.has("head_yaw")
and payload.has("head_pitch")
and payload.has("iris_h")
and payload.has("iris_v")
)
var face_x: float = float(payload.get("face_x", 0.0))
var face_y: float = float(payload.get("face_y", 0.0))
var head_yaw: float = float(payload.get("head_yaw", 0.0))
var head_pitch: float = float(payload.get("head_pitch", 0.0))
var iris_h: float = float(payload.get("iris_h", 0.5))
var iris_v: float = float(payload.get("iris_v", 0.5))
if face_x != null and face_y != null:
var pos := Vector2(float(face_x), float(face_y))
EventBus.face_detected.emit(pos)
if has_position:
_has_face = true
EventBus.face_detected.emit(Vector2(face_x, face_y))
if head_yaw != null and head_pitch != null and iris_h != null and iris_v != null:
EventBus.face_pose_updated.emit(
float(head_yaw), float(head_pitch), float(iris_h), float(iris_v)
)
if has_pose:
_last_head_yaw = head_yaw
_last_head_pitch = head_pitch
_last_iris_h = iris_h
_last_iris_v = iris_v
EventBus.face_pose_updated.emit(head_yaw, head_pitch, iris_h, iris_v)
if _gaze_controller != null:
if attention == "looking":
@ -218,48 +245,49 @@ func _handle_face(payload: Dictionary) -> void:
# Compute the direction from Miku's window toward the camera position.
# The camera is above the monitor (negative y relative to screen center).
# We invert: Miku looks slightly up and toward center to meet the user's eyes.
var ds := DisplayServer
var win_pos := ds.window_get_position()
var win_size := ds.window_get_size()
# Miku's center in screen coords
var miku_cx := win_pos.x + win_size.x / 2.0
var miku_cy := win_pos.y + win_size.y / 2.0
# Camera is assumed to be at center-top of primary monitor
var screen_w := ds.screen_get_size(ds.get_primary_screen()).x
var screen_y_top := ds.screen_get_position(ds.get_primary_screen()).y
var cam_cx := ds.screen_get_position(ds.get_primary_screen()).x + screen_w / 2.0
var cam_cy := screen_y_top - 200.0 # ~200px above monitor top
# Direction from Miku toward camera, normalized
var dx := cam_cx - miku_cx
var dy := cam_cy - miku_cy
var max_dx := float(screen_w)
var max_dy := 800.0 # typical monitor height
var gaze_x := clampf(dx / max_dx * 2.0, -1.0, 1.0)
var gaze_y := clampf(dy / max_dy * 2.0, -1.0, 1.0)
var win_pos: Vector2i = DisplayServer.window_get_position()
var win_size: Vector2i = DisplayServer.window_get_size()
var miku_cx: float = float(win_pos.x) + float(win_size.x) / 2.0
var miku_cy: float = float(win_pos.y) + float(win_size.y) / 2.0
var screen_w: int = DisplayServer.screen_get_size(DisplayServer.get_primary_screen()).x
var screen_y_top: int = (
DisplayServer.screen_get_position(DisplayServer.get_primary_screen()).y
)
var cam_cx: float = (
float(DisplayServer.screen_get_position(DisplayServer.get_primary_screen()).x)
+ float(screen_w) / 2.0
)
var cam_cy: float = float(screen_y_top) - 200.0 # ~200px above monitor top
var dx: float = cam_cx - miku_cx
var dy: float = cam_cy - miku_cy
var gaze_x: float = clampf(dx / float(screen_w) * 2.0, -1.0, 1.0)
var gaze_y: float = clampf(dy / 800.0 * 2.0, -1.0, 1.0)
_gaze_controller.set_face_target(Vector2(gaze_x, gaze_y))
# Halo: user is looking at camera — place halo at camera position.
EventBus.gaze_screen_updated.emit(Vector2(cam_cx, cam_cy))
elif head_yaw != null and head_pitch != null and iris_h != null and iris_v != null:
elif has_pose:
# Mirror mode — Miku follows where the user is looking.
# Blend 70% head pose + 30% iris for smooth, accurate gaze direction.
var iris_h_deg := (float(iris_h) - 0.5) * 80.0 # iris [0..1] → ±40°
var iris_v_deg := (float(iris_v) - 0.5) * 60.0 # iris [0..1] → ±30°
var comp_yaw := float(head_yaw) * 0.7 + iris_h_deg * 0.3
var comp_pitch := float(head_pitch) * 0.7 + iris_v_deg * 0.3
var iris_h_deg: float = (iris_h - 0.5) * 80.0 # iris [0..1] → ±40°
var iris_v_deg: float = (iris_v - 0.5) * 60.0 # iris [0..1] → ±30°
var comp_yaw: float = head_yaw * 0.7 + iris_h_deg * 0.3
var comp_pitch: float = head_pitch * 0.7 + iris_v_deg * 0.3
# Normalize to -1..1 (±35° yaw covers full gaze range, ±20° pitch)
var gaze_x := clampf(comp_yaw / 35.0, -1.0, 1.0)
var gaze_y := clampf(-comp_pitch / 20.0, -1.0, 1.0) # invert: up pitch = up gaze
var gaze_x: float = clampf(comp_yaw / 35.0, -1.0, 1.0)
var gaze_y: float = clampf(-comp_pitch / 20.0, -1.0, 1.0) # invert: up → up
_gaze_controller.set_face_target(Vector2(gaze_x, gaze_y))
# Halo: project gaze angles to screen pixel coords (same model as screen_layout.py).
EventBus.gaze_screen_updated.emit(
ScreenGazeScript.gaze_to_screen_coords(comp_yaw, comp_pitch)
)
elif face_x != null and face_y != null:
elif has_position:
# Fallback: face position in camera frame (no gaze data)
_gaze_controller.set_face_target(Vector2(float(face_x), float(face_y)))
_gaze_controller.set_face_target(Vector2(face_x, face_y))
_last_confidence = confidence
if attention == "absent":
_has_face = false
if attention != _last_attention:
var prev := _last_attention
var prev: String = _last_attention
_last_attention = attention
(
@ -277,17 +305,31 @@ func _handle_face(payload: Dictionary) -> void:
EventBus.attention_changed.emit(attention, confidence)
func _get_gesture_names() -> Array:
var names: Array = []
## Replays the cached face state on EventBus so that listeners which connected late
## (for example a settings page that has just become visible) can initialize their UI.
## With no face currently tracked, only `face_lost` is emitted; otherwise the cached
## attention/confidence pair and the cached head/iris pose are emitted, in that order.
func sync_face_state() -> void:
	if not _has_face:
		EventBus.face_lost.emit()
		return
	EventBus.attention_changed.emit(_last_attention, _last_confidence)
	EventBus.face_pose_updated.emit(
		_last_head_yaw,
		_last_head_pitch,
		_last_iris_h,
		_last_iris_v
	)
## Builds the list of gesture names: the names reported by the idle animator's gesture
## registry (when both the animator and its registry exist) plus the special
## "slow_blink" entry.
## NOTE(review): indentation is lost in this listing, so whether "slow_blink" is appended
## unconditionally cannot be read off here — confirm against the real file.
func _get_gesture_names() -> Array[String]:
var names: Array[String] = []
if _idle_animator != null and _idle_animator.gesture_reg != null:
names = _idle_animator.gesture_reg.get_names()
var reg: GestureRegistryScript = _idle_animator.gesture_reg as GestureRegistryScript
if reg != null:
# assign() copies the elements into the typed array (converting where needed) instead
# of rebinding `names` to whatever array type get_names() returns.
names.assign(reg.get_names())
names.append("slow_blink") # special trigger handled outside gesture_reg
return names
## Returns the available emotion names: the expression controller's EMOTION_SHAPES when
## a controller was found, otherwise the hard-coded default list on the last line.
## NOTE(review): two signatures and two return forms are shown (untyped/`Array(...)` and
## typed/`.duplicate()`); presumably the old and new sides of the diff — confirm.
func _get_emotion_names() -> Array:
func _get_emotion_names() -> Array[String]:
if _expression_controller != null:
return Array(_expression_controller.EMOTION_SHAPES)
# duplicate() returns a copy rather than the controller's own container.
# Presumably EMOTION_SHAPES is an Array[String]; verify in expression_controller.gd.
return _expression_controller.EMOTION_SHAPES.duplicate()
return ["happy", "sad", "angry", "surprised", "relaxed", "neutral"]
@ -331,7 +373,7 @@ func _play_animation(anim_name: String) -> void:
func _play_sound(sound_name: String) -> void:
var engine := _get_sound_engine()
var engine: SoundEngineScript = _get_sound_engine()
if engine != null:
engine.play_sound(sound_name)
_respond({"played_sound": sound_name})
@ -342,29 +384,23 @@ func _play_sound(sound_name: String) -> void:
## Handles the "snd_config" command: stores msg["sound"] under msg["slot"] in the
## SoundConfig node and replies with the stored pair.
## Missing "slot"/"sound" keys default to "". When the SoundConfig node cannot be found,
## replies {"error": "SoundConfig not found"} instead.
func _handle_sound_config(msg: Dictionary) -> void:
var slot: String = msg.get("slot", "")
var sound: String = msg.get("sound", "")
var config := _companion.get_node_or_null("SoundConfig")
var config: SoundConfigScript = _get_sound_config()
if config != null:
print("[TrayListener] Saving sound config: %s = '%s'" % [slot, sound])
config.set_sound(slot, sound)
_respond({"snd_config": {"slot": slot, "sound": sound}})
else:
print("[TrayListener] ERROR: SoundConfig not found!")
_respond({"error": "SoundConfig not found"})
## Builds a Dictionary mapping every slot key from SoundConfig.get_slots() to the value
## SoundConfig.get_sound() returns for that key.
## Returns {"error": "SoundConfig not found"} when the SoundConfig node is missing.
func _build_sound_config() -> Dictionary:
var config := _companion.get_node_or_null("SoundConfig")
var config: SoundConfigScript = _get_sound_config()
if config == null:
print("[TrayListener] ERROR: SoundConfig not found when fetching config!")
return {"error": "SoundConfig not found"}
var result: Dictionary = {}
var slots: Dictionary = config.get_slots()
print("[TrayListener] Fetching sound config with %d slots" % slots.size())
for slot_key: String in slots.keys():
# Values are read through get_sound() rather than taken from `slots` directly.
var sound: String = config.get_sound(slot_key)
result[slot_key] = sound
print("[TrayListener] Fetched %s = '%s'" % [slot_key, sound])
result[slot_key] = config.get_sound(slot_key)
return result
@ -378,30 +414,44 @@ func _handle_state_get(msg: Dictionary) -> void:
## Writes msg["data"] into the AppState section named by msg["section"] and replies
## {"ok": true}.
## Replies {"error": "missing section or data"} and writes nothing when "section" is
## missing/empty or "data" is not a Dictionary.
func _handle_state_set(msg: Dictionary) -> void:
var section: String = msg.get("section", "")
var data: Variant = msg.get("data")
if section.is_empty() or not (data is Dictionary):
if section.is_empty() or not (msg.get("data") is Dictionary):
_respond({"error": "missing section or data"})
return
AppState.set_section(section, data)
# The `is Dictionary` guard above has already validated the value being cast here.
AppState.set_section(section, msg.get("data") as Dictionary)
_respond({"ok": true})
## Returns the companion's "SoundEngine" child as a plain Node, or null when there is none.
## NOTE(review): a second `_get_sound_engine()` returning SoundEngineScript appears a few
## lines below in this listing; presumably this untyped form is the pre-change side of
## the diff — confirm before relying on either.
func _get_sound_engine() -> Node:
return _companion.get_node_or_null("SoundEngine")
## Looks up the companion's "EdgeSnap" child.
## Returns null when the child is missing or is not an EdgeSnapScript instance.
func _get_edge_snap() -> EdgeSnapScript:
	var node: Node = _companion.get_node_or_null("EdgeSnap")
	return node as EdgeSnapScript
## Looks up the companion's "WindowZoom" child.
## Returns null when the child is missing or is not a WindowZoomScript instance.
func _get_window_zoom() -> WindowZoomScript:
	var node: Node = _companion.get_node_or_null("WindowZoom")
	return node as WindowZoomScript
## Looks up the companion's "SoundEngine" child.
## Returns null when the child is missing or is not a SoundEngineScript instance.
func _get_sound_engine() -> SoundEngineScript:
	var node: Node = _companion.get_node_or_null("SoundEngine")
	return node as SoundEngineScript
## Looks up the companion's "SoundConfig" child.
## Returns null when the child is missing or is not a SoundConfigScript instance.
func _get_sound_config() -> SoundConfigScript:
	var node: Node = _companion.get_node_or_null("SoundConfig")
	return node as SoundConfigScript
## Looks up the companion's "Microphone" child.
## Returns null when the child is missing or is not a MicrophoneScript instance.
func _get_microphone() -> MicrophoneScript:
	var node: Node = _companion.get_node_or_null("Microphone")
	return node as MicrophoneScript
func _build_status() -> Dictionary:
var snap := _get_edge_snap()
var zoom := _companion.get_node_or_null("WindowZoom")
var gaze_mode := "desktop"
var face_detected := false
var snap: EdgeSnapScript = _get_edge_snap()
var zoom: WindowZoomScript = _get_window_zoom()
var gaze_mode: String = "desktop"
var face_detected: bool = false
if _gaze_controller != null:
gaze_mode = _gaze_controller.get_mode_name()
face_detected = _gaze_controller.is_face_available()
var mic := _companion.get_node_or_null("Microphone")
var ds := DisplayServer
var win_pos := ds.window_get_position()
var win_size := ds.window_get_size()
var mic: MicrophoneScript = _get_microphone()
var win_pos: Vector2i = DisplayServer.window_get_position()
var win_size: Vector2i = DisplayServer.window_get_size()
return {
"running": true,
"snap_enabled": snap.enabled if snap else false,
@ -418,12 +468,8 @@ func _build_status() -> Dictionary:
}
func _get_edge_snap() -> Node:
return _companion.get_node_or_null("EdgeSnap")
## Serializes `data` to JSON, encodes it as UTF-8 and sends it as a single datagram to
## (_reply_ip, _reply_port), i.e. the sender of the last handled message or the defaults.
## The Error values returned by set_dest_address()/put_packet() are not checked.
## NOTE(review): _udp is used without a null check although setup() nulls it on bind
## failure. Presumably every caller is reached through _process(), which returns early
## in that case — confirm no other entry point calls this.
func _respond(data: Dictionary) -> void:
var response := JSON.stringify(data).to_utf8_buffer()
var response: PackedByteArray = JSON.stringify(data).to_utf8_buffer()
_udp.set_dest_address(_reply_ip, _reply_port)
_udp.put_packet(response)