fix(bridge): 🐛 Fix system tray event handling in TrayListener for Godot desktop platform
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
parent
9e7cff0a7c
commit
dba5e2339c
1 changed files with 143 additions and 97 deletions
|
|
@ -5,41 +5,58 @@ extends Node
|
|||
|
||||
const ScreenGazeScript = preload("res://src/core/screen_gaze.gd")
|
||||
const TrayDebugScript = preload("res://platform/bridge/tray_debug_commands.gd")
|
||||
const GazeControllerScript = preload("res://src/avatar/gaze_controller.gd")
|
||||
const IdleAnimatorScript = preload("res://src/avatar/idle_animator.gd")
|
||||
const ExpressionControllerScript = preload("res://src/avatar/expression_controller.gd")
|
||||
const GestureRegistryScript = preload("res://src/avatar/gesture_registry.gd")
|
||||
const EdgeSnapScript = preload("res://platform/window/edge_snap.gd")
|
||||
const WindowZoomScript = preload("res://platform/window/window_zoom.gd")
|
||||
const SoundEngineScript = preload("res://src/audio/sound_engine.gd")
|
||||
const SoundConfigScript = preload("res://src/audio/sound_config.gd")
|
||||
const MicrophoneScript = preload("res://src/conversation/microphone.gd")
|
||||
|
||||
const PORT: int = 19700
|
||||
|
||||
var _udp: PacketPeerUDP
|
||||
var _companion: Node
|
||||
var _gaze_controller: Node
|
||||
var _idle_animator: Node
|
||||
var _expression_controller: Node
|
||||
var _companion: Node # desktop_companion — no class_name, kept as Node
|
||||
var _gaze_controller: GazeControllerScript
|
||||
var _idle_animator: IdleAnimatorScript
|
||||
var _expression_controller: ExpressionControllerScript
|
||||
var _reply_ip: String = "127.0.0.1"
|
||||
var _reply_port: int = PORT + 1
|
||||
var _last_attention: String = "absent"
|
||||
var _last_confidence: float = 0.0
|
||||
var _last_head_yaw: float = 0.0
|
||||
var _last_head_pitch: float = 0.0
|
||||
var _last_iris_h: float = 0.5
|
||||
var _last_iris_v: float = 0.5
|
||||
var _has_face: bool = false
|
||||
|
||||
|
||||
func setup(companion: Node) -> void:
|
||||
_companion = companion
|
||||
_udp = PacketPeerUDP.new()
|
||||
var err := _udp.bind(PORT, "127.0.0.1")
|
||||
var err: Error = _udp.bind(PORT, "127.0.0.1")
|
||||
if err != OK:
|
||||
push_warning("UDPListener: Failed to bind port %d — is another instance running?" % PORT)
|
||||
_udp = null
|
||||
return
|
||||
|
||||
# Find avatar controllers (recursive search — they live inside the VRM model node)
|
||||
_gaze_controller = companion.find_child("GazeController", true, false)
|
||||
_idle_animator = companion.find_child("IdleAnimator", true, false)
|
||||
_expression_controller = companion.find_child("ExpressionController", true, false)
|
||||
_gaze_controller = companion.find_child("GazeController", true, false) as GazeControllerScript
|
||||
_idle_animator = companion.find_child("IdleAnimator", true, false) as IdleAnimatorScript
|
||||
_expression_controller = (
|
||||
companion.find_child("ExpressionController", true, false) as ExpressionControllerScript
|
||||
)
|
||||
|
||||
|
||||
func _process(_delta: float) -> void:
|
||||
if _udp == null:
|
||||
return
|
||||
while _udp.get_available_packet_count() > 0:
|
||||
var data := _udp.get_packet().get_string_from_utf8()
|
||||
var sender_ip := _udp.get_packet_ip()
|
||||
var sender_port := _udp.get_packet_port()
|
||||
var data: String = _udp.get_packet().get_string_from_utf8()
|
||||
var sender_ip: String = _udp.get_packet_ip()
|
||||
var sender_port: int = _udp.get_packet_port()
|
||||
_handle_message(data, sender_ip, sender_port)
|
||||
|
||||
|
||||
|
|
@ -51,7 +68,7 @@ func _handle_message(
|
|||
_reply_ip = sender_ip
|
||||
_reply_port = sender_port
|
||||
|
||||
var json := JSON.new()
|
||||
var json: JSON = JSON.new()
|
||||
if json.parse(raw) != OK:
|
||||
return
|
||||
|
||||
|
|
@ -69,9 +86,10 @@ func _handle_command(msg: Dictionary) -> void:
|
|||
|
||||
match cmd:
|
||||
"toggle_snap":
|
||||
var snap := _get_edge_snap()
|
||||
var snap: EdgeSnapScript = _get_edge_snap()
|
||||
if snap != null:
|
||||
snap.enabled = not snap.enabled
|
||||
AppState.set_snap_enabled(snap.enabled)
|
||||
_respond({"snap_enabled": snap.enabled})
|
||||
"toggle_gaze":
|
||||
if _gaze_controller != null:
|
||||
|
|
@ -85,13 +103,13 @@ func _handle_command(msg: Dictionary) -> void:
|
|||
if _gaze_controller != null:
|
||||
var mode: String = msg.get("mode", "desktop")
|
||||
if mode == "face_to_face":
|
||||
_gaze_controller.set_mode(_gaze_controller.GazeMode.FACE_TO_FACE)
|
||||
_gaze_controller.set_mode(GazeControllerScript.GazeMode.FACE_TO_FACE)
|
||||
else:
|
||||
_gaze_controller.set_mode(_gaze_controller.GazeMode.DESKTOP)
|
||||
_gaze_controller.set_mode(GazeControllerScript.GazeMode.DESKTOP)
|
||||
_respond({"gaze_mode": _gaze_controller.get_mode_name()})
|
||||
"set_zoom":
|
||||
var level: float = msg.get("level", 0.5)
|
||||
var zoom := _companion.get_node_or_null("WindowZoom")
|
||||
var zoom: WindowZoomScript = _get_window_zoom()
|
||||
if zoom != null:
|
||||
zoom.set_zoom_level(level)
|
||||
"reset_position":
|
||||
|
|
@ -99,13 +117,13 @@ func _handle_command(msg: Dictionary) -> void:
|
|||
"quit":
|
||||
get_tree().quit()
|
||||
"test_snap":
|
||||
var snap := _get_edge_snap()
|
||||
var snap: EdgeSnapScript = _get_edge_snap()
|
||||
if snap != null:
|
||||
snap.try_snap()
|
||||
_respond({"snapped": true})
|
||||
"set_state":
|
||||
var target: String = msg.get("state", "idle")
|
||||
var valid := ["idle", "listening", "processing", "speaking"]
|
||||
var valid: Array[String] = ["idle", "listening", "processing", "speaking"]
|
||||
if target in valid:
|
||||
EventBus.state_changed.emit("", target)
|
||||
_respond({"state": target})
|
||||
|
|
@ -116,7 +134,7 @@ func _handle_command(msg: Dictionary) -> void:
|
|||
"play_sound":
|
||||
_play_sound(msg.get("name", "chirp"))
|
||||
"list_sounds":
|
||||
var engine := _get_sound_engine()
|
||||
var engine: SoundEngineScript = _get_sound_engine()
|
||||
if engine != null:
|
||||
_respond({"sounds": engine.get_sound_names()})
|
||||
"focus":
|
||||
|
|
@ -153,7 +171,7 @@ func _handle_command(msg: Dictionary) -> void:
|
|||
EventBus.conversation_new_requested.emit()
|
||||
_respond({"ok": true})
|
||||
"list_conversations":
|
||||
var index := AppState.get_section("conversations")
|
||||
var index: Dictionary = AppState.get_section("conversations")
|
||||
_respond({"conversations": index.get("list", [])})
|
||||
"snd_config":
|
||||
_handle_sound_config(msg)
|
||||
|
|
@ -166,7 +184,7 @@ func _handle_command(msg: Dictionary) -> void:
|
|||
FlightRecorder.record("vision.halo_toggled", "Gaze halo toggled", {"enabled": enabled})
|
||||
_respond({"gaze_halo": enabled})
|
||||
"toggle_mic":
|
||||
var mic := _companion.get_node_or_null("Microphone")
|
||||
var mic: MicrophoneScript = _get_microphone()
|
||||
if mic != null:
|
||||
mic.set_enabled(not mic._enabled)
|
||||
_respond({"mic_enabled": mic._enabled})
|
||||
|
|
@ -194,23 +212,32 @@ func _handle_envelope(msg: Dictionary) -> void:
|
|||
|
||||
|
||||
func _handle_face(payload: Dictionary) -> void:
|
||||
var face_x = payload.get("face_x")
|
||||
var face_y = payload.get("face_y")
|
||||
var attention: String = payload.get("attention", "absent")
|
||||
var confidence: float = payload.get("confidence", 0.0)
|
||||
var head_yaw = payload.get("head_yaw")
|
||||
var head_pitch = payload.get("head_pitch")
|
||||
var iris_h = payload.get("iris_h")
|
||||
var iris_v = payload.get("iris_v")
|
||||
var has_position: bool = payload.has("face_x") and payload.has("face_y")
|
||||
var has_pose: bool = (
|
||||
payload.has("head_yaw")
|
||||
and payload.has("head_pitch")
|
||||
and payload.has("iris_h")
|
||||
and payload.has("iris_v")
|
||||
)
|
||||
var face_x: float = float(payload.get("face_x", 0.0))
|
||||
var face_y: float = float(payload.get("face_y", 0.0))
|
||||
var head_yaw: float = float(payload.get("head_yaw", 0.0))
|
||||
var head_pitch: float = float(payload.get("head_pitch", 0.0))
|
||||
var iris_h: float = float(payload.get("iris_h", 0.5))
|
||||
var iris_v: float = float(payload.get("iris_v", 0.5))
|
||||
|
||||
if face_x != null and face_y != null:
|
||||
var pos := Vector2(float(face_x), float(face_y))
|
||||
EventBus.face_detected.emit(pos)
|
||||
if has_position:
|
||||
_has_face = true
|
||||
EventBus.face_detected.emit(Vector2(face_x, face_y))
|
||||
|
||||
if head_yaw != null and head_pitch != null and iris_h != null and iris_v != null:
|
||||
EventBus.face_pose_updated.emit(
|
||||
float(head_yaw), float(head_pitch), float(iris_h), float(iris_v)
|
||||
)
|
||||
if has_pose:
|
||||
_last_head_yaw = head_yaw
|
||||
_last_head_pitch = head_pitch
|
||||
_last_iris_h = iris_h
|
||||
_last_iris_v = iris_v
|
||||
EventBus.face_pose_updated.emit(head_yaw, head_pitch, iris_h, iris_v)
|
||||
|
||||
if _gaze_controller != null:
|
||||
if attention == "looking":
|
||||
|
|
@ -218,48 +245,49 @@ func _handle_face(payload: Dictionary) -> void:
|
|||
# Compute the direction from Miku's window toward the camera position.
|
||||
# The camera is above the monitor (negative y relative to screen center).
|
||||
# We invert: Miku looks slightly up and toward center to meet the user's eyes.
|
||||
var ds := DisplayServer
|
||||
var win_pos := ds.window_get_position()
|
||||
var win_size := ds.window_get_size()
|
||||
# Miku's center in screen coords
|
||||
var miku_cx := win_pos.x + win_size.x / 2.0
|
||||
var miku_cy := win_pos.y + win_size.y / 2.0
|
||||
# Camera is assumed to be at center-top of primary monitor
|
||||
var screen_w := ds.screen_get_size(ds.get_primary_screen()).x
|
||||
var screen_y_top := ds.screen_get_position(ds.get_primary_screen()).y
|
||||
var cam_cx := ds.screen_get_position(ds.get_primary_screen()).x + screen_w / 2.0
|
||||
var cam_cy := screen_y_top - 200.0 # ~200px above monitor top
|
||||
# Direction from Miku toward camera, normalized
|
||||
var dx := cam_cx - miku_cx
|
||||
var dy := cam_cy - miku_cy
|
||||
var max_dx := float(screen_w)
|
||||
var max_dy := 800.0 # typical monitor height
|
||||
var gaze_x := clampf(dx / max_dx * 2.0, -1.0, 1.0)
|
||||
var gaze_y := clampf(dy / max_dy * 2.0, -1.0, 1.0)
|
||||
var win_pos: Vector2i = DisplayServer.window_get_position()
|
||||
var win_size: Vector2i = DisplayServer.window_get_size()
|
||||
var miku_cx: float = float(win_pos.x) + float(win_size.x) / 2.0
|
||||
var miku_cy: float = float(win_pos.y) + float(win_size.y) / 2.0
|
||||
var screen_w: int = DisplayServer.screen_get_size(DisplayServer.get_primary_screen()).x
|
||||
var screen_y_top: int = (
|
||||
DisplayServer.screen_get_position(DisplayServer.get_primary_screen()).y
|
||||
)
|
||||
var cam_cx: float = (
|
||||
float(DisplayServer.screen_get_position(DisplayServer.get_primary_screen()).x)
|
||||
+ float(screen_w) / 2.0
|
||||
)
|
||||
var cam_cy: float = float(screen_y_top) - 200.0 # ~200px above monitor top
|
||||
var dx: float = cam_cx - miku_cx
|
||||
var dy: float = cam_cy - miku_cy
|
||||
var gaze_x: float = clampf(dx / float(screen_w) * 2.0, -1.0, 1.0)
|
||||
var gaze_y: float = clampf(dy / 800.0 * 2.0, -1.0, 1.0)
|
||||
_gaze_controller.set_face_target(Vector2(gaze_x, gaze_y))
|
||||
# Halo: user is looking at camera — place halo at camera position.
|
||||
EventBus.gaze_screen_updated.emit(Vector2(cam_cx, cam_cy))
|
||||
elif head_yaw != null and head_pitch != null and iris_h != null and iris_v != null:
|
||||
elif has_pose:
|
||||
# Mirror mode — Miku follows where the user is looking.
|
||||
# Blend 70% head pose + 30% iris for smooth, accurate gaze direction.
|
||||
var iris_h_deg := (float(iris_h) - 0.5) * 80.0 # iris [0..1] → ±40°
|
||||
var iris_v_deg := (float(iris_v) - 0.5) * 60.0 # iris [0..1] → ±30°
|
||||
var comp_yaw := float(head_yaw) * 0.7 + iris_h_deg * 0.3
|
||||
var comp_pitch := float(head_pitch) * 0.7 + iris_v_deg * 0.3
|
||||
var iris_h_deg: float = (iris_h - 0.5) * 80.0 # iris [0..1] → ±40°
|
||||
var iris_v_deg: float = (iris_v - 0.5) * 60.0 # iris [0..1] → ±30°
|
||||
var comp_yaw: float = head_yaw * 0.7 + iris_h_deg * 0.3
|
||||
var comp_pitch: float = head_pitch * 0.7 + iris_v_deg * 0.3
|
||||
# Normalize to -1..1 (±35° yaw covers full gaze range, ±20° pitch)
|
||||
var gaze_x := clampf(comp_yaw / 35.0, -1.0, 1.0)
|
||||
var gaze_y := clampf(-comp_pitch / 20.0, -1.0, 1.0) # invert: up pitch = up gaze
|
||||
var gaze_x: float = clampf(comp_yaw / 35.0, -1.0, 1.0)
|
||||
var gaze_y: float = clampf(-comp_pitch / 20.0, -1.0, 1.0) # invert: up → up
|
||||
_gaze_controller.set_face_target(Vector2(gaze_x, gaze_y))
|
||||
# Halo: project gaze angles to screen pixel coords (same model as screen_layout.py).
|
||||
EventBus.gaze_screen_updated.emit(
|
||||
ScreenGazeScript.gaze_to_screen_coords(comp_yaw, comp_pitch)
|
||||
)
|
||||
elif face_x != null and face_y != null:
|
||||
elif has_position:
|
||||
# Fallback: face position in camera frame (no gaze data)
|
||||
_gaze_controller.set_face_target(Vector2(float(face_x), float(face_y)))
|
||||
_gaze_controller.set_face_target(Vector2(face_x, face_y))
|
||||
|
||||
_last_confidence = confidence
|
||||
if attention == "absent":
|
||||
_has_face = false
|
||||
|
||||
if attention != _last_attention:
|
||||
var prev := _last_attention
|
||||
var prev: String = _last_attention
|
||||
_last_attention = attention
|
||||
|
||||
(
|
||||
|
|
@ -277,17 +305,31 @@ func _handle_face(payload: Dictionary) -> void:
|
|||
EventBus.attention_changed.emit(attention, confidence)
|
||||
|
||||
|
||||
func _get_gesture_names() -> Array:
|
||||
var names: Array = []
|
||||
## Re-emit the last known face state to any newly-connected listeners.
|
||||
## Call this when a settings page becomes visible to initialize its UI.
|
||||
func sync_face_state() -> void:
|
||||
if _has_face:
|
||||
EventBus.attention_changed.emit(_last_attention, _last_confidence)
|
||||
EventBus.face_pose_updated.emit(
|
||||
_last_head_yaw, _last_head_pitch, _last_iris_h, _last_iris_v
|
||||
)
|
||||
else:
|
||||
EventBus.face_lost.emit()
|
||||
|
||||
|
||||
func _get_gesture_names() -> Array[String]:
|
||||
var names: Array[String] = []
|
||||
if _idle_animator != null and _idle_animator.gesture_reg != null:
|
||||
names = _idle_animator.gesture_reg.get_names()
|
||||
var reg: GestureRegistryScript = _idle_animator.gesture_reg as GestureRegistryScript
|
||||
if reg != null:
|
||||
names.assign(reg.get_names())
|
||||
names.append("slow_blink") # special trigger handled outside gesture_reg
|
||||
return names
|
||||
|
||||
|
||||
func _get_emotion_names() -> Array:
|
||||
func _get_emotion_names() -> Array[String]:
|
||||
if _expression_controller != null:
|
||||
return Array(_expression_controller.EMOTION_SHAPES)
|
||||
return _expression_controller.EMOTION_SHAPES.duplicate()
|
||||
return ["happy", "sad", "angry", "surprised", "relaxed", "neutral"]
|
||||
|
||||
|
||||
|
|
@ -331,7 +373,7 @@ func _play_animation(anim_name: String) -> void:
|
|||
|
||||
|
||||
func _play_sound(sound_name: String) -> void:
|
||||
var engine := _get_sound_engine()
|
||||
var engine: SoundEngineScript = _get_sound_engine()
|
||||
if engine != null:
|
||||
engine.play_sound(sound_name)
|
||||
_respond({"played_sound": sound_name})
|
||||
|
|
@ -342,29 +384,23 @@ func _play_sound(sound_name: String) -> void:
|
|||
func _handle_sound_config(msg: Dictionary) -> void:
|
||||
var slot: String = msg.get("slot", "")
|
||||
var sound: String = msg.get("sound", "")
|
||||
var config := _companion.get_node_or_null("SoundConfig")
|
||||
var config: SoundConfigScript = _get_sound_config()
|
||||
if config != null:
|
||||
print("[TrayListener] Saving sound config: %s = '%s'" % [slot, sound])
|
||||
config.set_sound(slot, sound)
|
||||
_respond({"snd_config": {"slot": slot, "sound": sound}})
|
||||
else:
|
||||
print("[TrayListener] ERROR: SoundConfig not found!")
|
||||
_respond({"error": "SoundConfig not found"})
|
||||
|
||||
|
||||
func _build_sound_config() -> Dictionary:
|
||||
var config := _companion.get_node_or_null("SoundConfig")
|
||||
var config: SoundConfigScript = _get_sound_config()
|
||||
if config == null:
|
||||
print("[TrayListener] ERROR: SoundConfig not found when fetching config!")
|
||||
return {"error": "SoundConfig not found"}
|
||||
|
||||
var result: Dictionary = {}
|
||||
var slots: Dictionary = config.get_slots()
|
||||
print("[TrayListener] Fetching sound config with %d slots" % slots.size())
|
||||
for slot_key: String in slots.keys():
|
||||
var sound: String = config.get_sound(slot_key)
|
||||
result[slot_key] = sound
|
||||
print("[TrayListener] Fetched %s = '%s'" % [slot_key, sound])
|
||||
result[slot_key] = config.get_sound(slot_key)
|
||||
return result
|
||||
|
||||
|
||||
|
|
@ -378,30 +414,44 @@ func _handle_state_get(msg: Dictionary) -> void:
|
|||
|
||||
func _handle_state_set(msg: Dictionary) -> void:
|
||||
var section: String = msg.get("section", "")
|
||||
var data: Variant = msg.get("data")
|
||||
if section.is_empty() or not (data is Dictionary):
|
||||
if section.is_empty() or not (msg.get("data") is Dictionary):
|
||||
_respond({"error": "missing section or data"})
|
||||
return
|
||||
AppState.set_section(section, data)
|
||||
AppState.set_section(section, msg.get("data") as Dictionary)
|
||||
_respond({"ok": true})
|
||||
|
||||
|
||||
func _get_sound_engine() -> Node:
|
||||
return _companion.get_node_or_null("SoundEngine")
|
||||
func _get_edge_snap() -> EdgeSnapScript:
|
||||
return _companion.get_node_or_null("EdgeSnap") as EdgeSnapScript
|
||||
|
||||
|
||||
func _get_window_zoom() -> WindowZoomScript:
|
||||
return _companion.get_node_or_null("WindowZoom") as WindowZoomScript
|
||||
|
||||
|
||||
func _get_sound_engine() -> SoundEngineScript:
|
||||
return _companion.get_node_or_null("SoundEngine") as SoundEngineScript
|
||||
|
||||
|
||||
func _get_sound_config() -> SoundConfigScript:
|
||||
return _companion.get_node_or_null("SoundConfig") as SoundConfigScript
|
||||
|
||||
|
||||
func _get_microphone() -> MicrophoneScript:
|
||||
return _companion.get_node_or_null("Microphone") as MicrophoneScript
|
||||
|
||||
|
||||
func _build_status() -> Dictionary:
|
||||
var snap := _get_edge_snap()
|
||||
var zoom := _companion.get_node_or_null("WindowZoom")
|
||||
var gaze_mode := "desktop"
|
||||
var face_detected := false
|
||||
var snap: EdgeSnapScript = _get_edge_snap()
|
||||
var zoom: WindowZoomScript = _get_window_zoom()
|
||||
var gaze_mode: String = "desktop"
|
||||
var face_detected: bool = false
|
||||
if _gaze_controller != null:
|
||||
gaze_mode = _gaze_controller.get_mode_name()
|
||||
face_detected = _gaze_controller.is_face_available()
|
||||
var mic := _companion.get_node_or_null("Microphone")
|
||||
var ds := DisplayServer
|
||||
var win_pos := ds.window_get_position()
|
||||
var win_size := ds.window_get_size()
|
||||
var mic: MicrophoneScript = _get_microphone()
|
||||
var win_pos: Vector2i = DisplayServer.window_get_position()
|
||||
var win_size: Vector2i = DisplayServer.window_get_size()
|
||||
return {
|
||||
"running": true,
|
||||
"snap_enabled": snap.enabled if snap else false,
|
||||
|
|
@ -418,12 +468,8 @@ func _build_status() -> Dictionary:
|
|||
}
|
||||
|
||||
|
||||
func _get_edge_snap() -> Node:
|
||||
return _companion.get_node_or_null("EdgeSnap")
|
||||
|
||||
|
||||
func _respond(data: Dictionary) -> void:
|
||||
var response := JSON.stringify(data).to_utf8_buffer()
|
||||
var response: PackedByteArray = JSON.stringify(data).to_utf8_buffer()
|
||||
_udp.set_dest_address(_reply_ip, _reply_port)
|
||||
_udp.put_packet(response)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue