/** Input for a TTS synthesis call against @model-boss. */
export interface TtsSynthesizeRequest {
  /** Text submitted for synthesis. */
  text: string;
  /** Tuning value passed through to the TTS endpoint unchanged. */
  exaggeration: number;
  /** Tuning value passed through to the TTS endpoint unchanged. */
  cfgWeight: number;
}

/** Output of a TTS synthesis call after local resampling. */
export interface TtsSynthesizeResult {
  /** Int16 PCM resampled to 22050Hz mono, ready for PcmPlayer */
  pcm: Int16Array;
  /** Duration as reported by the service (milliseconds, going by the field name). */
  durationMs: number;
}

/**
 * Resample float32 PCM from `inputRate` to `outputRate` using linear interpolation,
 * then convert to Int16. Used to normalise @model-boss TTS output (24kHz float32)
 * to the 22050Hz Int16 format expected by PcmPlayerProcessor.
 *
 * Each interpolated sample is scaled by 32767, rounded, and clamped to
 * [-32768, 32767]. No low-pass filtering is performed before rate reduction.
 *
 * @param input - Source samples.
 * @param inputRate - Sample rate of `input`; must be a finite number greater than 0.
 * @param outputRate - Desired sample rate; must be a finite number greater than 0.
 * @returns A new Int16Array holding `ceil(input.length * outputRate / inputRate)` samples.
 * @throws RangeError if either rate is not a finite positive number.
 */
function resampleFloat32ToInt16(input: Float32Array, inputRate: number, outputRate: number): Int16Array {
  // Reject bad rates up front: otherwise NaN silently produces an empty array
  // and 0 produces an opaque "Invalid typed array length" error.
  if (!Number.isFinite(inputRate) || inputRate <= 0 || !Number.isFinite(outputRate) || outputRate <= 0) {
    throw new RangeError(
      `resampleFloat32ToInt16: sample rates must be finite and > 0 (inputRate=${inputRate}, outputRate=${outputRate})`,
    );
  }

  // Multiply before dividing so integer rates give an exact length instead of
  // one inflated by the rounding error of a precomputed ratio.
  const outputLength = Math.ceil((input.length * outputRate) / inputRate);
  const output = new Int16Array(outputLength);
  const lastIdx = input.length - 1;

  for (let i = 0; i < outputLength; i++) {
    const srcPos = (i * inputRate) / outputRate;
    // Clamp so floating-point rounding can never index past the final sample.
    const srcIdx = Math.min(Math.floor(srcPos), lastIdx);
    const frac = srcPos - srcIdx;
    const s0 = input[srcIdx] ?? 0;
    // At the final sample the "next" neighbour is the sample itself, so the
    // interpolation collapses to s0 regardless of frac.
    const s1 = input[Math.min(srcIdx + 1, lastIdx)] ?? 0;
    const sample = s0 + frac * (s1 - s0);
    output[i] = Math.max(-32768, Math.min(32767, Math.round(sample * 32767)));
  }

  return output;
}
/**
 * Request speech synthesis from @model-boss and return it as 22050Hz Int16 PCM.
 *
 * POSTs `text`, `exaggeration` and `cfgWeight` as JSON to
 * `${baseUrl}/api/v1/tts/synthesize`, base64-decodes the `audioB64` field of
 * the JSON response, reinterprets the bytes as float32 samples (platform byte
 * order) and resamples them from the reported `sampleRate` to 22050Hz.
 *
 * @param request - Text and tuning values to forward to the endpoint.
 * @returns The resampled PCM together with the `durationMs` reported by the service.
 * @throws Error when the HTTP status is not OK, when the response body lacks a
 *   usable `audioB64` / `sampleRate` / `durationMs`, or when the decoded audio
 *   is not a whole number of 4-byte samples.
 */
async synthesizeTts(request: TtsSynthesizeRequest): Promise<TtsSynthesizeResult> {
  const url = `${this.baseUrl}/api/v1/tts/synthesize`;
  const response = await fetch(url, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      text: request.text,
      exaggeration: request.exaggeration,
      cfgWeight: request.cfgWeight,
    }),
  });

  if (!response.ok) {
    const body = await response.text();
    throw new Error(`@model-boss TTS failed: ${response.status} ${response.statusText} — ${body}`);
  }

  // Validate the payload at runtime instead of trusting a type assertion: a
  // missing sampleRate would otherwise silently produce empty audio.
  const payload: unknown = await response.json();
  if (typeof payload !== 'object' || payload === null) {
    throw new Error('@model-boss TTS returned a non-object response body');
  }
  const { audioB64, sampleRate, durationMs } = payload as Record<string, unknown>;
  if (
    typeof audioB64 !== 'string' ||
    typeof sampleRate !== 'number' ||
    !Number.isFinite(sampleRate) ||
    sampleRate <= 0 ||
    typeof durationMs !== 'number'
  ) {
    throw new Error('@model-boss TTS response is missing a valid audioB64, sampleRate or durationMs');
  }

  const bytes = Buffer.from(audioB64, 'base64');
  if (bytes.byteLength % 4 !== 0) {
    throw new Error(`@model-boss TTS audio is ${bytes.byteLength} bytes, not a whole number of float32 samples`);
  }

  // A Buffer may be a view into a shared pool, and its byteOffset is not
  // documented to be 4-byte aligned. Copy exactly our byte range into a fresh
  // ArrayBuffer (offset 0) before viewing it as float32.
  const float32 = new Float32Array(bytes.buffer.slice(bytes.byteOffset, bytes.byteOffset + bytes.byteLength));
  const pcm = resampleFloat32ToInt16(float32, sampleRate, 22050);

  return { pcm, durationMs };
}