/** JSON body accepted by the Piper TTS endpoint. */
interface TTSRequest {
  text: string;
  voice?: string;
  speed?: number;
}

/** PCM layout reported by the Wyoming server; `width` is passed to the WAV header as bytes per sample. */
type AudioFormat = { rate: number; width: number; channels: number };

/** Verbose logging is opt-in via the PIPER_TTS_DEBUG=1 environment variable. */
const PIPER_TTS_DEBUG = process.env.PIPER_TTS_DEBUG === '1';

/** Logs to the console only when verbose logging is enabled. */
function piperDebug(...args: unknown[]) {
  if (PIPER_TTS_DEBUG) console.log(...args);
}

/** Minimum gap between two "server unreachable" warnings. */
const WYOMING_DOWN_COOLDOWN_MS = 60_000;
let lastWyomingDownLog = 0;

/** Port used when no (valid) port override is configured. */
const DEFAULT_PIPER_PORT = 10200;

/** Returns true when an error message looks like a "cannot reach the server" network failure. */
function isWyomingUnreachableMessage(msg: string): boolean {
  return /ECONNREFUSED|connection refused|ENOTFOUND|ETIMEDOUT/i.test(msg);
}

/**
 * Emits a single warning about the Wyoming server being unreachable and then
 * stays silent for WYOMING_DOWN_COOLDOWN_MS.
 */
function logWyomingUnreachableThrottled(host: string, port: number, detail?: string) {
  const now = Date.now();
  if (now - lastWyomingDownLog < WYOMING_DOWN_COOLDOWN_MS) return;
  lastWyomingDownLog = now;
  console.warn(
    `[Piper TTS] Wyoming/Piper not reachable at ${host}:${port}${detail ? ` — ${detail}` : ''}. ` +
      `Start the Wyoming Piper service or set PIPER_TTS_HOST / PIPER_TTS_PORT. ` +
      `Suppressing similar messages for ${WYOMING_DOWN_COOLDOWN_MS / 1000}s. Use PIPER_TTS_DEBUG=1 for verbose logs.`
  );
}

/**
 * Proxy endpoint for Piper TTS using the Wyoming protocol (TCP).
 * Wyoming protocol: newline-delimited JSON messages, then raw binary audio.
 *
 * HTTP handler (plain `Request` / `Response`); bridged to Wyoming Piper over TCP.
 *
 * @param request POST request whose JSON body matches `TTSRequest`.
 * @returns An `audio/wav` streaming response on success, or a JSON error
 *          response (400 for invalid input, 500 for unexpected failures).
 */
export async function handlePiperTtsPost(request: Request): Promise<Response> {
  piperDebug('Piper TTS API: Request received');

  try {
    // A malformed body is a client error, not a server failure.
    let payload: unknown;
    try {
      payload = await request.json();
    } catch {
      console.error('Piper TTS API: Request body is not valid JSON');
      return errorResponse(400, 'Request body must be valid JSON');
    }
    const body: Partial<TTSRequest> =
      payload !== null && typeof payload === 'object' ? (payload as Partial<TTSRequest>) : {};
    const { text, voice } = body;
    // Only forward a speed that is a real number; anything else is ignored.
    const speed = typeof body.speed === 'number' && Number.isFinite(body.speed) ? body.speed : undefined;

    piperDebug('Piper TTS API: Processing request', {
      textLength: text?.length,
      voice,
      speed,
      voiceType: typeof voice,
      voiceValue: voice,
    });

    if (typeof text !== 'string' || !text.trim()) {
      console.error('Piper TTS API: Missing text field');
      return errorResponse(400, 'Missing required field: text');
    }

    // Filter and prepare text
    const filteredText = filterCryptographicContent(text);
    if (!filteredText.trim()) {
      console.warn('Piper TTS API: Text is empty after filtering');
      return errorResponse(400, 'Text contains only cryptographic addresses/IDs that cannot be read aloud');
    }

    const sentences = splitIntoSentences(filteredText);
    const fullText = sentences.filter((s) => s.trim().length > 0).join(' ');
    piperDebug(`Piper TTS API: Processing ${sentences.length} sentences, total length: ${fullText.length}`);

    // Use provided voice, or auto-detect language and select voice if not provided
    let selectedVoice: string;
    if (typeof voice === 'string' && voice.trim() !== '') {
      selectedVoice = voice;
      piperDebug(`Piper TTS API: Using provided voice: ${selectedVoice}`);
    } else {
      const detectedLang = detectLanguage(fullText);
      selectedVoice = getVoiceForLanguage(detectedLang);
      piperDebug(`Piper TTS API: No voice provided, auto-detected language: ${detectedLang}, selected voice: ${selectedVoice}`);
    }

    // Stream audio response with cancellation support
    const abortController = new AbortController();
    let wyomingCleanup: (() => void) | null = null;

    const stream = new ReadableStream({
      async start(controller) {
        const tcpConfig = getTcpConfig();
        try {
          // All audio is collected first because the WAV header needs the total data size.
          const audioChunks: Uint8Array[] = [];
          let audioFormat: AudioFormat | null = null;
          let totalBytes = 0;

          piperDebug('Piper TTS API: Connecting to Wyoming server at', tcpConfig.hostname, 'port', tcpConfig.port);

          await synthesizeWithWyoming(
            tcpConfig,
            fullText,
            selectedVoice,
            speed,
            abortController.signal,
            (cleanup) => {
              wyomingCleanup = cleanup;
            },
            (chunk: Uint8Array, format?: AudioFormat) => {
              if (abortController.signal.aborted) return;
              if (format && !audioFormat) {
                audioFormat = format;
                piperDebug('Piper TTS API: Received audio format:', format);
              }
              if (chunk.length > 0) {
                audioChunks.push(chunk);
                totalBytes += chunk.length;
              }
            }
          );

          if (abortController.signal.aborted) {
            piperDebug('Piper TTS API: Synthesis aborted');
            controller.close();
            return;
          }

          if (!audioFormat || totalBytes === 0) {
            throw new Error('No audio data received from Wyoming server');
          }

          piperDebug('Piper TTS API: Collected audio, total size:', totalBytes, 'bytes');

          // The cast is needed because the compiler cannot see the assignment made in the callback above.
          const format = audioFormat as AudioFormat;
          const wavHeader = createWavHeader(format.rate, format.width, format.channels, totalBytes);
          controller.enqueue(wavHeader);

          for (const chunk of audioChunks) {
            if (abortController.signal.aborted) break;
            controller.enqueue(chunk);
          }

          controller.close();
        } catch (error) {
          if (abortController.signal.aborted) {
            piperDebug('Piper TTS API: Operation cancelled');
            controller.close();
          } else {
            const msg = error instanceof Error ? error.message : String(error);
            if (isWyomingUnreachableMessage(msg)) {
              logWyomingUnreachableThrottled(tcpConfig.hostname, tcpConfig.port, msg);
              if (PIPER_TTS_DEBUG) console.error('Piper TTS API: Streaming error:', error);
            } else {
              console.error('Piper TTS API: Streaming error:', error);
            }
            controller.error(error);
          }
        }
      },
      cancel() {
        piperDebug('Piper TTS API: Stream cancelled by client');
        abortController.abort();
        if (wyomingCleanup) {
          wyomingCleanup();
        }
      },
    });

    return new Response(stream, {
      headers: {
        'Content-Type': 'audio/wav',
        'Transfer-Encoding': 'chunked',
        'Access-Control-Allow-Origin': '*',
      },
    });
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Unknown error';
    console.error('Piper TTS API error:', message);
    return errorResponse(500, message);
  }
}

/**
 * Synthesize speech using Wyoming protocol
 * Protocol flow (standard):
 * 1. Send: {"type":"synthesize","data":{"text":"..."}}\n
 * 2. Receive format: {"rate":22050,"width":2,"channels":1}\n
 * 3. Receive raw binary audio (no delimiters)
 * 4. Optionally receive: {"type":"done"}\n or connection closes
 *
 * Some implementations may send audio-chunk messages:
 * - {"type":"audio-chunk","payload_length":N}\n followed by N bytes of binary audio
 * - These may arrive before or after the format message
 * - We handle both standard and audio-chunk variants for compatibility
 *
 * @param config      Host and port of the Wyoming server.
 * @param text        Text to synthesize.
 * @param voice       Voice name; omitted from the request when falsy.
 * @param speed       Speed factor; omitted from the request when undefined or 1.0.
 * @param abortSignal Aborting destroys the socket and rejects with 'Operation cancelled'.
 * @param onCleanup   Receives a function that destroys the TCP socket.
 * @param onChunk     Called with an empty chunk plus the format once the format is
 *                    known, and afterwards with each audio chunk.
 * @returns Resolves when synthesis is considered complete; rejects on protocol
 *          error, TCP error, timeout, cancellation, or when no audio arrived.
 */
async function synthesizeWithWyoming(
  config: { hostname: string; port: number },
  text: string,
  voice: string | undefined,
  speed: number | undefined,
  abortSignal: AbortSignal,
  onCleanup: (cleanup: () => void) => void,
  onChunk: (chunk: Uint8Array, format?: AudioFormat) => void
): Promise<void> {
  const net = await import('net');

  return new Promise<void>((resolve, reject) => {
    let socket: import('net').Socket | null = null;
    let buffer = Buffer.alloc(0);
    let audioFormat: AudioFormat | null = null;
    let hasReceivedAudio = false;
    let isResolved = false;
    let completionTimer: ReturnType<typeof setTimeout> | null = null;
    const preFormatAudioChunks: Uint8Array[] = []; // Buffer audio chunks received before format
    let hasProcessedAudioChunks = false; // Track if we've processed audio-chunk messages

    piperDebug('Wyoming: Creating TCP connection to', config.hostname, 'port', config.port);

    const cleanup = () => {
      if (socket && !socket.destroyed) {
        piperDebug('Wyoming: Cleaning up TCP connection');
        socket.destroy();
      }
    };

    // Forwards every chunk buffered before the format was known, then empties the buffer.
    const flushPreFormatChunks = () => {
      for (const chunk of preFormatAudioChunks) {
        onChunk(chunk);
        hasReceivedAudio = true;
      }
      preFormatAudioChunks.length = 0;
      hasProcessedAudioChunks = true;
    };

    // Register cleanup function
    onCleanup(cleanup);

    // Check if already aborted
    if (abortSignal.aborted) {
      piperDebug('Wyoming: Abort signal already set, not connecting');
      reject(new Error('Operation cancelled'));
      return;
    }

    const timeout = setTimeout(() => {
      cleanup();
      if (!isResolved) {
        isResolved = true;
        console.error('Wyoming: Timeout after 5 minutes');
        reject(new Error('Wyoming protocol timeout'));
      }
    }, 300000); // 5 minutes

    // Listen for abort signal
    const abortHandler = () => {
      piperDebug('Wyoming: Abort signal received, cleaning up');
      if (completionTimer) {
        clearTimeout(completionTimer);
        completionTimer = null;
      }
      cleanup();
      clearTimeout(timeout);
      if (!isResolved) {
        isResolved = true;
        reject(new Error('Operation cancelled'));
      }
    };
    abortSignal.addEventListener('abort', abortHandler);

    try {
      socket = net.createConnection(config.port, config.hostname, () => {
        piperDebug('Wyoming: TCP connected successfully');

        // Send synthesize request
        // Wyoming protocol expects voice as an object with 'name' property, not a plain string
        const message = {
          type: 'synthesize',
          data: {
            text,
            ...(voice ? { voice: { name: voice } } : {}),
            ...(speed !== undefined && speed !== 1.0 ? { speed } : {}),
          },
        };
        const messageStr = JSON.stringify(message) + '\n';
        piperDebug(
          'Wyoming: Sending synthesize message, text length:',
          text.length,
          'voice:',
          voice ? `{name: "${voice}"}` : 'none (will use default)'
        );
        piperDebug('Wyoming: Full message:', messageStr.trim());

        try {
          socket!.write(messageStr);
          piperDebug('Wyoming: Synthesize message sent');
        } catch (writeError) {
          console.error('Wyoming: Failed to write message:', writeError);
          cleanup();
          clearTimeout(timeout);
          if (!isResolved) {
            isResolved = true;
            reject(new Error(`Failed to send message: ${writeError instanceof Error ? writeError.message : String(writeError)}`));
          }
        }
      });
    } catch (error) {
      console.error('Wyoming: Failed to create connection:', error);
      // No socket exists, so no 'close' event will ever detach the abort listener.
      abortSignal.removeEventListener('abort', abortHandler);
      cleanup();
      clearTimeout(timeout);
      if (!isResolved) {
        isResolved = true;
        reject(new Error(`Failed to create connection: ${error instanceof Error ? error.message : String(error)}`));
      }
      return;
    }

    socket.on('data', (data: Buffer) => {
      // Check if aborted
      if (abortSignal.aborted) {
        piperDebug('Wyoming: Aborted, ignoring data');
        return;
      }

      // Clear completion timer since we're receiving data
      if (completionTimer) {
        clearTimeout(completionTimer);
        completionTimer = null;
      }

      piperDebug('Wyoming: Received data, size:', data.length, 'bytes, audioFormat:', audioFormat ? 'received' : 'not received');
      buffer = Buffer.concat([buffer, data]);

      // Process buffer
      while (buffer.length > 0) {
        // Check if aborted during processing
        if (abortSignal.aborted) {
          piperDebug('Wyoming: Aborted during buffer processing');
          break;
        }

        // After format received, check for "done" message, audio-chunk messages, or process as raw audio
        if (audioFormat) {
          // Check if buffer starts with JSON (for done/error/audio-chunk messages)
          if (buffer.length > 0 && buffer[0] === 0x7b) { // '{' byte
            const newlineIndex = buffer.indexOf('\n');
            if (newlineIndex !== -1) {
              try {
                const line = buffer.subarray(0, newlineIndex).toString('utf8').trim();
                const message = JSON.parse(line);

                if (message.type === 'done') {
                  piperDebug('Wyoming: Received done message');
                  if (completionTimer) {
                    clearTimeout(completionTimer);
                    completionTimer = null;
                  }
                  buffer = buffer.subarray(newlineIndex + 1);
                  cleanup();
                  clearTimeout(timeout);
                  if (!isResolved) {
                    isResolved = true;
                    resolve();
                  }
                  return;
                }

                if (message.type === 'error') {
                  console.error('Wyoming: Received error message:', message.message);
                  buffer = buffer.subarray(newlineIndex + 1);
                  cleanup();
                  clearTimeout(timeout);
                  if (!isResolved) {
                    isResolved = true;
                    reject(new Error(message.message || 'Wyoming protocol error'));
                  }
                  return;
                }

                if (message.type === 'audio-stop') {
                  piperDebug('Wyoming: Received audio-stop message');
                  buffer = buffer.subarray(newlineIndex + 1);
                  if (completionTimer) {
                    clearTimeout(completionTimer);
                    completionTimer = null;
                  }
                  cleanup();
                  clearTimeout(timeout);
                  if (!isResolved) {
                    isResolved = true;
                    resolve();
                  }
                  return;
                }

                // Handle audio-chunk messages after format
                if (message.type === 'audio-chunk' && typeof message.payload_length === 'number') {
                  const payloadLength = message.payload_length;
                  const messageEnd = newlineIndex + 1;
                  // If data_length is specified, there's additional JSON data before the payload
                  const dataLength = typeof message.data_length === 'number' ? message.data_length : 0;
                  const payloadStart = messageEnd + dataLength;
                  const payloadEnd = payloadStart + payloadLength;

                  if (buffer.length >= payloadEnd) {
                    const audioPayload = new Uint8Array(buffer.subarray(payloadStart, payloadEnd));
                    onChunk(audioPayload);
                    hasReceivedAudio = true;
                    hasProcessedAudioChunks = true;
                    buffer = buffer.subarray(payloadEnd);
                    continue; // Continue processing loop
                  } else {
                    // Don't have full payload yet - wait for more data
                    break;
                  }
                }
                // NOTE(review): any other valid JSON message falls through and is
                // forwarded as raw audio below - confirm this is intended.
              } catch (error) {
                // Not valid JSON - treat as raw audio
                // Fall through to raw audio processing
              }
            } else {
              // No newline yet - might be incomplete JSON, wait for more data
              break;
            }
          }

          // No JSON message found - process all buffer as raw audio
          if (buffer.length > 0) {
            onChunk(new Uint8Array(buffer));
            hasReceivedAudio = true;
            buffer = Buffer.alloc(0);
          }

          // If we've received audio and buffer is empty, set a completion timer
          // This handles cases where the server doesn't send "done" or close connection
          if (hasReceivedAudio && buffer.length === 0 && !completionTimer) {
            completionTimer = setTimeout(() => {
              if (!isResolved && hasReceivedAudio && !abortSignal.aborted) {
                piperDebug('Wyoming: No data received for 500ms after audio, assuming completion');
                cleanup();
                clearTimeout(timeout);
                isResolved = true;
                resolve();
              }
            }, 500); // 500ms timeout after last data
          }

          // Break and wait for more data (could be more audio or "done" message)
          break;
        }

        // Before format: scan buffer for JSON format message
        // Look for '{' followed by newline-delimited JSON
        let formatFound = false;
        let searchStart = 0;

        while (searchStart < buffer.length && !formatFound) {
          const braceIndex = buffer.indexOf(0x7b, searchStart); // '{' byte
          if (braceIndex === -1) {
            // No more '{' found - this is all binary data, buffer it
            break;
          }

          // Look for newline after this '{'
          const newlineIndex = buffer.indexOf('\n', braceIndex);
          if (newlineIndex === -1) {
            // No newline yet - wait for more data
            break;
          }

          // Try to parse as JSON
          const lineBytes = buffer.subarray(braceIndex, newlineIndex);
          const line = lineBytes.toString('utf8').trim();

          if (line.endsWith('}')) {
            try {
              const message = JSON.parse(line);
              piperDebug('Wyoming: Received message:', JSON.stringify(message));

              // Check for audio-start message (contains format info)
              if (message.type === 'audio-start' && (message.rate !== undefined || message.channels !== undefined)) {
                audioFormat = {
                  rate: message.rate,
                  width: message.width || 2,
                  channels: message.channels,
                };
                piperDebug('Wyoming: Audio format from audio-start:', audioFormat);

                // Send format notification
                onChunk(new Uint8Array(0), audioFormat);

                // Process any buffered audio chunks
                if (preFormatAudioChunks.length > 0) {
                  piperDebug('Wyoming: Processing', preFormatAudioChunks.length, 'buffered audio chunks after audio-start');
                  flushPreFormatChunks();
                }

                buffer = buffer.subarray(newlineIndex + 1);
                searchStart = 0;
                continue;
              }

              // Check for format message (can be a standalone format object or embedded in other messages)
              if (message.rate !== undefined || message.channels !== undefined) {
                audioFormat = {
                  rate: message.rate,
                  width: message.width || 2,
                  channels: message.channels,
                };
                piperDebug('Wyoming: Audio format:', audioFormat);

                // Remove everything up to and including the format message
                const dataAfterFormat = buffer.subarray(newlineIndex + 1);

                // Send format notification first
                onChunk(new Uint8Array(0), audioFormat);

                // Process any buffered audio chunks received before format
                if (preFormatAudioChunks.length > 0) {
                  piperDebug('Wyoming: Processing', preFormatAudioChunks.length, 'buffered audio chunks');
                  flushPreFormatChunks();
                }

                // Process any raw data before format as audio (protocol violation, but handle it)
                // BUT: Skip this if we've already processed audio-chunk messages, as that data
                // is likely protocol overhead or corrupted, not actual audio
                if (braceIndex > 0 && !hasProcessedAudioChunks) {
                  const preFormatData = buffer.subarray(0, braceIndex);
                  // Only process if it's not empty and looks like audio (not JSON)
                  // Also check that it's a reasonable size (not just a few bytes of protocol overhead)
                  if (preFormatData.length > 0 && preFormatData[0] !== 0x7b && preFormatData.length > 100) {
                    console.warn('Wyoming: Processing', braceIndex, 'bytes of raw data received before format message as audio');
                    onChunk(new Uint8Array(preFormatData));
                    hasReceivedAudio = true;
                  } else if (preFormatData.length > 0 && preFormatData.length <= 100) {
                    console.warn('Wyoming: Skipping', preFormatData.length, 'bytes of data before format (likely protocol overhead)');
                  }
                } else if (braceIndex > 0 && hasProcessedAudioChunks) {
                  console.warn('Wyoming: Skipping', braceIndex, 'bytes of data before format (audio-chunk messages already processed)');
                }

                // Process data after format as audio
                if (dataAfterFormat.length > 0) {
                  onChunk(new Uint8Array(dataAfterFormat));
                  hasReceivedAudio = true;
                }

                buffer = Buffer.alloc(0);
                formatFound = true;
                continue; // Continue processing loop
              }

              // Check for done/error messages
              if (message.type === 'done') {
                piperDebug('Wyoming: Received done message');
                buffer = buffer.subarray(newlineIndex + 1);
                cleanup();
                clearTimeout(timeout);
                if (!isResolved) {
                  isResolved = true;
                  if (hasReceivedAudio) {
                    resolve();
                  } else {
                    reject(new Error('No audio data received'));
                  }
                }
                return;
              }

              if (message.type === 'error') {
                console.error('Wyoming: Received error message:', message.message);
                buffer = buffer.subarray(newlineIndex + 1);
                cleanup();
                clearTimeout(timeout);
                if (!isResolved) {
                  isResolved = true;
                  reject(new Error(message.message || 'Wyoming protocol error'));
                }
                return;
              }

              // Handle audio-stop message
              if (message.type === 'audio-stop') {
                piperDebug('Wyoming: Received audio-stop message');
                buffer = buffer.subarray(newlineIndex + 1);

                // If we have buffered audio chunks but no format, use default format
                if (preFormatAudioChunks.length > 0 && !audioFormat) {
                  console.warn('Wyoming: Format message never received, using default format for', preFormatAudioChunks.length, 'buffered chunks');
                  // Default Piper TTS format: 22050 Hz, 16-bit (width=2), mono (channels=1)
                  audioFormat = {
                    rate: 22050,
                    width: 2,
                    channels: 1,
                  };
                  piperDebug('Wyoming: Using default audio format:', audioFormat);

                  // Send format notification
                  onChunk(new Uint8Array(0), audioFormat);

                  // Process buffered chunks
                  flushPreFormatChunks();
                }

                cleanup();
                clearTimeout(timeout);
                if (!isResolved) {
                  isResolved = true;
                  if (hasReceivedAudio) {
                    resolve();
                  } else {
                    reject(new Error('No audio data received'));
                  }
                }
                return;
              }

              // Handle audio-chunk messages
              if (message.type === 'audio-chunk' && typeof message.payload_length === 'number') {
                const payloadLength = message.payload_length;
                const messageEnd = newlineIndex + 1;
                // If data_length is specified, there's additional JSON data before the payload
                const dataLength = typeof message.data_length === 'number' ? message.data_length : 0;
                const payloadStart = messageEnd + dataLength;
                const payloadEnd = payloadStart + payloadLength;

                piperDebug('Wyoming: Processing audio-chunk, payload_length:', payloadLength, 'data_length:', dataLength, 'buffer length:', buffer.length, 'payloadStart:', payloadStart, 'payloadEnd:', payloadEnd);

                // Check if we have the full payload
                if (buffer.length >= payloadEnd) {
                  // If there's data_length, try to parse the format from that data
                  if (dataLength > 0 && !audioFormat) {
                    const dataBytes = buffer.subarray(messageEnd, payloadStart);
                    try {
                      const dataStr = dataBytes.toString('utf8');
                      const formatData = JSON.parse(dataStr);
                      if (formatData.rate !== undefined || formatData.channels !== undefined) {
                        audioFormat = {
                          rate: formatData.rate,
                          width: formatData.width || 2,
                          channels: formatData.channels,
                        };
                        piperDebug('Wyoming: Found format in data section:', audioFormat);
                        onChunk(new Uint8Array(0), audioFormat);
                      }
                    } catch (e) {
                      console.warn('Wyoming: Failed to parse data section as JSON:', e);
                    }
                  }

                  // Extract the audio payload (after the data section)
                  const audioPayload = new Uint8Array(buffer.subarray(payloadStart, payloadEnd));
                  if (PIPER_TTS_DEBUG) {
                    // Guarded so the hex dump is only built when it will actually be printed.
                    piperDebug(
                      'Wyoming: Extracted audio payload:',
                      audioPayload.length,
                      'bytes, first 8 bytes:',
                      Array.from(audioPayload.slice(0, 8)).map((b) => '0x' + b.toString(16).padStart(2, '0')).join(' ')
                    );
                  }

                  // Check if format is embedded in the audio-chunk message itself
                  if (!audioFormat && (message.rate !== undefined || message.channels !== undefined)) {
                    audioFormat = {
                      rate: message.rate || 22050,
                      width: message.width || 2,
                      channels: message.channels || 1,
                    };
                    piperDebug('Wyoming: Found format in audio-chunk message:', audioFormat);
                    onChunk(new Uint8Array(0), audioFormat);
                  }

                  // If we have format, process it as audio; otherwise buffer it
                  if (audioFormat) {
                    onChunk(audioPayload);
                    hasReceivedAudio = true;
                    hasProcessedAudioChunks = true;
                  } else {
                    // Buffer audio chunks until we get format
                    preFormatAudioChunks.push(audioPayload);
                    piperDebug('Wyoming: Buffering audio-chunk payload of', payloadLength, 'bytes (format not yet received)');
                    hasProcessedAudioChunks = true; // Mark that we've seen audio-chunk messages
                  }

                  // Remove the message and payload from buffer
                  buffer = buffer.subarray(payloadEnd);
                  searchStart = 0; // Reset search to start of buffer
                  continue;
                } else {
                  // Don't have full payload yet - wait for more data
                  piperDebug('Wyoming: Waiting for more data, need', payloadEnd, 'have', buffer.length);
                  break;
                }
              }

              // Other JSON message - skip it and continue searching
              searchStart = newlineIndex + 1;
            } catch (error) {
              // Not valid JSON - continue searching
              searchStart = braceIndex + 1;
            }
          } else {
            // Incomplete JSON - continue searching
            searchStart = braceIndex + 1;
          }
        }

        // If we found format, continue processing; otherwise wait for more data
        if (!formatFound) {
          break;
        }
      }
    });

    socket.on('error', (error: Error) => {
      if (isWyomingUnreachableMessage(error.message)) {
        logWyomingUnreachableThrottled(config.hostname, config.port, error.message);
        if (PIPER_TTS_DEBUG) console.error('Wyoming: TCP error:', error.message);
      } else {
        console.error('Wyoming: TCP error:', error.message);
      }
      abortSignal.removeEventListener('abort', abortHandler);
      cleanup();
      clearTimeout(timeout);
      if (!isResolved) {
        isResolved = true;
        reject(new Error(`TCP error: ${error.message}`));
      }
    });

    socket.on('close', () => {
      piperDebug(
        'Wyoming: Connection closed, hasReceivedAudio:',
        hasReceivedAudio,
        'buffer length:',
        buffer.length,
        'buffered chunks:',
        preFormatAudioChunks.length
      );
      if (completionTimer) {
        clearTimeout(completionTimer);
        completionTimer = null;
      }
      abortSignal.removeEventListener('abort', abortHandler);
      cleanup();
      clearTimeout(timeout);

      // If we have buffered audio chunks but no format, use default format
      if (!abortSignal.aborted && preFormatAudioChunks.length > 0 && !audioFormat) {
        console.warn('Wyoming: Format message never received before connection close, using default format for', preFormatAudioChunks.length, 'buffered chunks');
        // Default Piper TTS format: 22050 Hz, 16-bit (width=2), mono (channels=1)
        audioFormat = {
          rate: 22050,
          width: 2,
          channels: 1,
        };
        piperDebug('Wyoming: Using default audio format:', audioFormat);

        // Send format notification
        onChunk(new Uint8Array(0), audioFormat);

        // Process buffered chunks
        flushPreFormatChunks();
      }

      // Only process remaining buffer if not aborted
      if (!abortSignal.aborted && buffer.length > 0 && audioFormat) {
        piperDebug('Wyoming: Streaming remaining buffer:', buffer.length, 'bytes');
        onChunk(new Uint8Array(buffer));
        hasReceivedAudio = true;
      }

      if (!isResolved) {
        isResolved = true;
        if (abortSignal.aborted) {
          piperDebug('Wyoming: Connection closed after abort');
          reject(new Error('Operation cancelled'));
        } else if (hasReceivedAudio) {
          piperDebug('Wyoming: Resolving - audio received');
          resolve();
        } else {
          piperDebug('Wyoming: Rejecting - no audio received');
          reject(new Error('Connection closed without audio data'));
        }
      }
    });
  });
}

/**
 * Resolves the Wyoming server address.
 *
 * Host comes from PIPER_TTS_HOST (or PIPER_HOST); port from PIPER_TTS_PORT (or
 * PIPER_PORT). Each override is honoured on its own. Without a host override the
 * host is 'localhost' when NODE_ENV is 'development' and the Docker service name
 * 'piper-tts' otherwise. A missing or invalid port falls back to 10200.
 */
function getTcpConfig(): { hostname: string; port: number } {
  const hostOverride = (process.env.PIPER_TTS_HOST || process.env.PIPER_HOST || '').trim();
  const portOverride = (process.env.PIPER_TTS_PORT || process.env.PIPER_PORT || '').trim();

  // Default: use Docker service name in production, localhost in development
  const defaultHost = process.env.NODE_ENV === 'development' ? 'localhost' : 'piper-tts';

  let port = DEFAULT_PIPER_PORT;
  if (portOverride) {
    const parsed = parseInt(portOverride, 10);
    if (Number.isInteger(parsed) && parsed > 0 && parsed <= 65535) {
      port = parsed;
    } else {
      // A NaN or out-of-range port would make the TCP connect throw; use the default instead.
      console.warn(`[Piper TTS] Ignoring invalid port "${portOverride}", using ${DEFAULT_PIPER_PORT}`);
    }
  }

  return { hostname: hostOverride || defaultHost, port };
}

/**
 * Builds the 44-byte header of a PCM WAV file.
 *
 * @param sampleRate     Samples per second.
 * @param bytesPerSample Bytes per sample for one channel.
 * @param channels       Number of channels.
 * @param dataSize       Size in bytes of the audio data that follows the header.
 */
function createWavHeader(sampleRate: number, bytesPerSample: number, channels: number, dataSize: number): Uint8Array {
  const header = new ArrayBuffer(44);
  const view = new DataView(header);

  // Writes a four-character ASCII chunk tag at the given offset.
  const writeTag = (offset: number, tag: string) => {
    for (let i = 0; i < tag.length; i++) {
      view.setUint8(offset + i, tag.charCodeAt(i));
    }
  };

  // RIFF header
  writeTag(0, 'RIFF');
  view.setUint32(4, 36 + dataSize, true); // File size - 8

  // WAVE header
  writeTag(8, 'WAVE');

  // fmt chunk
  writeTag(12, 'fmt ');
  view.setUint32(16, 16, true); // fmt chunk size
  view.setUint16(20, 1, true); // Audio format (1 = PCM)
  view.setUint16(22, channels, true); // Number of channels
  view.setUint32(24, sampleRate, true); // Sample rate
  view.setUint32(28, sampleRate * channels * bytesPerSample, true); // Byte rate
  view.setUint16(32, channels * bytesPerSample, true); // Block align
  view.setUint16(34, bytesPerSample * 8, true); // Bits per sample

  // data chunk
  writeTag(36, 'data');
  view.setUint32(40, dataSize, true); // Data size

  return new Uint8Array(header);
}

/**
 * Ordered rewrite rules applied by `filterCryptographicContent`.
 * Order matters: markdown/AsciiDoc links and images are resolved BEFORE bare
 * URLs are stripped, otherwise the URL rule eats the closing `)` / `[label]`
 * and the link text can no longer be recovered.
 */
const SPEECH_FILTER_RULES: ReadonlyArray<readonly [RegExp, string]> = [
  // Images and links that embed a URL (must run before URL stripping; images before links)
  [/!\[([^\]]*)\]\([^\)]+\)/g, ''], // Markdown images
  [/\[([^\]]+)\]\([^\)]+\)/g, '$1'], // Markdown links
  [/link:([^\[]+)\[([^\]]+)\]/g, '$2'], // AsciiDoc link: syntax
  [/image::?[^\[]+\[([^\]]*)\]/g, '$1'], // AsciiDoc images

  // Remove URLs
  [/https?:\/\/[^\s]+/gi, ''],
  [/www\.[^\s]+/gi, ''],

  // Remove Nostr URIs and bech32 addresses
  [/nostr:[^\s]+/gi, ''],
  [/\b(npub|note|nevent|naddr|nprofile|nsec|ncryptsec)1[a-z0-9]{20,}\b/gi, ''],

  // Remove hex strings
  [/\b[0-9a-f]{64}\b/gi, ''],
  [/\b[0-9a-f]{32,63}\b/gi, ''],

  // Remove emojis
  [/[\u{1F300}-\u{1F9FF}]/gu, ''],
  [/[\u{1F600}-\u{1F64F}]/gu, ''],
  [/[\u{2600}-\u{26FF}]/gu, ''],
  [/[\u{2700}-\u{27BF}]/gu, ''],

  // Code blocks (markdown and asciidoc)
  [/```[\s\S]*?```/g, ''],
  [/`[^`]+`/g, ''],
  [/----[\s\S]*?----/g, ''], // AsciiDoc code blocks
  [/\[source[^\]]*\][\s\S]*?----/g, ''], // AsciiDoc source blocks

  // Headers (markdown and asciidoc)
  [/^#+\s+/gm, ''], // Markdown headers at start of line
  [/\s+#+\s+/g, ' '], // Markdown headers in middle of text
  [/^=+\s*$/gm, ''], // AsciiDoc headers (single line)
  [/^=+\s+/gm, ''], // AsciiDoc headers at start of line
  [/\s+=+\s+/g, ' '], // AsciiDoc headers in middle of text

  // Remaining link syntax
  [/\[\[([^\]]+)\]\]/g, '$1'], // AsciiDoc links

  // Emphasis and formatting
  [/\*\*([^*]+)\*\*/g, '$1'], // Bold markdown
  [/\*([^*]+)\*/g, '$1'], // Italic markdown
  [/__([^_]+)__/g, '$1'], // Bold markdown (underscore)
  [/_([^_]+)_/g, '$1'], // Italic markdown (underscore)
  [/\*\*([^*]+)\*\*/g, '$1'], // Bold asciidoc
  [/\*([^*]+)\*/g, '$1'], // Italic asciidoc
  [/\+\+([^+]+)\+\+/g, '$1'], // Monospace asciidoc
  [/~~([^~]+)~~/g, '$1'], // Strikethrough markdown

  // Lists (markdown and asciidoc)
  [/^[\*\-\+]\s+/gm, ''], // Markdown unordered lists
  [/^\d+\.\s+/gm, ''], // Markdown ordered lists
  [/^\.\s+/gm, ''], // AsciiDoc unordered lists
  [/^\d+\.\s+/gm, ''], // AsciiDoc ordered lists

  // Blockquotes
  [/^>\s+/gm, ''], // Markdown blockquotes
  [/^\[quote[^\]]*\][\s\S]*?\[quote\]/g, ''], // AsciiDoc quotes

  // Horizontal rules
  [/^[-*_]{3,}\s*$/gm, ''], // Markdown horizontal rules
  [/^'''+\s*$/gm, ''], // AsciiDoc horizontal rules

  // Tables (markdown and asciidoc)
  [/\|/g, ' '], // Remove table separators
  [/^\|.+\|\s*$/gm, ''], // Remove table rows
  [/^\[cols?=[^\]]*\][\s\S]*?\|===\s*$/gm, ''], // AsciiDoc tables

  // Other asciidoc syntax
  [/\[\[([^\]]+)\]\]/g, ''], // AsciiDoc anchors
  [/\[NOTE\]/gi, ''],
  [/\[TIP\]/gi, ''],
  [/\[WARNING\]/gi, ''],
  [/\[IMPORTANT\]/gi, ''],
  [/\[CAUTION\]/gi, ''],
  [/\[source[^\]]*\]/gi, ''],
  [/\[caption[^\]]*\]/gi, ''],
];

/**
 * Strips content that should not be read aloud: URLs, Nostr identifiers, long
 * hex strings, emojis, and markdown/AsciiDoc markup. Link and image labels are
 * handled before URLs are removed so that `[text](https://…)` yields `text`.
 *
 * @param text Raw input text.
 * @returns The cleaned text with whitespace collapsed and trimmed.
 */
function filterCryptographicContent(text: string): string {
  let filtered = text;
  for (const [pattern, replacement] of SPEECH_FILTER_RULES) {
    filtered = filtered.replace(pattern, replacement);
  }

  // Clean up whitespace
  return filtered.replace(/\s+/g, ' ').trim();
}

/**
 * Splits text into sentences at runs of `.`, `!` or `?` followed by whitespace.
 * Leading markdown header markers are dropped and newlines become spaces first.
 *
 * @returns The sentences in order; when none are found, a single-element
 *          array holding the cleaned text (possibly the empty string).
 */
function splitIntoSentences(text: string): string[] {
  const cleaned = text
    .replace(/^#+\s+/gm, '')
    .replace(/\n+/g, ' ')
    .trim();

  const sentences: string[] = [];
  const terminator = /([.!?]+)\s+/g;
  let cursor = 0;

  for (let match = terminator.exec(cleaned); match !== null; match = terminator.exec(cleaned)) {
    // Keep the punctuation with the sentence but drop the whitespace after it.
    const sentence = cleaned.substring(cursor, match.index + match[1].length).trim();
    if (sentence.length > 0) {
      sentences.push(sentence);
    }
    cursor = match.index + match[0].length;
  }

  const tail = cleaned.substring(cursor).trim();
  if (tail.length > 0) {
    sentences.push(tail);
  }

  return sentences.length > 0 ? sentences : [cleaned];
}

/** Builds a JSON error response of the form `{ "error": message }` with the given status. */
function errorResponse(status: number, message: string): Response {
  return new Response(JSON.stringify({ error: message }), {
    status,
    headers: { 'Content-Type': 'application/json' },
  });
}

/**
 * Simple language detection based on character patterns
 * Returns language code (e.g., 'en', 'de', 'fr', 'es', etc.)
 *
 * Only the first 500 characters are inspected. Script-based checks (Cyrillic,
 * CJK, Arabic) need a ratio above 0.1; accent-based checks need above 0.02 and
 * are tried in the order German, French, Spanish, Italian. Falls back to 'en'.
 */
function detectLanguage(text: string): string {
  if (!text || text.length === 0) return 'en';

  const sample = text.substring(0, Math.min(500, text.length));
  const total = sample.length;

  // Share of the sample made up of characters matching the pattern.
  const ratioOf = (pattern: RegExp): number => (sample.match(pattern) || []).length / total;

  // Russian/Cyrillic
  if (ratioOf(/[а-яёА-ЯЁ]/g) > 0.1) return 'ru';
  // Chinese/Japanese/Korean (CJK)
  if (ratioOf(/[\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff\uac00-\ud7af]/g) > 0.1) return 'zh'; // Default to Chinese for CJK
  // Arabic
  if (ratioOf(/[\u0600-\u06ff]/g) > 0.1) return 'ar';
  // German: ä, ö, ü, ß
  if (ratioOf(/[äöüßÄÖÜ]/g) > 0.02) return 'de';
  // French: é, è, ê, ç, à, etc.
  if (ratioOf(/[éèêëàâäçôùûüÉÈÊËÀÂÄÇÔÙÛÜ]/g) > 0.02) return 'fr';
  // Spanish: ñ, á, é, í, ó, ú, ¿, ¡
  if (ratioOf(/[ñáéíóúüÑÁÉÍÓÚÜ¿¡]/g) > 0.02) return 'es';
  // Italian: à, è, é, ì, ò, ù
  if (ratioOf(/[àèéìòùÀÈÉÌÒÙ]/g) > 0.02) return 'it';

  // Default to English
  return 'en';
}

/**
 * Map language code to Piper voice name
 * Returns voice name (always returns a value, defaults to English)
 * Voice names follow pattern: {lang}_{locale}-{voice}-{quality}
 *
 * Note: These are common voice names. You may need to adjust based on
 * which voices are actually available in your piper-data directory.
 * To see available voices, check the piper-data folder or Wyoming server logs.
 */
function getVoiceForLanguage(lang: string): string {
  const defaultVoice = 'en_US-lessac-medium';
  // Voice map keys / ids: keep in sync with `src/lib/trinity-languages.ts` (`TRINITY_PIPER_VOICE`, `EXTRA_READ_ALOUD_PIPER_VOICE`).
  const voiceMap: Record<string, string> = {
    'en': defaultVoice, // Default English voice
    'de': 'de_DE-thorsten-medium', // German
    'fr': 'fr_FR-siwis-medium', // French
    'es': 'es_ES-davefx-medium', // Spanish
    'it': 'it_IT-paola-medium', // Italian (rhasspy/piper-voices; install via scripts/download-piper-extra-voices.sh)
    'ru': 'ru_RU-ruslan-medium', // Russian
    'zh': 'zh_CN-huayan-medium', // Chinese
    'ar': 'ar_JO-kareem-medium', // Arabic (rhasspy/piper-voices; install via scripts/download-piper-extra-voices.sh)
    'pl': 'pl_PL-darkman-medium', // Polish
    'pt': 'pt_BR-cadu-medium', // Portuguese (BR; rhasspy/piper-voices; same script)
    'nl': 'nl_NL-mls-medium', // Dutch
    'cs': 'cs_CZ-jirka-medium', // Czech
    'tr': 'tr_TR-dfki-medium', // Turkish
    // 'ja': 'ja_JP-nanami-medium', // Japanese - not available
    // 'ko': 'ko_KR-kyungha-medium', // Korean - not available
  };

  return voiceMap[lang] || defaultVoice; // Fall back to English
}