/**
 * TTS Service
 * Manages text-to-speech with multiple provider support
 */

import type {
  TTSProvider,
  TTSProviderInterface,
  TTSOptions,
  TTSState,
  TTSVoice,
  TTSEventCallbacks
} from './types.js';

/**
 * Error raised when the OpenAI endpoint reports an exhausted quota / rate limit.
 * The `isQuotaError` flag is what `TTSService.speak` inspects to trigger its fallback.
 */
class TTSQuotaError extends Error {
  readonly isQuotaError = true;

  constructor(message: string) {
    super(message);
    this.name = 'TTSQuotaError';
  }
}

/** Returns true when `error` is an `Error` carrying a truthy `isQuotaError` flag. */
function hasQuotaFlag(error: unknown): boolean {
  return error instanceof Error && Boolean((error as { isQuotaError?: unknown }).isQuotaError);
}

/** Returns true when `error` is an `Error` named `AbortError` (fetch / media cancellation). */
function isAbortError(error: unknown): boolean {
  return error instanceof Error && error.name === 'AbortError';
}

/**
 * Base class for audio-based TTS providers
 *
 * Owns a single `HTMLAudioElement` plus the object URL backing it, mirrors the
 * element's events into `TTSState` changes, and forwards them to the callbacks.
 */
abstract class AudioProvider implements TTSProviderInterface {
  protected state: TTSState = 'idle';
  protected callbacks: TTSEventCallbacks = {};
  protected audioElement: HTMLAudioElement | null = null;

  /** Object URL currently backing `audioElement`; revoked when the element is released. */
  private currentAudioUrl: string | null = null;

  /** Bumped by every `stop()` and `beginRequest()` so in-flight work can detect it was superseded. */
  private requestGeneration = 0;

  abstract readonly name: string;
  abstract readonly type: TTSProvider;

  /** Stores the new state and notifies `onStateChange`, but only on an actual change. */
  protected updateState(newState: TTSState): void {
    if (this.state !== newState) {
      this.state = newState;
      this.callbacks.onStateChange?.(newState);
    }
  }

  /** Marks the start of a synthesis request and returns its token. */
  protected beginRequest(): number {
    this.requestGeneration += 1;
    return this.requestGeneration;
  }

  /** True while no later `stop()` / `beginRequest()` has superseded `request`. */
  protected isCurrentRequest(request: number): boolean {
    return request === this.requestGeneration;
  }

  /**
   * Detaches all handlers from the current element, pauses it, drops the
   * reference and revokes its object URL. Safe to call when nothing is loaded.
   */
  protected releaseAudio(): void {
    const element = this.audioElement;
    this.audioElement = null;

    if (element) {
      // Detach first: the `pause` event is dispatched asynchronously and would
      // otherwise flip the state to 'paused' after the caller has moved on.
      element.onplay = null;
      element.onpause = null;
      element.onended = null;
      element.onerror = null;
      element.ontimeupdate = null;
      element.pause();
    }

    if (this.currentAudioUrl !== null) {
      URL.revokeObjectURL(this.currentAudioUrl);
      this.currentAudioUrl = null;
    }
  }

  /**
   * Creates the audio element for `audioUrl` and wires its events to state
   * updates and callbacks. Any previously loaded element is released first.
   *
   * @param audioUrl Object URL of the synthesized audio; revoked on release.
   * @param volume Playback volume assigned to the element.
   */
  protected setupAudioElement(audioUrl: string, volume: number = 1.0): void {
    this.releaseAudio();

    const element = new Audio(audioUrl);
    element.volume = volume;

    element.onplay = () => this.updateState('playing');
    element.onpause = () => {
      // Browsers fire `pause` right before `ended` when playback finishes
      // naturally; skip it so listeners do not see a spurious 'paused'.
      if (!element.ended) {
        this.updateState('paused');
      }
    };
    element.onended = () => {
      this.releaseAudio();
      this.updateState('idle');
      this.callbacks.onEnd?.();
    };
    element.onerror = () => {
      this.releaseAudio();
      this.updateState('error');
      this.callbacks.onError?.(new Error('Audio playback failed'));
    };
    element.ontimeupdate = () => {
      if (element.duration) {
        this.callbacks.onProgress?.(element.currentTime / element.duration);
      }
    };

    this.audioElement = element;
    this.currentAudioUrl = audioUrl;
  }

  /** Pauses playback when audio is currently playing. */
  async pause(): Promise<void> {
    if (this.audioElement && this.state === 'playing') {
      this.audioElement.pause();
    }
  }

  /** Resumes playback when audio is currently paused. */
  async resume(): Promise<void> {
    if (this.audioElement && this.state === 'paused') {
      await this.audioElement.play();
    }
  }

  /**
   * Stops playback, releases the audio element and its object URL, invalidates
   * any in-flight request and returns to 'idle'. All of this happens
   * synchronously, before the returned promise settles.
   */
  async stop(): Promise<void> {
    this.requestGeneration += 1;
    this.releaseAudio();
    this.updateState('idle');
  }

  getState(): TTSState {
    return this.state;
  }

  /** Returns the played fraction (currentTime / duration), or 0 when unknown. */
  async getProgress(): Promise<number> {
    if (this.audioElement?.duration) {
      return this.audioElement.currentTime / this.audioElement.duration;
    }
    return 0;
  }

  /**
   * Seeks to a fraction of the loaded audio.
   *
   * @param position Fraction of the duration; clamped to [0, 1]. Non-finite
   *   values are ignored, as is a missing or non-finite duration.
   */
  async setProgress(position: number): Promise<void> {
    const element = this.audioElement;
    if (!element?.duration || !Number.isFinite(element.duration) || !Number.isFinite(position)) {
      return;
    }
    const fraction = Math.min(1, Math.max(0, position));
    element.currentTime = fraction * element.duration;
  }

  /** Merges `callbacks` into the ones already registered. */
  setCallbacks(callbacks: TTSEventCallbacks): void {
    this.callbacks = { ...this.callbacks, ...callbacks };
  }

  abstract isAvailable(): Promise<boolean>;
  abstract getVoices(): Promise<TTSVoice[]>;
  abstract speak(text: string, options?: TTSOptions): Promise<void>;
  abstract destroy(): void;
}

/**
 * Web Speech API TTS Provider
 */
class WebSpeechProvider implements TTSProviderInterface {
  readonly name = 'Web Speech API';
  readonly type: TTSProvider = 'webspeech';

  private synth: SpeechSynthesis;
  private utterance: SpeechSynthesisUtterance | null = null;
  private state: TTSState = 'idle';
  private callbacks: TTSEventCallbacks = {};

  /** Kept as a field so `destroy()` can tell whether the global handler is still ours. */
  private readonly handleVoicesChanged = (): void => this.loadVoices();

  /** @throws Error when `window.speechSynthesis` does not exist. */
  constructor() {
    if (typeof window === 'undefined' || !('speechSynthesis' in window)) {
      throw new Error('Web Speech API not available');
    }
    this.synth = window.speechSynthesis;
    this.synth.onvoiceschanged = this.handleVoicesChanged;
    this.loadVoices();
  }

  private loadVoices(): void {
    // Voices loaded asynchronously
  }

  async isAvailable(): Promise<boolean> {
    return typeof window !== 'undefined' && 'speechSynthesis' in window;
  }

  /** Maps the browser's voices to `TTSVoice`, guessing gender from the voice name. */
  async getVoices(): Promise<TTSVoice[]> {
    return this.synth.getVoices().map(voice => {
      const lowerName = voice.name.toLowerCase();
      // 'female' must be tested first because it contains 'male'.
      const gender = lowerName.includes('female')
        ? 'female'
        : lowerName.includes('male')
          ? 'male'
          : 'neutral';
      return {
        id: voice.voiceURI,
        name: voice.name,
        lang: voice.lang,
        gender,
        provider: 'webspeech'
      };
    });
  }

  /**
   * Picks a voice for `lang`: Google voices first, then "neural" ones, then the
   * first voice of that language, and finally the first voice overall.
   */
  private getBestVoice(lang: string = 'en'): SpeechSynthesisVoice | null {
    const voices = this.synth.getVoices();
    const langPrefix = lang.split('-')[0] ?? lang;
    const langVoices = voices.filter(v => v.lang.startsWith(langPrefix));

    if (langVoices.length === 0) {
      return voices[0] ?? null;
    }

    const googleVoice = langVoices.find(
      v => v.name.toLowerCase().includes('google') || v.voiceURI.toLowerCase().includes('google')
    );
    const neuralVoice = langVoices.find(v => v.name.toLowerCase().includes('neural'));
    return googleVoice ?? neuralVoice ?? langVoices[0] ?? null;
  }

  /** Removes our handlers from the active utterance and forgets it. */
  private detachUtterance(): void {
    const current = this.utterance;
    if (current) {
      current.onstart = null;
      current.onend = null;
      current.onerror = null;
    }
    this.utterance = null;
  }

  /**
   * Speaks `text`, cancelling whatever is currently being spoken.
   * Whitespace-only text only performs the cancellation.
   */
  async speak(text: string, options?: TTSOptions): Promise<void> {
    // stop() completes its work synchronously; nothing to await.
    void this.stop();

    if (!text.trim()) return;

    const utterance = new SpeechSynthesisUtterance(text);
    this.utterance = utterance;

    const requestedVoice = options?.voice;
    if (requestedVoice?.provider === 'webspeech') {
      const match = this.synth.getVoices().find(v => v.voiceURI === requestedVoice.id);
      if (match) utterance.voice = match;
    } else {
      const bestVoice = this.getBestVoice();
      if (bestVoice) utterance.voice = bestVoice;
    }

    utterance.rate = options?.speed ?? 1.0;
    utterance.pitch = options?.pitch ?? 1.0;
    utterance.volume = options?.volume ?? 1.0;

    if (utterance.voice) {
      utterance.lang = utterance.voice.lang;
    }

    // Each handler checks that its utterance is still the active one, so an
    // event from a cancelled utterance can never disturb its successor.
    utterance.onstart = () => {
      if (this.utterance === utterance) this.updateState('playing');
    };
    utterance.onend = () => {
      if (this.utterance !== utterance) return;
      this.updateState('idle');
      this.utterance = null;
      this.callbacks.onEnd?.();
    };
    utterance.onerror = (event) => {
      if (this.utterance !== utterance) return;
      this.utterance = null;
      this.updateState('error');
      this.callbacks.onError?.(new Error(`Speech synthesis error: ${event.error}`));
    };

    this.synth.speak(utterance);
    this.updateState('playing');
  }

  async pause(): Promise<void> {
    if (this.state === 'playing' && this.synth.speaking) {
      this.synth.pause();
      this.updateState('paused');
    }
  }

  async resume(): Promise<void> {
    if (this.state === 'paused' && this.synth.paused) {
      this.synth.resume();
      this.updateState('playing');
    }
  }

  /** Cancels speech and returns to 'idle'. Runs synchronously despite being async. */
  async stop(): Promise<void> {
    // Detach before cancel(): cancelling fires end/error events on the old
    // utterance, which must not be reported as an error or a natural end.
    this.detachUtterance();
    if (this.synth.speaking || this.synth.paused || this.synth.pending) {
      this.synth.cancel();
    }
    this.updateState('idle');
  }

  getState(): TTSState {
    return this.state;
  }

  async getProgress(): Promise<number> {
    return 0; // Web Speech API doesn't provide progress
  }

  async setProgress(position: number): Promise<void> {
    // Not supported - would need to stop and restart
  }

  destroy(): void {
    void this.stop();
    this.callbacks = {};
    // Only clear the global hook if no one replaced it after us.
    if (this.synth.onvoiceschanged === this.handleVoicesChanged) {
      this.synth.onvoiceschanged = null;
    }
  }

  setCallbacks(callbacks: TTSEventCallbacks): void {
    this.callbacks = { ...this.callbacks, ...callbacks };
  }

  private updateState(newState: TTSState): void {
    if (this.state !== newState) {
      this.state = newState;
      this.callbacks.onStateChange?.(newState);
    }
  }
}

/**
 * OpenAI TTS Provider
 */
class OpenAIProvider extends AudioProvider {
  readonly name = 'OpenAI TTS';
  readonly type: TTSProvider = 'openai';

  private apiKey: string | null = null;

  constructor(apiKey?: string) {
    super();
    this.apiKey = apiKey || null;
  }

  setApiKey(apiKey: string): void {
    this.apiKey = apiKey;
  }

  async isAvailable(): Promise<boolean> {
    return this.apiKey !== null && typeof window !== 'undefined';
  }

  async getVoices(): Promise<TTSVoice[]> {
    return [
      { id: 'alloy', name: 'Alloy', lang: 'en', provider: 'openai' },
      { id: 'echo', name: 'Echo', lang: 'en', provider: 'openai' },
      { id: 'fable', name: 'Fable', lang: 'en', provider: 'openai' },
      { id: 'onyx', name: 'Onyx', lang: 'en', provider: 'openai' },
      { id: 'nova', name: 'Nova', lang: 'en', provider: 'openai' },
      { id: 'shimmer', name: 'Shimmer', lang: 'en', provider: 'openai' }
    ];
  }

  /**
   * Synthesizes `text` through the OpenAI speech endpoint and plays the result.
   *
   * If `stop()` or another `speak()` happens while the request is in flight,
   * the result is discarded silently instead of being played.
   *
   * @throws Error when no API key is set, or when synthesis/playback fails.
   *   Quota failures (HTTP 429 or a message containing "quota") carry
   *   `isQuotaError === true`.
   */
  async speak(text: string, options?: TTSOptions): Promise<void> {
    if (!this.apiKey) {
      throw new Error('OpenAI API key not set');
    }

    // stop() completes its work synchronously; nothing to await.
    void this.stop();

    if (!text.trim()) return;

    const request = this.beginRequest();
    const voice = options?.voice ?? (await this.getVoices())[0];
    const speed = Math.max(0.25, Math.min(4.0, options?.speed ?? 1.0));

    try {
      this.updateState('synthesizing');

      const response = await fetch('https://api.openai.com/v1/audio/speech', {
        method: 'POST',
        headers: {
          'Authorization': `Bearer ${this.apiKey}`,
          'Content-Type': 'application/json'
        },
        body: JSON.stringify({ model: 'tts-1', input: text, voice: voice.id, speed })
      });

      if (!response.ok) {
        const payload = (await response
          .json()
          .catch(() => ({ error: { message: 'Unknown error' } }))) as
          | { error?: { message?: unknown } }
          | null;
        const rawMessage = payload?.error?.message;
        // Only trust string messages; anything else falls back to the status text.
        const errorMessage =
          typeof rawMessage === 'string' && rawMessage ? rawMessage : response.statusText;

        if (response.status === 429 || errorMessage.toLowerCase().includes('quota')) {
          throw new TTSQuotaError(`OpenAI TTS quota exceeded: ${errorMessage}`);
        }
        throw new Error(`OpenAI TTS error: ${errorMessage}`);
      }

      const audioBlob = await response.blob();
      if (!this.isCurrentRequest(request)) return;

      const audioUrl = URL.createObjectURL(audioBlob);
      this.setupAudioElement(audioUrl, options?.volume ?? 1.0);

      await this.audioElement?.play();
      if (!this.isCurrentRequest(request)) return;
      this.updateState('playing');
    } catch (error) {
      // A superseded request must not touch state that now belongs to its successor.
      if (!this.isCurrentRequest(request)) return;

      this.releaseAudio();
      this.updateState('error');
      const err = error instanceof Error ? error : new Error('Failed to speak text');
      this.callbacks.onError?.(err);
      throw err;
    }
  }

  destroy(): void {
    void this.stop();
    this.callbacks = {};
    this.apiKey = null;
  }
}

/**
 * Piper TTS Provider
 */
class PiperProvider extends AudioProvider {
  readonly name = 'Piper TTS';
  readonly type: TTSProvider = 'piper';

  private serverUrl: string | null = null;
  private abortController: AbortController | null = null;
  private timeoutId: ReturnType<typeof setTimeout> | null = null;

  private readonly defaultVoices: TTSVoice[] = [
    // English (US) - all quality levels
    { id: 'en_US-lessac-low', name: 'English (US) - Lessac Low', lang: 'en-US', provider: 'piper' },
    { id: 'en_US-lessac-medium', name: 'English (US) - Lessac Medium', lang: 'en-US', provider: 'piper' },
    { id: 'en_US-lessac-high', name: 'English (US) - Lessac High', lang: 'en-US', provider: 'piper' },
    // English (GB)
    { id: 'en_GB-alba-medium', name: 'English (GB) - Alba Medium', lang: 'en-GB', provider: 'piper' },
    // German
    { id: 'de_DE-thorsten-low', name: 'German - Thorsten Low', lang: 'de-DE', provider: 'piper' },
    { id: 'de_DE-thorsten-medium', name: 'German - Thorsten Medium', lang: 'de-DE', provider: 'piper' },
    // French
    { id: 'fr_FR-siwis-low', name: 'French - Siwis Low', lang: 'fr-FR', provider: 'piper' },
    { id: 'fr_FR-siwis-medium', name: 'French - Siwis Medium', lang: 'fr-FR', provider: 'piper' },
    // Spanish
    { id: 'es_ES-davefx-medium', name: 'Spanish - Davefx Medium', lang: 'es-ES', provider: 'piper' },
    // Italian - riccardo voices not available
    // Russian
    { id: 'ru_RU-ruslan-medium', name: 'Russian - Ruslan Medium', lang: 'ru-RU', provider: 'piper' },
    // Chinese
    { id: 'zh_CN-huayan-medium', name: 'Chinese - Huayan Medium', lang: 'zh-CN', provider: 'piper' },
    // Arabic - hafez voice not available
    // Polish
    { id: 'pl_PL-darkman-medium', name: 'Polish - Darkman Medium', lang: 'pl-PL', provider: 'piper' },
    // Portuguese - edresson voice not available
    // Dutch
    { id: 'nl_NL-mls-medium', name: 'Dutch - MLS Medium', lang: 'nl-NL', provider: 'piper' },
    // Czech
    { id: 'cs_CZ-jirka-medium', name: 'Czech - Jirka Medium', lang: 'cs-CZ', provider: 'piper' },
    // Turkish
    { id: 'tr_TR-dfki-medium', name: 'Turkish - DFKI Medium', lang: 'tr-TR', provider: 'piper' },
    // Japanese - nanami voice not available
    // Korean - kyungha voice not available
  ];

  constructor(serverUrl?: string) {
    super();
    this.serverUrl = serverUrl || null;
  }

  async isAvailable(): Promise<boolean> {
    if (typeof window === 'undefined') return false;
    return this.serverUrl !== null || typeof Worker !== 'undefined';
  }

  async getVoices(): Promise<TTSVoice[]> {
    return this.defaultVoices;
  }

  async initialize(): Promise<void> {
    // Server-based synthesis doesn't need initialization
  }

  /**
   * Requests synthesized audio for `text` and plays it.
   *
   * A request cancelled by `stop()`, by a newer `speak()`, or by the
   * five-minute timeout resolves quietly without throwing.
   *
   * @throws Error when no server URL is configured, the server responds with a
   *   non-OK status or an empty body, or playback cannot start.
   */
  async speak(text: string, options?: TTSOptions): Promise<void> {
    if (!text.trim()) return;

    // stop() completes its work synchronously; nothing to await.
    void this.stop();

    // Validate before entering 'synthesizing' so a configuration error does
    // not leave the provider stuck in that state.
    if (!this.serverUrl) {
      throw new Error('Piper TTS server URL not configured');
    }

    const request = this.beginRequest();
    this.updateState('synthesizing');

    const voice = options?.voice ?? this.defaultVoices[0];
    const speed = Math.max(0.25, Math.min(2.0, options?.speed ?? 1.0));

    console.log('PiperProvider: Using voice:', voice.id, 'from options:', options?.voice?.id || 'default');

    // Locals, not just fields: cleanup below must only ever touch THIS
    // request's controller/timer, never a newer request's.
    const controller = new AbortController();
    const timeoutId = setTimeout(() => {
      console.log('Piper TTS: Request timeout');
      controller.abort();
    }, 300000); // 5 minutes
    this.abortController = controller;
    this.timeoutId = timeoutId;

    try {
      // NOTE(review): the request goes to a fixed same-origin path; `serverUrl`
      // is only checked for presence above — confirm that this is intended.
      const response = await fetch('/api/piper-tts', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ text, voice: voice.id, speed }),
        signal: controller.signal,
      });

      // The timeout only guards the wait for the response itself.
      clearTimeout(timeoutId);

      if (!response.ok) {
        const errorText = await response.text().catch(() => response.statusText);
        throw new Error(`Piper TTS server error: ${response.status} ${errorText}`);
      }

      const audioBlob = await response.blob();

      if (audioBlob.size === 0) {
        throw new Error('Received empty audio blob from Piper TTS server');
      }

      // Cache the audio blob
      try {
        const { cacheMedia } = await import('../../services/cache/media-cache.js');
        // Create a cache key from text + voice + speed for TTS
        // NOTE(review): only the first 100 characters of the text are part of
        // the key, so longer texts sharing that prefix map to the same entry.
        const cacheKey = `tts:${voice.id}:${speed}:${text.substring(0, 100)}`;
        await cacheMedia(cacheKey, audioBlob, 'audio');
      } catch (cacheError) {
        // Cache failure is non-critical
        console.debug('Failed to cache TTS audio:', cacheError);
      }

      // stop() may have run while caching; do not start playback after it.
      if (!this.isCurrentRequest(request)) return;

      const audioUrl = URL.createObjectURL(audioBlob);
      this.setupAudioElement(audioUrl, options?.volume ?? 1.0);

      await this.audioElement?.play();
      if (!this.isCurrentRequest(request)) return;
      this.updateState('playing');
    } catch (error) {
      const superseded = !this.isCurrentRequest(request);
      if (superseded || isAbortError(error)) {
        console.log('Piper TTS: Request cancelled');
        // When superseded, the state already belongs to stop() or a newer speak().
        if (!superseded) {
          this.updateState('idle');
        }
        return; // Don't throw on cancellation
      }

      this.releaseAudio();
      this.updateState('error');
      const err = error instanceof Error ? error : new Error('Failed to speak text');
      this.callbacks.onError?.(err);
      throw err;
    } finally {
      clearTimeout(timeoutId);
      if (this.abortController === controller) this.abortController = null;
      if (this.timeoutId === timeoutId) this.timeoutId = null;
    }
  }

  /** Aborts any in-flight request, clears its timeout, then stops playback. */
  async stop(): Promise<void> {
    // Abort ongoing fetch request
    if (this.abortController) {
      console.log('Piper TTS: Aborting request');
      this.abortController.abort();
      this.abortController = null;
    }
    if (this.timeoutId !== null) {
      clearTimeout(this.timeoutId);
      this.timeoutId = null;
    }
    await super.stop();
  }

  destroy(): void {
    void this.stop();
    this.callbacks = {};
  }
}

/**
 * TTS Service
 * Manages TTS providers and provides unified interface
 */
export class TTSService {
  private provider: TTSProviderInterface | null = null;
  private providerType: TTSProvider = 'webspeech';
  private callbacks: TTSEventCallbacks = {};

  /**
   * Destroys the current provider (if any) and installs a provider of the
   * requested type. If creation fails, the service is left with no provider
   * and `getProviderType()` keeps reporting the previous type.
   *
   * @throws Error when the provider cannot be created (missing password or
   *   API key, Web Speech API unavailable, unknown provider type).
   */
  async initialize(providerType: TTSProvider = 'webspeech'): Promise<void> {
    if (this.provider) {
      this.provider.destroy();
      // Never keep a reference to a destroyed provider, even if creation fails below.
      this.provider = null;
    }

    const provider = await this.createProvider(providerType);
    this.provider = provider;
    this.providerType = providerType;

    if (provider.setCallbacks) {
      provider.setCallbacks(this.callbacks);
    }
  }

  /** Builds a provider instance for `providerType`. */
  private async createProvider(providerType: TTSProvider): Promise<TTSProviderInterface> {
    if (providerType === 'webspeech') {
      return new WebSpeechProvider();
    }

    if (providerType === 'openai') {
      const { loadEncryptedApiKey } = await import('../security/api-key-storage.js');
      const password = prompt('Enter your password to access OpenAI API key:');
      if (!password) {
        throw new Error('Password required to access OpenAI API key');
      }
      const apiKey = await loadEncryptedApiKey('tts.openai', password);
      if (!apiKey || !apiKey.startsWith('sk-')) {
        throw new Error('Invalid or missing OpenAI API key');
      }
      return new OpenAIProvider(apiKey);
    }

    if (providerType === 'piper') {
      const serverUrl = localStorage.getItem('piper_tts_server_url') || 'http://localhost:5000';
      const piper = new PiperProvider(serverUrl);
      await piper.initialize();
      return piper;
    }

    throw new Error(`Unknown TTS provider: ${providerType}`);
  }

  /**
   * Reports whether a provider is usable. With no provider installed it tries
   * OpenAI (when a key is stored), then Piper, then the Web Speech API.
   * Returns `false` instead of throwing when none of them can be set up.
   */
  async isAvailable(): Promise<boolean> {
    if (!this.provider) {
      // Try to auto-initialize with best available provider
      try {
        const { hasApiKey } = await import('../security/api-key-storage.js');
        if (await hasApiKey('tts.openai')) {
          try {
            await this.initialize('openai');
            return true;
          } catch {
            // Fall through
          }
        }
      } catch {
        // Ignore
      }

      try {
        const serverUrl = localStorage.getItem('piper_tts_server_url') || 'http://localhost:5000';
        const candidate = new PiperProvider(serverUrl);
        if (await candidate.isAvailable()) {
          await this.initialize('piper');
          return true;
        }
      } catch {
        // Fall through
      }

      try {
        await this.initialize('webspeech');
      } catch {
        // No provider could be constructed in this environment.
        return false;
      }
    }

    return this.provider ? await this.provider.isAvailable() : false;
  }

  async getVoices(): Promise<TTSVoice[]> {
    if (!this.provider) {
      await this.initialize();
    }
    return this.provider ? await this.provider.getVoices() : [];
  }

  /**
   * Speaks `text` with the active provider, initializing the default provider
   * first when none is installed. An OpenAI quota error switches the service
   * to the Web Speech API and retries once; any other error is rethrown.
   */
  async speak(text: string, options?: TTSOptions): Promise<void> {
    if (!this.provider) {
      await this.initialize();
    }

    const provider = this.provider;
    if (!provider) return;

    try {
      await provider.speak(text, options);
    } catch (error) {
      // Auto-fallback from OpenAI to Web Speech on quota error
      if (hasQuotaFlag(error) && this.providerType === 'openai') {
        console.warn('OpenAI TTS quota exceeded, falling back to Web Speech API');
        await this.initialize('webspeech');
        if (this.provider) {
          await this.provider.speak(text, options);
          return;
        }
      }
      throw error;
    }
  }

  async pause(): Promise<void> {
    await this.provider?.pause();
  }

  async resume(): Promise<void> {
    await this.provider?.resume();
  }

  async stop(): Promise<void> {
    await this.provider?.stop();
  }

  getState(): TTSState {
    return this.provider?.getState() ?? 'idle';
  }

  async getProgress(): Promise<number> {
    return this.provider ? await this.provider.getProgress() : 0;
  }

  async setProgress(position: number): Promise<void> {
    if (this.provider) {
      await this.provider.setProgress(position);
    }
  }

  /** Merges `callbacks` into the registered set and forwards it to the provider. */
  setCallbacks(callbacks: TTSEventCallbacks): void {
    this.callbacks = { ...this.callbacks, ...callbacks };
    if (this.provider?.setCallbacks) {
      this.provider.setCallbacks(this.callbacks);
    }
  }

  getProviderType(): TTSProvider {
    return this.providerType;
  }

  destroy(): void {
    if (this.provider) {
      this.provider.destroy();
      this.provider = null;
    }
    this.callbacks = {};
  }
}

// Singleton instance
let ttsServiceInstance: TTSService | null = null;

/** Returns the shared `TTSService`, creating it on first use. */
export function getTTSService(): TTSService {
  if (!ttsServiceInstance) {
    ttsServiceInstance = new TTSService();
  }
  return ttsServiceInstance;
}