// aitherboard/src/lib/services/tts/tts-service.ts

/**
 * TTS Service
 * Manages text-to-speech with multiple provider support
 */

import type { TTSProvider, TTSProviderInterface, TTSOptions, TTSState, TTSVoice, TTSEventCallbacks } from './types.js';

/**
 * Base class for audio-based TTS providers.
 *
 * Subclasses synthesize speech into audio, pass its URL to
 * `setupAudioElement`, and start playback. This class owns the resulting
 * audio element: it translates element events into `TTSState` changes,
 * forwards progress/end/error callbacks, and revokes the backing URL when
 * the element is released (on end, error, stop, or replacement).
 */
abstract class AudioProvider implements TTSProviderInterface {
  protected state: TTSState = 'idle';
  protected callbacks: TTSEventCallbacks = {};
  protected audioElement: HTMLAudioElement | null = null;

  /** URL backing `audioElement`; revoked when the element is released. */
  private activeAudioUrl: string | null = null;

  abstract readonly name: string;
  abstract readonly type: TTSProvider;

  /** Stores the new state and notifies `onStateChange` only on an actual change. */
  protected updateState(newState: TTSState): void {
    if (this.state !== newState) {
      this.state = newState;
      this.callbacks.onStateChange?.(newState);
    }
  }

  /**
   * Creates the audio element for `audioUrl` and wires its events.
   *
   * Any element left over from an earlier call is released first. Every
   * handler ignores events from an element that is no longer the current
   * one, so late events cannot corrupt the state of a newer playback.
   *
   * @param audioUrl URL (typically an object URL) of the synthesized audio.
   * @param volume Playback volume; clamped to [0, 1], non-finite values fall back to 1.
   */
  protected setupAudioElement(audioUrl: string, volume: number = 1.0): void {
    this.releaseAudioElement();

    const audio = new Audio(audioUrl);
    // Assigning a value outside [0, 1] to `volume` throws, so clamp defensively.
    audio.volume = Number.isFinite(volume) ? Math.min(1, Math.max(0, volume)) : 1;

    const isCurrent = (): boolean => this.audioElement === audio;

    audio.onplay = () => {
      if (isCurrent()) this.updateState('playing');
    };
    audio.onpause = () => {
      if (isCurrent()) this.updateState('paused');
    };
    audio.onended = () => {
      if (!isCurrent()) return;
      this.updateState('idle');
      this.releaseAudioElement();
      this.callbacks.onEnd?.();
    };
    audio.onerror = () => {
      if (!isCurrent()) return;
      // The element is unusable after a media error; free its URL as well.
      this.releaseAudioElement();
      this.updateState('error');
      this.callbacks.onError?.(new Error('Audio playback failed'));
    };
    audio.ontimeupdate = () => {
      if (isCurrent() && audio.duration) {
        this.callbacks.onProgress?.(audio.currentTime / audio.duration);
      }
    };

    this.audioElement = audio;
    this.activeAudioUrl = audioUrl;
  }

  /**
   * Detaches all handlers from the current element, forgets it, and revokes
   * its URL. Does not pause playback and does not touch `state`.
   */
  private releaseAudioElement(): void {
    const audio = this.audioElement;
    if (audio) {
      audio.onplay = null;
      audio.onpause = null;
      audio.onended = null;
      audio.onerror = null;
      audio.ontimeupdate = null;
    }
    this.audioElement = null;

    if (this.activeAudioUrl !== null) {
      URL.revokeObjectURL(this.activeAudioUrl);
      this.activeAudioUrl = null;
    }
  }

  /** Pauses playback; a no-op unless audio is currently playing. */
  async pause(): Promise<void> {
    if (this.audioElement && this.state === 'playing') {
      this.audioElement.pause();
    }
  }

  /** Resumes playback; a no-op unless audio is currently paused. */
  async resume(): Promise<void> {
    if (this.audioElement && this.state === 'paused') {
      await this.audioElement.play();
    }
  }

  /**
   * Stops playback, releases the audio element and returns to 'idle'.
   *
   * Handlers are detached before pausing: `pause()` queues a 'pause' event
   * that would otherwise arrive after this method and flip the state from
   * 'idle' to 'paused'.
   */
  async stop(): Promise<void> {
    const audio = this.audioElement;
    this.releaseAudioElement();
    if (audio) {
      audio.pause();
      audio.currentTime = 0;
    }
    this.updateState('idle');
  }

  getState(): TTSState {
    return this.state;
  }

  /** Returns playback progress as a fraction, or 0 when nothing is loaded. */
  async getProgress(): Promise<number> {
    if (this.audioElement?.duration) {
      return this.audioElement.currentTime / this.audioElement.duration;
    }
    return 0;
  }

  /**
   * Seeks to `position`, a fraction of the total duration.
   *
   * The value is clamped to [0, 1]. The call is ignored when `position` is
   * not finite or the duration is unknown/unbounded, because assigning a
   * non-finite `currentTime` throws.
   */
  async setProgress(position: number): Promise<void> {
    const audio = this.audioElement;
    if (!audio || !Number.isFinite(position)) return;

    const duration = audio.duration;
    if (!Number.isFinite(duration) || duration <= 0) return;

    audio.currentTime = Math.min(1, Math.max(0, position)) * duration;
  }

  /** Merges `callbacks` into the callbacks already registered. */
  setCallbacks(callbacks: TTSEventCallbacks): void {
    this.callbacks = { ...this.callbacks, ...callbacks };
  }

  abstract isAvailable(): Promise<boolean>;
  abstract getVoices(): Promise<TTSVoice[]>;
  abstract speak(text: string, options?: TTSOptions): Promise<void>;
  abstract destroy(): void;
}

/**
 * Web Speech API TTS Provider
 *
 * Speaks through `window.speechSynthesis`. Only one utterance is tracked at
 * a time; events from an utterance that has been stopped or replaced are
 * ignored so they cannot change the state of the current one.
 */
class WebSpeechProvider implements TTSProviderInterface {
  readonly name = 'Web Speech API';
  readonly type: TTSProvider = 'webspeech';

  private synth: SpeechSynthesis;
  private utterance: SpeechSynthesisUtterance | null = null;
  private state: TTSState = 'idle';
  private callbacks: TTSEventCallbacks = {};

  /** @throws Error when `window.speechSynthesis` does not exist. */
  constructor() {
    if (typeof window === 'undefined' || !('speechSynthesis' in window)) {
      throw new Error('Web Speech API not available');
    }
    this.synth = window.speechSynthesis;
    this.synth.onvoiceschanged = () => this.loadVoices();
    this.loadVoices();
  }

  /** Hook for the 'voiceschanged' event; voices are read on demand, so nothing is cached here. */
  private loadVoices(): void {
    // Voices loaded asynchronously
  }

  /** Clamps `value` into [min, max]; missing or non-finite values yield `fallback`. */
  private static clamp(value: number | null | undefined, min: number, max: number, fallback: number): number {
    if (value == null || !Number.isFinite(value)) return fallback;
    return Math.min(max, Math.max(min, value));
  }

  async isAvailable(): Promise<boolean> {
    return typeof window !== 'undefined' && 'speechSynthesis' in window;
  }

  /**
   * Lists the voices currently exposed by the browser.
   * Gender is a guess derived from the voice name ('female' is tested before
   * 'male' because the former contains the latter).
   */
  async getVoices(): Promise<TTSVoice[]> {
    const voices = this.synth.getVoices();
    return voices.map(voice => ({
      id: voice.voiceURI,
      name: voice.name,
      lang: voice.lang,
      gender: voice.name.toLowerCase().includes('female') ? 'female' :
              voice.name.toLowerCase().includes('male') ? 'male' : 'neutral',
      provider: 'webspeech'
    }));
  }

  /**
   * Picks a default voice for `lang`: a Google voice first, then a "neural"
   * voice, then the first voice of that language, then any voice at all.
   */
  private getBestVoice(lang: string = 'en'): SpeechSynthesisVoice | null {
    const voices = this.synth.getVoices();
    const langPrefix = lang.split('-')[0];
    const langVoices = voices.filter(v => v.lang.startsWith(langPrefix));

    if (langVoices.length === 0) return voices[0] || null;

    return langVoices.find(v =>
      v.name.toLowerCase().includes('google') ||
      v.voiceURI.toLowerCase().includes('google')
    ) || langVoices.find(v => v.name.toLowerCase().includes('neural')) || langVoices[0];
  }

  /**
   * Forgets the tracked utterance and cancels anything the synthesizer is
   * speaking or has queued. The reference is dropped before `cancel()` so the
   * cancelled utterance's handlers recognise themselves as stale.
   */
  private cancelCurrent(): void {
    this.utterance = null;
    if (this.synth.speaking || this.synth.paused || this.synth.pending) {
      this.synth.cancel();
    }
  }

  /**
   * Speaks `text`, replacing whatever is currently being spoken.
   * Blank text only stops the current speech.
   *
   * @param options Voice is honoured only when its provider is 'webspeech';
   *   speed, pitch and volume are clamped to the ranges the API accepts.
   */
  async speak(text: string, options?: TTSOptions): Promise<void> {
    this.cancelCurrent();
    this.updateState('idle');
    if (!text.trim()) return;

    const utterance = new SpeechSynthesisUtterance(text);
    this.utterance = utterance;

    const requestedVoice = options?.voice;
    if (requestedVoice?.provider === 'webspeech') {
      const voice = this.synth.getVoices().find(v => v.voiceURI === requestedVoice.id);
      if (voice) utterance.voice = voice;
    } else {
      const bestVoice = this.getBestVoice();
      if (bestVoice) utterance.voice = bestVoice;
    }

    utterance.rate = WebSpeechProvider.clamp(options?.speed, 0.1, 10, 1.0);
    utterance.pitch = WebSpeechProvider.clamp(options?.pitch, 0, 2, 1.0);
    utterance.volume = WebSpeechProvider.clamp(options?.volume, 0, 1, 1.0);
    if (utterance.voice) {
      utterance.lang = utterance.voice.lang;
    }

    // Each handler first checks that its utterance is still the tracked one.
    const isCurrent = (): boolean => this.utterance === utterance;

    utterance.onstart = () => {
      if (isCurrent()) this.updateState('playing');
    };
    utterance.onend = () => {
      if (!isCurrent()) return;
      this.updateState('idle');
      this.utterance = null;
      this.callbacks.onEnd?.();
    };
    utterance.onerror = (event) => {
      if (!isCurrent()) return;
      this.utterance = null;
      // Cancellation is reported through the error event but is not a failure.
      if (event.error === 'canceled' || event.error === 'interrupted') {
        this.updateState('idle');
        return;
      }
      this.updateState('error');
      this.callbacks.onError?.(new Error(`Speech synthesis error: ${event.error}`));
    };

    this.synth.speak(utterance);
    this.updateState('playing');
  }

  /** Pauses speech; a no-op unless something is being spoken. */
  async pause(): Promise<void> {
    if (this.state === 'playing' && this.synth.speaking) {
      this.synth.pause();
      this.updateState('paused');
    }
  }

  /** Resumes paused speech; a no-op otherwise. */
  async resume(): Promise<void> {
    if (this.state === 'paused' && this.synth.paused) {
      this.synth.resume();
      this.updateState('playing');
    }
  }

  /** Cancels speech and returns to 'idle'. */
  async stop(): Promise<void> {
    this.cancelCurrent();
    this.updateState('idle');
  }

  getState(): TTSState {
    return this.state;
  }

  async getProgress(): Promise<number> {
    return 0; // Web Speech API doesn't provide progress
  }

  async setProgress(position: number): Promise<void> {
    // Not supported - would need to stop and restart
  }

  /** Stops speech (listeners still see the final 'idle') and then drops all callbacks. */
  destroy(): void {
    this.cancelCurrent();
    this.updateState('idle');
    this.callbacks = {};
  }

  /** Merges `callbacks` into the callbacks already registered. */
  setCallbacks(callbacks: TTSEventCallbacks): void {
    this.callbacks = { ...this.callbacks, ...callbacks };
  }

  /** Stores the new state and notifies `onStateChange` only on an actual change. */
  private updateState(newState: TTSState): void {
    if (this.state !== newState) {
      this.state = newState;
      this.callbacks.onStateChange?.(newState);
    }
  }
}

/**
 * OpenAI TTS Provider
 *
 * Sends text to the OpenAI speech endpoint and plays the returned audio
 * through the inherited audio element. A request that is still in flight is
 * aborted by `stop()` (and therefore by the next `speak()`), so audio never
 * starts after the caller asked for silence.
 */
class OpenAIProvider extends AudioProvider {
  readonly name = 'OpenAI TTS';
  readonly type: TTSProvider = 'openai';

  private apiKey: string | null = null;
  /** Controller of the speak() call in progress; null when none is running. */
  private abortController: AbortController | null = null;

  constructor(apiKey?: string) {
    super();
    this.apiKey = apiKey || null;
  }

  setApiKey(apiKey: string): void {
    this.apiKey = apiKey;
  }

  /** True when an API key is set and the code runs in a browser. */
  async isAvailable(): Promise<boolean> {
    return this.apiKey !== null && typeof window !== 'undefined';
  }

  /** Returns the built-in voice list (static, no network request). */
  async getVoices(): Promise<TTSVoice[]> {
    return [
      { id: 'alloy', name: 'Alloy', lang: 'en', provider: 'openai' },
      { id: 'echo', name: 'Echo', lang: 'en', provider: 'openai' },
      { id: 'fable', name: 'Fable', lang: 'en', provider: 'openai' },
      { id: 'onyx', name: 'Onyx', lang: 'en', provider: 'openai' },
      { id: 'nova', name: 'Nova', lang: 'en', provider: 'openai' },
      { id: 'shimmer', name: 'Shimmer', lang: 'en', provider: 'openai' }
    ];
  }

  /**
   * Synthesizes `text` and starts playback.
   *
   * Resolves once playback has started, or silently when the request was
   * cancelled by `stop()` / a newer `speak()`. Blank text only stops the
   * current playback.
   *
   * @param options `voice` is used only when its provider is 'openai'
   *   (otherwise the first built-in voice); `speed` is clamped to [0.25, 4].
   * @throws Error when no API key is set, the API responds with an error
   *   (quota errors carry `isQuotaError = true`), or playback cannot start.
   */
  async speak(text: string, options?: TTSOptions): Promise<void> {
    if (!this.apiKey) {
      throw new Error('OpenAI API key not set');
    }

    await this.stop();
    if (!text.trim()) return;

    const controller = new AbortController();
    this.abortController = controller;
    // False once stop() or a newer speak() has taken over.
    const ownsRequest = (): boolean => this.abortController === controller;

    let audio: HTMLAudioElement | null = null;
    let audioUrl: string | null = null;

    try {
      this.updateState('synthesizing');

      const requestedVoice = options?.voice;
      const voice = requestedVoice?.provider === 'openai'
        ? requestedVoice
        : (await this.getVoices())[0];
      const speed = Math.max(0.25, Math.min(4.0, options?.speed ?? 1.0));

      const response = await fetch('https://api.openai.com/v1/audio/speech', {
        method: 'POST',
        headers: {
          'Authorization': `Bearer ${this.apiKey}`,
          'Content-Type': 'application/json'
        },
        body: JSON.stringify({
          model: 'tts-1',
          input: text,
          voice: voice.id,
          speed
        }),
        signal: controller.signal
      });

      if (!response.ok) {
        const payload = await response.json().catch(() => null) as { error?: { message?: unknown } } | null;
        const apiMessage = payload?.error?.message;
        // The message is only trusted when it is a non-empty string.
        const errorMessage = typeof apiMessage === 'string' && apiMessage
          ? apiMessage
          : response.statusText || 'Unknown error';

        if (response.status === 429 || errorMessage.toLowerCase().includes('quota')) {
          // Flag read by the service layer to trigger its provider fallback.
          throw Object.assign(
            new Error(`OpenAI TTS quota exceeded: ${errorMessage}`),
            { isQuotaError: true }
          );
        }

        throw new Error(`OpenAI TTS error: ${errorMessage}`);
      }

      const audioBlob = await response.blob();

      // Stopped or superseded while the body was downloading: do not play.
      if (!ownsRequest() || controller.signal.aborted) return;

      audioUrl = URL.createObjectURL(audioBlob);
      this.setupAudioElement(audioUrl, options?.volume ?? 1.0);
      audio = this.audioElement;
      if (!audio) {
        throw new Error('Audio element was not created');
      }

      await audio.play();

      if (!ownsRequest()) return;
      this.abortController = null;
      this.updateState('playing');
    } catch (error) {
      // Playback never started: detach the element we created and free its URL.
      if (audio && this.audioElement === audio) {
        audio.onplay = null;
        audio.onpause = null;
        audio.onended = null;
        audio.onerror = null;
        audio.ontimeupdate = null;
        this.audioElement = null;
      }
      if (audioUrl !== null) {
        URL.revokeObjectURL(audioUrl);
      }

      // stop() or a newer speak() owns the state now; stay out of its way.
      if (!ownsRequest()) return;
      this.abortController = null;

      if (controller.signal.aborted || (error instanceof Error && error.name === 'AbortError')) {
        this.updateState('idle');
        return; // Cancellation is not an error
      }

      this.updateState('error');
      const err = error instanceof Error ? error : new Error('Failed to speak text');
      this.callbacks.onError?.(err);
      throw err;
    }
  }

  /** Aborts any request in flight, then stops playback and returns to 'idle'. */
  async stop(): Promise<void> {
    if (this.abortController) {
      this.abortController.abort();
      this.abortController = null;
    }
    await super.stop();
  }

  /** Stops everything, then drops callbacks and the API key. */
  destroy(): void {
    // stop() does all of its work before its first await, so listeners still
    // receive the final 'idle' before the callbacks are cleared below.
    void this.stop();
    this.callbacks = {};
    this.apiKey = null;
  }
}

/**
 * Piper TTS Provider
 *
 * Requests synthesized audio from the `/api/piper-tts` endpoint and plays it
 * through the inherited audio element. Each `speak()` call owns its own
 * abort controller and timeout, so the late failure of a cancelled request
 * cannot disturb the request that replaced it.
 */
class PiperProvider extends AudioProvider {
  readonly name = 'Piper TTS';
  readonly type: TTSProvider = 'piper';

  // NOTE(review): only used as a "configured" gate here; requests go to the
  // relative '/api/piper-tts' route. Presumably that route proxies to this
  // URL - confirm.
  private serverUrl: string | null = null;
  /** Controller of the speak() call in progress; null when none is running. */
  private abortController: AbortController | null = null;
  /** Timeout handle of the speak() call in progress. */
  private timeoutId: ReturnType<typeof setTimeout> | null = null;

  private readonly defaultVoices: TTSVoice[] = [
    // English (US) - all quality levels
    { id: 'en_US-lessac-low', name: 'English (US) - Lessac Low', lang: 'en-US', provider: 'piper' },
    { id: 'en_US-lessac-medium', name: 'English (US) - Lessac Medium', lang: 'en-US', provider: 'piper' },
    { id: 'en_US-lessac-high', name: 'English (US) - Lessac High', lang: 'en-US', provider: 'piper' },
    // English (GB)
    { id: 'en_GB-alba-medium', name: 'English (GB) - Alba Medium', lang: 'en-GB', provider: 'piper' },
    // German
    { id: 'de_DE-thorsten-low', name: 'German - Thorsten Low', lang: 'de-DE', provider: 'piper' },
    { id: 'de_DE-thorsten-medium', name: 'German - Thorsten Medium', lang: 'de-DE', provider: 'piper' },
    // French
    { id: 'fr_FR-siwis-low', name: 'French - Siwis Low', lang: 'fr-FR', provider: 'piper' },
    { id: 'fr_FR-siwis-medium', name: 'French - Siwis Medium', lang: 'fr-FR', provider: 'piper' },
    // Spanish
    { id: 'es_ES-davefx-medium', name: 'Spanish - Davefx Medium', lang: 'es-ES', provider: 'piper' },
    // Italian - riccardo voices not available
    // Russian
    { id: 'ru_RU-ruslan-medium', name: 'Russian - Ruslan Medium', lang: 'ru-RU', provider: 'piper' },
    // Chinese
    { id: 'zh_CN-huayan-medium', name: 'Chinese - Huayan Medium', lang: 'zh-CN', provider: 'piper' },
    // Arabic - hafez voice not available
    // Polish
    { id: 'pl_PL-darkman-medium', name: 'Polish - Darkman Medium', lang: 'pl-PL', provider: 'piper' },
    // Portuguese - edresson voice not available
    // Dutch
    { id: 'nl_NL-mls-medium', name: 'Dutch - MLS Medium', lang: 'nl-NL', provider: 'piper' },
    // Czech
    { id: 'cs_CZ-jirka-medium', name: 'Czech - Jirka Medium', lang: 'cs-CZ', provider: 'piper' },
    // Turkish
    { id: 'tr_TR-dfki-medium', name: 'Turkish - DFKI Medium', lang: 'tr-TR', provider: 'piper' },
    // Japanese - nanami voice not available
    // Korean - kyungha voice not available
  ];

  constructor(serverUrl?: string) {
    super();
    this.serverUrl = serverUrl || null;
  }

  /** True in a browser when a server URL is configured or Web Workers exist. */
  async isAvailable(): Promise<boolean> {
    if (typeof window === 'undefined') return false;
    return this.serverUrl !== null || typeof Worker !== 'undefined';
  }

  /** Returns a copy of the built-in voice list, so callers cannot mutate it. */
  async getVoices(): Promise<TTSVoice[]> {
    return [...this.defaultVoices];
  }

  async initialize(): Promise<void> {
    // Server-based synthesis doesn't need initialization
  }

  /**
   * Synthesizes `text` and starts playback.
   *
   * Resolves once playback has started, or silently when the request was
   * cancelled by `stop()` / a newer `speak()`. Blank text is ignored without
   * touching the current playback.
   *
   * @param options `voice` is used only when its provider is 'piper'
   *   (otherwise the first built-in voice); `speed` is clamped to [0.25, 2].
   * @throws Error when no server URL is configured, the request times out
   *   (5 minutes), the server answers with an error or an empty body, or
   *   playback cannot start.
   */
  async speak(text: string, options?: TTSOptions): Promise<void> {
    if (!text.trim()) return;

    await this.stop();

    // Checked before entering 'synthesizing' so a configuration error cannot
    // leave the provider stuck in that state.
    if (!this.serverUrl) {
      throw new Error('Piper TTS server URL not configured');
    }

    const requestedVoice = options?.voice;
    const voice = requestedVoice?.provider === 'piper' ? requestedVoice : this.defaultVoices[0];
    const speed = Math.max(0.25, Math.min(2.0, options?.speed ?? 1.0));

    console.log('PiperProvider: Using voice:', voice.id, 'from options:', options?.voice?.id || 'default');

    // Per-call controller and timer; the instance fields only point at the
    // request that is currently in charge.
    const controller = new AbortController();
    let timedOut = false;
    const timeoutId = setTimeout(() => {
      console.log('Piper TTS: Request timeout');
      timedOut = true;
      controller.abort();
    }, 300000); // 5 minutes
    this.abortController = controller;
    this.timeoutId = timeoutId;

    // False once stop() or a newer speak() has taken over.
    const ownsRequest = (): boolean => this.abortController === controller;
    const clearOwnTimeout = (): void => {
      clearTimeout(timeoutId);
      if (this.timeoutId === timeoutId) this.timeoutId = null;
    };

    let audio: HTMLAudioElement | null = null;
    let audioUrl: string | null = null;

    this.updateState('synthesizing');

    try {
      const response = await fetch('/api/piper-tts', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ text, voice: voice.id, speed }),
        signal: controller.signal,
      });

      if (!response.ok) {
        const errorText = await response.text().catch(() => response.statusText);
        throw new Error(`Piper TTS server error: ${response.status} ${errorText}`);
      }

      const audioBlob = await response.blob();

      // The timeout guards the whole transfer, so clear it only after the body arrived.
      clearOwnTimeout();

      if (audioBlob.size === 0) {
        throw new Error('Received empty audio blob from Piper TTS server');
      }

      // Cache the audio blob
      try {
        const { cacheMedia } = await import('../../services/cache/media-cache.js');
        // NOTE(review): the key keeps only the first 100 characters of the
        // text, so longer texts sharing that prefix map to the same entry -
        // confirm this is acceptable for whatever reads the cache.
        const cacheKey = `tts:${voice.id}:${speed}:${text.substring(0, 100)}`;
        await cacheMedia(cacheKey, audioBlob, 'audio');
      } catch (cacheError) {
        // Cache failure is non-critical
        console.debug('Failed to cache TTS audio:', cacheError);
      }

      // stop() may have been called while caching; do not start playback then.
      if (!ownsRequest() || controller.signal.aborted) {
        console.log('Piper TTS: Request cancelled');
        return;
      }

      audioUrl = URL.createObjectURL(audioBlob);
      this.setupAudioElement(audioUrl, options?.volume ?? 1.0);
      audio = this.audioElement;
      if (!audio) {
        throw new Error('Audio element was not created');
      }

      await audio.play();

      if (!ownsRequest()) return;
      this.abortController = null;
      this.updateState('playing');
    } catch (error) {
      clearOwnTimeout();

      // Playback never started: detach the element we created and free its URL.
      if (audio && this.audioElement === audio) {
        audio.onplay = null;
        audio.onpause = null;
        audio.onended = null;
        audio.onerror = null;
        audio.ontimeupdate = null;
        this.audioElement = null;
      }
      if (audioUrl !== null) {
        URL.revokeObjectURL(audioUrl);
      }

      // stop() or a newer speak() owns the shared fields and the state now.
      if (!ownsRequest()) {
        console.log('Piper TTS: Request cancelled');
        return;
      }
      this.abortController = null;

      if (!timedOut && error instanceof Error && error.name === 'AbortError') {
        console.log('Piper TTS: Request cancelled');
        this.updateState('idle');
        return; // Don't throw on cancellation
      }

      this.updateState('error');
      const err = timedOut
        ? new Error('Piper TTS request timed out')
        : error instanceof Error ? error : new Error('Failed to speak text');
      this.callbacks.onError?.(err);
      throw err;
    }
  }

  /** Aborts any request in flight, clears its timeout, then stops playback. */
  async stop(): Promise<void> {
    // Abort ongoing fetch request
    if (this.abortController) {
      console.log('Piper TTS: Aborting request');
      this.abortController.abort();
      this.abortController = null;
    }
    if (this.timeoutId) {
      clearTimeout(this.timeoutId);
      this.timeoutId = null;
    }
    await super.stop();
  }

  /** Stops everything and drops all callbacks. */
  destroy(): void {
    // stop() does all of its work before its first await, so listeners still
    // receive the final 'idle' before the callbacks are cleared below.
    void this.stop();
    this.callbacks = {};
  }
}

/**
 * TTS Service
 * Manages TTS providers and provides unified interface
 *
 * Holds at most one active provider. Callbacks registered on the service are
 * re-applied to every provider it creates, so they survive provider changes.
 */
export class TTSService {
  private provider: TTSProviderInterface | null = null;
  private providerType: TTSProvider = 'webspeech';
  private callbacks: TTSEventCallbacks = {};

  /**
   * Switches to the provider of the given type.
   *
   * The new provider is built first and only then replaces the old one. If
   * construction fails (Web Speech missing, password prompt cancelled,
   * invalid key, ...) the previous provider and `getProviderType()` stay
   * untouched instead of leaving a destroyed provider in place.
   *
   * @throws Error when the provider cannot be created or the type is unknown.
   */
  async initialize(providerType: TTSProvider = 'webspeech'): Promise<void> {
    const next = await this.createProvider(providerType);

    this.provider?.destroy();
    this.provider = next;
    this.providerType = providerType;

    if (next.setCallbacks) {
      next.setCallbacks(this.callbacks);
    }
  }

  /** Builds a provider of the given type without touching the service state. */
  private async createProvider(providerType: TTSProvider): Promise<TTSProviderInterface> {
    if (providerType === 'webspeech') {
      return new WebSpeechProvider();
    }

    if (providerType === 'openai') {
      const { loadEncryptedApiKey } = await import('../security/api-key-storage.js');
      const password = prompt('Enter your password to access OpenAI API key:');
      if (!password) {
        throw new Error('Password required to access OpenAI API key');
      }

      const apiKey = await loadEncryptedApiKey('tts.openai', password);
      if (!apiKey || !apiKey.startsWith('sk-')) {
        throw new Error('Invalid or missing OpenAI API key');
      }

      return new OpenAIProvider(apiKey);
    }

    if (providerType === 'piper') {
      const serverUrl = localStorage.getItem('piper_tts_server_url') || 'http://localhost:5000';
      const piper = new PiperProvider(serverUrl);
      await piper.initialize();
      return piper;
    }

    throw new Error(`Unknown TTS provider: ${providerType}`);
  }

  /**
   * Reports whether speech output is usable.
   *
   * Without an active provider this first tries to set one up, in order:
   * OpenAI (when a stored key exists), Piper, then Web Speech. Returns false
   * rather than rejecting when none of them can be created.
   */
  async isAvailable(): Promise<boolean> {
    if (!this.provider) {
      // Try to auto-initialize with best available provider
      try {
        const { hasApiKey } = await import('../security/api-key-storage.js');
        if (await hasApiKey('tts.openai')) {
          try {
            await this.initialize('openai');
            return true;
          } catch {
            // Fall through
          }
        }
      } catch {
        // Ignore
      }

      try {
        const serverUrl = localStorage.getItem('piper_tts_server_url') || 'http://localhost:5000';
        const provider = new PiperProvider(serverUrl);
        if (await provider.isAvailable()) {
          await this.initialize('piper');
          return true;
        }
      } catch {
        // Fall through
      }

      try {
        await this.initialize('webspeech');
      } catch {
        // No provider could be created in this environment.
        return false;
      }
    }

    return this.provider ? await this.provider.isAvailable() : false;
  }

  /** Lists the active provider's voices, initializing the default provider if needed. */
  async getVoices(): Promise<TTSVoice[]> {
    if (!this.provider) {
      await this.initialize();
    }
    return this.provider ? await this.provider.getVoices() : [];
  }

  /**
   * Speaks `text` with the active provider, initializing the default one if
   * needed. When OpenAI reports a quota error, the service switches to Web
   * Speech and retries once.
   *
   * @throws Whatever the provider throws, or the fallback initialization error.
   */
  async speak(text: string, options?: TTSOptions): Promise<void> {
    if (!this.provider) {
      await this.initialize();
    }

    if (this.provider) {
      try {
        await this.provider.speak(text, options);
      } catch (error) {
        // Auto-fallback from OpenAI to Web Speech on quota error
        const isQuotaError =
          error instanceof Error &&
          Boolean((error as Error & { isQuotaError?: unknown }).isQuotaError);
        if (isQuotaError && this.providerType === 'openai') {
          console.warn('OpenAI TTS quota exceeded, falling back to Web Speech API');
          await this.initialize('webspeech');
          if (this.provider) {
            await this.provider.speak(text, options);
            return;
          }
        }
        throw error;
      }
    }
  }

  async pause(): Promise<void> {
    await this.provider?.pause();
  }

  async resume(): Promise<void> {
    await this.provider?.resume();
  }

  async stop(): Promise<void> {
    await this.provider?.stop();
  }

  /** Returns the provider's state, or 'idle' when no provider is active. */
  getState(): TTSState {
    return this.provider?.getState() ?? 'idle';
  }

  /** Returns the provider's progress, or 0 when no provider is active. */
  async getProgress(): Promise<number> {
    return this.provider ? await this.provider.getProgress() : 0;
  }

  async setProgress(position: number): Promise<void> {
    if (this.provider) {
      await this.provider.setProgress(position);
    }
  }

  /** Merges `callbacks` into the service's callbacks and pushes them to the active provider. */
  setCallbacks(callbacks: TTSEventCallbacks): void {
    this.callbacks = { ...this.callbacks, ...callbacks };
    if (this.provider?.setCallbacks) {
      this.provider.setCallbacks(this.callbacks);
    }
  }

  getProviderType(): TTSProvider {
    return this.providerType;
  }

  /** Destroys the active provider and forgets all callbacks. */
  destroy(): void {
    if (this.provider) {
      this.provider.destroy();
      this.provider = null;
    }
    this.callbacks = {};
  }
}

// Module-wide TTSService, created lazily on first request
let ttsServiceInstance: TTSService | null = null;

/**
 * Returns the shared TTSService, constructing it on the first call.
 * Every later call returns that same instance.
 */
export function getTTSService(): TTSService {
  const existing = ttsServiceInstance;
  if (existing !== null) {
    return existing;
  }

  const created = new TTSService();
  ttsServiceInstance = created;
  return created;
}