From 991fee7431e94918db0678f5a4f491fff935c005 Mon Sep 17 00:00:00 2001 From: Silberengel Date: Sun, 29 Mar 2026 18:48:02 +0200 Subject: [PATCH] make read-aloud more efficient for large articles add a media player --- src/App.tsx | 2 + src/components/NoteOptions/useMenuActions.tsx | 15 +- src/components/ReadAloudPlayerModal.tsx | 269 ++++++++ src/i18n/locales/de.ts | 41 ++ src/i18n/locales/en.ts | 40 ++ src/lib/read-aloud.ts | 603 ++++++++++++++++-- 6 files changed, 900 insertions(+), 70 deletions(-) create mode 100644 src/components/ReadAloudPlayerModal.tsx diff --git a/src/App.tsx b/src/App.tsx index 0a39f984..f580206e 100644 --- a/src/App.tsx +++ b/src/App.tsx @@ -2,6 +2,7 @@ import 'yet-another-react-lightbox/styles.css' import './index.css' import PublishSuccessSubtleIndicator from '@/components/PublishSuccessSubtleIndicator' +import ReadAloudPlayerModal from '@/components/ReadAloudPlayerModal' import { Toaster } from '@/components/ui/sonner' import { BookmarksProvider } from '@/providers/BookmarksProvider' import { ContentPolicyProvider } from '@/providers/ContentPolicyProvider' @@ -52,6 +53,7 @@ export default function App(): JSX.Element { + diff --git a/src/components/NoteOptions/useMenuActions.tsx b/src/components/NoteOptions/useMenuActions.tsx index 1748c203..e526062f 100644 --- a/src/components/NoteOptions/useMenuActions.tsx +++ b/src/components/NoteOptions/useMenuActions.tsx @@ -428,6 +428,12 @@ export function useMenuActions({ return event.tags.find(tag => tag[0] === 'd')?.[1] || '' }, [isArticleType, event]) + /** Decent Newsroom article URLs are scoped by author: /p/{npub}/d/{identifier} */ + const authorNpubForDecentNewsroom = useMemo( + () => pubkeyToNpub(event.pubkey) ?? '', + [event.pubkey] + ) + // Generate naddr for Alexandria URL const naddr = useMemo(() => { if (!isArticleType || !dTag) return '' @@ -554,9 +560,11 @@ export function useMenuActions({ } const handleViewOnDecentNewsroom = () => { - if (!dTag) return + if (!dTag || !authorNpubForDecentNewsroom) return closeDrawer() - window.open(`https://decentnewsroom.com/article/d/${dTag}`, '_blank', 'noopener,noreferrer') + const p = encodeURIComponent(authorNpubForDecentNewsroom) + const d = encodeURIComponent(dTag) + window.open(`https://decentnewsroom.com/p/${p}/d/${d}`, '_blank', 'noopener,noreferrer') } const actions: MenuAction[] = [ { @@ -750,7 +758,7 @@ export function useMenuActions({ onClick: handleViewOnAlexandria }) } - if (dTag) { + if (dTag && authorNpubForDecentNewsroom) { actions.push({ icon: Globe, label: t('View on DecentNewsroom'), @@ -884,6 +892,7 @@ export function useMenuActions({ isArticleType, articleMetadata, dTag, + authorNpubForDecentNewsroom, naddr, onOpenPublicMessage, onOpenCallInvite, diff --git a/src/components/ReadAloudPlayerModal.tsx b/src/components/ReadAloudPlayerModal.tsx new file mode 100644 index 00000000..7a2ff651 --- /dev/null +++ b/src/components/ReadAloudPlayerModal.tsx @@ -0,0 +1,269 @@ +import { Button } from '@/components/ui/button' +import { + Dialog, + DialogContent, + DialogHeader, + DialogTitle +} from '@/components/ui/dialog' +import { + closeReadAloudPlayer, + getReadAloudServerSnapshot, + getReadAloudSnapshot, + subscribeReadAloud, + type ReadAloudSnapshot +} from '@/lib/read-aloud' +import { cn } from '@/lib/utils' +import type { TFunction } from 'i18next' +import { useCallback, useSyncExternalStore } from 'react' +import { useTranslation } from 'react-i18next' + +function formatClock(ts: number | null): string { + if (ts == null) return '—' + try { + return new Date(ts).toLocaleTimeString(undefined, { + hour: '2-digit', + minute: '2-digit', + second: '2-digit' + }) + } catch { + return '—' + } +} + +/** Lighter scrim than default bg-black/80; content stays above overlay (z-230 vs z-220). */ +const READ_ALOUD_OVERLAY_CLASS = + 'z-[220] bg-black/35 backdrop-blur-sm dark:bg-black/40' + +function sectionAriaLabel(i: number, snap: ReadAloudSnapshot, t: TFunction): string { + if (i < snap.chunksPlayed) { + return t('Read-aloud section done', { index: i + 1 }) + } + if (i > snap.currentChunkIndex) { + return t('Read-aloud section pending', { index: i + 1 }) + } + switch (snap.phase) { + case 'requesting': + return t('Read-aloud section fetching', { index: i + 1 }) + case 'buffering': + case 'preparing': + return t('Read-aloud section preparing audio', { index: i + 1 }) + case 'playing': + return t('Read-aloud section playing', { index: i + 1 }) + case 'paused': + return t('Read-aloud section paused', { index: i + 1 }) + default: + return t('Read-aloud section pending', { index: i + 1 }) + } +} + +function phaseLabel(s: ReadAloudSnapshot, t: (k: string) => string): string { + switch (s.phase) { + case 'idle': + return t('Read-aloud idle') + case 'preparing': + return t('Preparing read-aloud…') + case 'requesting': + return t('Requesting audio…') + case 'buffering': + return t('Loading audio…') + case 'playing': + return t('Playing') + case 'paused': + return t('Paused') + case 'done': + return t('Read-aloud finished') + case 'error': + return t('Read-aloud error') + default: + return s.phase + } +} + +export default function ReadAloudPlayerModal(): JSX.Element { + const { t } = useTranslation() + const snap = useSyncExternalStore( + subscribeReadAloud, + getReadAloudSnapshot, + getReadAloudServerSnapshot + ) + + const onOpenChange = useCallback((open: boolean) => { + if (!open) { + closeReadAloudPlayer() + } + }, []) + + const showChunks = snap.engine === 'piper' && snap.totalChunks > 0 + + const nChunks = snap.totalChunks + const overallPct = + nChunks > 0 + ? Math.min(100, ((snap.chunksPlayed + snap.chunkPlaybackRatio) / nChunks) * 100) + : 0 + + return ( + + + + {t('Read aloud')} + +
+ {snap.title ? ( +

{snap.title}

+ ) : null} +

{phaseLabel(snap, t)}

+ {snap.engine === 'piper' ? ( +

+ {t('TTS endpoint')}: {snap.backend || '—'} +

+ ) : snap.engine === 'webspeech' ? ( +

{t('Using browser speech synthesis')}

+ ) : null} + {snap.readAloudPiperSkipped || + snap.readAloudPiperTryStartedAt != null || + snap.usedPiperFallback ? ( +
+

{t('Read-aloud Piper status heading')}

+ {snap.readAloudPiperSkipped ? ( +

{t('Read-aloud Piper skipped notice')}

+ ) : null} + {snap.readAloudPiperTryStartedAt != null ? ( +

+ {t('Read-aloud Piper attempt started', { + time: formatClock(snap.readAloudPiperTryStartedAt) + })} +

+ ) : null} + {!snap.readAloudPiperSkipped && snap.backend ? ( +

+ {t('Read-aloud Piper endpoint tried', { url: snap.backend })} +

+ ) : null} +
+ ) : null} + {snap.engine === 'webspeech' && snap.usedPiperFallback ? ( +
+

+ {t('Read-aloud Piper fallback notice')} +

+ {snap.piperFallbackDetail ? ( +

+ + {t('Read-aloud Piper fallback detail label')}:{' '} + + {snap.piperFallbackDetail} +

+ ) : null} +
+ ) : null} + {showChunks ? ( +
+

+ {t('Read-aloud section progress', { + current: snap.currentChunkIndex + 1, + total: snap.totalChunks + })} +

+
+
+
+
+ {Array.from({ length: snap.totalChunks }, (_, i) => { + const done = i < snap.chunksPlayed + const active = i === snap.currentChunkIndex + const fetching = active && snap.phase === 'requesting' + const decoding = active && (snap.phase === 'buffering' || snap.phase === 'preparing') + const playing = active && snap.phase === 'playing' + const paused = active && snap.phase === 'paused' + return ( +
+ {(playing || paused) && !done ? ( +
+ ) : null} +
+ ) + })} +
+

+ {snap.phase === 'requesting' + ? t('Read-aloud legend fetching') + : snap.phase === 'buffering' || snap.phase === 'preparing' + ? t('Read-aloud legend buffering') + : snap.phase === 'playing' + ? t('Read-aloud legend playing') + : snap.phase === 'paused' + ? t('Read-aloud legend paused') + : null} +

+
+ ) : null} +
+
{t('Request sent')}
+
{formatClock(snap.requestSentAt)}
+
{t('Response received')}
+
{formatClock(snap.responseReceivedAt)}
+
{t('Playback started')}
+
{formatClock(snap.playbackStartedAt)}
+
{t('Characters')}
+
{snap.charCount > 0 ? snap.charCount.toLocaleString() : '—'}
+
+ {snap.error ? ( +

+ {snap.error} +

+ ) : null} +
+ +
+
+ +
+ ) +} diff --git a/src/i18n/locales/de.ts b/src/i18n/locales/de.ts index 967dc3cd..154761b1 100644 --- a/src/i18n/locales/de.ts +++ b/src/i18n/locales/de.ts @@ -181,6 +181,47 @@ export default { 'Vorlesen wird in diesem Browser nicht unterstützt', 'Nothing to read aloud': 'Kein Text zum Vorlesen', 'Read-aloud failed': 'Vorlesen fehlgeschlagen', + 'Read aloud': 'Vorlesen', + 'Read-aloud idle': 'Leerlauf', + 'Preparing read-aloud…': 'Vorlesen wird vorbereitet…', + 'Requesting audio…': 'Audio wird angefordert…', + 'Loading audio…': 'Audio wird geladen…', + Playing: 'Wiedergabe', + Paused: 'Pausiert', + 'Read-aloud finished': 'Beendet', + 'Read-aloud error': 'Fehler', + 'TTS endpoint': 'TTS-Endpunkt', + 'Using browser speech synthesis': 'Browser-Sprachausgabe', + 'Read-aloud section progress': 'Abschnitt {{current}} von {{total}}', + 'Request sent': 'Anfrage gesendet', + 'Response received': 'Antwort erhalten', + 'Playback started': 'Wiedergabe gestartet', + Characters: 'Zeichen', + Pause: 'Pause', + Play: 'Abspielen', + Stop: 'Stopp', + 'Read-aloud sections': 'Vorlesen — Abschnitte', + 'Read-aloud overall progress': 'Gesamtfortschritt', + 'Read-aloud section done': 'Abschnitt {{index}}: fertig', + 'Read-aloud section pending': 'Abschnitt {{index}}: noch nicht gestartet', + 'Read-aloud section fetching': 'Abschnitt {{index}}: Audio wird angefordert', + 'Read-aloud section preparing audio': 'Abschnitt {{index}}: Audio wird geladen', + 'Read-aloud section playing': 'Abschnitt {{index}}: Wiedergabe', + 'Read-aloud section paused': 'Abschnitt {{index}}: pausiert', + 'Read-aloud legend fetching': 'Audio für diesen Abschnitt wird vom Server angefordert…', + 'Read-aloud legend buffering': 'Audio für diesen Abschnitt wird decodiert…', + 'Read-aloud legend playing': 'Dieser Abschnitt wird wiedergegeben.', + 'Read-aloud legend paused': 'Wiedergabe pausiert.', + 'Read-aloud Piper fallback notice': + 'Die Server-Stimme (Piper) konnte nicht genutzt werden. Es wird die Browser-Sprachausgabe verwendet.', + 'Read-aloud Piper fallback detail label': 'Piper-Fehler', + 'Read-aloud Piper status region': 'Piper-Sprachausgabe (Server)', + 'Read-aloud Piper status heading': 'Piper (Server-Stimme)', + 'Read-aloud Piper skipped notice': + 'Für diese App ist keine Piper-URL gesetzt (siehe VITE_READ_ALOUD_TTS_URL). Es wird nur die Browser-Stimme verwendet — der Server wurde nicht angesprochen.', + 'Read-aloud Piper attempt started': + 'Piper wurde um {{time}} gestartet (dieses Vorlesen hat zuerst den Server verwendet).', + 'Read-aloud Piper endpoint tried': 'Verwendete URL: {{url}}', 'Join the video call': 'Am Videoanruf teilnehmen', 'Schedule video call': 'Videoanruf planen', "You're invited to a scheduled video call.": diff --git a/src/i18n/locales/en.ts b/src/i18n/locales/en.ts index 8a46a3b1..8ec04163 100644 --- a/src/i18n/locales/en.ts +++ b/src/i18n/locales/en.ts @@ -180,6 +180,46 @@ export default { 'Read-aloud is not supported in this browser', 'Nothing to read aloud': 'Nothing to read aloud', 'Read-aloud failed': 'Read-aloud failed', + 'Read aloud': 'Read aloud', + 'Read-aloud idle': 'Idle', + 'Preparing read-aloud…': 'Preparing read-aloud…', + 'Requesting audio…': 'Requesting audio…', + 'Loading audio…': 'Loading audio…', + Playing: 'Playing', + Paused: 'Paused', + 'Read-aloud finished': 'Finished', + 'Read-aloud error': 'Error', + 'TTS endpoint': 'TTS endpoint', + 'Using browser speech synthesis': 'Using browser speech synthesis', + 'Read-aloud section progress': 'Section {{current}} of {{total}}', + 'Request sent': 'Request sent', + 'Response received': 'Response received', + 'Playback started': 'Playback started', + Characters: 'Characters', + Pause: 'Pause', + Play: 'Play', + Stop: 'Stop', + 'Read-aloud sections': 'Read-aloud sections', + 'Read-aloud overall progress': 'Overall progress', + 'Read-aloud section done': 'Section {{index}}: finished', + 'Read-aloud section pending': 'Section {{index}}: not started yet', + 'Read-aloud section fetching': 'Section {{index}}: requesting audio', + 'Read-aloud section preparing audio': 'Section {{index}}: loading audio', + 'Read-aloud section playing': 'Section {{index}}: playing', + 'Read-aloud section paused': 'Section {{index}}: paused', + 'Read-aloud legend fetching': 'Requesting audio for this section from the server…', + 'Read-aloud legend buffering': 'Decoding audio for this section…', + 'Read-aloud legend playing': 'Playing this section.', + 'Read-aloud legend paused': 'Playback paused.', + 'Read-aloud Piper fallback notice': + 'Server voice (Piper) could not be used. Playing with your browser voice instead.', + 'Read-aloud Piper fallback detail label': 'Piper error', + 'Read-aloud Piper status region': 'Piper text-to-speech status', + 'Read-aloud Piper status heading': 'Piper (server voice)', + 'Read-aloud Piper skipped notice': + 'No Piper URL is configured for this app (see VITE_READ_ALOUD_TTS_URL). Only the browser voice is used — the server was not contacted.', + 'Read-aloud Piper attempt started': 'Piper was started at {{time}} (this read-aloud used the server first).', + 'Read-aloud Piper endpoint tried': 'URL used: {{url}}', 'Join the video call': 'Join the video call', 'Schedule video call': 'Schedule video call', "You're invited to a scheduled video call.": "You're invited to a scheduled video call.", diff --git a/src/lib/read-aloud.ts b/src/lib/read-aloud.ts index c9e8abe2..3415a49a 100644 --- a/src/lib/read-aloud.ts +++ b/src/lib/read-aloud.ts @@ -3,6 +3,9 @@ import { getLongFormArticleMetadataFromEvent } from '@/lib/event-metadata' import logger from '@/lib/logger' import { Event, kinds } from 'nostr-tools' +/** Keep each Piper request small: long JSON bodies and WAV responses can OOM or time out the server. */ +const PIPER_CHUNK_MAX_CHARS = 3600 + function readAloudEndpointForLog(): string { const u = READ_ALOUD_TTS_URL if (!u) return '' @@ -16,6 +19,134 @@ function readAloudEndpointForLog(): string { export type ReadAloudResult = 'ok' | 'unsupported' | 'empty' | 'error' +export type ReadAloudPhase = + | 'idle' + | 'preparing' + | 'requesting' + | 'buffering' + | 'playing' + | 'paused' + | 'done' + | 'error' + +export type ReadAloudEngine = 'idle' | 'piper' | 'webspeech' + +export type ReadAloudSnapshot = { + open: boolean + title: string + engine: ReadAloudEngine + phase: ReadAloudPhase + totalChunks: number + currentChunkIndex: number + /** Piper: chunks fully played (0 .. totalChunks). */ + chunksPlayed: number + /** Piper: 0–1 within the current chunk (from media timeupdate). */ + chunkPlaybackRatio: number + charCount: number + requestSentAt: number | null + responseReceivedAt: number | null + playbackStartedAt: number | null + finishedAt: number | null + error: string | null + /** True when Piper was tried first and we fell back to Web Speech (still playing or finished). */ + usedPiperFallback: boolean + /** Piper failure message for the fallback notice (optional detail). */ + piperFallbackDetail: string | null + /** No `READ_ALOUD_TTS_URL` — Piper was never available for this read-aloud. */ + readAloudPiperSkipped: boolean + /** When the Piper path started (first UI frame); kept after fallback for the timeline. */ + readAloudPiperTryStartedAt: number | null + volume: number + backend: string +} + +const initialSnapshot: ReadAloudSnapshot = { + open: false, + title: '', + engine: 'idle', + phase: 'idle', + totalChunks: 0, + currentChunkIndex: 0, + chunksPlayed: 0, + chunkPlaybackRatio: 0, + charCount: 0, + requestSentAt: null, + responseReceivedAt: null, + playbackStartedAt: null, + finishedAt: null, + error: null, + usedPiperFallback: false, + piperFallbackDetail: null, + readAloudPiperSkipped: false, + readAloudPiperTryStartedAt: null, + volume: 1, + backend: '' +} + +let snapshot: ReadAloudSnapshot = { ...initialSnapshot } +const listeners = new Set<() => void>() + +function emit(): void { + listeners.forEach((l) => l()) +} + +function patchSnapshot(p: Partial): void { + snapshot = { ...snapshot, ...p } + emit() +} + +export function subscribeReadAloud(onStoreChange: () => void): () => void { + listeners.add(onStoreChange) + return () => listeners.delete(onStoreChange) +} + +export function getReadAloudSnapshot(): ReadAloudSnapshot { + return snapshot +} + +export function getReadAloudServerSnapshot(): ReadAloudSnapshot { + return { ...initialSnapshot } +} + +let readAloudAbort: AbortController | null = null +let readAloudAudio: HTMLAudioElement | null = null +let readAloudUserPaused = false +let unpauseResolvers: Array<() => void> = [] + +function resolveUnpauses(): void { + const r = unpauseResolvers + unpauseResolvers = [] + r.forEach((fn) => { + fn() + }) +} + +function waitUntilUnpaused(): Promise { + if (!readAloudUserPaused) return Promise.resolve() + return new Promise((resolve) => { + unpauseResolvers.push(resolve) + }) +} + +/** Let the read-aloud modal paint Piper / status before fetch or Web Speech starts. */ +function yieldForReadAloudUi(): Promise { + return new Promise((resolve) => { + requestAnimationFrame(() => { + requestAnimationFrame(() => { + window.setTimeout(resolve, 48) + }) + }) + }) +} + +export function closeReadAloudPlayer(): void { + stopReadAloudPlayback() + readAloudUserPaused = false + unpauseResolvers = [] + snapshot = { ...initialSnapshot } + emit() +} + const KINDS_WITH_METADATA_TITLE = new Set([ kinds.LongFormArticle, ExtendedKind.PUBLICATION, @@ -24,19 +155,20 @@ const KINDS_WITH_METADATA_TITLE = new Set([ ExtendedKind.WIKI_ARTICLE ]) -let readAloudAbort: AbortController | null = null -let readAloudAudio: HTMLAudioElement | null = null - function stopReadAloudPlayback(): void { readAloudAbort?.abort() readAloudAbort = null if (readAloudAudio) { const url = readAloudAudio.src - readAloudAudio.onended = null - readAloudAudio.onerror = null - readAloudAudio.pause() - readAloudAudio.removeAttribute('src') - readAloudAudio.load() + const el = readAloudAudio + el.onended = null + el.onerror = null + el.pause() + el.removeAttribute('src') + el.load() + if (el.parentNode) { + el.parentNode.removeChild(el) + } if (url.startsWith('blob:')) { URL.revokeObjectURL(url) } @@ -45,6 +177,68 @@ function stopReadAloudPlayback(): void { window.speechSynthesis?.cancel() } +/** Cut index in `s` for the first chunk: prefer after whitespace so words stay intact; only split at `maxLen` if there is no space in the window. */ +function splitAfterLastWhitespaceInWindow(s: string, maxLen: number): number { + const window = s.slice(0, maxLen) + for (let i = window.length - 1; i > 0; i--) { + if (/\s/u.test(window[i]!)) { + return i + 1 + } + } + return maxLen +} + +function splitOversizedPiece(piece: string, maxLen: number): string[] { + const out: string[] = [] + let s = piece + while (s.length > maxLen) { + const cut = splitAfterLastWhitespaceInWindow(s, maxLen) + const part = s.slice(0, cut).trimEnd() + if (part) out.push(part) + s = s.slice(cut).trimStart() + } + if (s) out.push(s) + return out +} + +/** Split plain text into segments under Piper's practical request size (paragraph boundaries first). */ +function splitTextIntoTtsChunks(text: string, maxLen: number = PIPER_CHUNK_MAX_CHARS): string[] { + const normalized = text.replace(/\r\n/g, '\n').trim() + if (!normalized) return [] + if (normalized.length <= maxLen) return [normalized] + + const paras = normalized + .split(/\n\n+/) + .map((p) => p.trim()) + .filter(Boolean) + const chunks: string[] = [] + let current = '' + + const flush = (): void => { + if (current) { + chunks.push(current) + current = '' + } + } + + for (const para of paras) { + if (para.length > maxLen) { + flush() + chunks.push(...splitOversizedPiece(para, maxLen)) + continue + } + const joined = current ? `${current}\n\n${para}` : para + if (joined.length <= maxLen) { + current = joined + } else { + flush() + current = para + } + } + flush() + return chunks +} + /** Strip common Markdown / AsciiDoc / code so TTS reads plain text (same idea as NotePage preview). */ function stripMarkupForReadAloud(content: string): string { let text = content @@ -61,6 +255,14 @@ function stripMarkupForReadAloud(content: string): string { return text.trim() } +function readAloudTitleFromEvent(event: Event): string { + if (KINDS_WITH_METADATA_TITLE.has(event.kind)) { + const meta = getLongFormArticleMetadataFromEvent(event) + return meta.title?.trim() ?? '' + } + return '' +} + function buildReadAloudPlainText(event: Event): string { let raw = event.content?.trim() ?? '' if (KINDS_WITH_METADATA_TITLE.has(event.kind)) { @@ -73,88 +275,313 @@ function buildReadAloudPlainText(event: Event): string { return stripMarkupForReadAloud(raw) } -/** - * Piper / Wyoming proxy (aitherboard-compatible): POST JSON, receive WAV. - */ -async function speakViaPiperTts(text: string): Promise { - stopReadAloudPlayback() - readAloudAbort = new AbortController() +function playPiperBlob(blob: Blob, signal: AbortSignal): Promise<'ok' | 'error' | 'aborted'> { + return new Promise((resolve) => { + if (signal.aborted) { + resolve('aborted') + return + } - try { - const response = await fetch(READ_ALOUD_TTS_URL, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ text, speed: 1 }), - signal: readAloudAbort.signal - }) + const audioUrl = URL.createObjectURL(blob) + const audio = new Audio() + readAloudAudio = audio + audio.volume = snapshot.volume + audio.src = audioUrl + audio.preload = 'auto' + try { + audio.setAttribute('data-jumble-read-aloud', '') + audio.style.display = 'none' + document.body.appendChild(audio) + } catch { + /* detached Audio() still works in most browsers */ + } + + let lastRatioEmit = 0 - if (!response.ok) { - logger.warn('[ReadAloud] Piper HTTP error', { - status: response.status, - endpoint: readAloudEndpointForLog() + const onPlay = (): void => { + if (signal.aborted || readAloudAudio !== audio) return + patchSnapshot({ + phase: 'playing', + playbackStartedAt: snapshot.playbackStartedAt ?? Date.now() }) - return 'error' } - const blob = await response.blob() - if (!blob.size) { - logger.warn('[ReadAloud] Piper returned empty body', { endpoint: readAloudEndpointForLog() }) - return 'error' + const onPause = (): void => { + if (signal.aborted || readAloudAudio !== audio) return + if (audio.ended) return + patchSnapshot({ phase: 'paused' }) } - const audioUrl = URL.createObjectURL(blob) - const audio = new Audio() - readAloudAudio = audio - audio.src = audioUrl + const onTimeUpdate = (): void => { + if (signal.aborted || readAloudAudio !== audio) return + const now = Date.now() + if (now - lastRatioEmit < 150) return + lastRatioEmit = now + const d = audio.duration + if (!d || !Number.isFinite(d) || d <= 0) return + patchSnapshot({ chunkPlaybackRatio: Math.min(1, audio.currentTime / d) }) + } - const cleanupBlob = () => { + const cleanup = (): void => { + audio.removeEventListener('play', onPlay) + audio.removeEventListener('pause', onPause) + audio.removeEventListener('timeupdate', onTimeUpdate) + audio.onended = null + audio.onerror = null + signal.removeEventListener('abort', onAbort) + if (audio.parentNode) { + audio.parentNode.removeChild(audio) + } if (audio.src.startsWith('blob:')) { URL.revokeObjectURL(audioUrl) } } - audio.addEventListener('ended', () => { - cleanupBlob() + const onAbort = (): void => { + cleanup() + audio.pause() if (readAloudAudio === audio) { readAloudAudio = null } - }) - audio.addEventListener('error', () => { - cleanupBlob() - }) + resolve('aborted') + } - try { - await audio.play() - return 'ok' - } catch (playErr) { + signal.addEventListener('abort', onAbort) + audio.addEventListener('play', onPlay) + audio.addEventListener('pause', onPause) + audio.addEventListener('timeupdate', onTimeUpdate) + + audio.onended = (): void => { + patchSnapshot({ chunkPlaybackRatio: 1 }) + cleanup() + if (readAloudAudio === audio) { + readAloudAudio = null + } + resolve('ok') + } + + audio.onerror = (): void => { + cleanup() + if (readAloudAudio === audio) { + readAloudAudio = null + } + resolve('error') + } + + void audio.play().catch((playErr: unknown) => { logger.warn('[ReadAloud] Piper audio.play() blocked or failed', { endpoint: readAloudEndpointForLog(), error: playErr instanceof Error ? playErr.message : String(playErr) }) - cleanupBlob() + cleanup() if (readAloudAudio === audio) { readAloudAudio = null } - return 'error' - } - } catch (e) { - const isAbort = - (e instanceof DOMException && e.name === 'AbortError') || - (e instanceof Error && e.name === 'AbortError') - if (isAbort) { - return 'ok' + resolve('error') + }) + }) +} + +async function speakViaPiperTtsChunks(chunks: string[]): Promise { + stopReadAloudPlayback() + readAloudAbort = new AbortController() + const signal = readAloudAbort.signal + + if (chunks.length === 0) { + return 'empty' + } + + try { + for (let i = 0; i < chunks.length; i++) { + await waitUntilUnpaused() + if (signal.aborted) { + return 'ok' + } + + const sentAt = Date.now() + patchSnapshot({ + currentChunkIndex: i, + chunksPlayed: i, + phase: 'requesting', + requestSentAt: sentAt, + responseReceivedAt: null, + chunkPlaybackRatio: 0 + }) + + let response: Response + try { + response = await fetch(READ_ALOUD_TTS_URL, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ text: chunks[i], speed: 1 }), + signal + }) + } catch (e) { + const isAbort = + (e instanceof DOMException && e.name === 'AbortError') || + (e instanceof Error && e.name === 'AbortError') + if (isAbort) { + return 'ok' + } + const msg = e instanceof Error ? e.message : String(e) + logger.warn('[ReadAloud] Piper fetch failed (check CORS on the TTS host or use same-origin /api/piper-tts)', { + endpoint: readAloudEndpointForLog(), + error: msg + }) + patchSnapshot({ + phase: 'error', + error: `Part ${i + 1} of ${chunks.length}: ${msg}` + }) + return 'error' + } + + if (!response.ok) { + logger.warn('[ReadAloud] Piper HTTP error', { + status: response.status, + endpoint: readAloudEndpointForLog() + }) + patchSnapshot({ + phase: 'error', + error: `Part ${i + 1} of ${chunks.length}: HTTP ${response.status}` + }) + return 'error' + } + + const blob = await response.blob() + if (!blob.size) { + logger.warn('[ReadAloud] Piper returned empty body', { endpoint: readAloudEndpointForLog() }) + patchSnapshot({ + phase: 'error', + error: `Part ${i + 1} of ${chunks.length}: empty audio response` + }) + return 'error' + } + + patchSnapshot({ + responseReceivedAt: Date.now(), + phase: 'buffering' + }) + + await waitUntilUnpaused() + if (signal.aborted) { + return 'ok' + } + + const played = await playPiperBlob(blob, signal) + if (played === 'aborted') { + return 'ok' + } + if (played === 'error') { + patchSnapshot({ + phase: 'error', + error: `Part ${i + 1} of ${chunks.length}: playback failed (browser blocked audio or corrupt WAV)` + }) + return 'error' + } } - logger.warn('[ReadAloud] Piper fetch failed (check CORS on the TTS host or use same-origin /api/piper-tts)', { - endpoint: readAloudEndpointForLog(), - error: e instanceof Error ? e.message : String(e) + + patchSnapshot({ + phase: 'done', + finishedAt: Date.now(), + currentChunkIndex: chunks.length - 1, + chunksPlayed: chunks.length, + chunkPlaybackRatio: 0 }) - return 'error' + return 'ok' + } finally { + readAloudAbort = null } } -function speakViaWebSpeech(text: string): void { +async function speakViaWebSpeech( + text: string, + title: string, + options?: { fromPiperFallback?: boolean; browserOnlyNoPiper?: boolean } +): Promise { stopReadAloudPlayback() - window.speechSynthesis.speak(new SpeechSynthesisUtterance(text)) + readAloudUserPaused = false + resolveUnpauses() + + if (!window.speechSynthesis) { + patchSnapshot({ + open: true, + title, + engine: 'webspeech', + phase: 'error', + error: 'Speech synthesis is not available', + charCount: text.length, + backend: '', + ...(!options?.fromPiperFallback ? { usedPiperFallback: false, piperFallbackDetail: null } : {}), + ...(options?.browserOnlyNoPiper + ? { readAloudPiperSkipped: true, readAloudPiperTryStartedAt: null } + : !options?.fromPiperFallback + ? { readAloudPiperSkipped: false, readAloudPiperTryStartedAt: null } + : {}) + }) + return 'unsupported' + } + + let webspeechPiperFields: Partial + if (options?.browserOnlyNoPiper) { + webspeechPiperFields = { + readAloudPiperSkipped: true, + readAloudPiperTryStartedAt: null, + backend: '' + } + } else if (options?.fromPiperFallback) { + webspeechPiperFields = { readAloudPiperSkipped: false, backend: snapshot.backend } + } else { + webspeechPiperFields = { + readAloudPiperSkipped: false, + readAloudPiperTryStartedAt: null, + backend: '' + } + } + + patchSnapshot({ + open: true, + title, + engine: 'webspeech', + phase: 'buffering', + charCount: text.length, + totalChunks: 0, + currentChunkIndex: 0, + chunksPlayed: 0, + chunkPlaybackRatio: 0, + requestSentAt: null, + responseReceivedAt: null, + playbackStartedAt: null, + finishedAt: null, + error: null, + ...(!options?.fromPiperFallback ? { usedPiperFallback: false, piperFallbackDetail: null } : {}), + ...webspeechPiperFields + }) + + if (options?.browserOnlyNoPiper || options?.fromPiperFallback) { + await yieldForReadAloudUi() + } + + const u = new SpeechSynthesisUtterance(text) + u.onstart = (): void => { + patchSnapshot({ + phase: 'playing', + playbackStartedAt: Date.now() + }) + } + u.onend = (): void => { + patchSnapshot({ + phase: 'done', + finishedAt: Date.now() + }) + } + u.onerror = (ev): void => { + patchSnapshot({ + phase: 'error', + error: ev.error ?? 'speech synthesis error' + }) + } + window.speechSynthesis.speak(u) + return 'ok' } export async function speakNoteReadAloud(event: Event): Promise { @@ -167,21 +594,63 @@ export async function speakNoteReadAloud(event: Event): Promise return 'empty' } + const title = readAloudTitleFromEvent(event) + if (READ_ALOUD_TTS_URL) { - const piperResult = await speakViaPiperTts(text) + stopReadAloudPlayback() + readAloudUserPaused = false + resolveUnpauses() + + const chunks = splitTextIntoTtsChunks(text, PIPER_CHUNK_MAX_CHARS) + patchSnapshot({ + open: true, + title, + engine: 'piper', + phase: 'preparing', + charCount: text.length, + totalChunks: chunks.length, + currentChunkIndex: 0, + chunksPlayed: 0, + chunkPlaybackRatio: 0, + requestSentAt: null, + responseReceivedAt: null, + playbackStartedAt: null, + finishedAt: null, + error: null, + usedPiperFallback: false, + piperFallbackDetail: null, + readAloudPiperSkipped: false, + readAloudPiperTryStartedAt: Date.now(), + backend: readAloudEndpointForLog() + }) + + await yieldForReadAloudUi() + + const piperResult = await speakViaPiperTtsChunks(chunks) if (piperResult === 'ok') { return 'ok' } + logger.warn( - '[ReadAloud] Using Web Speech fallback — Piper did not play. See previous [ReadAloud] log for cause.', + '[ReadAloud] Using Web Speech fallback — Piper did not play. See previous [ReadAloud] log or player error.', { endpoint: readAloudEndpointForLog() } ) - } - if (!window.speechSynthesis) { - return READ_ALOUD_TTS_URL ? 'error' : 'unsupported' + const prior = snapshot.error?.trim() || null + patchSnapshot({ + engine: 'webspeech', + phase: 'preparing', + error: null, + usedPiperFallback: true, + piperFallbackDetail: prior, + totalChunks: 0, + currentChunkIndex: 0, + chunksPlayed: 0, + chunkPlaybackRatio: 0 + }) + + return await speakViaWebSpeech(text, title, { fromPiperFallback: true }) } - speakViaWebSpeech(text) - return 'ok' + return await speakViaWebSpeech(text, title, { browserOnlyNoPiper: true }) }