Browse Source

make read-aloud more efficient for large articles

add a media player
imwald
Silberengel 1 month ago
parent
commit
991fee7431
  1. 2
      src/App.tsx
  2. 15
      src/components/NoteOptions/useMenuActions.tsx
  3. 269
      src/components/ReadAloudPlayerModal.tsx
  4. 41
      src/i18n/locales/de.ts
  5. 40
      src/i18n/locales/en.ts
  6. 603
      src/lib/read-aloud.ts

2
src/App.tsx

@ -2,6 +2,7 @@ import 'yet-another-react-lightbox/styles.css' @@ -2,6 +2,7 @@ import 'yet-another-react-lightbox/styles.css'
import './index.css'
import PublishSuccessSubtleIndicator from '@/components/PublishSuccessSubtleIndicator'
import ReadAloudPlayerModal from '@/components/ReadAloudPlayerModal'
import { Toaster } from '@/components/ui/sonner'
import { BookmarksProvider } from '@/providers/BookmarksProvider'
import { ContentPolicyProvider } from '@/providers/ContentPolicyProvider'
@ -52,6 +53,7 @@ export default function App(): JSX.Element { @@ -52,6 +53,7 @@ export default function App(): JSX.Element {
<KindFilterProvider>
<UserPreferencesProvider>
<PageManager />
<ReadAloudPlayerModal />
<PublishSuccessSubtleIndicator />
<Toaster />
</UserPreferencesProvider>

15
src/components/NoteOptions/useMenuActions.tsx

@ -428,6 +428,12 @@ export function useMenuActions({ @@ -428,6 +428,12 @@ export function useMenuActions({
return event.tags.find(tag => tag[0] === 'd')?.[1] || ''
}, [isArticleType, event])
/** Decent Newsroom article URLs are scoped by author: /p/{npub}/d/{identifier} */
const authorNpubForDecentNewsroom = useMemo(
() => pubkeyToNpub(event.pubkey) ?? '',
[event.pubkey]
)
// Generate naddr for Alexandria URL
const naddr = useMemo(() => {
if (!isArticleType || !dTag) return ''
@ -554,9 +560,11 @@ export function useMenuActions({ @@ -554,9 +560,11 @@ export function useMenuActions({
}
const handleViewOnDecentNewsroom = () => {
if (!dTag) return
if (!dTag || !authorNpubForDecentNewsroom) return
closeDrawer()
window.open(`https://decentnewsroom.com/article/d/${dTag}`, '_blank', 'noopener,noreferrer')
const p = encodeURIComponent(authorNpubForDecentNewsroom)
const d = encodeURIComponent(dTag)
window.open(`https://decentnewsroom.com/p/${p}/d/${d}`, '_blank', 'noopener,noreferrer')
}
const actions: MenuAction[] = [
{
@ -750,7 +758,7 @@ export function useMenuActions({ @@ -750,7 +758,7 @@ export function useMenuActions({
onClick: handleViewOnAlexandria
})
}
if (dTag) {
if (dTag && authorNpubForDecentNewsroom) {
actions.push({
icon: Globe,
label: t('View on DecentNewsroom'),
@ -884,6 +892,7 @@ export function useMenuActions({ @@ -884,6 +892,7 @@ export function useMenuActions({
isArticleType,
articleMetadata,
dTag,
authorNpubForDecentNewsroom,
naddr,
onOpenPublicMessage,
onOpenCallInvite,

269
src/components/ReadAloudPlayerModal.tsx

@ -0,0 +1,269 @@ @@ -0,0 +1,269 @@
import { Button } from '@/components/ui/button'
import {
Dialog,
DialogContent,
DialogHeader,
DialogTitle
} from '@/components/ui/dialog'
import {
closeReadAloudPlayer,
getReadAloudServerSnapshot,
getReadAloudSnapshot,
subscribeReadAloud,
type ReadAloudSnapshot
} from '@/lib/read-aloud'
import { cn } from '@/lib/utils'
import type { TFunction } from 'i18next'
import { useCallback, useSyncExternalStore } from 'react'
import { useTranslation } from 'react-i18next'
/**
 * Render a unix-ms timestamp as a localized HH:MM:SS clock string.
 * Returns an em-dash placeholder when the timestamp is absent or the
 * locale formatter throws.
 */
function formatClock(ts: number | null): string {
  if (ts == null) return '—'
  const opts: Intl.DateTimeFormatOptions = {
    hour: '2-digit',
    minute: '2-digit',
    second: '2-digit'
  }
  try {
    return new Date(ts).toLocaleTimeString(undefined, opts)
  } catch {
    return '—'
  }
}
/**
 * Overlay classes for the read-aloud dialog: a lighter scrim than the shadcn
 * default bg-black/80 so the article underneath stays readable, plus a blur.
 * DialogContent pairs this with z-[230] so content stacks above this z-[220] overlay.
 */
const READ_ALOUD_OVERLAY_CLASS =
'z-[220] bg-black/35 backdrop-blur-sm dark:bg-black/40'
/**
 * Accessible label for one section tile: finished and not-yet-started sections
 * get static labels; the active section's label reflects the current phase.
 */
function sectionAriaLabel(i: number, snap: ReadAloudSnapshot, t: TFunction): string {
  const index = i + 1
  if (i < snap.chunksPlayed) return t('Read-aloud section done', { index })
  if (i > snap.currentChunkIndex) return t('Read-aloud section pending', { index })
  // i is the active section — map its playback phase to a label key.
  const keyByPhase: Record<string, string> = {
    requesting: 'Read-aloud section fetching',
    buffering: 'Read-aloud section preparing audio',
    preparing: 'Read-aloud section preparing audio',
    playing: 'Read-aloud section playing',
    paused: 'Read-aloud section paused'
  }
  return t(keyByPhase[snap.phase] ?? 'Read-aloud section pending', { index })
}
/** Human-readable status line for the whole player, keyed off the session phase. */
function phaseLabel(s: ReadAloudSnapshot, t: (k: string) => string): string {
  const labelKeys: Record<string, string> = {
    idle: 'Read-aloud idle',
    preparing: 'Preparing read-aloud…',
    requesting: 'Requesting audio…',
    buffering: 'Loading audio…',
    playing: 'Playing',
    paused: 'Paused',
    done: 'Read-aloud finished',
    error: 'Read-aloud error'
  }
  const key = labelKeys[s.phase]
  // Unknown phases fall through untranslated so a new state stays visible.
  return key != null ? t(key) : s.phase
}
/**
 * Modal player for the read-aloud feature. Pure view over the module-level
 * read-aloud store in src/lib/read-aloud: it renders the current snapshot
 * (phase, engine, per-chunk progress, Piper status, timings, errors) and its
 * only action is closing, which calls closeReadAloudPlayer() to stop playback.
 */
export default function ReadAloudPlayerModal(): JSX.Element {
const { t } = useTranslation()
// Subscribe to the read-aloud store; the server-snapshot getter keeps SSR/hydration stable.
const snap = useSyncExternalStore(
subscribeReadAloud,
getReadAloudSnapshot,
getReadAloudServerSnapshot
)
// Dismissing the dialog tears the session down entirely (stop audio + reset store).
const onOpenChange = useCallback((open: boolean) => {
if (!open) {
closeReadAloudPlayer()
}
}, [])
// Section tiles only make sense for Piper's chunked playback.
const showChunks = snap.engine === 'piper' && snap.totalChunks > 0
const nChunks = snap.totalChunks
// Overall progress percent: fully played chunks plus the fraction of the active chunk.
const overallPct =
nChunks > 0
? Math.min(100, ((snap.chunksPlayed + snap.chunkPlaybackRatio) / nChunks) * 100)
: 0
return (
<Dialog open={snap.open} onOpenChange={onOpenChange}>
<DialogContent
className="z-[230] max-w-md border-2 border-border bg-card shadow-2xl"
overlayClassName={READ_ALOUD_OVERLAY_CLASS}
>
<DialogHeader>
<DialogTitle className="pr-8 text-foreground">{t('Read aloud')}</DialogTitle>
</DialogHeader>
<div className="space-y-3 text-sm">
{/* Article title (when known) and the one-line phase status */}
{snap.title ? (
<p className="font-medium text-foreground line-clamp-2">{snap.title}</p>
) : null}
<p className="text-muted-foreground">{phaseLabel(snap, t)}</p>
{/* Which engine is speaking: Piper endpoint vs. browser speech synthesis */}
{snap.engine === 'piper' ? (
<p className="text-xs text-muted-foreground break-all">
{t('TTS endpoint')}: {snap.backend || '—'}
</p>
) : snap.engine === 'webspeech' ? (
<p className="text-xs text-muted-foreground">{t('Using browser speech synthesis')}</p>
) : null}
{/* Piper (server voice) status box: skipped notice, attempt start time, endpoint tried */}
{snap.readAloudPiperSkipped ||
snap.readAloudPiperTryStartedAt != null ||
snap.usedPiperFallback ? (
<div
className={cn(
'space-y-1.5 rounded-md border px-3 py-2 text-xs',
snap.readAloudPiperSkipped
? 'border-sky-500/35 bg-sky-500/10'
: 'border-border bg-muted/40'
)}
role="region"
aria-label={t('Read-aloud Piper status region')}
>
<p className="font-semibold text-foreground">{t('Read-aloud Piper status heading')}</p>
{snap.readAloudPiperSkipped ? (
<p className="text-muted-foreground">{t('Read-aloud Piper skipped notice')}</p>
) : null}
{snap.readAloudPiperTryStartedAt != null ? (
<p className="text-muted-foreground">
{t('Read-aloud Piper attempt started', {
time: formatClock(snap.readAloudPiperTryStartedAt)
})}
</p>
) : null}
{!snap.readAloudPiperSkipped && snap.backend ? (
<p className="break-all text-muted-foreground">
{t('Read-aloud Piper endpoint tried', { url: snap.backend })}
</p>
) : null}
</div>
) : null}
{/* Amber notice when Piper failed and playback fell back to the browser voice */}
{snap.engine === 'webspeech' && snap.usedPiperFallback ? (
<div
className="rounded-md border border-amber-500/40 bg-amber-500/10 px-3 py-2 text-xs text-foreground"
role="status"
>
<p className="font-medium text-amber-950 dark:text-amber-100">
{t('Read-aloud Piper fallback notice')}
</p>
{snap.piperFallbackDetail ? (
<p className="mt-1.5 whitespace-pre-wrap break-words text-muted-foreground">
<span className="font-medium text-foreground/90">
{t('Read-aloud Piper fallback detail label')}:{' '}
</span>
{snap.piperFallbackDetail}
</p>
) : null}
</div>
) : null}
{/* Per-section tiles + overall progress bar (Piper chunked playback only) */}
{showChunks ? (
<div className="space-y-2" role="region" aria-label={t('Read-aloud sections')}>
<p className="text-xs text-muted-foreground">
{t('Read-aloud section progress', {
current: snap.currentChunkIndex + 1,
total: snap.totalChunks
})}
</p>
<div
className="h-2 w-full overflow-hidden rounded-full bg-muted"
role="progressbar"
aria-valuemin={0}
aria-valuemax={100}
aria-valuenow={Math.round(overallPct)}
aria-label={t('Read-aloud overall progress')}
>
<div
className="h-full rounded-full bg-primary transition-[width] duration-200 ease-out"
style={{ width: `${overallPct}%` }}
/>
</div>
{/* One tile per chunk; the active tile pulses while fetching/decoding and fills while playing */}
<div className="flex gap-1" role="list">
{Array.from({ length: snap.totalChunks }, (_, i) => {
const done = i < snap.chunksPlayed
const active = i === snap.currentChunkIndex
const fetching = active && snap.phase === 'requesting'
const decoding = active && (snap.phase === 'buffering' || snap.phase === 'preparing')
const playing = active && snap.phase === 'playing'
const paused = active && snap.phase === 'paused'
return (
<div
key={i}
role="listitem"
className={cn(
'relative h-8 min-w-0 flex-1 overflow-hidden rounded-sm border border-border',
done && 'bg-primary',
!done && !active && 'bg-muted',
fetching && 'animate-pulse bg-amber-500/40',
decoding && !fetching && 'animate-pulse bg-amber-500/25',
(playing || paused) && !done && 'bg-muted'
)}
title={sectionAriaLabel(i, snap, t)}
>
{(playing || paused) && !done ? (
<div
className={cn(
'absolute inset-y-0 left-0 bg-primary/90',
paused && 'opacity-80'
)}
style={{
width: `${Math.round(Math.min(1, snap.chunkPlaybackRatio) * 100)}%`
}}
/>
) : null}
</div>
)
})}
</div>
<p className="text-[10px] leading-tight text-muted-foreground">
{snap.phase === 'requesting'
? t('Read-aloud legend fetching')
: snap.phase === 'buffering' || snap.phase === 'preparing'
? t('Read-aloud legend buffering')
: snap.phase === 'playing'
? t('Read-aloud legend playing')
: snap.phase === 'paused'
? t('Read-aloud legend paused')
: null}
</p>
</div>
) : null}
{/* Timing diagnostics for the current/last request */}
<dl className="grid grid-cols-[auto_1fr] gap-x-3 gap-y-1 text-xs border border-border rounded-md p-2 bg-muted/30">
<dt className="text-muted-foreground">{t('Request sent')}</dt>
<dd>{formatClock(snap.requestSentAt)}</dd>
<dt className="text-muted-foreground">{t('Response received')}</dt>
<dd>{formatClock(snap.responseReceivedAt)}</dd>
<dt className="text-muted-foreground">{t('Playback started')}</dt>
<dd>{formatClock(snap.playbackStartedAt)}</dd>
<dt className="text-muted-foreground">{t('Characters')}</dt>
<dd>{snap.charCount > 0 ? snap.charCount.toLocaleString() : '—'}</dd>
</dl>
{/* Last error, verbatim */}
{snap.error ? (
<p className="text-xs text-destructive whitespace-pre-wrap break-words border border-destructive/30 rounded-md p-2 bg-destructive/5">
{snap.error}
</p>
) : null}
<div className="flex justify-end pt-2">
<Button type="button" variant="secondary" size="sm" onClick={() => closeReadAloudPlayer()}>
{t('Close')}
</Button>
</div>
</div>
</DialogContent>
</Dialog>
)
}

41
src/i18n/locales/de.ts

@ -181,6 +181,47 @@ export default { @@ -181,6 +181,47 @@ export default {
'Vorlesen wird in diesem Browser nicht unterstützt',
'Nothing to read aloud': 'Kein Text zum Vorlesen',
'Read-aloud failed': 'Vorlesen fehlgeschlagen',
'Read aloud': 'Vorlesen',
'Read-aloud idle': 'Leerlauf',
'Preparing read-aloud…': 'Vorlesen wird vorbereitet…',
'Requesting audio…': 'Audio wird angefordert…',
'Loading audio…': 'Audio wird geladen…',
Playing: 'Wiedergabe',
Paused: 'Pausiert',
'Read-aloud finished': 'Beendet',
'Read-aloud error': 'Fehler',
'TTS endpoint': 'TTS-Endpunkt',
'Using browser speech synthesis': 'Browser-Sprachausgabe',
'Read-aloud section progress': 'Abschnitt {{current}} von {{total}}',
'Request sent': 'Anfrage gesendet',
'Response received': 'Antwort erhalten',
'Playback started': 'Wiedergabe gestartet',
Characters: 'Zeichen',
Pause: 'Pause',
Play: 'Abspielen',
Stop: 'Stopp',
'Read-aloud sections': 'Vorlesen — Abschnitte',
'Read-aloud overall progress': 'Gesamtfortschritt',
'Read-aloud section done': 'Abschnitt {{index}}: fertig',
'Read-aloud section pending': 'Abschnitt {{index}}: noch nicht gestartet',
'Read-aloud section fetching': 'Abschnitt {{index}}: Audio wird angefordert',
'Read-aloud section preparing audio': 'Abschnitt {{index}}: Audio wird geladen',
'Read-aloud section playing': 'Abschnitt {{index}}: Wiedergabe',
'Read-aloud section paused': 'Abschnitt {{index}}: pausiert',
'Read-aloud legend fetching': 'Audio für diesen Abschnitt wird vom Server angefordert…',
'Read-aloud legend buffering': 'Audio für diesen Abschnitt wird decodiert…',
'Read-aloud legend playing': 'Dieser Abschnitt wird wiedergegeben.',
'Read-aloud legend paused': 'Wiedergabe pausiert.',
'Read-aloud Piper fallback notice':
'Die Server-Stimme (Piper) konnte nicht genutzt werden. Es wird die Browser-Sprachausgabe verwendet.',
'Read-aloud Piper fallback detail label': 'Piper-Fehler',
'Read-aloud Piper status region': 'Piper-Sprachausgabe (Server)',
'Read-aloud Piper status heading': 'Piper (Server-Stimme)',
'Read-aloud Piper skipped notice':
'Für diese App ist keine Piper-URL gesetzt (siehe VITE_READ_ALOUD_TTS_URL). Es wird nur die Browser-Stimme verwendet — der Server wurde nicht angesprochen.',
'Read-aloud Piper attempt started':
'Piper wurde um {{time}} gestartet (dieses Vorlesen hat zuerst den Server verwendet).',
'Read-aloud Piper endpoint tried': 'Verwendete URL: {{url}}',
'Join the video call': 'Am Videoanruf teilnehmen',
'Schedule video call': 'Videoanruf planen',
"You're invited to a scheduled video call.":

40
src/i18n/locales/en.ts

@ -180,6 +180,46 @@ export default { @@ -180,6 +180,46 @@ export default {
'Read-aloud is not supported in this browser',
'Nothing to read aloud': 'Nothing to read aloud',
'Read-aloud failed': 'Read-aloud failed',
'Read aloud': 'Read aloud',
'Read-aloud idle': 'Idle',
'Preparing read-aloud…': 'Preparing read-aloud…',
'Requesting audio…': 'Requesting audio…',
'Loading audio…': 'Loading audio…',
Playing: 'Playing',
Paused: 'Paused',
'Read-aloud finished': 'Finished',
'Read-aloud error': 'Error',
'TTS endpoint': 'TTS endpoint',
'Using browser speech synthesis': 'Using browser speech synthesis',
'Read-aloud section progress': 'Section {{current}} of {{total}}',
'Request sent': 'Request sent',
'Response received': 'Response received',
'Playback started': 'Playback started',
Characters: 'Characters',
Pause: 'Pause',
Play: 'Play',
Stop: 'Stop',
'Read-aloud sections': 'Read-aloud sections',
'Read-aloud overall progress': 'Overall progress',
'Read-aloud section done': 'Section {{index}}: finished',
'Read-aloud section pending': 'Section {{index}}: not started yet',
'Read-aloud section fetching': 'Section {{index}}: requesting audio',
'Read-aloud section preparing audio': 'Section {{index}}: loading audio',
'Read-aloud section playing': 'Section {{index}}: playing',
'Read-aloud section paused': 'Section {{index}}: paused',
'Read-aloud legend fetching': 'Requesting audio for this section from the server…',
'Read-aloud legend buffering': 'Decoding audio for this section…',
'Read-aloud legend playing': 'Playing this section.',
'Read-aloud legend paused': 'Playback paused.',
'Read-aloud Piper fallback notice':
'Server voice (Piper) could not be used. Playing with your browser voice instead.',
'Read-aloud Piper fallback detail label': 'Piper error',
'Read-aloud Piper status region': 'Piper text-to-speech status',
'Read-aloud Piper status heading': 'Piper (server voice)',
'Read-aloud Piper skipped notice':
'No Piper URL is configured for this app (see VITE_READ_ALOUD_TTS_URL). Only the browser voice is used — the server was not contacted.',
'Read-aloud Piper attempt started': 'Piper was started at {{time}} (this read-aloud used the server first).',
'Read-aloud Piper endpoint tried': 'URL used: {{url}}',
'Join the video call': 'Join the video call',
'Schedule video call': 'Schedule video call',
"You're invited to a scheduled video call.": "You're invited to a scheduled video call.",

603
src/lib/read-aloud.ts

@ -3,6 +3,9 @@ import { getLongFormArticleMetadataFromEvent } from '@/lib/event-metadata' @@ -3,6 +3,9 @@ import { getLongFormArticleMetadataFromEvent } from '@/lib/event-metadata'
import logger from '@/lib/logger'
import { Event, kinds } from 'nostr-tools'
/** Keep each Piper request small: long JSON bodies and WAV responses can OOM or time out the server. */
const PIPER_CHUNK_MAX_CHARS = 3600
function readAloudEndpointForLog(): string {
const u = READ_ALOUD_TTS_URL
if (!u) return ''
@ -16,6 +19,134 @@ function readAloudEndpointForLog(): string { @@ -16,6 +19,134 @@ function readAloudEndpointForLog(): string {
/** Outcome reported to callers of the read-aloud entry points. */
export type ReadAloudResult = 'ok' | 'unsupported' | 'empty' | 'error'
/** Lifecycle of the active read-aloud session; drives the player's status line. */
export type ReadAloudPhase =
| 'idle'
| 'preparing'
| 'requesting'
| 'buffering'
| 'playing'
| 'paused'
| 'done'
| 'error'
/** Which engine is producing audio: Piper server TTS, browser Web Speech, or none yet. */
export type ReadAloudEngine = 'idle' | 'piper' | 'webspeech'
/** Immutable state snapshot consumed by ReadAloudPlayerModal via useSyncExternalStore. */
export type ReadAloudSnapshot = {
// Whether the player modal is visible.
open: boolean
// Article title for the modal ('' when unknown).
title: string
engine: ReadAloudEngine
phase: ReadAloudPhase
// Piper: number of text chunks the article was split into (0 for Web Speech).
totalChunks: number
// Piper: index of the chunk currently being fetched/played.
currentChunkIndex: number
/** Piper: chunks fully played (0 .. totalChunks). */
chunksPlayed: number
/** Piper: 0–1 within the current chunk (from media timeupdate). */
chunkPlaybackRatio: number
// Length of the plain text being read, in characters.
charCount: number
// Timeline timestamps in ms since epoch; null until the event has happened.
requestSentAt: number | null
responseReceivedAt: number | null
playbackStartedAt: number | null
finishedAt: number | null
// Last error message for display, or null.
error: string | null
/** True when Piper was tried first and we fell back to Web Speech (still playing or finished). */
usedPiperFallback: boolean
/** Piper failure message for the fallback notice (optional detail). */
piperFallbackDetail: string | null
/** No `READ_ALOUD_TTS_URL` — Piper was never available for this read-aloud. */
readAloudPiperSkipped: boolean
/** When the Piper path started (first UI frame); kept after fallback for the timeline. */
readAloudPiperTryStartedAt: number | null
// Volume applied to the Piper <audio> element (0–1).
volume: number
// Display string for the TTS endpoint in use ('' when not using Piper).
backend: string
}
/**
 * Pristine store state. Also used as the (cached) server snapshot — see
 * getReadAloudServerSnapshot below. Never mutated: patchSnapshot always
 * builds a fresh object.
 */
const initialSnapshot: ReadAloudSnapshot = {
  open: false,
  title: '',
  engine: 'idle',
  phase: 'idle',
  totalChunks: 0,
  currentChunkIndex: 0,
  chunksPlayed: 0,
  chunkPlaybackRatio: 0,
  charCount: 0,
  requestSentAt: null,
  responseReceivedAt: null,
  playbackStartedAt: null,
  finishedAt: null,
  error: null,
  usedPiperFallback: false,
  piperFallbackDetail: null,
  readAloudPiperSkipped: false,
  readAloudPiperTryStartedAt: null,
  volume: 1,
  backend: ''
}
// Module-level store + subscriber set (useSyncExternalStore external-store contract).
let snapshot: ReadAloudSnapshot = { ...initialSnapshot }
const listeners = new Set<() => void>()
/** Notify every subscriber that the snapshot reference changed. */
function emit(): void {
  listeners.forEach((l) => l())
}
/** Shallow-merge a partial update into a NEW snapshot object, then notify. */
function patchSnapshot(p: Partial<ReadAloudSnapshot>): void {
  snapshot = { ...snapshot, ...p }
  emit()
}
/** Subscribe to store changes; returns the unsubscribe callback. */
export function subscribeReadAloud(onStoreChange: () => void): () => void {
  listeners.add(onStoreChange)
  return () => listeners.delete(onStoreChange)
}
/** Current client snapshot (reference is stable between updates). */
export function getReadAloudSnapshot(): ReadAloudSnapshot {
  return snapshot
}
/**
 * Server/hydration snapshot. Must return a CACHED value: React's
 * useSyncExternalStore warns (and can loop) when getServerSnapshot returns a
 * new object on every call. initialSnapshot is never mutated, so returning
 * the shared reference is safe. (Previously returned a fresh spread copy.)
 */
export function getReadAloudServerSnapshot(): ReadAloudSnapshot {
  return initialSnapshot
}
// --- Playback control state: one read-aloud session at a time (module-level) ---
// In-flight fetch controller for the current Piper session; aborted on stop/close.
let readAloudAbort: AbortController | null = null
// The <audio> element currently playing a Piper chunk, if any.
let readAloudAudio: HTMLAudioElement | null = null
// True while the user has paused; gates the chunk loop via waitUntilUnpaused().
let readAloudUserPaused = false
// Promise resolvers parked by waitUntilUnpaused(), released on resume.
let unpauseResolvers: Array<() => void> = []
/** Release every promise parked by waitUntilUnpaused() (swap-then-run to avoid re-entrancy). */
function resolveUnpauses(): void {
const r = unpauseResolvers
unpauseResolvers = []
r.forEach((fn) => {
fn()
})
}
/** Resolve immediately unless the user paused; otherwise park until resolveUnpauses(). */
function waitUntilUnpaused(): Promise<void> {
if (!readAloudUserPaused) return Promise.resolve()
return new Promise((resolve) => {
unpauseResolvers.push(resolve)
})
}
/** Let the read-aloud modal paint Piper / status before fetch or Web Speech starts. */
// Double rAF ensures at least one committed frame; the 48 ms timeout adds margin
// for the dialog to appear before the blocking TTS work begins.
function yieldForReadAloudUi(): Promise<void> {
return new Promise((resolve) => {
requestAnimationFrame(() => {
requestAnimationFrame(() => {
window.setTimeout(resolve, 48)
})
})
})
}
/** Stop playback, clear pause state, and reset the store to its initial snapshot. */
export function closeReadAloudPlayer(): void {
stopReadAloudPlayback()
readAloudUserPaused = false
unpauseResolvers = []
snapshot = { ...initialSnapshot }
emit()
}
const KINDS_WITH_METADATA_TITLE = new Set<number>([
kinds.LongFormArticle,
ExtendedKind.PUBLICATION,
@ -24,19 +155,20 @@ const KINDS_WITH_METADATA_TITLE = new Set<number>([ @@ -24,19 +155,20 @@ const KINDS_WITH_METADATA_TITLE = new Set<number>([
ExtendedKind.WIKI_ARTICLE
])
let readAloudAbort: AbortController | null = null
let readAloudAudio: HTMLAudioElement | null = null
function stopReadAloudPlayback(): void {
readAloudAbort?.abort()
readAloudAbort = null
if (readAloudAudio) {
const url = readAloudAudio.src
readAloudAudio.onended = null
readAloudAudio.onerror = null
readAloudAudio.pause()
readAloudAudio.removeAttribute('src')
readAloudAudio.load()
const el = readAloudAudio
el.onended = null
el.onerror = null
el.pause()
el.removeAttribute('src')
el.load()
if (el.parentNode) {
el.parentNode.removeChild(el)
}
if (url.startsWith('blob:')) {
URL.revokeObjectURL(url)
}
@ -45,6 +177,68 @@ function stopReadAloudPlayback(): void { @@ -45,6 +177,68 @@ function stopReadAloudPlayback(): void {
window.speechSynthesis?.cancel()
}
/**
 * Cut index in `s` for the first chunk: prefer the position right after the
 * last whitespace inside the window so words stay intact; fall back to a hard
 * cut at `maxLen` only when the window contains no whitespace at all.
 */
function splitAfterLastWhitespaceInWindow(s: string, maxLen: number): number {
  const head = s.slice(0, maxLen)
  let i = head.length - 1
  while (i > 0) {
    if (/\s/u.test(head[i]!)) return i + 1
    i--
  }
  return maxLen
}
/** Break a single piece longer than `maxLen` into word-aligned parts. */
function splitOversizedPiece(piece: string, maxLen: number): string[] {
  const parts: string[] = []
  let rest = piece
  while (rest.length > maxLen) {
    const cut = splitAfterLastWhitespaceInWindow(rest, maxLen)
    const head = rest.slice(0, cut).trimEnd()
    if (head) parts.push(head)
    rest = rest.slice(cut).trimStart()
  }
  if (rest) parts.push(rest)
  return parts
}
/** Split plain text into segments under Piper's practical request size (paragraph boundaries first). */
function splitTextIntoTtsChunks(text: string, maxLen: number = PIPER_CHUNK_MAX_CHARS): string[] {
  const normalized = text.replace(/\r\n/g, '\n').trim()
  if (!normalized) return []
  if (normalized.length <= maxLen) return [normalized]
  const paragraphs = normalized
    .split(/\n\n+/)
    .map((p) => p.trim())
    .filter(Boolean)
  const chunks: string[] = []
  let pending = ''
  for (const para of paragraphs) {
    if (para.length > maxLen) {
      // Oversized paragraph: flush accumulated text, then hard-split the paragraph.
      if (pending) {
        chunks.push(pending)
        pending = ''
      }
      for (const part of splitOversizedPiece(para, maxLen)) chunks.push(part)
      continue
    }
    const candidate = pending ? `${pending}\n\n${para}` : para
    if (candidate.length <= maxLen) {
      pending = candidate
    } else {
      // pending is non-empty here: an empty pending would have taken the branch above.
      chunks.push(pending)
      pending = para
    }
  }
  if (pending) chunks.push(pending)
  return chunks
}
/** Strip common Markdown / AsciiDoc / code so TTS reads plain text (same idea as NotePage preview). */
function stripMarkupForReadAloud(content: string): string {
let text = content
@ -61,6 +255,14 @@ function stripMarkupForReadAloud(content: string): string { @@ -61,6 +255,14 @@ function stripMarkupForReadAloud(content: string): string {
return text.trim()
}
/** Title for the player header: metadata title for long-form kinds, '' otherwise. */
function readAloudTitleFromEvent(event: Event): string {
  if (!KINDS_WITH_METADATA_TITLE.has(event.kind)) return ''
  const meta = getLongFormArticleMetadataFromEvent(event)
  return meta.title?.trim() ?? ''
}
function buildReadAloudPlainText(event: Event): string {
let raw = event.content?.trim() ?? ''
if (KINDS_WITH_METADATA_TITLE.has(event.kind)) {
@ -73,88 +275,313 @@ function buildReadAloudPlainText(event: Event): string { @@ -73,88 +275,313 @@ function buildReadAloudPlainText(event: Event): string {
return stripMarkupForReadAloud(raw)
}
/**
* Piper / Wyoming proxy (aitherboard-compatible): POST JSON, receive WAV.
*/
async function speakViaPiperTts(text: string): Promise<ReadAloudResult> {
stopReadAloudPlayback()
readAloudAbort = new AbortController()
function playPiperBlob(blob: Blob, signal: AbortSignal): Promise<'ok' | 'error' | 'aborted'> {
return new Promise((resolve) => {
if (signal.aborted) {
resolve('aborted')
return
}
try {
const response = await fetch(READ_ALOUD_TTS_URL, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ text, speed: 1 }),
signal: readAloudAbort.signal
})
const audioUrl = URL.createObjectURL(blob)
const audio = new Audio()
readAloudAudio = audio
audio.volume = snapshot.volume
audio.src = audioUrl
audio.preload = 'auto'
try {
audio.setAttribute('data-jumble-read-aloud', '')
audio.style.display = 'none'
document.body.appendChild(audio)
} catch {
/* detached Audio() still works in most browsers */
}
let lastRatioEmit = 0
if (!response.ok) {
logger.warn('[ReadAloud] Piper HTTP error', {
status: response.status,
endpoint: readAloudEndpointForLog()
const onPlay = (): void => {
if (signal.aborted || readAloudAudio !== audio) return
patchSnapshot({
phase: 'playing',
playbackStartedAt: snapshot.playbackStartedAt ?? Date.now()
})
return 'error'
}
const blob = await response.blob()
if (!blob.size) {
logger.warn('[ReadAloud] Piper returned empty body', { endpoint: readAloudEndpointForLog() })
return 'error'
const onPause = (): void => {
if (signal.aborted || readAloudAudio !== audio) return
if (audio.ended) return
patchSnapshot({ phase: 'paused' })
}
const audioUrl = URL.createObjectURL(blob)
const audio = new Audio()
readAloudAudio = audio
audio.src = audioUrl
const onTimeUpdate = (): void => {
if (signal.aborted || readAloudAudio !== audio) return
const now = Date.now()
if (now - lastRatioEmit < 150) return
lastRatioEmit = now
const d = audio.duration
if (!d || !Number.isFinite(d) || d <= 0) return
patchSnapshot({ chunkPlaybackRatio: Math.min(1, audio.currentTime / d) })
}
const cleanupBlob = () => {
const cleanup = (): void => {
audio.removeEventListener('play', onPlay)
audio.removeEventListener('pause', onPause)
audio.removeEventListener('timeupdate', onTimeUpdate)
audio.onended = null
audio.onerror = null
signal.removeEventListener('abort', onAbort)
if (audio.parentNode) {
audio.parentNode.removeChild(audio)
}
if (audio.src.startsWith('blob:')) {
URL.revokeObjectURL(audioUrl)
}
}
audio.addEventListener('ended', () => {
cleanupBlob()
const onAbort = (): void => {
cleanup()
audio.pause()
if (readAloudAudio === audio) {
readAloudAudio = null
}
})
audio.addEventListener('error', () => {
cleanupBlob()
})
resolve('aborted')
}
try {
await audio.play()
return 'ok'
} catch (playErr) {
signal.addEventListener('abort', onAbort)
audio.addEventListener('play', onPlay)
audio.addEventListener('pause', onPause)
audio.addEventListener('timeupdate', onTimeUpdate)
audio.onended = (): void => {
patchSnapshot({ chunkPlaybackRatio: 1 })
cleanup()
if (readAloudAudio === audio) {
readAloudAudio = null
}
resolve('ok')
}
audio.onerror = (): void => {
cleanup()
if (readAloudAudio === audio) {
readAloudAudio = null
}
resolve('error')
}
void audio.play().catch((playErr: unknown) => {
logger.warn('[ReadAloud] Piper audio.play() blocked or failed', {
endpoint: readAloudEndpointForLog(),
error: playErr instanceof Error ? playErr.message : String(playErr)
})
cleanupBlob()
cleanup()
if (readAloudAudio === audio) {
readAloudAudio = null
}
return 'error'
}
} catch (e) {
const isAbort =
(e instanceof DOMException && e.name === 'AbortError') ||
(e instanceof Error && e.name === 'AbortError')
if (isAbort) {
return 'ok'
resolve('error')
})
})
}
async function speakViaPiperTtsChunks(chunks: string[]): Promise<ReadAloudResult> {
stopReadAloudPlayback()
readAloudAbort = new AbortController()
const signal = readAloudAbort.signal
if (chunks.length === 0) {
return 'empty'
}
try {
for (let i = 0; i < chunks.length; i++) {
await waitUntilUnpaused()
if (signal.aborted) {
return 'ok'
}
const sentAt = Date.now()
patchSnapshot({
currentChunkIndex: i,
chunksPlayed: i,
phase: 'requesting',
requestSentAt: sentAt,
responseReceivedAt: null,
chunkPlaybackRatio: 0
})
let response: Response
try {
response = await fetch(READ_ALOUD_TTS_URL, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ text: chunks[i], speed: 1 }),
signal
})
} catch (e) {
const isAbort =
(e instanceof DOMException && e.name === 'AbortError') ||
(e instanceof Error && e.name === 'AbortError')
if (isAbort) {
return 'ok'
}
const msg = e instanceof Error ? e.message : String(e)
logger.warn('[ReadAloud] Piper fetch failed (check CORS on the TTS host or use same-origin /api/piper-tts)', {
endpoint: readAloudEndpointForLog(),
error: msg
})
patchSnapshot({
phase: 'error',
error: `Part ${i + 1} of ${chunks.length}: ${msg}`
})
return 'error'
}
if (!response.ok) {
logger.warn('[ReadAloud] Piper HTTP error', {
status: response.status,
endpoint: readAloudEndpointForLog()
})
patchSnapshot({
phase: 'error',
error: `Part ${i + 1} of ${chunks.length}: HTTP ${response.status}`
})
return 'error'
}
const blob = await response.blob()
if (!blob.size) {
logger.warn('[ReadAloud] Piper returned empty body', { endpoint: readAloudEndpointForLog() })
patchSnapshot({
phase: 'error',
error: `Part ${i + 1} of ${chunks.length}: empty audio response`
})
return 'error'
}
patchSnapshot({
responseReceivedAt: Date.now(),
phase: 'buffering'
})
await waitUntilUnpaused()
if (signal.aborted) {
return 'ok'
}
const played = await playPiperBlob(blob, signal)
if (played === 'aborted') {
return 'ok'
}
if (played === 'error') {
patchSnapshot({
phase: 'error',
error: `Part ${i + 1} of ${chunks.length}: playback failed (browser blocked audio or corrupt WAV)`
})
return 'error'
}
}
logger.warn('[ReadAloud] Piper fetch failed (check CORS on the TTS host or use same-origin /api/piper-tts)', {
endpoint: readAloudEndpointForLog(),
error: e instanceof Error ? e.message : String(e)
patchSnapshot({
phase: 'done',
finishedAt: Date.now(),
currentChunkIndex: chunks.length - 1,
chunksPlayed: chunks.length,
chunkPlaybackRatio: 0
})
return 'error'
return 'ok'
} finally {
readAloudAbort = null
}
}
function speakViaWebSpeech(text: string): void {
async function speakViaWebSpeech(
text: string,
title: string,
options?: { fromPiperFallback?: boolean; browserOnlyNoPiper?: boolean }
): Promise<ReadAloudResult> {
stopReadAloudPlayback()
window.speechSynthesis.speak(new SpeechSynthesisUtterance(text))
readAloudUserPaused = false
resolveUnpauses()
if (!window.speechSynthesis) {
patchSnapshot({
open: true,
title,
engine: 'webspeech',
phase: 'error',
error: 'Speech synthesis is not available',
charCount: text.length,
backend: '',
...(!options?.fromPiperFallback ? { usedPiperFallback: false, piperFallbackDetail: null } : {}),
...(options?.browserOnlyNoPiper
? { readAloudPiperSkipped: true, readAloudPiperTryStartedAt: null }
: !options?.fromPiperFallback
? { readAloudPiperSkipped: false, readAloudPiperTryStartedAt: null }
: {})
})
return 'unsupported'
}
let webspeechPiperFields: Partial<ReadAloudSnapshot>
if (options?.browserOnlyNoPiper) {
webspeechPiperFields = {
readAloudPiperSkipped: true,
readAloudPiperTryStartedAt: null,
backend: ''
}
} else if (options?.fromPiperFallback) {
webspeechPiperFields = { readAloudPiperSkipped: false, backend: snapshot.backend }
} else {
webspeechPiperFields = {
readAloudPiperSkipped: false,
readAloudPiperTryStartedAt: null,
backend: ''
}
}
patchSnapshot({
open: true,
title,
engine: 'webspeech',
phase: 'buffering',
charCount: text.length,
totalChunks: 0,
currentChunkIndex: 0,
chunksPlayed: 0,
chunkPlaybackRatio: 0,
requestSentAt: null,
responseReceivedAt: null,
playbackStartedAt: null,
finishedAt: null,
error: null,
...(!options?.fromPiperFallback ? { usedPiperFallback: false, piperFallbackDetail: null } : {}),
...webspeechPiperFields
})
if (options?.browserOnlyNoPiper || options?.fromPiperFallback) {
await yieldForReadAloudUi()
}
const u = new SpeechSynthesisUtterance(text)
u.onstart = (): void => {
patchSnapshot({
phase: 'playing',
playbackStartedAt: Date.now()
})
}
u.onend = (): void => {
patchSnapshot({
phase: 'done',
finishedAt: Date.now()
})
}
u.onerror = (ev): void => {
patchSnapshot({
phase: 'error',
error: ev.error ?? 'speech synthesis error'
})
}
window.speechSynthesis.speak(u)
return 'ok'
}
export async function speakNoteReadAloud(event: Event): Promise<ReadAloudResult> {
@ -167,21 +594,63 @@ export async function speakNoteReadAloud(event: Event): Promise<ReadAloudResult> @@ -167,21 +594,63 @@ export async function speakNoteReadAloud(event: Event): Promise<ReadAloudResult>
return 'empty'
}
const title = readAloudTitleFromEvent(event)
if (READ_ALOUD_TTS_URL) {
const piperResult = await speakViaPiperTts(text)
stopReadAloudPlayback()
readAloudUserPaused = false
resolveUnpauses()
const chunks = splitTextIntoTtsChunks(text, PIPER_CHUNK_MAX_CHARS)
patchSnapshot({
open: true,
title,
engine: 'piper',
phase: 'preparing',
charCount: text.length,
totalChunks: chunks.length,
currentChunkIndex: 0,
chunksPlayed: 0,
chunkPlaybackRatio: 0,
requestSentAt: null,
responseReceivedAt: null,
playbackStartedAt: null,
finishedAt: null,
error: null,
usedPiperFallback: false,
piperFallbackDetail: null,
readAloudPiperSkipped: false,
readAloudPiperTryStartedAt: Date.now(),
backend: readAloudEndpointForLog()
})
await yieldForReadAloudUi()
const piperResult = await speakViaPiperTtsChunks(chunks)
if (piperResult === 'ok') {
return 'ok'
}
logger.warn(
'[ReadAloud] Using Web Speech fallback — Piper did not play. See previous [ReadAloud] log for cause.',
'[ReadAloud] Using Web Speech fallback — Piper did not play. See previous [ReadAloud] log or player error.',
{ endpoint: readAloudEndpointForLog() }
)
}
if (!window.speechSynthesis) {
return READ_ALOUD_TTS_URL ? 'error' : 'unsupported'
const prior = snapshot.error?.trim() || null
patchSnapshot({
engine: 'webspeech',
phase: 'preparing',
error: null,
usedPiperFallback: true,
piperFallbackDetail: prior,
totalChunks: 0,
currentChunkIndex: 0,
chunksPlayed: 0,
chunkPlaybackRatio: 0
})
return await speakViaWebSpeech(text, title, { fromPiperFallback: true })
}
speakViaWebSpeech(text)
return 'ok'
return await speakViaWebSpeech(text, title, { browserOnlyNoPiper: true })
}

Loading…
Cancel
Save