16 changed files with 854 additions and 75 deletions
@ -0,0 +1,35 @@ |
|||||||
|
import { describe, expect, it } from 'vitest' |
||||||
|
import { detectReadAloudContentLanguage } from '@/lib/read-aloud-content-language' |
||||||
|
|
||||||
|
/**
 * Table-driven checks for the read-aloud language heuristic.
 * Each row is [test title, input text, expected language code].
 */
describe('detectReadAloudContentLanguage', () => {
  const cases: ReadonlyArray<readonly [string, string, string]> = [
    ['detects German from umlauts', 'Grüße aus München und Spaß', 'de'],
    ['detects plain ASCII English', 'Hello this is a test note about nothing special.', 'en'],
    [
      'prefers British English when UK spellings dominate',
      'The colour of the behaviour was odd; we organised a favourable defence of the centre.',
      'en-gb'
    ],
    [
      'prefers US English when American spellings dominate',
      'The color and behavior at the center were organized with a traveling neighbor.',
      'en'
    ],
    ['detects Russian from Cyrillic', 'Привет мир это тест', 'ru'],
    ['detects Polish', 'Cześć, to jest żółć i łódź.', 'pl'],
    [
      'detects Turkish from distinctive letters (avoid ü so German is not triggered)',
      // \u0130 is the dotted capital İ, one of the letters the detector treats as Turkish-only.
      'Merhaba, dağ ve şeker \u0130stanbul.',
      'tr'
    ]
  ]

  for (const [title, input, expected] of cases) {
    it(title, () => {
      expect(detectReadAloudContentLanguage(input)).toBe(expected)
    })
  }
})
||||||
@ -0,0 +1,69 @@ |
|||||||
|
/**
 * Heuristic language guess for read-aloud / Piper when there is no persisted translation `lang`.
 * Keep in sync with `services/piper-tts-proxy/server.ts` `detectLanguage` (same script / ratio logic),
 * plus `en-gb` hints and a few extra Latin scripts (pl, cs, tr) that have Piper voices in-app.
 *
 * Only the first 500 UTF-16 code units of `text` are inspected. Each candidate language is scored as
 * (number of marker characters) / (sample length); the first check whose threshold is exceeded wins,
 * so the order of the checks is significant.
 *
 * Languages identified by letters no other candidate uses (tr, pl, cs) are tested before German and
 * French: Turkish shares ö/ü/ç and Czech shares é with those sets, so testing them later would
 * mislabel ordinary Turkish or Czech text as `de` / `fr`.
 *
 * @param text Content to classify; an empty string yields `'en'`.
 * @returns One of `ru`, `zh`, `ar`, `tr`, `pl`, `cs`, `de`, `fr`, `es`, `it`, `en-gb`, `en`.
 */
export function detectReadAloudContentLanguage(text: string): string {
  if (!text) return 'en'

  // Non-empty by the guard above, so the length is a safe divisor.
  const sample = text.slice(0, 500)
  /** Share of the sample (0..1) made of characters matching `pattern` (needs the `g` flag). */
  const ratio = (pattern: RegExp): number => countPatternMatches(sample, pattern) / sample.length

  // Non-Latin scripts first.
  if (ratio(/[а-яёА-ЯЁ]/g) > 0.1) return 'ru'
  // Hangul / kana text returns 'en'.
  // NOTE(review): presumably because no ko/ja voice is offered — confirm against the voice list.
  if (ratio(/[\uac00-\ud7af]/g) > 0.06 || ratio(/[\u3040-\u309f\u30a0-\u30ff]/g) > 0.02) return 'en'
  if (ratio(/[\u4e00-\u9fff]/g) > 0.1) return 'zh'
  if (ratio(/[\u0600-\u06ff]/g) > 0.1) return 'ar'

  // Latin scripts with distinctive letters, ahead of the broader German / French sets.
  /** Exclude üöç (shared with German / French); rely on ğ/ı/ş/İ so “Grüße” is not Turkish. */
  if (ratio(/[ğĞıİşŞ]/g) > 0.02) return 'tr'
  /**
   * `ó` counts toward the Polish ratio but is shared with Spanish, so at least one letter that is
   * Polish-only among the candidates must also be present; otherwise Spanish text such as
   * “canción” would be reported as Polish.
   */
  const polishDistinctive = countPatternMatches(sample, /[ąćęłńśźżĄĆĘŁŃŚŹŻ]/g)
  if (polishDistinctive > 0 && ratio(/[ąćęłńóśźżĄĆĘŁŃÓŚŹŻ]/g) > 0.02) return 'pl'
  if (ratio(/[řůŘŮ]/g) > 0.015) return 'cs'

  if (ratio(/[äöüßÄÖÜ]/g) > 0.02) return 'de'
  if (ratio(/[éèêëàâäçôùûüÉÈÊËÀÂÄÇÔÙÛÜ]/g) > 0.02) return 'fr'
  if (ratio(/[ñáéíóúüÑÁÉÍÓÚÜ¿¡]/g) > 0.02) return 'es'
  if (ratio(/[àèéìòùÀÈÉÌÒÙ]/g) > 0.02) return 'it'

  return preferBritishEnglish(sample) ? 'en-gb' : 'en'
}

/** Number of matches of a global (`g`-flagged) pattern in `sample`; 0 when there are none. */
function countPatternMatches(sample: string, pattern: RegExp): number {
  return sample.match(pattern)?.length ?? 0
}

/** Weak signal: UK spellings vs US spellings when the rest looks like Latin “English”. */
function preferBritishEnglish(sample: string): boolean {
  const uk =
    /\b(colour|behaviour|realise|realising|centre|defence|favour|favourite|organised|travelling|neighbour|humour|labour)\b/gi
  const us =
    /\b(color|behavior|realize|realizing|center|defense|favor|favorite|organized|traveling|neighbor|humor|labor)\b/gi
  // Strictly more UK hits than US hits; a tie (including 0 vs 0) stays with plain 'en'.
  return countPatternMatches(sample, uk) > countPatternMatches(sample, us)
}
||||||
@ -0,0 +1,127 @@ |
|||||||
|
import { describe, expect, it, vi, beforeEach } from 'vitest' |
||||||
|
import { finalizeEvent, generateSecretKey, getPublicKey, nip19 } from 'nostr-tools' |
||||||
|
import type { Event } from 'nostr-tools' |
||||||
|
import client from '@/services/client.service' |
||||||
|
import { expandNostrReferencesForReadAloud } from '@/lib/read-aloud' |
||||||
|
|
||||||
|
// i18n stub: `t` returns fixed English strings for the read-aloud keys used by the code under
// test, interpolates `name` for the "quoted from" key, and echoes any other key back unchanged.
vi.mock('@/i18n', () => {
  const translate = (key: string, opts?: { name?: string }): string => {
    if (key === 'Read aloud quoted from') {
      return `Quoted from ${opts?.name ?? ''}.`
    }
    const fixedStrings: Record<string, string> = {
      'Read aloud unknown author': 'Unknown author',
      'Read aloud embedded note unavailable': 'Quoted note not loaded.',
      'Read aloud nostr profile unavailable': 'Nostr profile reference.',
      'Read aloud relay reference': 'Nostr relay reference.',
      'Read aloud nostr reference unavailable': 'Nostr reference.'
    }
    return fixedStrings[key] ?? key
  }

  return {
    default: { t: translate },
    normalizeToSupportedAppLanguage: (c: string) => c as 'en',
    LocalizedLanguageNames: {} as Record<'en', string>
  }
})

// Client stub: only the two session-cache lookups exercised by these tests, both as bare spies.
vi.mock('@/services/client.service', () => {
  const clientStub = {
    peekSessionCachedEvent: vi.fn(),
    eventService: {
      getSessionMetadataForPubkey: vi.fn()
    }
  }
  return { default: clientStub }
})

// Typed handles on the mocked lookups so each test can script their return values.
const peek = vi.mocked(client.peekSessionCachedEvent)
const getMeta = vi.mocked(client.eventService.getSessionMetadataForPubkey)
||||||
|
|
||||||
|
/**
 * Covers how `nostr:` references inside note text are rewritten for read-aloud: embedded notes
 * and profile mentions, both when the referenced data is in the (mocked) session cache and when
 * it is not.
 */
describe('expandNostrReferencesForReadAloud', () => {
  /** Signs a kind-0 metadata event with `sk` whose content is the JSON form of `metadata`. */
  const signProfile = (sk: Uint8Array, metadata: Record<string, string>): Event =>
    finalizeEvent(
      {
        kind: 0,
        pubkey: getPublicKey(sk),
        content: JSON.stringify(metadata),
        tags: [],
        created_at: 1
      },
      sk
    ) as Event

  /** Makes the metadata lookup return `profile` for `pk` (hex compared case-insensitively) only. */
  const serveProfile = (pk: string, profile: Event): void => {
    getMeta.mockImplementation((hex: string) => (hex.toLowerCase() === pk.toLowerCase() ? profile : undefined))
  }

  // Start every test with unscripted spies so cached data cannot leak between cases.
  beforeEach(() => {
    peek.mockReset()
    getMeta.mockReset()
  })

  it('leaves plain text unchanged', () => {
    expect(expandNostrReferencesForReadAloud('hello world')).toBe('hello world')
  })

  it('replaces nostr:note with placeholder when event is not cached', () => {
    const sk = generateSecretKey()
    const ev = finalizeEvent({ kind: 1, content: 'x', tags: [], created_at: 1 }, sk)
    const note = nip19.noteEncode(ev.id)
    peek.mockReturnValue(undefined)
    expect(expandNostrReferencesForReadAloud(`See nostr:${note} please`)).toBe('See Quoted note not loaded. please')
  })

  it('replaces nostr:note with quoted-from line and body when cached', () => {
    const sk = generateSecretKey()
    const pk = getPublicKey(sk)
    const inner = finalizeEvent({ kind: 1, content: 'Inner **bold**', tags: [], created_at: 2 }, sk)
    const note = nip19.noteEncode(inner.id)
    // Answer for either the bech32 note id or the raw hex id.
    peek.mockImplementation((id: string) => {
      if (id === note || id === inner.id) return inner as Event
      return undefined
    })
    serveProfile(pk, signProfile(sk, { display_name: 'Pat' }))
    const out = expandNostrReferencesForReadAloud(`Quote: nostr:${note} end`)
    expect(out).toContain('Quoted from Pat.')
    expect(out).toContain('Inner bold')
    expect(out).not.toContain('nostr:')
  })

  it('uses unknown author when embedded note is cached but profile is not', () => {
    const sk = generateSecretKey()
    const inner = finalizeEvent({ kind: 1, content: 'Hi', tags: [], created_at: 2 }, sk)
    const note = nip19.noteEncode(inner.id)
    peek.mockImplementation((id: string) => (id === note || id === inner.id ? (inner as Event) : undefined))
    getMeta.mockReturnValue(undefined)
    const out = expandNostrReferencesForReadAloud(`nostr:${note}`)
    expect(out).toMatch(/^Quoted from Unknown author\. Hi$/)
  })

  it('replaces nostr:npub with display name when kind 0 is cached', () => {
    const sk = generateSecretKey()
    const pk = getPublicKey(sk)
    const npub = nip19.npubEncode(pk)
    serveProfile(pk, signProfile(sk, { name: 'n_name' }))
    expect(expandNostrReferencesForReadAloud(`Hey nostr:${npub}!`)).toBe('Hey n_name!')
  })

  it('replaces nostr:npub with placeholder when profile is not cached', () => {
    const sk = generateSecretKey()
    const pk = getPublicKey(sk)
    const npub = nip19.npubEncode(pk)
    getMeta.mockReturnValue(undefined)
    expect(expandNostrReferencesForReadAloud(`x nostr:${npub} y`)).toBe('x Nostr profile reference. y')
  })

  it('does not match bare npub inside nostr:npub… (no double replacement)', () => {
    const sk = generateSecretKey()
    const pk = getPublicKey(sk)
    const npub = nip19.npubEncode(pk)
    getMeta.mockReturnValue(undefined)
    // Exactly one placeholder: the npub inside the URI must not be replaced a second time.
    expect(expandNostrReferencesForReadAloud(`nostr:${npub}`)).toBe('Nostr profile reference.')
  })
})
||||||
@ -0,0 +1,85 @@ |
|||||||
|
import { afterEach, describe, expect, it, vi } from 'vitest' |
||||||
|
import type { Event } from 'nostr-tools' |
||||||
|
import { translateNoteForDisplay } from '@/lib/translate-note-for-menu' |
||||||
|
|
||||||
|
/**
 * Markdown fixture: an intro paragraph, a three-line blockquote, and a closing paragraph,
 * each separated by one blank line.
 */
const BLOCKQUOTE_THREE_LINES = [
  'Intro paragraph.',
  '',
  '> Stephen cheered when John was fired.',
  '> We do not like Stephen.',
  '> John should not have been fired.',
  '',
  'After quote.'
].join('\n')
||||||
|
|
||||||
|
// Translate-client stub: every helper is a pass-through or a constant, so the tests observe only
// how the note is split into chunks and stitched back together.
vi.mock('@/lib/translate-client', () => {
  const passThrough = (c: string) => c

  return {
    /** Identity so chunk assembly can be asserted without delimiter noise from the mock. */
    translatePlainText: vi.fn(async (text: string) => text),
    normalizeTranslateLangCode: passThrough,
    translateApiLanguageCode: passThrough,
    translateServerSupportsLogicalTarget: () => true,
    isTranslateConfigured: () => true,
    fetchTranslateLanguages: vi.fn(async () => []),
    clearTranslateLanguagesCache: vi.fn()
  }
})
||||||
|
|
||||||
|
/**
 * Builds a kind-1 event fixture whose only varying field is `content`.
 * The id and pubkey are 64-character filler strings and the signature is empty, so the result
 * is a plain data object, not a verifiable event.
 */
function kind1Event(content: string): Event {
  const filler = (digit: string): string => digit.repeat(64)
  const fixture = {
    id: filler('0'),
    pubkey: filler('1'),
    kind: 1,
    content,
    tags: [],
    created_at: 0,
    sig: ''
  }
  return fixture as Event
}
||||||
|
|
||||||
|
/**
 * Checks that `translateNoteForDisplay` preserves Markdown structure when a note is translated.
 * `translatePlainText` is mocked, so the assertions observe what is sent to the translator and
 * how the pieces are reassembled.
 */
describe('translateNoteForDisplay', () => {
  // Restore the identity translator after each test so a per-test override cannot leak.
  afterEach(async () => {
    const { translatePlainText } = await import('@/lib/translate-client')
    vi.mocked(translatePlainText).mockImplementation(async (text: string) => text)
  })

  it('keeps blank lines across chunk boundaries (list → paragraph)', async () => {
    const fill = 'word '.repeat(520)
    const content = `${fill.trimEnd()}\n\nFINAL_LINE_UNIQUE`
    // NOTE(review): 2500 is presumably the length beyond which the note is chunked — confirm.
    expect(content.length).toBeGreaterThan(2500)

    const out = await translateNoteForDisplay(kind1Event(content), 'fr')
    expect(out.content).toContain('\n\nFINAL_LINE_UNIQUE')
  })

  it('keeps markdown bullet list, blank line, and paragraph across chunk splits', async () => {
    const long = 'x'.repeat(700)
    const tail = 'z'.repeat(2200)
    const content = `- ${long}\n- y\n\nPARA_MARKER${tail}`
    expect(content.length).toBeGreaterThan(2500)

    const out = await translateNoteForDisplay(kind1Event(content), 'fr')
    expect(out.content).toContain('\n\nPARA_MARKER')
    expect(out.content).toContain('- y')
    expect(out.content).toContain('PARA_MARKER')
    // With the identity translator the reassembled note must equal the input exactly.
    expect(out.content).toBe(content)
  })

  it('coalesces consecutive Markdown blockquote bodies into one translatePlainText (embedded newlines)', async () => {
    const { translatePlainText } = await import('@/lib/translate-client')
    const spy = vi.mocked(translatePlainText)
    spy.mockClear()
    // Bracket each payload so separate translator calls would be distinguishable.
    spy.mockImplementation(async (s: string) => `[${s}]`)

    await translateNoteForDisplay(kind1Event(BLOCKQUOTE_THREE_LINES), 'de')

    const payloads = spy.mock.calls.map((c) => String(c[0]))
    // All three quoted lines must have been sent in a single call.
    const merged = payloads.find(
      (p) => p.includes('Stephen cheered when John was fired') && p.includes('We do not like Stephen')
    )
    expect(merged).toBeDefined()
    expect(merged).toContain('John should not have been fired')
  })
})
||||||
Loading…
Reference in new issue