You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
239 lines
8.2 KiB
239 lines
8.2 KiB
import { TRANSLATE_URL } from '@/constants' |
|
import { electronAwareFetch } from '@/lib/electron-aware-fetch' |
|
import logger from '@/lib/logger' |
|
import { sha256 } from '@noble/hashes/sha256' |
|
import { bytesToHex } from '@noble/hashes/utils' |
|
|
|
/**
 * In-memory translation cache. Values hold the translated text and the time
 * (epoch milliseconds) at which the entry was stored.
 */
const memoryCache: Map<string, { text: string; at: number }> = new Map()

/** Entry count above which `pruneMemory` evicts the earliest-inserted entries. */
const MAX_MEMORY = 80

/** Lifetime of a cached translation: 24 hours, expressed in milliseconds. */
const CACHE_TTL_MS = 24 * 60 * 60 * 1000
|
|
|
/**
 * Builds the memory-cache key for one translation request.
 *
 * The key is the hex form of the SHA-256 digest of the string
 * `sourceLang|targetLang|source`.
 */
function cacheKey(source: string, sourceLang: string, targetLang: string): string {
  const material = `${sourceLang}|${targetLang}|${source}`
  const digest = sha256(new TextEncoder().encode(material))
  return bytesToHex(digest)
}
|
|
|
/**
 * Shrinks the memory cache in two passes: first drop every entry older than
 * `CACHE_TTL_MS`, then evict from the front of the map (earliest inserted)
 * until at most `MAX_MEMORY` entries remain.
 */
function pruneMemory(): void {
  const now = Date.now()
  // Deleting the entry currently being visited is safe while iterating a Map.
  memoryCache.forEach((entry, key) => {
    if (now - entry.at > CACHE_TTL_MS) memoryCache.delete(key)
  })

  while (memoryCache.size > MAX_MEMORY) {
    const oldest = memoryCache.keys().next().value
    if (!oldest) break
    memoryCache.delete(oldest)
  }
}
|
|
|
/** Reports whether `TRANSLATE_URL` contains anything other than whitespace. */
export function isTranslateConfigured(): boolean {
  const configured = TRANSLATE_URL.trim()
  return configured.length > 0
}
|
|
|
/**
 * LibreTranslate uses ISO 639-1; map a few common mistypes (defence in depth).
 * Keys are lower-case because lookups use the lower-cased input.
 */
const LANG_ALIASES: Record<string, string> = {
  sp: 'es',
  ger: 'de',
  eng: 'en',
  fra: 'fr',
  ita: 'it',
  por: 'pt',
  // LibreTranslate/Argos use Bokmål code `nb`; ISO 639-1 `no` is not in the model index.
  no: 'nb'
}

/**
 * Normalizes a user- or menu-supplied language code.
 *
 * @param code Raw language code; surrounding whitespace is ignored.
 * @returns The alias target when the lower-cased code is a known mistype,
 *   otherwise the trimmed code with its original casing, or `'en'` when the
 *   trimmed code is empty.
 */
export function normalizeTranslateLangCode(code: string): string {
  const trimmed = code.trim()
  const lowered = trimmed.toLowerCase()
  // Own-property check: a bare `LANG_ALIASES[lowered]` would also resolve keys
  // inherited from Object.prototype (e.g. `constructor`, `__proto__`) and hand
  // back a function/object where callers expect a string.
  const alias = Object.prototype.hasOwnProperty.call(LANG_ALIASES, lowered)
    ? LANG_ALIASES[lowered]
    : undefined
  return (alias ?? trimmed) || 'en'
}
|
|
|
/**
 * Converts a logical language code into the code sent to the translate API.
 *
 * LibreTranslate/Argos only registers `en`; the regional English codes are for
 * grammar (LanguageTool) and read-aloud, so `en-gb` / `en-us` (any casing,
 * `_` or `-` separator) collapse to `en`. Every other code is returned as
 * produced by `normalizeTranslateLangCode`.
 */
export function translateApiLanguageCode(code: string): string {
  const normalized = normalizeTranslateLangCode(code)
  const canonical = normalized.toLowerCase().replace(/_/gu, '-')
  const isRegionalEnglish = canonical === 'en-gb' || canonical === 'en-us'
  return isRegionalEnglish ? 'en' : normalized
}
|
|
|
/** A language entry: its code paired with its name. */
export type TranslateLanguageOption = {
  code: string
  name: string
}
|
|
|
/**
 * Comparison key for a language code: the API form of the code, trimmed,
 * lower-cased, and with `_` replaced by `-`.
 */
function advertisedApiCodeKey(code: string): string {
  const apiCode = translateApiLanguageCode(code)
  return apiCode.trim().toLowerCase().replace(/_/gu, '-')
}
|
|
|
/**
 * Codes last returned by GET `/languages`, stored in API key form (e.g. `en`
 * for `en-gb`). `null` means nothing is known; an empty list resets it to `null`.
 */
let advertisedTranslateApiCodes: Set<string> | null = null

/**
 * Replaces the advertised-code set with the keys derived from `list`, or
 * clears it when `list` is empty.
 */
function recordAdvertisedTranslateCodesFromServer(list: readonly TranslateLanguageOption[]): void {
  if (list.length > 0) {
    const keys = new Set<string>()
    for (const option of list) {
      keys.add(advertisedApiCodeKey(option.code))
    }
    advertisedTranslateApiCodes = keys
  } else {
    advertisedTranslateApiCodes = null
  }
}
|
|
|
/**
 * Checks a logical menu code against the codes the server advertised.
 *
 * @returns `true` when no advertised set is recorded (no successful
 *   `/languages` response has been seen, or it was cleared), or when the set
 *   contains the API key this code resolves to; `false` otherwise.
 */
export function translateServerSupportsLogicalTarget(targetCode: string): boolean {
  const advertised = advertisedTranslateApiCodes
  return advertised === null || advertised.has(advertisedApiCodeKey(targetCode))
}
|
|
|
/** How long a fetched `/languages` list is reused: 60 seconds, in milliseconds. */
const LANGUAGES_CACHE_TTL_MS = 60 * 1000

/** Last parsed `/languages` list with the time (epoch ms) it was fetched; `null` when nothing is cached. */
let languagesCache: { list: TranslateLanguageOption[]; at: number } | null = null
|
|
|
/**
 * Turns the JSON body of GET `/languages` into a de-duplicated, name-sorted list.
 *
 * Accepted rows are either a bare string (used as both code and name) or an
 * object with a `code` property and an optional `name`. Rows whose code is not
 * a non-empty string after trimming are skipped, so values such as `null` or
 * nested objects are never stringified into bogus codes. When a code appears
 * more than once, the first occurrence wins.
 *
 * @param data Parsed JSON of unknown shape.
 * @returns Options sorted by name (case-insensitive comparison); `[]` when
 *   `data` is not an array.
 */
function parseLanguagesResponse(data: unknown): TranslateLanguageOption[] {
  if (!Array.isArray(data)) return []

  const byCode = new Map<string, TranslateLanguageOption>()
  for (const row of data as unknown[]) {
    let rawCode: unknown
    let rawName: unknown
    if (typeof row === 'string') {
      rawCode = row
    } else if (row !== null && typeof row === 'object' && 'code' in row) {
      const r = row as { code: unknown; name?: unknown }
      rawCode = r.code
      rawName = r.name
    } else {
      continue
    }

    if (typeof rawCode !== 'string') continue
    const code = rawCode.trim()
    if (code === '' || byCode.has(code)) continue

    // Fall back to the code when the name is missing, blank, or not a string.
    const trimmedName = typeof rawName === 'string' ? rawName.trim() : ''
    byCode.set(code, { code, name: trimmedName || code })
  }

  return [...byCode.values()].sort((a, b) =>
    a.name.localeCompare(b.name, undefined, { sensitivity: 'base' })
  )
}
|
|
|
/**
 * GET `/languages` on the configured translate base (same-origin `/api/translate` in dev).
 *
 * A successfully parsed list is cached for `LANGUAGES_CACHE_TTL_MS` and also
 * recorded as the set of advertised API codes. Every failure mode (request
 * rejection, non-OK status, unreadable body) is logged, clears both the cache
 * and the advertised set, and yields `[]`, so callers see one uniform
 * "no languages known" result instead of a rejection for some failures only.
 *
 * @returns The language options, or `[]` when no URL is configured or the
 *   request failed.
 */
export async function fetchTranslateLanguages(): Promise<TranslateLanguageOption[]> {
  const base = TRANSLATE_URL.trim().replace(/\/$/u, '')
  if (!base) return []

  const now = Date.now()
  if (languagesCache && now - languagesCache.at < LANGUAGES_CACHE_TTL_MS) {
    // Re-record so the advertised set matches the cached list even if it was cleared meanwhile.
    recordAdvertisedTranslateCodesFromServer(languagesCache.list)
    return languagesCache.list
  }

  const forgetAndReturnEmpty = (): TranslateLanguageOption[] => {
    languagesCache = null
    advertisedTranslateApiCodes = null
    return []
  }

  // NOTE(review): assumes electronAwareFetch rejects on transport failure the
  // way `fetch` does; if it never rejects this handler is simply unused.
  const res = await electronAwareFetch(`${base}/languages`).catch((e: unknown) => {
    logger.warn('[Translate] /languages request error', { e })
    return null
  })
  if (res === null) return forgetAndReturnEmpty()

  if (!res.ok) {
    logger.warn('[Translate] /languages failed', { status: res.status })
    return forgetAndReturnEmpty()
  }

  try {
    const data = (await res.json()) as unknown
    const list = parseLanguagesResponse(data)
    languagesCache = { list, at: now }
    recordAdvertisedTranslateCodesFromServer(list)
    return list
  } catch (e) {
    logger.warn('[Translate] /languages parse error', { e })
    return forgetAndReturnEmpty()
  }
}
|
|
|
/**
 * Forgets the cached `/languages` list and the advertised-code set, so the
 * next `fetchTranslateLanguages` call goes to the server again.
 */
export function clearTranslateLanguagesCache(): void {
  advertisedTranslateApiCodes = null
  languagesCache = null
}
|
|
|
/**
 * Tells whether `core` consists solely of hashtags.
 *
 * LibreTranslate / Argos often corrupts hashtag-only lines (random glyphs,
 * subtitle-like junk, dropped letters), and Nostr-style hashtags must stay
 * verbatim, so such text should bypass machine translation.
 *
 * @returns `true` when the trimmed text is one or more whitespace-separated
 *   `#tag` tokens made of letters, numbers, marks, `_` or `-`.
 */
export function shouldSkipMachineTranslatePlainCore(core: string): boolean {
  const hashtagsOnly = /^(?:#[\p{L}\p{N}\p{M}_-]+(?:\s+#[\p{L}\p{N}\p{M}_-]+)*)\s*$/u
  const candidate = core.trim()
  return hashtagsOnly.test(candidate)
}
|
|
|
/**
 * Translates plain text through the configured translate server's `/translate` endpoint.
 *
 * Leading and trailing whitespace is split off before the request and
 * re-attached to the result. Whitespace-only text and hashtag-only text are
 * returned unchanged without contacting the server. Successful translations
 * are kept in the memory cache and reused while younger than `CACHE_TTL_MS`.
 *
 * @param text Text to translate.
 * @param targetLang Logical target language code (mapped to the API code).
 * @param sourceLang Logical source language code, or `'auto'` (default), which is sent as-is.
 * @returns The translated text with the original edge whitespace.
 * @throws Error when no translate URL is configured, when the recorded
 *   `/languages` set lacks the target or the explicit source language, when
 *   the server answers with a non-OK status, or when a successful response
 *   carries no string `translatedText`.
 */
export async function translatePlainText(
  text: string,
  targetLang: string,
  sourceLang: string = 'auto'
): Promise<string> {
  const base = TRANSLATE_URL.trim().replace(/\/$/u, '')
  if (!base) {
    throw new Error('Translation URL not configured')
  }

  /** LibreTranslate often trims `q` / `translatedText`; keep edge whitespace so markup segments still join cleanly. */
  const leadingWs = text.match(/^\s*/u)?.[0] ?? ''
  const trailingWs = text.match(/\s*$/u)?.[0] ?? ''
  const core = text.slice(leadingWs.length, text.length - trailingWs.length)
  if (core === '') {
    return text
  }

  if (shouldSkipMachineTranslatePlainCore(core)) {
    return text
  }

  const resolvedTarget = translateApiLanguageCode(targetLang)
  const resolvedSource =
    sourceLang === 'auto' ? 'auto' : translateApiLanguageCode(sourceLang)

  if (!translateServerSupportsLogicalTarget(targetLang)) {
    const want = advertisedApiCodeKey(targetLang)
    throw new Error(
      `This translate server does not offer machine translation for “${want}” (that code is not in GET /languages). ` +
        'You can still use grammar check and read-aloud on text that is already in that language.'
    )
  }
  if (resolvedSource !== 'auto' && !translateServerSupportsLogicalTarget(sourceLang)) {
    const want = advertisedApiCodeKey(sourceLang)
    throw new Error(
      `This translate server does not offer “${want}” as a source language (not in /languages). Pick another source or use “Detect automatically”.`
    )
  }

  const key = cacheKey(core, resolvedSource, resolvedTarget)
  const hit = memoryCache.get(key)
  if (hit && Date.now() - hit.at < CACHE_TTL_MS) {
    logger.info('[AdvancedLab] translate', {
      source: resolvedSource,
      target: resolvedTarget,
      inputChars: text.length,
      outputChars: hit.text.length + leadingWs.length + trailingWs.length,
      cacheHit: true
    })
    return leadingWs + hit.text + trailingWs
  }

  const url = `${base}/translate`
  const res = await electronAwareFetch(url, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      q: core,
      source: resolvedSource,
      target: resolvedTarget,
      format: 'text'
    })
  })
  if (!res.ok) {
    const err = await res.text().catch(() => '')
    logger.warn('[Translate] HTTP error', { status: res.status, err: err.slice(0, 200) })
    const detail = err.replace(/\s+/gu, ' ').trim().slice(0, 160)
    throw new Error(
      detail ? `Translate: ${res.status} — ${detail}` : `Translate: ${res.status}`
    )
  }

  const data = (await res.json()) as unknown
  const outCore =
    data !== null && typeof data === 'object'
      ? (data as { translatedText?: unknown }).translatedText
      : undefined
  // A 2xx body without a string `translatedText` is a malformed reply, not an
  // empty translation: fail loudly rather than caching '' for the whole TTL
  // and handing the caller text with its content silently removed.
  if (typeof outCore !== 'string') {
    logger.warn('[Translate] malformed response', { status: res.status })
    throw new Error('Translate: malformed response (no translatedText)')
  }

  pruneMemory()
  memoryCache.set(key, { text: outCore, at: Date.now() })
  logger.info('[AdvancedLab] translate', {
    source: resolvedSource,
    target: resolvedTarget,
    inputChars: text.length,
    outputChars: leadingWs.length + outCore.length + trailingWs.length,
    cacheHit: false
  })
  return leadingWs + outCore + trailingWs
}
|
|
|