You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
302 lines
9.5 KiB
302 lines
9.5 KiB
import { ExtendedKind } from '@/constants' |
|
import { isAsciidocMarkupKind } from '@/lib/advanced-event-lab-kinds' |
|
import { |
|
translateAdvancedLabMarkup, |
|
type AdvancedLabMarkupMode |
|
} from '@/lib/advanced-lab-markup-protect' |
|
import { EMBEDDED_EVENT_REGEX } from '@/lib/content-patterns' |
|
import { getLongFormArticleMetadataFromEvent } from '@/lib/event-metadata' |
|
import { getParentEventHexId } from '@/lib/event' |
|
import { setNoteTranslation } from '@/lib/note-translation-display' |
|
import { normalizeTranslateLangCode } from '@/lib/translate-client' |
|
import { nip19, type Event } from 'nostr-tools' |
|
|
|
/** Largest body size, measured with `String.length` (UTF-16 code units), handled as a single piece. */
const CHUNK_MAX = 2_500
|
|
|
/**
 * Matches a single GFM-style blockquote line.
 *
 * Capture groups:
 * 1. the indent (zero to three tabs/spaces),
 * 2. the `>` marker together with at most one following space,
 * 3. the rest of the line (the quoted body, possibly empty).
 */
const MD_BLOCKQUOTE_LINE: RegExp = /^([\t ]{0,3})(> ?)(.*)$/
|
|
|
/**
 * Reports whether `line` starts with a backtick code-fence delimiter: at most three leading
 * tabs/spaces followed by three backticks. A single trailing carriage return is ignored.
 */
function isMarkdownFenceDelimiterLine(line: string): boolean {
  const withoutTrailingCr = line.replace(/\r$/u, '')
  const fenceStart = /^[\t ]{0,3}```/
  return fenceStart.exec(withoutTrailingCr) !== null
}
|
|
|
/**
 * Translates a Markdown body while keeping runs of blockquote lines together.
 *
 * When every `>` line is translated on its own, LibreTranslate can leave an isolated middle line
 * untranslated. To avoid that, consecutive blockquote lines outside fenced code have their `>`
 * prefixes removed, their bodies joined with newlines and sent as a single request
 * (`preserveEmbeddedNewlinesInTranslatable`); the prefixes are put back afterwards. If the
 * translated text does not come back with the same number of lines, the run is translated line by
 * line instead.
 *
 * Everything else (including fence delimiters and fenced content) is grouped into plain segments
 * and translated as-is. Segments are rejoined with `\n`.
 *
 * @param text Markdown source; both `\n` and `\r\n` line breaks are accepted.
 * @param target Target language code passed through to `translateAdvancedLabMarkup`.
 * @returns The translated Markdown.
 */
async function translateMarkdownBodyCoalescingBlockquotes(text: string, target: string): Promise<string> {
  type QuotedLine = { prefix: string; body: string }
  type Segment = { kind: 'plain'; lines: string[] } | { kind: 'quote'; quoted: QuotedLine[] }

  // Splits a blockquote line into its `>` prefix (indent + marker) and its body.
  const parseQuotedLine = (raw: string): QuotedLine | undefined => {
    const hit = MD_BLOCKQUOTE_LINE.exec(raw)
    return hit ? { prefix: hit[1]! + hit[2]!, body: hit[3] ?? '' } : undefined
  }

  // Pass 1: group the lines into alternating plain / blockquote segments.
  const segments: Segment[] = []
  let insideFence = false
  for (const raw of text.split(/\r?\n/)) {
    const isFenceDelimiter = isMarkdownFenceDelimiterLine(raw)
    if (isFenceDelimiter) insideFence = !insideFence

    // Fence delimiters and fenced content are never treated as blockquotes.
    const quotedLine = isFenceDelimiter || insideFence ? undefined : parseQuotedLine(raw)
    const tail = segments[segments.length - 1]

    if (quotedLine) {
      if (tail?.kind === 'quote') tail.quoted.push(quotedLine)
      else segments.push({ kind: 'quote', quoted: [quotedLine] })
    } else if (tail?.kind === 'plain') {
      tail.lines.push(raw)
    } else {
      segments.push({ kind: 'plain', lines: [raw] })
    }
  }

  // Pass 2: translate segment by segment, in document order.
  const pieces: string[] = []
  for (const segment of segments) {
    if (segment.kind === 'plain') {
      const plainText = segment.lines.join('\n')
      if (plainText === '') {
        pieces.push('')
      } else {
        pieces.push(await translateAdvancedLabMarkup(plainText, target, 'auto', 'markdown'))
      }
      continue
    }

    const { quoted } = segment
    const reattachPrefixes = (translatedBodies: string[]): string =>
      quoted.map((q, idx) => `${q.prefix}${translatedBodies[idx] ?? ''}`).join('\n')

    if (quoted.length === 1) {
      const only = quoted[0]!
      const translatedBody = await translateAdvancedLabMarkup(only.body, target, 'auto', 'markdown')
      pieces.push(`${only.prefix}${translatedBody}`)
      continue
    }

    const bodies = quoted.map((q) => q.body)
    const translatedRun = await translateAdvancedLabMarkup(bodies.join('\n'), target, 'auto', 'markdown', {
      preserveEmbeddedNewlinesInTranslatable: true
    })
    const translatedLines = translatedRun.split(/\r?\n/)

    if (translatedLines.length === bodies.length) {
      pieces.push(reattachPrefixes(translatedLines))
    } else {
      // Line count drifted, so prefixes can no longer be aligned: translate each body separately.
      const perLine = await Promise.all(
        bodies.map((body) => translateAdvancedLabMarkup(body, target, 'auto', 'markdown'))
      )
      pieces.push(reattachPrefixes(perLine))
    }
  }

  return pieces.join('\n')
}
|
|
|
/**
 * Translates one piece of a body.
 *
 * @param core Text to translate; a blank (whitespace-only) value yields `''` without a request.
 * @param target Target language code.
 * @param markupMode `'markdown'` routes through the blockquote-coalescing translator; any other
 *   mode goes straight to `translateAdvancedLabMarkup`.
 * @returns The translated text.
 */
async function translateBodyChunk(
  core: string,
  target: string,
  markupMode: AdvancedLabMarkupMode
): Promise<string> {
  const isBlank = core.trim().length === 0
  if (isBlank) return ''

  return markupMode === 'markdown'
    ? translateMarkdownBodyCoalescingBlockquotes(core, target)
    : translateAdvancedLabMarkup(core, target, 'auto', markupMode)
}
|
|
|
/**
 * Reports whether `content` is a serialized JSON object or array: after trimming it must be
 * wrapped in `{…}` or `[…]` and parse successfully to a non-null object.
 */
function looksLikeStringifiedJsonObject(content: string): boolean {
  const candidate = content.trim()

  const wrappedInBraces = candidate.startsWith('{') && candidate.endsWith('}')
  const wrappedInBrackets = candidate.startsWith('[') && candidate.endsWith(']')
  if (!wrappedInBraces && !wrappedInBrackets) return false

  let value: unknown
  try {
    value = JSON.parse(candidate)
  } catch {
    // Looks like JSON from the outside but is not parseable.
    return false
  }
  return typeof value === 'object' && value !== null
}
|
|
|
/**
 * Reports whether the event's `content` is text worth sending to the translator.
 *
 * Returns `false` for blank content, for `VOICE` / `VOICE_COMMENT` kinds, and for content that is
 * a serialized JSON object or array.
 */
export function eventHasTranslatableTextBody(event: Event): boolean {
  const body = (event.content ?? '').trim()
  if (body.length === 0) return false

  switch (event.kind) {
    case ExtendedKind.VOICE:
    case ExtendedKind.VOICE_COMMENT:
      return false
    default:
      return !looksLikeStringifiedJsonObject(body)
  }
}
|
|
|
/** Reports whether the event's long-form article metadata carries a non-blank title. */
export function articleHasTranslatableTitle(event: Event): boolean {
  const { title } = getLongFormArticleMetadataFromEvent(event)
  const trimmedTitle = title?.trim()
  return trimmedTitle ? true : false
}
|
|
|
/**
 * Translates a body of any length with the same exclusions as the advanced lab
 * (`translateAdvancedLabMarkup`). Bodies longer than `CHUNK_MAX` are cut into pieces for the API.
 *
 * How a piece is cut while more than `CHUNK_MAX` code units remain:
 * - just after the last newline inside the first `CHUNK_MAX` code units, when that newline lies
 *   beyond offset 600;
 * - otherwise at `CHUNK_MAX`, moved back by one code unit if the cut would fall between the two
 *   halves of a UTF-16 surrogate pair (so no lone surrogate is ever sent to the translator).
 *
 * A remainder that already fits in `CHUNK_MAX` is sent whole, matching the single-request path
 * used when the entire body fits.
 *
 * Trailing whitespace/newlines on a piece are not sent for translation; they are re-appended
 * verbatim so they are not lost when advancing to the remainder. The remainder is deliberately not
 * `trimStart()`-ed, otherwise blank lines after lists and paragraph breaks would vanish from the
 * output.
 *
 * @param text Raw body. Returned unchanged when blank; otherwise its outer whitespace is trimmed
 *   before translating.
 * @param target Target language code.
 * @param markupMode Markup flavour forwarded to `translateBodyChunk`.
 * @returns The translated body.
 */
async function translateLongProtectedBody(
  text: string,
  target: string,
  markupMode: AdvancedLabMarkupMode
): Promise<string> {
  // A newline at or before this offset is not used as a cut point; the piece is cut at CHUNK_MAX instead.
  const MIN_NEWLINE_CUT = 600

  const isHighSurrogate = (unit: number): boolean => unit >= 0xd800 && unit <= 0xdbff
  const isLowSurrogate = (unit: number): boolean => unit >= 0xdc00 && unit <= 0xdfff

  const trimmed = text.trim()
  if (!trimmed) return text
  if (trimmed.length <= CHUNK_MAX) {
    return translateBodyChunk(trimmed, target, markupMode)
  }

  const blocks: string[] = []
  let rest = trimmed
  while (rest.length > 0) {
    let cut = rest.length
    if (cut > CHUNK_MAX) {
      cut = CHUNK_MAX
      const newlineAt = rest.lastIndexOf('\n', CHUNK_MAX - 1)
      if (newlineAt > MIN_NEWLINE_CUT) {
        cut = newlineAt + 1
      } else if (
        cut > 1 &&
        isHighSurrogate(rest.charCodeAt(cut - 1)) &&
        isLowSurrogate(rest.charCodeAt(cut))
      ) {
        // Keep the astral character intact: it goes into the next piece as a whole.
        cut -= 1
      }
    }

    const slice = rest.slice(0, cut)
    // Only the non-whitespace core is translated; the whitespace tail is carried over literally.
    const core = slice.trimEnd()
    const trailingLiteral = slice.slice(core.length)
    const translated = core === '' ? '' : await translateBodyChunk(core, target, markupMode)
    blocks.push(translated + trailingLiteral)

    rest = rest.slice(cut)
  }
  return blocks.join('')
}
|
|
|
/**
 * Translates an event's title (if its long-form metadata has one) and its content.
 *
 * The markup mode is `'asciidoc'` for kinds accepted by `isAsciidocMarkupKind`, `'markdown'`
 * otherwise. The title is translated first, then the content.
 *
 * @param event Event to translate.
 * @param targetCode LibreTranslate target as returned by `/languages` (e.g. `tr`, `zh-CN`).
 * @returns The translated content (the raw content when it is blank or the translation comes back
 *   empty) and the translated title, or `undefined` when there is no title.
 */
export async function translateNoteForDisplay(
  event: Event,
  targetCode: string
): Promise<{ content: string; title?: string }> {
  const target = normalizeTranslateLangCode(targetCode)
  const markupMode: AdvancedLabMarkupMode = isAsciidocMarkupKind(event.kind) ? 'asciidoc' : 'markdown'

  const sourceTitle = getLongFormArticleMetadataFromEvent(event).title?.trim()
  let title: string | undefined
  if (sourceTitle) {
    title = await translateAdvancedLabMarkup(sourceTitle, target, 'auto', markupMode)
  }

  const original = event.content ?? ''
  if (original.trim() === '') {
    return { content: original, title }
  }

  const translated = await translateLongProtectedBody(original, target, markupMode)
  return { content: translated || original, title }
}
|
|
|
/**
 * Collects the notes that are shown together with `event` and should be translated with it: the
 * reply parent (`e` reply) and `nostr:…` embeds in the body. This is the same scope as prefetch,
 * not every thread `e` tag.
 *
 * @returns `hexIds`: lower-cased 64-character hex event ids (parent, `note` and `nevent` embeds),
 *   never the event's own id; `nip19Pointers`: the bech32 strings of `naddr` embeds. Both lists are
 *   de-duplicated and keep first-seen order.
 */
export function collectRelatedNoteTranslateTargets(event: Event): {
  hexIds: string[]
  nip19Pointers: string[]
} {
  const ownId = event.id.toLowerCase()
  const hexIds = new Set<string>()
  const nip19Pointers = new Set<string>()

  const rememberHex = (candidate: string | undefined): void => {
    if (!candidate) return
    const normalized = candidate.trim().toLowerCase()
    if (normalized === ownId) return
    if (/^[0-9a-f]{64}$/.test(normalized)) hexIds.add(normalized)
  }

  rememberHex(getParentEventHexId(event))

  const embeds = (event.content ?? '').match(EMBEDDED_EVENT_REGEX) ?? []
  for (const embed of embeds) {
    // Everything after the first ':' is the bech32 pointer.
    const separator = embed.indexOf(':')
    const pointer = separator < 0 ? '' : embed.slice(separator + 1).trim()
    if (pointer === '') continue

    try {
      const decoded = nip19.decode(pointer)
      switch (decoded.type) {
        case 'note':
          rememberHex(decoded.data)
          break
        case 'nevent':
          rememberHex(decoded.data.id)
          break
        case 'naddr':
          nip19Pointers.add(pointer)
          break
      }
    } catch {
      /* undecodable pointer: ignore */
    }
  }

  return { hexIds: [...hexIds], nip19Pointers: [...nip19Pointers] }
}
|
|
|
/**
 * Translates the note body/title and any reply-parent / embedded notes shown with it, then updates
 * the translation store.
 *
 * The main note is translated first; a failure there rejects the returned promise. Related notes
 * are best-effort: one that cannot be fetched or translated is skipped, so it can never prevent
 * the main translation from being stored. Related notes are processed one after another, hex ids
 * before `naddr` pointers.
 *
 * @param event The note being translated.
 * @param targetCode Target language code; stored as `lang` on every translation written.
 * @param langLabel Label for the target language, stored alongside each translation.
 * @param fetchEvent Resolves a related note from a hex event id or a bech32 `naddr` pointer;
 *   may resolve to `undefined` when the note is unavailable.
 */
export async function translateNoteAndRelatedForDisplay(
  event: Event,
  targetCode: string,
  langLabel: string,
  fetchEvent: (id: string) => Promise<Event | undefined>
): Promise<void> {
  const mainOut = await translateNoteForDisplay(event, targetCode)
  const { hexIds, nip19Pointers } = collectRelatedNoteTranslateTargets(event)
  const coIds: string[] = []
  const seenRel = new Set<string>()
  const self = event.id.toLowerCase()

  const translateRelated = async (rel: Event): Promise<void> => {
    const idl = rel.id.toLowerCase()
    if (idl === self || seenRel.has(idl)) return
    if (!eventHasTranslatableTextBody(rel) && !articleHasTranslatableTitle(rel)) return
    seenRel.add(idl)
    try {
      const out = await translateNoteForDisplay(rel, targetCode)
      setNoteTranslation(rel.id, {
        lang: targetCode,
        langLabel,
        content: out.content,
        title: out.title
      })
      coIds.push(rel.id)
    } catch {
      // Best-effort: forget the id so the same note can be retried if another pointer resolves to it.
      seenRel.delete(idl)
    }
  }

  for (const pointer of [...hexIds, ...nip19Pointers]) {
    let rel: Event | undefined
    try {
      rel = await fetchEvent(pointer)
    } catch {
      // Best-effort, like the translation step above: an unreachable related note must not
      // discard the main translation that has already been computed.
      continue
    }
    if (rel) await translateRelated(rel)
  }

  setNoteTranslation(event.id, {
    lang: targetCode,
    langLabel,
    content: mainOut.content,
    title: mainOut.title,
    coTranslatedIds: coIds.length > 0 ? coIds : undefined
  })
}
|
|
|