@ -4,12 +4,122 @@ import {
@@ -4,12 +4,122 @@ import {
translateAdvancedLabMarkup ,
type AdvancedLabMarkupMode
} from '@/lib/advanced-lab-markup-protect'
import { EMBEDDED_EVENT_REGEX } from '@/lib/content-patterns'
import { getLongFormArticleMetadataFromEvent } from '@/lib/event-metadata'
import { getParentEventHexId } from '@/lib/event'
import { setNoteTranslation } from '@/lib/note-translation-display'
import { normalizeTranslateLangCode } from '@/lib/translate-client'
import type { Event } from 'nostr-tools'
import { nip19 , type Event } from 'nostr-tools'
/**
 * Longest trimmed body, measured with `String.length`, that `translateLongProtectedBody`
 * translates as a single chunk; longer bodies are split into several blocks.
 * NOTE(review): presumably also the upper bound for each slice — confirm against the chunking loop.
 */
const CHUNK_MAX = 2500
/**
 * GFM-style blockquote line (indent, `>`, optional space, body).
 *
 * Capture groups: 1 = up to three leading spaces/tabs, 2 = the `>` marker with its optional
 * following space, 3 = the rest of the line.
 */
const MD_BLOCKQUOTE_LINE = /^([\t ]{0,3})(> ?)(.*)$/
/** Fence delimiter: up to three leading spaces/tabs followed by three backticks. */
const MD_FENCE_DELIMITER_LINE = /^[\t ]{0,3}```/

/**
 * Reports whether `line` opens or closes a backtick-fenced code block.
 *
 * Only the start of the line is inspected, so a trailing `\r` from CRLF input can never change
 * the result and does not need to be stripped first.
 *
 * @param line A single line of Markdown, without its line terminator.
 * @returns `true` when the line starts (after at most three spaces/tabs) with three backticks.
 */
function isMarkdownFenceDelimiterLine(line: string): boolean {
  return MD_FENCE_DELIMITER_LINE.test(line)
}
/** Consecutive non-blockquote lines (fence delimiters and fenced code included), kept verbatim. */
type MarkdownPlainSegment = { type: 'plain'; lines: string[] }

/** A run of consecutive blockquote lines, each already split into marker prefix and body text. */
type MarkdownBlockquoteSegment = { type: 'bq'; prefixes: string[]; bodies: string[] }

type MarkdownTranslateSegment = MarkdownPlainSegment | MarkdownBlockquoteSegment

/**
 * Splits Markdown into alternating plain and blockquote segments, in document order.
 *
 * Fence delimiter lines and everything between an opening and closing fence always go to a
 * plain segment, so `>` lines inside fenced code are never treated as blockquotes. Each
 * blockquote line is matched once here; the translator reuses the captured prefix and body.
 */
function splitMarkdownIntoTranslateSegments(text: string): MarkdownTranslateSegment[] {
  const segments: MarkdownTranslateSegment[] = []
  let inFence = false

  for (const line of text.split(/\r?\n/)) {
    const isFenceDelimiter = isMarkdownFenceDelimiterLine(line)
    if (isFenceDelimiter) inFence = !inFence

    // A delimiter line itself is plain, as is every line while a fence is open.
    const quote = isFenceDelimiter || inFence ? null : line.match(MD_BLOCKQUOTE_LINE)
    const last = segments[segments.length - 1]

    if (quote) {
      const prefix = (quote[1] ?? '') + (quote[2] ?? '')
      const body = quote[3] ?? ''
      if (last?.type === 'bq') {
        last.prefixes.push(prefix)
        last.bodies.push(body)
      } else {
        segments.push({ type: 'bq', prefixes: [prefix], bodies: [body] })
      }
      continue
    }

    if (last?.type === 'plain') last.lines.push(line)
    else segments.push({ type: 'plain', lines: [line] })
  }

  return segments
}

/**
 * Translates one blockquote run and re-attaches each line's original `>` prefix.
 *
 * A single-line run is translated directly. Longer runs are sent as one request with the
 * bodies joined by `\n`; if the translated text does not split back into the same number of
 * lines, every body is re-translated on its own so prefixes and lines stay aligned.
 */
async function translateBlockquoteRun(run: MarkdownBlockquoteSegment, target: string): Promise<string> {
  const { prefixes, bodies } = run
  const reattachPrefixes = (translated: readonly string[]): string =>
    prefixes.map((prefix, idx) => `${prefix}${translated[idx] ?? ''}`).join('\n')

  const [onlyBody] = bodies
  if (bodies.length === 1 && onlyBody !== undefined) {
    return reattachPrefixes([await translateAdvancedLabMarkup(onlyBody, target, 'auto', 'markdown')])
  }

  const translatedJoined = await translateAdvancedLabMarkup(bodies.join('\n'), target, 'auto', 'markdown', {
    preserveEmbeddedNewlinesInTranslatable: true
  })
  const coalescedLines = translatedJoined.split(/\r?\n/)
  if (coalescedLines.length === bodies.length) return reattachPrefixes(coalescedLines)

  // Line count drifted: fall back to one request per line.
  const perLine = await Promise.all(
    bodies.map((body) => translateAdvancedLabMarkup(body, target, 'auto', 'markdown'))
  )
  return reattachPrefixes(perLine)
}

/**
 * LibreTranslate can leave an isolated middle line in English when each `>` line is translated
 * separately. Coalesce consecutive blockquote bodies (outside fenced code) into one request with
 * embedded newlines preserved via {@link translateAdvancedLabMarkup} options.
 *
 * Plain segments are translated as a whole (an empty segment is emitted as `''` without a
 * request). Segments are processed sequentially and joined with `\n`.
 *
 * @param text Markdown body to translate; may use `\n` or `\r\n` line endings.
 * @param target Target language code passed through to {@link translateAdvancedLabMarkup}.
 * @returns The translated Markdown with `\n` line endings between segments.
 */
async function translateMarkdownBodyCoalescingBlockquotes(text: string, target: string): Promise<string> {
  const outs: string[] = []
  for (const seg of splitMarkdownIntoTranslateSegments(text)) {
    if (seg.type === 'bq') {
      outs.push(await translateBlockquoteRun(seg, target))
      continue
    }
    const joined = seg.lines.join('\n')
    outs.push(joined === '' ? '' : await translateAdvancedLabMarkup(joined, target, 'auto', 'markdown'))
  }
  return outs.join('\n')
}
/**
 * Translates a single chunk of body text, picking the translation path from the markup mode.
 *
 * Whitespace-only input yields `''` without a request. Markdown goes through
 * {@link translateMarkdownBodyCoalescingBlockquotes}; every other mode is handed straight to
 * {@link translateAdvancedLabMarkup} with source language `'auto'`.
 *
 * @param core Chunk text to translate.
 * @param target Target language code.
 * @param markupMode Markup flavour of `core`.
 * @returns The translated chunk.
 */
async function translateBodyChunk(
  core: string,
  target: string,
  markupMode: AdvancedLabMarkupMode
): Promise<string> {
  const isBlank = core.trim() === ''
  if (isBlank) return ''

  return markupMode === 'markdown'
    ? translateMarkdownBodyCoalescingBlockquotes(core, target)
    : translateAdvancedLabMarkup(core, target, 'auto', markupMode)
}
function looksLikeStringifiedJsonObject ( content : string ) : boolean {
const trimmed = content . trim ( )
if (
@ -40,7 +150,13 @@ export function articleHasTranslatableTitle(event: Event): boolean {
@@ -40,7 +150,13 @@ export function articleHasTranslatableTitle(event: Event): boolean {
return Boolean ( getLongFormArticleMetadataFromEvent ( event ) . title ? . trim ( ) )
}
/** Same exclusions as the advanced lab (`translateAdvancedLabMarkup`). Chunk large bodies for the API. */
/**
 * Same exclusions as the advanced lab (`translateAdvancedLabMarkup`). Chunk large bodies for the API.
 *
 * Trailing whitespace / newlines on a chunk must not be dropped when advancing `rest` (they are not
 * re-sent on the next iteration). Do not `trimStart()` the remainder, or blank lines after lists and
 * paragraph breaks vanish from the output.
 */
async function translateLongProtectedBody (
text : string ,
target : string ,
@ -49,7 +165,7 @@ async function translateLongProtectedBody(
@@ -49,7 +165,7 @@ async function translateLongProtectedBody(
const t = text . trim ( )
if ( ! t ) return text
if ( t . length <= CHUNK_MAX ) {
return translateAdvancedLabMarkup ( t , target , 'auto' , markupMode )
return translateBodyChunk ( t , target , markupMode )
}
const blocks : string [ ] = [ ]
let rest = t
@ -59,13 +175,20 @@ async function translateLongProtectedBody(
@@ -59,13 +175,20 @@ async function translateLongProtectedBody(
if ( nl > 600 ) {
slice = rest . slice ( 0 , nl + 1 )
}
const part = slice . trimEnd ( )
if ( part ) {
blocks . push ( await translateAdvancedLabMarkup ( part , target , 'auto' , markupMode ) )
let endCore = slice . length
while ( endCore > 0 && / \ s / u . t e s t ( s l i c e [ e n d C o r e - 1 ] ! ) ) {
endCore --
}
rest = rest . slice ( slice . length ) . trimStart ( )
const core = slice . slice ( 0 , endCore )
const trailingLiteral = slice . slice ( endCore )
const translated =
core . trim ( ) === ''
? ''
: await translateBodyChunk ( core , target , markupMode )
blocks . push ( translated + trailingLiteral )
rest = rest . slice ( slice . length )
}
return blocks . join ( '\n' )
return blocks . join ( '' )
}
/ * *
@ -88,3 +211,92 @@ export async function translateNoteForDisplay(
@@ -88,3 +211,92 @@ export async function translateNoteForDisplay(
: rawContent
return { content : content || rawContent , title }
}
/**
 * Parent (`e` reply) and `nostr:…` embeds in the body — same scope as prefetch, but not every thread `e` tag.
 *
 * The parent id comes from {@link getParentEventHexId}; embeds are the matches of
 * `EMBEDDED_EVENT_REGEX` in `event.content`. `note` and `nevent` embeds contribute hex ids,
 * `naddr` embeds are kept as their bech32 string, and embeds that fail to decode are skipped.
 * Hex ids are lower-cased, must be exactly 64 hex characters, and never include the event's own id.
 *
 * @param event The note whose related notes should be collected.
 * @returns De-duplicated hex event ids and `naddr` bech32 pointers.
 */
export function collectRelatedNoteTranslateTargets(event: Event): {
  hexIds: string[]
  nip19Pointers: string[]
} {
  const ownId = event.id.toLowerCase()
  const collectedHexIds = new Set<string>()
  const collectedPointers = new Set<string>()

  const rememberHexId = (candidate: string | undefined): void => {
    if (!candidate) return
    const normalized = candidate.trim().toLowerCase()
    if (normalized === ownId) return
    if (/^[0-9a-f]{64}$/.test(normalized)) collectedHexIds.add(normalized)
  }

  rememberHexId(getParentEventHexId(event))

  const embeds = (event.content ?? '').match(EMBEDDED_EVENT_REGEX) ?? []
  for (const embed of embeds) {
    // Everything after the first colon (the `nostr:` scheme separator) is the bech32 entity.
    const colonAt = embed.indexOf(':')
    if (colonAt < 0) continue
    const bech32 = embed.slice(colonAt + 1).trim()
    if (bech32 === '') continue

    try {
      const decoded = nip19.decode(bech32)
      switch (decoded.type) {
        case 'note':
          rememberHexId(decoded.data)
          break
        case 'nevent':
          rememberHexId(decoded.data.id)
          break
        case 'naddr':
          collectedPointers.add(bech32)
          break
        default:
          break
      }
    } catch {
      /* ignore */
    }
  }

  return {
    hexIds: Array.from(collectedHexIds),
    nip19Pointers: Array.from(collectedPointers)
  }
}
/**
 * Translates the note body / title and any reply-parent / embedded notes shown with it, then updates the translation store.
 *
 * The main note is translated first; a failure there rejects the returned promise. Related
 * notes (see {@link collectRelatedNoteTranslateTargets}) are best-effort: a related note that
 * cannot be fetched or translated is skipped, so the main translation is still stored.
 *
 * @param event The note being translated.
 * @param targetCode Target language code, stored as `lang` on every translation written.
 * @param langLabel Human-readable label for the target language, stored alongside `lang`.
 * @param fetchEvent Resolves a hex event id or `naddr` bech32 pointer to an event, or `undefined`.
 * @returns Resolves once the main translation (and any related ones) have been stored.
 */
export async function translateNoteAndRelatedForDisplay(
  event: Event,
  targetCode: string,
  langLabel: string,
  fetchEvent: (id: string) => Promise<Event | undefined>
): Promise<void> {
  const mainOut = await translateNoteForDisplay(event, targetCode)
  const { hexIds, nip19Pointers } = collectRelatedNoteTranslateTargets(event)
  const coIds: string[] = []
  const seenRel = new Set<string>()
  const self = event.id.toLowerCase()

  const translateRelated = async (rel: Event): Promise<void> => {
    const idl = rel.id.toLowerCase()
    if (idl === self || seenRel.has(idl)) return
    if (!eventHasTranslatableTextBody(rel) && !articleHasTranslatableTitle(rel)) return
    seenRel.add(idl)
    try {
      const out = await translateNoteForDisplay(rel, targetCode)
      setNoteTranslation(rel.id, {
        lang: targetCode,
        langLabel,
        content: out.content,
        title: out.title
      })
      coIds.push(rel.id)
    } catch {
      // Un-mark so the same event can be retried if another pointer resolves to it.
      seenRel.delete(idl)
    }
  }

  // Hex ids first, then naddr pointers; fetched one at a time.
  for (const pointer of [...hexIds, ...nip19Pointers]) {
    let rel: Event | undefined
    try {
      rel = await fetchEvent(pointer)
    } catch {
      // Related notes are best-effort: a failed lookup must not discard the main translation.
      continue
    }
    if (rel) await translateRelated(rel)
  }

  setNoteTranslation(event.id, {
    lang: targetCode,
    langLabel,
    content: mainOut.content,
    title: mainOut.title,
    coTranslatedIds: coIds.length > 0 ? coIds : undefined
  })
}