diff --git a/src/lib/advanced-lab-markup-protect.test.ts b/src/lib/advanced-lab-markup-protect.test.ts index fb6195fa..ba375cd1 100644 --- a/src/lib/advanced-lab-markup-protect.test.ts +++ b/src/lib/advanced-lab-markup-protect.test.ts @@ -137,6 +137,20 @@ describe('getMarkupProtectRanges', () => { expect(rangeIntersectsMerged(t.lastIndexOf(npub), npub.length, merged)).toBe(true) }) + it('markdown: freezes a full raw https URL (blossom npub-shaped host; .gif not translatable)', () => { + const url = + 'https://npub1uq6dv4yq94704gk5r22jsqg9gy2wpxkk5dft9q5gugc8tj53nq2qg5q22d.blossom.band/efc560395efdc7327db278ea4a7677905f69fecf1e6db754f41309d62f8ddb23.gif' + const t = `See this\n${url}\nnext` + const merged = getMarkupProtectRanges(t, 'markdown') + expect( + merged.some(([a, b]) => { + const s = t.slice(a, b) + return s.startsWith('https://') && s.endsWith('.gif') && s.includes('blossom.band') + }) + ).toBe(true) /* at least one merged range must run from the https scheme through the .gif extension */ + expect(rangeIntersectsMerged(t.indexOf('.gif'), 4, merged)).toBe(true) /* 4 is the length of the .gif suffix */ + }) + it('freezes BOOKSTR_MARKER passthrough and WIKILINK marker', () => { const book = 'BOOKSTR_MARKER:foo:BOOKSTR_END' const wiki = 'WIKILINK:my-page[My Page]' @@ -224,6 +238,27 @@ describe('translateAdvancedLabMarkup', () => { expect(out).toBe('[](https://x.com)') }) + it('markdown: does not translate raw https URL (npub-like blossom host / .gif path)', async () => { + const url = + 'https://npub1uq6dv4yq94704gk5r22jsqg9gy2wpxkk5dft9q5gugc8tj53nq2qg5q22d.blossom.band/x.gif' + const out = await translateAdvancedLabMarkup(`Before\n${url}\nAfter`, 'de', 'en', 'markdown') + expect(out).toContain(url) + expect(out).toBe('\n' + url + '\n') /* NOTE(review): this exact match expects the Before and After prose to come back empty, presumably because the translate stub used by this suite returns an empty string (confirm); it also makes the toContain check above redundant */ + }) + + it('markdown: freezes #hashtag tokens when mixed with prose', () => { + const t = 'Cool #meme and #memestr stuff' + const merged = getMarkupProtectRanges(t, 'markdown') + expect(rangeIntersectsMerged(t.indexOf('#meme'), 5, merged)).toBe(true) /* 5 is the length of #meme */ + expect(rangeIntersectsMerged(t.indexOf('#memestr'), 8, merged)).toBe(true) /* 8 is the length of #memestr */ + }) + + it('markdown: 
leaves hashtags unchanged inside translated prose', async () => { + const out = await translateAdvancedLabMarkup('Enjoy #meme today', 'de', 'en', 'markdown') + expect(out).toContain('#meme') + expect(out).toMatch(/#meme/) /* NOTE(review): checks the same substring as the toContain assertion above; one of the two is redundant */ + }) + it('markdown: translates optional link title in quotes', async () => { const out = await translateAdvancedLabMarkup( '[Hi](https://x.com "Link title")', diff --git a/src/lib/advanced-lab-markup-protect.ts b/src/lib/advanced-lab-markup-protect.ts index 68dc30f0..e2687528 100644 --- a/src/lib/advanced-lab-markup-protect.ts +++ b/src/lib/advanced-lab-markup-protect.ts @@ -5,7 +5,9 @@ * emphasis/strike delimiters, and AsciiDoc blocks, macros, stem, passthrough, xref. * Also: wiki `[[…]]` (incl. `book::`, `citation::`), `wikilink:`, `BOOKSTR_MARKER:…:BOOKSTR_END`, * `nostr:…` / bare NIP-19 bech32 (`npub1`…, `nprofile1`…, etc.), and `link:url[text]` macros. + * Raw `http://` / `https://` URLs (so blossom-style hosts `https://npub1….band/…/file.gif` are not split for translation). * NIP-style custom/native emoji shortcodes `:shortcode:` (see {@link EMOJI_SHORT_CODE_REGEX}). + * Markdown `#hashtag` tokens (Unicode letters/numbers/mark, `_`, `-`). */ import { EMOJI_SHORT_CODE_REGEX } from '@/lib/content-patterns' @@ -762,6 +764,42 @@ function collectWikilinkMarkerRanges(text: string, merged: [number, number][]): return ranges } +/** + * Standalone `http://` / `https://` URLs (not only inside Markdown `[]()`). + * Without this, a host like `npub1….blossom.band/…/x.gif` is split: bare-npub protection covers only the + * first 63 chars after `npub1`, leaving `.gif` and the path in the translatable stream. 
+ */ +function collectRawHttpUrlRanges(text: string, merged: [number, number][]): [number, number][] { + const ranges: [number, number][] = [] + let i = 0 + while (i < text.length - 7) { /* stop once fewer than 8 characters remain: too short for the https scheme, or for the http scheme plus one more character */ + if (posInMerged(i, merged)) { /* presumably true when i already lies inside a frozen range (confirm against posInMerged) */ + i++ + continue + } + const head = text.slice(i, i + 8).toLowerCase() /* lower-cased, so the scheme match is case-insensitive */ + const isHttps = head.startsWith('https://') + const isHttp = !isHttps && head.startsWith('http://') + if (!isHttps && !isHttp) { + i++ + continue + } + const start = i + i += isHttps ? 8 : 7 /* step over the scheme: 8 characters for https, 7 for http */ + let end = i + while (end < text.length) { + const c = text[end]! + if (/\s/.test(c)) break + if (c === '<' || c === '>' || c === '"' || c === "'" || c === '`') break + if (c === ')' || c === ']' || c === '}') break /* whitespace, angle brackets, quotes, a backtick or a closing bracket ends the URL; the terminator itself is excluded */ + end++ + } + if (end > start) ranges.push([start, end]) /* always true here because end is at least start + 7; the pair is start inclusive, end exclusive */ + i = end + } + return ranges +} + /** AsciiDoc / Markdown `link:url[text]` and `menu:…[…]` (toolbar + jumble). */ function collectLinkMenuColonMacros(text: string, merged: [number, number][]): [number, number][] { const ranges: [number, number][] = [] @@ -868,6 +906,19 @@ function collectAsciiDocXrefRanges(text: string, merged: [number, number][]): [n return ranges } +/** `#tag` / `#plebchain` spans; skipped when already inside a frozen range (e.g. URL). */ +function collectMarkdownHashtagRanges(text: string, merged: [number, number][]): [number, number][] { + const ranges: [number, number][] = [] + const re = /#[\p{L}\p{N}\p{M}_-]+/gu /* a hash sign followed by one or more Unicode letters, numbers, combining marks, underscores or hyphens. NOTE(review): the same character class is repeated in shouldSkipMachineTranslatePlainCore in translate-client.ts; keep the two in sync */ + let m: RegExpExecArray | null + while ((m = re.exec(text)) !== null) { + const start = m.index + if (posInMerged(start, merged)) continue /* only the position of the leading hash sign is tested */ + ranges.push([start, start + m[0].length]) /* start inclusive, end exclusive */ + } + return ranges +} + /** `:shortcode:` spans (custom / native emoji); skipped when already inside code, links, etc. 
*/ function collectEmojiShortcodeRanges(text: string, merged: [number, number][]): [number, number][] { const ranges: [number, number][] = [] @@ -896,6 +947,8 @@ export function getMarkupProtectRanges(text: string, mode: AdvancedLabMarkupMode merged = mergeSortedRanges([...merged, ...bookstrPass, ...wikilinkM]) const linkMenu = collectLinkMenuColonMacros(text, merged) merged = mergeSortedRanges([...merged, ...linkMenu]) + const rawHttpUrls = collectRawHttpUrlRanges(text, merged) /* runs before the nostr / bech32 pass below, so an npub-shaped host is frozen together with the rest of its URL */ + merged = mergeSortedRanges([...merged, ...rawHttpUrls]) const nostrBech = collectNostrAndBech32Ranges(text, merged) merged = mergeSortedRanges([...merged, ...nostrBech]) const triplePlus = collectAsciiDocTriplePlusPassthrough(text, merged) @@ -912,6 +965,8 @@ export function getMarkupProtectRanges(text: string, mode: AdvancedLabMarkupMode const bold = collectMarkdownDelimiterPairEdges(text, mergeSortedRanges([...merged, ...strike]), '**') const boldU = collectMarkdownDelimiterPairEdges(text, mergeSortedRanges([...merged, ...bold]), '__') merged = mergeSortedRanges([...merged, ...strike, ...bold, ...boldU]) + const hashtags = collectMarkdownHashtagRanges(text, merged) /* runs after the strike and bold delimiter ranges are merged; not executed in the else branch that handles AsciiDoc blocks */ + merged = mergeSortedRanges([...merged, ...hashtags]) } else { const adocBlocks = collectAsciiDocStructuredBlocks(text, merged) merged = mergeSortedRanges([...merged, ...adocBlocks]) diff --git a/src/lib/translate-client.test.ts b/src/lib/translate-client.test.ts new file mode 100644 index 00000000..b0a20e12 --- /dev/null +++ b/src/lib/translate-client.test.ts @@ -0,0 +1,18 @@ +import { describe, expect, it } from 'vitest' +import { shouldSkipMachineTranslatePlainCore } from '@/lib/translate-client' + +describe('shouldSkipMachineTranslatePlainCore', () => { + it('returns true for one or more ASCII hashtags with spaces', () => { + expect(shouldSkipMachineTranslatePlainCore('#meme #memes #memestr #plebchain')).toBe(true) + expect(shouldSkipMachineTranslatePlainCore(' #a #b ')).toBe(true) /* leading and trailing spaces are accepted because the function trims its input */ + }) + + it('returns false when there 
is non-hashtag prose', () => { + expect(shouldSkipMachineTranslatePlainCore('#meme is cool')).toBe(false) + expect(shouldSkipMachineTranslatePlainCore('see #meme')).toBe(false) + }) + + it('returns true for unicode hashtag letters', () => { + expect(shouldSkipMachineTranslatePlainCore('#café #naïve')).toBe(true) + }) +}) diff --git a/src/lib/translate-client.ts b/src/lib/translate-client.ts index 75e25ca2..5f6c20a2 100644 --- a/src/lib/translate-client.ts +++ b/src/lib/translate-client.ts @@ -144,6 +144,14 @@ export function clearTranslateLanguagesCache(): void { advertisedTranslateApiCodes = null } +/** + * LibreTranslate / Argos often corrupts hashtag-only lines (random glyphs, subtitle-like junk, + * dropped letters). Nostr-style hashtags must stay verbatim. Returns true when the trimmed input is nothing but one or more whitespace-separated hashtags (a # followed by Unicode letters, numbers, combining marks, underscores or hyphens); returns false for empty input and for any text with other content. + */ +export function shouldSkipMachineTranslatePlainCore(core: string): boolean { + return /^(?:#[\p{L}\p{N}\p{M}_-]+(?:\s+#[\p{L}\p{N}\p{M}_-]+)*)\s*$/u.test(core.trim()) /* the trailing whitespace quantifier is a no-op because the input is trimmed first */ +} + export async function translatePlainText( text: string, targetLang: string, @@ -162,6 +170,10 @@ export async function translatePlainText( return text } + if (shouldSkipMachineTranslatePlainCore(core)) { + return text /* hashtag-only input is handed back as-is, before the language codes below are resolved */ + } + const resolvedTarget = translateApiLanguageCode(targetLang) const resolvedSource = sourceLang === 'auto' ? 'auto' : translateApiLanguageCode(sourceLang)