diff --git a/src/components/Content/index.tsx b/src/components/Content/index.tsx index fc3c46d8..d043703f 100644 --- a/src/components/Content/index.tsx +++ b/src/components/Content/index.tsx @@ -1,16 +1,7 @@ import { useMediaExtraction } from '@/hooks' -import { - EmbeddedEmojiParser, - EmbeddedEventParser, - EmbeddedHashtagParser, - EmbeddedLNInvoiceParser, - EmbeddedMentionParser, - EmbeddedPaytoParser, - EmbeddedUrlParser, - EmbeddedWebsocketUrlParser, - parseContent -} from '@/lib/content-parser' +import { parseContent, PARSE_CONTENT_PARSERS_NOTE_TEXT } from '@/lib/content-parser' import { replaceStandardEmojiShortcodesInContent } from '@/lib/emoji-content' +import { logContentSpacing, reprString } from '@/lib/content-spacing-debug' import logger from '@/lib/logger' import { emojis, shortcodeToEmoji } from '@tiptap/extension-emoji' import { getEmojiInfosFromEmojiTags } from '@/lib/tag' @@ -93,17 +84,15 @@ export default function Content({ const emojiInfos = getEmojiInfosFromEmojiTags(event?.tags) const customShortcodes = emojiInfos.map((e) => e.shortcode) const normalized = replaceStandardEmojiShortcodesInContent(_content, customShortcodes) + if (normalized.includes('nostr:')) { + logContentSpacing('Content:useMemo', { + rawRepr: reprString(_content), + normalizedRepr: reprString(normalized), + same: _content === normalized + }) + } - const nodes = parseContent(normalized, [ - EmbeddedUrlParser, - EmbeddedLNInvoiceParser, - EmbeddedPaytoParser, - EmbeddedWebsocketUrlParser, - EmbeddedEventParser, - EmbeddedMentionParser, - EmbeddedHashtagParser, - EmbeddedEmojiParser - ]) + const nodes = parseContent(normalized, PARSE_CONTENT_PARSERS_NOTE_TEXT) return { nodes, emojiInfos } }, [_content, event]) diff --git a/src/components/ContentPreview/Content.tsx b/src/components/ContentPreview/Content.tsx index 35ed8a3b..8c0adef7 100644 --- a/src/components/ContentPreview/Content.tsx +++ b/src/components/ContentPreview/Content.tsx @@ -1,11 +1,4 @@ -import { - EmbeddedEmojiParser, - EmbeddedEventParser, - EmbeddedMentionParser, - EmbeddedPaytoParser, - EmbeddedUrlParser, - parseContent -} from '@/lib/content-parser' +import { parseContent, PARSE_CONTENT_PARSERS_NOTE_TEXT } from '@/lib/content-parser' import { replaceStandardEmojiShortcodesInContent } from '@/lib/emoji-content' import { emojis, shortcodeToEmoji } from '@tiptap/extension-emoji' import { cn } from '@/lib/utils' @@ -29,13 +22,7 @@ export default function Content({ const nodes = useMemo(() => { const customShortcodes = emojiInfos?.map((e) => e.shortcode) ?? [] const normalized = replaceStandardEmojiShortcodesInContent(content, customShortcodes) - return parseContent(normalized, [ - EmbeddedUrlParser, - EmbeddedPaytoParser, - EmbeddedEventParser, - EmbeddedMentionParser, - EmbeddedEmojiParser - ]) + return parseContent(normalized, PARSE_CONTENT_PARSERS_NOTE_TEXT) }, [content, emojiInfos]) return ( diff --git a/src/components/Note/AsciidocArticle/AsciidocArticle.tsx b/src/components/Note/AsciidocArticle/AsciidocArticle.tsx index 8a73e5c6..9c6e8408 100644 --- a/src/components/Note/AsciidocArticle/AsciidocArticle.tsx +++ b/src/components/Note/AsciidocArticle/AsciidocArticle.tsx @@ -22,6 +22,11 @@ import { ReplyProvider } from '@/providers/ReplyProvider' import Wikilink from '@/components/UniversalContent/Wikilink' import { BookstrContent } from '@/components/Bookstr' import { preprocessAsciidocMediaLinks } from '../MarkdownArticle/preprocessMarkup' +import { + NOSTR_ASCIIDOC_EARLY_LINK_REGEX, + NOSTR_ASCIIDOC_TEXT_NODE_REGEX, + NOSTR_HTML_BECH32_RELAXED +} from '@/lib/content-patterns' import logger from '@/lib/logger' import { extractBookMetadata } from '@/lib/bookstr-parser' import { ExtendedKind } from '@/constants' @@ -66,7 +71,7 @@ function convertMarkdownToAsciidoc(content: string): string { // naddr addresses can be 200+ characters, so we use + instead of specific length // Also handle optional [] suffix (empty link text in AsciiDoc) // Note: Citations are already protected in passthrough (+++...+++), so nostr: links inside them won't be processed - asciidoc = asciidoc.replace(/nostr:(npub1[a-z0-9]{58,}|nprofile1[a-z0-9]+|note1[a-z0-9]{58,}|nevent1[a-z0-9]+|naddr1[a-z0-9]+)(\[\])?/g, (_match, bech32Id, emptyBrackets) => { + asciidoc = asciidoc.replace(NOSTR_ASCIIDOC_EARLY_LINK_REGEX, (_match, bech32Id, emptyBrackets) => { // Convert directly to AsciiDoc link format // This will be processed later in HTML post-processing to render as React components // If [] suffix is present, use empty link text, otherwise use the bech32Id @@ -690,7 +695,12 @@ export default function AsciidocArticle({ // Match the full bech32 address format - addresses can vary in length // npub: 58 chars, nprofile: variable, note: 58 chars, nevent: variable, naddr: 200+ chars // Use a more flexible pattern that matches any valid bech32 address - htmlString = htmlString.replace(/]*href=["']nostr:((?:npub1|nprofile1|note1|nevent1|naddr1)[a-z0-9]{20,})["'][^>]*>([^<]*)<\/a>/gi, (_match, bech32Id, _linkText) => { + htmlString = htmlString.replace( + new RegExp( + `]*href=["']nostr:(${NOSTR_HTML_BECH32_RELAXED})["'][^>]*>([^<]*)`, + 'gi' + ), + (_match, bech32Id, _linkText) => { // Validate bech32 ID and create appropriate placeholder if (!bech32Id) return _match @@ -709,13 +719,22 @@ export default function AsciidocArticle({ // Process text nodes by replacing content between > and < // Use more flexible regex that matches any valid bech32 address (naddr can be 200+ chars) // Match addresses with optional [] suffix - htmlString = htmlString.replace(/>([^<]*nostr:((?:npub1|nprofile1|note1|nevent1|naddr1)[a-z0-9]{20,})(\[\])?[^<]*) { + htmlString = htmlString.replace( + new RegExp( + `>([^<]*nostr:(${NOSTR_HTML_BECH32_RELAXED})(\\[\\])?[^<]*)<`, + 'g' + ), + (_match, textContent) => { // Extract nostr addresses from the text content - use flexible pattern that handles long addresses // npub and note are typically 58 chars, but naddr can be 200+ chars - const nostrRegex = /nostr:((?:npub1[a-z0-9]{58,}|nprofile1[a-z0-9]+|note1[a-z0-9]{58,}|nevent1[a-z0-9]+|naddr1[a-z0-9]+))(\[\])?/g + const nostrRegex = new RegExp( + NOSTR_ASCIIDOC_TEXT_NODE_REGEX.source, + NOSTR_ASCIIDOC_TEXT_NODE_REGEX.flags + ) let processedText = textContent const replacements: Array<{ start: number; end: number; replacement: string }> = [] + nostrRegex.lastIndex = 0 let m while ((m = nostrRegex.exec(textContent)) !== null) { const bech32Id = m[1] @@ -751,7 +770,9 @@ export default function AsciidocArticle({ // Fallback: ensure any remaining nostr: addresses are shown as plain text // This catches any that weren't converted to placeholders - htmlString = htmlString.replace(/([^>])nostr:((?:npub1|nprofile1|note1|nevent1|naddr1)[a-z0-9]{20,})(\[\])?/g, (_match, prefix, bech32Id, emptyBrackets) => { + htmlString = htmlString.replace( + new RegExp(`([^>])nostr:(${NOSTR_HTML_BECH32_RELAXED})(\\[\\])?`, 'g'), + (_match, prefix, bech32Id, emptyBrackets) => { // Show as plain text if not already in a tag or placeholder return `${prefix}nostr:${bech32Id}${emptyBrackets || ''}` }) diff --git a/src/components/Note/MarkdownArticle/MarkdownArticle.tsx b/src/components/Note/MarkdownArticle/MarkdownArticle.tsx index 43c3d004..1f0278ff 100644 --- a/src/components/Note/MarkdownArticle/MarkdownArticle.tsx +++ b/src/components/Note/MarkdownArticle/MarkdownArticle.tsx @@ -12,7 +12,8 @@ import { cleanUrl, isImage, isMedia, isVideo, isAudio, isWebsocketUrl } from '@/ import { getImetaInfosFromEvent } from '@/lib/event' import { Event, kinds } from 'nostr-tools' import Emoji from '@/components/Emoji' -import { ExtendedKind, EMOJI_SHORT_CODE_REGEX, WS_URL_REGEX, YOUTUBE_URL_REGEX } from '@/constants' +import { ExtendedKind, WS_URL_REGEX, YOUTUBE_URL_REGEX } from '@/constants' +import { EMOJI_SHORT_CODE_REGEX, NOSTR_URI_INLINE_REGEX } from '@/lib/content-patterns' import { replaceStandardEmojiShortcodesInContent } from '@/lib/emoji-content' import { getEmojiInfosFromEmojiTags } from '@/lib/tag' import { TEmoji } from '@/types' @@ -29,6 +30,7 @@ import { PAYTO_URI_REGEX, parsePaytoUri } from '@/lib/payto' import PaytoLink from '@/components/PaytoLink' import katex from 'katex' import 'katex/dist/katex.min.css' +import { isContentSpacingDebug, reprString } from '@/lib/content-spacing-debug' import logger from '@/lib/logger' /** @@ -1087,7 +1089,7 @@ function parseMarkdownContent( }) // Nostr addresses (nostr:npub1..., nostr:note1..., etc.) - const nostrRegex = /nostr:(npub1[a-z0-9]{58}|nprofile1[a-z0-9]+|note1[a-z0-9]{58}|nevent1[a-z0-9]+|naddr1[a-z0-9]+)/g + const nostrRegex = new RegExp(NOSTR_URI_INLINE_REGEX.source, NOSTR_URI_INLINE_REGEX.flags) const nostrMatches = Array.from(content.matchAll(nostrRegex)) nostrMatches.forEach(match => { if (match.index !== undefined) { @@ -2679,12 +2681,26 @@ function parseMarkdownContent( * - Footnote references: [^1] (handled at block level, but parsed here for inline context) */ function parseInlineMarkdown(text: string, keyPrefix: string, _footnotes: Map = new Map(), emojiInfos: TEmoji[] = []): React.ReactNode[] { + if (isContentSpacingDebug() && text.includes('nostr:')) { + // eslint-disable-next-line no-console + console.log('[jumble content-spacing] parseInlineMarkdown:before-normalize', { + keyPrefix, + repr: reprString(text) + }) + } // Normalize newlines to spaces at the start (defensive - text should already be normalized, but ensure it) // This prevents any hard breaks within inline content text = text.replace(/\n/g, ' ') // Collapse multiple consecutive spaces/tabs (2+) into a single space, but preserve single spaces text = text.replace(/[ \t]{2,}/g, ' ') - + if (isContentSpacingDebug() && text.includes('nostr:')) { + // eslint-disable-next-line no-console + console.log('[jumble content-spacing] parseInlineMarkdown:after-normalize', { + keyPrefix, + repr: reprString(text) + }) + } + const parts: React.ReactNode[] = [] let lastIndex = 0 const inlinePatterns: Array<{ index: number; end: number; type: string; data: any }> = [] @@ -2937,7 +2953,7 @@ function parseInlineMarkdown(text: string, keyPrefix: string, _footnotes: Map { if (match.index !== undefined) { @@ -3001,7 +3017,7 @@ function parseInlineMarkdown(text: string, keyPrefix: string, _footnotes: Map [nostr:npub1...] // nostr:note1... -> [nostr:note1...] // etc. - const nostrRegex = /nostr:(npub1[a-z0-9]{58}|nprofile1[a-z0-9]+|note1[a-z0-9]{58}|nevent1[a-z0-9]+|naddr1[a-z0-9]+)/g + const nostrRegex = new RegExp(NOSTR_URI_INLINE_REGEX.source, NOSTR_URI_INLINE_REGEX.flags) processed = processed.replace(nostrRegex, (match) => { // Already in a link? Don't double-wrap // Check if it's already in markdown link syntax [text](nostr:...) diff --git a/src/components/PostEditor/Mentions.tsx b/src/components/PostEditor/Mentions.tsx index ef727187..1f5d2e5a 100644 --- a/src/components/PostEditor/Mentions.tsx +++ b/src/components/PostEditor/Mentions.tsx @@ -3,6 +3,7 @@ import { Popover, PopoverContent, PopoverTrigger } from '@/components/ui/popover import { useMuteList } from '@/providers/MuteListProvider' import { useNostr } from '@/providers/NostrProvider' import client from '@/services/client.service' +import { NOSTR_URI_FOR_REPLY_PUBKEYS_REGEX } from '@/lib/content-patterns' import logger from '@/lib/logger' import { Check } from 'lucide-react' import { Event, nip19 } from 'nostr-tools' @@ -144,9 +145,7 @@ export async function extractMentions(content: string, parentEvent?: Event) { pubkeys.push(parentEventPubkey) } - const matches = content.match( - /nostr:(npub1[a-z0-9]{58}|nprofile1[a-z0-9]+|note1[a-z0-9]{58}|nevent1[a-z0-9]+)/g - ) + const matches = content.match(NOSTR_URI_FOR_REPLY_PUBKEYS_REGEX) const addToSet = (arr: string[], pubkey: string) => { if (!arr.includes(pubkey)) arr.push(pubkey) diff --git a/src/components/UniversalContent/SimpleContent.tsx b/src/components/UniversalContent/SimpleContent.tsx index 94afe0f7..17797580 100644 --- a/src/components/UniversalContent/SimpleContent.tsx +++ b/src/components/UniversalContent/SimpleContent.tsx @@ -1,6 +1,7 @@ import { useMemo } from 'react' import { cleanUrl } from '@/lib/url' import { Event } from 'nostr-tools' +import { logContentSpacing, reprString } from '@/lib/content-spacing-debug' import { parseNostrContent, renderNostrContent } from '@/lib/nostr-parser.tsx' import { cn } from '@/lib/utils' @@ -30,12 +31,28 @@ export default function SimpleContent({ } ) + if (rawContent.includes('nostr:')) { + logContentSpacing('SimpleContent:processedContent', { + rawRepr: reprString(rawContent), + cleanedRepr: reprString(cleaned), + same: rawContent === cleaned + }) + } return cleaned }, [content, event?.content]) // Parse content for nostr addresses and media const parsedContent = useMemo(() => { - return parseNostrContent(processedContent, event) + const parsed = parseNostrContent(processedContent, event) + if (processedContent.includes('nostr:')) { + logContentSpacing('SimpleContent:parsedContent', { + elementCount: parsed.elements.length, + tail: parsed.elements.slice(-3).map((e) => + e.type === 'text' ? { type: 'text', repr: reprString(e.content) } : { type: e.type } + ) + }) + } + return parsed }, [processedContent, event]) return ( diff --git a/src/constants.ts b/src/constants.ts index 9eac1c6b..8a8c8eec 100644 --- a/src/constants.ts +++ b/src/constants.ts @@ -265,10 +265,13 @@ export const URL_REGEX = export const WS_URL_REGEX = /wss?:\/\/[\w\p{L}\p{N}\p{M}&.\-/?=#@%+_:!~*]+[^\s.,;:'")\]}!?,。;:"'!?】)]/giu export const EMAIL_REGEX = /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/ -/** Matches :shortcode: or :short code: (allows letters, digits, underscore, hyphen, space) */ -export const EMOJI_SHORT_CODE_REGEX = /:[a-zA-Z0-9_\-\s]+:/g -export const EMBEDDED_EVENT_REGEX = /nostr:(note1[a-z0-9]{58}|nevent1[a-z0-9]+|naddr1[a-z0-9]+)/g -export const EMBEDDED_MENTION_REGEX = /nostr:(npub1[a-z0-9]{58}|nprofile1[a-z0-9]+)/g +/** @see {@link '@/lib/content-patterns'} — single source for emoji + nostr regexes */ +export { + EMOJI_SHORT_CODE_MAX_INNER_LENGTH, + EMOJI_SHORT_CODE_REGEX, + EMBEDDED_EVENT_REGEX, + EMBEDDED_MENTION_REGEX +} from '@/lib/content-patterns' export const HASHTAG_REGEX = /#[a-zA-Z0-9_\-\u00C0-\u017F\u0100-\u017F\u0180-\u024F\u1E00-\u1EFF]+/g export const LN_INVOICE_REGEX = /(ln(?:bc|tb|bcrt))([0-9]+[munp]?)?1([02-9ac-hj-np-z]+)/g export const EMOJI_REGEX = diff --git a/src/lib/content-parser.ts b/src/lib/content-parser.ts index d92fa524..3ca33231 100644 --- a/src/lib/content-parser.ts +++ b/src/lib/content-parser.ts @@ -1,14 +1,18 @@ import { - EMBEDDED_EVENT_REGEX, - EMBEDDED_MENTION_REGEX, - EMOJI_SHORT_CODE_REGEX, HASHTAG_REGEX, LN_INVOICE_REGEX, URL_REGEX, WS_URL_REGEX, YOUTUBE_URL_REGEX } from '@/constants' +import { + EMBEDDED_EVENT_REGEX, + EMBEDDED_MENTION_REGEX, + EMOJI_SHORT_CODE_REGEX, + LEGACY_PROFILE_BECH32_REGEX +} from '@/lib/content-patterns' import { PAYTO_URI_REGEX } from '@/lib/payto' +import { logContentSpacing, reprString } from '@/lib/content-spacing-debug' import { isImage, isMedia } from './url' export type TEmbeddedNodeType = @@ -53,7 +57,7 @@ export const EmbeddedMentionParser: TContentParser = { export const EmbeddedLegacyMentionParser: TContentParser = { type: 'legacy-mention', - regex: /npub1[a-z0-9]{58}|nprofile1[a-z0-9]+/g + regex: LEGACY_PROFILE_BECH32_REGEX } export const EmbeddedEventParser: TContentParser = { @@ -133,10 +137,39 @@ export const EmbeddedUrlParser: TContentParser = (content: string) => { return result } +/** + * Shared pipeline for kind-1–style strings (note body, reply preview, profile fields using parseContent). + * Order matters. + */ +export const PARSE_CONTENT_PARSERS_NOTE_TEXT: TContentParser[] = [ + EmbeddedUrlParser, + EmbeddedLNInvoiceParser, + EmbeddedPaytoParser, + EmbeddedWebsocketUrlParser, + EmbeddedEventParser, + EmbeddedMentionParser, + EmbeddedHashtagParser, + EmbeddedEmojiParser +] + export function parseContent(content: string, parsers: TContentParser[]) { + const trace = content.includes('nostr:') + if (trace) { + logContentSpacing('parseContent:input', { + rawLength: content.length, + afterTrimRepr: reprString(content.trim()), + trimRemovedLeading: content.length - content.trimStart().length, + trimRemovedTrailing: content.length - content.trimEnd().length + }) + } + let nodes: TEmbeddedNode[] = [{ type: 'text', data: content.trim() }] - parsers.forEach((parser) => { + parsers.forEach((parser, parserIndex) => { + const parserLabel = + typeof parser === 'function' ? `fn[${parserIndex}]` : parser.type + const beforeSummary = trace ? summarizeContentNodesForDebug(nodes) : null + nodes = nodes .flatMap((node) => { if (node.type !== 'text') return [node] @@ -178,15 +211,38 @@ export function parseContent(content: string, parsers: TContentParser[]) { return result }) .filter((n) => n.data !== '') + + if (trace) { + logContentSpacing('parseContent:after-parser', { + parser: parserLabel, + parserIndex, + before: beforeSummary, + after: summarizeContentNodesForDebug(nodes) + }) + } }) nodes = mergeConsecutiveTextNodes(nodes) nodes = mergeConsecutiveImageNodes(nodes) nodes = removeExtraNewlines(nodes) + if (trace) { + logContentSpacing('parseContent:final', { + afterMergeNewlines: summarizeContentNodesForDebug(nodes) + }) + } + return nodes } +function summarizeContentNodesForDebug(nodes: TEmbeddedNode[]): Array<{ type: string; repr?: string }> { + return nodes.map((n) => { + if (n.type === 'text') return { type: 'text', repr: reprString(n.data) } + if (n.type === 'images') return { type: 'images', repr: `[${n.data.length} urls]` } + return { type: n.type, repr: typeof n.data === 'string' ? reprString(n.data) : undefined } + }) +} + function mergeConsecutiveTextNodes(nodes: TEmbeddedNode[]) { const merged: TEmbeddedNode[] = [] let currentText = '' diff --git a/src/lib/content-patterns.ts b/src/lib/content-patterns.ts new file mode 100644 index 00000000..8c8df36d --- /dev/null +++ b/src/lib/content-patterns.ts @@ -0,0 +1,101 @@ +/** + * Single source of truth for :emoji: shortcodes and nostr: bech32 patterns. + * Used by MarkdownArticle, parseContent, nostr-parser, previews, post editor, AsciiDoc, etc. + */ + +// --- Emoji (:shortcode:) ---------------------------------------------------- + +export const EMOJI_SHORT_CODE_MAX_INNER_LENGTH = 20 as const + +const _emojiInnerQuantifier = EMOJI_SHORT_CODE_MAX_INNER_LENGTH - 1 + +/** + * - (?|\\]|,|\\.|!|\\?|;|:)' +export const NOSTR_PARSER_REGEX = new RegExp( + `(?:^|\\s|>|\\[)nostr:(${NOSTR_CONTENT_BECH32_ALT})${NOSTR_PARSER_LOOKAHEAD}`, + 'g' +) + +/** AsciiDoc: optional [] after nostr id */ +export const NOSTR_ASCIIDOC_EARLY_LINK_REGEX = new RegExp( + `nostr:(${NOSTR_ASCIIDOC_SOURCE_BECH32_ALT})(\\[\\])?`, + 'g' +) + +/** AsciiDoc HTML: same capture groups as early link, for text-node scanning */ +export const NOSTR_ASCIIDOC_TEXT_NODE_REGEX = new RegExp( + `nostr:(${NOSTR_ASCIIDOC_SOURCE_BECH32_ALT})(\\[\\])?`, + 'g' +) diff --git a/src/lib/content-spacing-debug.ts b/src/lib/content-spacing-debug.ts new file mode 100644 index 00000000..4170745e --- /dev/null +++ b/src/lib/content-spacing-debug.ts @@ -0,0 +1,28 @@ +/** + * Verbose content/spacing traces for debugging (e.g. "Name: nostr:npub…" collapsing). + * + * Enable in dev: localStorage.setItem('jumble-debug-content', 'true') then reload. + * Disable: localStorage.removeItem('jumble-debug-content') + */ + +const STORAGE_KEY = 'jumble-debug-content' + +export function isContentSpacingDebug(): boolean { + try { + return import.meta.env.DEV && typeof localStorage !== 'undefined' && localStorage.getItem(STORAGE_KEY) === 'true' + } catch { + return false + } +} + +/** JSON.stringify so spaces/newlines are visible in the console */ +export function reprString(s: string, maxLen = 500): string { + const t = s.length > maxLen ? `${s.slice(0, maxLen)}…(+${s.length - maxLen} chars)` : s + return JSON.stringify(t) +} + +export function logContentSpacing(phase: string, detail: Record): void { + if (!isContentSpacingDebug()) return + // eslint-disable-next-line no-console + console.log(`[jumble content-spacing] ${phase}`, detail) +} diff --git a/src/lib/emoji-content.ts b/src/lib/emoji-content.ts index 7a5e61a2..7bd9d025 100644 --- a/src/lib/emoji-content.ts +++ b/src/lib/emoji-content.ts @@ -1,3 +1,4 @@ +import { EMOJI_SHORT_CODE_REGEX } from '@/lib/content-patterns' import { emojis, shortcodeToEmoji } from '@tiptap/extension-emoji' const STANDARD_EMOJI_LIMIT = 20 @@ -42,7 +43,7 @@ export function replaceStandardEmojiShortcodesInContent( const customSet = customShortcodes instanceof Set ? customShortcodes : new Set(customShortcodes ?? []) - return content.replace(/:([a-zA-Z0-9_\-\s]+):/g, (match, shortcode: string) => { + return content.replace(EMOJI_SHORT_CODE_REGEX, (match, shortcode: string) => { const trimmed = shortcode.trim() if (customSet.has(trimmed)) return match const native = shortcodeToEmoji(trimmed, emojis) ?? shortcodeToEmoji(trimmed.replace(/\s+/g, '_'), emojis) diff --git a/src/lib/event.ts b/src/lib/event.ts index 4e75b361..cc6417f6 100644 --- a/src/lib/event.ts +++ b/src/lib/event.ts @@ -1,4 +1,5 @@ -import { CALENDAR_EVENT_KINDS, EMBEDDED_MENTION_REGEX, ExtendedKind } from '@/constants' +import { CALENDAR_EVENT_KINDS, ExtendedKind } from '@/constants' +import { EMBEDDED_MENTION_REGEX, NOSTR_EMBEDDED_NOTE_REGEX } from '@/lib/content-patterns' import client from '@/services/client.service' import { TImetaInfo } from '@/types' import { LRUCache } from 'lru-cache' @@ -223,8 +224,7 @@ export function getEmbeddedNoteBech32Ids(event: Event) { if (cache) return cache const embeddedNoteBech32Ids: string[] = [] - const embeddedNoteRegex = /nostr:(note1[a-z0-9]{58}|nevent1[a-z0-9]+)/g - ;(event.content.match(embeddedNoteRegex) || []).forEach((note) => { + ;(event.content.match(NOSTR_EMBEDDED_NOTE_REGEX) || []).forEach((note) => { try { const { type, data } = nip19.decode(note.split(':')[1]) if (type === 'nevent') { diff --git a/src/lib/nostr-parser.tsx b/src/lib/nostr-parser.tsx index 50e5e01c..0cc2d8d8 100644 --- a/src/lib/nostr-parser.tsx +++ b/src/lib/nostr-parser.tsx @@ -13,7 +13,9 @@ import { parsePaytoUri } from '@/lib/payto' import PaytoLink from '@/components/PaytoLink' import { TImetaInfo } from '@/types' import { Event } from 'nostr-tools' +import { NOSTR_PARSER_REGEX } from '@/lib/content-patterns' import logger from '@/lib/logger' +import { logContentSpacing, reprString } from '@/lib/content-spacing-debug' export interface ParsedNostrContent { elements: Array<{ @@ -39,9 +41,16 @@ export interface ParsedNostrContent { */ export function parseNostrContent(content: string, event?: Event): ParsedNostrContent { const elements: ParsedNostrContent['elements'] = [] - - // Regex to match nostr: addresses that are not inside URLs or other contexts - const nostrRegex = /(?:^|\s|>|\[)nostr:(npub1[a-z0-9]{58}|nprofile1[a-z0-9]+|note1[a-z0-9]{58}|nevent1[a-z0-9]+|naddr1[a-z0-9]+)(?=\s|$|>|\]|,|\.|!|\?|;|:)/g + const traceNostr = content.includes('nostr:') + if (traceNostr) { + logContentSpacing('parseNostrContent:input', { + length: content.length, + repr: reprString(content), + eventId: event?.id + }) + } + + const nostrRegex = new RegExp(NOSTR_PARSER_REGEX.source, NOSTR_PARSER_REGEX.flags) // Regex to match all URLs (we'll filter by type later) const urlRegex = /(https?:\/\/[^\s]+)/gi @@ -79,12 +88,25 @@ export function parseNostrContent(content: string, event?: Event): ParsedNostrCo // Find nostr matches let nostrMatch while ((nostrMatch = nostrRegex.exec(content)) !== null) { - if (isNostrAddressInValidContext(content, nostrMatch.index, nostrMatch.index + nostrMatch[0].length)) { + const nStart = nostrMatch.index + const nEnd = nostrMatch.index + nostrMatch[0].length + const valid = isNostrAddressInValidContext(content, nStart, nEnd) + if (traceNostr) { + logContentSpacing('parseNostrContent:nostr-regex', { + index: nStart, + end: nEnd, + fullMatchRepr: reprString(nostrMatch[0]), + validContext: valid, + charBeforeIndex: nStart > 0 ? reprString(content[nStart - 1]) : '(start)', + charAtIndex: reprString(content[nStart] ?? '') + }) + } + if (valid) { allMatches.push({ type: 'nostr', match: nostrMatch, - start: nostrMatch.index, - end: nostrMatch.index + nostrMatch[0].length + start: nStart, + end: nEnd }) } } @@ -269,6 +291,23 @@ export function parseNostrContent(content: string, event?: Event): ParsedNostrCo const isAtEnd = end === content.length || content[end] === '\n' const needsSpaceBefore = !isAtStart && content[start - 1] !== ' ' const needsSpaceAfter = !isAtEnd && content[end] !== ' ' + if (traceNostr) { + const textBefore = start > lastIndex ? content.slice(lastIndex, start) : '' + logContentSpacing('parseNostrContent:nostr-element', { + lastIndex, + start, + end, + textBeforeSliceRepr: reprString(textBefore), + isAtStart, + isAtEnd, + needsSpaceBefore, + needsSpaceAfter, + prevCharRepr: + start > 0 ? reprString(content[start - 1]) : '(none)', + nextCharRepr: + end < content.length ? reprString(content[end]) : '(eof)' + }) + } if (needsSpaceBefore) { elements.push({ @@ -422,6 +461,12 @@ export function parseNostrContent(content: string, event?: Event): ParsedNostrCo images: allImages }) + if (traceNostr) { + logContentSpacing('parseNostrContent:result', { + branch: 'gallery', + sequence: summarizeParsedElementsForDebug(filteredElements) + }) + } return { elements: filteredElements } } @@ -433,9 +478,25 @@ export function parseNostrContent(content: string, event?: Event): ParsedNostrCo }) } + if (traceNostr) { + logContentSpacing('parseNostrContent:result', { + branch: elements.length === 1 && elements[0].type === 'text' ? 'text-only' : 'elements', + sequence: summarizeParsedElementsForDebug(elements) + }) + } return { elements } } +function summarizeParsedElementsForDebug( + els: ParsedNostrContent['elements'] +): Array<{ type: string; repr?: string; bech32Id?: string }> { + return els.map((e) => { + if (e.type === 'text') return { type: 'text', repr: reprString(e.content) } + if (e.type === 'nostr') return { type: 'nostr', bech32Id: e.bech32Id } + return { type: e.type } + }) +} + /** * Check if a nostr address is in a valid context (not inside URLs, etc.) */ diff --git a/src/lib/tiptap.ts b/src/lib/tiptap.ts index 187f95c2..3066cb90 100644 --- a/src/lib/tiptap.ts +++ b/src/lib/tiptap.ts @@ -1,10 +1,19 @@ +import { logContentSpacing, reprString } from '@/lib/content-spacing-debug' import customEmojiService from '@/services/custom-emoji.service' import { emojis, shortcodeToEmoji } from '@tiptap/extension-emoji' import { JSONContent } from '@tiptap/react' import { nip19 } from 'nostr-tools' export function parseEditorJsonToText(node?: JSONContent) { - let text = _parseEditorJsonToText(node).trim() + const rawJoined = _parseEditorJsonToText(node) + let text = rawJoined.trim() + const trace = rawJoined.includes('nostr:') || /npub1|nprofile1/.test(rawJoined) + if (trace) { + logContentSpacing('parseEditorJsonToText:joined', { + beforeTrimRepr: reprString(rawJoined), + afterTrimRepr: reprString(text) + }) + } const regex = /(?:^|\s)(nevent|naddr|nprofile|npub)1[a-zA-Z0-9]+/g text = text.replace(regex, (match) => { @@ -20,7 +29,14 @@ export function parseEditorJsonToText(node?: JSONContent) { }) // Ensure space before nostr: when not already preceded by space (fixes "Like:nostr:npub" and "Like:\nnostr:npub") + const beforeNostrSpacePass = text text = text.replace(/(.)(?=nostr:)/g, (_, prev) => (prev === ' ' ? prev : prev + ' ')) + if (trace) { + logContentSpacing('parseEditorJsonToText:after-nostr-prefix-pass', { + beforeRepr: reprString(beforeNostrSpacePass), + afterRepr: reprString(text) + }) + } return text } diff --git a/src/pages/secondary/NotePage/index.tsx b/src/pages/secondary/NotePage/index.tsx index 0e3860a5..c18b2486 100644 --- a/src/pages/secondary/NotePage/index.tsx +++ b/src/pages/secondary/NotePage/index.tsx @@ -21,10 +21,9 @@ import type { Event } from 'nostr-tools' import { kinds, nip19 } from 'nostr-tools' import { forwardRef, useEffect, useMemo, useState } from 'react' import { useTranslation } from 'react-i18next' +import { NOSTR_URI_NADDR_REGEX } from '@/lib/content-patterns' import NotFound from './NotFound' -const NADDR_REGEX = /nostr:(naddr1[a-z0-9]+)/g - // Helper function to get event type name (matching WebPreview) function getEventTypeName(kind: number): string { switch (kind) { @@ -109,8 +108,8 @@ const NotePage = forwardRef(({ id, index, hideTitlebar = false }: { id?: string; // When viewing a kind-24 invite (e.g. from notifications), extract calendar event naddr from content and show full calendar card with RSVP const calendarInviteNaddr = useMemo(() => { if (finalEvent?.kind !== ExtendedKind.PUBLIC_MESSAGE || !finalEvent.content?.trim()) return undefined - const match = NADDR_REGEX.exec(finalEvent.content) - NADDR_REGEX.lastIndex = 0 + const match = NOSTR_URI_NADDR_REGEX.exec(finalEvent.content) + NOSTR_URI_NADDR_REGEX.lastIndex = 0 const naddr = match?.[1] if (!naddr) return undefined try { diff --git a/src/services/relay-selection.service.ts b/src/services/relay-selection.service.ts index 3e7afb29..fd14f2b9 100644 --- a/src/services/relay-selection.service.ts +++ b/src/services/relay-selection.service.ts @@ -1,5 +1,6 @@ import { Event, kinds } from 'nostr-tools' import { ExtendedKind, FAST_WRITE_RELAY_URLS, RANDOM_PUBLISH_RELAY_COUNT } from '@/constants' +import { NOSTR_URI_FOR_REPLY_PUBKEYS_REGEX } from '@/lib/content-patterns' import client from '@/services/client.service' import { normalizeUrl, isLocalNetworkUrl } from '@/lib/url' import { TRelaySet, TRelayList } from '@/types' @@ -734,9 +735,7 @@ class RelaySelectionService { } // Extract nostr addresses from content - const matches = content.match( - /nostr:(npub1[a-z0-9]{58}|nprofile1[a-z0-9]+|note1[a-z0-9]{58}|nevent1[a-z0-9]+)/g - ) + const matches = content.match(NOSTR_URI_FOR_REPLY_PUBKEYS_REGEX) if (matches) {