Browse Source

correct and consolidate regex

imwald
Silberengel 1 month ago
parent
commit
8fa52c5533
  1. 31
      src/components/Content/index.tsx
  2. 17
      src/components/ContentPreview/Content.tsx
  3. 31
      src/components/Note/AsciidocArticle/AsciidocArticle.tsx
  4. 24
      src/components/Note/MarkdownArticle/MarkdownArticle.tsx
  5. 3
      src/components/Note/MarkdownArticle/preprocessMarkup.ts
  6. 5
      src/components/PostEditor/Mentions.tsx
  7. 19
      src/components/UniversalContent/SimpleContent.tsx
  8. 11
      src/constants.ts
  9. 66
      src/lib/content-parser.ts
  10. 101
      src/lib/content-patterns.ts
  11. 28
      src/lib/content-spacing-debug.ts
  12. 3
      src/lib/emoji-content.ts
  13. 6
      src/lib/event.ts
  14. 71
      src/lib/nostr-parser.tsx
  15. 18
      src/lib/tiptap.ts
  16. 7
      src/pages/secondary/NotePage/index.tsx
  17. 5
      src/services/relay-selection.service.ts

31
src/components/Content/index.tsx

@ -1,16 +1,7 @@ @@ -1,16 +1,7 @@
import { useMediaExtraction } from '@/hooks'
import {
EmbeddedEmojiParser,
EmbeddedEventParser,
EmbeddedHashtagParser,
EmbeddedLNInvoiceParser,
EmbeddedMentionParser,
EmbeddedPaytoParser,
EmbeddedUrlParser,
EmbeddedWebsocketUrlParser,
parseContent
} from '@/lib/content-parser'
import { parseContent, PARSE_CONTENT_PARSERS_NOTE_TEXT } from '@/lib/content-parser'
import { replaceStandardEmojiShortcodesInContent } from '@/lib/emoji-content'
import { logContentSpacing, reprString } from '@/lib/content-spacing-debug'
import logger from '@/lib/logger'
import { emojis, shortcodeToEmoji } from '@tiptap/extension-emoji'
import { getEmojiInfosFromEmojiTags } from '@/lib/tag'
@ -93,17 +84,15 @@ export default function Content({ @@ -93,17 +84,15 @@ export default function Content({
const emojiInfos = getEmojiInfosFromEmojiTags(event?.tags)
const customShortcodes = emojiInfos.map((e) => e.shortcode)
const normalized = replaceStandardEmojiShortcodesInContent(_content, customShortcodes)
if (normalized.includes('nostr:')) {
logContentSpacing('Content:useMemo', {
rawRepr: reprString(_content),
normalizedRepr: reprString(normalized),
same: _content === normalized
})
}
const nodes = parseContent(normalized, [
EmbeddedUrlParser,
EmbeddedLNInvoiceParser,
EmbeddedPaytoParser,
EmbeddedWebsocketUrlParser,
EmbeddedEventParser,
EmbeddedMentionParser,
EmbeddedHashtagParser,
EmbeddedEmojiParser
])
const nodes = parseContent(normalized, PARSE_CONTENT_PARSERS_NOTE_TEXT)
return { nodes, emojiInfos }
}, [_content, event])

17
src/components/ContentPreview/Content.tsx

@ -1,11 +1,4 @@ @@ -1,11 +1,4 @@
import {
EmbeddedEmojiParser,
EmbeddedEventParser,
EmbeddedMentionParser,
EmbeddedPaytoParser,
EmbeddedUrlParser,
parseContent
} from '@/lib/content-parser'
import { parseContent, PARSE_CONTENT_PARSERS_NOTE_TEXT } from '@/lib/content-parser'
import { replaceStandardEmojiShortcodesInContent } from '@/lib/emoji-content'
import { emojis, shortcodeToEmoji } from '@tiptap/extension-emoji'
import { cn } from '@/lib/utils'
@ -29,13 +22,7 @@ export default function Content({ @@ -29,13 +22,7 @@ export default function Content({
const nodes = useMemo(() => {
const customShortcodes = emojiInfos?.map((e) => e.shortcode) ?? []
const normalized = replaceStandardEmojiShortcodesInContent(content, customShortcodes)
return parseContent(normalized, [
EmbeddedUrlParser,
EmbeddedPaytoParser,
EmbeddedEventParser,
EmbeddedMentionParser,
EmbeddedEmojiParser
])
return parseContent(normalized, PARSE_CONTENT_PARSERS_NOTE_TEXT)
}, [content, emojiInfos])
return (

31
src/components/Note/AsciidocArticle/AsciidocArticle.tsx

@ -22,6 +22,11 @@ import { ReplyProvider } from '@/providers/ReplyProvider' @@ -22,6 +22,11 @@ import { ReplyProvider } from '@/providers/ReplyProvider'
import Wikilink from '@/components/UniversalContent/Wikilink'
import { BookstrContent } from '@/components/Bookstr'
import { preprocessAsciidocMediaLinks } from '../MarkdownArticle/preprocessMarkup'
import {
NOSTR_ASCIIDOC_EARLY_LINK_REGEX,
NOSTR_ASCIIDOC_TEXT_NODE_REGEX,
NOSTR_HTML_BECH32_RELAXED
} from '@/lib/content-patterns'
import logger from '@/lib/logger'
import { extractBookMetadata } from '@/lib/bookstr-parser'
import { ExtendedKind } from '@/constants'
@ -66,7 +71,7 @@ function convertMarkdownToAsciidoc(content: string): string { @@ -66,7 +71,7 @@ function convertMarkdownToAsciidoc(content: string): string {
// naddr addresses can be 200+ characters, so we use + instead of specific length
// Also handle optional [] suffix (empty link text in AsciiDoc)
// Note: Citations are already protected in passthrough (+++...+++), so nostr: links inside them won't be processed
asciidoc = asciidoc.replace(/nostr:(npub1[a-z0-9]{58,}|nprofile1[a-z0-9]+|note1[a-z0-9]{58,}|nevent1[a-z0-9]+|naddr1[a-z0-9]+)(\[\])?/g, (_match, bech32Id, emptyBrackets) => {
asciidoc = asciidoc.replace(NOSTR_ASCIIDOC_EARLY_LINK_REGEX, (_match, bech32Id, emptyBrackets) => {
// Convert directly to AsciiDoc link format
// This will be processed later in HTML post-processing to render as React components
// If [] suffix is present, use empty link text, otherwise use the bech32Id
@ -690,7 +695,12 @@ export default function AsciidocArticle({ @@ -690,7 +695,12 @@ export default function AsciidocArticle({
// Match the full bech32 address format - addresses can vary in length
// npub: 58 chars, nprofile: variable, note: 58 chars, nevent: variable, naddr: 200+ chars
// Use a more flexible pattern that matches any valid bech32 address
htmlString = htmlString.replace(/<a[^>]*href=["']nostr:((?:npub1|nprofile1|note1|nevent1|naddr1)[a-z0-9]{20,})["'][^>]*>([^<]*)<\/a>/gi, (_match, bech32Id, _linkText) => {
htmlString = htmlString.replace(
new RegExp(
`<a[^>]*href=["']nostr:(${NOSTR_HTML_BECH32_RELAXED})["'][^>]*>([^<]*)</a>`,
'gi'
),
(_match, bech32Id, _linkText) => {
// Validate bech32 ID and create appropriate placeholder
if (!bech32Id) return _match
@ -709,13 +719,22 @@ export default function AsciidocArticle({ @@ -709,13 +719,22 @@ export default function AsciidocArticle({
// Process text nodes by replacing content between > and <
// Use more flexible regex that matches any valid bech32 address (naddr can be 200+ chars)
// Match addresses with optional [] suffix
htmlString = htmlString.replace(/>([^<]*nostr:((?:npub1|nprofile1|note1|nevent1|naddr1)[a-z0-9]{20,})(\[\])?[^<]*)</g, (_match, textContent) => {
htmlString = htmlString.replace(
new RegExp(
`>([^<]*nostr:(${NOSTR_HTML_BECH32_RELAXED})(\\[\\])?[^<]*)<`,
'g'
),
(_match, textContent) => {
// Extract nostr addresses from the text content - use flexible pattern that handles long addresses
// npub and note are typically 58 chars, but naddr can be 200+ chars
const nostrRegex = /nostr:((?:npub1[a-z0-9]{58,}|nprofile1[a-z0-9]+|note1[a-z0-9]{58,}|nevent1[a-z0-9]+|naddr1[a-z0-9]+))(\[\])?/g
const nostrRegex = new RegExp(
NOSTR_ASCIIDOC_TEXT_NODE_REGEX.source,
NOSTR_ASCIIDOC_TEXT_NODE_REGEX.flags
)
let processedText = textContent
const replacements: Array<{ start: number; end: number; replacement: string }> = []
nostrRegex.lastIndex = 0
let m
while ((m = nostrRegex.exec(textContent)) !== null) {
const bech32Id = m[1]
@ -751,7 +770,9 @@ export default function AsciidocArticle({ @@ -751,7 +770,9 @@ export default function AsciidocArticle({
// Fallback: ensure any remaining nostr: addresses are shown as plain text
// This catches any that weren't converted to placeholders
htmlString = htmlString.replace(/([^>])nostr:((?:npub1|nprofile1|note1|nevent1|naddr1)[a-z0-9]{20,})(\[\])?/g, (_match, prefix, bech32Id, emptyBrackets) => {
htmlString = htmlString.replace(
new RegExp(`([^>])nostr:(${NOSTR_HTML_BECH32_RELAXED})(\\[\\])?`, 'g'),
(_match, prefix, bech32Id, emptyBrackets) => {
// Show as plain text if not already in a tag or placeholder
return `${prefix}nostr:${bech32Id}${emptyBrackets || ''}`
})

24
src/components/Note/MarkdownArticle/MarkdownArticle.tsx

@ -12,7 +12,8 @@ import { cleanUrl, isImage, isMedia, isVideo, isAudio, isWebsocketUrl } from '@/ @@ -12,7 +12,8 @@ import { cleanUrl, isImage, isMedia, isVideo, isAudio, isWebsocketUrl } from '@/
import { getImetaInfosFromEvent } from '@/lib/event'
import { Event, kinds } from 'nostr-tools'
import Emoji from '@/components/Emoji'
import { ExtendedKind, EMOJI_SHORT_CODE_REGEX, WS_URL_REGEX, YOUTUBE_URL_REGEX } from '@/constants'
import { ExtendedKind, WS_URL_REGEX, YOUTUBE_URL_REGEX } from '@/constants'
import { EMOJI_SHORT_CODE_REGEX, NOSTR_URI_INLINE_REGEX } from '@/lib/content-patterns'
import { replaceStandardEmojiShortcodesInContent } from '@/lib/emoji-content'
import { getEmojiInfosFromEmojiTags } from '@/lib/tag'
import { TEmoji } from '@/types'
@ -29,6 +30,7 @@ import { PAYTO_URI_REGEX, parsePaytoUri } from '@/lib/payto' @@ -29,6 +30,7 @@ import { PAYTO_URI_REGEX, parsePaytoUri } from '@/lib/payto'
import PaytoLink from '@/components/PaytoLink'
import katex from 'katex'
import 'katex/dist/katex.min.css'
import { isContentSpacingDebug, reprString } from '@/lib/content-spacing-debug'
import logger from '@/lib/logger'
/**
@ -1087,7 +1089,7 @@ function parseMarkdownContent( @@ -1087,7 +1089,7 @@ function parseMarkdownContent(
})
// Nostr addresses (nostr:npub1..., nostr:note1..., etc.)
const nostrRegex = /nostr:(npub1[a-z0-9]{58}|nprofile1[a-z0-9]+|note1[a-z0-9]{58}|nevent1[a-z0-9]+|naddr1[a-z0-9]+)/g
const nostrRegex = new RegExp(NOSTR_URI_INLINE_REGEX.source, NOSTR_URI_INLINE_REGEX.flags)
const nostrMatches = Array.from(content.matchAll(nostrRegex))
nostrMatches.forEach(match => {
if (match.index !== undefined) {
@ -2679,11 +2681,25 @@ function parseMarkdownContent( @@ -2679,11 +2681,25 @@ function parseMarkdownContent(
* - Footnote references: [^1] (handled at block level, but parsed here for inline context)
*/
function parseInlineMarkdown(text: string, keyPrefix: string, _footnotes: Map<string, string> = new Map(), emojiInfos: TEmoji[] = []): React.ReactNode[] {
if (isContentSpacingDebug() && text.includes('nostr:')) {
// eslint-disable-next-line no-console
console.log('[jumble content-spacing] parseInlineMarkdown:before-normalize', {
keyPrefix,
repr: reprString(text)
})
}
// Normalize newlines to spaces at the start (defensive - text should already be normalized, but ensure it)
// This prevents any hard breaks within inline content
text = text.replace(/\n/g, ' ')
// Collapse multiple consecutive spaces/tabs (2+) into a single space, but preserve single spaces
text = text.replace(/[ \t]{2,}/g, ' ')
if (isContentSpacingDebug() && text.includes('nostr:')) {
// eslint-disable-next-line no-console
console.log('[jumble content-spacing] parseInlineMarkdown:after-normalize', {
keyPrefix,
repr: reprString(text)
})
}
const parts: React.ReactNode[] = []
let lastIndex = 0
@ -2937,7 +2953,7 @@ function parseInlineMarkdown(text: string, keyPrefix: string, _footnotes: Map<st @@ -2937,7 +2953,7 @@ function parseInlineMarkdown(text: string, keyPrefix: string, _footnotes: Map<st
// Nostr addresses: nostr:npub1..., nostr:note1..., etc. (process after code/bold/italic/links/hashtags/relay-urls to avoid conflicts)
// Only process profile types (npub/nprofile) inline; event types (note/nevent/naddr) should remain block-level
const nostrRegex = /nostr:(npub1[a-z0-9]{58}|nprofile1[a-z0-9]+|note1[a-z0-9]{58}|nevent1[a-z0-9]+|naddr1[a-z0-9]+)/g
const nostrRegex = new RegExp(NOSTR_URI_INLINE_REGEX.source, NOSTR_URI_INLINE_REGEX.flags)
const nostrMatches = Array.from(text.matchAll(nostrRegex))
nostrMatches.forEach(match => {
if (match.index !== undefined) {
@ -3001,7 +3017,7 @@ function parseInlineMarkdown(text: string, keyPrefix: string, _footnotes: Map<st @@ -3001,7 +3017,7 @@ function parseInlineMarkdown(text: string, keyPrefix: string, _footnotes: Map<st
index: match.index,
end: match.index + match[0].length,
type: 'emoji',
data: match[0].slice(1, -1).trim()
data: (match[1] ?? match[0].slice(1, -1)).trim()
})
}
}

3
src/components/Note/MarkdownArticle/preprocessMarkup.ts

@ -1,3 +1,4 @@ @@ -1,3 +1,4 @@
import { NOSTR_URI_INLINE_REGEX } from '@/lib/content-patterns'
import { isImage, isVideo, isAudio } from '@/lib/url'
import { URL_REGEX, YOUTUBE_URL_REGEX } from '@/constants'
@ -231,7 +232,7 @@ export function postProcessNostrLinks(content: string): string { @@ -231,7 +232,7 @@ export function postProcessNostrLinks(content: string): string {
// nostr:npub1... -> [nostr:npub1...]
// nostr:note1... -> [nostr:note1...]
// etc.
const nostrRegex = /nostr:(npub1[a-z0-9]{58}|nprofile1[a-z0-9]+|note1[a-z0-9]{58}|nevent1[a-z0-9]+|naddr1[a-z0-9]+)/g
const nostrRegex = new RegExp(NOSTR_URI_INLINE_REGEX.source, NOSTR_URI_INLINE_REGEX.flags)
processed = processed.replace(nostrRegex, (match) => {
// Already in a link? Don't double-wrap
// Check if it's already in markdown link syntax [text](nostr:...)

5
src/components/PostEditor/Mentions.tsx

@ -3,6 +3,7 @@ import { Popover, PopoverContent, PopoverTrigger } from '@/components/ui/popover @@ -3,6 +3,7 @@ import { Popover, PopoverContent, PopoverTrigger } from '@/components/ui/popover
import { useMuteList } from '@/providers/MuteListProvider'
import { useNostr } from '@/providers/NostrProvider'
import client from '@/services/client.service'
import { NOSTR_URI_FOR_REPLY_PUBKEYS_REGEX } from '@/lib/content-patterns'
import logger from '@/lib/logger'
import { Check } from 'lucide-react'
import { Event, nip19 } from 'nostr-tools'
@ -144,9 +145,7 @@ export async function extractMentions(content: string, parentEvent?: Event) { @@ -144,9 +145,7 @@ export async function extractMentions(content: string, parentEvent?: Event) {
pubkeys.push(parentEventPubkey)
}
const matches = content.match(
/nostr:(npub1[a-z0-9]{58}|nprofile1[a-z0-9]+|note1[a-z0-9]{58}|nevent1[a-z0-9]+)/g
)
const matches = content.match(NOSTR_URI_FOR_REPLY_PUBKEYS_REGEX)
const addToSet = (arr: string[], pubkey: string) => {
if (!arr.includes(pubkey)) arr.push(pubkey)

19
src/components/UniversalContent/SimpleContent.tsx

@ -1,6 +1,7 @@ @@ -1,6 +1,7 @@
import { useMemo } from 'react'
import { cleanUrl } from '@/lib/url'
import { Event } from 'nostr-tools'
import { logContentSpacing, reprString } from '@/lib/content-spacing-debug'
import { parseNostrContent, renderNostrContent } from '@/lib/nostr-parser.tsx'
import { cn } from '@/lib/utils'
@ -30,12 +31,28 @@ export default function SimpleContent({ @@ -30,12 +31,28 @@ export default function SimpleContent({
}
)
if (rawContent.includes('nostr:')) {
logContentSpacing('SimpleContent:processedContent', {
rawRepr: reprString(rawContent),
cleanedRepr: reprString(cleaned),
same: rawContent === cleaned
})
}
return cleaned
}, [content, event?.content])
// Parse content for nostr addresses and media
const parsedContent = useMemo(() => {
return parseNostrContent(processedContent, event)
const parsed = parseNostrContent(processedContent, event)
if (processedContent.includes('nostr:')) {
logContentSpacing('SimpleContent:parsedContent', {
elementCount: parsed.elements.length,
tail: parsed.elements.slice(-3).map((e) =>
e.type === 'text' ? { type: 'text', repr: reprString(e.content) } : { type: e.type }
)
})
}
return parsed
}, [processedContent, event])
return (

11
src/constants.ts

@ -265,10 +265,13 @@ export const URL_REGEX = @@ -265,10 +265,13 @@ export const URL_REGEX =
export const WS_URL_REGEX =
/wss?:\/\/[\w\p{L}\p{N}\p{M}&.\-/?=#@%+_:!~*]+[^\s.,;:'")\]}!?"']/giu
export const EMAIL_REGEX = /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/
/** Matches :shortcode: or :short code: (allows letters, digits, underscore, hyphen, space) */
export const EMOJI_SHORT_CODE_REGEX = /:[a-zA-Z0-9_\-\s]+:/g
export const EMBEDDED_EVENT_REGEX = /nostr:(note1[a-z0-9]{58}|nevent1[a-z0-9]+|naddr1[a-z0-9]+)/g
export const EMBEDDED_MENTION_REGEX = /nostr:(npub1[a-z0-9]{58}|nprofile1[a-z0-9]+)/g
/** @see {@link '@/lib/content-patterns'} — single source for emoji + nostr regexes */
export {
EMOJI_SHORT_CODE_MAX_INNER_LENGTH,
EMOJI_SHORT_CODE_REGEX,
EMBEDDED_EVENT_REGEX,
EMBEDDED_MENTION_REGEX
} from '@/lib/content-patterns'
export const HASHTAG_REGEX = /#[a-zA-Z0-9_\-\u00C0-\u017F\u0100-\u017F\u0180-\u024F\u1E00-\u1EFF]+/g
export const LN_INVOICE_REGEX = /(ln(?:bc|tb|bcrt))([0-9]+[munp]?)?1([02-9ac-hj-np-z]+)/g
export const EMOJI_REGEX =

66
src/lib/content-parser.ts

@ -1,14 +1,18 @@ @@ -1,14 +1,18 @@
import {
EMBEDDED_EVENT_REGEX,
EMBEDDED_MENTION_REGEX,
EMOJI_SHORT_CODE_REGEX,
HASHTAG_REGEX,
LN_INVOICE_REGEX,
URL_REGEX,
WS_URL_REGEX,
YOUTUBE_URL_REGEX
} from '@/constants'
import {
EMBEDDED_EVENT_REGEX,
EMBEDDED_MENTION_REGEX,
EMOJI_SHORT_CODE_REGEX,
LEGACY_PROFILE_BECH32_REGEX
} from '@/lib/content-patterns'
import { PAYTO_URI_REGEX } from '@/lib/payto'
import { logContentSpacing, reprString } from '@/lib/content-spacing-debug'
import { isImage, isMedia } from './url'
export type TEmbeddedNodeType =
@ -53,7 +57,7 @@ export const EmbeddedMentionParser: TContentParser = { @@ -53,7 +57,7 @@ export const EmbeddedMentionParser: TContentParser = {
export const EmbeddedLegacyMentionParser: TContentParser = {
type: 'legacy-mention',
regex: /npub1[a-z0-9]{58}|nprofile1[a-z0-9]+/g
regex: LEGACY_PROFILE_BECH32_REGEX
}
export const EmbeddedEventParser: TContentParser = {
@ -133,10 +137,39 @@ export const EmbeddedUrlParser: TContentParser = (content: string) => { @@ -133,10 +137,39 @@ export const EmbeddedUrlParser: TContentParser = (content: string) => {
return result
}
/**
 * Shared parser pipeline for kind-1-style strings (note body, reply preview,
 * profile fields rendered through parseContent).
 *
 * Order matters: parseContent applies these sequentially, and each parser only
 * re-scans the plain-text nodes still remaining, so earlier entries claim their
 * spans first (URLs / invoices / payto / ws URLs before nostr references,
 * emoji shortcodes last).
 */
export const PARSE_CONTENT_PARSERS_NOTE_TEXT: TContentParser[] = [
EmbeddedUrlParser,
EmbeddedLNInvoiceParser,
EmbeddedPaytoParser,
EmbeddedWebsocketUrlParser,
EmbeddedEventParser,
EmbeddedMentionParser,
EmbeddedHashtagParser,
EmbeddedEmojiParser
]
export function parseContent(content: string, parsers: TContentParser[]) {
const trace = content.includes('nostr:')
if (trace) {
logContentSpacing('parseContent:input', {
rawLength: content.length,
afterTrimRepr: reprString(content.trim()),
trimRemovedLeading: content.length - content.trimStart().length,
trimRemovedTrailing: content.length - content.trimEnd().length
})
}
let nodes: TEmbeddedNode[] = [{ type: 'text', data: content.trim() }]
parsers.forEach((parser) => {
parsers.forEach((parser, parserIndex) => {
const parserLabel =
typeof parser === 'function' ? `fn[${parserIndex}]` : parser.type
const beforeSummary = trace ? summarizeContentNodesForDebug(nodes) : null
nodes = nodes
.flatMap((node) => {
if (node.type !== 'text') return [node]
@ -178,15 +211,38 @@ export function parseContent(content: string, parsers: TContentParser[]) { @@ -178,15 +211,38 @@ export function parseContent(content: string, parsers: TContentParser[]) {
return result
})
.filter((n) => n.data !== '')
if (trace) {
logContentSpacing('parseContent:after-parser', {
parser: parserLabel,
parserIndex,
before: beforeSummary,
after: summarizeContentNodesForDebug(nodes)
})
}
})
nodes = mergeConsecutiveTextNodes(nodes)
nodes = mergeConsecutiveImageNodes(nodes)
nodes = removeExtraNewlines(nodes)
if (trace) {
logContentSpacing('parseContent:final', {
afterMergeNewlines: summarizeContentNodesForDebug(nodes)
})
}
return nodes
}
/**
 * Compact, log-friendly view of parser nodes for the content-spacing traces:
 * node type plus a repr of any string payload (image lists become a count).
 */
function summarizeContentNodesForDebug(nodes: TEmbeddedNode[]): Array<{ type: string; repr?: string }> {
  const summarize = (node: TEmbeddedNode): { type: string; repr?: string } => {
    switch (node.type) {
      case 'text':
        return { type: 'text', repr: reprString(node.data) }
      case 'images':
        return { type: 'images', repr: `[${node.data.length} urls]` }
      default:
        return { type: node.type, repr: typeof node.data === 'string' ? reprString(node.data) : undefined }
    }
  }
  return nodes.map(summarize)
}
function mergeConsecutiveTextNodes(nodes: TEmbeddedNode[]) {
const merged: TEmbeddedNode[] = []
let currentText = ''

101
src/lib/content-patterns.ts

@ -0,0 +1,101 @@ @@ -0,0 +1,101 @@
/**
 * Single source of truth for :emoji: shortcodes and nostr: bech32 patterns.
 * Used by MarkdownArticle, parseContent, nostr-parser, previews, post editor, AsciiDoc, etc.
 *
 * NOTE: every exported RegExp here carries the `g` flag and is therefore
 * stateful (`lastIndex`). Callers that `exec` in a loop must reset `lastIndex`
 * or clone first via `new RegExp(re.source, re.flags)` — several call sites
 * already do exactly that.
 */

// --- Emoji (:shortcode:) ----------------------------------------------------

/** Maximum number of characters allowed between the two colons of a shortcode. */
export const EMOJI_SHORT_CODE_MAX_INNER_LENGTH = 20 as const

// The first inner char is matched by its own class below, so the quantifier
// only needs to cover the remaining (max - 1) characters.
const _emojiInnerQuantifier = EMOJI_SHORT_CODE_MAX_INNER_LENGTH - 1

/**
 * Matches one :shortcode: occurrence.
 * - (?<!:) avoids AsciiDoc double-colon macros (link::, image::, citation::, etc.)
 * - First char after ":" must be [a-zA-Z0-9_-] so "Name: nostr:npub…" is not ": nostr:"
 * - Inner body max length so URLs/paths/nostr ids are not treated as shortcodes
 * - NOTE(review): the inner body class is [^:], broader than the historic
 *   [a-zA-Z0-9_\-\s] (e.g. ":a/b:" now matches) — confirm the widening is intended.
 */
export const EMOJI_SHORT_CODE_REGEX = new RegExp(
  `(?<!:):([a-zA-Z0-9_\\-][^:]{0,${_emojiInnerQuantifier}}):`,
  'g'
)

// --- Nostr bech32 (after "nostr:") ------------------------------------------

/** npub / note encodings have a fixed 58-character bech32 data part. */
export const BECH32_NPUB = 'npub1[a-z0-9]{58}'
export const BECH32_NPROFILE = 'nprofile1[a-z0-9]+'
export const BECH32_NOTE = 'note1[a-z0-9]{58}'
export const BECH32_NEVENT = 'nevent1[a-z0-9]+'
export const BECH32_NADDR = 'naddr1[a-z0-9]+'

/** AsciiDoc / forgiving passes: allow longer npub/note encodings ({58,}) */
export const BECH32_NPUB_LOOSE = 'npub1[a-z0-9]{58,}'
export const BECH32_NOTE_LOOSE = 'note1[a-z0-9]{58,}'

/** All kinds we render from note content (strict lengths for Markdown / parseContent) */
export const NOSTR_CONTENT_BECH32_ALT = [
  BECH32_NPUB,
  BECH32_NPROFILE,
  BECH32_NOTE,
  BECH32_NEVENT,
  BECH32_NADDR
].join('|')

/** AsciiDoc early conversion + text-node extraction (loose npub/note) */
export const NOSTR_ASCIIDOC_SOURCE_BECH32_ALT = [
  BECH32_NPUB_LOOSE,
  BECH32_NPROFILE,
  BECH32_NOTE_LOOSE,
  BECH32_NEVENT,
  BECH32_NADDR
].join('|')

/** Relaxed tail for HTML href / fallback matching (naddr can be very long) */
export const NOSTR_HTML_BECH32_RELAXED = '(?:npub1|nprofile1|note1|nevent1|naddr1)[a-z0-9]{20,}'

export const NOSTR_PROFILE_BECH32_ALT = [BECH32_NPUB, BECH32_NPROFILE].join('|')
export const NOSTR_EVENT_BECH32_ALT = [BECH32_NOTE, BECH32_NEVENT, BECH32_NADDR].join('|')
export const NOSTR_NOTE_AND_NEVENT_ALT = [BECH32_NOTE, BECH32_NEVENT].join('|')

/** nostr:… anywhere in text (Markdown inline, relay scan, editor, preprocess) */
export const NOSTR_URI_INLINE_REGEX = new RegExp(`nostr:(${NOSTR_CONTENT_BECH32_ALT})`, 'g')

/** parseContent: profile mentions only */
export const EMBEDDED_MENTION_REGEX = new RegExp(`nostr:(${NOSTR_PROFILE_BECH32_ALT})`, 'g')

/** parseContent: embedded notes (note / nevent / naddr) */
export const EMBEDDED_EVENT_REGEX = new RegExp(`nostr:(${NOSTR_EVENT_BECH32_ALT})`, 'g')

/** event helpers: note + nevent only */
export const NOSTR_EMBEDDED_NOTE_REGEX = new RegExp(`nostr:(${NOSTR_NOTE_AND_NEVENT_ALT})`, 'g')

/** naddr-only (e.g. URL / deep links) */
export const NOSTR_URI_NADDR_REGEX = new RegExp(`nostr:(${BECH32_NADDR})`, 'g')

/** Post editor / reply pubkey scan: npub, nprofile, note, nevent (not naddr) */
export const NOSTR_URI_FOR_REPLY_PUBKEYS_REGEX = new RegExp(
  `nostr:(${[BECH32_NPUB, BECH32_NPROFILE, BECH32_NOTE, BECH32_NEVENT].join('|')})`,
  'g'
)

/** Legacy bare bech32 (no nostr: prefix) */
export const LEGACY_PROFILE_BECH32_REGEX = new RegExp(`${BECH32_NPUB}|${BECH32_NPROFILE}`, 'g')

/** nostr-parser.tsx: boundary + lookahead so punctuation does not stick to bech32 */
export const NOSTR_PARSER_LOOKAHEAD = '(?=\\s|$|>|\\]|,|\\.|!|\\?|;|:)'
export const NOSTR_PARSER_REGEX = new RegExp(
  `(?:^|\\s|>|\\[)nostr:(${NOSTR_CONTENT_BECH32_ALT})${NOSTR_PARSER_LOOKAHEAD}`,
  'g'
)

/** AsciiDoc: optional [] after nostr id */
export const NOSTR_ASCIIDOC_EARLY_LINK_REGEX = new RegExp(
  `nostr:(${NOSTR_ASCIIDOC_SOURCE_BECH32_ALT})(\\[\\])?`,
  'g'
)

/**
 * AsciiDoc HTML text-node scanning — identical pattern and capture groups to
 * NOSTR_ASCIIDOC_EARLY_LINK_REGEX. Derived from it (single source) but kept as
 * a distinct RegExp object so the two call sites never share `lastIndex` state.
 */
export const NOSTR_ASCIIDOC_TEXT_NODE_REGEX = new RegExp(
  NOSTR_ASCIIDOC_EARLY_LINK_REGEX.source,
  NOSTR_ASCIIDOC_EARLY_LINK_REGEX.flags
)

28
src/lib/content-spacing-debug.ts

@ -0,0 +1,28 @@ @@ -0,0 +1,28 @@
/**
* Verbose content/spacing traces for debugging (e.g. "Name: nostr:npub…" collapsing).
*
* Enable in dev: localStorage.setItem('jumble-debug-content', 'true') then reload.
* Disable: localStorage.removeItem('jumble-debug-content')
*/
/** localStorage key that switches the content-spacing traces on. */
const STORAGE_KEY = 'jumble-debug-content'

/**
 * True only in a dev build where localStorage holds 'jumble-debug-content' === 'true'.
 * Any failure while probing the environment (no localStorage, restricted
 * storage, missing import.meta.env) counts as "debugging off".
 */
export function isContentSpacingDebug(): boolean {
  try {
    if (!import.meta.env.DEV) return false
    if (typeof localStorage === 'undefined') return false
    return localStorage.getItem(STORAGE_KEY) === 'true'
  } catch {
    return false
  }
}
/**
 * JSON.stringify a string so spaces/newlines are visible in the console.
 * Strings longer than maxLen are cut and suffixed with "…(+N chars)".
 */
export function reprString(s: string, maxLen = 500): string {
  if (s.length <= maxLen) {
    return JSON.stringify(s)
  }
  const hidden = s.length - maxLen
  return JSON.stringify(`${s.slice(0, maxLen)}…(+${hidden} chars)`)
}
/**
 * Emit one namespaced console trace for a content/spacing phase.
 * No-op unless isContentSpacingDebug() is enabled.
 */
export function logContentSpacing(phase: string, detail: Record<string, unknown>): void {
  if (isContentSpacingDebug()) {
    // eslint-disable-next-line no-console
    console.log(`[jumble content-spacing] ${phase}`, detail)
  }
}

3
src/lib/emoji-content.ts

@ -1,3 +1,4 @@ @@ -1,3 +1,4 @@
import { EMOJI_SHORT_CODE_REGEX } from '@/lib/content-patterns'
import { emojis, shortcodeToEmoji } from '@tiptap/extension-emoji'
const STANDARD_EMOJI_LIMIT = 20
@ -42,7 +43,7 @@ export function replaceStandardEmojiShortcodesInContent( @@ -42,7 +43,7 @@ export function replaceStandardEmojiShortcodesInContent(
const customSet = customShortcodes instanceof Set
? customShortcodes
: new Set(customShortcodes ?? [])
return content.replace(/:([a-zA-Z0-9_\-\s]+):/g, (match, shortcode: string) => {
return content.replace(EMOJI_SHORT_CODE_REGEX, (match, shortcode: string) => {
const trimmed = shortcode.trim()
if (customSet.has(trimmed)) return match
const native = shortcodeToEmoji(trimmed, emojis) ?? shortcodeToEmoji(trimmed.replace(/\s+/g, '_'), emojis)

6
src/lib/event.ts

@ -1,4 +1,5 @@ @@ -1,4 +1,5 @@
import { CALENDAR_EVENT_KINDS, EMBEDDED_MENTION_REGEX, ExtendedKind } from '@/constants'
import { CALENDAR_EVENT_KINDS, ExtendedKind } from '@/constants'
import { EMBEDDED_MENTION_REGEX, NOSTR_EMBEDDED_NOTE_REGEX } from '@/lib/content-patterns'
import client from '@/services/client.service'
import { TImetaInfo } from '@/types'
import { LRUCache } from 'lru-cache'
@ -223,8 +224,7 @@ export function getEmbeddedNoteBech32Ids(event: Event) { @@ -223,8 +224,7 @@ export function getEmbeddedNoteBech32Ids(event: Event) {
if (cache) return cache
const embeddedNoteBech32Ids: string[] = []
const embeddedNoteRegex = /nostr:(note1[a-z0-9]{58}|nevent1[a-z0-9]+)/g
;(event.content.match(embeddedNoteRegex) || []).forEach((note) => {
;(event.content.match(NOSTR_EMBEDDED_NOTE_REGEX) || []).forEach((note) => {
try {
const { type, data } = nip19.decode(note.split(':')[1])
if (type === 'nevent') {

71
src/lib/nostr-parser.tsx

@ -13,7 +13,9 @@ import { parsePaytoUri } from '@/lib/payto' @@ -13,7 +13,9 @@ import { parsePaytoUri } from '@/lib/payto'
import PaytoLink from '@/components/PaytoLink'
import { TImetaInfo } from '@/types'
import { Event } from 'nostr-tools'
import { NOSTR_PARSER_REGEX } from '@/lib/content-patterns'
import logger from '@/lib/logger'
import { logContentSpacing, reprString } from '@/lib/content-spacing-debug'
export interface ParsedNostrContent {
elements: Array<{
@ -39,9 +41,16 @@ export interface ParsedNostrContent { @@ -39,9 +41,16 @@ export interface ParsedNostrContent {
*/
export function parseNostrContent(content: string, event?: Event): ParsedNostrContent {
const elements: ParsedNostrContent['elements'] = []
const traceNostr = content.includes('nostr:')
if (traceNostr) {
logContentSpacing('parseNostrContent:input', {
length: content.length,
repr: reprString(content),
eventId: event?.id
})
}
// Regex to match nostr: addresses that are not inside URLs or other contexts
const nostrRegex = /(?:^|\s|>|\[)nostr:(npub1[a-z0-9]{58}|nprofile1[a-z0-9]+|note1[a-z0-9]{58}|nevent1[a-z0-9]+|naddr1[a-z0-9]+)(?=\s|$|>|\]|,|\.|!|\?|;|:)/g
const nostrRegex = new RegExp(NOSTR_PARSER_REGEX.source, NOSTR_PARSER_REGEX.flags)
// Regex to match all URLs (we'll filter by type later)
const urlRegex = /(https?:\/\/[^\s]+)/gi
@ -79,12 +88,25 @@ export function parseNostrContent(content: string, event?: Event): ParsedNostrCo @@ -79,12 +88,25 @@ export function parseNostrContent(content: string, event?: Event): ParsedNostrCo
// Find nostr matches
let nostrMatch
while ((nostrMatch = nostrRegex.exec(content)) !== null) {
if (isNostrAddressInValidContext(content, nostrMatch.index, nostrMatch.index + nostrMatch[0].length)) {
const nStart = nostrMatch.index
const nEnd = nostrMatch.index + nostrMatch[0].length
const valid = isNostrAddressInValidContext(content, nStart, nEnd)
if (traceNostr) {
logContentSpacing('parseNostrContent:nostr-regex', {
index: nStart,
end: nEnd,
fullMatchRepr: reprString(nostrMatch[0]),
validContext: valid,
charBeforeIndex: nStart > 0 ? reprString(content[nStart - 1]) : '(start)',
charAtIndex: reprString(content[nStart] ?? '')
})
}
if (valid) {
allMatches.push({
type: 'nostr',
match: nostrMatch,
start: nostrMatch.index,
end: nostrMatch.index + nostrMatch[0].length
start: nStart,
end: nEnd
})
}
}
@ -269,6 +291,23 @@ export function parseNostrContent(content: string, event?: Event): ParsedNostrCo @@ -269,6 +291,23 @@ export function parseNostrContent(content: string, event?: Event): ParsedNostrCo
const isAtEnd = end === content.length || content[end] === '\n'
const needsSpaceBefore = !isAtStart && content[start - 1] !== ' '
const needsSpaceAfter = !isAtEnd && content[end] !== ' '
if (traceNostr) {
const textBefore = start > lastIndex ? content.slice(lastIndex, start) : ''
logContentSpacing('parseNostrContent:nostr-element', {
lastIndex,
start,
end,
textBeforeSliceRepr: reprString(textBefore),
isAtStart,
isAtEnd,
needsSpaceBefore,
needsSpaceAfter,
prevCharRepr:
start > 0 ? reprString(content[start - 1]) : '(none)',
nextCharRepr:
end < content.length ? reprString(content[end]) : '(eof)'
})
}
if (needsSpaceBefore) {
elements.push({
@ -422,6 +461,12 @@ export function parseNostrContent(content: string, event?: Event): ParsedNostrCo @@ -422,6 +461,12 @@ export function parseNostrContent(content: string, event?: Event): ParsedNostrCo
images: allImages
})
if (traceNostr) {
logContentSpacing('parseNostrContent:result', {
branch: 'gallery',
sequence: summarizeParsedElementsForDebug(filteredElements)
})
}
return { elements: filteredElements }
}
@ -433,9 +478,25 @@ export function parseNostrContent(content: string, event?: Event): ParsedNostrCo @@ -433,9 +478,25 @@ export function parseNostrContent(content: string, event?: Event): ParsedNostrCo
})
}
if (traceNostr) {
logContentSpacing('parseNostrContent:result', {
branch: elements.length === 1 && elements[0].type === 'text' ? 'text-only' : 'elements',
sequence: summarizeParsedElementsForDebug(elements)
})
}
return { elements }
}
function summarizeParsedElementsForDebug(
els: ParsedNostrContent['elements']
): Array<{ type: string; repr?: string; bech32Id?: string }> {
return els.map((e) => {
if (e.type === 'text') return { type: 'text', repr: reprString(e.content) }
if (e.type === 'nostr') return { type: 'nostr', bech32Id: e.bech32Id }
return { type: e.type }
})
}
/**
* Check if a nostr address is in a valid context (not inside URLs, etc.)
*/

18
src/lib/tiptap.ts

@ -1,10 +1,19 @@ @@ -1,10 +1,19 @@
import { logContentSpacing, reprString } from '@/lib/content-spacing-debug'
import customEmojiService from '@/services/custom-emoji.service'
import { emojis, shortcodeToEmoji } from '@tiptap/extension-emoji'
import { JSONContent } from '@tiptap/react'
import { nip19 } from 'nostr-tools'
export function parseEditorJsonToText(node?: JSONContent) {
let text = _parseEditorJsonToText(node).trim()
const rawJoined = _parseEditorJsonToText(node)
let text = rawJoined.trim()
const trace = rawJoined.includes('nostr:') || /npub1|nprofile1/.test(rawJoined)
if (trace) {
logContentSpacing('parseEditorJsonToText:joined', {
beforeTrimRepr: reprString(rawJoined),
afterTrimRepr: reprString(text)
})
}
const regex = /(?:^|\s)(nevent|naddr|nprofile|npub)1[a-zA-Z0-9]+/g
text = text.replace(regex, (match) => {
@ -20,7 +29,14 @@ export function parseEditorJsonToText(node?: JSONContent) { @@ -20,7 +29,14 @@ export function parseEditorJsonToText(node?: JSONContent) {
})
// Ensure space before nostr: when not already preceded by space (fixes "Like:nostr:npub" and "Like:\nnostr:npub")
const beforeNostrSpacePass = text
text = text.replace(/(.)(?=nostr:)/g, (_, prev) => (prev === ' ' ? prev : prev + ' '))
if (trace) {
logContentSpacing('parseEditorJsonToText:after-nostr-prefix-pass', {
beforeRepr: reprString(beforeNostrSpacePass),
afterRepr: reprString(text)
})
}
return text
}

7
src/pages/secondary/NotePage/index.tsx

@ -21,10 +21,9 @@ import type { Event } from 'nostr-tools' @@ -21,10 +21,9 @@ import type { Event } from 'nostr-tools'
import { kinds, nip19 } from 'nostr-tools'
import { forwardRef, useEffect, useMemo, useState } from 'react'
import { useTranslation } from 'react-i18next'
import { NOSTR_URI_NADDR_REGEX } from '@/lib/content-patterns'
import NotFound from './NotFound'
const NADDR_REGEX = /nostr:(naddr1[a-z0-9]+)/g
// Helper function to get event type name (matching WebPreview)
function getEventTypeName(kind: number): string {
switch (kind) {
@ -109,8 +108,8 @@ const NotePage = forwardRef(({ id, index, hideTitlebar = false }: { id?: string; @@ -109,8 +108,8 @@ const NotePage = forwardRef(({ id, index, hideTitlebar = false }: { id?: string;
// When viewing a kind-24 invite (e.g. from notifications), extract calendar event naddr from content and show full calendar card with RSVP
const calendarInviteNaddr = useMemo(() => {
if (finalEvent?.kind !== ExtendedKind.PUBLIC_MESSAGE || !finalEvent.content?.trim()) return undefined
const match = NADDR_REGEX.exec(finalEvent.content)
NADDR_REGEX.lastIndex = 0
const match = NOSTR_URI_NADDR_REGEX.exec(finalEvent.content)
NOSTR_URI_NADDR_REGEX.lastIndex = 0
const naddr = match?.[1]
if (!naddr) return undefined
try {

5
src/services/relay-selection.service.ts

@ -1,5 +1,6 @@ @@ -1,5 +1,6 @@
import { Event, kinds } from 'nostr-tools'
import { ExtendedKind, FAST_WRITE_RELAY_URLS, RANDOM_PUBLISH_RELAY_COUNT } from '@/constants'
import { NOSTR_URI_FOR_REPLY_PUBKEYS_REGEX } from '@/lib/content-patterns'
import client from '@/services/client.service'
import { normalizeUrl, isLocalNetworkUrl } from '@/lib/url'
import { TRelaySet, TRelayList } from '@/types'
@ -734,9 +735,7 @@ class RelaySelectionService { @@ -734,9 +735,7 @@ class RelaySelectionService {
}
// Extract nostr addresses from content
const matches = content.match(
/nostr:(npub1[a-z0-9]{58}|nprofile1[a-z0-9]+|note1[a-z0-9]{58}|nevent1[a-z0-9]+)/g
)
const matches = content.match(NOSTR_URI_FOR_REPLY_PUBKEYS_REGEX)
if (matches) {

Loading…
Cancel
Save