Browse Source

correct and consolidate regex

imwald
Silberengel 1 month ago
parent
commit
8fa52c5533
  1. 31
      src/components/Content/index.tsx
  2. 17
      src/components/ContentPreview/Content.tsx
  3. 31
      src/components/Note/AsciidocArticle/AsciidocArticle.tsx
  4. 24
      src/components/Note/MarkdownArticle/MarkdownArticle.tsx
  5. 3
      src/components/Note/MarkdownArticle/preprocessMarkup.ts
  6. 5
      src/components/PostEditor/Mentions.tsx
  7. 19
      src/components/UniversalContent/SimpleContent.tsx
  8. 11
      src/constants.ts
  9. 66
      src/lib/content-parser.ts
  10. 101
      src/lib/content-patterns.ts
  11. 28
      src/lib/content-spacing-debug.ts
  12. 3
      src/lib/emoji-content.ts
  13. 6
      src/lib/event.ts
  14. 71
      src/lib/nostr-parser.tsx
  15. 18
      src/lib/tiptap.ts
  16. 7
      src/pages/secondary/NotePage/index.tsx
  17. 5
      src/services/relay-selection.service.ts

31
src/components/Content/index.tsx

@ -1,16 +1,7 @@ @@ -1,16 +1,7 @@
import { useMediaExtraction } from '@/hooks'
import {
EmbeddedEmojiParser,
EmbeddedEventParser,
EmbeddedHashtagParser,
EmbeddedLNInvoiceParser,
EmbeddedMentionParser,
EmbeddedPaytoParser,
EmbeddedUrlParser,
EmbeddedWebsocketUrlParser,
parseContent
} from '@/lib/content-parser'
import { parseContent, PARSE_CONTENT_PARSERS_NOTE_TEXT } from '@/lib/content-parser'
import { replaceStandardEmojiShortcodesInContent } from '@/lib/emoji-content'
import { logContentSpacing, reprString } from '@/lib/content-spacing-debug'
import logger from '@/lib/logger'
import { emojis, shortcodeToEmoji } from '@tiptap/extension-emoji'
import { getEmojiInfosFromEmojiTags } from '@/lib/tag'
@ -93,17 +84,15 @@ export default function Content({ @@ -93,17 +84,15 @@ export default function Content({
const emojiInfos = getEmojiInfosFromEmojiTags(event?.tags)
const customShortcodes = emojiInfos.map((e) => e.shortcode)
const normalized = replaceStandardEmojiShortcodesInContent(_content, customShortcodes)
if (normalized.includes('nostr:')) {
logContentSpacing('Content:useMemo', {
rawRepr: reprString(_content),
normalizedRepr: reprString(normalized),
same: _content === normalized
})
}
const nodes = parseContent(normalized, [
EmbeddedUrlParser,
EmbeddedLNInvoiceParser,
EmbeddedPaytoParser,
EmbeddedWebsocketUrlParser,
EmbeddedEventParser,
EmbeddedMentionParser,
EmbeddedHashtagParser,
EmbeddedEmojiParser
])
const nodes = parseContent(normalized, PARSE_CONTENT_PARSERS_NOTE_TEXT)
return { nodes, emojiInfos }
}, [_content, event])

17
src/components/ContentPreview/Content.tsx

@ -1,11 +1,4 @@ @@ -1,11 +1,4 @@
import {
EmbeddedEmojiParser,
EmbeddedEventParser,
EmbeddedMentionParser,
EmbeddedPaytoParser,
EmbeddedUrlParser,
parseContent
} from '@/lib/content-parser'
import { parseContent, PARSE_CONTENT_PARSERS_NOTE_TEXT } from '@/lib/content-parser'
import { replaceStandardEmojiShortcodesInContent } from '@/lib/emoji-content'
import { emojis, shortcodeToEmoji } from '@tiptap/extension-emoji'
import { cn } from '@/lib/utils'
@ -29,13 +22,7 @@ export default function Content({ @@ -29,13 +22,7 @@ export default function Content({
const nodes = useMemo(() => {
const customShortcodes = emojiInfos?.map((e) => e.shortcode) ?? []
const normalized = replaceStandardEmojiShortcodesInContent(content, customShortcodes)
return parseContent(normalized, [
EmbeddedUrlParser,
EmbeddedPaytoParser,
EmbeddedEventParser,
EmbeddedMentionParser,
EmbeddedEmojiParser
])
return parseContent(normalized, PARSE_CONTENT_PARSERS_NOTE_TEXT)
}, [content, emojiInfos])
return (

31
src/components/Note/AsciidocArticle/AsciidocArticle.tsx

@ -22,6 +22,11 @@ import { ReplyProvider } from '@/providers/ReplyProvider' @@ -22,6 +22,11 @@ import { ReplyProvider } from '@/providers/ReplyProvider'
import Wikilink from '@/components/UniversalContent/Wikilink'
import { BookstrContent } from '@/components/Bookstr'
import { preprocessAsciidocMediaLinks } from '../MarkdownArticle/preprocessMarkup'
import {
NOSTR_ASCIIDOC_EARLY_LINK_REGEX,
NOSTR_ASCIIDOC_TEXT_NODE_REGEX,
NOSTR_HTML_BECH32_RELAXED
} from '@/lib/content-patterns'
import logger from '@/lib/logger'
import { extractBookMetadata } from '@/lib/bookstr-parser'
import { ExtendedKind } from '@/constants'
@ -66,7 +71,7 @@ function convertMarkdownToAsciidoc(content: string): string { @@ -66,7 +71,7 @@ function convertMarkdownToAsciidoc(content: string): string {
// naddr addresses can be 200+ characters, so we use + instead of specific length
// Also handle optional [] suffix (empty link text in AsciiDoc)
// Note: Citations are already protected in passthrough (+++...+++), so nostr: links inside them won't be processed
asciidoc = asciidoc.replace(/nostr:(npub1[a-z0-9]{58,}|nprofile1[a-z0-9]+|note1[a-z0-9]{58,}|nevent1[a-z0-9]+|naddr1[a-z0-9]+)(\[\])?/g, (_match, bech32Id, emptyBrackets) => {
asciidoc = asciidoc.replace(NOSTR_ASCIIDOC_EARLY_LINK_REGEX, (_match, bech32Id, emptyBrackets) => {
// Convert directly to AsciiDoc link format
// This will be processed later in HTML post-processing to render as React components
// If [] suffix is present, use empty link text, otherwise use the bech32Id
@ -690,7 +695,12 @@ export default function AsciidocArticle({ @@ -690,7 +695,12 @@ export default function AsciidocArticle({
// Match the full bech32 address format - addresses can vary in length
// npub: 58 chars, nprofile: variable, note: 58 chars, nevent: variable, naddr: 200+ chars
// Use a more flexible pattern that matches any valid bech32 address
htmlString = htmlString.replace(/<a[^>]*href=["']nostr:((?:npub1|nprofile1|note1|nevent1|naddr1)[a-z0-9]{20,})["'][^>]*>([^<]*)<\/a>/gi, (_match, bech32Id, _linkText) => {
htmlString = htmlString.replace(
new RegExp(
`<a[^>]*href=["']nostr:(${NOSTR_HTML_BECH32_RELAXED})["'][^>]*>([^<]*)</a>`,
'gi'
),
(_match, bech32Id, _linkText) => {
// Validate bech32 ID and create appropriate placeholder
if (!bech32Id) return _match
@ -709,13 +719,22 @@ export default function AsciidocArticle({ @@ -709,13 +719,22 @@ export default function AsciidocArticle({
// Process text nodes by replacing content between > and <
// Use more flexible regex that matches any valid bech32 address (naddr can be 200+ chars)
// Match addresses with optional [] suffix
htmlString = htmlString.replace(/>([^<]*nostr:((?:npub1|nprofile1|note1|nevent1|naddr1)[a-z0-9]{20,})(\[\])?[^<]*)</g, (_match, textContent) => {
htmlString = htmlString.replace(
new RegExp(
`>([^<]*nostr:(${NOSTR_HTML_BECH32_RELAXED})(\\[\\])?[^<]*)<`,
'g'
),
(_match, textContent) => {
// Extract nostr addresses from the text content - use flexible pattern that handles long addresses
// npub and note are typically 58 chars, but naddr can be 200+ chars
const nostrRegex = /nostr:((?:npub1[a-z0-9]{58,}|nprofile1[a-z0-9]+|note1[a-z0-9]{58,}|nevent1[a-z0-9]+|naddr1[a-z0-9]+))(\[\])?/g
const nostrRegex = new RegExp(
NOSTR_ASCIIDOC_TEXT_NODE_REGEX.source,
NOSTR_ASCIIDOC_TEXT_NODE_REGEX.flags
)
let processedText = textContent
const replacements: Array<{ start: number; end: number; replacement: string }> = []
nostrRegex.lastIndex = 0
let m
while ((m = nostrRegex.exec(textContent)) !== null) {
const bech32Id = m[1]
@ -751,7 +770,9 @@ export default function AsciidocArticle({ @@ -751,7 +770,9 @@ export default function AsciidocArticle({
// Fallback: ensure any remaining nostr: addresses are shown as plain text
// This catches any that weren't converted to placeholders
htmlString = htmlString.replace(/([^>])nostr:((?:npub1|nprofile1|note1|nevent1|naddr1)[a-z0-9]{20,})(\[\])?/g, (_match, prefix, bech32Id, emptyBrackets) => {
htmlString = htmlString.replace(
new RegExp(`([^>])nostr:(${NOSTR_HTML_BECH32_RELAXED})(\\[\\])?`, 'g'),
(_match, prefix, bech32Id, emptyBrackets) => {
// Show as plain text if not already in a tag or placeholder
return `${prefix}nostr:${bech32Id}${emptyBrackets || ''}`
})

24
src/components/Note/MarkdownArticle/MarkdownArticle.tsx

@ -12,7 +12,8 @@ import { cleanUrl, isImage, isMedia, isVideo, isAudio, isWebsocketUrl } from '@/ @@ -12,7 +12,8 @@ import { cleanUrl, isImage, isMedia, isVideo, isAudio, isWebsocketUrl } from '@/
import { getImetaInfosFromEvent } from '@/lib/event'
import { Event, kinds } from 'nostr-tools'
import Emoji from '@/components/Emoji'
import { ExtendedKind, EMOJI_SHORT_CODE_REGEX, WS_URL_REGEX, YOUTUBE_URL_REGEX } from '@/constants'
import { ExtendedKind, WS_URL_REGEX, YOUTUBE_URL_REGEX } from '@/constants'
import { EMOJI_SHORT_CODE_REGEX, NOSTR_URI_INLINE_REGEX } from '@/lib/content-patterns'
import { replaceStandardEmojiShortcodesInContent } from '@/lib/emoji-content'
import { getEmojiInfosFromEmojiTags } from '@/lib/tag'
import { TEmoji } from '@/types'
@ -29,6 +30,7 @@ import { PAYTO_URI_REGEX, parsePaytoUri } from '@/lib/payto' @@ -29,6 +30,7 @@ import { PAYTO_URI_REGEX, parsePaytoUri } from '@/lib/payto'
import PaytoLink from '@/components/PaytoLink'
import katex from 'katex'
import 'katex/dist/katex.min.css'
import { isContentSpacingDebug, reprString } from '@/lib/content-spacing-debug'
import logger from '@/lib/logger'
/**
@ -1087,7 +1089,7 @@ function parseMarkdownContent( @@ -1087,7 +1089,7 @@ function parseMarkdownContent(
})
// Nostr addresses (nostr:npub1..., nostr:note1..., etc.)
const nostrRegex = /nostr:(npub1[a-z0-9]{58}|nprofile1[a-z0-9]+|note1[a-z0-9]{58}|nevent1[a-z0-9]+|naddr1[a-z0-9]+)/g
const nostrRegex = new RegExp(NOSTR_URI_INLINE_REGEX.source, NOSTR_URI_INLINE_REGEX.flags)
const nostrMatches = Array.from(content.matchAll(nostrRegex))
nostrMatches.forEach(match => {
if (match.index !== undefined) {
@ -2679,11 +2681,25 @@ function parseMarkdownContent( @@ -2679,11 +2681,25 @@ function parseMarkdownContent(
* - Footnote references: [^1] (handled at block level, but parsed here for inline context)
*/
function parseInlineMarkdown(text: string, keyPrefix: string, _footnotes: Map<string, string> = new Map(), emojiInfos: TEmoji[] = []): React.ReactNode[] {
if (isContentSpacingDebug() && text.includes('nostr:')) {
// eslint-disable-next-line no-console
console.log('[jumble content-spacing] parseInlineMarkdown:before-normalize', {
keyPrefix,
repr: reprString(text)
})
}
// Normalize newlines to spaces at the start (defensive - text should already be normalized, but ensure it)
// This prevents any hard breaks within inline content
text = text.replace(/\n/g, ' ')
// Collapse multiple consecutive spaces/tabs (2+) into a single space, but preserve single spaces
text = text.replace(/[ \t]{2,}/g, ' ')
if (isContentSpacingDebug() && text.includes('nostr:')) {
// eslint-disable-next-line no-console
console.log('[jumble content-spacing] parseInlineMarkdown:after-normalize', {
keyPrefix,
repr: reprString(text)
})
}
const parts: React.ReactNode[] = []
let lastIndex = 0
@ -2937,7 +2953,7 @@ function parseInlineMarkdown(text: string, keyPrefix: string, _footnotes: Map<st @@ -2937,7 +2953,7 @@ function parseInlineMarkdown(text: string, keyPrefix: string, _footnotes: Map<st
// Nostr addresses: nostr:npub1..., nostr:note1..., etc. (process after code/bold/italic/links/hashtags/relay-urls to avoid conflicts)
// Only process profile types (npub/nprofile) inline; event types (note/nevent/naddr) should remain block-level
const nostrRegex = /nostr:(npub1[a-z0-9]{58}|nprofile1[a-z0-9]+|note1[a-z0-9]{58}|nevent1[a-z0-9]+|naddr1[a-z0-9]+)/g
const nostrRegex = new RegExp(NOSTR_URI_INLINE_REGEX.source, NOSTR_URI_INLINE_REGEX.flags)
const nostrMatches = Array.from(text.matchAll(nostrRegex))
nostrMatches.forEach(match => {
if (match.index !== undefined) {
@ -3001,7 +3017,7 @@ function parseInlineMarkdown(text: string, keyPrefix: string, _footnotes: Map<st @@ -3001,7 +3017,7 @@ function parseInlineMarkdown(text: string, keyPrefix: string, _footnotes: Map<st
index: match.index,
end: match.index + match[0].length,
type: 'emoji',
data: match[0].slice(1, -1).trim()
data: (match[1] ?? match[0].slice(1, -1)).trim()
})
}
}

3
src/components/Note/MarkdownArticle/preprocessMarkup.ts

@ -1,3 +1,4 @@ @@ -1,3 +1,4 @@
import { NOSTR_URI_INLINE_REGEX } from '@/lib/content-patterns'
import { isImage, isVideo, isAudio } from '@/lib/url'
import { URL_REGEX, YOUTUBE_URL_REGEX } from '@/constants'
@ -231,7 +232,7 @@ export function postProcessNostrLinks(content: string): string { @@ -231,7 +232,7 @@ export function postProcessNostrLinks(content: string): string {
// nostr:npub1... -> [nostr:npub1...]
// nostr:note1... -> [nostr:note1...]
// etc.
const nostrRegex = /nostr:(npub1[a-z0-9]{58}|nprofile1[a-z0-9]+|note1[a-z0-9]{58}|nevent1[a-z0-9]+|naddr1[a-z0-9]+)/g
const nostrRegex = new RegExp(NOSTR_URI_INLINE_REGEX.source, NOSTR_URI_INLINE_REGEX.flags)
processed = processed.replace(nostrRegex, (match) => {
// Already in a link? Don't double-wrap
// Check if it's already in markdown link syntax [text](nostr:...)

5
src/components/PostEditor/Mentions.tsx

@ -3,6 +3,7 @@ import { Popover, PopoverContent, PopoverTrigger } from '@/components/ui/popover @@ -3,6 +3,7 @@ import { Popover, PopoverContent, PopoverTrigger } from '@/components/ui/popover
import { useMuteList } from '@/providers/MuteListProvider'
import { useNostr } from '@/providers/NostrProvider'
import client from '@/services/client.service'
import { NOSTR_URI_FOR_REPLY_PUBKEYS_REGEX } from '@/lib/content-patterns'
import logger from '@/lib/logger'
import { Check } from 'lucide-react'
import { Event, nip19 } from 'nostr-tools'
@ -144,9 +145,7 @@ export async function extractMentions(content: string, parentEvent?: Event) { @@ -144,9 +145,7 @@ export async function extractMentions(content: string, parentEvent?: Event) {
pubkeys.push(parentEventPubkey)
}
const matches = content.match(
/nostr:(npub1[a-z0-9]{58}|nprofile1[a-z0-9]+|note1[a-z0-9]{58}|nevent1[a-z0-9]+)/g
)
const matches = content.match(NOSTR_URI_FOR_REPLY_PUBKEYS_REGEX)
const addToSet = (arr: string[], pubkey: string) => {
if (!arr.includes(pubkey)) arr.push(pubkey)

19
src/components/UniversalContent/SimpleContent.tsx

@ -1,6 +1,7 @@ @@ -1,6 +1,7 @@
import { useMemo } from 'react'
import { cleanUrl } from '@/lib/url'
import { Event } from 'nostr-tools'
import { logContentSpacing, reprString } from '@/lib/content-spacing-debug'
import { parseNostrContent, renderNostrContent } from '@/lib/nostr-parser.tsx'
import { cn } from '@/lib/utils'
@ -30,12 +31,28 @@ export default function SimpleContent({ @@ -30,12 +31,28 @@ export default function SimpleContent({
}
)
if (rawContent.includes('nostr:')) {
logContentSpacing('SimpleContent:processedContent', {
rawRepr: reprString(rawContent),
cleanedRepr: reprString(cleaned),
same: rawContent === cleaned
})
}
return cleaned
}, [content, event?.content])
// Parse content for nostr addresses and media
const parsedContent = useMemo(() => {
return parseNostrContent(processedContent, event)
const parsed = parseNostrContent(processedContent, event)
if (processedContent.includes('nostr:')) {
logContentSpacing('SimpleContent:parsedContent', {
elementCount: parsed.elements.length,
tail: parsed.elements.slice(-3).map((e) =>
e.type === 'text' ? { type: 'text', repr: reprString(e.content) } : { type: e.type }
)
})
}
return parsed
}, [processedContent, event])
return (

11
src/constants.ts

@ -265,10 +265,13 @@ export const URL_REGEX = @@ -265,10 +265,13 @@ export const URL_REGEX =
export const WS_URL_REGEX =
/wss?:\/\/[\w\p{L}\p{N}\p{M}&.\-/?=#@%+_:!~*]+[^\s.,;:'")\]}!?"']/giu
export const EMAIL_REGEX = /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/
/** Matches :shortcode: or :short code: (allows letters, digits, underscore, hyphen, space) */
export const EMOJI_SHORT_CODE_REGEX = /:[a-zA-Z0-9_\-\s]+:/g
export const EMBEDDED_EVENT_REGEX = /nostr:(note1[a-z0-9]{58}|nevent1[a-z0-9]+|naddr1[a-z0-9]+)/g
export const EMBEDDED_MENTION_REGEX = /nostr:(npub1[a-z0-9]{58}|nprofile1[a-z0-9]+)/g
/** @see {@link '@/lib/content-patterns'} — single source for emoji + nostr regexes */
export {
EMOJI_SHORT_CODE_MAX_INNER_LENGTH,
EMOJI_SHORT_CODE_REGEX,
EMBEDDED_EVENT_REGEX,
EMBEDDED_MENTION_REGEX
} from '@/lib/content-patterns'
export const HASHTAG_REGEX = /#[a-zA-Z0-9_\-\u00C0-\u017F\u0100-\u017F\u0180-\u024F\u1E00-\u1EFF]+/g
export const LN_INVOICE_REGEX = /(ln(?:bc|tb|bcrt))([0-9]+[munp]?)?1([02-9ac-hj-np-z]+)/g
export const EMOJI_REGEX =

66
src/lib/content-parser.ts

@ -1,14 +1,18 @@ @@ -1,14 +1,18 @@
import {
EMBEDDED_EVENT_REGEX,
EMBEDDED_MENTION_REGEX,
EMOJI_SHORT_CODE_REGEX,
HASHTAG_REGEX,
LN_INVOICE_REGEX,
URL_REGEX,
WS_URL_REGEX,
YOUTUBE_URL_REGEX
} from '@/constants'
import {
EMBEDDED_EVENT_REGEX,
EMBEDDED_MENTION_REGEX,
EMOJI_SHORT_CODE_REGEX,
LEGACY_PROFILE_BECH32_REGEX
} from '@/lib/content-patterns'
import { PAYTO_URI_REGEX } from '@/lib/payto'
import { logContentSpacing, reprString } from '@/lib/content-spacing-debug'
import { isImage, isMedia } from './url'
export type TEmbeddedNodeType =
@ -53,7 +57,7 @@ export const EmbeddedMentionParser: TContentParser = { @@ -53,7 +57,7 @@ export const EmbeddedMentionParser: TContentParser = {
export const EmbeddedLegacyMentionParser: TContentParser = {
type: 'legacy-mention',
regex: /npub1[a-z0-9]{58}|nprofile1[a-z0-9]+/g
regex: LEGACY_PROFILE_BECH32_REGEX
}
export const EmbeddedEventParser: TContentParser = {
@ -133,10 +137,39 @@ export const EmbeddedUrlParser: TContentParser = (content: string) => { @@ -133,10 +137,39 @@ export const EmbeddedUrlParser: TContentParser = (content: string) => {
return result
}
/**
 * Shared parser pipeline for kind-1-style strings (note body, reply preview,
 * profile fields rendered through parseContent).
 *
 * Order matters: parseContent applies these sequentially, and each parser only
 * re-scans the plain-text nodes still remaining, so earlier entries claim their
 * spans first (URLs / invoices / payto / ws URLs before nostr references,
 * emoji shortcodes last).
 */
export const PARSE_CONTENT_PARSERS_NOTE_TEXT: TContentParser[] = [
EmbeddedUrlParser,
EmbeddedLNInvoiceParser,
EmbeddedPaytoParser,
EmbeddedWebsocketUrlParser,
EmbeddedEventParser,
EmbeddedMentionParser,
EmbeddedHashtagParser,
EmbeddedEmojiParser
]
export function parseContent(content: string, parsers: TContentParser[]) {
const trace = content.includes('nostr:')
if (trace) {
logContentSpacing('parseContent:input', {
rawLength: content.length,
afterTrimRepr: reprString(content.trim()),
trimRemovedLeading: content.length - content.trimStart().length,
trimRemovedTrailing: content.length - content.trimEnd().length
})
}
let nodes: TEmbeddedNode[] = [{ type: 'text', data: content.trim() }]
parsers.forEach((parser) => {
parsers.forEach((parser, parserIndex) => {
const parserLabel =
typeof parser === 'function' ? `fn[${parserIndex}]` : parser.type
const beforeSummary = trace ? summarizeContentNodesForDebug(nodes) : null
nodes = nodes
.flatMap((node) => {
if (node.type !== 'text') return [node]
@ -178,15 +211,38 @@ export function parseContent(content: string, parsers: TContentParser[]) { @@ -178,15 +211,38 @@ export function parseContent(content: string, parsers: TContentParser[]) {
return result
})
.filter((n) => n.data !== '')
if (trace) {
logContentSpacing('parseContent:after-parser', {
parser: parserLabel,
parserIndex,
before: beforeSummary,
after: summarizeContentNodesForDebug(nodes)
})
}
})
nodes = mergeConsecutiveTextNodes(nodes)
nodes = mergeConsecutiveImageNodes(nodes)
nodes = removeExtraNewlines(nodes)
if (trace) {
logContentSpacing('parseContent:final', {
afterMergeNewlines: summarizeContentNodesForDebug(nodes)
})
}
return nodes
}
/**
 * Compact, log-friendly view of parser nodes for the content-spacing traces:
 * node type plus a repr of any string payload (image lists become a count).
 */
function summarizeContentNodesForDebug(nodes: TEmbeddedNode[]): Array<{ type: string; repr?: string }> {
  const summarize = (node: TEmbeddedNode): { type: string; repr?: string } => {
    switch (node.type) {
      case 'text':
        return { type: 'text', repr: reprString(node.data) }
      case 'images':
        return { type: 'images', repr: `[${node.data.length} urls]` }
      default:
        return { type: node.type, repr: typeof node.data === 'string' ? reprString(node.data) : undefined }
    }
  }
  return nodes.map(summarize)
}
function mergeConsecutiveTextNodes(nodes: TEmbeddedNode[]) {
const merged: TEmbeddedNode[] = []
let currentText = ''

101
src/lib/content-patterns.ts

@ -0,0 +1,101 @@ @@ -0,0 +1,101 @@
/**
 * Single source of truth for :emoji: shortcodes and nostr: bech32 patterns.
 * Used by MarkdownArticle, parseContent, nostr-parser, previews, post editor, AsciiDoc, etc.
 *
 * NOTE: every exported RegExp here carries the `g` flag and is therefore
 * stateful (`lastIndex`). Callers that `exec` in a loop must reset `lastIndex`
 * or clone first via `new RegExp(re.source, re.flags)` — several call sites
 * already do exactly that.
 */

// --- Emoji (:shortcode:) ----------------------------------------------------

/** Maximum number of characters allowed between the two colons of a shortcode. */
export const EMOJI_SHORT_CODE_MAX_INNER_LENGTH = 20 as const

// The first inner char is matched by its own class below, so the quantifier
// only needs to cover the remaining (max - 1) characters.
const _emojiInnerQuantifier = EMOJI_SHORT_CODE_MAX_INNER_LENGTH - 1

/**
 * Matches one :shortcode: occurrence.
 * - (?<!:) avoids AsciiDoc double-colon macros (link::, image::, citation::, etc.)
 * - First char after ":" must be [a-zA-Z0-9_-] so "Name: nostr:npub…" is not ": nostr:"
 * - Inner body max length so URLs/paths/nostr ids are not treated as shortcodes
 * - NOTE(review): the inner body class is [^:], broader than the historic
 *   [a-zA-Z0-9_\-\s] (e.g. ":a/b:" now matches) — confirm the widening is intended.
 */
export const EMOJI_SHORT_CODE_REGEX = new RegExp(
  `(?<!:):([a-zA-Z0-9_\\-][^:]{0,${_emojiInnerQuantifier}}):`,
  'g'
)

// --- Nostr bech32 (after "nostr:") ------------------------------------------

/** npub / note encodings have a fixed 58-character bech32 data part. */
export const BECH32_NPUB = 'npub1[a-z0-9]{58}'
export const BECH32_NPROFILE = 'nprofile1[a-z0-9]+'
export const BECH32_NOTE = 'note1[a-z0-9]{58}'
export const BECH32_NEVENT = 'nevent1[a-z0-9]+'
export const BECH32_NADDR = 'naddr1[a-z0-9]+'

/** AsciiDoc / forgiving passes: allow longer npub/note encodings ({58,}) */
export const BECH32_NPUB_LOOSE = 'npub1[a-z0-9]{58,}'
export const BECH32_NOTE_LOOSE = 'note1[a-z0-9]{58,}'

/** All kinds we render from note content (strict lengths for Markdown / parseContent) */
export const NOSTR_CONTENT_BECH32_ALT = [
  BECH32_NPUB,
  BECH32_NPROFILE,
  BECH32_NOTE,
  BECH32_NEVENT,
  BECH32_NADDR
].join('|')

/** AsciiDoc early conversion + text-node extraction (loose npub/note) */
export const NOSTR_ASCIIDOC_SOURCE_BECH32_ALT = [
  BECH32_NPUB_LOOSE,
  BECH32_NPROFILE,
  BECH32_NOTE_LOOSE,
  BECH32_NEVENT,
  BECH32_NADDR
].join('|')

/** Relaxed tail for HTML href / fallback matching (naddr can be very long) */
export const NOSTR_HTML_BECH32_RELAXED = '(?:npub1|nprofile1|note1|nevent1|naddr1)[a-z0-9]{20,}'

export const NOSTR_PROFILE_BECH32_ALT = [BECH32_NPUB, BECH32_NPROFILE].join('|')
export const NOSTR_EVENT_BECH32_ALT = [BECH32_NOTE, BECH32_NEVENT, BECH32_NADDR].join('|')
export const NOSTR_NOTE_AND_NEVENT_ALT = [BECH32_NOTE, BECH32_NEVENT].join('|')

/** nostr:… anywhere in text (Markdown inline, relay scan, editor, preprocess) */
export const NOSTR_URI_INLINE_REGEX = new RegExp(`nostr:(${NOSTR_CONTENT_BECH32_ALT})`, 'g')

/** parseContent: profile mentions only */
export const EMBEDDED_MENTION_REGEX = new RegExp(`nostr:(${NOSTR_PROFILE_BECH32_ALT})`, 'g')

/** parseContent: embedded notes (note / nevent / naddr) */
export const EMBEDDED_EVENT_REGEX = new RegExp(`nostr:(${NOSTR_EVENT_BECH32_ALT})`, 'g')

/** event helpers: note + nevent only */
export const NOSTR_EMBEDDED_NOTE_REGEX = new RegExp(`nostr:(${NOSTR_NOTE_AND_NEVENT_ALT})`, 'g')

/** naddr-only (e.g. URL / deep links) */
export const NOSTR_URI_NADDR_REGEX = new RegExp(`nostr:(${BECH32_NADDR})`, 'g')

/** Post editor / reply pubkey scan: npub, nprofile, note, nevent (not naddr) */
export const NOSTR_URI_FOR_REPLY_PUBKEYS_REGEX = new RegExp(
  `nostr:(${[BECH32_NPUB, BECH32_NPROFILE, BECH32_NOTE, BECH32_NEVENT].join('|')})`,
  'g'
)

/** Legacy bare bech32 (no nostr: prefix) */
export const LEGACY_PROFILE_BECH32_REGEX = new RegExp(`${BECH32_NPUB}|${BECH32_NPROFILE}`, 'g')

/** nostr-parser.tsx: boundary + lookahead so punctuation does not stick to bech32 */
export const NOSTR_PARSER_LOOKAHEAD = '(?=\\s|$|>|\\]|,|\\.|!|\\?|;|:)'
export const NOSTR_PARSER_REGEX = new RegExp(
  `(?:^|\\s|>|\\[)nostr:(${NOSTR_CONTENT_BECH32_ALT})${NOSTR_PARSER_LOOKAHEAD}`,
  'g'
)

/** AsciiDoc: optional [] after nostr id */
export const NOSTR_ASCIIDOC_EARLY_LINK_REGEX = new RegExp(
  `nostr:(${NOSTR_ASCIIDOC_SOURCE_BECH32_ALT})(\\[\\])?`,
  'g'
)

/**
 * AsciiDoc HTML text-node scanning — identical pattern and capture groups to
 * NOSTR_ASCIIDOC_EARLY_LINK_REGEX. Derived from it (single source) but kept as
 * a distinct RegExp object so the two call sites never share `lastIndex` state.
 */
export const NOSTR_ASCIIDOC_TEXT_NODE_REGEX = new RegExp(
  NOSTR_ASCIIDOC_EARLY_LINK_REGEX.source,
  NOSTR_ASCIIDOC_EARLY_LINK_REGEX.flags
)

28
src/lib/content-spacing-debug.ts

@ -0,0 +1,28 @@ @@ -0,0 +1,28 @@
/**
* Verbose content/spacing traces for debugging (e.g. "Name: nostr:npub…" collapsing).
*
* Enable in dev: localStorage.setItem('jumble-debug-content', 'true') then reload.
* Disable: localStorage.removeItem('jumble-debug-content')
*/
/** localStorage key that switches the content-spacing traces on. */
const STORAGE_KEY = 'jumble-debug-content'

/**
 * True only in a dev build where localStorage holds 'jumble-debug-content' === 'true'.
 * Any failure while probing the environment (no localStorage, restricted
 * storage, missing import.meta.env) counts as "debugging off".
 */
export function isContentSpacingDebug(): boolean {
  try {
    if (!import.meta.env.DEV) return false
    if (typeof localStorage === 'undefined') return false
    return localStorage.getItem(STORAGE_KEY) === 'true'
  } catch {
    return false
  }
}
/**
 * JSON.stringify a string so spaces/newlines are visible in the console.
 * Strings longer than maxLen are cut and suffixed with "…(+N chars)".
 */
export function reprString(s: string, maxLen = 500): string {
  if (s.length <= maxLen) {
    return JSON.stringify(s)
  }
  const hidden = s.length - maxLen
  return JSON.stringify(`${s.slice(0, maxLen)}…(+${hidden} chars)`)
}
/**
 * Emit one namespaced console trace for a content/spacing phase.
 * No-op unless isContentSpacingDebug() is enabled.
 */
export function logContentSpacing(phase: string, detail: Record<string, unknown>): void {
  if (isContentSpacingDebug()) {
    // eslint-disable-next-line no-console
    console.log(`[jumble content-spacing] ${phase}`, detail)
  }
}

3
src/lib/emoji-content.ts

@ -1,3 +1,4 @@ @@ -1,3 +1,4 @@
import { EMOJI_SHORT_CODE_REGEX } from '@/lib/content-patterns'
import { emojis, shortcodeToEmoji } from '@tiptap/extension-emoji'
const STANDARD_EMOJI_LIMIT = 20
@ -42,7 +43,7 @@ export function replaceStandardEmojiShortcodesInContent( @@ -42,7 +43,7 @@ export function replaceStandardEmojiShortcodesInContent(
const customSet = customShortcodes instanceof Set
? customShortcodes
: new Set(customShortcodes ?? [])
return content.replace(/:([a-zA-Z0-9_\-\s]+):/g, (match, shortcode: string) => {
return content.replace(EMOJI_SHORT_CODE_REGEX, (match, shortcode: string) => {
const trimmed = shortcode.trim()
if (customSet.has(trimmed)) return match
const native = shortcodeToEmoji(trimmed, emojis) ?? shortcodeToEmoji(trimmed.replace(/\s+/g, '_'), emojis)

6
src/lib/event.ts

@ -1,4 +1,5 @@ @@ -1,4 +1,5 @@
import { CALENDAR_EVENT_KINDS, EMBEDDED_MENTION_REGEX, ExtendedKind } from '@/constants'
import { CALENDAR_EVENT_KINDS, ExtendedKind } from '@/constants'
import { EMBEDDED_MENTION_REGEX, NOSTR_EMBEDDED_NOTE_REGEX } from '@/lib/content-patterns'
import client from '@/services/client.service'
import { TImetaInfo } from '@/types'
import { LRUCache } from 'lru-cache'
@ -223,8 +224,7 @@ export function getEmbeddedNoteBech32Ids(event: Event) { @@ -223,8 +224,7 @@ export function getEmbeddedNoteBech32Ids(event: Event) {
if (cache) return cache
const embeddedNoteBech32Ids: string[] = []
const embeddedNoteRegex = /nostr:(note1[a-z0-9]{58}|nevent1[a-z0-9]+)/g
;(event.content.match(embeddedNoteRegex) || []).forEach((note) => {
;(event.content.match(NOSTR_EMBEDDED_NOTE_REGEX) || []).forEach((note) => {
try {
const { type, data } = nip19.decode(note.split(':')[1])
if (type === 'nevent') {

71
src/lib/nostr-parser.tsx

@ -13,7 +13,9 @@ import { parsePaytoUri } from '@/lib/payto' @@ -13,7 +13,9 @@ import { parsePaytoUri } from '@/lib/payto'
import PaytoLink from '@/components/PaytoLink'
import { TImetaInfo } from '@/types'
import { Event } from 'nostr-tools'
import { NOSTR_PARSER_REGEX } from '@/lib/content-patterns'
import logger from '@/lib/logger'
import { logContentSpacing, reprString } from '@/lib/content-spacing-debug'
export interface ParsedNostrContent {
elements: Array<{
@ -39,9 +41,16 @@ export interface ParsedNostrContent { @@ -39,9 +41,16 @@ export interface ParsedNostrContent {
*/
export function parseNostrContent(content: string, event?: Event): ParsedNostrContent {
const elements: ParsedNostrContent['elements'] = []
const traceNostr = content.includes('nostr:')
if (traceNostr) {
logContentSpacing('parseNostrContent:input', {
length: content.length,
repr: reprString(content),
eventId: event?.id
})
}
// Regex to match nostr: addresses that are not inside URLs or other contexts
const nostrRegex = /(?:^|\s|>|\[)nostr:(npub1[a-z0-9]{58}|nprofile1[a-z0-9]+|note1[a-z0-9]{58}|nevent1[a-z0-9]+|naddr1[a-z0-9]+)(?=\s|$|>|\]|,|\.|!|\?|;|:)/g
const nostrRegex = new RegExp(NOSTR_PARSER_REGEX.source, NOSTR_PARSER_REGEX.flags)
// Regex to match all URLs (we'll filter by type later)
const urlRegex = /(https?:\/\/[^\s]+)/gi
@ -79,12 +88,25 @@ export function parseNostrContent(content: string, event?: Event): ParsedNostrCo @@ -79,12 +88,25 @@ export function parseNostrContent(content: string, event?: Event): ParsedNostrCo
// Find nostr matches
let nostrMatch
while ((nostrMatch = nostrRegex.exec(content)) !== null) {
if (isNostrAddressInValidContext(content, nostrMatch.index, nostrMatch.index + nostrMatch[0].length)) {
const nStart = nostrMatch.index
const nEnd = nostrMatch.index + nostrMatch[0].length
const valid = isNostrAddressInValidContext(content, nStart, nEnd)
if (traceNostr) {
logContentSpacing('parseNostrContent:nostr-regex', {
index: nStart,
end: nEnd,
fullMatchRepr: reprString(nostrMatch[0]),
validContext: valid,
charBeforeIndex: nStart > 0 ? reprString(content[nStart - 1]) : '(start)',
charAtIndex: reprString(content[nStart] ?? '')
})
}
if (valid) {
allMatches.push({
type: 'nostr',
match: nostrMatch,
start: nostrMatch.index,
end: nostrMatch.index + nostrMatch[0].length
start: nStart,
end: nEnd
})
}
}
@ -269,6 +291,23 @@ export function parseNostrContent(content: string, event?: Event): ParsedNostrCo @@ -269,6 +291,23 @@ export function parseNostrContent(content: string, event?: Event): ParsedNostrCo
const isAtEnd = end === content.length || content[end] === '\n'
const needsSpaceBefore = !isAtStart && content[start - 1] !== ' '
const needsSpaceAfter = !isAtEnd && content[end] !== ' '
if (traceNostr) {
const textBefore = start > lastIndex ? content.slice(lastIndex, start) : ''
logContentSpacing('parseNostrContent:nostr-element', {
lastIndex,
start,
end,
textBeforeSliceRepr: reprString(textBefore),
isAtStart,
isAtEnd,
needsSpaceBefore,
needsSpaceAfter,
prevCharRepr:
start > 0 ? reprString(content[start - 1]) : '(none)',
nextCharRepr:
end < content.length ? reprString(content[end]) : '(eof)'
})
}
if (needsSpaceBefore) {
elements.push({
@ -422,6 +461,12 @@ export function parseNostrContent(content: string, event?: Event): ParsedNostrCo @@ -422,6 +461,12 @@ export function parseNostrContent(content: string, event?: Event): ParsedNostrCo
images: allImages
})
if (traceNostr) {
logContentSpacing('parseNostrContent:result', {
branch: 'gallery',
sequence: summarizeParsedElementsForDebug(filteredElements)
})
}
return { elements: filteredElements }
}
@ -433,9 +478,25 @@ export function parseNostrContent(content: string, event?: Event): ParsedNostrCo @@ -433,9 +478,25 @@ export function parseNostrContent(content: string, event?: Event): ParsedNostrCo
})
}
if (traceNostr) {
logContentSpacing('parseNostrContent:result', {
branch: elements.length === 1 && elements[0].type === 'text' ? 'text-only' : 'elements',
sequence: summarizeParsedElementsForDebug(elements)
})
}
return { elements }
}
function summarizeParsedElementsForDebug(
els: ParsedNostrContent['elements']
): Array<{ type: string; repr?: string; bech32Id?: string }> {
return els.map((e) => {
if (e.type === 'text') return { type: 'text', repr: reprString(e.content) }
if (e.type === 'nostr') return { type: 'nostr', bech32Id: e.bech32Id }
return { type: e.type }
})
}
/**
* Check if a nostr address is in a valid context (not inside URLs, etc.)
*/

18
src/lib/tiptap.ts

@ -1,10 +1,19 @@ @@ -1,10 +1,19 @@
import { logContentSpacing, reprString } from '@/lib/content-spacing-debug'
import customEmojiService from '@/services/custom-emoji.service'
import { emojis, shortcodeToEmoji } from '@tiptap/extension-emoji'
import { JSONContent } from '@tiptap/react'
import { nip19 } from 'nostr-tools'
export function parseEditorJsonToText(node?: JSONContent) {
let text = _parseEditorJsonToText(node).trim()
const rawJoined = _parseEditorJsonToText(node)
let text = rawJoined.trim()
const trace = rawJoined.includes('nostr:') || /npub1|nprofile1/.test(rawJoined)
if (trace) {
logContentSpacing('parseEditorJsonToText:joined', {
beforeTrimRepr: reprString(rawJoined),
afterTrimRepr: reprString(text)
})
}
const regex = /(?:^|\s)(nevent|naddr|nprofile|npub)1[a-zA-Z0-9]+/g
text = text.replace(regex, (match) => {
@ -20,7 +29,14 @@ export function parseEditorJsonToText(node?: JSONContent) { @@ -20,7 +29,14 @@ export function parseEditorJsonToText(node?: JSONContent) {
})
// Ensure space before nostr: when not already preceded by space (fixes "Like:nostr:npub" and "Like:\nnostr:npub")
const beforeNostrSpacePass = text
text = text.replace(/(.)(?=nostr:)/g, (_, prev) => (prev === ' ' ? prev : prev + ' '))
if (trace) {
logContentSpacing('parseEditorJsonToText:after-nostr-prefix-pass', {
beforeRepr: reprString(beforeNostrSpacePass),
afterRepr: reprString(text)
})
}
return text
}

7
src/pages/secondary/NotePage/index.tsx

@ -21,10 +21,9 @@ import type { Event } from 'nostr-tools' @@ -21,10 +21,9 @@ import type { Event } from 'nostr-tools'
import { kinds, nip19 } from 'nostr-tools'
import { forwardRef, useEffect, useMemo, useState } from 'react'
import { useTranslation } from 'react-i18next'
import { NOSTR_URI_NADDR_REGEX } from '@/lib/content-patterns'
import NotFound from './NotFound'
const NADDR_REGEX = /nostr:(naddr1[a-z0-9]+)/g
// Helper function to get event type name (matching WebPreview)
function getEventTypeName(kind: number): string {
switch (kind) {
@ -109,8 +108,8 @@ const NotePage = forwardRef(({ id, index, hideTitlebar = false }: { id?: string; @@ -109,8 +108,8 @@ const NotePage = forwardRef(({ id, index, hideTitlebar = false }: { id?: string;
// When viewing a kind-24 invite (e.g. from notifications), extract calendar event naddr from content and show full calendar card with RSVP
const calendarInviteNaddr = useMemo(() => {
if (finalEvent?.kind !== ExtendedKind.PUBLIC_MESSAGE || !finalEvent.content?.trim()) return undefined
const match = NADDR_REGEX.exec(finalEvent.content)
NADDR_REGEX.lastIndex = 0
const match = NOSTR_URI_NADDR_REGEX.exec(finalEvent.content)
NOSTR_URI_NADDR_REGEX.lastIndex = 0
const naddr = match?.[1]
if (!naddr) return undefined
try {

5
src/services/relay-selection.service.ts

@ -1,5 +1,6 @@ @@ -1,5 +1,6 @@
import { Event, kinds } from 'nostr-tools'
import { ExtendedKind, FAST_WRITE_RELAY_URLS, RANDOM_PUBLISH_RELAY_COUNT } from '@/constants'
import { NOSTR_URI_FOR_REPLY_PUBKEYS_REGEX } from '@/lib/content-patterns'
import client from '@/services/client.service'
import { normalizeUrl, isLocalNetworkUrl } from '@/lib/url'
import { TRelaySet, TRelayList } from '@/types'
@ -734,9 +735,7 @@ class RelaySelectionService { @@ -734,9 +735,7 @@ class RelaySelectionService {
}
// Extract nostr addresses from content
const matches = content.match(
/nostr:(npub1[a-z0-9]{58}|nprofile1[a-z0-9]+|note1[a-z0-9]{58}|nevent1[a-z0-9]+)/g
)
const matches = content.match(NOSTR_URI_FOR_REPLY_PUBKEYS_REGEX)
if (matches) {

Loading…
Cancel
Save