diff --git a/src/lib/utils/markdownParser.ts b/src/lib/utils/markdownParser.ts index e0e09ed..501edfe 100644 --- a/src/lib/utils/markdownParser.ts +++ b/src/lib/utils/markdownParser.ts @@ -7,17 +7,14 @@ import { ndkInstance } from '$lib/ndk'; import { nip19 } from 'nostr-tools'; // Regular expressions for nostr identifiers - process these first -const NOSTR_NPUB_REGEX = /(?:nostr:)?(npub[a-zA-Z0-9]{59,60})/g; +const NOSTR_PROFILE_REGEX = /(?:nostr:)?((?:npub|nprofile)[a-zA-Z0-9]{20,})/g; +const NOSTR_NOTE_REGEX = /(?:nostr:)?((?:nevent|note|naddr)[a-zA-Z0-9]{20,})/g; // Regular expressions for markdown elements -const BLOCKQUOTE_REGEX = /^(?:>[ \t]*.+\n?(?:(?:>[ \t]*\n)*(?:>[ \t]*.+\n?))*)+/gm; -const ORDERED_LIST_REGEX = /^(\d+)\.[ \t]+(.+)$/gm; -const UNORDERED_LIST_REGEX = /^[-*][ \t]+(.+)$/gm; const BOLD_REGEX = /\*\*([^*]+)\*\*|\*([^*]+)\*/g; const ITALIC_REGEX = /_([^_]+)_/g; const HEADING_REGEX = /^(#{1,6})\s+(.+)$/gm; const HORIZONTAL_RULE_REGEX = /^(?:---|\*\*\*|___)$/gm; -const CODE_BLOCK_REGEX = /```([^\n]*)\n([\s\S]*?)```/gm; const INLINE_CODE_REGEX = /`([^`\n]+)`/g; const LINK_REGEX = /\[([^\]]+)\]\(([^)]+)\)/g; const IMAGE_REGEX = /!\[([^\]]*)\]\(([^)]+)\)/g; @@ -29,38 +26,49 @@ const FOOTNOTE_DEFINITION_REGEX = /^\[(\^[^\]]+)\]:\s*(.+?)(?:\n(?!\[)|\n\n|$)/g const npubCache = new Map(); /** - * Get user metadata for an npub + * Get user metadata for a nostr identifier (npub or nprofile) */ -async function getUserMetadata(npub: string): Promise<{name?: string, displayName?: string}> { - if (npubCache.has(npub)) { - return npubCache.get(npub)!; +async function getUserMetadata(identifier: string): Promise<{name?: string, displayName?: string}> { + if (npubCache.has(identifier)) { + return npubCache.get(identifier)!; } - const fallback = { name: `${npub.slice(0, 8)}...${npub.slice(-4)}` }; + const fallback = { name: `${identifier.slice(0, 8)}...${identifier.slice(-4)}` }; try { const ndk = get(ndkInstance); if (!ndk) { - npubCache.set(npub, fallback); + npubCache.set(identifier, fallback); return fallback; } - const decoded = nip19.decode(npub); - if (decoded.type !== 'npub') { - npubCache.set(npub, fallback); + const decoded = nip19.decode(identifier); + if (!decoded) { + npubCache.set(identifier, fallback); return fallback; } - const user = ndk.getUser({ npub: npub }); + // Handle different identifier types + let pubkey: string; + if (decoded.type === 'npub') { + pubkey = decoded.data; + } else if (decoded.type === 'nprofile') { + pubkey = decoded.data.pubkey; + } else { + npubCache.set(identifier, fallback); + return fallback; + } + + const user = ndk.getUser({ pubkey: pubkey }); if (!user) { - npubCache.set(npub, fallback); + npubCache.set(identifier, fallback); return fallback; } try { const profile = await user.fetchProfile(); if (!profile) { - npubCache.set(npub, fallback); + npubCache.set(identifier, fallback); return fallback; } @@ -69,14 +77,14 @@ async function getUserMetadata(npub: string): Promise<{name?: string, displayNam displayName: profile.displayName }; - npubCache.set(npub, metadata); + npubCache.set(identifier, metadata); return metadata; } catch (e) { - npubCache.set(npub, fallback); + npubCache.set(identifier, fallback); return fallback; } } catch (e) { - npubCache.set(npub, fallback); + npubCache.set(identifier, fallback); return fallback; } } @@ -84,8 +92,8 @@ async function getUserMetadata(npub: string): Promise<{name?: string, displayNam /** * Process lists (ordered and unordered) */ -function processLists(html: string): string { - const lines = html.split('\n'); +function processLists(content: string): string { + const lines = content.split('\n'); let inList = false; let isOrdered = false; let currentList: string[] = []; @@ -93,8 +101,8 @@ function processLists(html: string): string { for (let i = 0; i < lines.length; i++) { const line = lines[i]; - const orderedMatch = ORDERED_LIST_REGEX.exec(line); - const unorderedMatch = UNORDERED_LIST_REGEX.exec(line); + const orderedMatch = line.match(/^(\d+)\.[ \t]+(.+)$/); + const unorderedMatch = line.match(/^\*[ \t]+(.+)$/); if (orderedMatch || unorderedMatch) { if (!inList) { @@ -118,10 +126,6 @@ function processLists(html: string): string { } processed.push(line); } - - // Reset regex lastIndex - ORDERED_LIST_REGEX.lastIndex = 0; - UNORDERED_LIST_REGEX.lastIndex = 0; } if (inList) { @@ -138,78 +142,445 @@ function processLists(html: string): string { } /** - * Process blockquotes using placeholder approach + * Process blockquotes by finding consecutive quote lines and preserving their structure */ function processBlockquotes(text: string): string { + const lines = text.split('\n'); + const processedLines: string[] = []; + let currentQuote: string[] = []; + let quoteCount = 0; + let lastLineWasQuote = false; const blockquotes: Array<{id: string, content: string}> = []; - let processedText = text; - // Extract and save blockquotes - processedText = processedText.replace(BLOCKQUOTE_REGEX, (match) => { - const id = `BLOCKQUOTE_${blockquotes.length}`; - const cleanContent = match - .split('\n') - .map(line => line.replace(/^>[ \t]*/, '')) - .join('\n') - .trim(); + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + const isQuoteLine = line.startsWith('> '); + + if (isQuoteLine) { + // If we had a gap between quotes, this is a new quote + if (!lastLineWasQuote && currentQuote.length > 0) { + quoteCount++; + const id = `BLOCKQUOTE_${quoteCount}`; + const quoteContent = currentQuote.join('
'); + blockquotes.push({ + id, + content: `

${quoteContent}

` + }); + processedLines.push(id); + currentQuote = []; + } + + // Add to current quote + currentQuote.push(line.substring(2)); + lastLineWasQuote = true; + } else { + // If we were in a quote and now we're not, process it + if (currentQuote.length > 0) { + quoteCount++; + const id = `BLOCKQUOTE_${quoteCount}`; + const quoteContent = currentQuote.join('
'); + blockquotes.push({ + id, + content: `

${quoteContent}

` + }); + processedLines.push(id); + currentQuote = []; + } + processedLines.push(line); + lastLineWasQuote = false; + } + } + // Handle any remaining quote + if (currentQuote.length > 0) { + quoteCount++; + const id = `BLOCKQUOTE_${quoteCount}`; + const quoteContent = currentQuote.join('
'); blockquotes.push({ id, - content: `
${cleanContent}
` + content: `

${quoteContent}

` }); - return id; - }); + processedLines.push(id); + } + + let result = processedLines.join('\n'); // Restore blockquotes blockquotes.forEach(({id, content}) => { - processedText = processedText.replace(id, content); + result = result.replace(id, content); }); - return processedText; + return result; } /** - * Process code blocks and inline code before any HTML escaping + * Format code based on language */ -function processCode(text: string): string { - const blocks: Array<{id: string, content: string}> = []; - const inlineCodes: Array<{id: string, content: string}> = []; - let processedText = text; - - // First, extract and save code blocks - processedText = processedText.replace(CODE_BLOCK_REGEX, (match, lang, code) => { - const id = `CODE_BLOCK_${blocks.length}`; - blocks.push({ - id, - content: `
${escapeHtml(code)}
` - }); - return id; - }); +function formatCodeByLanguage(code: string, lang: string): string { + const language = lang.trim().toLowerCase(); + + // Remove any trailing whitespace or empty lines at start/end + let formattedCode = code.trim(); + + switch (language) { + case 'json': + try { + return JSON.stringify(JSON.parse(formattedCode), null, 2); + } catch (e) { + return formattedCode; + } - // Then extract and save inline code - processedText = processedText.replace(INLINE_CODE_REGEX, (match, code) => { - const id = `INLINE_CODE_${inlineCodes.length}`; - inlineCodes.push({ - id, - content: `${escapeHtml(code.trim())}` - }); - return id; - }); + case 'javascript': + case 'js': + case 'typescript': + case 'ts': + try { + // Basic indentation for JS/TS + formattedCode = formattedCode + .split('\n') + .map(line => line.trim()) + .join('\n'); + + // Add line breaks after certain characters + formattedCode = formattedCode + .replace(/([{([])\s*/g, '$1\n') + .replace(/\s*([\]})])/g, '\n$1') + .replace(/;\s*/g, ';\n') + .replace(/,\s*([^\s])/g, ',\n$1'); + + // Indent based on brackets + let indent = 0; + return formattedCode + .split('\n') + .map(line => { + line = line.trim(); + if (line.match(/[}\])]$/)) indent--; + const formatted = ' '.repeat(Math.max(0, indent)) + line; + if (line.match(/[{([]\s*$/)) indent++; + return formatted; + }) + .filter(line => line.trim()) + .join('\n'); + } catch (e) { + return formattedCode; + } - // Now escape HTML in the remaining text - processedText = escapeHtml(processedText); + case 'html': + case 'xml': + try { + // Basic indentation for HTML/XML + let indent = 0; + return formattedCode + .replace(/>\n<') + .split('\n') + .map(line => { + line = line.trim(); + if (line.match(/<\/[^>]+>$/)) indent--; + const formatted = ' '.repeat(Math.max(0, indent)) + line; + if (line.match(/<[^/][^>]*>$/) && !line.match(/<[^>]+\/>/)) indent++; + return formatted; + }) + .filter(line => line.trim()) + .join('\n'); + } catch (e) { + return formattedCode; + } - // Restore code blocks - blocks.forEach(({id, content}) => { - processedText = processedText.replace(escapeHtml(id), content); + case 'css': + try { + // Basic indentation for CSS + return formattedCode + .replace(/\s*{\s*/g, ' {\n') + .replace(/;\s*/g, ';\n') + .replace(/\s*}\s*/g, '\n}\n') + .split('\n') + .map(line => line.trim()) + .filter(line => line) + .map(line => line.startsWith('}') ? line : ' ' + line) + .join('\n'); + } catch (e) { + return formattedCode; + } + + case 'python': + case 'py': + try { + // Basic indentation for Python + let indent = 0; + return formattedCode + .split('\n') + .map(line => { + line = line.trim(); + if (line.match(/^(return|break|continue|pass|else|elif|except|finally)\b/)) indent--; + const formatted = ' '.repeat(Math.max(0, indent)) + line; + if (line.match(/:\s*$/)) indent++; + return formatted; + }) + .filter(line => line.trim()) + .join('\n'); + } catch (e) { + return formattedCode; + } + + case 'cpp': + case 'c': + case 'rust': + try { + // Basic indentation for C/C++/Rust + let indent = 0; + return formattedCode + .split('\n') + .map(line => { + line = line.trim(); + if (line.match(/^[}\])]/) || line.match(/^(public|private|protected):/)) indent--; + const formatted = ' '.repeat(Math.max(0, indent)) + line; + if (line.match(/[{[]$/)) indent++; + return formatted; + }) + .filter(line => line.trim()) + .join('\n'); + } catch (e) { + return formattedCode; + } + + case 'php': + try { + // Basic indentation for PHP + let indent = 0; + return formattedCode + .split('\n') + .map(line => { + line = line.trim(); + if (line.match(/^[}\])]/) || line.match(/^(case|default):/)) indent--; + const formatted = ' '.repeat(Math.max(0, indent)) + line; + if (line.match(/[{[]$/) || line.match(/^(case|default):/)) indent++; + return formatted; + }) + .filter(line => line.trim()) + .join('\n'); + } catch (e) { + return formattedCode; + } + + case 'bash': + case 'shell': + case 'sh': + try { + // Basic formatting for shell scripts + return formattedCode + .split('\n') + .map(line => line.trim()) + .filter(line => line) + .map(line => { + if (line.startsWith('#')) return line; + if (line.endsWith('\\')) return line + '\n'; + if (line.match(/^(if|while|for|case)/)) return line; + if (line.match(/^(then|do|else|elif)/)) return ' ' + line; + if (line.match(/^(fi|done|esac)/)) return line; + return ' ' + line; + }) + .join('\n'); + } catch (e) { + return formattedCode; + } + + default: + return formattedCode; + } +} + +/** + * Process nostr identifiers + */ +async function processNostrIdentifiers(content: string): Promise { + let processedContent = content; + + // Process profiles (npub and nprofile) + const profileMatches = Array.from(content.matchAll(NOSTR_PROFILE_REGEX)); + for (const match of profileMatches) { + const [fullMatch, identifier] = match; + const metadata = await getUserMetadata(identifier); + const displayText = metadata.displayName || metadata.name || `${identifier.slice(0, 8)}...${identifier.slice(-4)}`; + const escapedId = identifier + .replace(/&/g, '&') + .replace(//g, '>') + .replace(/"/g, '"') + .replace(/'/g, '''); + const escapedDisplayText = displayText + .replace(/&/g, '&') + .replace(//g, '>') + .replace(/"/g, '"') + .replace(/'/g, '''); + + // Create a link with standardized styling + const link = `@${escapedDisplayText}`; + + // Replace only the exact match to preserve surrounding text + processedContent = processedContent.replace(fullMatch, link); + } + + // Process notes (nevent, note, naddr) + const noteMatches = Array.from(processedContent.matchAll(NOSTR_NOTE_REGEX)); + for (const match of noteMatches) { + const [fullMatch, identifier] = match; + const shortId = identifier.slice(0, 12) + '...' + identifier.slice(-8); + const escapedId = identifier + .replace(/&/g, '&') + .replace(//g, '>') + .replace(/"/g, '"') + .replace(/'/g, '''); + + // Create a link with standardized styling + const link = `${shortId}`; + + // Replace only the exact match to preserve surrounding text + processedContent = processedContent.replace(fullMatch, link); + } + + return processedContent; +} + +/** + * Process code blocks by finding consecutive code lines and preserving their content + */ +function processCodeBlocks(text: string): { text: string; blocks: Map } { + const lines = text.split('\n'); + const processedLines: string[] = []; + const blocks = new Map(); + let inCodeBlock = false; + let currentCode: string[] = []; + let currentLanguage = ''; + let blockCount = 0; + let lastWasCodeBlock = false; + + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + const codeBlockStart = line.match(/^```(\w*)$/); + + if (codeBlockStart) { + if (!inCodeBlock) { + // Starting a new code block + inCodeBlock = true; + currentLanguage = codeBlockStart[1]; + currentCode = []; + lastWasCodeBlock = true; + } else { + // Ending current code block + blockCount++; + const id = `CODE_BLOCK_${blockCount}`; + const code = currentCode.join('\n'); + + blocks.set(id, JSON.stringify({ + code, + language: currentLanguage, + raw: true + })); + + processedLines.push(''); // Add spacing before code block + processedLines.push(id); + processedLines.push(''); // Add spacing after code block + inCodeBlock = false; + currentCode = []; + currentLanguage = ''; + } + } else if (inCodeBlock) { + currentCode.push(line); + } else { + if (lastWasCodeBlock && line.trim()) { + processedLines.push(''); + lastWasCodeBlock = false; + } + processedLines.push(line); + } + } + + // Handle unclosed code block + if (inCodeBlock && currentCode.length > 0) { + blockCount++; + const id = `CODE_BLOCK_${blockCount}`; + blocks.set(id, JSON.stringify({ + code: currentCode.join('\n'), + language: currentLanguage, + raw: true + })); + processedLines.push(''); + processedLines.push(id); + processedLines.push(''); + } + + return { + text: processedLines.join('\n'), + blocks + }; +} + +/** + * Restore code blocks with proper formatting + */ +function restoreCodeBlocks(text: string, blocks: Map): string { + let result = text; + + for (const [id, blockData] of blocks) { + const { code, language } = JSON.parse(blockData); + + // Preserve code exactly as it was written + const html = `
+
${code}
+
`; + + result = result.replace(id, html); + } + + return result; +} + +/** + * Process inline code + */ +function processInlineCode(text: string): string { + return text.replace(INLINE_CODE_REGEX, (match, code) => { + const escapedCode = code + .replace(/&/g, '&') + .replace(//g, '>') + .replace(/"/g, '"') + .replace(/'/g, ''') + .replace(/\\n/g, '\n'); + + return `${escapedCode}`; }); +} + +/** + * Process other markdown elements (excluding code) + */ +function processOtherElements(content: string): string { + // Process blockquotes first + content = processBlockquotes(content); - // Restore inline code - inlineCodes.forEach(({id, content}) => { - processedText = processedText.replace(escapeHtml(id), content); + // Process basic markdown elements + content = content.replace(BOLD_REGEX, '$1$2'); + content = content.replace(ITALIC_REGEX, '$1'); + content = content.replace(HEADING_REGEX, (match, hashes, content) => { + const level = hashes.length; + const sizes = ['text-2xl', 'text-xl', 'text-lg', 'text-base', 'text-sm', 'text-xs']; + return `${content.trim()}`; }); - return processedText; + // Process links and images with standardized styling + content = content.replace(IMAGE_REGEX, '$1'); + content = content.replace(LINK_REGEX, '$1'); + + // Process hashtags with standardized styling + content = content.replace(HASHTAG_REGEX, '#$1'); + + // Process horizontal rules + content = content.replace(HORIZONTAL_RULE_REGEX, '
'); + + return content; } /** @@ -226,12 +597,12 @@ function processFootnotes(text: string): { text: string, footnotes: Map { const cleanId = id.replace('^', ''); if (footnotes.has(cleanId)) { counter++; - return `[${counter}]`; + return `[${counter}]`; } return match; }); @@ -244,7 +615,7 @@ function processFootnotes(text: string): { text: string, footnotes: Map${content}`; + text += `
  • ${content}
  • `; } text += ''; @@ -254,82 +625,45 @@ function processFootnotes(text: string): { text: string, footnotes: Map { if (!text) return ''; - // First, process code blocks (protect these from HTML escaping) - let html = processCode(text); // still escape HTML *inside* code blocks - - // 👉 NEW: process blockquotes *before* the rest of HTML is escaped - html = processBlockquotes(html); + // First extract and save code blocks + const { text: withoutCode, blocks } = processCodeBlocks(text); // Process nostr identifiers - const npubMatches = Array.from(html.matchAll(NOSTR_NPUB_REGEX)); - const npubPromises = npubMatches.map(async match => { - const [fullMatch, npub] = match; - const metadata = await getUserMetadata(npub); - const displayText = metadata.displayName || metadata.name || `${npub.slice(0, 8)}...${npub.slice(-4)}`; - return { fullMatch, npub, displayText }; - }); + let content = await processNostrIdentifiers(withoutCode); - const npubResults = await Promise.all(npubPromises); - for (const { fullMatch, npub, displayText } of npubResults) { - html = html.replace( - fullMatch, - `@${displayText}` - ); - } + // Process blockquotes + content = processBlockquotes(content); // Process lists - html = processLists(html); + content = processLists(content); - // Process footnotes - const { text: processedHtml } = processFootnotes(html); - html = processedHtml; + // Process other markdown elements + content = processOtherElements(content); - // Process basic markdown elements - html = html.replace(BOLD_REGEX, '$1$2'); - html = html.replace(ITALIC_REGEX, '$1'); - html = html.replace(HEADING_REGEX, (match, hashes, content) => { - const level = hashes.length; - const sizes = ['text-2xl', 'text-xl', 'text-lg', 'text-base', 'text-sm', 'text-xs']; - return `${content.trim()}`; - }); + // Process inline code (after other elements to prevent conflicts) + content = processInlineCode(content); - // Process links and images - html = html.replace(IMAGE_REGEX, '$1'); - html = html.replace(LINK_REGEX, '$1'); - - // Process hashtags - html = html.replace(HASHTAG_REGEX, '#$1'); - - // Process horizontal rules - html = html.replace(HORIZONTAL_RULE_REGEX, '
    '); - - // Handle paragraphs and line breaks - html = html.replace(/\n{2,}/g, '

    '); - html = html.replace(/\n/g, '
    '); + // Process footnotes + const { text: processedContent } = processFootnotes(content); + content = processedContent; - // Wrap content in paragraph if needed - if (!html.startsWith('<')) { - html = `

    ${html}

    `; - } + // Handle paragraphs and line breaks, preserving existing HTML + content = content + .split(/\n{2,}/) + .map(para => para.trim()) + .filter(para => para) + .map(para => para.startsWith('<') ? para : `

    ${para}

    `) + .join('\n\n'); - return html; -} + // Finally, restore code blocks + content = restoreCodeBlocks(content, blocks); -/** - * Escape HTML special characters to prevent XSS - */ -function escapeHtml(text: string): string { - return text - .replace(/&/g, '&') - .replace(//g, '>') - .replace(/"/g, '"') - .replace(/'/g, '''); + return content; } /**