import { parseBasicMarkdown } from './basicMarkdownParser';
import hljs from 'highlight.js';
import 'highlight.js/lib/common'; // Import common languages
import 'highlight.js/styles/github-dark.css'; // Dark theme only

// Global highlight.js configuration used by every highlight call below.
hljs.configure({ ignoreUnescapedHTML: true });

// NOTE(review): the tag names and class hooks emitted in this module
// (`table-wrapper`, `footnotes`, `markdown-error`, `hljs`) are structural
// placeholders; confirm them against the project's stylesheet.

// Regular expressions for advanced markdown elements
const HEADING_REGEX = /^(#{1,6})\s+(.+)$/gm;
const ALTERNATE_HEADING_REGEX = /^([^\n]+)\n(=+|-+)\n/gm;
const INLINE_CODE_REGEX = /`([^`\n]+)`/g;
const HORIZONTAL_RULE_REGEX = /^(?:[-*_]\s*){3,}$/gm;
const FOOTNOTE_REFERENCE_REGEX = /\[\^([^\]]+)\]/g;
const FOOTNOTE_DEFINITION_REGEX = /^\[\^([^\]]+)\]:\s*(.+)$/gm;
// A run of lines that each start with a pipe.
const TABLE_BLOCK_REGEX = /^\|(.*(?:\n\|.*)*)/gm;
// Header delimiter row: pipes, hyphens, optional alignment colons, and at
// least one hyphen so that a row of empty cells is not mistaken for it.
const TABLE_DELIMITER_REGEX = /^\|(?=.*-)[-:\s|]+\|$/;
// Opening/closing fence; tolerates trailing whitespace (including `\r`).
const CODE_FENCE_REGEX = /^```([\w+#-]*)\s*$/;
// Placeholder that stands in for an extracted code block.
const CODE_PLACEHOLDER_REGEX = /CODE_BLOCK_\d+/g;

/** A fenced code block lifted out of the document before other processing. */
interface CodeBlock {
  code: string;
  language: string;
}

/**
 * Escape the five HTML-significant characters so `value` can be placed in
 * element content or a double-quoted attribute.
 */
function escapeHtml(value: string): string {
  return value
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#039;');
}

/**
 * Process headings (both styles).
 *
 * ATX headings (`# Heading`) map the number of `#` characters to the heading
 * level. Setext headings (`Heading` underlined with `=` or `-`) become level
 * 1 or 2 respectively.
 */
function processHeadings(content: string): string {
  const withAtx = content.replace(HEADING_REGEX, (_match, level: string, text: string) => {
    const headingLevel = level.length;
    return `<h${headingLevel}>${text.trim()}</h${headingLevel}>`;
  });

  return withAtx.replace(ALTERNATE_HEADING_REGEX, (_match, text: string, underline: string) => {
    const headingLevel = underline[0] === '=' ? 1 : 2;
    // The pattern consumes the newline after the underline; put it back so
    // the following line is not glued to the heading.
    return `<h${headingLevel}>${text.trim()}</h${headingLevel}>\n`;
  });
}

/**
 * Split one table row into trimmed cell strings.
 *
 * The leading and trailing pipes are optional, so a row written without a
 * closing pipe keeps its last cell.
 */
function splitTableRow(row: string): string[] {
  return row
    .trim()
    .replace(/^\|/, '')
    .replace(/\|$/, '')
    .split('|')
    .map((cell) => cell.trim());
}

/**
 * Render one contiguous block of pipe-prefixed lines as an HTML table.
 * Returns the block unchanged if rendering fails.
 */
function renderTable(block: string): string {
  try {
    const rows = block.split('\n').filter((row) => row.trim());
    if (rows.length < 1) return block;

    // The first row is a header only when the second row is a delimiter row.
    const delimiterRow = rows[1];
    const hasHeader =
      delimiterRow !== undefined && TABLE_DELIMITER_REGEX.test(delimiterRow.trim());
    const headerRow = hasHeader ? rows[0] : undefined;
    const bodyRows = hasHeader ? rows.slice(2) : rows;

    const parts: string[] = ['<div class="table-wrapper">', '<table>'];

    if (headerRow !== undefined) {
      parts.push('<thead>', '<tr>');
      for (const cell of splitTableRow(headerRow)) {
        parts.push(`<th>${cell}</th>`);
      }
      parts.push('</tr>', '</thead>');
    }

    parts.push('<tbody>');
    for (const row of bodyRows) {
      parts.push('<tr>');
      for (const cell of splitTableRow(row)) {
        parts.push(`<td>${cell}</td>`);
      }
      parts.push('</tr>');
    }
    parts.push('</tbody>', '</table>', '</div>');

    return parts.join('\n');
  } catch (error) {
    console.error('Error processing table row:', error);
    return block;
  }
}

/**
 * Process tables.
 *
 * Cell text is inserted as-is (not escaped) so that later passes can still
 * apply inline formatting to it.
 */
function processTables(content: string): string {
  if (!content) return '';
  try {
    return content.replace(TABLE_BLOCK_REGEX, (match) => renderTable(match));
  } catch (error) {
    console.error('Error in processTables:', error);
    return content;
  }
}

/**
 * Process horizontal rules (three or more `-`, `*` or `_` on their own line).
 */
function processHorizontalRules(content: string): string {
  return content.replace(HORIZONTAL_RULE_REGEX, '<hr>');
}

/**
 * Process footnotes.
 *
 * Footnotes are numbered in order of first appearance of `[^id]`. Definition
 * lines (`[^id]: text`) are removed from the body, references that have a
 * definition are replaced by a superscript link, and a footnotes list is
 * appended when at least one numbered footnote has a definition.
 * Returns `content` unchanged if anything throws.
 */
function processFootnotes(content: string): string {
  try {
    if (!content) return '';

    const definitions = new Map<string, string>();
    const numbers = new Map<string, number>();

    // Pass 1: number ids in order of first appearance. Each match has the
    // shape `[^id]`, so the id is the match without its first two and last
    // characters.
    for (const reference of content.match(FOOTNOTE_REFERENCE_REGEX) ?? []) {
      const id = reference.slice(2, -1);
      if (!numbers.has(id)) {
        numbers.set(id, numbers.size + 1);
      }
    }

    // Pass 2: collect the definitions and drop them from the body.
    let processedContent = content.replace(
      FOOTNOTE_DEFINITION_REGEX,
      (_match, id: string, text: string) => {
        definitions.set(id, text.trim());
        return '';
      }
    );

    // Pass 3: replace references that have a definition.
    processedContent = processedContent.replace(
      FOOTNOTE_REFERENCE_REGEX,
      (match, id: string) => {
        const num = numbers.get(id);
        if (num === undefined || !definitions.has(id)) {
          console.warn(`Footnote reference [^${id}] found but no definition exists`);
          return match;
        }
        return `<sup><a href="#fn-${num}">[${num}]</a></sup>`;
      }
    );

    // Map iteration follows insertion order, which is already numeric order.
    const listed = Array.from(numbers.entries()).filter(([id]) => definitions.has(id));
    if (listed.length > 0) {
      processedContent += '\n\n<div class="footnotes">\n<h4>Footnotes</h4>\n<ol>\n';
      for (const [id, num] of listed) {
        const text = definitions.get(id) ?? '';
        // `value` keeps the list number equal to the reference number even
        // when an earlier footnote is missing its definition.
        processedContent += `<li id="fn-${num}" value="${num}">${text}</li>\n`;
      }
      processedContent += '</ol>\n</div>';
    }

    return processedContent;
  } catch (error) {
    console.error('Error processing footnotes:', error);
    return content;
  }
}

/**
 * Process blockquotes.
 *
 * Consecutive lines starting with `>` form one quote; the `>` prefix (and one
 * optional space or tab) is removed from each line and line breaks are kept.
 */
function processBlockquotes(content: string): string {
  const blockquoteRegex = /^>[ \t]?(.+(?:\n>[ \t]?.+)*)/gm;

  return content.replace(blockquoteRegex, (match) => {
    const text = match
      .split('\n')
      .map((line) => line.replace(/^>[ \t]?/, ''))
      .join('\n')
      .trim();
    return `<blockquote>${text}</blockquote>`;
  });
}

/**
 * Pretty-print `code` when `language` is JSON and the code parses;
 * otherwise return it untouched.
 */
function formatCode(code: string, language: string): string {
  if (language.toLowerCase() !== 'json') return code;
  try {
    return JSON.stringify(JSON.parse(code), null, 2);
  } catch {
    return code; // Not valid JSON: show it exactly as written.
  }
}

/**
 * Process code blocks by finding fenced regions and replacing each with a
 * `CODE_BLOCK_<n>` placeholder line, so later passes never see code content.
 *
 * @returns the text with placeholders and a map from placeholder to block.
 */
function processCodeBlocks(text: string): { text: string; blocks: Map<string, CodeBlock> } {
  const processedLines: string[] = [];
  const blocks = new Map<string, CodeBlock>();

  let inCodeBlock = false;
  let currentCode: string[] = [];
  let currentLanguage = '';
  let lastWasCodeBlock = false;

  // Store the block being collected and emit its placeholder, padded with
  // blank lines on both sides.
  const storeCurrentBlock = (): void => {
    const id = `CODE_BLOCK_${blocks.size + 1}`;
    blocks.set(id, {
      code: formatCode(currentCode.join('\n'), currentLanguage),
      language: currentLanguage,
    });
    processedLines.push('', id, '');
  };

  for (const line of text.split('\n')) {
    const fence = CODE_FENCE_REGEX.exec(line);

    if (fence) {
      if (!inCodeBlock) {
        inCodeBlock = true;
        currentLanguage = fence[1] ?? '';
        currentCode = [];
        lastWasCodeBlock = true;
      } else {
        storeCurrentBlock();
        inCodeBlock = false;
        currentCode = [];
        currentLanguage = '';
      }
    } else if (inCodeBlock) {
      currentCode.push(line);
    } else {
      // Separate the first non-blank line after a code block from it.
      if (lastWasCodeBlock && line.trim()) {
        processedLines.push('');
        lastWasCodeBlock = false;
      }
      processedLines.push(line);
    }
  }

  // An unclosed fence still yields a block if it collected any lines.
  if (inCodeBlock && currentCode.length > 0) {
    storeCurrentBlock();
  }

  return { text: processedLines.join('\n'), blocks };
}

/**
 * Render one code block as HTML, highlighted when highlight.js knows the
 * language and HTML-escaped otherwise.
 */
function renderCodeBlock(block: CodeBlock): string {
  const { code, language } = block;

  if (language && hljs.getLanguage(language)) {
    try {
      const highlighted = hljs.highlight(code, { language, ignoreIllegals: true }).value;
      return `<pre><code class="hljs language-${escapeHtml(language)}">${highlighted}</code></pre>`;
    } catch (error) {
      console.warn('Failed to highlight code block:', error);
    }
  }

  return `<pre><code class="hljs">${escapeHtml(code)}</code></pre>`;
}

/**
 * Restore code blocks with proper formatting.
 *
 * Uses a replacer function so `$&`, `$1`, `$$` and similar sequences inside
 * the code are inserted literally instead of being read as replacement
 * patterns. Placeholders with no stored block are left untouched.
 */
function restoreCodeBlocks(text: string, blocks: Map<string, CodeBlock>): string {
  return text.replace(CODE_PLACEHOLDER_REGEX, (id) => {
    const block = blocks.get(id);
    if (!block) return id;
    try {
      return renderCodeBlock(block);
    } catch (error) {
      console.error('Error restoring code block:', error);
      return '<pre>Error processing code block</pre>';
    }
  });
}

/**
 * Parse markdown text with advanced formatting.
 *
 * Order matters: code blocks are extracted first so no other pass touches
 * them, block-level and inline elements are converted next, the basic parser
 * runs on the result, and the code blocks are put back last.
 *
 * @param text - Markdown source.
 * @returns the rendered HTML; an empty string for empty input; an error
 *          element (never a rejection) if any step throws.
 */
export async function parseAdvancedMarkdown(text: string): Promise<string> {
  if (!text) return '';

  try {
    // Step 1: Extract and save code blocks first
    const { text: withoutCode, blocks } = processCodeBlocks(text);
    let processedText = withoutCode;

    // Step 2: Process block-level elements
    processedText = processTables(processedText);
    processedText = processBlockquotes(processedText);
    processedText = processHeadings(processedText);
    processedText = processHorizontalRules(processedText);

    // Process inline elements
    processedText = processedText.replace(
      INLINE_CODE_REGEX,
      (_match, code: string) => `<code>${escapeHtml(code.trim())}</code>`
    );

    // Process footnotes
    processedText = processFootnotes(processedText);

    // Process basic markdown (which will also handle Nostr identifiers)
    processedText = await parseBasicMarkdown(processedText);

    // Step 3: Restore code blocks
    processedText = restoreCodeBlocks(processedText, blocks);

    return processedText;
  } catch (error) {
    console.error('Error in parseAdvancedMarkdown:', error);
    const message = error instanceof Error ? error.message : 'Unknown error';
    return `<div class="markdown-error">Error processing markdown: ${escapeHtml(message)}</div>`;
  }
}