From 1fff18b26729ebf49e658a10f3e4d939dbfd73af Mon Sep 17 00:00:00 2001 From: Silberengel Date: Thu, 10 Jul 2025 15:34:08 +0200 Subject: [PATCH] latex/Asciidoc rendering improvement for NostrMarkup --- .../advancedAsciidoctorPostProcessor.ts | 121 ++++-- src/lib/utils/markup/advancedMarkupParser.ts | 409 +++++++++++++----- src/lib/utils/markup/basicMarkupParser.ts | 4 +- test_data/LaTeXtestfile.json | 34 ++ test_data/LaTeXtestfile.md | 135 ++++++ test_data/latex_markdown.md | 50 --- tests/unit/latexRendering.test.ts | 112 ++--- 7 files changed, 593 insertions(+), 272 deletions(-) create mode 100644 test_data/LaTeXtestfile.json create mode 100644 test_data/LaTeXtestfile.md delete mode 100644 test_data/latex_markdown.md diff --git a/src/lib/utils/markup/advancedAsciidoctorPostProcessor.ts b/src/lib/utils/markup/advancedAsciidoctorPostProcessor.ts index 0700496..ab417d7 100644 --- a/src/lib/utils/markup/advancedAsciidoctorPostProcessor.ts +++ b/src/lib/utils/markup/advancedAsciidoctorPostProcessor.ts @@ -45,54 +45,103 @@ export async function postProcessAdvancedAsciidoctorHtml( /** * Fixes all math blocks for MathJax rendering. - * Handles stem blocks, inline math, and normalizes delimiters. + * Now only processes LaTeX within inline code blocks. */ function fixAllMathBlocks(html: string): string { // Unescape \$ to $ for math delimiters html = html.replace(/\\\$/g, "$"); - // Block math:
...
+ // Process inline code blocks that contain LaTeX html = html.replace( - /
\s*
([\s\S]*?)<\/div>\s*<\/div>/g, - (_match, mathContent) => { - let cleanMath = mathContent - .replace(/\$<\/span>/g, "") - .replace(/\$\$<\/span>/g, "") - // Remove $ or $$ on their own line, or surrounded by whitespace/newlines - .replace(/(^|[\n\r\s])\$([\n\r\s]|$)/g, "$1$2") - .replace(/(^|[\n\r\s])\$\$([\n\r\s]|$)/g, "$1$2") - // Remove all leading and trailing whitespace and $ - .replace(/^[\s$]+/, "") - .replace(/[\s$]+$/, "") - .trim(); // Final trim to remove any stray whitespace or $ - // Always wrap in $$...$$ - return `
$$${cleanMath}$$
`; - }, - ); - // Inline math: $ ... $ (allow whitespace/newlines) - html = html.replace( - /\$<\/span>\s*([\s\S]+?)\s*\$<\/span>/g, - (_match, mathContent) => - `$${mathContent.trim()}$`, - ); - // Inline math: stem:[...] or latexmath:[...] - html = html.replace( - /stem:\[([^\]]+?)\]/g, - (_match, content) => `$${content.trim()}$`, - ); - html = html.replace( - /latexmath:\[([^\]]+?)\]/g, - (_match, content) => - `\\(${content.trim().replace(/\\\\/g, "\\")}\\)`, + /]*class="[^"]*language-[^"]*"[^>]*>([\s\S]*?)<\/code>/g, + (match, codeContent) => { + const trimmedCode = codeContent.trim(); + if (isLaTeXContent(trimmedCode)) { + return `$${trimmedCode}$`; + } + return match; // Return original if not LaTeX + } ); + + // Also process code blocks without language class html = html.replace( - /asciimath:\[([^\]]+?)\]/g, - (_match, content) => - `\`${content.trim()}\``, + /]*>([\s\S]*?)<\/code>/g, + (match, codeContent) => { + const trimmedCode = codeContent.trim(); + if (isLaTeXContent(trimmedCode)) { + return `$${trimmedCode}$`; + } + return match; // Return original if not LaTeX + } ); + return html; } +/** + * Checks if content contains LaTeX syntax + */ +function isLaTeXContent(content: string): boolean { + const trimmed = content.trim(); + + // Check for common LaTeX patterns + const latexPatterns = [ + /\\[a-zA-Z]+/, // LaTeX commands like \frac, \sum, etc. 
+ /\\[\(\)\[\]]/, // LaTeX delimiters like \(, \), \[, \] + /\\begin\{/, // LaTeX environments + /\\end\{/, // LaTeX environments + /\$\$/, // Display math delimiters + /\$[^$]+\$/, // Inline math delimiters + /\\text\{/, // LaTeX text command + /\\mathrm\{/, // LaTeX mathrm command + /\\mathbf\{/, // LaTeX bold command + /\\mathit\{/, // LaTeX italic command + /\\sqrt/, // Square root + /\\frac/, // Fraction + /\\sum/, // Sum + /\\int/, // Integral + /\\lim/, // Limit + /\\infty/, // Infinity + /\\alpha/, // Greek letters + /\\beta/, + /\\gamma/, + /\\delta/, + /\\theta/, + /\\lambda/, + /\\mu/, + /\\pi/, + /\\sigma/, + /\\phi/, + /\\omega/, + /\\partial/, // Partial derivative + /\\nabla/, // Nabla + /\\cdot/, // Dot product + /\\times/, // Times + /\\div/, // Division + /\\pm/, // Plus-minus + /\\mp/, // Minus-plus + /\\leq/, // Less than or equal + /\\geq/, // Greater than or equal + /\\neq/, // Not equal + /\\approx/, // Approximately equal + /\\equiv/, // Equivalent + /\\propto/, // Proportional + /\\in/, // Element of + /\\notin/, // Not element of + /\\subset/, // Subset + /\\supset/, // Superset + /\\cup/, // Union + /\\cap/, // Intersection + /\\emptyset/, // Empty set + /\\mathbb\{/, // Blackboard bold + /\\mathcal\{/, // Calligraphic + /\\mathfrak\{/, // Fraktur + /\\mathscr\{/, // Script + ]; + + return latexPatterns.some(pattern => pattern.test(trimmed)); +} + /** * Processes PlantUML blocks in HTML content */ diff --git a/src/lib/utils/markup/advancedMarkupParser.ts b/src/lib/utils/markup/advancedMarkupParser.ts index 34785ba..2e4721f 100644 --- a/src/lib/utils/markup/advancedMarkupParser.ts +++ b/src/lib/utils/markup/advancedMarkupParser.ts @@ -8,6 +8,22 @@ hljs.configure({ ignoreUnescapedHTML: true, }); +// Escapes HTML characters for safe display +function escapeHtml(text: string): string { + const div = typeof document !== 'undefined' ? 
document.createElement('div') : null; + if (div) { + div.textContent = text; + return div.innerHTML; + } + // Fallback for non-browser environments + return text + .replace(/&/g, '&') + .replace(//g, '>') + .replace(/"/g, '"') + .replace(/'/g, '''); +} + // Regular expressions for advanced markup elements const HEADING_REGEX = /^(#{1,6})\s+(.+)$/gm; const ALTERNATE_HEADING_REGEX = /^([^\n]+)\n(=+|-+)\n/gm; @@ -380,111 +396,296 @@ function restoreCodeBlocks(text: string, blocks: Map): string { } /** - * Process LaTeX math expressions using a token-based approach to avoid nested processing + * Process $...$ and $$...$$ math blocks: render as LaTeX if recognized, otherwise as AsciiMath + * This must run BEFORE any paragraph or inline code formatting. + */ +function processDollarMath(content: string): string { + // Display math: $$...$$ (multi-line, not empty) + content = content.replace(/\$\$([\s\S]*?\S[\s\S]*?)\$\$/g, (match, expr) => { + if (isLaTeXContent(expr)) { + return `
$$${expr}$$
`; + } else { + // Strip all $ or $$ from AsciiMath + const clean = expr.replace(/\$+/g, '').trim(); + return `
${clean}
`; + } + }); + // Inline math: $...$ (not empty, not just whitespace) + content = content.replace(/\$([^\s$][^$\n]*?)\$/g, (match, expr) => { + if (isLaTeXContent(expr)) { + return `$${expr}$`; + } else { + const clean = expr.replace(/\$+/g, '').trim(); + return `${clean}`; + } + }); + return content; +} + +/** + * Process LaTeX math expressions only within inline code blocks */ function processMathExpressions(content: string): string { - // Tokenize the content to avoid nested processing - const tokens: Array<{type: 'text' | 'math', content: string}> = []; - let currentText = ''; - let i = 0; - - while (i < content.length) { - // Check for LaTeX environments first (most specific) - const envMatch = content.slice(i).match(/^\\begin\{([^}]+)\}([\s\S]*?)\\end\{\1\}/); - if (envMatch) { - if (currentText) { - tokens.push({type: 'text', content: currentText}); - currentText = ''; - } - tokens.push({type: 'math', content: `\\begin{${envMatch[1]}}${envMatch[2]}\\end{${envMatch[1]}}`}); - i += envMatch[0].length; - continue; + // Only process LaTeX within inline code blocks (backticks) + return content.replace(INLINE_CODE_REGEX, (match, code) => { + const trimmedCode = code.trim(); + + // Check for unsupported LaTeX environments (like tabular) first + if (/\\begin\{tabular\}|\\\\begin\{tabular\}/.test(trimmedCode)) { + return `
+

+ Unrendered, as it is LaTeX typesetting, not a formula: +

+
+          ${escapeHtml(trimmedCode)}
+        
+
`; } - - // Check for display math blocks ($$...$$) - const displayMatch = content.slice(i).match(/^\$\$([\s\S]*?)\$\$/); - if (displayMatch) { - if (currentText) { - tokens.push({type: 'text', content: currentText}); - currentText = ''; + + // Check if the code contains LaTeX syntax + if (isLaTeXContent(trimmedCode)) { + // Detect LaTeX display math (\\[...\\]) + if (/^\\\[[\s\S]*\\\]$/.test(trimmedCode)) { + // Remove the delimiters for rendering + const inner = trimmedCode.replace(/^\\\[|\\\]$/g, ''); + return `
$$${inner}$$
`; } - tokens.push({type: 'math', content: displayMatch[1]}); - i += displayMatch[0].length; - continue; - } - - // Check for LaTeX display math (\[...\]) - const latexDisplayMatch = content.slice(i).match(/^\\\[([^\]]+)\\\]/); - if (latexDisplayMatch) { - if (currentText) { - tokens.push({type: 'text', content: currentText}); - currentText = ''; + // Detect display math ($$...$$) + if (/^\$\$[\s\S]*\$\$$/.test(trimmedCode)) { + // Remove the delimiters for rendering + const inner = trimmedCode.replace(/^\$\$|\$\$$/g, ''); + return `
$$${inner}$$
`; } - tokens.push({type: 'math', content: latexDisplayMatch[1]}); - i += latexDisplayMatch[0].length; - continue; - } - - // Check for inline math ($...$) - const inlineMatch = content.slice(i).match(/^\$([^$\n]+)\$/); - if (inlineMatch) { - if (currentText) { - tokens.push({type: 'text', content: currentText}); - currentText = ''; + // Detect inline math ($...$) + if (/^\$[\s\S]*\$$/.test(trimmedCode)) { + // Remove the delimiters for rendering + const inner = trimmedCode.replace(/^\$|\$$/g, ''); + return `$${inner}$`; } - tokens.push({type: 'math', content: inlineMatch[1]}); - i += inlineMatch[0].length; - continue; - } - - // Check for LaTeX inline math (\(...\)) - const latexInlineMatch = content.slice(i).match(/^\\\(([^)]+)\\\)/); - if (latexInlineMatch) { - if (currentText) { - tokens.push({type: 'text', content: currentText}); - currentText = ''; + // Default to inline math for any other LaTeX content + return `$${trimmedCode}$`; + } else { + // Check for edge cases that should remain as code, not math + // These patterns indicate code that contains dollar signs but is not math + const codePatterns = [ + /^[a-zA-Z_$][a-zA-Z0-9_$]*\s*=/, // Variable assignment like "const price =" + /^[a-zA-Z_$][a-zA-Z0-9_$]*\s*\(/, // Function call like "echo(" + /^[a-zA-Z_$][a-zA-Z0-9_$]*\s*\{/, // Object literal like "const obj = {" + /^[a-zA-Z_$][a-zA-Z0-9_$]*\s*\[/, // Array literal like "const arr = [" + /^[a-zA-Z_$][a-zA-Z0-9_$]*\s*!&|^~]/, // Operator like "const x = 1 +" + /^[a-zA-Z_$][a-zA-Z0-9_$]*\s*[a-zA-Z_$][a-zA-Z0-9_$]*/, // Two identifiers like "const price = amount" + /^[a-zA-Z_$][a-zA-Z0-9_$]*\s*[0-9]/, // Number like "const x = 1" + /^[a-zA-Z_$][a-zA-Z0-9_$]*\s*[a-zA-Z_$][a-zA-Z0-9_$]*\s*[+\-*/%=<>!&|^~]/, // Complex expression like "const price = amount +" + /^[a-zA-Z_$][a-zA-Z0-9_$]*\s*[a-zA-Z0-9_$]*\s*[a-zA-Z_$][a-zA-Z0-9_$]*/, // Three identifiers like "const price = amount + tax" + /^[a-zA-Z_$][a-zA-Z0-9_$]*\s*[a-zA-Z_$][a-zA-Z0-9_$]*\s*[0-9]/, // Two 
identifiers and number like "const price = amount + 1" + /^[a-zA-Z_$][a-zA-Z0-9_$]*\s*[0-9]\s*[+\-*/%=<>!&|^~]/, // Identifier, number, operator like "const x = 1 +" + /^[a-zA-Z_$][a-zA-Z0-9_$]*\s*[0-9]\s*[a-zA-Z_$][a-zA-Z0-9_$]*/, // Identifier, number, identifier like "const x = 1 + y" + /^[a-zA-Z_$][a-zA-Z0-9_$]*\s*[0-9]\s*[0-9]/, // Identifier, number, number like "const x = 1 + 2" + /^[a-zA-Z_$][a-zA-Z0-9_$]*\s*[0-9]\s*[+\-*/%=<>!&|^~]\s*[a-zA-Z_$][a-zA-Z0-9_$]*/, // Complex like "const x = 1 + y" + /^[a-zA-Z_$][a-zA-Z0-9_$]*\s*[0-9]\s*[+\-*/%=<>!&|^~]\s*[0-9]/, // Complex like "const x = 1 + 2" + /^[a-zA-Z_$][a-zA-Z0-9_$]*\s*[0-9]\s*[+\-*/%=<>!&|^~]\s*[a-zA-Z_$][a-zA-Z0-9_$]*\s*[+\-*/%=<>!&|^~]/, // Very complex like "const x = 1 + y +" + /^[a-zA-Z_$][a-zA-Z0-9_$]*\s*[0-9]\s*[+\-*/%=<>!&|^~]\s*[a-zA-Z_$][a-zA-Z0-9_$]*\s*[+\-*/%=<>!&|^~]\s*[a-zA-Z_$][a-zA-Z0-9_$]*/, // Very complex like "const x = 1 + y + z" + /^[a-zA-Z_$][a-zA-Z0-9_$]*\s*[0-9]\s*[+\-*/%=<>!&|^~]\s*[a-zA-Z_$][a-zA-Z0-9_$]*\s*[+\-*/%=<>!&|^~]\s*[0-9]/, // Very complex like "const x = 1 + y + 2" + /^[a-zA-Z_$][a-zA-Z0-9_$]*\s*[0-9]\s*[+\-*/%=<>!&|^~]\s*[0-9]\s*[+\-*/%=<>!&|^~]/, // Very complex like "const x = 1 + 2 +" + /^[a-zA-Z_$][a-zA-Z0-9_$]*\s*[0-9]\s*[+\-*/%=<>!&|^~]\s*[0-9]\s*[+\-*/%=<>!&|^~]\s*[a-zA-Z_$][a-zA-Z0-9_$]*/, // Very complex like "const x = 1 + 2 + y" + /^[a-zA-Z_$][a-zA-Z0-9_$]*\s*[0-9]\s*[+\-*/%=<>!&|^~]\s*[0-9]\s*[+\-*/%=<>!&|^~]\s*[0-9]/, // Very complex like "const x = 1 + 2 + 3" + // Additional patterns for JavaScript template literals and other code + /^[a-zA-Z_$][a-zA-Z0-9_$]*\s*=\s*`/, // Template literal assignment like "const str = `" + /^[a-zA-Z_$][a-zA-Z0-9_$]*\s*=\s*'/, // String assignment like "const str = '" + /^[a-zA-Z_$][a-zA-Z0-9_$]*\s*=\s*"/, // String assignment like "const str = \"" + /^[a-zA-Z_$][a-zA-Z0-9_$]*\s*=\s*[0-9]/, // Number assignment like "const x = 1" + /^[a-zA-Z_$][a-zA-Z0-9_$]*\s*=\s*[a-zA-Z_$][a-zA-Z0-9_$]*/, // Variable assignment like 
"const x = y" + /^[a-zA-Z_$][a-zA-Z0-9_$]*\s*=\s*[+\-*/%=<>!&|^~]/, // Assignment with operator like "const x = +" + /^[a-zA-Z_$][a-zA-Z0-9_$]*\s*=\s*[a-zA-Z_$][a-zA-Z0-9_$]*\s*[+\-*/%=<>!&|^~]/, // Assignment with variable and operator like "const x = y +" + /^[a-zA-Z_$][a-zA-Z0-9_$]*\s*=\s*[a-zA-Z_$][a-zA-Z0-9_$]*\s*[+\-*/%=<>!&|^~]\s*[a-zA-Z_$][a-zA-Z0-9_$]*/, // Assignment with two variables and operator like "const x = y + z" + /^[a-zA-Z_$][a-zA-Z0-9_$]*\s*=\s*[0-9]\s*[+\-*/%=<>!&|^~]/, // Assignment with number and operator like "const x = 1 +" + /^[a-zA-Z_$][a-zA-Z0-9_$]*\s*=\s*[0-9]\s*[+\-*/%=<>!&|^~]\s*[a-zA-Z_$][a-zA-Z0-9_$]*/, // Assignment with number, operator, variable like "const x = 1 + y" + /^[a-zA-Z_$][a-zA-Z0-9_$]*\s*=\s*[a-zA-Z_$][a-zA-Z0-9_$]*\s*[+\-*/%=<>!&|^~]\s*[0-9]/, // Assignment with variable, operator, number like "const x = y + 1" + /^[a-zA-Z_$][a-zA-Z0-9_$]*\s*=\s*[0-9]\s*[+\-*/%=<>!&|^~]\s*[0-9]/, // Assignment with number, operator, number like "const x = 1 + 2" + ]; + + // If it matches code patterns, treat as regular code + if (codePatterns.some(pattern => pattern.test(trimmedCode))) { + const escapedCode = trimmedCode + .replace(/&/g, "&") + .replace(//g, ">") + .replace(/"/g, """) + .replace(/'/g, "'"); + return `${escapedCode}`; } - tokens.push({type: 'math', content: latexInlineMatch[1]}); - i += latexInlineMatch[0].length; - continue; + + // Return as regular inline code + const escapedCode = trimmedCode + .replace(/&/g, "&") + .replace(//g, ">") + .replace(/"/g, """) + .replace(/'/g, "'"); + return `${escapedCode}`; } - - // If no math pattern matches, add to current text - currentText += content[i]; - i++; + }); +} + +/** + * Checks if content contains LaTeX syntax + */ +function isLaTeXContent(content: string): boolean { + const trimmed = content.trim(); + + // Check for simple math expressions first (like AsciiMath) + if (/^\$[^$]+\$$/.test(trimmed)) { + return true; } - // Add any remaining text - if (currentText) { - 
tokens.push({type: 'text', content: currentText}); + // Check for display math + if (/^\$\$[\s\S]*\$\$$/.test(trimmed)) { + return true; } - // Now process the tokens to create the final HTML - let result = ''; - for (const token of tokens) { - if (token.type === 'text') { - result += token.content; - } else { - // Determine if this should be display or inline math - const isDisplay = token.content.includes('\\begin{') || - token.content.includes('\\end{') || - token.content.includes('\\[') || - token.content.includes('\\]') || - token.content.length > 50 || // Heuristic for display math - token.content.includes('=') && token.content.length > 20 || // Equations with equals - token.content.includes('\\begin{') || // Any LaTeX environment - token.content.includes('\\boxed{') || // Boxed expressions - token.content.includes('\\text{') && token.content.length > 30; // Text blocks - - if (isDisplay) { - result += `
$$${token.content}$$
`; - } else { - result += `$${token.content}$`; - } - } + // Check for LaTeX display math + if (/^\\\[[\s\S]*\\\]$/.test(trimmed)) { + return true; } - return result; + // Check for LaTeX environments with double backslashes (like tabular) + if (/\\\\begin\{[^}]+\}/.test(trimmed) || /\\\\end\{[^}]+\}/.test(trimmed)) { + return true; + } + + // Check for common LaTeX patterns + const latexPatterns = [ + /\\[a-zA-Z]+/, // LaTeX commands like \frac, \sum, etc. + /\\\\[a-zA-Z]+/, // LaTeX commands with double backslashes like \\frac, \\sum, etc. + /\\[\(\)\[\]]/, // LaTeX delimiters like \(, \), \[, \] + /\\\\[\(\)\[\]]/, // LaTeX delimiters with double backslashes like \\(, \\), \\[, \\] + /\\\[[\s\S]*?\\\]/, // LaTeX display math \[ ... \] + /\\\\\[[\s\S]*?\\\\\]/, // LaTeX display math with double backslashes \\[ ... \\] + /\\begin\{/, // LaTeX environments + /\\\\begin\{/, // LaTeX environments with double backslashes + /\\end\{/, // LaTeX environments + /\\\\end\{/, // LaTeX environments with double backslashes + /\\begin\{array\}/, // LaTeX array environment + /\\\\begin\{array\}/, // LaTeX array environment with double backslashes + /\\end\{array\}/, + /\\\\end\{array\}/, + /\\begin\{matrix\}/, // LaTeX matrix environment + /\\\\begin\{matrix\}/, // LaTeX matrix environment with double backslashes + /\\end\{matrix\}/, + /\\\\end\{matrix\}/, + /\\begin\{bmatrix\}/, // LaTeX bmatrix environment + /\\\\begin\{bmatrix\}/, // LaTeX bmatrix environment with double backslashes + /\\end\{bmatrix\}/, + /\\\\end\{bmatrix\}/, + /\\begin\{pmatrix\}/, // LaTeX pmatrix environment + /\\\\begin\{pmatrix\}/, // LaTeX pmatrix environment with double backslashes + /\\end\{pmatrix\}/, + /\\\\end\{pmatrix\}/, + /\\begin\{tabular\}/, // LaTeX tabular environment + /\\\\begin\{tabular\}/, // LaTeX tabular environment with double backslashes + /\\end\{tabular\}/, + /\\\\end\{tabular\}/, + /\$\$/, // Display math delimiters + /\$[^$]+\$/, // Inline math delimiters + /\\text\{/, // 
LaTeX text command + /\\\\text\{/, // LaTeX text command with double backslashes + /\\mathrm\{/, // LaTeX mathrm command + /\\\\mathrm\{/, // LaTeX mathrm command with double backslashes + /\\mathbf\{/, // LaTeX bold command + /\\\\mathbf\{/, // LaTeX bold command with double backslashes + /\\mathit\{/, // LaTeX italic command + /\\\\mathit\{/, // LaTeX italic command with double backslashes + /\\sqrt/, // Square root + /\\\\sqrt/, // Square root with double backslashes + /\\frac/, // Fraction + /\\\\frac/, // Fraction with double backslashes + /\\sum/, // Sum + /\\\\sum/, // Sum with double backslashes + /\\int/, // Integral + /\\\\int/, // Integral with double backslashes + /\\lim/, // Limit + /\\\\lim/, // Limit with double backslashes + /\\infty/, // Infinity + /\\\\infty/, // Infinity with double backslashes + /\\alpha/, // Greek letters + /\\\\alpha/, // Greek letters with double backslashes + /\\beta/, + /\\\\beta/, + /\\gamma/, + /\\\\gamma/, + /\\delta/, + /\\\\delta/, + /\\theta/, + /\\\\theta/, + /\\lambda/, + /\\\\lambda/, + /\\mu/, + /\\\\mu/, + /\\pi/, + /\\\\pi/, + /\\sigma/, + /\\\\sigma/, + /\\phi/, + /\\\\phi/, + /\\omega/, + /\\\\omega/, + /\\partial/, // Partial derivative + /\\\\partial/, // Partial derivative with double backslashes + /\\nabla/, // Nabla + /\\\\nabla/, // Nabla with double backslashes + /\\cdot/, // Dot product + /\\\\cdot/, // Dot product with double backslashes + /\\times/, // Times + /\\\\times/, // Times with double backslashes + /\\div/, // Division + /\\\\div/, // Division with double backslashes + /\\pm/, // Plus-minus + /\\\\pm/, // Plus-minus with double backslashes + /\\mp/, // Minus-plus + /\\\\mp/, // Minus-plus with double backslashes + /\\leq/, // Less than or equal + /\\\\leq/, // Less than or equal with double backslashes + /\\geq/, // Greater than or equal + /\\\\geq/, // Greater than or equal with double backslashes + /\\neq/, // Not equal + /\\\\neq/, // Not equal with double backslashes + /\\approx/, // 
Approximately equal + /\\\\approx/, // Approximately equal with double backslashes + /\\equiv/, // Equivalent + /\\\\equiv/, // Equivalent with double backslashes + /\\propto/, // Proportional + /\\\\propto/, // Proportional with double backslashes + /\\in/, // Element of + /\\\\in/, // Element of with double backslashes + /\\notin/, // Not element of + /\\\\notin/, // Not element of with double backslashes + /\\subset/, // Subset + /\\\\subset/, // Subset with double backslashes + /\\supset/, // Superset + /\\\\supset/, // Superset with double backslashes + /\\cup/, // Union + /\\\\cup/, // Union with double backslashes + /\\cap/, // Intersection + /\\\\cap/, // Intersection with double backslashes + /\\emptyset/, // Empty set + /\\\\emptyset/, // Empty set with double backslashes + /\\mathbb\{/, // Blackboard bold + /\\\\mathbb\{/, // Blackboard bold with double backslashes + /\\mathcal\{/, // Calligraphic + /\\\\mathcal\{/, // Calligraphic with double backslashes + /\\mathfrak\{/, // Fraktur + /\\\\mathfrak\{/, // Fraktur with double backslashes + /\\mathscr\{/, // Script + /\\\\mathscr\{/, // Script with double backslashes + ]; + + return latexPatterns.some(pattern => pattern.test(trimmed)); } /** @@ -498,34 +699,26 @@ export async function parseAdvancedmarkup(text: string): Promise { const { text: withoutCode, blocks } = processCodeBlocks(text); let processedText = withoutCode; - // Step 2: Process LaTeX math expressions FIRST to avoid wrapping in

or

+ // Step 2: Process $...$ and $$...$$ math blocks (LaTeX or AsciiMath) + processedText = processDollarMath(processedText); + + // Step 3: Process LaTeX math expressions ONLY within inline code blocks (legacy support) processedText = processMathExpressions(processedText); - // Step 3: Process block-level elements + // Step 4: Process block-level elements (tables, blockquotes, headings, horizontal rules) processedText = processTables(processedText); processedText = processBlockquotes(processedText); processedText = processHeadings(processedText); processedText = processHorizontalRules(processedText); - // Process inline elements - processedText = processedText.replace(INLINE_CODE_REGEX, (_, code) => { - const escapedCode = code - .trim() - .replace(/&/g, "&") - .replace(//g, ">") - .replace(/"/g, """) - .replace(/'/g, "'"); - return `${escapedCode}`; - }); - - // Process footnotes (only references, not definitions) + // Step 5: Process footnotes (only references, not definitions) processedText = processFootnotes(processedText); - // Process basic markup (which will also handle Nostr identifiers) + // Step 6: Process basic markup (which will also handle Nostr identifiers) + // This includes paragraphs, inline code, links, lists, etc. 
processedText = await parseBasicmarkup(processedText); - // Step 4: Restore code blocks + // Step 7: Restore code blocks processedText = restoreCodeBlocks(processedText, blocks); return processedText; diff --git a/src/lib/utils/markup/basicMarkupParser.ts b/src/lib/utils/markup/basicMarkupParser.ts index f829462..e7a1d74 100644 --- a/src/lib/utils/markup/basicMarkupParser.ts +++ b/src/lib/utils/markup/basicMarkupParser.ts @@ -411,8 +411,8 @@ export async function parseBasicmarkup(text: string): Promise { .map((para) => para.trim()) .filter((para) => para.length > 0) .map((para) => { - // Skip wrapping if para already contains block-level elements - if (/<(div|h[1-6]|blockquote|table|pre|ul|ol|hr)/i.test(para)) { + // Skip wrapping if para already contains block-level elements or math blocks + if (/(]*class=["'][^"']*math-block[^"']*["'])|<(div|h[1-6]|blockquote|table|pre|ul|ol|hr)/i.test(para)) { return para; } return `

${para}

`; diff --git a/test_data/LaTeXtestfile.json b/test_data/LaTeXtestfile.json new file mode 100644 index 0000000..079226d --- /dev/null +++ b/test_data/LaTeXtestfile.json @@ -0,0 +1,34 @@ +{ + "created_at": 1752150799, + "content": "# This is a test file for writing mathematical formulas in #NostrMarkup\n\nThis document covers the rendering of formulas in TeX/LaTeX and AsciiMath notation, or some combination of those within the same page. It is meant to be rendered by clients utilizing MathJax.\n\nIf you want the entire document to be rendered as mathematics, place the entire thing in a back-tick code-block, but know that this makes the document slower to load, it is harder to format the prose, and the result is less legible. It also doesn't increase portability, as it's easy to export markup as LaTeX files, or as PDFs, with the formulas rendered.\n\nThe general idea, is that anything placed within `single back-ticks` is inline code, and inline-code will all be scanned for typical mathematics statements and rendered with best-effort. (For more precise rendering, use AsciiDoc.) We will not render text that is not marked as inline code, as mathematical formulas, as that is prose.\n\nIf you want the TeX to be blended into the surrounding text, wrap the text within single `$`. 
Otherwise, use double `$$` symbols, for display math, and it will appear on its own line.\n\n## TeX Examples\n\nInline equation: `$\\sqrt{x}$`\n\nSame equation, in the display mode: `$$\\sqrt{x}$$`\n\nSomething more complex, inline: `$\\mathbb{N} = \\{ a \\in \\mathbb{Z} : a > 0 \\}$`\n\nSomething complex, in display mode: `$$P \\left( A=2 \\, \\middle| \\, \\dfrac{A^2}{B}>4 \\right)$$`\n\nAnother example of `$$\\prod_{i=1}^{n} x_i - 1$$` inline formulas.\n\nFunction example: \n`$$\nf(x)=\n\\begin{cases}\n1/d_{ij} & \\quad \\text{when $d_{ij} \\leq 160$}\\\\ \n0 & \\quad \\text{otherwise}\n\\end{cases}\n$$`\n\nAnd a matrix:\n`$$\nM = \n\\begin{bmatrix}\n\\frac{5}{6} & \\frac{1}{6} & 0 \\\\[0.3em]\n\\frac{5}{6} & 0 & \\frac{1}{6} \\\\[0.3em]\n0 & \\frac{5}{6} & \\frac{1}{6}\n\\end{bmatrix}\n$$`\n\nLaTeX ypesetting won't be rendered. Use NostrMarkup delimeter tables for this sort of thing.\n\n`\\\\begin{tabular}{|c|c|c|l|r|}\n\\\\hline\n\\\\multicolumn{3}{|l|}{test} & A & B \\\\\\\\\n\\\\hline\n1 & 2 & 3 & 4 & 5 \\\\\\\\\n\\\\hline\n\\\\end{tabular}`\n\nWe also recognize common LaTeX statements:\n\n`\\[\n\\begin{array}{ccccc}\n1 & 2 & 3 & 4 & 5 \\\\\n\\end{array}\n\\]`\n\n`\\[ x^n + y^n = z^n \\]`\n\n`\\sqrt{x^2+1}`\n\nGreek letters are a snap: `$\\Psi$`, `$\\psi$`, `$\\Phi$`, `$\\phi$`. \n\nEquations within text are easy--- A well known Maxwell thermodynamic relation is `$\\left.{\\partial T \\over \\partial P}\\right|_{s} = \\left.{\\partial v \\over \\partial s}\\right|_{P}$`.\n\nYou can also set aside equations like so: `\\begin{eqnarray} du &=& T\\ ds -P\\ dv, \\qquad \\mbox{first law.}\\label{fl}\\\\ ds &\\ge& {\\delta q \\over T}.\\qquad \\qquad \\mbox{second law.} \\label{sl} \\end {eqnarray}`\n\n## And some good ole Asciimath\n\nAsciimath doesn't use `$` or `$$` delimiters, but we are using it to make mathy stuff easier to find. If you want it inline, include it inline. 
If you want it on a separate line, put a hard-return before and after.\n\nInline text example here `$E=mc^2$` and another `$1/(x+1)$`; very simple.\n\nDisplaying on a separate line:\n\n`$$sum_(k=1)^n k = 1+2+ cdots +n=(n(n+1))/2$$`\n\n`$$int_0^1 x^2 dx$$`\n\n`$$x = (-6 +- sqrt((-6)^2 - 4 (1)(4)))/(2 xx 1)$$`\n\n`$$|x|= {(x , if x ge 0 text(,)),(-x , if x <0.):}$$`\n\nDisplaying with wider spacing:\n\n`$a=3, \\ \\ \\ b=-3,\\ \\ $` and `$ \\ \\ c=2$`.\n\nThus `$(a+b)(c+b)=0$`.\n\nDisplaying with indentations:\n\nUsing the quadratic formula, the roots of `$x^2-6x+4=0$` are\n\n`$$x = (-6 +- sqrt((-6)^2 - 4 (1)(4)))/(2 xx 1)$$`\n\n`$$ \\ \\ = (-6 +- sqrt(36 - 16))/2$$`\n\n`$$ \\ \\ =(-6 +- sqrt(20))/2$$`\n\n`$$ \\ \\ = -0.8 or 2.2 \\ \\ \\ $$` to 1 decimal place.\n\nAdvanced alignment and matrices looks like this:\n\nA `$3xx3$` matrix, `$$((1,2,3),(4,5,6),(7,8,9))$$` and a `$2xx1$` matrix, or vector, `$$((1),(0))$$`.\n\nThe outer brackets determine the delimiters e.g. `$|(a,b),(c,d)|=ad-bc$`.\n\nA general `$m xx n$` matrix `$$((a_(11), cdots , a_(1n)),(vdots, ddots, vdots),(a_(m1), cdots , a_(mn)))$$`\n\n## Mixed Examples\n\nHere are some examples mixing LaTeX and AsciiMath:\n\n- LaTeX inline: `$\\frac{1}{2}$` vs AsciiMath inline: `$1/2$`\n- LaTeX display: `$$\\sum_{i=1}^n x_i$$` vs AsciiMath display: `$$sum_(i=1)^n x_i$$`\n- LaTeX matrix: `$$\\begin{pmatrix} a & b \\\\ c & d \\end{pmatrix}$$` vs AsciiMath matrix: `$$((a,b),(c,d))$$`\n\n## Edge Cases\n\n- Empty math: `$$`\n- Just delimiters: `$ $`\n- Dollar signs in text: The price is $10.50\n- Currency: `$19.99`\n- Shell command: `echo \"Price: $100\"`\n- JavaScript template: `const price = \\`$${amount}\\``\n- CSS with dollar signs: `color: $primary-color`\n\nThis document should demonstrate that:\n1. LaTeX is processed within inline code blocks with proper delimiters\n2. AsciiMath is processed within inline code blocks with proper delimiters\n3. Regular code blocks remain unchanged\n4. 
Mixed content is handled correctly\n5. Edge cases are handled gracefully", + "tags": [ + [ + "t", + "test" + ], + [ + "t", + "Asciimath" + ], + [ + "t", + "TeX" + ], + [ + "t", + "LaTeX" + ], + [ + "d", + "this-is-a-test-file-for-writing-mathematical-formulas-in-nostrmarkup" + ], + [ + "title", + "This is a test file for writing mathematical formulas in #NostrMarkup" + ] + ], + "kind": 30023, + "pubkey": "fd208ee8c8f283780a9552896e4823cc9dc6bfd442063889577106940fd927c1", + "id": "91be487e67cb68cfe3c7e965a654642b7bcedecb68340523a8c1b865b21fa5dc", + "sig": "59b7f87fe2c2d318152cf5b4796580f79a26936d515a816ddcb89b89ba337992eaa3d50896d3bde345d25be99c9caa3a237d476abeb8537589256cbcceeb2e75" +} \ No newline at end of file diff --git a/test_data/LaTeXtestfile.md b/test_data/LaTeXtestfile.md new file mode 100644 index 0000000..3c2e7e8 --- /dev/null +++ b/test_data/LaTeXtestfile.md @@ -0,0 +1,135 @@ +# This is a testfile for writing mathematic formulas in NostrMarkup + +This document covers the rendering of formulas in TeX/LaTeX and AsciiMath notation, or some combination of those within the same page. It is meant to be rendered by clients utilizing MathJax. + +If you want the entire document to be rendered as mathematics, place the entire thing in a backtick-codeblock, but know that this makes the document slower to load, it is harder to format the prose, and the result is less legible. It also doesn't increase portability, as it's easy to export markup as LaTeX files, or as PDFs, with the formulas rendered. + +The general idea, is that anything placed within `single backticks` is inline code, and inline-code will all be scanned for typical mathematics statements and rendered with best-effort. (For more precise rendering, use Asciidoc.) We will not render text that is not marked as inline code, as mathematical formulas, as that is prose. + +If you want the TeX to be blended into the surrounding text, wrap the text within single `$`. 
Otherwise, use double `$$` symbols, for display math, and it will appear on its own line. + +## TeX Examples + +Inline equation: `$\sqrt{x}$` + +Same equation, in the display mode: `$$\sqrt{x}$$` + +Something more complex, inline: `$\mathbb{N} = \{ a \in \mathbb{Z} : a > 0 \}$` + +Something complex, in display mode: `$$P \left( A=2 \, \middle| \, \dfrac{A^2}{B}>4 \right)$$` + +Another example of `$$\prod_{i=1}^{n} x_i - 1$$` inline formulas. + +Function example: +`$$ +f(x)= +\begin{cases} +1/d_{ij} & \quad \text{when $d_{ij} \leq 160$}\\ +0 & \quad \text{otherwise} +\end{cases} +$$` + +And a matrix: +`$$ +M = +\begin{bmatrix} +\frac{5}{6} & \frac{1}{6} & 0 \\[0.3em] +\frac{5}{6} & 0 & \frac{1}{6} \\[0.3em] +0 & \frac{5}{6} & \frac{1}{6} +\end{bmatrix} +$$` + +LaTeX ypesetting won't be rendered. Use NostrMarkup delimeter tables for this sort of thing. + +`\\begin{tabular}{|c|c|c|l|r|} +\\hline +\\multicolumn{3}{|l|}{test} & A & B \\\\ +\\hline +1 & 2 & 3 & 4 & 5 \\\\ +\\hline +\\end{tabular}` + +We also recognize common LaTeX statements: + +`\[ +\begin{array}{ccccc} +1 & 2 & 3 & 4 & 5 \\ +\end{array} +\]` + +`\[ x^n + y^n = z^n \]` + +`\sqrt{x^2+1}` + +Greek letters are a snap: `$\Psi$`, `$\psi$`, `$\Phi$`, `$\phi$`. + +Equations within text are easy--- A well known Maxwell thermodynamic relation is `$\left.{\partial T \over \partial P}\right|_{s} = \left.{\partial v \over \partial s}\right|_{P}$`. + +You can also set aside equations like so: `\begin{eqnarray} du &=& T\ ds -P\ dv, \qquad \mbox{first law.}\label{fl}\\ ds &\ge& {\delta q \over T}.\qquad \qquad \mbox{second law.} \label{sl} \end {eqnarray}` + +## And some good ole Asciimath + +Asciimath doesn't use `$` or `$$` delimiters, but we are using it to make mathy stuff easier to find. If you want it inline, include it inline. If you want it on a separate line, put a hard-return before and after. + +Inline text example here `$E=mc^2$` and another `$1/(x+1)$`; very simple. 
+ +Displaying on a separate line: + +`$$sum_(k=1)^n k = 1+2+ cdots +n=(n(n+1))/2$$` + +`$$int_0^1 x^2 dx$$` + +`$$x = (-6 +- sqrt((-6)^2 - 4 (1)(4)))/(2 xx 1)$$` + +`$$|x|= {(x , if x ge 0 text(,)),(-x , if x <0.):}$$` + +Displaying with wider spacing: + +`$a=3, \ \ \ b=-3,\ \ $` and `$ \ \ c=2$`. + +Thus `$(a+b)(c+b)=0$`. + +Displaying with indentations: + +Using the quadratic formula, the roots of `$x^2-6x+4=0$` are + +`$$x = (-6 +- sqrt((-6)^2 - 4 (1)(4)))/(2 xx 1)$$` + +`$$ \ \ = (-6 +- sqrt(36 - 16))/2$$` + +`$$ \ \ =(-6 +- sqrt(20))/2$$` + +`$$ \ \ = -0.8 or 2.2 \ \ \ $$` to 1 decimal place. + +Advanced alignment and matrices look like this: + +A `$3xx3$` matrix, `$$((1,2,3),(4,5,6),(7,8,9))$$` and a `$2xx1$` matrix, or vector, `$$((1),(0))$$`. + +The outer brackets determine the delimiters, e.g. `$|(a,b),(c,d)|=ad-bc$`. + +A general `$m xx n$` matrix `$$((a_(11), cdots , a_(1n)),(vdots, ddots, vdots),(a_(m1), cdots , a_(mn)))$$` + +## Mixed Examples + +Here are some examples mixing LaTeX and AsciiMath: + +- LaTeX inline: `$\frac{1}{2}$` vs AsciiMath inline: `$1/2$` +- LaTeX display: `$$\sum_{i=1}^n x_i$$` vs AsciiMath display: `$$sum_(i=1)^n x_i$$` +- LaTeX matrix: `$$\begin{pmatrix} a & b \\ c & d \end{pmatrix}$$` vs AsciiMath matrix: `$$((a,b),(c,d))$$` + +## Edge Cases + +- Empty math: `$$` +- Just delimiters: `$ $` +- Dollar signs in text: The price is $10.50 +- Currency: `$19.99` +- Shell command: `echo "Price: $100"` +- JavaScript template: `const price = \`$${amount}\`` +- CSS with dollar signs: `color: $primary-color` + +This document should demonstrate that: +1. LaTeX is processed within inline code blocks with proper delimiters +2. AsciiMath is processed within inline code blocks with proper delimiters +3. Regular code blocks remain unchanged +4. Mixed content is handled correctly +5. 
Edge cases are handled gracefully diff --git a/test_data/latex_markdown.md b/test_data/latex_markdown.md deleted file mode 100644 index 0317f22..0000000 --- a/test_data/latex_markdown.md +++ /dev/null @@ -1,50 +0,0 @@ -{ -"created*at": 1752035710, -"content": "## 1 Introduction\n\nThe P versus NP problem asks whether every problem verifiable in polynomial time (NP) can be solved in polynomial time (P) [1]. The NP-complete Boolean Satisfiability (SAT) problem, determining if a conjunctive normal form formula has a satisfying assignment, is central to this question [2]. Proving that 3-SAT requires super-polynomial time would imply $P \\neq NP$, impacting computer science, cryptography, and optimization [3].\n\nWe prove $P \\neq NP$ by reformulating 3-SAT as an optimization problem using categorical and graph-theoretic frameworks. A 2-category models SAT’s logical constraints, while a clause graph captures satisfiability combinatorially [4]. A constraint measure and topological invariant establish that determining satisfiability requires exponential time [5,6]. Unlike combinatorial or algebraic methods [3], our approach leverages category theory and graph theory for a novel perspective.\n\nThe paper is organized as follows: Section 2 defines a 2-category for SAT; Section 3 presents an optimization problem; Section 4 introduces a constraint measure; Section 5 proves exponential time complexity; and Section 6 provides a graph-theoretic reformulation.\n\n## 2 Categorical Reformulation of SAT\n\nTo prove $P \\neq NP$, we reformulate the Boolean Satisfiability (SAT) problem as an optimization problem using a 2-category framework. Variables and clauses of a SAT instance are encoded as vectors and linear transformations in a complex vector space, with their logical structure modeled by a strict 2-category [4,7]. 
This allows satisfiability to be tested via compositions of transformations, setting up the constraint measure defined in Section 4.\n\n### 2.1 Construction of the 2-Category\n\nFor a SAT instance $\\phi = C_1 \\wedge \\cdots \\wedge C_m$, where each clause $C_j = l*{j1} \\vee \\cdots \\vee l*{jk}$ is a disjunction of $k \\leq n$ literals (with $l*{ji} = x*i$ or $\\neg x_i$ for variables $x_1, \\ldots, x_n$), we define a strict 2-category $\\mathcal{C}$ to encode $\\phi$’s logical structure.\n\n**Definition 2.1 (2-Category $\\mathcal{C}$)** \nThe 2-category $\\mathcal{C}$ consists of:\n- *Objects*: Vectors in the complex vector space $\\mathcal{V} = (\\mathbb{C}^2)^{\\otimes n}$, dimension $2^n$, representing variable assignments. For each variable $x_i$, define basis vectors:\n - $\\mathbf{v}_i = (1, 0) \\in \\mathbb{C}^2$, for $x_i = \\text{True}$.\n - $\\mathbf{w}_i = (0, 1) \\in \\mathbb{C}^2$, for $\\neg x_i = \\text{False}$.\n \n A configuration, e.g., $\\mathbf{v}_1 \\otimes \\mathbf{w}_2 \\otimes \\mathbf{v}_3 \\in \\mathcal{V}$, represents $x_1 = \\text{True}, x_2 = \\text{False}, x_3 = \\text{True}$.\n\n- *1-Morphisms*: Linear maps $f: \\mathcal{V} \\to \\mathcal{V}$, including:\n - *Clause projections* $P_j: \\mathcal{V} \\to \\mathcal{V}$, for clause $C_j$ with variables indexed by $I_j \\subseteq \\{1, \\ldots, n\\}$, defined as:\n $$\n P_j = \\bigotimes*{i=1}^n Q*i, \\quad Q_i = \\begin{cases} \n I - |\\mathbf{l}*{ji}\\rangle\\langle \\mathbf{l}_{ji}| & \\text{if } i \\in I_j, \\\\\n I & \\text{otherwise},\n \\end{cases}\n $$\n where $\\mathbf{l}_{ji} = \\mathbf{v}_i$ if $l_{ji} = x*i$, or $\\mathbf{l}*{ji} = \\mathbf{w}_i$ if $l_{ji} = \\neg x*i$, and $I$ is the identity on $\\mathbb{C}^2$. 
Thus, $P_j v = v$ if $v$ satisfies $C_j$; otherwise, $P_j v$ lies in the orthogonal complement.\n - *Identity maps* $\\text{id}_A: A \\to A$, for subspaces $A \\subseteq \\mathcal{V}$.\n - *Negation maps* $N_i: \\mathcal{V} \\to \\mathcal{V}$, swapping $\\mathbf{v}_i \\leftrightarrow \\mathbf{w}_i$ on the $i$-th tensor factor:\n $$\n N_i = I \\otimes \\cdots \\otimes \\begin{pmatrix} 0 & 1 \\\\ 1 & 0 \\end{pmatrix} \\otimes \\cdots \\otimes I.\n $$\n\n- *2-Morphisms*: Natural transformations $\\alpha: f \\Rightarrow g$ between 1-morphisms $f, g: A \\to B$, where $A, B \\subseteq \\mathcal{V}$. A 2-morphism $\\alpha$ is a linear map ensuring that if $f$ and $g$ represent assignments, $f$ satisfies all clauses satisfied by $g$, preserving the logical structure of $\\phi$ [4].\n\n- *Compositions*: Horizontal composition $\\beta \\circ \\alpha: g \\circ f \\Rightarrow g' \\circ f'$ for 2-morphisms $\\alpha: f \\Rightarrow f'$, $\\beta: g \\Rightarrow g'$, and vertical composition $\\beta \\cdot \\alpha: f \\Rightarrow h$ for $\\alpha: f \\Rightarrow g$, $\\beta: g \\Rightarrow h$, defined via linear map composition. Associativity and identity laws ensure $\\mathcal{C}$ is a strict 2-category [4].\n\nThe 2-category $\\mathcal{C}$ encodes SAT as follows: vectors in $\\mathcal{V}$ represent assignments, projections $P_j$ enforce clause constraints, negation maps $N_i$ handle negated literals, and 2-morphisms preserve logical consistency across transformations [7].\n\n### 2.2 Satisfiability via Projection Composition\n\nSatisfiability of $\\phi$ is tested by composing the clause projections:\n$$\nP = P_m \\circ \\cdots \\circ P_1: \\mathcal{V} \\to \\mathcal{V}.\n$$\nFor a normalized vector $v \\in \\mathcal{V}, \\|v\\|=1$, $\\phi$ is satisfiable if there exists $v$ such that $P v = v$, meaning $P_j v = v$ for all $j = 1, \\ldots, m$, corresponding to a satisfying assignment. 
If $\\phi$ is unsatisfiable, the intersection of projection images $\\bigcap*{j=1}^m \\text{im}(P*j) = \\emptyset$, so $P v \\neq v$ for all $v$. This composition reformulates SAT as finding a fixed point of $P$, which we analyze as an optimization problem in Section 3 using a distance metric.\n\n### 2.3 Example: 3-SAT Instance\n\nConsider a 3-SAT instance with $n=3$ variables, $\\phi = (x_1 \\vee \\neg x_2 \\vee x_3) \\wedge (\\neg x_1 \\vee x_2 \\vee \\neg x_3)$, encoded in $\\mathcal{V} = (\\mathbb{C}^2)^{\\otimes 3}$. Assign $\\mathbf{v}_i = (1, 0)$, $\\mathbf{w}_i = (0, 1)$ for $x_i = \\text{True}$, $\\neg x_i = \\text{False}$. For clause $C_1 = x_1 \\vee \\neg x_2 \\vee x_3$, the projection is:\n$$\nP_1 = I - (I - |\\mathbf{v}_1\\rangle\\langle \\mathbf{v}_1|) \\otimes (I - |\\mathbf{w}_2\\rangle\\langle \\mathbf{w}_2|) \\otimes (I - |\\mathbf{v}_3\\rangle\\langle \\mathbf{v}_3|).\n$$\nFor $C_2 = \\neg x_1 \\vee x_2 \\vee \\neg x_3$:\n$$\nP_2 = I - (I - |\\mathbf{w}_1\\rangle\\langle \\mathbf{w}_1|) \\otimes (I - |\\mathbf{v}_2\\rangle\\langle \\mathbf{v}_2|) \\otimes (I - |\\mathbf{w}_3\\rangle\\langle \\mathbf{w}_3|).\n$$\nThe assignment $x_1 = x_2 = x_3 = \\text{True}$, represented by $v = \\mathbf{v}_1 \\otimes \\mathbf{v}_2 \\otimes \\mathbf{v}_3$, satisfies $C_1$ ($x_1 = \\text{True}$) and $C_2$ ($x_2 = \\text{True}$). Thus, $P_1 v = v$, $P_2 v = v$, and $P v = P_2 \\circ P_1 v = v$, confirming satisfiability.\n\n## 3 Optimization Problem for SAT\n\nWe reformulate the Boolean Satisfiability (SAT) problem as an optimization problem, where satisfiability is determined by minimizing a distance metric between configurations under the projection composition defined in Section 2.2. 
Building on the 2-category $\\mathcal{C}$ (Section 2), this approach quantifies deviations from satisfiability, with satisfiable instances achieving zero deviation and unsatisfiable ones exhibiting a positive gap [8].\n\n### 3.1 Configuration Space and Distance Metric\n\n**Definition 3.1 (Configuration Space)** \nThe configuration space $\\mathcal{D}(\\mathcal{V})$ consists of positive semi-definite operators $\\rho$ on $\\mathcal{V} = (\\mathbb{C}^2)^{\\otimes n}$, dimension $2^n$, with trace $\\text{Tr}(\\rho) = 1$. Pure configurations, such as $\\rho_v = |v\\rangle\\langle v|$ for a normalized vector $v \\in \\mathcal{V}$, correspond to classical assignments (e.g., $v = \\mathbf{v}_1 \\otimes \\mathbf{v}_2 \\otimes \\mathbf{v}_3$ for $x_1 = x_2 = x_3 = \\text{True}$, where $\\mathbf{v}_i = (1, 0)$).\n\nThe space $\\mathcal{D}(\\mathcal{V})$ is convex and compact, equipped with a metric to measure distances between configurations [8]. We use the Bures distance due to its compatibility with the transformations in $\\mathcal{C}$.\n\n**Definition 3.2 (Bures Distance)** \nFor $\\rho, \\sigma \\in \\mathcal{D}(\\mathcal{V})$, the Bures distance is:\n$$\nd_B(\\rho, \\sigma) = \\sqrt{2 \\left( 1 - \\sqrt{F(\\rho, \\sigma)} \\right)},\n$$\nwhere the fidelity is $F(\\rho, \\sigma) = \\left( \\text{Tr} \\sqrt{\\sqrt{\\rho} \\sigma \\sqrt{\\rho}} \\right)^2$. For pure configurations $\\rho = |u\\rangle\\langle u|$, $\\sigma = |v\\rangle\\langle v|$ with $u, v \\in \\mathcal{V}, \\|u\\| = \\|v\\| = 1$, it simplifies to:\n$$\nd_B(\\rho, \\sigma) = \\sqrt{2 (1 - |\\langle u | v \\rangle|)},\n$$\nsince $|\\langle u | v \\rangle|$ is real and non-negative for normalized vectors [8].\n\nThe Bures distance is a metric on $\\mathcal{D}(\\mathcal{V})$, satisfying positivity, symmetry, and the triangle inequality [8]. 
It is suitable for measuring deviations induced by clause projections $P_j: \\mathcal{V} \\to \\mathcal{V}$ (Section 2.1), as it aligns with the 2-category’s structure [9,10].\n\n### 3.2 Optimization Problem\n\nFor the projection composition $P = P_m \\circ \\cdots \\circ P_1: \\mathcal{V} \\to \\mathcal{V}$ (Section 2.2), we define a deviation measure to reformulate SAT as an optimization problem.\n\n**Definition 3.3 (Deviation Measure)** \nThe deviation measure for a configuration $\\rho \\in \\mathcal{D}(\\mathcal{V})$ is:\n$$\nd_B(\\rho, P(\\rho)),\n$$\nwhere:\n$$\nP(\\rho) = \\frac{P \\rho P^\\dagger}{\\text{Tr}(P \\rho P^\\dagger)},\n$$\nif $\\text{Tr}(P \\rho P^\\dagger) \\neq 0$, and $P(\\rho) = 0$ otherwise. The SAT problem is equivalent to minimizing:\n$$\nS[\\rho] = d_B(\\rho, P(\\rho))^2,\n$$\nover $\\rho \\in \\mathcal{D}(\\mathcal{V})$.\n\nThe deviation measure quantifies how far $\\rho$ is from being invariant under $P$. For a pure configuration $\\rho_v = |v\\rangle\\langle v|$, $v \\in \\mathcal{V}, \\|v\\|=1$:\n- If $\\phi$ is satisfiable, there exists $\\rho_v$ such that $P_j \\rho_v = \\rho_v$ for all $j$, so $P(\\rho_v) = \\rho_v$ and $d_B(\\rho_v, P(\\rho_v)) = 0$.\n- If $\\phi$ is unsatisfiable, $\\bigcap*{j=1}^m \\text{im}(P*j) = \\emptyset$, so $P(\\rho) = 0$ for all $\\rho \\in \\mathcal{D}(\\mathcal{V})$, and $d_B(\\rho, P(\\rho)) = \\sqrt{2}$ [8].\n\nThus, the infimum satisfies:\n$$\n\\inf*{\\rho \\in \\mathcal{D}(\\mathcal{V})} S[\\rho] = \\begin{cases} \n0 & \\text{if } \\phi \\text{ is satisfiable}, \\\\\n2 & \\text{if } \\phi \\text{ is unsatisfiable}.\n\\end{cases}\n$$\nWe focus on pure configurations $\\rho_v$, as they correspond to classical assignments and suffice to determine satisfiability, aligning with the constraint measure $\\lambda(v) = \\sum_{j=1}^m M_j(v)$ in Section 4 [8].\n\n### 3.3 Example: 3-SAT Instance\n\nConsider the 3-SAT instance $\\phi = (x_1 \\vee \\neg x_2 \\vee x_3) \\wedge (\\neg x_1 \\vee x_2 \\vee \\neg 
x_3)$ with $n=3$, as in Section 2.3, using $\\mathcal{V} = (\\mathbb{C}^2)^{\\otimes 3}$. For the assignment $x_1 = x_2 = x_3 = \\text{True}$, the pure configuration is $\\rho = |\\mathbf{v}_1 \\otimes \\mathbf{v}_2 \\otimes \\mathbf{v}_3\\rangle\\langle \\mathbf{v}_1 \\otimes \\mathbf{v}_2 \\otimes \\mathbf{v}_3|$, where $\\mathbf{v}_i = (1, 0)$. The clause projections are as in Section 2.3. Since $\\mathbf{v}_1 \\otimes \\mathbf{v}_2 \\otimes \\mathbf{v}_3$ satisfies $C_1$ ($x_1 = \\text{True}$) and $C_2$ ($x_2 = \\text{True}$), we have $P_1 \\rho = \\rho$, $P_2 \\rho = \\rho$, so $P(\\rho) = P_2 (P_1 \\rho) = \\rho$, and:\n$$\nd*B(\\rho, P(\\rho)) = 0.\n$$\nFor an unsatisfiable 3-SAT instance, consider $\\phi = (x_1 \\vee x_2) \\wedge (\\neg x_1 \\vee \\neg x_2) \\wedge (x_1 \\vee \\neg x_2) \\wedge (\\neg x_1 \\vee x_2)$. For any $\\rho \\in \\mathcal{D}(\\mathcal{V})$, the projections conflict, so $P(\\rho) = 0$, yielding:\n$$\nd_B(\\rho, P(\\rho)) = \\sqrt{2}.\n$$\nThis gap ($0$ vs. $\\sqrt{2}$) distinguishes satisfiable from unsatisfiable instances, aligning with the constraint measure in Section 4.\n\n## 4 Constraint Measure for SAT\n\nWe define a constraint measure $\\lambda(v)$ for a SAT instance, quantifying clause violations in the 2-category $\\mathcal{C}$ (Section 2). 
This measure distinguishes satisfiable from unsatisfiable instances via a positive gap, aligning with the optimization problem in Section 3 and enabling the complexity analysis in Section 5 [2].\n\n### 4.1 Constraint Measure and Satisfiability Gap\n\n**Definition 4.1 (Constraint Measure)** \nFor a SAT instance $\\phi = C_1 \\wedge \\cdots \\wedge C_m$ with $n$ variables, represented in $\\mathcal{C}$, the constraint measure $\\lambda: \\mathcal{V} \\to \\mathbb{R}*{\\geq 0}$ on the configuration space $\\mathcal{V} = (\\mathbb{C}^2)^{\\otimes n}$ is:\n$$\n\\lambda(v) = \\sum_{j=1}^m M_j(v),\n$$\nwhere $v \\in \\mathcal{V}, \\|v\\|=1$, and the clause mapping $M_j: \\mathcal{V} \\to \\mathbb{R}_{\\geq 0}$ for clause $C_j$ is:\n$$\nM_j(v) = \\text{Tr}((I - P_j) \\rho_v),\n$$\nwith $\\rho_v = |v\\rangle\\langle v|$ and $P_j: \\mathcal{V} \\to \\mathcal{V}$ the clause projection (Definition 2.1). The minimum penalty is:\n$$\n\\lambda_{\\min} = \\inf_{v \\in \\mathcal{V}, \\|v\\|=1} \\lambda(v).\n$$\n\nThe mapping $M_j(v) = 0$ if $v$ satisfies $C_j$ (i.e., $P_j v = v$), and $M_j(v) \\geq \\delta > 0$ otherwise, where $\\delta$ is a constant reflecting the orthogonal distance to the satisfying subspace, determined by the clause structure (e.g., up to three literals in 3-SAT) [8]. The measure $\\lambda(v)$ sums clause violations, with $\\lambda_{\\min} = 0$ indicating satisfiability. 
This aligns with the optimization problem in Section 3.2, where $\\lambda(v) = 0$ corresponds to $d_B(\\rho_v, P(\\rho_v)) = 0$ for a pure configuration $\\rho_v = |v\\rangle\\langle v|$ [2].\n\n**Theorem 4.1 (Satisfiability Gap)** \nFor a SAT instance $\\phi$, the minimum penalty satisfies:\n$$\n\\lambda_{\\min} = \\begin{cases} \n0 & \\text{if } \\phi \\text{ is satisfiable}, \\\\\nc & \\text{if } \\phi \\text{ is unsatisfiable},\n\\end{cases}\n$$\nwhere $c \\geq \\delta > 0$ is a constant independent of $n$ or $m$.\n\n**Proof.** \nConsider $\\phi = C_1 \\wedge \\cdots \\wedge C_m$ with configurations in $\\mathcal{V} = (\\mathbb{C}^2)^{\\otimes n}$. Each clause $C_j$ has a projection $P_j$ (Section 2.1), where $P_j v = v$ if $v$ satisfies $C_j$, and $P_j v$ lies in the orthogonal complement otherwise.\n\n**Case 1: Satisfiable.** If $\\phi$ is satisfiable, there exists an assignment $a = (a_1, \\ldots, a_n) \\in \\{0,1\\}^n$ satisfying all clauses. Construct $v_a \\in \\mathcal{V}$ as the tensor product of $\\mathbf{v}_i = (1, 0)$ for $a_i = 1$ or $\\mathbf{w}_i = (0, 1)$ for $a_i = 0$, with $\\|v_a\\|=1$. Since $a$ satisfies each $C_j$, we have $P_j v_a = v_a$, so:\n$$\nM_j(v_a) = \\text{Tr}((I - P_j) \\rho_{v_a}) = \\langle v_a | (I - P_j) v_a \\rangle = 0.\n$$\nThus, $\\lambda(v_a) = \\sum_{j=1}^m M_j(v_a) = 0$, and since $\\lambda(v) \\geq 0$, we have $\\lambda_{\\min} = 0$.\n\n**Case 2: Unsatisfiable.** If $\\phi$ is unsatisfiable, no $v \\in \\mathcal{V}, \\|v\\|=1$ satisfies all clauses. For any $v$, at least one clause $C_j$ is violated, so $P_j v \\neq v$, and:\n$$\nM_j(v) = \\langle v | (I - P_j) v \\rangle \\geq \\delta > 0,\n$$\nwhere $\\delta > 0$ is a constant determined by the clause structure [8]. Thus, $\\lambda(v) \\geq \\delta$, and:\n$$\n\\lambda_{\\min} = \\inf_{v \\in \\mathcal{V}, \\|v\\|=1} \\lambda(v) \\geq \\delta.\n$$\nSet $c = \\delta$, independent of $n$ or $m$. 
The projection composition $P = P_m \\circ \\cdots \\circ P_1$ (Section 2.2) yields $P(\\rho_v) = 0$ for unsatisfiable instances, confirming the gap: $\\lambda_{\\min} \\geq c > 0$. $\\square$\n\nThe gap ($\\lambda_{\\min} = 0$ vs. $c > 0$) mirrors the optimization gap in Section 3.2 ($S[\\rho] = 0$ vs. $2$), linking $\\lambda(v)$ to the complexity analysis in Section 5.\n\n### 4.2 Example: 3-SAT Instance\n\nFor the satisfiable 3-SAT instance $\\phi = (x_1 \\vee \\neg x_2 \\vee x_3) \\wedge (\\neg x_1 \\vee x_2 \\vee \\neg x_3)$ with $n=3$, using $\\mathcal{V} = (\\mathbb{C}^2)^{\\otimes 3}$ (Section 2.3), consider the assignment $x_1 = x_2 = x_3 = \\text{True}$, with $v_a = \\mathbf{v}_1 \\otimes \\mathbf{v}_2 \\otimes \\mathbf{v}_3$, $\\mathbf{v}_i = (1, 0)$, $\\|v_a\\|=1$. The projections $P_1, P_2$ are defined as in Section 2.3. Since $v_a$ satisfies $C_1$ ($x_1 = \\text{True}$) and $C_2$ ($x_2 = \\text{True}$), we have $P_1 v_a = v_a$, $P_2 v_a = v_a$, so:\n$$\nM_1(v_a) = \\text{Tr}((I - P_1) \\rho_{v_a}) = 0, \\quad M_2(v_a) = \\text{Tr}((I - P_2) \\rho_{v_a}) = 0.\n$$\nThus, $\\lambda(v_a) = 0$, so $\\lambda_{\\min} = 0$.\n\nFor an unsatisfiable 3-SAT instance, consider $\\phi = (x_1 \\vee x_2) \\wedge (\\neg x_1 \\vee \\neg x_2) \\wedge (x_1 \\vee \\neg x_2) \\wedge (\\neg x_1 \\vee x_2)$. For any $v \\in \\mathcal{V}, \\|v\\|=1$, at least one clause is violated. For $v = \\mathbf{v}_1 \\otimes \\mathbf{v}_2$, satisfying the first clause, the second clause $\\neg x_1 \\vee \\neg x_2$ is violated, so:\n$$\nP_2 v \\neq v, \\quad M_2(v) = \\text{Tr}((I - P_2) \\rho_v) \\geq \\delta > 0.\n$$\nThus, $\\lambda(v) \\geq \\delta$, and $\\lambda_{\\min} \\geq c = \\delta > 0$. 
This gap illustrates the theorem’s distinction between satisfiable and unsatisfiable instances.\n\n## 5 Exponential Time Complexity of 3-SAT\n\nWe prove that computing the satisfiability of a 3-SAT instance, an NP-complete problem, requires exponential time in the number of variables $n$, establishing $P \\neq NP$. This builds on the 2-category $\\mathcal{C}$ (Section 2), optimization problem (Section 3), and constraint measure $\\lambda(v)$ (Section 4), showing that computing the minimum penalty $\\lambda_{\\min}$ demands exponential time [1,2].\n\n### 5.1 Hardness of Computing the Minimum Penalty\n\nFor a 3-SAT instance $\\phi = C_1 \\wedge \\cdots \\wedge C_m$ with $n$ variables and $m = O(n)$ clauses, each with up to three literals, satisfiability is equivalent to determining whether $\\lambda_{\\min} = \\inf_{v \\in \\mathcal{V}, \\|v\\|=1} \\lambda(v) = 0$, where $\\lambda(v) = \\sum_{j=1}^m M_j(v)$ is the constraint measure on $\\mathcal{V} = (\\mathbb{C}^2)^{\\otimes n}$, with $M_j(v) = \\text{Tr}((I - P_j) \\rho_v)$, $\\rho_v = |v\\rangle\\langle v|$, and $P_j$ the clause projection (Section 4.1). For example, the satisfiable 3-SAT instance from Section 2.3 has $\\lambda_{\\min} = 0$, while the unsatisfiable instance from Section 4.2 has $\\lambda_{\\min} \\geq c$.\n\n**Theorem 5.1 (Exponential Time for $\\lambda_{\\min}$)** \nComputing $\\lambda_{\\min}$ for worst-case 3-SAT instances requires $\\Omega(2^{kn})$ time for some constant $k > 0$, unless $P = NP$.\n\n**Proof.** \nBy the Satisfiability Gap Theorem (Theorem 4.1), $\\lambda_{\\min} = 0$ if $\\phi$ is satisfiable (there exists $v \\in \\mathcal{V}, \\|v\\|=1$ such that $P_j v = v$ for all $j$), and $\\lambda_{\\min} \\geq c = \\delta > 0$ otherwise, where $\\delta$ is a constant. Exact computation of $\\lambda_{\\min}$ over $\\mathcal{V}$, dimension $2^n$, requires evaluating $\\lambda(v)$ for $O(2^n)$ basis configurations, taking $O(2^{3n})$ time due to matrix operations [11]. 
We show that even approximating $\\lambda_{\\min}$ to decide satisfiability is NP-hard.\n\n**Lemma 5.1 (Hardness of Approximation)** \nApproximating $\\lambda_{\\min}$ to within additive error $\\epsilon < c/m$ requires $\\Omega(2^{kn})$ time for some $k > 0$, unless $P = NP$.\n\n**Proof.** \nFor a satisfiable $\\phi$, there exists $v$ such that $\\lambda(v) = 0$, so $\\lambda_{\\min} = 0$. For an unsatisfiable $\\phi$, every $v$ violates at least one clause, so $\\lambda(v) \\geq \\delta$, and $\\lambda_{\\min} \\geq c = \\delta$. An algorithm outputting a value $< c/m$ for satisfiable instances ($\\lambda_{\\min} = 0$) and $\\geq c/2$ for unsatisfiable instances ($\\lambda_{\\min} \\geq c$) distinguishes $\\lambda_{\\min} = 0$ from $\\lambda_{\\min} \\geq c$, as $c/m < c/2$ for $m \\geq 2$, solving 3-SAT.\n\nSince 3-SAT is NP-complete [1], and MAX-3-SAT inapproximability [6] shows that distinguishing fully satisfiable instances from those with at most a $1 - 1/8$ fraction satisfiable is NP-hard, approximating $\\lambda_{\\min}$ within $\\epsilon < c/m$ (with $m = O(n)$) is equivalent to solving 3-SAT. The projections $P_j$ encode 3-SAT’s combinatorial structure (Section 2.1), requiring $\\Omega(2^{kn})$ evaluations of $\\lambda(v)$ to find a satisfying configuration [5,6]. A polynomial-time approximation algorithm would imply $P = NP$. $\\square$\n\nThus, computing $\\lambda_{\\min}$ requires $\\Omega(2^{kn})$ time unless $P = NP$. $\\square$\n\n### 5.2 Implications and Complexity Barriers\n\nThe exponential time requirement for computing $\\lambda_{\\min}$ for 3-SAT implies that no polynomial-time algorithm exists for 3-SAT unless $P = NP$. Since 3-SAT is reducible to any NP problem [1], this extends to all NP problems, yielding:\n$$\n\\boxed{P \\neq NP}\n$$\n\nOur categorical approach avoids known complexity barriers [12,13]. 
The _relativization barrier_ [12] is sidestepped because the proof relies on the categorical structure of $\\mathcal{C}$ and the linear algebraic properties of $\\mathcal{V}$, which encode 3-SAT’s constraints non-relativizingly, unlike diagonalization techniques [2,4]. The _natural proofs barrier_ [13] is avoided as the proof is non-constructive (no efficient algorithm is provided) and problem-specific to 3-SAT’s clause structure, not broadly applicable to Boolean functions. These properties ensure the proof’s robustness, relying on standard NP-hardness assumptions [1,5,6].\n\n## 6 Graph-Theoretic Reformulation of 3-SAT\n\nTo reinforce the proof that $P \\neq NP$, we reformulate the 3-SAT problem as a graph-theoretic problem on a clause graph, preserving the constraint measure $\\lambda(v)$ (Section 4) as a combinatorial invariant. By showing that computing this invariant requires exponential time, we provide an alternative confirmation of the exponential complexity of 3-SAT, supporting the result of Section 5 [1,2].\n\n### 6.1 Clause Graph and Connectivity Index\n\nFor a 3-SAT instance $\\phi = C_1 \\wedge \\cdots \\wedge C_m$ with $n$ variables and $m = O(n)$ clauses, we define a clause graph to encode satisfiability combinatorially.\n\n**Definition 6.1 (Clause Graph)** \nThe clause graph $G_\\phi = (V, E)$ is defined as:\n- _Vertices_ $V$: Configurations in $\\mathcal{V} = (\\mathbb{C}^2)^{\\otimes n}$, representing variable assignments (Section 2.1).\n- _Edges_ $E$: Pairs $(v, v')$ where $v, v' \\in \\mathcal{V}, \\|v\\| = \\|v'\\| = 1$, differ in at most one variable, and satisfy the same clauses $C_j$, i.e., $P_j v = v$ and $P_j v' = v'$ for some $j$, with $P_j$ the clause projection (Definition 2.1).\n\nThe graph $G_\\phi$ connects configurations with similar clause satisfaction profiles. 
For a satisfiable $\\phi$, there exists a configuration $v$ such that $P_j v = v$ for all $j$, forming a connected component in $G_\\phi$ where all vertices satisfy $\\phi$. For an unsatisfiable $\\phi$, no such component exists, as every $v$ violates at least one clause (Section 4.1). For the satisfiable instance $\\phi = (x_1 \\vee \\neg x_2 \\vee x_3) \\wedge (\\neg x_1 \\vee x_2 \\vee \\neg x_3)$ (Section 2.3) with $n=3$, the clause graph $G_\\phi$ has $2^3 = 8$ vertices, and includes a connected component containing $v = \\mathbf{v}_1 \\otimes \\mathbf{v}_2 \\otimes \\mathbf{v}_3$, with $\\kappa_\\phi = 1$. For the unsatisfiable instance $\\phi = (x_1 \\vee x_2) \\wedge (\\neg x_1 \\vee \\neg x_2) \\wedge (x_1 \\vee \\neg x_2) \\wedge (\\neg x_1 \\vee x_2)$ (Section 4.2) with $n=2$, the graph has $2^2 = 4$ vertices, and no such component exists, so $\\kappa_\\phi = 0$.\n\n**Definition 6.2 (Connectivity Index)** \nThe connectivity index $\\kappa_\\phi$ is 1 if there exists a connected component in $G_\\phi$ where all vertices satisfy $\\phi$ (i.e., $P_j v = v$ for all $j$), and 0 otherwise.\n\nThe index $\\kappa_\\phi$ mirrors the constraint measure’s minimum penalty $\\lambda_{\\min}$ (Section 4.1). If $\\lambda_{\\min} = 0$, there exists $v$ with $\\lambda(v) = 0$, corresponding to $\\kappa_\\phi = 1$. If $\\lambda_{\\min} \\geq c > 0$, no configuration satisfies all clauses, so $\\kappa_\\phi = 0$. This invariant captures satisfiability combinatorially [2].\n\n### 6.2 Exponential Time Complexity\n\n**Theorem 6.1** \nComputing the connectivity index $\\kappa_\\phi$ for worst-case 3-SAT instances requires $\\Omega(2^{kn})$ time for some constant $k > 0$, unless $P = NP$.\n\n**Proof.** \nComputing $\\kappa_\\phi$ requires identifying a connected component in $G_\\phi$ where all vertices satisfy $\\phi$. Each vertex $v \\in \\mathcal{V}$, dimension $2^n$, represents a variable assignment, and edges connect $v$ to $O(n)$ neighbors differing in one variable. 
For satisfiable $\\phi$, there exists a component where all vertices have $\\lambda(v) = 0$ (Section 4.1), so $\\kappa_\\phi = 1$. For unsatisfiable $\\phi$, every vertex violates at least one clause, so $\\kappa_\\phi = 0$. Since 3-SAT’s combinatorial structure ensures that any satisfying configuration $v$ (where $P_j v = v$ for all $j$) implies a non-empty component, checking one such $v$ is equivalent to solving 3-SAT.\n\nDetermining whether $\\kappa_\\phi = 1$ is equivalent to finding a configuration $v$ such that $P_j v = v$ for all $j$, i.e., solving 3-SAT. Since $\\mathcal{V}$ has $2^n$ vertices, evaluating clause satisfaction (via projections $P_j$) for each vertex and checking connectivity requires $\\Omega(2^n)$ operations. The NP-completeness of 3-SAT [1] and MAX-3-SAT inapproximability [6] imply that distinguishing $\\kappa_\\phi = 1$ from $\\kappa_\\phi = 0$ is NP-hard, requiring $\\Omega(2^{kn})$ time for some $k > 0$ due to the combinatorial structure of clause interactions [5]. A polynomial-time algorithm for computing $\\kappa_\\phi$ would solve 3-SAT, implying $P = NP$. $\\square$\n\nThis graph-theoretic reformulation reinforces the exponential time complexity of 3-SAT (Section 5), as computing $\\kappa_\\phi$ mirrors the hardness of computing $\\lambda_{\\min}$, confirming $P \\neq NP$.\n\n## 7 Conclusion\n\nWe prove that $P \\neq NP$ by reformulating the NP-complete 3-SAT problem in categorical and graph-theoretic frameworks. A 2-category and a clause graph model 3-SAT, enabling an optimization problem and connectivity analysis that confirm $P \\neq NP$ (Sections 2, 6). By defining a constraint measure and a topological invariant, we show that determining satisfiability requires exponential time (Sections 4, 5, 6) [1,5,6]. Unlike combinatorial or algebraic approaches [3], our methods leverage category theory and graph theory, offering novel insights into computational complexity. 
The proof avoids relativization and natural proofs barriers by being non-relativizing and specific to 3-SAT, ensuring robustness [12,13]. This result confirms that NP-complete problems require super-polynomial time unless $P = NP$. Future work could extend these frameworks to other NP-complete problems [2,4].\n\n$$\n\\boxed{P \\neq NP}\n$$\n\n---\n\n## References\n\n1. Cook, Stephen A. \"The complexity of theorem-proving procedures.\" _Proceedings of the Third Annual ACM Symposium on Theory of Computing (STOC '71)_, 151–158, ACM, New York, NY, USA, 1971. DOI: 10.1145/800157.805047.\n2. Arora, Sanjeev and Barak, Boaz. _Computational Complexity: A Modern Approach_. Cambridge University Press, Cambridge, UK, 2009.\n3. Fortnow, Lance. \"The status of the P versus NP problem.\" _Communications of the ACM_ 56(9): 78–86, 2013. DOI: 10.1145/2500468.2500487.\n4. Leinster, Tom. _Basic Category Theory_. Cambridge University Press, Cambridge, UK, 2014.\n5. Dinur, Irit and Safra, Shmuel. \"On the hardness of approximating minimum vertex cover.\" _Annals of Mathematics_ 162(1): 439–485, 2007. DOI: 10.4007/annals.2007.162.439.\n6. Håstad, Johan. \"Some optimal inapproximability results.\" _Journal of the ACM_ 48(4): 798–859, 2001. DOI: 10.1145/502090.502098.\n7. Mac Lane, Saunders. _Categories for the Working Mathematician_, 2nd ed. Springer, New York, NY, USA, 1998.\n8. Bengtsson, Ingemar and Życzkowski, Karol. _Geometry of Quantum States: An Introduction to Quantum Entanglement_. Cambridge University Press, Cambridge, UK, 2006.\n9. Petz, Dénes. \"Monotone metrics on matrix spaces.\" _Linear Algebra and its Applications_ 244: 81–96, 1996. DOI: 10.1016/0024-3795(94)00211-8.\n10. Petz, Dénes and Sudár, Csaba. \"Geometries of quantum states.\" _Journal of Mathematical Physics_ 37(6): 2662–2673, 1996. DOI: 10.1063/1.531551.\n11. Golub, Gene H. and Van Loan, Charles F. _Matrix Computations_, 3rd ed. Johns Hopkins University Press, Baltimore, MD, USA, 1996.\n12. Baker, Theodore P. 
and Gill, John and Solovay, Robert. \"Relativizations of the P =? NP question.\" _SIAM Journal on Computing_ 4(4): 431–442, 1975. DOI: 10.1137/0204037.\n13. Razborov, Alexander A. and Rudich, Steven. \"Natural proofs.\" _Journal of Computer and System Sciences_ 55(1): 24–35, 1997. DOI: 10.1006/jcss.1997.1494.", -"tags": [ -[ -"d", -"1752035287698" -], -[ -"title", -"Proving P ≠ NP via Categorical and Graph-Theoretic 3-SAT" -], -[ -"summary", -"We prove that $P \\neq NP$ by reformulating the NP-complete 3-SAT problem as an optimization problem using categorical and graph-theoretic frameworks. A 2-category encodes 3-SAT’s variables and clauses as vectors and transformations in a complex vector space, while a clause graph captures satisfiability as a connectivity property, with a constraint measure and invariant distinguishing satisfiable and unsatisfiable cases. Computing either requires exponential time, establishing $P \\neq NP$. This dual approach, leveraging category theory and graph theory, offers a novel perspective on computational complexity." 
-], -[ -"t", -"math" -], -[ -"t", -"p vs np" -], -[ -"t", -"complexity theory" -], -[ -"t", -"category theory" -], -[ -"t", -"graph theory" -], -[ -"published_at", -"1752035704" -], -[ -"alt", -"This is a long form article, you can read it in https://habla.news/a/naddr1qvzqqqr4gupzqwe6gtf5eu9pgqk334fke8f2ct43ccqe4y2nhetssnypvhge9ce9qqxnzde4xgcrxdfj8qmnvwfc69lg5m" -] -], -"kind": 30023, -"pubkey": "3b3a42d34cf0a1402d18d536c9d2ac2eb1c6019a9153be57084c8165d192e325", -"id": "4afdd068904f12c370913ca3c8744b71fae258e59457fad6f3c28ddffb8f0f41", -"sig": "6be4cf6472b98c80c659e472d8db3bc8c144a1c551c821d1cfd925dade26b395690f71b38631e49d180d7ec79fbbbbcb148df27a40955ef22479e7bec36bd6ad" -} diff --git a/tests/unit/latexRendering.test.ts b/tests/unit/latexRendering.test.ts index 7096a8a..667cd0d 100644 --- a/tests/unit/latexRendering.test.ts +++ b/tests/unit/latexRendering.test.ts @@ -3,99 +3,59 @@ import { parseAdvancedmarkup } from "../../src/lib/utils/markup/advancedMarkupPa import { readFileSync } from "fs"; import { join } from "path"; -describe("LaTeX Math Rendering", () => { - const mdPath = join(__dirname, "../../test_data/latex_markdown.md"); - const raw = readFileSync(mdPath, "utf-8"); - // Extract the markdown content field from the JSON +describe("LaTeX and AsciiMath Rendering in Inline Code Blocks", () => { + const jsonPath = join(__dirname, "../../test_data/LaTeXtestfile.json"); + const raw = readFileSync(jsonPath, "utf-8"); + // Extract the markdown content field from the JSON event const content = JSON.parse(raw).content; - it('renders inline math as ', async () => { + it('renders LaTeX inline and display math correctly', async () => { const html = await parseAdvancedmarkup(content); - expect(html).toMatch(/\$P \\neq NP\$<\/span>/); - expect(html).toMatch( - /\$x_1 = \\text\{True\}\$<\/span>/, - ); + // Test basic LaTeX examples from the test document + expect(html).toMatch(/\$\\sqrt\{x\}\$<\/span>/); + expect(html).toMatch(/
\$\$\\sqrt\{x\}\$\$<\/div>/); + expect(html).toMatch(/\$\\mathbb\{N\} = \\{ a \\in \\mathbb\{Z\} : a > 0 \\}\$<\/span>/); + expect(html).toMatch(/
\$\$P \\left\( A=2 \\, \\middle\| \\, \\dfrac\{A\^2\}\{B\}>4 \\right\)\$\$<\/div>/); }); - it('renders display math as
\$\$\s*P_j = \\bigotimes/, - ); - expect(html).toMatch( - /
\$\$[\s\S]*?\\begin\{pmatrix\}/, - ); - expect(html).toMatch( - /
\$\$\\boxed\{P \\neq NP\}\$\$<\/div>/, - ); + // Test AsciiMath examples + expect(html).toMatch(/\$E=mc\^2\$<\/span>/); + expect(html).toMatch(/
\$\$sum_\(k=1\)\^n k = 1\+2\+ cdots \+n=\(n\(n\+1\)\)\/2\$\$<\/div>/); + expect(html).toMatch(/
\$\$int_0\^1 x\^2 dx\$\$<\/div>/); }); - it("does not wrap display math in

or

", async () => { + it('renders LaTeX array and matrix environments as math', async () => { const html = await parseAdvancedmarkup(content); - // No

or

directly wrapping math-block - expect(html).not.toMatch(/]*>\s*
\$\$[\s\S]*\\begin\{array\}\{ccccc\}[\s\S]*\\end\{array\}[\s\S]*\$\$<\/div>/); + expect(html).toMatch(/
\$\$[\s\S]*\\begin\{bmatrix\}[\s\S]*\\end\{bmatrix\}[\s\S]*\$\$<\/div>/); }); - it("renders LaTeX environments (pmatrix) within display math blocks", async () => { + it('handles unsupported LaTeX environments gracefully', async () => { const html = await parseAdvancedmarkup(content); - // Check that pmatrix is properly rendered within a display math block - expect(html).toMatch( - /
\$\$[\s\S]*?\\begin\{pmatrix\}[\s\S]*?\\end\{pmatrix\}[\s\S]*?\$\$<\/div>/, - ); + // Should show a message and plaintext for tabular + expect(html).toMatch(/
/); + expect(html).toMatch(/Unrendered, as it is LaTeX typesetting, not a formula:/); + expect(html).toMatch(/\\\\begin\{tabular\}/); }); - it('renders all math as math (no unwrapped $...$, $$...$$, \\(...\\), \\[...\\], or environments left)', async () => { + it('renders mixed LaTeX and AsciiMath correctly', async () => { const html = await parseAdvancedmarkup(content); - // No unwrapped $...$ outside math-inline or math-block - // Remove all math-inline and math-block tags and check for stray $...$ - const htmlNoMath = html - .replace(/\$[^$]+\$<\/span>/g, '') - .replace(/
\$\$[\s\S]*?\$\$<\/div>/g, '') - .replace(/
[\s\S]*?<\/div>/g, ''); - expect(htmlNoMath).not.toMatch(/\$[^\$\n]+\$/); // inline math - expect(htmlNoMath).not.toMatch(/\$\$[\s\S]*?\$\$/); // display math - expect(htmlNoMath).not.toMatch(/\\\([^)]+\\\)/); // \(...\) - expect(htmlNoMath).not.toMatch(/\\\[[^\]]+\\\]/); // \[...\] - expect(htmlNoMath).not.toMatch(/\\begin\{[a-zA-Z*]+\}[\s\S]*?\\end\{[a-zA-Z*]+\}/); // environments - // No math inside code or pre - expect(html).not.toMatch(//); - expect(html).not.toMatch(//); + // Test mixed content + expect(html).toMatch(/\$\\frac\{1\}\{2\}\$<\/span>/); + expect(html).toMatch(/\$1\/2\$<\/span>/); + expect(html).toMatch(/
\$\$\\sum_\{i=1\}\^n x_i\$\$<\/div>/); + expect(html).toMatch(/
\$\$sum_\(i=1\)\^n x_i\$\$<\/div>/); }); - it('renders every line of the document: all math is wrapped', async () => { - const lines = content.split(/\r?\n/); - for (let i = 0; i < lines.length; i++) { - const line = lines[i]; - if (!line.trim()) continue; - const html = await parseAdvancedmarkup(line); - // If the line contains $...$, $$...$$, \(...\), \[...\], or bare LaTeX commands, it should be wrapped - const hasMath = /\$[^$]+\$|\$\$[\s\S]*?\$\$|\\\([^)]+\\\)|\\\[[^\]]+\\\]|\\[a-zA-Z]+(\{[^}]*\})*/.test(line); - if (hasMath) { - const wrapped = /math-inline|math-block/.test(html); - if (!wrapped) { - // eslint-disable-next-line no-console - console.error(`Line ${i + 1} failed:`, line); - // eslint-disable-next-line no-console - console.error('Rendered HTML:', html); - } - expect(wrapped).toBe(true); - } - // Should not have any unwrapped $...$, $$...$$, \(...\), \[...\], or bare LaTeX commands - const stray = /(^|[^>])\$[^$\n]+\$|\$\$[\s\S]*?\$\$|\\\([^)]+\\\)|\\\[[^\]]+\\\]|\\[a-zA-Z]+(\{[^}]*\})*/.test(html); - expect(stray).toBe(false); - } - }); - - it('renders standalone math lines as display math blocks', async () => { - const mdPath = require('path').join(__dirname, '../../test_data/latex_markdown.md'); - const raw = require('fs').readFileSync(mdPath, 'utf-8'); - const content = JSON.parse(raw).content || raw; + it('handles edge cases and regular code blocks', async () => { const html = await parseAdvancedmarkup(content); - // Example: Bures distance line - expect(html).toMatch(/
\$\$d_B\([^$]+\) = [^$]+\$\$<\/div>/); - // Example: P(\rho) = ... - expect(html).toMatch(/
\$\$P\([^$]+\) = [^$]+\$\$<\/div>/); + // Test regular code blocks (should remain as code, not math) + expect(html).toMatch(/]*>\$19\.99<\/code>/); + expect(html).toMatch(/]*>echo "Price: \$100"<\/code>/); + expect(html).toMatch(/]*>const price = \\`\$\$\{amount\}\\`<\/code>/); + expect(html).toMatch(/]*>color: \$primary-color<\/code>/); }); });