Browse Source

latex/Asciidoc rendering improvement for NostrMarkup

master
Silberengel 8 months ago
parent
commit
1fff18b267
  1. 121
      src/lib/utils/markup/advancedAsciidoctorPostProcessor.ts
  2. 417
      src/lib/utils/markup/advancedMarkupParser.ts
  3. 4
      src/lib/utils/markup/basicMarkupParser.ts
  4. 34
      test_data/LaTeXtestfile.json
  5. 135
      test_data/LaTeXtestfile.md
  6. 50
      test_data/latex_markdown.md
  7. 112
      tests/unit/latexRendering.test.ts

121
src/lib/utils/markup/advancedAsciidoctorPostProcessor.ts

@ -45,54 +45,103 @@ export async function postProcessAdvancedAsciidoctorHtml( @@ -45,54 +45,103 @@ export async function postProcessAdvancedAsciidoctorHtml(
/**
 * Rewrites math markup inside an HTML string into delimiter forms that a
 * MathJax-style renderer can pick up.
 *
 * Steps performed by the code below, in order:
 *  1. `\$` is unescaped to `$`.
 *  2. Each `<div class="stemblock"><div class="content">…</div></div>` has any
 *     existing `$` / `$$` delimiters stripped and is re-wrapped as `$$…$$`.
 *  3. `<span>$</span> … <span>$</span>` pairs become
 *     `<span class="math-inline">$…$</span>`.
 *  4. `stem:[…]`, `latexmath:[…]` and `asciimath:[…]` macros become
 *     `math-inline` spans delimited by `$…$`, `\(…\)` and backticks.
 *
 * NOTE(review): the `latexmath` and `asciimath` calls below each pass a second
 * regex/callback pair (matching `<code>` elements) as third and fourth
 * arguments. `String.prototype.replace` only uses its first two arguments, so
 * as written the `<code>` handling never runs. Confirm whether those pairs
 * were meant to be separate `html.replace(...)` calls.
 *
 * @param html - HTML string to post-process.
 * @returns The HTML string with the replacements above applied.
 */
function fixAllMathBlocks(html: string): string {
// Unescape \$ to $ for math delimiters
html = html.replace(/\\\$/g, "$");
// Block math: <div class="stemblock"><div class="content">...</div></div>
// Strip whatever delimiters are already present, then wrap the content in $$...$$
html = html.replace(
/<div class="stemblock">\s*<div class="content">([\s\S]*?)<\/div>\s*<\/div>/g,
(_match, mathContent) => {
let cleanMath = mathContent
// Drop delimiters that were emitted as standalone <span> elements
.replace(/<span>\$<\/span>/g, "")
.replace(/<span>\$\$<\/span>/g, "")
// Remove $ or $$ on their own line, or surrounded by whitespace/newlines
// (the surrounding whitespace itself is kept via "$1$2")
.replace(/(^|[\n\r\s])\$([\n\r\s]|$)/g, "$1$2")
.replace(/(^|[\n\r\s])\$\$([\n\r\s]|$)/g, "$1$2")
// Remove all leading and trailing whitespace and $
.replace(/^[\s$]+/, "")
.replace(/[\s$]+$/, "")
.trim(); // Final trim of any whitespace still left at the edges
// Always wrap in $$...$$
return `<div class="stemblock"><div class="content">$$${cleanMath}$$</div></div>`;
},
);
// Inline math: <span>$</span> ... <span>$</span> (allow whitespace/newlines)
html = html.replace(
/<span>\$<\/span>\s*([\s\S]+?)\s*<span>\$<\/span>/g,
(_match, mathContent) =>
`<span class="math-inline">$${mathContent.trim()}$</span>`,
);
// Inline math: stem:[...] or latexmath:[...]
html = html.replace(
/stem:\[([^\]]+?)\]/g,
(_match, content) => `<span class="math-inline">$${content.trim()}$</span>`,
);
// latexmath:[...] is emitted with \( ... \) delimiters; doubled backslashes
// in the content are collapsed to single ones.
html = html.replace(
/latexmath:\[([^\]]+?)\]/g,
(_match, content) =>
`<span class="math-inline">\\(${content.trim().replace(/\\\\/g, "\\")}\\)</span>`,
// NOTE(review): the regex and callback below are extra arguments to the
// replace() call above and are ignored at runtime.
/<code[^>]*class="[^"]*language-[^"]*"[^>]*>([\s\S]*?)<\/code>/g,
(match, codeContent) => {
const trimmedCode = codeContent.trim();
if (isLaTeXContent(trimmedCode)) {
return `<span class="math-inline">$${trimmedCode}$</span>`;
}
return match; // Return original if not LaTeX
}
);
// asciimath:[...] is emitted wrapped in backticks
html = html.replace(
/asciimath:\[([^\]]+?)\]/g,
(_match, content) =>
`<span class="math-inline">\`${content.trim()}\`</span>`,
// NOTE(review): as above, this second regex/callback pair is passed as
// ignored extra arguments, so <code> elements are not processed here.
/<code[^>]*>([\s\S]*?)<\/code>/g,
(match, codeContent) => {
const trimmedCode = codeContent.trim();
if (isLaTeXContent(trimmedCode)) {
return `<span class="math-inline">$${trimmedCode}$</span>`;
}
return match; // Return original if not LaTeX
}
);
return html;
}
/**
 * Reports whether a string contains LaTeX-looking syntax.
 *
 * The string counts as LaTeX when it contains at least one of:
 *  - a control word: a backslash followed by an ASCII letter (`\frac`, `\alpha`, …);
 *  - an escaped math delimiter: `\(`, `\)`, `\[` or `\]`;
 *  - a `$$` display-math delimiter;
 *  - a `$…$` pair enclosing at least one non-`$` character.
 *
 * The earlier version also listed dozens of individual commands (`\sqrt`,
 * `\sum`, `\mathbb{`, …). Every one of them starts with a backslash followed by
 * a letter, so the generic control-word test already covered them; they have
 * been removed without changing the result for any input.
 *
 * @param content - Text to inspect. Surrounding whitespace is irrelevant
 *   because none of the checks are anchored or match whitespace at the edges.
 * @returns `true` if any of the markers above is present, otherwise `false`.
 */
function isLaTeXContent(content: string): boolean {
  // One alternation instead of a list of regexes:
  //   \\[a-zA-Z()[\]]  control word, or \( \) \[ \]
  //   \$\$             display delimiter
  //   \$[^$]+\$        inline $…$ pair
  const latexSyntax = /\\[a-zA-Z()[\]]|\$\$|\$[^$]+\$/;
  return latexSyntax.test(content);
}
/**
* Processes PlantUML blocks in HTML content
*/

417
src/lib/utils/markup/advancedMarkupParser.ts

@ -8,6 +8,22 @@ hljs.configure({ @@ -8,6 +8,22 @@ hljs.configure({
ignoreUnescapedHTML: true,
});
/**
 * Escapes the HTML-significant characters `&`, `<`, `>`, `"` and `'` so that
 * arbitrary text can be embedded in markup safely.
 *
 * The previous implementation used a detached DOM element when `document` was
 * available and a string-replace fallback otherwise. The two paths disagreed:
 * `innerHTML` serialisation escapes only `&`, `<` and `>`, while the fallback
 * also escaped both quote characters. This version always applies the same
 * five replacements, so the output is identical in browser, SSR and test
 * environments and is safe inside attribute values as well as text content.
 *
 * @param text - Raw text to escape.
 * @returns The text with the five characters replaced by HTML entities.
 */
function escapeHtml(text: string): string {
  const entities: Record<string, string> = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#039;",
  };
  // Single pass: each source character is looked at once, so an "&" that
  // belongs to an entity we just produced is never escaped a second time.
  return text.replace(/[&<>"']/g, (ch) => entities[ch] ?? ch);
}
// Regular expressions for advanced markup elements
// Hash-prefixed heading, matched per line: one to six "#" characters (group 1),
// at least one whitespace character, then the heading text (group 2).
const HEADING_REGEX = /^(#{1,6})\s+(.+)$/gm;
// Underlined heading: a non-empty text line (group 1) followed by a line that
// consists only of "=" characters or only of "-" characters (group 2) and a
// trailing newline.
const ALTERNATE_HEADING_REGEX = /^([^\n]+)\n(=+|-+)\n/gm;
@ -380,111 +396,296 @@ function restoreCodeBlocks(text: string, blocks: Map<string, string>): string { @@ -380,111 +396,296 @@ function restoreCodeBlocks(text: string, blocks: Map<string, string>): string {
}
/**
* Process LaTeX math expressions using a token-based approach to avoid nested processing
* Process $...$ and $$...$$ math blocks: render as LaTeX if recognized, otherwise as AsciiMath
* This must run BEFORE any paragraph or inline code formatting.
*/
function processMathExpressions(content: string): string {
// Tokenize the content to avoid nested processing
const tokens: Array<{type: 'text' | 'math', content: string}> = [];
let currentText = '';
let i = 0;
while (i < content.length) {
// Check for LaTeX environments first (most specific)
const envMatch = content.slice(i).match(/^\\begin\{([^}]+)\}([\s\S]*?)\\end\{\1\}/);
if (envMatch) {
if (currentText) {
tokens.push({type: 'text', content: currentText});
currentText = '';
}
tokens.push({type: 'math', content: `\\begin{${envMatch[1]}}${envMatch[2]}\\end{${envMatch[1]}}`});
i += envMatch[0].length;
continue;
}
// Check for display math blocks ($$...$$)
const displayMatch = content.slice(i).match(/^\$\$([\s\S]*?)\$\$/);
if (displayMatch) {
if (currentText) {
tokens.push({type: 'text', content: currentText});
currentText = '';
}
tokens.push({type: 'math', content: displayMatch[1]});
i += displayMatch[0].length;
continue;
}
// Check for LaTeX display math (\[...\])
const latexDisplayMatch = content.slice(i).match(/^\\\[([^\]]+)\\\]/);
if (latexDisplayMatch) {
if (currentText) {
tokens.push({type: 'text', content: currentText});
currentText = '';
}
tokens.push({type: 'math', content: latexDisplayMatch[1]});
i += latexDisplayMatch[0].length;
continue;
}
// Check for inline math ($...$)
const inlineMatch = content.slice(i).match(/^\$([^$\n]+)\$/);
if (inlineMatch) {
if (currentText) {
tokens.push({type: 'text', content: currentText});
currentText = '';
/**
 * Converts dollar-delimited math into math markup.
 *
 *  - `$$…$$` (may span lines, must contain a non-whitespace character) becomes
 *    `<div class="math-block">`.
 *  - `$…$` (single line, must not start with whitespace) becomes
 *    `<span class="math-inline">`.
 *
 * Content recognised by `isLaTeXContent` keeps its dollar delimiters inside
 * the element. Anything else is treated as AsciiMath: the element is tagged
 * `data-math-type="asciimath"` and the delimiters are dropped.
 *
 * Display math is handled first. Each rendered block is parked behind a
 * placeholder while inline math is processed, because the inline pattern
 * would otherwise match the interior of the `$$…$$` that was just emitted and
 * wrap it in a nested `math-inline` span.
 *
 * @param content - Markup text to scan.
 * @returns The text with dollar-delimited math replaced by HTML elements.
 */
function processDollarMath(content: string): string {
  const displayBlocks: string[] = [];

  // Pass 1: display math. Private-use code points delimit the placeholder so
  // it contains no "$", no whitespace and is very unlikely to occur in input.
  const withPlaceholders = content.replace(
    /\$\$([\s\S]*?\S[\s\S]*?)\$\$/g,
    (_match, expr: string) => {
      const block = isLaTeXContent(expr)
        ? `<div class="math-block">$$${expr}$$</div>`
        : `<div class="math-block" data-math-type="asciimath">${expr.replace(/\$+/g, "").trim()}</div>`;
      displayBlocks.push(block);
      return `\uE000${displayBlocks.length - 1}\uE001`;
    },
  );

  // Pass 2: inline math. The captured expression cannot contain "$" (the
  // pattern excludes it), so only trimming is needed for the AsciiMath case.
  const withInline = withPlaceholders.replace(
    /\$([^\s$][^$\n]*?)\$/g,
    (_match, expr: string) =>
      isLaTeXContent(expr)
        ? `<span class="math-inline">$${expr}$</span>`
        : `<span class="math-inline" data-math-type="asciimath">${expr.trim()}</span>`,
  );

  // Pass 3: put the display blocks back. A function replacement is used so
  // the "$" characters in the block are not read as replacement patterns.
  return withInline.replace(
    /\uE000(\d+)\uE001/g,
    (token, index: string) => displayBlocks[Number(index)] ?? token,
  );
}
// Check for LaTeX inline math (\(...\))
const latexInlineMatch = content.slice(i).match(/^\\\(([^)]+)\\\)/);
if (latexInlineMatch) {
if (currentText) {
tokens.push({type: 'text', content: currentText});
currentText = '';
}
tokens.push({type: 'math', content: latexInlineMatch[1]});
i += latexInlineMatch[0].length;
continue;
}
/**
 * Renders inline code spans (as matched by `INLINE_CODE_REGEX`) either as math
 * or as ordinary inline code.
 *
 * For each span, after trimming:
 *  1. A `tabular` environment is typesetting rather than a formula, so it is
 *     shown escaped inside an "unrendered" notice.
 *  2. Content that `isLaTeXContent` does not recognise is emitted as a styled,
 *     HTML-escaped `<code>` element.
 *  3. `\[…\]` and `$$…$$` become a `math-block` div (delimiters normalised to
 *     `$$`); `$…$` and any other LaTeX content become a `math-inline` span.
 *
 * The previous version tested non-LaTeX content against a long list of
 * "looks like code" patterns, but both outcomes of that test returned the same
 * `<code>` markup, so the list had no effect and has been removed.
 *
 * @param content - Markup text containing inline code spans.
 * @returns The text with every inline code span replaced by HTML.
 */
function processMathExpressions(content: string): string {
  return content.replace(INLINE_CODE_REGEX, (_match, code: string) => {
    const trimmedCode = code.trim();

    // Unsupported typesetting environment. The single-backslash pattern also
    // matches the double-backslash spelling (`\\begin{tabular}`).
    if (/\\begin\{tabular\}/.test(trimmedCode)) {
      return [
        '<div class="unrendered-latex">',
        '<p class="text-sm text-gray-600 dark:text-gray-400 mb-2">',
        "Unrendered, as it is LaTeX typesetting, not a formula:",
        "</p>",
        '<pre class="bg-gray-100 dark:bg-gray-900 p-2 rounded text-xs overflow-x-auto">',
        `<code>${escapeHtml(trimmedCode)}</code>`,
        "</pre>",
        "</div>",
      ].join("\n");
    }

    // Not math: plain inline code, escaped for safe display.
    if (!isLaTeXContent(trimmedCode)) {
      const escapedCode = trimmedCode
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;")
        .replace(/"/g, "&quot;")
        .replace(/'/g, "&#039;");
      return `<code class="px-1.5 py-0.5 bg-white dark:bg-gray-900 border border-gray-200 dark:border-gray-700 rounded text-sm font-mono">${escapedCode}</code>`;
    }

    // LaTeX display math written as \[ ... \]
    if (/^\\\[[\s\S]*\\\]$/.test(trimmedCode)) {
      const inner = trimmedCode.replace(/^\\\[|\\\]$/g, "");
      return `<div class="math-block">$$${inner}$$</div>`;
    }

    // Display math written as $$ ... $$ (checked before the single-$ form,
    // which would otherwise also match it)
    if (/^\$\$[\s\S]*\$\$$/.test(trimmedCode)) {
      const inner = trimmedCode.replace(/^\$\$|\$\$$/g, "");
      return `<div class="math-block">$$${inner}$$</div>`;
    }

    // Inline math written as $ ... $
    if (/^\$[\s\S]*\$$/.test(trimmedCode)) {
      const inner = trimmedCode.replace(/^\$|\$$/g, "");
      return `<span class="math-inline">$${inner}$</span>`;
    }

    // Bare LaTeX without delimiters defaults to inline math.
    return `<span class="math-inline">$${trimmedCode}$</span>`;
  });
}
// Now process the tokens to create the final HTML
let result = '';
for (const token of tokens) {
if (token.type === 'text') {
result += token.content;
} else {
// Determine if this should be display or inline math
const isDisplay = token.content.includes('\\begin{') ||
token.content.includes('\\end{') ||
token.content.includes('\\[') ||
token.content.includes('\\]') ||
token.content.length > 50 || // Heuristic for display math
token.content.includes('=') && token.content.length > 20 || // Equations with equals
token.content.includes('\\begin{') || // Any LaTeX environment
token.content.includes('\\boxed{') || // Boxed expressions
token.content.includes('\\text{') && token.content.length > 30; // Text blocks
if (isDisplay) {
result += `<div class="math-block my-4 text-center">$$${token.content}$$</div>`;
} else {
result += `<span class="math-inline">$${token.content}$</span>`;
}
}
}
/**
 * Reports whether a string contains LaTeX-looking syntax.
 *
 * The string counts as LaTeX when it contains at least one of:
 *  - a control word: a backslash followed by an ASCII letter (`\frac`,
 *    `\begin`, …). This also covers the double-backslash spelling
 *    (`\\frac`), since its second backslash is followed by a letter;
 *  - an escaped math delimiter: `\(`, `\)`, `\[` or `\]`;
 *  - a `$$` display-math delimiter;
 *  - a `$…$` pair enclosing at least one non-`$` character.
 *
 * Changes from the earlier version:
 *  - a stray `return result;` (an undeclared identifier) sat in front of the
 *    real return statement and has been removed;
 *  - the anchored early-return checks and the per-command / per-environment
 *    patterns were all special cases of the four rules above, so they have
 *    been folded into one expression without changing the outcome for any
 *    input.
 *
 * @param content - Text to inspect. Surrounding whitespace is irrelevant
 *   because none of the checks are anchored or match whitespace at the edges.
 * @returns `true` if any of the markers above is present, otherwise `false`.
 */
function isLaTeXContent(content: string): boolean {
  // One alternation instead of a list of regexes:
  //   \\[a-zA-Z()[\]]  control word, or \( \) \[ \]
  //   \$\$             display delimiter
  //   \$[^$]+\$        inline $…$ pair
  const latexSyntax = /\\[a-zA-Z()[\]]|\$\$|\$[^$]+\$/;
  return latexSyntax.test(content);
}
/**
@ -498,34 +699,26 @@ export async function parseAdvancedmarkup(text: string): Promise<string> { @@ -498,34 +699,26 @@ export async function parseAdvancedmarkup(text: string): Promise<string> {
const { text: withoutCode, blocks } = processCodeBlocks(text);
let processedText = withoutCode;
// Step 2: Process LaTeX math expressions FIRST to avoid wrapping in <p> or <blockquote>
// Step 2: Process $...$ and $$...$$ math blocks (LaTeX or AsciiMath)
processedText = processDollarMath(processedText);
// Step 3: Process LaTeX math expressions ONLY within inline code blocks (legacy support)
processedText = processMathExpressions(processedText);
// Step 3: Process block-level elements
// Step 4: Process block-level elements (tables, blockquotes, headings, horizontal rules)
processedText = processTables(processedText);
processedText = processBlockquotes(processedText);
processedText = processHeadings(processedText);
processedText = processHorizontalRules(processedText);
// Process inline elements
processedText = processedText.replace(INLINE_CODE_REGEX, (_, code) => {
const escapedCode = code
.trim()
.replace(/&/g, "&amp;")
.replace(/</g, "&lt;")
.replace(/>/g, "&gt;")
.replace(/"/g, "&quot;")
.replace(/'/g, "&#039;");
return `<code class="px-1.5 py-0.5 bg-white dark:bg-gray-900 border border-gray-200 dark:border-gray-700 rounded text-sm font-mono">${escapedCode}</code>`;
});
// Process footnotes (only references, not definitions)
// Step 5: Process footnotes (only references, not definitions)
processedText = processFootnotes(processedText);
// Process basic markup (which will also handle Nostr identifiers)
// Step 6: Process basic markup (which will also handle Nostr identifiers)
// This includes paragraphs, inline code, links, lists, etc.
processedText = await parseBasicmarkup(processedText);
// Step 4: Restore code blocks
// Step 7: Restore code blocks
processedText = restoreCodeBlocks(processedText, blocks);
return processedText;

4
src/lib/utils/markup/basicMarkupParser.ts

@ -411,8 +411,8 @@ export async function parseBasicmarkup(text: string): Promise<string> { @@ -411,8 +411,8 @@ export async function parseBasicmarkup(text: string): Promise<string> {
.map((para) => para.trim())
.filter((para) => para.length > 0)
.map((para) => {
// Skip wrapping if para already contains block-level elements
if (/<(div|h[1-6]|blockquote|table|pre|ul|ol|hr)/i.test(para)) {
// Skip wrapping if para already contains block-level elements or math blocks
if (/(<div[^>]*class=["'][^"']*math-block[^"']*["'])|<(div|h[1-6]|blockquote|table|pre|ul|ol|hr)/i.test(para)) {
return para;
}
return `<p class="my-4">${para}</p>`;

34
test_data/LaTeXtestfile.json

File diff suppressed because one or more lines are too long

135
test_data/LaTeXtestfile.md

@ -0,0 +1,135 @@ @@ -0,0 +1,135 @@
# This is a test file for writing mathematical formulas in NostrMarkup
This document covers the rendering of formulas in TeX/LaTeX and AsciiMath notation, or some combination of those within the same page. It is meant to be rendered by clients utilizing MathJax.
If you want the entire document to be rendered as mathematics, place the entire thing in a backtick-codeblock, but know that this makes the document slower to load, it is harder to format the prose, and the result is less legible. It also doesn't increase portability, as it's easy to export markup as LaTeX files, or as PDFs, with the formulas rendered.
The general idea is that anything placed within `single backticks` is inline code, and all inline code will be scanned for typical mathematical statements and rendered on a best-effort basis. (For more precise rendering, use Asciidoc.) Text that is not marked as inline code will not be rendered as mathematical formulas, because it is prose.
If you want the TeX to be blended into the surrounding text, wrap the text within single `$`. Otherwise, use double `$$` symbols, for display math, and it will appear on its own line.
## TeX Examples
Inline equation: `$\sqrt{x}$`
Same equation, in the display mode: `$$\sqrt{x}$$`
Something more complex, inline: `$\mathbb{N} = \{ a \in \mathbb{Z} : a > 0 \}$`
Something complex, in display mode: `$$P \left( A=2 \, \middle| \, \dfrac{A^2}{B}>4 \right)$$`
Another example of `$$\prod_{i=1}^{n} x_i - 1$$` inline formulas.
Function example:
`$$
f(x)=
\begin{cases}
1/d_{ij} & \quad \text{when $d_{ij} \leq 160$}\\
0 & \quad \text{otherwise}
\end{cases}
$$`
And a matrix:
`$$
M =
\begin{bmatrix}
\frac{5}{6} & \frac{1}{6} & 0 \\[0.3em]
\frac{5}{6} & 0 & \frac{1}{6} \\[0.3em]
0 & \frac{5}{6} & \frac{1}{6}
\end{bmatrix}
$$`
LaTeX typesetting won't be rendered. Use NostrMarkup delimiter tables for this sort of thing.
`\\begin{tabular}{|c|c|c|l|r|}
\\hline
\\multicolumn{3}{|l|}{test} & A & B \\\\
\\hline
1 & 2 & 3 & 4 & 5 \\\\
\\hline
\\end{tabular}`
We also recognize common LaTeX statements:
`\[
\begin{array}{ccccc}
1 & 2 & 3 & 4 & 5 \\
\end{array}
\]`
`\[ x^n + y^n = z^n \]`
`\sqrt{x^2+1}`
Greek letters are a snap: `$\Psi$`, `$\psi$`, `$\Phi$`, `$\phi$`.
Equations within text are easy--- A well known Maxwell thermodynamic relation is `$\left.{\partial T \over \partial P}\right|_{s} = \left.{\partial v \over \partial s}\right|_{P}$`.
You can also set aside equations like so: `\begin{eqnarray} du &=& T\ ds -P\ dv, \qquad \mbox{first law.}\label{fl}\\ ds &\ge& {\delta q \over T}.\qquad \qquad \mbox{second law.} \label{sl} \end {eqnarray}`
## And some good ole Asciimath
Asciimath doesn't use `$` or `$$` delimiters, but we are using it to make mathy stuff easier to find. If you want it inline, include it inline. If you want it on a separate line, put a hard-return before and after.
Inline text example here `$E=mc^2$` and another `$1/(x+1)$`; very simple.
Displaying on a separate line:
`$$sum_(k=1)^n k = 1+2+ cdots +n=(n(n+1))/2$$`
`$$int_0^1 x^2 dx$$`
`$$x = (-6 +- sqrt((-6)^2 - 4 (1)(4)))/(2 xx 1)$$`
`$$|x|= {(x , if x ge 0 text(,)),(-x , if x <0.):}$$`
Displaying with wider spacing:
`$a=3, \ \ \ b=-3,\ \ $` and `$ \ \ c=2$`.
Thus `$(a+b)(c+b)=0$`.
Displaying with indentations:
Using the quadratic formula, the roots of `$x^2-6x+4=0$` are
`$$x = (-6 +- sqrt((-6)^2 - 4 (1)(4)))/(2 xx 1)$$`
`$$ \ \ = (-6 +- sqrt(36 - 16))/2$$`
`$$ \ \ =(-6 +- sqrt(20))/2$$`
`$$ \ \ = -0.8 or 2.2 \ \ \ $$` to 1 decimal place.
Advanced alignment and matrices looks like this:
A `$3xx3$` matrix, `$$((1,2,3),(4,5,6),(7,8,9))$$` and a `$2xx1$` matrix, or vector, `$$((1),(0))$$`.
The outer brackets determine the delimiters e.g. `$|(a,b),(c,d)|=ad-bc$`.
A general `$m xx n$` matrix `$$((a_(11), cdots , a_(1n)),(vdots, ddots, vdots),(a_(m1), cdots , a_(mn)))$$`
## Mixed Examples
Here are some examples mixing LaTeX and AsciiMath:
- LaTeX inline: `$\frac{1}{2}$` vs AsciiMath inline: `$1/2$`
- LaTeX display: `$$\sum_{i=1}^n x_i$$` vs AsciiMath display: `$$sum_(i=1)^n x_i$$`
- LaTeX matrix: `$$\begin{pmatrix} a & b \\ c & d \end{pmatrix}$$` vs AsciiMath matrix: `$$((a,b),(c,d))$$`
## Edge Cases
- Empty math: `$$`
- Just delimiters: `$ $`
- Dollar signs in text: The price is $10.50
- Currency: `$19.99`
- Shell command: `echo "Price: $100"`
- JavaScript template: `const price = \`$${amount}\``
- CSS with dollar signs: `color: $primary-color`
This document should demonstrate that:
1. LaTeX is processed within inline code blocks with proper delimiters
2. AsciiMath is processed within inline code blocks with proper delimiters
3. Regular code blocks remain unchanged
4. Mixed content is handled correctly
5. Edge cases are handled gracefully

50
test_data/latex_markdown.md

File diff suppressed because one or more lines are too long

112
tests/unit/latexRendering.test.ts

@ -3,99 +3,59 @@ import { parseAdvancedmarkup } from "../../src/lib/utils/markup/advancedMarkupPa @@ -3,99 +3,59 @@ import { parseAdvancedmarkup } from "../../src/lib/utils/markup/advancedMarkupPa
import { readFileSync } from "fs";
import { join } from "path";
describe("LaTeX Math Rendering", () => {
const mdPath = join(__dirname, "../../test_data/latex_markdown.md");
const raw = readFileSync(mdPath, "utf-8");
// Extract the markdown content field from the JSON
describe("LaTeX and AsciiMath Rendering in Inline Code Blocks", () => {
const jsonPath = join(__dirname, "../../test_data/LaTeXtestfile.json");
const raw = readFileSync(jsonPath, "utf-8");
// Extract the markdown content field from the JSON event
const content = JSON.parse(raw).content;
it('renders inline math as <span class="math-inline">', async () => {
it('renders LaTeX inline and display math correctly', async () => {
const html = await parseAdvancedmarkup(content);
expect(html).toMatch(/<span class="math-inline">\$P \\neq NP\$<\/span>/);
expect(html).toMatch(
/<span class="math-inline">\$x_1 = \\text\{True\}\$<\/span>/,
);
// Test basic LaTeX examples from the test document
expect(html).toMatch(/<span class="math-inline">\$\\sqrt\{x\}\$<\/span>/);
expect(html).toMatch(/<div class="math-block">\$\$\\sqrt\{x\}\$\$<\/div>/);
expect(html).toMatch(/<span class="math-inline">\$\\mathbb\{N\} = \\{ a \\in \\mathbb\{Z\} : a > 0 \\}\$<\/span>/);
expect(html).toMatch(/<div class="math-block">\$\$P \\left\( A=2 \\, \\middle\| \\, \\dfrac\{A\^2\}\{B\}>4 \\right\)\$\$<\/div>/);
});
it('renders display math as <div class="math-block', async () => {
it('renders AsciiMath inline and display math correctly', async () => {
const html = await parseAdvancedmarkup(content);
// Representative display math
expect(html).toMatch(
/<div class="math-block my-4 text-center">\$\$\s*P_j = \\bigotimes/,
);
expect(html).toMatch(
/<div class="math-block my-4 text-center">\$\$[\s\S]*?\\begin\{pmatrix\}/,
);
expect(html).toMatch(
/<div class="math-block my-4 text-center">\$\$\\boxed\{P \\neq NP\}\$\$<\/div>/,
);
// Test AsciiMath examples
expect(html).toMatch(/<span class="math-inline">\$E=mc\^2\$<\/span>/);
expect(html).toMatch(/<div class="math-block">\$\$sum_\(k=1\)\^n k = 1\+2\+ cdots \+n=\(n\(n\+1\)\)\/2\$\$<\/div>/);
expect(html).toMatch(/<div class="math-block">\$\$int_0\^1 x\^2 dx\$\$<\/div>/);
});
// NOTE(review): two `it(` headers, one closing `});` — looks like interleaved diff lines;
// confirm which title and which assertions belong to the current test.
it("does not wrap display math in <p> or <blockquote>", async () => {
it('renders LaTeX array and matrix environments as math', async () => {
const html = await parseAdvancedmarkup(content);
// No <p> or <blockquote> directly wrapping math-block
expect(html).not.toMatch(/<p[^>]*>\s*<div class="math-block/);
expect(html).not.toMatch(/<blockquote[^>]*>\s*<div class="math-block/);
// Test array and matrix environments
// Both patterns use greedy [\s\S]*, so they only require that a math-block opening,
// the \begin/\end pair, and a closing $$</div> occur in that order somewhere in the HTML.
expect(html).toMatch(/<div class="math-block">\$\$[\s\S]*\\begin\{array\}\{ccccc\}[\s\S]*\\end\{array\}[\s\S]*\$\$<\/div>/);
expect(html).toMatch(/<div class="math-block">\$\$[\s\S]*\\begin\{bmatrix\}[\s\S]*\\end\{bmatrix\}[\s\S]*\$\$<\/div>/);
});
// NOTE(review): two `it(` headers, one closing `});` — looks like interleaved diff lines.
it("renders LaTeX environments (pmatrix) within display math blocks", async () => {
it('handles unsupported LaTeX environments gracefully', async () => {
const html = await parseAdvancedmarkup(content);
// Check that pmatrix is properly rendered within a display math block
// Lazy [\s\S]*? keeps the match as short as possible between the opening div and $$</div>.
expect(html).toMatch(
/<div class="math-block my-4 text-center">\$\$[\s\S]*?\\begin\{pmatrix\}[\s\S]*?\\end\{pmatrix\}[\s\S]*?\$\$<\/div>/,
);
// Should show a message and plaintext for tabular
expect(html).toMatch(/<div class="unrendered-latex">/);
expect(html).toMatch(/Unrendered, as it is LaTeX typesetting, not a formula:/);
// This pattern requires TWO literal backslashes before begin{tabular}.
// NOTE(review): confirm the renderer really emits a doubled backslash here.
expect(html).toMatch(/\\\\begin\{tabular\}/);
});
// NOTE(review): two `it(` headers, one closing `});` — looks like interleaved diff lines.
it('renders all math as math (no unwrapped $...$, $$...$$, \\(...\\), \\[...\\], or environments left)', async () => {
it('renders mixed LaTeX and AsciiMath correctly', async () => {
const html = await parseAdvancedmarkup(content);
// No unwrapped $...$ outside math-inline or math-block
// Remove all math-inline and math-block tags and check for stray $...$
// The third replace also drops any math-block div that did not match the $$...$$ form above.
const htmlNoMath = html
.replace(/<span class="math-inline">\$[^$]+\$<\/span>/g, '')
.replace(/<div class="math-block[^"]*">\$\$[\s\S]*?\$\$<\/div>/g, '')
.replace(/<div class="math-block[^"]*">[\s\S]*?<\/div>/g, '');
// Whatever remains after stripping the wrappers must contain no math delimiters at all.
expect(htmlNoMath).not.toMatch(/\$[^\$\n]+\$/); // inline math
expect(htmlNoMath).not.toMatch(/\$\$[\s\S]*?\$\$/); // display math
expect(htmlNoMath).not.toMatch(/\\\([^)]+\\\)/); // \(...\)
expect(htmlNoMath).not.toMatch(/\\\[[^\]]+\\\]/); // \[...\]
expect(htmlNoMath).not.toMatch(/\\begin\{[a-zA-Z*]+\}[\s\S]*?\\end\{[a-zA-Z*]+\}/); // environments
// No math inside code or pre
// These two run against the full HTML: no <code>/<pre> opening may be followed by two `$` before its closing tag.
expect(html).not.toMatch(/<code[\s\S]*?\$[\s\S]*?\$[\s\S]*?<\/code>/);
expect(html).not.toMatch(/<pre[\s\S]*?\$[\s\S]*?\$[\s\S]*?<\/pre>/);
// Test mixed content
// The same fraction and the same sum are expected once in LaTeX syntax and once in AsciiMath syntax.
expect(html).toMatch(/<span class="math-inline">\$\\frac\{1\}\{2\}\$<\/span>/);
expect(html).toMatch(/<span class="math-inline">\$1\/2\$<\/span>/);
expect(html).toMatch(/<div class="math-block">\$\$\\sum_\{i=1\}\^n x_i\$\$<\/div>/);
expect(html).toMatch(/<div class="math-block">\$\$sum_\(i=1\)\^n x_i\$\$<\/div>/);
});
/**
 * Renders each non-blank line of `content` on its own and checks two things:
 * a line that contains math syntax must produce a math-inline/math-block wrapper,
 * and the rendered HTML must not match the stray-math pattern.
 */
it('renders every line of the document: all math is wrapped', async () => {
  // None of these patterns carries the g/y flag, so .test() keeps no state and
  // the literals can be created once instead of on every iteration.
  // $...$, $$...$$, \(...\), \[...\], or a bare LaTeX command in the source line.
  const sourceMathPattern = /\$[^$]+\$|\$\$[\s\S]*?\$\$|\\\([^)]+\\\)|\\\[[^\]]+\\\]|\\[a-zA-Z]+(\{[^}]*\})*/;
  const wrapperPattern = /math-inline|math-block/;
  // Same shapes searched for in the rendered HTML.
  const strayMathPattern = /(^|[^>])\$[^$\n]+\$|\$\$[\s\S]*?\$\$|\\\([^)]+\\\)|\\\[[^\]]+\\\]|\\[a-zA-Z]+(\{[^}]*\})*/;

  for (const [index, sourceLine] of content.split(/\r?\n/).entries()) {
    // Blank and whitespace-only lines are never rendered.
    if (sourceLine.trim() === '') {
      continue;
    }

    // Lines are rendered strictly one after another, in document order.
    const rendered = await parseAdvancedmarkup(sourceLine);

    if (sourceMathPattern.test(sourceLine)) {
      const isWrapped = wrapperPattern.test(rendered);
      if (!isWrapped) {
        // Log the offending line (1-based) before the assertion fails.
        // eslint-disable-next-line no-console
        console.error(`Line ${index + 1} failed:`, sourceLine);
        // eslint-disable-next-line no-console
        console.error('Rendered HTML:', rendered);
      }
      expect(isWrapped).toBe(true);
    }

    // Applies to every rendered line, whether or not the source contained math.
    const hasStrayMath = strayMathPattern.test(rendered);
    expect(hasStrayMath).toBe(false);
  }
});
// NOTE(review): an `it(` header opens here and a second one opens a few lines below, with a
// single closing `});` — looks like interleaved diff lines; confirm which parts are current.
it('renders standalone math lines as display math blocks', async () => {
// Load the markdown fixture from test_data, resolved relative to this test file's directory.
const mdPath = require('path').join(__dirname, '../../test_data/latex_markdown.md');
const raw = require('fs').readFileSync(mdPath, 'utf-8');
// NOTE(review): JSON.parse throws on non-JSON input, so the `|| raw` fallback only applies
// when the file parses as JSON but has a falsy `content` — verify this is intended for a .md file.
const content = JSON.parse(raw).content || raw;
it('handles edge cases and regular code blocks', async () => {
const html = await parseAdvancedmarkup(content);
// Example: Bures distance line
expect(html).toMatch(/<div class="math-block my-4 text-center">\$\$d_B\([^$]+\) = [^$]+\$\$<\/div>/);
// Example: P(\rho) = ...
expect(html).toMatch(/<div class="math-block my-4 text-center">\$\$P\([^$]+\) = [^$]+\$\$<\/div>/);
// Test regular code blocks (should remain as code, not math)
// Dollar signs used as currency, shell text, template literals or SCSS variables must stay inside <code>.
expect(html).toMatch(/<code[^>]*>\$19\.99<\/code>/);
// The double quotes are expected HTML-escaped as &quot; in the rendered output.
expect(html).toMatch(/<code[^>]*>echo &quot;Price: \$100&quot;<\/code>/);
// This pattern expects a literal backslash before each backtick in the output.
expect(html).toMatch(/<code[^>]*>const price = \\`\$\$\{amount\}\\`<\/code>/);
expect(html).toMatch(/<code[^>]*>color: \$primary-color<\/code>/);
});
});

Loading…
Cancel
Save