clone of repo on github
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

378 lines
12 KiB

import { parseBasicMarkdown } from './basicMarkdownParser';
import hljs from 'highlight.js';
import 'highlight.js/lib/common'; // Import common languages
import 'highlight.js/styles/github-dark.css'; // Dark theme only
// Register common languages
hljs.configure({
ignoreUnescapedHTML: true
});
// Regular expressions for advanced markdown elements
const HEADING_REGEX = /^(#{1,6})\s+(.+)$/gm;
const ALTERNATE_HEADING_REGEX = /^([^\n]+)\n(=+|-+)\n/gm;
const INLINE_CODE_REGEX = /`([^`\n]+)`/g;
const HORIZONTAL_RULE_REGEX = /^(?:[-*_]\s*){3,}$/gm;
const FOOTNOTE_REFERENCE_REGEX = /\[\^([^\]]+)\]/g;
const FOOTNOTE_DEFINITION_REGEX = /^\[\^([^\]]+)\]:\s*(.+)$/gm;
/**
* Process headings (both styles)
*/
function processHeadings(content: string): string {
// Process ATX-style headings (# Heading)
let processedContent = content.replace(HEADING_REGEX, (_, level, text) => {
const headingLevel = level.length;
return `<h${headingLevel} class="text-2xl font-bold mt-6 mb-4">${text.trim()}</h${headingLevel}>`;
});
// Process Setext-style headings (Heading\n====)
processedContent = processedContent.replace(ALTERNATE_HEADING_REGEX, (_, text, level) => {
const headingLevel = level[0] === '=' ? 1 : 2;
return `<h${headingLevel} class="text-2xl font-bold mt-6 mb-4">${text.trim()}</h${headingLevel}>`;
});
return processedContent;
}
/**
* Process tables
*/
function processTables(content: string): string {
try {
if (!content) return '';
return content.replace(/^\|(.*(?:\n\|.*)*)/gm, (match) => {
try {
// Split into rows and clean up
const rows = match.split('\n').filter(row => row.trim());
if (rows.length < 1) return match;
// Helper to process a row into cells
const processCells = (row: string): string[] => {
return row
.split('|')
.slice(1, -1) // Remove empty cells from start/end
.map(cell => cell.trim());
};
// Check if second row is a delimiter row (only hyphens)
const hasHeader = rows.length > 1 && rows[1].trim().match(/^\|[-\s|]+\|$/);
// Extract header and body rows
let headerCells: string[] = [];
let bodyRows: string[] = [];
if (hasHeader) {
// If we have a header, first row is header, skip delimiter, rest is body
headerCells = processCells(rows[0]);
bodyRows = rows.slice(2);
} else {
// No header, all rows are body
bodyRows = rows;
}
// Build table HTML
let html = '<div class="overflow-x-auto my-4">\n';
html += '<table class="min-w-full border-collapse">\n';
// Add header if exists
if (hasHeader) {
html += '<thead>\n<tr>\n';
headerCells.forEach(cell => {
html += `<th class="py-2 px-4 text-left border-b-2 border-gray-200 dark:border-gray-700 font-semibold">${cell}</th>\n`;
});
html += '</tr>\n</thead>\n';
}
// Add body
html += '<tbody>\n';
bodyRows.forEach(row => {
const cells = processCells(row);
html += '<tr>\n';
cells.forEach(cell => {
html += `<td class="py-2 px-4 text-left border-b border-gray-200 dark:border-gray-700">${cell}</td>\n`;
});
html += '</tr>\n';
});
html += '</tbody>\n</table>\n</div>';
return html;
} catch (error) {
console.error('Error processing table row:', error);
return match;
}
});
} catch (error) {
console.error('Error in processTables:', error);
return content;
}
}
/**
* Process horizontal rules
*/
function processHorizontalRules(content: string): string {
return content.replace(HORIZONTAL_RULE_REGEX,
'<hr class="my-8 h-px border-0 bg-gray-200 dark:bg-gray-700">'
);
}
/**
* Process footnotes
*/
function processFootnotes(content: string): string {
try {
if (!content) return '';
// First collect all footnote references and definitions
const footnotes = new Map<string, string>();
const references = new Map<string, number>();
const referenceLocations = new Set<string>();
let nextNumber = 1;
// First pass: collect all references to establish order
let processedContent = content.replace(FOOTNOTE_REFERENCE_REGEX, (match, id) => {
if (!referenceLocations.has(id) && !references.has(id)) {
references.set(id, nextNumber++);
}
referenceLocations.add(id);
return match; // Keep the reference for now
});
// Second pass: collect all definitions
processedContent = processedContent.replace(FOOTNOTE_DEFINITION_REGEX, (match, id, text) => {
footnotes.set(id, text.trim());
return ''; // Remove the definition
});
// Third pass: process references with collected information
processedContent = processedContent.replace(FOOTNOTE_REFERENCE_REGEX, (match, id) => {
if (!footnotes.has(id)) {
console.warn(`Footnote reference [^${id}] found but no definition exists`);
return match;
}
const num = references.get(id)!;
return `<sup><a href="#fn-${id}" id="fnref-${id}" class="text-primary-600 hover:underline">[${num}]</a></sup>`;
});
// Add footnotes section if we have any
if (references.size > 0) {
processedContent += '\n\n<h2 class="text-xl font-bold mt-8 mb-4">Footnotes</h2>\n<ol class="list-decimal list-inside">\n';
// Sort footnotes by their reference number
const sortedFootnotes = Array.from(references.entries())
.sort((a, b) => a[1] - b[1])
.filter(([id]) => footnotes.has(id)); // Only include footnotes that have definitions
// Add each footnote in order
for (const [id, num] of sortedFootnotes) {
const text = footnotes.get(id) || '';
processedContent += `<li id="fn-${id}" value="${num}"><span class="marker">${text}</span> <a href="#fnref-${id}" class="text-primary-600 hover:underline">↩</a></li>\n`;
}
processedContent += '</ol>';
}
return processedContent;
} catch (error) {
console.error('Error processing footnotes:', error);
return content;
}
}
/**
* Process blockquotes
*/
function processBlockquotes(content: string): string {
// Match blockquotes that might span multiple lines
const blockquoteRegex = /^>[ \t]?(.+(?:\n>[ \t]?.+)*)/gm;
return content.replace(blockquoteRegex, (match) => {
// Remove the '>' prefix from each line and preserve line breaks
const text = match
.split('\n')
.map(line => line.replace(/^>[ \t]?/, ''))
.join('\n')
.trim();
return `<blockquote class="pl-4 border-l-4 border-gray-300 dark:border-gray-600 my-4 whitespace-pre-wrap">${text}</blockquote>`;
});
}
/**
* Process code blocks by finding consecutive code lines and preserving their content
*/
function processCodeBlocks(text: string): { text: string; blocks: Map<string, string> } {
const lines = text.split('\n');
const processedLines: string[] = [];
const blocks = new Map<string, string>();
let inCodeBlock = false;
let currentCode: string[] = [];
let currentLanguage = '';
let blockCount = 0;
let lastWasCodeBlock = false;
for (let i = 0; i < lines.length; i++) {
const line = lines[i];
const codeBlockStart = line.match(/^```(\w*)$/);
if (codeBlockStart) {
if (!inCodeBlock) {
// Starting a new code block
inCodeBlock = true;
currentLanguage = codeBlockStart[1];
currentCode = [];
lastWasCodeBlock = true;
} else {
// Ending current code block
blockCount++;
const id = `CODE_BLOCK_${blockCount}`;
const code = currentCode.join('\n');
// Try to format JSON if specified
let formattedCode = code;
if (currentLanguage.toLowerCase() === 'json') {
try {
formattedCode = JSON.stringify(JSON.parse(code), null, 2);
} catch (e) {
formattedCode = code;
}
}
blocks.set(id, JSON.stringify({
code: formattedCode,
language: currentLanguage,
raw: true
}));
processedLines.push(''); // Add spacing before code block
processedLines.push(id);
processedLines.push(''); // Add spacing after code block
inCodeBlock = false;
currentCode = [];
currentLanguage = '';
}
} else if (inCodeBlock) {
currentCode.push(line);
} else {
if (lastWasCodeBlock && line.trim()) {
processedLines.push('');
lastWasCodeBlock = false;
}
processedLines.push(line);
}
}
// Handle unclosed code block
if (inCodeBlock && currentCode.length > 0) {
blockCount++;
const id = `CODE_BLOCK_${blockCount}`;
const code = currentCode.join('\n');
// Try to format JSON if specified
let formattedCode = code;
if (currentLanguage.toLowerCase() === 'json') {
try {
formattedCode = JSON.stringify(JSON.parse(code), null, 2);
} catch (e) {
formattedCode = code;
}
}
blocks.set(id, JSON.stringify({
code: formattedCode,
language: currentLanguage,
raw: true
}));
processedLines.push('');
processedLines.push(id);
processedLines.push('');
}
return {
text: processedLines.join('\n'),
blocks
};
}
/**
* Restore code blocks with proper formatting
*/
function restoreCodeBlocks(text: string, blocks: Map<string, string>): string {
let result = text;
for (const [id, blockData] of blocks) {
try {
const { code, language } = JSON.parse(blockData);
let html;
if (language && hljs.getLanguage(language)) {
try {
const highlighted = hljs.highlight(code, {
language,
ignoreIllegals: true
}).value;
html = `<pre class="code-block"><code class="hljs language-${language}">${highlighted}</code></pre>`;
} catch (e) {
console.warn('Failed to highlight code block:', e);
html = `<pre class="code-block"><code class="hljs ${language ? `language-${language}` : ''}">${code}</code></pre>`;
}
} else {
html = `<pre class="code-block"><code class="hljs">${code}</code></pre>`;
}
result = result.replace(id, html);
} catch (error) {
console.error('Error restoring code block:', error);
result = result.replace(id, '<pre class="code-block"><code class="hljs">Error processing code block</code></pre>');
}
}
return result;
}
/**
* Parse markdown text with advanced formatting
*/
export async function parseAdvancedMarkdown(text: string): Promise<string> {
if (!text) return '';
try {
// Step 1: Extract and save code blocks first
const { text: withoutCode, blocks } = processCodeBlocks(text);
let processedText = withoutCode;
// Step 2: Process block-level elements
processedText = processTables(processedText);
processedText = processBlockquotes(processedText);
processedText = processHeadings(processedText);
processedText = processHorizontalRules(processedText);
// Process inline elements
processedText = processedText.replace(INLINE_CODE_REGEX, (_, code) => {
const escapedCode = code
.trim()
.replace(/&/g, '&amp;')
.replace(/</g, '&lt;')
.replace(/>/g, '&gt;')
.replace(/"/g, '&quot;')
.replace(/'/g, '&#039;');
return `<code class="px-1.5 py-0.5 bg-white dark:bg-gray-900 border border-gray-200 dark:border-gray-700 rounded text-sm font-mono">${escapedCode}</code>`;
});
// Process footnotes
processedText = processFootnotes(processedText);
// Process basic markdown (which will also handle Nostr identifiers)
processedText = await parseBasicMarkdown(processedText);
// Step 3: Restore code blocks
processedText = restoreCodeBlocks(processedText, blocks);
return processedText;
} catch (error) {
console.error('Error in parseAdvancedMarkdown:', error);
return `<div class="text-red-500">Error processing markdown: ${error instanceof Error ? error.message : 'Unknown error'}</div>`;
}
}