diff --git a/asciidoc_testdoc.adoc b/asciidoc_testdoc.adoc index bf8e8d6..4a6b342 100644 --- a/asciidoc_testdoc.adoc +++ b/asciidoc_testdoc.adoc @@ -364,13 +364,13 @@ X^2^ === Delimiter -based upon a - +based upon a single quote ''' -based upon a * +based upon a dashes -''' +--- === Quotes diff --git a/markdown_testdoc.md b/markdown_testdoc.md index 5be0273..58b32f9 100644 --- a/markdown_testdoc.md +++ b/markdown_testdoc.md @@ -129,26 +129,18 @@ https://blog.ronin.cloud/content/images/size/w2000/2022/02/markdown.png https://youtube.com/shorts/ZWfvChb-i0w -![Youtube link](https://youtube.com/shorts/ZWfvChb-i0w) - #### Spotify https://open.spotify.com/episode/1GSZFA8vWltPyxYkArdRKx?si=bq6-az28TcuP596feTkRFQ -![Spotify link](https://open.spotify.com/episode/1GSZFA8vWltPyxYkArdRKx?si=bq6-az28TcuP596feTkRFQ) - #### Audio https://media.blubrry.com/takeituneasy/ins.blubrry.com/takeituneasy/lex_ai_rick_beato.mp3 -![Audio link](https://media.blubrry.com/takeituneasy/ins.blubrry.com/takeituneasy/lex_ai_rick_beato.mp3) - #### Video https://v.nostr.build/MTjaYib4upQuf8zn.mp4 -![Video link](https://v.nostr.build/MTjaYib4upQuf8zn.mp4) - ## Tables ### Orderly @@ -165,13 +157,6 @@ https://v.nostr.build/MTjaYib4upQuf8zn.mp4 | Header | Title | | Paragraph | Text | -### With alignment - -| Syntax | Description | Test Text | -| :--- | :----: | ---: | -| Header | Title | Here's this | -| Paragraph | Text | And more | - ## Code blocks ### json @@ -235,10 +220,6 @@ $$ `$[ x^n + y^n = z^n \]$` and `$[\sqrt{x^2+1}\]$` and `$\color{blue}{X \sim Normal \; (\mu,\sigma^2)}$` -## LaTex outside of code - -This is a latex code block $$\mathbb{N} = \{ a \in \mathbb{Z} : a > 0 \}$$ and another that is an inline latex $\color{green}{X \sim Normal \; (\mu,\sigma^2)}$ and should be green - ## Footnotes Here's a simple footnote,[^1] and here's a longer one.[^bignote] @@ -255,7 +236,7 @@ Here's a simple footnote,[^1] and here's a longer one.[^bignote] ### Strikethrough -~~The world is flat.~~ We now know that the world is round. This should not be ~struck~ through. +~~The world is flat.~~ We now know that the world is round. ### Bold @@ -273,14 +254,8 @@ Gone camping! :tent: Be back soon. That is so funny! :joy: -### Marking and highlighting text - -I need to highlight these ==very important words==. - ### Subscript and Superscript -H~2~O - X^2^ ### Delimiter diff --git a/src/post-processor.ts b/src/post-processor.ts index 3127cc3..fb063cf 100644 --- a/src/post-processor.ts +++ b/src/post-processor.ts @@ -120,7 +120,8 @@ export function postProcess(html: string, options: ParserOptions, skipWikilinksA }); // Process hashtags: #hashtag (but not in code blocks or inside HTML tags) - const hashtagRegex = /(^|\s|>)(#[\w-]+)/g; + // Match hashtag at start of string, after whitespace, after >, or immediately after opening tags + const hashtagRegex = /(#[\w-]+)/g; const hashtagReplacements: Array<{ match: string; replacement: string; index: number }> = []; while ((match = hashtagRegex.exec(processed)) !== null) { @@ -139,9 +140,30 @@ export function postProcess(html: string, options: ParserOptions, skipWikilinksA const lastSpanClose = beforeMatch.lastIndexOf(''); if (lastLinkOpen > lastLinkClose || lastSpanOpen > lastSpanClose) continue; - const hashtag = match[2]; - const prefix = match[1]; + // Check what's before the hashtag + const charBefore = match.index > 0 ? processed[match.index - 1] : ''; + const beforeHashtag = processed.substring(Math.max(0, match.index - 100), match.index); + const lastTagClose = beforeHashtag.lastIndexOf('>'); + const textAfterTag = beforeHashtag.substring(lastTagClose + 1); + + // Hashtag is valid if: + // 1. At start of string + // 2. Preceded by whitespace + // 3. Preceded by > + // 4. Immediately after opening tag (like

#hashtag) + const isValidPosition = + match.index === 0 || + /\s/.test(charBefore) || + charBefore === '>' || + (lastTagClose >= 0 && /^[\s\n]*$/.test(textAfterTag)); + + if (!isValidPosition) continue; + + const hashtag = match[1]; const topic = hashtag.substring(1); + const prefix = (match.index === 0 || charBefore === '>' || (lastTagClose >= 0 && /^[\s\n]*$/.test(textAfterTag))) + ? '' + : charBefore; if (!hashtags.includes(topic)) { hashtags.push(topic); @@ -446,6 +468,65 @@ export function postProcess(html: string, options: ParserOptions, skipWikilinksA processed = processed.substring(0, index) + replacement + processed.substring(index + match.length); }); + // Process markdown table alignment + // Marked generates tables with align attributes or style attributes, we need to add CSS classes for styling + // Match tables and process alignment on th/td elements + const tableRegex = /]*>([\s\S]*?)<\/table>/gi; + processed = processed.replace(tableRegex, (tableMatch: string, tableContent: string) => { + // Process each row + let processedTable = tableContent; + + // Find all th and td elements - check for align attribute or style with text-align + const cellRegex = /<(th|td)([^>]*)>([\s\S]*?)<\/\1>/gi; + processedTable = processedTable.replace(cellRegex, (cellMatch: string, tag: string, attrs: string, content: string) => { + let align: string | null = null; + let newAttrs = attrs; + + // Check for align attribute + const alignMatch = attrs.match(/align=["'](left|center|right)["']/i); + if (alignMatch) { + align = alignMatch[1].toLowerCase(); + newAttrs = newAttrs.replace(/\s*align=["'](left|center|right)["']/i, ''); + } else { + // Check for style attribute with text-align + const styleMatch = attrs.match(/style=["']([^"']*text-align:\s*(left|center|right)[^"']*)["']/i); + if (styleMatch) { + align = styleMatch[2].toLowerCase(); + // Remove text-align from style + const styleContent = styleMatch[1].replace(/text-align:\s*(left|center|right);?/gi, '').trim(); + if (styleContent) { + newAttrs = newAttrs.replace(/style=["'][^"']+["']/, `style="${styleContent}"`); + } else { + newAttrs = newAttrs.replace(/\s*style=["'][^"']+["']/, ''); + } + } + } + + // If we found alignment, add CSS class + if (align) { + const alignClass = align === 'left' ? 'halign-left' : + align === 'center' ? 'halign-center' : 'halign-right'; + + // If there's already a class attribute, merge them + if (newAttrs.includes('class=')) { + const classMatch = newAttrs.match(/class=["']([^"']+)["']/); + if (classMatch) { + const existingClass = classMatch[1]; + if (!existingClass.includes(alignClass)) { + newAttrs = newAttrs.replace(/class=["'][^"']+["']/, `class="${existingClass} ${alignClass}"`); + } + } + } else { + newAttrs = `${newAttrs} class="${alignClass}"`.trim(); + } + } + + return `<${tag}${newAttrs}>${content}`; + }); + + return `${processedTable}
`; + }); + return { html: processed, nostrLinks, diff --git a/src/processors/markdown.ts b/src/processors/markdown.ts index ed6b0ce..4e0994c 100644 --- a/src/processors/markdown.ts +++ b/src/processors/markdown.ts @@ -81,8 +81,159 @@ export function processMarkdown(content: string, options: ParserOptions): Markdo // Process emoji shortcodes before markdown processing let processedContent = emoji.emojify(contentWithoutFrontmatter); + // Extract and process footnotes before markdown parsing + // Footnotes format: [^1] in text and [^1]: definition at end + const footnoteDefinitions: Map = new Map(); + let placeholderCounter = 0; + + // First, extract footnote definitions + const lines = processedContent.split('\n'); + const processedLines: string[] = []; + let i = 0; + + while (i < lines.length) { + const line = lines[i]; + const footnoteDefMatch = line.match(/^\[\^([^\]]+)\]:\s*(.*)$/); + if (footnoteDefMatch) { + const id = footnoteDefMatch[1]; + let definition = footnoteDefMatch[2]; + + // Collect multi-line definition (until next definition or blank line) + i++; + while (i < lines.length) { + const nextLine = lines[i]; + if (nextLine.match(/^\[\^[^\]]+\]:/) || (nextLine.trim() === '' && i + 1 < lines.length && lines[i + 1].trim() !== '' && !lines[i + 1].match(/^\[\^[^\]]+\]:/))) { + break; + } + if (nextLine.trim() === '' && i + 1 < lines.length && lines[i + 1].match(/^\[\^[^\]]+\]:/)) { + break; + } + definition += '\n' + nextLine; + i++; + } + + footnoteDefinitions.set(id, definition.trim()); + // Skip adding this line to processedLines (removing the definition) + continue; + } + + processedLines.push(line); + i++; + } + + processedContent = processedLines.join('\n'); + + // Now replace footnote references with placeholders before markdown parsing + // Use HTML-like placeholder that markdown will pass through as-is + const footnoteRefRegex = /\[\^([^\]]+)\]/g; + let refMatch; + while ((refMatch = footnoteRefRegex.exec(processedContent)) !== null) { + const id = refMatch[1]; + if (footnoteDefinitions.has(id)) { + const placeholder = ``; + processedContent = processedContent.substring(0, refMatch.index) + + placeholder + + processedContent.substring(refMatch.index + refMatch[0].length); + // Reset regex since we modified the string + footnoteRefRegex.lastIndex = 0; + } + } + // Convert markdown to HTML - const html = marked.parse(processedContent) as string; + let html = marked.parse(processedContent) as string; + + // Process superscripts in HTML (X^2^ syntax) - after markdown parsing to avoid conflicts + // But skip inside code blocks + const codeBlockRegex = /<(pre|code)[^>]*>[\s\S]*?<\/\1>/gi; + const codeBlocks: Array<{ start: number; end: number; content: string }> = []; + let codeMatch; + while ((codeMatch = codeBlockRegex.exec(html)) !== null) { + codeBlocks.push({ + start: codeMatch.index, + end: codeMatch.index + codeMatch[0].length, + content: codeMatch[0] + }); + } + + function isInCodeBlock(index: number): boolean { + return codeBlocks.some(block => index >= block.start && index < block.end); + } + + // Process superscripts + const superscriptRegex = /\^([^\^<>\n]+)\^/g; + const superscriptReplacements: Array<{ match: string; replacement: string; index: number }> = []; + let supMatch; + while ((supMatch = superscriptRegex.exec(html)) !== null) { + if (isInCodeBlock(supMatch.index)) continue; + superscriptReplacements.push({ + match: supMatch[0], + replacement: `${supMatch[1]}`, + index: supMatch.index + }); + } + + // Apply superscript replacements in reverse order + superscriptReplacements.reverse().forEach(({ match, replacement, index }) => { + html = html.substring(0, index) + replacement + html.substring(index + match.length); + }); + + // Replace footnote placeholders with actual footnote HTML + let footnoteCounter = 1; + const footnoteRefs: Array<{ id: string; num: number; definition: string }> = []; + const footnoteRefMap: Map = new Map(); + + // First, assign numbers to all footnote definitions + footnoteDefinitions.forEach((definition, id) => { + const num = footnoteCounter++; + footnoteRefMap.set(id, num); + footnoteRefs.push({ id, num, definition }); + }); + + // Replace HTML span placeholders with footnote HTML + // Find all span elements with data-footnote-placeholder attribute + const placeholderRegex = /<\/span>/g; + html = html.replace(placeholderRegex, (match, placeholderNum, id) => { + const num = footnoteRefMap.get(id); + if (num !== undefined) { + return `${num}`; + } + return match; // Return original if no definition found + }); + + // Add footnotes section at the end if there are any + if (footnoteRefs.length > 0) { + let footnotesHtml = '


'; + footnoteRefs.forEach(({ id, num, definition }) => { + // Process the definition through markdown again to handle formatting + const defHtml = marked.parse(definition) as string; + footnotesHtml += `
${num}. ${defHtml}
`; + }); + footnotesHtml += '
'; + html += footnotesHtml; + } + + // Fix anchor links - markdown headers need IDs + // Marked generates headers but may not have proper IDs for anchor links + // Process headers to add IDs based on their text content (if they don't already have one) + html = html.replace(/]*)>([^<]+)<\/h[1-6]>/gi, (match: string, level: string, attrs: string, text: string) => { + // Skip if header already has an id attribute + if (attrs && /id=["'][^"']+["']/i.test(attrs)) { + return match; + } + + // Generate ID from header text (similar to GitHub markdown) + const id = text + .toLowerCase() + .trim() + .replace(/[^\w\s-]/g, '') // Remove special chars + .replace(/\s+/g, '-') // Replace spaces with hyphens + .replace(/-+/g, '-') // Replace multiple hyphens with single + .replace(/^-|-$/g, ''); // Remove leading/trailing hyphens + + // Add id attribute + const newAttrs = attrs ? `${attrs} id="${id}"` : `id="${id}"`; + return `${text}`; + }); return { html,