fix markdown

3 months ago · 3498f764d4
4 changed files with 240 additions and 33 deletions
--- a/asciidoc_testdoc.adoc
+++ b/asciidoc_testdoc.adoc
@ -364,13 +364,13 @@ X^2^
				@@ -364,13 +364,13 @@ X^2^

 === Delimiter

-based upon a -
+based upon a single quote

 '''

-based upon a *
+based upon dashes

-'''
+---

 === Quotes

--- a/markdown_testdoc.md
+++ b/markdown_testdoc.md
@ -129,26 +129,18 @@ https://blog.ronin.cloud/content/images/size/w2000/2022/02/markdown.png
				@@ -129,26 +129,18 @@ https://blog.ronin.cloud/content/images/size/w2000/2022/02/markdown.png

 https://youtube.com/shorts/ZWfvChb-i0w

-![Youtube link](https://youtube.com/shorts/ZWfvChb-i0w)
-
 #### Spotify

 https://open.spotify.com/episode/1GSZFA8vWltPyxYkArdRKx?si=bq6-az28TcuP596feTkRFQ

-![Spotify link](https://open.spotify.com/episode/1GSZFA8vWltPyxYkArdRKx?si=bq6-az28TcuP596feTkRFQ)
-
 #### Audio

 https://media.blubrry.com/takeituneasy/ins.blubrry.com/takeituneasy/lex_ai_rick_beato.mp3

-![Audio link](https://media.blubrry.com/takeituneasy/ins.blubrry.com/takeituneasy/lex_ai_rick_beato.mp3)
-
 #### Video

 https://v.nostr.build/MTjaYib4upQuf8zn.mp4

-![Video link](https://v.nostr.build/MTjaYib4upQuf8zn.mp4)
-
 ## Tables

 ### Orderly
@ -165,13 +157,6 @@ https://v.nostr.build/MTjaYib4upQuf8zn.mp4
				@@ -165,13 +157,6 @@ https://v.nostr.build/MTjaYib4upQuf8zn.mp4
 | Header | Title |
 | Paragraph | Text |

-### With alignment
-
-| Syntax      | Description | Test Text     |
-| :---        |    :----:   |          ---: |
-| Header      | Title       | Here's this   |
-| Paragraph   | Text        | And more      |
-
 ## Code blocks

 ### json
@ -235,10 +220,6 @@ $$
				@@ -235,10 +220,6 @@ $$

 `$[ x^n + y^n = z^n \]$` and `$[\sqrt{x^2+1}\]$` and `$\color{blue}{X \sim Normal \; (\mu,\sigma^2)}$`

-## LaTex outside of code
-
-This is a latex code block $$\mathbb{N} = \{ a \in \mathbb{Z} : a > 0 \}$$ and another that is an inline latex $\color{green}{X \sim Normal \; (\mu,\sigma^2)}$ and should be green
-
 ## Footnotes

 Here's a simple footnote,[^1] and here's a longer one.[^bignote]
@ -255,7 +236,7 @@ Here's a simple footnote,[^1] and here's a longer one.[^bignote]
				@@ -255,7 +236,7 @@ Here's a simple footnote,[^1] and here's a longer one.[^bignote]

 ### Strikethrough 

-~~The world is flat.~~ We now know that the world is round. This should not be ~struck~ through.
+~~The world is flat.~~ We now know that the world is round.

 ### Bold

@ -273,14 +254,8 @@ Gone camping! :tent: Be back soon.
				@@ -273,14 +254,8 @@ Gone camping! :tent: Be back soon.

 That is so funny! :joy:

-### Marking and highlighting text
-
-I need to highlight these ==very important words==.
-
 ### Subscript and Superscript

-H~2~O
-
 X^2^

 ### Delimiter
--- a/src/post-processor.ts
+++ b/src/post-processor.ts
@ -120,7 +120,8 @@ export function postProcess(html: string, options: ParserOptions, skipWikilinksA
				@@ -120,7 +120,8 @@ export function postProcess(html: string, options: ParserOptions, skipWikilinksA
    });

    // Process hashtags: #hashtag (but not in code blocks or inside HTML tags)
-    const hashtagRegex = /(^|\s|>)(#[\w-]+)/g;
+    // Match every #hashtag candidate; the loop below keeps only those at start of string, after whitespace, after >, or right after an opening tag
+    const hashtagRegex = /(#[\w-]+)/g;
    const hashtagReplacements: Array<{ match: string; replacement: string; index: number }> = [];
    
    while ((match = hashtagRegex.exec(processed)) !== null) {
@ -139,9 +140,30 @@ export function postProcess(html: string, options: ParserOptions, skipWikilinksA
				@@ -139,9 +140,30 @@ export function postProcess(html: string, options: ParserOptions, skipWikilinksA
      const lastSpanClose = beforeMatch.lastIndexOf('</span>');
      if (lastLinkOpen > lastLinkClose || lastSpanOpen > lastSpanClose) continue;
      
-      const hashtag = match[2];
-      const prefix = match[1];
+      // Check what's before the hashtag
+      const charBefore = match.index > 0 ? processed[match.index - 1] : '';
+      const beforeHashtag = processed.substring(Math.max(0, match.index - 100), match.index);
+      const lastTagClose = beforeHashtag.lastIndexOf('>');
+      const textAfterTag = beforeHashtag.substring(lastTagClose + 1);
+      
+      // Hashtag is valid if:
+      // 1. At start of string
+      // 2. Preceded by whitespace
+      // 3. Preceded by >
+      // 4. Immediately after opening tag (like <p>#hashtag)
+      const isValidPosition = 
+        match.index === 0 ||
+        /\s/.test(charBefore) ||
+        charBefore === '>' ||
+        (lastTagClose >= 0 && /^[\s\n]*$/.test(textAfterTag));
+      
+      if (!isValidPosition) continue;
+      
+      const hashtag = match[1];
      const topic = hashtag.substring(1);
+      const prefix = (match.index === 0 || charBefore === '>' || (lastTagClose >= 0 && /^[\s\n]*$/.test(textAfterTag))) 
+        ? '' 
+        : charBefore;
      
      if (!hashtags.includes(topic)) {
        hashtags.push(topic);
@ -446,6 +468,65 @@ export function postProcess(html: string, options: ParserOptions, skipWikilinksA
				@@ -446,6 +468,65 @@ export function postProcess(html: string, options: ParserOptions, skipWikilinksA
    processed = processed.substring(0, index) + replacement + processed.substring(index + match.length);
  });

+  // Process markdown table alignment
+  // Marked generates table cells with align attributes or inline text-align styles; replace those with CSS classes for styling
+  // Match tables and process alignment on th/td elements
+  const tableRegex = /<table[^>]*>([\s\S]*?)<\/table>/gi;
+  processed = processed.replace(tableRegex, (tableMatch: string, tableContent: string) => {
+    // Start from the table's inner HTML; its cells are rewritten below
+    let processedTable = tableContent;
+    
+    // Find all th and td elements - check for align attribute or style with text-align
+    const cellRegex = /<(th|td)([^>]*)>([\s\S]*?)<\/\1>/gi;
+    processedTable = processedTable.replace(cellRegex, (cellMatch: string, tag: string, attrs: string, content: string) => {
+      let align: string | null = null;
+      let newAttrs = attrs;
+      
+      // Check for align attribute
+      const alignMatch = attrs.match(/align=["'](left|center|right)["']/i);
+      if (alignMatch) {
+        align = alignMatch[1].toLowerCase();
+        newAttrs = newAttrs.replace(/\s*align=["'](left|center|right)["']/i, '');
+      } else {
+        // Check for style attribute with text-align
+        const styleMatch = attrs.match(/style=["']([^"']*text-align:\s*(left|center|right)[^"']*)["']/i);
+        if (styleMatch) {
+          align = styleMatch[2].toLowerCase();
+          // Remove text-align from style
+          const styleContent = styleMatch[1].replace(/text-align:\s*(left|center|right);?/gi, '').trim();
+          if (styleContent) {
+            newAttrs = newAttrs.replace(/style=["'][^"']+["']/, `style="${styleContent}"`);
+          } else {
+            newAttrs = newAttrs.replace(/\s*style=["'][^"']+["']/, '');
+          }
+        }
+      }
+      
+      // If we found alignment, add CSS class
+      if (align) {
+        const alignClass = align === 'left' ? 'halign-left' : 
+                          align === 'center' ? 'halign-center' : 'halign-right';
+        
+        // If there's already a class attribute, merge them
+        if (newAttrs.includes('class=')) {
+          const classMatch = newAttrs.match(/class=["']([^"']+)["']/);
+          if (classMatch) {
+            const existingClass = classMatch[1];
+            if (!existingClass.includes(alignClass)) {
+              newAttrs = newAttrs.replace(/class=["'][^"']+["']/, `class="${existingClass} ${alignClass}"`);
+            }
+          }
+        } else {
+          newAttrs = `${newAttrs} class="${alignClass}"`.trim();
+        }
+      }
+      
+      return `<${tag}${newAttrs}>${content}</${tag}>`;
+    });
+    
+    return `<table>${processedTable}</table>`;
+  });
+
  return {
    html: processed,
    nostrLinks,
--- a/src/processors/markdown.ts
+++ b/src/processors/markdown.ts
@ -81,8 +81,159 @@ export function processMarkdown(content: string, options: ParserOptions): Markdo
				@@ -81,8 +81,159 @@ export function processMarkdown(content: string, options: ParserOptions): Markdo
  // Process emoji shortcodes before markdown processing
  let processedContent = emoji.emojify(contentWithoutFrontmatter);

+  // Extract and process footnotes before markdown parsing
+  // Footnotes format: [^1] in text and [^1]: definition at end
+  const footnoteDefinitions: Map<string, string> = new Map();
+  let placeholderCounter = 0;
+  
+  // First, extract footnote definitions
+  const lines = processedContent.split('\n');
+  const processedLines: string[] = [];
+  let i = 0;
+  
+  while (i < lines.length) {
+    const line = lines[i];
+    const footnoteDefMatch = line.match(/^\[\^([^\]]+)\]:\s*(.*)$/);
+    if (footnoteDefMatch) {
+      const id = footnoteDefMatch[1];
+      let definition = footnoteDefMatch[2];
+      
+      // Collect multi-line definition (until the next definition, or a blank line followed by non-blank content)
+      i++;
+      while (i < lines.length) {
+        const nextLine = lines[i];
+        if (nextLine.match(/^\[\^[^\]]+\]:/) || (nextLine.trim() === '' && i + 1 < lines.length && lines[i + 1].trim() !== '' && !lines[i + 1].match(/^\[\^[^\]]+\]:/))) {
+          break;
+        }
+        if (nextLine.trim() === '' && i + 1 < lines.length && lines[i + 1].match(/^\[\^[^\]]+\]:/)) {
+          break;
+        }
+        definition += '\n' + nextLine;
+        i++;
+      }
+      
+      footnoteDefinitions.set(id, definition.trim());
+      // Skip adding this line to processedLines (removing the definition)
+      continue;
+    }
+    
+    processedLines.push(line);
+    i++;
+  }
+  
+  processedContent = processedLines.join('\n');
+  
+  // Now replace footnote references with placeholders before markdown parsing
+  // Use HTML-like placeholder that markdown will pass through as-is
+  const footnoteRefRegex = /\[\^([^\]]+)\]/g;
+  let refMatch;
+  while ((refMatch = footnoteRefRegex.exec(processedContent)) !== null) {
+    const id = refMatch[1];
+    if (footnoteDefinitions.has(id)) {
+      const placeholder = `<span data-footnote-placeholder="${placeholderCounter++}" data-footnote-id="${id}"></span>`;
+      processedContent = processedContent.substring(0, refMatch.index) + 
+                        placeholder + 
+                        processedContent.substring(refMatch.index + refMatch[0].length);
+      // Reset regex since we modified the string
+      footnoteRefRegex.lastIndex = 0;
+    }
+  }
+
  // Convert markdown to HTML
-  const html = marked.parse(processedContent) as string;
+  let html = marked.parse(processedContent) as string;
+
+  // Process superscripts in HTML (X^2^ syntax) - after markdown parsing to avoid conflicts
+  // But skip inside code blocks
+  const codeBlockRegex = /<(pre|code)[^>]*>[\s\S]*?<\/\1>/gi;
+  const codeBlocks: Array<{ start: number; end: number; content: string }> = [];
+  let codeMatch;
+  while ((codeMatch = codeBlockRegex.exec(html)) !== null) {
+    codeBlocks.push({
+      start: codeMatch.index,
+      end: codeMatch.index + codeMatch[0].length,
+      content: codeMatch[0]
+    });
+  }
+  
+  function isInCodeBlock(index: number): boolean {
+    return codeBlocks.some(block => index >= block.start && index < block.end);
+  }
+  
+  // Process superscripts
+  const superscriptRegex = /\^([^\^<>\n]+)\^/g;
+  const superscriptReplacements: Array<{ match: string; replacement: string; index: number }> = [];
+  let supMatch;
+  while ((supMatch = superscriptRegex.exec(html)) !== null) {
+    if (isInCodeBlock(supMatch.index)) continue;
+    superscriptReplacements.push({
+      match: supMatch[0],
+      replacement: `<sup>${supMatch[1]}</sup>`,
+      index: supMatch.index
+    });
+  }
+  
+  // Apply superscript replacements in reverse order
+  superscriptReplacements.reverse().forEach(({ match, replacement, index }) => {
+    html = html.substring(0, index) + replacement + html.substring(index + match.length);
+  });
+
+  // Replace footnote placeholders with actual footnote HTML
+  let footnoteCounter = 1;
+  const footnoteRefs: Array<{ id: string; num: number; definition: string }> = [];
+  const footnoteRefMap: Map<string, number> = new Map();
+  
+  // First, assign numbers to all footnote definitions
+  footnoteDefinitions.forEach((definition, id) => {
+    const num = footnoteCounter++;
+    footnoteRefMap.set(id, num);
+    footnoteRefs.push({ id, num, definition });
+  });
+  
+  // Replace HTML span placeholders with footnote HTML
+  // Find all span elements with data-footnote-placeholder attribute
+  const placeholderRegex = /<span data-footnote-placeholder="(\d+)" data-footnote-id="([^"]+)"><\/span>/g;
+  html = html.replace(placeholderRegex, (match, placeholderNum, id) => {
+    const num = footnoteRefMap.get(id);
+    if (num !== undefined) {
+      return `<sup class="footnote"><a id="footnoteref_${num}" class="footnote" href="#footnotedef_${num}" title="View footnote.">${num}</a></sup>`;
+    }
+    return match; // Return original if no definition found
+  });
+
+  // Add footnotes section at the end if there are any
+  if (footnoteRefs.length > 0) {
+    let footnotesHtml = '<div id="footnotes"><hr>';
+    footnoteRefs.forEach(({ id, num, definition }) => {
+      // Process the definition through markdown again to handle formatting
+      const defHtml = marked.parse(definition) as string;
+      footnotesHtml += `<div class="footnote" id="footnotedef_${num}"><a href="#footnoteref_${num}">${num}</a>. ${defHtml}</div>`;
+    });
+    footnotesHtml += '</div>';
+    html += footnotesHtml;
+  }
+  
+  // Fix anchor links - markdown headers need IDs
+  // Marked generates headers but may not have proper IDs for anchor links
+  // Process headers to add IDs based on their text content (if they don't already have one)
+  html = html.replace(/<h([1-6])([^>]*)>([^<]+)<\/h[1-6]>/gi, (match: string, level: string, attrs: string, text: string) => {
+    // Skip if header already has an id attribute
+    if (attrs && /id=["'][^"']+["']/i.test(attrs)) {
+      return match;
+    }
+    
+    // Generate ID from header text (similar to GitHub markdown)
+    const id = text
+      .toLowerCase()
+      .trim()
+      .replace(/[^\w\s-]/g, '') // Remove special chars
+      .replace(/\s+/g, '-') // Replace spaces with hyphens
+      .replace(/-+/g, '-') // Replace multiple hyphens with single
+      .replace(/^-|-$/g, ''); // Remove leading/trailing hyphens
+    
+    // Add id attribute
+    const newAttrs = attrs ? `${attrs} id="${id}"` : `id="${id}"`;
+    return `<h${level} ${newAttrs}>${text}</h${level}>`;
+  });

  return {
    html,