Browse Source

fix markdown

master
Silberengel 2 weeks ago
parent
commit
3498f764d4
  1. 6
      asciidoc_testdoc.adoc
  2. 27
      markdown_testdoc.md
  3. 87
      src/post-processor.ts
  4. 153
      src/processors/markdown.ts

6
asciidoc_testdoc.adoc

@ -364,13 +364,13 @@ X^2^ @@ -364,13 +364,13 @@ X^2^
=== Delimiter
based upon a -
based upon a single quote
'''
based upon a *
based upon a dashes
'''
---
=== Quotes

27
markdown_testdoc.md

@ -129,26 +129,18 @@ https://blog.ronin.cloud/content/images/size/w2000/2022/02/markdown.png @@ -129,26 +129,18 @@ https://blog.ronin.cloud/content/images/size/w2000/2022/02/markdown.png
https://youtube.com/shorts/ZWfvChb-i0w
![Youtube link](https://youtube.com/shorts/ZWfvChb-i0w)
#### Spotify
https://open.spotify.com/episode/1GSZFA8vWltPyxYkArdRKx?si=bq6-az28TcuP596feTkRFQ
![Spotify link](https://open.spotify.com/episode/1GSZFA8vWltPyxYkArdRKx?si=bq6-az28TcuP596feTkRFQ)
#### Audio
https://media.blubrry.com/takeituneasy/ins.blubrry.com/takeituneasy/lex_ai_rick_beato.mp3
![Audio link](https://media.blubrry.com/takeituneasy/ins.blubrry.com/takeituneasy/lex_ai_rick_beato.mp3)
#### Video
https://v.nostr.build/MTjaYib4upQuf8zn.mp4
![Video link](https://v.nostr.build/MTjaYib4upQuf8zn.mp4)
## Tables
### Orderly
@ -165,13 +157,6 @@ https://v.nostr.build/MTjaYib4upQuf8zn.mp4 @@ -165,13 +157,6 @@ https://v.nostr.build/MTjaYib4upQuf8zn.mp4
| Header | Title |
| Paragraph | Text |
### With alignment
| Syntax | Description | Test Text |
| :--- | :----: | ---: |
| Header | Title | Here's this |
| Paragraph | Text | And more |
## Code blocks
### json
@ -235,10 +220,6 @@ $$ @@ -235,10 +220,6 @@ $$
`$[ x^n + y^n = z^n \]$` and `$[\sqrt{x^2+1}\]$` and `$\color{blue}{X \sim Normal \; (\mu,\sigma^2)}$`
## LaTex outside of code
This is a latex code block $$\mathbb{N} = \{ a \in \mathbb{Z} : a > 0 \}$$ and another that is an inline latex $\color{green}{X \sim Normal \; (\mu,\sigma^2)}$ and should be green
## Footnotes
Here's a simple footnote,[^1] and here's a longer one.[^bignote]
@ -255,7 +236,7 @@ Here's a simple footnote,[^1] and here's a longer one.[^bignote] @@ -255,7 +236,7 @@ Here's a simple footnote,[^1] and here's a longer one.[^bignote]
### Strikethrough
~~The world is flat.~~ We now know that the world is round. This should not be ~struck~ through.
~~The world is flat.~~ We now know that the world is round.
### Bold
@ -273,14 +254,8 @@ Gone camping! :tent: Be back soon. @@ -273,14 +254,8 @@ Gone camping! :tent: Be back soon.
That is so funny! :joy:
### Marking and highlighting text
I need to highlight these ==very important words==.
### Subscript and Superscript
H~2~O
X^2^
### Delimiter

87
src/post-processor.ts

@ -120,7 +120,8 @@ export function postProcess(html: string, options: ParserOptions, skipWikilinksA @@ -120,7 +120,8 @@ export function postProcess(html: string, options: ParserOptions, skipWikilinksA
});
// Process hashtags: #hashtag (but not in code blocks or inside HTML tags)
const hashtagRegex = /(^|\s|>)(#[\w-]+)/g;
// Match every #hashtag candidate; the position check below keeps only those at the start of the string, after whitespace, after >, or immediately after an opening tag
const hashtagRegex = /(#[\w-]+)/g;
const hashtagReplacements: Array<{ match: string; replacement: string; index: number }> = [];
while ((match = hashtagRegex.exec(processed)) !== null) {
@ -139,9 +140,30 @@ export function postProcess(html: string, options: ParserOptions, skipWikilinksA @@ -139,9 +140,30 @@ export function postProcess(html: string, options: ParserOptions, skipWikilinksA
const lastSpanClose = beforeMatch.lastIndexOf('</span>');
if (lastLinkOpen > lastLinkClose || lastSpanOpen > lastSpanClose) continue;
const hashtag = match[2];
const prefix = match[1];
// Check what's before the hashtag
const charBefore = match.index > 0 ? processed[match.index - 1] : '';
const beforeHashtag = processed.substring(Math.max(0, match.index - 100), match.index);
const lastTagClose = beforeHashtag.lastIndexOf('>');
const textAfterTag = beforeHashtag.substring(lastTagClose + 1);
// Hashtag is valid if:
// 1. At start of string
// 2. Preceded by whitespace
// 3. Preceded by >
// 4. Immediately after opening tag (like <p>#hashtag)
const isValidPosition =
match.index === 0 ||
/\s/.test(charBefore) ||
charBefore === '>' ||
(lastTagClose >= 0 && /^[\s\n]*$/.test(textAfterTag));
if (!isValidPosition) continue;
const hashtag = match[1];
const topic = hashtag.substring(1);
const prefix = (match.index === 0 || charBefore === '>' || (lastTagClose >= 0 && /^[\s\n]*$/.test(textAfterTag)))
? ''
: charBefore;
if (!hashtags.includes(topic)) {
hashtags.push(topic);
@ -446,6 +468,65 @@ export function postProcess(html: string, options: ParserOptions, skipWikilinksA @@ -446,6 +468,65 @@ export function postProcess(html: string, options: ParserOptions, skipWikilinksA
processed = processed.substring(0, index) + replacement + processed.substring(index + match.length);
});
// Process markdown table alignment
// Marked emits table cells with an align attribute or an inline text-align style; convert those into CSS classes for styling
// Match tables and process alignment on th/td elements
const tableRegex = /<table[^>]*>([\s\S]*?)<\/table>/gi;
processed = processed.replace(tableRegex, (tableMatch: string, tableContent: string) => {
// Process each row
let processedTable = tableContent;
// Find all th and td elements - check for align attribute or style with text-align
const cellRegex = /<(th|td)([^>]*)>([\s\S]*?)<\/\1>/gi;
processedTable = processedTable.replace(cellRegex, (cellMatch: string, tag: string, attrs: string, content: string) => {
let align: string | null = null;
let newAttrs = attrs;
// Check for align attribute
const alignMatch = attrs.match(/align=["'](left|center|right)["']/i);
if (alignMatch) {
align = alignMatch[1].toLowerCase();
newAttrs = newAttrs.replace(/\s*align=["'](left|center|right)["']/i, '');
} else {
// Check for style attribute with text-align
const styleMatch = attrs.match(/style=["']([^"']*text-align:\s*(left|center|right)[^"']*)["']/i);
if (styleMatch) {
align = styleMatch[2].toLowerCase();
// Remove text-align from style
const styleContent = styleMatch[1].replace(/text-align:\s*(left|center|right);?/gi, '').trim();
if (styleContent) {
newAttrs = newAttrs.replace(/style=["'][^"']+["']/, `style="${styleContent}"`);
} else {
newAttrs = newAttrs.replace(/\s*style=["'][^"']+["']/, '');
}
}
}
// If we found alignment, add CSS class
if (align) {
const alignClass = align === 'left' ? 'halign-left' :
align === 'center' ? 'halign-center' : 'halign-right';
// If there's already a class attribute, merge them
if (newAttrs.includes('class=')) {
const classMatch = newAttrs.match(/class=["']([^"']+)["']/);
if (classMatch) {
const existingClass = classMatch[1];
if (!existingClass.includes(alignClass)) {
newAttrs = newAttrs.replace(/class=["'][^"']+["']/, `class="${existingClass} ${alignClass}"`);
}
}
} else {
newAttrs = `${newAttrs} class="${alignClass}"`.trim();
}
}
return `<${tag}${newAttrs}>${content}</${tag}>`;
});
return `<table>${processedTable}</table>`;
});
return {
html: processed,
nostrLinks,

153
src/processors/markdown.ts

@ -81,8 +81,159 @@ export function processMarkdown(content: string, options: ParserOptions): Markdo @@ -81,8 +81,159 @@ export function processMarkdown(content: string, options: ParserOptions): Markdo
// Process emoji shortcodes before markdown processing
let processedContent = emoji.emojify(contentWithoutFrontmatter);
// Extract and process footnotes before markdown parsing
// Footnotes format: [^1] in text and [^1]: definition at end
const footnoteDefinitions: Map<string, string> = new Map();
let placeholderCounter = 0;
// First, extract footnote definitions
const lines = processedContent.split('\n');
const processedLines: string[] = [];
let i = 0;
while (i < lines.length) {
const line = lines[i];
const footnoteDefMatch = line.match(/^\[\^([^\]]+)\]:\s*(.*)$/);
if (footnoteDefMatch) {
const id = footnoteDefMatch[1];
let definition = footnoteDefMatch[2];
// Collect multi-line definition (until next definition or blank line)
i++;
while (i < lines.length) {
const nextLine = lines[i];
if (nextLine.match(/^\[\^[^\]]+\]:/) || (nextLine.trim() === '' && i + 1 < lines.length && lines[i + 1].trim() !== '' && !lines[i + 1].match(/^\[\^[^\]]+\]:/))) {
break;
}
if (nextLine.trim() === '' && i + 1 < lines.length && lines[i + 1].match(/^\[\^[^\]]+\]:/)) {
break;
}
definition += '\n' + nextLine;
i++;
}
footnoteDefinitions.set(id, definition.trim());
// Skip adding this line to processedLines (removing the definition)
continue;
}
processedLines.push(line);
i++;
}
processedContent = processedLines.join('\n');
// Now replace footnote references with placeholders before markdown parsing
// Use HTML-like placeholder that markdown will pass through as-is
const footnoteRefRegex = /\[\^([^\]]+)\]/g;
let refMatch;
while ((refMatch = footnoteRefRegex.exec(processedContent)) !== null) {
const id = refMatch[1];
if (footnoteDefinitions.has(id)) {
const placeholder = `<span data-footnote-placeholder="${placeholderCounter++}" data-footnote-id="${id}"></span>`;
processedContent = processedContent.substring(0, refMatch.index) +
placeholder +
processedContent.substring(refMatch.index + refMatch[0].length);
// Reset regex since we modified the string
footnoteRefRegex.lastIndex = 0;
}
}
// Convert markdown to HTML
const html = marked.parse(processedContent) as string;
let html = marked.parse(processedContent) as string;
// Process superscripts in HTML (X^2^ syntax) - after markdown parsing to avoid conflicts
// But skip inside code blocks
const codeBlockRegex = /<(pre|code)[^>]*>[\s\S]*?<\/\1>/gi;
const codeBlocks: Array<{ start: number; end: number; content: string }> = [];
let codeMatch;
while ((codeMatch = codeBlockRegex.exec(html)) !== null) {
codeBlocks.push({
start: codeMatch.index,
end: codeMatch.index + codeMatch[0].length,
content: codeMatch[0]
});
}
/**
 * Reports whether `index` falls inside any span recorded in `codeBlocks`.
 * Spans are half-open: `start` is included, `end` is excluded.
 *
 * @param index - Character offset to test.
 * @returns `true` when at least one recorded span contains the offset.
 */
function isInCodeBlock(index: number): boolean {
  for (const { start, end } of codeBlocks) {
    if (start <= index && index < end) {
      return true;
    }
  }
  return false;
}
// Process superscripts
const superscriptRegex = /\^([^\^<>\n]+)\^/g;
const superscriptReplacements: Array<{ match: string; replacement: string; index: number }> = [];
let supMatch;
while ((supMatch = superscriptRegex.exec(html)) !== null) {
if (isInCodeBlock(supMatch.index)) continue;
superscriptReplacements.push({
match: supMatch[0],
replacement: `<sup>${supMatch[1]}</sup>`,
index: supMatch.index
});
}
// Apply superscript replacements in reverse order
superscriptReplacements.reverse().forEach(({ match, replacement, index }) => {
html = html.substring(0, index) + replacement + html.substring(index + match.length);
});
// Replace footnote placeholders with actual footnote HTML
let footnoteCounter = 1;
const footnoteRefs: Array<{ id: string; num: number; definition: string }> = [];
const footnoteRefMap: Map<string, number> = new Map();
// First, assign numbers to all footnote definitions
footnoteDefinitions.forEach((definition, id) => {
const num = footnoteCounter++;
footnoteRefMap.set(id, num);
footnoteRefs.push({ id, num, definition });
});
// Replace HTML span placeholders with footnote HTML
// Find all span elements with data-footnote-placeholder attribute
const placeholderRegex = /<span data-footnote-placeholder="(\d+)" data-footnote-id="([^"]+)"><\/span>/g;
html = html.replace(placeholderRegex, (match, placeholderNum, id) => {
const num = footnoteRefMap.get(id);
if (num !== undefined) {
return `<sup class="footnote"><a id="footnoteref_${num}" class="footnote" href="#footnotedef_${num}" title="View footnote.">${num}</a></sup>`;
}
return match; // Return original if no definition found
});
// Add footnotes section at the end if there are any
if (footnoteRefs.length > 0) {
let footnotesHtml = '<div id="footnotes"><hr>';
footnoteRefs.forEach(({ id, num, definition }) => {
// Process the definition through markdown again to handle formatting
const defHtml = marked.parse(definition) as string;
footnotesHtml += `<div class="footnote" id="footnotedef_${num}"><a href="#footnoteref_${num}">${num}</a>. ${defHtml}</div>`;
});
footnotesHtml += '</div>';
html += footnotesHtml;
}
// Fix anchor links - markdown headers need IDs
// Marked generates headers but may not have proper IDs for anchor links
// Process headers to add IDs based on their text content (if they don't already have one)
html = html.replace(/<h([1-6])([^>]*)>([^<]+)<\/h[1-6]>/gi, (match: string, level: string, attrs: string, text: string) => {
// Skip if header already has an id attribute
if (attrs && /id=["'][^"']+["']/i.test(attrs)) {
return match;
}
// Generate ID from header text (similar to GitHub markdown)
const id = text
.toLowerCase()
.trim()
.replace(/[^\w\s-]/g, '') // Remove special chars
.replace(/\s+/g, '-') // Replace spaces with hyphens
.replace(/-+/g, '-') // Replace multiple hyphens with single
.replace(/^-|-$/g, ''); // Remove leading/trailing hyphens
// Add id attribute
const newAttrs = attrs ? `${attrs} id="${id}"` : `id="${id}"`;
return `<h${level} ${newAttrs}>${text}</h${level}>`;
});
return {
html,

Loading…
Cancel
Save