18 changed files with 961 additions and 435 deletions
@ -0,0 +1,275 @@ |
|||||||
|
import { ContentFormat } from '../types'; |
||||||
|
|
||||||
|
/**
 * Optional switches accepted by `convertToAsciidoc`.
 */
export interface ConvertOptions {
  /**
   * Controls the `nostr:` address pass. The pass runs unless this is
   * explicitly set to `false` (leaving it undefined keeps it enabled).
   */
  enableNostrAddresses?: boolean;
}
||||||
|
|
||||||
|
/**
 * Unified entry point: turns content of any supported format into AsciiDoc
 * and then rewrites wikilinks, nostr addresses and hashtags.
 *
 * @param content - Raw source text.
 * @param format - Detected format; anything other than AsciiDoc or Markdown
 *   is handled as plain text.
 * @param linkBaseURL - Forwarded to the wikilink and nostr-address passes.
 * @param options - Set `enableNostrAddresses` to `false` to skip the
 *   nostr-address pass.
 * @returns The resulting AsciiDoc text.
 */
export function convertToAsciidoc(
  content: string,
  format: ContentFormat,
  linkBaseURL: string,
  options: ConvertOptions = {}
): string {
  const toAsciidocSource = (): string => {
    if (format === ContentFormat.AsciiDoc) {
      // Literal "\n" escape sequences become real line breaks
      const unescaped = content.replace(/\\n/g, '\n');
      // A header sitting directly under a non-blank line gets a blank line before it
      return unescaped.replace(/(\S[^\n]*)\n(={1,6}\s+[^\n]+)/g, '$1\n\n$2');
    }
    if (format === ContentFormat.Markdown) {
      return convertMarkdownToAsciidoc(content);
    }
    // ContentFormat.Plain and any unknown format
    return convertPlainTextToAsciidoc(content);
  };

  // Special elements are processed for every content type, in this order
  const withWikilinks = processWikilinks(toAsciidocSource(), linkBaseURL);
  const withNostrLinks =
    options.enableNostrAddresses === false
      ? withWikilinks
      : processNostrAddresses(withWikilinks, linkBaseURL);

  return processHashtags(withNostrLinks);
}
||||||
|
|
||||||
|
/**
 * Converts Markdown to AsciiDoc format using a sequence of regex passes.
 *
 * Passes run in this order: spacing fixes, headers, emphasis, fenced code
 * blocks, images, links, horizontal rules, lists, blockquotes, tables and
 * footnotes.
 *
 * NOTE(review): emphasis runs before fenced code blocks are converted, so
 * `*`/`_`/`~`/`^` inside code fences are still rewritten. Confirm whether
 * code should be protected first.
 *
 * @param content - Markdown source; literal "\n" escape sequences are turned
 *   into real newlines first.
 * @returns The AsciiDoc rendition of `content`.
 */
function convertMarkdownToAsciidoc(content: string): string {
  let asciidoc = content.replace(/\\n/g, '\n');

  // Fix spacing issues around inline code, closing parentheses and "=="
  asciidoc = asciidoc.replace(/`([^`\n]+)`\s*\(([^)]+)\)/g, '`$1` ($2)');
  asciidoc = asciidoc.replace(/([a-zA-Z0-9])`([^`\n]+)`([a-zA-Z0-9])/g, '$1 `$2` $3');
  asciidoc = asciidoc.replace(/([a-zA-Z0-9])`([^`\n]+)`\s*\(/g, '$1 `$2` (');
  asciidoc = asciidoc.replace(/\)`([^`\n]+)`([a-zA-Z0-9])/g, ') `$1` $2');
  asciidoc = asciidoc.replace(/([a-zA-Z0-9])\)([a-zA-Z0-9])/g, '$1) $2');
  asciidoc = asciidoc.replace(/([a-zA-Z0-9])==/g, '$1 ==');

  // Convert headers, deepest level first so "######" is never read as "#"
  for (let level = 6; level >= 1; level--) {
    const marker = '='.repeat(level);
    const heading = new RegExp('^#{' + level + '}\\s+(.+)$', 'gm');
    asciidoc = asciidoc.replace(heading, marker + ' $1 ' + marker);
  }
  asciidoc = asciidoc.replace(/^==\s+(.+?)\s+==$/gm, '== $1 ==');
  // Normalise spacing of inline "== x ==" runs. Only horizontal whitespace is
  // matched: matching "\s" here used to swallow the newlines around every
  // header that was not on the first line, gluing it to its neighbours.
  asciidoc = asciidoc.replace(/[ \t]==[ \t]+([^=\n]+?)[ \t]+==[ \t]/g, ' == $1 == ');

  // Convert emphasis. Italic (single "*") goes first and skips "**" and list
  // markers; otherwise the "*bold*" produced by the bold pass would be
  // re-read as Markdown italic and end up as "_bold_".
  asciidoc = asciidoc.replace(/(?<![*\\])\*(?![*\s])([^*\n]+?)(?<!\s)\*(?!\*)/g, '_$1_'); // Italic
  asciidoc = asciidoc.replace(/\*\*(.+?)\*\*/g, '*$1*'); // Bold
  asciidoc = asciidoc.replace(/__(.+?)__/g, '*$1*'); // Bold
  asciidoc = asciidoc.replace(/~~(.+?)~~/g, '[line-through]#$1#'); // Strikethrough
  asciidoc = asciidoc.replace(/~(.+?)~/g, '[subscript]#$1#'); // Subscript
  // "[^id]" is a footnote reference, not the start of a superscript
  asciidoc = asciidoc.replace(/(?<!\[)\^(.+?)\^/g, '[superscript]#$1#'); // Superscript

  // Convert fenced code blocks
  asciidoc = asciidoc.replace(/```(\w+)?\n([\s\S]*?)\n```/g, (_match, lang, code) => {
    const trimmedCode = code.trim();
    if (trimmedCode.length === 0) return '';

    // Heuristics: fences that look like prose or Markdown are left untouched
    const hasCodePatterns = /[{}();=<>]|function|class|import|export|def |if |for |while |return |const |let |var |public |private |static |console\.log/.test(trimmedCode);
    const isLikelyText = /^[A-Za-z\s.,!?\-'"]+$/.test(trimmedCode) && trimmedCode.length > 50;
    const hasTooManySpaces = (trimmedCode.match(/\s{3,}/g) || []).length > 3;
    const hasMarkdownPatterns = /^#{1,6}\s|^\*\s|^\d+\.\s|^\>\s|^\|.*\|/.test(trimmedCode);

    if ((!hasCodePatterns && trimmedCode.length > 100) || isLikelyText || hasTooManySpaces || hasMarkdownPatterns) {
      return _match;
    }

    return `[source${lang ? ',' + lang : ''}]\n----\n${trimmedCode}\n----`;
  });
  asciidoc = asciidoc.replace(/`\$([^$]+)\$`/g, '`$\\$1\\$$`'); // Preserve LaTeX in code

  // Convert images
  asciidoc = asciidoc.replace(/!\[([^\]]*)\]\(([^)]+)\)/g, 'image::$2[$1,width=100%]');

  // Convert links
  asciidoc = asciidoc.replace(/\[([^\]]+)\]\(([^)]+)\)/g, 'link:$2[$1]');

  // Convert horizontal rules
  asciidoc = asciidoc.replace(/^---$/gm, "'''");

  // Convert unordered lists
  asciidoc = asciidoc.replace(/^(\s*)\*\s+(.+)$/gm, '$1* $2');
  asciidoc = asciidoc.replace(/^(\s*)-\s+(.+)$/gm, '$1* $2');
  asciidoc = asciidoc.replace(/^(\s*)\+\s+(.+)$/gm, '$1* $2');

  // Convert ordered lists
  asciidoc = asciidoc.replace(/^(\s*)\d+\.\s+(.+)$/gm, '$1. $2');

  // Convert blockquotes with optional attribution
  asciidoc = asciidoc.replace(/^(>\s+.+(?:\n>\s+.+)*)/gm, convertMarkdownBlockquote);

  // Convert tables: header row, separator row, then any number of data rows.
  // The trailing newline is not consumed, so text after the table stays
  // separated from the closing "|===".
  asciidoc = asciidoc.replace(
    /\|.*\|[ \t]*\r?\n\|[ \t:|-]+\|[ \t]*(?:\r?\n\|.*\|[ \t]*)*/g,
    convertMarkdownTable
  );

  // Convert footnotes: collect "[^id]: text" definitions, then inline them.
  // A Map is used so ids such as "constructor" cannot hit Object.prototype.
  const footnoteDefinitions = new Map<string, string>();
  const withoutDefinitions = asciidoc.replace(
    /^\[\^([^\]]+)\]:\s*([\s\S]*?)(?=\n\[\^|\n---|\n##|\n###|\n####|\n#####|\n######|$)/gm,
    (_definition: string, id: string, text: string) => {
      footnoteDefinitions.set(id, text.trim());
      return '';
    }
  );

  return withoutDefinitions.replace(/\[\^([^\]]+)\]/g, (reference: string, id: string) => {
    const text = footnoteDefinitions.get(id);
    return text ? `footnote:[${text}]` : reference;
  });
}

/**
 * Turns one Markdown blockquote (consecutive "> " lines) into an AsciiDoc
 * quote block. The last line starting with an em dash or "--" is treated as
 * the attribution; lines after it are dropped.
 */
function convertMarkdownBlockquote(block: string): string {
  const lines = block.split('\n').map(line => line.replace(/^>\s*/, ''));

  // Without an attribution line the whole quote is the body
  let quoteBodyLines = lines;
  let attributionLine: string | undefined;

  for (let i = lines.length - 1; i >= 0; i--) {
    const line = lines[i].trim();
    if (line.startsWith('—') || line.startsWith('--')) {
      attributionLine = line;
      quoteBodyLines = lines.slice(0, i);
      break;
    }
  }

  const quoteContent = quoteBodyLines.filter(l => l.trim() !== '').join('\n').trim();
  if (!attributionLine) {
    return `____\n${quoteContent}\n____`;
  }

  const cleanedAttribution = attributionLine.replace(/^[—-]+/, '').trim();
  let author = '';
  let source = '';

  // "Author, link:url[text]" - links were already converted to AsciiDoc above
  const linkMatch = cleanedAttribution.match(/^(.*?),?\s*link:([^\[\]]+)\[([^\]]+)\]$/);
  if (linkMatch) {
    author = linkMatch[1].trim();
    source = `link:${linkMatch[2].trim()}[${linkMatch[3].trim()}]`;
  } else {
    const parts = cleanedAttribution.split(',').map(p => p.trim());
    author = parts[0];
    if (parts.length > 1) {
      source = parts.slice(1).join(', ').trim();
    }
  }

  return `[quote, ${author}, ${source}]\n____\n${quoteContent}\n____`;
}

/**
 * Turns one Markdown pipe table into an AsciiDoc table. The column spec is
 * derived from the header row, and trailing pipes are removed because
 * AsciiDoc would read them as an extra empty cell.
 */
function convertMarkdownTable(table: string): string {
  const rows = table
    .split(/\r?\n/)
    .map(row => row.trim())
    .filter(row => row !== '');
  if (rows.length < 2 || !rows[1].includes('-')) return table;

  const stripTrailingPipe = (row: string): string => row.replace(/\s*(?<!\\)\|$/, '');
  const headerRow = stripTrailingPipe(rows[0]);
  const dataRows = rows.slice(2).map(stripTrailingPipe);

  // Each remaining unescaped pipe starts one cell; pipes inside [[a|b]] wikilinks do not count
  const columnCount = Math.max(
    1,
    (headerRow.replace(/\[\[[^\]]*\]\]/g, '').match(/(?<!\\)\|/g) ?? []).length
  );
  const cols = new Array<string>(columnCount).fill('1').join(',');

  return [`[cols="${cols}",options="header"]`, '|===', headerRow, ...dataRows, '|==='].join('\n');
}
||||||
|
|
||||||
|
/**
 * Converts plain text to AsciiDoc format.
 *
 * A single newline becomes an AsciiDoc hard line break (" +" at end of line).
 * Runs of two or more newlines are left untouched so blank lines keep acting
 * as paragraph separators; previously every newline received " +", which
 * removed the blank line between paragraphs.
 *
 * @param content - Plain text.
 * @returns AsciiDoc text with hard line breaks inside paragraphs.
 */
function convertPlainTextToAsciidoc(content: string): string {
  // Only newlines that are not part of a blank-line run get a hard break
  return content.replace(/(?<!\n)\n(?!\n)/g, ' +\n');
}
||||||
|
|
||||||
|
/**
 * Normalizes text to d-tag format.
 *
 * The text is lower-cased, every run of characters that are not letters,
 * combining marks or digits becomes a single "-", and leading/trailing "-"
 * are removed. Letters and digits are matched by Unicode property so that
 * non-Latin titles keep their characters instead of collapsing to an empty
 * tag. For ASCII-only input the result is the same as with `[^a-z0-9]`.
 *
 * @param text - Wikilink target or title.
 * @returns The normalized d-tag (may be empty if `text` has no letters or digits).
 */
function normalizeDtag(text: string): string {
  return text
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}]+/gu, '-')
    .replace(/^-+|-+$/g, '');
}
||||||
|
|
||||||
|
/**
 * Rewrites wikilink syntax into intermediate markers.
 *
 * - `[[book::...]]` becomes `BOOKSTR:<trimmed content>`.
 * - `[[target]]` / `[[target|display text]]` becomes
 *   `wikilink:<d-tag of target>[<display text or target>]`.
 *
 * @param content - Text to scan.
 * @param linkBaseURL - Accepted for signature parity; not read by this function.
 * @returns `content` with all wikilinks rewritten.
 */
function processWikilinks(content: string, linkBaseURL: string): string {
  // Bookstr macros first, so the generic pattern below never sees them
  const withBookstrMarkers = content.replace(
    /\[\[book::([^\]]+)\]\]/g,
    (_whole, reference) => `BOOKSTR:${reference.trim()}`
  );

  // Standard wikilinks: [[Target Page]] or [[target page|see this]]
  return withBookstrMarkers.replace(
    /\[\[([^|\]]+)(?:\|([^\]]+))?\]\]/g,
    (_whole, rawTarget, rawLabel) => {
      const target = rawTarget.trim();
      const label = rawLabel ? rawLabel.trim() : target;
      return `wikilink:${normalizeDtag(target)}[${label}]`;
    }
  );
}
||||||
|
|
||||||
|
/**
 * Rewrites bare `nostr:<id>` addresses as `link:nostr:<id>[<id>]`.
 *
 * An address that is already the target of an AsciiDoc link
 * (`link:nostr:<id>[text]`, e.g. one produced from a Markdown link) is left
 * alone; previously it was wrapped a second time, yielding
 * `link:link:nostr:<id>[<id>][text]`.
 *
 * @param content - Text to scan.
 * @param linkBaseURL - Accepted for signature parity; not read by this function.
 * @returns `content` with bare nostr addresses turned into links.
 */
function processNostrAddresses(content: string, linkBaseURL: string): string {
  // The id must be at least 7 lowercase alphanumerics (same set the old
  // "[a-z0-9]+[a-z0-9]{6,}" pattern accepted, without the redundant backtracking)
  return content.replace(/(?<!link:)nostr:([a-z0-9]{7,})/g, 'link:nostr:$1[$1]');
}
||||||
|
|
||||||
|
/**
 * Rewrites hashtags as `hashtag:<lower-cased tag>[#<tag as written>]`.
 *
 * A hashtag must start the text or follow whitespace or "(", and must not be
 * followed by "#". This keeps the pass away from:
 * - AsciiDoc inline spans such as `[line-through]#text#`
 * - URL fragments (`https://host/#top`, `page#section`)
 * - numeric character references (`&#39;`)
 *
 * @param content - Text to scan.
 * @returns `content` with hashtags rewritten.
 */
function processHashtags(content: string): string {
  return content.replace(
    /(?<![^\s(])#([a-zA-Z0-9_]+)(?![\w#])/g,
    (_match, hashtag: string) => `hashtag:${hashtag.toLowerCase()}[#${hashtag}]`
  );
}
||||||
@ -0,0 +1,263 @@ |
|||||||
|
import { NostrLink, Wikilink } from '../types'; |
||||||
|
|
||||||
|
/**
 * Everything `extractMetadata` collects from a piece of content.
 */
export interface ExtractedMetadata {
  /** `nostr:` references found in the content. */
  nostrLinks: NostrLink[];
  /** `[[target]]` / `[[target|display]]` wikilinks found in the content. */
  wikilinks: Wikilink[];
  /** Hashtag names without the leading "#". */
  hashtags: string[];
  /** Markdown, AsciiDoc and raw links, each flagged as external or not. */
  links: Array<{ url: string; text: string; isExternal: boolean }>;
  /** URLs of images and videos referenced by the content. */
  media: string[];
}
||||||
|
|
||||||
|
/**
 * Collects metadata from raw content before it is processed.
 *
 * @param content - Source text to scan.
 * @param linkBaseURL - Base URL used to decide whether a link is external.
 * @returns Nostr links, wikilinks, hashtags, links and media found in `content`.
 */
export function extractMetadata(content: string, linkBaseURL: string): ExtractedMetadata {
  const nostrLinks = extractNostrLinks(content);
  const wikilinks = extractWikilinks(content);
  const hashtags = extractHashtags(content);
  const links = extractLinks(content, linkBaseURL);
  const media = extractMedia(content);

  return { nostrLinks, wikilinks, hashtags, links, media };
}
||||||
|
|
||||||
|
/**
 * Extracts `nostr:`-prefixed references from content.
 *
 * Each distinct id is reported once, in order of first appearance, and only
 * when `getNostrType` recognises its type.
 *
 * @param content - Text to scan.
 * @returns One entry per recognised, distinct nostr id.
 */
function extractNostrLinks(content: string): NostrLink[] {
  // Keyed by id; a Map keeps insertion order, so output order is first-seen order
  const byId = new Map<string, NostrLink>();

  for (const text of content.match(/nostr:([a-z0-9]+[a-z0-9]{6,})/g) ?? []) {
    const id = text.slice('nostr:'.length);
    if (byId.has(id)) continue;

    const type = getNostrType(id);
    if (!type) continue;

    byId.set(id, { type, id, text, bech32: id });
  }

  return [...byId.values()];
}
||||||
|
|
||||||
|
/**
 * Extracts `[[target]]` and `[[target|display]]` wikilinks from content.
 *
 * Entries are de-duplicated on the pair (normalized d-tag, display text) and
 * returned in order of first appearance.
 *
 * NOTE(review): `[[book::...]]` macros also match this pattern and are
 * reported as wikilinks here - confirm whether that is intended.
 *
 * @param content - Text to scan.
 * @returns The distinct wikilinks found.
 */
function extractWikilinks(content: string): Wikilink[] {
  const results: Wikilink[] = [];
  const seenKeys = new Set<string>();

  for (const found of content.matchAll(/\[\[([^|\]]+)(?:\|([^\]]+))?\]\]/g)) {
    const target = found[1].trim();
    const display = found[2] ? found[2].trim() : target;
    const dtag = normalizeDtag(target);

    const key = `${dtag}|${display}`;
    if (seenKeys.has(key)) continue;

    seenKeys.add(key);
    results.push({ dtag, display, original: found[0] });
  }

  return results;
}
||||||
|
|
||||||
|
/**
 * Extracts hashtags from content.
 *
 * A hashtag must start the text or follow whitespace or "(", and must not be
 * followed by "#". The previous unanchored pattern also reported URL
 * fragments (`page#section`), numeric character references (`&#39;`) and
 * similar non-hashtag uses of "#".
 *
 * @param content - Text to scan.
 * @returns Distinct lower-cased tag names (without "#"), in order of first appearance.
 */
function extractHashtags(content: string): string[] {
  // A Set keeps insertion order, which gives first-seen ordering for free
  const tags = new Set<string>();

  for (const found of content.matchAll(/(?<![^\s(])#([a-zA-Z0-9_]+)(?![\w#])/g)) {
    tags.add(found[1].toLowerCase());
  }

  return [...tags];
}
||||||
|
|
||||||
|
/**
 * Extracts regular (non-nostr) links from content.
 *
 * Three sources are scanned, in this order: Markdown links `[text](url)`,
 * AsciiDoc links `link:url[text]`, and raw `http(s)://` URLs. Each URL is
 * reported once.
 *
 * Raw URLs are searched only in the text that is left after the Markdown and
 * AsciiDoc links have been blanked out. Previously the raw pass re-matched
 * the URL inside every such link together with its trailing `)` or `[text]`,
 * producing a bogus duplicate entry per link. Trailing sentence punctuation
 * is also trimmed from raw URLs.
 *
 * @param content - Text to scan.
 * @param linkBaseURL - Base URL used to decide whether a link is external.
 * @returns The distinct links found, with their text and external flag.
 */
function extractLinks(content: string, linkBaseURL: string): Array<{ url: string; text: string; isExternal: boolean }> {
  const links: Array<{ url: string; text: string; isExternal: boolean }> = [];
  const seen = new Set<string>();

  // Records a link unless it was already seen or points at a nostr entity
  const addLink = (url: string, text: string): void => {
    if (seen.has(url) || isNostrUrl(url)) return;
    seen.add(url);
    links.push({ url, text, isExternal: isExternalUrl(url, linkBaseURL) });
  };

  const markdownLink = /\[([^\]]+)\]\(([^)]+)\)/g;
  const asciidocLink = /link:([^\[]+)\[([^\]]+)\]/g;

  // Markdown links: [text](url)
  for (const found of content.matchAll(markdownLink)) {
    addLink(found[2], found[1]);
  }

  // AsciiDoc links: link:url[text]
  for (const found of content.matchAll(asciidocLink)) {
    addLink(found[1], found[2]);
  }

  // Raw URLs, looked up only outside the link constructs handled above
  const remainder = content.replace(markdownLink, ' ').replace(asciidocLink, ' ');
  for (const found of remainder.matchAll(/https?:\/\/[^\s<>"']+/g)) {
    const url = found[0].replace(/[.,;:!?]+$/, '');
    addLink(url, url);
  }

  return links;
}
||||||
|
|
||||||
|
/**
 * Extracts image and video URLs from content.
 *
 * Candidates come from Markdown images, AsciiDoc block images
 * (`image::url[`) and raw `http(s)://` URLs, in that order. A candidate is
 * kept once, and only if `isImageUrl` or `isVideoUrl` accepts it.
 *
 * @param content - Text to scan.
 * @returns Distinct media URLs in order of first appearance.
 */
function extractMedia(content: string): string[] {
  const media: string[] = [];
  const seen = new Set<string>();

  // Shared acceptance rule for all three sources
  const consider = (url: string | undefined): void => {
    if (!url || seen.has(url)) return;
    if (!isImageUrl(url) && !isVideoUrl(url)) return;
    media.push(url);
    seen.add(url);
  };

  // Markdown images: ![alt](url)
  for (const found of content.matchAll(/!\[[^\]]*\]\(([^)]+)\)/g)) {
    consider(found[1]);
  }

  // AsciiDoc images: image::url[alt]
  for (const found of content.matchAll(/image::([^\[]+)\[/g)) {
    consider(found[1]);
  }

  // Raw image/video URLs
  for (const url of content.match(/https?:\/\/[^\s<>"']+/g) ?? []) {
    consider(url);
  }

  return media;
}
||||||
|
|
||||||
|
/**
 * Gets the Nostr identifier type from a bech32-style id.
 *
 * The id must start with one of the known prefixes followed by the bech32
 * separator "1" (e.g. `npub1...`). Testing the bare prefix classified
 * ordinary strings such as `notebook.html` or `npublish` as Nostr ids, which
 * in turn made `isNostrUrl` drop such links.
 *
 * @param id - Candidate identifier, without the `nostr:` scheme.
 * @returns The matching type, or `null` when `id` is not a recognised entity.
 */
function getNostrType(id: string): 'npub' | 'nprofile' | 'nevent' | 'naddr' | 'note' | null {
  const prefixes = ['npub', 'nprofile', 'nevent', 'naddr', 'note'] as const;
  return prefixes.find(prefix => id.startsWith(`${prefix}1`)) ?? null;
}
||||||
|
|
||||||
|
/**
 * Normalizes text to d-tag format.
 *
 * The text is lower-cased, every run of characters that are not letters,
 * combining marks or digits becomes a single "-", and leading/trailing "-"
 * are removed. Letters and digits are matched by Unicode property so that
 * non-Latin titles keep their characters instead of collapsing to an empty
 * tag. For ASCII-only input the result is the same as with `[^a-z0-9]`.
 *
 * @param text - Wikilink target or title.
 * @returns The normalized d-tag (may be empty if `text` has no letters or digits).
 */
function normalizeDtag(text: string): string {
  return text
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}]+/gu, '-')
    .replace(/^-+|-+$/g, '');
}
||||||
|
|
||||||
|
/**
 * Checks whether a URL points outside the site identified by `linkBaseURL`.
 *
 * Uses string matching rather than the `URL` class, as in the original.
 *
 * - Relative URLs (`/about`, `#anchor`, `page.html`) are never external.
 *   They used to be reported as external because they have no host to compare.
 * - With an empty `linkBaseURL` every absolute URL is external.
 * - Otherwise hosts are compared case-insensitively; an absolute URL whose
 *   host cannot be read (e.g. `mailto:`) is external.
 *
 * @param url - Link target.
 * @param linkBaseURL - Base URL of the current site; may be empty.
 * @returns `true` when the link leaves the site.
 */
function isExternalUrl(url: string, linkBaseURL: string): boolean {
  const hasScheme = /^[a-z][a-z0-9+.-]*:/i.test(url);
  if (!hasScheme && !url.startsWith('//')) return false;

  if (!linkBaseURL) return true;

  // Host (including any port) of an http(s) or protocol-relative URL
  const hostOf = (value: string): string | undefined =>
    value.match(/^(?:https?:)?\/\/([^\/?#]+)/i)?.[1].toLowerCase();

  const urlHost = hostOf(url);
  const baseHost = hostOf(linkBaseURL);
  if (urlHost === undefined || baseHost === undefined) return true;

  return urlHost !== baseHost;
}
||||||
|
|
||||||
|
/**
 * Tells whether a link target refers to a Nostr entity: either it carries the
 * `nostr:` scheme, or `getNostrType` recognises it as a bare identifier.
 *
 * @param url - Link target to test.
 * @returns `true` for Nostr targets.
 */
function isNostrUrl(url: string): boolean {
  if (url.startsWith('nostr:')) {
    return true;
  }
  return getNostrType(url) !== null;
}
||||||
|
|
||||||
|
/**
 * Checks whether a URL refers to an image, judged by file extension
 * (jpeg, jpg, png, gif, webp, svg; case-insensitive).
 *
 * The extension is accepted at the end of the whole URL (as before) or at
 * the end of the part before any query string or fragment, so
 * `photo.png?w=200` and `icon.svg#dark` are recognised as well.
 *
 * @param url - URL to test.
 * @returns `true` when the URL looks like an image.
 */
function isImageUrl(url: string): boolean {
  const imageExtension = /\.(jpeg|jpg|png|gif|webp|svg)$/i;
  const beforeQueryOrFragment = url.split(/[?#]/, 1)[0];
  return imageExtension.test(url) || imageExtension.test(beforeQueryOrFragment);
}
||||||
|
|
||||||
|
/**
 * Checks whether a URL refers to a video, judged by file extension
 * (mp4, webm, ogg; case-insensitive).
 *
 * The extension is accepted at the end of the whole URL (as before) or at
 * the end of the part before any query string or fragment, so
 * `clip.mp4?t=10` is recognised as well.
 *
 * @param url - URL to test.
 * @returns `true` when the URL looks like a video.
 */
function isVideoUrl(url: string): boolean {
  const videoExtension = /\.(mp4|webm|ogg)$/i;
  const beforeQueryOrFragment = url.split(/[?#]/, 1)[0];
  return videoExtension.test(url) || videoExtension.test(beforeQueryOrFragment);
}
||||||
@ -1,2 +1,3 @@ |
|||||||
export * from './parser'; |
export * from './parser'; |
||||||
export * from './types'; |
export * from './types'; |
||||||
|
export * from './detector'; |
||||||
@ -1,66 +0,0 @@ |
|||||||
/**
 * Normalizes a d tag according to NIP-54 rules.
 *
 * Steps, in order: lower-case; whitespace runs to "-"; drop everything that
 * is not a-z, 0-9, "-" or a non-ASCII character; collapse repeated "-";
 * strip leading and trailing "-".
 *
 * @param dTag - Raw tag or title.
 * @returns The normalized tag.
 */
export function normalizeDTag(dTag: string): string {
  return dTag
    .toLowerCase()
    .replace(/\s+/g, '-')
    .replace(/[^a-z0-9\-\u0080-\uFFFF]/g, '')
    .replace(/-+/g, '-')
    .replace(/^-+|-+$/g, '');
}
|
||||||
|
|
||||||
/**
 * Rewrites wikilinks and nostr: links in AsciiDoc content.
 *
 * - `[[target]]` / `[[target|display]]` becomes
 *   `link:<linkBaseURL>/events?d=<normalized target>[display]`, or
 *   `link:#<normalized target>[display]` when `linkBaseURL` is empty.
 * - `nostr:naddr1...` / `nostr:nevent1...` becomes
 *   `link:<linkBaseURL>/events?id=<id>[nostr:<id>]`; left untouched when
 *   `linkBaseURL` is empty.
 *
 * @param content - AsciiDoc source.
 * @param linkBaseURL - Base URL for generated links; may be empty.
 * @returns The content with links rewritten.
 */
export function rewriteAsciiDocLinks(content: string, linkBaseURL: string): string {
  const withWikilinks = content.replace(/\[\[([^\]]+)\]\]/g, (_whole, inner: string) => {
    // With a pipe, only the first two segments are used; without one, the
    // target doubles as the display text
    const [rawTarget, rawDisplay] = inner.includes('|') ? inner.split('|', 2) : [inner, inner];
    const target = rawTarget.trim();
    const display = rawDisplay.trim();

    const dTag = normalizeDTag(target);
    const href = linkBaseURL ? `${linkBaseURL}/events?d=${dTag}` : `#${dTag}`;
    return `link:${href}[${display}]`;
  });

  return withWikilinks.replace(
    /nostr:(naddr1[^\s\]]+|nevent1[^\s\]]+)/g,
    (whole, nostrID) =>
      linkBaseURL ? `link:${linkBaseURL}/events?id=${nostrID}[${whole}]` : whole
  );
}
|
||||||
@ -1,49 +1,150 @@ |
|||||||
import asciidoctor from '@asciidoctor/core'; |
import asciidoctor from '@asciidoctor/core'; |
||||||
import { ProcessResult } from '../types'; |
import { ProcessResult } from '../types'; |
||||||
import { rewriteAsciiDocLinks } from './asciidoc-links'; |
import { extractTOC, sanitizeHTML } from './html-utils'; |
||||||
import { extractTOC, sanitizeHTML, processLinks } from './html-utils'; |
import { postProcessHtml } from './html-postprocess'; |
||||||
|
|
||||||
const asciidoctorInstance = asciidoctor(); |
const asciidoctorInstance = asciidoctor(); |
||||||
|
|
||||||
|
/**
 * Optional switches accepted by `processAsciidoc`.
 */
export interface ProcessOptions {
  /** Selects highlight.js as the source highlighter; treated as `true` when omitted. */
  enableCodeHighlighting?: boolean;
  /** Enables LaTeX (stem) support and LaTeX detection; treated as `true` when omitted. */
  enableLaTeX?: boolean;
  /** Enables musical-notation post-processing and detection; treated as `true` when omitted. */
  enableMusicalNotation?: boolean;
  /** Original content, checked for LaTeX instead of the AsciiDoc input when provided. */
  originalContent?: string;
}
||||||
|
|
||||||
/** |
/** |
||||||
* Processes AsciiDoc content to HTML |
* Processes AsciiDoc content to HTML using AsciiDoctor |
||||||
|
* Uses AsciiDoctor's built-in highlight.js and LaTeX support |
||||||
*/ |
*/ |
||||||
export async function processAsciiDoc(content: string, linkBaseURL: string): Promise<ProcessResult> { |
export async function processAsciidoc( |
||||||
// Rewrite links in AsciiDoc content
|
content: string, |
||||||
const processedContent = rewriteAsciiDocLinks(content, linkBaseURL); |
options: ProcessOptions = {} |
||||||
|
): Promise<ProcessResult> { |
||||||
|
const { |
||||||
|
enableCodeHighlighting = true, |
||||||
|
enableLaTeX = true, |
||||||
|
enableMusicalNotation = true, |
||||||
|
} = options; |
||||||
|
|
||||||
|
// Check if content starts with level 3+ headers
|
||||||
|
// Asciidoctor article doctype requires level 1 (=) or level 2 (==) before level 3 (===)
|
||||||
|
// If content starts with level 3+, use book doctype
|
||||||
|
const firstHeaderMatch = content.match(/^(={1,6})\s+/m); |
||||||
|
let doctype: 'article' | 'book' = 'article'; |
||||||
|
|
||||||
|
if (firstHeaderMatch) { |
||||||
|
const firstHeaderLevel = firstHeaderMatch[1].length; |
||||||
|
if (firstHeaderLevel >= 3) { |
||||||
|
doctype = 'book'; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
// Convert AsciiDoc to HTML
|
try { |
||||||
const html = asciidoctorInstance.convert(processedContent, { |
const result = asciidoctorInstance.convert(content, { |
||||||
safe: 'safe', |
safe: 'safe', |
||||||
backend: 'html5', |
backend: 'html5', |
||||||
doctype: 'article', |
doctype: doctype, |
||||||
attributes: { |
attributes: { |
||||||
showtitle: true, |
'showtitle': true, |
||||||
icons: 'font', |
'sectanchors': true, |
||||||
sectanchors: true, |
'sectlinks': true, |
||||||
sectlinks: true, |
'toc': 'left', |
||||||
toc: 'left', |
'toclevels': 6, |
||||||
toclevels: 3, |
'toc-title': 'Table of Contents', |
||||||
}, |
'source-highlighter': enableCodeHighlighting ? 'highlight.js' : 'none', |
||||||
}) as string; |
'stem': enableLaTeX ? 'latexmath' : 'none', |
||||||
|
'data-uri': true, |
||||||
|
'imagesdir': '', |
||||||
|
'linkcss': false, |
||||||
|
'stylesheet': '', |
||||||
|
'stylesdir': '', |
||||||
|
'prewrap': true, |
||||||
|
'sectnums': false, |
||||||
|
'sectnumlevels': 6, |
||||||
|
'experimental': true, |
||||||
|
'compat-mode': false, |
||||||
|
'attribute-missing': 'warn', |
||||||
|
'attribute-undefined': 'warn', |
||||||
|
'skip-front-matter': true, |
||||||
|
'source-indent': 0, |
||||||
|
'indent': 0, |
||||||
|
'tabsize': 2, |
||||||
|
'tabwidth': 2, |
||||||
|
'hardbreaks': false, |
||||||
|
'paragraph-rewrite': 'normal', |
||||||
|
'sectids': true, |
||||||
|
'idprefix': '', |
||||||
|
'idseparator': '-', |
||||||
|
'sectidprefix': '', |
||||||
|
'sectidseparator': '-' |
||||||
|
} |
||||||
|
}); |
||||||
|
|
||||||
|
const htmlString = typeof result === 'string' ? result : result.toString(); |
||||||
|
|
||||||
// Extract table of contents from HTML
|
// Extract table of contents from HTML
|
||||||
const { toc, contentWithoutTOC } = extractTOC(html); |
const { toc, contentWithoutTOC } = extractTOC(htmlString); |
||||||
|
|
||||||
// Sanitize HTML to prevent XSS
|
// Sanitize HTML to prevent XSS
|
||||||
const sanitized = sanitizeHTML(contentWithoutTOC); |
const sanitized = sanitizeHTML(contentWithoutTOC); |
||||||
|
|
||||||
// Process links: make external links open in new tab, local links in same tab
|
// Post-process HTML: convert macros to HTML, add styling, etc.
|
||||||
const processed = processLinks(sanitized, linkBaseURL); |
const processed = postProcessHtml(sanitized, { |
||||||
|
enableMusicalNotation, |
||||||
|
}); |
||||||
|
|
||||||
// Also sanitize and process links in TOC
|
// Also process TOC
|
||||||
const tocSanitized = sanitizeHTML(toc); |
const tocSanitized = sanitizeHTML(toc); |
||||||
const tocProcessed = processLinks(tocSanitized, linkBaseURL); |
const tocProcessed = postProcessHtml(tocSanitized, { |
||||||
|
enableMusicalNotation: false, // Don't process music in TOC
|
||||||
|
}); |
||||||
|
|
||||||
|
// Check for LaTeX in original content (more reliable than checking HTML)
|
||||||
|
const contentToCheck = options.originalContent || content; |
||||||
|
const hasLaTeX = enableLaTeX && hasMathContent(contentToCheck); |
||||||
|
|
||||||
|
// Check for musical notation in processed HTML
|
||||||
|
const hasMusicalNotation = enableMusicalNotation && ( |
||||||
|
/class="abc-notation"|class="lilypond-notation"|class="chord"|class="musicxml-notation"/.test(processed) |
||||||
|
); |
||||||
|
|
||||||
return { |
return { |
||||||
content: processed, |
content: processed, |
||||||
tableOfContents: tocProcessed, |
tableOfContents: tocProcessed, |
||||||
|
hasLaTeX, |
||||||
|
hasMusicalNotation, |
||||||
|
nostrLinks: [], // Will be populated by metadata extraction
|
||||||
|
wikilinks: [], |
||||||
|
hashtags: [], |
||||||
|
links: [], |
||||||
|
media: [], |
||||||
|
}; |
||||||
|
} catch (error) { |
||||||
|
// Fallback to plain text
|
||||||
|
return { |
||||||
|
content: `<p>${sanitizeHTML(content)}</p>`, |
||||||
|
tableOfContents: '', |
||||||
hasLaTeX: false, |
hasLaTeX: false, |
||||||
hasMusicalNotation: false, |
hasMusicalNotation: false, |
||||||
|
nostrLinks: [], |
||||||
|
wikilinks: [], |
||||||
|
hashtags: [], |
||||||
|
links: [], |
||||||
|
media: [], |
||||||
}; |
}; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/**
 * Checks whether content contains LaTeX math.
 *
 * Recognised forms:
 * - inline `$...$`: the opening "$" must be followed by a non-space
 *   character, the closing "$" must be preceded by a non-space character and
 *   not be followed by a digit, and the span stays on one line. This keeps
 *   prices such as "$5 and $10" from counting as math.
 * - inline `\(...\)`: may now contain ")" as in `\(f(x)\)`.
 * - block `$$...$$` and `\[...\]`.
 *
 * @param content - Text to inspect.
 * @returns `true` when any math form is present.
 */
function hasMathContent(content: string): boolean {
  const mathPatterns = [
    /\$(?!\s)[^$\n]*?[^\s$]\$(?!\d)/, // inline $...$
    /\\\([\s\S]+?\\\)/, // inline \(...\)
    /\$\$[\s\S]*?\$\$/, // block $$...$$
    /\\\[[\s\S]*?\\\]/, // block \[...\]
  ];

  return mathPatterns.some(pattern => pattern.test(content));
}
||||||
|
|||||||
@ -1,52 +0,0 @@ |
|||||||
import hljs from 'highlight.js'; |
|
||||||
|
|
||||||
/**
 * Applies highlight.js to every `<pre><code>` block in the HTML.
 *
 * A `language-xxx` class on the `<code>` element selects the language when
 * highlight.js knows it; otherwise the language is auto-detected. If
 * highlighting throws, the block is returned unchanged.
 *
 * @param html - HTML that may contain code blocks.
 * @returns The HTML with code blocks replaced by highlighted markup.
 */
export function ensureCodeHighlighting(html: string): string {
  // Matches <pre><code>...</code></pre> with an optional class="language-xxx"
  const codeBlockRegex = /<pre><code(?:\s+class=["']language-([^"']+)["'])?[^>]*>(.*?)<\/code><\/pre>/gs;

  return html.replace(codeBlockRegex, (original, lang, code) => {
    // highlight.js expects raw source, not HTML-escaped text
    const source = unescapeHTML(code);

    try {
      const result: hljs.HighlightResult =
        lang && hljs.getLanguage(lang)
          ? hljs.highlight(source, { language: lang })
          : hljs.highlightAuto(source);

      const classAttribute = result.language ? ` class="language-${result.language}"` : '';
      return `<pre><code${classAttribute}>${result.value}</code></pre>`;
    } catch {
      // If highlighting fails, keep the original block
      return original;
    }
  });
}
|
||||||
|
|
||||||
/**
 * Unescapes the five basic HTML entities: lt, gt, amp, quot and #39.
 *
 * All entities are decoded in a single pass, so an escaped ampersand followed
 * by "lt;" yields the literal text of the lt entity rather than "<". Decoding
 * them one after another with the ampersand in the middle unescaped such
 * input twice.
 *
 * @param text - HTML-escaped text.
 * @returns The text with those entities replaced by their characters.
 */
function unescapeHTML(text: string): string {
  const characterByEntityName: Record<string, string> = {
    lt: '<',
    gt: '>',
    amp: '&',
    quot: '"',
    '#39': "'",
  };

  return text.replace(
    /&(lt|gt|amp|quot|#39);/g,
    (_entity: string, name: string) => characterByEntityName[name]
  );
}
|
||||||
@ -0,0 +1,192 @@ |
|||||||
|
import { processMusicalNotation } from './music'; |
||||||
|
|
||||||
|
/** Switches accepted by the HTML post-processing step. */
export interface PostProcessOptions {
  /** When truthy, the HTML is additionally run through `processMusicalNotation`. */
  enableMusicalNotation?: boolean;
}
||||||
|
|
||||||
|
/**
 * Post-processes HTML output from AsciiDoctor.
 *
 * Textual markers left in the HTML are rewritten into elements carrying data
 * attributes and CSS classes, in this order:
 *   1. `BOOKSTR:<content>` becomes a `bookstr-placeholder` span
 *   2. `hashtag:<tag>[<text>]` becomes a link to `/notes?t=<tag>`
 *   3. `wikilink:<dtag>[<text>]` becomes a `wikilink` span
 *   4. `link:nostr:<id>[<text>]` becomes an embedded-note placeholder, a user
 *      handle, or a plain `nostr:` link, depending on the id prefix
 * Afterwards images, optional musical notation, leftover markdown, styling
 * classes and raw ToC text are handled by the dedicated helpers.
 *
 * Every value written into a double-quoted attribute has its quote characters
 * replaced by character references so it cannot terminate the attribute.
 *
 * @param html - HTML produced by the AsciiDoc conversion.
 * @param options - Optional switches; see {@link PostProcessOptions}.
 * @returns The post-processed HTML.
 */
export function postProcessHtml(html: string, options: PostProcessOptions = {}): string {
  // Entity bodies for the two quote characters; assembled into `&...;` below.
  const quoteEntities: Record<string, string> = { '"': 'quot', "'": '#39' };
  const escapeAttr = (value: string): string =>
    value.replace(/["']/g, (quote) => `&${quoteEntities[quote]};`);

  let processed = html;

  // Convert bookstr markers to HTML placeholders
  processed = processed.replace(/BOOKSTR:([^<>\s]+)/g, (_match, bookContent: string) => {
    return `<span data-bookstr="${escapeAttr(bookContent)}" class="bookstr-placeholder"></span>`;
  });

  // Convert hashtag links to HTML
  processed = processed.replace(/hashtag:([^[]+)\[([^\]]+)\]/g, (_match, normalizedHashtag: string, displayText: string) => {
    return `<a href="/notes?t=${normalizedHashtag}" class="hashtag-link text-green-600 dark:text-green-400 hover:text-green-700 dark:hover:text-green-300 hover:underline">${displayText}</a>`;
  });

  // Convert wikilink:dtag[display] format to HTML
  processed = processed.replace(/wikilink:([^[]+)\[([^\]]+)\]/g, (_match, dTag: string, displayText: string) => {
    const escapedDtag = escapeAttr(dTag);
    const escapedDisplay = escapeAttr(displayText);
    return `<span class="wikilink cursor-pointer text-blue-600 hover:text-blue-800 hover:underline border-b border-dotted border-blue-300" data-dtag="${escapedDtag}" data-display="${escapedDisplay}">${displayText}</span>`;
  });

  // Convert nostr: links to HTML
  processed = processed.replace(/link:nostr:([^[]+)\[([^\]]+)\]/g, (_match, bech32Id: string, displayText: string) => {
    const nostrType = getNostrType(bech32Id);
    const escapedId = escapeAttr(bech32Id);

    if (nostrType === 'nevent' || nostrType === 'naddr' || nostrType === 'note') {
      // Render as embedded event placeholder
      return `<div data-embedded-note="${escapedId}" class="embedded-note-container">Loading embedded event...</div>`;
    }

    if (nostrType === 'npub' || nostrType === 'nprofile') {
      // Render as user handle
      return `<span class="user-handle" data-pubkey="${escapedId}">@${displayText}</span>`;
    }

    // Fallback to regular link; the href uses the escaped id like the data attribute does.
    return `<a href="nostr:${escapedId}" class="nostr-link text-blue-600 hover:text-blue-800 hover:underline" data-nostr-type="${nostrType || 'unknown'}" data-bech32="${escapedId}">${displayText}</a>`;
  });

  // Process images: add max-width styling and data attributes
  processed = processImages(processed);

  // Process musical notation if enabled
  if (options.enableMusicalNotation) {
    processed = processMusicalNotation(processed);
  }

  // Clean up any leftover markdown syntax
  processed = cleanupMarkdown(processed);

  // Add styling classes
  processed = addStylingClasses(processed);

  // Hide raw ToC text
  processed = hideRawTocText(processed);

  return processed;
}
||||||
|
|
||||||
|
/**
 * Classifies a Nostr identifier by its leading prefix.
 *
 * @param id - Identifier text, e.g. the part following `nostr:`.
 * @returns The first of `npub`, `nprofile`, `nevent`, `naddr`, `note` that
 *          `id` starts with, or `null` when none of them matches.
 */
function getNostrType(id: string): 'npub' | 'nprofile' | 'nevent' | 'naddr' | 'note' | null {
  const knownPrefixes = ['npub', 'nprofile', 'nevent', 'naddr', 'note'] as const;
  const matched = knownPrefixes.find((prefix) => id.startsWith(prefix));
  return matched ?? null;
}
||||||
|
|
||||||
|
/**
 * Decorates every `<img>` tag that has a `src` attribute.
 *
 * For each such tag this:
 *   - removes existing `max-w-*` classes and appends the sizing classes
 *     (or adds a `class` attribute when the tag has none);
 *   - appends `data-asciidoc-image`, `data-image-index` and `data-image-src`.
 *
 * `data-image-index` is the zero-based position of the tag's `src` among the
 * distinct sources, in order of first appearance; repeated sources share one
 * index. Tags without a `src` attribute are returned unchanged.
 *
 * A trailing `/` (XHTML-style self-closing tag) is detached before attributes
 * are appended and re-attached afterwards, so the new attributes never end up
 * after the slash. Only a real `src` attribute is considered; `data-src` and
 * similar names are ignored.
 *
 * @param html - HTML to scan.
 * @returns The HTML with decorated `<img>` tags.
 */
function processImages(html: string): string {
  // Distinct src value -> order of first appearance.
  const indexBySrc = new Map<string, number>();

  return html.replace(/<img([^>]+)>/gi, (imgTag: string, rawAttributes: string) => {
    const selfClosing = /\/\s*$/.test(rawAttributes);
    let attributes = selfClosing ? rawAttributes.replace(/\s*\/\s*$/, '') : rawAttributes;

    // Anchored on start/whitespace so that e.g. `data-src=` is not mistaken for `src=`.
    const src = attributes.match(/(?:^|\s)src=["']([^"']+)["']/i)?.[1];
    if (!src) return imgTag;

    let imageIndex = indexBySrc.get(src);
    if (imageIndex === undefined) {
      imageIndex = indexBySrc.size;
      indexBySrc.set(src, imageIndex);
    }

    if (/class=["']/i.test(attributes)) {
      attributes = attributes.replace(/class=["']([^"']*)["']/i, (_classAttr: string, classes: string) => {
        // Drop any existing max-width utility before adding ours.
        const keptClasses = classes.replace(/max-w-\[?[^\s\]]+\]?/g, '').trim();
        const newClasses = keptClasses
          ? `${keptClasses} max-w-[400px] object-contain cursor-zoom-in`
          : 'max-w-[400px] object-contain cursor-zoom-in';
        return `class="${newClasses}"`;
      });
    } else {
      attributes += ` class="max-w-[400px] h-auto object-contain cursor-zoom-in"`;
    }

    // `src` was captured by [^"']+, so it cannot contain a quote and needs no escaping here.
    attributes += ` data-asciidoc-image="true" data-image-index="${imageIndex}" data-image-src="${src}"`;

    return `<img${attributes}${selfClosing ? ' /' : ''}>`;
  });
}
||||||
|
|
||||||
|
/**
 * Converts markdown image and link syntax that is still present in the HTML.
 *
 * `![alt](url)` becomes an `<img>` element and `[text](url)` becomes an
 * external-link anchor with an icon. A markdown link is left as-is when the
 * HTML (after image conversion) already contains `href="<url>"`.
 *
 * The URL and alt text come from document text, so their quote characters are
 * replaced by character references before being placed inside an attribute;
 * otherwise a `"` in the URL would close the attribute and allow arbitrary
 * attributes to be injected.
 *
 * @param html - HTML that may contain leftover markdown.
 * @returns The HTML with leftover markdown images/links converted.
 */
function cleanupMarkdown(html: string): string {
  // Entity bodies for the two quote characters; assembled into `&...;` below.
  const quoteEntities: Record<string, string> = { '"': 'quot', "'": '#39' };
  const escapeAttr = (value: string): string =>
    value.replace(/["']/g, (quote) => `&${quoteEntities[quote]};`);

  // Clean up markdown image syntax
  const withImages = html.replace(/!\[([^\]]*)\]\(([^)]+)\)/g, (_match: string, alt: string, url: string) => {
    const altText = alt || '';
    return `<img src="${escapeAttr(url)}" alt="${escapeAttr(altText)}" class="max-w-[400px] object-contain my-0" />`;
  });

  // Clean up markdown link syntax
  return withImages.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (wholeLink: string, text: string, url: string) => {
    // The URL is already linked somewhere in the document: keep the raw text.
    if (withImages.includes(`href="${url}"`)) {
      return wholeLink;
    }
    return `<a href="${escapeAttr(url)}" target="_blank" rel="noreferrer noopener" class="break-words inline-flex items-baseline gap-1">${text} <svg class="size-3" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 6H6a2 2 0 00-2 2v10a2 2 0 002 2h10a2 2 0 002-2v-4M14 4h6m0 0v6m0-6L10 14" /></svg></a>`;
  });
}
||||||
|
|
||||||
|
/**
 * Rewrites selected elements so they carry the extra CSS classes used for styling.
 *
 * The rules are applied one after another, in the order listed:
 * strikethrough, subscript and superscript spans gain additional classes, and
 * `highlightjs` `<pre>`/`<code>` class lists are replaced by `highlightjs hljs`.
 *
 * @param html - HTML to restyle.
 * @returns The HTML with the class attributes rewritten.
 */
function addStylingClasses(html: string): string {
  const rules: ReadonlyArray<readonly [RegExp, string]> = [
    // Strikethrough
    [/<span class="line-through">([^<]+)<\/span>/g, '<span class="line-through line-through-2">$1</span>'],
    // Subscript
    [/<span class="subscript">([^<]+)<\/span>/g, '<span class="subscript text-xs align-sub">$1</span>'],
    // Superscript
    [/<span class="superscript">([^<]+)<\/span>/g, '<span class="superscript text-xs align-super">$1</span>'],
    // Code highlighting
    [/<pre class="highlightjs[^"]*">/g, '<pre class="highlightjs hljs">'],
    [/<code class="highlightjs[^"]*">/g, '<code class="highlightjs hljs">'],
  ];

  return rules.reduce(
    (markup, [pattern, replacement]) => markup.replace(pattern, replacement),
    html,
  );
}
||||||
|
|
||||||
|
/**
 * Removes elements that contain raw AsciiDoc table-of-contents text.
 *
 * Three kinds of elements are deleted (case-insensitively):
 *   - a heading (`h1`-`h6`) containing `Table of Contents` followed by `(<digits>)`
 *   - a paragraph containing `Table of Contents` followed by `(<digits>)`
 *   - a paragraph containing `Assumptions` followed by `[n=0]`
 *
 * Each pattern is confined to a single element: the filler between the
 * opening tag, the marker text and the closing tag may not run past that
 * element's closing tag. Without this, a lazy wildcard starting at an earlier
 * element on the same line would swallow that element as well. The paragraph
 * patterns also require `<p` to be followed by whitespace or `>`, so `<pre>`
 * is not treated as a paragraph.
 *
 * @param html - HTML to clean.
 * @returns The HTML without the matched elements.
 */
function hideRawTocText(html: string): string {
  const tocHeading =
    /<h[1-6][^>]*>(?:(?!<\/h[1-6]>).)*?Table of Contents(?:(?!<\/h[1-6]>).)*?\(\d+\)(?:(?!<\/h[1-6]>).)*?<\/h[1-6]>/gi;
  const tocParagraph =
    /<p(?:\s[^>]*)?>(?:(?!<\/p>).)*?Table of Contents(?:(?!<\/p>).)*?\(\d+\)(?:(?!<\/p>).)*?<\/p>/gi;
  const assumptionsParagraph =
    /<p(?:\s[^>]*)?>(?:(?!<\/p>).)*?Assumptions(?:(?!<\/p>).)*?\[n=0\](?:(?!<\/p>).)*?<\/p>/gi;

  return html
    .replace(tocHeading, '')
    .replace(tocParagraph, '')
    .replace(assumptionsParagraph, '');
}
||||||
@ -1,37 +0,0 @@ |
|||||||
/**
 * Reports whether `content` contains a LaTeX math expression.
 *
 * Recognised forms are inline `$...$` and `\(...\)`, and block `$$...$$` and
 * `\[...\]`. The backslash-delimited forms run up to the first matching
 * closing delimiter, so bodies that themselves contain `)` or `]` (such as
 * `\(f(x)\)` or `\[a[1]\]`) are detected as well.
 *
 * @param content - Text to inspect.
 * @returns `true` when at least one math expression is present.
 */
export function hasLaTeX(content: string): boolean {
  // Inline math: $...$ or \(...\)
  const inlineMathPattern = /\$[^$]+\$|\\\([\s\S]+?\\\)/;
  // Block math: $$...$$ or \[...\]
  const blockMathPattern = /\$\$[^$]+\$\$|\\\[[\s\S]+?\\\]/;

  return inlineMathPattern.test(content) || blockMathPattern.test(content);
}
|
||||||
|
|
||||||
/**
 * Wraps LaTeX math expressions in HTML containers for MathJax/KaTeX.
 *
 * Block math (`$$...$$` or `\[...\]`) becomes
 * `<div class="math-block">\[...\]</div>`; afterwards inline math (`$...$` on
 * a single line, or `\(...\)`) becomes
 * `<span class="math-inline">\(...\)</span>`. The math body is trimmed.
 *
 * The backslash-delimited forms run up to the first matching closing
 * delimiter, so bodies that contain a bare `)` or `]` (e.g. `\(f(x)\)`) are
 * wrapped too.
 *
 * @param html - Content that may contain LaTeX math.
 * @returns The content with math expressions wrapped.
 */
export function processLaTeX(html: string): string {
  // Exactly one of the two capture groups is set per match.
  const pickBody = (dollarContent?: string, bracketContent?: string): string =>
    (dollarContent || bracketContent || '').trim();

  // Block math: $$...$$ or \[...\]
  const blockMathPattern = /\$\$([^$]+)\$\$|\\\[([\s\S]+?)\\\]/g;
  const withBlocks = html.replace(
    blockMathPattern,
    (_match: string, dollarContent?: string, bracketContent?: string) =>
      `<div class="math-block">\\[${pickBody(dollarContent, bracketContent)}\\]</div>`,
  );

  // Inline math: $...$ (no line breaks) or \(...\)
  const inlineMathPattern = /\$([^$\n]+)\$|\\\(([\s\S]+?)\\\)/g;
  return withBlocks.replace(
    inlineMathPattern,
    (_match: string, dollarContent?: string, bracketContent?: string) =>
      `<span class="math-inline">\\(${pickBody(dollarContent, bracketContent)}\\)</span>`,
  );
}
|
||||||
@ -1,49 +0,0 @@ |
|||||||
import { normalizeDTag } from './asciidoc-links'; |
|
||||||
|
|
||||||
/**
 * Rewrites wikilinks and bare `nostr:` references in Markdown into Markdown links.
 *
 * Wikilinks (`[[target]]` or `[[target|display text]]`) become
 * `[display](<linkBaseURL>/events?d=<normalized target>)`, or
 * `[display](#<normalized target>)` when `linkBaseURL` is empty.
 *
 * `nostr:` references become `[nostr:<id>](<url>)` when `linkBaseURL` is set:
 * `npub` ids link to `/profile?pubkey=`, `nprofile` ids to `/profile?id=`,
 * and every other kind to `/events?id=`. Without a base URL they are left as-is.
 *
 * An id is the known prefix, the `1` separator and a run of lowercase
 * letters/digits, so trailing punctuation or a closing parenthesis is not
 * swallowed into the id. A reference that is already the target of a
 * Markdown link (`](nostr:...)`) is not rewritten again.
 *
 * @param content - Markdown source.
 * @param linkBaseURL - Base URL for generated links; may be empty.
 * @returns The Markdown with links rewritten.
 */
export function rewriteMarkdownLinks(content: string, linkBaseURL: string): string {
  // Rewrite wikilinks: [[target]] or [[target|display text]]
  const withWikilinks = content.replace(/\[\[([^\]]+)\]\]/g, (_match: string, inner: string) => {
    const [rawTarget = '', rawDisplay] = inner.split('|', 2);
    const target = rawTarget.trim();
    // Without a '|' the target doubles as the display text.
    const display = rawDisplay === undefined ? target : rawDisplay.trim();

    const normalized = normalizeDTag(target);
    return linkBaseURL
      ? `[${display}](${linkBaseURL}/events?d=${normalized})`
      : `[${display}](#${normalized})`;
  });

  // Rewrite nostr: references that are not already a link target
  const nostrLinkRegex = /(?<!\]\()nostr:((?:naddr|nevent|note|npub|nprofile)1[a-z0-9]+)/g;
  return withWikilinks.replace(nostrLinkRegex, (match: string, nostrID: string) => {
    if (!linkBaseURL) return match;

    let url: string;
    if (nostrID.startsWith('npub')) {
      url = `${linkBaseURL}/profile?pubkey=${nostrID}`;
    } else if (nostrID.startsWith('nprofile')) {
      url = `${linkBaseURL}/profile?id=${nostrID}`;
    } else {
      url = `${linkBaseURL}/events?id=${nostrID}`;
    }
    return `[${match}](${url})`;
  });
}
|
||||||
@ -1,36 +0,0 @@ |
|||||||
import { marked } from 'marked'; |
|
||||||
import { ProcessResult } from '../types'; |
|
||||||
import { rewriteMarkdownLinks } from './markdown-links'; |
|
||||||
import { sanitizeHTML, processLinks } from './html-utils'; |
|
||||||
|
|
||||||
// Options handed to marked once, when this module is loaded.
const markedOptions = {
  gfm: true,
  breaks: true,
  headerIds: true,
  mangle: false,
};

marked.setOptions(markedOptions);
|
||||||
|
|
||||||
/**
 * Renders Markdown content to HTML.
 *
 * Steps: wikilinks and `nostr:` links are rewritten in the Markdown source,
 * the result is parsed by marked, passed through `sanitizeHTML`, and finally
 * through `processLinks` with the same base URL.
 *
 * @param content - Markdown source.
 * @param linkBaseURL - Base URL handed to the link rewriting/processing helpers.
 * @returns A result whose `content` is the final HTML; the table of contents
 *          is empty and both feature flags are `false`.
 */
export async function processMarkdown(content: string, linkBaseURL: string): Promise<ProcessResult> {
  const rewrittenMarkdown = rewriteMarkdownLinks(content, linkBaseURL);
  const renderedHtml = await marked.parse(rewrittenMarkdown) as string;
  const finalHtml = processLinks(sanitizeHTML(renderedHtml), linkBaseURL);

  const result: ProcessResult = {
    content: finalHtml,
    tableOfContents: '',
    hasLaTeX: false,
    hasMusicalNotation: false,
  };
  return result;
}
|
||||||
@ -1,28 +0,0 @@ |
|||||||
/**
 * Wraps every `nostr:<id>` occurrence in an HTML element.
 *
 * When `linkBaseURL` is non-empty and the id starts with a known prefix, the
 * address becomes an anchor: `naddr`, `nevent` and `note` link to
 * `/events?id=`, `npub` to `/profile?pubkey=`, and `nprofile` to
 * `/profile?id=`. In every other case the address is wrapped in a
 * `<span class="nostr-address">`.
 *
 * @param content - Text that may contain `nostr:` addresses.
 * @param linkBaseURL - Base URL for generated links; may be empty.
 * @returns The text with addresses wrapped.
 */
export function processNostrAddresses(content: string, linkBaseURL: string): string {
  // [id prefix, path + query key], checked in this order.
  const routes: ReadonlyArray<readonly [string, string]> = [
    ['naddr', '/events?id='],
    ['nevent', '/events?id='],
    ['note', '/events?id='],
    ['npub', '/profile?pubkey='],
    ['nprofile', '/profile?id='],
  ];

  return content.replace(/nostr:([a-z0-9]+[a-z0-9]{1,})/g, (address: string, nostrID: string) => {
    const route = linkBaseURL
      ? routes.find(([prefix]) => nostrID.startsWith(prefix))
      : undefined;

    if (route === undefined) {
      // No base URL or unknown id kind: style only.
      return `<span class="nostr-address">${address}</span>`;
    }
    return `<a href="${linkBaseURL}${route[1]}${nostrID}" class="nostr-address">${address}</a>`;
  });
}
|
||||||
@ -1,42 +0,0 @@ |
|||||||
import { ProcessResult } from '../types'; |
|
||||||
|
|
||||||
/**
 * Escapes the five HTML-significant characters (`&`, `<`, `>`, `"`, `'`) as
 * character references so the text can be embedded in HTML content or in a
 * quoted attribute value.
 *
 * All characters are replaced in a single pass, so an ampersand produced by
 * escaping one character is never escaped a second time.
 *
 * @param text - Raw text.
 * @returns The text with the special characters replaced by references.
 */
function escapeHTML(text: string): string {
  // Character -> entity body; the reference is assembled as '&' + body + ';'.
  const entityByChar: Record<string, string> = {
    '&': 'amp',
    '<': 'lt',
    '>': 'gt',
    '"': 'quot',
    "'": '#39',
  };

  return text.replace(/[&<>"']/g, (char) => `&${entityByChar[char]};`);
}
|
||||||
|
|
||||||
/**
 * Turns plain text into simple HTML paragraphs.
 *
 * The text is HTML-escaped, every newline becomes `<br>` followed by a
 * newline, and the result is split into paragraphs wherever two such breaks
 * are adjacent (i.e. at a blank line). Each non-empty, trimmed chunk is
 * wrapped in `<p>...</p>` and the paragraphs are joined with newlines.
 *
 * @param text - Raw plain text.
 * @returns A result whose `content` is the generated HTML; the table of
 *          contents is empty and both feature flags are `false`.
 */
export function processPlainText(text: string): ProcessResult {
  const withBreaks = escapeHTML(text).replace(/\n/g, '<br>\n');

  const paragraphs = withBreaks
    .split('<br>\n<br>\n')
    .map((chunk) => chunk.trim())
    .filter((chunk) => chunk.length > 0)
    .map((chunk) => `<p>${chunk}</p>`);

  return {
    content: paragraphs.join('\n'),
    tableOfContents: '',
    hasLaTeX: false,
    hasMusicalNotation: false,
  };
}
|
||||||
@ -0,0 +1,20 @@ |
|||||||
|
/**
 * Minimal ambient typings for `@asciidoctor/core`.
 *
 * Only the small surface declared here is typed; the package's own typings
 * should be preferred when they are available.
 */
declare module '@asciidoctor/core' {
  /** Options accepted as the second argument of `Asciidoctor.convert`. */
  interface ConvertOptions {
    safe?: string;
    backend?: string;
    doctype?: string;
    attributes?: Record<string, any>;
    extension_registry?: any;
  }

  /** Processor instance returned by the default-exported factory. */
  interface Asciidoctor {
    convert(content: string, options?: ConvertOptions): string | any;
  }

  /** Factory that creates an `Asciidoctor` processor. */
  function asciidoctor(): Asciidoctor;
  export default asciidoctor;
}
||||||
Loading…
Reference in new issue