You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
174 lines
5.4 KiB
174 lines
5.4 KiB
import asciidoctor from '@asciidoctor/core'; |
|
import { ProcessResult } from '../types'; |
|
import { extractTOC, sanitizeHTML, processLinks } from './html-utils'; |
|
import { postProcessHtml } from './html-postprocess'; |
|
|
|
// Module-level Asciidoctor processor, created once when this module loads and
// used by processAsciidoc for every conversion.
const asciidoctorInstance = asciidoctor();
|
|
|
/**
 * Optional settings accepted by `processAsciidoc`.
 * The three `enable*` flags are treated as `true` when omitted.
 */
export interface ProcessOptions {
  /** When true, the `source-highlighter` attribute is set to `highlight.js`; otherwise to `none`. */
  enableCodeHighlighting?: boolean;
  /** When true, the `stem` attribute is set to `latexmath` and the source is scanned for math markers. */
  enableLaTeX?: boolean;
  /** Forwarded to HTML post-processing of the body; also gates the `hasMusicalNotation` check. */
  enableMusicalNotation?: boolean;
  /** Original content for LaTeX detection; used instead of the converted source when non-empty. */
  originalContent?: string;
  /** Base URL for link processing; link processing is skipped when this is unset or empty. */
  linkBaseURL?: string;
}
|
|
|
/**
 * Chooses the Asciidoctor doctype from the first section title in the source.
 *
 * A document whose first title uses three or more `=` markers is converted as
 * a `book`; everything else (including documents without titles) is an
 * `article`.
 *
 * The marker must be followed by horizontal whitespace and title text on the
 * same line. Block delimiter lines such as `====` are therefore not mistaken
 * for section titles (a plain `\s+` would also match the line break after a
 * delimiter).
 */
function detectDoctype(content: string): 'article' | 'book' {
  const firstTitle = /^(={1,6})[ \t]+\S/m.exec(content);
  const markerLength = firstTitle?.[1]?.length ?? 0;
  return markerLength >= 3 ? 'book' : 'article';
}

/** Sanitizes an HTML fragment and then runs it through the HTML post-processor. */
function sanitizeAndPostProcess(
  html: string,
  enableMusicalNotation: boolean,
  linkBaseURL: string | undefined
): string {
  return postProcessHtml(sanitizeHTML(html), {
    enableMusicalNotation,
    linkBaseURL,
  });
}

/** Applies link processing when a base URL is configured; otherwise returns the HTML unchanged. */
function applyLinkProcessing(html: string, linkBaseURL: string | undefined): string {
  return linkBaseURL ? processLinks(html, linkBaseURL) : html;
}

/**
 * Processes AsciiDoc content to HTML using AsciiDoctor.
 *
 * The source is converted with the shared Asciidoctor instance (requesting
 * highlight.js source highlighting and latexmath STEM through attributes when
 * enabled), the table of contents is split from the body, and both parts are
 * sanitized, post-processed and, when `linkBaseURL` is set, link-processed.
 *
 * @param content - AsciiDoc source to convert.
 * @param options - Feature toggles and link settings; see `ProcessOptions`.
 * @returns The processed body and table of contents plus `hasLaTeX` /
 *   `hasMusicalNotation` flags. The link/media arrays are returned empty.
 *   If conversion or processing throws, the error message is written to
 *   `process.stderr` (when available) and a fallback result is returned that
 *   wraps the sanitized source in a single `<p>` element.
 */
export async function processAsciidoc(
  content: string,
  options: ProcessOptions = {}
): Promise<ProcessResult> {
  const {
    enableCodeHighlighting = true,
    enableLaTeX = true,
    enableMusicalNotation = true,
  } = options;

  // Asciidoctor's article doctype expects `=` or `==` before `===`;
  // documents that open with a deeper title are converted as a book instead.
  const doctype = detectDoctype(content);

  try {
    const result = asciidoctorInstance.convert(content, {
      safe: 'safe',
      backend: 'html5',
      doctype,
      attributes: {
        'showtitle': true,
        'sectanchors': true,
        'sectlinks': true,
        'toc': 'left',
        'toclevels': 6,
        'toc-title': 'Table of Contents',
        'source-highlighter': enableCodeHighlighting ? 'highlight.js' : 'none',
        'stem': enableLaTeX ? 'latexmath' : 'none',
        'data-uri': true,
        'imagesdir': '',
        'linkcss': false,
        'stylesheet': '',
        'stylesdir': '',
        'prewrap': true,
        'sectnums': false,
        'sectnumlevels': 6,
        'experimental': true,
        'compat-mode': false,
        'attribute-missing': 'warn',
        'attribute-undefined': 'warn',
        'skip-front-matter': true,
        'source-indent': 0,
        'indent': 0,
        'tabsize': 2,
        'tabwidth': 2,
        'hardbreaks': false,
        'paragraph-rewrite': 'normal',
        'sectids': true,
        'idprefix': '',
        'idseparator': '-',
        'sectidprefix': '',
        'sectidseparator': '-'
      }
    });

    const htmlString = typeof result === 'string' ? result : result.toString();

    // Split the generated table of contents from the document body.
    const { toc, contentWithoutTOC } = extractTOC(htmlString);

    // Body: sanitize (XSS protection), post-process, then process links.
    const processed = sanitizeAndPostProcess(
      contentWithoutTOC,
      enableMusicalNotation,
      options.linkBaseURL
    );
    const processedWithLinks = applyLinkProcessing(processed, options.linkBaseURL);

    // TOC: same pipeline, but musical notation is never processed in the TOC.
    const tocProcessed = sanitizeAndPostProcess(toc, false, options.linkBaseURL);
    const tocProcessedWithLinks = applyLinkProcessing(tocProcessed, options.linkBaseURL);

    // Check for LaTeX in original content (more reliable than checking HTML).
    const contentToCheck = options.originalContent || content;
    const hasLaTeX = enableLaTeX && hasMathContent(contentToCheck);

    // Musical notation is detected on the post-processed body, before link processing.
    const hasMusicalNotation = enableMusicalNotation && (
      /class="abc-notation"|class="lilypond-notation"|class="chord"|class="musicxml-notation"/.test(processed)
    );

    return {
      content: processedWithLinks,
      tableOfContents: tocProcessedWithLinks,
      hasLaTeX,
      hasMusicalNotation,
      nostrLinks: [], // Will be populated by metadata extraction
      wikilinks: [],
      hashtags: [],
      links: [],
      media: [],
    };
  } catch (error) {
    // Fall back to showing the source text, logging the failure where possible.
    const errorMessage = error instanceof Error ? error.message : String(error);

    // Use process.stderr.write for Node.js compatibility instead of console.error.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const nodeProcess = (globalThis as any).process;
    if (nodeProcess?.stderr) {
      nodeProcess.stderr.write(`Error processing AsciiDoc: ${errorMessage}\n`);
    }

    // Run the raw source through the sanitizer before embedding it in markup.
    // NOTE(review): this sanitizes rather than entity-escapes, so markup in the
    // source presumably still renders as HTML - confirm that is intended.
    const sanitizedSource = sanitizeHTML(content);

    return {
      content: `<p>${sanitizedSource}</p>`,
      tableOfContents: '',
      hasLaTeX: false,
      hasMusicalNotation: false,
      nostrLinks: [],
      wikilinks: [],
      hashtags: [],
      links: [],
      media: [],
    };
  }
}
|
|
|
// Inline math: `$...$` or `\(...\)`. The parenthesised form is matched lazily
// up to the first `\)` so expressions that contain `)` - e.g. `\(f(x)\)` -
// are recognised.
const INLINE_MATH_PATTERN = /\$[^$]+\$|\\\([\s\S]+?\\\)/;

// Block math: `$$...$$` or `\[...\]`, possibly spanning several lines.
const BLOCK_MATH_PATTERN = /\$\$[\s\S]*?\$\$|\\\[[\s\S]*?\\\]/;

/**
 * Check if content has LaTeX math.
 * Based on jumble's detection pattern.
 *
 * @param content - Source text to scan.
 * @returns `true` when the text contains an inline (`$...$`, `\(...\)`) or
 *   block (`$$...$$`, `\[...\]`) math delimiter pair, otherwise `false`.
 */
function hasMathContent(content: string): boolean {
  return INLINE_MATH_PATTERN.test(content) || BLOCK_MATH_PATTERN.test(content);
}
|
|
|