Browse Source

refactor

master
Silberengel 2 weeks ago
parent
commit
9cedf345b3
  1. 630
      generate-test-report.ts
  2. 23
      jest.config.js
  3. 973
      src/converters/to-asciidoc.ts
  4. 16
      src/processors/asciidoc.ts
  5. 157
      src/processors/html-postprocess.js
  6. 666
      src/processors/html-postprocess.ts
  7. 352
      src/processors/html-utils.ts
  8. 586
      src/utils/report-generator.ts
  9. 573
      test-parser-report.test.ts
  10. 19277
      test-report.html
  11. 2
      tsconfig.json
  12. 10
      tsconfig.test.json

630
generate-test-report.ts

@ -1,49 +1,47 @@
import { Parser } from './src/parser'; import { Parser } from './src/parser';
import { generateHTMLReport, ReportData } from './src/utils/report-generator';
import * as fs from 'fs'; import * as fs from 'fs';
import * as path from 'path'; import * as path from 'path';
/** /**
* Script that parses both markdown and asciidoc test documents * Standalone script to generate HTML test report
* and generates an HTML report showing the parsing results * Run with: npm run test:report
*/ */
/**
 * One parsed test document: the raw source text that was fed to the parser
 * together with whatever the parser returned for it.
 */
type TestData = {
  /** Source text exactly as read from the test document. */
  original: string;
  /** Parser output for `original`; left untyped here. */
  result: any;
};

/**
 * Input for the HTML report: one parsed document per supported markup format.
 */
type ReportData = {
  /** Result of parsing the Markdown test document. */
  markdown: TestData;
  /** Result of parsing the AsciiDoc test document. */
  asciidoc: TestData;
};
async function main() { async function main() {
console.log('📝 Generating test report...\n');
// Initialize parser
const parser = new Parser({ const parser = new Parser({
linkBaseURL: 'https://example.com', linkBaseURL: 'https://example.com',
wikilinkUrl: '/events?d={dtag}', wikilinkUrl: '/events?d={dtag}',
hashtagUrl: '/notes?t={topic}', hashtagUrl: '/notes?t={topic}',
}); });
console.log('Reading test documents...');
// Read test documents // Read test documents
const markdownContent = fs.readFileSync( const markdownPath = path.join(__dirname, 'markdown_testdoc.md');
path.join(__dirname, 'markdown_testdoc.md'), const asciidocPath = path.join(__dirname, 'asciidoc_testdoc.adoc');
'utf-8'
); if (!fs.existsSync(markdownPath)) {
const asciidocContent = fs.readFileSync( console.error(`❌ Error: ${markdownPath} not found`);
path.join(__dirname, 'asciidoc_testdoc.adoc'), process.exit(1);
'utf-8' }
);
if (!fs.existsSync(asciidocPath)) {
console.error(`❌ Error: ${asciidocPath} not found`);
process.exit(1);
}
console.log('Parsing markdown document...'); const markdownContent = fs.readFileSync(markdownPath, 'utf-8');
const asciidocContent = fs.readFileSync(asciidocPath, 'utf-8');
console.log('📄 Parsing markdown document...');
const markdownResult = await parser.process(markdownContent); const markdownResult = await parser.process(markdownContent);
console.log('Parsing asciidoc document...'); console.log('📄 Parsing asciidoc document...');
const asciidocResult = await parser.process(asciidocContent); const asciidocResult = await parser.process(asciidocContent);
console.log('Generating HTML report...'); console.log('🎨 Generating HTML report...');
// Generate HTML report
const htmlReport = generateHTMLReport({ const htmlReport = generateHTMLReport({
markdown: { markdown: {
original: markdownContent, original: markdownContent,
@ -55,588 +53,16 @@ async function main() {
}, },
}); });
// Write HTML report to file (force fresh write) // Write HTML report to file
const reportPath = path.join(__dirname, 'test-report.html'); const reportPath = path.join(__dirname, 'test-report.html');
// Delete old report if it exists to ensure fresh generation
if (fs.existsSync(reportPath)) {
fs.unlinkSync(reportPath);
}
fs.writeFileSync(reportPath, htmlReport, 'utf-8'); fs.writeFileSync(reportPath, htmlReport, 'utf-8');
const reportUrl = `file://${reportPath}`; console.log(`\n✅ Test report generated: ${reportPath}`);
console.log(`\n✅ Test report generated successfully!`);
console.log(` File: ${reportPath}`);
console.log(` Size: ${(htmlReport.length / 1024).toFixed(2)} KB`);
console.log(` Timestamp: ${new Date().toISOString()}`);
console.log(` Open this file in your browser to view the results.\n`); console.log(` Open this file in your browser to view the results.\n`);
} }
/** Stylesheet inlined into the report so the output is a single self-contained file. */
const REPORT_STYLES = `
    * {
      margin: 0;
      padding: 0;
      box-sizing: border-box;
    }
    body {
      font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
      line-height: 1.6;
      color: #333;
      background: #f5f5f5;
      padding: 20px;
    }
    .container {
      max-width: 1400px;
      margin: 0 auto;
    }
    h1 {
      color: #2c3e50;
      margin-bottom: 10px;
      font-size: 2.5em;
    }
    .subtitle {
      color: #7f8c8d;
      margin-bottom: 30px;
      font-size: 1.1em;
    }
    .section {
      background: white;
      border-radius: 8px;
      padding: 30px;
      margin-bottom: 30px;
      box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    }
    .section h2 {
      color: #34495e;
      margin-bottom: 20px;
      padding-bottom: 10px;
      border-bottom: 2px solid #3498db;
      font-size: 1.8em;
    }
    .section h3 {
      color: #2c3e50;
      margin-top: 25px;
      margin-bottom: 15px;
      font-size: 1.3em;
    }
    .tabs {
      display: flex;
      gap: 10px;
      margin-bottom: 20px;
      border-bottom: 2px solid #e0e0e0;
    }
    .tab {
      padding: 12px 24px;
      background: #f8f9fa;
      border: none;
      border-top-left-radius: 6px;
      border-top-right-radius: 6px;
      cursor: pointer;
      font-size: 1em;
      font-weight: 500;
      color: #555;
      transition: all 0.2s;
    }
    .tab:hover {
      background: #e9ecef;
    }
    .tab.active {
      background: #3498db;
      color: white;
    }
    .tab-content {
      display: none;
    }
    .tab-content.active {
      display: block;
    }
    .metadata-grid {
      display: grid;
      grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
      gap: 15px;
      margin-top: 15px;
    }
    .metadata-item {
      background: #f8f9fa;
      padding: 12px;
      border-radius: 4px;
      border-left: 3px solid #3498db;
    }
    .metadata-item strong {
      color: #2c3e50;
      display: block;
      margin-bottom: 5px;
    }
    .metadata-item code {
      background: #e9ecef;
      padding: 2px 6px;
      border-radius: 3px;
      font-size: 0.9em;
    }
    .code-block {
      background: #2d2d2d;
      color: #f8f8f2;
      padding: 15px;
      border-radius: 6px;
      overflow-x: auto;
      font-family: 'Courier New', monospace;
      font-size: 0.9em;
      line-height: 1.5;
      margin: 15px 0;
      max-height: 400px;
      overflow-y: auto;
    }
    .code-block pre {
      margin: 0;
      white-space: pre-wrap;
      word-wrap: break-word;
    }
    .rendered-output {
      background: white;
      border: 1px solid #ddd;
      padding: 20px;
      border-radius: 6px;
      margin: 15px 0;
      min-height: 200px;
    }
    .rendered-output * {
      max-width: 100%;
    }
    .stats {
      display: grid;
      grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
      gap: 15px;
      margin-top: 20px;
    }
    .stat-card {
      background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
      color: white;
      padding: 20px;
      border-radius: 8px;
      text-align: center;
    }
    .stat-card .number {
      font-size: 2.5em;
      font-weight: bold;
      margin-bottom: 5px;
    }
    .stat-card .label {
      font-size: 0.9em;
      opacity: 0.9;
    }
    .list-item {
      background: #f8f9fa;
      padding: 8px 12px;
      margin: 5px 0;
      border-radius: 4px;
      border-left: 3px solid #95a5a6;
    }
    .list-item code {
      background: #e9ecef;
      padding: 2px 6px;
      border-radius: 3px;
      font-size: 0.85em;
    }
    .success-badge {
      display: inline-block;
      background: #27ae60;
      color: white;
      padding: 4px 12px;
      border-radius: 12px;
      font-size: 0.85em;
      font-weight: 500;
      margin-left: 10px;
    }
    .warning-badge {
      display: inline-block;
      background: #f39c12;
      color: white;
      padding: 4px 12px;
      border-radius: 12px;
      font-size: 0.85em;
      font-weight: 500;
      margin-left: 10px;
    }
    .comparison {
      display: grid;
      grid-template-columns: 1fr 1fr;
      gap: 20px;
      margin-top: 20px;
    }
    @media (max-width: 768px) {
      .comparison {
        grid-template-columns: 1fr;
      }
    }
    .json-view {
      background: #f8f9fa;
      padding: 15px;
      border-radius: 6px;
      overflow-x: auto;
      font-family: 'Courier New', monospace;
      font-size: 0.85em;
      max-height: 300px;
      overflow-y: auto;
    }
`;

/**
 * Client-side tab switching embedded in the report.
 *
 * The clicked button is passed in explicitly (instead of reading the deprecated
 * implicit global `event`), and only tabs/panels inside the button's own
 * `.section` are reset, so switching a tab in one document section no longer
 * hides the active panel of the other section.
 */
const REPORT_SCRIPT = `
    function showTab(tabButton, tabId) {
      // Restrict the reset to the section that owns the clicked tab
      var section = tabButton.closest('.section') || document;
      section.querySelectorAll('.tab-content').forEach(function (content) {
        content.classList.remove('active');
      });
      section.querySelectorAll('.tab').forEach(function (tab) {
        tab.classList.remove('active');
      });
      // Show selected tab content
      var selectedContent = document.getElementById(tabId);
      if (selectedContent) {
        selectedContent.classList.add('active');
      }
      // Mark the clicked tab as active
      tabButton.classList.add('active');
    }
`;

/** Tab id suffixes and labels, in display order; the first entry starts active. */
const REPORT_TABS = [
  ['overview', 'Overview'],
  ['original', 'Original Content'],
  ['rendered', 'Rendered Output'],
  ['metadata', 'Metadata'],
] as const;

/** Renders one statistic tile for the overview tab. */
function renderStatCard(value: string | number, label: string): string {
  return `
        <div class="stat-card">
          <div class="number">${value}</div>
          <div class="label">${label}</div>
        </div>`;
}

/**
 * Renders a titled list for the metadata tab, or an empty string when there
 * are no items (so empty categories are omitted from the report).
 */
function renderMetadataList<T>(
  heading: string,
  items: readonly T[],
  renderItem: (item: T) => string,
): string {
  if (items.length === 0) {
    return '';
  }
  const rows = items
    .map(
      (item) => `
        <div class="list-item">
          ${renderItem(item)}
        </div>`,
    )
    .join('');
  return `
        <h4>${heading} (${items.length})</h4>${rows}`;
}

/**
 * Renders the full report section (tabs plus four panels) for one document.
 *
 * @param idPrefix - Prefix for the panel element ids (e.g. `md` yields `md-overview`).
 * @param formatName - Human-readable format name used in headings.
 * @param doc - Original source text and parser result for the document.
 */
function renderDocumentSection(
  idPrefix: string,
  formatName: string,
  doc: ReportData['markdown'],
): string {
  const { original, result } = doc;

  const tabButtons = REPORT_TABS.map(
    ([suffix, label], index) =>
      `        <button class="tab${index === 0 ? ' active' : ''}" onclick="showTab(this, '${idPrefix}-${suffix}')">${label}</button>`,
  ).join('\n');

  const statCards = [
    renderStatCard(result.nostrLinks.length, 'Nostr Links'),
    renderStatCard(result.wikilinks.length, 'Wikilinks'),
    renderStatCard(result.hashtags.length, 'Hashtags'),
    renderStatCard(result.links.length, 'Links'),
    renderStatCard(result.media.length, 'Media URLs'),
    renderStatCard(result.hasLaTeX ? 'Yes' : 'No', 'Has LaTeX'),
    renderStatCard(result.hasMusicalNotation ? 'Yes' : 'No', 'Has Music'),
  ].join('');

  const frontmatter = result.frontmatter
    ? `
        <div class="metadata-grid">${Object.entries(result.frontmatter)
          .map(
            ([key, value]) => `
          <div class="metadata-item">
            <strong>${escapeHtml(key)}</strong>
            <code>${
              // JSON.stringify yields undefined for undefined/function values.
              escapeHtml(JSON.stringify(value) ?? '')
            }</code>
          </div>`,
          )
          .join('')}
        </div>`
    : '<p><em>No frontmatter found</em></p>';

  const metadata = [
    renderMetadataList(
      'Nostr Links',
      result.nostrLinks,
      (link: any) =>
        `<strong>${escapeHtml(link.type)}</strong>: <code>${escapeHtml(link.bech32)}</code>${
          link.text ? ` - ${escapeHtml(link.text)}` : ''
        }`,
    ),
    renderMetadataList(
      'Wikilinks',
      result.wikilinks,
      (wl: any) =>
        `<code>${escapeHtml(wl.original)}</code> dtag: <code>${escapeHtml(wl.dtag)}</code>${
          wl.display ? ` (display: ${escapeHtml(wl.display)})` : ''
        }`,
    ),
    renderMetadataList(
      'Hashtags',
      result.hashtags,
      (tag: string) => `<code>#${escapeHtml(tag)}</code>`,
    ),
    renderMetadataList(
      'Links',
      result.links,
      (link: any) =>
        // rel="noopener noreferrer" keeps the opened page from reaching window.opener.
        `<a href="${escapeHtml(link.url)}" target="_blank" rel="noopener noreferrer">${escapeHtml(
          link.text || link.url,
        )}</a>${link.isExternal ? '<span class="warning-badge">External</span>' : ''}`,
    ),
    renderMetadataList(
      'Media URLs',
      result.media,
      (url: string) =>
        `<a href="${escapeHtml(url)}" target="_blank" rel="noopener noreferrer">${escapeHtml(url)}</a>`,
    ),
  ].join('');

  const tableOfContents = result.tableOfContents
    ? `
        <h4>Table of Contents</h4>
        <div class="rendered-output">
          ${result.tableOfContents}
        </div>`
    : '';

  return `
    <div class="section">
      <h2>${formatName} Document Test <span class="success-badge"> Parsed</span></h2>
      <div class="tabs">
${tabButtons}
      </div>
      <div id="${idPrefix}-overview" class="tab-content active">
        <div class="stats">${statCards}
        </div>
        <h3>Frontmatter</h3>
        ${frontmatter}
      </div>
      <div id="${idPrefix}-original" class="tab-content">
        <h3>Original ${formatName} Content</h3>
        <div class="code-block">
          <pre>${escapeHtml(original)}</pre>
        </div>
      </div>
      <div id="${idPrefix}-rendered" class="tab-content">
        <h3>Rendered HTML Output</h3>
        <div class="rendered-output">
          ${result.content}
        </div>
        <details style="margin-top: 15px;">
          <summary style="cursor: pointer; color: #3498db; font-weight: 500;">View Raw HTML</summary>
          <div class="code-block" style="margin-top: 10px;">
            <pre>${escapeHtml(result.content)}</pre>
          </div>
        </details>
      </div>
      <div id="${idPrefix}-metadata" class="tab-content">
        <h3>Extracted Metadata</h3>${metadata}${tableOfContents}
      </div>
    </div>`;
}

/**
 * Builds the complete, self-contained HTML test report.
 *
 * The report has one section per document format (Markdown, AsciiDoc), each
 * with Overview, Original Content, Rendered Output and Metadata tabs. Parser
 * output (`result.content`, `result.tableOfContents`) is embedded as live HTML
 * on purpose so the rendering can be inspected; all other values are passed
 * through `escapeHtml`.
 *
 * @param data - Original text and parser result for both test documents.
 * @returns The full HTML document as a string.
 */
function generateHTMLReport(data: ReportData): string {
  const { markdown, asciidoc } = data;
  return `<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>GC Parser Test Report</title>
  <style>${REPORT_STYLES}  </style>
</head>
<body>
  <div class="container">
    <h1>GC Parser Test Report</h1>
    <p class="subtitle">Generated: ${new Date().toLocaleString()}</p>
    <!-- Markdown Section -->${renderDocumentSection('md', 'Markdown', markdown)}
    <!-- AsciiDoc Section -->${renderDocumentSection('ad', 'AsciiDoc', asciidoc)}
  </div>
  <script>${REPORT_SCRIPT}  </script>
</body>
</html>`;
}
/** HTML-significant characters and their entity replacements (built once, not per call). */
const HTML_ESCAPES: Readonly<Record<string, string>> = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;',
  "'": '&#039;',
};

/**
 * Escapes `&`, `<`, `>`, `"` and `'` so the text can be embedded in HTML
 * element content or a quoted attribute value.
 *
 * Callers pass fields read from an untyped parser result, so the value may be
 * missing or not a string at runtime: `null`/`undefined` produce an empty
 * string instead of throwing, and any other value is converted with `String()`.
 *
 * @param text - Text to escape.
 * @returns The escaped text, or `''` when `text` is `null` or `undefined`.
 */
function escapeHtml(text: string | null | undefined): string {
  if (text == null) {
    return '';
  }
  // The fallback to `ch` is never hit for this character class; it keeps the
  // callback's return type `string` under `noUncheckedIndexedAccess`.
  return String(text).replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch] ?? ch);
}
// Run the script // Run the script
main().catch((error) => { main().catch((error) => {
console.error('Error generating test report:', error); console.error('❌ Error generating test report:', error);
process.exit(1); process.exit(1);
}); });

23
jest.config.js

@ -8,18 +8,15 @@ module.exports = {
'src/**/*.ts', 'src/**/*.ts',
'!src/**/*.d.ts', '!src/**/*.d.ts',
], ],
globals: { transform: {
'ts-jest': { '^.+\\.ts$': ['ts-jest', {
tsconfig: { tsconfig: 'tsconfig.test.json',
target: 'ES2020', }],
module: 'commonjs',
lib: ['ES2020'],
types: ['node'],
strict: true,
esModuleInterop: true,
skipLibCheck: true,
forceConsistentCasingInFileNames: true,
},
},
}, },
// Don't transform AsciiDoctor packages - they use Opal runtime which breaks with Jest transformation
// AsciiDoctor uses CommonJS and Opal runtime, so we need to exclude it from transformation
// The pattern matches paths to ignore (not transform)
transformIgnorePatterns: [
'/node_modules/@asciidoctor/',
],
}; };

973
src/converters/to-asciidoc.ts

File diff suppressed because it is too large Load Diff

16
src/processors/asciidoc.ts

@ -1,9 +1,18 @@
import asciidoctor from '@asciidoctor/core';
import { ProcessResult } from '../types'; import { ProcessResult } from '../types';
import { extractTOC, sanitizeHTML, processLinks } from './html-utils'; import { extractTOC, sanitizeHTML, processLinks } from './html-utils';
import { postProcessHtml } from './html-postprocess'; import { postProcessHtml } from './html-postprocess';
const asciidoctorInstance = asciidoctor(); // Lazy-load AsciiDoctor instance to avoid issues with Jest module transformation
// Use dynamic import to prevent Jest from trying to transform the Opal runtime
// Cached Asciidoctor processor; stays null until the first successful load.
let asciidoctorInstance: any = null;
// In-flight load shared by concurrent callers; null when no load is running.
let asciidoctorLoad: Promise<any> | null = null;

/**
 * Returns the shared Asciidoctor processor, loading `@asciidoctor/core` with a
 * dynamic import on first use.
 *
 * Callers that arrive while the first load is still pending await the same
 * promise, so only one processor is ever created. Previously every caller that
 * passed the null check before the import resolved built its own instance.
 * A failed load is not cached: the next call starts a fresh import.
 *
 * @returns The Asciidoctor processor created by the package's default export.
 * @throws Whatever the dynamic import or the factory call rejects/throws with.
 */
async function getAsciidoctorInstance() {
  if (asciidoctorInstance) {
    return asciidoctorInstance;
  }
  if (!asciidoctorLoad) {
    asciidoctorLoad = (async () => {
      const asciidoctor = await import('@asciidoctor/core');
      return asciidoctor.default();
    })();
  }
  const currentLoad = asciidoctorLoad;
  try {
    asciidoctorInstance = await currentLoad;
  } finally {
    // Clear only our own load so a retry started by another caller is kept.
    if (asciidoctorLoad === currentLoad) {
      asciidoctorLoad = null;
    }
  }
  return asciidoctorInstance;
}
export interface ProcessOptions { export interface ProcessOptions {
enableCodeHighlighting?: boolean; enableCodeHighlighting?: boolean;
@ -43,7 +52,8 @@ export async function processAsciidoc(
} }
try { try {
const result = asciidoctorInstance.convert(content, { const instance = await getAsciidoctorInstance();
const result = instance.convert(content, {
safe: 'safe', safe: 'safe',
backend: 'html5', backend: 'html5',
doctype: doctype, doctype: doctype,

157
src/processors/html-postprocess.js

@ -70,6 +70,57 @@ function postProcessHtml(html, options = {}) {
const escapedUrl = url.replace(/"/g, '&quot;').replace(/'/g, '&#39;'); const escapedUrl = url.replace(/"/g, '&quot;').replace(/'/g, '&#39;');
return `<a class="wikilink text-primary-600 dark:text-primary-500 hover:underline" data-dtag="${escapedDtag}" data-url="${escapedUrl}" href="${escapedUrl}">${escapedDisplay}</a>`; return `<a class="wikilink text-primary-600 dark:text-primary-500 hover:underline" data-dtag="${escapedDtag}" data-url="${escapedUrl}" href="${escapedUrl}">${escapedDisplay}</a>`;
}); });
// Convert any leftover link: macros that AsciiDoctor didn't convert
// This MUST run before processOpenGraphLinks which removes "link:" prefixes
// This handles cases where AsciiDoctor couldn't parse the link (e.g., link text with special chars)
// Pattern: link:url[text] where url is http/https and text can contain any characters
// Match link: macros that are still in the HTML as plain text (not converted by AsciiDoctor)
// Also handle HTML-escaped versions that might appear
processed = processed.replace(/link:(https?:\/\/[^\[]+)\[([^\]]+)\]/g, (_match, url, text) => {
// Unescape if already HTML-escaped (but be careful not to unescape actual content)
let unescapedUrl = url;
// Only unescape if it looks like it was escaped (contains &amp; or &quot;)
if (url.includes('&amp;') || url.includes('&quot;') || url.includes('&#39;')) {
unescapedUrl = url
.replace(/&amp;/g, '&')
.replace(/&lt;/g, '<')
.replace(/&gt;/g, '>')
.replace(/&quot;/g, '"')
.replace(/&#39;/g, "'");
}
let unescapedText = text;
// Only unescape if it looks like it was escaped
if (text.includes('&amp;') || text.includes('&lt;') || text.includes('&gt;') || text.includes('&quot;') || text.includes('&#39;')) {
unescapedText = text
.replace(/&amp;/g, '&')
.replace(/&lt;/g, '<')
.replace(/&gt;/g, '>')
.replace(/&quot;/g, '"')
.replace(/&#39;/g, "'");
}
// Escape URL for HTML attribute (fresh escape, no double-escaping)
const escapedUrl = unescapedUrl
.replace(/&/g, '&amp;')
.replace(/"/g, '&quot;')
.replace(/'/g, '&#39;');
// Escape text content for HTML (fresh escape, no double-escaping)
const escapedText = unescapedText
.replace(/&/g, '&amp;')
.replace(/</g, '&lt;')
.replace(/>/g, '&gt;')
.replace(/"/g, '&quot;')
.replace(/'/g, '&#39;');
// Check if link text contains wss:// or ws:// - these are relay URLs, don't add OpenGraph
const isRelayUrl = /wss?:\/\//i.test(unescapedText);
if (isRelayUrl) {
// Simple link without OpenGraph wrapper
return `<a href="${escapedUrl}" target="_blank" rel="noreferrer noopener" class="break-words inline-flex items-baseline gap-1">${escapedText} <svg style="width: 0.75rem; height: 0.75rem; flex-shrink: 0;" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 6H6a2 2 0 00-2 2v10a2 2 0 002 2h10a2 2 0 002-2v-4M14 4h6m0 0v6m0-6L10 14" /></svg></a>`;
}
else {
// Regular link - will be processed by OpenGraph handler if external
return `<a href="${escapedUrl}" target="_blank" rel="noreferrer noopener" class="break-words inline-flex items-baseline gap-1">${escapedText} <svg style="width: 0.75rem; height: 0.75rem; flex-shrink: 0;" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 6H6a2 2 0 00-2 2v10a2 2 0 002 2h10a2 2 0 002-2v-4M14 4h6m0 0v6m0-6L10 14" /></svg></a>`;
}
});
// Convert nostr: links to HTML // Convert nostr: links to HTML
processed = processed.replace(/link:nostr:([^[]+)\[([^\]]+)\]/g, (_match, bech32Id, displayText) => { processed = processed.replace(/link:nostr:([^[]+)\[([^\]]+)\]/g, (_match, bech32Id, displayText) => {
const nostrType = getNostrType(bech32Id); const nostrType = getNostrType(bech32Id);
@ -89,39 +140,57 @@ function postProcessHtml(html, options = {}) {
return `<a href="nostr:${bech32Id}" class="nostr-link text-blue-600 hover:text-blue-800 hover:underline" data-nostr-type="${nostrType || 'unknown'}" data-bech32="${escaped}">${displayText}</a>`; return `<a href="nostr:${bech32Id}" class="nostr-link text-blue-600 hover:text-blue-800 hover:underline" data-nostr-type="${nostrType || 'unknown'}" data-bech32="${escaped}">${displayText}</a>`;
} }
}); });
// Convert any leftover link: macros that AsciiDoctor didn't convert // Process media URLs (YouTube, Spotify, video, audio)
// This handles cases where AsciiDoctor couldn't parse the link (e.g., link text with special chars) processed = processMedia(processed);
// Pattern: link:url[text] where url is http/https and text can contain any characters // Fix double-escaped quotes in href attributes FIRST (before any other processing)
processed = processed.replace(/link:(https?:\/\/[^\[]+)\[([^\]]+)\]/g, (_match, url, text) => { // This fixes href="&quot;url&quot;" -> href="url"
// Escape URL and text for HTML attributes processed = processed.replace(/href\s*=\s*["']&quot;(https?:\/\/[^"']+)&quot;["']/gi, (_match, url) => {
const escapedUrl = url.replace(/"/g, '&quot;').replace(/'/g, '&#39;'); const escapedUrl = url.replace(/"/g, '&quot;').replace(/'/g, '&#39;');
const escapedText = text return `href="${escapedUrl}"`;
});
// Process OpenGraph links (external links that should have rich previews)
processed = processOpenGraphLinks(processed, options.linkBaseURL);
// Process images: add max-width styling and data attributes
processed = processImages(processed);
// Process musical notation if enabled
if (options.enableMusicalNotation) {
processed = (0, music_1.processMusicalNotation)(processed);
}
// Clean up any escaped HTML that appears as text (e.g., &lt;a href=...&gt;)
// This can happen when AsciiDoctor escapes link macros that it couldn't parse
// Pattern: &lt;a href="url"&gt;text&lt;/a&gt; should be converted to actual HTML
// Use a more flexible pattern that handles text with special characters like ://
// Fix regular escaped HTML links
processed = processed.replace(/&lt;a\s+href=["'](https?:\/\/[^"']+)["']\s*&gt;([^<]+)&lt;\/a&gt;/gi, (_match, url, text) => {
// Unescape the URL and text
const unescapedUrl = url
.replace(/&amp;/g, '&')
.replace(/&quot;/g, '"')
.replace(/&#39;/g, "'");
const unescapedText = text
.replace(/&amp;/g, '&')
.replace(/&lt;/g, '<')
.replace(/&gt;/g, '>');
// Re-escape properly for HTML
const escapedUrl = unescapedUrl
.replace(/&/g, '&amp;') .replace(/&/g, '&amp;')
.replace(/</g, '&lt;')
.replace(/>/g, '&gt;')
.replace(/"/g, '&quot;') .replace(/"/g, '&quot;')
.replace(/'/g, '&#39;'); .replace(/'/g, '&#39;');
// Check if link text contains wss:// or ws:// - these are relay URLs, don't add OpenGraph const escapedText = unescapedText
const isRelayUrl = /wss?:\/\//i.test(text); .replace(/&/g, '&amp;')
.replace(/</g, '&lt;')
.replace(/>/g, '&gt;');
// Check if link text contains wss:// or ws:// - these are relay URLs
const isRelayUrl = /wss?:\/\//i.test(unescapedText);
if (isRelayUrl) { if (isRelayUrl) {
// Simple link without OpenGraph wrapper // Simple link without OpenGraph wrapper
return `<a href="${escapedUrl}" target="_blank" rel="noreferrer noopener" class="break-words inline-flex items-baseline gap-1">${escapedText} <svg style="width: 0.75rem; height: 0.75rem; flex-shrink: 0;" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 6H6a2 2 0 00-2 2v10a2 2 0 002 2h10a2 2 0 002-2v-4M14 4h6m0 0v6m0-6L10 14" /></svg></a>`; return `<a href="${escapedUrl}" target="_blank" rel="noreferrer noopener" class="break-words inline-flex items-baseline gap-1">${escapedText} <svg style="width: 0.75rem; height: 0.75rem; flex-shrink: 0;" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 6H6a2 2 0 00-2 2v10a2 2 0 002 2h10a2 2 0 002-2v-4M14 4h6m0 0v6m0-6L10 14" /></svg></a>`;
} }
else { else {
// Regular link - will be processed by OpenGraph handler if external // Regular link
return `<a href="${escapedUrl}" target="_blank" rel="noreferrer noopener" class="break-words inline-flex items-baseline gap-1">${escapedText} <svg style="width: 0.75rem; height: 0.75rem; flex-shrink: 0;" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 6H6a2 2 0 00-2 2v10a2 2 0 002 2h10a2 2 0 002-2v-4M14 4h6m0 0v6m0-6L10 14" /></svg></a>`; return `<a href="${escapedUrl}" target="_blank" rel="noreferrer noopener" class="break-words inline-flex items-baseline gap-1">${escapedText} <svg style="width: 0.75rem; height: 0.75rem; flex-shrink: 0;" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 6H6a2 2 0 00-2 2v10a2 2 0 002 2h10a2 2 0 002-2v-4M14 4h6m0 0v6m0-6L10 14" /></svg></a>`;
} }
}); });
// Process media URLs (YouTube, Spotify, video, audio)
processed = processMedia(processed);
// Process OpenGraph links (external links that should have rich previews)
processed = processOpenGraphLinks(processed, options.linkBaseURL);
// Process images: add max-width styling and data attributes
processed = processImages(processed);
// Process musical notation if enabled
if (options.enableMusicalNotation) {
processed = (0, music_1.processMusicalNotation)(processed);
}
// Clean up any leftover markdown syntax // Clean up any leftover markdown syntax
processed = cleanupMarkdown(processed); processed = cleanupMarkdown(processed);
// Add styling classes // Add styling classes
@ -241,12 +310,20 @@ function processOpenGraphLinks(html, linkBaseURL) {
processed = processed.replace(/([^"'>\s])link:([a-zA-Z0-9])/gi, '$1$2'); processed = processed.replace(/([^"'>\s])link:([a-zA-Z0-9])/gi, '$1$2');
// Also handle cases where "link:" appears with whitespace before anchor tags // Also handle cases where "link:" appears with whitespace before anchor tags
processed = processed.replace(/\s+link:\s*(?=<a\s+href)/gi, ' '); processed = processed.replace(/\s+link:\s*(?=<a\s+href)/gi, ' ');
// Clean up any corrupted href attributes that contain HTML fragments // Clean up any corrupted href attributes that contain HTML fragments or double-escaped quotes
// Fix href attributes with escaped quotes: href="&quot;url&quot;" -> href="url"
processed = processed.replace(/href\s*=\s*["']&quot;(https?:\/\/[^"']+)&quot;["']/gi, (match, url) => {
// Extract the clean URL and properly escape it
const escapedUrl = url.replace(/"/g, '&quot;').replace(/'/g, '&#39;');
return `href="${escapedUrl}"`;
});
// Clean up href attributes that contain HTML fragments
processed = processed.replace(/href\s*=\s*["']([^"']*<[^"']*)["']/gi, (match, corruptedHref) => { processed = processed.replace(/href\s*=\s*["']([^"']*<[^"']*)["']/gi, (match, corruptedHref) => {
// If href contains HTML tags, extract just the URL part // If href contains HTML tags, extract just the URL part
const urlMatch = corruptedHref.match(/(https?:\/\/[^\s<>"']+)/i); const urlMatch = corruptedHref.match(/(https?:\/\/[^\s<>"']+)/i);
if (urlMatch) { if (urlMatch) {
return `href="${urlMatch[1]}"`; const escapedUrl = urlMatch[1].replace(/"/g, '&quot;').replace(/'/g, '&#39;');
return `href="${escapedUrl}"`;
} }
return match; // If we can't fix it, leave it (will be skipped by validation) return match; // If we can't fix it, leave it (will be skipped by validation)
}); });
@ -552,17 +629,39 @@ function cleanupMarkdown(html) {
return `<img src="${escapedUrl}" alt="${altText}" class="max-w-[400px] object-contain my-0" />`; return `<img src="${escapedUrl}" alt="${altText}" class="max-w-[400px] object-contain my-0" />`;
}); });
// Clean up markdown link syntax // Clean up markdown link syntax
// Skip if the link is already inside an HTML tag or is part of escaped HTML
cleaned = cleaned.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (_match, text, url) => { cleaned = cleaned.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (_match, text, url) => {
if (cleaned.includes(`href="${url}"`)) { // Skip if this markdown link is already inside an HTML tag
// Check if there's an <a> tag nearby that might have been created from this
if (cleaned.includes(`href="${url}"`) || cleaned.includes(`href='${url}'`)) {
return _match;
}
// Skip if the text contains HTML entities or looks like it's already processed
if (text.includes('&lt;') || text.includes('&gt;') || text.includes('&amp;')) {
return _match;
}
// Skip if the URL is already in an href attribute (check for escaped versions too)
const escapedUrl = url.replace(/"/g, '&quot;').replace(/'/g, '&#39;');
if (cleaned.includes(`href="${escapedUrl}"`) || cleaned.includes(`href='${escapedUrl}'`)) {
return _match; return _match;
} }
// Clean URL (remove tracking parameters) // Clean URL (remove tracking parameters)
const cleanedUrl = cleanUrl(url); const cleanedUrl = cleanUrl(url);
// Escape for HTML attribute // Escape for HTML attribute (but don't double-escape)
const escapedUrl = cleanedUrl.replace(/"/g, '&quot;').replace(/'/g, '&#39;'); const finalEscapedUrl = cleanedUrl
// Escape text for HTML .replace(/&amp;/g, '&') // Unescape if already escaped
const escapedText = text.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;'); .replace(/&/g, '&amp;')
return `<a href="${escapedUrl}" target="_blank" rel="noreferrer noopener" class="break-words inline-flex items-baseline gap-1">${escapedText} <svg style="width: 0.75rem; height: 0.75rem; flex-shrink: 0;" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 6H6a2 2 0 00-2 2v10a2 2 0 002 2h10a2 2 0 002-2v-4M14 4h6m0 0v6m0-6L10 14" /></svg></a>`; .replace(/"/g, '&quot;')
.replace(/'/g, '&#39;');
// Escape text for HTML (but don't double-escape)
const escapedText = text
.replace(/&amp;/g, '&') // Unescape if already escaped
.replace(/&lt;/g, '<')
.replace(/&gt;/g, '>')
.replace(/&/g, '&amp;')
.replace(/</g, '&lt;')
.replace(/>/g, '&gt;');
return `<a href="${finalEscapedUrl}" target="_blank" rel="noreferrer noopener" class="break-words inline-flex items-baseline gap-1">${escapedText} <svg style="width: 0.75rem; height: 0.75rem; flex-shrink: 0;" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 6H6a2 2 0 00-2 2v10a2 2 0 002 2h10a2 2 0 002-2v-4M14 4h6m0 0v6m0-6L10 14" /></svg></a>`;
}); });
return cleaned; return cleaned;
} }

666
src/processors/html-postprocess.ts

@ -11,146 +11,153 @@ export interface PostProcessOptions {
/** /**
* Post-processes HTML output from AsciiDoctor * Post-processes HTML output from AsciiDoctor
* Converts AsciiDoc macros to HTML with data attributes and CSS classes *
* Processing order (critical for correct rendering):
* 1. Convert placeholders to HTML (BOOKSTR, hashtags, wikilinks, nostr links, media, link macros)
* 2. Fix corrupted HTML (double-escaped quotes, escaped HTML as text, broken links)
* 3. Process OpenGraph links (external links with previews)
* 4. Process images (add styling)
* 5. Process musical notation
* 6. Clean up leftover markdown syntax
* 7. Add styling classes
* 8. Hide raw ToC text
*/ */
export function postProcessHtml(html: string, options: PostProcessOptions = {}): string { export function postProcessHtml(html: string, options: PostProcessOptions = {}): string {
let processed = html; let processed = html;
// Convert bookstr markers to HTML placeholders // ============================================
processed = processed.replace(/BOOKSTR:([^<>\s]+)/g, (_match, bookContent) => { // STEP 1: Convert placeholders to HTML
const escaped = bookContent.replace(/"/g, '&quot;').replace(/'/g, '&#39;'); // ============================================
processed = convertBookstrMarkers(processed);
processed = convertHashtags(processed, options);
processed = convertWikilinks(processed, options);
processed = convertNostrLinks(processed);
processed = convertMediaPlaceholders(processed);
processed = convertLinkMacros(processed);
// ============================================
// STEP 2: Fix corrupted HTML
// ============================================
processed = fixDoubleEscapedQuotes(processed);
processed = fixEscapedHtmlLinks(processed);
processed = fixBrokenLinkPatterns(processed);
// ============================================
// STEP 3: Process OpenGraph links
// ============================================
processed = processOpenGraphLinks(processed, options.linkBaseURL);
// ============================================
// STEP 4: Process images
// ============================================
processed = processImages(processed);
// ============================================
// STEP 5: Process musical notation
// ============================================
if (options.enableMusicalNotation) {
processed = processMusicalNotation(processed);
}
// ============================================
// STEP 6: Clean up leftover markdown
// ============================================
processed = cleanupMarkdown(processed);
// ============================================
// STEP 7: Add styling classes
// ============================================
processed = addStylingClasses(processed);
// ============================================
// STEP 8: Hide raw ToC text
// ============================================
processed = hideRawTocText(processed);
return processed;
}
// ============================================
// STEP 1: Convert placeholders to HTML
// ============================================
/**
* Convert BOOKSTR markers to HTML placeholders
*/
function convertBookstrMarkers(html: string): string {
return html.replace(/BOOKSTR:([^<>\s]+)/g, (_match, bookContent) => {
const escaped = escapeHtmlAttr(bookContent);
return `<span data-bookstr="${escaped}" class="bookstr-placeholder"></span>`; return `<span data-bookstr="${escaped}" class="bookstr-placeholder"></span>`;
}); });
}
// Convert hashtag links to HTML /**
processed = processed.replace(/hashtag:([^[]+)\[([^\]]+)\]/g, (_match, normalizedHashtag, displayText) => { * Convert hashtag placeholders to HTML
// HTML escape the display text */
const escapedDisplay = displayText function convertHashtags(html: string, options: PostProcessOptions): string {
.replace(/&/g, '&amp;') return html.replace(/hashtag:([^[]+)\[([^\]]+)\]/g, (_match, normalizedHashtag, displayText) => {
.replace(/</g, '&lt;') const escapedDisplay = escapeHtml(displayText);
.replace(/>/g, '&gt;')
.replace(/"/g, '&quot;')
.replace(/'/g, '&#39;');
// If hashtagUrl is configured, make it a clickable link
if (options.hashtagUrl) { if (options.hashtagUrl) {
let url: string; let url: string;
if (typeof options.hashtagUrl === 'function') { if (typeof options.hashtagUrl === 'function') {
url = options.hashtagUrl(normalizedHashtag); url = options.hashtagUrl(normalizedHashtag);
} else { } else {
// String template with {topic} placeholder
url = options.hashtagUrl.replace(/{topic}/g, normalizedHashtag); url = options.hashtagUrl.replace(/{topic}/g, normalizedHashtag);
} }
// Escape URL for HTML attribute const escapedUrl = escapeHtmlAttr(url);
const escapedUrl = url.replace(/"/g, '&quot;').replace(/'/g, '&#39;'); const escapedTopic = escapeHtmlAttr(normalizedHashtag);
return `<a class="hashtag-link text-primary-600 dark:text-primary-500 hover:underline" data-topic="${normalizedHashtag.replace(/"/g, '&quot;')}" data-url="${escapedUrl}" href="${escapedUrl}">${escapedDisplay}</a>`; return `<a class="hashtag-link text-primary-600 dark:text-primary-500 hover:underline" data-topic="${escapedTopic}" data-url="${escapedUrl}" href="${escapedUrl}">${escapedDisplay}</a>`;
} else { } else {
// Default: Use span instead of <a> tag - same color as links but no underline and not clickable
return `<span class="hashtag-link">${escapedDisplay}</span>`; return `<span class="hashtag-link">${escapedDisplay}</span>`;
} }
}); });
}
// Convert WIKILINK:dtag|display placeholder format to HTML /**
// Match WIKILINK:dtag|display, ensuring we don't match across HTML tags * Convert wikilink placeholders to HTML
processed = processed.replace(/WIKILINK:([^|<>]+)\|([^<>\s]+)/g, (_match, dTag, displayText) => { */
const escapedDtag = dTag.trim().replace(/"/g, '&quot;'); function convertWikilinks(html: string, options: PostProcessOptions): string {
const escapedDisplay = displayText.trim() return html.replace(/WIKILINK:([^|<>]+)\|([^<>\s]+)/g, (_match, dTag, displayText) => {
.replace(/&/g, '&amp;') const escapedDtag = escapeHtmlAttr(dTag.trim());
.replace(/</g, '&lt;') const escapedDisplay = escapeHtml(displayText.trim());
.replace(/>/g, '&gt;')
.replace(/"/g, '&quot;')
.replace(/'/g, '&#39;');
// Generate URL using custom format or default
let url: string; let url: string;
if (options.wikilinkUrl) { if (options.wikilinkUrl) {
if (typeof options.wikilinkUrl === 'function') { if (typeof options.wikilinkUrl === 'function') {
url = options.wikilinkUrl(dTag.trim()); url = options.wikilinkUrl(dTag.trim());
} else { } else {
// String template with {dtag} placeholder
url = options.wikilinkUrl.replace(/{dtag}/g, dTag.trim()); url = options.wikilinkUrl.replace(/{dtag}/g, dTag.trim());
} }
} else { } else {
// Default format
url = `/events?d=${escapedDtag}`; url = `/events?d=${escapedDtag}`;
} }
// Escape URL for HTML attribute const escapedUrl = escapeHtmlAttr(url);
const escapedUrl = url.replace(/"/g, '&quot;').replace(/'/g, '&#39;');
return `<a class="wikilink text-primary-600 dark:text-primary-500 hover:underline" data-dtag="${escapedDtag}" data-url="${escapedUrl}" href="${escapedUrl}">${escapedDisplay}</a>`; return `<a class="wikilink text-primary-600 dark:text-primary-500 hover:underline" data-dtag="${escapedDtag}" data-url="${escapedUrl}" href="${escapedUrl}">${escapedDisplay}</a>`;
}); });
}
// Convert nostr: links to HTML /**
processed = processed.replace(/link:nostr:([^[]+)\[([^\]]+)\]/g, (_match, bech32Id, displayText) => { * Convert nostr: links to HTML
*/
function convertNostrLinks(html: string): string {
return html.replace(/link:nostr:([^[]+)\[([^\]]+)\]/g, (_match, bech32Id, displayText) => {
const nostrType = getNostrType(bech32Id); const nostrType = getNostrType(bech32Id);
const escaped = escapeHtmlAttr(bech32Id);
const escapedDisplay = escapeHtml(displayText);
if (nostrType === 'nevent' || nostrType === 'naddr' || nostrType === 'note') { if (nostrType === 'nevent' || nostrType === 'naddr' || nostrType === 'note') {
// Render as embedded event placeholder
const escaped = bech32Id.replace(/"/g, '&quot;');
return `<div data-embedded-note="${escaped}" class="embedded-note-container">Loading embedded event...</div>`; return `<div data-embedded-note="${escaped}" class="embedded-note-container">Loading embedded event...</div>`;
} else if (nostrType === 'npub' || nostrType === 'nprofile') { } else if (nostrType === 'npub' || nostrType === 'nprofile') {
// Render as user handle return `<span class="user-handle" data-pubkey="${escaped}">@${escapedDisplay}</span>`;
const escaped = bech32Id.replace(/"/g, '&quot;');
return `<span class="user-handle" data-pubkey="${escaped}">@${displayText}</span>`;
} else {
// Fallback to regular link
const escaped = bech32Id.replace(/"/g, '&quot;');
return `<a href="nostr:${bech32Id}" class="nostr-link text-blue-600 hover:text-blue-800 hover:underline" data-nostr-type="${nostrType || 'unknown'}" data-bech32="${escaped}">${displayText}</a>`;
}
});
// Convert any leftover link: macros that AsciiDoctor didn't convert
// This handles cases where AsciiDoctor couldn't parse the link (e.g., link text with special chars)
// Pattern: link:url[text] where url is http/https and text can contain any characters
processed = processed.replace(/link:(https?:\/\/[^\[]+)\[([^\]]+)\]/g, (_match, url, text) => {
// Escape URL and text for HTML attributes
const escapedUrl = url.replace(/"/g, '&quot;').replace(/'/g, '&#39;');
const escapedText = text
.replace(/&/g, '&amp;')
.replace(/</g, '&lt;')
.replace(/>/g, '&gt;')
.replace(/"/g, '&quot;')
.replace(/'/g, '&#39;');
// Check if link text contains wss:// or ws:// - these are relay URLs, don't add OpenGraph
const isRelayUrl = /wss?:\/\//i.test(text);
if (isRelayUrl) {
// Simple link without OpenGraph wrapper
return `<a href="${escapedUrl}" target="_blank" rel="noreferrer noopener" class="break-words inline-flex items-baseline gap-1">${escapedText} <svg style="width: 0.75rem; height: 0.75rem; flex-shrink: 0;" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 6H6a2 2 0 00-2 2v10a2 2 0 002 2h10a2 2 0 002-2v-4M14 4h6m0 0v6m0-6L10 14" /></svg></a>`;
} else { } else {
// Regular link - will be processed by OpenGraph handler if external return `<a href="nostr:${bech32Id}" class="nostr-link text-blue-600 hover:text-blue-800 hover:underline" data-nostr-type="${nostrType || 'unknown'}" data-bech32="${escaped}">${escapedDisplay}</a>`;
return `<a href="${escapedUrl}" target="_blank" rel="noreferrer noopener" class="break-words inline-flex items-baseline gap-1">${escapedText} <svg style="width: 0.75rem; height: 0.75rem; flex-shrink: 0;" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 6H6a2 2 0 00-2 2v10a2 2 0 002 2h10a2 2 0 002-2v-4M14 4h6m0 0v6m0-6L10 14" /></svg></a>`;
} }
}); });
// Process media URLs (YouTube, Spotify, video, audio)
processed = processMedia(processed);
// Process OpenGraph links (external links that should have rich previews)
processed = processOpenGraphLinks(processed, options.linkBaseURL);
// Process images: add max-width styling and data attributes
processed = processImages(processed);
// Process musical notation if enabled
if (options.enableMusicalNotation) {
processed = processMusicalNotation(processed);
}
// Clean up any leftover markdown syntax
processed = cleanupMarkdown(processed);
// Add styling classes
processed = addStylingClasses(processed);
// Hide raw ToC text
processed = hideRawTocText(processed);
return processed;
} }
/** /**
@ -166,15 +173,14 @@ function getNostrType(id: string): 'npub' | 'nprofile' | 'nevent' | 'naddr' | 'n
} }
/** /**
* Process media URLs (YouTube, Spotify, video, audio) * Convert media placeholders to HTML embeds
* Converts MEDIA: placeholders to HTML embeds/players
*/ */
function processMedia(html: string): string { function convertMediaPlaceholders(html: string): string {
let processed = html; let processed = html;
// Process YouTube embeds // YouTube embeds
processed = processed.replace(/MEDIA:youtube:([a-zA-Z0-9_-]+)/g, (_match, videoId) => { processed = processed.replace(/MEDIA:youtube:([a-zA-Z0-9_-]+)/g, (_match, videoId) => {
const escapedId = videoId.replace(/"/g, '&quot;'); const escapedId = escapeHtmlAttr(videoId);
return `<div class="media-embed youtube-embed" style="position: relative; padding-bottom: 56.25%; height: 0; overflow: hidden; max-width: 100%; margin: 1rem 0;"> return `<div class="media-embed youtube-embed" style="position: relative; padding-bottom: 56.25%; height: 0; overflow: hidden; max-width: 100%; margin: 1rem 0;">
<iframe <iframe
style="position: absolute; top: 0; left: 0; width: 100%; height: 100%;" style="position: absolute; top: 0; left: 0; width: 100%; height: 100%;"
@ -187,10 +193,10 @@ function processMedia(html: string): string {
</div>`; </div>`;
}); });
// Process Spotify embeds // Spotify embeds
processed = processed.replace(/MEDIA:spotify:(track|album|playlist|artist|episode|show):([a-zA-Z0-9]+)/g, (_match, type, id) => { processed = processed.replace(/MEDIA:spotify:(track|album|playlist|artist|episode|show):([a-zA-Z0-9]+)/g, (_match, type, id) => {
const escapedType = type.replace(/"/g, '&quot;'); const escapedType = escapeHtmlAttr(type);
const escapedId = id.replace(/"/g, '&quot;'); const escapedId = escapeHtmlAttr(id);
return `<div class="media-embed spotify-embed" style="margin: 1rem 0;"> return `<div class="media-embed spotify-embed" style="margin: 1rem 0;">
<iframe <iframe
style="border-radius: 12px; width: 100%; max-width: 100%;" style="border-radius: 12px; width: 100%; max-width: 100%;"
@ -205,14 +211,9 @@ function processMedia(html: string): string {
</div>`; </div>`;
}); });
// Process video files // Video files
processed = processed.replace(/MEDIA:video:(https?:\/\/[^\s<>"{}|\\^`\[\]()]+)/g, (_match, url) => { processed = processed.replace(/MEDIA:video:(https?:\/\/[^\s<>"{}|\\^`\[\]()]+)/g, (_match, url) => {
const escapedUrl = url const escapedUrl = escapeHtmlAttr(url);
.replace(/&/g, '&amp;')
.replace(/</g, '&lt;')
.replace(/>/g, '&gt;')
.replace(/"/g, '&quot;')
.replace(/'/g, '&#39;');
return `<div class="media-embed video-embed" style="margin: 1rem 0;"> return `<div class="media-embed video-embed" style="margin: 1rem 0;">
<video <video
controls controls
@ -225,14 +226,9 @@ function processMedia(html: string): string {
</div>`; </div>`;
}); });
// Process audio files // Audio files
processed = processed.replace(/MEDIA:audio:(https?:\/\/[^\s<>"{}|\\^`\[\]()]+)/g, (_match, url) => { processed = processed.replace(/MEDIA:audio:(https?:\/\/[^\s<>"{}|\\^`\[\]()]+)/g, (_match, url) => {
const escapedUrl = url const escapedUrl = escapeHtmlAttr(url);
.replace(/&/g, '&amp;')
.replace(/</g, '&lt;')
.replace(/>/g, '&gt;')
.replace(/"/g, '&quot;')
.replace(/'/g, '&#39;');
return `<div class="media-embed audio-embed" style="margin: 1rem 0;"> return `<div class="media-embed audio-embed" style="margin: 1rem 0;">
<audio <audio
controls controls
@ -248,224 +244,174 @@ function processMedia(html: string): string {
return processed; return processed;
} }
/**
* Convert link: macros that AsciiDoctor didn't convert
* This handles cases where AsciiDoctor couldn't parse the link (e.g., link text with special chars)
*/
function convertLinkMacros(html: string): string {
return html.replace(/link:(https?:\/\/[^\[]+)\[([^\]]+)\]/g, (_match, url, text) => {
// Unescape if already HTML-escaped
const unescapedUrl = unescapeHtml(url);
const unescapedText = unescapeHtml(text);
// Re-escape properly for HTML
const escapedUrl = escapeHtmlAttr(unescapedUrl);
const escapedText = escapeHtml(unescapedText);
// Check if link text contains wss:// or ws:// - these are relay URLs, don't add OpenGraph
const isRelayUrl = /wss?:\/\//i.test(unescapedText);
// Create link (OpenGraph processing will handle it later if needed)
return `<a href="${escapedUrl}" target="_blank" rel="noreferrer noopener" class="break-words inline-flex items-baseline gap-1">${escapedText} <svg style="width: 0.75rem; height: 0.75rem; flex-shrink: 0;" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 6H6a2 2 0 00-2 2v10a2 2 0 002 2h10a2 2 0 002-2v-4M14 4h6m0 0v6m0-6L10 14" /></svg></a>`;
});
}
// ============================================
// STEP 2: Fix corrupted HTML
// ============================================
/**
* Fix double-escaped quotes in href attributes: href="&quot;url&quot;" -> href="url"
*/
function fixDoubleEscapedQuotes(html: string): string {
return html.replace(/href\s*=\s*["']&quot;(https?:\/\/[^"']+)&quot;["']/gi, (_match, url) => {
const escapedUrl = escapeHtmlAttr(url);
return `href="${escapedUrl}"`;
});
}
/**
* Fix escaped HTML links: &lt;a href="..."&gt;text&lt;/a&gt; -> <a href="...">text</a>
*/
function fixEscapedHtmlLinks(html: string): string {
return html.replace(/&lt;a\s+href=["'](https?:\/\/[^"']+)["']\s*&gt;([^<]+)&lt;\/a&gt;/gi, (_match, url, text) => {
const unescapedUrl = unescapeHtml(url);
const unescapedText = unescapeHtml(text);
const escapedUrl = escapeHtmlAttr(unescapedUrl);
const escapedText = escapeHtml(unescapedText);
const isRelayUrl = /wss?:\/\//i.test(unescapedText);
return `<a href="${escapedUrl}" target="_blank" rel="noreferrer noopener" class="break-words inline-flex items-baseline gap-1">${escapedText} <svg style="width: 0.75rem; height: 0.75rem; flex-shrink: 0;" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 6H6a2 2 0 00-2 2v10a2 2 0 002 2h10a2 2 0 002-2v-4M14 4h6m0 0v6m0-6L10 14" /></svg></a>`;
});
}
/**
* Fix broken link patterns where attributes appear as text before escaped HTML
* Pattern: " target=...&gt;&lt;a href=...&gt;text&lt;/a&gt;
*/
function fixBrokenLinkPatterns(html: string): string {
return html.replace(/"\s+target=["'][^"']*["']\s+rel=["'][^"']*["']\s+class=["'][^"']*["']\s*&gt;&lt;a\s+href=["'](https?:\/\/[^"']+)["']\s*&gt;([^<]+)&lt;\/a&gt;/gi, (_match, url, text) => {
const unescapedUrl = unescapeHtml(url);
const unescapedText = unescapeHtml(text);
const escapedUrl = escapeHtmlAttr(unescapedUrl);
const escapedText = escapeHtml(unescapedText);
const isRelayUrl = /wss?:\/\//i.test(unescapedText);
return `<a href="${escapedUrl}" target="_blank" rel="noreferrer noopener" class="break-words inline-flex items-baseline gap-1">${escapedText} <svg style="width: 0.75rem; height: 0.75rem; flex-shrink: 0;" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 6H6a2 2 0 00-2 2v10a2 2 0 002 2h10a2 2 0 002-2v-4M14 4h6m0 0v6m0-6L10 14" /></svg></a>`;
});
}
// ============================================
// STEP 3: Process OpenGraph links
// ============================================
/** /**
* Process OpenGraph links - mark external links for OpenGraph preview fetching * Process OpenGraph links - mark external links for OpenGraph preview fetching
*/ */
function processOpenGraphLinks(html: string, linkBaseURL?: string): string { function processOpenGraphLinks(html: string, linkBaseURL?: string): string {
// First, clean up any corrupted HTML fragments that might interfere
// Remove "link:" prefixes that appear before links (AsciiDoc syntax that shouldn't be in HTML)
// This happens when AsciiDoctor doesn't fully convert link:url[text] syntax or when
// there's literal text like "should render like link:" before an anchor tag
let processed = html; let processed = html;
// Remove "link:" that appears immediately before anchor tags (most common case) // Remove "link:" prefixes that might appear before anchor tags
// Match "link:" followed by optional whitespace and then <a
processed = processed.replace(/link:\s*<a/gi, '<a'); processed = processed.replace(/link:\s*<a/gi, '<a');
// Remove "link:" that appears as plain text in HTML (shouldn't be there)
// Be careful not to match "link:" inside HTML attributes or tags
// Match "link:" that's not inside quotes or tags
processed = processed.replace(/([^"'>\s])link:([a-zA-Z0-9])/gi, '$1$2'); processed = processed.replace(/([^"'>\s])link:([a-zA-Z0-9])/gi, '$1$2');
// Also handle cases where "link:" appears with whitespace before anchor tags
processed = processed.replace(/\s+link:\s*(?=<a\s+href)/gi, ' '); processed = processed.replace(/\s+link:\s*(?=<a\s+href)/gi, ' ');
// Clean up any corrupted href attributes that contain HTML fragments // Clean up corrupted href attributes
processed = processed.replace(/href\s*=\s*["']([^"']*<[^"']*)["']/gi, (match, corruptedHref) => { processed = processed.replace(/href\s*=\s*["']([^"']*<[^"']*)["']/gi, (match, corruptedHref) => {
// If href contains HTML tags, extract just the URL part
const urlMatch = corruptedHref.match(/(https?:\/\/[^\s<>"']+)/i); const urlMatch = corruptedHref.match(/(https?:\/\/[^\s<>"']+)/i);
if (urlMatch) { if (urlMatch) {
return `href="${urlMatch[1]}"`; const escapedUrl = escapeHtmlAttr(urlMatch[1]);
return `href="${escapedUrl}"`;
} }
return match; // If we can't fix it, leave it (will be skipped by validation)
});
// Clean up any malformed anchor tag fragments that might cause issues
processed = processed.replace(/<a\s+href=["']([^"'>]*<[^"'>]*)["']/gi, (match, corruptedHref) => {
// Skip corrupted anchor tags - they'll be handled by the main regex with validation
return match; return match;
}); });
// Clean up links inside code blocks - AsciiDoctor creates them but they should be plain text // Protect code blocks and pre blocks
// Remove <a> tags inside <code> blocks, keeping only the link text
processed = processed.replace(/<code[^>]*>([\s\S]*?)<\/code>/gi, (match, content) => {
// Remove any <a> tags inside code blocks, keeping only the text content
const cleaned = content.replace(/<a[^>]*>(.*?)<\/a>/gi, '$1');
return `<code>${cleaned}</code>`;
});
// Also clean up links inside pre blocks
processed = processed.replace(/<pre[^>]*>([\s\S]*?)<\/pre>/gi, (match, content) => {
const cleaned = content.replace(/<a[^>]*>(.*?)<\/a>/gi, '$1');
return `<pre>${cleaned}</pre>`;
});
// Now protect code blocks and pre blocks by replacing them with placeholders
const codeBlockPlaceholders: string[] = []; const codeBlockPlaceholders: string[] = [];
const preBlockPlaceholders: string[] = []; const preBlockPlaceholders: string[] = [];
// Replace pre blocks first (they can contain code blocks)
processed = processed.replace(/<pre[^>]*>([\s\S]*?)<\/pre>/gi, (match) => { processed = processed.replace(/<pre[^>]*>([\s\S]*?)<\/pre>/gi, (match) => {
const placeholder = `__PREBLOCK_${preBlockPlaceholders.length}__`; const placeholder = `__PREBLOCK_${preBlockPlaceholders.length}__`;
preBlockPlaceholders.push(match); preBlockPlaceholders.push(match);
return placeholder; return placeholder;
}); });
// Replace code blocks
processed = processed.replace(/<code[^>]*>([\s\S]*?)<\/code>/gi, (match) => { processed = processed.replace(/<code[^>]*>([\s\S]*?)<\/code>/gi, (match) => {
const placeholder = `__CODEBLOCK_${codeBlockPlaceholders.length}__`; const placeholder = `__CODEBLOCK_${codeBlockPlaceholders.length}__`;
codeBlockPlaceholders.push(match); codeBlockPlaceholders.push(match);
return placeholder; return placeholder;
}); });
// Extract base domain from linkBaseURL if provided // Extract base domain
let baseDomain: string | null = null; let baseDomain: string | null = null;
if (linkBaseURL) { if (linkBaseURL) {
try { const urlMatch = linkBaseURL.match(/^https?:\/\/([^\/]+)/);
const urlMatch = linkBaseURL.match(/^https?:\/\/([^\/]+)/); if (urlMatch) {
if (urlMatch) { baseDomain = urlMatch[1];
baseDomain = urlMatch[1];
}
} catch {
// Ignore parsing errors
} }
} }
// Before processing, remove any corrupted opengraph containers that might have been created // Process external links
// These have malformed data-og-url attributes containing HTML fragments
// Match all spans with data-og-url and check if they're corrupted
// Use a pattern that matches spans with data-og-url, then check the attribute value
processed = processed.replace(/<span[^>]*data-og-url=["']([^"']+)["'][^>]*>[\s\S]*?<\/span>/gi, (match) => {
// This span has a corrupted data-og-url (contains <)
// Extract the clean URL from the beginning of the attribute value
const dataOgUrlMatch = match.match(/data-og-url=["']([^"']+)["']/i);
if (dataOgUrlMatch && dataOgUrlMatch[1]) {
// Extract just the URL part (everything before the first <)
const urlMatch = dataOgUrlMatch[1].match(/(https?:\/\/[^\s<>"']+)/i);
if (urlMatch) {
const cleanUrl = urlMatch[1];
// Extract the link text from inside the span
const linkMatch = match.match(/<a[^>]*>(.*?)<\/a>/i);
const linkText = linkMatch ? linkMatch[1] : cleanUrl;
// Return a clean opengraph container with the fixed URL
const escapedUrl = cleanUrl.replace(/"/g, '&quot;').replace(/'/g, '&#39;');
return `<span class="opengraph-link-container" data-og-url="${escapedUrl}">
<a href="${escapedUrl}" target="_blank" rel="noreferrer noopener" class="opengraph-link break-words inline-flex items-baseline gap-1">${linkText} <svg style="width: 0.75rem; height: 0.75rem; flex-shrink: 0;" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 6H6a2 2 0 00-2 2v10a2 2 0 002 2h10a2 2 0 002-2v-4M14 4h6m0 0v6m0-6L10 14" /></svg></a>
<div class="opengraph-preview" data-og-loading="true" style="display: none;">
<div class="opengraph-card">
<div class="opengraph-image-container">
<img class="opengraph-image" src="" alt="" style="display: none;" />
</div>
<div class="opengraph-content">
<div class="opengraph-site"></div>
<div class="opengraph-title"></div>
<div class="opengraph-description"></div>
</div>
</div>
</div>
</span>`;
}
// If we can't extract a clean URL, just remove the corrupted span and keep any text
const textMatch = match.match(/>([^<]+)</);
return textMatch ? textMatch[1] : '';
}
return match; // Keep valid spans
});
// Match external links (http/https) that aren't media, nostr, or wikilinks
// Skip links that are already in media embeds or special containers
// Use a stricter regex that only matches valid, complete anchor tags
// The regex must match a complete <a> tag with proper structure
processed = processed.replace(/<a\s+([^>]*\s+)?href\s*=\s*["'](https?:\/\/[^"']{1,2048})["']([^>]*?)>(.*?)<\/a>/gis, (match, before, href, after, linkText) => { processed = processed.replace(/<a\s+([^>]*\s+)?href\s*=\s*["'](https?:\/\/[^"']{1,2048})["']([^>]*?)>(.*?)<\/a>/gis, (match, before, href, after, linkText) => {
// CRITICAL: Validate href FIRST - if it contains ANY HTML tags or fragments, skip immediately // Validate href
// This prevents corrupted HTML from being created if (!href || href.includes('<') || href.includes('>') || !/^https?:\/\/[^\s<>"']+$/i.test(href)) {
if (!href) { return match;
return match; // Skip if no href
}
// Skip if href contains HTML tags or looks corrupted - be very strict
// Check for common HTML fragments that indicate corruption
if (href.includes('<') || href.includes('>') || href.includes('href=') || href.includes('</a>') || href.includes('<a') || href.includes('"') || href.includes("'")) {
return match; // Skip if href looks corrupted
}
// Additional validation: href should only contain URL-safe characters
// URLs shouldn't contain unescaped quotes or HTML tags
if (!/^https?:\/\/[^\s<>"']+$/i.test(href)) {
return match; // Skip if href doesn't match clean URL pattern
}
// Validate href is a proper URL (starts with http:// or https:// and doesn't contain invalid chars)
if (!/^https?:\/\/[^\s<>"']+$/i.test(href)) {
return match; // Skip if href doesn't match URL pattern
}
// Skip if the match contains unclosed tags or corrupted HTML
const openATags = (match.match(/<a\s/g) || []).length;
const closeATags = (match.match(/<\/a>/g) || []).length;
if (openATags !== closeATags || openATags !== 1) {
return match; // Multiple or mismatched <a> tags = corrupted
}
// Skip if match contains nested HTML that looks corrupted
if (match.includes('href="') && match.split('href="').length > 2) {
return match; // Multiple href attributes = corrupted
} }
// Skip if it's already a media embed, nostr link, wikilink, or opengraph link // Skip if already processed
if (match.includes('class="wikilink"') || if (match.includes('class="wikilink"') ||
match.includes('class="nostr-link"') || match.includes('class="nostr-link"') ||
match.includes('class="opengraph-link"') || match.includes('class="opengraph-link"') ||
match.includes('data-embedded-note') || match.includes('data-embedded-note') ||
match.includes('youtube-embed') ||
match.includes('spotify-embed') ||
match.includes('media-embed') || match.includes('media-embed') ||
match.includes('opengraph-link-container')) { match.includes('opengraph-link-container')) {
return match; return match;
} }
// Skip if it's a media file URL // Skip media files
if (/\.(mp4|webm|ogg|m4v|mov|avi|mkv|flv|wmv|mp3|m4a|wav|flac|aac|opus|wma|jpeg|jpg|png|gif|webp|svg)$/i.test(href)) { if (/\.(mp4|webm|ogg|m4v|mov|avi|mkv|flv|wmv|mp3|m4a|wav|flac|aac|opus|wma|jpeg|jpg|png|gif|webp|svg)$/i.test(href)) {
return match; return match;
} }
// Skip if it's YouTube or Spotify (already handled as media) // Skip YouTube/Spotify (already handled as media)
if (/youtube\.com|youtu\.be|spotify\.com/i.test(href)) { if (/youtube\.com|youtu\.be|spotify\.com/i.test(href)) {
return match; return match;
} }
// Skip if link text contains wss:// or ws:// - these are relay URLs, not web pages // Skip if link text contains wss:// or ws:// - these are relay URLs, not web pages
// They don't need OpenGraph previews
if (/wss?:\/\//i.test(linkText)) { if (/wss?:\/\//i.test(linkText)) {
return match; return match;
} }
// Check if it's an external link (not same domain) // Check if external
let isExternal = true; let isExternal = true;
if (baseDomain) { if (baseDomain) {
try { const hrefMatch = href.match(/^https?:\/\/([^\/]+)/);
const hrefMatch = href.match(/^https?:\/\/([^\/]+)/); if (hrefMatch && hrefMatch[1] === baseDomain) {
if (hrefMatch && hrefMatch[1] === baseDomain) { isExternal = false;
isExternal = false;
}
} catch {
// If parsing fails, assume external
} }
} }
// Only process external links
if (!isExternal) { if (!isExternal) {
return match; return match;
} }
// Escape the URL for data attribute // Wrap in OpenGraph container
const escapedUrl = href const escapedUrl = escapeHtmlAttr(href);
.replace(/&/g, '&amp;')
.replace(/"/g, '&quot;')
.replace(/'/g, '&#39;');
// Add data attribute for OpenGraph fetching and wrap in container
// The actual OpenGraph fetching will be done client-side via JavaScript
return `<span class="opengraph-link-container" data-og-url="${escapedUrl}"> return `<span class="opengraph-link-container" data-og-url="${escapedUrl}">
<a href="${escapedUrl}" target="_blank" rel="noreferrer noopener" class="opengraph-link break-words inline-flex items-baseline gap-1">${linkText} <svg style="width: 0.75rem; height: 0.75rem; flex-shrink: 0;" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 6H6a2 2 0 00-2 2v10a2 2 0 002 2h10a2 2 0 002-2v-4M14 4h6m0 0v6m0-6L10 14" /></svg></a> <a href="${escapedUrl}" target="_blank" rel="noreferrer noopener" class="opengraph-link break-words inline-flex items-baseline gap-1">${linkText} <svg style="width: 0.75rem; height: 0.75rem; flex-shrink: 0;" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 6H6a2 2 0 00-2 2v10a2 2 0 002 2h10a2 2 0 002-2v-4M14 4h6m0 0v6m0-6L10 14" /></svg></a>
<div class="opengraph-preview" data-og-loading="true" style="display: none;"> <div class="opengraph-preview" data-og-loading="true" style="display: none;">
@ -482,20 +428,23 @@ function processOpenGraphLinks(html: string, linkBaseURL?: string): string {
</div> </div>
</span>`; </span>`;
}); });
// Restore code blocks // Restore code blocks
codeBlockPlaceholders.forEach((codeBlock, index) => { codeBlockPlaceholders.forEach((codeBlock, index) => {
processed = processed.replace(`__CODEBLOCK_${index}__`, codeBlock); processed = processed.replace(`__CODEBLOCK_${index}__`, codeBlock);
}); });
// Restore pre blocks
preBlockPlaceholders.forEach((preBlock, index) => { preBlockPlaceholders.forEach((preBlock, index) => {
processed = processed.replace(`__PREBLOCK_${index}__`, preBlock); processed = processed.replace(`__PREBLOCK_${index}__`, preBlock);
}); });
return processed; return processed;
} }
// ============================================
// STEP 4: Process images
// ============================================
/** /**
* Process images: add max-width styling and data attributes * Process images: add max-width styling and data attributes
*/ */
@ -532,86 +481,15 @@ function processImages(html: string): string {
updatedAttributes += ` class="max-w-[400px] h-auto object-contain cursor-zoom-in"`; updatedAttributes += ` class="max-w-[400px] h-auto object-contain cursor-zoom-in"`;
} }
updatedAttributes += ` data-asciidoc-image="true" data-image-index="${currentIndex}" data-image-src="${src.replace(/"/g, '&quot;')}"`; updatedAttributes += ` data-asciidoc-image="true" data-image-index="${currentIndex}" data-image-src="${escapeHtmlAttr(src)}"`;
return `<img${updatedAttributes}>`; return `<img${updatedAttributes}>`;
}); });
} }
/** // ============================================
* Clean URL by removing tracking parameters // STEP 6: Clean up leftover markdown
* Based on jumble's cleanUrl function // ============================================
*/
function cleanUrl(url: string): string {
  // Anything the URL parser rejects (relative paths, plain text, ...) is
  // handed back exactly as it came in.
  let target: URL;
  try {
    target = new URL(url);
  } catch {
    return url;
  }

  const query = target.searchParams;

  // Exact parameter names to drop, grouped by the service that emits them.
  const knownTrackers: readonly string[] = [
    // Google Analytics & Ads
    'utm_source', 'utm_medium', 'utm_campaign', 'utm_term', 'utm_content',
    'utm_id', 'utm_source_platform', 'utm_creative_format', 'utm_marketing_tactic',
    'gclid', 'gclsrc', 'dclid', 'gbraid', 'wbraid',
    // Facebook
    'fbclid', 'fb_action_ids', 'fb_action_types', 'fb_source', 'fb_ref',
    // Twitter/X
    'twclid', 'twsrc',
    // Microsoft/Bing
    'msclkid', 'mc_cid', 'mc_eid',
    // Adobe
    'adobe_mc', 'adobe_mc_ref', 'adobe_mc_sdid',
    // Mailchimp
    'mc_cid', 'mc_eid',
    // HubSpot
    'hsCtaTracking', 'hsa_acc', 'hsa_cam', 'hsa_grp', 'hsa_ad', 'hsa_src', 'hsa_tgt', 'hsa_kw', 'hsa_mt', 'hsa_net', 'hsa_ver',
    // Marketo
    'mkt_tok',
    // YouTube
    'si', 'feature', 'kw', 'pp',
    // Other common tracking
    'ref', 'referrer', 'source', 'campaign', 'medium', 'content',
    'yclid', 'srsltid', '_ga', '_gl', 'igshid', 'epik', 'pk_campaign', 'pk_kwd',
    // Mobile app tracking
    'adjust_tracker', 'adjust_campaign', 'adjust_adgroup', 'adjust_creative',
    // Amazon
    'tag', 'linkCode', 'creative', 'creativeASIN', 'linkId', 'ascsubtag',
    // Affiliate tracking
    'aff_id', 'affiliate_id', 'aff', 'ref_', 'refer',
    // Social media share tracking
    'share', 'shared', 'sharesource',
  ];

  // Deleting is done unconditionally for every listed name (a no-op when the
  // parameter is absent) so the call sequence on searchParams stays the same
  // regardless of which parameters the URL actually carries.
  for (const name of knownTrackers) {
    query.delete(name);
  }

  // Second pass: whole families identified by prefix. The key list is
  // snapshotted first because deleting while iterating the live view would
  // skip entries.
  const prefixedKeys = Array.from(query.keys()).filter(
    (name) => name.startsWith('utm_') || name.startsWith('_'),
  );
  for (const name of prefixedKeys) {
    query.delete(name);
  }

  return target.toString();
}
/** /**
* Clean up leftover markdown syntax * Clean up leftover markdown syntax
@ -622,72 +500,100 @@ function cleanupMarkdown(html: string): string {
// Clean up markdown image syntax // Clean up markdown image syntax
cleaned = cleaned.replace(/!\[([^\]]*)\]\(([^)]+)\)/g, (_match, alt, url) => { cleaned = cleaned.replace(/!\[([^\]]*)\]\(([^)]+)\)/g, (_match, alt, url) => {
const altText = alt || ''; const altText = alt || '';
// Clean URL (remove tracking parameters) const escapedUrl = escapeHtmlAttr(url);
const cleanedUrl = cleanUrl(url);
// Escape for HTML attribute
const escapedUrl = cleanedUrl.replace(/"/g, '&quot;').replace(/'/g, '&#39;');
return `<img src="${escapedUrl}" alt="${altText}" class="max-w-[400px] object-contain my-0" />`; return `<img src="${escapedUrl}" alt="${altText}" class="max-w-[400px] object-contain my-0" />`;
}); });
// Clean up markdown link syntax // Clean up markdown link syntax (skip if already HTML)
cleaned = cleaned.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (_match, text, url) => { cleaned = cleaned.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (_match, text, url) => {
if (cleaned.includes(`href="${url}"`)) { // Skip if already processed
if (cleaned.includes(`href="${url}"`) || cleaned.includes(`href='${url}'`)) {
return _match;
}
if (text.includes('&lt;') || text.includes('&gt;') || text.includes('&amp;')) {
return _match; return _match;
} }
// Clean URL (remove tracking parameters)
const cleanedUrl = cleanUrl(url); const escapedUrl = escapeHtmlAttr(url);
// Escape for HTML attribute const escapedText = escapeHtml(text);
const escapedUrl = cleanedUrl.replace(/"/g, '&quot;').replace(/'/g, '&#39;');
// Escape text for HTML
const escapedText = text.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
return `<a href="${escapedUrl}" target="_blank" rel="noreferrer noopener" class="break-words inline-flex items-baseline gap-1">${escapedText} <svg style="width: 0.75rem; height: 0.75rem; flex-shrink: 0;" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 6H6a2 2 0 00-2 2v10a2 2 0 002 2h10a2 2 0 002-2v-4M14 4h6m0 0v6m0-6L10 14" /></svg></a>`; return `<a href="${escapedUrl}" target="_blank" rel="noreferrer noopener" class="break-words inline-flex items-baseline gap-1">${escapedText} <svg style="width: 0.75rem; height: 0.75rem; flex-shrink: 0;" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 6H6a2 2 0 00-2 2v10a2 2 0 002 2h10a2 2 0 002-2v-4M14 4h6m0 0v6m0-6L10 14" /></svg></a>`;
}); });
return cleaned; return cleaned;
} }
// ============================================
// STEP 7: Add styling classes
// ============================================
/** /**
* Add proper CSS classes for styling * Add proper CSS classes for styling
*/ */
function addStylingClasses(html: string): string { function addStylingClasses(html: string): string {
let styled = html; let styled = html;
// Add strikethrough styling
styled = styled.replace(/<span class="line-through">([^<]+)<\/span>/g, '<span class="line-through line-through-2">$1</span>'); styled = styled.replace(/<span class="line-through">([^<]+)<\/span>/g, '<span class="line-through line-through-2">$1</span>');
// Add subscript styling
styled = styled.replace(/<span class="subscript">([^<]+)<\/span>/g, '<span class="subscript text-xs align-sub">$1</span>'); styled = styled.replace(/<span class="subscript">([^<]+)<\/span>/g, '<span class="subscript text-xs align-sub">$1</span>');
// Add superscript styling
styled = styled.replace(/<span class="superscript">([^<]+)<\/span>/g, '<span class="superscript text-xs align-super">$1</span>'); styled = styled.replace(/<span class="superscript">([^<]+)<\/span>/g, '<span class="superscript text-xs align-super">$1</span>');
// Add code highlighting classes
styled = styled.replace(/<pre class="highlightjs[^"]*">/g, '<pre class="highlightjs hljs">'); styled = styled.replace(/<pre class="highlightjs[^"]*">/g, '<pre class="highlightjs hljs">');
styled = styled.replace(/<code class="highlightjs[^"]*">/g, '<code class="highlightjs hljs">'); styled = styled.replace(/<code class="highlightjs[^"]*">/g, '<code class="highlightjs hljs">');
return styled; return styled;
} }
// ============================================
// STEP 8: Hide raw ToC text
// ============================================
/** /**
* Hide raw AsciiDoc ToC text * Hide raw AsciiDoc ToC text
*/ */
function hideRawTocText(html: string): string { function hideRawTocText(html: string): string {
let cleaned = html; let cleaned = html;
cleaned = cleaned.replace( cleaned = cleaned.replace(/<h[1-6][^>]*>.*?Table of Contents.*?\(\d+\).*?<\/h[1-6]>/gi, '');
/<h[1-6][^>]*>.*?Table of Contents.*?\(\d+\).*?<\/h[1-6]>/gi, cleaned = cleaned.replace(/<p[^>]*>.*?Table of Contents.*?\(\d+\).*?<\/p>/gi, '');
'' cleaned = cleaned.replace(/<p[^>]*>.*?Assumptions.*?\[n=0\].*?<\/p>/gi, '');
);
return cleaned;
}
cleaned = cleaned.replace( // ============================================
/<p[^>]*>.*?Table of Contents.*?\(\d+\).*?<\/p>/gi, // Utility functions
'' // ============================================
);
cleaned = cleaned.replace( /**
/<p[^>]*>.*?Assumptions.*?\[n=0\].*?<\/p>/gi, * Escape HTML content
'' */
); function escapeHtml(text: string): string {
return text
.replace(/&/g, '&amp;')
.replace(/</g, '&lt;')
.replace(/>/g, '&gt;')
.replace(/"/g, '&quot;')
.replace(/'/g, '&#39;');
}
return cleaned; /**
* Escape HTML attribute value
*/
function escapeHtmlAttr(text: string): string {
  // Ampersands go first: the entities written by the later steps contain
  // '&' themselves and must not be escaped a second time.
  const ampersandsEscaped = text.replace(/&/g, '&amp;');
  // Both quote styles are neutralised so the value is safe inside either
  // a double-quoted or a single-quoted attribute.
  const doubleQuotesEscaped = ampersandsEscaped.replace(/"/g, '&quot;');
  return doubleQuotesEscaped.replace(/'/g, '&#39;');
}
/**
 * Decode the five HTML entities produced by the escaping helpers
 * (`&lt;`, `&gt;`, `&quot;`, `&#39;`, `&amp;`) back into literal characters.
 *
 * Only one level of escaping is removed: `&amp;lt;` becomes `&lt;`, not `<`.
 *
 * @param text - String that may contain the entities listed above.
 * @returns The string with each of those entities replaced by its character.
 */
function unescapeHtml(text: string): string {
  return text
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&quot;/g, '"')
    .replace(/&#39;/g, "'")
    // `&amp;` must be decoded last. Decoding it first would turn `&amp;lt;`
    // into `&lt;`, which the following step would then decode again to `<`.
    .replace(/&amp;/g, '&');
}

352
src/processors/html-utils.ts

@ -1,244 +1,164 @@
/** /**
* Extracts the table of contents from AsciiDoc HTML output * HTML utility functions for processing AsciiDoctor output
* Returns the TOC HTML and the content HTML without the TOC *
* Functions:
* - extractTOC: Extract table of contents from HTML
* - sanitizeHTML: Sanitize HTML to prevent XSS attacks
* - processLinks: Add target="_blank" to external links
*/ */
export function extractTOC(html: string): { toc: string; contentWithoutTOC: string } {
// AsciiDoc with toc: 'left' generates a TOC in a div with id="toc" or class="toc"
let tocContent = '';
let contentWithoutTOC = html;
// Find the start of the TOC div - try multiple patterns export interface TOCResult {
const tocStartPatterns = [ toc: string;
/<div\s+id=["']toc["']\s+class=["']toc["'][^>]*>/i, contentWithoutTOC: string;
/<div\s+id=["']toc["'][^>]*>/i, }
/<div\s+class=["']toc["'][^>]*>/i,
/<nav\s+id=["']toc["'][^>]*>/i,
];
let tocStartIdx = -1;
let tocStartTag = '';
for (const pattern of tocStartPatterns) {
const match = html.match(pattern);
if (match && match.index !== undefined) {
tocStartIdx = match.index;
tocStartTag = match[0];
break;
}
}
if (tocStartIdx === -1) {
// No TOC found
return { toc: '', contentWithoutTOC: html };
}
// Find the matching closing tag by counting div/nav tags
const searchStart = tocStartIdx + tocStartTag.length;
let depth = 1;
let i = searchStart;
while (i < html.length && depth > 0) {
// Look for opening or closing div/nav tags
if (i + 4 < html.length && html.substring(i, i + 4).toLowerCase() === '<div') {
// Check if it's a closing tag
if (i + 5 < html.length && html[i + 4] === '/') {
depth--;
const closeIdx = html.indexOf('>', i);
if (closeIdx === -1) break;
i = closeIdx + 1;
} else {
// Opening tag - find the end (handle attributes and self-closing)
const closeIdx = html.indexOf('>', i);
if (closeIdx === -1) break;
// Check if it's self-closing (look for /> before the >)
const tagContent = html.substring(i, closeIdx);
if (!tagContent.endsWith('/')) {
depth++;
}
i = closeIdx + 1;
}
} else if (i + 5 < html.length && html.substring(i, i + 5).toLowerCase() === '</div') {
depth--;
const closeIdx = html.indexOf('>', i);
if (closeIdx === -1) break;
i = closeIdx + 1;
} else if (i + 5 < html.length && html.substring(i, i + 5).toLowerCase() === '</nav') {
depth--;
const closeIdx = html.indexOf('>', i);
if (closeIdx === -1) break;
i = closeIdx + 1;
} else if (i + 4 < html.length && html.substring(i, i + 4).toLowerCase() === '<nav') {
// Handle opening nav tags
const closeIdx = html.indexOf('>', i);
if (closeIdx === -1) break;
const tagContent = html.substring(i, closeIdx);
if (!tagContent.endsWith('/')) {
depth++;
}
i = closeIdx + 1;
} else {
i++;
}
}
if (depth === 0) {
// Found the matching closing tag
const tocEndIdx = i;
// Extract the TOC content (inner HTML)
const tocFullHTML = html.substring(tocStartIdx, tocEndIdx);
// Extract just the inner content (without the outer div tags)
let innerStart = tocStartTag.length;
let innerEnd = tocFullHTML.length;
// Find the last </div> or </nav>
if (tocFullHTML.endsWith('</div>')) {
innerEnd -= 6;
} else if (tocFullHTML.endsWith('</nav>')) {
innerEnd -= 7;
}
tocContent = tocFullHTML.substring(innerStart, innerEnd).trim();
// Remove the toctitle div if present (AsciiDoc adds "Table of Contents" title)
tocContent = tocContent.replace(/<div\s+id=["']toctitle["'][^>]*>.*?<\/div>\s*/gis, '');
tocContent = tocContent.trim();
// Remove the TOC from the content /**
contentWithoutTOC = html.substring(0, tocStartIdx) + html.substring(tocEndIdx); * Extract table of contents from AsciiDoctor HTML output
* AsciiDoctor generates a <div id="toc"> with class="toc" containing the TOC
*/
export function extractTOC(html: string): TOCResult {
// Match the TOC div - AsciiDoctor generates it with id="toc" and class="toc"
const tocMatch = html.match(/<div[^>]*id=["']toc["'][^>]*>([\s\S]*?)<\/div>/i);
if (tocMatch) {
const toc = tocMatch[0]; // Full TOC div
const contentWithoutTOC = html.replace(toc, '').trim();
return { toc, contentWithoutTOC };
} }
// Extract just the body content if the HTML includes full document structure // Fallback: try to match by class="toc"
// AsciiDoctor might return full HTML with <html>, <head>, <body> tags const tocClassMatch = html.match(/<div[^>]*class=["'][^"']*toc[^"']*["'][^>]*>([\s\S]*?)<\/div>/i);
// Check if this is a full HTML document
const isFullDocument = /^\s*<!DOCTYPE|^\s*<html/i.test(contentWithoutTOC); if (tocClassMatch) {
const toc = tocClassMatch[0];
if (isFullDocument) { const contentWithoutTOC = html.replace(toc, '').trim();
// Extract body content using a more robust approach return { toc, contentWithoutTOC };
// Find the opening <body> tag
const bodyStartMatch = contentWithoutTOC.match(/<body[^>]*>/i);
if (bodyStartMatch && bodyStartMatch.index !== undefined) {
const bodyStart = bodyStartMatch.index + bodyStartMatch[0].length;
// Find the closing </body> tag by searching backwards from the end
// This is more reliable than regex for nested content
const bodyEndMatch = contentWithoutTOC.lastIndexOf('</body>');
if (bodyEndMatch !== -1 && bodyEndMatch > bodyStart) {
contentWithoutTOC = contentWithoutTOC.substring(bodyStart, bodyEndMatch).trim();
}
}
} }
// Remove any remaining document structure tags that might have slipped through // No TOC found
contentWithoutTOC = contentWithoutTOC return {
.replace(/<html[^>]*>/gi, '') toc: '',
.replace(/<\/html>/gi, '') contentWithoutTOC: html,
.replace(/<head[^>]*>[\s\S]*?<\/head>/gi, '') };
.replace(/<body[^>]*>/gi, '')
.replace(/<\/body>/gi, '');
// Clean up any extra whitespace
contentWithoutTOC = contentWithoutTOC.trim();
return { toc: tocContent, contentWithoutTOC };
} }
/** /**
* Performs basic HTML sanitization to prevent XSS * Sanitize HTML to prevent XSS attacks
* Removes dangerous scripts and event handlers while preserving safe HTML
*
* This is a basic sanitizer. For production use, consider using a library like DOMPurify
*/ */
export function sanitizeHTML(html: string): string { export function sanitizeHTML(html: string): string {
let sanitized = html;
// Remove script tags and their content // Remove script tags and their content
html = html.replace(/<script[^>]*>.*?<\/script>/gis, ''); sanitized = sanitized.replace(/<script[\s\S]*?<\/script>/gi, '');
// Remove event handlers (onclick, onerror, etc.) // Remove event handlers from attributes (onclick, onerror, etc.)
html = html.replace(/\s*on\w+\s*=\s*["'][^"']*["']/gi, ''); sanitized = sanitized.replace(/\s*on\w+\s*=\s*["'][^"']*["']/gi, '');
sanitized = sanitized.replace(/\s*on\w+\s*=\s*[^\s>]*/gi, '');
// Remove javascript: protocol in links
html = html.replace(/javascript:/gi, ''); // Remove javascript: protocol in href and src attributes
sanitized = sanitized.replace(/href\s*=\s*["']javascript:[^"']*["']/gi, 'href="#"');
// Remove data: URLs that could be dangerous sanitized = sanitized.replace(/src\s*=\s*["']javascript:[^"']*["']/gi, 'src=""');
html = html.replace(/data:\s*text\/html/gi, '');
// Remove data: URLs that might contain scripts (allow images)
return html; // This is more permissive - you might want to be stricter
sanitized = sanitized.replace(/src\s*=\s*["']data:text\/html[^"']*["']/gi, 'src=""');
// Remove iframe with dangerous sources
sanitized = sanitized.replace(/<iframe[^>]*src\s*=\s*["']javascript:[^"']*["'][^>]*>[\s\S]*?<\/iframe>/gi, '');
// Remove object and embed tags (often used for XSS)
sanitized = sanitized.replace(/<object[\s\S]*?<\/object>/gi, '');
sanitized = sanitized.replace(/<embed[\s\S]*?>/gi, '');
// Remove style tags with potentially dangerous content
// We keep style attributes but remove <style> tags
sanitized = sanitized.replace(/<style[\s\S]*?<\/style>/gi, '');
// Remove link tags with javascript: or data: URLs
sanitized = sanitized.replace(/<link[^>]*href\s*=\s*["'](javascript|data):[^"']*["'][^>]*>/gi, '');
// Remove meta tags with http-equiv="refresh" (can be used for redirects)
sanitized = sanitized.replace(/<meta[^>]*http-equiv\s*=\s*["']refresh["'][^>]*>/gi, '');
return sanitized;
} }
/** /**
* Processes HTML links to add target="_blank" to external links * Process links to add target="_blank" and rel="noreferrer noopener" to external links
* This function is available for use but not currently called automatically. *
* It can be used in post-processing if needed. * External links are links that don't match the base domain.
* Internal links (same domain) are left unchanged.
*/ */
export function processLinks(html: string, linkBaseURL: string): string { export function processLinks(html: string, linkBaseURL: string): string {
// Extract domain from linkBaseURL for comparison if (!linkBaseURL) {
let linkBaseDomain = ''; return html;
if (linkBaseURL) { }
// Extract base domain from linkBaseURL
let baseDomain: string | null = null;
try {
const urlMatch = linkBaseURL.match(/^https?:\/\/([^\/]+)/);
if (urlMatch) {
baseDomain = urlMatch[1];
}
} catch {
// If parsing fails, don't process links
return html;
}
if (!baseDomain) {
return html;
}
// Process anchor tags with href attributes
return html.replace(/<a\s+([^>]*\s+)?href\s*=\s*["']([^"']+)["']([^>]*?)>/gi, (match, before, href, after) => {
// Skip if already has target attribute
if (match.includes('target=')) {
return match;
}
// Skip if it's not an http/https link
if (!/^https?:\/\//i.test(href)) {
return match;
}
// Skip if it's already a special link type (nostr, wikilink, etc.)
if (match.includes('class="nostr-link"') ||
match.includes('class="wikilink"') ||
match.includes('class="hashtag-link"')) {
return match;
}
// Check if it's an external link
let isExternal = true;
try { try {
// Use URL constructor if available (Node.js 10+) const hrefMatch = href.match(/^https?:\/\/([^\/]+)/);
// eslint-disable-next-line @typescript-eslint/no-explicit-any if (hrefMatch && hrefMatch[1] === baseDomain) {
const URLConstructor = (globalThis as any).URL; isExternal = false;
if (URLConstructor) {
const url = new URLConstructor(linkBaseURL);
linkBaseDomain = url.hostname;
} else {
throw new Error('URL not available');
} }
} catch { } catch {
// Fallback to simple string parsing if URL constructor fails // If parsing fails, assume external
const url = linkBaseURL.replace(/^https?:\/\//, '');
const parts = url.split('/');
if (parts.length > 0) {
linkBaseDomain = parts[0];
}
} }
}
// Only add target="_blank" to external links
// Regex to match <a> tags with href attributes
const linkRegex = /<a\s+([^>]*?)href\s*=\s*["']([^"']+)["']([^>]*?)>/g;
return html.replace(linkRegex, (match, before, href, after) => {
// Check if it's an external link (starts with http:// or https://)
const isExternal = href.startsWith('http://') || href.startsWith('https://');
if (isExternal) { if (isExternal) {
// Check if it's pointing to our own domain // Check if there's already a rel attribute
if (linkBaseDomain) { if (match.includes('rel=')) {
try { // Add to existing rel attribute if it doesn't already have noreferrer noopener
// eslint-disable-next-line @typescript-eslint/no-explicit-any if (!match.includes('noreferrer') && !match.includes('noopener')) {
const URLConstructor = (globalThis as any).URL; return match.replace(/rel\s*=\s*["']([^"']+)["']/i, 'rel="$1 noreferrer noopener"');
if (URLConstructor) {
const hrefUrl = new URLConstructor(href);
if (hrefUrl.hostname === linkBaseDomain) {
// Same domain - open in same tab (remove any existing target attribute)
return match.replace(/\s*target\s*=\s*["'][^"']*["']/gi, '');
}
} else {
throw new Error('URL not available');
}
} catch {
// If URL parsing fails, use simple string check
if (href.includes(linkBaseDomain)) {
return match.replace(/\s*target\s*=\s*["'][^"']*["']/gi, '');
}
}
}
// External link - add target="_blank" and rel="noopener noreferrer" if not already present
if (!match.includes('target=')) {
if (!match.includes('rel=')) {
return match.replace('>', ' target="_blank" rel="noopener noreferrer">');
} else {
// Update existing rel attribute to include noopener if not present
const updatedMatch = match.replace(/rel\s*=\s*["']([^"']*)["']/gi, (relMatch, relValue) => {
if (!relValue.includes('noopener')) {
return `rel="${relValue} noopener noreferrer"`;
}
return relMatch;
});
return updatedMatch.replace('>', ' target="_blank">');
} }
// Add target="_blank" before the closing >
return match.replace(/>$/, ' target="_blank">');
} else {
// Add both target and rel
return match.replace(/>$/, ' target="_blank" rel="noreferrer noopener">');
} }
} else {
// Local/relative link - ensure it opens in same tab (remove target if present)
return match.replace(/\s*target\s*=\s*["'][^"']*["']/gi, '');
} }
return match; return match;
}); });
} }

586
src/utils/report-generator.ts

@ -0,0 +1,586 @@
import { Parser } from '../parser';
import * as fs from 'fs';
import * as path from 'path';
import { ProcessResult } from '../types';
/**
* Shared utilities for generating test reports
*/
/**
 * One parsed test document: the raw source text paired with what the parser
 * produced for it.
 */
export interface TestData {
// Raw document text; the report HTML-escapes it and shows it in the
// "Original ... Content" tab.
original: string;
// Parser output for `original`. The report reads its content, frontmatter,
// tableOfContents, link/hashtag/media lists and the LaTeX/music flags.
result: ProcessResult;
}
/**
 * Input to generateHTMLReport: one parsed document per supported markup
 * format, each rendered as its own section of the report.
 */
export interface ReportData {
// Source and parse result shown in the "Markdown Document Test" section.
markdown: TestData;
// Source and parse result shown in the "AsciiDoc Document Test" section.
asciidoc: TestData;
}
/**
* Generate HTML test report from parsed documents
*/
export function generateHTMLReport(data: ReportData): string {
const { markdown, asciidoc } = data;
return `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>GC Parser Test Report</title>
<style>
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
line-height: 1.6;
color: #333;
background: #f5f5f5;
padding: 20px;
}
.container {
max-width: 1400px;
margin: 0 auto;
}
h1 {
color: #2c3e50;
margin-bottom: 10px;
font-size: 2.5em;
}
.subtitle {
color: #7f8c8d;
margin-bottom: 30px;
font-size: 1.1em;
}
.section {
background: white;
border-radius: 8px;
padding: 30px;
margin-bottom: 30px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
.section h2 {
color: #34495e;
margin-bottom: 20px;
padding-bottom: 10px;
border-bottom: 2px solid #3498db;
font-size: 1.8em;
}
.section h3 {
color: #2c3e50;
margin-top: 25px;
margin-bottom: 15px;
font-size: 1.3em;
}
.tabs {
display: flex;
gap: 10px;
margin-bottom: 20px;
border-bottom: 2px solid #e0e0e0;
}
.tab {
padding: 12px 24px;
background: #f8f9fa;
border: none;
border-top-left-radius: 6px;
border-top-right-radius: 6px;
cursor: pointer;
font-size: 1em;
font-weight: 500;
color: #555;
transition: all 0.2s;
}
.tab:hover {
background: #e9ecef;
}
.tab.active {
background: #3498db;
color: white;
}
.tab-content {
display: none;
}
.tab-content.active {
display: block;
}
.metadata-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
gap: 15px;
margin-top: 15px;
}
.metadata-item {
background: #f8f9fa;
padding: 12px;
border-radius: 4px;
border-left: 3px solid #3498db;
}
.metadata-item strong {
color: #2c3e50;
display: block;
margin-bottom: 5px;
}
.metadata-item code {
background: #e9ecef;
padding: 2px 6px;
border-radius: 3px;
font-size: 0.9em;
}
.code-block {
background: #2d2d2d;
color: #f8f8f2;
padding: 15px;
border-radius: 6px;
overflow-x: auto;
font-family: 'Courier New', monospace;
font-size: 0.9em;
line-height: 1.5;
margin: 15px 0;
max-height: 400px;
overflow-y: auto;
}
.code-block pre {
margin: 0;
white-space: pre-wrap;
word-wrap: break-word;
}
.rendered-output {
background: white;
border: 1px solid #ddd;
padding: 20px;
border-radius: 6px;
margin: 15px 0;
min-height: 200px;
}
.rendered-output * {
max-width: 100%;
}
.stats {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
gap: 15px;
margin-top: 20px;
}
.stat-card {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 20px;
border-radius: 8px;
text-align: center;
}
.stat-card .number {
font-size: 2.5em;
font-weight: bold;
margin-bottom: 5px;
}
.stat-card .label {
font-size: 0.9em;
opacity: 0.9;
}
.list-item {
background: #f8f9fa;
padding: 8px 12px;
margin: 5px 0;
border-radius: 4px;
border-left: 3px solid #95a5a6;
}
.list-item code {
background: #e9ecef;
padding: 2px 6px;
border-radius: 3px;
font-size: 0.85em;
}
.success-badge {
display: inline-block;
background: #27ae60;
color: white;
padding: 4px 12px;
border-radius: 12px;
font-size: 0.85em;
font-weight: 500;
margin-left: 10px;
}
.warning-badge {
display: inline-block;
background: #f39c12;
color: white;
padding: 4px 12px;
border-radius: 12px;
font-size: 0.85em;
font-weight: 500;
margin-left: 10px;
}
.comparison {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 20px;
margin-top: 20px;
}
@media (max-width: 768px) {
.comparison {
grid-template-columns: 1fr;
}
}
.json-view {
background: #f8f9fa;
padding: 15px;
border-radius: 6px;
overflow-x: auto;
font-family: 'Courier New', monospace;
font-size: 0.85em;
max-height: 300px;
overflow-y: auto;
}
</style>
</head>
<body>
<div class="container">
<h1>GC Parser Test Report</h1>
<p class="subtitle">Generated: ${new Date().toLocaleString()}</p>
<!-- Markdown Section -->
<div class="section">
<h2>Markdown Document Test <span class="success-badge"> Parsed</span></h2>
<div class="tabs">
<button class="tab active" onclick="showTab('md-overview')">Overview</button>
<button class="tab" onclick="showTab('md-original')">Original Content</button>
<button class="tab" onclick="showTab('md-rendered')">Rendered Output</button>
<button class="tab" onclick="showTab('md-metadata')">Metadata</button>
</div>
<div id="md-overview" class="tab-content active">
<div class="stats">
<div class="stat-card">
<div class="number">${markdown.result.nostrLinks.length}</div>
<div class="label">Nostr Links</div>
</div>
<div class="stat-card">
<div class="number">${markdown.result.wikilinks.length}</div>
<div class="label">Wikilinks</div>
</div>
<div class="stat-card">
<div class="number">${markdown.result.hashtags.length}</div>
<div class="label">Hashtags</div>
</div>
<div class="stat-card">
<div class="number">${markdown.result.links.length}</div>
<div class="label">Links</div>
</div>
<div class="stat-card">
<div class="number">${markdown.result.media.length}</div>
<div class="label">Media URLs</div>
</div>
<div class="stat-card">
<div class="number">${markdown.result.hasLaTeX ? 'Yes' : 'No'}</div>
<div class="label">Has LaTeX</div>
</div>
<div class="stat-card">
<div class="number">${markdown.result.hasMusicalNotation ? 'Yes' : 'No'}</div>
<div class="label">Has Music</div>
</div>
</div>
<h3>Frontmatter</h3>
${markdown.result.frontmatter ? `
<div class="metadata-grid">
${Object.entries(markdown.result.frontmatter).map(([key, value]) => `
<div class="metadata-item">
<strong>${escapeHtml(key)}</strong>
<code>${escapeHtml(JSON.stringify(value))}</code>
</div>
`).join('')}
</div>
` : '<p><em>No frontmatter found</em></p>'}
</div>
<div id="md-original" class="tab-content">
<h3>Original Markdown Content</h3>
<div class="code-block">
<pre>${escapeHtml(markdown.original)}</pre>
</div>
</div>
<div id="md-rendered" class="tab-content">
<h3>Rendered HTML Output</h3>
<div class="rendered-output">
${markdown.result.content}
</div>
<details style="margin-top: 15px;">
<summary style="cursor: pointer; color: #3498db; font-weight: 500;">View Raw HTML</summary>
<div class="code-block" style="margin-top: 10px;">
<pre>${escapeHtml(markdown.result.content)}</pre>
</div>
</details>
</div>
<div id="md-metadata" class="tab-content">
<h3>Extracted Metadata</h3>
${markdown.result.nostrLinks.length > 0 ? `
<h4>Nostr Links (${markdown.result.nostrLinks.length})</h4>
${markdown.result.nostrLinks.map((link: any) => `
<div class="list-item">
<strong>${escapeHtml(link.type)}</strong>: <code>${escapeHtml(link.bech32)}</code>
${link.text ? ` - ${escapeHtml(link.text)}` : ''}
</div>
`).join('')}
` : ''}
${markdown.result.wikilinks.length > 0 ? `
<h4>Wikilinks (${markdown.result.wikilinks.length})</h4>
${markdown.result.wikilinks.map((wl: any) => `
<div class="list-item">
<code>${escapeHtml(wl.original)}</code> dtag: <code>${escapeHtml(wl.dtag)}</code>
${wl.display ? ` (display: ${escapeHtml(wl.display)})` : ''}
</div>
`).join('')}
` : ''}
${markdown.result.hashtags.length > 0 ? `
<h4>Hashtags (${markdown.result.hashtags.length})</h4>
${markdown.result.hashtags.map((tag: string) => `
<div class="list-item">
<code>#${escapeHtml(tag)}</code>
</div>
`).join('')}
` : ''}
${markdown.result.links.length > 0 ? `
<h4>Links (${markdown.result.links.length})</h4>
${markdown.result.links.map((link: any) => `
<div class="list-item">
<a href="${escapeHtml(link.url)}" target="_blank">${escapeHtml(link.text || link.url)}</a>
${link.isExternal ? '<span class="warning-badge">External</span>' : ''}
</div>
`).join('')}
` : ''}
${markdown.result.media.length > 0 ? `
<h4>Media URLs (${markdown.result.media.length})</h4>
${markdown.result.media.map((url: string) => `
<div class="list-item">
<a href="${escapeHtml(url)}" target="_blank">${escapeHtml(url)}</a>
</div>
`).join('')}
` : ''}
${markdown.result.tableOfContents ? `
<h4>Table of Contents</h4>
<div class="rendered-output">
${markdown.result.tableOfContents}
</div>
` : ''}
</div>
</div>
<!-- AsciiDoc Section -->
<div class="section">
<h2>AsciiDoc Document Test <span class="success-badge"> Parsed</span></h2>
<div class="tabs">
<button class="tab active" onclick="showTab('ad-overview')">Overview</button>
<button class="tab" onclick="showTab('ad-original')">Original Content</button>
<button class="tab" onclick="showTab('ad-rendered')">Rendered Output</button>
<button class="tab" onclick="showTab('ad-metadata')">Metadata</button>
</div>
<div id="ad-overview" class="tab-content active">
<div class="stats">
<div class="stat-card">
<div class="number">${asciidoc.result.nostrLinks.length}</div>
<div class="label">Nostr Links</div>
</div>
<div class="stat-card">
<div class="number">${asciidoc.result.wikilinks.length}</div>
<div class="label">Wikilinks</div>
</div>
<div class="stat-card">
<div class="number">${asciidoc.result.hashtags.length}</div>
<div class="label">Hashtags</div>
</div>
<div class="stat-card">
<div class="number">${asciidoc.result.links.length}</div>
<div class="label">Links</div>
</div>
<div class="stat-card">
<div class="number">${asciidoc.result.media.length}</div>
<div class="label">Media URLs</div>
</div>
<div class="stat-card">
<div class="number">${asciidoc.result.hasLaTeX ? 'Yes' : 'No'}</div>
<div class="label">Has LaTeX</div>
</div>
<div class="stat-card">
<div class="number">${asciidoc.result.hasMusicalNotation ? 'Yes' : 'No'}</div>
<div class="label">Has Music</div>
</div>
</div>
<h3>Frontmatter</h3>
${asciidoc.result.frontmatter ? `
<div class="metadata-grid">
${Object.entries(asciidoc.result.frontmatter).map(([key, value]) => `
<div class="metadata-item">
<strong>${escapeHtml(key)}</strong>
<code>${escapeHtml(JSON.stringify(value))}</code>
</div>
`).join('')}
</div>
` : '<p><em>No frontmatter found</em></p>'}
</div>
<div id="ad-original" class="tab-content">
<h3>Original AsciiDoc Content</h3>
<div class="code-block">
<pre>${escapeHtml(asciidoc.original)}</pre>
</div>
</div>
<div id="ad-rendered" class="tab-content">
<h3>Rendered HTML Output</h3>
<div class="rendered-output">
${asciidoc.result.content}
</div>
<details style="margin-top: 15px;">
<summary style="cursor: pointer; color: #3498db; font-weight: 500;">View Raw HTML</summary>
<div class="code-block" style="margin-top: 10px;">
<pre>${escapeHtml(asciidoc.result.content)}</pre>
</div>
</details>
</div>
<div id="ad-metadata" class="tab-content">
<h3>Extracted Metadata</h3>
${asciidoc.result.nostrLinks.length > 0 ? `
<h4>Nostr Links (${asciidoc.result.nostrLinks.length})</h4>
${asciidoc.result.nostrLinks.map((link: any) => `
<div class="list-item">
<strong>${escapeHtml(link.type)}</strong>: <code>${escapeHtml(link.bech32)}</code>
${link.text ? ` - ${escapeHtml(link.text)}` : ''}
</div>
`).join('')}
` : ''}
${asciidoc.result.wikilinks.length > 0 ? `
<h4>Wikilinks (${asciidoc.result.wikilinks.length})</h4>
${asciidoc.result.wikilinks.map((wl: any) => `
<div class="list-item">
<code>${escapeHtml(wl.original)}</code> dtag: <code>${escapeHtml(wl.dtag)}</code>
${wl.display ? ` (display: ${escapeHtml(wl.display)})` : ''}
</div>
`).join('')}
` : ''}
${asciidoc.result.hashtags.length > 0 ? `
<h4>Hashtags (${asciidoc.result.hashtags.length})</h4>
${asciidoc.result.hashtags.map((tag: string) => `
<div class="list-item">
<code>#${escapeHtml(tag)}</code>
</div>
`).join('')}
` : ''}
${asciidoc.result.links.length > 0 ? `
<h4>Links (${asciidoc.result.links.length})</h4>
${asciidoc.result.links.map((link: any) => `
<div class="list-item">
<a href="${escapeHtml(link.url)}" target="_blank">${escapeHtml(link.text || link.url)}</a>
${link.isExternal ? '<span class="warning-badge">External</span>' : ''}
</div>
`).join('')}
` : ''}
${asciidoc.result.media.length > 0 ? `
<h4>Media URLs (${asciidoc.result.media.length})</h4>
${asciidoc.result.media.map((url: string) => `
<div class="list-item">
<a href="${escapeHtml(url)}" target="_blank">${escapeHtml(url)}</a>
</div>
`).join('')}
` : ''}
${asciidoc.result.tableOfContents ? `
<h4>Table of Contents</h4>
<div class="rendered-output">
${asciidoc.result.tableOfContents}
</div>
` : ''}
</div>
</div>
</div>
<script>
function showTab(tabId) {
// Hide all tab contents
const allContents = document.querySelectorAll('.tab-content');
allContents.forEach(content => content.classList.remove('active'));
// Remove active class from all tabs
const allTabs = document.querySelectorAll('.tab');
allTabs.forEach(tab => tab.classList.remove('active'));
// Show selected tab content
const selectedContent = document.getElementById(tabId);
if (selectedContent) {
selectedContent.classList.add('active');
}
// Add active class to clicked tab
event.target.classList.add('active');
}
</script>
</body>
</html>`;
}
/**
 * Escape the five HTML-significant characters (`&`, `<`, `>`, `"`, `'`) so the
 * text can be embedded safely in element content or a quoted attribute value.
 *
 * The report templates feed this function values read from untyped parser
 * output, so it is deliberately tolerant at runtime:
 * `null`/`undefined` become an empty string and any other non-string value is
 * converted with `String()` before escaping, instead of throwing on `.replace`.
 *
 * @param text - The raw text to escape; nullish values yield `''`.
 * @returns The text with HTML special characters replaced by entities.
 */
export function escapeHtml(text: string | null | undefined): string {
  if (text == null) {
    return '';
  }
  const entities: Readonly<Record<string, string>> = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#039;',
  };
  // The regex only matches keys of `entities`; the `?? char` fallback keeps the
  // lookup type-safe under `noUncheckedIndexedAccess`.
  return String(text).replace(/[&<>"']/g, (char) => entities[char] ?? char);
}

573
test-parser-report.test.ts

@ -1,4 +1,5 @@
import { Parser } from './src/parser'; import { Parser } from './src/parser';
import { generateHTMLReport } from './src/utils/report-generator';
import * as fs from 'fs'; import * as fs from 'fs';
import * as path from 'path'; import * as path from 'path';
@ -54,575 +55,3 @@ describe('Parser Test Report', () => {
expect(asciidocResult.content.length).toBeGreaterThan(0); expect(asciidocResult.content.length).toBeGreaterThan(0);
}); });
}); });
/**
 * One test document as consumed by the HTML report: the text that was fed to
 * the parser together with whatever the parser produced for it.
 */
type TestData = {
  /** Source text of the test document, displayed escaped in the report. */
  original: string;
  /**
   * Parse output for `original`. Left untyped; the report reads fields such
   * as `content`, `nostrLinks`, `wikilinks`, `hashtags`, `links` and `media`
   * from it.
   */
  result: any;
};
/**
 * Input of the HTML report generator: one {@link TestData} entry per
 * supported document format.
 */
type ReportData = {
  /** Source and parse result of the Markdown test document. */
  markdown: TestData;
  /** Source and parse result of the AsciiDoc test document. */
  asciidoc: TestData;
};
/**
 * Build the standalone HTML page that reports how the parser handled the
 * Markdown and AsciiDoc test documents.
 *
 * The page consists of inline CSS, one tabbed section per document (overview
 * statistics, original source, rendered output, extracted metadata) and a
 * small inline script that switches tabs. Both sections are produced by the
 * same helper so they cannot drift apart.
 *
 * @param data - Original text and parse result for each test document.
 * @returns The complete HTML document as a string.
 */
function generateHTMLReport(data: ReportData): string {
  const { markdown, asciidoc } = data;
  return `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>GC Parser Test Report</title>
<style>
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
line-height: 1.6;
color: #333;
background: #f5f5f5;
padding: 20px;
}
.container {
max-width: 1400px;
margin: 0 auto;
}
h1 {
color: #2c3e50;
margin-bottom: 10px;
font-size: 2.5em;
}
.subtitle {
color: #7f8c8d;
margin-bottom: 30px;
font-size: 1.1em;
}
.section {
background: white;
border-radius: 8px;
padding: 30px;
margin-bottom: 30px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
.section h2 {
color: #34495e;
margin-bottom: 20px;
padding-bottom: 10px;
border-bottom: 2px solid #3498db;
font-size: 1.8em;
}
.section h3 {
color: #2c3e50;
margin-top: 25px;
margin-bottom: 15px;
font-size: 1.3em;
}
.tabs {
display: flex;
gap: 10px;
margin-bottom: 20px;
border-bottom: 2px solid #e0e0e0;
}
.tab {
padding: 12px 24px;
background: #f8f9fa;
border: none;
border-top-left-radius: 6px;
border-top-right-radius: 6px;
cursor: pointer;
font-size: 1em;
font-weight: 500;
color: #555;
transition: all 0.2s;
}
.tab:hover {
background: #e9ecef;
}
.tab.active {
background: #3498db;
color: white;
}
.tab-content {
display: none;
}
.tab-content.active {
display: block;
}
.metadata-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
gap: 15px;
margin-top: 15px;
}
.metadata-item {
background: #f8f9fa;
padding: 12px;
border-radius: 4px;
border-left: 3px solid #3498db;
}
.metadata-item strong {
color: #2c3e50;
display: block;
margin-bottom: 5px;
}
.metadata-item code {
background: #e9ecef;
padding: 2px 6px;
border-radius: 3px;
font-size: 0.9em;
}
.code-block {
background: #2d2d2d;
color: #f8f8f2;
padding: 15px;
border-radius: 6px;
overflow-x: auto;
font-family: 'Courier New', monospace;
font-size: 0.9em;
line-height: 1.5;
margin: 15px 0;
max-height: 400px;
overflow-y: auto;
}
.code-block pre {
margin: 0;
white-space: pre-wrap;
word-wrap: break-word;
}
.rendered-output {
background: white;
border: 1px solid #ddd;
padding: 20px;
border-radius: 6px;
margin: 15px 0;
min-height: 200px;
}
.rendered-output * {
max-width: 100%;
}
.stats {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
gap: 15px;
margin-top: 20px;
}
.stat-card {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 20px;
border-radius: 8px;
text-align: center;
}
.stat-card .number {
font-size: 2.5em;
font-weight: bold;
margin-bottom: 5px;
}
.stat-card .label {
font-size: 0.9em;
opacity: 0.9;
}
.list-item {
background: #f8f9fa;
padding: 8px 12px;
margin: 5px 0;
border-radius: 4px;
border-left: 3px solid #95a5a6;
}
.list-item code {
background: #e9ecef;
padding: 2px 6px;
border-radius: 3px;
font-size: 0.85em;
}
.success-badge {
display: inline-block;
background: #27ae60;
color: white;
padding: 4px 12px;
border-radius: 12px;
font-size: 0.85em;
font-weight: 500;
margin-left: 10px;
}
.warning-badge {
display: inline-block;
background: #f39c12;
color: white;
padding: 4px 12px;
border-radius: 12px;
font-size: 0.85em;
font-weight: 500;
margin-left: 10px;
}
.comparison {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 20px;
margin-top: 20px;
}
@media (max-width: 768px) {
.comparison {
grid-template-columns: 1fr;
}
}
.json-view {
background: #f8f9fa;
padding: 15px;
border-radius: 6px;
overflow-x: auto;
font-family: 'Courier New', monospace;
font-size: 0.85em;
max-height: 300px;
overflow-y: auto;
}
</style>
</head>
<body>
<div class="container">
<h1>GC Parser Test Report</h1>
<p class="subtitle">Generated: ${new Date().toLocaleString()}</p>
<!-- Markdown Section -->
${renderDocumentSection('md', 'Markdown', markdown)}
<!-- AsciiDoc Section -->
${renderDocumentSection('ad', 'AsciiDoc', asciidoc)}
</div>
<script>
function showTab(button, tabId) {
// Only touch the section that owns the clicked tab, so the other
// document section keeps its own active tab and content
const section = button.closest('.section');
if (!section) {
return;
}
// Hide this section's tab contents
section.querySelectorAll('.tab-content').forEach(content => content.classList.remove('active'));
// Remove active class from this section's tabs
section.querySelectorAll('.tab').forEach(tab => tab.classList.remove('active'));
// Show selected tab content
const selectedContent = document.getElementById(tabId);
if (selectedContent) {
selectedContent.classList.add('active');
}
// Mark the clicked tab as active (passed in explicitly instead of
// relying on the deprecated global event object)
button.classList.add('active');
}
</script>
</body>
</html>`;
}

/**
 * Render the tabbed report section for one test document.
 *
 * @param idPrefix - Prefix for the tab-content element ids (`md` or `ad`); must be unique per section.
 * @param label - Human-readable format name used in headings (e.g. `Markdown`).
 * @param doc - Original text and parse result of the document.
 * @returns The section's HTML fragment.
 */
function renderDocumentSection(
  idPrefix: string,
  label: string,
  doc: ReportData['markdown'],
): string {
  const { original, result } = doc;

  // Overview numbers, in display order.
  const stats: Array<[string | number, string]> = [
    [result.nostrLinks.length, 'Nostr Links'],
    [result.wikilinks.length, 'Wikilinks'],
    [result.hashtags.length, 'Hashtags'],
    [result.links.length, 'Links'],
    [result.media.length, 'Media URLs'],
    [result.hasLaTeX ? 'Yes' : 'No', 'Has LaTeX'],
    [result.hasMusicalNotation ? 'Yes' : 'No', 'Has Music'],
  ];
  const statCards = stats
    .map(([value, caption]) => `<div class="stat-card">
<div class="number">${value}</div>
<div class="label">${caption}</div>
</div>`)
    .join('\n');

  // JSON.stringify returns undefined for values such as `undefined` or
  // functions; fall back to a literal so escaping never receives a non-string.
  const frontmatter = result.frontmatter
    ? `
<div class="metadata-grid">
${Object.entries(result.frontmatter).map(([key, value]) => `
<div class="metadata-item">
<strong>${escapeHtml(key)}</strong>
<code>${escapeHtml(JSON.stringify(value) ?? 'undefined')}</code>
</div>
`).join('')}
</div>
`
    : '<p><em>No frontmatter found</em></p>';

  // Links open in a new tab and point at URLs taken from the parsed document,
  // so they carry rel="noopener noreferrer".
  const metadata = [
    renderMetadataList(
      'Nostr Links',
      result.nostrLinks,
      (link: { type: string; bech32: string; text?: string }) =>
        `<strong>${escapeHtml(link.type)}</strong>: <code>${escapeHtml(link.bech32)}</code>
${link.text ? ` - ${escapeHtml(link.text)}` : ''}`,
    ),
    renderMetadataList(
      'Wikilinks',
      result.wikilinks,
      (wl: { original: string; dtag: string; display?: string }) =>
        `<code>${escapeHtml(wl.original)}</code> dtag: <code>${escapeHtml(wl.dtag)}</code>
${wl.display ? ` (display: ${escapeHtml(wl.display)})` : ''}`,
    ),
    renderMetadataList(
      'Hashtags',
      result.hashtags,
      (tag: string) => `<code>#${escapeHtml(tag)}</code>`,
    ),
    renderMetadataList(
      'Links',
      result.links,
      (link: { url: string; text?: string; isExternal?: boolean }) =>
        `<a href="${escapeHtml(link.url)}" target="_blank" rel="noopener noreferrer">${escapeHtml(link.text || link.url)}</a>
${link.isExternal ? '<span class="warning-badge">External</span>' : ''}`,
    ),
    renderMetadataList(
      'Media URLs',
      result.media,
      (url: string) =>
        `<a href="${escapeHtml(url)}" target="_blank" rel="noopener noreferrer">${escapeHtml(url)}</a>`,
    ),
  ].join('\n');

  // The table of contents is parser-generated HTML and is embedded unescaped.
  const tableOfContents = result.tableOfContents
    ? `
<h4>Table of Contents</h4>
<div class="rendered-output">
${result.tableOfContents}
</div>
`
    : '';

  // `result.content` is embedded unescaped on purpose in the "Rendered Output"
  // tab so the report shows the parser's HTML as a browser would display it.
  return `<div class="section">
<h2>${label} Document Test <span class="success-badge"> Parsed</span></h2>
<div class="tabs">
<button class="tab active" onclick="showTab(this, '${idPrefix}-overview')">Overview</button>
<button class="tab" onclick="showTab(this, '${idPrefix}-original')">Original Content</button>
<button class="tab" onclick="showTab(this, '${idPrefix}-rendered')">Rendered Output</button>
<button class="tab" onclick="showTab(this, '${idPrefix}-metadata')">Metadata</button>
</div>
<div id="${idPrefix}-overview" class="tab-content active">
<div class="stats">
${statCards}
</div>
<h3>Frontmatter</h3>
${frontmatter}
</div>
<div id="${idPrefix}-original" class="tab-content">
<h3>Original ${label} Content</h3>
<div class="code-block">
<pre>${escapeHtml(original)}</pre>
</div>
</div>
<div id="${idPrefix}-rendered" class="tab-content">
<h3>Rendered HTML Output</h3>
<div class="rendered-output">
${result.content}
</div>
<details style="margin-top: 15px;">
<summary style="cursor: pointer; color: #3498db; font-weight: 500;">View Raw HTML</summary>
<div class="code-block" style="margin-top: 10px;">
<pre>${escapeHtml(result.content)}</pre>
</div>
</details>
</div>
<div id="${idPrefix}-metadata" class="tab-content">
<h3>Extracted Metadata</h3>
${metadata}
${tableOfContents}
</div>
</div>`;
}

/**
 * Render one "Extracted Metadata" group: a counted heading followed by one
 * `.list-item` row per entry. Returns an empty string when there are no items.
 *
 * @param heading - Trusted, literal heading text (inserted without escaping).
 * @param items - Entries to list.
 * @param renderItem - Produces the already-escaped inner HTML for one entry.
 * @returns The HTML fragment for the group, or `''` if `items` is empty.
 */
function renderMetadataList<T>(
  heading: string,
  items: readonly T[],
  renderItem: (item: T) => string,
): string {
  if (items.length === 0) {
    return '';
  }
  const rows = items
    .map((item) => `
<div class="list-item">
${renderItem(item)}
</div>
`)
    .join('');
  return `
<h4>${heading} (${items.length})</h4>
${rows}
`;
}
/**
 * Escape the five HTML-significant characters (`&`, `<`, `>`, `"`, `'`) so the
 * text can be embedded safely in element content or a quoted attribute value.
 *
 * Values reaching this function come from untyped parser output, so it is
 * tolerant at runtime: `null`/`undefined` produce an empty string and other
 * non-string values are converted with `String()` rather than throwing on
 * `.replace`.
 *
 * @param text - The raw text to escape; nullish values yield `''`.
 * @returns The text with HTML special characters replaced by entities.
 */
function escapeHtml(text: string | null | undefined): string {
  if (text == null) {
    return '';
  }
  const map: Readonly<Record<string, string>> = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#039;',
  };
  // Every character the regex can match is a key of `map`; `?? m` only exists
  // to keep the indexed lookup type-safe.
  return String(text).replace(/[&<>"']/g, (m) => map[m] ?? m);
}

19277
test-report.html

File diff suppressed because it is too large Load Diff

2
tsconfig.json

@ -3,7 +3,7 @@
"target": "ES2020", "target": "ES2020",
"module": "commonjs", "module": "commonjs",
"lib": ["ES2020"], "lib": ["ES2020"],
"types": ["node"], "types": ["node", "jest"],
"outDir": "./dist", "outDir": "./dist",
"rootDir": "./src", "rootDir": "./src",
"strict": true, "strict": true,

10
tsconfig.test.json

@ -0,0 +1,10 @@
{
"extends": "./tsconfig.json",
"compilerOptions": {
"rootDir": ".",
"types": ["node", "jest"],
"noEmit": true
},
"include": ["src/**/*", "**/*.test.ts", "generate-test-report.ts"],
"exclude": ["node_modules", "dist"]
}
Loading…
Cancel
Save