/*
 * Compiled CommonJS module exporting postProcessHtml, which post-processes the HTML
 * produced by AsciiDoctor. getNostrType, processMedia and processOpenGraphLinks are
 * private helpers defined in the same span.
 *
 * NOTE(review): in this copy, postProcessHtml, getNostrType, processMedia and the start of
 * processOpenGraphLinks are collapsed onto six physical lines, so every `//` comment also
 * swallows the statements that follow it on the same line. Several template, string and
 * regex literals also look truncated (empty templates, three consecutive single quotes,
 * empty regex bodies), as if HTML tags and entities had been stripped. Presumably an
 * extraction artifact - restore from the upstream build before relying on this text.
 *
 * Order of steps in postProcessHtml, as written below:
 *   1. BOOKSTR:... markers are replaced.
 *   2. hashtag:topic[text] macros: when options.hashtagUrl is set, the URL comes from
 *      calling it (function) or from substituting {topic} (string); otherwise the
 *      non-link branch is used.
 *   3. WIKILINK:dtag|display placeholders: the URL comes from options.wikilinkUrl
 *      (function, or string with {dtag}) or defaults to /events?d=dtag.
 *   4. Leftover link:http(s)-url[text] macros; link text containing ws:// or wss://
 *      takes the relay-URL branch.
 *   5. link:nostr:id[text] macros; the branch is chosen by getNostrType(id).
 *   6. processMedia (MEDIA:youtube:, MEDIA:spotify:, MEDIA:video:, MEDIA:audio:).
 *   7. href values wrapped in doubled quotes are rewritten to href="url".
 *   8. processOpenGraphLinks(processed, options.linkBaseURL).
 *   9. processImages.
 *  10. processMusicalNotation from ./music, only when options.enableMusicalNotation is truthy.
 *  11. A final anchor clean-up replace.
 *  12. cleanupMarkdown, addStylingClasses, hideRawTocText, in that order.
 *
 * getNostrType(id) returns the first of 'npub', 'nprofile', 'nevent', 'naddr', 'note'
 * that id starts with, or null when none matches.
 */
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.postProcessHtml = postProcessHtml; const music_1 = require("./music"); /** * Post-processes HTML output from AsciiDoctor * Converts AsciiDoc macros to HTML with data attributes and CSS classes */ function postProcessHtml(html, options = {}) { let processed = html; // Convert bookstr markers to HTML placeholders processed = processed.replace(/BOOKSTR:([^<>\s]+)/g, (_match, bookContent) => { const escaped = bookContent.replace(/"/g, '"').replace(/'/g, '''); return ``; }); // Convert hashtag links to HTML processed = processed.replace(/hashtag:([^[]+)\[([^\]]+)\]/g, (_match, normalizedHashtag, displayText) => { // HTML escape the display text const escapedDisplay = displayText .replace(/&/g, '&') .replace(//g, '>') .replace(/"/g, '"') .replace(/'/g, '''); // If hashtagUrl is configured, make it a clickable link if (options.hashtagUrl) { let url; if (typeof options.hashtagUrl === 'function') { url = options.hashtagUrl(normalizedHashtag); } else { // String template with {topic} placeholder url = options.hashtagUrl.replace(/{topic}/g, normalizedHashtag); } // Escape URL for HTML attribute const escapedUrl = url.replace(/"/g, '"').replace(/'/g, '''); return `${escapedDisplay}`; } else { // Default: Use span instead of tag - same color as links but no underline and not clickable return `${escapedDisplay}`; } }); // Convert WIKILINK:dtag|display placeholder format to HTML // Match WIKILINK:dtag|display, ensuring we don't match across HTML tags processed = processed.replace(/WIKILINK:([^|<>]+)\|([^<>\s]+)/g, (_match, dTag, displayText) => { const escapedDtag = dTag.trim().replace(/"/g, '"'); const escapedDisplay = displayText.trim() .replace(/&/g, '&') .replace(//g, '>') .replace(/"/g, '"') .replace(/'/g, '''); // Generate URL using custom format or default let url; if (options.wikilinkUrl) { if (typeof options.wikilinkUrl === 'function') { url = options.wikilinkUrl(dTag.trim()); } else { 
// String template with {dtag} placeholder url = options.wikilinkUrl.replace(/{dtag}/g, dTag.trim()); } } else { // Default format url = `/events?d=${escapedDtag}`; } // Escape URL for HTML attribute const escapedUrl = url.replace(/"/g, '"').replace(/'/g, '''); return `${escapedDisplay}`; }); // Convert any leftover link: macros that AsciiDoctor didn't convert // This MUST run before processOpenGraphLinks which removes "link:" prefixes // This handles cases where AsciiDoctor couldn't parse the link (e.g., link text with special chars) // Pattern: link:url[text] where url is http/https and text can contain any characters // Match link: macros that are still in the HTML as plain text (not converted by AsciiDoctor) // Also handle HTML-escaped versions that might appear processed = processed.replace(/link:(https?:\/\/[^\[]+)\[([^\]]+)\]/g, (_match, url, text) => { // Unescape if already HTML-escaped (but be careful not to unescape actual content) let unescapedUrl = url; // Only unescape if it looks like it was escaped (contains & or ") if (url.includes('&') || url.includes('"') || url.includes(''')) { unescapedUrl = url .replace(/&/g, '&') .replace(/</g, '<') .replace(/>/g, '>') .replace(/"/g, '"') .replace(/'/g, "'"); } let unescapedText = text; // Only unescape if it looks like it was escaped if (text.includes('&') || text.includes('<') || text.includes('>') || text.includes('"') || text.includes(''')) { unescapedText = text .replace(/&/g, '&') .replace(/</g, '<') .replace(/>/g, '>') .replace(/"/g, '"') .replace(/'/g, "'"); } // Escape URL for HTML attribute (fresh escape, no double-escaping) const escapedUrl = unescapedUrl .replace(/&/g, '&') .replace(/"/g, '"') .replace(/'/g, '''); // Escape text content for HTML (fresh escape, no double-escaping) const escapedText = unescapedText .replace(/&/g, '&') .replace(//g, '>') .replace(/"/g, '"') .replace(/'/g, '''); // Check if link text contains wss:// or ws:// - these are relay URLs, don't add OpenGraph const isRelayUrl 
= /wss?:\/\//i.test(unescapedText); if (isRelayUrl) { // Simple link without OpenGraph wrapper return `${escapedText} `; } else { // Regular link - will be processed by OpenGraph handler if external return `${escapedText} `; } }); // Convert nostr: links to HTML processed = processed.replace(/link:nostr:([^[]+)\[([^\]]+)\]/g, (_match, bech32Id, displayText) => { const nostrType = getNostrType(bech32Id); if (nostrType === 'nevent' || nostrType === 'naddr' || nostrType === 'note') { // Render as embedded event placeholder const escaped = bech32Id.replace(/"/g, '"'); return `
`; } else if (nostrType === 'npub' || nostrType === 'nprofile') { // Render as user handle const escaped = bech32Id.replace(/"/g, '"'); return `@${displayText}`; } else { // Fallback to regular link const escaped = bech32Id.replace(/"/g, '"'); return `${displayText}`; } }); // Process media URLs (YouTube, Spotify, video, audio) processed = processMedia(processed); // Fix double-escaped quotes in href attributes FIRST (before any other processing) // This fixes href=""url"" -> href="url" processed = processed.replace(/href\s*=\s*["']"(https?:\/\/[^"']+)"["']/gi, (_match, url) => { const escapedUrl = url.replace(/"/g, '"').replace(/'/g, '''); return `href="${escapedUrl}"`; }); // Process OpenGraph links (external links that should have rich previews) processed = processOpenGraphLinks(processed, options.linkBaseURL); // Process images: add max-width styling and data attributes processed = processImages(processed); // Process musical notation if enabled if (options.enableMusicalNotation) { processed = (0, music_1.processMusicalNotation)(processed); } // Clean up any escaped HTML that appears as text (e.g., <a href=...>) // This can happen when AsciiDoctor escapes link macros that it couldn't parse // Pattern: <a href="url">text</a> should be converted to actual HTML // Use a more flexible pattern that handles text with special characters like :// // Fix regular escaped HTML links processed = processed.replace(/<a\s+href=["'](https?:\/\/[^"']+)["']\s*>([^<]+)<\/a>/gi, (_match, url, text) => { // Unescape the URL and text const unescapedUrl = url .replace(/&/g, '&') .replace(/"/g, '"') .replace(/'/g, "'"); const unescapedText = text .replace(/&/g, '&') .replace(/</g, '<') .replace(/>/g, '>'); // Re-escape properly for HTML const escapedUrl = unescapedUrl .replace(/&/g, '&') .replace(/"/g, '"') .replace(/'/g, '''); const escapedText = unescapedText .replace(/&/g, '&') .replace(//g, '>'); // Check if link text contains wss:// or ws:// - these are relay URLs const 
isRelayUrl = /wss?:\/\//i.test(unescapedText); if (isRelayUrl) { // Simple link without OpenGraph wrapper return `${escapedText} `; } else { // Regular link return `${escapedText} `; } }); // Clean up any leftover markdown syntax processed = cleanupMarkdown(processed); // Add styling classes processed = addStylingClasses(processed); // Hide raw ToC text processed = hideRawTocText(processed); return processed; } /** * Get Nostr identifier type */ function getNostrType(id) { if (id.startsWith('npub')) return 'npub'; if (id.startsWith('nprofile')) return 'nprofile'; if (id.startsWith('nevent')) return 'nevent'; if (id.startsWith('naddr')) return 'naddr'; if (id.startsWith('note')) return 'note'; return null; } /** * Process media URLs (YouTube, Spotify, video, audio) * Converts MEDIA: placeholders to HTML embeds/players */ function processMedia(html) { let processed = html; // Process YouTube embeds processed = processed.replace(/MEDIA:youtube:([a-zA-Z0-9_-]+)/g, (_match, videoId) => { const escapedId = videoId.replace(/"/g, '"'); return ``; }); // Process Spotify embeds processed = processed.replace(/MEDIA:spotify:(track|album|playlist|artist|episode|show):([a-zA-Z0-9]+)/g, (_match, type, id) => { const escapedType = type.replace(/"/g, '"'); const escapedId = id.replace(/"/g, '"'); return ``; }); // Process video files processed = processed.replace(/MEDIA:video:(https?:\/\/[^\s<>"{}|\\^`\[\]()]+)/g, (_match, url) => { const escapedUrl = url .replace(/&/g, '&') .replace(//g, '>') .replace(/"/g, '"') .replace(/'/g, '''); return ``; }); // Process audio files processed = processed.replace(/MEDIA:audio:(https?:\/\/[^\s<>"{}|\\^`\[\]()]+)/g, (_match, url) => { const escapedUrl = url .replace(/&/g, '&') .replace(//g, '>') .replace(/"/g, '"') .replace(/'/g, '''); return ``; }); return processed; } /** * Process OpenGraph links - mark external links for OpenGraph preview fetching */ function processOpenGraphLinks(html, linkBaseURL) { // First, clean up any corrupted HTML 
fragments that might interfere // Remove "link:" prefixes that appear before links (AsciiDoc syntax that shouldn't be in HTML) // This happens when AsciiDoctor doesn't fully convert link:url[text] syntax or when // there's literal text like "should render like link:" before an anchor tag let processed = html; // Remove "link:" that appears immediately before anchor tags (most common case) // Match "link:" followed by optional whitespace and then \s])link:([a-zA-Z0-9])/gi, '$1$2'); // Also handle cases where "link:" appears with whitespace before anchor tags processed = processed.replace(/\s+link:\s*(?= href="url" processed = processed.replace(/href\s*=\s*["']"(https?:\/\/[^"']+)"["']/gi, (match, url) => { // Extract the clean URL and properly escape it const escapedUrl = url.replace(/"/g, '"').replace(/'/g, '''); return `href="${escapedUrl}"`; }); // Clean up href attributes that contain HTML fragments processed = processed.replace(/href\s*=\s*["']([^"']*<[^"']*)["']/gi, (match, corruptedHref) => { // If href contains HTML tags, extract just the URL part const urlMatch = corruptedHref.match(/(https?:\/\/[^\s<>"']+)/i); if (urlMatch) { const escapedUrl = urlMatch[1].replace(/"/g, '"').replace(/'/g, '''); return `href="${escapedUrl}"`; } return match; // If we can't fix it, leave it (will be skipped by validation) }); // Clean up any malformed anchor tag fragments that might cause issues processed = processed.replace(/]*<[^"'>]*)["']/gi, (match, corruptedHref) => { // Skip corrupted anchor tags - they'll be handled by the main regex with validation return match; }); // Clean up links inside code blocks - AsciiDoctor creates them but they should be plain text // Remove tags inside blocks, keeping only the link text
processed = processed.replace(/]*>([\s\S]*?)<\/code>/gi, (match, content) => {
// Remove any tags inside code blocks, keeping only the text content
const cleaned = content.replace(/]*>(.*?)<\/a>/gi, '$1');
return `${cleaned}`;
});
// Also clean up links inside pre blocks
processed = processed.replace(/]*>([\s\S]*?)<\/pre>/gi, (match, content) => {
const cleaned = content.replace(/]*>(.*?)<\/a>/gi, '$1');
return `${cleaned}`;
});
// Now protect code blocks and pre blocks by replacing them with placeholders
const codeBlockPlaceholders = [];
const preBlockPlaceholders = [];
// Replace pre blocks first (they can contain code blocks)
processed = processed.replace(/]*>([\s\S]*?)<\/pre>/gi, (match) => {
const placeholder = `__PREBLOCK_${preBlockPlaceholders.length}__`;
preBlockPlaceholders.push(match);
return placeholder;
});
// Replace code blocks
processed = processed.replace(/]*>([\s\S]*?)<\/code>/gi, (match) => {
const placeholder = `__CODEBLOCK_${codeBlockPlaceholders.length}__`;
codeBlockPlaceholders.push(match);
return placeholder;
});
// Extract base domain from linkBaseURL if provided
let baseDomain = null;
if (linkBaseURL) {
// The captured group is everything between the scheme and the first "/", so it
// includes any port; it is later compared with === against the same capture from each href.
try {
const urlMatch = linkBaseURL.match(/^https?:\/\/([^\/]+)/);
if (urlMatch) {
baseDomain = urlMatch[1];
}
}
catch {
// Ignore parsing errors
}
}
// Before processing, remove any corrupted opengraph containers that might have been created
// These have malformed data-og-url attributes containing HTML fragments
// Match all spans with data-og-url and check if they're corrupted
// Use a pattern that matches spans with data-og-url, then check the attribute value
processed = processed.replace(/]*data-og-url=["']([^"']+)["'][^>]*>[\s\S]*?<\/span>/gi, (match) => {
// This span has a corrupted data-og-url (contains <)
// Extract the clean URL from the beginning of the attribute value
const dataOgUrlMatch = match.match(/data-og-url=["']([^"']+)["']/i);
if (dataOgUrlMatch && dataOgUrlMatch[1]) {
// Extract just the URL part (everything before the first <)
const urlMatch = dataOgUrlMatch[1].match(/(https?:\/\/[^\s<>"']+)/i);
if (urlMatch) {
const cleanUrl = urlMatch[1];
// Extract the link text from inside the span
const linkMatch = match.match(/]*>(.*?)<\/a>/i);
const linkText = linkMatch ? linkMatch[1] : cleanUrl;
// Return a clean opengraph container with the fixed URL
const escapedUrl = cleanUrl.replace(/"/g, '"').replace(/'/g, ''');
return `
${linkText}
`;
}
// If we can't extract a clean URL, just remove the corrupted span and keep any text
const textMatch = match.match(/>([^<]+));
return textMatch ? textMatch[1] : '';
}
return match; // Keep valid spans
});
// Match external links (http/https) that aren't media, nostr, or wikilinks
// Skip links that are already in media embeds or special containers
// Use a stricter regex that only matches valid, complete anchor tags
// The regex must match a complete tag with proper structure
processed = processed.replace(/]*\s+)?href\s*=\s*["'](https?:\/\/[^"']{1,2048})["']([^>]*?)>(.*?)<\/a>/gis, (match, before, href, after, linkText) => {
// CRITICAL: Validate href FIRST - if it contains ANY HTML tags or fragments, skip immediately
// This prevents corrupted HTML from being created
if (!href) {
return match; // Skip if no href
}
// Skip if href contains HTML tags or looks corrupted - be very strict
// Check for common HTML fragments that indicate corruption
if (href.includes('<') || href.includes('>') || href.includes('href=') || href.includes('') || href.includes('"']+$/i.test(href)) {
return match; // Skip if href doesn't match clean URL pattern
}
// Validate href is a proper URL (starts with http:// or https:// and doesn't contain invalid chars)
if (!/^https?:\/\/[^\s<>"']+$/i.test(href)) {
return match; // Skip if href doesn't match URL pattern
}
// Skip if the match contains unclosed tags or corrupted HTML
const openATags = (match.match(//g) || []).length;
// NOTE(review): closeATags is not declared anywhere in this copy; the line above appears
// to be the merged remains of two declarations (opening-tag and closing-tag counts).
if (openATags !== closeATags || openATags !== 1) {
return match; // Multiple or mismatched tags = corrupted
}
// Skip if match contains nested HTML that looks corrupted
if (match.includes('href="') && match.split('href="').length > 2) {
return match; // Multiple href attributes = corrupted
}
// Skip if it's already a media embed, nostr link, wikilink, or opengraph link
if (match.includes('class="wikilink"') ||
match.includes('class="nostr-link"') ||
match.includes('class="opengraph-link"') ||
match.includes('data-embedded-note') ||
match.includes('youtube-embed') ||
match.includes('spotify-embed') ||
match.includes('media-embed') ||
match.includes('opengraph-link-container')) {
return match;
}
// Skip if it's a media file URL
if (/\.(mp4|webm|ogg|m4v|mov|avi|mkv|flv|wmv|mp3|m4a|wav|flac|aac|opus|wma|jpeg|jpg|png|gif|webp|svg)$/i.test(href)) {
return match;
}
// Skip if it's YouTube or Spotify (already handled as media)
if (/youtube\.com|youtu\.be|spotify\.com/i.test(href)) {
return match;
}
// Skip if link text contains wss:// or ws:// - these are relay URLs, not web pages
// They don't need OpenGraph previews
if (/wss?:\/\//i.test(linkText)) {
return match;
}
// Check if it's an external link (not same domain)
let isExternal = true;
if (baseDomain) {
try {
const hrefMatch = href.match(/^https?:\/\/([^\/]+)/);
if (hrefMatch && hrefMatch[1] === baseDomain) {
isExternal = false;
}
}
catch {
// If parsing fails, assume external
}
}
// Only process external links
if (!isExternal) {
return match;
}
// Escape the URL for data attribute
const escapedUrl = href
.replace(/&/g, '&')
.replace(/"/g, '"')
.replace(/'/g, ''');
// Add data attribute for OpenGraph fetching and wrap in container
// The actual OpenGraph fetching will be done client-side via JavaScript
return `
${linkText}
`;
});
// Restore code blocks
// NOTE(review): the saved block is passed as a replacement *string*, so "$$", "$&", "$`"
// and "$'" sequences inside code/pre content are interpreted as replacement patterns
// rather than inserted literally. Passing a function (() => codeBlock) would avoid that.
// Only the first occurrence of each placeholder is substituted (string pattern).
codeBlockPlaceholders.forEach((codeBlock, index) => {
processed = processed.replace(`__CODEBLOCK_${index}__`, codeBlock);
});
// Restore pre blocks
preBlockPlaceholders.forEach((preBlock, index) => {
processed = processed.replace(`__PREBLOCK_${index}__`, preBlock);
});
return processed;
}
/**
 * Process images: add max-width styling and data attributes.
 *
 * Works in two passes over `html`:
 *   1. Collect every distinct `src` value, in first-seen order, into `imageUrls`.
 *   2. Rewrite each matched tag: normalise its `class` attribute and append
 *      `data-asciidoc-image`, `data-image-index` (position of the src in `imageUrls`)
 *      and `data-image-src`.
 *
 * NOTE(review): the opening of both regex literals and the returned template literal
 * appear truncated in this copy (presumably the image-tag markup was stripped) -
 * confirm against the upstream build.
 *
 * @param html HTML text to scan (used with RegExp exec and String replace).
 * @returns the HTML with the matched tags rewritten.
 */
function processImages(html) {
// Pass 1: gather unique src values so each image gets a stable index.
const imageUrls = [];
const imageUrlRegex = /
]+src=["']([^"']+)["'][^>]*>/gi;
let match;
// The regex carries the g flag, so exec() advances lastIndex on every call.
while ((match = imageUrlRegex.exec(html)) !== null) {
const url = match[1];
if (url && !imageUrls.includes(url)) {
imageUrls.push(url);
}
}
// Pass 2: rewrite every matched tag.
return html.replace(/
]+)>/gi, (imgTag, attributes) => {
const srcMatch = attributes.match(/src=["']([^"']+)["']/i);
// Tags without a quoted src are returned untouched.
if (!srcMatch)
return imgTag;
const src = srcMatch[1];
// -1 if the src was not collected in pass 1.
const currentIndex = imageUrls.indexOf(src);
let updatedAttributes = attributes;
if (updatedAttributes.match(/class=["']/i)) {
// Existing class attribute: drop any max-w-* utility class, then append the fixed set.
updatedAttributes = updatedAttributes.replace(/class=["']([^"']*)["']/i, (_match, classes) => {
const cleanedClasses = classes.replace(/max-w-\[?[^\s\]]+\]?/g, '').trim();
const newClasses = cleanedClasses
? `${cleanedClasses} max-w-[400px] object-contain cursor-zoom-in`
: 'max-w-[400px] object-contain cursor-zoom-in';
return `class="${newClasses}"`;
});
}
else {
// No class attribute: add one. Note this branch also adds h-auto, the branch above does not.
updatedAttributes += ` class="max-w-[400px] h-auto object-contain cursor-zoom-in"`;
}
// NOTE(review): as written, src.replace(/"/g, '"') replaces a quote with itself; the
// replacement was presumably an HTML entity before this copy was stripped - confirm.
updatedAttributes += ` data-asciidoc-image="true" data-image-index="${currentIndex}" data-image-src="${src.replace(/"/g, '"')}"`;
return `
`;
});
}
/**
 * Strip tracking query parameters from a URL.
 * Based on jumble's cleanUrl function.
 *
 * Removes a fixed list of well-known tracking parameter names, then any remaining
 * parameter whose name starts with `utm_` or `_`.
 *
 * @param url URL to clean.
 * @returns the re-serialized URL without tracking parameters, or `url` itself,
 *   unchanged, when it cannot be parsed by the URL constructor.
 */
function cleanUrl(url) {
    let target;
    try {
        target = new URL(url);
    }
    catch {
        // Unparseable input is handed back as-is.
        return url;
    }
    const query = target.searchParams;
    // Exact parameter names to drop, grouped by origin.
    const exactNames = [
        // Google Analytics & Ads
        'utm_source', 'utm_medium', 'utm_campaign', 'utm_term', 'utm_content',
        'utm_id', 'utm_source_platform', 'utm_creative_format', 'utm_marketing_tactic',
        'gclid', 'gclsrc', 'dclid', 'gbraid', 'wbraid',
        // Facebook
        'fbclid', 'fb_action_ids', 'fb_action_types', 'fb_source', 'fb_ref',
        // Twitter/X
        'twclid', 'twsrc',
        // Microsoft/Bing and Mailchimp
        'msclkid', 'mc_cid', 'mc_eid',
        // Adobe
        'adobe_mc', 'adobe_mc_ref', 'adobe_mc_sdid',
        // HubSpot
        'hsCtaTracking', 'hsa_acc', 'hsa_cam', 'hsa_grp', 'hsa_ad', 'hsa_src',
        'hsa_tgt', 'hsa_kw', 'hsa_mt', 'hsa_net', 'hsa_ver',
        // Marketo
        'mkt_tok',
        // YouTube
        'si', 'feature', 'kw', 'pp',
        // Other common tracking
        'ref', 'referrer', 'source', 'campaign', 'medium', 'content',
        'yclid', 'srsltid', '_ga', '_gl', 'igshid', 'epik', 'pk_campaign', 'pk_kwd',
        // Mobile app tracking
        'adjust_tracker', 'adjust_campaign', 'adjust_adgroup', 'adjust_creative',
        // Amazon
        'tag', 'linkCode', 'creative', 'creativeASIN', 'linkId', 'ascsubtag',
        // Affiliate tracking
        'aff_id', 'affiliate_id', 'aff', 'ref_', 'refer',
        // Social media share tracking
        'share', 'shared', 'sharesource',
    ];
    // delete() is invoked for every listed name, present or not, exactly as before.
    for (const name of exactNames) {
        query.delete(name);
    }
    // Snapshot the remaining keys first so deleting does not disturb the iteration.
    const trackingPrefixes = ['utm_', '_'];
    const prefixedKeys = [...query.keys()].filter((key) => trackingPrefixes.some((prefix) => key.startsWith(prefix)));
    for (const key of prefixedKeys) {
        query.delete(key);
    }
    return target.toString();
}
/**
 * Clean up leftover markdown syntax.
 *
 * Runs two replaces in order: markdown images `![alt](url)` first, then markdown
 * links `[text](url)`. URLs are passed through cleanUrl before being emitted.
 *
 * NOTE(review): several replacement strings and both returned templates look truncated
 * in this copy (entity text and tag markup presumably stripped), so the escape chains
 * below read as no-ops - confirm against the upstream build.
 *
 * @param html HTML text that may still contain markdown image/link syntax.
 * @returns the text after both replaces.
 */
function cleanupMarkdown(html) {
let cleaned = html;
// Clean up markdown image syntax
cleaned = cleaned.replace(/!\[([^\]]*)\]\(([^)]+)\)/g, (_match, alt, url) => {
// alt may be an empty string because the pattern allows zero characters.
const altText = alt || '';
// Clean URL (remove tracking parameters)
const cleanedUrl = cleanUrl(url);
// Escape for HTML attribute
const escapedUrl = cleanedUrl.replace(/"/g, '"').replace(/'/g, ''');
return `
`;
});
// Clean up markdown link syntax
// Skip if the link is already inside an HTML tag or is part of escaped HTML
cleaned = cleaned.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (_match, text, url) => {
// Inside this callback `cleaned` is still the string being scanned (the assignment
// happens only after replace() returns), so the includes() checks look at the whole
// document as it was after the image pass.
// Skip if this markdown link is already inside an HTML tag
// Check if there's an tag nearby that might have been created from this
if (cleaned.includes(`href="${url}"`) || cleaned.includes(`href='${url}'`)) {
return _match;
}
// Skip if the text contains HTML entities or looks like it's already processed
if (text.includes('<') || text.includes('>') || text.includes('&')) {
return _match;
}
// Skip if the URL is already in an href attribute (check for escaped versions too)
const escapedUrl = url.replace(/"/g, '"').replace(/'/g, ''');
if (cleaned.includes(`href="${escapedUrl}"`) || cleaned.includes(`href='${escapedUrl}'`)) {
return _match;
}
// Clean URL (remove tracking parameters)
const cleanedUrl = cleanUrl(url);
// Escape for HTML attribute (but don't double-escape)
const finalEscapedUrl = cleanedUrl
.replace(/&/g, '&') // Unescape if already escaped
.replace(/&/g, '&')
.replace(/"/g, '"')
.replace(/'/g, ''');
// Escape text for HTML (but don't double-escape)
// The guard above already returned for any text containing <, > or &.
const escapedText = text
.replace(/&/g, '&') // Unescape if already escaped
.replace(/</g, '<')
.replace(/>/g, '>')
.replace(/&/g, '&')
.replace(//g, '>');
return `${escapedText} `;
});
return cleaned;
}
/**
 * Add proper CSS classes for styling.
 *
 * Applies five sequential replaces to `html`: strikethrough, subscript, superscript,
 * then two code-highlighting replaces, and returns the result.
 *
 * NOTE(review): the patterns and replacement strings below look truncated in this copy
 * (the first three are textually identical and the last two have empty patterns),
 * presumably because tag markup was stripped - restore from the upstream build before
 * editing.
 *
 * @param html HTML text to restyle.
 * @returns the text after all replaces.
 */
function addStylingClasses(html) {
let styled = html;
// Add strikethrough styling
styled = styled.replace(/([^<]+)<\/span>/g, '$1');
// Add subscript styling
styled = styled.replace(/([^<]+)<\/span>/g, '$1');
// Add superscript styling
styled = styled.replace(/([^<]+)<\/span>/g, '$1');
// Add code highlighting classes
styled = styled.replace(//g, '');
styled = styled.replace(//g, '');
return styled;
}
/**
 * Hide raw AsciiDoc ToC text.
 *
 * Removes (replaces with an empty string) three kinds of leftover elements:
 *   - an element closed by a heading end tag (h1-h6) whose content contains
 *     "Table of Contents" followed later by a parenthesised number, e.g. "(3)";
 *   - the same text inside an element closed by a paragraph end tag;
 *   - an element closed by a paragraph end tag containing "Assumptions" followed
 *     later by the literal "[n=0]".
 * The patterns use `.` without the s flag, so each match must sit on a single line;
 * matching is case-insensitive (i flag).
 *
 * NOTE(review): the opening of each pattern looks truncated in this copy (opening-tag
 * markup presumably stripped) - confirm against the upstream build.
 *
 * @param html HTML text to clean.
 * @returns the text with the matched elements removed.
 */
function hideRawTocText(html) {
let cleaned = html;
cleaned = cleaned.replace(/]*>.*?Table of Contents.*?\(\d+\).*?<\/h[1-6]>/gi, '');
cleaned = cleaned.replace(/]*>.*?Table of Contents.*?\(\d+\).*?<\/p>/gi, '');
cleaned = cleaned.replace(/
]*>.*?Assumptions.*?\[n=0\].*?<\/p>/gi, '');
return cleaned;
}