"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.postProcessHtml = postProcessHtml; const music_1 = require("./music"); /** * Post-processes HTML output from AsciiDoctor * Converts AsciiDoc macros to HTML with data attributes and CSS classes */ function postProcessHtml(html, options = {}) { let processed = html; // Convert bookstr markers to HTML placeholders processed = processed.replace(/BOOKSTR:([^<>\s]+)/g, (_match, bookContent) => { const escaped = bookContent.replace(/"/g, '"').replace(/'/g, '''); return ``; }); // Convert hashtag links to HTML processed = processed.replace(/hashtag:([^[]+)\[([^\]]+)\]/g, (_match, normalizedHashtag, displayText) => { // HTML escape the display text const escapedDisplay = displayText .replace(/&/g, '&') .replace(//g, '>') .replace(/"/g, '"') .replace(/'/g, '''); // If hashtagUrl is configured, make it a clickable link if (options.hashtagUrl) { let url; if (typeof options.hashtagUrl === 'function') { url = options.hashtagUrl(normalizedHashtag); } else { // String template with {topic} placeholder url = options.hashtagUrl.replace(/{topic}/g, normalizedHashtag); } // Escape URL for HTML attribute const escapedUrl = url.replace(/"/g, '"').replace(/'/g, '''); return `${escapedDisplay}`; } else { // Default: Use span instead of tag - same color as links but no underline and not clickable return `${escapedDisplay}`; } }); // Convert WIKILINK:dtag|display placeholder format to HTML // Match WIKILINK:dtag|display, ensuring we don't match across HTML tags processed = processed.replace(/WIKILINK:([^|<>]+)\|([^<>\s]+)/g, (_match, dTag, displayText) => { const escapedDtag = dTag.trim().replace(/"/g, '"'); const escapedDisplay = displayText.trim() .replace(/&/g, '&') .replace(//g, '>') .replace(/"/g, '"') .replace(/'/g, '''); // Generate URL using custom format or default let url; if (options.wikilinkUrl) { if (typeof options.wikilinkUrl === 'function') { url = options.wikilinkUrl(dTag.trim()); } else { 
// String template with {dtag} placeholder url = options.wikilinkUrl.replace(/{dtag}/g, dTag.trim()); } } else { // Default format url = `/events?d=${escapedDtag}`; } // Escape URL for HTML attribute const escapedUrl = url.replace(/"/g, '"').replace(/'/g, '''); return `${escapedDisplay}`; }); // Convert any leftover link: macros that AsciiDoctor didn't convert // This MUST run before processOpenGraphLinks which removes "link:" prefixes // This handles cases where AsciiDoctor couldn't parse the link (e.g., link text with special chars) // Pattern: link:url[text] where url is http/https and text can contain any characters // Match link: macros that are still in the HTML as plain text (not converted by AsciiDoctor) // Also handle HTML-escaped versions that might appear processed = processed.replace(/link:(https?:\/\/[^\[]+)\[([^\]]+)\]/g, (_match, url, text) => { // Unescape if already HTML-escaped (but be careful not to unescape actual content) let unescapedUrl = url; // Only unescape if it looks like it was escaped (contains & or ") if (url.includes('&') || url.includes('"') || url.includes(''')) { unescapedUrl = url .replace(/&/g, '&') .replace(/</g, '<') .replace(/>/g, '>') .replace(/"/g, '"') .replace(/'/g, "'"); } let unescapedText = text; // Only unescape if it looks like it was escaped if (text.includes('&') || text.includes('<') || text.includes('>') || text.includes('"') || text.includes(''')) { unescapedText = text .replace(/&/g, '&') .replace(/</g, '<') .replace(/>/g, '>') .replace(/"/g, '"') .replace(/'/g, "'"); } // Escape URL for HTML attribute (fresh escape, no double-escaping) const escapedUrl = unescapedUrl .replace(/&/g, '&') .replace(/"/g, '"') .replace(/'/g, '''); // Escape text content for HTML (fresh escape, no double-escaping) const escapedText = unescapedText .replace(/&/g, '&') .replace(//g, '>') .replace(/"/g, '"') .replace(/'/g, '''); // Check if link text contains wss:// or ws:// - these are relay URLs, don't add OpenGraph const isRelayUrl 
= /wss?:\/\//i.test(unescapedText); if (isRelayUrl) { // Simple link without OpenGraph wrapper return `${escapedText} `; } else { // Regular link - will be processed by OpenGraph handler if external return `${escapedText} `; } }); // Convert nostr: links to HTML processed = processed.replace(/link:nostr:([^[]+)\[([^\]]+)\]/g, (_match, bech32Id, displayText) => { const nostrType = getNostrType(bech32Id); if (nostrType === 'nevent' || nostrType === 'naddr' || nostrType === 'note') { // Render as embedded event placeholder const escaped = bech32Id.replace(/"/g, '"'); return `
Loading embedded event...
`; } else if (nostrType === 'npub' || nostrType === 'nprofile') { // Render as user handle const escaped = bech32Id.replace(/"/g, '"'); return `@${displayText}`; } else { // Fallback to regular link const escaped = bech32Id.replace(/"/g, '"'); return `${displayText}`; } }); // Process media URLs (YouTube, Spotify, video, audio) processed = processMedia(processed); // Fix double-escaped quotes in href attributes FIRST (before any other processing) // This fixes href=""url"" -> href="url" processed = processed.replace(/href\s*=\s*["']"(https?:\/\/[^"']+)"["']/gi, (_match, url) => { const escapedUrl = url.replace(/"/g, '"').replace(/'/g, '''); return `href="${escapedUrl}"`; }); // Process OpenGraph links (external links that should have rich previews) processed = processOpenGraphLinks(processed, options.linkBaseURL); // Process images: add max-width styling and data attributes processed = processImages(processed); // Process musical notation if enabled if (options.enableMusicalNotation) { processed = (0, music_1.processMusicalNotation)(processed); } // Clean up any escaped HTML that appears as text (e.g., <a href=...>) // This can happen when AsciiDoctor escapes link macros that it couldn't parse // Pattern: <a href="url">text</a> should be converted to actual HTML // Use a more flexible pattern that handles text with special characters like :// // Fix regular escaped HTML links processed = processed.replace(/<a\s+href=["'](https?:\/\/[^"']+)["']\s*>([^<]+)<\/a>/gi, (_match, url, text) => { // Unescape the URL and text const unescapedUrl = url .replace(/&/g, '&') .replace(/"/g, '"') .replace(/'/g, "'"); const unescapedText = text .replace(/&/g, '&') .replace(/</g, '<') .replace(/>/g, '>'); // Re-escape properly for HTML const escapedUrl = unescapedUrl .replace(/&/g, '&') .replace(/"/g, '"') .replace(/'/g, '''); const escapedText = unescapedText .replace(/&/g, '&') .replace(//g, '>'); // Check if link text contains wss:// or ws:// - these are relay URLs const 
isRelayUrl = /wss?:\/\//i.test(unescapedText); if (isRelayUrl) { // Simple link without OpenGraph wrapper return `${escapedText} `; } else { // Regular link return `${escapedText} `; } }); // Clean up any leftover markdown syntax processed = cleanupMarkdown(processed); // Add styling classes processed = addStylingClasses(processed); // Hide raw ToC text processed = hideRawTocText(processed); return processed; } /** * Get Nostr identifier type */ function getNostrType(id) { if (id.startsWith('npub')) return 'npub'; if (id.startsWith('nprofile')) return 'nprofile'; if (id.startsWith('nevent')) return 'nevent'; if (id.startsWith('naddr')) return 'naddr'; if (id.startsWith('note')) return 'note'; return null; } /** * Process media URLs (YouTube, Spotify, video, audio) * Converts MEDIA: placeholders to HTML embeds/players */ function processMedia(html) { let processed = html; // Process YouTube embeds processed = processed.replace(/MEDIA:youtube:([a-zA-Z0-9_-]+)/g, (_match, videoId) => { const escapedId = videoId.replace(/"/g, '"'); return `
`; }); // Process Spotify embeds processed = processed.replace(/MEDIA:spotify:(track|album|playlist|artist|episode|show):([a-zA-Z0-9]+)/g, (_match, type, id) => { const escapedType = type.replace(/"/g, '"'); const escapedId = id.replace(/"/g, '"'); return `
`; }); // Process video files processed = processed.replace(/MEDIA:video:(https?:\/\/[^\s<>"{}|\\^`\[\]()]+)/g, (_match, url) => { const escapedUrl = url .replace(/&/g, '&') .replace(//g, '>') .replace(/"/g, '"') .replace(/'/g, '''); return `
`; }); // Process audio files processed = processed.replace(/MEDIA:audio:(https?:\/\/[^\s<>"{}|\\^`\[\]()]+)/g, (_match, url) => { const escapedUrl = url .replace(/&/g, '&') .replace(//g, '>') .replace(/"/g, '"') .replace(/'/g, '''); return `
`; }); return processed; } /** * Process OpenGraph links - mark external links for OpenGraph preview fetching */ function processOpenGraphLinks(html, linkBaseURL) { // First, clean up any corrupted HTML fragments that might interfere // Remove "link:" prefixes that appear before links (AsciiDoc syntax that shouldn't be in HTML) // This happens when AsciiDoctor doesn't fully convert link:url[text] syntax or when // there's literal text like "should render like link:" before an anchor tag let processed = html; // Remove "link:" that appears immediately before anchor tags (most common case) // Match "link:" followed by optional whitespace and then \s])link:([a-zA-Z0-9])/gi, '$1$2'); // Also handle cases where "link:" appears with whitespace before anchor tags processed = processed.replace(/\s+link:\s*(?= href="url" processed = processed.replace(/href\s*=\s*["']"(https?:\/\/[^"']+)"["']/gi, (match, url) => { // Extract the clean URL and properly escape it const escapedUrl = url.replace(/"/g, '"').replace(/'/g, '''); return `href="${escapedUrl}"`; }); // Clean up href attributes that contain HTML fragments processed = processed.replace(/href\s*=\s*["']([^"']*<[^"']*)["']/gi, (match, corruptedHref) => { // If href contains HTML tags, extract just the URL part const urlMatch = corruptedHref.match(/(https?:\/\/[^\s<>"']+)/i); if (urlMatch) { const escapedUrl = urlMatch[1].replace(/"/g, '"').replace(/'/g, '''); return `href="${escapedUrl}"`; } return match; // If we can't fix it, leave it (will be skipped by validation) }); // Clean up any malformed anchor tag fragments that might cause issues processed = processed.replace(/]*<[^"'>]*)["']/gi, (match, corruptedHref) => { // Skip corrupted anchor tags - they'll be handled by the main regex with validation return match; }); // Clean up links inside code blocks - AsciiDoctor creates them but they should be plain text // Remove tags inside blocks, keeping only the link text processed = 
processed.replace(/]*>([\s\S]*?)<\/code>/gi, (match, content) => { // Remove any tags inside code blocks, keeping only the text content const cleaned = content.replace(/]*>(.*?)<\/a>/gi, '$1'); return `${cleaned}`; }); // Also clean up links inside pre blocks processed = processed.replace(/]*>([\s\S]*?)<\/pre>/gi, (match, content) => { const cleaned = content.replace(/]*>(.*?)<\/a>/gi, '$1'); return `
${cleaned}
`; }); // Now protect code blocks and pre blocks by replacing them with placeholders const codeBlockPlaceholders = []; const preBlockPlaceholders = []; // Replace pre blocks first (they can contain code blocks) processed = processed.replace(/]*>([\s\S]*?)<\/pre>/gi, (match) => { const placeholder = `__PREBLOCK_${preBlockPlaceholders.length}__`; preBlockPlaceholders.push(match); return placeholder; }); // Replace code blocks processed = processed.replace(/]*>([\s\S]*?)<\/code>/gi, (match) => { const placeholder = `__CODEBLOCK_${codeBlockPlaceholders.length}__`; codeBlockPlaceholders.push(match); return placeholder; }); // Extract base domain from linkBaseURL if provided let baseDomain = null; if (linkBaseURL) { try { const urlMatch = linkBaseURL.match(/^https?:\/\/([^\/]+)/); if (urlMatch) { baseDomain = urlMatch[1]; } } catch { // Ignore parsing errors } } // Before processing, remove any corrupted opengraph containers that might have been created // These have malformed data-og-url attributes containing HTML fragments // Match all spans with data-og-url and check if they're corrupted // Use a pattern that matches spans with data-og-url, then check the attribute value processed = processed.replace(/]*data-og-url=["']([^"']+)["'][^>]*>[\s\S]*?<\/span>/gi, (match) => { // This span has a corrupted data-og-url (contains <) // Extract the clean URL from the beginning of the attribute value const dataOgUrlMatch = match.match(/data-og-url=["']([^"']+)["']/i); if (dataOgUrlMatch && dataOgUrlMatch[1]) { // Extract just the URL part (everything before the first <) const urlMatch = dataOgUrlMatch[1].match(/(https?:\/\/[^\s<>"']+)/i); if (urlMatch) { const cleanUrl = urlMatch[1]; // Extract the link text from inside the span const linkMatch = match.match(/]*>(.*?)<\/a>/i); const linkText = linkMatch ? linkMatch[1] : cleanUrl; // Return a clean opengraph container with the fixed URL const escapedUrl = cleanUrl.replace(/"/g, '"').replace(/'/g, '''); return `
${linkText} `; } // If we can't extract a clean URL, just remove the corrupted span and keep any text const textMatch = match.match(/>([^<]+) tag with proper structure processed = processed.replace(/]*\s+)?href\s*=\s*["'](https?:\/\/[^"']{1,2048})["']([^>]*?)>(.*?)<\/a>/gis, (match, before, href, after, linkText) => { // CRITICAL: Validate href FIRST - if it contains ANY HTML tags or fragments, skip immediately // This prevents corrupted HTML from being created if (!href) { return match; // Skip if no href } // Skip if href contains HTML tags or looks corrupted - be very strict // Check for common HTML fragments that indicate corruption if (href.includes('<') || href.includes('>') || href.includes('href=') || href.includes('') || href.includes('"']+$/i.test(href)) { return match; // Skip if href doesn't match clean URL pattern } // Validate href is a proper URL (starts with http:// or https:// and doesn't contain invalid chars) if (!/^https?:\/\/[^\s<>"']+$/i.test(href)) { return match; // Skip if href doesn't match URL pattern } // Skip if the match contains unclosed tags or corrupted HTML const openATags = (match.match(//g) || []).length; if (openATags !== closeATags || openATags !== 1) { return match; // Multiple or mismatched tags = corrupted } // Skip if match contains nested HTML that looks corrupted if (match.includes('href="') && match.split('href="').length > 2) { return match; // Multiple href attributes = corrupted } // Skip if it's already a media embed, nostr link, wikilink, or opengraph link if (match.includes('class="wikilink"') || match.includes('class="nostr-link"') || match.includes('class="opengraph-link"') || match.includes('data-embedded-note') || match.includes('youtube-embed') || match.includes('spotify-embed') || match.includes('media-embed') || match.includes('opengraph-link-container')) { return match; } // Skip if it's a media file URL if 
(/\.(mp4|webm|ogg|m4v|mov|avi|mkv|flv|wmv|mp3|m4a|wav|flac|aac|opus|wma|jpeg|jpg|png|gif|webp|svg)$/i.test(href)) { return match; } // Skip if it's YouTube or Spotify (already handled as media) if (/youtube\.com|youtu\.be|spotify\.com/i.test(href)) { return match; } // Skip if link text contains wss:// or ws:// - these are relay URLs, not web pages // They don't need OpenGraph previews if (/wss?:\/\//i.test(linkText)) { return match; } // Check if it's an external link (not same domain) let isExternal = true; if (baseDomain) { try { const hrefMatch = href.match(/^https?:\/\/([^\/]+)/); if (hrefMatch && hrefMatch[1] === baseDomain) { isExternal = false; } } catch { // If parsing fails, assume external } } // Only process external links if (!isExternal) { return match; } // Escape the URL for data attribute const escapedUrl = href .replace(/&/g, '&') .replace(/"/g, '"') .replace(/'/g, '''); // Add data attribute for OpenGraph fetching and wrap in container // The actual OpenGraph fetching will be done client-side via JavaScript return ` ${linkText} `; }); // Restore code blocks codeBlockPlaceholders.forEach((codeBlock, index) => { processed = processed.replace(`__CODEBLOCK_${index}__`, codeBlock); }); // Restore pre blocks preBlockPlaceholders.forEach((preBlock, index) => { processed = processed.replace(`__PREBLOCK_${index}__`, preBlock); }); return processed; } /** * Process images: add max-width styling and data attributes */ function processImages(html) { const imageUrls = []; const imageUrlRegex = /]+src=["']([^"']+)["'][^>]*>/gi; let match; while ((match = imageUrlRegex.exec(html)) !== null) { const url = match[1]; if (url && !imageUrls.includes(url)) { imageUrls.push(url); } } return html.replace(/]+)>/gi, (imgTag, attributes) => { const srcMatch = attributes.match(/src=["']([^"']+)["']/i); if (!srcMatch) return imgTag; const src = srcMatch[1]; const currentIndex = imageUrls.indexOf(src); let updatedAttributes = attributes; if 
(updatedAttributes.match(/class=["']/i)) { updatedAttributes = updatedAttributes.replace(/class=["']([^"']*)["']/i, (_match, classes) => { const cleanedClasses = classes.replace(/max-w-\[?[^\s\]]+\]?/g, '').trim(); const newClasses = cleanedClasses ? `${cleanedClasses} max-w-[400px] object-contain cursor-zoom-in` : 'max-w-[400px] object-contain cursor-zoom-in'; return `class="${newClasses}"`; }); } else { updatedAttributes += ` class="max-w-[400px] h-auto object-contain cursor-zoom-in"`; } updatedAttributes += ` data-asciidoc-image="true" data-image-index="${currentIndex}" data-image-src="${src.replace(/"/g, '"')}"`; return ``; }); } /** * Clean URL by removing tracking parameters * Based on jumble's cleanUrl function */ function cleanUrl(url) { try { const parsedUrl = new URL(url); // List of tracking parameter prefixes and exact names to remove const trackingParams = [ // Google Analytics & Ads 'utm_source', 'utm_medium', 'utm_campaign', 'utm_term', 'utm_content', 'utm_id', 'utm_source_platform', 'utm_creative_format', 'utm_marketing_tactic', 'gclid', 'gclsrc', 'dclid', 'gbraid', 'wbraid', // Facebook 'fbclid', 'fb_action_ids', 'fb_action_types', 'fb_source', 'fb_ref', // Twitter/X 'twclid', 'twsrc', // Microsoft/Bing 'msclkid', 'mc_cid', 'mc_eid', // Adobe 'adobe_mc', 'adobe_mc_ref', 'adobe_mc_sdid', // Mailchimp 'mc_cid', 'mc_eid', // HubSpot 'hsCtaTracking', 'hsa_acc', 'hsa_cam', 'hsa_grp', 'hsa_ad', 'hsa_src', 'hsa_tgt', 'hsa_kw', 'hsa_mt', 'hsa_net', 'hsa_ver', // Marketo 'mkt_tok', // YouTube 'si', 'feature', 'kw', 'pp', // Other common tracking 'ref', 'referrer', 'source', 'campaign', 'medium', 'content', 'yclid', 'srsltid', '_ga', '_gl', 'igshid', 'epik', 'pk_campaign', 'pk_kwd', // Mobile app tracking 'adjust_tracker', 'adjust_campaign', 'adjust_adgroup', 'adjust_creative', // Amazon 'tag', 'linkCode', 'creative', 'creativeASIN', 'linkId', 'ascsubtag', // Affiliate tracking 'aff_id', 'affiliate_id', 'aff', 'ref_', 'refer', // Social media share tracking 
'share', 'shared', 'sharesource' ]; // Remove all tracking parameters trackingParams.forEach(param => { parsedUrl.searchParams.delete(param); }); // Remove any parameter that starts with utm_ or _ Array.from(parsedUrl.searchParams.keys()).forEach(key => { if (key.startsWith('utm_') || key.startsWith('_')) { parsedUrl.searchParams.delete(key); } }); return parsedUrl.toString(); } catch { // If URL parsing fails, return original URL return url; } } /** * Clean up leftover markdown syntax */ function cleanupMarkdown(html) { let cleaned = html; // Clean up markdown image syntax cleaned = cleaned.replace(/!\[([^\]]*)\]\(([^)]+)\)/g, (_match, alt, url) => { const altText = alt || ''; // Clean URL (remove tracking parameters) const cleanedUrl = cleanUrl(url); // Escape for HTML attribute const escapedUrl = cleanedUrl.replace(/"/g, '"').replace(/'/g, '''); return `${altText}`; }); // Clean up markdown link syntax // Skip if the link is already inside an HTML tag or is part of escaped HTML cleaned = cleaned.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (_match, text, url) => { // Skip if this markdown link is already inside an HTML tag // Check if there's an tag nearby that might have been created from this if (cleaned.includes(`href="${url}"`) || cleaned.includes(`href='${url}'`)) { return _match; } // Skip if the text contains HTML entities or looks like it's already processed if (text.includes('<') || text.includes('>') || text.includes('&')) { return _match; } // Skip if the URL is already in an href attribute (check for escaped versions too) const escapedUrl = url.replace(/"/g, '"').replace(/'/g, '''); if (cleaned.includes(`href="${escapedUrl}"`) || cleaned.includes(`href='${escapedUrl}'`)) { return _match; } // Clean URL (remove tracking parameters) const cleanedUrl = cleanUrl(url); // Escape for HTML attribute (but don't double-escape) const finalEscapedUrl = cleanedUrl .replace(/&/g, '&') // Unescape if already escaped .replace(/&/g, '&') .replace(/"/g, '"') .replace(/'/g, 
'''); // Escape text for HTML (but don't double-escape) const escapedText = text .replace(/&/g, '&') // Unescape if already escaped .replace(/</g, '<') .replace(/>/g, '>') .replace(/&/g, '&') .replace(//g, '>'); return `${escapedText} `; }); return cleaned; } /** * Add proper CSS classes for styling */ function addStylingClasses(html) { let styled = html; // Add strikethrough styling styled = styled.replace(/([^<]+)<\/span>/g, '$1'); // Add subscript styling styled = styled.replace(/([^<]+)<\/span>/g, '$1'); // Add superscript styling styled = styled.replace(/([^<]+)<\/span>/g, '$1'); // Add code highlighting classes styled = styled.replace(/
/g, '
');
    styled = styled.replace(//g, '');
    return styled;
}
/**
 * Hide raw AsciiDoc ToC text.
 *
 * Deletes whole elements whose text is a table-of-contents echo rather than
 * real content:
 *   - an <h1>..<h6> heading containing "Table of Contents" followed by a
 *     parenthesised number, e.g. "Table of Contents (3)";
 *   - a <p> paragraph containing the same text;
 *   - a <p> paragraph containing "Assumptions" followed by "[n=0]".
 *
 * Matching is case-insensitive. The patterns use `.` without the dotAll
 * flag, so an element is only removed when its opening tag, the marker text
 * and its closing tag all sit on one line.
 *
 * @param {string} html - HTML to clean.
 * @returns {string} The HTML with the matching elements removed; input that
 *     contains none of the markers is returned unchanged.
 */
function hideRawTocText(html) {
    let cleaned = html;
    // Each pattern must start at the opening "<" of the element. Anchoring
    // on the tag start (instead of on the first ">") ensures the complete
    // element is deleted and no "<h2" / "<p class=..." fragment is left
    // behind. The \b keeps "<p" from matching longer tag names like <pre>.
    cleaned = cleaned.replace(/<h[1-6]\b[^>]*>.*?Table of Contents.*?\(\d+\).*?<\/h[1-6]>/gi, '');
    cleaned = cleaned.replace(/<p\b[^>]*>.*?Table of Contents.*?\(\d+\).*?<\/p>/gi, '');
    cleaned = cleaned.replace(/<p\b[^>]*>.*?Assumptions.*?\[n=0\].*?<\/p>/gi, '');
    return cleaned;
}