"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.postProcessHtml = postProcessHtml; const music_1 = require("./music"); /** * Post-processes HTML output from AsciiDoctor * Converts AsciiDoc macros to HTML with data attributes and CSS classes */ function postProcessHtml(html, options = {}) { let processed = html; // Convert bookstr markers to HTML placeholders processed = processed.replace(/BOOKSTR:([^<>\s]+)/g, (_match, bookContent) => { const escaped = bookContent.replace(/"/g, '"').replace(/'/g, '''); return ``; }); // Convert hashtag links to HTML processed = processed.replace(/hashtag:([^[]+)\[([^\]]+)\]/g, (_match, normalizedHashtag, displayText) => { // HTML escape the display text const escapedDisplay = displayText .replace(/&/g, '&') .replace(//g, '>') .replace(/"/g, '"') .replace(/'/g, '''); // If hashtagUrl is configured, make it a clickable link if (options.hashtagUrl) { let url; if (typeof options.hashtagUrl === 'function') { url = options.hashtagUrl(normalizedHashtag); } else { // String template with {topic} placeholder url = options.hashtagUrl.replace(/{topic}/g, normalizedHashtag); } // Escape URL for HTML attribute const escapedUrl = url.replace(/"/g, '"').replace(/'/g, '''); return `${escapedDisplay}`; } else { // Default: Use span instead of tag - same color as links but no underline and not clickable return `${escapedDisplay}`; } }); // Convert WIKILINK:dtag|display placeholder format to HTML // Match WIKILINK:dtag|display, ensuring we don't match across HTML tags processed = processed.replace(/WIKILINK:([^|<>]+)\|([^<>\s]+)/g, (_match, dTag, displayText) => { const escapedDtag = dTag.trim().replace(/"/g, '"'); const escapedDisplay = displayText.trim() .replace(/&/g, '&') .replace(//g, '>') .replace(/"/g, '"') .replace(/'/g, '''); // Generate URL using custom format or default let url; if (options.wikilinkUrl) { if (typeof options.wikilinkUrl === 'function') { url = options.wikilinkUrl(dTag.trim()); } else { 
// String template with {dtag} placeholder url = options.wikilinkUrl.replace(/{dtag}/g, dTag.trim()); } } else { // Default format url = `/events?d=${escapedDtag}`; } // Escape URL for HTML attribute const escapedUrl = url.replace(/"/g, '"').replace(/'/g, '''); return `${escapedDisplay}`; }); // Convert any leftover link: macros that AsciiDoctor didn't convert // This MUST run before processOpenGraphLinks which removes "link:" prefixes // This handles cases where AsciiDoctor couldn't parse the link (e.g., link text with special chars) // Pattern: link:url[text] where url is http/https and text can contain any characters // Match link: macros that are still in the HTML as plain text (not converted by AsciiDoctor) // Also handle HTML-escaped versions that might appear processed = processed.replace(/link:(https?:\/\/[^\[]+)\[([^\]]+)\]/g, (_match, url, text) => { // Unescape if already HTML-escaped (but be careful not to unescape actual content) let unescapedUrl = url; // Only unescape if it looks like it was escaped (contains & or ") if (url.includes('&') || url.includes('"') || url.includes(''')) { unescapedUrl = url .replace(/&/g, '&') .replace(/</g, '<') .replace(/>/g, '>') .replace(/"/g, '"') .replace(/'/g, "'"); } let unescapedText = text; // Only unescape if it looks like it was escaped if (text.includes('&') || text.includes('<') || text.includes('>') || text.includes('"') || text.includes(''')) { unescapedText = text .replace(/&/g, '&') .replace(/</g, '<') .replace(/>/g, '>') .replace(/"/g, '"') .replace(/'/g, "'"); } // Escape URL for HTML attribute (fresh escape, no double-escaping) const escapedUrl = unescapedUrl .replace(/&/g, '&') .replace(/"/g, '"') .replace(/'/g, '''); // Escape text content for HTML (fresh escape, no double-escaping) const escapedText = unescapedText .replace(/&/g, '&') .replace(//g, '>') .replace(/"/g, '"') .replace(/'/g, '''); // Check if link text contains wss:// or ws:// - these are relay URLs, don't add OpenGraph const isRelayUrl 
= /wss?:\/\//i.test(unescapedText); if (isRelayUrl) { // Simple link without OpenGraph wrapper return `${escapedText} `; } else { // Regular link - will be processed by OpenGraph handler if external return `${escapedText} `; } }); // Convert nostr: links to HTML processed = processed.replace(/link:nostr:([^[]+)\[([^\]]+)\]/g, (_match, bech32Id, displayText) => { const nostrType = getNostrType(bech32Id); if (nostrType === 'nevent' || nostrType === 'naddr' || nostrType === 'note') { // Render as embedded event placeholder const escaped = bech32Id.replace(/"/g, '"'); return `

Loading embedded event...

`; } else if (nostrType === 'npub' || nostrType === 'nprofile') { // Render as user handle const escaped = bech32Id.replace(/"/g, '"'); return `@${displayText}`; } else { // Fallback to regular link const escaped = bech32Id.replace(/"/g, '"'); return `${displayText}`; } }); // Process media URLs (YouTube, Spotify, video, audio) processed = processMedia(processed); // Fix double-escaped quotes in href attributes FIRST (before any other processing) // This fixes href=""url"" -> href="url" processed = processed.replace(/href\s*=\s*["']"(https?:\/\/[^"']+)"["']/gi, (_match, url) => { const escapedUrl = url.replace(/"/g, '"').replace(/'/g, '''); return `href="${escapedUrl}"`; }); // Process OpenGraph links (external links that should have rich previews) processed = processOpenGraphLinks(processed, options.linkBaseURL); // Process images: add max-width styling and data attributes processed = processImages(processed); // Process musical notation if enabled if (options.enableMusicalNotation) { processed = (0, music_1.processMusicalNotation)(processed); } // Clean up any escaped HTML that appears as text (e.g., <a href=...>) // This can happen when AsciiDoctor escapes link macros that it couldn't parse // Pattern: <a href="url">text</a> should be converted to actual HTML // Use a more flexible pattern that handles text with special characters like :// // Fix regular escaped HTML links processed = processed.replace(/<a\s+href=["'](https?:\/\/[^"']+)["']\s*>([^<]+)<\/a>/gi, (_match, url, text) => { // Unescape the URL and text const unescapedUrl = url .replace(/&/g, '&') .replace(/"/g, '"') .replace(/'/g, "'"); const unescapedText = text .replace(/&/g, '&') .replace(/</g, '<') .replace(/>/g, '>'); // Re-escape properly for HTML const escapedUrl = unescapedUrl .replace(/&/g, '&') .replace(/"/g, '"') .replace(/'/g, '''); const escapedText = unescapedText .replace(/&/g, '&') .replace(//g, '>'); // Check if link text contains wss:// or ws:// - these are relay URLs const 
isRelayUrl = /wss?:\/\//i.test(unescapedText); if (isRelayUrl) { // Simple link without OpenGraph wrapper return `${escapedText} `; } else { // Regular link return `${escapedText} `; } }); // Clean up any leftover markdown syntax processed = cleanupMarkdown(processed); // Add styling classes processed = addStylingClasses(processed); // Hide raw ToC text processed = hideRawTocText(processed); return processed; } /** * Get Nostr identifier type */ function getNostrType(id) { if (id.startsWith('npub')) return 'npub'; if (id.startsWith('nprofile')) return 'nprofile'; if (id.startsWith('nevent')) return 'nevent'; if (id.startsWith('naddr')) return 'naddr'; if (id.startsWith('note')) return 'note'; return null; } /** * Process media URLs (YouTube, Spotify, video, audio) * Converts MEDIA: placeholders to HTML embeds/players */ function processMedia(html) { let processed = html; // Process YouTube embeds processed = processed.replace(/MEDIA:youtube:([a-zA-Z0-9_-]+)/g, (_match, videoId) => { const escapedId = videoId.replace(/"/g, '"'); return `

`; }); // Process Spotify embeds processed = processed.replace(/MEDIA:spotify:(track|album|playlist|artist|episode|show):([a-zA-Z0-9]+)/g, (_match, type, id) => { const escapedType = type.replace(/"/g, '"'); const escapedId = id.replace(/"/g, '"'); return `

`; }); // Process video files processed = processed.replace(/MEDIA:video:(https?:\/\/[^\s<>"{}|\\^`\[\]()]+)/g, (_match, url) => { const escapedUrl = url .replace(/&/g, '&') .replace(//g, '>') .replace(/"/g, '"') .replace(/'/g, '''); return `

`; }); // Process audio files processed = processed.replace(/MEDIA:audio:(https?:\/\/[^\s<>"{}|\\^`\[\]()]+)/g, (_match, url) => { const escapedUrl = url .replace(/&/g, '&') .replace(//g, '>') .replace(/"/g, '"') .replace(/'/g, '''); return `

`; }); return processed; } /** * Process OpenGraph links - mark external links for OpenGraph preview fetching */ function processOpenGraphLinks(html, linkBaseURL) { // First, clean up any corrupted HTML fragments that might interfere // Remove "link:" prefixes that appear before links (AsciiDoc syntax that shouldn't be in HTML) // This happens when AsciiDoctor doesn't fully convert link:url[text] syntax or when // there's literal text like "should render like link:" before an anchor tag let processed = html; // Remove "link:" that appears immediately before anchor tags (most common case) // Match "link:" followed by optional whitespace and then \s])link:([a-zA-Z0-9])/gi, '$1$2'); // Also handle cases where "link:" appears with whitespace before anchor tags processed = processed.replace(/\s+link:\s*(?= href="url" processed = processed.replace(/href\s*=\s*["']"(https?:\/\/[^"']+)"["']/gi, (match, url) => { // Extract the clean URL and properly escape it const escapedUrl = url.replace(/"/g, '"').replace(/'/g, '''); return `href="${escapedUrl}"`; }); // Clean up href attributes that contain HTML fragments processed = processed.replace(/href\s*=\s*["']([^"']*<[^"']*)["']/gi, (match, corruptedHref) => { // If href contains HTML tags, extract just the URL part const urlMatch = corruptedHref.match(/(https?:\/\/[^\s<>"']+)/i); if (urlMatch) { const escapedUrl = urlMatch[1].replace(/"/g, '"').replace(/'/g, '''); return `href="${escapedUrl}"`; } return match; // If we can't fix it, leave it (will be skipped by validation) }); // Clean up any malformed anchor tag fragments that might cause issues processed = processed.replace(/]*<[^"'>]*)["']/gi, (match, corruptedHref) => { // Skip corrupted anchor tags - they'll be handled by the main regex with validation return match; }); // Clean up links inside code blocks - AsciiDoctor creates them but they should be plain text // Remove tags inside

 blocks, keeping only the link text
    processed = processed.replace(/]*>([\s\S]*?)<\/code>/gi, (match, content) => {
        // Remove any

 tags inside code blocks, keeping only the text content
        const cleaned = content.replace(/]*>(.*?)<\/a>/gi, '$1');
        return `${cleaned}`;
    });
    // Also clean up links inside pre blocks
    processed = processed.replace(/]*>([\s\S]*?)<\/pre>/gi, (match, content) => {
        const cleaned = content.replace(/]*>(.*?)<\/a>/gi, '$1');
        return `${cleaned}`;
    });
    // Now protect code blocks and pre blocks by replacing them with placeholders
    const codeBlockPlaceholders = [];
    const preBlockPlaceholders = [];
    // Replace pre blocks first (they can contain code blocks)
    processed = processed.replace(/]*>([\s\S]*?)<\/pre>/gi, (match) => {
        const placeholder = `__PREBLOCK_${preBlockPlaceholders.length}__`;
        preBlockPlaceholders.push(match);
        return placeholder;
    });
    // Replace code blocks
    processed = processed.replace(/]*>([\s\S]*?)<\/code>/gi, (match) => {
        const placeholder = `__CODEBLOCK_${codeBlockPlaceholders.length}__`;
        codeBlockPlaceholders.push(match);
        return placeholder;
    });
    // Extract base domain from linkBaseURL if provided
    let baseDomain = null;
    if (linkBaseURL) {
        try {
            const urlMatch = linkBaseURL.match(/^https?:\/\/([^\/]+)/);
            if (urlMatch) {
                baseDomain = urlMatch[1];
            }
        }
        catch {
            // Ignore parsing errors
        }
    }
    // Before processing, remove any corrupted opengraph containers that might have been created
    // These have malformed data-og-url attributes containing HTML fragments
    // Match all spans with data-og-url and check if they're corrupted
    // Use a pattern that matches spans with data-og-url, then check the attribute value
    processed = processed.replace(/]*data-og-url=["']([^"']+)["'][^>]*>[\s\S]*?<\/span>/gi, (match) => {
        // This span has a corrupted data-og-url (contains <)
        // Extract the clean URL from the beginning of the attribute value
        const dataOgUrlMatch = match.match(/data-og-url=["']([^"']+)["']/i);
        if (dataOgUrlMatch && dataOgUrlMatch[1]) {
            // Extract just the URL part (everything before the first <)
            const urlMatch = dataOgUrlMatch[1].match(/(https?:\/\/[^\s<>"']+)/i);
            if (urlMatch) {
                const cleanUrl = urlMatch[1];
                // Extract the link text from inside the span
                const linkMatch = match.match(/]*>(.*?)<\/a>/i);
                const linkText = linkMatch ? linkMatch[1] : cleanUrl;
                // Return a clean opengraph container with the fixed URL
                const escapedUrl = cleanUrl.replace(/"/g, '"').replace(/'/g, ''');
                return `
      ${linkText} 
      
        
          
            
          
          
            
            
            
          
        
      
    `;
            }
            // If we can't extract a clean URL, just remove the corrupted span and keep any text
            const textMatch = match.match(/>([^<]+) tag with proper structure
    processed = processed.replace(/]*\s+)?href\s*=\s*["'](https?:\/\/[^"']{1,2048})["']([^>]*?)>(.*?)<\/a>/gis, (match, before, href, after, linkText) => {
        // CRITICAL: Validate href FIRST - if it contains ANY HTML tags or fragments, skip immediately
        // This prevents corrupted HTML from being created
        if (!href) {
            return match; // Skip if no href
        }
        // Skip if href contains HTML tags or looks corrupted - be very strict
        // Check for common HTML fragments that indicate corruption
        if (href.includes('<') || href.includes('>') || href.includes('href=') || href.includes('') || href.includes('"']+$/i.test(href)) {
            return match; // Skip if href doesn't match clean URL pattern
        }
        // Validate href is a proper URL (starts with http:// or https:// and doesn't contain invalid chars)
        if (!/^https?:\/\/[^\s<>"']+$/i.test(href)) {
            return match; // Skip if href doesn't match URL pattern
        }
        // Skip if the match contains unclosed tags or corrupted HTML
        const openATags = (match.match(//g) || []).length;
        if (openATags !== closeATags || openATags !== 1) {
            return match; // Multiple or mismatched  tags = corrupted
        }
        // Skip if match contains nested HTML that looks corrupted
        if (match.includes('href="') && match.split('href="').length > 2) {
            return match; // Multiple href attributes = corrupted
        }
        // Skip if it's already a media embed, nostr link, wikilink, or opengraph link
        if (match.includes('class="wikilink"') ||
            match.includes('class="nostr-link"') ||
            match.includes('class="opengraph-link"') ||
            match.includes('data-embedded-note') ||
            match.includes('youtube-embed') ||
            match.includes('spotify-embed') ||
            match.includes('media-embed') ||
            match.includes('opengraph-link-container')) {
            return match;
        }
        // Skip if it's a media file URL
        if (/\.(mp4|webm|ogg|m4v|mov|avi|mkv|flv|wmv|mp3|m4a|wav|flac|aac|opus|wma|jpeg|jpg|png|gif|webp|svg)$/i.test(href)) {
            return match;
        }
        // Skip if it's YouTube or Spotify (already handled as media)
        if (/youtube\.com|youtu\.be|spotify\.com/i.test(href)) {
            return match;
        }
        // Skip if link text contains wss:// or ws:// - these are relay URLs, not web pages
        // They don't need OpenGraph previews
        if (/wss?:\/\//i.test(linkText)) {
            return match;
        }
        // Check if it's an external link (not same domain)
        let isExternal = true;
        if (baseDomain) {
            try {
                const hrefMatch = href.match(/^https?:\/\/([^\/]+)/);
                if (hrefMatch && hrefMatch[1] === baseDomain) {
                    isExternal = false;
                }
            }
            catch {
                // If parsing fails, assume external
            }
        }
        // Only process external links
        if (!isExternal) {
            return match;
        }
        // Escape the URL for data attribute
        const escapedUrl = href
            .replace(/&/g, '&')
            .replace(/"/g, '"')
            .replace(/'/g, ''');
        // Add data attribute for OpenGraph fetching and wrap in container
        // The actual OpenGraph fetching will be done client-side via JavaScript
        return `
      ${linkText} 
      
        
          
            
          
          
            
            
            
          
        
      
    `;
    });
    // Restore code blocks
    codeBlockPlaceholders.forEach((codeBlock, index) => {
        processed = processed.replace(`__CODEBLOCK_${index}__`, codeBlock);
    });
    // Restore pre blocks
    preBlockPlaceholders.forEach((preBlock, index) => {
        processed = processed.replace(`__PREBLOCK_${index}__`, preBlock);
    });
    return processed;
}
/**
 * Process images: add max-width styling and data attributes.
 *
 * Every `<img>` tag that carries a quoted `src` attribute is rewritten so that:
 *  - its `class` attribute ends with `max-w-[400px] object-contain cursor-zoom-in`
 *    (existing `max-w-*` utilities are dropped first; when the tag had no
 *    `class` attribute at all, `h-auto` is added as well);
 *  - it gains `data-asciidoc-image="true"`, `data-image-index` (position of its
 *    `src` among the distinct image URLs of the document, in order of first
 *    appearance) and `data-image-src`.
 * Tags without a quoted `src` are returned untouched.
 *
 * @param {string} html - HTML fragment to process.
 * @returns {string} The HTML with every image tag annotated.
 */
function processImages(html) {
    const imgTagPattern = /<img\b([^>]*)>/gi;
    // Require whitespace (or start) before `src` so `data-src="…"` is not mistaken for it.
    const srcPattern = /(?:^|\s)src=["']([^"']+)["']/i;
    const classPattern = /(^|\s)class=["']([^"']*)["']/i;
    // First pass: number each distinct src in order of first appearance.
    // Both passes use the same src extraction so the index lookup below always hits.
    const indexBySrc = new Map();
    let tagMatch;
    while ((tagMatch = imgTagPattern.exec(html)) !== null) {
        const found = tagMatch[1].match(srcPattern);
        if (found && !indexBySrc.has(found[1])) {
            indexBySrc.set(found[1], indexBySrc.size);
        }
    }
    // Second pass: rewrite each tag.
    return html.replace(imgTagPattern, (imgTag, attributes) => {
        const srcMatch = attributes.match(srcPattern);
        if (!srcMatch) {
            return imgTag;
        }
        const src = srcMatch[1];
        const currentIndex = indexBySrc.get(src);
        // Keep an XHTML-style trailing "/" at the very end of the tag instead of
        // appending the new attributes after it.
        const selfClosing = /["'\s]\/\s*$/.test(attributes);
        let updatedAttributes = selfClosing ? attributes.replace(/\s*\/\s*$/, '') : attributes;
        if (classPattern.test(updatedAttributes)) {
            updatedAttributes = updatedAttributes.replace(classPattern, (_match, lead, classes) => {
                // Drop any existing max-width utility (including variant-prefixed
                // ones such as `md:max-w-lg`) so the 400px cap always applies.
                const kept = classes
                    .split(/\s+/)
                    .filter((token) => token !== '' && !/(?:^|:)max-w-/.test(token));
                const newClasses = [...kept, 'max-w-[400px]', 'object-contain', 'cursor-zoom-in'].join(' ');
                return `${lead}class="${newClasses}"`;
            });
        }
        else {
            updatedAttributes += ` class="max-w-[400px] h-auto object-contain cursor-zoom-in"`;
        }
        updatedAttributes += ` data-asciidoc-image="true" data-image-index="${currentIndex}" data-image-src="${src.replace(/"/g, '&quot;')}"`;
        return `<img${updatedAttributes}${selfClosing ? ' /' : ''}>`;
    });
}
/**
 * Strip tracking / analytics query parameters from a URL.
 * Based on jumble's cleanUrl function.
 *
 * Removes a fixed list of well-known parameter names, then any remaining
 * parameter whose name starts with `utm_` or `_`. The result is the parsed
 * URL's `toString()`, so it is returned in the URL parser's normalized form.
 * Input that cannot be parsed as a URL is returned unchanged.
 *
 * @param {string} url - URL to clean.
 * @returns {string} The URL without tracking parameters, or the original input.
 */
function cleanUrl(url) {
    // Exact parameter names to drop, grouped by where they come from.
    const namesBySource = {
        googleAnalyticsAndAds: [
            'utm_source', 'utm_medium', 'utm_campaign', 'utm_term', 'utm_content',
            'utm_id', 'utm_source_platform', 'utm_creative_format', 'utm_marketing_tactic',
            'gclid', 'gclsrc', 'dclid', 'gbraid', 'wbraid',
        ],
        facebook: ['fbclid', 'fb_action_ids', 'fb_action_types', 'fb_source', 'fb_ref'],
        twitter: ['twclid', 'twsrc'],
        microsoftBing: ['msclkid', 'mc_cid', 'mc_eid'],
        adobe: ['adobe_mc', 'adobe_mc_ref', 'adobe_mc_sdid'],
        mailchimp: ['mc_cid', 'mc_eid'],
        hubSpot: [
            'hsCtaTracking', 'hsa_acc', 'hsa_cam', 'hsa_grp', 'hsa_ad', 'hsa_src',
            'hsa_tgt', 'hsa_kw', 'hsa_mt', 'hsa_net', 'hsa_ver',
        ],
        marketo: ['mkt_tok'],
        youTube: ['si', 'feature', 'kw', 'pp'],
        otherCommon: [
            'ref', 'referrer', 'source', 'campaign', 'medium', 'content',
            'yclid', 'srsltid', '_ga', '_gl', 'igshid', 'epik', 'pk_campaign', 'pk_kwd',
        ],
        mobileApp: ['adjust_tracker', 'adjust_campaign', 'adjust_adgroup', 'adjust_creative'],
        amazon: ['tag', 'linkCode', 'creative', 'creativeASIN', 'linkId', 'ascsubtag'],
        affiliate: ['aff_id', 'affiliate_id', 'aff', 'ref_', 'refer'],
        socialShare: ['share', 'shared', 'sharesource'],
    };
    const hasTrackingPrefix = (name) => name.startsWith('utm_') || name.startsWith('_');
    try {
        const target = new URL(url);
        const query = target.searchParams;
        // Pass 1: delete every listed name (deleting an absent name is harmless).
        for (const names of Object.values(namesBySource)) {
            for (const name of names) {
                query.delete(name);
            }
        }
        // Pass 2: delete whatever is left that carries a tracking prefix.
        // The keys are copied first because deleting changes the live collection.
        const leftovers = [...query.keys()].filter(hasTrackingPrefix);
        for (const name of leftovers) {
            query.delete(name);
        }
        return target.toString();
    }
    catch {
        // Not a parseable URL: hand the input back untouched.
        return url;
    }
}
/**
 * Clean up leftover markdown syntax
 *
 * Runs two regex passes over the HTML string:
 *  1. markdown image syntax  - ![alt](url)
 *  2. markdown link syntax   - [text](url)
 * In both passes the URL is passed through cleanUrl() before being used.
 *
 * NOTE(review): several string literals in this function (the replacement
 * arguments of the quote/ampersand/angle-bracket escapes) and both returned
 * template literals look as if their HTML entities and tags were stripped
 * from this copy of the file; as shown, the block does not parse. Restore
 * them from the original source before relying on this function - TODO confirm.
 *
 * @param {string} html - HTML string to scan.
 * @returns {string} The string after both replacement passes.
 */
function cleanupMarkdown(html) {
    let cleaned = html;
    // Clean up markdown image syntax
    // Pattern captures the alt text (may be empty) and the URL between the parentheses.
    cleaned = cleaned.replace(/!\[([^\]]*)\]\(([^)]+)\)/g, (_match, alt, url) => {
        const altText = alt || '';
        // Clean URL (remove tracking parameters)
        const cleanedUrl = cleanUrl(url);
        // Escape for HTML attribute
        // NOTE(review): replacement literals appear stripped (presumably quote entities) - confirm.
        const escapedUrl = cleanedUrl.replace(/"/g, '"').replace(/'/g, ''');
        // NOTE(review): the returned template is empty here, so altText and escapedUrl are unused
        // as shown; presumably it emitted an image tag - confirm against the original.
        return ``;
    });
    // Clean up markdown link syntax
    // Skip if the link is already inside an HTML tag or is part of escaped HTML
    // The callback reads `cleaned` while the replace is still running, so the
    // includes() checks below see the string as it was after the image pass.
    cleaned = cleaned.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (_match, text, url) => {
        // Skip if this markdown link is already inside an HTML tag
        // Check if there's an <a> tag nearby that might have been created from this
        if (cleaned.includes(`href="${url}"`) || cleaned.includes(`href='${url}'`)) {
            return _match;
        }
        // Skip if the text contains HTML entities or looks like it's already processed
        // NOTE(review): as shown these test for the raw characters '<', '>' and '&';
        // the comment above suggests entity strings were intended - confirm.
        if (text.includes('<') || text.includes('>') || text.includes('&')) {
            return _match;
        }
        // Skip if the URL is already in an href attribute (check for escaped versions too)
        const escapedUrl = url.replace(/"/g, '"').replace(/'/g, ''');
        if (cleaned.includes(`href="${escapedUrl}"`) || cleaned.includes(`href='${escapedUrl}'`)) {
            return _match;
        }
        // Clean URL (remove tracking parameters)
        const cleanedUrl = cleanUrl(url);
        // Escape for HTML attribute (but don't double-escape)
        // Intended as unescape-then-escape so an already-escaped ampersand is not escaped twice;
        // NOTE(review): as shown both ampersand replacements are identical no-ops - confirm.
        const finalEscapedUrl = cleanedUrl
            .replace(/&/g, '&') // Unescape if already escaped
            .replace(/&/g, '&')
            .replace(/"/g, '"')
            .replace(/'/g, ''');
        // Escape text for HTML (but don't double-escape)
        const escapedText = text
            .replace(/&/g, '&') // Unescape if already escaped
            .replace(/</g, '<')
            .replace(/>/g, '>')
            .replace(/&/g, '&')
            .replace(//g, '>');
        // NOTE(review): as shown only the text plus a trailing space is returned and
        // finalEscapedUrl is unused; presumably the anchor markup was stripped - confirm.
        return `${escapedText} `;
    });
    return cleaned;
}
/**
 * Add proper CSS classes for styling
 *
 * Applies a fixed sequence of regex replacements to the HTML string and
 * returns the result.
 *
 * NOTE(review): in this copy of the file the patterns and replacement strings
 * look as if their HTML tags were stripped. The three span replacements are
 * now identical, and the two code-highlighting replacements no longer parse.
 * Restore the tag-bearing patterns from the original source - TODO confirm.
 *
 * @param {string} html - HTML string to restyle.
 * @returns {string} The string after all replacements.
 */
function addStylingClasses(html) {
    let styled = html;
    // Add strikethrough styling
    // As written: matches a run of non-"<" characters followed by a closing span
    // tag and keeps only the run. Presumably the pattern once started with a
    // specific opening span and the replacement re-wrapped $1 - confirm.
    styled = styled.replace(/([^<]+)<\/span>/g, '$1');
    // Add subscript styling
    // Same pattern and replacement as the strikethrough line above, as shown.
    styled = styled.replace(/([^<]+)<\/span>/g, '$1');
    // Add superscript styling
    // Same pattern and replacement as the strikethrough line above, as shown.
    styled = styled.replace(/([^<]+)<\/span>/g, '$1');
    // Add code highlighting classes
    // NOTE(review): on the next two lines everything after "replace(" begins with
    // two slashes and therefore reads as a line comment, leaving the call unclosed.
    // Presumably each had a tag-matching regex and a class-bearing replacement - confirm.
    styled = styled.replace(//g, '');
    styled = styled.replace(//g, '');
    return styled;
}
/**
 * Build a case-insensitive, global pattern for one whole element that
 * contains `marker` followed (anywhere later in the same element) by `suffix`.
 *
 * @param {string} tag - Regex source for the tag name, e.g. `h[1-6]` or `p`.
 * @param {string} marker - Regex source for the leading text to look for.
 * @param {string} suffix - Regex source for the text that must follow it.
 * @returns {RegExp} Pattern matching the element from opening to closing tag.
 */
function buildRawTocPattern(tag, marker, suffix) {
    // Any characters, but never stepping over this element's closing tag, so a
    // match cannot start in one element and end in a later sibling.
    const inner = `(?:(?!</${tag}>).)*?`;
    // `\b` after the tag name keeps `<p` from matching `<pre>`.
    return new RegExp(`<${tag}\\b[^>]*>${inner}${marker}${inner}${suffix}${inner}</${tag}>`, 'gi');
}
/**
 * Hide raw AsciiDoc ToC text
 *
 * Removes, as whole elements:
 *  - headings (h1-h6) containing "Table of Contents" followed by "(<digits>)";
 *  - paragraphs containing "Table of Contents" followed by "(<digits>)";
 *  - paragraphs containing "Assumptions" followed by "[n=0]".
 * Matching is case-insensitive and does not span line breaks.
 *
 * @param {string} html - HTML string to clean.
 * @returns {string} The HTML without the raw ToC elements.
 */
function hideRawTocText(html) {
    const patterns = [
        buildRawTocPattern('h[1-6]', 'Table of Contents', '\\(\\d+\\)'),
        buildRawTocPattern('p', 'Table of Contents', '\\(\\d+\\)'),
        buildRawTocPattern('p', 'Assumptions', '\\[n=0\\]'),
    ];
    return patterns.reduce((cleaned, pattern) => cleaned.replace(pattern, ''), html);
}