You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
693 lines
34 KiB
693 lines
34 KiB
"use strict"; |
|
Object.defineProperty(exports, "__esModule", { value: true }); |
|
exports.postProcessHtml = postProcessHtml; |
|
const music_1 = require("./music"); |
|
/**
 * Post-processes HTML output from AsciiDoctor.
 *
 * Converts the placeholder macros left in the HTML (BOOKSTR:, hashtag:,
 * WIKILINK:, link:, link:nostr:) into HTML elements carrying data attributes
 * and CSS classes, then runs the media, OpenGraph, image, optional musical
 * notation, markdown clean-up, styling and ToC passes in that order.
 *
 * @param {string} html - HTML produced by AsciiDoctor.
 * @param {object} [options]
 * @param {string|function(string): string} [options.hashtagUrl] - URL template
 *   containing `{topic}`, or a function mapping a topic to a URL. When absent,
 *   hashtags are rendered as non-clickable spans.
 * @param {string|function(string): string} [options.wikilinkUrl] - URL template
 *   containing `{dtag}`, or a function mapping a d-tag to a URL. Defaults to
 *   `/events?d=<dtag>`.
 * @param {string} [options.linkBaseURL] - Passed to processOpenGraphLinks.
 * @param {boolean} [options.enableMusicalNotation] - Run the musical notation pass.
 * @returns {string} The processed HTML.
 */
function postProcessHtml(html, options = {}) {
    const EXTERNAL_ICON = '<svg style="width: 0.75rem; height: 0.75rem; flex-shrink: 0;" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 6H6a2 2 0 00-2 2v10a2 2 0 002 2h10a2 2 0 002-2v-4M14 4h6m0 0v6m0-6L10 14" /></svg>';
    const CHAR_TO_ENTITY = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
    const ENTITY_TO_CHAR = { '&amp;': '&', '&lt;': '<', '&gt;': '>', '&quot;': '"', '&#39;': "'" };

    // Escape all five HTML-significant characters (text content).
    const escapeHtml = (value) => value.replace(/[&<>"']/g, (ch) => CHAR_TO_ENTITY[ch]);
    // Escape only quotes: for attribute values taken from already-escaped HTML.
    const escapeQuotes = (value) => value.replace(/["']/g, (ch) => CHAR_TO_ENTITY[ch]);
    // Escape a raw URL for use inside a double-quoted attribute.
    const escapeUrlAttr = (value) => value.replace(/[&"']/g, (ch) => CHAR_TO_ENTITY[ch]);
    // Single pass, so "&amp;lt;" becomes "&lt;" and is not decoded a second time.
    const unescapeHtml = (value) => value.replace(/&(?:amp|lt|gt|quot|#39);/g, (entity) => ENTITY_TO_CHAR[entity]);
    // Fresh escape of text that may or may not already be escaped (no double-escaping).
    const normalizeText = (value) => escapeHtml(unescapeHtml(value));
    // Anchor markup shared by the link: macro handler and the escaped-anchor clean-up.
    const externalLink = (rawUrl, rawText) => `<a href="${escapeUrlAttr(rawUrl)}" target="_blank" rel="noreferrer noopener" class="break-words inline-flex items-baseline gap-1">${escapeHtml(rawText)} ${EXTERNAL_ICON}</a>`;

    let processed = html;

    // Convert bookstr markers to HTML placeholders
    processed = processed.replace(/BOOKSTR:([^<>\s]+)/g, (_match, bookContent) => {
        return `<span data-bookstr="${escapeQuotes(bookContent)}" class="bookstr-placeholder"></span>`;
    });

    // Convert hashtag links to HTML
    processed = processed.replace(/hashtag:([^[]+)\[([^\]]+)\]/g, (_match, normalizedHashtag, displayText) => {
        const escapedDisplay = normalizeText(displayText);
        if (!options.hashtagUrl) {
            // Default: a span styled like a link but not clickable
            return `<span class="hashtag-link">${escapedDisplay}</span>`;
        }
        // A function replacer keeps "$&", "$1", ... in the topic literal.
        const url = typeof options.hashtagUrl === 'function'
            ? options.hashtagUrl(normalizedHashtag)
            : options.hashtagUrl.replace(/{topic}/g, () => normalizedHashtag);
        const escapedUrl = escapeQuotes(url);
        return `<a class="hashtag-link text-primary-600 dark:text-primary-500 hover:underline" data-topic="${escapeQuotes(normalizedHashtag)}" data-url="${escapedUrl}" href="${escapedUrl}">${escapedDisplay}</a>`;
    });

    // Convert WIKILINK:dtag|display placeholders; the pattern never crosses an HTML tag
    processed = processed.replace(/WIKILINK:([^|<>]+)\|([^<>\s]+)/g, (_match, dTag, displayText) => {
        const dtag = dTag.trim();
        const escapedDtag = escapeQuotes(dtag);
        const escapedDisplay = normalizeText(displayText.trim());
        let url;
        if (!options.wikilinkUrl) {
            url = `/events?d=${dtag}`;
        }
        else if (typeof options.wikilinkUrl === 'function') {
            url = options.wikilinkUrl(dtag);
        }
        else {
            url = options.wikilinkUrl.replace(/{dtag}/g, () => dtag);
        }
        const escapedUrl = escapeQuotes(url);
        return `<a class="wikilink text-primary-600 dark:text-primary-500 hover:underline" data-dtag="${escapedDtag}" data-url="${escapedUrl}" href="${escapedUrl}">${escapedDisplay}</a>`;
    });

    // Convert leftover link:url[text] macros that AsciiDoctor did not convert.
    // This MUST run before processOpenGraphLinks, which strips "link:" prefixes.
    // URL and text may arrive HTML-escaped, so both are unescaped and then
    // escaped exactly once. Links whose text is a ws:// or wss:// relay URL get
    // the same markup here; processOpenGraphLinks is the pass that skips them.
    processed = processed.replace(/link:(https?:\/\/[^\[]+)\[([^\]]+)\]/g, (_match, url, text) => {
        return externalLink(unescapeHtml(url), unescapeHtml(text));
    });

    // Convert nostr: links to HTML
    processed = processed.replace(/link:nostr:([^[]+)\[([^\]]+)\]/g, (_match, bech32Id, displayText) => {
        const nostrType = getNostrType(bech32Id);
        const escaped = escapeQuotes(bech32Id);
        if (nostrType === 'nevent' || nostrType === 'naddr' || nostrType === 'note') {
            // Render as embedded event placeholder
            return `<div data-embedded-note="${escaped}" class="embedded-note-container">Loading embedded event...</div>`;
        }
        if (nostrType === 'npub' || nostrType === 'nprofile') {
            // Render as user handle
            return `<span class="user-handle" data-pubkey="${escaped}">@${displayText}</span>`;
        }
        // Fallback to a regular link; the href uses the escaped id as well so a
        // quote in the id cannot terminate the attribute.
        return `<a href="nostr:${escaped}" class="nostr-link text-blue-600 hover:text-blue-800 hover:underline" data-nostr-type="${nostrType || 'unknown'}" data-bech32="${escaped}">${displayText}</a>`;
    });

    // Process media URLs (YouTube, Spotify, video, audio)
    processed = processMedia(processed);

    // Fix double-escaped quotes in href attributes before the OpenGraph pass:
    // href="&quot;url&quot;" -> href="url"
    processed = processed.replace(/href\s*=\s*["']&quot;(https?:\/\/[^"']+)&quot;["']/gi, (_match, url) => {
        return `href="${escapeQuotes(url)}"`;
    });

    // Process OpenGraph links (external links that should have rich previews)
    processed = processOpenGraphLinks(processed, options.linkBaseURL);

    // Process images: add max-width styling and data attributes
    processed = processImages(processed);

    // Process musical notation if enabled
    if (options.enableMusicalNotation) {
        processed = (0, music_1.processMusicalNotation)(processed);
    }

    // Turn escaped anchors that show up as text (&lt;a href="url"&gt;text&lt;/a&gt;)
    // into real anchors. The text group is lazy so two escaped anchors in the
    // same text node are converted separately.
    processed = processed.replace(/&lt;a\s+href=["'](https?:\/\/[^"']+)["']\s*&gt;([^<]+?)&lt;\/a&gt;/gi, (_match, url, text) => {
        return externalLink(unescapeHtml(url), unescapeHtml(text));
    });

    // Clean up any leftover markdown syntax
    processed = cleanupMarkdown(processed);

    // Add styling classes
    processed = addStylingClasses(processed);

    // Hide raw ToC text
    processed = hideRawTocText(processed);

    return processed;
}
|
/**
 * Get the Nostr identifier type from the prefix of a bech32 identifier.
 *
 * @param {string} id - Identifier such as "npub1...", "nevent1...", "note1...".
 * @returns {('npub'|'nprofile'|'nevent'|'naddr'|'note'|null)} The matching
 *   prefix, or null when the id is not a string or has none of these prefixes.
 */
function getNostrType(id) {
    if (typeof id !== 'string') {
        return null;
    }
    const knownPrefixes = ['npub', 'nprofile', 'nevent', 'naddr', 'note'];
    return knownPrefixes.find((prefix) => id.startsWith(prefix)) || null;
}
|
/**
 * Process media URLs (YouTube, Spotify, video, audio).
 * Converts MEDIA: placeholders to HTML embeds/players.
 *
 * Recognised placeholders:
 *   MEDIA:youtube:<id>
 *   MEDIA:spotify:<track|album|playlist|artist|episode|show>:<id>
 *   MEDIA:video:<http(s) url>
 *   MEDIA:audio:<http(s) url>
 *
 * @param {string} html - HTML that may contain MEDIA: placeholders.
 * @returns {string} HTML with the placeholders replaced by embed markup.
 */
function processMedia(html) {
    const CHAR_TO_ENTITY = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
    const ENTITY_TO_CHAR = { '&amp;': '&', '&lt;': '<', '&gt;': '>', '&quot;': '"', '&#39;': "'" };
    // The URL may already be HTML-escaped in the surrounding markup, so decode
    // known entities first and then escape once ("&amp;" must not become "&amp;amp;").
    const escapeUrlAttr = (url) => url
        .replace(/&(?:amp|lt|gt|quot|#39);/g, (entity) => ENTITY_TO_CHAR[entity])
        .replace(/[&<>"']/g, (ch) => CHAR_TO_ENTITY[ch]);

    let processed = html;

    // YouTube embeds. The id pattern only admits [a-zA-Z0-9_-], so it needs no escaping.
    processed = processed.replace(/MEDIA:youtube:([a-zA-Z0-9_-]+)/g, (_match, videoId) => {
        return `<div class="media-embed youtube-embed" style="position: relative; padding-bottom: 56.25%; height: 0; overflow: hidden; max-width: 100%; margin: 1rem 0;">
  <iframe
    style="position: absolute; top: 0; left: 0; width: 100%; height: 100%;"
    src="https://www.youtube.com/embed/${videoId}"
    frameborder="0"
    allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture"
    allowfullscreen
    loading="lazy">
  </iframe>
</div>`;
    });

    // Spotify embeds. Type is one of a fixed list and the id is alphanumeric.
    processed = processed.replace(/MEDIA:spotify:(track|album|playlist|artist|episode|show):([a-zA-Z0-9]+)/g, (_match, type, id) => {
        return `<div class="media-embed spotify-embed" style="margin: 1rem 0;">
  <iframe
    style="border-radius: 12px; width: 100%; max-width: 100%;"
    src="https://open.spotify.com/embed/${type}/${id}?utm_source=generator"
    width="100%"
    height="352"
    frameborder="0"
    allowfullscreen=""
    allow="autoplay; clipboard-write; encrypted-media; fullscreen; picture-in-picture"
    loading="lazy">
  </iframe>
</div>`;
    });

    // Video files
    processed = processed.replace(/MEDIA:video:(https?:\/\/[^\s<>"{}|\\^`\[\]()]+)/g, (_match, url) => {
        return `<div class="media-embed video-embed" style="margin: 1rem 0;">
  <video
    controls
    preload="metadata"
    style="width: 100%; max-width: 100%; height: auto; border-radius: 8px;"
    class="media-player">
    <source src="${escapeUrlAttr(url)}" type="video/mp4">
    Your browser does not support the video tag.
  </video>
</div>`;
    });

    // Audio files
    processed = processed.replace(/MEDIA:audio:(https?:\/\/[^\s<>"{}|\\^`\[\]()]+)/g, (_match, url) => {
        return `<div class="media-embed audio-embed" style="margin: 1rem 0;">
  <audio
    controls
    preload="metadata"
    style="width: 100%; max-width: 100%;"
    class="media-player">
    <source src="${escapeUrlAttr(url)}">
    Your browser does not support the audio tag.
  </audio>
</div>`;
    });

    return processed;
}
|
/**
 * Process OpenGraph links - mark external links for OpenGraph preview fetching.
 *
 * Steps, in order:
 *  1. Strip stray "link:" prefixes and repair corrupted href attributes.
 *  2. Unwrap anchors inside <code>/<pre> (keeping the opening tag's attributes)
 *     and swap those blocks for placeholders so later steps cannot touch them.
 *  3. Rebuild opengraph containers whose data-og-url contains markup.
 *  4. Wrap each remaining external http(s) anchor in an
 *     "opengraph-link-container" span with an empty preview card; the preview
 *     itself is expected to be filled in client-side.
 *  5. Restore the protected <code>/<pre> blocks.
 *
 * @param {string} html - HTML to process.
 * @param {string} [linkBaseURL] - Site base URL; anchors whose host equals its
 *   host are treated as internal and left untouched.
 * @returns {string} The processed HTML.
 */
function processOpenGraphLinks(html, linkBaseURL) {
    const EXTERNAL_ICON = '<svg style="width: 0.75rem; height: 0.75rem; flex-shrink: 0;" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 6H6a2 2 0 00-2 2v10a2 2 0 002 2h10a2 2 0 002-2v-4M14 4h6m0 0v6m0-6L10 14" /></svg>';
    const CHAR_TO_ENTITY = { '&': '&amp;', '"': '&quot;', "'": '&#39;' };
    const ENTITY_TO_CHAR = { '&amp;': '&', '&quot;': '"', '&#39;': "'" };

    // hrefs read from HTML are usually already entity-encoded; decode, then
    // encode once so "&amp;" does not turn into "&amp;amp;".
    const escapeUrlAttr = (url) => url
        .replace(/&(?:amp|quot|#39);/g, (entity) => ENTITY_TO_CHAR[entity])
        .replace(/[&"']/g, (ch) => CHAR_TO_ENTITY[ch]);
    // Link text produced by earlier passes may already end with the external
    // icon; drop it so the container does not show the icon twice.
    const stripTrailingIcon = (text) => (text.endsWith(` ${EXTERNAL_ICON}`)
        ? text.slice(0, -(EXTERNAL_ICON.length + 1))
        : text);
    // True when the attribute string has `token` among its class names.
    const hasClassToken = (attrs, token) => {
        const classMatch = /(?:^|\s)class\s*=\s*["']([^"']*)["']/i.exec(attrs);
        return classMatch !== null && classMatch[1].split(/\s+/).includes(token);
    };
    const unwrapAnchors = (content) => content.replace(/<a[^>]*>(.*?)<\/a>/gi, '$1');
    const buildContainer = (url, linkText) => {
        const escapedUrl = escapeUrlAttr(url);
        return `<span class="opengraph-link-container" data-og-url="${escapedUrl}">
  <a href="${escapedUrl}" target="_blank" rel="noreferrer noopener" class="opengraph-link break-words inline-flex items-baseline gap-1">${linkText} ${EXTERNAL_ICON}</a>
  <div class="opengraph-preview" data-og-loading="true" style="display: none;">
    <div class="opengraph-card">
      <div class="opengraph-image-container">
        <img class="opengraph-image" src="" alt="" style="display: none;" />
      </div>
      <div class="opengraph-content">
        <div class="opengraph-site"></div>
        <div class="opengraph-title"></div>
        <div class="opengraph-description"></div>
      </div>
    </div>
  </div>
</span>`;
    };

    let processed = html;

    // Remove "link:" left directly in front of an anchor tag
    processed = processed.replace(/link:\s*<a/gi, '<a');
    // Remove "link:" glued between other text characters.
    // NOTE(review): this also rewrites words that merely end in "link:"
    // (e.g. "permalink:x" -> "permax"); kept as-is, confirm it is intended.
    processed = processed.replace(/([^"'>\s])link:([a-zA-Z0-9])/gi, '$1$2');
    // Remove "link:" that is preceded by whitespace and followed by an anchor
    processed = processed.replace(/\s+link:\s*(?=<a\s+href)/gi, ' ');

    // Fix href attributes wrapped in escaped quotes: href="&quot;url&quot;" -> href="url"
    processed = processed.replace(/href\s*=\s*["']&quot;(https?:\/\/[^"']+)&quot;["']/gi, (_match, url) => {
        return `href="${url}"`;
    });

    // Fix href attributes that contain HTML fragments by keeping only the URL part
    processed = processed.replace(/href\s*=\s*["']([^"']*<[^"']*)["']/gi, (match, corruptedHref) => {
        const urlMatch = corruptedHref.match(/(https?:\/\/[^\s<>"']+)/i);
        // If no URL can be recovered, leave it; the validation below skips it.
        return urlMatch ? `href="${urlMatch[1]}"` : match;
    });

    // Links inside code/pre blocks should be plain text. The opening tag is
    // reused verbatim so its attributes (e.g. highlight classes) survive.
    processed = processed.replace(/(<code[^>]*>)([\s\S]*?)<\/code>/gi, (_match, openTag, content) => {
        return `${openTag}${unwrapAnchors(content)}</code>`;
    });
    processed = processed.replace(/(<pre[^>]*>)([\s\S]*?)<\/pre>/gi, (_match, openTag, content) => {
        return `${openTag}${unwrapAnchors(content)}</pre>`;
    });

    // Protect pre blocks first (they can contain code blocks), then code blocks
    const codeBlockPlaceholders = [];
    const preBlockPlaceholders = [];
    processed = processed.replace(/<pre[^>]*>([\s\S]*?)<\/pre>/gi, (match) => {
        const placeholder = `__PREBLOCK_${preBlockPlaceholders.length}__`;
        preBlockPlaceholders.push(match);
        return placeholder;
    });
    processed = processed.replace(/<code[^>]*>([\s\S]*?)<\/code>/gi, (match) => {
        const placeholder = `__CODEBLOCK_${codeBlockPlaceholders.length}__`;
        codeBlockPlaceholders.push(match);
        return placeholder;
    });

    // Extract the host part of linkBaseURL, if one was given
    let baseDomain = null;
    if (typeof linkBaseURL === 'string') {
        const baseMatch = linkBaseURL.match(/^https?:\/\/([^\/]+)/);
        if (baseMatch) {
            baseDomain = baseMatch[1];
        }
    }

    // Repair opengraph containers whose data-og-url contains markup. Containers
    // with a clean URL are left alone so an already processed document is not
    // rebuilt (which would also duplicate the icon).
    processed = processed.replace(/<span[^>]*data-og-url=["']([^"']+)["'][^>]*>[\s\S]*?<\/span>/gi, (match, ogUrl) => {
        if (!ogUrl.includes('<') && !ogUrl.includes('>')) {
            return match;
        }
        const urlMatch = ogUrl.match(/(https?:\/\/[^\s<>"']+)/i);
        if (urlMatch) {
            const cleanUrl = urlMatch[1];
            const linkMatch = match.match(/<a[^>]*>(.*?)<\/a>/i);
            const linkText = linkMatch ? stripTrailingIcon(linkMatch[1]) : cleanUrl;
            return buildContainer(cleanUrl, linkText);
        }
        // No usable URL: drop the span and keep its first text node, if any
        const textMatch = match.match(/>([^<]+)</);
        return textMatch ? textMatch[1] : '';
    });

    // Wrap complete, valid, external anchors
    processed = processed.replace(/<a\s+([^>]*\s+)?href\s*=\s*["'](https?:\/\/[^"']{1,2048})["']([^>]*?)>(.*?)<\/a>/gis, (match, before, href, after, linkText) => {
        // Validate href first: anything that is not a clean URL is skipped
        if (!href || href.includes('href=') || !/^https?:\/\/[^\s<>"']+$/i.test(href)) {
            return match;
        }
        // Exactly one opening and one closing anchor tag, otherwise treat as corrupted
        const openATags = (match.match(/<a\s/g) || []).length;
        const closeATags = (match.match(/<\/a>/g) || []).length;
        if (openATags !== 1 || closeATags !== 1) {
            return match;
        }
        // More than one href attribute = corrupted
        if (match.split('href="').length > 2) {
            return match;
        }
        // Already a wikilink, nostr link or opengraph link. The class attribute
        // holds several names, so test for the name among them rather than for
        // an exact class="..." string.
        const anchorAttrs = `${before || ''} ${after || ''}`;
        if (['wikilink', 'nostr-link', 'opengraph-link'].some((token) => hasClassToken(anchorAttrs, token))) {
            return match;
        }
        // Already part of an embed or container
        if (['data-embedded-note', 'youtube-embed', 'spotify-embed', 'media-embed', 'opengraph-link-container'].some((marker) => match.includes(marker))) {
            return match;
        }
        // Media file URLs
        if (/\.(mp4|webm|ogg|m4v|mov|avi|mkv|flv|wmv|mp3|m4a|wav|flac|aac|opus|wma|jpeg|jpg|png|gif|webp|svg)$/i.test(href)) {
            return match;
        }
        // YouTube or Spotify (handled as media)
        if (/youtube\.com|youtu\.be|spotify\.com/i.test(href)) {
            return match;
        }
        // Link text containing ws:// or wss:// is a relay URL, not a web page
        if (/wss?:\/\//i.test(linkText)) {
            return match;
        }
        // Same host as linkBaseURL = internal link
        if (baseDomain) {
            const hrefMatch = href.match(/^https?:\/\/([^\/]+)/);
            if (hrefMatch && hrefMatch[1] === baseDomain) {
                return match;
            }
        }
        return buildContainer(href, stripTrailingIcon(linkText));
    });

    // Restore protected blocks. Function replacers are required: with a string
    // replacement, "$&", "$1", "$'" inside the code would be expanded.
    codeBlockPlaceholders.forEach((codeBlock, index) => {
        processed = processed.replace(`__CODEBLOCK_${index}__`, () => codeBlock);
    });
    preBlockPlaceholders.forEach((preBlock, index) => {
        processed = processed.replace(`__PREBLOCK_${index}__`, () => preBlock);
    });

    return processed;
}
|
/**
 * Process images: add max-width styling and data attributes.
 *
 * Every <img> that has a non-empty quoted src gets:
 *  - sizing classes (any existing max-w-* class is replaced by max-w-[400px]),
 *  - data-asciidoc-image="true",
 *  - data-image-index: position of its src among the distinct src values, in
 *    order of first appearance (repeated images share an index),
 *  - data-image-src: the src value.
 * Tags without such a src are returned unchanged.
 *
 * @param {string} html - HTML to process.
 * @returns {string} HTML with the rewritten <img> tags.
 */
function processImages(html) {
    // src -> index of first appearance; filled in during the single pass below
    const indexBySrc = new Map();
    return html.replace(/<img([^>]+)>/gi, (imgTag, attributes) => {
        // Anchor on a preceding space so "data-src=" / "data-image-src=" are not mistaken for src
        const srcMatch = attributes.match(/(?:^|\s)src=["']([^"']+)["']/i);
        if (!srcMatch) {
            return imgTag;
        }
        const src = srcMatch[1];
        if (!indexBySrc.has(src)) {
            indexBySrc.set(src, indexBySrc.size);
        }
        const imageIndex = indexBySrc.get(src);

        // For <img ... /> take the closing slash off and put it back at the very
        // end; otherwise the new attributes would land after the "/".
        const isSelfClosing = /\/\s*$/.test(attributes);
        let updatedAttributes = isSelfClosing ? attributes.replace(/\s*\/\s*$/, '') : attributes;

        if (/class=["']/i.test(updatedAttributes)) {
            updatedAttributes = updatedAttributes.replace(/class=["']([^"']*)["']/i, (_match, classes) => {
                const keptClasses = classes.replace(/max-w-\[?[^\s\]]+\]?/g, '').trim();
                const sizing = 'max-w-[400px] object-contain cursor-zoom-in';
                return `class="${keptClasses ? `${keptClasses} ${sizing}` : sizing}"`;
            });
        }
        else {
            updatedAttributes += ' class="max-w-[400px] h-auto object-contain cursor-zoom-in"';
        }
        updatedAttributes += ` data-asciidoc-image="true" data-image-index="${imageIndex}" data-image-src="${src.replace(/"/g, '&quot;')}"`;
        return `<img${updatedAttributes}${isSelfClosing ? ' /' : ''}>`;
    });
}
|
/**
 * Clean URL by removing tracking parameters.
 * Based on jumble's cleanUrl function.
 *
 * Removes every query parameter whose name is in the list below, or whose
 * name starts with "utm_" or "_". When at least one parameter is removed the
 * URL is returned in the serialised form produced by the URL class; the URL
 * is also serialised when nothing is removed, but its query is then left
 * exactly as given.
 *
 * @param {string} url - Absolute URL to clean.
 * @returns {string} The cleaned URL, or the input unchanged when it cannot be
 *   parsed as an absolute URL.
 */
function cleanUrl(url) {
    let parsedUrl;
    try {
        parsedUrl = new URL(url);
    }
    catch {
        // Not an absolute URL (relative path, free text, ...): return it unchanged
        return url;
    }
    // Exact parameter names to remove
    const trackingParams = new Set([
        // Google Analytics & Ads
        'utm_source', 'utm_medium', 'utm_campaign', 'utm_term', 'utm_content',
        'utm_id', 'utm_source_platform', 'utm_creative_format', 'utm_marketing_tactic',
        'gclid', 'gclsrc', 'dclid', 'gbraid', 'wbraid',
        // Facebook
        'fbclid', 'fb_action_ids', 'fb_action_types', 'fb_source', 'fb_ref',
        // Twitter/X
        'twclid', 'twsrc',
        // Microsoft/Bing
        'msclkid',
        // Mailchimp
        'mc_cid', 'mc_eid',
        // Adobe
        'adobe_mc', 'adobe_mc_ref', 'adobe_mc_sdid',
        // HubSpot
        'hsCtaTracking', 'hsa_acc', 'hsa_cam', 'hsa_grp', 'hsa_ad', 'hsa_src', 'hsa_tgt', 'hsa_kw', 'hsa_mt', 'hsa_net', 'hsa_ver',
        // Marketo
        'mkt_tok',
        // YouTube
        'si', 'feature', 'kw', 'pp',
        // Other common tracking
        'ref', 'referrer', 'source', 'campaign', 'medium', 'content',
        'yclid', 'srsltid', '_ga', '_gl', 'igshid', 'epik', 'pk_campaign', 'pk_kwd',
        // Mobile app tracking
        'adjust_tracker', 'adjust_campaign', 'adjust_adgroup', 'adjust_creative',
        // Amazon
        'tag', 'linkCode', 'creative', 'creativeASIN', 'linkId', 'ascsubtag',
        // Affiliate tracking
        'aff_id', 'affiliate_id', 'aff', 'ref_', 'refer',
        // Social media share tracking
        'share', 'shared', 'sharesource',
    ]);
    // Snapshot the distinct keys first: deleting while iterating the live list would skip entries
    const presentKeys = new Set(parsedUrl.searchParams.keys());
    for (const key of presentKeys) {
        if (trackingParams.has(key) || key.startsWith('utm_') || key.startsWith('_')) {
            parsedUrl.searchParams.delete(key);
        }
    }
    return parsedUrl.toString();
}
|
/**
 * Clean up leftover markdown syntax.
 *
 * Converts markdown images `![alt](url)` to <img> tags and markdown links
 * `[text](url)` to external anchors. URLs are passed through cleanUrl to drop
 * tracking parameters. A markdown link is left as text when:
 *  - its URL already appears in an href attribute elsewhere in the document,
 *  - its text contains &lt;, &gt; or &amp; entities (treated as already processed),
 *  - its URL uses the javascript:, vbscript: or data: scheme.
 *
 * @param {string} html - HTML that may still contain markdown syntax.
 * @returns {string} The cleaned HTML.
 */
function cleanupMarkdown(html) {
    const EXTERNAL_ICON = '<svg style="width: 0.75rem; height: 0.75rem; flex-shrink: 0;" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 6H6a2 2 0 00-2 2v10a2 2 0 002 2h10a2 2 0 002-2v-4M14 4h6m0 0v6m0-6L10 14" /></svg>';
    const CHAR_TO_ENTITY = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
    const ENTITY_TO_CHAR = { '&amp;': '&', '&lt;': '<', '&gt;': '>', '&quot;': '"', '&#39;': "'" };
    const escapeHtml = (value) => value.replace(/[&<>"']/g, (ch) => CHAR_TO_ENTITY[ch]);
    const escapeQuotes = (value) => value.replace(/["']/g, (ch) => CHAR_TO_ENTITY[ch]);
    const escapeUrlAttr = (value) => value.replace(/[&"']/g, (ch) => CHAR_TO_ENTITY[ch]);
    const unescapeHtml = (value) => value.replace(/&(?:amp|lt|gt|quot|#39);/g, (entity) => ENTITY_TO_CHAR[entity]);
    // The URL sits in HTML text, so "&" may be written "&amp;". Decode before
    // cleanUrl parses the query string, then escape once for the attribute.
    const toUrlAttr = (url) => escapeUrlAttr(cleanUrl(unescapeHtml(url)));

    // Markdown images
    const withImages = html.replace(/!\[([^\]]*)\]\(([^)]+)\)/g, (_match, alt, url) => {
        // Quotes in the alt text must not terminate the attribute
        return `<img src="${toUrlAttr(url)}" alt="${escapeQuotes(alt)}" class="max-w-[400px] object-contain my-0" />`;
    });

    // Markdown links
    return withImages.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (match, text, url) => {
        const isAlreadyLinked = (candidate) => withImages.includes(`href="${candidate}"`) || withImages.includes(`href='${candidate}'`);
        // Skip when an anchor for this URL already exists (raw or quote-escaped form)
        if (isAlreadyLinked(url) || isAlreadyLinked(escapeQuotes(url))) {
            return match;
        }
        // Skip when the text carries entities and so looks already processed
        if (text.includes('&lt;') || text.includes('&gt;') || text.includes('&amp;')) {
            return match;
        }
        // Never turn script-capable schemes into clickable links
        if (/^\s*(?:javascript|vbscript|data):/i.test(unescapeHtml(url))) {
            return match;
        }
        return `<a href="${toUrlAttr(url)}" target="_blank" rel="noreferrer noopener" class="break-words inline-flex items-baseline gap-1">${escapeHtml(unescapeHtml(text))} ${EXTERNAL_ICON}</a>`;
    });
}
|
/**
 * Add proper CSS classes for styling.
 *
 * Extends the class lists of strikethrough, subscript and superscript spans
 * (only spans whose content holds no nested tag), and rewrites the class
 * attribute of `highlightjs` <pre>/<code> tags to "highlightjs hljs".
 *
 * @param {string} html - HTML to style.
 * @returns {string} HTML with the additional classes.
 */
function addStylingClasses(html) {
    // [pattern, replacement] pairs, applied in order
    const rules = [
        // Strikethrough
        [/<span class="line-through">([^<]+)<\/span>/g, '<span class="line-through line-through-2">$1</span>'],
        // Subscript
        [/<span class="subscript">([^<]+)<\/span>/g, '<span class="subscript text-xs align-sub">$1</span>'],
        // Superscript
        [/<span class="superscript">([^<]+)<\/span>/g, '<span class="superscript text-xs align-super">$1</span>'],
        // Code highlighting
        [/<pre class="highlightjs[^"]*">/g, '<pre class="highlightjs hljs">'],
        [/<code class="highlightjs[^"]*">/g, '<code class="highlightjs hljs">'],
    ];
    return rules.reduce((styled, [pattern, replacement]) => styled.replace(pattern, replacement), html);
}
|
/**
 * Hide raw AsciiDoc ToC text.
 *
 * Removes, case-insensitively:
 *  - headings and paragraphs containing "Table of Contents" followed by a
 *    number in parentheses, e.g. "Table of Contents (3)",
 *  - paragraphs containing "Assumptions" followed by "[n=0]".
 *
 * A match never extends past the closing tag of the element it starts in, so
 * neighbouring paragraphs/headings on the same line are kept. As the patterns
 * do not match line breaks, only elements written on a single line are removed.
 *
 * @param {string} html - HTML to clean.
 * @returns {string} HTML without the raw ToC elements.
 */
function hideRawTocText(html) {
    const rawTocPatterns = [
        /<h[1-6][^>]*>(?:(?!<\/h[1-6]>).)*?Table of Contents(?:(?!<\/h[1-6]>).)*?\(\d+\)(?:(?!<\/h[1-6]>).)*?<\/h[1-6]>/gi,
        // "<p" must be followed by whitespace or ">" so that <pre> is not treated as a paragraph
        /<p(?:\s[^>]*)?>(?:(?!<\/p>).)*?Table of Contents(?:(?!<\/p>).)*?\(\d+\)(?:(?!<\/p>).)*?<\/p>/gi,
        /<p(?:\s[^>]*)?>(?:(?!<\/p>).)*?Assumptions(?:(?!<\/p>).)*?\[n=0\](?:(?!<\/p>).)*?<\/p>/gi,
    ];
    return rawTocPatterns.reduce((cleaned, pattern) => cleaned.replace(pattern, ''), html);
}
|
|
|