"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.convertToAsciidoc = convertToAsciidoc;
const types_1 = require("../types");
// Import node-emoji if available (optional dependency)
let emoji;
try {
    emoji = require('node-emoji');
}
catch (e) {
    // node-emoji not available, emoji conversion will be skipped
    emoji = null;
}
/**
 * Exact query-parameter names that cleanUrl() strips.
 * Kept at module level so the collection is built once, not on every call.
 */
const TRACKING_PARAMS = new Set([
    // Google Analytics & Ads
    'utm_source', 'utm_medium', 'utm_campaign', 'utm_term', 'utm_content', 'utm_id',
    'utm_source_platform', 'utm_creative_format', 'utm_marketing_tactic',
    'gclid', 'gclsrc', 'dclid', 'gbraid', 'wbraid',
    // Facebook
    'fbclid', 'fb_action_ids', 'fb_action_types', 'fb_source', 'fb_ref',
    // Twitter/X
    'twclid', 'twsrc',
    // Microsoft/Bing
    'msclkid',
    // Mailchimp
    'mc_cid', 'mc_eid',
    // Adobe
    'adobe_mc', 'adobe_mc_ref', 'adobe_mc_sdid',
    // HubSpot
    'hsCtaTracking', 'hsa_acc', 'hsa_cam', 'hsa_grp', 'hsa_ad', 'hsa_src',
    'hsa_tgt', 'hsa_kw', 'hsa_mt', 'hsa_net', 'hsa_ver',
    // Marketo
    'mkt_tok',
    // YouTube
    'si', 'feature', 'kw', 'pp',
    // Other common tracking
    'ref', 'referrer', 'source', 'campaign', 'medium', 'content', 'yclid', 'srsltid',
    '_ga', '_gl', 'igshid', 'epik', 'pk_campaign', 'pk_kwd',
    // Mobile app tracking
    'adjust_tracker', 'adjust_campaign', 'adjust_adgroup', 'adjust_creative',
    // Amazon
    'tag', 'linkCode', 'creative', 'creativeASIN', 'linkId', 'ascsubtag',
    // Affiliate tracking
    'aff_id', 'affiliate_id', 'aff', 'ref_', 'refer',
    // Social media share tracking
    'share', 'shared', 'sharesource',
]);
/**
 * Spacing fixes applied to Markdown before links/images are converted.
 * Each entry is [pattern, replacement]; they run in this order.
 */
const MARKDOWN_SPACING_FIXES = [
    [/`([^`\n]+)`\s*\(([^)]+)\)/g, '`$1` ($2)'],
    [/([a-zA-Z0-9])`([^`\n]+)`([a-zA-Z0-9])/g, '$1 `$2` $3'],
    [/([a-zA-Z0-9])`([^`\n]+)`\s*\(/g, '$1 `$2` ('],
    [/\)`([^`\n]+)`([a-zA-Z0-9])/g, ') `$1` $2'],
    [/([a-zA-Z0-9])\)([a-zA-Z0-9])/g, '$1) $2'],
    // Add space before == but not if it's part of a markdown link pattern.
    // NOTE(review): this regex literal was truncated in the file under review
    // (everything after "(?" was lost); pattern and replacement are
    // reconstructed from the accompanying comment - confirm against upstream.
    [/([a-zA-Z0-9])(?<!\]\()==/g, '$1 =='],
];
/**
 * Clean URL by removing tracking parameters.
 * Based on jumble's cleanUrl function.
 *
 * Removes every query parameter listed in TRACKING_PARAMS plus any parameter
 * whose name starts with "utm_" or "_".
 *
 * @param {string} url - URL to clean.
 * @returns {string} The re-serialized URL, or the input unchanged when it
 *   cannot be parsed by the URL constructor.
 */
function cleanUrl(url) {
    let parsedUrl;
    try {
        parsedUrl = new URL(url);
    }
    catch {
        // If URL parsing fails, return original URL
        return url;
    }
    // Snapshot the keys first: deleting while iterating the live view skips entries.
    for (const key of Array.from(parsedUrl.searchParams.keys())) {
        if (TRACKING_PARAMS.has(key) || key.startsWith('utm_') || key.startsWith('_')) {
            parsedUrl.searchParams.delete(key);
        }
    }
    return parsedUrl.toString();
}
/**
 * Escapes square brackets so AsciiDoc does not treat them as macro delimiters.
 *
 * @param {string} text - Link or anchor text.
 * @returns {string} Text with "[" and "]" backslash-escaped.
 */
function escapeBrackets(text) {
    return text.replace(/([\[\]])/g, '\\$1');
}
/**
 * Converts content to AsciiDoc format based on detected format.
 * This is the unified entry point - everything becomes AsciiDoc.
 *
 * @param {string} content - Raw content; literal "\n" sequences are treated as line breaks
 *   by the AsciiDoc, Wikipedia and Markdown converters.
 * @param {*} format - One of the ContentFormat values from ../types; unknown values are
 *   handled as plain text.
 * @param {string} linkBaseURL - Passed through to the wikilink/nostr processors
 *   (neither currently reads it).
 * @param {{enableNostrAddresses?: boolean}} [options] - nostr: links are processed
 *   unless enableNostrAddresses is exactly false.
 * @returns {string} AsciiDoc text containing WIKILINK:/BOOKSTR:/MEDIA: placeholders.
 */
function convertToAsciidoc(content, format, linkBaseURL, options = {}) {
    let asciidoc = '';
    switch (format) {
        case types_1.ContentFormat.AsciiDoc:
            // For AsciiDoc content, ensure proper formatting
            asciidoc = content.replace(/\\n/g, '\n');
            // Ensure headers are on their own lines with proper spacing
            asciidoc = asciidoc.replace(/(\S[^\n]*)\n(={1,6}\s+[^\n]+)/g, '$1\n\n$2');
            break;
        case types_1.ContentFormat.Wikipedia:
            asciidoc = convertWikipediaToAsciidoc(content);
            break;
        case types_1.ContentFormat.Markdown:
            // Media links must become MEDIA: placeholders BEFORE the Markdown link/image
            // conversion. Previously they were first rewritten to link:/image:: macros and
            // the bare-URL media pass then produced "link:MEDIA:youtube:ID[text]".
            asciidoc = convertMarkdownToAsciidoc(processMediaUrlsInMarkdown(content));
            break;
        case types_1.ContentFormat.Plain:
        default:
            asciidoc = convertPlainTextToAsciidoc(content);
            break;
    }
    // Process special elements for all content types
    // Process wikilinks
    asciidoc = processWikilinks(asciidoc, linkBaseURL);
    // Process nostr: addresses if enabled
    if (options.enableNostrAddresses !== false) {
        asciidoc = processNostrAddresses(asciidoc, linkBaseURL);
    }
    // Media URLs that are still in [text](url) or ![alt](url) form (any content type)
    asciidoc = processMediaUrlsInMarkdown(asciidoc);
    // Process media URLs (YouTube, Spotify, video, audio files) - for bare URLs
    asciidoc = processMediaUrls(asciidoc);
    // Process bare URLs (convert to AsciiDoc links)
    asciidoc = processBareUrls(asciidoc);
    // Process hashtags (after URLs to avoid conflicts)
    asciidoc = processHashtags(asciidoc);
    return asciidoc;
}
/**
 * Converts Wikipedia markup to AsciiDoc format.
 * Handles Wikipedia-style headings, emphasis, external links and horizontal rules.
 * [[Page]] / [[Page|Display]] links are left untouched for processWikilinks().
 *
 * @param {string} content - Wikipedia markup; literal "\n" sequences become line breaks.
 * @returns {string} AsciiDoc text.
 */
function convertWikipediaToAsciidoc(content) {
    let asciidoc = content.replace(/\\n/g, '\n');
    // Headings: "== Heading ==" becomes "== Heading". The number of "=" signs is
    // kept (Wikipedia level 2 maps to AsciiDoc level 1); only the closing run is dropped.
    asciidoc = asciidoc.replace(/^(=+)\s+(.+?)\s+\1$/gm, (_match, equals, heading) => `${equals} ${heading.trim()}`);
    // Emphasis, longest marker first so ''''' and ''' are not consumed as ''.
    // MediaWiki: '''''x''''' = bold italic, '''x''' = bold, ''x'' = italic.
    // Single quotes are plain apostrophes/quotes and are deliberately left alone.
    asciidoc = asciidoc.replace(/'''''(.+?)'''''/g, '*_$1_*');
    asciidoc = asciidoc.replace(/'''(.+?)'''/g, '*$1*');
    asciidoc = asciidoc.replace(/''(.+?)''/g, '_$1_');
    // External links: [URL text] -> link:URL[text], [URL] -> link:URL[URL]
    asciidoc = asciidoc.replace(/\[(https?:\/\/[^\s\]]+)\s+([^\]]+)\]/g, 'link:$1[$2]');
    asciidoc = asciidoc.replace(/\[(https?:\/\/[^\s\]]+)\]/g, 'link:$1[$1]');
    // Lists (* and #), definition lists (:) and term lists (;) are not converted here.
    // Horizontal rules: ---- -> AsciiDoc '''
    // (runs after emphasis so the generated ''' is not read as a bold marker)
    asciidoc = asciidoc.replace(/^----+$/gm, "'''");
    return asciidoc;
}
/**
 * Converts one fenced Markdown code block to its AsciiDoc form.
 * Used as a String.prototype.replace callback.
 *
 * @param {string} fence - The full matched block including the fences.
 * @param {string|undefined} lang - Language tag after the opening fence, if any.
 * @param {string} code - Text between the fences.
 * @returns {string} AsciiDoc block, '' for an empty block, or the untouched
 *   fence when the content looks like prose rather than code.
 */
function convertFencedCodeBlock(fence, lang, code) {
    const trimmedCode = code.trim();
    if (trimmedCode.length === 0) {
        return '';
    }
    const langLower = lang ? lang.toLowerCase() : '';
    // If it's a latex code block, always treat as code (not math)
    if (langLower === 'latex') {
        return `[source,latex]\n----\n${trimmedCode}\n----`;
    }
    // Handle PlantUML diagrams
    if (langLower === 'plantuml' || langLower === 'puml') {
        // Already has an @start.../@end... marker: keep as written
        if (trimmedCode.includes('@start') || trimmedCode.includes('@end')) {
            return `[plantuml]\n----\n${trimmedCode}\n----`;
        }
        // If not, wrap it in @startuml/@enduml
        return `[plantuml]\n----\n@startuml\n${trimmedCode}\n@enduml\n----`;
    }
    // Handle BPMN diagrams (using PlantUML BPMN syntax)
    if (langLower === 'bpmn') {
        if (trimmedCode.includes('@startbpmn') && trimmedCode.includes('@endbpmn')) {
            return `[plantuml]\n----\n${trimmedCode}\n----`;
        }
        // If not, wrap it in @startbpmn/@endbpmn
        return `[plantuml]\n----\n@startbpmn\n${trimmedCode}\n@endbpmn\n----`;
    }
    // ABC notation (untagged block with an "X:<number>" line): keep as a plain
    // listing block, will be processed by music processor
    if (!lang && /^X:\s*\d+/m.test(trimmedCode)) {
        return `----\n${trimmedCode}\n----`;
    }
    // Heuristics: blocks that look like prose/Markdown are left as they were
    const hasCodePatterns = /[{}();=<>]|function|class|import|export|def |if |for |while |return |const |let |var |public |private |static |console\.log/.test(trimmedCode);
    const isLikelyText = /^[A-Za-z\s.,!?\-'"]+$/.test(trimmedCode) && trimmedCode.length > 50;
    const hasTooManySpaces = (trimmedCode.match(/\s{3,}/g) || []).length > 3;
    const hasMarkdownPatterns = /^#{1,6}\s|^\*\s|^\d+\.\s|^\>\s|^\|.*\|/.test(trimmedCode);
    if ((!hasCodePatterns && trimmedCode.length > 100) || isLikelyText || hasTooManySpaces || hasMarkdownPatterns) {
        return fence;
    }
    return `[source${lang ? ',' + lang : ''}]\n----\n${trimmedCode}\n----`;
}
/**
 * Rewrites Markdown list items as AsciiDoc list items, line by line.
 * Unordered markers (-, *, +) become "*", ordered markers ("1.") become ".".
 * Task-list lines are expected to be converted already and are passed through.
 *
 * @param {string} text - Text whose lines are separated by "\n".
 * @returns {string} Text with list items rewritten.
 */
function convertMarkdownLists(text) {
    const lines = text.split('\n');
    const processedLines = [];
    // null while outside a list, otherwise 'unordered' | 'ordered'
    let listType = null;
    lines.forEach((line, i) => {
        const prevLine = i > 0 ? processedLines[processedLines.length - 1] : '';
        const prevLineIsEmpty = prevLine.trim() === '';
        const unorderedMatch = line.match(/^(\s*)([-*+])\s+(.+)$/);
        const orderedMatch = line.match(/^(\s*)(\d+)\.\s+(.+)$/);
        const isTaskList = /^(\s*)([-*])\s+\[([ x])\]\s+(.+)$/.test(line);
        let kind = null;
        let itemMatch = null;
        if (unorderedMatch && !isTaskList) {
            kind = 'unordered';
            itemMatch = unorderedMatch;
        }
        else if (orderedMatch) {
            kind = 'ordered';
            itemMatch = orderedMatch;
        }
        if (kind !== null) {
            // Starting a new list - add blank line if previous line has content.
            // Switching list types inside a list adds no blank line.
            if (listType === null && processedLines.length > 0 && !prevLineIsEmpty) {
                processedLines.push('');
            }
            listType = kind;
            // One leading space per (up to) four spaces of Markdown indentation
            const asciidocIndent = ' '.repeat(Math.ceil(itemMatch[1].length / 4));
            const marker = kind === 'unordered' ? '*' : '.';
            processedLines.push(`${asciidocIndent}${marker} ${itemMatch[3]}`);
            return;
        }
        // Not a list item
        if (listType !== null && line.trim() !== '') {
            // End of list - insert a blank line when the following line has content
            if (i < lines.length - 1 && lines[i + 1].trim() !== '') {
                processedLines.push('');
            }
            listType = null;
        }
        processedLines.push(line);
    });
    return processedLines.join('\n');
}
/**
 * Converts one Markdown blockquote (consecutive "> " lines) to an AsciiDoc
 * quote block. The last line starting with an em dash or "--" is treated as
 * the attribution ("Author, Source"); lines after it are dropped.
 *
 * @param {string} block - The matched "> ..." lines.
 * @returns {string} "____" quote block, with a [quote, author, source] header
 *   when an attribution line was found.
 */
function convertBlockquote(block) {
    const lines = block.split('\n').map((line) => line.replace(/^>\s*/, ''));
    let attributionIndex = -1;
    for (let i = lines.length - 1; i >= 0; i--) {
        const candidate = lines[i].trim();
        if (candidate.startsWith('—') || candidate.startsWith('--')) {
            attributionIndex = i;
            break;
        }
    }
    // Without an attribution the whole block is the quote body (previously the
    // body was left empty in that case, which discarded the quoted text).
    const quoteBodyLines = attributionIndex === -1 ? lines : lines.slice(0, attributionIndex);
    const quoteContent = quoteBodyLines.filter((l) => l.trim() !== '').join('\n').trim();
    if (attributionIndex === -1) {
        return `____\n${quoteContent}\n____`;
    }
    const cleanedAttribution = lines[attributionIndex].trim().replace(/^[—-]+/, '').trim();
    let author = '';
    let source = '';
    // "Author, link:url[text]" - the character classes were double-escaped before
    // and could never match an actual link macro.
    const linkMatch = cleanedAttribution.match(/^(.*?),?\s*link:([^\[\]]+)\[([^\]]+)\]$/);
    if (linkMatch) {
        author = linkMatch[1].trim();
        source = `link:${linkMatch[2].trim()}[${linkMatch[3].trim()}]`;
    }
    else {
        const parts = cleanedAttribution.split(',').map((p) => p.trim());
        author = parts[0];
        if (parts.length > 1) {
            source = parts.slice(1).join(', ').trim();
        }
    }
    return `[quote, ${author}, ${source}]\n____\n${quoteContent}\n____`;
}
/**
 * Converts one Markdown pipe table to an AsciiDoc table with column alignment.
 *
 * @param {string} table - Matched header row, separator row and data rows,
 *   including the line breaks that follow the last row.
 * @returns {string} AsciiDoc table, or the input unchanged when it is not a
 *   valid table (fewer than two rows, or no "-" in the separator row).
 */
function convertMarkdownTable(table) {
    const rows = table.trim().split(/\r?\n/).filter((row) => row.trim());
    if (rows.length < 2) {
        return table;
    }
    const [headerRow, separatorRow, ...dataRows] = rows;
    if (!separatorRow.includes('-')) {
        return table;
    }
    // :--- = left, :---: = center, ---: = right, --- = default (left)
    const alignments = separatorRow
        .split('|')
        .filter((cell) => cell.trim())
        .map((cell) => {
        const spec = cell.trim();
        if (spec.startsWith(':') && spec.endsWith(':')) {
            return '^'; // center (AsciiDoc uses ^ for center)
        }
        if (spec.endsWith(':')) {
            return '>'; // right
        }
        return '<'; // explicit or default left
    });
    const colsLine = alignments.length > 0 ? `[cols="${alignments.join(',')}"]\n` : '';
    const body = [headerRow, ...dataRows].map((row) => `${row}\n`).join('');
    // The table pattern also consumes the line breaks after the last row; put
    // them back so the following text does not end up glued to "|===".
    const trailingBreaks = table.match(/[\r\n]*$/)[0];
    return `${colsLine}|===\n${body}|===${trailingBreaks}`;
}
/**
 * Inlines Markdown footnotes: removes "[^id]: text" definitions and replaces
 * each "[^id]" reference with footnote:[text]. References without a (non-empty)
 * definition are left as written.
 *
 * @param {string} text - Converted document text.
 * @returns {string} Text with footnotes inlined.
 */
function convertFootnotes(text) {
    // Map instead of a plain object so ids such as "constructor" cannot resolve
    // to inherited properties.
    const definitions = new Map();
    const withoutDefinitions = text.replace(/^\[\^([^\]]+)\]:\s*([\s\S]*?)(?=\n\[\^|\n---|\n##|\n###|\n####|\n#####|\n######|$)/gm, (_match, id, definition) => {
        definitions.set(id, definition.trim());
        return '';
    });
    return withoutDefinitions.replace(/\[\^([^\]]+)\]/g, (match, id) => {
        const definition = definitions.get(id);
        return definition ? `footnote:[${definition}]` : match;
    });
}
/**
 * Converts Markdown to AsciiDoc format.
 * Based on jumble's conversion patterns.
 *
 * NOTE(review): the file under review was damaged between the "==" spacing rule
 * and the emoji step (text between "<" and the next ">" was stripped). No
 * heading or emphasis conversion is visible in what survived; if upstream has
 * such rules at that position they need to be restored from the original source.
 *
 * @param {string} content - Markdown text; literal "\n" sequences become line breaks.
 * @returns {string} AsciiDoc text.
 */
function convertMarkdownToAsciidoc(content) {
    let asciidoc = content.replace(/\\n/g, '\n');
    // Fix spacing issues BEFORE converting links/images to avoid conflicts
    for (const [pattern, replacement] of MARKDOWN_SPACING_FIXES) {
        asciidoc = asciidoc.replace(pattern, replacement);
    }
    // Convert emoji shortcodes - only if node-emoji is available
    if (emoji && emoji.emojify) {
        asciidoc = emoji.emojify(asciidoc);
    }
    // Convert code blocks (handle both \n and \r\n line endings)
    // Special handling for diagram languages: latex, plantuml, puml, bpmn
    asciidoc = asciidoc.replace(/```(\w+)?\r?\n([\s\S]*?)\r?\n```/g, convertFencedCodeBlock);
    // Inline code wrapping a LaTeX formula, `$formula$`, becomes $formula$ (math).
    // A function replacer is used so "$" in the formula is not read as a
    // replacement pattern. Other inline code is already valid AsciiDoc.
    asciidoc = asciidoc.replace(/`(\$[^`]+\$)`/g, (_match, formula) => formula);
    // Convert nested image links first: [![alt](img)](url) - image wrapped in link
    // This must come before regular image processing
    asciidoc = asciidoc.replace(/\[!\[([^\]]*)\]\(([^)]+?)\)\]\(([^)]+?)\)/g, (_match, alt, imgUrl, linkUrl) => {
        const cleanImgUrl = imgUrl.trim();
        const cleanLinkUrl = linkUrl.trim();
        const cleanAlt = alt.trim();
        // A MEDIA: placeholder as link target is returned as-is
        if (cleanLinkUrl.startsWith('MEDIA:')) {
            return cleanLinkUrl;
        }
        // Link with an inline image inside; brackets in URLs are not escaped
        return `link:${cleanLinkUrl}[image:${cleanImgUrl}[${cleanAlt ? cleanAlt : 'link'}]]`;
    });
    // Convert images: ![alt text](url) or ![](url)
    // Format: image::url[alt,width=100%] - matching jumble's format
    // NOTE(review): regex literal was truncated in the file under review;
    // reconstructed from the comment ("negative lookbehind to avoid matching
    // nested image links") and the callback parameters - confirm against upstream.
    asciidoc = asciidoc.replace(/(?<!\[)!\[([^\]]*)\]\(([^)]+?)\)/g, (_match, alt, url) => {
        const trimmedUrl = url.trim();
        const cleanAlt = alt.trim();
        if (trimmedUrl.startsWith('MEDIA:')) {
            return trimmedUrl; // Return the placeholder as-is
        }
        // Clean URL (remove tracking parameters)
        const processedUrl = cleanUrl(trimmedUrl);
        return `image::${processedUrl}[${cleanAlt ? cleanAlt + ',' : ''}width=100%]`;
    });
    // Convert anchor links: [text](#section-id) - these are internal links
    // NOTE(review): regex literal was truncated in the file under review;
    // reconstructed from the comment and callback parameters - confirm against upstream.
    asciidoc = asciidoc.replace(/(?<!!)\[([^\]]+)\]\(#([^)]+)\)/g, (_match, text, anchor) => {
        // Anchor ids use the same normalization as d-tags:
        // lowercase, runs of other characters become "-", edge hyphens trimmed
        const normalizedAnchor = normalizeDtag(anchor.trim());
        return `<<${normalizedAnchor},${escapeBrackets(text.trim())}>>`;
    });
    // Convert links (but not images or anchor links, which we already processed)
    // NOTE(review): regex literal was truncated in the file under review;
    // reconstructed from the comment ("negative lookbehind to avoid matching
    // images", non-greedy URL) and callback parameters - confirm against upstream.
    asciidoc = asciidoc.replace(/(?<!!)\[([^\]]+)\]\(([^)]+?)\)/g, (_match, text, url) => {
        const trimmedUrl = url.trim();
        if (trimmedUrl.startsWith('MEDIA:')) {
            return trimmedUrl; // Return the placeholder as-is
        }
        // Clean URL (remove tracking parameters)
        let processedUrl = cleanUrl(trimmedUrl);
        // Handle WSS URLs: convert wss:// to https:// for display
        if (processedUrl.startsWith('wss://')) {
            processedUrl = processedUrl.replace(/^wss:\/\//, 'https://');
        }
        // Only the link text is escaped; brackets in the URL part are left alone
        return `link:${processedUrl}[${escapeBrackets(text.trim())}]`;
    });
    // Convert horizontal rules
    asciidoc = asciidoc.replace(/^---$/gm, "'''");
    asciidoc = asciidoc.replace(/^\*\*\*$/gm, "'''"); // Also handle ***
    // Task lists first (before regular lists): - [x] / - [ ] / * [x] / * [ ]
    // become AsciiDoc checklist items "* [x] text"
    asciidoc = asciidoc.replace(/^(\s*)([-*])\s+\[([ x])\]\s+(.+)$/gm, (_match, indent, _bullet, checked, text) => `${indent}* [${checked === 'x' ? 'x' : ' '}] ${text}`);
    // Regular lists are processed line by line to track list boundaries
    asciidoc = convertMarkdownLists(asciidoc);
    // Convert blockquotes with attribution
    asciidoc = asciidoc.replace(/^(>\s+.+(?:\n>\s+.+)*)/gm, convertBlockquote);
    // Convert tables with alignment support
    asciidoc = asciidoc.replace(/(\|.*\|[\r\n]+\|[\s\-\|:]*[\r\n]+(\|.*\|[\r\n]+)*)/g, convertMarkdownTable);
    // Convert footnotes
    return convertFootnotes(asciidoc);
}
/**
 * Converts plain text to AsciiDoc format.
 * Paragraph breaks (two or more newlines) are normalized to exactly two;
 * every single newline becomes an AsciiDoc hard line break (" +" at line end).
 *
 * @param {string} content - Plain text.
 * @returns {string} AsciiDoc text.
 */
function convertPlainTextToAsciidoc(content) {
    return content
        .replace(/\r\n/g, '\n') // Normalize line endings
        .replace(/\n\n+/g, '\n\n') // Normalize multiple newlines to double
        // Lookarounds instead of consuming the neighbouring characters: a consuming
        // pattern skipped every other break on one-character lines ("a\nb\nc").
        .replace(/(?<=[^\n])\n(?=[^\n])/g, ' +\n');
}
/**
 * Normalizes text to d-tag format: lowercase, every run of characters outside
 * a-z/0-9 replaced by a single "-", leading and trailing "-" removed.
 *
 * @param {string} text - Text to normalize.
 * @returns {string} Normalized tag.
 */
function normalizeDtag(text) {
    return text
        .toLowerCase()
        .replace(/[^a-z0-9]+/g, '-')
        .replace(/^-+|-+$/g, '');
}
/**
 * Processes wikilinks: [[target]] or [[target|display text]].
 * Converts them to placeholders so AsciiDoc does not interpret the brackets:
 *   [[book::...]]     -> BOOKSTR:<content>
 *   [[Target|Label]]  -> WIKILINK:<normalized target>|<label or target>
 *
 * @param {string} content - Text to process.
 * @param {string} linkBaseURL - Not read by this function.
 * @returns {string} Text with wikilinks replaced by placeholders.
 */
function processWikilinks(content, linkBaseURL) {
    // Bookstr macro wikilinks first, so the generic pattern does not consume them
    const withBookstr = content.replace(/\[\[book::([^\]]+)\]\]/g, (_match, bookContent) => `BOOKSTR:${bookContent.trim()}`);
    // Standard wikilinks: [[Target Page]] or [[target page|see this]]
    return withBookstr.replace(/\[\[([^|\]]+)(?:\|([^\]]+))?\]\]/g, (_match, target, displayText) => {
        const cleanTarget = target.trim();
        const cleanDisplay = displayText ? displayText.trim() : cleanTarget;
        return `WIKILINK:${normalizeDtag(cleanTarget)}|${cleanDisplay}`;
    });
}
/**
 * Processes nostr: addresses.
 * Only addresses with the "nostr:" prefix are converted - bare addresses are
 * left as plaintext. Output format: link:nostr:<id>[<id>].
 * Accepted bech32 prefixes: npub, nprofile, nevent, naddr, note.
 *
 * @param {string} content - Text to process.
 * @param {string} linkBaseURL - Not read by this function.
 * @returns {string} Text with nostr: addresses converted to link macros.
 */
function processNostrAddresses(content, linkBaseURL) {
    // prefix + separator "1" + at least 6 alphanumeric characters, case-insensitive
    const nostrPattern = /nostr:((?:npub|nprofile|nevent|naddr|note)1[a-z0-9]{6,})/gi;
    return content.replace(nostrPattern, (_match, bech32Id) => `link:nostr:${bech32Id}[${bech32Id}]`);
}
/**
 * Processes media URLs in markdown links and images.
 * Replaces the whole [text](url) / ![alt](url) construct with a placeholder:
 * MEDIA:youtube:<id>, MEDIA:spotify:<type>:<id>, MEDIA:video:<url>, MEDIA:audio:<url>.
 * Query strings of video/audio file URLs are dropped.
 *
 * @param {string} content - Text that may contain Markdown links/images.
 * @returns {string} Text with media links replaced by MEDIA: placeholders.
 */
function processMediaUrlsInMarkdown(content) {
    let processed = content;
    // YouTube URLs in markdown links: [text](youtube-url)
    processed = processed.replace(/\[([^\]]+)\]\((?:https?:\/\/)?(?:www\.)?(?:youtube\.com\/(?:watch\?v=|embed\/|v\/)|youtu\.be\/)([a-zA-Z0-9_-]{11})(?:[?&][^?\s<>"{}|\\^`\[\]()]*)?\)/gi, (_match, _text, videoId) => `MEDIA:youtube:${videoId}`);
    // Spotify URLs in markdown links: [text](spotify-url)
    processed = processed.replace(/\[([^\]]+)\]\((?:https?:\/\/)?(?:open\.)?spotify\.com\/(track|album|playlist|artist|episode|show)\/([a-zA-Z0-9]+)(?:[?&][^?\s<>"{}|\\^`\[\]()]*)?\)/gi, (_match, _text, type, id) => `MEDIA:spotify:${type}:${id}`);
    // Video files in markdown links/images: [text](video-url) or ![alt](video-url)
    processed = processed.replace(/[!]?\[([^\]]*)\]\((https?:\/\/[^\s<>"{}|\\^`\[\]()]+\.(mp4|webm|ogg|m4v|mov|avi|mkv|flv|wmv))(?:\?[^\s<>"{}|\\^`\[\]()]*)?\)/gi, (_match, _altOrText, url) => {
        const urlWithoutQuery = url.replace(/\?.*$/, ''); // Remove query params
        return `MEDIA:video:${urlWithoutQuery}`;
    });
    // Audio files in markdown links/images: [text](audio-url) or ![alt](audio-url)
    processed = processed.replace(/[!]?\[([^\]]*)\]\((https?:\/\/[^\s<>"{}|\\^`\[\]()]+\.(mp3|m4a|ogg|wav|flac|aac|opus|wma))(?:\?[^\s<>"{}|\\^`\[\]()]*)?\)/gi, (_match, _altOrText, url) => {
        const urlWithoutQuery = url.replace(/\?.*$/, ''); // Remove query params
        return `MEDIA:audio:${urlWithoutQuery}`;
    });
    return processed;
}
/**
 * Processes media URLs (YouTube, Spotify, video, audio files) in bare URLs.
 * Converts them to the same MEDIA: placeholders as processMediaUrlsInMarkdown().
 *
 * @param {string} content - Text to process.
 * @returns {string} Text with bare media URLs replaced by placeholders.
 */
function processMediaUrls(content) {
    let processed = content;
    // YouTube: youtube.com/watch?v=, youtu.be/, youtube.com/embed/, youtube.com/v/
    processed = processed.replace(/(?:https?:\/\/)?(?:www\.)?(?:youtube\.com\/(?:watch\?v=|embed\/|v\/)|youtu\.be\/)([a-zA-Z0-9_-]{11})(?:[?&][^?\s<>"{}|\\^`\[\]()]*)?/gi, (_match, videoId) => `MEDIA:youtube:${videoId}`);
    // Spotify: (open.)spotify.com/<type>/<id>
    processed = processed.replace(/(?:https?:\/\/)?(?:open\.)?spotify\.com\/(track|album|playlist|artist|episode|show)\/([a-zA-Z0-9]+)(?:[?&][^?\s<>"{}|\\^`\[\]()]*)?/gi, (_match, type, id) => `MEDIA:spotify:${type}:${id}`);
    // Video files. The lookbehind skips URLs that already sit inside a
    // MEDIA:video:/MEDIA:audio: placeholder; without it an existing placeholder
    // was wrapped again ("MEDIA:video:MEDIA:video:...").
    processed = processed.replace(/(?<!MEDIA:(?:video|audio):\S*)(?:https?:\/\/[^\s<>"{}|\\^`\[\]()]+)\.(mp4|webm|ogg|m4v|mov|avi|mkv|flv|wmv)(?:\?[^\s<>"{}|\\^`\[\]()]*)?/gi, (match) => {
        const url = match.replace(/\?.*$/, ''); // Remove query params for cleaner URL
        return `MEDIA:video:${url}`;
    });
    // Audio files. Same lookbehind; it also stops ".ogg" URLs that were just
    // tagged as video from being tagged a second time as audio.
    processed = processed.replace(/(?<!MEDIA:(?:video|audio):\S*)(?:https?:\/\/[^\s<>"{}|\\^`\[\]()]+)\.(mp3|m4a|ogg|wav|flac|aac|opus|wma)(?:\?[^\s<>"{}|\\^`\[\]()]*)?/gi, (match) => {
        const url = match.replace(/\?.*$/, ''); // Remove query params for cleaner URL
        return `MEDIA:audio:${url}`;
    });
    return processed;
}
/**
 * Processes bare URLs and converts them to AsciiDoc links.
 * Matches http://, https://, wss:// and www. URLs; bare image URLs become
 * block images. URLs inside listing blocks (----) and inline code (backticks)
 * are left untouched.
 *
 * @param {string} content - AsciiDoc text.
 * @returns {string} Text with bare URLs converted to link:/image:: macros.
 */
function processBareUrls(content) {
    // Protect code from URL processing: swap it for placeholders, restore at the end
    const codeBlockPlaceholders = [];
    const inlineCodePlaceholders = [];
    const stashCodeBlock = (block) => {
        codeBlockPlaceholders.push(block);
        return `__CODEBLOCK_${codeBlockPlaceholders.length - 1}__`;
    };
    let processed = content;
    // [source,...] listing blocks
    processed = processed.replace(/\[source[^\]]*\]\n----\n([\s\S]*?)\n----/g, stashCodeBlock);
    // Plain listing blocks (without [source]); spans that already contain a
    // placeholder are left alone
    processed = processed.replace(/----\n([\s\S]*?)\n----/g, (block) => (block.includes('__CODEBLOCK_') ? block : stashCodeBlock(block)));
    // Inline code
    processed = processed.replace(/`([^`]+)`/g, (code) => {
        inlineCodePlaceholders.push(code);
        return `__INLINECODE_${inlineCodePlaceholders.length - 1}__`;
    });
    // First, handle bare image URLs (before regular URLs)
    // Format: image::url[width=100%] - matching jumble's format
    // NOTE(review): the lookbehind of this regex literal was lost in the file
    // under review. The reconstruction skips URLs directly preceded by ":", "/",
    // ".", "@", "=", "[", a word character or "](" - i.e. URLs that are already
    // part of a link:/image: macro, a MEDIA: placeholder, a Markdown link or a
    // longer token. Confirm against upstream.
    const imageUrlPattern = /(?<![:\/.@=\[\w]|\]\()(https?:\/\/[^\s<>"{}|\\^`\[\]()]+\.(jpe?g|png|gif|webp|svg|bmp|ico))(?:\?[^\s<>"{}|\\^`\[\]()]*)?/gi;
    processed = processed.replace(imageUrlPattern, (_match, url) => {
        // Clean URL (remove tracking parameters); brackets are not escaped
        return `image::${cleanUrl(url)}[width=100%]`;
    });
    // Remaining bare URLs: http://, https://, wss://, or www.
    // NOTE(review): the lookbehind of this regex literal was lost as well. It is
    // reconstructed like the one above, plus the documented rule "don't match
    // immediately after colon-space" and the documented word boundary. Confirm
    // against upstream.
    const urlPattern = /(?<![:\/.@=\[\w]|\]\(|: )\b(https?:\/\/[^\s<>"{}|\\^`\[\]()]+|wss:\/\/[^\s<>"{}|\\^`\[\]()]+|www\.[^\s<>"{}|\\^`\[\]()]+)/gi;
    processed = processed.replace(urlPattern, (match, url) => {
        // Skip if this URL was already converted to an image
        if (match.includes('image::')) {
            return match;
        }
        // Ensure URL starts with http:// or https://
        let fullUrl = url;
        if (url.startsWith('www.')) {
            fullUrl = 'https://' + url;
        }
        else if (url.startsWith('wss://')) {
            // Convert wss:// to https:// for display
            fullUrl = url.replace(/^wss:\/\//, 'https://');
        }
        // Clean URL (remove tracking parameters)
        fullUrl = cleanUrl(fullUrl);
        // link:url[text] - the original text of the URL is used as link text
        return `link:${fullUrl}[${url}]`;
    });
    // Restore inline code, then code blocks. Function replacers keep "$&", "$1",
    // "$$" etc. inside the protected code from being read as replacement patterns.
    inlineCodePlaceholders.forEach((code, index) => {
        processed = processed.replace(`__INLINECODE_${index}__`, () => code);
    });
    codeBlockPlaceholders.forEach((code, index) => {
        processed = processed.replace(`__CODEBLOCK_${index}__`, () => code);
    });
    return processed;
}
/**
 * Processes hashtags.
 * Converts "#Tag" to hashtag:tag[#Tag] (macro target lowercased, label kept).
 * A hashtag is only recognized at the start of the text, after whitespace or
 * after ">", which keeps "#" fragments inside URLs untouched.
 *
 * @param {string} content - Text to process.
 * @returns {string} Text with hashtags converted to hashtag macros.
 */
function processHashtags(content) {
    return content.replace(/(^|\s|>)#([a-zA-Z0-9_]+)(?![a-zA-Z0-9_])/g, (_match, before, hashtag) => {
        // The character in front of the hashtag is kept exactly as it was
        return `${before}hashtag:${hashtag.toLowerCase()}[#${hashtag}]`;
    });
}