You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

692 lines
34 KiB

"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.convertToAsciidoc = convertToAsciidoc;
const types_1 = require("../types");
// Optional dependency: node-emoji is loaded on a best-effort basis.
// When the require call throws, `emoji` is set to null instead of failing
// module load, so code that uses it has to check for null first.
let emoji;
try {
emoji = require('node-emoji');
}
catch (e) {
// node-emoji could not be loaded; emoji shortcode conversion will be skipped
emoji = null;
}
/**
 * Exact query-parameter names that are treated as tracking parameters.
 * Built once at module load; a Set gives O(1) lookups and removes the
 * duplicate entries the inline list used to carry.
 */
const TRACKING_PARAM_NAMES = new Set([
    // Google Analytics & Ads
    'utm_source', 'utm_medium', 'utm_campaign', 'utm_term', 'utm_content',
    'utm_id', 'utm_source_platform', 'utm_creative_format', 'utm_marketing_tactic',
    'gclid', 'gclsrc', 'dclid', 'gbraid', 'wbraid',
    // Facebook
    'fbclid', 'fb_action_ids', 'fb_action_types', 'fb_source', 'fb_ref',
    // Twitter/X
    'twclid', 'twsrc',
    // Microsoft/Bing
    'msclkid',
    // Mailchimp
    'mc_cid', 'mc_eid',
    // Adobe
    'adobe_mc', 'adobe_mc_ref', 'adobe_mc_sdid',
    // HubSpot
    'hsCtaTracking', 'hsa_acc', 'hsa_cam', 'hsa_grp', 'hsa_ad', 'hsa_src', 'hsa_tgt', 'hsa_kw', 'hsa_mt', 'hsa_net', 'hsa_ver',
    // Marketo
    'mkt_tok',
    // YouTube
    'si', 'feature', 'kw', 'pp',
    // Other common tracking
    'ref', 'referrer', 'source', 'campaign', 'medium', 'content',
    'yclid', 'srsltid', '_ga', '_gl', 'igshid', 'epik', 'pk_campaign', 'pk_kwd',
    // Mobile app tracking
    'adjust_tracker', 'adjust_campaign', 'adjust_adgroup', 'adjust_creative',
    // Amazon
    'tag', 'linkCode', 'creative', 'creativeASIN', 'linkId', 'ascsubtag',
    // Affiliate tracking
    'aff_id', 'affiliate_id', 'aff', 'ref_', 'refer',
    // Social media share tracking
    'share', 'shared', 'sharesource'
]);
/**
 * Clean URL by removing tracking parameters.
 * Based on jumble's cleanUrl function.
 *
 * A parameter is removed when its name is listed in TRACKING_PARAM_NAMES or
 * starts with `utm_` or `_`. The query string is only modified when at least
 * one such parameter is present, so URLs without tracking parameters keep
 * their original query encoding.
 *
 * @param {string} url - Absolute URL to clean.
 * @returns {string} The serialised URL without tracking parameters, or the
 *   input unchanged when it cannot be parsed as an absolute URL.
 */
function cleanUrl(url) {
    let parsedUrl;
    try {
        parsedUrl = new URL(url);
    }
    catch {
        // If URL parsing fails, return original URL
        return url;
    }
    // Snapshot the distinct keys first: deleting while iterating the live
    // searchParams iterator would skip entries.
    const keys = new Set(parsedUrl.searchParams.keys());
    for (const key of keys) {
        if (TRACKING_PARAM_NAMES.has(key) || key.startsWith('utm_') || key.startsWith('_')) {
            parsedUrl.searchParams.delete(key);
        }
    }
    return parsedUrl.toString();
}
/**
 * Converts content to AsciiDoc format based on detected format.
 * This is the unified entry point - everything becomes AsciiDoc.
 *
 * @param {string} content - Raw source text. Null/undefined is treated as an empty string.
 * @param {*} format - One of the types_1.ContentFormat values; anything
 *   unrecognised is handled like plain text.
 * @param {string} linkBaseURL - Passed through to the wikilink and nostr processors.
 * @param {{enableNostrAddresses?: boolean}} [options] - nostr: address
 *   processing runs unless enableNostrAddresses is exactly false.
 * @returns {string} AsciiDoc text with wikilink/media placeholders, links and hashtags applied.
 */
function convertToAsciidoc(content, format, linkBaseURL, options = {}) {
    const source = content == null ? '' : String(content);
    let asciidoc = '';
    switch (format) {
        case types_1.ContentFormat.AsciiDoc:
            // For AsciiDoc content, ensure proper formatting
            asciidoc = source.replace(/\\n/g, '\n');
            // Ensure headers are on their own lines with proper spacing
            asciidoc = asciidoc.replace(/(\S[^\n]*)\n(={1,6}\s+[^\n]+)/g, (_match, before, header) => {
                return `${before}\n\n${header}`;
            });
            break;
        case types_1.ContentFormat.Wikipedia:
            asciidoc = convertWikipediaToAsciidoc(source);
            break;
        case types_1.ContentFormat.Markdown:
            // Media links must be detected while they are still in
            // [text](url) / ![alt](url) form; once the Markdown converter has
            // rewritten them to link:/image: macros the detector cannot match.
            asciidoc = convertMarkdownToAsciidoc(processMediaUrlsInMarkdown(source));
            break;
        case types_1.ContentFormat.Plain:
        default:
            asciidoc = convertPlainTextToAsciidoc(source);
            break;
    }
    // Process special elements for all content types
    // Process wikilinks
    asciidoc = processWikilinks(asciidoc, linkBaseURL);
    // Process nostr: addresses if enabled
    if (options.enableNostrAddresses !== false) {
        asciidoc = processNostrAddresses(asciidoc, linkBaseURL);
    }
    // Markdown-style media links that are still present (e.g. in AsciiDoc,
    // Wikipedia or plain-text input) are turned into placeholders here
    asciidoc = processMediaUrlsInMarkdown(asciidoc);
    // Process media URLs (YouTube, Spotify, video, audio files) - for bare URLs
    asciidoc = processMediaUrls(asciidoc);
    // Process bare URLs (convert to AsciiDoc links)
    asciidoc = processBareUrls(asciidoc);
    // Process hashtags (after URLs to avoid conflicts)
    asciidoc = processHashtags(asciidoc);
    return asciidoc;
}
/**
 * Converts Wikipedia (MediaWiki) markup to AsciiDoc format.
 * Handles headings, bold/italic quote markup, external links and horizontal rules.
 * Internal [[Page]] / [[Page|Display]] links are left untouched here.
 *
 * @param {string} content - MediaWiki source; literal "\n" sequences are
 *   turned into real newlines first.
 * @returns {string} AsciiDoc text.
 */
function convertWikipediaToAsciidoc(content) {
    let asciidoc = content.replace(/\\n/g, '\n');
    // Headings: "== Heading ==" (inner spaces optional) -> "== Heading".
    // The number of "=" signs is kept and the trailing marker is dropped.
    // [ \t] is used instead of \s so a heading can never span lines.
    asciidoc = asciidoc.replace(/^(=+)[ \t]*([^=\s].*?)[ \t]*\1[ \t]*$/gm, (_match, equals, heading) => {
        return `${equals} ${heading.trim()}`;
    });
    // Quote markup, longest marker first so shorter patterns do not split a
    // longer one: '''''x''''' = bold italic, '''x''' = bold, ''x'' = italic.
    // Single apostrophes are ordinary text and are never touched.
    asciidoc = asciidoc.replace(/'''''(.+?)'''''/g, '*_$1_*');
    asciidoc = asciidoc.replace(/'''(.+?)'''/g, '*$1*');
    asciidoc = asciidoc.replace(/''(.+?)''/g, '_$1_');
    // External links: [URL text] -> link:URL[text], [URL] -> link:URL[URL]
    asciidoc = asciidoc.replace(/\[(https?:\/\/[^\s\]]+)\s+([^\]]+)\]/g, 'link:$1[$2]');
    asciidoc = asciidoc.replace(/\[(https?:\/\/[^\s\]]+)\]/g, 'link:$1[$1]');
    // List markers (*, #, :, ;) are not converted by this function.
    // Horizontal rules: a line of four or more dashes -> AsciiDoc '''
    asciidoc = asciidoc.replace(/^----+$/gm, "'''");
    return asciidoc;
}
/**
 * Converts Markdown to AsciiDoc format.
 * Based on jumble's conversion patterns.
 *
 * Steps run in this order and depend on it: spacing fixes, headers, inline
 * emphasis, emoji shortcodes, fenced code blocks, inline math, images/links,
 * horizontal rules, lists, blockquotes, tables, footnotes.
 *
 * NOTE(review): inline conversions (headers, emphasis, ...) still run before
 * fenced code blocks are extracted, so text inside a fence is affected by
 * them. Protecting fences up front would change which blocks the "likely
 * text" heuristic accepts, so it is left for a separate change.
 *
 * @param {string} content - Markdown source; literal "\n" sequences are
 *   turned into real newlines first.
 * @returns {string} AsciiDoc text.
 */
function convertMarkdownToAsciidoc(content) {
    let asciidoc = content.replace(/\\n/g, '\n');
    // Fix spacing issues (but be careful not to break links and images)
    // Process these BEFORE converting links/images to avoid conflicts
    asciidoc = asciidoc.replace(/`([^`\n]+)`\s*\(([^)]+)\)/g, '`$1` ($2)');
    asciidoc = asciidoc.replace(/([a-zA-Z0-9])`([^`\n]+)`([a-zA-Z0-9])/g, '$1 `$2` $3');
    asciidoc = asciidoc.replace(/([a-zA-Z0-9])`([^`\n]+)`\s*\(/g, '$1 `$2` (');
    asciidoc = asciidoc.replace(/\)`([^`\n]+)`([a-zA-Z0-9])/g, ') `$1` $2');
    asciidoc = asciidoc.replace(/([a-zA-Z0-9])\)([a-zA-Z0-9])/g, '$1) $2');
    // Add a space before "==" when it directly follows a letter or digit
    asciidoc = asciidoc.replace(/([a-zA-Z0-9])(?<!\]\()==/g, '$1 ==');
    // Note: nostr: addresses are processed later in processNostrAddresses
    // Convert headers: "## Title" -> "== Title ==" (same number of markers).
    // [ \t]+ keeps an empty "##" line from swallowing the following line.
    asciidoc = asciidoc.replace(/^(#{1,6})[ \t]+(.+)$/gm, (_match, hashes, title) => {
        const marker = '='.repeat(hashes.length);
        return `${marker} ${title} ${marker}`;
    });
    // Normalise spacing of "== Title ==" forms; restricted to a single line so
    // the text between two separate headers can never be collapsed into one line
    asciidoc = asciidoc.replace(/^==[ \t]+(.+?)[ \t]+==$/gm, '== $1 ==');
    asciidoc = asciidoc.replace(/[ \t]==[ \t]+([^=\n]+?)[ \t]+==[ \t]/g, ' == $1 == ');
    // Convert emphasis.
    // Underscore bold is first rewritten to asterisk bold. Single-asterisk
    // italic is converted BEFORE bold so that the bold result (*text*) is not
    // read again as italic. An opening "*" followed by whitespace (a list
    // bullet) is never treated as emphasis.
    asciidoc = asciidoc.replace(/__(.+?)__/g, '**$1**');
    asciidoc = asciidoc.replace(/(?<!\*)\*(?![*\s])(.+?)(?<![*\s])\*(?!\*)/g, '_$1_'); // Italic
    asciidoc = asciidoc.replace(/\*\*(.+?)\*\*/g, '*$1*'); // Bold
    asciidoc = asciidoc.replace(/~~(.+?)~~/g, '[line-through]#$1#'); // Strikethrough
    // Text highlighting: the opening "==" must be followed by text and must
    // not be part of a longer "=" run, so "== Title ==" headers are left alone
    asciidoc = asciidoc.replace(/(?<!=)==(?=[^\s=])(.+?)[ \t]*==(?!=)/g, '[highlight]#$1#');
    // Subscript / superscript: content may not contain whitespace, and a "^"
    // directly after "[" is a footnote marker rather than superscript
    asciidoc = asciidoc.replace(/(?<!~)~([^\s~]+)~(?!~)/g, '[subscript]#$1#');
    asciidoc = asciidoc.replace(/(?<!\[)\^([^\s^\]]+)\^/g, '[superscript]#$1#');
    // Convert emoji shortcodes to Unicode (e.g., :tent: -> 🏕)
    // Only convert if node-emoji is available
    if (emoji && emoji.emojify) {
        asciidoc = emoji.emojify(asciidoc);
    }
    // Convert code blocks (handle both \n and \r\n line endings)
    // Special handling for diagram languages: latex, plantuml, puml, bpmn
    asciidoc = asciidoc.replace(/```(\w+)?\r?\n([\s\S]*?)\r?\n```/g, (_match, lang, code) => {
        const trimmedCode = code.trim();
        if (trimmedCode.length === 0) {
            return '';
        }
        const langLower = lang ? lang.toLowerCase() : '';
        // If it's a latex code block, always treat as code (not math)
        if (langLower === 'latex') {
            return `[source,latex]\n----\n${trimmedCode}\n----`;
        }
        // Handle PlantUML diagrams
        if (langLower === 'plantuml' || langLower === 'puml') {
            // Keep the body as-is when it already carries @start/@end markers
            if (trimmedCode.includes('@start') || trimmedCode.includes('@end')) {
                return `[plantuml]\n----\n${trimmedCode}\n----`;
            }
            // If not, wrap it in @startuml/@enduml
            return `[plantuml]\n----\n@startuml\n${trimmedCode}\n@enduml\n----`;
        }
        // Handle BPMN diagrams (using PlantUML BPMN syntax)
        if (langLower === 'bpmn') {
            // Check if it already has @startbpmn/@endbpmn
            if (trimmedCode.includes('@startbpmn') && trimmedCode.includes('@endbpmn')) {
                return `[plantuml]\n----\n${trimmedCode}\n----`;
            }
            // If not, wrap it in @startbpmn/@endbpmn
            return `[plantuml]\n----\n@startbpmn\n${trimmedCode}\n@endbpmn\n----`;
        }
        // ABC notation (a line starting with X:<number>) is kept as a plain listing block
        if (!lang && /^X:\s*\d+/m.test(trimmedCode)) {
            return `----\n${trimmedCode}\n----`;
        }
        // Heuristics: fences that look like prose or Markdown are left unconverted
        const hasCodePatterns = /[{}();=<>]|function|class|import|export|def |if |for |while |return |const |let |var |public |private |static |console\.log/.test(trimmedCode);
        const isLikelyText = /^[A-Za-z\s.,!?\-'"]+$/.test(trimmedCode) && trimmedCode.length > 50;
        const hasTooManySpaces = (trimmedCode.match(/\s{3,}/g) || []).length > 3;
        const hasMarkdownPatterns = /^#{1,6}\s|^\*\s|^\d+\.\s|^\>\s|^\|.*\|/.test(trimmedCode);
        if ((!hasCodePatterns && trimmedCode.length > 100) || isLikelyText || hasTooManySpaces || hasMarkdownPatterns) {
            return _match;
        }
        return `[source${lang ? ',' + lang : ''}]\n----\n${trimmedCode}\n----`;
    });
    // Inline code that wraps a LaTeX formula (`$formula$`) becomes plain
    // $formula$ so it is treated as math rather than code
    asciidoc = asciidoc.replace(/`(\$[^`]+\$)`/g, (_match, formula) => formula);
    // Convert nested image links first: [![alt](img)](url) - image wrapped in link
    // This must come before regular image processing
    asciidoc = asciidoc.replace(/\[!\[([^\]]*)\]\(([^)]+?)\)\]\(([^)]+?)\)/g, (_match, alt, imgUrl, linkUrl) => {
        const cleanImgUrl = imgUrl.trim();
        const cleanLinkUrl = linkUrl.trim();
        const cleanAlt = alt.trim();
        // A MEDIA: placeholder as link target is returned as-is
        if (cleanLinkUrl.startsWith('MEDIA:')) {
            return cleanLinkUrl;
        }
        return `link:${cleanLinkUrl}[image:${cleanImgUrl}[${cleanAlt ? cleanAlt : 'link'}]]`;
    });
    // Convert images (but not nested ones, which we already processed)
    // Match: ![alt text](url) or ![](url) - handle empty alt text
    // Format: image::url[alt,width=100%] - matching jumble's format
    asciidoc = asciidoc.replace(/(?<!\[)!\[([^\]]*)\]\(([^)]+?)\)/g, (_match, alt, url) => {
        let processedUrl = url.trim();
        const cleanAlt = alt.trim();
        if (processedUrl.startsWith('MEDIA:')) {
            return processedUrl; // Return the placeholder as-is
        }
        // Clean URL (remove tracking parameters)
        processedUrl = cleanUrl(processedUrl);
        return `image::${processedUrl}[${cleanAlt ? cleanAlt + ',' : ''}width=100%]`;
    });
    // Convert anchor links: [text](#section-id) - these are internal links
    asciidoc = asciidoc.replace(/(?<!!)\[([^\]]+)\]\(#([^)]+)\)/g, (_match, text, anchor) => {
        const cleanText = text.trim();
        const cleanAnchor = anchor.trim();
        // Normalise the anchor ID: lowercase, runs of other characters become hyphens
        const normalizedAnchor = cleanAnchor.toLowerCase().replace(/[^a-z0-9]+/g, '-').replace(/^-+|-+$/g, '');
        const escapedText = cleanText.replace(/([\[\]])/g, '\\$1');
        return `<<${normalizedAnchor},${escapedText}>>`;
    });
    // Convert links (but not images or anchor links, which we already processed)
    // Match: [text](url) - the negative lookbehind skips images
    asciidoc = asciidoc.replace(/(?<!!)\[([^\]]+)\]\(([^)]+?)\)/g, (_match, text, url) => {
        let processedUrl = url.trim();
        const cleanText = text.trim();
        if (processedUrl.startsWith('MEDIA:')) {
            return processedUrl; // Return the placeholder as-is
        }
        // Clean URL (remove tracking parameters)
        processedUrl = cleanUrl(processedUrl);
        // Handle WSS URLs: convert wss:// to https:// for display
        if (processedUrl.startsWith('wss://')) {
            processedUrl = processedUrl.replace(/^wss:\/\//, 'https://');
        }
        // Only brackets in the link text are escaped; the URL is emitted as-is
        const escapedText = cleanText.replace(/([\[\]])/g, '\\$1');
        return `link:${processedUrl}[${escapedText}]`;
    });
    // Convert horizontal rules
    asciidoc = asciidoc.replace(/^---$/gm, '\'\'\'');
    asciidoc = asciidoc.replace(/^\*\*\*$/gm, '\'\'\''); // Also handle ***
    // Task lists first (before regular lists): - [x] / - [ ] / * [x] / * [ ]
    asciidoc = asciidoc.replace(/^(\s*)([-*])\s+\[([ x])\]\s+(.+)$/gm, (_match, indent, _bullet, checked, text) => {
        return `${indent}* [${checked === 'x' ? 'x' : ' '}] ${text}`;
    });
    // Convert lists line by line. Unordered items become "* text", ordered
    // items ". text"; a blank line is inserted before a list that directly
    // follows other content. Task-list lines were handled above and are
    // passed through unchanged.
    const lines = asciidoc.split('\n');
    const processedLines = [];
    let inList = false;
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        const isTaskList = /^(\s*)([-*])\s+\[([ x])\]\s+(.+)$/.test(line);
        const unorderedMatch = isTaskList ? null : line.match(/^(\s*)([-*+])\s+(.+)$/);
        const orderedMatch = unorderedMatch ? null : line.match(/^(\s*)(\d+)\.\s+(.+)$/);
        const itemMatch = unorderedMatch || orderedMatch;
        if (itemMatch) {
            const [, indent, , text] = itemMatch;
            // One leading space per started group of four source spaces
            const asciidocIndent = ' '.repeat(Math.ceil(indent.length / 4));
            if (!inList) {
                // Starting a new list - add blank line if previous line has content
                const prevLine = processedLines.length > 0 ? processedLines[processedLines.length - 1] : '';
                if (processedLines.length > 0 && prevLine.trim() !== '') {
                    processedLines.push('');
                }
                inList = true;
            }
            processedLines.push(`${asciidocIndent}${unorderedMatch ? '*' : '.'} ${text}`);
        }
        else {
            if (inList && line.trim() !== '') {
                // End of list - a blank line is added when the following line is not empty
                if (i < lines.length - 1 && lines[i + 1].trim() !== '') {
                    processedLines.push('');
                }
                inList = false;
            }
            processedLines.push(line);
        }
    }
    asciidoc = processedLines.join('\n');
    // Convert blockquotes, with optional attribution on a line starting with "—" or "--"
    asciidoc = asciidoc.replace(/^(>\s+.+(?:\n>\s+.+)*)/gm, (match) => {
        const quoteLines = match.split('\n').map(line => line.replace(/^>\s*/, ''));
        // Without an attribution line the whole quote is the body
        let quoteBodyLines = quoteLines;
        let attributionLine;
        for (let i = quoteLines.length - 1; i >= 0; i--) {
            const line = quoteLines[i].trim();
            if (line.startsWith('—') || line.startsWith('--')) {
                attributionLine = line;
                quoteBodyLines = quoteLines.slice(0, i);
                break;
            }
        }
        const quoteContent = quoteBodyLines.filter(l => l.trim() !== '').join('\n').trim();
        if (!attributionLine) {
            return `____\n${quoteContent}\n____`;
        }
        const cleanedAttribution = attributionLine.replace(/^[—-]+/, '').trim();
        let author = '';
        let source = '';
        // Markdown links were already converted, so a linked source looks like link:url[text]
        const linkMatch = cleanedAttribution.match(/^(.*?),?\s*link:([^\[\]]+)\[([^\]]+)\]$/);
        if (linkMatch) {
            author = linkMatch[1].trim();
            source = `link:${linkMatch[2].trim()}[${linkMatch[3].trim()}]`;
        }
        else {
            const parts = cleanedAttribution.split(',').map(p => p.trim());
            author = parts[0];
            if (parts.length > 1) {
                source = parts.slice(1).join(', ').trim();
            }
        }
        return `[quote, ${author}, ${source}]\n____\n${quoteContent}\n____`;
    });
    // Convert tables with alignment support.
    // The match ends at the last table row without consuming the following
    // newline(s), so the text after the table stays on its own line and a
    // final row at end of input is still part of the table.
    // NOTE(review): rows are copied verbatim, including a trailing "|" —
    // confirm how the AsciiDoc renderer in use treats that.
    asciidoc = asciidoc.replace(/\|.*\|[ \t]*\r?\n\|[ \t\-|:]*(?:\r?\n\|.*\|[ \t]*)*/g, (match) => {
        const rows = match.split(/\r?\n/).filter(line => line.trim());
        if (rows.length < 2) {
            return match;
        }
        const [headerRow, separatorRow, ...dataRows] = rows;
        if (!separatorRow.includes('-')) {
            return match;
        }
        // Parse alignment from separator row
        // :--- = left, :----: = center, ---: = right, --- = default (left)
        const alignments = separatorRow
            .split('|')
            .filter(c => c.trim())
            .map(cell => {
                const trimmed = cell.trim();
                if (trimmed.startsWith(':') && trimmed.endsWith(':')) {
                    return '^';
                }
                return trimmed.endsWith(':') ? '>' : '<';
            });
        const tableLines = [];
        if (alignments.length > 0) {
            tableLines.push(`[cols="${alignments.join(',')}"]`);
        }
        tableLines.push('|===', headerRow, ...dataRows, '|===');
        return tableLines.join('\n');
    });
    // Convert footnotes: collect "[^id]: text" definitions, then replace each
    // "[^id]" reference that has a definition with an inline footnote.
    // A Map is used so ids such as "constructor" cannot hit inherited properties.
    const footnoteDefinitions = new Map();
    asciidoc = asciidoc.replace(/^\[\^([^\]]+)\]:\s*([\s\S]*?)(?=\n\[\^|\n---|\n##|\n###|\n####|\n#####|\n######|$)/gm, (_match, id, text) => {
        footnoteDefinitions.set(id, text.trim());
        return '';
    });
    asciidoc = asciidoc.replace(/\[\^([^\]]+)\]/g, (match, id) => {
        const definition = footnoteDefinitions.get(id);
        return definition ? `footnote:[${definition}]` : match;
    });
    return asciidoc;
}
/**
 * Converts plain text to AsciiDoc format.
 * Line endings are normalised to "\n", runs of blank lines collapse to one
 * paragraph break, and every single newline between two non-empty lines
 * becomes an AsciiDoc hard line break (" +\n").
 *
 * @param {string} content - Plain text.
 * @returns {string} AsciiDoc text.
 */
function convertPlainTextToAsciidoc(content) {
    return content
        .replace(/\r\n/g, '\n') // Normalize line endings
        .replace(/\n\n+/g, '\n\n') // Normalize multiple newlines to double
        // Lookarounds keep the neighbouring characters out of the match, so
        // consecutive one-character lines ("a\nb\nc") are all handled
        .replace(/(?<=[^\n])\n(?=[^\n])/g, ' +\n');
}
/**
 * Normalizes text to d-tag format: lowercase, every run of characters other
 * than a-z and 0-9 replaced by a single hyphen, no leading or trailing hyphens.
 *
 * @param {string} text
 * @returns {string}
 */
function normalizeDtag(text) {
    const lowered = text.toLowerCase();
    const hyphenated = lowered.replace(/[^a-z0-9]+/g, '-');
    return hyphenated.replace(/^-+|-+$/g, '');
}
/**
 * Processes wikilinks: [[target]] or [[target|display text]].
 * Bookstr macros ([[book::...]]) become "BOOKSTR:<content>"; every other
 * wikilink becomes "WIKILINK:<d-tag>|<display>", a bracket-free placeholder
 * that AsciiDoc will not interpret.
 *
 * @param {string} content
 * @param {string} linkBaseURL - Accepted for signature parity; not used here.
 * @returns {string}
 */
function processWikilinks(content, linkBaseURL) {
    // [[book::...]] -> BOOKSTR:...
    const toBookstr = (_whole, body) => `BOOKSTR:${body.trim()}`;
    // [[Target Page]] / [[target page|see this]] -> WIKILINK:dtag|display
    const toWikilink = (_whole, target, displayText) => {
        const page = target.trim();
        const label = displayText ? displayText.trim() : page;
        return `WIKILINK:${normalizeDtag(page)}|${label}`;
    };
    // Book macros are handled first so the generic pattern never sees them
    return content
        .replace(/\[\[book::([^\]]+)\]\]/g, toBookstr)
        .replace(/\[\[([^|\]]+)(?:\|([^\]]+))?\]\]/g, toWikilink);
}
/**
 * Processes nostr: addresses.
 * Only identifiers written with the "nostr:" prefix are converted; bare
 * identifiers stay plain text. Accepted identifier prefixes are npub,
 * nprofile, nevent, naddr and note, followed by "1" and at least six
 * letters or digits. Output format: link:nostr:<id>[<id>].
 *
 * @param {string} content
 * @param {string} linkBaseURL - Accepted for signature parity; not used here.
 * @returns {string}
 */
function processNostrAddresses(content, linkBaseURL) {
    const addressRegex = /nostr:((?:npub|nprofile|nevent|naddr|note)1[a-z0-9]{6,})/gi;
    // $1 is the identifier without the "nostr:" prefix
    return content.replace(addressRegex, 'link:nostr:$1[$1]');
}
/**
 * Processes media URLs in markdown links and images.
 * Each matching [text](url) - and, for video/audio files, ![alt](url) - is
 * replaced as a whole by a MEDIA: placeholder:
 *   YouTube -> MEDIA:youtube:<videoId>
 *   Spotify -> MEDIA:spotify:<type>:<id>
 *   video   -> MEDIA:video:<url without query>
 *   audio   -> MEDIA:audio:<url without query>
 *
 * @param {string} content
 * @returns {string}
 */
function processMediaUrlsInMarkdown(content) {
    // Drops everything from the first "?" onwards
    const withoutQuery = (mediaUrl) => mediaUrl.replace(/\?.*$/, '');
    // Applied in order; earlier rules win for URLs that several rules could match
    const rules = [
        // YouTube URLs in markdown links: [text](youtube-url)
        [
            /\[([^\]]+)\]\((?:https?:\/\/)?(?:www\.)?(?:youtube\.com\/(?:watch\?v=|embed\/|v\/)|youtu\.be\/)([a-zA-Z0-9_-]{11})(?:[?&][^?\s<>"{}|\\^`\[\]()]*)?\)/gi,
            (_whole, _text, videoId) => `MEDIA:youtube:${videoId}`
        ],
        // Spotify URLs in markdown links: [text](spotify-url)
        [
            /\[([^\]]+)\]\((?:https?:\/\/)?(?:open\.)?spotify\.com\/(track|album|playlist|artist|episode|show)\/([a-zA-Z0-9]+)(?:[?&][^?\s<>"{}|\\^`\[\]()]*)?\)/gi,
            (_whole, _text, kind, id) => `MEDIA:spotify:${kind}:${id}`
        ],
        // Video files in markdown links/images
        [
            /[!]?\[([^\]]*)\]\((https?:\/\/[^\s<>"{}|\\^`\[\]()]+\.(mp4|webm|ogg|m4v|mov|avi|mkv|flv|wmv))(?:\?[^\s<>"{}|\\^`\[\]()]*)?\)/gi,
            (_whole, _label, mediaUrl) => `MEDIA:video:${withoutQuery(mediaUrl)}`
        ],
        // Audio files in markdown links/images
        [
            /[!]?\[([^\]]*)\]\((https?:\/\/[^\s<>"{}|\\^`\[\]()]+\.(mp3|m4a|ogg|wav|flac|aac|opus|wma))(?:\?[^\s<>"{}|\\^`\[\]()]*)?\)/gi,
            (_whole, _label, mediaUrl) => `MEDIA:audio:${withoutQuery(mediaUrl)}`
        ]
    ];
    return rules.reduce((text, [pattern, replacer]) => text.replace(pattern, replacer), content);
}
/**
 * Processes media URLs (YouTube, Spotify, video, audio files) in bare URLs.
 * Converts them to MEDIA: placeholders:
 *   YouTube -> MEDIA:youtube:<videoId>
 *   Spotify -> MEDIA:spotify:<type>:<id>
 *   video   -> MEDIA:video:<url without query>
 *   audio   -> MEDIA:audio:<url without query>
 *
 * The function is idempotent: a URL that is already part of a MEDIA:video: /
 * MEDIA:audio: placeholder is not wrapped a second time.
 *
 * @param {string} content
 * @returns {string}
 */
function processMediaUrls(content) {
    // YouTube: youtube.com/watch?v=, youtu.be/, youtube.com/embed/, youtube.com/v/
    // (optionally on the www., m. or music. sub-domain).
    // The leading lookbehind makes sure the match starts at the beginning of
    // the URL; otherwise an unknown sub-domain or an enclosing URL would leave
    // a dangling "https://xyz." stub in front of the placeholder.
    content = content.replace(/(?<![\w.\/-])(?:https?:\/\/)?(?:(?:www|m|music)\.)?(?:youtube\.com\/(?:watch\?v=|embed\/|v\/)|youtu\.be\/)([a-zA-Z0-9_-]{11})(?:[?&][^?\s<>"{}|\\^`\[\]()]*)?/gi, (_match, videoId) => {
        return `MEDIA:youtube:${videoId}`;
    });
    // Spotify: spotify.com/<type>/<id>, optionally on open.
    content = content.replace(/(?<![\w.\/-])(?:https?:\/\/)?(?:open\.)?spotify\.com\/(track|album|playlist|artist|episode|show)\/([a-zA-Z0-9]+)(?:[?&][^?\s<>"{}|\\^`\[\]()]*)?/gi, (_match, type, id) => {
        return `MEDIA:spotify:${type}:${id}`;
    });
    // Drops everything from the first "?" onwards
    const withoutQuery = (mediaUrl) => mediaUrl.replace(/\?.*$/, '');
    // Video files. (?![\w\/]) requires the extension to end the path, so
    // "a.mp4/extra" or "a.mp4x" are not cut in the middle.
    content = content.replace(/(?<!MEDIA:(?:video|audio):)https?:\/\/[^\s<>"{}|\\^`\[\]()]+\.(?:mp4|webm|ogg|m4v|mov|avi|mkv|flv|wmv)(?![\w\/])(?:\?[^\s<>"{}|\\^`\[\]()]*)?/gi, (match) => {
        return `MEDIA:video:${withoutQuery(match)}`;
    });
    // Audio files (same rules as video)
    content = content.replace(/(?<!MEDIA:(?:video|audio):)https?:\/\/[^\s<>"{}|\\^`\[\]()]+\.(?:mp3|m4a|ogg|wav|flac|aac|opus|wma)(?![\w\/])(?:\?[^\s<>"{}|\\^`\[\]()]*)?/gi, (match) => {
        return `MEDIA:audio:${withoutQuery(match)}`;
    });
    return content;
}
/**
 * Processes bare URLs and converts them to AsciiDoc links.
 * Matches http://, https://, wss://, and www. URLs; bare image URLs become
 * image::url[width=100%] instead of links.
 *
 * Left untouched:
 *  - listing blocks (---- ... ----, with or without [source]) and inline code
 *  - existing link:/image:/video:/audio: macros and MEDIA: placeholders
 *  - URLs directly after "](" (markdown link target) or after ": "
 *
 * Trailing sentence punctuation (.,;:!?') is kept outside the generated macro.
 *
 * @param {string} content
 * @returns {string}
 */
function processBareUrls(content) {
    // Protected spans are swapped for \u0000<index>\u0001 tokens. \u0000 is
    // excluded from every URL character class below, so a URL can never
    // swallow a token that directly follows it.
    const stash = [];
    const protect = (text) => {
        stash.push(text);
        return `\u0000${stash.length - 1}\u0001`;
    };
    // Splits "https://x.com/page." into the URL and the trailing punctuation
    const splitTrailingPunctuation = (text) => {
        const trailing = (text.match(/[.,;:!?']+$/) || [''])[0];
        if (trailing.length === 0 || trailing.length === text.length) {
            return { url: text, trailing: '' };
        }
        return { url: text.slice(0, text.length - trailing.length), trailing };
    };
    // Listing blocks, with or without a preceding [source...] line
    content = content.replace(/(?:\[source[^\]]*\]\n)?----\n[\s\S]*?\n----/g, (match) => protect(match));
    // Inline code
    content = content.replace(/`[^`]+`/g, (match) => protect(match));
    // Macros produced by earlier conversion steps; re-linking the URL inside
    // them would yield "link:link:..." / "image::link:..." output
    content = content.replace(/\b(?:link|image|video|audio):{1,2}[^\s\[\]\u0000]+\[(?:\\.|[^\]\\])*\]/g, (match) => protect(match));
    content = content.replace(/MEDIA:[a-z]+:[^\s<>"{}|\\^`\[\]()\u0000]+/g, (match) => protect(match));
    // Bare image URLs (before regular URLs). The generated macro is protected
    // as well so the link pass below does not see the URL again. The whole
    // match, including its query string, goes through cleanUrl so that only
    // tracking parameters are removed.
    const imageUrlPattern = /(?<!\]\()\bhttps?:\/\/[^\s<>"{}|\\^`\[\]()\u0000]+\.(?:jpe?g|png|gif|webp|svg|bmp|ico)(?![\w\/])(?:\?[^\s<>"{}|\\^`\[\]()\u0000]*)?/gi;
    content = content.replace(imageUrlPattern, (match) => {
        const { url, trailing } = splitTrailingPunctuation(match);
        return protect(`image::${cleanUrl(url)}[width=100%]`) + trailing;
    });
    // Remaining bare URLs. The third lookbehind requires the match to begin at
    // the start of a URL, so the "www." alternative cannot match inside an
    // "https://www..." URL that was deliberately skipped.
    const urlPattern = /(?<!\]\()(?<!:\s)(?<![\w.\/@-])(?:https?:\/\/|wss:\/\/|www\.)[^\s<>"{}|\\^`\[\]()\u0000]+/gi;
    content = content.replace(urlPattern, (match) => {
        const { url, trailing } = splitTrailingPunctuation(match);
        let fullUrl = url;
        if (/^www\./i.test(url)) {
            fullUrl = 'https://' + url;
        }
        else if (/^wss:\/\//i.test(url)) {
            // Convert wss:// to https:// for display
            fullUrl = url.replace(/^wss:\/\//i, 'https://');
        }
        // Clean URL (remove tracking parameters); the link text keeps the URL as written
        return `link:${cleanUrl(fullUrl)}[${url}]${trailing}`;
    });
    // Restore newest-first: a later span may contain tokens of earlier spans.
    // split/join inserts the text literally ("$" sequences are not interpreted).
    for (let i = stash.length - 1; i >= 0; i--) {
        content = content.split(`\u0000${i}\u0001`).join(stash[i]);
    }
    return content;
}
/**
 * Processes hashtags.
 * A "#" followed by letters, digits or underscores is converted to
 * hashtag:<lowercased tag>[#<tag as written>] when it stands at the start of
 * the text, after whitespace (including a newline) or after ">". Whatever
 * preceded the "#" is kept in front of the result.
 *
 * @param {string} content
 * @returns {string}
 */
function processHashtags(content) {
    const hashtagRegex = /(^|\s|>)#([a-zA-Z0-9_]+)(?![a-zA-Z0-9_])/g;
    return content.replace(hashtagRegex, (_whole, lead, tag) => `${lead}hashtag:${tag.toLowerCase()}[#${tag}]`);
}