|
|
"use strict"; |
|
|
Object.defineProperty(exports, "__esModule", { value: true }); |
|
|
exports.convertToAsciidoc = convertToAsciidoc; |
|
|
const types_1 = require("../types"); |
|
|
// Import node-emoji if available (optional dependency) |
|
|
let emoji; |
|
|
try { |
|
|
emoji = require('node-emoji'); |
|
|
} |
|
|
catch (e) { |
|
|
// node-emoji not available, emoji conversion will be skipped |
|
|
emoji = null; |
|
|
} |
|
|
/** |
|
|
* Clean URL by removing tracking parameters |
|
|
* Based on jumble's cleanUrl function |
|
|
*/ |
|
|
function cleanUrl(url) { |
|
|
try { |
|
|
const parsedUrl = new URL(url); |
|
|
// List of tracking parameter prefixes and exact names to remove |
|
|
const trackingParams = [ |
|
|
// Google Analytics & Ads |
|
|
'utm_source', 'utm_medium', 'utm_campaign', 'utm_term', 'utm_content', |
|
|
'utm_id', 'utm_source_platform', 'utm_creative_format', 'utm_marketing_tactic', |
|
|
'gclid', 'gclsrc', 'dclid', 'gbraid', 'wbraid', |
|
|
// Facebook |
|
|
'fbclid', 'fb_action_ids', 'fb_action_types', 'fb_source', 'fb_ref', |
|
|
// Twitter/X |
|
|
'twclid', 'twsrc', |
|
|
// Microsoft/Bing |
|
|
'msclkid', 'mc_cid', 'mc_eid', |
|
|
// Adobe |
|
|
'adobe_mc', 'adobe_mc_ref', 'adobe_mc_sdid', |
|
|
// Mailchimp |
|
|
'mc_cid', 'mc_eid', |
|
|
// HubSpot |
|
|
'hsCtaTracking', 'hsa_acc', 'hsa_cam', 'hsa_grp', 'hsa_ad', 'hsa_src', 'hsa_tgt', 'hsa_kw', 'hsa_mt', 'hsa_net', 'hsa_ver', |
|
|
// Marketo |
|
|
'mkt_tok', |
|
|
// YouTube |
|
|
'si', 'feature', 'kw', 'pp', |
|
|
// Other common tracking |
|
|
'ref', 'referrer', 'source', 'campaign', 'medium', 'content', |
|
|
'yclid', 'srsltid', '_ga', '_gl', 'igshid', 'epik', 'pk_campaign', 'pk_kwd', |
|
|
// Mobile app tracking |
|
|
'adjust_tracker', 'adjust_campaign', 'adjust_adgroup', 'adjust_creative', |
|
|
// Amazon |
|
|
'tag', 'linkCode', 'creative', 'creativeASIN', 'linkId', 'ascsubtag', |
|
|
// Affiliate tracking |
|
|
'aff_id', 'affiliate_id', 'aff', 'ref_', 'refer', |
|
|
// Social media share tracking |
|
|
'share', 'shared', 'sharesource' |
|
|
]; |
|
|
// Remove all tracking parameters |
|
|
trackingParams.forEach(param => { |
|
|
parsedUrl.searchParams.delete(param); |
|
|
}); |
|
|
// Remove any parameter that starts with utm_ or _ |
|
|
Array.from(parsedUrl.searchParams.keys()).forEach(key => { |
|
|
if (key.startsWith('utm_') || key.startsWith('_')) { |
|
|
parsedUrl.searchParams.delete(key); |
|
|
} |
|
|
}); |
|
|
return parsedUrl.toString(); |
|
|
} |
|
|
catch { |
|
|
// If URL parsing fails, return original URL |
|
|
return url; |
|
|
} |
|
|
} |
|
|
/** |
|
|
* Converts content to AsciiDoc format based on detected format |
|
|
* This is the unified entry point - everything becomes AsciiDoc |
|
|
*/ |
|
|
function convertToAsciidoc(content, format, linkBaseURL, options = {}) { |
|
|
let asciidoc = ''; |
|
|
switch (format) { |
|
|
case types_1.ContentFormat.AsciiDoc: |
|
|
// For AsciiDoc content, ensure proper formatting |
|
|
asciidoc = content.replace(/\\n/g, '\n'); |
|
|
// Ensure headers are on their own lines with proper spacing |
|
|
asciidoc = asciidoc.replace(/(\S[^\n]*)\n(={1,6}\s+[^\n]+)/g, (_match, before, header) => { |
|
|
return `${before}\n\n${header}`; |
|
|
}); |
|
|
break; |
|
|
case types_1.ContentFormat.Wikipedia: |
|
|
asciidoc = convertWikipediaToAsciidoc(content); |
|
|
break; |
|
|
case types_1.ContentFormat.Markdown: |
|
|
asciidoc = convertMarkdownToAsciidoc(content); |
|
|
break; |
|
|
case types_1.ContentFormat.Plain: |
|
|
default: |
|
|
asciidoc = convertPlainTextToAsciidoc(content); |
|
|
break; |
|
|
} |
|
|
// Process special elements for all content types |
|
|
// Process wikilinks |
|
|
asciidoc = processWikilinks(asciidoc, linkBaseURL); |
|
|
// Process nostr: addresses if enabled |
|
|
if (options.enableNostrAddresses !== false) { |
|
|
asciidoc = processNostrAddresses(asciidoc, linkBaseURL); |
|
|
} |
|
|
// Process media URLs in markdown links/images first (before converting to AsciiDoc) |
|
|
// This ensures media URLs in [text](url) or  format are detected |
|
|
asciidoc = processMediaUrlsInMarkdown(asciidoc); |
|
|
// Process media URLs (YouTube, Spotify, video, audio files) - for bare URLs |
|
|
asciidoc = processMediaUrls(asciidoc); |
|
|
// Process bare URLs (convert to AsciiDoc links) |
|
|
asciidoc = processBareUrls(asciidoc); |
|
|
// Process hashtags (after URLs to avoid conflicts) |
|
|
asciidoc = processHashtags(asciidoc); |
|
|
return asciidoc; |
|
|
} |
|
|
/** |
|
|
* Converts Wikipedia markup to AsciiDoc format |
|
|
* Handles Wikipedia-style headings, links, and formatting |
|
|
*/ |
|
|
function convertWikipediaToAsciidoc(content) { |
|
|
let asciidoc = content.replace(/\\n/g, '\n'); |
|
|
// Convert Wikipedia headings: == Heading == to AsciiDoc == Heading |
|
|
// Wikipedia uses == for level 2, === for level 3, etc. |
|
|
// AsciiDoc uses = for title, == for level 1, === for level 2, etc. |
|
|
// So Wikipedia level 2 (==) maps to AsciiDoc level 1 (==) |
|
|
asciidoc = asciidoc.replace(/^(=+)\s+(.+?)\s+\1$/gm, (match, equals, heading) => { |
|
|
const level = equals.length - 1; // Count = signs, subtract 1 for AsciiDoc mapping |
|
|
const asciidocEquals = '='.repeat(level + 1); // AsciiDoc uses one more = for same level |
|
|
return `${asciidocEquals} ${heading.trim()}`; |
|
|
}); |
|
|
// Convert Wikipedia bold: ''text'' to AsciiDoc *text* |
|
|
asciidoc = asciidoc.replace(/''([^']+)''/g, '*$1*'); |
|
|
// Convert Wikipedia italic: 'text' to AsciiDoc _text_ |
|
|
// Be careful not to match apostrophes in words |
|
|
asciidoc = asciidoc.replace(/(^|[^'])'([^']+)'([^']|$)/g, '$1_$2_$3'); |
|
|
// Convert Wikipedia links: [[Page]] or [[Page|Display]] to wikilinks |
|
|
// These will be processed by processWikilinks later, but we need to ensure |
|
|
// they're in the right format. Wikipedia links are already in [[...]] format |
|
|
// which matches our wikilink format, so they should work as-is. |
|
|
// Convert Wikipedia external links: [URL text] to AsciiDoc link:URL[text] |
|
|
asciidoc = asciidoc.replace(/\[(https?:\/\/[^\s\]]+)\s+([^\]]+)\]/g, 'link:$1[$2]'); |
|
|
asciidoc = asciidoc.replace(/\[(https?:\/\/[^\s\]]+)\]/g, 'link:$1[$1]'); |
|
|
// Convert Wikipedia lists (they use * or # similar to Markdown) |
|
|
// This is handled similarly to Markdown, so we can reuse that logic |
|
|
// But Wikipedia also uses : for definition lists and ; for term lists |
|
|
// For now, we'll handle basic lists and let AsciiDoc handle the rest |
|
|
// Convert horizontal rules: ---- to AsciiDoc ''' |
|
|
asciidoc = asciidoc.replace(/^----+$/gm, "'''"); |
|
|
return asciidoc; |
|
|
} |
|
|
/** |
|
|
* Converts Markdown to AsciiDoc format |
|
|
* Based on jumble's conversion patterns |
|
|
*/ |
|
|
function convertMarkdownToAsciidoc(content) { |
|
|
let asciidoc = content.replace(/\\n/g, '\n'); |
|
|
// Fix spacing issues (but be careful not to break links and images) |
|
|
// Process these BEFORE converting links/images to avoid conflicts |
|
|
asciidoc = asciidoc.replace(/`([^`\n]+)`\s*\(([^)]+)\)/g, '`$1` ($2)'); |
|
|
asciidoc = asciidoc.replace(/([a-zA-Z0-9])`([^`\n]+)`([a-zA-Z0-9])/g, '$1 `$2` $3'); |
|
|
asciidoc = asciidoc.replace(/([a-zA-Z0-9])`([^`\n]+)`\s*\(/g, '$1 `$2` ('); |
|
|
asciidoc = asciidoc.replace(/\)`([^`\n]+)`([a-zA-Z0-9])/g, ') `$1` $2'); |
|
|
asciidoc = asciidoc.replace(/([a-zA-Z0-9])\)([a-zA-Z0-9])/g, '$1) $2'); |
|
|
// Add space before == but not if it's part of a markdown link pattern |
|
|
// Check that == is not immediately after ]( which would be a link |
|
|
asciidoc = asciidoc.replace(/([a-zA-Z0-9])(?<!\]\()==/g, '$1 =='); |
|
|
// Note: nostr: addresses are processed later in processNostrAddresses |
|
|
// Convert headers |
|
|
asciidoc = asciidoc.replace(/^#{6}\s+(.+)$/gm, '====== $1 ======'); |
|
|
asciidoc = asciidoc.replace(/^#{5}\s+(.+)$/gm, '===== $1 ====='); |
|
|
asciidoc = asciidoc.replace(/^#{4}\s+(.+)$/gm, '==== $1 ===='); |
|
|
asciidoc = asciidoc.replace(/^#{3}\s+(.+)$/gm, '=== $1 ==='); |
|
|
asciidoc = asciidoc.replace(/^#{2}\s+(.+)$/gm, '== $1 =='); |
|
|
asciidoc = asciidoc.replace(/^#{1}\s+(.+)$/gm, '= $1 ='); |
|
|
asciidoc = asciidoc.replace(/^==\s+(.+?)\s+==$/gm, '== $1 =='); |
|
|
asciidoc = asciidoc.replace(/\s==\s+([^=]+?)\s+==\s/g, ' == $1 == '); |
|
|
// Convert emphasis |
|
|
asciidoc = asciidoc.replace(/\*\*(.+?)\*\*/g, '*$1*'); // Bold |
|
|
asciidoc = asciidoc.replace(/__(.+?)__/g, '*$1*'); // Bold |
|
|
asciidoc = asciidoc.replace(/\*(.+?)\*/g, '_$1_'); // Italic |
|
|
asciidoc = asciidoc.replace(/_(.+?)_/g, '_$1_'); // Italic |
|
|
asciidoc = asciidoc.replace(/~~(.+?)~~/g, '[line-through]#$1#'); // Strikethrough |
|
|
asciidoc = asciidoc.replace(/==(.+?)==/g, '[highlight]#$1#'); // Text highlighting (GFM) |
|
|
asciidoc = asciidoc.replace(/~(.+?)~/g, '[subscript]#$1#'); // Subscript |
|
|
asciidoc = asciidoc.replace(/\^(.+?)\^/g, '[superscript]#$1#'); // Superscript |
|
|
// Convert emoji shortcodes to Unicode (e.g., :tent: -> 🏕️) |
|
|
// Only convert if node-emoji is available |
|
|
if (emoji && emoji.emojify) { |
|
|
asciidoc = emoji.emojify(asciidoc); |
|
|
} |
|
|
// Convert code blocks (handle both \n and \r\n line endings) |
|
|
// Special handling for diagram languages: latex, plantuml, puml, bpmn |
|
|
asciidoc = asciidoc.replace(/```(\w+)?\r?\n([\s\S]*?)\r?\n```/g, (_match, lang, code) => { |
|
|
const trimmedCode = code.trim(); |
|
|
if (trimmedCode.length === 0) |
|
|
return ''; |
|
|
const langLower = lang ? lang.toLowerCase() : ''; |
|
|
// If it's a latex code block, always treat as code (not math) |
|
|
if (langLower === 'latex') { |
|
|
return `[source,latex]\n----\n${trimmedCode}\n----`; |
|
|
} |
|
|
// Handle PlantUML diagrams |
|
|
if (langLower === 'plantuml' || langLower === 'puml') { |
|
|
// Check if it already has @startuml/@enduml or @startbpmn/@endbpmn |
|
|
if (trimmedCode.includes('@start') || trimmedCode.includes('@end')) { |
|
|
return `[plantuml]\n----\n${trimmedCode}\n----`; |
|
|
} |
|
|
// If not, wrap it in @startuml/@enduml |
|
|
return `[plantuml]\n----\n@startuml\n${trimmedCode}\n@enduml\n----`; |
|
|
} |
|
|
// Handle BPMN diagrams (using PlantUML BPMN syntax) |
|
|
if (langLower === 'bpmn') { |
|
|
// Check if it already has @startbpmn/@endbpmn |
|
|
if (trimmedCode.includes('@startbpmn') && trimmedCode.includes('@endbpmn')) { |
|
|
return `[plantuml]\n----\n${trimmedCode}\n----`; |
|
|
} |
|
|
// If not, wrap it in @startbpmn/@endbpmn |
|
|
return `[plantuml]\n----\n@startbpmn\n${trimmedCode}\n@endbpmn\n----`; |
|
|
} |
|
|
// Check if it's ABC notation (starts with X:) |
|
|
if (!lang && /^X:\s*\d+/m.test(trimmedCode)) { |
|
|
// ABC notation - keep as plain text block, will be processed by music processor |
|
|
return `----\n${trimmedCode}\n----`; |
|
|
} |
|
|
const hasCodePatterns = /[{}();=<>]|function|class|import|export|def |if |for |while |return |const |let |var |public |private |static |console\.log/.test(trimmedCode); |
|
|
const isLikelyText = /^[A-Za-z\s.,!?\-'"]+$/.test(trimmedCode) && trimmedCode.length > 50; |
|
|
const hasTooManySpaces = (trimmedCode.match(/\s{3,}/g) || []).length > 3; |
|
|
const hasMarkdownPatterns = /^#{1,6}\s|^\*\s|^\d+\.\s|^\>\s|^\|.*\|/.test(trimmedCode); |
|
|
if ((!hasCodePatterns && trimmedCode.length > 100) || isLikelyText || hasTooManySpaces || hasMarkdownPatterns) { |
|
|
return _match; |
|
|
} |
|
|
return `[source${lang ? ',' + lang : ''}]\n----\n${trimmedCode}\n----`; |
|
|
}); |
|
|
// Handle inline code: LaTeX formulas in inline code should be rendered as math |
|
|
// Pattern: `$formula$` should become $formula$ (math), not code |
|
|
// Handle escaped brackets: `$[ ... \]$` and `$[\sqrt{...}\]$` |
|
|
asciidoc = asciidoc.replace(/`(\$[^`]+\$)`/g, (match, formula) => { |
|
|
// Extract the formula (remove the $ signs) |
|
|
const mathContent = formula.slice(1, -1); |
|
|
return `$${mathContent}$`; // Return as math, not code |
|
|
}); |
|
|
asciidoc = asciidoc.replace(/`([^`]+)`/g, '`$1`'); // Regular inline code |
|
|
// Convert nested image links first: [](url) - image wrapped in link |
|
|
// This must come before regular image processing |
|
|
asciidoc = asciidoc.replace(/\[!\[([^\]]*)\]\(([^)]+?)\)\]\(([^)]+?)\)/g, (match, alt, imgUrl, linkUrl) => { |
|
|
const cleanImgUrl = imgUrl.trim(); |
|
|
const cleanLinkUrl = linkUrl.trim(); |
|
|
const cleanAlt = alt.trim(); |
|
|
// Check if linkUrl is a media URL |
|
|
if (cleanLinkUrl.startsWith('MEDIA:')) { |
|
|
return cleanLinkUrl; // Return the placeholder as-is |
|
|
} |
|
|
// Create a link with an image inside - don't escape brackets in URLs |
|
|
// AsciiDoc can handle URLs with brackets if they're in the URL part |
|
|
return `link:${cleanLinkUrl}[image:${cleanImgUrl}[${cleanAlt ? cleanAlt : 'link'}]]`; |
|
|
}); |
|
|
// Convert images (but not nested ones, which we already processed) |
|
|
// Match:  or  - handle empty alt text |
|
|
// Use negative lookbehind to avoid matching nested image links |
|
|
// Format: image::url[alt,width=100%] - matching jumble's format |
|
|
asciidoc = asciidoc.replace(/(?<!\[)!\[([^\]]*)\]\(([^)]+?)\)/g, (match, alt, url) => { |
|
|
let processedUrl = url.trim(); |
|
|
const cleanAlt = alt.trim(); |
|
|
// Check if it's already a MEDIA: placeholder (processed by processMediaUrlsInMarkdown) |
|
|
if (processedUrl.startsWith('MEDIA:')) { |
|
|
return processedUrl; // Return the placeholder as-is |
|
|
} |
|
|
// Clean URL (remove tracking parameters) |
|
|
processedUrl = cleanUrl(processedUrl); |
|
|
// Regular image - match jumble's format: image::url[alt,width=100%] |
|
|
// Don't escape brackets - AsciiDoc handles URLs properly |
|
|
return `image::${processedUrl}[${cleanAlt ? cleanAlt + ',' : ''}width=100%]`; |
|
|
}); |
|
|
// Convert anchor links: [text](#section-id) - these are internal links |
|
|
asciidoc = asciidoc.replace(/(?<!!)\[([^\]]+)\]\(#([^)]+)\)/g, (match, text, anchor) => { |
|
|
const cleanText = text.trim(); |
|
|
const cleanAnchor = anchor.trim(); |
|
|
// AsciiDoc uses # for anchor links, but we need to normalize the anchor ID |
|
|
// Convert to lowercase and replace spaces/special chars with hyphens |
|
|
const normalizedAnchor = cleanAnchor.toLowerCase().replace(/[^a-z0-9]+/g, '-').replace(/^-+|-+$/g, ''); |
|
|
const escapedText = cleanText.replace(/([\[\]])/g, '\\$1'); |
|
|
return `<<${normalizedAnchor},${escapedText}>>`; |
|
|
}); |
|
|
// Convert links (but not images or anchor links, which we already processed) |
|
|
// Match: [text](url) - use negative lookbehind to avoid matching images |
|
|
// Use non-greedy matching for URL to stop at first closing paren |
|
|
// This ensures we don't capture trailing punctuation |
|
|
asciidoc = asciidoc.replace(/(?<!!)\[([^\]]+)\]\(([^)]+?)\)/g, (match, text, url) => { |
|
|
let processedUrl = url.trim(); |
|
|
const cleanText = text.trim(); |
|
|
// Check if it's already a MEDIA: placeholder (processed by processMediaUrlsInMarkdown) |
|
|
if (processedUrl.startsWith('MEDIA:')) { |
|
|
return processedUrl; // Return the placeholder as-is |
|
|
} |
|
|
// Clean URL (remove tracking parameters) |
|
|
processedUrl = cleanUrl(processedUrl); |
|
|
// Handle WSS URLs: convert wss:// to https:// for display |
|
|
if (processedUrl.startsWith('wss://')) { |
|
|
processedUrl = processedUrl.replace(/^wss:\/\//, 'https://'); |
|
|
} |
|
|
// Regular link - don't escape brackets in URLs (AsciiDoc handles them) |
|
|
// Only escape brackets in the link text if needed |
|
|
const escapedText = cleanText.replace(/([\[\]])/g, '\\$1'); |
|
|
return `link:${processedUrl}[${escapedText}]`; |
|
|
}); |
|
|
// Convert horizontal rules |
|
|
asciidoc = asciidoc.replace(/^---$/gm, '\'\'\''); |
|
|
asciidoc = asciidoc.replace(/^\*\*\*$/gm, '\'\'\''); // Also handle *** |
|
|
// Convert lists - need to process them as blocks to preserve structure |
|
|
// First, convert task lists (before regular lists) |
|
|
// Task lists: - [x] or - [ ] or * [x] or * [ ] |
|
|
asciidoc = asciidoc.replace(/^(\s*)([-*])\s+\[([ x])\]\s+(.+)$/gm, (_match, indent, bullet, checked, text) => { |
|
|
// Use AsciiDoc checkbox syntax: * [x] Task text |
|
|
// The checkbox will be rendered by AsciiDoctor |
|
|
return `${indent}* [${checked === 'x' ? 'x' : ' '}] ${text}`; |
|
|
}); |
|
|
// Convert lists - process entire list blocks to ensure proper AsciiDoc formatting |
|
|
// AsciiDoc lists need to be on their own lines with proper spacing |
|
|
// Process lists in blocks to handle nested lists correctly |
|
|
const lines = asciidoc.split('\n'); |
|
|
const processedLines = []; |
|
|
let inList = false; |
|
|
let listType = null; |
|
|
for (let i = 0; i < lines.length; i++) { |
|
|
const line = lines[i]; |
|
|
const isEmpty = line.trim() === ''; |
|
|
const prevLine = i > 0 ? processedLines[processedLines.length - 1] : ''; |
|
|
const prevLineIsEmpty = prevLine.trim() === ''; |
|
|
// Check if this line is a list item (but not a task list, which we already processed) |
|
|
const unorderedMatch = line.match(/^(\s*)([-*+])\s+(.+)$/); |
|
|
const orderedMatch = line.match(/^(\s*)(\d+)\.\s+(.+)$/); |
|
|
const isTaskList = line.match(/^(\s*)([-*])\s+\[([ x])\]\s+(.+)$/); |
|
|
if (unorderedMatch && !isTaskList) { |
|
|
const [, indent, , text] = unorderedMatch; |
|
|
const indentLevel = indent.length; |
|
|
// AsciiDoc uses 4 spaces per indentation level |
|
|
// Markdown typically uses 2 or 4 spaces per level |
|
|
// 2 spaces = 1 level (4 spaces), 4 spaces = 1 level (4 spaces) |
|
|
const asciidocIndent = ' '.repeat(Math.ceil(indentLevel / 4)); |
|
|
// Add blank line before list if not already in a list |
|
|
// But don't add blank line if we're switching list types within the same list context |
|
|
if (!inList) { |
|
|
// Starting a new list - add blank line if previous line has content |
|
|
if (processedLines.length > 0 && !prevLineIsEmpty) { |
|
|
processedLines.push(''); |
|
|
} |
|
|
inList = true; |
|
|
listType = 'unordered'; |
|
|
} |
|
|
else if (listType !== 'unordered') { |
|
|
// Switching list types - don't add blank line, just change type |
|
|
listType = 'unordered'; |
|
|
} |
|
|
processedLines.push(`${asciidocIndent}* ${text}`); |
|
|
} |
|
|
else if (orderedMatch) { |
|
|
const [, indent, , text] = orderedMatch; |
|
|
const indentLevel = indent.length; |
|
|
// AsciiDoc uses 4 spaces per indentation level |
|
|
// Markdown typically uses 2 or 4 spaces per level |
|
|
// 2 spaces = 1 level (4 spaces), 4 spaces = 1 level (4 spaces) |
|
|
const asciidocIndent = ' '.repeat(Math.ceil(indentLevel / 4)); |
|
|
// Add blank line before list if not already in a list |
|
|
// But don't add blank line if we're switching list types within the same list context |
|
|
if (!inList) { |
|
|
// Starting a new list - add blank line if previous line has content |
|
|
if (processedLines.length > 0 && !prevLineIsEmpty) { |
|
|
processedLines.push(''); |
|
|
} |
|
|
inList = true; |
|
|
listType = 'ordered'; |
|
|
} |
|
|
else if (listType !== 'ordered') { |
|
|
// Switching list types - don't add blank line, just change type |
|
|
listType = 'ordered'; |
|
|
} |
|
|
processedLines.push(`${asciidocIndent}. ${text}`); |
|
|
} |
|
|
else { |
|
|
// Not a list item |
|
|
if (inList && !isEmpty) { |
|
|
// End of list - add blank line after if the next line is not empty |
|
|
if (i < lines.length - 1 && lines[i + 1].trim() !== '') { |
|
|
processedLines.push(''); |
|
|
} |
|
|
inList = false; |
|
|
listType = null; |
|
|
} |
|
|
processedLines.push(line); |
|
|
} |
|
|
} |
|
|
asciidoc = processedLines.join('\n'); |
|
|
// Convert blockquotes with attribution |
|
|
asciidoc = asciidoc.replace(/^(>\s+.+(?:\n>\s+.+)*)/gm, (match) => { |
|
|
const lines = match.split('\n').map(line => line.replace(/^>\s*/, '')); |
|
|
let quoteBodyLines = []; |
|
|
let attributionLine; |
|
|
for (let i = lines.length - 1; i >= 0; i--) { |
|
|
const line = lines[i].trim(); |
|
|
if (line.startsWith('—') || line.startsWith('--')) { |
|
|
attributionLine = line; |
|
|
quoteBodyLines = lines.slice(0, i); |
|
|
break; |
|
|
} |
|
|
} |
|
|
const quoteContent = quoteBodyLines.filter(l => l.trim() !== '').join('\n').trim(); |
|
|
if (attributionLine) { |
|
|
let cleanedAttribution = attributionLine.replace(/^[—-]+/, '').trim(); |
|
|
let author = ''; |
|
|
let source = ''; |
|
|
const linkMatch = cleanedAttribution.match(/^(.*?),?\s*link:([^[\\]]+)\[([^\\]]+)\]$/); |
|
|
if (linkMatch) { |
|
|
author = linkMatch[1].trim(); |
|
|
source = `link:${linkMatch[2].trim()}[${linkMatch[3].trim()}]`; |
|
|
} |
|
|
else { |
|
|
const parts = cleanedAttribution.split(',').map(p => p.trim()); |
|
|
author = parts[0]; |
|
|
if (parts.length > 1) { |
|
|
source = parts.slice(1).join(', ').trim(); |
|
|
} |
|
|
} |
|
|
return `[quote, ${author}, ${source}]\n____\n${quoteContent}\n____`; |
|
|
} |
|
|
else { |
|
|
return `____\n${quoteContent}\n____`; |
|
|
} |
|
|
}); |
|
|
// Convert tables with alignment support |
|
|
asciidoc = asciidoc.replace(/(\|.*\|[\r\n]+\|[\s\-\|:]*[\r\n]+(\|.*\|[\r\n]+)*)/g, (match) => { |
|
|
const lines = match.trim().split('\n').filter(line => line.trim()); |
|
|
if (lines.length < 2) |
|
|
return match; |
|
|
const headerRow = lines[0]; |
|
|
const separatorRow = lines[1]; |
|
|
const dataRows = lines.slice(2); |
|
|
if (!separatorRow.includes('-')) |
|
|
return match; |
|
|
// Parse alignment from separator row |
|
|
// :--- = left, :----: = center, ---: = right, --- = default |
|
|
const cells = separatorRow.split('|').filter(c => c.trim()); |
|
|
const alignments = []; |
|
|
cells.forEach((cell, index) => { |
|
|
const trimmed = cell.trim(); |
|
|
if (trimmed.startsWith(':') && trimmed.endsWith(':')) { |
|
|
alignments[index] = '^'; // center (AsciiDoc uses ^ for center) |
|
|
} |
|
|
else if (trimmed.endsWith(':')) { |
|
|
alignments[index] = '>'; // right |
|
|
} |
|
|
else if (trimmed.startsWith(':')) { |
|
|
alignments[index] = '<'; // left (explicit) |
|
|
} |
|
|
else { |
|
|
alignments[index] = '<'; // default left |
|
|
} |
|
|
}); |
|
|
// Build cols attribute with alignments |
|
|
const colsAttr = alignments.length > 0 |
|
|
? `[cols="${alignments.join(',')}"]` |
|
|
: ''; |
|
|
let tableAsciidoc = colsAttr ? `${colsAttr}\n` : ''; |
|
|
tableAsciidoc += '|===\n'; |
|
|
tableAsciidoc += headerRow + '\n'; |
|
|
dataRows.forEach(row => { |
|
|
tableAsciidoc += row + '\n'; |
|
|
}); |
|
|
tableAsciidoc += '|==='; |
|
|
return tableAsciidoc; |
|
|
}); |
|
|
// Convert footnotes |
|
|
const footnoteDefinitions = {}; |
|
|
let tempAsciidoc = asciidoc; |
|
|
tempAsciidoc = tempAsciidoc.replace(/^\[\^([^\]]+)\]:\s*([\s\S]*?)(?=\n\[\^|\n---|\n##|\n###|\n####|\n#####|\n######|$)/gm, (_, id, text) => { |
|
|
footnoteDefinitions[id] = text.trim(); |
|
|
return ''; |
|
|
}); |
|
|
asciidoc = tempAsciidoc.replace(/\[\^([^\]]+)\]/g, (match, id) => { |
|
|
if (footnoteDefinitions[id]) { |
|
|
return `footnote:[${footnoteDefinitions[id]}]`; |
|
|
} |
|
|
return match; |
|
|
}); |
|
|
return asciidoc; |
|
|
} |
|
|
/** |
|
|
* Converts plain text to AsciiDoc format |
|
|
* Preserves line breaks by converting single newlines to line continuations |
|
|
*/ |
|
|
function convertPlainTextToAsciidoc(content) { |
|
|
// Preserve double newlines (paragraph breaks) |
|
|
// Convert single newlines to line continuations ( +\n) |
|
|
return content |
|
|
.replace(/\r\n/g, '\n') // Normalize line endings |
|
|
.replace(/\n\n+/g, '\n\n') // Normalize multiple newlines to double |
|
|
.replace(/([^\n])\n([^\n])/g, '$1 +\n$2'); // Single newlines become line continuations |
|
|
} |
|
|
/** |
|
|
* Normalizes text to d-tag format |
|
|
*/ |
|
|
function normalizeDtag(text) { |
|
|
return text |
|
|
.toLowerCase() |
|
|
.replace(/[^a-z0-9]+/g, '-') |
|
|
.replace(/^-+|-+$/g, ''); |
|
|
} |
|
|
/** |
|
|
* Processes wikilinks: [[target]] or [[target|display text]] |
|
|
* Converts to WIKILINK: placeholder format to protect from AsciiDoc processing |
|
|
*/ |
|
|
function processWikilinks(content, linkBaseURL) { |
|
|
// Process bookstr macro wikilinks: [[book::...]] |
|
|
content = content.replace(/\[\[book::([^\]]+)\]\]/g, (_match, bookContent) => { |
|
|
const cleanContent = bookContent.trim(); |
|
|
return `BOOKSTR:${cleanContent}`; |
|
|
}); |
|
|
// Process standard wikilinks: [[Target Page]] or [[target page|see this]] |
|
|
// Use placeholder format to prevent AsciiDoc from processing the brackets |
|
|
content = content.replace(/\[\[([^|\]]+)(?:\|([^\]]+))?\]\]/g, (_match, target, displayText) => { |
|
|
const cleanTarget = target.trim(); |
|
|
const cleanDisplay = displayText ? displayText.trim() : cleanTarget; |
|
|
const dTag = normalizeDtag(cleanTarget); |
|
|
// Use placeholder format: WIKILINK:dtag|display |
|
|
// This prevents AsciiDoc from interpreting the brackets |
|
|
return `WIKILINK:${dTag}|${cleanDisplay}`; |
|
|
}); |
|
|
return content; |
|
|
} |
|
|
/** |
|
|
* Processes nostr: addresses |
|
|
* Only processes addresses with "nostr:" prefix - bare addresses are left as plaintext |
|
|
* Converts to link:nostr:...[...] format |
|
|
* Valid bech32 prefixes: npub, nprofile, nevent, naddr, note |
|
|
*/ |
|
|
function processNostrAddresses(content, linkBaseURL) { |
|
|
// Match nostr: followed by valid bech32 prefix and identifier |
|
|
// Bech32 format: prefix + separator (1) + data (at least 6 chars for valid identifiers) |
|
|
// Only match if it has "nostr:" prefix - bare addresses should remain as plaintext |
|
|
const nostrPattern = /nostr:((?:npub|nprofile|nevent|naddr|note)1[a-z0-9]{6,})/gi; |
|
|
return content.replace(nostrPattern, (_match, bech32Id) => { |
|
|
return `link:nostr:${bech32Id}[${bech32Id}]`; |
|
|
}); |
|
|
} |
|
|
/** |
|
|
* Processes media URLs in markdown links and images |
|
|
* Converts them to MEDIA: placeholders before markdown conversion |
|
|
*/ |
|
|
function processMediaUrlsInMarkdown(content) { |
|
|
let processed = content; |
|
|
// Process YouTube URLs in markdown links: [text](youtube-url) |
|
|
processed = processed.replace(/\[([^\]]+)\]\((?:https?:\/\/)?(?:www\.)?(?:youtube\.com\/(?:watch\?v=|embed\/|v\/)|youtu\.be\/)([a-zA-Z0-9_-]{11})(?:[?&][^?\s<>"{}|\\^`\[\]()]*)?\)/gi, (_match, text, videoId) => { |
|
|
return `MEDIA:youtube:${videoId}`; |
|
|
}); |
|
|
// Process Spotify URLs in markdown links: [text](spotify-url) |
|
|
processed = processed.replace(/\[([^\]]+)\]\((?:https?:\/\/)?(?:open\.)?spotify\.com\/(track|album|playlist|artist|episode|show)\/([a-zA-Z0-9]+)(?:[?&][^?\s<>"{}|\\^`\[\]()]*)?\)/gi, (_match, text, type, id) => { |
|
|
return `MEDIA:spotify:${type}:${id}`; |
|
|
}); |
|
|
// Process video files in markdown links/images: [text](video-url) or  |
|
|
processed = processed.replace(/[!]?\[([^\]]*)\]\((https?:\/\/[^\s<>"{}|\\^`\[\]()]+\.(mp4|webm|ogg|m4v|mov|avi|mkv|flv|wmv))(?:\?[^\s<>"{}|\\^`\[\]()]*)?\)/gi, (_match, altOrText, url) => { |
|
|
const cleanUrl = url.replace(/\?.*$/, ''); // Remove query params |
|
|
return `MEDIA:video:${cleanUrl}`; |
|
|
}); |
|
|
// Process audio files in markdown links/images: [text](audio-url) or  |
|
|
processed = processed.replace(/[!]?\[([^\]]*)\]\((https?:\/\/[^\s<>"{}|\\^`\[\]()]+\.(mp3|m4a|ogg|wav|flac|aac|opus|wma))(?:\?[^\s<>"{}|\\^`\[\]()]*)?\)/gi, (_match, altOrText, url) => { |
|
|
const cleanUrl = url.replace(/\?.*$/, ''); // Remove query params |
|
|
return `MEDIA:audio:${cleanUrl}`; |
|
|
}); |
|
|
return processed; |
|
|
} |
|
|
/** |
|
|
* Processes media URLs (YouTube, Spotify, video, audio files) in bare URLs |
|
|
* Converts them to placeholders that will be rendered as embeds/players |
|
|
*/ |
|
|
function processMediaUrls(content) { |
|
|
// Process YouTube URLs |
|
|
// Match: youtube.com/watch?v=, youtu.be/, youtube.com/embed/, youtube.com/v/ |
|
|
content = content.replace(/(?:https?:\/\/)?(?:www\.)?(?:youtube\.com\/(?:watch\?v=|embed\/|v\/)|youtu\.be\/)([a-zA-Z0-9_-]{11})(?:[?&][^?\s<>"{}|\\^`\[\]()]*)?/gi, (match, videoId) => { |
|
|
return `MEDIA:youtube:${videoId}`; |
|
|
}); |
|
|
// Process Spotify URLs |
|
|
// Match: open.spotify.com/track/, open.spotify.com/album/, open.spotify.com/playlist/, open.spotify.com/artist/ |
|
|
content = content.replace(/(?:https?:\/\/)?(?:open\.)?spotify\.com\/(track|album|playlist|artist|episode|show)\/([a-zA-Z0-9]+)(?:[?&][^?\s<>"{}|\\^`\[\]()]*)?/gi, (match, type, id) => { |
|
|
return `MEDIA:spotify:${type}:${id}`; |
|
|
}); |
|
|
// Process video files (mp4, webm, ogg, m4v, mov, avi, etc.) |
|
|
content = content.replace(/(?:https?:\/\/[^\s<>"{}|\\^`\[\]()]+)\.(mp4|webm|ogg|m4v|mov|avi|mkv|flv|wmv)(?:\?[^\s<>"{}|\\^`\[\]()]*)?/gi, (match, ext) => { |
|
|
const url = match.replace(/\?.*$/, ''); // Remove query params for cleaner URL |
|
|
return `MEDIA:video:${url}`; |
|
|
}); |
|
|
// Process audio files (mp3, m4a, ogg, wav, flac, aac, etc.) |
|
|
content = content.replace(/(?:https?:\/\/[^\s<>"{}|\\^`\[\]()]+)\.(mp3|m4a|ogg|wav|flac|aac|opus|wma)(?:\?[^\s<>"{}|\\^`\[\]()]*)?/gi, (match, ext) => { |
|
|
const url = match.replace(/\?.*$/, ''); // Remove query params for cleaner URL |
|
|
return `MEDIA:audio:${url}`; |
|
|
}); |
|
|
return content; |
|
|
} |
|
|
/** |
|
|
* Processes bare URLs and converts them to AsciiDoc links |
|
|
* Matches http://, https://, wss://, and www. URLs that aren't already in markdown links |
|
|
* Also handles bare image URLs (converts to images) |
|
|
* Skips URLs inside code blocks (---- blocks) and inline code (backticks) |
|
|
*/ |
|
|
function processBareUrls(content) { |
|
|
// Protect code blocks and inline code from URL processing |
|
|
// We'll process URLs, then restore code blocks |
|
|
const codeBlockPlaceholders = []; |
|
|
const inlineCodePlaceholders = []; |
|
|
// Replace code blocks with placeholders |
|
|
content = content.replace(/\[source[^\]]*\]\n----\n([\s\S]*?)\n----/g, (match, code) => { |
|
|
const placeholder = `__CODEBLOCK_${codeBlockPlaceholders.length}__`; |
|
|
codeBlockPlaceholders.push(match); |
|
|
return placeholder; |
|
|
}); |
|
|
// Also handle plain code blocks (without [source]) |
|
|
content = content.replace(/----\n([\s\S]*?)\n----/g, (match, code) => { |
|
|
// Check if this is already a placeholder |
|
|
if (match.includes('__CODEBLOCK_')) { |
|
|
return match; |
|
|
} |
|
|
const placeholder = `__CODEBLOCK_${codeBlockPlaceholders.length}__`; |
|
|
codeBlockPlaceholders.push(match); |
|
|
return placeholder; |
|
|
}); |
|
|
// Replace inline code with placeholders |
|
|
content = content.replace(/`([^`]+)`/g, (match, code) => { |
|
|
const placeholder = `__INLINECODE_${inlineCodePlaceholders.length}__`; |
|
|
inlineCodePlaceholders.push(match); |
|
|
return placeholder; |
|
|
}); |
|
|
// First, handle bare image URLs (before regular URLs) |
|
|
// Match image URLs: .jpg, .png, .gif, .webp, .svg, etc. |
|
|
// Format: image::url[width=100%] - matching jumble's format |
|
|
const imageUrlPattern = /(?<!\]\()\b(https?:\/\/[^\s<>"{}|\\^`\[\]()]+\.(jpe?g|png|gif|webp|svg|bmp|ico))(?:\?[^\s<>"{}|\\^`\[\]()]*)?/gi; |
|
|
content = content.replace(imageUrlPattern, (match, url) => { |
|
|
// Clean URL (remove tracking parameters) |
|
|
const cleanedUrl = cleanUrl(url); |
|
|
// Don't escape brackets - AsciiDoc handles URLs properly |
|
|
return `image::${cleanedUrl}[width=100%]`; |
|
|
}); |
|
|
// Match URLs that aren't already in markdown link format |
|
|
// Pattern: http://, https://, wss://, or www. followed by valid URL characters |
|
|
// Use word boundary to avoid matching URLs that are part of other text |
|
|
// Don't match if immediately after colon-space (like "hyperlink: www.example.com") |
|
|
const urlPattern = /(?<!\]\()(?<!:\s)\b(https?:\/\/[^\s<>"{}|\\^`\[\]()]+|wss:\/\/[^\s<>"{}|\\^`\[\]()]+|www\.[^\s<>"{}|\\^`\[\]()]+)/gi; |
|
|
content = content.replace(urlPattern, (match, url) => { |
|
|
// Skip if this URL was already converted to an image |
|
|
if (match.includes('image::')) { |
|
|
return match; |
|
|
} |
|
|
// Ensure URL starts with http:// or https:// |
|
|
let fullUrl = url; |
|
|
if (url.startsWith('www.')) { |
|
|
fullUrl = 'https://' + url; |
|
|
} |
|
|
else if (url.startsWith('wss://')) { |
|
|
// Convert wss:// to https:// for display |
|
|
fullUrl = url.replace(/^wss:\/\//, 'https://'); |
|
|
} |
|
|
// Clean URL (remove tracking parameters) |
|
|
fullUrl = cleanUrl(fullUrl); |
|
|
// Don't escape brackets in URLs - AsciiDoc handles them properly |
|
|
// The URL is in the link: part, brackets in URLs are valid |
|
|
// Use proper AsciiDoc link syntax: link:url[text] |
|
|
return `link:${fullUrl}[${url}]`; |
|
|
}); |
|
|
// Restore inline code |
|
|
inlineCodePlaceholders.forEach((code, index) => { |
|
|
content = content.replace(`__INLINECODE_${index}__`, code); |
|
|
}); |
|
|
// Restore code blocks |
|
|
codeBlockPlaceholders.forEach((code, index) => { |
|
|
content = content.replace(`__CODEBLOCK_${index}__`, code); |
|
|
}); |
|
|
return content; |
|
|
} |
|
|
/** |
|
|
* Processes hashtags |
|
|
* Converts to hashtag:tag[#tag] format |
|
|
* Handles hashtags at the beginning of lines to prevent line breaks |
|
|
*/ |
|
|
function processHashtags(content) { |
|
|
// Match # followed by word characters |
|
|
// Match at word boundary OR at start of line OR after whitespace |
|
|
// This ensures we don't match # in URLs or code, but do match at line start |
|
|
return content.replace(/(^|\s|>)#([a-zA-Z0-9_]+)(?![a-zA-Z0-9_])/g, (match, before, hashtag) => { |
|
|
const normalizedHashtag = hashtag.toLowerCase(); |
|
|
// Preserve the space or line start before the hashtag to prevent line breaks |
|
|
// Add a zero-width space or ensure proper spacing |
|
|
const prefix = before === '' ? '' : before; |
|
|
return `${prefix}hashtag:${normalizedHashtag}[#${hashtag}]`; |
|
|
}); |
|
|
}
|
|
|
|