diff --git a/package.json b/package.json index 70c1703..b74319d 100644 --- a/package.json +++ b/package.json @@ -21,9 +21,7 @@ "author": "", "license": "MIT", "dependencies": { - "@asciidoctor/core": "^3.0.4", - "highlight.js": "^11.10.0", - "marked": "^12.0.0" + "@asciidoctor/core": "^3.0.4" }, "devDependencies": { "@types/node": "^20.11.0", diff --git a/src/converters/to-asciidoc.ts b/src/converters/to-asciidoc.ts new file mode 100644 index 0000000..779d864 --- /dev/null +++ b/src/converters/to-asciidoc.ts @@ -0,0 +1,275 @@ +import { ContentFormat } from '../types'; + +export interface ConvertOptions { + enableNostrAddresses?: boolean; +} + +/** + * Converts content to AsciiDoc format based on detected format + * This is the unified entry point - everything becomes AsciiDoc + */ +export function convertToAsciidoc( + content: string, + format: ContentFormat, + linkBaseURL: string, + options: ConvertOptions = {} +): string { + let asciidoc = ''; + + switch (format) { + case ContentFormat.AsciiDoc: + // For AsciiDoc content, ensure proper formatting + asciidoc = content.replace(/\\n/g, '\n'); + + // Ensure headers are on their own lines with proper spacing + asciidoc = asciidoc.replace(/(\S[^\n]*)\n(={1,6}\s+[^\n]+)/g, (_match, before, header) => { + return `${before}\n\n${header}`; + }); + break; + + case ContentFormat.Markdown: + asciidoc = convertMarkdownToAsciidoc(content); + break; + + case ContentFormat.Plain: + default: + asciidoc = convertPlainTextToAsciidoc(content); + break; + } + + // Process special elements for all content types + // Process wikilinks + asciidoc = processWikilinks(asciidoc, linkBaseURL); + + // Process nostr: addresses if enabled + if (options.enableNostrAddresses !== false) { + asciidoc = processNostrAddresses(asciidoc, linkBaseURL); + } + + // Process hashtags + asciidoc = processHashtags(asciidoc); + + return asciidoc; +} + +/** + * Converts Markdown to AsciiDoc format + * Based on jumble's conversion patterns + */ +function 
convertMarkdownToAsciidoc(content: string): string { + let asciidoc = content.replace(/\\n/g, '\n'); + + // Fix spacing issues + asciidoc = asciidoc.replace(/`([^`\n]+)`\s*\(([^)]+)\)/g, '`$1` ($2)'); + asciidoc = asciidoc.replace(/([a-zA-Z0-9])`([^`\n]+)`([a-zA-Z0-9])/g, '$1 `$2` $3'); + asciidoc = asciidoc.replace(/([a-zA-Z0-9])`([^`\n]+)`\s*\(/g, '$1 `$2` ('); + asciidoc = asciidoc.replace(/\)`([^`\n]+)`([a-zA-Z0-9])/g, ') `$1` $2'); + asciidoc = asciidoc.replace(/([a-zA-Z0-9])\)([a-zA-Z0-9])/g, '$1) $2'); + asciidoc = asciidoc.replace(/([a-zA-Z0-9])==/g, '$1 =='); + + // Preserve nostr: addresses temporarily + asciidoc = asciidoc.replace(/nostr:([a-z0-9]+)/g, 'nostr:$1'); + + // Convert headers + asciidoc = asciidoc.replace(/^#{6}\s+(.+)$/gm, '====== $1 ======'); + asciidoc = asciidoc.replace(/^#{5}\s+(.+)$/gm, '===== $1 ====='); + asciidoc = asciidoc.replace(/^#{4}\s+(.+)$/gm, '==== $1 ===='); + asciidoc = asciidoc.replace(/^#{3}\s+(.+)$/gm, '=== $1 ==='); + asciidoc = asciidoc.replace(/^#{2}\s+(.+)$/gm, '== $1 =='); + asciidoc = asciidoc.replace(/^#{1}\s+(.+)$/gm, '= $1 ='); + asciidoc = asciidoc.replace(/^==\s+(.+?)\s+==$/gm, '== $1 =='); + asciidoc = asciidoc.replace(/\s==\s+([^=]+?)\s+==\s/g, ' == $1 == '); + + // Convert emphasis + asciidoc = asciidoc.replace(/\*\*(.+?)\*\*/g, '*$1*'); // Bold + asciidoc = asciidoc.replace(/__(.+?)__/g, '*$1*'); // Bold + asciidoc = asciidoc.replace(/\*(.+?)\*/g, '_$1_'); // Italic + asciidoc = asciidoc.replace(/_(.+?)_/g, '_$1_'); // Italic + asciidoc = asciidoc.replace(/~~(.+?)~~/g, '[line-through]#$1#'); // Strikethrough + asciidoc = asciidoc.replace(/~(.+?)~/g, '[subscript]#$1#'); // Subscript + asciidoc = asciidoc.replace(/\^(.+?)\^/g, '[superscript]#$1#'); // Superscript + + // Convert code blocks + asciidoc = asciidoc.replace(/```(\w+)?\n([\s\S]*?)\n```/g, (_match, lang, code) => { + const trimmedCode = code.trim(); + if (trimmedCode.length === 0) return ''; + + const hasCodePatterns = 
/[{}();=<>]|function|class|import|export|def |if |for |while |return |const |let |var |public |private |static |console\.log/.test(trimmedCode); + const isLikelyText = /^[A-Za-z\s.,!?\-'"]+$/.test(trimmedCode) && trimmedCode.length > 50; + const hasTooManySpaces = (trimmedCode.match(/\s{3,}/g) || []).length > 3; + const hasMarkdownPatterns = /^#{1,6}\s|^\*\s|^\d+\.\s|^\>\s|^\|.*\|/.test(trimmedCode); + + if ((!hasCodePatterns && trimmedCode.length > 100) || isLikelyText || hasTooManySpaces || hasMarkdownPatterns) { + return _match; + } + + return `[source${lang ? ',' + lang : ''}]\n----\n${trimmedCode}\n----`; + }); + asciidoc = asciidoc.replace(/`([^`]+)`/g, '`$1`'); // Inline code + asciidoc = asciidoc.replace(/`\$([^$]+)\$`/g, '`$\\$1\\$$`'); // Preserve LaTeX in code + + // Convert images + asciidoc = asciidoc.replace(/!\[([^\]]*)\]\(([^)]+)\)/g, 'image::$2[$1,width=100%]'); + asciidoc = asciidoc.replace(/image::([^\[]+)\[([^\]]+),width=100%\]/g, 'image::$1[$2,width=100%]'); + + // Convert links + asciidoc = asciidoc.replace(/\[([^\]]+)\]\(([^)]+)\)/g, 'link:$2[$1]'); + + // Convert horizontal rules + asciidoc = asciidoc.replace(/^---$/gm, '\'\'\''); + + // Convert unordered lists + asciidoc = asciidoc.replace(/^(\s*)\*\s+(.+)$/gm, '$1* $2'); + asciidoc = asciidoc.replace(/^(\s*)-\s+(.+)$/gm, '$1* $2'); + asciidoc = asciidoc.replace(/^(\s*)\+\s+(.+)$/gm, '$1* $2'); + + // Convert ordered lists + asciidoc = asciidoc.replace(/^(\s*)\d+\.\s+(.+)$/gm, '$1. 
$2'); + + // Convert blockquotes with attribution + asciidoc = asciidoc.replace(/^(>\s+.+(?:\n>\s+.+)*)/gm, (match) => { + const lines = match.split('\n').map(line => line.replace(/^>\s*/, '')); + + let quoteBodyLines: string[] = []; + let attributionLine: string | undefined; + + for (let i = lines.length - 1; i >= 0; i--) { + const line = lines[i].trim(); + if (line.startsWith('—') || line.startsWith('--')) { + attributionLine = line; + quoteBodyLines = lines.slice(0, i); + break; + } + } + + const quoteContent = quoteBodyLines.filter(l => l.trim() !== '').join('\n').trim(); + + if (attributionLine) { + let cleanedAttribution = attributionLine.replace(/^[—-]+/, '').trim(); + + let author = ''; + let source = ''; + + const linkMatch = cleanedAttribution.match(/^(.*?),?\s*link:([^[\\]]+)\[([^\\]]+)\]$/); + + if (linkMatch) { + author = linkMatch[1].trim(); + source = `link:${linkMatch[2].trim()}[${linkMatch[3].trim()}]`; + } else { + const parts = cleanedAttribution.split(',').map(p => p.trim()); + author = parts[0]; + if (parts.length > 1) { + source = parts.slice(1).join(', ').trim(); + } + } + + return `[quote, ${author}, ${source}]\n____\n${quoteContent}\n____`; + } else { + return `____\n${quoteContent}\n____`; + } + }); + + // Convert tables + asciidoc = asciidoc.replace(/(\|.*\|[\r\n]+\|[\s\-\|]*[\r\n]+(\|.*\|[\r\n]+)*)/g, (match) => { + const lines = match.trim().split('\n').filter(line => line.trim()); + if (lines.length < 2) return match; + + const headerRow = lines[0]; + const separatorRow = lines[1]; + const dataRows = lines.slice(2); + + if (!separatorRow.includes('-')) return match; + + let tableAsciidoc = '[cols="1,1"]\n|===\n'; + tableAsciidoc += headerRow + '\n'; + dataRows.forEach(row => { + tableAsciidoc += row + '\n'; + }); + tableAsciidoc += '|==='; + + return tableAsciidoc; + }); + + // Convert footnotes + const footnoteDefinitions: { [id: string]: string } = {}; + let tempAsciidoc = asciidoc; + + tempAsciidoc = 
tempAsciidoc.replace(/^\[\^([^\]]+)\]:\s*([\s\S]*?)(?=\n\[\^|\n---|\n##|\n###|\n####|\n#####|\n######|$)/gm, (_, id, text) => { + footnoteDefinitions[id] = text.trim(); + return ''; + }); + + asciidoc = tempAsciidoc.replace(/\[\^([^\]]+)\]/g, (match, id) => { + if (footnoteDefinitions[id]) { + return `footnote:[${footnoteDefinitions[id]}]`; + } + return match; + }); + + return asciidoc; +} + +/** + * Converts plain text to AsciiDoc format + */ +function convertPlainTextToAsciidoc(content: string): string { + return content + .replace(/\n\n/g, '\n\n') + .replace(/\n/g, ' +\n'); +} + +/** + * Normalizes text to d-tag format + */ +function normalizeDtag(text: string): string { + return text + .toLowerCase() + .replace(/[^a-z0-9]+/g, '-') + .replace(/^-+|-+$/g, ''); +} + +/** + * Processes wikilinks: [[target]] or [[target|display text]] + * Converts to wikilink:dtag[display] format + */ +function processWikilinks(content: string, linkBaseURL: string): string { + // Process bookstr macro wikilinks: [[book::...]] + content = content.replace(/\[\[book::([^\]]+)\]\]/g, (_match, bookContent) => { + const cleanContent = bookContent.trim(); + return `BOOKSTR:${cleanContent}`; + }); + + // Process standard wikilinks: [[Target Page]] or [[target page|see this]] + content = content.replace(/\[\[([^|\]]+)(?:\|([^\]]+))?\]\]/g, (_match, target, displayText) => { + const cleanTarget = target.trim(); + const cleanDisplay = displayText ? displayText.trim() : cleanTarget; + const dTag = normalizeDtag(cleanTarget); + + return `wikilink:${dTag}[${cleanDisplay}]`; + }); + + return content; +} + +/** + * Processes nostr: addresses + * Converts to link:nostr:...[...] 
format + */ +function processNostrAddresses(content: string, linkBaseURL: string): string { + // Match nostr: followed by valid bech32 string + return content.replace(/nostr:([a-z0-9]+[a-z0-9]{6,})/g, (_match, bech32Id) => { + return `link:nostr:${bech32Id}[${bech32Id}]`; + }); +} + +/** + * Processes hashtags + * Converts to hashtag:tag[#tag] format + */ +function processHashtags(content: string): string { + // Match # followed by word characters, avoiding those in URLs, code blocks, etc. + return content.replace(/\B#([a-zA-Z0-9_]+)/g, (_match, hashtag) => { + const normalizedHashtag = hashtag.toLowerCase(); + return `hashtag:${normalizedHashtag}[#${hashtag}]`; + }); +} diff --git a/src/detector.ts b/src/detector.ts index 631ff53..e2eb6db 100644 --- a/src/detector.ts +++ b/src/detector.ts @@ -15,7 +15,9 @@ export function detectFormat(content: string): ContentFormat { '----', // Listing block '....', // Literal block '|===', // Table - ':', // Attribute (common in AsciiDoc) + 'link:', // AsciiDoc link format + 'wikilink:', // Wikilink macro + 'hashtag:', // Hashtag macro ]; let asciidocScore = 0; diff --git a/src/extractors/metadata.ts b/src/extractors/metadata.ts new file mode 100644 index 0000000..35d0911 --- /dev/null +++ b/src/extractors/metadata.ts @@ -0,0 +1,263 @@ +import { NostrLink, Wikilink } from '../types'; + +export interface ExtractedMetadata { + nostrLinks: NostrLink[]; + wikilinks: Wikilink[]; + hashtags: string[]; + links: Array<{ url: string; text: string; isExternal: boolean }>; + media: string[]; +} + +/** + * Extracts metadata from content before processing + */ +export function extractMetadata(content: string, linkBaseURL: string): ExtractedMetadata { + return { + nostrLinks: extractNostrLinks(content), + wikilinks: extractWikilinks(content), + hashtags: extractHashtags(content), + links: extractLinks(content, linkBaseURL), + media: extractMedia(content), + }; +} + +/** + * Extract Nostr links from content + */ +function 
extractNostrLinks(content: string): NostrLink[] { + const nostrLinks: NostrLink[] = []; + const seen = new Set(); + + // Extract nostr: prefixed links + const nostrMatches = content.match(/nostr:([a-z0-9]+[a-z0-9]{6,})/g) || []; + nostrMatches.forEach(match => { + const id = match.substring(6); // Remove 'nostr:' + const type = getNostrType(id); + if (type && !seen.has(id)) { + seen.add(id); + nostrLinks.push({ + type, + id, + text: match, + bech32: id, + }); + } + }); + + return nostrLinks; +} + +/** + * Extract wikilinks from content + */ +function extractWikilinks(content: string): Wikilink[] { + const wikilinks: Wikilink[] = []; + const seen = new Set(); + + // Match [[target]] or [[target|display]] + const wikilinkPattern = /\[\[([^|\]]+)(?:\|([^\]]+))?\]\]/g; + let match; + + while ((match = wikilinkPattern.exec(content)) !== null) { + const target = match[1].trim(); + const display = match[2] ? match[2].trim() : target; + const dtag = normalizeDtag(target); + const key = `${dtag}|${display}`; + + if (!seen.has(key)) { + seen.add(key); + wikilinks.push({ + dtag, + display, + original: match[0], + }); + } + } + + return wikilinks; +} + +/** + * Extract hashtags from content + */ +function extractHashtags(content: string): string[] { + const hashtags: string[] = []; + const seen = new Set(); + + // Extract hashtags: #hashtag + const hashtagMatches = content.match(/#([a-zA-Z0-9_]+)/g) || []; + hashtagMatches.forEach(match => { + const tag = match.substring(1).toLowerCase(); + if (!seen.has(tag)) { + hashtags.push(tag); + seen.add(tag); + } + }); + + return hashtags; +} + +/** + * Extract regular links from content + */ +function extractLinks(content: string, linkBaseURL: string): Array<{ url: string; text: string; isExternal: boolean }> { + const links: Array<{ url: string; text: string; isExternal: boolean }> = []; + const seen = new Set(); + + // Extract markdown links: [text](url) + const markdownLinks = content.match(/\[([^\]]+)\]\(([^)]+)\)/g) || []; + 
markdownLinks.forEach(match => { + const linkMatch = match.match(/\[([^\]]+)\]\(([^)]+)\)/); + if (linkMatch) { + const [, text, url] = linkMatch; + if (!seen.has(url) && !isNostrUrl(url)) { + seen.add(url); + links.push({ + url, + text, + isExternal: isExternalUrl(url, linkBaseURL), + }); + } + } + }); + + // Extract asciidoc links: link:url[text] + const asciidocLinks = content.match(/link:([^\[]+)\[([^\]]+)\]/g) || []; + asciidocLinks.forEach(match => { + const linkMatch = match.match(/link:([^\[]+)\[([^\]]+)\]/); + if (linkMatch) { + const [, url, text] = linkMatch; + if (!seen.has(url) && !isNostrUrl(url)) { + seen.add(url); + links.push({ + url, + text, + isExternal: isExternalUrl(url, linkBaseURL), + }); + } + } + }); + + // Extract raw URLs (basic pattern) + const urlPattern = /https?:\/\/[^\s<>"']+/g; + const rawUrls = content.match(urlPattern) || []; + rawUrls.forEach(url => { + if (!seen.has(url) && !isNostrUrl(url)) { + seen.add(url); + links.push({ + url, + text: url, + isExternal: isExternalUrl(url, linkBaseURL), + }); + } + }); + + return links; +} + +/** + * Extract media URLs from content + */ +function extractMedia(content: string): string[] { + const media: string[] = []; + const seen = new Set(); + + // Extract markdown images: ![alt](url) + const imageMatches = content.match(/!\[[^\]]*\]\(([^)]+)\)/g) || []; + imageMatches.forEach(match => { + const url = match.match(/!\[[^\]]*\]\(([^)]+)\)/)?.[1]; + if (url && !seen.has(url)) { + if (isImageUrl(url) || isVideoUrl(url)) { + media.push(url); + seen.add(url); + } + } + }); + + // Extract asciidoc images: image::url[alt] + const asciidocImageMatches = content.match(/image::([^\[]+)\[/g) || []; + asciidocImageMatches.forEach(match => { + const url = match.match(/image::([^\[]+)\[/)?.[1]; + if (url && !seen.has(url)) { + if (isImageUrl(url) || isVideoUrl(url)) { + media.push(url); + seen.add(url); + } + } + }); + + // Extract raw image/video URLs + const urlPattern = /https?:\/\/[^\s<>"']+/g; + 
const rawUrls = content.match(urlPattern) || []; + rawUrls.forEach(url => { + if (!seen.has(url) && (isImageUrl(url) || isVideoUrl(url))) { + media.push(url); + seen.add(url); + } + }); + + return media; +} + +/** + * Get Nostr identifier type + */ +function getNostrType(id: string): 'npub' | 'nprofile' | 'nevent' | 'naddr' | 'note' | null { + if (id.startsWith('npub')) return 'npub'; + if (id.startsWith('nprofile')) return 'nprofile'; + if (id.startsWith('nevent')) return 'nevent'; + if (id.startsWith('naddr')) return 'naddr'; + if (id.startsWith('note')) return 'note'; + return null; +} + +/** + * Normalize text to d-tag format + */ +function normalizeDtag(text: string): string { + return text + .toLowerCase() + .replace(/[^a-z0-9]+/g, '-') + .replace(/^-+|-+$/g, ''); +} + +/** + * Check if URL is external + */ +function isExternalUrl(url: string, linkBaseURL: string): boolean { + if (!linkBaseURL) return true; + try { + // Use a simple string-based check for Node.js compatibility + // Extract hostname from URL string + const urlMatch = url.match(/^https?:\/\/([^\/]+)/); + const baseMatch = linkBaseURL.match(/^https?:\/\/([^\/]+)/); + + if (urlMatch && baseMatch) { + return urlMatch[1] !== baseMatch[1]; + } + return true; + } catch { + return true; + } +} + +/** + * Check if URL is a Nostr URL + */ +function isNostrUrl(url: string): boolean { + return url.startsWith('nostr:') || getNostrType(url) !== null; +} + +/** + * Check if URL is an image + */ +function isImageUrl(url: string): boolean { + return /\.(jpeg|jpg|png|gif|webp|svg)$/i.test(url); +} + +/** + * Check if URL is a video + */ +function isVideoUrl(url: string): boolean { + return /\.(mp4|webm|ogg)$/i.test(url); +} diff --git a/src/index.ts b/src/index.ts index cddc130..1c8a592 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,2 +1,3 @@ export * from './parser'; export * from './types'; +export * from './detector'; \ No newline at end of file diff --git a/src/parser.ts b/src/parser.ts index 
13e54a6..e12145c 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -1,12 +1,8 @@ import { ParserOptions, ProcessResult, ContentFormat } from './types'; -import { processAsciiDoc } from './processors/asciidoc'; -import { processMarkdown } from './processors/markdown'; -import { processPlainText } from './processors/plain'; -import { processNostrAddresses } from './processors/nostr'; import { detectFormat } from './detector'; -import { processLaTeX, hasLaTeX } from './processors/latex'; -import { processMusicalNotation, hasMusicalNotation } from './processors/music'; -import { ensureCodeHighlighting } from './processors/code'; +import { convertToAsciidoc } from './converters/to-asciidoc'; +import { processAsciidoc } from './processors/asciidoc'; +import { extractMetadata } from './extractors/metadata'; /** * Default parser options @@ -27,6 +23,8 @@ export function defaultOptions(): ParserOptions { * Main parser for Nostr event content * Handles multiple content formats: AsciiDoc, Markdown, code syntax, * LaTeX, musical notation, and nostr: prefixed addresses + * + * Everything is converted to AsciiDoc first, then processed through AsciiDoctor */ export class Parser { private options: Required; @@ -47,63 +45,45 @@ export class Parser { /** * Process Nostr event content and return HTML * Automatically detects the content format and processes accordingly + * Everything is converted to AsciiDoc first, then processed through AsciiDoctor */ async process(content: string): Promise { - // First, process nostr: addresses (if enabled) - if (this.options.enableNostrAddresses) { - content = processNostrAddresses(content, this.options.linkBaseURL); - } + // Extract metadata from original content (before conversion) + const metadata = extractMetadata(content, this.options.linkBaseURL); // Detect content format const format = detectFormat(content); - let result: ProcessResult; - - switch (format) { - case ContentFormat.AsciiDoc: - if (this.options.enableAsciiDoc) { - result = await 
processAsciiDoc(content, this.options.linkBaseURL); - } else if (this.options.enableMarkdown) { - // Fallback to markdown if AsciiDoc is disabled - result = await processMarkdown(content, this.options.linkBaseURL); - } else { - result = processPlainText(content); - } - break; - case ContentFormat.Markdown: - if (this.options.enableMarkdown) { - result = await processMarkdown(content, this.options.linkBaseURL); - } else { - // Fallback to plain text - result = processPlainText(content); - } - break; - default: - // Plain text or mixed content - result = processPlainText(content); - } - - // Post-process: handle LaTeX and musical notation in the HTML - if (this.options.enableLaTeX) { - result.hasLaTeX = hasLaTeX(result.content); - if (result.hasLaTeX) { - result.content = processLaTeX(result.content); + // Convert everything to AsciiDoc format first + const asciidocContent = convertToAsciidoc( + content, + format, + this.options.linkBaseURL, + { + enableNostrAddresses: this.options.enableNostrAddresses, } - } + ); - if (this.options.enableMusicalNotation) { - result.hasMusicalNotation = hasMusicalNotation(result.content); - if (result.hasMusicalNotation) { - result.content = processMusicalNotation(result.content); + // Process through AsciiDoctor + const result = await processAsciidoc( + asciidocContent, + { + enableCodeHighlighting: this.options.enableCodeHighlighting, + enableLaTeX: this.options.enableLaTeX, + enableMusicalNotation: this.options.enableMusicalNotation, + originalContent: content, // Pass original for LaTeX detection } - } + ); - // Ensure code highlighting is applied if enabled - if (this.options.enableCodeHighlighting) { - result.content = ensureCodeHighlighting(result.content); - } - - return result; + // Combine with extracted metadata + return { + ...result, + nostrLinks: metadata.nostrLinks, + wikilinks: metadata.wikilinks, + hashtags: metadata.hashtags, + links: metadata.links, + media: metadata.media, + }; } } diff --git 
a/src/processors/asciidoc-links.ts b/src/processors/asciidoc-links.ts deleted file mode 100644 index a9aa9ea..0000000 --- a/src/processors/asciidoc-links.ts +++ /dev/null @@ -1,66 +0,0 @@ -/** - * Normalizes a d tag according to NIP-54 rules - */ -export function normalizeDTag(dTag: string): string { - // Convert to lowercase - let normalized = dTag.toLowerCase(); - - // Convert whitespace to hyphens - normalized = normalized.replace(/\s+/g, '-'); - - // Remove punctuation and symbols (keep alphanumeric, hyphens, and non-ASCII) - normalized = normalized.replace(/[^a-z0-9\-\u0080-\uFFFF]/g, ''); - - // Collapse multiple consecutive hyphens - normalized = normalized.replace(/-+/g, '-'); - - // Remove leading and trailing hyphens - normalized = normalized.replace(/^-+|-+$/g, ''); - - return normalized; -} - -/** - * Rewrites wikilinks and nostr: links in AsciiDoc content - */ -export function rewriteAsciiDocLinks(content: string, linkBaseURL: string): string { - // Rewrite wikilinks: [[target]] or [[target|display text]] - // Format: [[target]] -> link:url[display] - const wikilinkRegex = /\[\[([^\]]+)\]\]/g; - content = content.replace(wikilinkRegex, (match, inner) => { - let target: string; - let display: string; - - if (inner.includes('|')) { - const parts = inner.split('|', 2); - target = parts[0].trim(); - display = parts[1].trim(); - } else { - target = inner.trim(); - display = target; - } - - // Normalize the d tag - const normalized = normalizeDTag(target); - - // Create the link - if (linkBaseURL) { - const url = `${linkBaseURL}/events?d=${normalized}`; - return `link:${url}[${display}]`; - } - return `link:#${normalized}[${display}]`; - }); - - // Rewrite nostr: links: nostr:naddr1... or nostr:nevent1... - // Format: nostr:naddr1... -> link:url[nostr:naddr1...] 
- const nostrLinkRegex = /nostr:(naddr1[^\s\]]+|nevent1[^\s\]]+)/g; - content = content.replace(nostrLinkRegex, (match, nostrID) => { - if (linkBaseURL) { - const url = `${linkBaseURL}/events?id=${nostrID}`; - return `link:${url}[${match}]`; - } - return match; - }); - - return content; -} diff --git a/src/processors/asciidoc.ts b/src/processors/asciidoc.ts index 7a545eb..bae2c02 100644 --- a/src/processors/asciidoc.ts +++ b/src/processors/asciidoc.ts @@ -1,49 +1,150 @@ import asciidoctor from '@asciidoctor/core'; import { ProcessResult } from '../types'; -import { rewriteAsciiDocLinks } from './asciidoc-links'; -import { extractTOC, sanitizeHTML, processLinks } from './html-utils'; +import { extractTOC, sanitizeHTML } from './html-utils'; +import { postProcessHtml } from './html-postprocess'; const asciidoctorInstance = asciidoctor(); +export interface ProcessOptions { + enableCodeHighlighting?: boolean; + enableLaTeX?: boolean; + enableMusicalNotation?: boolean; + originalContent?: string; // Original content for LaTeX detection +} + +/** + * Processes AsciiDoc content to HTML using AsciiDoctor + * Uses AsciiDoctor's built-in highlight.js and LaTeX support + */ +export async function processAsciidoc( + content: string, + options: ProcessOptions = {} +): Promise { + const { + enableCodeHighlighting = true, + enableLaTeX = true, + enableMusicalNotation = true, + } = options; + + // Check if content starts with level 3+ headers + // Asciidoctor article doctype requires level 1 (=) or level 2 (==) before level 3 (===) + // If content starts with level 3+, use book doctype + const firstHeaderMatch = content.match(/^(={1,6})\s+/m); + let doctype: 'article' | 'book' = 'article'; + + if (firstHeaderMatch) { + const firstHeaderLevel = firstHeaderMatch[1].length; + if (firstHeaderLevel >= 3) { + doctype = 'book'; + } + } + + try { + const result = asciidoctorInstance.convert(content, { + safe: 'safe', + backend: 'html5', + doctype: doctype, + attributes: { + 'showtitle': 
true, + 'sectanchors': true, + 'sectlinks': true, + 'toc': 'left', + 'toclevels': 6, + 'toc-title': 'Table of Contents', + 'source-highlighter': enableCodeHighlighting ? 'highlight.js' : 'none', + 'stem': enableLaTeX ? 'latexmath' : 'none', + 'data-uri': true, + 'imagesdir': '', + 'linkcss': false, + 'stylesheet': '', + 'stylesdir': '', + 'prewrap': true, + 'sectnums': false, + 'sectnumlevels': 6, + 'experimental': true, + 'compat-mode': false, + 'attribute-missing': 'warn', + 'attribute-undefined': 'warn', + 'skip-front-matter': true, + 'source-indent': 0, + 'indent': 0, + 'tabsize': 2, + 'tabwidth': 2, + 'hardbreaks': false, + 'paragraph-rewrite': 'normal', + 'sectids': true, + 'idprefix': '', + 'idseparator': '-', + 'sectidprefix': '', + 'sectidseparator': '-' + } + }); + + const htmlString = typeof result === 'string' ? result : result.toString(); + + // Extract table of contents from HTML + const { toc, contentWithoutTOC } = extractTOC(htmlString); + + // Sanitize HTML to prevent XSS + const sanitized = sanitizeHTML(contentWithoutTOC); + + // Post-process HTML: convert macros to HTML, add styling, etc. 
+ const processed = postProcessHtml(sanitized, { + enableMusicalNotation, + }); + + // Also process TOC + const tocSanitized = sanitizeHTML(toc); + const tocProcessed = postProcessHtml(tocSanitized, { + enableMusicalNotation: false, // Don't process music in TOC + }); + + // Check for LaTeX in original content (more reliable than checking HTML) + const contentToCheck = options.originalContent || content; + const hasLaTeX = enableLaTeX && hasMathContent(contentToCheck); + + // Check for musical notation in processed HTML + const hasMusicalNotation = enableMusicalNotation && ( + /class="abc-notation"|class="lilypond-notation"|class="chord"|class="musicxml-notation"/.test(processed) + ); + + return { + content: processed, + tableOfContents: tocProcessed, + hasLaTeX, + hasMusicalNotation, + nostrLinks: [], // Will be populated by metadata extraction + wikilinks: [], + hashtags: [], + links: [], + media: [], + }; + } catch (error) { + // Fallback to plain text + return { + content: `

${sanitizeHTML(content)}

`, + tableOfContents: '', + hasLaTeX: false, + hasMusicalNotation: false, + nostrLinks: [], + wikilinks: [], + hashtags: [], + links: [], + media: [], + }; + } +} + /** - * Processes AsciiDoc content to HTML + * Check if content has LaTeX math + * Based on jumble's detection pattern */ -export async function processAsciiDoc(content: string, linkBaseURL: string): Promise { - // Rewrite links in AsciiDoc content - const processedContent = rewriteAsciiDocLinks(content, linkBaseURL); - - // Convert AsciiDoc to HTML - const html = asciidoctorInstance.convert(processedContent, { - safe: 'safe', - backend: 'html5', - doctype: 'article', - attributes: { - showtitle: true, - icons: 'font', - sectanchors: true, - sectlinks: true, - toc: 'left', - toclevels: 3, - }, - }) as string; - - // Extract table of contents from HTML - const { toc, contentWithoutTOC } = extractTOC(html); - - // Sanitize HTML to prevent XSS - const sanitized = sanitizeHTML(contentWithoutTOC); - - // Process links: make external links open in new tab, local links in same tab - const processed = processLinks(sanitized, linkBaseURL); - - // Also sanitize and process links in TOC - const tocSanitized = sanitizeHTML(toc); - const tocProcessed = processLinks(tocSanitized, linkBaseURL); - - return { - content: processed, - tableOfContents: tocProcessed, - hasLaTeX: false, - hasMusicalNotation: false, - }; +function hasMathContent(content: string): boolean { + // Check for inline math: $...$ or \(...\) + const inlineMath = /\$[^$]+\$|\\\([^)]+\\\)/.test(content); + + // Check for block math: $$...$$ or \[...\] + const blockMath = /\$\$[\s\S]*?\$\$|\\\[[\s\S]*?\\\]/.test(content); + + return inlineMath || blockMath; } diff --git a/src/processors/code.ts b/src/processors/code.ts deleted file mode 100644 index 7d76303..0000000 --- a/src/processors/code.ts +++ /dev/null @@ -1,52 +0,0 @@ -import hljs from 'highlight.js'; - -/** - * Ensures code blocks have syntax highlighting using highlight.js - */ -export function 
ensureCodeHighlighting(html: string): string { - // Pattern to match code blocks:
...
or
...
- const codeBlockRegex = /
]*>(.*?)<\/code><\/pre>/gs;
-
-  return html.replace(codeBlockRegex, (match, lang, code) => {
-    // Unescape HTML entities in code
-    const unescapedCode = unescapeHTML(code);
-
-    // Highlight the code
-    try {
-      let highlighted: hljs.HighlightResult;
-
-      if (lang) {
-        // Try to get the language
-        const language = hljs.getLanguage(lang);
-        if (language) {
-          highlighted = hljs.highlight(unescapedCode, { language: lang });
-        } else {
-          // Try auto-detection
-          highlighted = hljs.highlightAuto(unescapedCode);
-        }
-      } else {
-        // Auto-detect language
-        highlighted = hljs.highlightAuto(unescapedCode);
-      }
-
-      // Return highlighted code with proper classes
-      const langClass = highlighted.language ? ` class="language-${highlighted.language}"` : '';
-      return `
${highlighted.value}
`; - } catch (error) { - // If highlighting fails, return original - return match; - } - }); -} - -/** - * Unescapes HTML entities - */ -function unescapeHTML(text: string): string { - return text - .replace(/</g, '<') - .replace(/>/g, '>') - .replace(/&/g, '&') - .replace(/"/g, '"') - .replace(/'/g, "'"); -} diff --git a/src/processors/html-postprocess.ts b/src/processors/html-postprocess.ts new file mode 100644 index 0000000..194d1ae --- /dev/null +++ b/src/processors/html-postprocess.ts @@ -0,0 +1,192 @@ +import { processMusicalNotation } from './music'; + +export interface PostProcessOptions { + enableMusicalNotation?: boolean; +} + +/** + * Post-processes HTML output from AsciiDoctor + * Converts AsciiDoc macros to HTML with data attributes and CSS classes + */ +export function postProcessHtml(html: string, options: PostProcessOptions = {}): string { + let processed = html; + + // Convert bookstr markers to HTML placeholders + processed = processed.replace(/BOOKSTR:([^<>\s]+)/g, (_match, bookContent) => { + const escaped = bookContent.replace(/"/g, '"').replace(/'/g, '''); + return ``; + }); + + // Convert hashtag links to HTML + processed = processed.replace(/hashtag:([^[]+)\[([^\]]+)\]/g, (_match, normalizedHashtag, displayText) => { + return `${displayText}`; + }); + + // Convert wikilink:dtag[display] format to HTML + processed = processed.replace(/wikilink:([^[]+)\[([^\]]+)\]/g, (_match, dTag, displayText) => { + const escapedDtag = dTag.replace(/"/g, '"'); + const escapedDisplay = displayText.replace(/"/g, '"'); + return `${displayText}`; + }); + + // Convert nostr: links to HTML + processed = processed.replace(/link:nostr:([^[]+)\[([^\]]+)\]/g, (_match, bech32Id, displayText) => { + const nostrType = getNostrType(bech32Id); + + if (nostrType === 'nevent' || nostrType === 'naddr' || nostrType === 'note') { + // Render as embedded event placeholder + const escaped = bech32Id.replace(/"/g, '"'); + return `
Loading embedded event...
`; + } else if (nostrType === 'npub' || nostrType === 'nprofile') { + // Render as user handle + const escaped = bech32Id.replace(/"/g, '"'); + return `@${displayText}`; + } else { + // Fallback to regular link + const escaped = bech32Id.replace(/"/g, '"'); + return `${displayText}`; + } + }); + + // Process images: add max-width styling and data attributes + processed = processImages(processed); + + // Process musical notation if enabled + if (options.enableMusicalNotation) { + processed = processMusicalNotation(processed); + } + + // Clean up any leftover markdown syntax + processed = cleanupMarkdown(processed); + + // Add styling classes + processed = addStylingClasses(processed); + + // Hide raw ToC text + processed = hideRawTocText(processed); + + return processed; +} + +/** + * Get Nostr identifier type + */ +function getNostrType(id: string): 'npub' | 'nprofile' | 'nevent' | 'naddr' | 'note' | null { + if (id.startsWith('npub')) return 'npub'; + if (id.startsWith('nprofile')) return 'nprofile'; + if (id.startsWith('nevent')) return 'nevent'; + if (id.startsWith('naddr')) return 'naddr'; + if (id.startsWith('note')) return 'note'; + return null; +} + +/** + * Process images: add max-width styling and data attributes + */ +function processImages(html: string): string { + const imageUrls: string[] = []; + const imageUrlRegex = /]+src=["']([^"']+)["'][^>]*>/gi; + let match; + + while ((match = imageUrlRegex.exec(html)) !== null) { + const url = match[1]; + if (url && !imageUrls.includes(url)) { + imageUrls.push(url); + } + } + + return html.replace(/]+)>/gi, (imgTag, attributes) => { + const srcMatch = attributes.match(/src=["']([^"']+)["']/i); + if (!srcMatch) return imgTag; + + const src = srcMatch[1]; + const currentIndex = imageUrls.indexOf(src); + + let updatedAttributes = attributes; + + if (updatedAttributes.match(/class=["']/i)) { + updatedAttributes = updatedAttributes.replace(/class=["']([^"']*)["']/i, (_match, classes) => { + const cleanedClasses = 
classes.replace(/max-w-\[?[^\s\]]+\]?/g, '').trim(); + const newClasses = cleanedClasses + ? `${cleanedClasses} max-w-[400px] object-contain cursor-zoom-in` + : 'max-w-[400px] object-contain cursor-zoom-in'; + return `class="${newClasses}"`; + }); + } else { + updatedAttributes += ` class="max-w-[400px] h-auto object-contain cursor-zoom-in"`; + } + + updatedAttributes += ` data-asciidoc-image="true" data-image-index="${currentIndex}" data-image-src="${src.replace(/"/g, '"')}"`; + + return ``; + }); +} + +/** + * Clean up leftover markdown syntax + */ +function cleanupMarkdown(html: string): string { + let cleaned = html; + + // Clean up markdown image syntax + cleaned = cleaned.replace(/!\[([^\]]*)\]\(([^)]+)\)/g, (_match, alt, url) => { + const altText = alt || ''; + return `${altText}`; + }); + + // Clean up markdown link syntax + cleaned = cleaned.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (_match, text, url) => { + if (cleaned.includes(`href="${url}"`)) { + return _match; + } + return `${text} `; + }); + + return cleaned; +} + +/** + * Add proper CSS classes for styling + */ +function addStylingClasses(html: string): string { + let styled = html; + + // Add strikethrough styling + styled = styled.replace(/([^<]+)<\/span>/g, '$1'); + + // Add subscript styling + styled = styled.replace(/([^<]+)<\/span>/g, '$1'); + + // Add superscript styling + styled = styled.replace(/([^<]+)<\/span>/g, '$1'); + + // Add code highlighting classes + styled = styled.replace(/
/g, '
');
+  styled = styled.replace(//g, '');
+  
+  return styled;
+}
+
+/**
+ * Strip AsciiDoc ToC / "Assumptions" placeholder text that reached the HTML as a literal heading or paragraph; returns the cleaned HTML (matching is per line: `.` does not cross newlines).
+ */
+function hideRawTocText(html: string): string {
+  let cleaned = html;
+
+  cleaned = cleaned.replace( // whole <h1>..<h6> element whose text reads "Table of Contents ... (<digits>)"
+    /<h[1-6][^>]*>.*?Table of Contents.*?\(\d+\).*?<\/h[1-6]>/gi,
+    ''
+  );
+
+  cleaned = cleaned.replace( // same placeholder text when it was rendered as a paragraph
+    /<p[^>]*>.*?Table of Contents.*?\(\d+\).*?<\/p>/gi,
+    ''
+  );
+
+  cleaned = cleaned.replace( // paragraph containing "Assumptions ... [n=0]"
+    /<p[^>]*>.*?Assumptions.*?\[n=0\].*?<\/p>/gi,
+    ''
+  );
+
+  return cleaned;
+}
diff --git a/src/processors/latex.ts b/src/processors/latex.ts
deleted file mode 100644
index 5b52f3e..0000000
--- a/src/processors/latex.ts
+++ /dev/null
@@ -1,37 +0,0 @@
-/**
- * Checks if content contains LaTeX math expressions
- */
-export function hasLaTeX(content: string): boolean {
-  // Check for inline math: $...$ or \(...\)
-  const inlineMathPattern = /\$[^$]+\$|\\\([^)]+\\\)/;
-  // Check for block math: $$...$$ or \[...\]
-  const blockMathPattern = /\$\$[^$]+\$\$|\\\[[^\]]+\\\]/;
-
-  return inlineMathPattern.test(content) || blockMathPattern.test(content);
-}
-
-/**
- * Processes LaTeX math expressions in HTML content
- * Wraps LaTeX expressions in appropriate HTML for rendering with MathJax or KaTeX
- */
-export function processLaTeX(html: string): string {
-  // Process block math: $$...$$ or \[...\]
-  // Convert to 
...
for MathJax/KaTeX - const blockMathPattern = /\$\$([^$]+)\$\$|\\\[([^\]]+)\\\]/gs; - html = html.replace(blockMathPattern, (match, dollarContent, bracketContent) => { - const mathContent = (dollarContent || bracketContent || '').trim(); - // Wrap in appropriate tags for MathJax/KaTeX - return `
\\[${mathContent}\\]
`; - }); - - // Process inline math: $...$ or \(...\) - // Convert to ... for MathJax/KaTeX - const inlineMathPattern = /\$([^$\n]+)\$|\\\(([^)]+)\\\)/g; - html = html.replace(inlineMathPattern, (match, dollarContent, bracketContent) => { - const mathContent = (dollarContent || bracketContent || '').trim(); - // Wrap in appropriate tags for MathJax/KaTeX - return `\\(${mathContent}\\)`; - }); - - return html; -} diff --git a/src/processors/markdown-links.ts b/src/processors/markdown-links.ts deleted file mode 100644 index 27e155d..0000000 --- a/src/processors/markdown-links.ts +++ /dev/null @@ -1,49 +0,0 @@ -import { normalizeDTag } from './asciidoc-links'; - -/** - * Rewrites wikilinks and nostr: links in Markdown content - */ -export function rewriteMarkdownLinks(content: string, linkBaseURL: string): string { - // Rewrite wikilinks: [[target]] or [[target|display text]] - const wikilinkRegex = /\[\[([^\]]+)\]\]/g; - content = content.replace(wikilinkRegex, (match, inner) => { - let target: string; - let display: string; - - if (inner.includes('|')) { - const parts = inner.split('|', 2); - target = parts[0].trim(); - display = parts[1].trim(); - } else { - target = inner.trim(); - display = target; - } - - const normalized = normalizeDTag(target); - - if (linkBaseURL) { - const url = `${linkBaseURL}/events?d=${normalized}`; - return `[${display}](${url})`; - } - return `[${display}](#${normalized})`; - }); - - // Rewrite nostr: links in Markdown - const nostrLinkRegex = /nostr:(naddr1[^\s\]]+|nevent1[^\s\]]+|note1[^\s\]]+|npub1[^\s\]]+|nprofile1[^\s\]]+)/g; - content = content.replace(nostrLinkRegex, (match, nostrID) => { - if (linkBaseURL) { - let url: string; - if (nostrID.startsWith('npub')) { - url = `${linkBaseURL}/profile?pubkey=${nostrID}`; - } else if (nostrID.startsWith('nprofile')) { - url = `${linkBaseURL}/profile?id=${nostrID}`; - } else { - url = `${linkBaseURL}/events?id=${nostrID}`; - } - return `[${match}](${url})`; - } - return match; - }); - - 
return content; -} diff --git a/src/processors/markdown.ts b/src/processors/markdown.ts deleted file mode 100644 index 28e8b75..0000000 --- a/src/processors/markdown.ts +++ /dev/null @@ -1,36 +0,0 @@ -import { marked } from 'marked'; -import { ProcessResult } from '../types'; -import { rewriteMarkdownLinks } from './markdown-links'; -import { sanitizeHTML, processLinks } from './html-utils'; - -// Configure marked options -marked.setOptions({ - breaks: true, - gfm: true, - headerIds: true, - mangle: false, -}); - -/** - * Processes Markdown content to HTML - */ -export async function processMarkdown(content: string, linkBaseURL: string): Promise { - // Rewrite links in Markdown content - const processedContent = rewriteMarkdownLinks(content, linkBaseURL); - - // Convert Markdown to HTML - const html = await marked.parse(processedContent) as string; - - // Sanitize HTML to prevent XSS - const sanitized = sanitizeHTML(html); - - // Process links: make external links open in new tab, local links in same tab - const processed = processLinks(sanitized, linkBaseURL); - - return { - content: processed, - tableOfContents: '', - hasLaTeX: false, - hasMusicalNotation: false, - }; -} diff --git a/src/processors/music.ts b/src/processors/music.ts index 5064894..1617df1 100644 --- a/src/processors/music.ts +++ b/src/processors/music.ts @@ -1,57 +1,32 @@ -/** - * Checks if content contains musical notation - */ -export function hasMusicalNotation(content: string): boolean { - // Check for ABC notation: X:1, K:C, etc. - const abcPattern = /X:\s*\d+|K:\s*[A-G]|M:\s*\d+\/\d+/i; - // Check for LilyPond notation: \relative, \clef, etc. - const lilypondPattern = /\\relative|\\clef|\\key|\\time/; - // Check for MusicXML-like tags: , , etc. - const musicxmlPattern = /||/i; - // Check for simple chord notation: [C], [Am], etc. 
- const chordPattern = /\[[A-G][#b]?m?[0-9]?\]/; - - return abcPattern.test(content) || - lilypondPattern.test(content) || - musicxmlPattern.test(content) || - chordPattern.test(content); -} - /** * Processes musical notation in HTML content * Wraps musical notation in appropriate HTML for rendering */ export function processMusicalNotation(html: string): string { // Process ABC notation blocks - // ABC notation typically starts with X:1 and contains multiple lines const abcBlockPattern = /(X:\s*\d+[^\n]*\n(?:[^\n]+\n)*)/gs; html = html.replace(abcBlockPattern, (match) => { const abcContent = match.trim(); - // Wrap in a div for ABC.js or similar renderer return `
${abcContent}
`; }); // Process LilyPond notation blocks - // LilyPond notation is typically in code blocks or between \relative and } const lilypondPattern = /(\\relative[^}]+})/gs; html = html.replace(lilypondPattern, (match) => { const lilypondContent = match.trim(); - // Wrap in a div for LilyPond rendering return `
${lilypondContent}
`; }); // Process inline chord notation: [C], [Am], [F#m7], etc. const chordPattern = /\[([A-G][#b]?m?[0-9]?[^\[\]]*)\]/g; html = html.replace(chordPattern, (match, chord) => { - // Wrap in a span for chord rendering return `[${chord}]`; }); - // Process MusicXML-like notation (if present in content) + // Process MusicXML-like notation const musicxmlPattern = /(]*>.*?<\/music>)/gs; html = html.replace(musicxmlPattern, (match) => { const musicxmlContent = match.trim(); - // Wrap in a div for MusicXML rendering return `
${musicxmlContent}
`; }); diff --git a/src/processors/nostr.ts b/src/processors/nostr.ts deleted file mode 100644 index 5ea176c..0000000 --- a/src/processors/nostr.ts +++ /dev/null @@ -1,28 +0,0 @@ -/** - * Processes nostr: prefixed addresses - */ -export function processNostrAddresses(content: string, linkBaseURL: string): string { - // Pattern: nostr:naddr1..., nostr:nevent1..., nostr:note1..., nostr:npub1..., nostr:nprofile1... - const nostrPattern = /nostr:([a-z0-9]+[a-z0-9]{1,})/g; - - return content.replace(nostrPattern, (match, nostrID) => { - // If linkBaseURL is set, convert to a link - if (linkBaseURL) { - // Determine the type and create appropriate link - if (nostrID.startsWith('naddr')) { - return `${match}`; - } else if (nostrID.startsWith('nevent')) { - return `${match}`; - } else if (nostrID.startsWith('note')) { - return `${match}`; - } else if (nostrID.startsWith('npub')) { - return `${match}`; - } else if (nostrID.startsWith('nprofile')) { - return `${match}`; - } - } - - // Return as a span with class for styling - return `${match}`; - }); -} diff --git a/src/processors/plain.ts b/src/processors/plain.ts deleted file mode 100644 index 7e466e5..0000000 --- a/src/processors/plain.ts +++ /dev/null @@ -1,42 +0,0 @@ -import { ProcessResult } from '../types'; - -/** - * Escapes HTML special characters - */ -function escapeHTML(text: string): string { - return text - .replace(/&/g, '&') - .replace(//g, '>') - .replace(/"/g, '"') - .replace(/'/g, '''); -} - -/** - * Processes plain text content with basic formatting - */ -export function processPlainText(text: string): ProcessResult { - // Escape HTML - let html = escapeHTML(text); - - // Convert line breaks to
- html = html.replace(/\n/g, '
\n'); - - // Convert double line breaks to paragraphs - const paragraphs = html.split('
\n
\n'); - const result: string[] = []; - - for (const para of paragraphs) { - const trimmed = para.trim(); - if (trimmed) { - result.push(`

${trimmed}

`); - } - } - - return { - content: result.join('\n'), - tableOfContents: '', - hasLaTeX: false, - hasMusicalNotation: false, - }; -} diff --git a/src/types.ts b/src/types.ts index 3753f1d..8fcf337 100644 --- a/src/types.ts +++ b/src/types.ts @@ -18,6 +18,25 @@ export interface ParserOptions { enableNostrAddresses?: boolean; } +/** + * Nostr link information + */ +export interface NostrLink { + type: 'npub' | 'nprofile' | 'nevent' | 'naddr' | 'note'; + id: string; + text: string; + bech32: string; +} + +/** + * Wikilink information + */ +export interface Wikilink { + dtag: string; + display: string; + original: string; +} + /** * Result of processing content */ @@ -30,6 +49,16 @@ export interface ProcessResult { hasLaTeX: boolean; /** Indicates if musical notation was found */ hasMusicalNotation: boolean; + /** Extracted Nostr links */ + nostrLinks: NostrLink[]; + /** Extracted wikilinks */ + wikilinks: Wikilink[]; + /** Extracted hashtags */ + hashtags: string[]; + /** Extracted regular links */ + links: Array<{ url: string; text: string; isExternal: boolean }>; + /** Extracted media URLs */ + media: string[]; } /** diff --git a/src/types/asciidoctor.d.ts b/src/types/asciidoctor.d.ts new file mode 100644 index 0000000..99cf1bc --- /dev/null +++ b/src/types/asciidoctor.d.ts @@ -0,0 +1,20 @@ +/** + * Type declarations for @asciidoctor/core + * These are minimal types - the actual types should come from the package + */ +declare module '@asciidoctor/core' { + interface ConvertOptions { + safe?: string; + backend?: string; + doctype?: string; + attributes?: Record; + extension_registry?: any; + } + + interface Asciidoctor { + convert(content: string, options?: ConvertOptions): string | any; + } + + function asciidoctor(): Asciidoctor; + export default asciidoctor; +}