/** * AsciiDoc Content Parsing Service * * Handles parsing AsciiDoc content into hierarchical structures for publication. * Separated from metadata extraction to maintain single responsibility principle. */ // @ts-ignore import Processor from "asciidoctor"; import type { Document } from "asciidoctor"; import { parseSimpleAttributes, extractDocumentMetadata, extractSectionMetadata, } from "./asciidoc_metadata.ts"; export interface ParsedAsciiDoc { metadata: { title?: string; authors?: string[]; version?: string; edition?: string; publicationDate?: string; publisher?: string; summary?: string; coverImage?: string; isbn?: string; tags?: string[]; source?: string; publishedBy?: string; type?: string; autoUpdate?: "yes" | "ask" | "no"; customAttributes?: Record; }; content: string; title: string; sections: Array<{ metadata: { title?: string; authors?: string[]; version?: string; edition?: string; publicationDate?: string; publisher?: string; summary?: string; coverImage?: string; isbn?: string; tags?: string[]; source?: string; publishedBy?: string; type?: string; autoUpdate?: "yes" | "ask" | "no"; customAttributes?: Record; }; content: string; title: string; }>; } /** * Creates an Asciidoctor processor instance */ function createProcessor() { return Processor(); } /** * Helper function to determine the header level of a section */ function getSectionLevel(sectionContent: string): number { const lines = sectionContent.split(/\r?\n/); for (const line of lines) { const match = line.match(/^(=+)\s+/); if (match) { return match[1].length; } } return 0; } /** * Helper function to extract just the intro content (before first subsection) */ function extractIntroContent( sectionContent: string, currentLevel: number, ): string { const lines = sectionContent.split(/\r?\n/); const introLines: string[] = []; let foundHeader = false; for (const line of lines) { const headerMatch = line.match(/^(=+)\s+/); if (headerMatch) { const level = headerMatch[1].length; if (level === currentLevel && !foundHeader) { // This is the section header itself foundHeader = true; continue; // Skip the header line itself for intro content } else if (level > currentLevel) { // This is a subsection, stop collecting intro content break; } } else if (foundHeader) { // This is intro content after the header introLines.push(line); } } return introLines.join("\n").trim(); } /** * Parses AsciiDoc content into sections with metadata */ export function parseAsciiDocWithMetadata(content: string): ParsedAsciiDoc { const asciidoctor = createProcessor(); const document = asciidoctor.load(content, { standalone: false }) as Document; const { metadata: docMetadata } = extractDocumentMetadata(content); // Parse the original content to find section attributes const lines = content.split(/\r?\n/); const sectionsWithMetadata: Array<{ metadata: ParsedAsciiDoc["sections"][0]["metadata"]; content: string; title: string; }> = []; let currentSection: string | null = null; let currentSectionContent: string[] = []; for (const line of lines) { if (line.match(/^==\s+/)) { // Save previous section if exists if (currentSection) { const sectionContent = currentSectionContent.join("\n"); sectionsWithMetadata.push(extractSectionMetadata(sectionContent)); } // Start new section currentSection = line; currentSectionContent = [line]; } else if (currentSection) { currentSectionContent.push(line); } } // Save the last section if (currentSection) { const sectionContent = currentSectionContent.join("\n"); sectionsWithMetadata.push(extractSectionMetadata(sectionContent)); } return { metadata: docMetadata, content: document.getSource(), title: docMetadata.title || "", sections: sectionsWithMetadata, }; } /** * Iterative AsciiDoc parsing based on specified level * Level 2: Only == sections become content events (containing all subsections) * Level 3: == sections become indices + content events, === sections become content events * Level 4: === sections become indices + content events, ==== sections become content events, etc. */ export function parseAsciiDocIterative( content: string, parseLevel: number = 2, ): ParsedAsciiDoc { const asciidoctor = createProcessor(); const document = asciidoctor.load(content, { standalone: false }) as Document; // Extract document metadata using the metadata extraction functions const { metadata: docMetadata } = extractDocumentMetadata(content); const lines = content.split(/\r?\n/); const sections: Array<{ metadata: ParsedAsciiDoc["sections"][0]["metadata"]; content: string; title: string; }> = []; if (parseLevel === 2) { // Level 2: Only == sections become events const level2Pattern = /^==\s+/; let currentSection: string | null = null; let currentSectionContent: string[] = []; let documentContent: string[] = []; let inDocumentHeader = true; for (const line of lines) { if (line.match(level2Pattern)) { inDocumentHeader = false; // Save previous section if exists if (currentSection) { const sectionContent = currentSectionContent.join("\n"); const sectionMeta = extractSectionMetadata(sectionContent); // For level 2, preserve the full content including the header sections.push({ ...sectionMeta, content: sectionContent, // Use full content, not stripped }); } // Start new section currentSection = line; currentSectionContent = [line]; } else if (currentSection) { currentSectionContent.push(line); } else if (inDocumentHeader) { documentContent.push(line); } } // Save the last section if (currentSection) { const sectionContent = currentSectionContent.join("\n"); const sectionMeta = extractSectionMetadata(sectionContent); // For level 2, preserve the full content including the header sections.push({ ...sectionMeta, content: sectionContent, // Use full content, not stripped }); } const docContent = documentContent.join("\n"); return { metadata: docMetadata, content: docContent, title: docMetadata.title || "", sections: sections, }; } // Level 3+: Parse hierarchically // All levels from 2 to parseLevel-1 are indices (title only) // Level parseLevel are content sections (full content) // First, collect all sections at the content level (parseLevel) const contentLevelPattern = new RegExp(`^${"=".repeat(parseLevel)}\\s+`); let currentSection: string | null = null; let currentSectionContent: string[] = []; let documentContent: string[] = []; let inDocumentHeader = true; for (const line of lines) { if (line.match(contentLevelPattern)) { inDocumentHeader = false; // Save previous section if exists if (currentSection) { const sectionContent = currentSectionContent.join("\n"); const sectionMeta = extractSectionMetadata(sectionContent); sections.push({ ...sectionMeta, content: sectionContent, // Full content including headers }); } // Start new content section currentSection = line; currentSectionContent = [line]; } else if (currentSection) { // Continue collecting content for current section currentSectionContent.push(line); } else if (inDocumentHeader) { documentContent.push(line); } } // Save the last section if (currentSection) { const sectionContent = currentSectionContent.join("\n"); const sectionMeta = extractSectionMetadata(sectionContent); sections.push({ ...sectionMeta, content: sectionContent, // Full content including headers }); } // Now collect index sections (all levels from 2 to parseLevel-1) // These should be shown as navigation/structure but not full content const indexSections: Array<{ metadata: ParsedAsciiDoc["sections"][0]["metadata"]; content: string; title: string; level: number; }> = []; for (let level = 2; level < parseLevel; level++) { const levelPattern = new RegExp(`^${"=".repeat(level)}\\s+(.+)$`, "gm"); const matches = content.matchAll(levelPattern); for (const match of matches) { const title = match[1].trim(); indexSections.push({ metadata: { title }, content: `${"=".repeat(level)} ${title}`, // Just the header line for index sections title, level, }); } } // Add actual level to content sections based on their content const contentSectionsWithLevel = sections.map((s) => ({ ...s, level: getSectionLevel(s.content), })); // Combine index sections and content sections // Sort by position in original content to maintain order const allSections = [...indexSections, ...contentSectionsWithLevel]; // Sort sections by their appearance in the original content allSections.sort((a, b) => { const posA = content.indexOf(a.content.split("\n")[0]); const posB = content.indexOf(b.content.split("\n")[0]); return posA - posB; }); const docContent = documentContent.join("\n"); return { metadata: docMetadata, content: docContent, title: docMetadata.title || "", sections: allSections, }; } /** * Generates Nostr events from parsed AsciiDoc with proper hierarchical structure * Based on docreference.md specifications */ export function generateNostrEvents( parsed: ParsedAsciiDoc, parseLevel: number = 2, pubkey?: string, maxDepth: number = 6, ): { indexEvent?: any; contentEvents: any[]; } { const allEvents: any[] = []; const actualPubkey = pubkey || "pubkey"; // Helper function to generate section ID const generateSectionId = (title: string): string => { return title .toLowerCase() .replace(/[^\p{L}\p{N}]/gu, "-") .replace(/-+/g, "-") .replace(/^-|-$/g, ""); }; // Build hierarchical tree structure interface TreeNode { section: { metadata: any; content: string; title: string; }; level: number; sectionId: string; tags: [string, string][]; children: TreeNode[]; parent?: TreeNode; } // Convert flat sections to tree structure const buildTree = (): TreeNode[] => { const roots: TreeNode[] = []; const stack: TreeNode[] = []; for (const section of parsed.sections) { const level = getSectionLevel(section.content); const sectionId = generateSectionId(section.title); const tags = parseSimpleAttributes(section.content); const node: TreeNode = { section, level, sectionId, tags, children: [], }; // Find the correct parent based on header hierarchy while (stack.length > 0 && stack[stack.length - 1].level >= level) { stack.pop(); } if (stack.length === 0) { // This is a root level section roots.push(node); } else { // This is a child of the last item in stack const parent = stack[stack.length - 1]; parent.children.push(node); node.parent = parent; } stack.push(node); } return roots; }; const tree = buildTree(); // Recursively create events from tree const createEventsFromNode = (node: TreeNode): void => { const { section, level, sectionId, tags, children } = node; // Determine if this node should become an index const hasChildrenAtTargetLevel = children.some( (child) => child.level === parseLevel, ); const shouldBeIndex = level < parseLevel && (hasChildrenAtTargetLevel || children.some((child) => child.level <= parseLevel)); if (shouldBeIndex) { // Create content event for intro text (30041) const introContent = extractIntroContent(section.content, level); if (introContent.trim()) { const contentEvent = { id: "", pubkey: "", created_at: Math.floor(Date.now() / 1000), kind: 30041, tags: [ ["d", `${sectionId}-content`], ["title", section.title], ...tags, ], content: introContent, sig: "", }; allEvents.push(contentEvent); } // Create index event (30040) const childATags: string[][] = []; // Add a-tag for intro content if it exists if (introContent.trim()) { childATags.push([ "a", `30041:${actualPubkey}:${sectionId}-content`, "", "", ]); } // Add a-tags for direct children for (const child of children) { const childHasSubChildren = child.children.some( (grandchild) => grandchild.level <= parseLevel, ); const childShouldBeIndex = child.level < parseLevel && childHasSubChildren; const childKind = childShouldBeIndex ? 30040 : 30041; childATags.push([ "a", `${childKind}:${actualPubkey}:${child.sectionId}`, "", "", ]); } const indexEvent = { id: "", pubkey: "", created_at: Math.floor(Date.now() / 1000), kind: 30040, tags: [ ["d", sectionId], ["title", section.title], ...tags, ...childATags, ], content: "", sig: "", }; allEvents.push(indexEvent); } else { // Create regular content event (30041) const contentEvent = { id: "", pubkey: "", created_at: Math.floor(Date.now() / 1000), kind: 30041, tags: [["d", sectionId], ["title", section.title], ...tags], content: section.content, sig: "", }; allEvents.push(contentEvent); } // Recursively process children for (const child of children) { createEventsFromNode(child); } }; // Process all root level sections for (const rootNode of tree) { createEventsFromNode(rootNode); } // Create main document index if we have a document title (article format) if (parsed.title && parsed.title.trim() !== "") { const documentId = generateSectionId(parsed.title); const documentTags = parseSimpleAttributes(parsed.content); // Create a-tags for all root level sections (level 2) const mainIndexATags = tree.map((rootNode) => { const hasSubChildren = rootNode.children.some( (child) => child.level <= parseLevel, ); const shouldBeIndex = rootNode.level < parseLevel && hasSubChildren; const kind = shouldBeIndex ? 30040 : 30041; return ["a", `${kind}:${actualPubkey}:${rootNode.sectionId}`, "", ""]; }); console.log("Debug: Root sections found:", tree.length); console.log("Debug: Main index a-tags:", mainIndexATags); const mainIndexEvent = { id: "", pubkey: "", created_at: Math.floor(Date.now() / 1000), kind: 30040, tags: [ ["d", documentId], ["title", parsed.title], ...documentTags, ...mainIndexATags, ], content: "", sig: "", }; return { indexEvent: mainIndexEvent, contentEvents: allEvents, }; } // For scattered notes, return only content events return { contentEvents: allEvents, }; } /** * Detects content type for smart publishing */ export function detectContentType( content: string, ): "article" | "scattered-notes" | "none" { const hasDocTitle = content.trim().startsWith("=") && !content.trim().startsWith("=="); const hasSections = content.includes("=="); if (hasDocTitle) { return "article"; } else if (hasSections) { return "scattered-notes"; } else { return "none"; } }