/** * AST-based AsciiDoc parsing using Asciidoctor's native document structure * * This replaces the manual regex parsing in asciidoc_metadata.ts with proper * AST traversal, leveraging Asciidoctor's built-in parsing capabilities. */ import Processor from "asciidoctor"; import type { Document } from "asciidoctor"; import { PublicationTree } from "../data_structures/publication_tree"; import { NDKEvent } from "@nostr-dev-kit/ndk"; import type NDK from "@nostr-dev-kit/ndk"; import { getMimeTags } from "./mime"; export interface ASTSection { title: string; content: string; level: number; attributes: Record; subsections: ASTSection[]; } export interface ASTParsedDocument { title: string; content: string; attributes: Record; sections: ASTSection[]; } /** * Parse AsciiDoc content using Asciidoctor's AST instead of manual regex */ export function parseAsciiDocAST(content: string, parseLevel: number = 2): ASTParsedDocument { const asciidoctor = Processor(); const document = asciidoctor.load(content, { standalone: false }) as Document; return { title: document.getTitle() || '', content: document.getContent() || '', attributes: document.getAttributes(), sections: extractSectionsFromAST(document, parseLevel) }; } /** * Extract sections from Asciidoctor AST based on parse level */ function extractSectionsFromAST(document: Document, parseLevel: number): ASTSection[] { const directSections = document.getSections(); // Collect all sections at all levels up to parseLevel const allSections: ASTSection[] = []; function collectSections(sections: any[]) { for (const section of sections) { const asciidoctorLevel = section.getLevel(); // Convert Asciidoctor's internal level to our application level // Asciidoctor: == is level 1, === is level 2, etc. // Our app: == is level 2, === is level 3, etc. const appLevel = asciidoctorLevel + 1; if (appLevel <= parseLevel) { allSections.push({ title: section.getTitle() || '', content: section.getContent() || '', level: appLevel, attributes: section.getAttributes() || {}, subsections: [] }); } // Recursively collect subsections const subsections = section.getSections?.() || []; if (subsections.length > 0) { collectSections(subsections); } } } collectSections(directSections); return allSections; } /** * Extract subsections from a section (recursive helper) */ function extractSubsections(section: any, parseLevel: number): ASTSection[] { const subsections = section.getSections?.() || []; return subsections .filter((sub: any) => (sub.getLevel() + 1) <= parseLevel) .map((sub: any) => ({ title: sub.getTitle() || '', content: sub.getContent() || '', level: sub.getLevel() + 1, // Convert to app level attributes: sub.getAttributes() || {}, subsections: extractSubsections(sub, parseLevel) })); } /** * Create a PublicationTree directly from Asciidoctor AST * This integrates with Michael's PublicationTree architecture */ export async function createPublicationTreeFromAST( content: string, ndk: NDK, parseLevel: number = 2, ): Promise { const parsed = parseAsciiDocAST(content, parseLevel); // Create root 30040 index event from document metadata const rootEvent = createIndexEventFromAST(parsed, ndk); const tree = new PublicationTree(rootEvent, ndk); // Add sections as 30041 events with proper namespacing for (const section of parsed.sections) { const contentEvent = createContentEventFromSection( section, ndk, parsed.title, ); await tree.addEvent(contentEvent, rootEvent); } return tree; } /** * Create a 30040 index event from AST document metadata */ function createIndexEventFromAST(parsed: ASTParsedDocument, ndk: NDK): NDKEvent { const event = new NDKEvent(ndk); event.kind = 30040; event.created_at = Math.floor(Date.now() / 1000); // Generate d-tag from title const dTag = generateDTag(parsed.title); const [mTag, MTag] = getMimeTags(30040); const tags: string[][] = [ ["d", dTag], mTag, MTag, ["title", parsed.title], ]; // Add document attributes as tags addAttributesAsTags(tags, parsed.attributes); // Generate publication abbreviation for namespacing sections const pubAbbrev = generateTitleAbbreviation(parsed.title); // Add a-tags for each section (30041 content events) // Using new format: kind:pubkey:{abbv}-{section-d-tag} parsed.sections.forEach((section) => { const sectionDTag = generateDTag(section.title); const namespacedDTag = `${pubAbbrev}-${sectionDTag}`; tags.push([ "a", `30041:${ndk.activeUser?.pubkey || "pubkey"}:${namespacedDTag}`, ]); }); event.tags = tags; event.content = parsed.content; return event; } /** * Create a 30041 content event from an AST section * Note: This function needs the publication title for proper namespacing * but the current implementation doesn't have access to it. * Consider using createPublicationTreeFromAST instead which handles this correctly. */ function createContentEventFromSection( section: ASTSection, ndk: NDK, publicationTitle?: string, ): NDKEvent { const event = new NDKEvent(ndk); event.kind = 30041; event.created_at = Math.floor(Date.now() / 1000); // Generate namespaced d-tag if publication title is provided const sectionDTag = generateDTag(section.title); let dTag = sectionDTag; if (publicationTitle) { const pubAbbrev = generateTitleAbbreviation(publicationTitle); dTag = `${pubAbbrev}-${sectionDTag}`; } const [mTag, MTag] = getMimeTags(30041); const tags: string[][] = [ ["d", dTag], mTag, MTag, ["title", section.title], ]; // Add section attributes as tags addAttributesAsTags(tags, section.attributes); event.tags = tags; event.content = section.content; return event; } /** * Generate a deterministic d-tag from title */ function generateDTag(title: string): string { return title .toLowerCase() .replace(/[^\p{L}\p{N}]/gu, "-") .replace(/-+/g, "-") .replace(/^-|-$/g, ""); } /** * Generate title abbreviation from first letters of each word * Used for namespacing section a-tags * @param title - The publication title * @returns Abbreviation string (e.g., "My Test Article" → "mta") */ function generateTitleAbbreviation(title: string): string { if (!title || !title.trim()) { return "u"; // "untitled" } // Split on non-alphanumeric characters and filter out empty strings const words = title .split(/[^\p{L}\p{N}]+/u) .filter((word) => word.length > 0); if (words.length === 0) { return "u"; } // Take first letter of each word and join return words .map((word) => word.charAt(0).toLowerCase()) .join(""); } /** * Add AsciiDoc attributes as Nostr event tags, filtering out system attributes */ function addAttributesAsTags(tags: string[][], attributes: Record) { const systemAttributes = [ 'attribute-undefined', 'attribute-missing', 'appendix-caption', 'appendix-refsig', 'caution-caption', 'chapter-refsig', 'example-caption', 'figure-caption', 'important-caption', 'last-update-label', 'manname-title', 'note-caption', 'part-refsig', 'preface-title', 'section-refsig', 'table-caption', 'tip-caption', 'toc-title', 'untitled-label', 'version-label', 'warning-caption', 'asciidoctor', 'asciidoctor-version', 'safe-mode-name', 'backend', 'doctype', 'basebackend', 'filetype', 'outfilesuffix', 'stylesdir', 'iconsdir', 'localdate', 'localyear', 'localtime', 'localdatetime', 'docdate', 'docyear', 'doctime', 'docdatetime', 'doctitle', 'embedded', 'notitle' ]; // Add standard metadata tags if (attributes.author) tags.push(["author", attributes.author]); if (attributes.version) tags.push(["version", attributes.version]); if (attributes.description) tags.push(["summary", attributes.description]); if (attributes.tags) { attributes.tags.split(',').forEach(tag => tags.push(["t", tag.trim()]) ); } // Add custom attributes (non-system) Object.entries(attributes).forEach(([key, value]) => { if (!systemAttributes.includes(key) && value) { tags.push([key, value]); } }); } /** * Tree processor extension for Asciidoctor * This can be registered to automatically populate PublicationTree during parsing */ export function createPublicationTreeProcessor(ndk: NDK, parseLevel: number = 2) { return function(extensions: any) { extensions.treeProcessor(function(this: any) { const dsl = this; dsl.process(function(this: any, document: Document) { // Create PublicationTree and store on document for later retrieval const publicationTree = createPublicationTreeFromDocument(document, ndk, parseLevel); document.setAttribute('publicationTree', publicationTree); }); }); }; } /** * Helper function to create PublicationTree from Asciidoctor Document */ async function createPublicationTreeFromDocument( document: Document, ndk: NDK, parseLevel: number, ): Promise { const parsed: ASTParsedDocument = { title: document.getTitle() || "", content: document.getContent() || "", attributes: document.getAttributes(), sections: extractSectionsFromAST(document, parseLevel), }; const rootEvent = createIndexEventFromAST(parsed, ndk); const tree = new PublicationTree(rootEvent, ndk); for (const section of parsed.sections) { const contentEvent = createContentEventFromSection( section, ndk, parsed.title, ); await tree.addEvent(contentEvent, rootEvent); } return tree; }