clone of repo on github
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

330 lines
9.7 KiB

/**
* AST-based AsciiDoc parsing using Asciidoctor's native document structure
*
* This replaces the manual regex parsing in asciidoc_metadata.ts with proper
* AST traversal, leveraging Asciidoctor's built-in parsing capabilities.
*/
import Processor from "asciidoctor";
import type { Document } from "asciidoctor";
import { PublicationTree } from "../data_structures/publication_tree";
import { NDKEvent } from "@nostr-dev-kit/ndk";
import type NDK from "@nostr-dev-kit/ndk";
import { getMimeTags } from "./mime";
export interface ASTSection {
title: string;
content: string;
level: number;
attributes: Record<string, string>;
subsections: ASTSection[];
}
export interface ASTParsedDocument {
title: string;
content: string;
attributes: Record<string, string>;
sections: ASTSection[];
}
/**
* Parse AsciiDoc content using Asciidoctor's AST instead of manual regex
*/
export function parseAsciiDocAST(content: string, parseLevel: number = 2): ASTParsedDocument {
const asciidoctor = Processor();
const document = asciidoctor.load(content, { standalone: false }) as Document;
return {
title: document.getTitle() || '',
content: document.getContent() || '',
attributes: document.getAttributes(),
sections: extractSectionsFromAST(document, parseLevel)
};
}
/**
* Extract sections from Asciidoctor AST based on parse level
*/
function extractSectionsFromAST(document: Document, parseLevel: number): ASTSection[] {
const directSections = document.getSections();
// Collect all sections at all levels up to parseLevel
const allSections: ASTSection[] = [];
function collectSections(sections: any[]) {
for (const section of sections) {
const asciidoctorLevel = section.getLevel();
// Convert Asciidoctor's internal level to our application level
// Asciidoctor: == is level 1, === is level 2, etc.
// Our app: == is level 2, === is level 3, etc.
const appLevel = asciidoctorLevel + 1;
if (appLevel <= parseLevel) {
allSections.push({
title: section.getTitle() || '',
content: section.getContent() || '',
level: appLevel,
attributes: section.getAttributes() || {},
subsections: []
});
}
// Recursively collect subsections
const subsections = section.getSections?.() || [];
if (subsections.length > 0) {
collectSections(subsections);
}
}
}
collectSections(directSections);
return allSections;
}
/**
* Extract subsections from a section (recursive helper)
*/
function extractSubsections(section: any, parseLevel: number): ASTSection[] {
const subsections = section.getSections?.() || [];
return subsections
.filter((sub: any) => (sub.getLevel() + 1) <= parseLevel)
.map((sub: any) => ({
title: sub.getTitle() || '',
content: sub.getContent() || '',
level: sub.getLevel() + 1, // Convert to app level
attributes: sub.getAttributes() || {},
subsections: extractSubsections(sub, parseLevel)
}));
}
/**
* Create a PublicationTree directly from Asciidoctor AST
* This integrates with Michael's PublicationTree architecture
*/
export async function createPublicationTreeFromAST(
content: string,
ndk: NDK,
parseLevel: number = 2,
): Promise<PublicationTree> {
const parsed = parseAsciiDocAST(content, parseLevel);
// Create root 30040 index event from document metadata
const rootEvent = createIndexEventFromAST(parsed, ndk);
const tree = new PublicationTree(rootEvent, ndk);
// Add sections as 30041 events with proper namespacing
for (const section of parsed.sections) {
const contentEvent = createContentEventFromSection(
section,
ndk,
parsed.title,
);
await tree.addEvent(contentEvent, rootEvent);
}
return tree;
}
/**
* Create a 30040 index event from AST document metadata
*/
function createIndexEventFromAST(parsed: ASTParsedDocument, ndk: NDK): NDKEvent {
const event = new NDKEvent(ndk);
event.kind = 30040;
event.created_at = Math.floor(Date.now() / 1000);
// Generate d-tag from title
const dTag = generateDTag(parsed.title);
const [mTag, MTag] = getMimeTags(30040);
const tags: string[][] = [
["d", dTag],
mTag,
MTag,
["title", parsed.title],
];
// Add document attributes as tags
addAttributesAsTags(tags, parsed.attributes);
// Generate publication abbreviation for namespacing sections
const pubAbbrev = generateTitleAbbreviation(parsed.title);
// Add a-tags for each section (30041 content events)
// Using new format: kind:pubkey:{abbv}-{section-d-tag}
parsed.sections.forEach((section) => {
const sectionDTag = generateDTag(section.title);
const namespacedDTag = `${pubAbbrev}-${sectionDTag}`;
tags.push([
"a",
`30041:${ndk.activeUser?.pubkey || "pubkey"}:${namespacedDTag}`,
]);
});
event.tags = tags;
event.content = parsed.content;
return event;
}
/**
* Create a 30041 content event from an AST section
* Note: This function needs the publication title for proper namespacing
* but the current implementation doesn't have access to it.
* Consider using createPublicationTreeFromAST instead which handles this correctly.
*/
function createContentEventFromSection(
section: ASTSection,
ndk: NDK,
publicationTitle?: string,
): NDKEvent {
const event = new NDKEvent(ndk);
event.kind = 30041;
event.created_at = Math.floor(Date.now() / 1000);
// Generate namespaced d-tag if publication title is provided
const sectionDTag = generateDTag(section.title);
let dTag = sectionDTag;
if (publicationTitle) {
const pubAbbrev = generateTitleAbbreviation(publicationTitle);
dTag = `${pubAbbrev}-${sectionDTag}`;
}
const [mTag, MTag] = getMimeTags(30041);
const tags: string[][] = [
["d", dTag],
mTag,
MTag,
["title", section.title],
];
// Add section attributes as tags
addAttributesAsTags(tags, section.attributes);
event.tags = tags;
event.content = section.content;
return event;
}
/**
* Generate a deterministic d-tag from title
*/
function generateDTag(title: string): string {
return title
.toLowerCase()
.replace(/[^\p{L}\p{N}]/gu, "-")
.replace(/-+/g, "-")
.replace(/^-|-$/g, "");
}
/**
* Generate title abbreviation from first letters of each word
* Used for namespacing section a-tags
* @param title - The publication title
* @returns Abbreviation string (e.g., "My Test Article" → "mta")
*/
function generateTitleAbbreviation(title: string): string {
if (!title || !title.trim()) {
return "u"; // "untitled"
}
// Split on non-alphanumeric characters and filter out empty strings
const words = title
.split(/[^\p{L}\p{N}]+/u)
.filter((word) => word.length > 0);
if (words.length === 0) {
return "u";
}
// Take first letter of each word and join
return words
.map((word) => word.charAt(0).toLowerCase())
.join("");
}
/**
* Add AsciiDoc attributes as Nostr event tags, filtering out system attributes
*/
function addAttributesAsTags(tags: string[][], attributes: Record<string, string>) {
const systemAttributes = [
'attribute-undefined', 'attribute-missing', 'appendix-caption', 'appendix-refsig',
'caution-caption', 'chapter-refsig', 'example-caption', 'figure-caption',
'important-caption', 'last-update-label', 'manname-title', 'note-caption',
'part-refsig', 'preface-title', 'section-refsig', 'table-caption',
'tip-caption', 'toc-title', 'untitled-label', 'version-label', 'warning-caption',
'asciidoctor', 'asciidoctor-version', 'safe-mode-name', 'backend', 'doctype',
'basebackend', 'filetype', 'outfilesuffix', 'stylesdir', 'iconsdir',
'localdate', 'localyear', 'localtime', 'localdatetime', 'docdate',
'docyear', 'doctime', 'docdatetime', 'doctitle', 'embedded', 'notitle'
];
// Add standard metadata tags
if (attributes.author) tags.push(["author", attributes.author]);
if (attributes.version) tags.push(["version", attributes.version]);
if (attributes.description) tags.push(["summary", attributes.description]);
if (attributes.tags) {
attributes.tags.split(',').forEach(tag =>
tags.push(["t", tag.trim()])
);
}
// Add custom attributes (non-system)
Object.entries(attributes).forEach(([key, value]) => {
if (!systemAttributes.includes(key) && value) {
tags.push([key, value]);
}
});
}
/**
* Tree processor extension for Asciidoctor
* This can be registered to automatically populate PublicationTree during parsing
*/
export function createPublicationTreeProcessor(ndk: NDK, parseLevel: number = 2) {
return function(extensions: any) {
extensions.treeProcessor(function(this: any) {
const dsl = this;
dsl.process(function(this: any, document: Document) {
// Create PublicationTree and store on document for later retrieval
const publicationTree = createPublicationTreeFromDocument(document, ndk, parseLevel);
document.setAttribute('publicationTree', publicationTree);
});
});
};
}
/**
* Helper function to create PublicationTree from Asciidoctor Document
*/
async function createPublicationTreeFromDocument(
document: Document,
ndk: NDK,
parseLevel: number,
): Promise<PublicationTree> {
const parsed: ASTParsedDocument = {
title: document.getTitle() || "",
content: document.getContent() || "",
attributes: document.getAttributes(),
sections: extractSectionsFromAST(document, parseLevel),
};
const rootEvent = createIndexEventFromAST(parsed, ndk);
const tree = new PublicationTree(rootEvent, ndk);
for (const section of parsed.sections) {
const contentEvent = createContentEventFromSection(
section,
ndk,
parsed.title,
);
await tree.addEvent(contentEvent, rootEvent);
}
return tree;
}