1 changed files with 577 additions and 0 deletions
@ -0,0 +1,577 @@ |
|||||||
|
/** |
||||||
|
* AsciiDoc Content Parsing Service |
||||||
|
* |
||||||
|
* Handles parsing AsciiDoc content into hierarchical structures for publication. |
||||||
|
* Separated from metadata extraction to maintain single responsibility principle. |
||||||
|
*/ |
||||||
|
|
||||||
|
// @ts-ignore
|
||||||
|
import Processor from "asciidoctor"; |
||||||
|
import type { Document } from "asciidoctor"; |
||||||
|
import { |
||||||
|
parseSimpleAttributes, |
||||||
|
extractDocumentMetadata, |
||||||
|
extractSectionMetadata, |
||||||
|
} from "./asciidoc_metadata.ts"; |
||||||
|
|
||||||
|
/**
 * Optional metadata fields that may be attached either to the whole document
 * or to an individual section. The document-level and section-level shapes
 * are identical, so they share this single declaration.
 */
export interface ParsedAsciiDocMetadata {
  title?: string;
  authors?: string[];
  version?: string;
  edition?: string;
  publicationDate?: string;
  publisher?: string;
  summary?: string;
  coverImage?: string;
  isbn?: string;
  tags?: string[];
  source?: string;
  publishedBy?: string;
  type?: string;
  autoUpdate?: "yes" | "ask" | "no";
  customAttributes?: Record<string, string>;
}

/**
 * One parsed section: its metadata, its raw AsciiDoc text, and its title.
 */
export interface ParsedAsciiDocSection {
  metadata: ParsedAsciiDocMetadata;
  content: string;
  title: string;
}

/**
 * Result of parsing an AsciiDoc source: document-level metadata, content and
 * title, plus the flat list of sections found in it.
 */
export interface ParsedAsciiDoc {
  metadata: ParsedAsciiDocMetadata;
  content: string;
  title: string;
  sections: ParsedAsciiDocSection[];
}
||||||
|
|
||||||
|
/**
 * Builds a new Asciidoctor processor by calling the library's default export.
 */
function createProcessor() {
  const processor = Processor();
  return processor;
}
||||||
|
|
||||||
|
/**
 * Reports the header level of a section, i.e. the number of leading `=`
 * characters on the first line that looks like a header (`=` run followed by
 * whitespace).
 *
 * @param sectionContent Raw AsciiDoc text of the section.
 * @returns The count of `=` characters of the first header line, or 0 when no
 *   line matches.
 */
function getSectionLevel(sectionContent: string): number {
  const headerPrefix = /^(=+)\s+/;
  const firstHeader = sectionContent
    .split(/\r?\n/)
    .map((row) => headerPrefix.exec(row))
    .find((hit) => hit !== null);

  return firstHeader ? firstHeader[1].length : 0;
}
||||||
|
|
||||||
|
/**
 * Extracts the intro text of a section: the lines between the section's own
 * header and the next header of any level.
 *
 * Scanning rules:
 * - Lines before the section's own header are ignored.
 * - The first header whose level equals `currentLevel` is treated as the
 *   section header and is not part of the result.
 * - A deeper header seen before the section header stops the scan (the result
 *   is then empty); a shallower one seen before it is skipped.
 * - Once the section header has been seen, ANY further header ends the intro.
 *   Previously a later header at the same or a shallower level was skipped and
 *   the text below it was appended to the intro, leaking a sibling/parent
 *   section's body into this one.
 *
 * @param sectionContent Raw AsciiDoc text of the section.
 * @param currentLevel Header level (number of `=`) of the section itself.
 * @returns The intro lines joined with `\n` and trimmed; `""` when there is
 *   no intro or no matching header.
 */
function extractIntroContent(
  sectionContent: string,
  currentLevel: number,
): string {
  const headerPattern = /^(=+)\s+/;
  const introLines: string[] = [];
  let foundHeader = false;

  for (const line of sectionContent.split(/\r?\n/)) {
    const headerMatch = headerPattern.exec(line);

    if (headerMatch === null) {
      // Plain line: only counts once we are past the section's own header.
      if (foundHeader) {
        introLines.push(line);
      }
      continue;
    }

    if (foundHeader) {
      // Any header after our own (subsection, sibling or parent) ends the intro.
      break;
    }

    const level = headerMatch[1].length;
    if (level === currentLevel) {
      // The section's own header line; excluded from the intro text.
      foundHeader = true;
    } else if (level > currentLevel) {
      // A deeper header before our own header: nothing to collect.
      break;
    }
    // A shallower header before our own header is skipped.
  }

  return introLines.join("\n").trim();
}
||||||
|
|
||||||
|
/**
 * Splits AsciiDoc source into its `==` (level-2) sections and extracts
 * metadata for the document and for each section.
 *
 * Lines before the first `==` header belong to no section. Each section spans
 * from its `==` header up to (not including) the next `==` header and is passed
 * to `extractSectionMetadata`. The returned `content` is the source as reported
 * by the loaded Asciidoctor document.
 *
 * @param content Raw AsciiDoc source.
 * @returns Document metadata, source, title (empty string when absent) and the
 *   list of level-2 sections.
 */
export function parseAsciiDocWithMetadata(content: string): ParsedAsciiDoc {
  const loadedDocument = createProcessor().load(content, {
    standalone: false,
  }) as Document;
  const documentMetadata = extractDocumentMetadata(content).metadata;

  const collectedSections: Array<{
    metadata: ParsedAsciiDoc["sections"][0]["metadata"];
    content: string;
    title: string;
  }> = [];

  // Lines of the section currently being accumulated; null until the first header.
  let openSectionLines: string[] | null = null;

  // Turns the accumulated lines (if any) into a section entry.
  const closeOpenSection = (): void => {
    if (openSectionLines !== null) {
      collectedSections.push(
        extractSectionMetadata(openSectionLines.join("\n")),
      );
    }
  };

  for (const row of content.split(/\r?\n/)) {
    if (/^==\s+/.test(row)) {
      // A new level-2 header closes the previous section and opens the next.
      closeOpenSection();
      openSectionLines = [row];
    } else if (openSectionLines !== null) {
      openSectionLines.push(row);
    }
  }
  closeOpenSection();

  return {
    metadata: documentMetadata,
    content: loadedDocument.getSource(),
    title: documentMetadata.title || "",
    sections: collectedSections,
  };
}
||||||
|
|
||||||
|
/**
 * Splits AsciiDoc source into sections at the requested header level.
 *
 * - `parseLevel === 2`: every `==` header starts a content section that runs
 *   until the next `==` header (deeper subsections stay inside it).
 * - `parseLevel > 2`: headers at exactly `parseLevel` start content sections
 *   (running until the next header of that same level), and every header at
 *   levels 2 .. parseLevel-1 additionally yields an index entry whose content
 *   is just the normalised header line (`"=" * level + " " + title`). All
 *   entries carry a numeric `level` and are returned in document order.
 *
 * Lines before the first content-level header form the returned document
 * `content`. NOTE: text that follows an index-level header is not attached to
 * that index entry; it stays in whichever content section (or the document
 * preamble) is open at that point.
 *
 * Ordering is derived from the line number at which each header was found.
 * The earlier implementation sorted by `content.indexOf(headerLine)`, which
 * misordered sections with duplicate titles, matched `== X` inside an earlier
 * `=== X`, and returned -1 for index headers whose original spacing differed
 * from the normalised form. The unused Asciidoctor load was also removed.
 *
 * @param content Raw AsciiDoc source.
 * @param parseLevel Header level (number of `=`) that delimits content
 *   sections. Defaults to 2; presumably expected to be >= 2 (a negative value
 *   makes `String.prototype.repeat` throw).
 * @returns Document metadata, preamble content, title (empty string when
 *   absent) and the ordered sections.
 */
export function parseAsciiDocIterative(
  content: string,
  parseLevel: number = 2,
): ParsedAsciiDoc {
  type Section = ParsedAsciiDoc["sections"][0];
  type PositionedSection = Section & { level: number; lineIndex: number };

  // Extract document metadata using the metadata extraction functions.
  const { metadata: docMetadata } = extractDocumentMetadata(content);
  const lines = content.split(/\r?\n/);

  // --- Pass 1: content sections at exactly `parseLevel` -------------------
  const contentLevelPattern = new RegExp(`^${"=".repeat(parseLevel)}\\s+`);
  const contentSections: Array<{ section: Section; lineIndex: number }> = [];
  const documentContent: string[] = [];
  let openLines: string[] | null = null;
  let openLineIndex = -1;

  // Converts the lines accumulated so far (if any) into a content section.
  const closeOpenSection = (): void => {
    if (openLines === null) {
      return;
    }
    const sectionContent = openLines.join("\n");
    contentSections.push({
      section: {
        ...extractSectionMetadata(sectionContent),
        // Keep the full text, header line included, rather than whatever
        // content the metadata extractor returns.
        content: sectionContent,
      },
      lineIndex: openLineIndex,
    });
  };

  for (const [lineIndex, line] of lines.entries()) {
    if (contentLevelPattern.test(line)) {
      closeOpenSection();
      openLines = [line];
      openLineIndex = lineIndex;
    } else if (openLines !== null) {
      openLines.push(line);
    } else {
      // Nothing opened yet: still in the document header/preamble.
      documentContent.push(line);
    }
  }
  closeOpenSection();

  const docContent = documentContent.join("\n");

  if (parseLevel === 2) {
    // Level 2 has no index entries and returns sections without `level`.
    return {
      metadata: docMetadata,
      content: docContent,
      title: docMetadata.title || "",
      sections: contentSections.map((entry) => entry.section),
    };
  }

  // --- Pass 2: index entries for levels 2 .. parseLevel-1 ------------------
  const positioned: PositionedSection[] = contentSections.map(
    ({ section, lineIndex }) => ({
      ...section,
      level: getSectionLevel(section.content),
      lineIndex,
    }),
  );

  const headerPattern = /^(=+)\s+(.+)$/;
  for (const [lineIndex, line] of lines.entries()) {
    const match = headerPattern.exec(line);
    if (match === null) {
      continue;
    }
    const level = match[1].length;
    if (level < 2 || level >= parseLevel) {
      continue;
    }
    const title = match[2].trim();
    positioned.push({
      metadata: { title },
      content: `${"=".repeat(level)} ${title}`, // Just the header line for index sections
      title,
      level,
      lineIndex,
    });
  }

  // Each header occupies its own line, so line numbers give a total order.
  positioned.sort((a, b) => a.lineIndex - b.lineIndex);

  // Drop the bookkeeping field so the output shape matches the public contract.
  const allSections = positioned.map(({ lineIndex: _lineIndex, ...section }) => section);

  return {
    metadata: docMetadata,
    content: docContent,
    title: docMetadata.title || "",
    sections: allSections,
  };
}
||||||
|
|
||||||
|
/**
 * Generates unsigned Nostr event drafts from parsed AsciiDoc, arranged
 * hierarchically (per the original author's note, following docreference.md).
 *
 * The flat `parsed.sections` list is turned into a tree using each section's
 * header level. A node becomes an index event (kind 30040) when its level is
 * above `parseLevel` in the hierarchy (numerically smaller) and it has at
 * least one child at or above `parseLevel`; otherwise it becomes a content
 * event (kind 30041) carrying the section's full content. An index node also
 * emits a separate 30041 event (d-tag `<sectionId>-content`) for its intro
 * text when that text is non-empty. When the document has a non-blank title, a
 * top-level 30040 index referencing every root section is returned as
 * `indexEvent`.
 *
 * All drafts have empty `id`, `pubkey` and `sig`, and share one `created_at`
 * timestamp taken at the start of the call.
 *
 * Changes from the previous version: leftover debug `console.log` output was
 * removed, the three copies of the index-vs-content decision were unified, and
 * the never-read `parent` back-reference on tree nodes was dropped.
 *
 * @param parsed Parsed document as produced by the parsing functions.
 * @param parseLevel Header level at which sections become content events.
 * @param pubkey Public key used inside `a` tag coordinates; the literal
 *   `"pubkey"` is used as a placeholder when omitted or empty.
 * @param maxDepth Currently unused; kept so existing callers keep compiling.
 * @returns `contentEvents` (all section events in depth-first order) and,
 *   for titled documents, the document-level `indexEvent`.
 */
export function generateNostrEvents(
  parsed: ParsedAsciiDoc,
  parseLevel: number = 2,
  pubkey?: string,
  maxDepth: number = 6,
): {
  indexEvent?: any;
  contentEvents: any[];
} {
  const INDEX_KIND = 30040;
  const CONTENT_KIND = 30041;

  interface DraftEvent {
    id: string;
    pubkey: string;
    created_at: number;
    kind: number;
    tags: string[][];
    content: string;
    sig: string;
  }

  interface TreeNode {
    section: ParsedAsciiDoc["sections"][0];
    level: number;
    sectionId: string;
    tags: [string, string][];
    children: TreeNode[];
  }

  const allEvents: DraftEvent[] = [];
  // `||` (not `??`) on purpose: an empty string also falls back to the placeholder.
  const actualPubkey = pubkey || "pubkey";
  // One timestamp for the whole batch so related events cannot straddle a second.
  const createdAt = Math.floor(Date.now() / 1000);

  // Slugifies a title: lowercase, non letter/digit runs -> single "-", trimmed.
  const generateSectionId = (title: string): string =>
    title
      .toLowerCase()
      .replace(/[^\p{L}\p{N}]/gu, "-")
      .replace(/-+/g, "-")
      .replace(/^-|-$/g, "");

  // Builds an unsigned event draft with the shared placeholder fields.
  const makeEvent = (
    kind: number,
    tags: string[][],
    content: string,
  ): DraftEvent => ({
    id: "",
    pubkey: "",
    created_at: createdAt,
    kind,
    tags,
    content,
    sig: "",
  });

  // Single source of truth for "does this node become an index event?".
  const isIndexNode = (node: TreeNode): boolean =>
    node.level < parseLevel &&
    node.children.some((child) => child.level <= parseLevel);

  // Builds the `a` tag that points at a node's event.
  const aTagFor = (node: TreeNode): string[] => [
    "a",
    `${isIndexNode(node) ? INDEX_KIND : CONTENT_KIND}:${actualPubkey}:${node.sectionId}`,
    "",
    "",
  ];

  // Convert the flat, document-ordered sections into a tree by header level.
  const buildTree = (): TreeNode[] => {
    const roots: TreeNode[] = [];
    const stack: TreeNode[] = [];

    for (const section of parsed.sections) {
      const node: TreeNode = {
        section,
        level: getSectionLevel(section.content),
        sectionId: generateSectionId(section.title),
        tags: parseSimpleAttributes(section.content),
        children: [],
      };

      // Pop until the top of the stack is a strictly shallower header.
      while (
        stack.length > 0 &&
        stack[stack.length - 1].level >= node.level
      ) {
        stack.pop();
      }

      const parent = stack[stack.length - 1];
      if (parent === undefined) {
        roots.push(node);
      } else {
        parent.children.push(node);
      }

      stack.push(node);
    }

    return roots;
  };

  const tree = buildTree();

  // Depth-first: emit this node's event(s), then its children's.
  const createEventsFromNode = (node: TreeNode): void => {
    const { section, level, sectionId, tags, children } = node;

    if (isIndexNode(node)) {
      const childATags: string[][] = [];

      // Intro text (between the header and the first subsection) gets its own
      // content event, referenced first from the index.
      const introContent = extractIntroContent(section.content, level);
      if (introContent.trim()) {
        allEvents.push(
          makeEvent(
            CONTENT_KIND,
            [["d", `${sectionId}-content`], ["title", section.title], ...tags],
            introContent,
          ),
        );
        childATags.push([
          "a",
          `${CONTENT_KIND}:${actualPubkey}:${sectionId}-content`,
          "",
          "",
        ]);
      }

      // Then one reference per direct child.
      for (const child of children) {
        childATags.push(aTagFor(child));
      }

      allEvents.push(
        makeEvent(
          INDEX_KIND,
          [["d", sectionId], ["title", section.title], ...tags, ...childATags],
          "",
        ),
      );
    } else {
      // Regular content event carrying the section's full text.
      allEvents.push(
        makeEvent(
          CONTENT_KIND,
          [["d", sectionId], ["title", section.title], ...tags],
          section.content,
        ),
      );
    }

    for (const child of children) {
      createEventsFromNode(child);
    }
  };

  for (const rootNode of tree) {
    createEventsFromNode(rootNode);
  }

  // Titled document ("article"): add a main index over all root sections.
  if (parsed.title && parsed.title.trim() !== "") {
    const documentId = generateSectionId(parsed.title);
    const documentTags = parseSimpleAttributes(parsed.content);
    const mainIndexATags = tree.map(aTagFor);

    return {
      indexEvent: makeEvent(
        INDEX_KIND,
        [
          ["d", documentId],
          ["title", parsed.title],
          ...documentTags,
          ...mainIndexATags,
        ],
        "",
      ),
      contentEvents: allEvents,
    };
  }

  // Untitled input ("scattered notes"): only the section events.
  return {
    contentEvents: allEvents,
  };
}
||||||
|
|
||||||
|
/**
 * Detects the content type for smart publishing.
 *
 * - `"article"`: the first non-whitespace text is a document title, i.e. a
 *   single `=` followed by a space/tab and some text.
 * - `"scattered-notes"`: no document title, but at least one line starts with
 *   a section header (`==` or more, a space/tab, then text).
 * - `"none"`: neither.
 *
 * Headers are matched at the start of a line. The earlier check used
 * `content.includes("==")` and `startsWith("=")`, which misclassified text
 * merely containing `==` (e.g. `a == b` in code, or `====` block delimiters)
 * as notes and text such as `=> ...` as an article.
 *
 * @param content Raw AsciiDoc source.
 * @returns The detected content type.
 */
export function detectContentType(
  content: string,
): "article" | "scattered-notes" | "none" {
  const trimmed = content.trim();

  // `[ \t]` rather than `\s` so the whitespace cannot run onto the next line.
  const hasDocTitle = /^=[ \t]+\S/.test(trimmed);
  if (hasDocTitle) {
    return "article";
  }

  const hasSections = /^={2,}[ \t]+\S/m.test(trimmed);
  return hasSections ? "scattered-notes" : "none";
}
||||||
Loading…
Reference in new issue