Browse Source

Fix hierarchical parsing and event generation for multi-level AsciiDoc publishing

- Completely rewrote parseAsciiDocIterative to handle Level 3+ parsing correctly
- Fixed generateNostrEvents to build proper tree structure and create hierarchical 30040/30041 events
- Updated preview to show document titles for articles and only display hashtags (t-tags)
- Added parseSimpleAttributes export for direct tag parsing from section content
- Now supports proper index chains: Main 30040 → Sub 30040s → Content 30041s at configurable depth

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
master
limina1 7 months ago
parent
commit
e999ec4272
  1. 42
      src/lib/components/ZettelEditor.svelte
  2. 329
      src/lib/utils/asciidoc_metadata.ts

42
src/lib/components/ZettelEditor.svelte

@ -9,6 +9,7 @@
detectContentType, detectContentType,
type AsciiDocMetadata, type AsciiDocMetadata,
metadataToTags, metadataToTags,
parseSimpleAttributes,
} from "$lib/utils/asciidoc_metadata"; } from "$lib/utils/asciidoc_metadata";
import asciidoctor from "asciidoctor"; import asciidoctor from "asciidoctor";
@ -105,7 +106,8 @@ Understanding the nature of knowledge itself...
if (!parsedContent) return []; if (!parsedContent) return [];
return parsedContent.sections.map((section: { metadata: AsciiDocMetadata; content: string; title: string }) => { return parsedContent.sections.map((section: { metadata: AsciiDocMetadata; content: string; title: string }) => {
const tags = metadataToTags(section.metadata); // Use simple parsing directly on section content for accurate tag extraction
const tags = parseSimpleAttributes(section.content);
return { return {
title: section.title || "Untitled", title: section.title || "Untitled",
@ -261,6 +263,33 @@ Understanding the nature of knowledge itself...
</div> </div>
{:else} {:else}
<div class="prose prose-sm dark:prose-invert max-w-none"> <div class="prose prose-sm dark:prose-invert max-w-none">
<!-- Show document title and tags for articles -->
{#if contentType === 'article' && parsedContent?.title}
<div class="mb-6 border-b border-gray-200 dark:border-gray-700 pb-4">
<h1 class="text-xl font-bold text-gray-900 dark:text-gray-100 mb-3">
{parsedContent.title}
</h1>
<!-- Document-level tags -->
{#if parsedContent.content}
{@const documentTags = parseSimpleAttributes(parsedContent.content)}
{#if documentTags.filter(tag => tag[0] === 't').length > 0}
<div class="bg-gray-100 dark:bg-gray-800 rounded-lg p-3">
<div class="flex flex-wrap gap-2 items-center">
<span class="text-xs font-medium text-gray-600 dark:text-gray-400">Document tags:</span>
<!-- Show only hashtags (t-tags) -->
{#each documentTags.filter(tag => tag[0] === 't') as tag}
<div class="bg-blue-600 text-blue-100 px-2 py-1 rounded-full text-xs font-medium flex items-baseline">
<span class="mr-1">#</span>
<span>{tag[1]}</span>
</div>
{/each}
</div>
</div>
{/if}
{/if}
</div>
{/if}
{#each parsedSections as section, index} {#each parsedSections as section, index}
<div class="mb-6"> <div class="mb-6">
<div <div
@ -286,19 +315,20 @@ Understanding the nature of knowledge itself...
class="bg-gray-200 dark:bg-gray-700 rounded-lg p-3 mb-2" class="bg-gray-200 dark:bg-gray-700 rounded-lg p-3 mb-2"
> >
<div class="flex flex-wrap gap-2 items-center"> <div class="flex flex-wrap gap-2 items-center">
{#if section.tags && section.tags.length > 0} {#if section.tags && section.tags.filter(tag => tag[0] === 't').length > 0}
{#each section.tags as tag} <!-- Show only hashtags (t-tags) -->
{#each section.tags.filter(tag => tag[0] === 't') as tag}
<div <div
class="bg-amber-900 text-amber-100 px-2 py-1 rounded-full text-xs font-medium flex items-baseline" class="bg-blue-600 text-blue-100 px-2 py-1 rounded-full text-xs font-medium flex items-baseline"
> >
<span class="font-mono">{tag[0]}:</span> <span class="mr-1">#</span>
<span>{tag[1]}</span> <span>{tag[1]}</span>
</div> </div>
{/each} {/each}
{:else} {:else}
<span <span
class="text-gray-500 dark:text-gray-400 text-xs italic" class="text-gray-500 dark:text-gray-400 text-xs italic"
>No tags</span >No hashtags</span
> >
{/if} {/if}
</div> </div>

329
src/lib/utils/asciidoc_metadata.ts

@ -259,7 +259,7 @@ function stripHeaderAndAttributes(content: string, isSection: boolean = false):
* Converts :tagname: tagvalue -> [tagname, tagvalue] * Converts :tagname: tagvalue -> [tagname, tagvalue]
* Converts :tags: comma,separated -> [t, tag1], [t, tag2], etc. * Converts :tags: comma,separated -> [t, tag1], [t, tag2], etc.
*/ */
function parseSimpleAttributes(content: string): [string, string][] { export function parseSimpleAttributes(content: string): [string, string][] {
const tags: [string, string][] = []; const tags: [string, string][] = [];
const lines = content.split(/\r?\n/); const lines = content.split(/\r?\n/);
@ -559,9 +559,9 @@ export function extractMetadataFromSectionsOnly(content: string): {
/** /**
* Iterative AsciiDoc parsing based on specified level * Iterative AsciiDoc parsing based on specified level
* Level 2: Only == sections become events (containing all subsections) * Level 2: Only == sections become content events (containing all subsections)
* Level 3: == sections become indices, === sections become events * Level 3: == sections become indices + content events, === sections become content events
* Level 4: === sections become indices, ==== sections become events, etc. * Level 4: === sections become indices + content events, ==== sections become content events, etc.
*/ */
export function parseAsciiDocIterative(content: string, parseLevel: number = 2): ParsedAsciiDoc { export function parseAsciiDocIterative(content: string, parseLevel: number = 2): ParsedAsciiDoc {
const asciidoctor = createProcessor(); const asciidoctor = createProcessor();
@ -569,21 +569,67 @@ export function parseAsciiDocIterative(content: string, parseLevel: number = 2):
const { metadata: docMetadata } = extractDocumentMetadata(content); const { metadata: docMetadata } = extractDocumentMetadata(content);
const lines = content.split(/\r?\n/); const lines = content.split(/\r?\n/);
const targetHeaderPattern = new RegExp(`^${'='.repeat(parseLevel)}\\s+`);
const sections: Array<{ const sections: Array<{
metadata: SectionMetadata; metadata: SectionMetadata;
content: string; content: string;
title: string; title: string;
}> = []; }> = [];
if (parseLevel === 2) {
// Level 2: Only == sections become events
const level2Pattern = /^==\s+/;
let currentSection: string | null = null;
let currentSectionContent: string[] = [];
let documentContent: string[] = [];
let inDocumentHeader = true;
for (const line of lines) {
if (line.match(level2Pattern)) {
inDocumentHeader = false;
// Save previous section if exists
if (currentSection) {
const sectionContent = currentSectionContent.join('\n');
sections.push(extractSectionMetadata(sectionContent));
}
// Start new section
currentSection = line;
currentSectionContent = [line];
} else if (currentSection) {
currentSectionContent.push(line);
} else if (inDocumentHeader) {
documentContent.push(line);
}
}
// Save the last section
if (currentSection) {
const sectionContent = currentSectionContent.join('\n');
sections.push(extractSectionMetadata(sectionContent));
}
const docContent = documentContent.join('\n');
return {
metadata: docMetadata,
content: docContent,
title: docMetadata.title || '',
sections: sections
};
}
// Level 3+: Parse both index level (parseLevel-1) and content level (parseLevel)
const indexLevelPattern = new RegExp(`^${'='.repeat(parseLevel - 1)}\\s+`);
const contentLevelPattern = new RegExp(`^${'='.repeat(parseLevel)}\\s+`);
let currentSection: string | null = null; let currentSection: string | null = null;
let currentSectionContent: string[] = []; let currentSectionContent: string[] = [];
let documentContent: string[] = []; let documentContent: string[] = [];
let inDocumentHeader = true; let inDocumentHeader = true;
for (const line of lines) { for (const line of lines) {
// Check if we've hit the first section at our target level // Check for both index level and content level headers
if (line.match(targetHeaderPattern)) { if (line.match(indexLevelPattern) || line.match(contentLevelPattern)) {
inDocumentHeader = false; inDocumentHeader = false;
// Save previous section if exists // Save previous section if exists
@ -596,10 +642,8 @@ export function parseAsciiDocIterative(content: string, parseLevel: number = 2):
currentSection = line; currentSection = line;
currentSectionContent = [line]; currentSectionContent = [line];
} else if (currentSection) { } else if (currentSection) {
// We're in a section - add content
currentSectionContent.push(line); currentSectionContent.push(line);
} else if (inDocumentHeader) { } else if (inDocumentHeader) {
// We're still in document content (before first section)
documentContent.push(line); documentContent.push(line);
} }
} }
@ -610,10 +654,7 @@ export function parseAsciiDocIterative(content: string, parseLevel: number = 2):
sections.push(extractSectionMetadata(sectionContent)); sections.push(extractSectionMetadata(sectionContent));
} }
// Extract document content (everything before first section at target level)
// Keep the original content with attributes for simple parsing
const docContent = documentContent.join('\n'); const docContent = documentContent.join('\n');
return { return {
metadata: docMetadata, metadata: docMetadata,
content: docContent, content: docContent,
@ -623,87 +664,251 @@ export function parseAsciiDocIterative(content: string, parseLevel: number = 2):
} }
/** /**
* Generates Nostr events from parsed AsciiDoc * Helper function to determine the header level of a section
*/
/**
 * Reports the heading depth of a section.
 *
 * Looks at the text line by line and picks the first line that begins with a
 * run of `=` characters followed by whitespace.
 *
 * @param sectionContent - Raw section text; lines may end in `\n` or `\r\n`.
 * @returns The number of leading `=` characters on that first header-like
 *   line, or 0 when no line matches.
 */
function getSectionLevel(sectionContent: string): number {
  const firstHeader = sectionContent
    .split(/\r?\n/)
    .map((text) => /^(=+)\s+/.exec(text))
    .find((hit) => hit !== null);

  // No header-like line anywhere in the section text.
  if (!firstHeader) {
    return 0;
  }
  return firstHeader[1].length;
}
/**
 * Extracts a section's intro text: the lines between the section's own header
 * and the next header of any level.
 *
 * A header is any line that starts with a run of `=` followed by whitespace.
 *
 * @param sectionContent - Raw section text, header line included.
 * @param currentLevel - Number of `=` characters in the section's own header.
 * @returns The intro lines joined with `\n` and trimmed. Empty when the
 *   section's own header is never found, or when a deeper header appears
 *   before it.
 */
function extractIntroContent(sectionContent: string, currentLevel: number): string {
  const introLines: string[] = [];
  let foundHeader = false;

  for (const line of sectionContent.split(/\r?\n/)) {
    const headerMatch = /^(=+)\s+/.exec(line);

    if (headerMatch === null) {
      // Plain text only counts as intro once we are past the section header.
      if (foundHeader) {
        introLines.push(line);
      }
      continue;
    }

    if (foundHeader) {
      // Any header after our own ends the intro. Previously only deeper
      // headers stopped the scan, so text under a later same-level or
      // shallower header was wrongly collected as this section's intro.
      break;
    }

    const level = headerMatch[1].length;
    if (level === currentLevel) {
      // The section's own header: start collecting, but skip the header line.
      foundHeader = true;
    } else if (level > currentLevel) {
      // A subsection header before our own header: there is no intro.
      break;
    }
    // A shallower header before our own header is ignored.
  }

  return introLines.join('\n').trim();
}
/**
* Generates Nostr events from parsed AsciiDoc with proper hierarchical structure
* Based on docreference.md specifications * Based on docreference.md specifications
*/ */
export function generateNostrEvents(parsed: ParsedAsciiDoc, parseLevel: number = 2, pubkey?: string): { export function generateNostrEvents(parsed: ParsedAsciiDoc, parseLevel: number = 2, pubkey?: string, maxDepth: number = 6): {
indexEvent?: any; indexEvent?: any;
contentEvents: any[]; contentEvents: any[];
} { } {
const events: any[] = []; const allEvents: any[] = [];
const actualPubkey = pubkey || 'pubkey';
// Create content events for each section (30041) // Helper function to generate section ID
const contentEvents = parsed.sections.map(section => { const generateSectionId = (title: string): string => {
const sectionId = section.title return title
.toLowerCase() .toLowerCase()
.replace(/[^a-z0-9\s]/g, '') .replace(/[^a-z0-9\s]/g, '')
.replace(/\s+/g, '-') .replace(/\s+/g, '-')
.trim(); .trim();
};
// Extract tags directly from section content using simple regex // Build hierarchical tree structure
const sectionTags = parseSimpleAttributes(section.content); interface TreeNode {
section: {
return { metadata: any;
id: '', // Will be generated by Nostr client content: string;
pubkey: '', // Will be set by client title: string;
created_at: Math.floor(Date.now() / 1000),
kind: 30041,
tags: [
['d', sectionId],
['title', section.title],
...sectionTags
],
content: section.content,
sig: '' // Will be generated by client
}; };
}); level: number;
sectionId: string;
tags: [string, string][];
children: TreeNode[];
parent?: TreeNode;
}
// Only create index event if we have a document title (article format) // Convert flat sections to tree structure
if (parsed.title && parsed.title.trim() !== '') { const buildTree = (): TreeNode[] => {
// Generate document identifier from title const roots: TreeNode[] = [];
const documentId = parsed.title const stack: TreeNode[] = [];
.toLowerCase()
.replace(/[^a-z0-9\s]/g, '') for (const section of parsed.sections) {
.replace(/\s+/g, '-') const level = getSectionLevel(section.content);
.trim(); const sectionId = generateSectionId(section.title);
const tags = parseSimpleAttributes(section.content);
const node: TreeNode = {
section,
level,
sectionId,
tags,
children: [],
};
// Find the correct parent based on header hierarchy
while (stack.length > 0 && stack[stack.length - 1].level >= level) {
stack.pop();
}
if (stack.length === 0) {
// This is a root level section
roots.push(node);
} else {
// This is a child of the last item in stack
const parent = stack[stack.length - 1];
parent.children.push(node);
node.parent = parent;
}
stack.push(node);
}
return roots;
};
const tree = buildTree();
// Recursively create events from tree
const createEventsFromNode = (node: TreeNode): void => {
const { section, level, sectionId, tags, children } = node;
// Determine if this node should become an index
const hasChildrenAtTargetLevel = children.some(child => child.level === parseLevel);
const shouldBeIndex = level < parseLevel && (hasChildrenAtTargetLevel || children.some(child => child.level <= parseLevel));
if (shouldBeIndex) {
// Create content event for intro text (30041)
const introContent = extractIntroContent(section.content, level);
if (introContent.trim()) {
const contentEvent = {
id: '',
pubkey: '',
created_at: Math.floor(Date.now() / 1000),
kind: 30041,
tags: [
['d', `${sectionId}-content`],
['title', section.title],
...tags
],
content: introContent,
sig: ''
};
allEvents.push(contentEvent);
}
// Extract tags directly from document content using simple regex // Create index event (30040)
const childATags: string[][] = [];
// Add a-tag for intro content if it exists
if (introContent.trim()) {
childATags.push(['a', `30041:${actualPubkey}:${sectionId}-content`, '', '']);
}
// Add a-tags for direct children
for (const child of children) {
const childHasSubChildren = child.children.some(grandchild => grandchild.level <= parseLevel);
const childShouldBeIndex = child.level < parseLevel && childHasSubChildren;
const childKind = childShouldBeIndex ? 30040 : 30041;
childATags.push(['a', `${childKind}:${actualPubkey}:${child.sectionId}`, '', '']);
}
const indexEvent = {
id: '',
pubkey: '',
created_at: Math.floor(Date.now() / 1000),
kind: 30040,
tags: [
['d', sectionId],
['title', section.title],
...tags,
...childATags
],
content: '',
sig: ''
};
allEvents.push(indexEvent);
} else {
// Create regular content event (30041)
const contentEvent = {
id: '',
pubkey: '',
created_at: Math.floor(Date.now() / 1000),
kind: 30041,
tags: [
['d', sectionId],
['title', section.title],
...tags
],
content: section.content,
sig: ''
};
allEvents.push(contentEvent);
}
// Recursively process children
for (const child of children) {
createEventsFromNode(child);
}
};
// Process all root level sections
for (const rootNode of tree) {
createEventsFromNode(rootNode);
}
// Create main document index if we have a document title (article format)
if (parsed.title && parsed.title.trim() !== '') {
const documentId = generateSectionId(parsed.title);
const documentTags = parseSimpleAttributes(parsed.content); const documentTags = parseSimpleAttributes(parsed.content);
// Create main index event (30040) // Create a-tags for all root level sections (level 2)
const indexEvent = { const mainIndexATags = tree.map(rootNode => {
id: '', // Will be generated by Nostr client const hasSubChildren = rootNode.children.some(child => child.level <= parseLevel);
pubkey: '', // Will be set by client const shouldBeIndex = rootNode.level < parseLevel && hasSubChildren;
const kind = shouldBeIndex ? 30040 : 30041;
return ['a', `${kind}:${actualPubkey}:${rootNode.sectionId}`, '', ''];
});
console.log('Debug: Root sections found:', tree.length);
console.log('Debug: Main index a-tags:', mainIndexATags);
const mainIndexEvent = {
id: '',
pubkey: '',
created_at: Math.floor(Date.now() / 1000), created_at: Math.floor(Date.now() / 1000),
kind: 30040, kind: 30040,
tags: [ tags: [
['d', documentId], ['d', documentId],
['title', parsed.title], ['title', parsed.title],
...documentTags, ...documentTags,
// Add a-tags for each section ...mainIndexATags
...parsed.sections.map(section => {
const sectionId = section.title
.toLowerCase()
.replace(/[^a-z0-9\s]/g, '')
.replace(/\s+/g, '-')
.trim();
const actualPubkey = pubkey || 'pubkey'; // Use actual pubkey if provided, fallback for compatibility
return ['a', `30041:${actualPubkey}:${sectionId}`, '', '']; // relay will be filled by client
})
], ],
content: '', // Index events have empty content content: '',
sig: '' // Will be generated by client sig: ''
}; };
return { return {
indexEvent, indexEvent: mainIndexEvent,
contentEvents contentEvents: allEvents
}; };
} }
// For scattered notes, return only content events // For scattered notes, return only content events
return { return {
contentEvents contentEvents: allEvents
}; };
} }

Loading…
Cancel
Save