Browse Source

Fix hierarchical parsing and event generation for multi-level AsciiDoc publishing

- Completely rewrote parseAsciiDocIterative to handle Level 3+ parsing correctly
- Fixed generateNostrEvents to build proper tree structure and create hierarchical 30040/30041 events
- Updated preview to show document titles for articles and only display hashtags (t-tags)
- Added parseSimpleAttributes export for direct tag parsing from section content
- Now supports proper index chains: Main 30040 → Sub 30040s → Content 30041s at configurable depth

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
master
limina1 7 months ago
parent
commit
e999ec4272
  1. 42
      src/lib/components/ZettelEditor.svelte
  2. 329
      src/lib/utils/asciidoc_metadata.ts

42
src/lib/components/ZettelEditor.svelte

@ -9,6 +9,7 @@ @@ -9,6 +9,7 @@
detectContentType,
type AsciiDocMetadata,
metadataToTags,
parseSimpleAttributes,
} from "$lib/utils/asciidoc_metadata";
import asciidoctor from "asciidoctor";
@ -105,7 +106,8 @@ Understanding the nature of knowledge itself... @@ -105,7 +106,8 @@ Understanding the nature of knowledge itself...
if (!parsedContent) return [];
return parsedContent.sections.map((section: { metadata: AsciiDocMetadata; content: string; title: string }) => {
const tags = metadataToTags(section.metadata);
// Use simple parsing directly on section content for accurate tag extraction
const tags = parseSimpleAttributes(section.content);
return {
title: section.title || "Untitled",
@ -261,6 +263,33 @@ Understanding the nature of knowledge itself... @@ -261,6 +263,33 @@ Understanding the nature of knowledge itself...
</div>
{:else}
<div class="prose prose-sm dark:prose-invert max-w-none">
<!-- Show document title and tags for articles -->
{#if contentType === 'article' && parsedContent?.title}
<div class="mb-6 border-b border-gray-200 dark:border-gray-700 pb-4">
<h1 class="text-xl font-bold text-gray-900 dark:text-gray-100 mb-3">
{parsedContent.title}
</h1>
<!-- Document-level tags -->
{#if parsedContent.content}
{@const documentTags = parseSimpleAttributes(parsedContent.content)}
{#if documentTags.filter(tag => tag[0] === 't').length > 0}
<div class="bg-gray-100 dark:bg-gray-800 rounded-lg p-3">
<div class="flex flex-wrap gap-2 items-center">
<span class="text-xs font-medium text-gray-600 dark:text-gray-400">Document tags:</span>
<!-- Show only hashtags (t-tags) -->
{#each documentTags.filter(tag => tag[0] === 't') as tag}
<div class="bg-blue-600 text-blue-100 px-2 py-1 rounded-full text-xs font-medium flex items-baseline">
<span class="mr-1">#</span>
<span>{tag[1]}</span>
</div>
{/each}
</div>
</div>
{/if}
{/if}
</div>
{/if}
{#each parsedSections as section, index}
<div class="mb-6">
<div
@ -286,19 +315,20 @@ Understanding the nature of knowledge itself... @@ -286,19 +315,20 @@ Understanding the nature of knowledge itself...
class="bg-gray-200 dark:bg-gray-700 rounded-lg p-3 mb-2"
>
<div class="flex flex-wrap gap-2 items-center">
{#if section.tags && section.tags.length > 0}
{#each section.tags as tag}
{#if section.tags && section.tags.filter(tag => tag[0] === 't').length > 0}
<!-- Show only hashtags (t-tags) -->
{#each section.tags.filter(tag => tag[0] === 't') as tag}
<div
class="bg-amber-900 text-amber-100 px-2 py-1 rounded-full text-xs font-medium flex items-baseline"
class="bg-blue-600 text-blue-100 px-2 py-1 rounded-full text-xs font-medium flex items-baseline"
>
<span class="font-mono">{tag[0]}:</span>
<span class="mr-1">#</span>
<span>{tag[1]}</span>
</div>
{/each}
{:else}
<span
class="text-gray-500 dark:text-gray-400 text-xs italic"
>No tags</span
>No hashtags</span
>
{/if}
</div>

329
src/lib/utils/asciidoc_metadata.ts

@ -259,7 +259,7 @@ function stripHeaderAndAttributes(content: string, isSection: boolean = false): @@ -259,7 +259,7 @@ function stripHeaderAndAttributes(content: string, isSection: boolean = false):
* Converts :tagname: tagvalue -> [tagname, tagvalue]
* Converts :tags: comma,separated -> [t, tag1], [t, tag2], etc.
*/
function parseSimpleAttributes(content: string): [string, string][] {
export function parseSimpleAttributes(content: string): [string, string][] {
const tags: [string, string][] = [];
const lines = content.split(/\r?\n/);
@ -559,9 +559,9 @@ export function extractMetadataFromSectionsOnly(content: string): { @@ -559,9 +559,9 @@ export function extractMetadataFromSectionsOnly(content: string): {
/**
* Iterative AsciiDoc parsing based on specified level
* Level 2: Only == sections become events (containing all subsections)
* Level 3: == sections become indices, === sections become events
* Level 4: === sections become indices, ==== sections become events, etc.
* Level 2: Only == sections become content events (containing all subsections)
* Level 3: == sections become indices + content events, === sections become content events
* Level 4: === sections become indices + content events, ==== sections become content events, etc.
*/
export function parseAsciiDocIterative(content: string, parseLevel: number = 2): ParsedAsciiDoc {
const asciidoctor = createProcessor();
@ -569,21 +569,67 @@ export function parseAsciiDocIterative(content: string, parseLevel: number = 2): @@ -569,21 +569,67 @@ export function parseAsciiDocIterative(content: string, parseLevel: number = 2):
const { metadata: docMetadata } = extractDocumentMetadata(content);
const lines = content.split(/\r?\n/);
const targetHeaderPattern = new RegExp(`^${'='.repeat(parseLevel)}\\s+`);
const sections: Array<{
metadata: SectionMetadata;
content: string;
title: string;
}> = [];
if (parseLevel === 2) {
// Level 2: Only == sections become events
const level2Pattern = /^==\s+/;
let currentSection: string | null = null;
let currentSectionContent: string[] = [];
let documentContent: string[] = [];
let inDocumentHeader = true;
for (const line of lines) {
if (line.match(level2Pattern)) {
inDocumentHeader = false;
// Save previous section if exists
if (currentSection) {
const sectionContent = currentSectionContent.join('\n');
sections.push(extractSectionMetadata(sectionContent));
}
// Start new section
currentSection = line;
currentSectionContent = [line];
} else if (currentSection) {
currentSectionContent.push(line);
} else if (inDocumentHeader) {
documentContent.push(line);
}
}
// Save the last section
if (currentSection) {
const sectionContent = currentSectionContent.join('\n');
sections.push(extractSectionMetadata(sectionContent));
}
const docContent = documentContent.join('\n');
return {
metadata: docMetadata,
content: docContent,
title: docMetadata.title || '',
sections: sections
};
}
// Level 3+: Parse both index level (parseLevel-1) and content level (parseLevel)
const indexLevelPattern = new RegExp(`^${'='.repeat(parseLevel - 1)}\\s+`);
const contentLevelPattern = new RegExp(`^${'='.repeat(parseLevel)}\\s+`);
let currentSection: string | null = null;
let currentSectionContent: string[] = [];
let documentContent: string[] = [];
let inDocumentHeader = true;
for (const line of lines) {
// Check if we've hit the first section at our target level
if (line.match(targetHeaderPattern)) {
// Check for both index level and content level headers
if (line.match(indexLevelPattern) || line.match(contentLevelPattern)) {
inDocumentHeader = false;
// Save previous section if exists
@ -596,10 +642,8 @@ export function parseAsciiDocIterative(content: string, parseLevel: number = 2): @@ -596,10 +642,8 @@ export function parseAsciiDocIterative(content: string, parseLevel: number = 2):
currentSection = line;
currentSectionContent = [line];
} else if (currentSection) {
// We're in a section - add content
currentSectionContent.push(line);
} else if (inDocumentHeader) {
// We're still in document content (before first section)
documentContent.push(line);
}
}
@ -610,10 +654,7 @@ export function parseAsciiDocIterative(content: string, parseLevel: number = 2): @@ -610,10 +654,7 @@ export function parseAsciiDocIterative(content: string, parseLevel: number = 2):
sections.push(extractSectionMetadata(sectionContent));
}
// Extract document content (everything before first section at target level)
// Keep the original content with attributes for simple parsing
const docContent = documentContent.join('\n');
return {
metadata: docMetadata,
content: docContent,
@ -623,87 +664,251 @@ export function parseAsciiDocIterative(content: string, parseLevel: number = 2): @@ -623,87 +664,251 @@ export function parseAsciiDocIterative(content: string, parseLevel: number = 2):
}
/**
* Generates Nostr events from parsed AsciiDoc
* Helper function to determine the header level of a section
*/
function getSectionLevel(sectionContent: string): number {
const lines = sectionContent.split(/\r?\n/);
for (const line of lines) {
const match = line.match(/^(=+)\s+/);
if (match) {
return match[1].length;
}
}
return 0;
}
/**
* Helper function to extract just the intro content (before first subsection)
*/
function extractIntroContent(sectionContent: string, currentLevel: number): string {
const lines = sectionContent.split(/\r?\n/);
const introLines: string[] = [];
let foundHeader = false;
for (const line of lines) {
const headerMatch = line.match(/^(=+)\s+/);
if (headerMatch) {
const level = headerMatch[1].length;
if (level === currentLevel && !foundHeader) {
// This is the section header itself
foundHeader = true;
continue; // Skip the header line itself for intro content
} else if (level > currentLevel) {
// This is a subsection, stop collecting intro content
break;
}
} else if (foundHeader) {
// This is intro content after the header
introLines.push(line);
}
}
return introLines.join('\n').trim();
}
/**
* Generates Nostr events from parsed AsciiDoc with proper hierarchical structure
* Based on docreference.md specifications
*/
export function generateNostrEvents(parsed: ParsedAsciiDoc, parseLevel: number = 2, pubkey?: string): {
export function generateNostrEvents(parsed: ParsedAsciiDoc, parseLevel: number = 2, pubkey?: string, maxDepth: number = 6): {
indexEvent?: any;
contentEvents: any[];
} {
const events: any[] = [];
const allEvents: any[] = [];
const actualPubkey = pubkey || 'pubkey';
// Create content events for each section (30041)
const contentEvents = parsed.sections.map(section => {
const sectionId = section.title
// Helper function to generate section ID
const generateSectionId = (title: string): string => {
return title
.toLowerCase()
.replace(/[^a-z0-9\s]/g, '')
.replace(/\s+/g, '-')
.trim();
};
// Extract tags directly from section content using simple regex
const sectionTags = parseSimpleAttributes(section.content);
return {
id: '', // Will be generated by Nostr client
pubkey: '', // Will be set by client
created_at: Math.floor(Date.now() / 1000),
kind: 30041,
tags: [
['d', sectionId],
['title', section.title],
...sectionTags
],
content: section.content,
sig: '' // Will be generated by client
// Build hierarchical tree structure
interface TreeNode {
section: {
metadata: any;
content: string;
title: string;
};
});
level: number;
sectionId: string;
tags: [string, string][];
children: TreeNode[];
parent?: TreeNode;
}
// Only create index event if we have a document title (article format)
if (parsed.title && parsed.title.trim() !== '') {
// Generate document identifier from title
const documentId = parsed.title
.toLowerCase()
.replace(/[^a-z0-9\s]/g, '')
.replace(/\s+/g, '-')
.trim();
// Convert flat sections to tree structure
const buildTree = (): TreeNode[] => {
const roots: TreeNode[] = [];
const stack: TreeNode[] = [];
for (const section of parsed.sections) {
const level = getSectionLevel(section.content);
const sectionId = generateSectionId(section.title);
const tags = parseSimpleAttributes(section.content);
const node: TreeNode = {
section,
level,
sectionId,
tags,
children: [],
};
// Find the correct parent based on header hierarchy
while (stack.length > 0 && stack[stack.length - 1].level >= level) {
stack.pop();
}
if (stack.length === 0) {
// This is a root level section
roots.push(node);
} else {
// This is a child of the last item in stack
const parent = stack[stack.length - 1];
parent.children.push(node);
node.parent = parent;
}
stack.push(node);
}
return roots;
};
const tree = buildTree();
// Recursively create events from tree
const createEventsFromNode = (node: TreeNode): void => {
const { section, level, sectionId, tags, children } = node;
// Determine if this node should become an index
const hasChildrenAtTargetLevel = children.some(child => child.level === parseLevel);
const shouldBeIndex = level < parseLevel && (hasChildrenAtTargetLevel || children.some(child => child.level <= parseLevel));
if (shouldBeIndex) {
// Create content event for intro text (30041)
const introContent = extractIntroContent(section.content, level);
if (introContent.trim()) {
const contentEvent = {
id: '',
pubkey: '',
created_at: Math.floor(Date.now() / 1000),
kind: 30041,
tags: [
['d', `${sectionId}-content`],
['title', section.title],
...tags
],
content: introContent,
sig: ''
};
allEvents.push(contentEvent);
}
// Extract tags directly from document content using simple regex
// Create index event (30040)
const childATags: string[][] = [];
// Add a-tag for intro content if it exists
if (introContent.trim()) {
childATags.push(['a', `30041:${actualPubkey}:${sectionId}-content`, '', '']);
}
// Add a-tags for direct children
for (const child of children) {
const childHasSubChildren = child.children.some(grandchild => grandchild.level <= parseLevel);
const childShouldBeIndex = child.level < parseLevel && childHasSubChildren;
const childKind = childShouldBeIndex ? 30040 : 30041;
childATags.push(['a', `${childKind}:${actualPubkey}:${child.sectionId}`, '', '']);
}
const indexEvent = {
id: '',
pubkey: '',
created_at: Math.floor(Date.now() / 1000),
kind: 30040,
tags: [
['d', sectionId],
['title', section.title],
...tags,
...childATags
],
content: '',
sig: ''
};
allEvents.push(indexEvent);
} else {
// Create regular content event (30041)
const contentEvent = {
id: '',
pubkey: '',
created_at: Math.floor(Date.now() / 1000),
kind: 30041,
tags: [
['d', sectionId],
['title', section.title],
...tags
],
content: section.content,
sig: ''
};
allEvents.push(contentEvent);
}
// Recursively process children
for (const child of children) {
createEventsFromNode(child);
}
};
// Process all root level sections
for (const rootNode of tree) {
createEventsFromNode(rootNode);
}
// Create main document index if we have a document title (article format)
if (parsed.title && parsed.title.trim() !== '') {
const documentId = generateSectionId(parsed.title);
const documentTags = parseSimpleAttributes(parsed.content);
// Create main index event (30040)
const indexEvent = {
id: '', // Will be generated by Nostr client
pubkey: '', // Will be set by client
// Create a-tags for all root level sections (level 2)
const mainIndexATags = tree.map(rootNode => {
const hasSubChildren = rootNode.children.some(child => child.level <= parseLevel);
const shouldBeIndex = rootNode.level < parseLevel && hasSubChildren;
const kind = shouldBeIndex ? 30040 : 30041;
return ['a', `${kind}:${actualPubkey}:${rootNode.sectionId}`, '', ''];
});
console.log('Debug: Root sections found:', tree.length);
console.log('Debug: Main index a-tags:', mainIndexATags);
const mainIndexEvent = {
id: '',
pubkey: '',
created_at: Math.floor(Date.now() / 1000),
kind: 30040,
tags: [
['d', documentId],
['title', parsed.title],
...documentTags,
// Add a-tags for each section
...parsed.sections.map(section => {
const sectionId = section.title
.toLowerCase()
.replace(/[^a-z0-9\s]/g, '')
.replace(/\s+/g, '-')
.trim();
const actualPubkey = pubkey || 'pubkey'; // Use actual pubkey if provided, fallback for compatibility
return ['a', `30041:${actualPubkey}:${sectionId}`, '', '']; // relay will be filled by client
})
...mainIndexATags
],
content: '', // Index events have empty content
sig: '' // Will be generated by client
content: '',
sig: ''
};
return {
indexEvent,
contentEvents
indexEvent: mainIndexEvent,
contentEvents: allEvents
};
}
// For scattered notes, return only content events
return {
contentEvents
contentEvents: allEvents
};
}

Loading…
Cancel
Save