Fix hierarchical parsing and event generation for multi-level AsciiDoc publishing

- Completely rewrote parseAsciiDocIterative to handle Level 3+ parsing correctly
- Fixed generateNostrEvents to build proper tree structure and create hierarchical 30040/30041 events
- Updated preview to show document titles for articles and only display hashtags (t-tags)
- Added parseSimpleAttributes export for direct tag parsing from section content
- Now supports proper index chains: Main 30040 → Sub 30040s → Content 30041s at configurable depth

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
9 months ago · e999ec4272
2 changed files with 303 additions and 68 deletions
--- a/src/lib/components/ZettelEditor.svelte
+++ b/src/lib/components/ZettelEditor.svelte
@ -9,6 +9,7 @@
  detectContentType,
  type AsciiDocMetadata,
  metadataToTags,
  parseSimpleAttributes,
 } from "$lib/utils/asciidoc_metadata";
 import asciidoctor from "asciidoctor";
@ -105,7 +106,8 @@ Understanding the nature of knowledge itself...
    if (!parsedContent) return [];
    return parsedContent.sections.map((section: { metadata: AsciiDocMetadata; content: string; title: string }) => {
-      const tags = metadataToTags(section.metadata);
+      // Use simple parsing directly on section content for accurate tag extraction
      const tags = parseSimpleAttributes(section.content);
      return {
        title: section.title || "Untitled",
@ -261,6 +263,33 @@ Understanding the nature of knowledge itself...
              </div>
            {:else}
              <div class="prose prose-sm dark:prose-invert max-w-none">
                <!-- Show document title and tags for articles -->
                {#if contentType === 'article' && parsedContent?.title}
                  <div class="mb-6 border-b border-gray-200 dark:border-gray-700 pb-4">
                    <h1 class="text-xl font-bold text-gray-900 dark:text-gray-100 mb-3">
                      {parsedContent.title}
                    </h1>
                    <!-- Document-level tags -->
                    {#if parsedContent.content}
                      {@const documentTags = parseSimpleAttributes(parsedContent.content)}
                      {#if documentTags.filter(tag => tag[0] === 't').length > 0}
                        <div class="bg-gray-100 dark:bg-gray-800 rounded-lg p-3">
                          <div class="flex flex-wrap gap-2 items-center">
                            <span class="text-xs font-medium text-gray-600 dark:text-gray-400">Document tags:</span>
                            <!-- Show only hashtags (t-tags) -->
                            {#each documentTags.filter(tag => tag[0] === 't') as tag}
                              <div class="bg-blue-600 text-blue-100 px-2 py-1 rounded-full text-xs font-medium flex items-baseline">
                                <span class="mr-1">#</span>
                                <span>{tag[1]}</span>
                              </div>
                            {/each}
                          </div>
                        </div>
                      {/if}
                    {/if}
                  </div>
                {/if}
                {#each parsedSections as section, index}
                  <div class="mb-6">
                    <div
@ -286,19 +315,20 @@ Understanding the nature of knowledge itself...
                        class="bg-gray-200 dark:bg-gray-700 rounded-lg p-3 mb-2"
                      >
                        <div class="flex flex-wrap gap-2 items-center">
-                          {#if section.tags && section.tags.length > 0}
+                          {#if section.tags && section.tags.filter(tag => tag[0] === 't').length > 0}
-                            {#each section.tags as tag}
+                            <!-- Show only hashtags (t-tags) -->
                            {#each section.tags.filter(tag => tag[0] === 't') as tag}
                              <div
-                                class="bg-amber-900 text-amber-100 px-2 py-1 rounded-full text-xs font-medium flex items-baseline"
+                                class="bg-blue-600 text-blue-100 px-2 py-1 rounded-full text-xs font-medium flex items-baseline"
                              >
-                                <span class="font-mono">{tag[0]}:</span>
+                                <span class="mr-1">#</span>
                                <span>{tag[1]}</span>
                              </div>
                            {/each}
                          {:else}
                            <span
                              class="text-gray-500 dark:text-gray-400 text-xs italic"
-                              >No tags</span
+                              >No hashtags</span
                            >
                          {/if}
                        </div>
--- a/src/lib/utils/asciidoc_metadata.ts
+++ b/src/lib/utils/asciidoc_metadata.ts
@ -259,7 +259,7 @@ function stripHeaderAndAttributes(content: string, isSection: boolean = false):
 * Converts :tagname: tagvalue -> [tagname, tagvalue] 
 * Converts :tags: comma,separated -> [t, tag1], [t, tag2], etc.
 */
-function parseSimpleAttributes(content: string): [string, string][] {
+export function parseSimpleAttributes(content: string): [string, string][] {
  const tags: [string, string][] = [];
  const lines = content.split(/\r?\n/);
@ -559,9 +559,9 @@ export function extractMetadataFromSectionsOnly(content: string): {
 /**
 * Iterative AsciiDoc parsing based on specified level
- * Level 2: Only == sections become events (containing all subsections)
+ * Level 2: Only == sections become content events (containing all subsections) 
- * Level 3: == sections become indices, === sections become events
+ * Level 3: == sections become indices + content events, === sections become content events
- * Level 4: === sections become indices, ==== sections become events, etc.
+ * Level 4: === sections become indices + content events, ==== sections become content events, etc.
 */
 export function parseAsciiDocIterative(content: string, parseLevel: number = 2): ParsedAsciiDoc {
  const asciidoctor = createProcessor();
@ -569,21 +569,67 @@ export function parseAsciiDocIterative(content: string, parseLevel: number = 2):
  const { metadata: docMetadata } = extractDocumentMetadata(content);
  const lines = content.split(/\r?\n/);
  const targetHeaderPattern = new RegExp(`^${'='.repeat(parseLevel)}\\s+`);
  const sections: Array<{
    metadata: SectionMetadata;
    content: string;
    title: string;
  }> = [];
  if (parseLevel === 2) {
    // Level 2: Only == sections become events
    const level2Pattern = /^==\s+/;
    let currentSection: string | null = null;
    let currentSectionContent: string[] = [];
    let documentContent: string[] = [];
    let inDocumentHeader = true;
    for (const line of lines) {
      if (line.match(level2Pattern)) {
        inDocumentHeader = false;
        // Save previous section if exists
        if (currentSection) {
          const sectionContent = currentSectionContent.join('\n');
          sections.push(extractSectionMetadata(sectionContent));
        }
        // Start new section
        currentSection = line;
        currentSectionContent = [line];
      } else if (currentSection) {
        currentSectionContent.push(line);
      } else if (inDocumentHeader) {
        documentContent.push(line);
      }
    }
    // Save the last section
    if (currentSection) {
      const sectionContent = currentSectionContent.join('\n');
      sections.push(extractSectionMetadata(sectionContent));
    }
    const docContent = documentContent.join('\n');
    return {
      metadata: docMetadata,
      content: docContent,
      title: docMetadata.title || '',
      sections: sections
    };
  }
  // Level 3+: Parse both index level (parseLevel-1) and content level (parseLevel)
  const indexLevelPattern = new RegExp(`^${'='.repeat(parseLevel - 1)}\\s+`);
  const contentLevelPattern = new RegExp(`^${'='.repeat(parseLevel)}\\s+`);
  let currentSection: string | null = null;
  let currentSectionContent: string[] = [];
  let documentContent: string[] = [];
  let inDocumentHeader = true;
  for (const line of lines) {
-    // Check if we've hit the first section at our target level
+    // Check for both index level and content level headers
-    if (line.match(targetHeaderPattern)) {
+    if (line.match(indexLevelPattern) || line.match(contentLevelPattern)) {
      inDocumentHeader = false;
      // Save previous section if exists
@ -596,10 +642,8 @@ export function parseAsciiDocIterative(content: string, parseLevel: number = 2):
      currentSection = line;
      currentSectionContent = [line];
    } else if (currentSection) {
      // We're in a section - add content
      currentSectionContent.push(line);
    } else if (inDocumentHeader) {
      // We're still in document content (before first section)
      documentContent.push(line);
    }
  }
@ -610,10 +654,7 @@ export function parseAsciiDocIterative(content: string, parseLevel: number = 2):
    sections.push(extractSectionMetadata(sectionContent));
  }
  // Extract document content (everything before first section at target level)
  // Keep the original content with attributes for simple parsing
  const docContent = documentContent.join('\n');
  return {
    metadata: docMetadata,
    content: docContent,
@ -623,87 +664,251 @@ export function parseAsciiDocIterative(content: string, parseLevel: number = 2):
 }
 /**
- * Generates Nostr events from parsed AsciiDoc
+ * Helper function to determine the header level of a section
 */
 function getSectionLevel(sectionContent: string): number {
  // A header is a line that starts with one or more '=' followed by
  // whitespace; the number of '=' characters is the header level.
  // Lines are split on LF or CRLF and tested individually so the trailing
  // whitespace requirement can never be satisfied by a line break.
  const firstHeader = sectionContent
    .split(/\r?\n/)
    .map(text => /^(=+)\s+/.exec(text))
    .find(hit => hit !== null);
  // No header line anywhere in the content is reported as level 0.
  return firstHeader ? firstHeader[1].length : 0;
 }
 /**
 * Helper function to extract just the intro content of a section: the lines
 * between the section's own header and the next header of any level.
 *
 * Lines before the section's own header are ignored. If a deeper header is
 * met before the section's own header is found, the result is empty.
 *
 * @param sectionContent - Raw section text, header line included (LF or CRLF).
 * @param currentLevel - Number of '=' characters in the section's own header.
 * @returns The intro text with surrounding whitespace trimmed, or '' if none.
 */
 function extractIntroContent(sectionContent: string, currentLevel: number): string {
  const introLines: string[] = [];
  let foundHeader = false;
  for (const line of sectionContent.split(/\r?\n/)) {
    const headerMatch = /^(=+)\s+/.exec(line);
    if (headerMatch === null) {
      // Plain line: it only counts as intro once we are past our own header
      if (foundHeader) {
        introLines.push(line);
      }
      continue;
    }
    if (foundHeader) {
      // Any header after our own ends the intro. Stopping here (rather than
      // only on deeper headers) keeps the body of a following sibling or
      // shallower section from being merged into this section's intro.
      break;
    }
    const level = headerMatch[1].length;
    if (level === currentLevel) {
      // This is the section header itself; it is not part of the intro
      foundHeader = true;
    } else if (level > currentLevel) {
      // A subsection appeared before our own header: there is no intro
      break;
    }
    // A shallower header before our own header is skipped; keep scanning
  }
  return introLines.join('\n').trim();
 }
 /**
 * Generates Nostr events from parsed AsciiDoc with proper hierarchical structure
 * Based on docreference.md specifications
 */
-export function generateNostrEvents(parsed: ParsedAsciiDoc, parseLevel: number = 2, pubkey?: string): {
+export function generateNostrEvents(parsed: ParsedAsciiDoc, parseLevel: number = 2, pubkey?: string, maxDepth: number = 6): {
  indexEvent?: any;
  contentEvents: any[];
 } {
-  const events: any[] = [];
+  const allEvents: any[] = [];
  const actualPubkey = pubkey || 'pubkey';
-  // Create content events for each section (30041)
+  // Helper function to generate section ID
-  const contentEvents = parsed.sections.map(section => {
+  const generateSectionId = (title: string): string => {
-    const sectionId = section.title
+    return title
      .toLowerCase()
      .replace(/[^a-z0-9\s]/g, '')
      .replace(/\s+/g, '-')
      .trim();
  };
-    // Extract tags directly from section content using simple regex
+  // Build hierarchical tree structure
-    const sectionTags = parseSimpleAttributes(section.content);
+  interface TreeNode {
-    
+    section: {
-    return {
+      metadata: any;
-      id: '', // Will be generated by Nostr client
+      content: string;
-      pubkey: '', // Will be set by client  
+      title: string;
      created_at: Math.floor(Date.now() / 1000),
      kind: 30041,
      tags: [
        ['d', sectionId],
        ['title', section.title],
        ...sectionTags
      ],
      content: section.content,
      sig: '' // Will be generated by client
    };
-  });
+    level: number;
    sectionId: string;
    tags: [string, string][];
    children: TreeNode[];
    parent?: TreeNode;
  }
-  // Only create index event if we have a document title (article format)
+  // Convert flat sections to tree structure
-  if (parsed.title && parsed.title.trim() !== '') {
+  const buildTree = (): TreeNode[] => {
-    // Generate document identifier from title
+    const roots: TreeNode[] = [];
-    const documentId = parsed.title
+    const stack: TreeNode[] = [];
-      .toLowerCase()
+    
-      .replace(/[^a-z0-9\s]/g, '')
+    for (const section of parsed.sections) {
-      .replace(/\s+/g, '-')
+      const level = getSectionLevel(section.content);
-      .trim();
+      const sectionId = generateSectionId(section.title);
      const tags = parseSimpleAttributes(section.content);
      const node: TreeNode = {
        section,
        level,
        sectionId,
        tags,
        children: [],
      };
      // Find the correct parent based on header hierarchy
      while (stack.length > 0 && stack[stack.length - 1].level >= level) {
        stack.pop();
      }
      if (stack.length === 0) {
        // This is a root level section
        roots.push(node);
      } else {
        // This is a child of the last item in stack
        const parent = stack[stack.length - 1];
        parent.children.push(node);
        node.parent = parent;
      }
      stack.push(node);
    }
    return roots;
  };
  const tree = buildTree();
  // Recursively create events from tree
  const createEventsFromNode = (node: TreeNode): void => {
    const { section, level, sectionId, tags, children } = node;
    // Determine if this node should become an index
    const hasChildrenAtTargetLevel = children.some(child => child.level === parseLevel);
    const shouldBeIndex = level < parseLevel && (hasChildrenAtTargetLevel || children.some(child => child.level <= parseLevel));
    if (shouldBeIndex) {
      // Create content event for intro text (30041)
      const introContent = extractIntroContent(section.content, level);
      if (introContent.trim()) {
        const contentEvent = {
          id: '',
          pubkey: '',
          created_at: Math.floor(Date.now() / 1000),
          kind: 30041,
          tags: [
            ['d', `${sectionId}-content`],
            ['title', section.title],
            ...tags
          ],
          content: introContent,
          sig: ''
        };
        allEvents.push(contentEvent);
      }
-    // Extract tags directly from document content using simple regex  
+      // Create index event (30040)
      const childATags: string[][] = [];
      // Add a-tag for intro content if it exists
      if (introContent.trim()) {
        childATags.push(['a', `30041:${actualPubkey}:${sectionId}-content`, '', '']);
      }
      // Add a-tags for direct children
      for (const child of children) {
        const childHasSubChildren = child.children.some(grandchild => grandchild.level <= parseLevel);
        const childShouldBeIndex = child.level < parseLevel && childHasSubChildren;
        const childKind = childShouldBeIndex ? 30040 : 30041;
        childATags.push(['a', `${childKind}:${actualPubkey}:${child.sectionId}`, '', '']);
      }
      const indexEvent = {
        id: '',
        pubkey: '',
        created_at: Math.floor(Date.now() / 1000),
        kind: 30040,
        tags: [
          ['d', sectionId],
          ['title', section.title],
          ...tags,
          ...childATags
        ],
        content: '',
        sig: ''
      };
      allEvents.push(indexEvent);
    } else {
      // Create regular content event (30041)
      const contentEvent = {
        id: '',
        pubkey: '',
        created_at: Math.floor(Date.now() / 1000),
        kind: 30041,
        tags: [
          ['d', sectionId],
          ['title', section.title],
          ...tags
        ],
        content: section.content,
        sig: ''
      };
      allEvents.push(contentEvent);
    }
    // Recursively process children
    for (const child of children) {
      createEventsFromNode(child);
    }
  };
  // Process all root level sections
  for (const rootNode of tree) {
    createEventsFromNode(rootNode);
  }
  // Create main document index if we have a document title (article format)
  if (parsed.title && parsed.title.trim() !== '') {
    const documentId = generateSectionId(parsed.title);
    const documentTags = parseSimpleAttributes(parsed.content);
-    // Create main index event (30040)
+    // Create a-tags for all root level sections (level 2)
-    const indexEvent = {
+    const mainIndexATags = tree.map(rootNode => {
-      id: '', // Will be generated by Nostr client
+      const hasSubChildren = rootNode.children.some(child => child.level <= parseLevel);
-      pubkey: '', // Will be set by client
+      const shouldBeIndex = rootNode.level < parseLevel && hasSubChildren;
      const kind = shouldBeIndex ? 30040 : 30041;
      return ['a', `${kind}:${actualPubkey}:${rootNode.sectionId}`, '', ''];
    });
    console.log('Debug: Root sections found:', tree.length);
    console.log('Debug: Main index a-tags:', mainIndexATags);
    const mainIndexEvent = {
      id: '',
      pubkey: '',
      created_at: Math.floor(Date.now() / 1000),
      kind: 30040,
      tags: [
        ['d', documentId],
        ['title', parsed.title],
        ...documentTags,
-        // Add a-tags for each section
+        ...mainIndexATags
        ...parsed.sections.map(section => {
          const sectionId = section.title
            .toLowerCase()
            .replace(/[^a-z0-9\s]/g, '')
            .replace(/\s+/g, '-')
            .trim();
          const actualPubkey = pubkey || 'pubkey'; // Use actual pubkey if provided, fallback for compatibility
          return ['a', `30041:${actualPubkey}:${sectionId}`, '', '']; // relay will be filled by client
        })
      ],
-      content: '', // Index events have empty content
+      content: '',
-      sig: '' // Will be generated by client
+      sig: ''
    };
    return {
-      indexEvent,
+      indexEvent: mainIndexEvent,
-      contentEvents
+      contentEvents: allEvents
    };
  }
  // For scattered notes, return only content events
  return {
-    contentEvents
+    contentEvents: allEvents
  };
 }