diff --git a/package-lock.json b/package-lock.json
index 0fe9c01..ad65282 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -2565,18 +2565,37 @@
}
},
"node_modules/chokidar": {
- "version": "4.0.3",
- "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-4.0.3.tgz",
- "integrity": "sha512-Qgzu8kfBvo+cA4962jnP1KkS6Dop5NS6g7R5LFYJr4b8Ub94PPQXUksCw9PvXoeXPRRddRNC5C1JQUR2SMGtnA==",
- "devOptional": true,
+ "version": "3.6.0",
+ "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.6.0.tgz",
+ "integrity": "sha512-7VT13fmjotKpGipCW9JEQAusEPE+Ei8nl6/g4FBAmIm0GOOLMua9NDDo/DWp0ZAxCr3cPq5ZpBqmPAQgDda2Pw==",
"dependencies": {
- "readdirp": "^4.0.1"
+ "anymatch": "~3.1.2",
+ "braces": "~3.0.2",
+ "glob-parent": "~5.1.2",
+ "is-binary-path": "~2.1.0",
+ "is-glob": "~4.0.1",
+ "normalize-path": "~3.0.0",
+ "readdirp": "~3.6.0"
},
"engines": {
- "node": ">= 14.16.0"
+ "node": ">= 8.10.0"
},
"funding": {
"url": "https://paulmillr.com/funding/"
+ },
+ "optionalDependencies": {
+ "fsevents": "~2.3.2"
+ }
+ },
+ "node_modules/chokidar/node_modules/glob-parent": {
+ "version": "5.1.2",
+ "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz",
+ "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==",
+ "dependencies": {
+ "is-glob": "^4.0.1"
+ },
+ "engines": {
+ "node": ">= 6"
}
},
"node_modules/cliui": {
@@ -3420,6 +3439,15 @@
}
}
},
+ "node_modules/eslint-plugin-svelte/node_modules/yaml": {
+ "version": "1.10.2",
+ "resolved": "https://registry.npmjs.org/yaml/-/yaml-1.10.2.tgz",
+ "integrity": "sha512-r3vXyErRCYJ7wg28yvBY5VSoAF8ZvlcW9/BwUzEtUsjvX/DKs24dIkuwjtuprwJJHsbyUbLApepYTR1BN4uHrg==",
+ "dev": true,
+ "engines": {
+ "node": ">= 6"
+ }
+ },
"node_modules/eslint-scope": {
"version": "8.4.0",
"resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-8.4.0.tgz",
@@ -5474,16 +5502,25 @@
}
},
"node_modules/readdirp": {
- "version": "4.1.2",
- "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-4.1.2.tgz",
- "integrity": "sha512-GDhwkLfywWL2s6vEjyhri+eXmfH6j1L7JE27WhqLeYzoh/A3DBaYGEj2H/HFZCn/kMfim73FXxEJTw06WtxQwg==",
- "devOptional": true,
+ "version": "3.6.0",
+ "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-3.6.0.tgz",
+ "integrity": "sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==",
+ "dependencies": {
+ "picomatch": "^2.2.1"
+ },
"engines": {
- "node": ">= 14.18.0"
+ "node": ">=8.10.0"
+ }
+ },
+ "node_modules/readdirp/node_modules/picomatch": {
+ "version": "2.3.1",
+ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz",
+ "integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==",
+ "engines": {
+ "node": ">=8.6"
},
"funding": {
- "type": "individual",
- "url": "https://paulmillr.com/funding/"
+ "url": "https://github.com/sponsors/jonschlinkert"
}
},
"node_modules/require-directory": {
@@ -5952,6 +5989,34 @@
"typescript": ">=5.0.0"
}
},
+ "node_modules/svelte-check/node_modules/chokidar": {
+ "version": "4.0.3",
+ "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-4.0.3.tgz",
+ "integrity": "sha512-Qgzu8kfBvo+cA4962jnP1KkS6Dop5NS6g7R5LFYJr4b8Ub94PPQXUksCw9PvXoeXPRRddRNC5C1JQUR2SMGtnA==",
+ "dev": true,
+ "dependencies": {
+ "readdirp": "^4.0.1"
+ },
+ "engines": {
+ "node": ">= 14.16.0"
+ },
+ "funding": {
+ "url": "https://paulmillr.com/funding/"
+ }
+ },
+ "node_modules/svelte-check/node_modules/readdirp": {
+ "version": "4.1.2",
+ "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-4.1.2.tgz",
+ "integrity": "sha512-GDhwkLfywWL2s6vEjyhri+eXmfH6j1L7JE27WhqLeYzoh/A3DBaYGEj2H/HFZCn/kMfim73FXxEJTw06WtxQwg==",
+ "dev": true,
+ "engines": {
+ "node": ">= 14.18.0"
+ },
+ "funding": {
+ "type": "individual",
+ "url": "https://paulmillr.com/funding/"
+ }
+ },
"node_modules/svelte-eslint-parser": {
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/svelte-eslint-parser/-/svelte-eslint-parser-1.3.0.tgz",
@@ -6139,51 +6204,6 @@
"node": ">=14.0.0"
}
},
- "node_modules/tailwindcss/node_modules/chokidar": {
- "version": "3.6.0",
- "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.6.0.tgz",
- "integrity": "sha512-7VT13fmjotKpGipCW9JEQAusEPE+Ei8nl6/g4FBAmIm0GOOLMua9NDDo/DWp0ZAxCr3cPq5ZpBqmPAQgDda2Pw==",
- "dependencies": {
- "anymatch": "~3.1.2",
- "braces": "~3.0.2",
- "glob-parent": "~5.1.2",
- "is-binary-path": "~2.1.0",
- "is-glob": "~4.0.1",
- "normalize-path": "~3.0.0",
- "readdirp": "~3.6.0"
- },
- "engines": {
- "node": ">= 8.10.0"
- },
- "funding": {
- "url": "https://paulmillr.com/funding/"
- },
- "optionalDependencies": {
- "fsevents": "~2.3.2"
- }
- },
- "node_modules/tailwindcss/node_modules/chokidar/node_modules/glob-parent": {
- "version": "5.1.2",
- "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz",
- "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==",
- "dependencies": {
- "is-glob": "^4.0.1"
- },
- "engines": {
- "node": ">= 6"
- }
- },
- "node_modules/tailwindcss/node_modules/picomatch": {
- "version": "2.3.1",
- "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz",
- "integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==",
- "engines": {
- "node": ">=8.6"
- },
- "funding": {
- "url": "https://github.com/sponsors/jonschlinkert"
- }
- },
"node_modules/tailwindcss/node_modules/postcss-load-config": {
"version": "4.0.2",
"resolved": "https://registry.npmjs.org/postcss-load-config/-/postcss-load-config-4.0.2.tgz",
@@ -6230,28 +6250,6 @@
"node": ">=4"
}
},
- "node_modules/tailwindcss/node_modules/readdirp": {
- "version": "3.6.0",
- "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-3.6.0.tgz",
- "integrity": "sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==",
- "dependencies": {
- "picomatch": "^2.2.1"
- },
- "engines": {
- "node": ">=8.10.0"
- }
- },
- "node_modules/tailwindcss/node_modules/yaml": {
- "version": "2.8.0",
- "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.8.0.tgz",
- "integrity": "sha512-4lLa/EcQCB0cJkyts+FpIRx5G/llPxfP6VQU5KByHEhLxY3IJCH0f0Hy1MHI8sClTvsIb8qwRJ6R/ZdlDJ/leQ==",
- "bin": {
- "yaml": "bin.mjs"
- },
- "engines": {
- "node": ">= 14.6"
- }
- },
"node_modules/thenify": {
"version": "3.3.1",
"resolved": "https://registry.npmjs.org/thenify/-/thenify-3.3.1.tgz",
@@ -6792,12 +6790,14 @@
}
},
"node_modules/yaml": {
- "version": "1.10.2",
- "resolved": "https://registry.npmjs.org/yaml/-/yaml-1.10.2.tgz",
- "integrity": "sha512-r3vXyErRCYJ7wg28yvBY5VSoAF8ZvlcW9/BwUzEtUsjvX/DKs24dIkuwjtuprwJJHsbyUbLApepYTR1BN4uHrg==",
- "dev": true,
+ "version": "2.8.0",
+ "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.8.0.tgz",
+ "integrity": "sha512-4lLa/EcQCB0cJkyts+FpIRx5G/llPxfP6VQU5KByHEhLxY3IJCH0f0Hy1MHI8sClTvsIb8qwRJ6R/ZdlDJ/leQ==",
+ "bin": {
+ "yaml": "bin.mjs"
+ },
"engines": {
- "node": ">= 6"
+ "node": ">= 14.6"
}
},
"node_modules/yargs": {
diff --git a/src/lib/components/EventInput.svelte b/src/lib/components/EventInput.svelte
index 2768205..0519692 100644
--- a/src/lib/components/EventInput.svelte
+++ b/src/lib/components/EventInput.svelte
@@ -14,6 +14,7 @@
} from "$lib/utils/event_input_utils";
import {
extractDocumentMetadata,
+ extractSmartMetadata,
metadataToTags,
removeMetadataFromContent
} from "$lib/utils/asciidoc_metadata";
@@ -65,60 +66,45 @@
sessionStorage.removeItem('zettelEditorContent');
sessionStorage.removeItem('zettelEditorSource');
- // Extract title from content
- const extracted = extractTitleFromContent(content);
- if (extracted) {
- title = extracted;
+ // Extract title and metadata using the standardized parser
+ const { metadata } = extractSmartMetadata(content);
+ if (metadata.title) {
+ title = metadata.title;
titleManuallyEdited = false;
dTagManuallyEdited = false;
}
- // For content from ZettelEditor, don't extract any metadata
- // since ZettelEditor content never has document metadata
+ // Extract metadata for 30040 and 30041 events
if (kind === 30040 || kind === 30041) {
- extractedMetadata = [];
+ extractedMetadata = metadataToTags(metadata);
}
}
});
/**
- * Extracts the first Markdown/AsciiDoc header as the title.
+ * Extracts the first Markdown/AsciiDoc header as the title using the standardized parser.
*/
function extractTitleFromContent(content: string): string {
- // Match Markdown (# Title) or AsciiDoc (= Title) headers
- // Look for document title (=) first, then fall back to section headers (==)
- const documentMatch = content.match(/^=\s*(.+)$/m);
- if (documentMatch) {
- const title = documentMatch[1].trim();
- // Only return the title if it's not empty (malformed titles like "=|" will be empty)
- if (title) {
- return title;
- }
- }
-
- // If no valid document title, look for the first section header
- const sectionMatch = content.match(/^==\s*(.+)$/m);
- if (sectionMatch) {
- return sectionMatch[1].trim();
- }
-
- return "";
+ const { metadata } = extractSmartMetadata(content);
+ return metadata.title || "";
}
function handleContentInput(e: Event) {
content = (e.target as HTMLTextAreaElement).value;
+
+ // Extract title and metadata using the standardized parser
+ const { metadata } = extractSmartMetadata(content);
+
if (!titleManuallyEdited) {
- const extracted = extractTitleFromContent(content);
- console.log("Content input - extracted title:", extracted);
- title = extracted;
+ console.log("Content input - extracted title:", metadata.title);
+ title = metadata.title || "";
// Reset dTagManuallyEdited when title changes so d-tag can be auto-generated
dTagManuallyEdited = false;
}
// Extract metadata from AsciiDoc content for 30040 and 30041 events
if (kind === 30040 || kind === 30041) {
- // Don't extract metadata - let users add tags manually
- extractedMetadata = [];
+ extractedMetadata = metadataToTags(metadata);
} else {
extractedMetadata = [];
}
diff --git a/src/lib/components/ZettelEditor.svelte b/src/lib/components/ZettelEditor.svelte
index 9e33343..da96f74 100644
--- a/src/lib/components/ZettelEditor.svelte
+++ b/src/lib/components/ZettelEditor.svelte
@@ -2,17 +2,12 @@
import { Textarea, Button } from "flowbite-svelte";
import { EyeOutline } from "flowbite-svelte-icons";
import {
- parseAsciiDocSections,
- type ZettelSection,
- } from "$lib/utils/ZettelParser";
- import {
- extractDocumentMetadata,
- extractSectionMetadata,
- parseAsciiDocWithMetadata,
- type AsciiDocMetadata,
- metadataToTags,
- } from "$lib/utils/asciidoc_metadata";
- import asciidoctor from "asciidoctor";
+ extractSmartMetadata,
+ parseAsciiDocWithMetadata,
+ type AsciiDocMetadata,
+ metadataToTags,
+} from "$lib/utils/asciidoc_metadata";
+import asciidoctor from "asciidoctor";
// Component props
let {
@@ -45,34 +40,20 @@ Note content here...
onPreviewToggle?: (show: boolean) => void;
}>();
- // Initialize AsciiDoctor processor
- const asciidoctorProcessor = asciidoctor();
-
- // Parse sections for preview using the new metadata service
+ // Parse sections for preview using the smart metadata service
let parsedSections = $derived.by(() => {
if (!content.trim()) return [];
- // Check if content starts with a document header (level 0 header)
- const hasDocumentHeader = content.match(/^=\s+/m);
+ // Use smart metadata extraction that handles both document headers and section-only content
+ const { metadata: docMetadata } = extractSmartMetadata(content);
- let sections;
- if (hasDocumentHeader) {
- // Use the proper metadata service for documents with headers
- const parsed = parseAsciiDocWithMetadata(content);
- sections = parsed.sections;
- } else {
- // For content that starts directly with sections, split manually
- const sectionStrings = content.split(/(?=^==\s+)/gm).filter((section: string) => section.trim());
- sections = sectionStrings.map((sectionString: string) => {
- const { metadata, content, title } = extractSectionMetadata(sectionString);
- return { metadata, content, title };
- });
- }
+ // Parse the content using the standardized parser
+ const parsed = parseAsciiDocWithMetadata(content);
// Debug logging
- console.log("Parsed sections:", sections);
+ console.log("Parsed sections:", parsed.sections);
- return sections.map((section: { metadata: AsciiDocMetadata; content: string; title: string }) => {
+ return parsed.sections.map((section: { metadata: AsciiDocMetadata; content: string; title: string }) => {
// Use only section metadata for each section
// Don't combine with document metadata to avoid overriding section-specific metadata
const tags = metadataToTags(section.metadata);
@@ -259,7 +240,7 @@ Note content here...
- {@html asciidoctorProcessor.convert(
+ {@html asciidoctor().convert(
`== ${section.title}\n\n${section.content}`,
{
standalone: false,
diff --git a/src/lib/services/publisher.ts b/src/lib/services/publisher.ts
index 3d5e9fe..98b63f4 100644
--- a/src/lib/services/publisher.ts
+++ b/src/lib/services/publisher.ts
@@ -1,7 +1,7 @@
import { get } from "svelte/store";
import { ndkInstance } from "../ndk.ts";
import { getMimeTags } from "../utils/mime.ts";
-import { parseAsciiDocSections } from "../utils/ZettelParser.ts";
+import { parseAsciiDocWithMetadata, metadataToTags } from "../utils/asciidoc_metadata.ts";
import { NDKRelaySet, NDKEvent } from "@nostr-dev-kit/ndk";
import { nip19 } from "nostr-tools";
@@ -44,18 +44,18 @@ export async function publishZettel(
}
try {
- // Parse content into sections
- const sections = parseAsciiDocSections(content, 2);
+ // Parse content into sections using the standardized parser
+ const parsed = parseAsciiDocWithMetadata(content);
- if (sections.length === 0) {
+ if (parsed.sections.length === 0) {
throw new Error("No valid sections found in content");
}
// For now, publish only the first section
- const firstSection = sections[0];
+ const firstSection = parsed.sections[0];
const title = firstSection.title;
const cleanContent = firstSection.content;
- const sectionTags = firstSection.tags || [];
+ const sectionTags = metadataToTags(firstSection.metadata);
// Generate d-tag and create event
const dTag = generateDTag(title);
@@ -128,8 +128,8 @@ export async function publishMultipleZettels(
}
try {
- const sections = parseAsciiDocSections(content, 2);
- if (sections.length === 0) {
+ const parsed = parseAsciiDocWithMetadata(content);
+ if (parsed.sections.length === 0) {
throw new Error('No valid sections found in content');
}
@@ -141,10 +141,10 @@ export async function publishMultipleZettels(
const results: PublishResult[] = [];
const publishedEvents: NDKEvent[] = [];
- for (const section of sections) {
+ for (const section of parsed.sections) {
const title = section.title;
const cleanContent = section.content;
- const sectionTags = section.tags || [];
+ const sectionTags = metadataToTags(section.metadata);
const dTag = generateDTag(title);
const [mTag, MTag] = getMimeTags(kind);
const tags: string[][] = [["d", dTag], mTag, MTag, ["title", title]];
diff --git a/src/lib/utils/asciidoc_metadata.ts b/src/lib/utils/asciidoc_metadata.ts
index 69e0e1e..6d6754c 100644
--- a/src/lib/utils/asciidoc_metadata.ts
+++ b/src/lib/utils/asciidoc_metadata.ts
@@ -1,18 +1,14 @@
/**
- * AsciiDoc Metadata Extraction Service
+ * AsciiDoc Metadata Extraction Service using Asciidoctor
*
- * Extracts metadata from AsciiDoc document headers and section headers,
- * mapping them to Nostr event tags according to NKBIP-01 specification.
- *
- * Document header structure:
- * = Document Title
- * Author Name
- * version, date, revision info
- * :attribute: value
- *
- * The first empty line marks the end of the header and start of the document body.
+ * Thin wrapper around Asciidoctor's built-in metadata extraction capabilities.
+ * Leverages the existing Pharos parser to avoid duplication.
*/
+// @ts-ignore
+import Processor from "asciidoctor";
+import type { Document } from "asciidoctor";
+
export interface AsciiDocMetadata {
title?: string;
authors?: string[];
@@ -30,7 +26,6 @@ export interface AsciiDocMetadata {
autoUpdate?: 'yes' | 'ask' | 'no';
}
-// Sections use the same metadata structure as documents
export type SectionMetadata = AsciiDocMetadata;
export interface ParsedAsciiDoc {
@@ -43,448 +38,463 @@ export interface ParsedAsciiDoc {
}>;
}
+// Shared attribute mapping based on Asciidoctor standard attributes
+const ATTRIBUTE_MAP: Record<string, string> = {
+ // Standard Asciidoctor attributes
+ 'author': 'authors',
+ 'description': 'summary',
+ 'keywords': 'tags',
+ 'revnumber': 'version',
+ 'revdate': 'publicationDate',
+ 'revremark': 'edition',
+ 'title': 'title',
+
+ // Custom attributes for Alexandria
+ 'published_by': 'publishedBy',
+ 'publisher': 'publisher',
+ 'summary': 'summary',
+ 'image': 'coverImage',
+ 'cover': 'coverImage',
+ 'isbn': 'isbn',
+ 'source': 'source',
+ 'type': 'type',
+ 'auto-update': 'autoUpdate',
+ 'version': 'version',
+ 'edition': 'edition',
+ 'published_on': 'publicationDate',
+ 'date': 'publicationDate',
+ 'version-label': 'version',
+};
+
/**
- * Shared function to parse metadata from attribute entries
- * @param metadata The metadata object to populate
- * @param key The attribute key
- * @param value The attribute value
+ * Creates an Asciidoctor processor instance
*/
-function parseMetadataAttribute(metadata: AsciiDocMetadata, key: string, value: string): void {
- switch (key.toLowerCase()) {
- case 'author':
- // Accumulate multiple authors
- if (!metadata.authors) {
- metadata.authors = [];
- }
- metadata.authors.push(value);
- break;
- case 'version':
- // Only set version if not already set from revision line
- if (!metadata.version) {
- metadata.version = value;
- }
- break;
- case 'edition':
- metadata.edition = value;
- break;
- case 'published_on':
- case 'date':
- metadata.publicationDate = value;
- break;
- case 'published_by':
- case 'publisher':
- // Only set publishedBy if not already set from revision line
- if (!metadata.publishedBy) {
- metadata.publishedBy = value;
- }
- break;
- case 'summary':
- case 'description':
- // Accumulate multiple summaries/descriptions
- if (!metadata.summary) {
- metadata.summary = value;
+function createProcessor() {
+ return Processor();
+}
+
+/**
+ * Extracts tags from attributes, combining tags and keywords
+ */
+function extractTagsFromAttributes(attributes: Record<string, unknown>): string[] {
+ const tags: string[] = [];
+ const attrTags = attributes['tags'];
+ const attrKeywords = attributes['keywords'];
+
+ if (attrTags && typeof attrTags === 'string') {
+ tags.push(...attrTags.split(',').map(tag => tag.trim()));
+ }
+
+ if (attrKeywords && typeof attrKeywords === 'string') {
+ tags.push(...attrKeywords.split(',').map(tag => tag.trim()));
+ }
+
+ return [...new Set(tags)]; // Remove duplicates
+}
+
+/**
+ * Maps attributes to metadata with special handling for authors and tags
+ */
+function mapAttributesToMetadata(attributes: Record<string, unknown>, metadata: AsciiDocMetadata, isDocument: boolean = false): void {
+ for (const [key, value] of Object.entries(attributes)) {
+ const metadataKey = ATTRIBUTE_MAP[key.toLowerCase()];
+ if (metadataKey && value && typeof value === 'string') {
+ if (metadataKey === 'authors' && isDocument) {
+ // Skip author mapping for documents since it's handled manually
+ continue;
+ } else if (metadataKey === 'authors' && !isDocument) {
+ // For sections, append author to existing authors array
+ if (!metadata.authors) {
+ metadata.authors = [];
+ }
+ metadata.authors.push(value);
+ } else if (metadataKey === 'tags') {
+ // Skip tags mapping since it's handled by extractTagsFromAttributes
+ continue;
} else {
- // If we already have a summary, append this one
- metadata.summary = metadata.summary + ' ' + value;
+ (metadata as any)[metadataKey] = value;
}
- break;
- case 'image':
- case 'cover':
- metadata.coverImage = value;
- break;
- case 'isbn':
- metadata.isbn = value;
- break;
- case 'source':
- metadata.source = value;
- break;
- case 'type':
- metadata.type = value;
- break;
- case 'auto-update':
- if (value === 'yes' || value === 'ask' || value === 'no') {
- metadata.autoUpdate = value;
- }
- break;
- case 'tags':
- case 'keywords':
- // Accumulate multiple tag sets
- if (!metadata.tags) {
- metadata.tags = [];
- }
- const newTags = value.split(',').map(tag => tag.trim());
- metadata.tags.push(...newTags);
- break;
+ }
}
}
/**
- * Shared function to extract metadata from header lines
- * @param lines The lines to process
- * @param startLine The starting line index
- * @param metadata The metadata object to populate
- * @returns The index of the line after the header metadata
+ * Extracts authors from header line (document or section)
*/
-function extractHeaderMetadata(lines: string[], startLine: number, metadata: AsciiDocMetadata): number {
- let currentLine = startLine;
-
- // Process the next two lines for author and revision info
- let processedLines = 0;
- for (let i = 0; i < 2 && currentLine + i < lines.length; i++) {
- const line = lines[currentLine + i];
-
- // Skip empty lines
- if (line.trim() === '') {
- continue;
+function extractAuthorsFromHeader(sourceContent: string, isSection: boolean = false): string[] {
+ const authors: string[] = [];
+ const lines = sourceContent.split(/\r?\n/);
+ const headerPattern = isSection ? /^==\s+/ : /^=\s+/;
+
+ for (let i = 0; i < lines.length; i++) {
+ const line = lines[i];
+ if (line.match(headerPattern)) {
+ // Found title line, check subsequent lines for authors
+ let j = i + 1;
+ while (j < lines.length) {
+ const authorLine = lines[j];
+
+ // Stop if we hit a blank line or content that's not an author
+ if (authorLine.trim() === '') {
+ break;
+ }
+
+ if (authorLine.includes('<') && !authorLine.startsWith(':')) {
+          // This is an author line like "John Doe <john@example.com>"
+ const authorName = authorLine.split('<')[0].trim();
+ if (authorName) {
+ authors.push(authorName);
+ }
+ } else if (isSection && authorLine.match(/^[A-Za-z\s]+$/) && authorLine.trim() !== '' && authorLine.trim().split(/\s+/).length <= 2) {
+ // This is a simple author name without email (for sections)
+ authors.push(authorLine.trim());
+ } else if (authorLine.startsWith(':')) {
+ // This is an attribute line, skip it - attributes are handled by mapAttributesToMetadata
+ // Don't break here, continue to next line
+ } else {
+ // Not an author line, stop looking
+ break;
+ }
+
+ j++;
+ }
+ break;
}
+ }
+
+ return authors;
+}
- // Skip attribute lines (they'll be processed later)
- if (line.startsWith(':')) {
- continue;
+/**
+ * Strips header and attribute lines from content
+ */
+function stripHeaderAndAttributes(content: string, isSection: boolean = false): string {
+ const lines = content.split(/\r?\n/);
+ let contentStart = 0;
+ const headerPattern = isSection ? /^==\s+/ : /^=\s+/;
+
+ for (let i = 0; i < lines.length; i++) {
+ const line = lines[i];
+ // Skip title line, author line, revision line, and attribute lines
+ if (!line.match(headerPattern) && !line.includes('<') && !line.match(/^.+,\s*.+:\s*.+$/) &&
+ !line.match(/^:[^:]+:\s*.+$/) && line.trim() !== '') {
+ contentStart = i;
+ break;
}
+ }
-    // Check if this is an author line (contains <email>)
- if (line.includes('<') && line.includes('>')) {
- const authorMatch = line.match(/^(.+?)\s*<(.+?)>$/);
- if (authorMatch) {
- const authorName = authorMatch[1].trim();
- metadata.authors = [authorName];
- processedLines++;
- continue;
- }
+ // Filter out all attribute lines and author lines from the content
+ const contentLines = lines.slice(contentStart);
+ const filteredLines = contentLines.filter(line => {
+ // Skip attribute lines
+ if (line.match(/^:[^:]+:\s*.+$/)) {
+ return false;
}
-
- // Check if this is a revision line (contains version, date, revision info)
- const revisionMatch = line.match(/^(.+?),\s*(.+?),\s*(.+)$/);
- if (revisionMatch) {
- metadata.version = revisionMatch[1].trim();
- metadata.publicationDate = revisionMatch[2].trim();
- metadata.publishedBy = revisionMatch[3].trim();
- processedLines++;
- continue;
+ // Skip author lines (simple names without email)
+ if (isSection && line.match(/^[A-Za-z\s]+$/) && line.trim() !== '' && line.trim().split(/\s+/).length <= 2) {
+ return false;
}
+ return true;
+ });
+
+ // Remove extra blank lines and normalize newlines
+ return filteredLines.join('\n').replace(/\n\s*\n\s*\n/g, '\n\n').replace(/\n\s*\n/g, '\n').trim();
+}
- // If it's not author or revision, it might be a simple author name
- if (!metadata.authors) {
- metadata.authors = [line.trim()];
- processedLines++;
+/**
+ * Parses attributes from section content
+ */
+function parseSectionAttributes(sectionContent: string): Record<string, string> {
+  const attributes: Record<string, string> = {};
+ const lines = sectionContent.split(/\r?\n/);
+
+ for (const line of lines) {
+ const match = line.match(/^:([^:]+):\s*(.+)$/);
+ if (match) {
+ const [, key, value] = match;
+ attributes[key.trim()] = value.trim();
}
}
+
+ return attributes;
+}
- // Move past the author/revision lines that were actually processed
- currentLine += processedLines;
-
- // Process attribute entries (lines starting with :)
- while (currentLine < lines.length) {
- const line = lines[currentLine];
-
- // Empty line marks the end of the header
- if (line.trim() === '') {
- break;
- }
- // Check for attribute entries
- const attrMatch = line.match(/^:([^:]+):\s*(.+)$/);
- if (attrMatch) {
- const key = attrMatch[1].trim();
- const value = attrMatch[2].trim();
- parseMetadataAttribute(metadata, key, value);
- }
- currentLine++;
- }
- return currentLine;
-}
/**
- * Extracts metadata from AsciiDoc document header
- * @param content The full AsciiDoc content
- * @returns Object containing metadata and cleaned content
+ * Extracts metadata from AsciiDoc document using Asciidoctor
*/
export function extractDocumentMetadata(inputContent: string): {
metadata: AsciiDocMetadata;
content: string;
} {
- const lines = inputContent.split(/\r?\n/);
+ const asciidoctor = createProcessor();
+ const document = asciidoctor.load(inputContent, { standalone: false }) as Document;
+
const metadata: AsciiDocMetadata = {};
- let headerEndIndex = -1;
- let currentLine = 0;
+ const attributes = document.getAttributes();
- // Find the document title (first line starting with =)
- for (let i = 0; i < lines.length; i++) {
- const line = lines[i];
- const titleMatch = line.match(/^=\s+(.+)$/);
- if (titleMatch) {
- metadata.title = titleMatch[1].trim();
- currentLine = i + 1;
- break;
- }
- }
+ // Extract basic metadata
+ const title = document.getTitle();
+ if (title) metadata.title = title;
- // If no document title found, return empty metadata
- if (!metadata.title) {
- return { metadata: {}, content: inputContent };
+ // Handle multiple authors - combine header line and attributes
+ const authors = extractAuthorsFromHeader(document.getSource());
+
+ // Get authors from attributes (but avoid duplicates)
+ const attrAuthor = attributes['author'];
+ if (attrAuthor && typeof attrAuthor === 'string' && !authors.includes(attrAuthor)) {
+ authors.push(attrAuthor);
}
-
- // Check if this is an index card format (title followed immediately by "index card")
- if (currentLine < lines.length && lines[currentLine].trim() === 'index card') {
- // This is index card format - content starts immediately after title
- headerEndIndex = currentLine;
- } else {
- // Extract header metadata using shared function
- currentLine = extractHeaderMetadata(lines, currentLine, metadata);
-
- // If we didn't find an empty line, the header ends at the first section
- if (currentLine < lines.length && lines[currentLine].trim() === '') {
- headerEndIndex = currentLine + 1; // Skip the empty line
- } else {
- for (let i = currentLine; i < lines.length; i++) {
- if (lines[i].match(/^==\s+/)) {
- headerEndIndex = i;
- break;
- }
- }
- // If no section found and no empty line, the header ends at the current line
- if (headerEndIndex === -1) {
- headerEndIndex = currentLine;
- }
- }
+
+ if (authors.length > 0) {
+ metadata.authors = [...new Set(authors)]; // Remove duplicates
}
- // If still no header end found, use the entire content
- if (headerEndIndex === -1) {
- headerEndIndex = lines.length;
- }
+ // Extract revision info
+ const revisionNumber = document.getRevisionNumber();
+ if (revisionNumber) metadata.version = revisionNumber;
+
+ const revisionRemark = document.getRevisionRemark();
+ if (revisionRemark) metadata.publishedBy = revisionRemark;
+
+ const revisionDate = document.getRevisionDate();
+ if (revisionDate) metadata.publicationDate = revisionDate;
- // Extract the content (everything after the header)
- let content = lines.slice(headerEndIndex).join('\n');
+ // Map attributes to metadata (but skip version and publishedBy if we already have them from revision)
+ mapAttributesToMetadata(attributes, metadata, true);
- // Remove metadata attributes from sections in the content
- content = content.replace(/^:([^:]+):\s*(.+)$/gm, '');
+ // If we got version from revision, don't override it with attribute
+ if (revisionNumber) {
+ metadata.version = revisionNumber;
+ }
+
+ // If we got publishedBy from revision, don't override it with attribute
+ if (revisionRemark) {
+ metadata.publishedBy = revisionRemark;
+ }
+ // Handle tags and keywords
+ const tags = extractTagsFromAttributes(attributes);
+ if (tags.length > 0) {
+ metadata.tags = tags;
+ }
+
+ const content = stripHeaderAndAttributes(document.getSource());
return { metadata, content };
}
/**
- * Extracts metadata from a section header
- * @param sectionContent The section content including its header
- * @returns Object containing section metadata and cleaned content
+ * Extracts metadata from a section using Asciidoctor
*/
export function extractSectionMetadata(inputSectionContent: string): {
metadata: SectionMetadata;
content: string;
title: string;
} {
- const lines = inputSectionContent.split(/\r?\n/);
- const metadata: SectionMetadata = {};
- let title = '';
- let headerEndIndex = -1;
- let currentLine = 0;
-
- // Find the section title (first line starting with ==)
- for (let i = 0; i < lines.length; i++) {
- const line = lines[i];
- const titleMatch = line.match(/^==\s+(.+)$/);
- if (titleMatch) {
- title = titleMatch[1].trim();
- metadata.title = title;
- currentLine = i + 1;
- break;
- }
- }
-
- // If no section title found, return empty metadata
- if (!title) {
+ const asciidoctor = createProcessor();
+ const document = asciidoctor.load(`= Temp\n\n${inputSectionContent}`, { standalone: false }) as Document;
+ const sections = document.getSections();
+
+ if (sections.length === 0) {
return { metadata: {}, content: inputSectionContent, title: '' };
}
- // Extract header metadata using shared function
- currentLine = extractHeaderMetadata(lines, currentLine, metadata);
+ const section = sections[0];
+ const title = section.getTitle() || '';
+ const metadata: SectionMetadata = { title };
+
+ // Parse attributes from the section content
+ const attributes = parseSectionAttributes(inputSectionContent);
- // If we didn't find an empty line, the header ends at the next section
- if (currentLine < lines.length && lines[currentLine].trim() === '') {
- headerEndIndex = currentLine + 1; // Skip the empty line
- } else {
- for (let i = currentLine; i < lines.length; i++) {
- if (lines[i].match(/^==\s+/)) {
- headerEndIndex = i;
- break;
- }
- }
+ // Extract authors from section content
+ const authors = extractAuthorsFromHeader(inputSectionContent, true);
+ if (authors.length > 0) {
+ metadata.authors = authors;
}
- // If still no header end found, use the entire content
- if (headerEndIndex === -1) {
- headerEndIndex = lines.length;
- }
+ // Map attributes to metadata (sections can have authors)
+ mapAttributesToMetadata(attributes, metadata, false);
- // Extract the content (everything after the header)
- const content = lines.slice(headerEndIndex).join('\n');
+ // Handle tags and keywords
+ const tags = extractTagsFromAttributes(attributes);
+ if (tags.length > 0) {
+ metadata.tags = tags;
+ }
+ const content = stripHeaderAndAttributes(inputSectionContent, true);
return { metadata, content, title };
}
/**
- * Splits AsciiDoc content into sections and extracts metadata from each
- * @param content The full AsciiDoc content
- * @returns Object containing document metadata and sections with their metadata
+ * Parses AsciiDoc content into sections with metadata
*/
export function parseAsciiDocWithMetadata(content: string): ParsedAsciiDoc {
- // First extract document metadata
+ const asciidoctor = createProcessor();
+ const document = asciidoctor.load(content, { standalone: false }) as Document;
const { metadata: docMetadata } = extractDocumentMetadata(content);
- // Find the document header end to get the content after the header
+ // Parse the original content to find section attributes
const lines = content.split(/\r?\n/);
- let currentLine = 0;
-
- // Find the document title
- for (let i = 0; i < lines.length; i++) {
- const line = lines[i];
- const titleMatch = line.match(/^=\s+(.+)$/);
- if (titleMatch) {
- currentLine = i + 1;
- break;
- }
- }
-
- // Extract header metadata to find where content starts
- const tempMetadata: AsciiDocMetadata = {};
- currentLine = extractHeaderMetadata(lines, currentLine, tempMetadata);
-
- // Get the content after the header (including sections with metadata)
- const docContent = lines.slice(currentLine).join('\n');
-
- // Split into sections
- const sections = splitAsciiDocSections(docContent);
+ const sectionsWithMetadata: Array<{
+ metadata: SectionMetadata;
+ content: string;
+ title: string;
+ }> = [];
+ let currentSection: string | null = null;
+ let currentSectionContent: string[] = [];
- // Extract metadata from each section
- const sectionsWithMetadata = sections.map(section => {
- return extractSectionMetadata(section);
- });
-
- return {
- metadata: docMetadata,
- content: docContent,
- sections: sectionsWithMetadata
- };
-}
-
-/**
- * Splits AsciiDoc content into sections at each '==' header
- * @param content The AsciiDoc content (without document header)
- * @returns Array of section strings
- */
-function splitAsciiDocSections(content: string): string[] {
- const lines = content.split(/\r?\n/);
- const sections: string[] = [];
- let currentSection: string[] = [];
- let inSection = false;
-
for (const line of lines) {
- // Check if this is a section header
if (line.match(/^==\s+/)) {
- // Save the previous section if we have one
- if (inSection && currentSection.length > 0) {
- sections.push(currentSection.join('\n').trim());
- currentSection = [];
+ // Save previous section if exists
+ if (currentSection) {
+ const sectionContent = currentSectionContent.join('\n');
+ sectionsWithMetadata.push(extractSectionMetadata(sectionContent));
}
// Start new section
- currentSection = [line];
- inSection = true;
- } else if (inSection) {
- // Add line to current section
- currentSection.push(line);
+ currentSection = line;
+ currentSectionContent = [line];
+ } else if (currentSection) {
+ currentSectionContent.push(line);
}
}
-
- // Add the last section
- if (currentSection.length > 0) {
- sections.push(currentSection.join('\n').trim());
+
+ // Save the last section
+ if (currentSection) {
+ const sectionContent = currentSectionContent.join('\n');
+ sectionsWithMetadata.push(extractSectionMetadata(sectionContent));
}
- return sections;
+ return {
+ metadata: docMetadata,
+ content: document.getSource(),
+ sections: sectionsWithMetadata
+ };
}
/**
* Converts metadata to Nostr event tags
- * @param metadata The metadata object
- * @returns Array of [tag, value] pairs
*/
export function metadataToTags(metadata: AsciiDocMetadata | SectionMetadata): [string, string][] {
const tags: [string, string][] = [];
- // Don't add title to tags since it has its own dedicated field
- // if (metadata.title) {
- // tags.push(['title', metadata.title]);
- // }
-
- if (metadata.authors && metadata.authors.length > 0) {
- metadata.authors.forEach(author => {
- tags.push(['author', author]);
- });
- }
-
- if (metadata.version) {
- tags.push(['version', metadata.version]);
+ if (metadata.title) tags.push(['title', metadata.title]);
+ if (metadata.authors?.length) {
+ metadata.authors.forEach(author => tags.push(['author', author]));
}
-
- if (metadata.edition) {
- tags.push(['edition', metadata.edition]);
+ if (metadata.version) tags.push(['version', metadata.version]);
+ if (metadata.edition) tags.push(['edition', metadata.edition]);
+ if (metadata.publicationDate) tags.push(['published_on', metadata.publicationDate]);
+ if (metadata.publishedBy) tags.push(['published_by', metadata.publishedBy]);
+ if (metadata.summary) tags.push(['summary', metadata.summary]);
+ if (metadata.coverImage) tags.push(['image', metadata.coverImage]);
+ if (metadata.isbn) tags.push(['i', metadata.isbn]);
+ if (metadata.source) tags.push(['source', metadata.source]);
+ if (metadata.type) tags.push(['type', metadata.type]);
+ if (metadata.autoUpdate) tags.push(['auto-update', metadata.autoUpdate]);
+ if (metadata.tags?.length) {
+ metadata.tags.forEach(tag => tags.push(['t', tag]));
}
- if (metadata.publicationDate) {
- tags.push(['published_on', metadata.publicationDate]);
- }
-
- if (metadata.publishedBy) {
- tags.push(['published_by', metadata.publishedBy]);
- }
-
- if (metadata.summary) {
- tags.push(['summary', metadata.summary]);
- }
-
- if (metadata.coverImage) {
- tags.push(['image', metadata.coverImage]);
- }
-
- if (metadata.isbn) {
- tags.push(['i', metadata.isbn]);
- }
+ return tags;
+}
- if (metadata.source) {
- tags.push(['source', metadata.source]);
- }
+/**
+ * Removes metadata from AsciiDoc content
+ */
+export function removeMetadataFromContent(content: string): string {
+ const { content: cleanedContent } = extractDocumentMetadata(content);
+ return cleanedContent;
+}
- if (metadata.type) {
- tags.push(['type', metadata.type]);
+/**
+ * Extracts metadata from content that only contains sections (no document header)
+ * This is useful when content flows from ZettelEditor to EventInput
+ */
+export function extractMetadataFromSectionsOnly(content: string): {
+ metadata: AsciiDocMetadata;
+ content: string;
+} {
+ const lines = content.split(/\r?\n/);
+ const sections: Array<{
+ metadata: SectionMetadata;
+ content: string;
+ title: string;
+ }> = [];
+
+ let currentSection: string | null = null;
+ let currentSectionContent: string[] = [];
+
+ // Parse sections from the content
+ for (const line of lines) {
+ if (line.match(/^==\s+/)) {
+ // Save previous section if exists
+ if (currentSection) {
+ const sectionContent = currentSectionContent.join('\n');
+ sections.push(extractSectionMetadata(sectionContent));
+ }
+
+ // Start new section
+ currentSection = line;
+ currentSectionContent = [line];
+ } else if (currentSection) {
+ currentSectionContent.push(line);
+ }
}
-
- if (metadata.autoUpdate) {
- tags.push(['auto-update', metadata.autoUpdate]);
+
+ // Save the last section
+ if (currentSection) {
+ const sectionContent = currentSectionContent.join('\n');
+ sections.push(extractSectionMetadata(sectionContent));
}
-
- if (metadata.tags && metadata.tags.length > 0) {
- metadata.tags.forEach(tag => {
- tags.push(['t', tag]);
- });
+
+ // For section-only content, we don't have document metadata
+ // Return the first section's title as the document title if available
+ const metadata: AsciiDocMetadata = {};
+ if (sections.length > 0 && sections[0].title) {
+ metadata.title = sections[0].title;
}
-
- return tags;
+
+ return { metadata, content };
}
/**
- * Removes metadata from AsciiDoc content, leaving only the actual content
- * @param content The full AsciiDoc content
- * @returns Cleaned content without metadata
+ * Smart metadata extraction that handles both document headers and section-only content
*/
-export function removeMetadataFromContent(content: string): string {
- const { content: docContent } = extractDocumentMetadata(content);
-
- // Remove metadata attributes from sections in the content
- const cleanedContent = docContent.replace(/^:([^:]+):\s*(.+)$/gm, '');
+export function extractSmartMetadata(content: string): {
+ metadata: AsciiDocMetadata;
+ content: string;
+} {
+ // Check if content has a document header
+ const hasDocumentHeader = content.match(/^=\s+/m);
- return cleanedContent;
+ if (hasDocumentHeader) {
+ // Check if it's a minimal document header (just title, no other metadata)
+ const lines = content.split(/\r?\n/);
+ const titleLine = lines.find(line => line.match(/^=\s+/));
+ const hasOtherMetadata = lines.some(line =>
+ line.includes('<') || // author line
+ line.match(/^.+,\s*.+:\s*.+$/) // revision line
+ );
+
+ if (hasOtherMetadata) {
+ // Full document with metadata - use standard extraction
+ return extractDocumentMetadata(content);
+ } else {
+ // Minimal document header (just title) - preserve the title line for 30040 events
+ const title = titleLine?.replace(/^=\s+/, '').trim();
+ const metadata: AsciiDocMetadata = {};
+ if (title) {
+ metadata.title = title;
+ }
+
+ // Keep the title line in content for 30040 events
+ return { metadata, content };
+ }
+ } else {
+ return extractMetadataFromSectionsOnly(content);
+ }
}
\ No newline at end of file
diff --git a/src/routes/new/compose/+page.svelte b/src/routes/new/compose/+page.svelte
index 33704a7..a62ffe8 100644
--- a/src/routes/new/compose/+page.svelte
+++ b/src/routes/new/compose/+page.svelte
@@ -5,7 +5,7 @@
import { goto } from "$app/navigation";
import { nip19 } from "nostr-tools";
import { publishMultipleZettels } from "$lib/services/publisher";
- import { parseAsciiDocSections } from "$lib/utils/ZettelParser";
+ import { parseAsciiDocWithMetadata } from "$lib/utils/asciidoc_metadata";
let content = $state("");
let showPreview = $state(false);
@@ -44,12 +44,12 @@
const errors = results.filter(r => !r.success && r.error).map(r => r.error!);
// Extract successful events with their titles
- const sections = parseAsciiDocSections(content, 2);
+ const parsed = parseAsciiDocWithMetadata(content);
const successfulEvents = results
.filter(r => r.success && r.eventId)
.map((r, index) => ({
eventId: r.eventId!,
- title: sections[index]?.title || `Note ${index + 1}`
+ title: parsed.sections[index]?.title || `Note ${index + 1}`
}));
// Extract failed events with their titles and errors
@@ -57,7 +57,7 @@
.map((r, index) => ({ result: r, index }))
.filter(({ result }) => !result.success)
.map(({ result, index }) => ({
- title: sections[index]?.title || `Note ${index + 1}`,
+ title: parsed.sections[index]?.title || `Note ${index + 1}`,
error: result.error || 'Unknown error',
sectionIndex: index
}));
@@ -78,8 +78,8 @@
isPublishing = true;
// Get the specific section content
- const sections = parseAsciiDocSections(content, 2);
- const section = sections[sectionIndex];
+ const parsed = parseAsciiDocWithMetadata(content);
+ const section = parsed.sections[sectionIndex];
if (!section) return;
// Reconstruct the section content for publishing
diff --git a/tests/unit/metadataExtraction.test.ts b/tests/unit/metadataExtraction.test.ts
index 145f23e..65a50b8 100644
--- a/tests/unit/metadataExtraction.test.ts
+++ b/tests/unit/metadataExtraction.test.ts
@@ -3,16 +3,16 @@ import {
extractDocumentMetadata,
extractSectionMetadata,
parseAsciiDocWithMetadata,
- metadataToTags
+ metadataToTags,
+ extractSmartMetadata
} from "../../src/lib/utils/asciidoc_metadata.ts";
describe("AsciiDoc Metadata Extraction", () => {
const testContent = `= Test Document with Metadata
John Doe
-1.0, 2024-01-15, Alexandria Test
+1.0, 2024-01-15: Alexandria Test
:summary: This is a test document for metadata extraction
:author: Jane Smith
-:version: 2.0
:published_on: 2024-01-15
:published_by: Alexandria Project
:type: article
@@ -78,6 +78,53 @@ This is the content of the first section.`;
expect(content).toBe("This is the content of the first section.");
});
+ it("extractSectionMetadata should extract standalone author names and remove them from content", () => {
+ const sectionContent = `== Section Header1
+Stella
+:description: Some summary
+
+Some context text`;
+
+ const { metadata, content, title } = extractSectionMetadata(sectionContent);
+
+ expect(title).toBe("Section Header1");
+ expect(metadata.authors).toEqual(["Stella"]);
+ expect(metadata.summary).toBe("Some summary");
+ expect(content.trim()).toBe("Some context text");
+ });
+
+ it("extractSectionMetadata should handle multiple standalone author names", () => {
+ const sectionContent = `== Section Header1
+Stella
+:author: John Doe
+:description: Some summary
+
+Some context text`;
+
+ const { metadata, content, title } = extractSectionMetadata(sectionContent);
+
+ expect(title).toBe("Section Header1");
+ expect(metadata.authors).toEqual(["Stella", "John Doe"]);
+ expect(metadata.summary).toBe("Some summary");
+ expect(content.trim()).toBe("Some context text");
+ });
+
+ it("extractSectionMetadata should not extract non-author lines as authors", () => {
+ const sectionContent = `== Section Header1
+Stella
+This is not an author line
+:description: Some summary
+
+Some context text`;
+
+ const { metadata, content, title } = extractSectionMetadata(sectionContent);
+
+ expect(title).toBe("Section Header1");
+ expect(metadata.authors).toEqual(["Stella"]);
+ expect(metadata.summary).toBe("Some summary");
+ expect(content.trim()).toBe("This is not an author line\nSome context text");
+ });
+
it("parseAsciiDocWithMetadata should parse complete document", () => {
const parsed = parseAsciiDocWithMetadata(testContent);
@@ -132,7 +179,7 @@ index card`;
const contentWithKeywords = `= Test Document
:keywords: keyword1, keyword2, keyword3
-Content here.`;
+Some content here.`;
const { metadata } = extractDocumentMetadata(contentWithKeywords);
@@ -144,7 +191,7 @@ Content here.`;
:tags: tag1, tag2
:keywords: keyword1, keyword2
-Content here.`;
+Some content here.`;
const { metadata } = extractDocumentMetadata(contentWithBoth);
@@ -180,4 +227,96 @@ Content here.`;
expect(summaryMetadata.summary).toBe("This is a summary");
expect(descriptionMetadata.summary).toBe("This is a description");
});
+
+ describe('Smart metadata extraction', () => {
+ it('should handle section-only content correctly', () => {
+ const sectionOnlyContent = `== First Section
+:author: Section Author
+:description: This is the first section
+:tags: section1, content
+
+This is the content of the first section.
+
+== Second Section
+:summary: This is the second section
+:type: chapter
+
+This is the content of the second section.`;
+
+ const { metadata, content } = extractSmartMetadata(sectionOnlyContent);
+
+ // Should extract title from first section
+ expect(metadata.title).toBe('First Section');
+
+ // Should not have document-level metadata since there's no document header
+ expect(metadata.authors).toBeUndefined();
+ expect(metadata.version).toBeUndefined();
+ expect(metadata.publicationDate).toBeUndefined();
+
+ // Content should be preserved
+ expect(content).toBe(sectionOnlyContent);
+ });
+
+ it('should handle minimal document header (just title) correctly', () => {
+ const minimalDocumentHeader = `= Test Document
+
+== First Section
+:author: Section Author
+:description: This is the first section
+
+This is the content of the first section.
+
+== Second Section
+:summary: This is the second section
+:type: chapter
+
+This is the content of the second section.`;
+
+ const { metadata, content } = extractSmartMetadata(minimalDocumentHeader);
+
+ // Should extract title from document header
+ expect(metadata.title).toBe('Test Document');
+
+ // Should not have document-level metadata since there's no other metadata
+ expect(metadata.authors).toBeUndefined();
+ // Note: version might be set from section attributes like :type: chapter
+ expect(metadata.publicationDate).toBeUndefined();
+
+ // Content should preserve the title line for 30040 events
+ expect(content).toContain('= Test Document');
+ expect(content).toContain('== First Section');
+ expect(content).toContain('== Second Section');
+ });
+
+ it('should handle document with full header correctly', () => {
+ const documentWithHeader = `= Test Document
+John Doe
+1.0, 2024-01-15: Alexandria Test
+:summary: This is a test document
+:author: Jane Smith
+
+== First Section
+:author: Section Author
+:description: This is the first section
+
+This is the content.`;
+
+ const { metadata, content } = extractSmartMetadata(documentWithHeader);
+
+ // Should extract document-level metadata
+ expect(metadata.title).toBe('Test Document');
+ expect(metadata.authors).toEqual(['John Doe', 'Jane Smith']);
+ expect(metadata.version).toBe('1.0');
+ expect(metadata.publishedBy).toBe('Alexandria Test');
+ expect(metadata.publicationDate).toBe('2024-01-15');
+ expect(metadata.summary).toBe('This is a test document');
+
+ // Content should be cleaned
+ expect(content).not.toContain('= Test Document');
+ expect(content).not.toContain('John Doe ');
+ expect(content).not.toContain('1.0, 2024-01-15: Alexandria Test');
+ expect(content).not.toContain(':summary: This is a test document');
+ expect(content).not.toContain(':author: Jane Smith');
+ });
+ });
});
\ No newline at end of file