Browse Source

interim checkin

master
silberengel 8 months ago
parent
commit
b1d66ebd79
  1. 170
      package-lock.json
  2. 48
      src/lib/components/EventInput.svelte
  3. 47
      src/lib/components/ZettelEditor.svelte
  4. 20
      src/lib/services/publisher.ts
  5. 730
      src/lib/utils/asciidoc_metadata.ts
  6. 12
      src/routes/new/compose/+page.svelte
  7. 149
      tests/unit/metadataExtraction.test.ts

170
package-lock.json generated

@ -2565,18 +2565,37 @@
} }
}, },
"node_modules/chokidar": { "node_modules/chokidar": {
"version": "4.0.3", "version": "3.6.0",
"resolved": "https://registry.npmjs.org/chokidar/-/chokidar-4.0.3.tgz", "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.6.0.tgz",
"integrity": "sha512-Qgzu8kfBvo+cA4962jnP1KkS6Dop5NS6g7R5LFYJr4b8Ub94PPQXUksCw9PvXoeXPRRddRNC5C1JQUR2SMGtnA==", "integrity": "sha512-7VT13fmjotKpGipCW9JEQAusEPE+Ei8nl6/g4FBAmIm0GOOLMua9NDDo/DWp0ZAxCr3cPq5ZpBqmPAQgDda2Pw==",
"devOptional": true,
"dependencies": { "dependencies": {
"readdirp": "^4.0.1" "anymatch": "~3.1.2",
"braces": "~3.0.2",
"glob-parent": "~5.1.2",
"is-binary-path": "~2.1.0",
"is-glob": "~4.0.1",
"normalize-path": "~3.0.0",
"readdirp": "~3.6.0"
}, },
"engines": { "engines": {
"node": ">= 14.16.0" "node": ">= 8.10.0"
}, },
"funding": { "funding": {
"url": "https://paulmillr.com/funding/" "url": "https://paulmillr.com/funding/"
},
"optionalDependencies": {
"fsevents": "~2.3.2"
}
},
"node_modules/chokidar/node_modules/glob-parent": {
"version": "5.1.2",
"resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz",
"integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==",
"dependencies": {
"is-glob": "^4.0.1"
},
"engines": {
"node": ">= 6"
} }
}, },
"node_modules/cliui": { "node_modules/cliui": {
@ -3420,6 +3439,15 @@
} }
} }
}, },
"node_modules/eslint-plugin-svelte/node_modules/yaml": {
"version": "1.10.2",
"resolved": "https://registry.npmjs.org/yaml/-/yaml-1.10.2.tgz",
"integrity": "sha512-r3vXyErRCYJ7wg28yvBY5VSoAF8ZvlcW9/BwUzEtUsjvX/DKs24dIkuwjtuprwJJHsbyUbLApepYTR1BN4uHrg==",
"dev": true,
"engines": {
"node": ">= 6"
}
},
"node_modules/eslint-scope": { "node_modules/eslint-scope": {
"version": "8.4.0", "version": "8.4.0",
"resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-8.4.0.tgz", "resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-8.4.0.tgz",
@ -5474,16 +5502,25 @@
} }
}, },
"node_modules/readdirp": { "node_modules/readdirp": {
"version": "4.1.2", "version": "3.6.0",
"resolved": "https://registry.npmjs.org/readdirp/-/readdirp-4.1.2.tgz", "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-3.6.0.tgz",
"integrity": "sha512-GDhwkLfywWL2s6vEjyhri+eXmfH6j1L7JE27WhqLeYzoh/A3DBaYGEj2H/HFZCn/kMfim73FXxEJTw06WtxQwg==", "integrity": "sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==",
"devOptional": true, "dependencies": {
"picomatch": "^2.2.1"
},
"engines": { "engines": {
"node": ">= 14.18.0" "node": ">=8.10.0"
}
},
"node_modules/readdirp/node_modules/picomatch": {
"version": "2.3.1",
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz",
"integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==",
"engines": {
"node": ">=8.6"
}, },
"funding": { "funding": {
"type": "individual", "url": "https://github.com/sponsors/jonschlinkert"
"url": "https://paulmillr.com/funding/"
} }
}, },
"node_modules/require-directory": { "node_modules/require-directory": {
@ -5952,6 +5989,34 @@
"typescript": ">=5.0.0" "typescript": ">=5.0.0"
} }
}, },
"node_modules/svelte-check/node_modules/chokidar": {
"version": "4.0.3",
"resolved": "https://registry.npmjs.org/chokidar/-/chokidar-4.0.3.tgz",
"integrity": "sha512-Qgzu8kfBvo+cA4962jnP1KkS6Dop5NS6g7R5LFYJr4b8Ub94PPQXUksCw9PvXoeXPRRddRNC5C1JQUR2SMGtnA==",
"dev": true,
"dependencies": {
"readdirp": "^4.0.1"
},
"engines": {
"node": ">= 14.16.0"
},
"funding": {
"url": "https://paulmillr.com/funding/"
}
},
"node_modules/svelte-check/node_modules/readdirp": {
"version": "4.1.2",
"resolved": "https://registry.npmjs.org/readdirp/-/readdirp-4.1.2.tgz",
"integrity": "sha512-GDhwkLfywWL2s6vEjyhri+eXmfH6j1L7JE27WhqLeYzoh/A3DBaYGEj2H/HFZCn/kMfim73FXxEJTw06WtxQwg==",
"dev": true,
"engines": {
"node": ">= 14.18.0"
},
"funding": {
"type": "individual",
"url": "https://paulmillr.com/funding/"
}
},
"node_modules/svelte-eslint-parser": { "node_modules/svelte-eslint-parser": {
"version": "1.3.0", "version": "1.3.0",
"resolved": "https://registry.npmjs.org/svelte-eslint-parser/-/svelte-eslint-parser-1.3.0.tgz", "resolved": "https://registry.npmjs.org/svelte-eslint-parser/-/svelte-eslint-parser-1.3.0.tgz",
@ -6139,51 +6204,6 @@
"node": ">=14.0.0" "node": ">=14.0.0"
} }
}, },
"node_modules/tailwindcss/node_modules/chokidar": {
"version": "3.6.0",
"resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.6.0.tgz",
"integrity": "sha512-7VT13fmjotKpGipCW9JEQAusEPE+Ei8nl6/g4FBAmIm0GOOLMua9NDDo/DWp0ZAxCr3cPq5ZpBqmPAQgDda2Pw==",
"dependencies": {
"anymatch": "~3.1.2",
"braces": "~3.0.2",
"glob-parent": "~5.1.2",
"is-binary-path": "~2.1.0",
"is-glob": "~4.0.1",
"normalize-path": "~3.0.0",
"readdirp": "~3.6.0"
},
"engines": {
"node": ">= 8.10.0"
},
"funding": {
"url": "https://paulmillr.com/funding/"
},
"optionalDependencies": {
"fsevents": "~2.3.2"
}
},
"node_modules/tailwindcss/node_modules/chokidar/node_modules/glob-parent": {
"version": "5.1.2",
"resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz",
"integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==",
"dependencies": {
"is-glob": "^4.0.1"
},
"engines": {
"node": ">= 6"
}
},
"node_modules/tailwindcss/node_modules/picomatch": {
"version": "2.3.1",
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz",
"integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==",
"engines": {
"node": ">=8.6"
},
"funding": {
"url": "https://github.com/sponsors/jonschlinkert"
}
},
"node_modules/tailwindcss/node_modules/postcss-load-config": { "node_modules/tailwindcss/node_modules/postcss-load-config": {
"version": "4.0.2", "version": "4.0.2",
"resolved": "https://registry.npmjs.org/postcss-load-config/-/postcss-load-config-4.0.2.tgz", "resolved": "https://registry.npmjs.org/postcss-load-config/-/postcss-load-config-4.0.2.tgz",
@ -6230,28 +6250,6 @@
"node": ">=4" "node": ">=4"
} }
}, },
"node_modules/tailwindcss/node_modules/readdirp": {
"version": "3.6.0",
"resolved": "https://registry.npmjs.org/readdirp/-/readdirp-3.6.0.tgz",
"integrity": "sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==",
"dependencies": {
"picomatch": "^2.2.1"
},
"engines": {
"node": ">=8.10.0"
}
},
"node_modules/tailwindcss/node_modules/yaml": {
"version": "2.8.0",
"resolved": "https://registry.npmjs.org/yaml/-/yaml-2.8.0.tgz",
"integrity": "sha512-4lLa/EcQCB0cJkyts+FpIRx5G/llPxfP6VQU5KByHEhLxY3IJCH0f0Hy1MHI8sClTvsIb8qwRJ6R/ZdlDJ/leQ==",
"bin": {
"yaml": "bin.mjs"
},
"engines": {
"node": ">= 14.6"
}
},
"node_modules/thenify": { "node_modules/thenify": {
"version": "3.3.1", "version": "3.3.1",
"resolved": "https://registry.npmjs.org/thenify/-/thenify-3.3.1.tgz", "resolved": "https://registry.npmjs.org/thenify/-/thenify-3.3.1.tgz",
@ -6792,12 +6790,14 @@
} }
}, },
"node_modules/yaml": { "node_modules/yaml": {
"version": "1.10.2", "version": "2.8.0",
"resolved": "https://registry.npmjs.org/yaml/-/yaml-1.10.2.tgz", "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.8.0.tgz",
"integrity": "sha512-r3vXyErRCYJ7wg28yvBY5VSoAF8ZvlcW9/BwUzEtUsjvX/DKs24dIkuwjtuprwJJHsbyUbLApepYTR1BN4uHrg==", "integrity": "sha512-4lLa/EcQCB0cJkyts+FpIRx5G/llPxfP6VQU5KByHEhLxY3IJCH0f0Hy1MHI8sClTvsIb8qwRJ6R/ZdlDJ/leQ==",
"dev": true, "bin": {
"yaml": "bin.mjs"
},
"engines": { "engines": {
"node": ">= 6" "node": ">= 14.6"
} }
}, },
"node_modules/yargs": { "node_modules/yargs": {

48
src/lib/components/EventInput.svelte

@ -14,6 +14,7 @@
} from "$lib/utils/event_input_utils"; } from "$lib/utils/event_input_utils";
import { import {
extractDocumentMetadata, extractDocumentMetadata,
extractSmartMetadata,
metadataToTags, metadataToTags,
removeMetadataFromContent removeMetadataFromContent
} from "$lib/utils/asciidoc_metadata"; } from "$lib/utils/asciidoc_metadata";
@ -65,60 +66,45 @@
sessionStorage.removeItem('zettelEditorContent'); sessionStorage.removeItem('zettelEditorContent');
sessionStorage.removeItem('zettelEditorSource'); sessionStorage.removeItem('zettelEditorSource');
// Extract title from content // Extract title and metadata using the standardized parser
const extracted = extractTitleFromContent(content); const { metadata } = extractSmartMetadata(content);
if (extracted) { if (metadata.title) {
title = extracted; title = metadata.title;
titleManuallyEdited = false; titleManuallyEdited = false;
dTagManuallyEdited = false; dTagManuallyEdited = false;
} }
// For content from ZettelEditor, don't extract any metadata // Extract metadata for 30040 and 30041 events
// since ZettelEditor content never has document metadata
if (kind === 30040 || kind === 30041) { if (kind === 30040 || kind === 30041) {
extractedMetadata = []; extractedMetadata = metadataToTags(metadata);
} }
} }
}); });
/** /**
* Extracts the first Markdown/AsciiDoc header as the title. * Extracts the first Markdown/AsciiDoc header as the title using the standardized parser.
*/ */
function extractTitleFromContent(content: string): string { function extractTitleFromContent(content: string): string {
// Match Markdown (# Title) or AsciiDoc (= Title) headers const { metadata } = extractSmartMetadata(content);
// Look for document title (=) first, then fall back to section headers (==) return metadata.title || "";
const documentMatch = content.match(/^=\s*(.+)$/m);
if (documentMatch) {
const title = documentMatch[1].trim();
// Only return the title if it's not empty (malformed titles like "=|" will be empty)
if (title) {
return title;
}
}
// If no valid document title, look for the first section header
const sectionMatch = content.match(/^==\s*(.+)$/m);
if (sectionMatch) {
return sectionMatch[1].trim();
}
return "";
} }
function handleContentInput(e: Event) { function handleContentInput(e: Event) {
content = (e.target as HTMLTextAreaElement).value; content = (e.target as HTMLTextAreaElement).value;
// Extract title and metadata using the standardized parser
const { metadata } = extractSmartMetadata(content);
if (!titleManuallyEdited) { if (!titleManuallyEdited) {
const extracted = extractTitleFromContent(content); console.log("Content input - extracted title:", metadata.title);
console.log("Content input - extracted title:", extracted); title = metadata.title || "";
title = extracted;
// Reset dTagManuallyEdited when title changes so d-tag can be auto-generated // Reset dTagManuallyEdited when title changes so d-tag can be auto-generated
dTagManuallyEdited = false; dTagManuallyEdited = false;
} }
// Extract metadata from AsciiDoc content for 30040 and 30041 events // Extract metadata from AsciiDoc content for 30040 and 30041 events
if (kind === 30040 || kind === 30041) { if (kind === 30040 || kind === 30041) {
// Don't extract metadata - let users add tags manually extractedMetadata = metadataToTags(metadata);
extractedMetadata = [];
} else { } else {
extractedMetadata = []; extractedMetadata = [];
} }

47
src/lib/components/ZettelEditor.svelte

@ -2,17 +2,12 @@
import { Textarea, Button } from "flowbite-svelte"; import { Textarea, Button } from "flowbite-svelte";
import { EyeOutline } from "flowbite-svelte-icons"; import { EyeOutline } from "flowbite-svelte-icons";
import { import {
parseAsciiDocSections, extractSmartMetadata,
type ZettelSection, parseAsciiDocWithMetadata,
} from "$lib/utils/ZettelParser"; type AsciiDocMetadata,
import { metadataToTags,
extractDocumentMetadata, } from "$lib/utils/asciidoc_metadata";
extractSectionMetadata, import asciidoctor from "asciidoctor";
parseAsciiDocWithMetadata,
type AsciiDocMetadata,
metadataToTags,
} from "$lib/utils/asciidoc_metadata";
import asciidoctor from "asciidoctor";
// Component props // Component props
let { let {
@ -45,34 +40,20 @@ Note content here...
onPreviewToggle?: (show: boolean) => void; onPreviewToggle?: (show: boolean) => void;
}>(); }>();
// Initialize AsciiDoctor processor // Parse sections for preview using the smart metadata service
const asciidoctorProcessor = asciidoctor();
// Parse sections for preview using the new metadata service
let parsedSections = $derived.by(() => { let parsedSections = $derived.by(() => {
if (!content.trim()) return []; if (!content.trim()) return [];
// Check if content starts with a document header (level 0 header) // Use smart metadata extraction that handles both document headers and section-only content
const hasDocumentHeader = content.match(/^=\s+/m); const { metadata: docMetadata } = extractSmartMetadata(content);
let sections; // Parse the content using the standardized parser
if (hasDocumentHeader) { const parsed = parseAsciiDocWithMetadata(content);
// Use the proper metadata service for documents with headers
const parsed = parseAsciiDocWithMetadata(content);
sections = parsed.sections;
} else {
// For content that starts directly with sections, split manually
const sectionStrings = content.split(/(?=^==\s+)/gm).filter((section: string) => section.trim());
sections = sectionStrings.map((sectionString: string) => {
const { metadata, content, title } = extractSectionMetadata(sectionString);
return { metadata, content, title };
});
}
// Debug logging // Debug logging
console.log("Parsed sections:", sections); console.log("Parsed sections:", parsed.sections);
return sections.map((section: { metadata: AsciiDocMetadata; content: string; title: string }) => { return parsed.sections.map((section: { metadata: AsciiDocMetadata; content: string; title: string }) => {
// Use only section metadata for each section // Use only section metadata for each section
// Don't combine with document metadata to avoid overriding section-specific metadata // Don't combine with document metadata to avoid overriding section-specific metadata
const tags = metadataToTags(section.metadata); const tags = metadataToTags(section.metadata);
@ -259,7 +240,7 @@ Note content here...
<div <div
class="text-sm text-gray-800 dark:text-gray-200 asciidoc-content" class="text-sm text-gray-800 dark:text-gray-200 asciidoc-content"
> >
{@html asciidoctorProcessor.convert( {@html asciidoctor().convert(
`== ${section.title}\n\n${section.content}`, `== ${section.title}\n\n${section.content}`,
{ {
standalone: false, standalone: false,

20
src/lib/services/publisher.ts

@ -1,7 +1,7 @@
import { get } from "svelte/store"; import { get } from "svelte/store";
import { ndkInstance } from "../ndk.ts"; import { ndkInstance } from "../ndk.ts";
import { getMimeTags } from "../utils/mime.ts"; import { getMimeTags } from "../utils/mime.ts";
import { parseAsciiDocSections } from "../utils/ZettelParser.ts"; import { parseAsciiDocWithMetadata, metadataToTags } from "../utils/asciidoc_metadata.ts";
import { NDKRelaySet, NDKEvent } from "@nostr-dev-kit/ndk"; import { NDKRelaySet, NDKEvent } from "@nostr-dev-kit/ndk";
import { nip19 } from "nostr-tools"; import { nip19 } from "nostr-tools";
@ -44,18 +44,18 @@ export async function publishZettel(
} }
try { try {
// Parse content into sections // Parse content into sections using the standardized parser
const sections = parseAsciiDocSections(content, 2); const parsed = parseAsciiDocWithMetadata(content);
if (sections.length === 0) { if (parsed.sections.length === 0) {
throw new Error("No valid sections found in content"); throw new Error("No valid sections found in content");
} }
// For now, publish only the first section // For now, publish only the first section
const firstSection = sections[0]; const firstSection = parsed.sections[0];
const title = firstSection.title; const title = firstSection.title;
const cleanContent = firstSection.content; const cleanContent = firstSection.content;
const sectionTags = firstSection.tags || []; const sectionTags = metadataToTags(firstSection.metadata);
// Generate d-tag and create event // Generate d-tag and create event
const dTag = generateDTag(title); const dTag = generateDTag(title);
@ -128,8 +128,8 @@ export async function publishMultipleZettels(
} }
try { try {
const sections = parseAsciiDocSections(content, 2); const parsed = parseAsciiDocWithMetadata(content);
if (sections.length === 0) { if (parsed.sections.length === 0) {
throw new Error('No valid sections found in content'); throw new Error('No valid sections found in content');
} }
@ -141,10 +141,10 @@ export async function publishMultipleZettels(
const results: PublishResult[] = []; const results: PublishResult[] = [];
const publishedEvents: NDKEvent[] = []; const publishedEvents: NDKEvent[] = [];
for (const section of sections) { for (const section of parsed.sections) {
const title = section.title; const title = section.title;
const cleanContent = section.content; const cleanContent = section.content;
const sectionTags = section.tags || []; const sectionTags = metadataToTags(section.metadata);
const dTag = generateDTag(title); const dTag = generateDTag(title);
const [mTag, MTag] = getMimeTags(kind); const [mTag, MTag] = getMimeTags(kind);
const tags: string[][] = [["d", dTag], mTag, MTag, ["title", title]]; const tags: string[][] = [["d", dTag], mTag, MTag, ["title", title]];

730
src/lib/utils/asciidoc_metadata.ts

@ -1,18 +1,14 @@
/** /**
* AsciiDoc Metadata Extraction Service * AsciiDoc Metadata Extraction Service using Asciidoctor
* *
* Extracts metadata from AsciiDoc document headers and section headers, * Thin wrapper around Asciidoctor's built-in metadata extraction capabilities.
* mapping them to Nostr event tags according to NKBIP-01 specification. * Leverages the existing Pharos parser to avoid duplication.
*
* Document header structure:
* = Document Title
* Author Name <email@example.com>
* version, date, revision info
* :attribute: value
*
* The first empty line marks the end of the header and start of the document body.
*/ */
// @ts-ignore
import Processor from "asciidoctor";
import type { Document } from "asciidoctor";
export interface AsciiDocMetadata { export interface AsciiDocMetadata {
title?: string; title?: string;
authors?: string[]; authors?: string[];
@ -30,7 +26,6 @@ export interface AsciiDocMetadata {
autoUpdate?: 'yes' | 'ask' | 'no'; autoUpdate?: 'yes' | 'ask' | 'no';
} }
// Sections use the same metadata structure as documents
export type SectionMetadata = AsciiDocMetadata; export type SectionMetadata = AsciiDocMetadata;
export interface ParsedAsciiDoc { export interface ParsedAsciiDoc {
@ -43,448 +38,463 @@ export interface ParsedAsciiDoc {
}>; }>;
} }
// Shared attribute mapping based on Asciidoctor standard attributes
const ATTRIBUTE_MAP: Record<string, keyof AsciiDocMetadata> = {
// Standard Asciidoctor attributes
'author': 'authors',
'description': 'summary',
'keywords': 'tags',
'revnumber': 'version',
'revdate': 'publicationDate',
'revremark': 'edition',
'title': 'title',
// Custom attributes for Alexandria
'published_by': 'publishedBy',
'publisher': 'publisher',
'summary': 'summary',
'image': 'coverImage',
'cover': 'coverImage',
'isbn': 'isbn',
'source': 'source',
'type': 'type',
'auto-update': 'autoUpdate',
'version': 'version',
'edition': 'edition',
'published_on': 'publicationDate',
'date': 'publicationDate',
'version-label': 'version',
};
/** /**
* Shared function to parse metadata from attribute entries * Creates an Asciidoctor processor instance
* @param metadata The metadata object to populate
* @param key The attribute key
* @param value The attribute value
*/ */
function parseMetadataAttribute(metadata: AsciiDocMetadata, key: string, value: string): void { function createProcessor() {
switch (key.toLowerCase()) { return Processor();
case 'author': }
// Accumulate multiple authors
if (!metadata.authors) { /**
metadata.authors = []; * Extracts tags from attributes, combining tags and keywords
} */
metadata.authors.push(value); function extractTagsFromAttributes(attributes: Record<string, any>): string[] {
break; const tags: string[] = [];
case 'version': const attrTags = attributes['tags'];
// Only set version if not already set from revision line const attrKeywords = attributes['keywords'];
if (!metadata.version) {
metadata.version = value; if (attrTags && typeof attrTags === 'string') {
} tags.push(...attrTags.split(',').map(tag => tag.trim()));
break; }
case 'edition':
metadata.edition = value; if (attrKeywords && typeof attrKeywords === 'string') {
break; tags.push(...attrKeywords.split(',').map(tag => tag.trim()));
case 'published_on': }
case 'date':
metadata.publicationDate = value; return [...new Set(tags)]; // Remove duplicates
break; }
case 'published_by':
case 'publisher': /**
// Only set publishedBy if not already set from revision line * Maps attributes to metadata with special handling for authors and tags
if (!metadata.publishedBy) { */
metadata.publishedBy = value; function mapAttributesToMetadata(attributes: Record<string, any>, metadata: AsciiDocMetadata, isDocument: boolean = false): void {
} for (const [key, value] of Object.entries(attributes)) {
break; const metadataKey = ATTRIBUTE_MAP[key.toLowerCase()];
case 'summary': if (metadataKey && value && typeof value === 'string') {
case 'description': if (metadataKey === 'authors' && isDocument) {
// Accumulate multiple summaries/descriptions // Skip author mapping for documents since it's handled manually
if (!metadata.summary) { continue;
metadata.summary = value; } else if (metadataKey === 'authors' && !isDocument) {
// For sections, append author to existing authors array
if (!metadata.authors) {
metadata.authors = [];
}
metadata.authors.push(value);
} else if (metadataKey === 'tags') {
// Skip tags mapping since it's handled by extractTagsFromAttributes
continue;
} else { } else {
// If we already have a summary, append this one (metadata as any)[metadataKey] = value;
metadata.summary = metadata.summary + ' ' + value;
} }
break; }
case 'image':
case 'cover':
metadata.coverImage = value;
break;
case 'isbn':
metadata.isbn = value;
break;
case 'source':
metadata.source = value;
break;
case 'type':
metadata.type = value;
break;
case 'auto-update':
if (value === 'yes' || value === 'ask' || value === 'no') {
metadata.autoUpdate = value;
}
break;
case 'tags':
case 'keywords':
// Accumulate multiple tag sets
if (!metadata.tags) {
metadata.tags = [];
}
const newTags = value.split(',').map(tag => tag.trim());
metadata.tags.push(...newTags);
break;
} }
} }
/** /**
* Shared function to extract metadata from header lines * Extracts authors from header line (document or section)
* @param lines The lines to process
* @param startLine The starting line index
* @param metadata The metadata object to populate
* @returns The index of the line after the header metadata
*/ */
function extractHeaderMetadata(lines: string[], startLine: number, metadata: AsciiDocMetadata): number { function extractAuthorsFromHeader(sourceContent: string, isSection: boolean = false): string[] {
let currentLine = startLine; const authors: string[] = [];
const lines = sourceContent.split(/\r?\n/);
// Process the next two lines for author and revision info const headerPattern = isSection ? /^==\s+/ : /^=\s+/;
let processedLines = 0;
for (let i = 0; i < 2 && currentLine + i < lines.length; i++) { for (let i = 0; i < lines.length; i++) {
const line = lines[currentLine + i]; const line = lines[i];
if (line.match(headerPattern)) {
// Skip empty lines // Found title line, check subsequent lines for authors
if (line.trim() === '') { let j = i + 1;
continue; while (j < lines.length) {
const authorLine = lines[j];
// Stop if we hit a blank line or content that's not an author
if (authorLine.trim() === '') {
break;
}
if (authorLine.includes('<') && !authorLine.startsWith(':')) {
// This is an author line like "John Doe <john@example.com>"
const authorName = authorLine.split('<')[0].trim();
if (authorName) {
authors.push(authorName);
}
} else if (isSection && authorLine.match(/^[A-Za-z\s]+$/) && authorLine.trim() !== '' && authorLine.trim().split(/\s+/).length <= 2) {
// This is a simple author name without email (for sections)
authors.push(authorLine.trim());
} else if (authorLine.startsWith(':')) {
// This is an attribute line, skip it - attributes are handled by mapAttributesToMetadata
// Don't break here, continue to next line
} else {
// Not an author line, stop looking
break;
}
j++;
}
break;
} }
}
return authors;
}
// Skip attribute lines (they'll be processed later) /**
if (line.startsWith(':')) { * Strips header and attribute lines from content
continue; */
function stripHeaderAndAttributes(content: string, isSection: boolean = false): string {
const lines = content.split(/\r?\n/);
let contentStart = 0;
const headerPattern = isSection ? /^==\s+/ : /^=\s+/;
for (let i = 0; i < lines.length; i++) {
const line = lines[i];
// Skip title line, author line, revision line, and attribute lines
if (!line.match(headerPattern) && !line.includes('<') && !line.match(/^.+,\s*.+:\s*.+$/) &&
!line.match(/^:[^:]+:\s*.+$/) && line.trim() !== '') {
contentStart = i;
break;
} }
}
// Check if this is an author line (contains <email>) // Filter out all attribute lines and author lines from the content
if (line.includes('<') && line.includes('>')) { const contentLines = lines.slice(contentStart);
const authorMatch = line.match(/^(.+?)\s*<(.+?)>$/); const filteredLines = contentLines.filter(line => {
if (authorMatch) { // Skip attribute lines
const authorName = authorMatch[1].trim(); if (line.match(/^:[^:]+:\s*.+$/)) {
metadata.authors = [authorName]; return false;
processedLines++;
continue;
}
} }
// Skip author lines (simple names without email)
// Check if this is a revision line (contains version, date, revision info) if (isSection && line.match(/^[A-Za-z\s]+$/) && line.trim() !== '' && line.trim().split(/\s+/).length <= 2) {
const revisionMatch = line.match(/^(.+?),\s*(.+?),\s*(.+)$/); return false;
if (revisionMatch) {
metadata.version = revisionMatch[1].trim();
metadata.publicationDate = revisionMatch[2].trim();
metadata.publishedBy = revisionMatch[3].trim();
processedLines++;
continue;
} }
return true;
});
// Remove extra blank lines and normalize newlines
return filteredLines.join('\n').replace(/\n\s*\n\s*\n/g, '\n\n').replace(/\n\s*\n/g, '\n').trim();
}
// If it's not author or revision, it might be a simple author name /**
if (!metadata.authors) { * Parses attributes from section content
metadata.authors = [line.trim()]; */
processedLines++; function parseSectionAttributes(sectionContent: string): Record<string, any> {
const attributes: Record<string, any> = {};
const lines = sectionContent.split(/\r?\n/);
for (const line of lines) {
const match = line.match(/^:([^:]+):\s*(.+)$/);
if (match) {
const [, key, value] = match;
attributes[key.trim()] = value.trim();
} }
} }
return attributes;
}
// Move past the author/revision lines that were actually processed
currentLine += processedLines;
// Process attribute entries (lines starting with :)
while (currentLine < lines.length) {
const line = lines[currentLine];
// Empty line marks the end of the header
if (line.trim() === '') {
break;
}
// Check for attribute entries
const attrMatch = line.match(/^:([^:]+):\s*(.+)$/);
if (attrMatch) {
const key = attrMatch[1].trim();
const value = attrMatch[2].trim();
parseMetadataAttribute(metadata, key, value);
}
currentLine++;
}
return currentLine;
}
/** /**
* Extracts metadata from AsciiDoc document header * Extracts metadata from AsciiDoc document using Asciidoctor
* @param content The full AsciiDoc content
* @returns Object containing metadata and cleaned content
*/ */
export function extractDocumentMetadata(inputContent: string): { export function extractDocumentMetadata(inputContent: string): {
metadata: AsciiDocMetadata; metadata: AsciiDocMetadata;
content: string; content: string;
} { } {
const lines = inputContent.split(/\r?\n/); const asciidoctor = createProcessor();
const document = asciidoctor.load(inputContent, { standalone: false }) as Document;
const metadata: AsciiDocMetadata = {}; const metadata: AsciiDocMetadata = {};
let headerEndIndex = -1; const attributes = document.getAttributes();
let currentLine = 0;
// Find the document title (first line starting with =) // Extract basic metadata
for (let i = 0; i < lines.length; i++) { const title = document.getTitle();
const line = lines[i]; if (title) metadata.title = title;
const titleMatch = line.match(/^=\s+(.+)$/);
if (titleMatch) {
metadata.title = titleMatch[1].trim();
currentLine = i + 1;
break;
}
}
// If no document title found, return empty metadata // Handle multiple authors - combine header line and attributes
if (!metadata.title) { const authors = extractAuthorsFromHeader(document.getSource());
return { metadata: {}, content: inputContent };
// Get authors from attributes (but avoid duplicates)
const attrAuthor = attributes['author'];
if (attrAuthor && typeof attrAuthor === 'string' && !authors.includes(attrAuthor)) {
authors.push(attrAuthor);
} }
// Check if this is an index card format (title followed immediately by "index card") if (authors.length > 0) {
if (currentLine < lines.length && lines[currentLine].trim() === 'index card') { metadata.authors = [...new Set(authors)]; // Remove duplicates
// This is index card format - content starts immediately after title
headerEndIndex = currentLine;
} else {
// Extract header metadata using shared function
currentLine = extractHeaderMetadata(lines, currentLine, metadata);
// If we didn't find an empty line, the header ends at the first section
if (currentLine < lines.length && lines[currentLine].trim() === '') {
headerEndIndex = currentLine + 1; // Skip the empty line
} else {
for (let i = currentLine; i < lines.length; i++) {
if (lines[i].match(/^==\s+/)) {
headerEndIndex = i;
break;
}
}
// If no section found and no empty line, the header ends at the current line
if (headerEndIndex === -1) {
headerEndIndex = currentLine;
}
}
} }
// If still no header end found, use the entire content // Extract revision info
if (headerEndIndex === -1) { const revisionNumber = document.getRevisionNumber();
headerEndIndex = lines.length; if (revisionNumber) metadata.version = revisionNumber;
}
const revisionRemark = document.getRevisionRemark();
if (revisionRemark) metadata.publishedBy = revisionRemark;
const revisionDate = document.getRevisionDate();
if (revisionDate) metadata.publicationDate = revisionDate;
// Extract the content (everything after the header) // Map attributes to metadata (but skip version and publishedBy if we already have them from revision)
let content = lines.slice(headerEndIndex).join('\n'); mapAttributesToMetadata(attributes, metadata, true);
// Remove metadata attributes from sections in the content // If we got version from revision, don't override it with attribute
content = content.replace(/^:([^:]+):\s*(.+)$/gm, ''); if (revisionNumber) {
metadata.version = revisionNumber;
}
// If we got publishedBy from revision, don't override it with attribute
if (revisionRemark) {
metadata.publishedBy = revisionRemark;
}
// Handle tags and keywords
const tags = extractTagsFromAttributes(attributes);
if (tags.length > 0) {
metadata.tags = tags;
}
const content = stripHeaderAndAttributes(document.getSource());
return { metadata, content }; return { metadata, content };
} }
/** /**
* Extracts metadata from a section header * Extracts metadata from a section using Asciidoctor
* @param sectionContent The section content including its header
* @returns Object containing section metadata and cleaned content
*/ */
export function extractSectionMetadata(inputSectionContent: string): { export function extractSectionMetadata(inputSectionContent: string): {
metadata: SectionMetadata; metadata: SectionMetadata;
content: string; content: string;
title: string; title: string;
} { } {
const lines = inputSectionContent.split(/\r?\n/); const asciidoctor = createProcessor();
const metadata: SectionMetadata = {}; const document = asciidoctor.load(`= Temp\n\n${inputSectionContent}`, { standalone: false }) as Document;
let title = ''; const sections = document.getSections();
let headerEndIndex = -1;
let currentLine = 0; if (sections.length === 0) {
// Find the section title (first line starting with ==)
for (let i = 0; i < lines.length; i++) {
const line = lines[i];
const titleMatch = line.match(/^==\s+(.+)$/);
if (titleMatch) {
title = titleMatch[1].trim();
metadata.title = title;
currentLine = i + 1;
break;
}
}
// If no section title found, return empty metadata
if (!title) {
return { metadata: {}, content: inputSectionContent, title: '' }; return { metadata: {}, content: inputSectionContent, title: '' };
} }
// Extract header metadata using shared function const section = sections[0];
currentLine = extractHeaderMetadata(lines, currentLine, metadata); const title = section.getTitle() || '';
const metadata: SectionMetadata = { title };
// Parse attributes from the section content
const attributes = parseSectionAttributes(inputSectionContent);
// If we didn't find an empty line, the header ends at the next section // Extract authors from section content
if (currentLine < lines.length && lines[currentLine].trim() === '') { const authors = extractAuthorsFromHeader(inputSectionContent, true);
headerEndIndex = currentLine + 1; // Skip the empty line if (authors.length > 0) {
} else { metadata.authors = authors;
for (let i = currentLine; i < lines.length; i++) {
if (lines[i].match(/^==\s+/)) {
headerEndIndex = i;
break;
}
}
} }
// If still no header end found, use the entire content // Map attributes to metadata (sections can have authors)
if (headerEndIndex === -1) { mapAttributesToMetadata(attributes, metadata, false);
headerEndIndex = lines.length;
}
// Extract the content (everything after the header) // Handle tags and keywords
const content = lines.slice(headerEndIndex).join('\n'); const tags = extractTagsFromAttributes(attributes);
if (tags.length > 0) {
metadata.tags = tags;
}
const content = stripHeaderAndAttributes(inputSectionContent, true);
return { metadata, content, title }; return { metadata, content, title };
} }
/** /**
* Splits AsciiDoc content into sections and extracts metadata from each * Parses AsciiDoc content into sections with metadata
* @param content The full AsciiDoc content
* @returns Object containing document metadata and sections with their metadata
*/ */
export function parseAsciiDocWithMetadata(content: string): ParsedAsciiDoc { export function parseAsciiDocWithMetadata(content: string): ParsedAsciiDoc {
// First extract document metadata const asciidoctor = createProcessor();
const document = asciidoctor.load(content, { standalone: false }) as Document;
const { metadata: docMetadata } = extractDocumentMetadata(content); const { metadata: docMetadata } = extractDocumentMetadata(content);
// Find the document header end to get the content after the header // Parse the original content to find section attributes
const lines = content.split(/\r?\n/); const lines = content.split(/\r?\n/);
let currentLine = 0; const sectionsWithMetadata: Array<{
metadata: SectionMetadata;
// Find the document title content: string;
for (let i = 0; i < lines.length; i++) { title: string;
const line = lines[i]; }> = [];
const titleMatch = line.match(/^=\s+(.+)$/); let currentSection: string | null = null;
if (titleMatch) { let currentSectionContent: string[] = [];
currentLine = i + 1;
break;
}
}
// Extract header metadata to find where content starts
const tempMetadata: AsciiDocMetadata = {};
currentLine = extractHeaderMetadata(lines, currentLine, tempMetadata);
// Get the content after the header (including sections with metadata)
const docContent = lines.slice(currentLine).join('\n');
// Split into sections
const sections = splitAsciiDocSections(docContent);
// Extract metadata from each section
const sectionsWithMetadata = sections.map(section => {
return extractSectionMetadata(section);
});
return {
metadata: docMetadata,
content: docContent,
sections: sectionsWithMetadata
};
}
/**
* Splits AsciiDoc content into sections at each '==' header
* @param content The AsciiDoc content (without document header)
* @returns Array of section strings
*/
function splitAsciiDocSections(content: string): string[] {
const lines = content.split(/\r?\n/);
const sections: string[] = [];
let currentSection: string[] = [];
let inSection = false;
for (const line of lines) { for (const line of lines) {
// Check if this is a section header
if (line.match(/^==\s+/)) { if (line.match(/^==\s+/)) {
// Save the previous section if we have one // Save previous section if exists
if (inSection && currentSection.length > 0) { if (currentSection) {
sections.push(currentSection.join('\n').trim()); const sectionContent = currentSectionContent.join('\n');
currentSection = []; sectionsWithMetadata.push(extractSectionMetadata(sectionContent));
} }
// Start new section // Start new section
currentSection = [line]; currentSection = line;
inSection = true; currentSectionContent = [line];
} else if (inSection) { } else if (currentSection) {
// Add line to current section currentSectionContent.push(line);
currentSection.push(line);
} }
} }
// Add the last section // Save the last section
if (currentSection.length > 0) { if (currentSection) {
sections.push(currentSection.join('\n').trim()); const sectionContent = currentSectionContent.join('\n');
sectionsWithMetadata.push(extractSectionMetadata(sectionContent));
} }
return sections; return {
metadata: docMetadata,
content: document.getSource(),
sections: sectionsWithMetadata
};
} }
/** /**
* Converts metadata to Nostr event tags * Converts metadata to Nostr event tags
* @param metadata The metadata object
* @returns Array of [tag, value] pairs
*/ */
export function metadataToTags(metadata: AsciiDocMetadata | SectionMetadata): [string, string][] { export function metadataToTags(metadata: AsciiDocMetadata | SectionMetadata): [string, string][] {
const tags: [string, string][] = []; const tags: [string, string][] = [];
// Don't add title to tags since it has its own dedicated field if (metadata.title) tags.push(['title', metadata.title]);
// if (metadata.title) { if (metadata.authors?.length) {
// tags.push(['title', metadata.title]); metadata.authors.forEach(author => tags.push(['author', author]));
// }
if (metadata.authors && metadata.authors.length > 0) {
metadata.authors.forEach(author => {
tags.push(['author', author]);
});
}
if (metadata.version) {
tags.push(['version', metadata.version]);
} }
if (metadata.version) tags.push(['version', metadata.version]);
if (metadata.edition) { if (metadata.edition) tags.push(['edition', metadata.edition]);
tags.push(['edition', metadata.edition]); if (metadata.publicationDate) tags.push(['published_on', metadata.publicationDate]);
if (metadata.publishedBy) tags.push(['published_by', metadata.publishedBy]);
if (metadata.summary) tags.push(['summary', metadata.summary]);
if (metadata.coverImage) tags.push(['image', metadata.coverImage]);
if (metadata.isbn) tags.push(['i', metadata.isbn]);
if (metadata.source) tags.push(['source', metadata.source]);
if (metadata.type) tags.push(['type', metadata.type]);
if (metadata.autoUpdate) tags.push(['auto-update', metadata.autoUpdate]);
if (metadata.tags?.length) {
metadata.tags.forEach(tag => tags.push(['t', tag]));
} }
if (metadata.publicationDate) { return tags;
tags.push(['published_on', metadata.publicationDate]); }
}
if (metadata.publishedBy) {
tags.push(['published_by', metadata.publishedBy]);
}
if (metadata.summary) {
tags.push(['summary', metadata.summary]);
}
if (metadata.coverImage) {
tags.push(['image', metadata.coverImage]);
}
if (metadata.isbn) {
tags.push(['i', metadata.isbn]);
}
if (metadata.source) { /**
tags.push(['source', metadata.source]); * Removes metadata from AsciiDoc content
} */
export function removeMetadataFromContent(content: string): string {
const { content: cleanedContent } = extractDocumentMetadata(content);
return cleanedContent;
}
if (metadata.type) { /**
tags.push(['type', metadata.type]); * Extracts metadata from content that only contains sections (no document header)
* This is useful when content flows from ZettelEditor to EventInput
*/
export function extractMetadataFromSectionsOnly(content: string): {
metadata: AsciiDocMetadata;
content: string;
} {
const lines = content.split(/\r?\n/);
const sections: Array<{
metadata: SectionMetadata;
content: string;
title: string;
}> = [];
let currentSection: string | null = null;
let currentSectionContent: string[] = [];
// Parse sections from the content
for (const line of lines) {
if (line.match(/^==\s+/)) {
// Save previous section if exists
if (currentSection) {
const sectionContent = currentSectionContent.join('\n');
sections.push(extractSectionMetadata(sectionContent));
}
// Start new section
currentSection = line;
currentSectionContent = [line];
} else if (currentSection) {
currentSectionContent.push(line);
}
} }
if (metadata.autoUpdate) { // Save the last section
tags.push(['auto-update', metadata.autoUpdate]); if (currentSection) {
const sectionContent = currentSectionContent.join('\n');
sections.push(extractSectionMetadata(sectionContent));
} }
if (metadata.tags && metadata.tags.length > 0) { // For section-only content, we don't have document metadata
metadata.tags.forEach(tag => { // Return the first section's title as the document title if available
tags.push(['t', tag]); const metadata: AsciiDocMetadata = {};
}); if (sections.length > 0 && sections[0].title) {
metadata.title = sections[0].title;
} }
return tags; return { metadata, content };
} }
/** /**
* Removes metadata from AsciiDoc content, leaving only the actual content * Smart metadata extraction that handles both document headers and section-only content
* @param content The full AsciiDoc content
* @returns Cleaned content without metadata
*/ */
export function removeMetadataFromContent(content: string): string { export function extractSmartMetadata(content: string): {
const { content: docContent } = extractDocumentMetadata(content); metadata: AsciiDocMetadata;
content: string;
// Remove metadata attributes from sections in the content } {
const cleanedContent = docContent.replace(/^:([^:]+):\s*(.+)$/gm, ''); // Check if content has a document header
const hasDocumentHeader = content.match(/^=\s+/m);
return cleanedContent; if (hasDocumentHeader) {
// Check if it's a minimal document header (just title, no other metadata)
const lines = content.split(/\r?\n/);
const titleLine = lines.find(line => line.match(/^=\s+/));
const hasOtherMetadata = lines.some(line =>
line.includes('<') || // author line
line.match(/^.+,\s*.+:\s*.+$/) // revision line
);
if (hasOtherMetadata) {
// Full document with metadata - use standard extraction
return extractDocumentMetadata(content);
} else {
// Minimal document header (just title) - preserve the title line for 30040 events
const title = titleLine?.replace(/^=\s+/, '').trim();
const metadata: AsciiDocMetadata = {};
if (title) {
metadata.title = title;
}
// Keep the title line in content for 30040 events
return { metadata, content };
}
} else {
return extractMetadataFromSectionsOnly(content);
}
} }

12
src/routes/new/compose/+page.svelte

@ -5,7 +5,7 @@
import { goto } from "$app/navigation"; import { goto } from "$app/navigation";
import { nip19 } from "nostr-tools"; import { nip19 } from "nostr-tools";
import { publishMultipleZettels } from "$lib/services/publisher"; import { publishMultipleZettels } from "$lib/services/publisher";
import { parseAsciiDocSections } from "$lib/utils/ZettelParser"; import { parseAsciiDocWithMetadata } from "$lib/utils/asciidoc_metadata";
let content = $state(""); let content = $state("");
let showPreview = $state(false); let showPreview = $state(false);
@ -44,12 +44,12 @@
const errors = results.filter(r => !r.success && r.error).map(r => r.error!); const errors = results.filter(r => !r.success && r.error).map(r => r.error!);
// Extract successful events with their titles // Extract successful events with their titles
const sections = parseAsciiDocSections(content, 2); const parsed = parseAsciiDocWithMetadata(content);
const successfulEvents = results const successfulEvents = results
.filter(r => r.success && r.eventId) .filter(r => r.success && r.eventId)
.map((r, index) => ({ .map((r, index) => ({
eventId: r.eventId!, eventId: r.eventId!,
title: sections[index]?.title || `Note ${index + 1}` title: parsed.sections[index]?.title || `Note ${index + 1}`
})); }));
// Extract failed events with their titles and errors // Extract failed events with their titles and errors
@ -57,7 +57,7 @@
.map((r, index) => ({ result: r, index })) .map((r, index) => ({ result: r, index }))
.filter(({ result }) => !result.success) .filter(({ result }) => !result.success)
.map(({ result, index }) => ({ .map(({ result, index }) => ({
title: sections[index]?.title || `Note ${index + 1}`, title: parsed.sections[index]?.title || `Note ${index + 1}`,
error: result.error || 'Unknown error', error: result.error || 'Unknown error',
sectionIndex: index sectionIndex: index
})); }));
@ -78,8 +78,8 @@
isPublishing = true; isPublishing = true;
// Get the specific section content // Get the specific section content
const sections = parseAsciiDocSections(content, 2); const parsed = parseAsciiDocWithMetadata(content);
const section = sections[sectionIndex]; const section = parsed.sections[sectionIndex];
if (!section) return; if (!section) return;
// Reconstruct the section content for publishing // Reconstruct the section content for publishing

149
tests/unit/metadataExtraction.test.ts

@ -3,16 +3,16 @@ import {
extractDocumentMetadata, extractDocumentMetadata,
extractSectionMetadata, extractSectionMetadata,
parseAsciiDocWithMetadata, parseAsciiDocWithMetadata,
metadataToTags metadataToTags,
extractSmartMetadata
} from "../../src/lib/utils/asciidoc_metadata.ts"; } from "../../src/lib/utils/asciidoc_metadata.ts";
describe("AsciiDoc Metadata Extraction", () => { describe("AsciiDoc Metadata Extraction", () => {
const testContent = `= Test Document with Metadata const testContent = `= Test Document with Metadata
John Doe <john@example.com> John Doe <john@example.com>
1.0, 2024-01-15, Alexandria Test 1.0, 2024-01-15: Alexandria Test
:summary: This is a test document for metadata extraction :summary: This is a test document for metadata extraction
:author: Jane Smith :author: Jane Smith
:version: 2.0
:published_on: 2024-01-15 :published_on: 2024-01-15
:published_by: Alexandria Project :published_by: Alexandria Project
:type: article :type: article
@ -78,6 +78,53 @@ This is the content of the first section.`;
expect(content).toBe("This is the content of the first section."); expect(content).toBe("This is the content of the first section.");
}); });
it("extractSectionMetadata should extract standalone author names and remove them from content", () => {
const sectionContent = `== Section Header1
Stella
:description: Some summary
Some context text`;
const { metadata, content, title } = extractSectionMetadata(sectionContent);
expect(title).toBe("Section Header1");
expect(metadata.authors).toEqual(["Stella"]);
expect(metadata.summary).toBe("Some summary");
expect(content.trim()).toBe("Some context text");
});
it("extractSectionMetadata should handle multiple standalone author names", () => {
const sectionContent = `== Section Header1
Stella
:author: John Doe
:description: Some summary
Some context text`;
const { metadata, content, title } = extractSectionMetadata(sectionContent);
expect(title).toBe("Section Header1");
expect(metadata.authors).toEqual(["Stella", "John Doe"]);
expect(metadata.summary).toBe("Some summary");
expect(content.trim()).toBe("Some context text");
});
it("extractSectionMetadata should not extract non-author lines as authors", () => {
const sectionContent = `== Section Header1
Stella
This is not an author line
:description: Some summary
Some context text`;
const { metadata, content, title } = extractSectionMetadata(sectionContent);
expect(title).toBe("Section Header1");
expect(metadata.authors).toEqual(["Stella"]);
expect(metadata.summary).toBe("Some summary");
expect(content.trim()).toBe("This is not an author line\nSome context text");
});
it("parseAsciiDocWithMetadata should parse complete document", () => { it("parseAsciiDocWithMetadata should parse complete document", () => {
const parsed = parseAsciiDocWithMetadata(testContent); const parsed = parseAsciiDocWithMetadata(testContent);
@ -132,7 +179,7 @@ index card`;
const contentWithKeywords = `= Test Document const contentWithKeywords = `= Test Document
:keywords: keyword1, keyword2, keyword3 :keywords: keyword1, keyword2, keyword3
Content here.`; Some content here.`;
const { metadata } = extractDocumentMetadata(contentWithKeywords); const { metadata } = extractDocumentMetadata(contentWithKeywords);
@ -144,7 +191,7 @@ Content here.`;
:tags: tag1, tag2 :tags: tag1, tag2
:keywords: keyword1, keyword2 :keywords: keyword1, keyword2
Content here.`; Some content here.`;
const { metadata } = extractDocumentMetadata(contentWithBoth); const { metadata } = extractDocumentMetadata(contentWithBoth);
@ -180,4 +227,96 @@ Content here.`;
expect(summaryMetadata.summary).toBe("This is a summary"); expect(summaryMetadata.summary).toBe("This is a summary");
expect(descriptionMetadata.summary).toBe("This is a description"); expect(descriptionMetadata.summary).toBe("This is a description");
}); });
describe('Smart metadata extraction', () => {
it('should handle section-only content correctly', () => {
const sectionOnlyContent = `== First Section
:author: Section Author
:description: This is the first section
:tags: section1, content
This is the content of the first section.
== Second Section
:summary: This is the second section
:type: chapter
This is the content of the second section.`;
const { metadata, content } = extractSmartMetadata(sectionOnlyContent);
// Should extract title from first section
expect(metadata.title).toBe('First Section');
// Should not have document-level metadata since there's no document header
expect(metadata.authors).toBeUndefined();
expect(metadata.version).toBeUndefined();
expect(metadata.publicationDate).toBeUndefined();
// Content should be preserved
expect(content).toBe(sectionOnlyContent);
});
it('should handle minimal document header (just title) correctly', () => {
const minimalDocumentHeader = `= Test Document
== First Section
:author: Section Author
:description: This is the first section
This is the content of the first section.
== Second Section
:summary: This is the second section
:type: chapter
This is the content of the second section.`;
const { metadata, content } = extractSmartMetadata(minimalDocumentHeader);
// Should extract title from document header
expect(metadata.title).toBe('Test Document');
// Should not have document-level metadata since there's no other metadata
expect(metadata.authors).toBeUndefined();
// Note: version might be set from section attributes like :type: chapter
expect(metadata.publicationDate).toBeUndefined();
// Content should preserve the title line for 30040 events
expect(content).toContain('= Test Document');
expect(content).toContain('== First Section');
expect(content).toContain('== Second Section');
});
it('should handle document with full header correctly', () => {
const documentWithHeader = `= Test Document
John Doe <john@example.com>
1.0, 2024-01-15: Alexandria Test
:summary: This is a test document
:author: Jane Smith
== First Section
:author: Section Author
:description: This is the first section
This is the content.`;
const { metadata, content } = extractSmartMetadata(documentWithHeader);
// Should extract document-level metadata
expect(metadata.title).toBe('Test Document');
expect(metadata.authors).toEqual(['John Doe', 'Jane Smith']);
expect(metadata.version).toBe('1.0');
expect(metadata.publishedBy).toBe('Alexandria Test');
expect(metadata.publicationDate).toBe('2024-01-15');
expect(metadata.summary).toBe('This is a test document');
// Content should be cleaned
expect(content).not.toContain('= Test Document');
expect(content).not.toContain('John Doe <john@example.com>');
expect(content).not.toContain('1.0, 2024-01-15: Alexandria Test');
expect(content).not.toContain(':summary: This is a test document');
expect(content).not.toContain(':author: Jane Smith');
});
});
}); });
Loading…
Cancel
Save