9 changed files with 1513 additions and 149 deletions
@ -0,0 +1,489 @@
@@ -0,0 +1,489 @@
|
||||
/** |
||||
* AsciiDoc Metadata Extraction Service |
||||
*
|
||||
* Extracts metadata from AsciiDoc document headers and section headers, |
||||
* mapping them to Nostr event tags according to NKBIP-01 specification. |
||||
*
|
||||
* Document header structure: |
||||
* = Document Title |
||||
* Author Name <email@example.com> |
||||
* version, date, revision info |
||||
* :attribute: value |
||||
*
|
||||
* The first empty line marks the end of the header and start of the document body. |
||||
*/ |
||||
|
||||
export interface AsciiDocMetadata { |
||||
title?: string; |
||||
authors?: string[]; |
||||
version?: string; |
||||
edition?: string; |
||||
publicationDate?: string; |
||||
publisher?: string; |
||||
summary?: string; |
||||
coverImage?: string; |
||||
isbn?: string; |
||||
tags?: string[]; |
||||
source?: string; |
||||
publishedBy?: string; |
||||
type?: string; |
||||
autoUpdate?: 'yes' | 'ask' | 'no'; |
||||
} |
||||
|
||||
// Sections use the same metadata structure as documents
|
||||
export type SectionMetadata = AsciiDocMetadata; |
||||
|
||||
export interface ParsedAsciiDoc { |
||||
metadata: AsciiDocMetadata; |
||||
content: string; |
||||
sections: Array<{ |
||||
metadata: SectionMetadata; |
||||
content: string; |
||||
title: string; |
||||
}>; |
||||
} |
||||
|
||||
/** |
||||
* Shared function to parse metadata from attribute entries |
||||
* @param metadata The metadata object to populate |
||||
* @param key The attribute key |
||||
* @param value The attribute value |
||||
*/ |
||||
function parseMetadataAttribute(metadata: AsciiDocMetadata, key: string, value: string): void { |
||||
switch (key.toLowerCase()) { |
||||
case 'author': |
||||
// Accumulate multiple authors
|
||||
if (!metadata.authors) { |
||||
metadata.authors = []; |
||||
} |
||||
metadata.authors.push(value); |
||||
break; |
||||
case 'version': |
||||
// Only set version if not already set from revision line
|
||||
if (!metadata.version) { |
||||
metadata.version = value; |
||||
} |
||||
break; |
||||
case 'edition': |
||||
metadata.edition = value; |
||||
break; |
||||
case 'published_on': |
||||
case 'date': |
||||
metadata.publicationDate = value; |
||||
break; |
||||
case 'published_by': |
||||
case 'publisher': |
||||
// Only set publishedBy if not already set from revision line
|
||||
if (!metadata.publishedBy) { |
||||
metadata.publishedBy = value; |
||||
} |
||||
break; |
||||
case 'summary': |
||||
case 'description': |
||||
// Accumulate multiple summaries/descriptions
|
||||
if (!metadata.summary) { |
||||
metadata.summary = value; |
||||
} else { |
||||
// If we already have a summary, append this one
|
||||
metadata.summary = metadata.summary + ' ' + value; |
||||
} |
||||
break; |
||||
case 'image': |
||||
case 'cover': |
||||
metadata.coverImage = value; |
||||
break; |
||||
case 'isbn': |
||||
metadata.isbn = value; |
||||
break; |
||||
case 'source': |
||||
metadata.source = value; |
||||
break; |
||||
case 'type': |
||||
metadata.type = value; |
||||
break; |
||||
case 'auto-update': |
||||
if (value === 'yes' || value === 'ask' || value === 'no') { |
||||
metadata.autoUpdate = value; |
||||
} |
||||
break; |
||||
case 'tags': |
||||
case 'keywords': |
||||
// Accumulate multiple tag sets
|
||||
if (!metadata.tags) { |
||||
metadata.tags = []; |
||||
} |
||||
const newTags = value.split(',').map(tag => tag.trim()); |
||||
metadata.tags.push(...newTags); |
||||
break; |
||||
} |
||||
} |
||||
|
||||
/**
 * Shared function to extract metadata from header lines.
 *
 * First inspects up to two lines after the title for an author line
 * ("Name <email>"), a revision line ("version, date, publisher"), or a bare
 * author name; then consumes attribute entries (lines starting with ':')
 * until the first empty line.
 *
 * @param lines The lines to process
 * @param startLine The starting line index (the line after the title)
 * @param metadata The metadata object to populate (mutated in place)
 * @returns The index of the line after the header metadata; when the header
 *          is terminated by an empty line, that empty line is NOT consumed
 */
function extractHeaderMetadata(lines: string[], startLine: number, metadata: AsciiDocMetadata): number {
  let currentLine = startLine;

  // Process the next two lines for author and revision info
  let processedLines = 0;
  for (let i = 0; i < 2 && currentLine + i < lines.length; i++) {
    const line = lines[currentLine + i];

    // Skip empty lines
    // NOTE(review): an empty line here is skipped rather than treated as the
    // end of the header — confirm this is intended for "title / blank /
    // attributes" shaped headers.
    if (line.trim() === '') {
      continue;
    }

    // Skip attribute lines (they'll be processed later, in the ':' loop below)
    if (line.startsWith(':')) {
      continue;
    }

    // Check if this is an author line (contains <email>)
    if (line.includes('<') && line.includes('>')) {
      const authorMatch = line.match(/^(.+?)\s*<(.+?)>$/);
      if (authorMatch) {
        const authorName = authorMatch[1].trim();
        // Only the name is kept; the email (authorMatch[2]) is discarded.
        // This assignment also replaces any previously collected authors.
        metadata.authors = [authorName];
        processedLines++;
        continue;
      }
    }

    // Check if this is a revision line (contains version, date, revision info)
    const revisionMatch = line.match(/^(.+?),\s*(.+?),\s*(.+)$/);
    if (revisionMatch) {
      metadata.version = revisionMatch[1].trim();
      metadata.publicationDate = revisionMatch[2].trim();
      metadata.publishedBy = revisionMatch[3].trim();
      processedLines++;
      continue;
    }

    // If it's not author or revision, it might be a simple author name
    if (!metadata.authors) {
      metadata.authors = [line.trim()];
      processedLines++;
    }
  }

  // Move past the author/revision lines that were actually processed.
  // NOTE(review): lines skipped above (blank or ':' attribute lines) do not
  // count towards processedLines, so e.g. an attribute line sitting between
  // the title and the author line is revisited by the loop below — verify
  // this interleaving is intended.
  currentLine += processedLines;

  // Process attribute entries (lines starting with :)
  while (currentLine < lines.length) {
    const line = lines[currentLine];

    // Empty line marks the end of the header
    if (line.trim() === '') {
      break;
    }

    // Check for attribute entries of the form ":key: value"
    const attrMatch = line.match(/^:([^:]+):\s*(.+)$/);
    if (attrMatch) {
      const key = attrMatch[1].trim();
      const value = attrMatch[2].trim();
      parseMetadataAttribute(metadata, key, value);
    }

    // NOTE(review): non-attribute, non-empty lines are also consumed here as
    // part of the header (no break) until a blank line appears — confirm.
    currentLine++;
  }

  return currentLine;
}
||||
|
||||
/**
 * Extracts metadata from AsciiDoc document header.
 *
 * The header is the document title line (`= Title`) plus the author/revision
 * and `:attribute:` lines that follow it, ended by the first empty line (or,
 * failing that, the first `== ` section heading). A title followed
 * immediately by the literal line "index card" is treated as a header-less
 * index-card document.
 *
 * @param inputContent The full AsciiDoc content
 * @returns Object containing metadata and cleaned content — everything after
 *          the header, with remaining `:key: value` lines blanked out
 */
export function extractDocumentMetadata(inputContent: string): {
  metadata: AsciiDocMetadata;
  content: string;
} {
  const lines = inputContent.split(/\r?\n/);
  const metadata: AsciiDocMetadata = {};
  let headerEndIndex = -1;
  let currentLine = 0;

  // Find the document title (first line starting with =)
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    const titleMatch = line.match(/^=\s+(.+)$/);
    if (titleMatch) {
      metadata.title = titleMatch[1].trim();
      currentLine = i + 1;
      break;
    }
  }

  // If no document title found, return empty metadata and the input untouched
  if (!metadata.title) {
    return { metadata: {}, content: inputContent };
  }

  // Check if this is an index card format (title followed immediately by "index card")
  if (currentLine < lines.length && lines[currentLine].trim() === 'index card') {
    // This is index card format - content starts immediately after title
    headerEndIndex = currentLine;
  } else {
    // Extract header metadata using shared function
    currentLine = extractHeaderMetadata(lines, currentLine, metadata);

    // If we didn't find an empty line, the header ends at the first section
    if (currentLine < lines.length && lines[currentLine].trim() === '') {
      headerEndIndex = currentLine + 1; // Skip the empty line
    } else {
      for (let i = currentLine; i < lines.length; i++) {
        if (lines[i].match(/^==\s+/)) {
          headerEndIndex = i;
          break;
        }
      }
      // If no section found and no empty line, the header ends at the current line
      if (headerEndIndex === -1) {
        headerEndIndex = currentLine;
      }
    }
  }

  // If still no header end found, use the entire content.
  // NOTE(review): every branch above assigns headerEndIndex, so this looks
  // unreachable — kept as a defensive fallback.
  if (headerEndIndex === -1) {
    headerEndIndex = lines.length;
  }

  // Extract the content (everything after the header)
  let content = lines.slice(headerEndIndex).join('\n');

  // Blank out metadata attribute lines remaining in the content. Only the
  // line's text is replaced — the newline stays, so an empty line is left in
  // each attribute line's place.
  content = content.replace(/^:([^:]+):\s*(.+)$/gm, '');

  return { metadata, content };
}
||||
|
||||
/**
 * Extracts metadata from a section header.
 *
 * Mirrors extractDocumentMetadata but keyed on `== ` section headings; the
 * parsed title is returned separately AND copied into metadata.title.
 *
 * @param inputSectionContent The section content including its header
 * @returns Object containing section metadata, cleaned content, and title
 */
export function extractSectionMetadata(inputSectionContent: string): {
  metadata: SectionMetadata;
  content: string;
  title: string;
} {
  const lines = inputSectionContent.split(/\r?\n/);
  const metadata: SectionMetadata = {};
  let title = '';
  let headerEndIndex = -1;
  let currentLine = 0;

  // Find the section title (first line starting with ==)
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    const titleMatch = line.match(/^==\s+(.+)$/);
    if (titleMatch) {
      title = titleMatch[1].trim();
      metadata.title = title;
      currentLine = i + 1;
      break;
    }
  }

  // If no section title found, return empty metadata and the input untouched
  if (!title) {
    return { metadata: {}, content: inputSectionContent, title: '' };
  }

  // Extract header metadata using shared function
  currentLine = extractHeaderMetadata(lines, currentLine, metadata);

  // If we didn't find an empty line, the header ends at the next section
  if (currentLine < lines.length && lines[currentLine].trim() === '') {
    headerEndIndex = currentLine + 1; // Skip the empty line
  } else {
    for (let i = currentLine; i < lines.length; i++) {
      if (lines[i].match(/^==\s+/)) {
        headerEndIndex = i;
        break;
      }
    }
  }

  // If still no header end found, treat the entire input as header. This is
  // reached when neither an empty line nor another `== ` heading follows the
  // header, and yields an empty content string.
  if (headerEndIndex === -1) {
    headerEndIndex = lines.length;
  }

  // Extract the content (everything after the header). Unlike
  // extractDocumentMetadata, attribute lines in the body are NOT blanked here.
  const content = lines.slice(headerEndIndex).join('\n');

  return { metadata, content, title };
}
||||
|
||||
/**
 * Splits AsciiDoc content into sections and extracts metadata from each.
 *
 * @param content The full AsciiDoc content
 * @returns Object containing document metadata, the body content (header
 *          stripped, section attribute lines retained), and per-section
 *          metadata/content/title
 */
export function parseAsciiDocWithMetadata(content: string): ParsedAsciiDoc {
  // First extract document metadata
  const { metadata: docMetadata } = extractDocumentMetadata(content);

  // Find the document header end to get the content after the header.
  // NOTE(review): this re-runs the title/header scan instead of reusing the
  // content returned above — presumably because that content has its
  // `:key: value` lines blanked out, while the sections below still need to
  // parse their own attribute lines; confirm.
  const lines = content.split(/\r?\n/);
  let currentLine = 0;

  // Find the document title
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    const titleMatch = line.match(/^=\s+(.+)$/);
    if (titleMatch) {
      currentLine = i + 1;
      break;
    }
  }

  // Extract header metadata (into a throwaway object) purely to find where
  // the document body starts.
  const tempMetadata: AsciiDocMetadata = {};
  currentLine = extractHeaderMetadata(lines, currentLine, tempMetadata);

  // Get the content after the header (including sections with metadata)
  const docContent = lines.slice(currentLine).join('\n');

  // Split into sections at each `== ` heading
  const sections = splitAsciiDocSections(docContent);

  // Extract metadata from each section
  const sectionsWithMetadata = sections.map(section => {
    return extractSectionMetadata(section);
  });

  return {
    metadata: docMetadata,
    content: docContent,
    sections: sectionsWithMetadata
  };
}
||||
|
||||
/** |
||||
* Splits AsciiDoc content into sections at each '==' header |
||||
* @param content The AsciiDoc content (without document header) |
||||
* @returns Array of section strings |
||||
*/ |
||||
function splitAsciiDocSections(content: string): string[] { |
||||
const lines = content.split(/\r?\n/); |
||||
const sections: string[] = []; |
||||
let currentSection: string[] = []; |
||||
let inSection = false; |
||||
|
||||
for (const line of lines) { |
||||
// Check if this is a section header
|
||||
if (line.match(/^==\s+/)) { |
||||
// Save the previous section if we have one
|
||||
if (inSection && currentSection.length > 0) { |
||||
sections.push(currentSection.join('\n').trim()); |
||||
currentSection = []; |
||||
} |
||||
|
||||
// Start new section
|
||||
currentSection = [line]; |
||||
inSection = true; |
||||
} else if (inSection) { |
||||
// Add line to current section
|
||||
currentSection.push(line); |
||||
} |
||||
} |
||||
|
||||
// Add the last section
|
||||
if (currentSection.length > 0) { |
||||
sections.push(currentSection.join('\n').trim()); |
||||
} |
||||
|
||||
return sections; |
||||
} |
||||
|
||||
/** |
||||
* Converts metadata to Nostr event tags |
||||
* @param metadata The metadata object |
||||
* @returns Array of [tag, value] pairs |
||||
*/ |
||||
export function metadataToTags(metadata: AsciiDocMetadata | SectionMetadata): [string, string][] { |
||||
const tags: [string, string][] = []; |
||||
|
||||
if (metadata.title) { |
||||
tags.push(['title', metadata.title]); |
||||
} |
||||
|
||||
if (metadata.authors && metadata.authors.length > 0) { |
||||
metadata.authors.forEach(author => { |
||||
tags.push(['author', author]); |
||||
}); |
||||
} |
||||
|
||||
if (metadata.version) { |
||||
tags.push(['version', metadata.version]); |
||||
} |
||||
|
||||
if (metadata.edition) { |
||||
tags.push(['edition', metadata.edition]); |
||||
} |
||||
|
||||
if (metadata.publicationDate) { |
||||
tags.push(['published_on', metadata.publicationDate]); |
||||
} |
||||
|
||||
if (metadata.publishedBy) { |
||||
tags.push(['published_by', metadata.publishedBy]); |
||||
} |
||||
|
||||
if (metadata.summary) { |
||||
tags.push(['summary', metadata.summary]); |
||||
} |
||||
|
||||
if (metadata.coverImage) { |
||||
tags.push(['image', metadata.coverImage]); |
||||
} |
||||
|
||||
if (metadata.isbn) { |
||||
tags.push(['i', metadata.isbn]); |
||||
} |
||||
|
||||
if (metadata.source) { |
||||
tags.push(['source', metadata.source]); |
||||
} |
||||
|
||||
if (metadata.type) { |
||||
tags.push(['type', metadata.type]); |
||||
} |
||||
|
||||
if (metadata.autoUpdate) { |
||||
tags.push(['auto-update', metadata.autoUpdate]); |
||||
} |
||||
|
||||
if (metadata.tags && metadata.tags.length > 0) { |
||||
metadata.tags.forEach(tag => { |
||||
tags.push(['t', tag]); |
||||
}); |
||||
} |
||||
|
||||
return tags; |
||||
} |
||||
|
||||
/** |
||||
* Removes metadata from AsciiDoc content, leaving only the actual content |
||||
* @param content The full AsciiDoc content |
||||
* @returns Cleaned content without metadata |
||||
*/ |
||||
export function removeMetadataFromContent(content: string): string { |
||||
const { content: docContent } = extractDocumentMetadata(content); |
||||
|
||||
// Remove metadata attributes from sections in the content
|
||||
const cleanedContent = docContent.replace(/^:([^:]+):\s*(.+)$/gm, ''); |
||||
|
||||
return cleanedContent; |
||||
}
|
||||
@ -0,0 +1,446 @@
@@ -0,0 +1,446 @@
|
||||
import { describe, it, expect, vi, beforeEach } from "vitest"; |
||||
import { build30040EventSet, validate30040EventSet } from "../../src/lib/utils/event_input_utils"; |
||||
import { extractDocumentMetadata, parseAsciiDocWithMetadata } from "../../src/lib/utils/asciidoc_metadata"; |
||||
|
||||
// Mock NDK and other dependencies.
// NDKEvent is replaced with a factory that echoes the supplied event data and
// stamps deterministic id/sig values, so no real signing or relay I/O occurs.
vi.mock("@nostr-dev-kit/ndk", () => ({
  NDKEvent: vi.fn().mockImplementation((ndk, eventData) => ({
    ...eventData,
    id: "mock-event-id",
    sig: "mock-signature",
    kind: eventData.kind,
    content: eventData.content,
    tags: eventData.tags,
    pubkey: eventData.pubkey,
    created_at: eventData.created_at,
  })),
}));

// Stub the app's NDK singleton so no relay connections are attempted.
vi.mock("../../src/lib/ndk", () => ({
  ndkInstance: {
    subscribe: vi.fn(),
  },
  getNdk: vi.fn(() => ({})),
}));

// svelte/store's get() is stubbed to return an empty object.
vi.mock("svelte/store", () => ({
  get: vi.fn(() => ({})),
}));
||||
|
||||
describe("EventInput 30040 Publishing", () => { |
||||
const baseEvent = { |
||||
pubkey: "test-pubkey", |
||||
created_at: 1234567890, |
||||
}; |
||||
|
||||
beforeEach(() => { |
||||
vi.clearAllMocks(); |
||||
}); |
||||
|
||||
describe("Normal Structure with Preamble", () => { |
||||
it("should build 30040 event set with preamble content", () => { |
||||
const content = `= Test Document with Preamble
|
||||
John Doe <john@example.com> |
||||
1.0, 2024-01-15, Alexandria Test |
||||
:summary: This is a test document with preamble |
||||
:keywords: test, preamble, asciidoc |
||||
|
||||
This is the preamble content that should be included. |
||||
|
||||
== First Section |
||||
:author: Section Author |
||||
:summary: This is the first section |
||||
|
||||
This is the content of the first section. |
||||
|
||||
== Second Section |
||||
:summary: This is the second section |
||||
|
||||
This is the content of the second section.`;
|
||||
|
||||
const tags: [string, string][] = [["type", "article"]]; |
||||
|
||||
const { indexEvent, sectionEvents } = build30040EventSet(content, tags, baseEvent); |
||||
|
||||
// Test index event
|
||||
expect(indexEvent.kind).toBe(30040); |
||||
expect(indexEvent.content).toBe(""); |
||||
expect(indexEvent.tags).toContainEqual(["d", "test-document-with-preamble"]); |
||||
expect(indexEvent.tags).toContainEqual(["title", "Test Document with Preamble"]); |
||||
expect(indexEvent.tags).toContainEqual(["author", "John Doe"]); |
||||
expect(indexEvent.tags).toContainEqual(["version", "1.0"]); |
||||
expect(indexEvent.tags).toContainEqual(["summary", "This is a test document with preamble"]); |
||||
expect(indexEvent.tags).toContainEqual(["t", "test"]); |
||||
expect(indexEvent.tags).toContainEqual(["t", "preamble"]); |
||||
expect(indexEvent.tags).toContainEqual(["t", "asciidoc"]); |
||||
expect(indexEvent.tags).toContainEqual(["type", "article"]); |
||||
|
||||
// Test section events
|
||||
expect(sectionEvents).toHaveLength(2); |
||||
|
||||
// First section
|
||||
expect(sectionEvents[0].kind).toBe(30041); |
||||
expect(sectionEvents[0].content).toBe("This is the content of the first section."); |
||||
expect(sectionEvents[0].tags).toContainEqual(["d", "test-document-with-preamble-first-section"]); |
||||
expect(sectionEvents[0].tags).toContainEqual(["title", "First Section"]); |
||||
expect(sectionEvents[0].tags).toContainEqual(["author", "Section Author"]); |
||||
expect(sectionEvents[0].tags).toContainEqual(["summary", "This is the first section"]); |
||||
|
||||
// Second section
|
||||
expect(sectionEvents[1].kind).toBe(30041); |
||||
expect(sectionEvents[1].content).toBe("This is the content of the second section."); |
||||
expect(sectionEvents[1].tags).toContainEqual(["d", "test-document-with-preamble-second-section"]); |
||||
expect(sectionEvents[1].tags).toContainEqual(["title", "Second Section"]); |
||||
expect(sectionEvents[1].tags).toContainEqual(["summary", "This is the second section"]); |
||||
|
||||
// Test a-tags in index event
|
||||
expect(indexEvent.tags).toContainEqual(["a", "30041:test-pubkey:test-document-with-preamble-first-section"]); |
||||
expect(indexEvent.tags).toContainEqual(["a", "30041:test-pubkey:test-document-with-preamble-second-section"]); |
||||
}); |
||||
}); |
||||
|
||||
describe("Normal Structure without Preamble", () => { |
||||
it("should build 30040 event set without preamble content", () => { |
||||
const content = `= Test Document without Preamble
|
||||
:summary: This is a test document without preamble |
||||
:keywords: test, no-preamble, asciidoc |
||||
|
||||
== First Section |
||||
:author: Section Author |
||||
:summary: This is the first section |
||||
|
||||
This is the content of the first section. |
||||
|
||||
== Second Section |
||||
:summary: This is the second section |
||||
|
||||
This is the content of the second section.`;
|
||||
|
||||
const tags: [string, string][] = [["type", "article"]]; |
||||
|
||||
const { indexEvent, sectionEvents } = build30040EventSet(content, tags, baseEvent); |
||||
|
||||
// Test index event
|
||||
expect(indexEvent.kind).toBe(30040); |
||||
expect(indexEvent.content).toBe(""); |
||||
expect(indexEvent.tags).toContainEqual(["d", "test-document-without-preamble"]); |
||||
expect(indexEvent.tags).toContainEqual(["title", "Test Document without Preamble"]); |
||||
expect(indexEvent.tags).toContainEqual(["summary", "This is a test document without preamble"]); |
||||
|
||||
// Test section events
|
||||
expect(sectionEvents).toHaveLength(2); |
||||
|
||||
// First section
|
||||
expect(sectionEvents[0].kind).toBe(30041); |
||||
expect(sectionEvents[0].content).toBe("This is the content of the first section."); |
||||
expect(sectionEvents[0].tags).toContainEqual(["d", "test-document-without-preamble-first-section"]); |
||||
expect(sectionEvents[0].tags).toContainEqual(["title", "First Section"]); |
||||
expect(sectionEvents[0].tags).toContainEqual(["author", "Section Author"]); |
||||
expect(sectionEvents[0].tags).toContainEqual(["summary", "This is the first section"]); |
||||
|
||||
// Second section
|
||||
expect(sectionEvents[1].kind).toBe(30041); |
||||
expect(sectionEvents[1].content).toBe("This is the content of the second section."); |
||||
expect(sectionEvents[1].tags).toContainEqual(["d", "test-document-without-preamble-second-section"]); |
||||
expect(sectionEvents[1].tags).toContainEqual(["title", "Second Section"]); |
||||
expect(sectionEvents[1].tags).toContainEqual(["summary", "This is the second section"]); |
||||
}); |
||||
}); |
||||
|
||||
describe("Skeleton Structure with Preamble", () => { |
||||
it("should build 30040 event set with skeleton structure and preamble", () => { |
||||
const content = `= Skeleton Document with Preamble
|
||||
:summary: This is a skeleton document with preamble |
||||
:keywords: skeleton, preamble, empty |
||||
|
||||
This is the preamble content. |
||||
|
||||
== Empty Section 1 |
||||
|
||||
== Empty Section 2 |
||||
|
||||
== Empty Section 3`;
|
||||
|
||||
const tags: [string, string][] = [["type", "skeleton"]]; |
||||
|
||||
const { indexEvent, sectionEvents } = build30040EventSet(content, tags, baseEvent); |
||||
|
||||
// Test index event
|
||||
expect(indexEvent.kind).toBe(30040); |
||||
expect(indexEvent.content).toBe(""); |
||||
expect(indexEvent.tags).toContainEqual(["d", "skeleton-document-with-preamble"]); |
||||
expect(indexEvent.tags).toContainEqual(["title", "Skeleton Document with Preamble"]); |
||||
expect(indexEvent.tags).toContainEqual(["summary", "This is a skeleton document with preamble"]); |
||||
|
||||
// Test section events
|
||||
expect(sectionEvents).toHaveLength(3); |
||||
|
||||
// All sections should have empty content
|
||||
sectionEvents.forEach((section, index) => { |
||||
expect(section.kind).toBe(30041); |
||||
expect(section.content).toBe(""); |
||||
expect(section.tags).toContainEqual(["d", `skeleton-document-with-preamble-empty-section-${index + 1}`]); |
||||
expect(section.tags).toContainEqual(["title", `Empty Section ${index + 1}`]); |
||||
}); |
||||
}); |
||||
}); |
||||
|
||||
describe("Skeleton Structure without Preamble", () => { |
||||
it("should build 30040 event set with skeleton structure without preamble", () => { |
||||
const content = `= Skeleton Document without Preamble
|
||||
:summary: This is a skeleton document without preamble |
||||
:keywords: skeleton, no-preamble, empty |
||||
|
||||
== Empty Section 1 |
||||
|
||||
== Empty Section 2 |
||||
|
||||
== Empty Section 3`;
|
||||
|
||||
const tags: [string, string][] = [["type", "skeleton"]]; |
||||
|
||||
const { indexEvent, sectionEvents } = build30040EventSet(content, tags, baseEvent); |
||||
|
||||
// Test index event
|
||||
expect(indexEvent.kind).toBe(30040); |
||||
expect(indexEvent.content).toBe(""); |
||||
expect(indexEvent.tags).toContainEqual(["d", "skeleton-document-without-preamble"]); |
||||
expect(indexEvent.tags).toContainEqual(["title", "Skeleton Document without Preamble"]); |
||||
expect(indexEvent.tags).toContainEqual(["summary", "This is a skeleton document without preamble"]); |
||||
|
||||
// Test section events
|
||||
expect(sectionEvents).toHaveLength(3); |
||||
|
||||
// All sections should have empty content
|
||||
sectionEvents.forEach((section, index) => { |
||||
expect(section.kind).toBe(30041); |
||||
expect(section.content).toBe(""); |
||||
expect(section.tags).toContainEqual(["d", `skeleton-document-without-preamble-empty-section-${index + 1}`]); |
||||
expect(section.tags).toContainEqual(["title", `Empty Section ${index + 1}`]); |
||||
}); |
||||
}); |
||||
}); |
||||
|
||||
describe("Index Card Format", () => { |
||||
it("should build 30040 event set for index card format", () => { |
||||
const content = `= Test Index Card
|
||||
index card`;
|
||||
|
||||
const tags: [string, string][] = [["type", "index-card"]]; |
||||
|
||||
const { indexEvent, sectionEvents } = build30040EventSet(content, tags, baseEvent); |
||||
|
||||
// Test index event
|
||||
expect(indexEvent.kind).toBe(30040); |
||||
expect(indexEvent.content).toBe(""); |
||||
expect(indexEvent.tags).toContainEqual(["d", "test-index-card"]); |
||||
expect(indexEvent.tags).toContainEqual(["title", "Test Index Card"]); |
||||
expect(indexEvent.tags).toContainEqual(["type", "index-card"]); |
||||
|
||||
// Should have no section events for index card
|
||||
expect(sectionEvents).toHaveLength(0); |
||||
}); |
||||
|
||||
it("should build 30040 event set for index card with metadata", () => { |
||||
const content = `= Test Index Card with Metadata
|
||||
:summary: This is an index card with metadata |
||||
:keywords: index, card, metadata |
||||
index card`;
|
||||
|
||||
const tags: [string, string][] = [["type", "index-card"]]; |
||||
|
||||
const { indexEvent, sectionEvents } = build30040EventSet(content, tags, baseEvent); |
||||
|
||||
// Test index event
|
||||
expect(indexEvent.kind).toBe(30040); |
||||
expect(indexEvent.content).toBe(""); |
||||
expect(indexEvent.tags).toContainEqual(["d", "test-index-card-with-metadata"]); |
||||
expect(indexEvent.tags).toContainEqual(["title", "Test Index Card with Metadata"]); |
||||
expect(indexEvent.tags).toContainEqual(["summary", "This is an index card with metadata"]); |
||||
expect(indexEvent.tags).toContainEqual(["t", "index"]); |
||||
expect(indexEvent.tags).toContainEqual(["t", "card"]); |
||||
expect(indexEvent.tags).toContainEqual(["t", "metadata"]); |
||||
expect(indexEvent.tags).toContainEqual(["type", "index-card"]); |
||||
|
||||
// Should have no section events for index card
|
||||
expect(sectionEvents).toHaveLength(0); |
||||
}); |
||||
}); |
||||
|
||||
describe("Complex Metadata Structures", () => { |
||||
it("should handle complex metadata with all attribute types", () => { |
||||
const content = `= Complex Metadata Document
|
||||
Jane Smith <jane@example.com> |
||||
2.0, 2024-02-20, Alexandria Complex |
||||
:summary: This is a complex document with all metadata types |
||||
:description: Alternative description field |
||||
:keywords: complex, metadata, all-types |
||||
:tags: additional, tags, here |
||||
:author: Override Author |
||||
:author: Third Author |
||||
:version: 3.0 |
||||
:published_on: 2024-03-01 |
||||
:published_by: Alexandria Complex |
||||
:type: book |
||||
:image: https://example.com/cover.jpg
|
||||
:isbn: 978-0-123456-78-9 |
||||
:source: https://github.com/alexandria/complex
|
||||
:auto-update: yes |
||||
|
||||
This is the preamble content. |
||||
|
||||
== Section with Complex Metadata |
||||
:author: Section Author |
||||
:author: Section Co-Author |
||||
:summary: This section has complex metadata |
||||
:description: Alternative description for section |
||||
:keywords: section, complex, metadata |
||||
:tags: section, tags |
||||
:type: chapter |
||||
:image: https://example.com/section-image.jpg
|
||||
|
||||
This is the section content.`;
|
||||
|
||||
const tags: [string, string][] = [["type", "complex"]]; |
||||
|
||||
const { indexEvent, sectionEvents } = build30040EventSet(content, tags, baseEvent); |
||||
|
||||
// Test index event metadata
|
||||
expect(indexEvent.kind).toBe(30040); |
||||
expect(indexEvent.tags).toContainEqual(["d", "complex-metadata-document"]); |
||||
expect(indexEvent.tags).toContainEqual(["title", "Complex Metadata Document"]); |
||||
expect(indexEvent.tags).toContainEqual(["author", "Jane Smith"]); // Should use header line author
|
||||
expect(indexEvent.tags).toContainEqual(["author", "Override Author"]); // Additional author from attribute
|
||||
expect(indexEvent.tags).toContainEqual(["author", "Third Author"]); // Additional author from attribute
|
||||
expect(indexEvent.tags).toContainEqual(["version", "2.0"]); // Should use revision line version
|
||||
expect(indexEvent.tags).toContainEqual(["summary", "This is a complex document with all metadata types Alternative description field"]); |
||||
expect(indexEvent.tags).toContainEqual(["published_on", "2024-03-01"]); |
||||
expect(indexEvent.tags).toContainEqual(["published_by", "Alexandria Complex"]); |
||||
expect(indexEvent.tags).toContainEqual(["type", "book"]); |
||||
expect(indexEvent.tags).toContainEqual(["image", "https://example.com/cover.jpg"]); |
||||
expect(indexEvent.tags).toContainEqual(["i", "978-0-123456-78-9"]); |
||||
expect(indexEvent.tags).toContainEqual(["source", "https://github.com/alexandria/complex"]); |
||||
expect(indexEvent.tags).toContainEqual(["auto-update", "yes"]); |
||||
expect(indexEvent.tags).toContainEqual(["t", "complex"]); |
||||
expect(indexEvent.tags).toContainEqual(["t", "metadata"]); |
||||
expect(indexEvent.tags).toContainEqual(["t", "all-types"]); |
||||
expect(indexEvent.tags).toContainEqual(["t", "additional"]); |
||||
expect(indexEvent.tags).toContainEqual(["t", "tags"]); |
||||
expect(indexEvent.tags).toContainEqual(["t", "here"]); |
||||
|
||||
// Test section metadata
|
||||
expect(sectionEvents).toHaveLength(1); |
||||
expect(sectionEvents[0].kind).toBe(30041); |
||||
expect(sectionEvents[0].content).toBe("This is the section content."); |
||||
expect(sectionEvents[0].tags).toContainEqual(["d", "complex-metadata-document-section-with-complex-metadata"]); |
||||
expect(sectionEvents[0].tags).toContainEqual(["title", "Section with Complex Metadata"]); |
||||
expect(sectionEvents[0].tags).toContainEqual(["author", "Section Author"]); |
||||
expect(sectionEvents[0].tags).toContainEqual(["author", "Section Co-Author"]); |
||||
expect(sectionEvents[0].tags).toContainEqual(["summary", "This section has complex metadata Alternative description for section"]); |
||||
expect(sectionEvents[0].tags).toContainEqual(["type", "chapter"]); |
||||
expect(sectionEvents[0].tags).toContainEqual(["image", "https://example.com/section-image.jpg"]); |
||||
expect(sectionEvents[0].tags).toContainEqual(["t", "section"]); |
||||
expect(sectionEvents[0].tags).toContainEqual(["t", "complex"]); |
||||
expect(sectionEvents[0].tags).toContainEqual(["t", "metadata"]); |
||||
expect(sectionEvents[0].tags).toContainEqual(["t", "tags"]); |
||||
}); |
||||
}); |
||||
|
||||
describe("Validation Tests", () => { |
||||
it("should validate normal structure correctly", () => { |
||||
const content = `= Valid Document
|
||||
:summary: This is a valid document |
||||
|
||||
== Section 1 |
||||
|
||||
Content here. |
||||
|
||||
== Section 2 |
||||
|
||||
More content.`;
|
||||
|
||||
const validation = validate30040EventSet(content); |
||||
expect(validation.valid).toBe(true); |
||||
}); |
||||
|
||||
it("should validate index card format correctly", () => { |
||||
const content = `= Valid Index Card
|
||||
index card`;
|
||||
|
||||
const validation = validate30040EventSet(content); |
||||
expect(validation.valid).toBe(true); |
||||
}); |
||||
|
||||
it("should validate skeleton structure correctly", () => { |
||||
const content = `= Skeleton Document
|
||||
|
||||
== Empty Section 1 |
||||
|
||||
== Empty Section 2`;
|
||||
|
||||
const validation = validate30040EventSet(content); |
||||
expect(validation.valid).toBe(true); |
||||
}); |
||||
|
||||
it("should reject invalid structure", () => { |
||||
const content = `This is not a valid AsciiDoc document.`; |
||||
|
||||
const validation = validate30040EventSet(content); |
||||
expect(validation.valid).toBe(false); |
||||
expect(validation.reason).toContain("30040 events must have a document title"); |
||||
}); |
||||
}); |
||||
|
||||
describe("Edge Cases", () => { |
||||
it("should handle document with only title and no sections", () => { |
||||
const content = `= Document with No Sections
|
||||
:summary: This document has no sections |
||||
|
||||
This is just preamble content.`;
|
||||
|
||||
const tags: [string, string][] = []; |
||||
|
||||
const { indexEvent, sectionEvents } = build30040EventSet(content, tags, baseEvent); |
||||
|
||||
expect(indexEvent.kind).toBe(30040); |
||||
expect(indexEvent.tags).toContainEqual(["d", "document-with-no-sections"]); |
||||
expect(indexEvent.tags).toContainEqual(["title", "Document with No Sections"]); |
||||
expect(sectionEvents).toHaveLength(0); |
||||
}); |
||||
|
||||
it("should handle document with special characters in title", () => { |
||||
const content = `= Document with Special Characters: Test & More!
|
||||
:summary: This document has special characters in the title |
||||
|
||||
== Section 1 |
||||
|
||||
Content here.`;
|
||||
|
||||
const tags: [string, string][] = []; |
||||
|
||||
const { indexEvent, sectionEvents } = build30040EventSet(content, tags, baseEvent); |
||||
|
||||
expect(indexEvent.kind).toBe(30040); |
||||
expect(indexEvent.tags).toContainEqual(["d", "document-with-special-characters-test-more"]); |
||||
expect(indexEvent.tags).toContainEqual(["title", "Document with Special Characters: Test & More!"]); |
||||
expect(sectionEvents).toHaveLength(1); |
||||
}); |
||||
|
||||
it("should handle document with very long title", () => { |
||||
const content = `= This is a very long document title that should be handled properly by the system and should not cause any issues with the d-tag generation or any other functionality
|
||||
:summary: This document has a very long title |
||||
|
||||
== Section 1 |
||||
|
||||
Content here.`;
|
||||
|
||||
const tags: [string, string][] = []; |
||||
|
||||
const { indexEvent, sectionEvents } = build30040EventSet(content, tags, baseEvent); |
||||
|
||||
expect(indexEvent.kind).toBe(30040); |
||||
expect(indexEvent.tags).toContainEqual(["title", "This is a very long document title that should be handled properly by the system and should not cause any issues with the d-tag generation or any other functionality"]); |
||||
expect(sectionEvents).toHaveLength(1); |
||||
}); |
||||
}); |
||||
});
|
||||
@ -0,0 +1,183 @@
@@ -0,0 +1,183 @@
|
||||
import { describe, it, expect } from "vitest"; |
||||
import {
|
||||
extractDocumentMetadata,
|
||||
extractSectionMetadata,
|
||||
parseAsciiDocWithMetadata, |
||||
metadataToTags
|
||||
} from "../../src/lib/utils/asciidoc_metadata.ts"; |
||||
|
||||
describe("AsciiDoc Metadata Extraction", () => { |
||||
const testContent = `= Test Document with Metadata
|
||||
John Doe <john@example.com> |
||||
1.0, 2024-01-15, Alexandria Test |
||||
:summary: This is a test document for metadata extraction |
||||
:author: Jane Smith |
||||
:version: 2.0 |
||||
:published_on: 2024-01-15 |
||||
:published_by: Alexandria Project |
||||
:type: article |
||||
:keywords: test, metadata, asciidoc |
||||
:image: https://example.com/cover.jpg
|
||||
:isbn: 978-0-123456-78-9 |
||||
:source: https://github.com/alexandria/test
|
||||
:auto-update: yes |
||||
|
||||
This is the preamble content that should be included in the document body. |
||||
|
||||
== First Section |
||||
:author: Section Author |
||||
:summary: This is the first section |
||||
:keywords: section1, content |
||||
|
||||
This is the content of the first section. |
||||
|
||||
== Second Section |
||||
:summary: This is the second section |
||||
:type: chapter |
||||
|
||||
This is the content of the second section.`;
|
||||
|
||||
it("extractDocumentMetadata should extract document metadata correctly", () => { |
||||
const { metadata, content } = extractDocumentMetadata(testContent); |
||||
|
||||
expect(metadata.title).toBe("Test Document with Metadata"); |
||||
expect(metadata.authors).toEqual(["John Doe", "Jane Smith"]); |
||||
expect(metadata.version).toBe("1.0"); |
||||
expect(metadata.publicationDate).toBe("2024-01-15"); |
||||
expect(metadata.publishedBy).toBe("Alexandria Test"); |
||||
expect(metadata.summary).toBe("This is a test document for metadata extraction"); |
||||
expect(metadata.authors).toEqual(["John Doe", "Jane Smith"]); |
||||
expect(metadata.type).toBe("article"); |
||||
expect(metadata.tags).toEqual(["test", "metadata", "asciidoc"]); |
||||
expect(metadata.coverImage).toBe("https://example.com/cover.jpg"); |
||||
expect(metadata.isbn).toBe("978-0-123456-78-9"); |
||||
expect(metadata.source).toBe("https://github.com/alexandria/test"); |
||||
expect(metadata.autoUpdate).toBe("yes"); |
||||
|
||||
// Content should not include the header metadata
|
||||
expect(content).toContain("This is the preamble content"); |
||||
expect(content).toContain("== First Section"); |
||||
expect(content).not.toContain("= Test Document with Metadata"); |
||||
expect(content).not.toContain(":summary:"); |
||||
}); |
||||
|
||||
it("extractSectionMetadata should extract section metadata correctly", () => { |
||||
const sectionContent = `== First Section
|
||||
:author: Section Author |
||||
:description: This is the first section |
||||
:tags: section1, content |
||||
|
||||
This is the content of the first section.`;
|
||||
|
||||
const { metadata, content, title } = extractSectionMetadata(sectionContent); |
||||
|
||||
expect(title).toBe("First Section"); |
||||
expect(metadata.authors).toEqual(["Section Author"]); |
||||
expect(metadata.summary).toBe("This is the first section"); |
||||
expect(metadata.tags).toEqual(["section1", "content"]); |
||||
expect(content).toBe("This is the content of the first section."); |
||||
}); |
||||
|
||||
it("parseAsciiDocWithMetadata should parse complete document", () => { |
||||
const parsed = parseAsciiDocWithMetadata(testContent); |
||||
|
||||
expect(parsed.metadata.title).toBe("Test Document with Metadata"); |
||||
expect(parsed.sections).toHaveLength(2); |
||||
expect(parsed.sections[0].title).toBe("First Section"); |
||||
expect(parsed.sections[1].title).toBe("Second Section"); |
||||
expect(parsed.sections[0].metadata.authors).toEqual(["Section Author"]); |
||||
expect(parsed.sections[1].metadata.summary).toBe("This is the second section"); |
||||
}); |
||||
|
||||
it("metadataToTags should convert metadata to Nostr tags", () => { |
||||
const metadata = { |
||||
title: "Test Title", |
||||
authors: ["Author 1", "Author 2"], |
||||
version: "1.0", |
||||
summary: "Test summary", |
||||
tags: ["tag1", "tag2"] |
||||
}; |
||||
|
||||
const tags = metadataToTags(metadata); |
||||
|
||||
expect(tags).toContainEqual(["title", "Test Title"]); |
||||
expect(tags).toContainEqual(["author", "Author 1"]); |
||||
expect(tags).toContainEqual(["author", "Author 2"]); |
||||
expect(tags).toContainEqual(["version", "1.0"]); |
||||
expect(tags).toContainEqual(["summary", "Test summary"]); |
||||
expect(tags).toContainEqual(["t", "tag1"]); |
||||
expect(tags).toContainEqual(["t", "tag2"]); |
||||
}); |
||||
|
||||
it("should handle index card format correctly", () => { |
||||
const indexCardContent = `= Test Index Card
|
||||
index card`;
|
||||
|
||||
const { metadata, content } = extractDocumentMetadata(indexCardContent); |
||||
|
||||
expect(metadata.title).toBe("Test Index Card"); |
||||
expect(content.trim()).toBe("index card"); |
||||
}); |
||||
|
||||
it("should handle empty content gracefully", () => { |
||||
const emptyContent = ""; |
||||
|
||||
const { metadata, content } = extractDocumentMetadata(emptyContent); |
||||
|
||||
expect(metadata.title).toBeUndefined(); |
||||
expect(content).toBe(""); |
||||
}); |
||||
|
||||
it("should handle keywords as tags", () => { |
||||
const contentWithKeywords = `= Test Document
|
||||
:keywords: keyword1, keyword2, keyword3 |
||||
|
||||
Content here.`;
|
||||
|
||||
const { metadata } = extractDocumentMetadata(contentWithKeywords); |
||||
|
||||
expect(metadata.tags).toEqual(["keyword1", "keyword2", "keyword3"]); |
||||
}); |
||||
|
||||
it("should handle both tags and keywords", () => { |
||||
const contentWithBoth = `= Test Document
|
||||
:tags: tag1, tag2 |
||||
:keywords: keyword1, keyword2 |
||||
|
||||
Content here.`;
|
||||
|
||||
const { metadata } = extractDocumentMetadata(contentWithBoth); |
||||
|
||||
// Both tags and keywords are valid, both should be accumulated
|
||||
expect(metadata.tags).toEqual(["tag1", "tag2", "keyword1", "keyword2"]); |
||||
}); |
||||
|
||||
it("should handle tags only", () => { |
||||
const contentWithTags = `= Test Document
|
||||
:tags: tag1, tag2, tag3 |
||||
|
||||
Content here.`;
|
||||
|
||||
const { metadata } = extractDocumentMetadata(contentWithTags); |
||||
|
||||
expect(metadata.tags).toEqual(["tag1", "tag2", "tag3"]); |
||||
}); |
||||
|
||||
it("should handle both summary and description", () => { |
||||
const contentWithSummary = `= Test Document
|
||||
:summary: This is a summary |
||||
|
||||
Content here.`;
|
||||
|
||||
const contentWithDescription = `= Test Document
|
||||
:description: This is a description |
||||
|
||||
Content here.`;
|
||||
|
||||
const { metadata: summaryMetadata } = extractDocumentMetadata(contentWithSummary); |
||||
const { metadata: descriptionMetadata } = extractDocumentMetadata(contentWithDescription); |
||||
|
||||
expect(summaryMetadata.summary).toBe("This is a summary"); |
||||
expect(descriptionMetadata.summary).toBe("This is a description"); |
||||
}); |
||||
});
|
||||
Loading…
Reference in new issue