clone of repo on github
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

332 lines
11 KiB

import { describe, expect, it } from "vitest";
import {
extractDocumentMetadata,
extractSectionMetadata,
extractSmartMetadata,
metadataToTags,
parseAsciiDocWithMetadata,
} from "../../src/lib/utils/asciidoc_metadata.ts";
/**
 * Test suite for the AsciiDoc metadata helpers imported from
 * `src/lib/utils/asciidoc_metadata.ts`: `extractDocumentMetadata`,
 * `extractSectionMetadata`, `extractSmartMetadata`, `metadataToTags`
 * and `parseAsciiDocWithMetadata`.
 *
 * Every fixture is a template literal whose continuation lines start at
 * column 0 on purpose: any leading whitespace would become part of the
 * AsciiDoc input handed to the parser.
 */
describe("AsciiDoc Metadata Extraction", () => {
  // Shared fixture: a document title, an author line, a revision line and
  // header attributes, followed by a preamble line and two level-2 sections
  // that carry their own attributes.
  const testContent = `= Test Document with Metadata
John Doe <john@example.com>
1.0, 2024-01-15: Alexandria Test
:summary: This is a test document for metadata extraction
:author: Jane Smith
:published_on: 2024-01-15
:published_by: Alexandria Project
:type: article
:keywords: test, metadata, asciidoc
:image: https://example.com/cover.jpg
:isbn: 978-0-123456-78-9
:source: https://github.com/alexandria/test
:auto-update: yes
This is the preamble content that should be included in the document body.
== First Section
:author: Section Author
:summary: This is the first section
:keywords: section1, content
This is the content of the first section.
== Second Section
:summary: This is the second section
:type: chapter
This is the content of the second section.`;

  it("extractDocumentMetadata should extract document metadata correctly", () => {
    const { metadata, content } = extractDocumentMetadata(testContent);

    expect(metadata.title).toBe("Test Document with Metadata");
    // The author-line name and the `:author:` attribute are both expected,
    // in document order.
    expect(metadata.authors).toEqual(["John Doe", "Jane Smith"]);
    expect(metadata.version).toBe("1.0");
    expect(metadata.publicationDate).toBe("2024-01-15");
    // The fixture also sets `:published_by: Alexandria Project`; the expected
    // value here is the remark from the revision line instead.
    expect(metadata.publishedBy).toBe("Alexandria Test");
    expect(metadata.summary).toBe(
      "This is a test document for metadata extraction",
    );
    expect(metadata.type).toBe("article");
    expect(metadata.tags).toEqual(["test", "metadata", "asciidoc"]);
    expect(metadata.coverImage).toBe("https://example.com/cover.jpg");
    expect(metadata.isbn).toBe("978-0-123456-78-9");
    expect(metadata.source).toBe("https://github.com/alexandria/test");
    expect(metadata.autoUpdate).toBe("yes");

    // Content should not include the header metadata
    expect(content).toContain("This is the preamble content");
    expect(content).toContain("== First Section");
    expect(content).not.toContain("= Test Document with Metadata");
    expect(content).not.toContain(":summary:");
  });

  it("extractSectionMetadata should extract section metadata correctly", () => {
    // Uses the `:description:` and `:tags:` spellings; the assertions below
    // expect them to land in `summary` and `tags` respectively.
    const sectionContent = `== First Section
:author: Section Author
:description: This is the first section
:tags: section1, content
This is the content of the first section.`;

    const { metadata, content, title } = extractSectionMetadata(sectionContent);

    expect(title).toBe("First Section");
    expect(metadata.authors).toEqual(["Section Author"]);
    expect(metadata.summary).toBe("This is the first section");
    expect(metadata.tags).toEqual(["section1", "content"]);
    expect(content).toBe("This is the content of the first section.");
  });

  it("extractSectionMetadata should extract standalone author names and remove them from content", () => {
    // "Stella" is a bare line directly under the section header, with no
    // `:author:` attribute syntax.
    const sectionContent = `== Section Header1
Stella
:description: Some summary
Some context text`;

    const { metadata, content, title } = extractSectionMetadata(sectionContent);

    expect(title).toBe("Section Header1");
    expect(metadata.authors).toEqual(["Stella"]);
    expect(metadata.summary).toBe("Some summary");
    expect(content.trim()).toBe("Some context text");
  });

  it("extractSectionMetadata should handle multiple standalone author names", () => {
    // Despite the title, the fixture combines one standalone name with one
    // `:author:` attribute; both are expected, standalone name first.
    const sectionContent = `== Section Header1
Stella
:author: John Doe
:description: Some summary
Some context text`;

    const { metadata, content, title } = extractSectionMetadata(sectionContent);

    expect(title).toBe("Section Header1");
    expect(metadata.authors).toEqual(["Stella", "John Doe"]);
    expect(metadata.summary).toBe("Some summary");
    expect(content.trim()).toBe("Some context text");
  });

  it("extractSectionMetadata should not extract non-author lines as authors", () => {
    // The second bare line is a sentence, not a name: it must stay in the
    // content while "Stella" is still picked up as the author.
    const sectionContent = `== Section Header1
Stella
This is not an author line
:description: Some summary
Some context text`;

    const { metadata, content, title } = extractSectionMetadata(sectionContent);

    expect(title).toBe("Section Header1");
    expect(metadata.authors).toEqual(["Stella"]);
    expect(metadata.summary).toBe("Some summary");
    expect(content.trim()).toBe(
      "This is not an author line\nSome context text",
    );
  });

  it("parseAsciiDocWithMetadata should parse complete document", () => {
    const parsed = parseAsciiDocWithMetadata(testContent);

    expect(parsed.metadata.title).toBe("Test Document with Metadata");
    expect(parsed.sections).toHaveLength(2);
    expect(parsed.sections[0].title).toBe("First Section");
    expect(parsed.sections[1].title).toBe("Second Section");
    expect(parsed.sections[0].metadata.authors).toEqual(["Section Author"]);
    expect(parsed.sections[1].metadata.summary).toBe(
      "This is the second section",
    );
  });

  it("metadataToTags should convert metadata to Nostr tags", () => {
    const metadata = {
      title: "Test Title",
      authors: ["Author 1", "Author 2"],
      version: "1.0",
      summary: "Test summary",
      tags: ["tag1", "tag2"],
    };

    const tags = metadataToTags(metadata);

    // One [name, value] pair per field; each author gets its own "author"
    // pair and each entry of `tags` gets its own "t" pair.
    expect(tags).toContainEqual(["title", "Test Title"]);
    expect(tags).toContainEqual(["author", "Author 1"]);
    expect(tags).toContainEqual(["author", "Author 2"]);
    expect(tags).toContainEqual(["version", "1.0"]);
    expect(tags).toContainEqual(["summary", "Test summary"]);
    expect(tags).toContainEqual(["t", "tag1"]);
    expect(tags).toContainEqual(["t", "tag2"]);
  });

  it("should handle index card format correctly", () => {
    // A title followed by the literal body text "index card".
    const indexCardContent = `= Test Index Card
index card`;

    const { metadata, content } = extractDocumentMetadata(indexCardContent);

    expect(metadata.title).toBe("Test Index Card");
    expect(content.trim()).toBe("index card");
  });

  it("should handle empty content gracefully", () => {
    const emptyContent = "";

    const { metadata, content } = extractDocumentMetadata(emptyContent);

    expect(metadata.title).toBeUndefined();
    expect(content).toBe("");
  });

  it("should handle keywords as tags", () => {
    const contentWithKeywords = `= Test Document
:keywords: keyword1, keyword2, keyword3
Some content here.`;

    const { metadata } = extractDocumentMetadata(contentWithKeywords);

    expect(metadata.tags).toEqual(["keyword1", "keyword2", "keyword3"]);
  });

  it("should handle both tags and keywords", () => {
    const contentWithBoth = `= Test Document
:tags: tag1, tag2
:keywords: keyword1, keyword2
Some content here.`;

    const { metadata } = extractDocumentMetadata(contentWithBoth);

    // Both tags and keywords are valid, both should be accumulated
    expect(metadata.tags).toEqual(["tag1", "tag2", "keyword1", "keyword2"]);
  });

  it("should handle tags only", () => {
    const contentWithTags = `= Test Document
:tags: tag1, tag2, tag3
Content here.`;

    const { metadata } = extractDocumentMetadata(contentWithTags);

    expect(metadata.tags).toEqual(["tag1", "tag2", "tag3"]);
  });

  it("should handle both summary and description", () => {
    // Two separate documents, one per spelling; each is expected to populate
    // the same `summary` field.
    const contentWithSummary = `= Test Document
:summary: This is a summary
Content here.`;
    const contentWithDescription = `= Test Document
:description: This is a description
Content here.`;

    const { metadata: summaryMetadata } = extractDocumentMetadata(
      contentWithSummary,
    );
    const { metadata: descriptionMetadata } = extractDocumentMetadata(
      contentWithDescription,
    );

    expect(summaryMetadata.summary).toBe("This is a summary");
    expect(descriptionMetadata.summary).toBe("This is a description");
  });

  // Cases for `extractSmartMetadata`, exercised with three input shapes:
  // sections only, a title-only header, and a full document header.
  describe("Smart metadata extraction", () => {
    it("should handle section-only content correctly", () => {
      const sectionOnlyContent = `== First Section
:author: Section Author
:description: This is the first section
:tags: section1, content
This is the content of the first section.
== Second Section
:summary: This is the second section
:type: chapter
This is the content of the second section.`;

      const { metadata, content } = extractSmartMetadata(sectionOnlyContent);

      // Should extract title from first section
      expect(metadata.title).toBe("First Section");
      // Should not have document-level metadata since there's no document header
      expect(metadata.authors).toBeUndefined();
      expect(metadata.version).toBeUndefined();
      expect(metadata.publicationDate).toBeUndefined();
      // Content should be preserved
      expect(content).toBe(sectionOnlyContent);
    });

    it("should handle minimal document header (just title) correctly", () => {
      const minimalDocumentHeader = `= Test Document
== First Section
:author: Section Author
:description: This is the first section
This is the content of the first section.
== Second Section
:summary: This is the second section
:type: chapter
This is the content of the second section.`;

      const { metadata, content } = extractSmartMetadata(minimalDocumentHeader);

      // Should extract title from document header
      expect(metadata.title).toBe("Test Document");
      // Should not have document-level metadata since there's no other metadata
      expect(metadata.authors).toBeUndefined();
      // `version` is deliberately not asserted here: per the original note it
      // might be set from section attributes such as `:type: chapter`.
      expect(metadata.publicationDate).toBeUndefined();
      // Content should preserve the title line for 30040 events
      expect(content).toContain("= Test Document");
      expect(content).toContain("== First Section");
      expect(content).toContain("== Second Section");
    });

    it("should handle document with full header correctly", () => {
      const documentWithHeader = `= Test Document
John Doe <john@example.com>
1.0, 2024-01-15: Alexandria Test
:summary: This is a test document
:author: Jane Smith
== First Section
:author: Section Author
:description: This is the first section
This is the content.`;

      const { metadata, content } = extractSmartMetadata(documentWithHeader);

      // Should extract document-level metadata
      expect(metadata.title).toBe("Test Document");
      expect(metadata.authors).toEqual(["John Doe", "Jane Smith"]);
      expect(metadata.version).toBe("1.0");
      expect(metadata.publishedBy).toBe("Alexandria Test");
      expect(metadata.publicationDate).toBe("2024-01-15");
      expect(metadata.summary).toBe("This is a test document");
      // Content should be cleaned
      expect(content).not.toContain("= Test Document");
      expect(content).not.toContain("John Doe <john@example.com>");
      expect(content).not.toContain("1.0, 2024-01-15: Alexandria Test");
      expect(content).not.toContain(":summary: This is a test document");
      expect(content).not.toContain(":author: Jane Smith");
    });
  });
});