// gc-alexandria/tests/unit/metadataExtraction.test.ts

import { describe, it, expect } from "vitest";
import {
  extractDocumentMetadata,
  extractSectionMetadata,
  parseAsciiDocWithMetadata,
  metadataToTags
} from "../../src/lib/utils/asciidoc_metadata.ts";

describe("AsciiDoc Metadata Extraction", () => {
  // Shared AsciiDoc fixture used by the extractDocumentMetadata and
  // parseAsciiDocWithMetadata tests in this suite. It contains:
  //   - a document title line, an author line and a revision line
  //     ("1.0, 2024-01-15, Alexandria Test");
  //   - header attribute entries, some of which restate header values with
  //     different content (`:author:`, `:version:`, `:published_by:`);
  //   - a preamble paragraph;
  //   - two `==` sections that carry their own attribute entries.
  // Lines inside the template literal start at column 0 because any
  // indentation would become part of the string.
  const testContent = `= Test Document with Metadata
John Doe <john@example.com>
1.0, 2024-01-15, Alexandria Test
:summary: This is a test document for metadata extraction
:author: Jane Smith
:version: 2.0
:published_on: 2024-01-15
:published_by: Alexandria Project
:type: article
:keywords: test, metadata, asciidoc
:image: https://example.com/cover.jpg
:isbn: 978-0-123456-78-9
:source: https://github.com/alexandria/test
:auto-update: yes

This is the preamble content that should be included in the document body.

== First Section
:author: Section Author
:summary: This is the first section
:keywords: section1, content

This is the content of the first section.

== Second Section
:summary: This is the second section
:type: chapter

This is the content of the second section.`;

  // Checks header extraction against the shared `testContent` fixture.
  // The fixture states some values twice (author/revision lines and attribute
  // entries), so the expectations below pin which value each field must hold.
  it("extractDocumentMetadata should extract document metadata correctly", () => {
    const { metadata, content } = extractDocumentMetadata(testContent);

    expect(metadata.title).toBe("Test Document with Metadata");

    // Both the author line ("John Doe") and the `:author:` attribute
    // ("Jane Smith") are expected, in that order.
    expect(metadata.authors).toEqual(["John Doe", "Jane Smith"]);

    // Values of the revision line "1.0, 2024-01-15, Alexandria Test" are
    // expected even though the fixture also sets `:version: 2.0` and
    // `:published_by: Alexandria Project`.
    expect(metadata.version).toBe("1.0");
    expect(metadata.publicationDate).toBe("2024-01-15");
    expect(metadata.publishedBy).toBe("Alexandria Test");

    // Fields whose expected values match the fixture's attribute entries
    // (`:keywords:` -> tags, `:image:` -> coverImage, `:auto-update:` -> autoUpdate).
    expect(metadata.summary).toBe("This is a test document for metadata extraction");
    expect(metadata.type).toBe("article");
    expect(metadata.tags).toEqual(["test", "metadata", "asciidoc"]);
    expect(metadata.coverImage).toBe("https://example.com/cover.jpg");
    expect(metadata.isbn).toBe("978-0-123456-78-9");
    expect(metadata.source).toBe("https://github.com/alexandria/test");
    expect(metadata.autoUpdate).toBe("yes");

    // Content should keep the preamble and sections but not the header metadata
    expect(content).toContain("This is the preamble content");
    expect(content).toContain("== First Section");
    expect(content).not.toContain("= Test Document with Metadata");
    expect(content).not.toContain(":summary:");
  });

  // Section-level extraction: the heading text is expected as `title`, the
  // attribute entries as metadata, and the remaining paragraph as `content`.
  it("extractSectionMetadata should extract section metadata correctly", () => {
    const firstSection = `== First Section
:author: Section Author
:description: This is the first section
:tags: section1, content

This is the content of the first section.`;

    const extracted = extractSectionMetadata(firstSection);

    expect(extracted.title).toBe("First Section");

    // `:description:` is expected under `summary`, `:tags:` under `tags`.
    expect(extracted.metadata.authors).toEqual(["Section Author"]);
    expect(extracted.metadata.summary).toBe("This is the first section");
    expect(extracted.metadata.tags).toEqual(["section1", "content"]);

    // The body must match exactly: no heading, attributes, or blank separator.
    expect(extracted.content).toBe("This is the content of the first section.");
  });

  // Full parse of the shared fixture: document title plus two sections, each
  // with its own title and metadata.
  it("parseAsciiDocWithMetadata should parse complete document", () => {
    const { metadata: documentMetadata, sections } = parseAsciiDocWithMetadata(testContent);

    expect(documentMetadata.title).toBe("Test Document with Metadata");
    expect(sections).toHaveLength(2);

    // Only read the sections after the length check above.
    const firstSection = sections[0];
    const secondSection = sections[1];

    expect(firstSection.title).toBe("First Section");
    expect(secondSection.title).toBe("Second Section");
    expect(firstSection.metadata.authors).toEqual(["Section Author"]);
    expect(secondSection.metadata.summary).toBe("This is the second section");
  });

  // Each metadata field is expected to yield [name, value] tag pairs: one
  // "author" pair per author and one "t" pair per tag. Only membership is
  // checked, not ordering or the absence of additional tags.
  it("metadataToTags should convert metadata to Nostr tags", () => {
    const input = {
      title: "Test Title",
      authors: ["Author 1", "Author 2"],
      version: "1.0",
      summary: "Test summary",
      tags: ["tag1", "tag2"]
    };
    const expectedTags = [
      ["title", "Test Title"],
      ["author", "Author 1"],
      ["author", "Author 2"],
      ["version", "1.0"],
      ["summary", "Test summary"],
      ["t", "tag1"],
      ["t", "tag2"]
    ];

    const actualTags = metadataToTags(input);

    for (const expectedTag of expectedTags) {
      expect(actualTags).toContainEqual(expectedTag);
    }
  });

  // Minimal "index card" document: a title line directly followed by one
  // body line, with no attribute entries or blank separator.
  it("should handle index card format correctly", () => {
    const indexCard = `= Test Index Card
index card`;

    const result = extractDocumentMetadata(indexCard);

    expect(result.metadata.title).toBe("Test Index Card");
    // Surrounding whitespace in the returned content is not significant here.
    expect(result.content.trim()).toBe("index card");
  });

  // An empty input string must yield no title and empty content.
  it("should handle empty content gracefully", () => {
    const result = extractDocumentMetadata("");

    expect(result.metadata.title).toBeUndefined();
    expect(result.content).toBe("");
  });

  // A `:keywords:` attribute alone is expected to populate `metadata.tags`.
  it("should handle keywords as tags", () => {
    const keywordsOnlyDoc = `= Test Document
:keywords: keyword1, keyword2, keyword3

Content here.`;

    const extracted = extractDocumentMetadata(keywordsOnlyDoc);

    expect(extracted.metadata.tags).toEqual(["keyword1", "keyword2", "keyword3"]);
  });

  // When a header declares both `:tags:` and `:keywords:`, the values of both
  // are expected in `metadata.tags`, tags first and keywords after.
  it("should handle both tags and keywords", () => {
    const tagsAndKeywordsDoc = `= Test Document
:tags: tag1, tag2
:keywords: keyword1, keyword2

Content here.`;

    const extracted = extractDocumentMetadata(tagsAndKeywordsDoc);

    expect(extracted.metadata.tags).toEqual(["tag1", "tag2", "keyword1", "keyword2"]);
  });

  // A `:tags:` attribute alone is expected to populate `metadata.tags`.
  it("should handle tags only", () => {
    const tagsOnlyDoc = `= Test Document
:tags: tag1, tag2, tag3

Content here.`;

    const extracted = extractDocumentMetadata(tagsOnlyDoc);

    expect(extracted.metadata.tags).toEqual(["tag1", "tag2", "tag3"]);
  });

  // `:summary:` and `:description:` are exercised in two separate documents;
  // each is expected to end up in `metadata.summary`.
  it("should handle both summary and description", () => {
    const summaryDoc = `= Test Document
:summary: This is a summary

Content here.`;
    const descriptionDoc = `= Test Document
:description: This is a description

Content here.`;

    const fromSummary = extractDocumentMetadata(summaryDoc).metadata;
    const fromDescription = extractDocumentMetadata(descriptionDoc).metadata;

    expect(fromSummary.summary).toBe("This is a summary");
    expect(fromDescription.summary).toBe("This is a description");
  });
});