update book-wikilinks

7 months ago · 50936a60a1
6 changed files with 109 additions and 208 deletions
--- a/src/components/Bookstr/BookstrContent.tsx
+++ b/src/components/Bookstr/BookstrContent.tsx
@@ -70,32 +70,28 @@ export function BookstrContent({ wikilink, className }: BookstrContentProps) {
  // Parse the wikilink
  const parsed = useMemo(() => {
    try {
-      // Extract book type from wikilink (e.g., "book:bible:Genesis 3:1")
-      let bookType = 'bible'
-      let content = wikilink
+      // NKBIP-08 format: book::... (must have double colon)
+      let wikilinkToParse = wikilink
      
-      if (wikilink.startsWith('book:')) {
-        const parts = wikilink.substring(5).split(':')
-        if (parts.length >= 2) {
-          bookType = parts[0]
-          content = parts.slice(1).join(':')
-        }
-      } else if (wikilink.includes(':')) {
-        // Might be "bible:Genesis 3:1" format
-        const firstColon = wikilink.indexOf(':')
-        const potentialType = wikilink.substring(0, firstColon)
-        if (['bible', 'quran', 'catechism', 'torah'].includes(potentialType.toLowerCase())) {
-          bookType = potentialType.toLowerCase()
-          content = wikilink.substring(firstColon + 1)
+      if (wikilink.startsWith('book::')) {
+        // Already in correct format, add brackets if needed
+        if (!wikilink.startsWith('[[')) {
+          wikilinkToParse = `[[${wikilink}]]`
+        } else {
+          wikilinkToParse = wikilink
        }
+      } else {
+        // Invalid format - must start with book::
+        return null
      }
      
-      const result = parseBookWikilink(`[[book:${bookType}:${content}]]`, bookType)
+      const result = parseBookWikilink(wikilinkToParse)
      if (result) {
+        const inferredBookType = result.bookType || 'bible'
        logger.debug('BookstrContent: Parsed wikilink', {
          wikilink,
-          content,
-          bookType,
+          wikilinkToParse,
+          bookType: inferredBookType,
          referenceCount: result.references.length,
          references: result.references.map(r => ({
            book: r.book,
@@ -105,8 +101,9 @@ export function BookstrContent({ wikilink, className }: BookstrContentProps) {
          })),
          versions: result.versions
        })
+        return { ...result, bookType: inferredBookType }
      }
-      return result ? { ...result, bookType } : null
+      return null
    } catch (err) {
      logger.error('Error parsing bookstr wikilink', { error: err, wikilink })
      return null
--- a/src/components/Note/AsciidocArticle/AsciidocArticle.tsx
+++ b/src/components/Note/AsciidocArticle/AsciidocArticle.tsx
@@ -974,7 +974,7 @@ export default function AsciidocArticle({
      if (!linkContent) return
      
      // Skip if this is a bookstr wikilink (already processed)
-      if (linkContent.startsWith('book:')) {
+      if (linkContent.startsWith('book::')) {
        return
      }
      
--- a/src/components/Note/MarkdownArticle/MarkdownArticle.tsx
+++ b/src/components/Note/MarkdownArticle/MarkdownArticle.tsx
@@ -2026,26 +2026,12 @@ function parseMarkdownContent(
    } else if (pattern.type === 'wikilink') {
      const linkContent = pattern.data
      
-      // Check if this is a bookstr wikilink
-      // Formats: book:bible:..., bible:..., quran:..., etc.
-      const isBookstrLink = linkContent.startsWith('book:') || 
-        ['bible', 'quran', 'catechism', 'torah'].some(type => 
-          linkContent.toLowerCase().startsWith(`${type}:`)
-        )
+      // Check if this is a bookstr wikilink (NKBIP-08 format: book::...)
+      const isBookstrLink = linkContent.startsWith('book::')
      
      if (isBookstrLink) {
-        // Extract the bookstr content
-        let bookstrContent = linkContent.trim()
-        // If it doesn't start with "book:", add it for consistency
-        if (!bookstrContent.startsWith('book:')) {
-          // Format: "bible:Genesis 3:1" -> "book:bible:Genesis 3:1"
-          const firstColon = bookstrContent.indexOf(':')
-          if (firstColon > 0) {
-            const bookType = bookstrContent.substring(0, firstColon)
-            const rest = bookstrContent.substring(firstColon + 1)
-            bookstrContent = `book:${bookType}:${rest}`
-          }
-        }
+        // Extract the bookstr content (already in book:: format)
+        const bookstrContent = linkContent.trim()
        parts.push(
          <BookstrContent key={`bookstr-${patternIdx}`} wikilink={bookstrContent} />
        )
--- a/src/lib/bookstr-parser.ts
+++ b/src/lib/bookstr-parser.ts
@@ -11,197 +11,115 @@ export interface BookReference {
 }

 /**
- * Normalize whitespace and case in book reference strings
+ * Normalize string according to NIP-54 rules
 */
-function normalizeBookReferenceWhitespace(ref: string): string {
-  let normalized = ref.trim()
-  
-  // Handle cases where there's no space between book name and chapter/verse
-  normalized = normalized.replace(/^([A-Za-z]+)(\d+)/, '$1 $2')
-  
-  // Normalize multiple spaces to single spaces
-  normalized = normalized.replace(/\s+/g, ' ')
-  
-  return normalized.trim()
+function normalizeNip54(text: string): string {
+  return text
+    .replace(/['"]/g, '') // Remove quotes
+    .replace(/[^a-zA-Z0-9]/g, (char) => {
+      if (/[a-zA-Z]/.test(char)) {
+        return char.toLowerCase()
+      }
+      if (/[0-9]/.test(char)) {
+        return char
+      }
+      return '-'
+    })
+    .toLowerCase()
+    .replace(/-+/g, '-') // Collapse multiple hyphens
+    .replace(/^-|-$/g, '') // Remove leading/trailing hyphens
 }

 /**
- * Parse book notation like "John 1–3; 3:16; 6:14, 44" for any book type
- * Returns an array of BookReference objects
+ * Parse book wikilink notation according to NKBIP-08
+ * Format: "[[book::collection | title chapter:section | version]]"
 */
-export function parseBookNotation(notation: string, bookType: string = 'bible'): BookReference[] {
-  const references: BookReference[] = []
+export function parseBookWikilink(wikilink: string): { references: BookReference[], versions?: string[], bookType?: string } | null {
+  // Remove the [[ and ]] brackets
+  const content = wikilink.replace(/^\[\[|\]\]$/g, '')
+  
+  // Must start with book::
+  if (!content.startsWith('book::')) {
+    return null
+  }
  
-  // Split by comma or semicolon to handle multiple references
-  // Strategy:
-  // 1. First, try to intelligently split on commas/semicolons that are followed by a capital letter (new book)
-  // 2. If that doesn't work, check if all parts start with capital letters (multiple references)
-  // 3. Otherwise, treat as a single reference with verse lists
+  // Format: book::collection | title chapter:section | version
+  const bookContent = content.substring(6).trim() // Remove "book::"
  
-  // Step 1: Try intelligent splitting
-  const parts: string[] = []
-  let currentPart = ''
+  // Split by pipes to parse structure
+  const pipeParts = bookContent.split(/\s+\|\s+/)
  
-  for (let i = 0; i < notation.length; i++) {
-    const char = notation[i]
+  let collection: string | undefined
+  let titlePart = ''
+  let versionPart = ''
+  
+  if (pipeParts.length === 1) {
+    // No pipes: just title (e.g., "book::genesis")
+    titlePart = pipeParts[0]
+  } else if (pipeParts.length === 2) {
+    // One pipe: could be "collection | title" or "title chapter | version"
+    const first = pipeParts[0].trim()
+    const second = pipeParts[1].trim()
    
-    if (char === ',' || char === ';') {
-      // Look ahead to see if this is separating references
-      // Check if there's whitespace followed by a capital letter or number after this comma/semicolon
-      // (Numbers handle cases like "1 John", "2 Corinthians")
-      const afterComma = notation.substring(i + 1)
-      const trimmedAfter = afterComma.trim()
-      
-      // If the next non-whitespace character is a capital letter or number, it's likely a new book reference
-      if (trimmedAfter.length > 0 && /^[A-Z0-9]/.test(trimmedAfter)) {
-        // This comma/semicolon is separating references
-        if (currentPart.trim()) {
-          parts.push(currentPart.trim())
-        }
-        currentPart = ''
-      } else {
-        // This comma/semicolon is part of the current reference (e.g., verse list "1,3,5")
-        currentPart += char
-      }
+    // Check if first part has chapter/section (indicates it's title chapter | version)
+    const hasChapterSection = first.match(/:\d+/) || first.match(/\s+\d+(\s|$)/)
+    
+    if (hasChapterSection) {
+      // Format: "title chapter | version"
+      titlePart = first
+      versionPart = second
    } else {
-      currentPart += char
+      // Format: "collection | title"
+      collection = normalizeNip54(first)
+      titlePart = second
    }
+  } else {
+    // Multiple pipes: "collection | title chapter | version"
+    collection = normalizeNip54(pipeParts[0].trim())
+    titlePart = pipeParts.slice(1, -1).join(' | ')
+    versionPart = pipeParts[pipeParts.length - 1].trim()
  }
  
-  // Add the last part
-  if (currentPart.trim()) {
-    parts.push(currentPart.trim())
-  }
+  // Parse title, chapter, section from titlePart
+  const chapterSectionMatch = titlePart.match(/^(.+?)\s+(\d+|[a-zA-Z0-9_-]+)(?::(.+))?$/)
  
-  // Step 2: If we only got one part but there are commas/semicolons, try simple split
-  if (parts.length === 1 && (notation.includes(',') || notation.includes(';'))) {
-    const simpleParts = notation.split(/[,;]/).map(p => p.trim()).filter(p => p.length > 0)
-    
-    if (simpleParts.length > 1) {
-      // Check if these look like separate references (each starts with a capital letter or number)
-      // Numbers handle cases like "1 John", "2 Corinthians"
-      const allStartWithCapitalOrNumber = simpleParts.every(part => {
-        const trimmed = part.trim()
-        return trimmed.length > 0 && /^[A-Z0-9]/.test(trimmed)
-      })
-      
-      if (allStartWithCapitalOrNumber) {
-        // These are multiple references
-        parts.length = 0
-        parts.push(...simpleParts)
-      }
-      // Otherwise, treat as a single reference with verse lists (e.g., "Genesis 1:1,2,3")
-    }
-  }
+  let title = ''
+  let chapter: number | undefined
+  let verse: string | undefined
  
-  // Step 3: Parse each part
-  for (const part of parts) {
-    const normalizedPart = normalizeBookReferenceWhitespace(part)
-    const ref = parseSingleBookReference(normalizedPart, bookType)
-    if (ref) {
-      references.push(ref)
+  if (chapterSectionMatch) {
+    title = normalizeNip54(chapterSectionMatch[1].trim())
+    const chapterStr = chapterSectionMatch[2]
+    chapter = /^\d+$/.test(chapterStr) ? parseInt(chapterStr, 10) : undefined
+    if (chapterSectionMatch[3]) {
+      verse = chapterSectionMatch[3].trim()
    }
+  } else {
+    title = normalizeNip54(titlePart)
  }
  
-  return references
-}
-
-/**
- * Parse a single book reference like "John 3:16" or "John 1-3" or "John 3:16 KJV"
- */
-function parseSingleBookReference(ref: string, _bookType: string = 'bible'): BookReference | null {
-  // Remove extra whitespace
-  ref = ref.trim()
+  // Parse versions
+  const versions = versionPart ? versionPart.split(/\s+/).map(v => normalizeNip54(v).toUpperCase()).filter(v => v) : undefined
  
-  // First, try to extract version from the end
-  let version: string | undefined
-  let refWithoutVersion = ref
+  // Use collection as bookType (e.g., "bible", "quran", "torah")
+  // If no collection, default to "bible"
+  const inferredBookType = collection || 'bible'
  
-  // Common version abbreviations (can be extended)
-  const versionPattern = /\s+(KJV|NKJV|NIV|ESV|NASB|NLT|MSG|CEV|NRSV|RSV|ASV|YLT|WEB|GNV|DRB|SAHIH|PICKTHALL|YUSUFALI|SHAKIR|CCC|YOUCAT|COMPENDIUM)$/i
-  const versionMatch = ref.match(versionPattern)
-  if (versionMatch) {
-    version = versionMatch[1].toUpperCase()
-    refWithoutVersion = ref.replace(versionPattern, '').trim()
+  // Create reference
+  const reference: BookReference = {
+    book: title
  }
-  
-  // Match patterns
-  const patterns = [
-    // Book Chapter:Verses (e.g., "John 3:16", "John 3:16,18")
-    /^(.+?)\s+(\d+):(.+)$/,
-    // Book Chapter-Verses (e.g., "John 1-3", "John 1-3,5")
-    /^(.+?)\s+(\d+)-(.+)$/,
-    // Book Chapter (e.g., "John 3")
-    /^(.+?)\s+(\d+)$/,
-    // Just Book (e.g., "John")
-    /^(.+)$/
-  ]
-  
-  for (const pattern of patterns) {
-    const match = refWithoutVersion.match(pattern)
-    if (match) {
-      const bookName = match[1].trim()
-      
-      const reference: BookReference = {
-        book: bookName
-      }
-      
-      if (match[2]) {
-        reference.chapter = parseInt(match[2])
-      }
-      
-      if (match[3]) {
-        reference.verse = match[3]
-      }
-      
-      if (version) {
-        reference.version = version
-      }
-      
-      return reference
-    }
+  if (chapter !== undefined) {
+    reference.chapter = chapter
  }
-  
-  return null
-}
-
-/**
- * Parse book wikilink notation like "[[book:bible:John 3:16 | KJV]]" or "[[book:bible:John 3:16 | KJV DRB]]"
- */
-export function parseBookWikilink(wikilink: string, bookType: string = 'bible'): { references: BookReference[], versions?: string[] } | null {
-  // Remove the [[ and ]] brackets
-  const content = wikilink.replace(/^\[\[|\]\]$/g, '')
-  
-  // Handle book: prefix (e.g., "book:bible:John 3:16")
-  let referenceContent = content
-  if (content.startsWith('book:')) {
-    const parts = content.substring(5).split(':')
-    if (parts.length >= 2) {
-      bookType = parts[0]
-      referenceContent = parts.slice(1).join(':')
-    }
-  } else if (content.startsWith('bible:')) {
-    // Legacy Bible prefix support
-    bookType = 'bible'
-    referenceContent = content.substring(6).trim()
+  if (verse) {
+    reference.verse = verse
  }
-  
-  // Split by | to separate references from versions
-  const parts = referenceContent.split('|').map(p => p.trim())
-  
-  if (parts.length === 0) return null
-  
-  // Normalize whitespace in the reference part
-  const normalizedReference = normalizeBookReferenceWhitespace(parts[0])
-  const references = parseBookNotation(normalizedReference, bookType)
-  
-  // Parse multiple versions if provided
-  let versions: string[] | undefined
-  if (parts[1]) {
-    versions = parts[1].split(/\s+/).map(v => v.trim().toUpperCase()).filter(v => v.length > 0)
+  if (versions && versions.length > 0) {
+    reference.version = versions[0] // Use first version for backward compatibility
  }
  
-  return { references, versions }
+  return { references: [reference], versions, bookType: inferredBookType }
 }

 /**
--- a/src/lib/nostr-parser.tsx
+++ b/src/lib/nostr-parser.tsx
@@ -44,7 +44,7 @@ export function parseNostrContent(content: string, event?: Event): ParsedNostrCo
  // Regex to match hashtags
  const hashtagRegex = /#([a-zA-Z0-9_]+)/g
  
-  // Regex to match wikilinks: [[target]] or [[target|display text]] or [[book:...]]
+  // Regex to match wikilinks: [[target]] or [[target|display text]] or [[book::...]]
  const wikilinkRegex = /\[\[([^|\]]+)(?:\|([^\]]+))?\]\]/g
  
  // Regex to match Jumble note URLs: https://jumble.imwald.eu/notes/noteId
--- a/src/services/content-parser.service.ts
+++ b/src/services/content-parser.service.ts
@@ -517,9 +517,9 @@ class ContentParserService {
  private processWikilinks(content: string): string {
    let processed = content

-    // Process bookstr macro wikilinks: [[book:...]] where ... can be any book type and reference
+    // Process bookstr macro wikilinks: [[book::...]] where ... follows NKBIP-08 format
    // These should be converted to a special marker that will be processed in HTML
-    processed = processed.replace(/\[\[book:([^\]]+)\]\]/g, (_match, bookContent) => {
+    processed = processed.replace(/\[\[book::([^\]]+)\]\]/g, (_match, bookContent) => {
      const cleanContent = bookContent.trim()
      // Use a passthrough marker that will be converted to HTML placeholder in processWikilinksInHtml
      return `BOOKSTR:${cleanContent}`