bug-fixes

3 months ago · 9708d879b4
8 changed files with 159 additions and 86 deletions
--- a/README.md
+++ b/README.md
@@ -3,9 +3,7 @@
 A super-parser for Nostr event content that handles multiple content formats including AsciiDoc, Markdown, code syntax highlighting, LaTeX, musical notation, and `nostr:` prefixed addresses.
 Built with TypeScript/JavaScript using:
- **asciidoctor.js** for AsciiDoc processing
+- **@asciidoctor/core** for AsciiDoc processing (includes Markdown-to-AsciiDoc conversion and highlight.js integration)
 - **marked** for Markdown processing
 - **highlight.js** for code syntax highlighting
 ## Features
--- a/src/converters/to-asciidoc.ts
+++ b/src/converters/to-asciidoc.ts
@@ -67,8 +67,7 @@ function convertMarkdownToAsciidoc(content: string): string {
  asciidoc = asciidoc.replace(/([a-zA-Z0-9])\)([a-zA-Z0-9])/g, '$1) $2');
  asciidoc = asciidoc.replace(/([a-zA-Z0-9])==/g, '$1 ==');
-  // Preserve nostr: addresses temporarily
+  // Note: nostr: addresses are processed later in processNostrAddresses
  asciidoc = asciidoc.replace(/nostr:([a-z0-9]+)/g, 'nostr:$1');
  // Convert headers
  asciidoc = asciidoc.replace(/^#{6}\s+(.+)$/gm, '====== $1 ======');
@@ -89,8 +88,8 @@ function convertMarkdownToAsciidoc(content: string): string {
  asciidoc = asciidoc.replace(/~(.+?)~/g, '[subscript]#$1#'); // Subscript
  asciidoc = asciidoc.replace(/\^(.+?)\^/g, '[superscript]#$1#'); // Superscript
-  // Convert code blocks
+  // Convert code blocks (handle both \n and \r\n line endings)
-  asciidoc = asciidoc.replace(/```(\w+)?\n([\s\S]*?)\n```/g, (_match, lang, code) => {
+  asciidoc = asciidoc.replace(/```(\w+)?\r?\n([\s\S]*?)\r?\n```/g, (_match, lang, code) => {
    const trimmedCode = code.trim();
    if (trimmedCode.length === 0) return '';
@@ -211,11 +210,15 @@ function convertMarkdownToAsciidoc(content: string): string {
 /**
 * Converts plain text to AsciiDoc format
 * Preserves line breaks by converting single newlines to line continuations
 */
 function convertPlainTextToAsciidoc(content: string): string {
  // Preserve double newlines (paragraph breaks)
  // Convert single newlines to line continuations ( +\n)
  return content
-    .replace(/\n\n/g, '\n\n')
+    .replace(/\r\n/g, '\n') // Normalize line endings
-    .replace(/\n/g, ' +\n');
+    .replace(/\n\n+/g, '\n\n') // Normalize multiple newlines to double
+    .replace(/([^\n])\n([^\n])/g, '$1 +\n$2'); // Single newlines become line continuations
 }
 /**
@@ -254,10 +257,13 @@ function processWikilinks(content: string, linkBaseURL: string): string {
 /**
 * Processes nostr: addresses
 * Converts to link:nostr:...[...] format
 * Valid bech32 prefixes: npub, nprofile, nevent, naddr, note
 */
 function processNostrAddresses(content: string, linkBaseURL: string): string {
-  // Match nostr: followed by valid bech32 string
+  // Match nostr: followed by valid bech32 prefix and identifier
-  return content.replace(/nostr:([a-z0-9]+[a-z0-9]{6,})/g, (_match, bech32Id) => {
+  // Bech32 format: prefix + separator (1) + data (at least 6 chars for valid identifiers)
+  const nostrPattern = /nostr:((?:npub|nprofile|nevent|naddr|note)1[a-z0-9]{6,})/gi;
+  return content.replace(nostrPattern, (_match, bech32Id) => {
    return `link:nostr:${bech32Id}[${bech32Id}]`;
  });
 }
--- a/src/detector.ts
+++ b/src/detector.ts
@@ -27,21 +27,19 @@ export function detectFormat(content: string): ContentFormat {
    }
  }
-  // Check for Markdown indicators
+  // Check for Markdown indicators (more specific patterns to avoid false positives)
  const markdownIndicators = [
-    '# ',           // Heading
+    /^#{1,6}\s+/m,           // Heading at start of line
-    '## ',          // Subheading
+    /```[\s\S]*?```/,        // Code block
-    '```',          // Code block
+    /\*\*[^*]+\*\*/,         // Bold text
-    '**',           // Bold
+    /^[-*+]\s+/m,            // List item at start of line
-    '*',            // Italic or list
+    /!\[[^\]]*\]\([^)]+\)/,  // Image syntax
-    '- ',           // List item
+    /\[[^\]]+\]\([^)]+\)/,   // Link syntax
-    '![',           // Image
-    '[',            // Link
  ];
  let markdownScore = 0;
  for (const indicator of markdownIndicators) {
-    if (content.includes(indicator)) {
+    if (indicator.test(content)) {
      markdownScore++;
    }
  }
--- a/src/extractors/metadata.ts
+++ b/src/extractors/metadata.ts
@@ -28,8 +28,8 @@ function extractNostrLinks(content: string): NostrLink[] {
  const nostrLinks: NostrLink[] = [];
  const seen = new Set<string>();
-  // Extract nostr: prefixed links
+  // Extract nostr: prefixed links (valid bech32 format)
-  const nostrMatches = content.match(/nostr:([a-z0-9]+[a-z0-9]{6,})/g) || [];
+  const nostrMatches = content.match(/nostr:((?:npub|nprofile|nevent|naddr|note)1[a-z0-9]{6,})/gi) || [];
  nostrMatches.forEach(match => {
    const id = match.substring(6); // Remove 'nostr:'
    const type = getNostrType(id);
@@ -79,20 +79,33 @@ function extractWikilinks(content: string): Wikilink[] {
 /**
 * Extract hashtags from content
 * Excludes hashtags in URLs, code blocks, and inline code
 */
 function extractHashtags(content: string): string[] {
  const hashtags: string[] = [];
  const seen = new Set<string>();
-  // Extract hashtags: #hashtag
+  // Remove code blocks first to avoid matching inside them
-  const hashtagMatches = content.match(/#([a-zA-Z0-9_]+)/g) || [];
+  const codeBlockPattern = /```[\s\S]*?```/g;
-  hashtagMatches.forEach(match => {
+  const inlineCodePattern = /`[^`]+`/g;
-    const tag = match.substring(1).toLowerCase();
+  const urlPattern = /https?:\/\/[^\s<>"']+/g;
+  let processedContent = content
+    .replace(codeBlockPattern, '') // Remove code blocks
+    .replace(inlineCodePattern, '') // Remove inline code
+    .replace(urlPattern, ''); // Remove URLs
+  // Extract hashtags: #hashtag (word boundary to avoid matching in URLs)
+  const hashtagPattern = /\B#([a-zA-Z0-9_]+)/g;
+  let match;
+  while ((match = hashtagPattern.exec(processedContent)) !== null) {
+    const tag = match[1].toLowerCase();
    if (!seen.has(tag)) {
      hashtags.push(tag);
      seen.add(tag);
    }
-  });
+  }
  return hashtags;
 }
@@ -104,39 +117,35 @@ function extractLinks(content: string, linkBaseURL: string): Array<{ url: string
  const links: Array<{ url: string; text: string; isExternal: boolean }> = [];
  const seen = new Set<string>();
-  // Extract markdown links: [text](url)
+  // Extract markdown links: [text](url) - optimized to avoid double matching
-  const markdownLinks = content.match(/\[([^\]]+)\]\(([^)]+)\)/g) || [];
+  const markdownLinkPattern = /\[([^\]]+)\]\(([^)]+)\)/g;
-  markdownLinks.forEach(match => {
+  let markdownMatch;
-    const linkMatch = match.match(/\[([^\]]+)\]\(([^)]+)\)/);
+  while ((markdownMatch = markdownLinkPattern.exec(content)) !== null) {
-    if (linkMatch) {
+    const [, text, url] = markdownMatch;
-      const [, text, url] = linkMatch;
+    if (!seen.has(url) && !isNostrUrl(url)) {
-      if (!seen.has(url) && !isNostrUrl(url)) {
+      seen.add(url);
-        seen.add(url);
+      links.push({
-        links.push({
+        url,
-          url,
+        text,
-          text,
+        isExternal: isExternalUrl(url, linkBaseURL),
-          isExternal: isExternalUrl(url, linkBaseURL),
+      });
-        });
-      }
    }
-  });
+  }
-  // Extract asciidoc links: link:url[text]
+  // Extract asciidoc links: link:url[text] - optimized to avoid double matching
-  const asciidocLinks = content.match(/link:([^\[]+)\[([^\]]+)\]/g) || [];
+  const asciidocLinkPattern = /link:([^\[]+)\[([^\]]+)\]/g;
-  asciidocLinks.forEach(match => {
+  let asciidocMatch;
-    const linkMatch = match.match(/link:([^\[]+)\[([^\]]+)\]/);
+  while ((asciidocMatch = asciidocLinkPattern.exec(content)) !== null) {
-    if (linkMatch) {
+    const [, url, text] = asciidocMatch;
-      const [, url, text] = linkMatch;
+    if (!seen.has(url) && !isNostrUrl(url)) {
-      if (!seen.has(url) && !isNostrUrl(url)) {
+      seen.add(url);
-        seen.add(url);
+      links.push({
-        links.push({
+        url,
-          url,
+        text,
-          text,
+        isExternal: isExternalUrl(url, linkBaseURL),
-          isExternal: isExternalUrl(url, linkBaseURL),
+      });
-        });
-      }
    }
-  });
+  }
  // Extract raw URLs (basic pattern)
  const urlPattern = /https?:\/\/[^\s<>"']+/g;
@@ -162,29 +171,31 @@ function extractMedia(content: string): string[] {
  const media: string[] = [];
  const seen = new Set<string>();
-  // Extract markdown images: ![alt](url)
+  // Extract markdown images: ![alt](url) - optimized to avoid double matching
-  const imageMatches = content.match(/!\[[^\]]*\]\(([^)]+)\)/g) || [];
+  const markdownImagePattern = /!\[[^\]]*\]\(([^)]+)\)/g;
-  imageMatches.forEach(match => {
+  let markdownImageMatch;
-    const url = match.match(/!\[[^\]]*\]\(([^)]+)\)/)?.[1];
+  while ((markdownImageMatch = markdownImagePattern.exec(content)) !== null) {
+    const url = markdownImageMatch[1];
    if (url && !seen.has(url)) {
      if (isImageUrl(url) || isVideoUrl(url)) {
        media.push(url);
        seen.add(url);
      }
    }
-  });
+  }
-  // Extract asciidoc images: image::url[alt]
+  // Extract asciidoc images: image::url[alt] - optimized to avoid double matching
-  const asciidocImageMatches = content.match(/image::([^\[]+)\[/g) || [];
+  const asciidocImagePattern = /image::([^\[]+)\[/g;
-  asciidocImageMatches.forEach(match => {
+  let asciidocImageMatch;
-    const url = match.match(/image::([^\[]+)\[/)?.[1];
+  while ((asciidocImageMatch = asciidocImagePattern.exec(content)) !== null) {
+    const url = asciidocImageMatch[1];
    if (url && !seen.has(url)) {
      if (isImageUrl(url) || isVideoUrl(url)) {
        media.push(url);
        seen.add(url);
      }
    }
-  });
+  }
  // Extract raw image/video URLs
  const urlPattern = /https?:\/\/[^\s<>"']+/g;
--- a/src/processors/asciidoc.ts
+++ b/src/processors/asciidoc.ts
@@ -120,9 +120,18 @@ export async function processAsciidoc(
      media: [],
    };
  } catch (error) {
-    // Fallback to plain text
+    // Fallback to plain text with error logging
+    const errorMessage = error instanceof Error ? error.message : String(error);
+    // Use process.stderr.write for Node.js compatibility instead of console.error
+    if (typeof process !== 'undefined' && process.stderr) {
+      process.stderr.write(`Error processing AsciiDoc: ${errorMessage}\n`);
+    }
+    // Escape HTML in content for safe display
+    const escapedContent = sanitizeHTML(content);
    return {
-      content: `<p>${sanitizeHTML(content)}</p>`,
+      content: `<p>${escapedContent}</p>`,
      tableOfContents: '',
      hasLaTeX: false,
      hasMusicalNotation: false,
--- a/src/processors/html-postprocess.ts
+++ b/src/processors/html-postprocess.ts
@@ -19,7 +19,16 @@ export function postProcessHtml(html: string, options: PostProcessOptions = {}):
  // Convert hashtag links to HTML
  processed = processed.replace(/hashtag:([^[]+)\[([^\]]+)\]/g, (_match, normalizedHashtag, displayText) => {
-    return `<a href="/notes?t=${normalizedHashtag}" class="hashtag-link text-green-600 dark:text-green-400 hover:text-green-700 dark:hover:text-green-300 hover:underline">${displayText}</a>`;
+    // URL encode the hashtag to prevent XSS
+    const encodedHashtag = encodeURIComponent(normalizedHashtag);
+    // HTML escape the display text
+    const escapedDisplay = displayText
+      .replace(/&/g, '&amp;')
+      .replace(/</g, '&lt;')
+      .replace(/>/g, '&gt;')
+      .replace(/"/g, '&quot;')
+      .replace(/'/g, '&#39;');
+    return `<a href="/notes?t=${encodedHashtag}" class="hashtag-link text-green-600 dark:text-green-400 hover:text-green-700 dark:hover:text-green-300 hover:underline">${escapedDisplay}</a>`;
  });
  // Convert wikilink:dtag[display] format to HTML
@@ -105,7 +114,7 @@ function processImages(html: string): string {
    let updatedAttributes = attributes;
    if (updatedAttributes.match(/class=["']/i)) {
-      updatedAttributes = updatedAttributes.replace(/class=["']([^"']*)["']/i, (_match, classes) => {
+      updatedAttributes = updatedAttributes.replace(/class=["']([^"']*)["']/i, (_match: string, classes: string) => {
        const cleanedClasses = classes.replace(/max-w-\[?[^\s\]]+\]?/g, '').trim();
        const newClasses = cleanedClasses 
          ? `${cleanedClasses} max-w-[400px] object-contain cursor-zoom-in`
--- a/src/processors/html-utils.ts
+++ b/src/processors/html-utils.ts
@@ -32,14 +32,14 @@ export function extractTOC(html: string): { toc: string; contentWithoutTOC: stri
    return { toc: '', contentWithoutTOC: html };
  }
-  // Find the matching closing tag by counting div tags
+  // Find the matching closing tag by counting div/nav tags
  const searchStart = tocStartIdx + tocStartTag.length;
  let depth = 1;
  let i = searchStart;
  while (i < html.length && depth > 0) {
    // Look for opening or closing div/nav tags
-    if (i + 4 < html.length && html.substring(i, i + 4) === '<div') {
+    if (i + 4 < html.length && html.substring(i, i + 4).toLowerCase() === '<div') {
      // Check if it's a closing tag
      if (i + 5 < html.length && html[i + 4] === '/') {
        depth--;
@@ -47,25 +47,35 @@ export function extractTOC(html: string): { toc: string; contentWithoutTOC: stri
        if (closeIdx === -1) break;
        i = closeIdx + 1;
      } else {
-        // Opening tag - find the end
+        // Opening tag - find the end (handle attributes and self-closing)
        const closeIdx = html.indexOf('>', i);
        if (closeIdx === -1) break;
-        // Check if it's self-closing
+        // Check if it's self-closing (look for /> before the >)
-        if (html[closeIdx - 1] !== '/') {
+        const tagContent = html.substring(i, closeIdx);
+        if (!tagContent.endsWith('/')) {
          depth++;
        }
        i = closeIdx + 1;
      }
-    } else if (i + 5 < html.length && html.substring(i, i + 5) === '</div') {
+    } else if (i + 5 < html.length && html.substring(i, i + 5).toLowerCase() === '</div') {
      depth--;
      const closeIdx = html.indexOf('>', i);
      if (closeIdx === -1) break;
      i = closeIdx + 1;
-    } else if (i + 5 < html.length && html.substring(i, i + 5) === '</nav') {
+    } else if (i + 5 < html.length && html.substring(i, i + 5).toLowerCase() === '</nav') {
      depth--;
      const closeIdx = html.indexOf('>', i);
      if (closeIdx === -1) break;
      i = closeIdx + 1;
+    } else if (i + 4 < html.length && html.substring(i, i + 4).toLowerCase() === '<nav') {
+      // Handle opening nav tags
+      const closeIdx = html.indexOf('>', i);
+      if (closeIdx === -1) break;
+      const tagContent = html.substring(i, closeIdx);
+      if (!tagContent.endsWith('/')) {
+        depth++;
+      }
+      i = closeIdx + 1;
    } else {
      i++;
    }
@@ -119,15 +129,30 @@ export function sanitizeHTML(html: string): string {
 /**
 * Processes HTML links to add target="_blank" to external links
 * This function is available for use but not currently called automatically.
 * It can be used in post-processing if needed.
 */
 export function processLinks(html: string, linkBaseURL: string): string {
  // Extract domain from linkBaseURL for comparison
  let linkBaseDomain = '';
  if (linkBaseURL) {
-    const url = linkBaseURL.replace(/^https?:\/\//, '');
+    try {
-    const parts = url.split('/');
+      // Use URL constructor if available (Node.js 10+)
-    if (parts.length > 0) {
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
-      linkBaseDomain = parts[0];
+      const URLConstructor = (globalThis as any).URL;
+      if (URLConstructor) {
+        const url = new URLConstructor(linkBaseURL);
+        linkBaseDomain = url.hostname;
+      } else {
+        throw new Error('URL not available');
+      }
+    } catch {
+      // Fallback to simple string parsing if URL constructor fails
+      const url = linkBaseURL.replace(/^https?:\/\//, '');
+      const parts = url.split('/');
+      if (parts.length > 0) {
+        linkBaseDomain = parts[0];
+      }
    }
  }
@@ -140,9 +165,25 @@ export function processLinks(html: string, linkBaseURL: string): string {
    if (isExternal) {
      // Check if it's pointing to our own domain
-      if (linkBaseDomain && href.includes(linkBaseDomain)) {
+      if (linkBaseDomain) {
-        // Same domain - open in same tab (remove any existing target attribute)
+        try {
-        return match.replace(/\s*target\s*=\s*["'][^"']*["']/gi, '');
+          // eslint-disable-next-line @typescript-eslint/no-explicit-any
+          const URLConstructor = (globalThis as any).URL;
+          if (URLConstructor) {
+            const hrefUrl = new URLConstructor(href);
+            if (hrefUrl.hostname === linkBaseDomain) {
+              // Same domain - open in same tab (remove any existing target attribute)
+              return match.replace(/\s*target\s*=\s*["'][^"']*["']/gi, '');
+            }
+          } else {
+            throw new Error('URL not available');
+          }
+        } catch {
+          // If URL parsing fails, use simple string check
+          if (href.includes(linkBaseDomain)) {
+            return match.replace(/\s*target\s*=\s*["'][^"']*["']/gi, '');
+          }
+        }
      }
      // External link - add target="_blank" and rel="noopener noreferrer" if not already present
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -3,6 +3,7 @@
    "target": "ES2020",
    "module": "commonjs",
    "lib": ["ES2020"],
+    "types": ["node"],
    "outDir": "./dist",
    "rootDir": "./src",
    "strict": true,