hashtags and hyperlinks

3 months ago · e0213b6c2e
2 changed files with 331 additions and 16 deletions
--- a/src/converters/to-asciidoc.ts
+++ b/src/converters/to-asciidoc.ts
@ -50,9 +50,19 @@ export function convertToAsciidoc(
    asciidoc = processNostrAddresses(asciidoc, linkBaseURL);
  }
-  // Process hashtags
+  // Process media URLs in markdown links/images first (before converting to AsciiDoc)
  // This ensures media URLs in [text](url) or ![alt](url) format are detected
  asciidoc = processMediaUrlsInMarkdown(asciidoc);
  // Process media URLs (YouTube, Spotify, video, audio files) - for bare URLs
  asciidoc = processMediaUrls(asciidoc);
  // Process bare URLs (convert to AsciiDoc links)
  asciidoc = processBareUrls(asciidoc);
  // Process hashtags (after URLs to avoid conflicts)
  asciidoc = processHashtags(asciidoc);
-
+  
  return asciidoc;
 }
@ -107,13 +117,16 @@ function convertWikipediaToAsciidoc(content: string): string {
 function convertMarkdownToAsciidoc(content: string): string {
  let asciidoc = content.replace(/\\n/g, '\n');
-  // Fix spacing issues
+  // Fix spacing issues (but be careful not to break links and images)
  // Process these BEFORE converting links/images to avoid conflicts
  asciidoc = asciidoc.replace(/`([^`\n]+)`\s*\(([^)]+)\)/g, '`$1` ($2)');
  asciidoc = asciidoc.replace(/([a-zA-Z0-9])`([^`\n]+)`([a-zA-Z0-9])/g, '$1 `$2` $3');
  asciidoc = asciidoc.replace(/([a-zA-Z0-9])`([^`\n]+)`\s*\(/g, '$1 `$2` (');
  asciidoc = asciidoc.replace(/\)`([^`\n]+)`([a-zA-Z0-9])/g, ') `$1` $2');
  asciidoc = asciidoc.replace(/([a-zA-Z0-9])\)([a-zA-Z0-9])/g, '$1) $2');
-  asciidoc = asciidoc.replace(/([a-zA-Z0-9])==/g, '$1 ==');
+  // Add space before == but not if it's part of a markdown link pattern
  // Check that == is not immediately after ]( which would be a link
  asciidoc = asciidoc.replace(/([a-zA-Z0-9])(?<!\]\()==/g, '$1 ==');
  // Note: nostr: addresses are processed later in processNostrAddresses
@ -155,12 +168,41 @@ function convertMarkdownToAsciidoc(content: string): string {
  asciidoc = asciidoc.replace(/`([^`]+)`/g, '`$1`'); // Inline code
  asciidoc = asciidoc.replace(/`\$([^$]+)\$`/g, '`$\\$1\\$$`'); // Preserve LaTeX in code
-  // Convert images
+  // Convert images first (before links, since images are links with ! prefix)
-  asciidoc = asciidoc.replace(/!\[([^\]]*)\]\(([^)]+)\)/g, 'image::$2[$1,width=100%]');
+  // Match: ![alt text](url) or ![](url) - handle empty alt text
-  asciidoc = asciidoc.replace(/image::([^\[]+)\[([^\]]+),width=100%\]/g, 'image::$1[$2,width=100%]');
+  // Use non-greedy matching to stop at first closing paren
  asciidoc = asciidoc.replace(/!\[([^\]]*)\]\(([^)]+?)\)/g, (match, alt, url) => {
    const cleanUrl = url.trim();
    const cleanAlt = alt.trim();
    // Check if it's already a MEDIA: placeholder (processed by processMediaUrlsInMarkdown)
    if (cleanUrl.startsWith('MEDIA:')) {
      return cleanUrl; // Return the placeholder as-is
    }
    // Regular image - escape special characters in URL for AsciiDoc
    const escapedUrl = cleanUrl.replace(/([\[\]])/g, '\\$1');
    return `image::${escapedUrl}[${cleanAlt ? cleanAlt + ', ' : ''}width=100%]`;
  });
-  // Convert links
+  // Convert links (but not images, which we already processed)
-  asciidoc = asciidoc.replace(/\[([^\]]+)\]\(([^)]+)\)/g, 'link:$2[$1]');
+  // Match: [text](url) - use negative lookbehind to avoid matching images
  // Use non-greedy matching for URL to stop at first closing paren
  // This ensures we don't capture trailing punctuation
  asciidoc = asciidoc.replace(/(?<!!)\[([^\]]+)\]\(([^)]+?)\)/g, (match, text, url) => {
    const cleanUrl = url.trim();
    const cleanText = text.trim();
    // Check if it's already a MEDIA: placeholder (processed by processMediaUrlsInMarkdown)
    if (cleanUrl.startsWith('MEDIA:')) {
      return cleanUrl; // Return the placeholder as-is
    }
    // Regular link - escape special AsciiDoc characters in both URL and text
    const escapedUrl = cleanUrl.replace(/([\[\]])/g, '\\$1');
    const escapedText = cleanText.replace(/([\[\]])/g, '\\$1');
    return `link:${escapedUrl}[${escapedText}]`;
  });
  // Convert horizontal rules
  asciidoc = asciidoc.replace(/^---$/gm, '\'\'\'');
@ -319,14 +361,108 @@ function processNostrAddresses(content: string, linkBaseURL: string): string {
  });
 }
 /**
 * Replaces media URLs used as markdown link/image targets with `MEDIA:` placeholders.
 *
 * Handled targets, in order:
 * - YouTube (`watch?v=`, `embed/`, `v/`, `youtu.be/`)  -> `MEDIA:youtube:<id>`
 * - Spotify (track/album/playlist/artist/episode/show) -> `MEDIA:spotify:<type>:<id>`
 * - video file extensions                             -> `MEDIA:video:<url>`
 * - audio file extensions                             -> `MEDIA:audio:<url>`
 *
 * All four patterns accept both `[text](url)` and `![alt](url)`, including an
 * empty label, so an image-style embed never leaves a stray `!` in front of
 * the placeholder. The label itself is discarded.
 *
 * @param content - Text that may contain markdown links or images.
 * @returns The text with matching links/images replaced by placeholders.
 */
 function processMediaUrlsInMarkdown(content: string): string {
  // Query strings are dropped from file URLs so the placeholder carries a clean URL.
  const stripQuery = (url: string): string => url.replace(/\?.*$/, '');

  let processed = content;

  // YouTube: the 11-character video id is the only part kept.
  processed = processed.replace(
    /!?\[[^\]]*\]\((?:https?:\/\/)?(?:www\.)?(?:youtube\.com\/(?:watch\?v=|embed\/|v\/)|youtu\.be\/)([a-zA-Z0-9_-]{11})(?:[?&][^?\s<>"{}|\\^`\[\]()]*)?\)/gi,
    (_match: string, videoId: string) => `MEDIA:youtube:${videoId}`,
  );

  // Spotify: keep the resource type and id.
  processed = processed.replace(
    /!?\[[^\]]*\]\((?:https?:\/\/)?(?:open\.)?spotify\.com\/(track|album|playlist|artist|episode|show)\/([a-zA-Z0-9]+)(?:[?&][^?\s<>"{}|\\^`\[\]()]*)?\)/gi,
    (_match: string, type: string, id: string) => `MEDIA:spotify:${type}:${id}`,
  );

  // Video files. This pass runs before the audio pass, so `.ogg` targets become video placeholders.
  processed = processed.replace(
    /!?\[[^\]]*\]\((https?:\/\/[^\s<>"{}|\\^`\[\]()]+\.(?:mp4|webm|ogg|m4v|mov|avi|mkv|flv|wmv))(?:\?[^\s<>"{}|\\^`\[\]()]*)?\)/gi,
    (_match: string, url: string) => `MEDIA:video:${stripQuery(url)}`,
  );

  // Audio files.
  processed = processed.replace(
    /!?\[[^\]]*\]\((https?:\/\/[^\s<>"{}|\\^`\[\]()]+\.(?:mp3|m4a|ogg|wav|flac|aac|opus|wma))(?:\?[^\s<>"{}|\\^`\[\]()]*)?\)/gi,
    (_match: string, url: string) => `MEDIA:audio:${stripQuery(url)}`,
  );

  return processed;
 }
 /**
 * Replaces bare media URLs with `MEDIA:` placeholders.
 *
 * - YouTube (`youtube.com/watch?v=`, `youtube.com/embed/`, `youtube.com/v/`,
 *   `youtu.be/`) becomes `MEDIA:youtube:<id>`.
 * - Spotify (`track`, `album`, `playlist`, `artist`, `episode`, `show`)
 *   becomes `MEDIA:spotify:<type>:<id>`.
 * - URLs ending in a known video or audio extension become
 *   `MEDIA:video:<url>` / `MEDIA:audio:<url>` with the query string removed.
 *
 * The file passes are guarded in two ways:
 * - A URL already sitting inside a `MEDIA:video:` / `MEDIA:audio:` placeholder
 *   is left alone. This matters because `.ogg` is in both extension lists and
 *   the video pass runs first; without the guard the audio pass would wrap the
 *   placeholder a second time.
 * - The extension must end the path, so a host such as `www.movies.com` is not
 *   cut in half at `.mov`.
 *
 * @param content - Text that may contain bare media URLs.
 * @returns The text with media URLs replaced by placeholders.
 */
 function processMediaUrls(content: string): string {
  // Drop the query string so the placeholder carries a clean file URL.
  const stripQuery = (url: string): string => url.replace(/\?.*$/, '');

  let processed = content;

  // YouTube: only the 11-character video id is kept.
  processed = processed.replace(
    /(?:https?:\/\/)?(?:www\.)?(?:youtube\.com\/(?:watch\?v=|embed\/|v\/)|youtu\.be\/)([a-zA-Z0-9_-]{11})(?:[?&][^?\s<>"{}|\\^`\[\]()]*)?/gi,
    (_match: string, videoId: string) => `MEDIA:youtube:${videoId}`,
  );

  // Spotify: keep resource type and id.
  processed = processed.replace(
    /(?:https?:\/\/)?(?:open\.)?spotify\.com\/(track|album|playlist|artist|episode|show)\/([a-zA-Z0-9]+)(?:[?&][^?\s<>"{}|\\^`\[\]()]*)?/gi,
    (_match: string, type: string, id: string) => `MEDIA:spotify:${type}:${id}`,
  );

  // Video files.
  //  - lookbehind: skip URLs that already belong to a file placeholder
  //  - lookahead:  the extension may not be followed by more path/host characters
  processed = processed.replace(
    /(?<!MEDIA:(?:video|audio):)https?:\/\/[^\s<>"{}|\\^`\[\]()]+\.(?:mp4|webm|ogg|m4v|mov|avi|mkv|flv|wmv)(?![\w\-\/%~]|\.\w)(?:\?[^\s<>"{}|\\^`\[\]()]*)?/gi,
    (match: string) => `MEDIA:video:${stripQuery(match)}`,
  );

  // Audio files, with the same guards as the video pass.
  processed = processed.replace(
    /(?<!MEDIA:(?:video|audio):)https?:\/\/[^\s<>"{}|\\^`\[\]()]+\.(?:mp3|m4a|ogg|wav|flac|aac|opus|wma)(?![\w\-\/%~]|\.\w)(?:\?[^\s<>"{}|\\^`\[\]()]*)?/gi,
    (match: string) => `MEDIA:audio:${stripQuery(match)}`,
  );

  return processed;
 }
 /**
 * Converts bare `http://`, `https://` and `www.` URLs into AsciiDoc links of
 * the form `link:<url>[<text>]`.
 *
 * URLs that are already part of another construct are matched by the first
 * alternative of the pattern and returned untouched:
 * - markdown link targets (`](url`)
 * - existing AsciiDoc macros (`link:url`, `image:url`, `image::url`)
 * - file placeholders (`MEDIA:video:url`, `MEDIA:audio:url`)
 *
 * Consuming the whole protected URL in one match also stops the `www.`
 * alternative from firing in the middle of `https://www.…`.
 *
 * Trailing sentence punctuation (`. , ; : ! ?`) is kept outside the link.
 * `www.` URLs get an `https://` scheme in the link target while the visible
 * text stays as written.
 *
 * @param content - Text that may contain bare URLs.
 * @returns The text with bare URLs wrapped in `link:` macros.
 */
 function processBareUrls(content: string): string {
  // Group 1: URL inside an existing construct (left as-is).
  // Group 2: genuinely bare URL (converted).
  const urlPattern =
    /((?:\]\(|link:|image::?|MEDIA:(?:video|audio):)(?:https?:\/\/|www\.)[^\s<>"{}|\\^`\[\]()]*)|\b(https?:\/\/[^\s<>"{}|\\^`\[\]()]+|www\.[^\s<>"{}|\\^`\[\]()]+)/gi;

  return content.replace(
    urlPattern,
    (match: string, protectedUrl: string | undefined, url: string | undefined) => {
      if (protectedUrl !== undefined || url === undefined) {
        return match;
      }

      // Keep punctuation that ends the sentence out of the link.
      const trailing = /[.,;:!?]+$/.exec(url)?.[0] ?? '';
      const bareUrl = trailing.length > 0 ? url.slice(0, -trailing.length) : url;

      // Nothing meaningful left after trimming (e.g. "www..") - leave the text alone.
      if (!/^(?:https?:\/\/|www\.)./i.test(bareUrl)) {
        return match;
      }

      const target = /^www\./i.test(bareUrl) ? `https://${bareUrl}` : bareUrl;
      // The pattern excludes `[` and `]`, so no AsciiDoc escaping is needed here.
      return `link:${target}[${bareUrl}]${trailing}`;
    },
  );
 }
 /**
 * Converts hashtags to the `hashtag:<lowercased tag>[#<tag>]` placeholder format.
 *
 * A hashtag is only recognised at the start of the input, after whitespace
 * (which includes line breaks), or directly after `>`. A `#` glued to other
 * characters, such as a URL fragment, is therefore left untouched. The
 * character matched before the `#` is written back unchanged in front of the
 * placeholder.
 *
 * @param content - Text that may contain hashtags.
 * @returns The text with hashtags replaced by `hashtag:` placeholders.
 */
 function processHashtags(content: string): string {
  return content.replace(
    /(^|\s|>)#([a-zA-Z0-9_]+)(?![a-zA-Z0-9_])/g,
    (_match: string, before: string, hashtag: string) =>
      `${before}hashtag:${hashtag.toLowerCase()}[#${hashtag}]`,
  );
 }
--- a/src/processors/html-postprocess.ts
+++ b/src/processors/html-postprocess.ts
@ -18,10 +18,8 @@ export function postProcessHtml(html: string, options: PostProcessOptions = {}):
    return `<span data-bookstr="${escaped}" class="bookstr-placeholder"></span>`;
  });
-  // Convert hashtag links to HTML
+  // Convert hashtag links to HTML (styled like links but not clickable)
  processed = processed.replace(/hashtag:([^[]+)\[([^\]]+)\]/g, (_match, normalizedHashtag, displayText) => {
    // URL encode the hashtag to prevent XSS
    const encodedHashtag = encodeURIComponent(normalizedHashtag);
    // HTML escape the display text
    const escapedDisplay = displayText
      .replace(/&/g, '&amp;')
@ -29,7 +27,8 @@ export function postProcessHtml(html: string, options: PostProcessOptions = {}):
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');
-    return `<a href="/notes?t=${encodedHashtag}" class="hashtag-link text-green-600 dark:text-green-400 hover:text-green-700 dark:hover:text-green-300 hover:underline">${escapedDisplay}</a>`;
+    // Use span instead of <a> tag - same color as links but no underline and not clickable
    return `<span class="hashtag-link">${escapedDisplay}</span>`;
  });
  // Convert WIKILINK:dtag|display placeholder format to HTML
@ -68,6 +67,12 @@ export function postProcessHtml(html: string, options: PostProcessOptions = {}):
    }
  });
  // Process media URLs (YouTube, Spotify, video, audio)
  processed = processMedia(processed);
  // Process OpenGraph links (external links that should have rich previews)
  processed = processOpenGraphLinks(processed, options.linkBaseURL);
  // Process images: add max-width styling and data attributes
  processed = processImages(processed);
@ -100,6 +105,180 @@ function getNostrType(id: string): 'npub' | 'nprofile' | 'nevent' | 'naddr' | 'n
  return null;
 }
 /**
 * Converts `MEDIA:` placeholders into HTML embeds/players.
 *
 * - `MEDIA:youtube:<id>`            -> responsive YouTube `<iframe>`
 * - `MEDIA:spotify:<type>:<id>`     -> Spotify embed `<iframe>`
 * - `MEDIA:video:<http(s) url>`     -> `<video controls>` with one `<source>`
 * - `MEDIA:audio:<http(s) url>`     -> `<audio controls>` with one `<source>`
 *
 * The video `<source>` gets a `type` attribute only when the URL's extension
 * maps to a known MIME type (mp4/m4v, webm, ogg). Other extensions get no
 * `type` instead of being mislabelled as `video/mp4`.
 * NOTE(review): with no `type` the browser has to decide playability from the
 * fetched resource - confirm playback of mov/avi/mkv/flv/wmv on target browsers.
 *
 * @param html - HTML that may contain `MEDIA:` placeholders.
 * @returns The HTML with placeholders replaced by embed markup.
 */
 function processMedia(html: string): string {
  // Escapes a value for use inside a double-quoted HTML attribute.
  const escapeAttr = (value: string): string =>
    value
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');

  // Extension -> MIME type for the video <source>; anything else gets no type.
  const videoMimeTypes = new Map<string, string>([
    ['mp4', 'video/mp4'],
    ['m4v', 'video/mp4'],
    ['webm', 'video/webm'],
    ['ogg', 'video/ogg'],
  ]);

  let processed = html;

  // YouTube embeds. The id pattern only admits [a-zA-Z0-9_-], so it is attribute-safe as captured.
  processed = processed.replace(/MEDIA:youtube:([a-zA-Z0-9_-]+)/g, (_match: string, videoId: string) => {
    return `<div class="media-embed youtube-embed" style="position: relative; padding-bottom: 56.25%; height: 0; overflow: hidden; max-width: 100%; margin: 1rem 0;">
      <iframe 
        style="position: absolute; top: 0; left: 0; width: 100%; height: 100%;" 
        src="https://www.youtube.com/embed/${videoId}" 
        frameborder="0" 
        allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" 
        allowfullscreen
        loading="lazy">
      </iframe>
    </div>`;
  });

  // Spotify embeds. Type is a fixed keyword and the id is alphanumeric, so both are attribute-safe.
  processed = processed.replace(/MEDIA:spotify:(track|album|playlist|artist|episode|show):([a-zA-Z0-9]+)/g, (_match: string, type: string, id: string) => {
    return `<div class="media-embed spotify-embed" style="margin: 1rem 0;">
      <iframe 
        style="border-radius: 12px; width: 100%; max-width: 100%;" 
        src="https://open.spotify.com/embed/${type}/${id}?utm_source=generator" 
        width="100%" 
        height="352" 
        frameborder="0" 
        allowfullscreen="" 
        allow="autoplay; clipboard-write; encrypted-media; fullscreen; picture-in-picture" 
        loading="lazy">
      </iframe>
    </div>`;
  });

  // Video files.
  processed = processed.replace(/MEDIA:video:(https?:\/\/[^\s<>"{}|\\^`\[\]()]+)/g, (_match: string, url: string) => {
    const extension = /\.([a-zA-Z0-9]+)$/.exec(url)?.[1]?.toLowerCase();
    const mimeType = extension !== undefined ? videoMimeTypes.get(extension) : undefined;
    const typeAttr = mimeType !== undefined ? ` type="${mimeType}"` : '';
    return `<div class="media-embed video-embed" style="margin: 1rem 0;">
      <video 
        controls 
        preload="metadata" 
        style="width: 100%; max-width: 100%; height: auto; border-radius: 8px;"
        class="media-player">
        <source src="${escapeAttr(url)}"${typeAttr}>
        Your browser does not support the video tag.
      </video>
    </div>`;
  });

  // Audio files.
  processed = processed.replace(/MEDIA:audio:(https?:\/\/[^\s<>"{}|\\^`\[\]()]+)/g, (_match: string, url: string) => {
    return `<div class="media-embed audio-embed" style="margin: 1rem 0;">
      <audio 
        controls 
        preload="metadata" 
        style="width: 100%; max-width: 100%;"
        class="media-player">
        <source src="${escapeAttr(url)}">
        Your browser does not support the audio tag.
      </audio>
    </div>`;
  });

  return processed;
 }
 /**
 * Marks external `<a href="http(s)://…">` links for client-side OpenGraph previews.
 *
 * Each qualifying anchor is replaced by an `opengraph-link-container` span that
 * holds a rebuilt anchor (`target="_blank"`, `rel="noreferrer noopener"`) and a
 * hidden preview skeleton. The original anchor's other attributes are not
 * carried over; its inner HTML is reused as the link text.
 *
 * An anchor is left untouched when:
 * - it already carries a wikilink / nostr / opengraph / media marker,
 * - its href ends in a known media or image file extension,
 * - its href mentions YouTube or Spotify,
 * - its host equals the host of `linkBaseURL` (compared case-insensitively).
 *
 * The href is read from serialized HTML, so it may already contain character
 * references such as `&amp;`. Only ampersands that do not start a character
 * reference are escaped; otherwise `&amp;` would turn into `&amp;amp;` and
 * break query strings.
 *
 * @param html - HTML to scan for anchors.
 * @param linkBaseURL - Optional base URL; links to the same host are skipped.
 * @returns The HTML with external links wrapped in preview containers.
 */
 function processOpenGraphLinks(html: string, linkBaseURL?: string): string {
  // Lower-cased authority part of an http(s) URL, or null when there is none.
  const hostOf = (url: string): string | null =>
    /^https?:\/\/([^\/]+)/i.exec(url)?.[1]?.toLowerCase() ?? null;

  // Attribute escaping that tolerates input which is already HTML-escaped.
  const escapeAttr = (value: string): string =>
    value
      .replace(/&(?!(?:[a-zA-Z][a-zA-Z0-9]*|#[0-9]+|#[xX][0-9a-fA-F]+);)/g, '&amp;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');

  const skipMarkers = [
    'class="wikilink"',
    'class="nostr-link"',
    'class="opengraph-link"',
    'data-embedded-note',
    'youtube-embed',
    'spotify-embed',
    'media-embed',
    'opengraph-link-container',
  ];

  const baseDomain = linkBaseURL ? hostOf(linkBaseURL) : null;

  // Attributes may appear in any order around href; `s` lets the link text span lines.
  return html.replace(
    /<a\s+([^>]*?)href\s*=\s*["'](https?:\/\/[^"']+)["']([^>]*?)>(.*?)<\/a>/gis,
    (match: string, _before: string, href: string, _after: string, linkText: string) => {
      // Already handled by another processor.
      if (skipMarkers.some((marker) => match.includes(marker))) {
        return match;
      }

      // Direct links to media/image files get no preview.
      if (/\.(mp4|webm|ogg|m4v|mov|avi|mkv|flv|wmv|mp3|m4a|wav|flac|aac|opus|wma|jpeg|jpg|png|gif|webp|svg)$/i.test(href)) {
        return match;
      }

      // YouTube and Spotify are rendered as media embeds elsewhere.
      if (/youtube\.com|youtu\.be|spotify\.com/i.test(href)) {
        return match;
      }

      // Same-host links are internal.
      if (baseDomain !== null && hostOf(href) === baseDomain) {
        return match;
      }

      const escapedUrl = escapeAttr(href);

      // The actual OpenGraph fetching is done client-side via JavaScript.
      return `<span class="opengraph-link-container" data-og-url="${escapedUrl}">
      <a href="${escapedUrl}" target="_blank" rel="noreferrer noopener" class="opengraph-link break-words inline-flex items-baseline gap-1">${linkText} <svg class="size-3" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 6H6a2 2 0 00-2 2v10a2 2 0 002 2h10a2 2 0 002-2v-4M14 4h6m0 0v6m0-6L10 14" /></svg></a>
      <div class="opengraph-preview" data-og-loading="true" style="display: none;">
        <div class="opengraph-card">
          <div class="opengraph-image-container">
            <img class="opengraph-image" src="" alt="" style="display: none;" />
          </div>
          <div class="opengraph-content">
            <div class="opengraph-site"></div>
            <div class="opengraph-title"></div>
            <div class="opengraph-description"></div>
          </div>
        </div>
      </div>
    </span>`;
    },
  );
 }
 /**
 * Process images: add max-width styling and data attributes
 */