Browse Source

hashtags and hyperlinks

master
Silberengel 2 weeks ago
parent
commit
e0213b6c2e
  1. 160
      src/converters/to-asciidoc.ts
  2. 187
      src/processors/html-postprocess.ts

160
src/converters/to-asciidoc.ts

@ -50,9 +50,19 @@ export function convertToAsciidoc(
asciidoc = processNostrAddresses(asciidoc, linkBaseURL); asciidoc = processNostrAddresses(asciidoc, linkBaseURL);
} }
// Process hashtags // Process media URLs in markdown links/images first (before converting to AsciiDoc)
// This ensures media URLs in [text](url) or ![alt](url) format are detected
asciidoc = processMediaUrlsInMarkdown(asciidoc);
// Process media URLs (YouTube, Spotify, video, audio files) - for bare URLs
asciidoc = processMediaUrls(asciidoc);
// Process bare URLs (convert to AsciiDoc links)
asciidoc = processBareUrls(asciidoc);
// Process hashtags (after URLs to avoid conflicts)
asciidoc = processHashtags(asciidoc); asciidoc = processHashtags(asciidoc);
return asciidoc; return asciidoc;
} }
@ -107,13 +117,16 @@ function convertWikipediaToAsciidoc(content: string): string {
function convertMarkdownToAsciidoc(content: string): string { function convertMarkdownToAsciidoc(content: string): string {
let asciidoc = content.replace(/\\n/g, '\n'); let asciidoc = content.replace(/\\n/g, '\n');
// Fix spacing issues // Fix spacing issues (but be careful not to break links and images)
// Process these BEFORE converting links/images to avoid conflicts
asciidoc = asciidoc.replace(/`([^`\n]+)`\s*\(([^)]+)\)/g, '`$1` ($2)'); asciidoc = asciidoc.replace(/`([^`\n]+)`\s*\(([^)]+)\)/g, '`$1` ($2)');
asciidoc = asciidoc.replace(/([a-zA-Z0-9])`([^`\n]+)`([a-zA-Z0-9])/g, '$1 `$2` $3'); asciidoc = asciidoc.replace(/([a-zA-Z0-9])`([^`\n]+)`([a-zA-Z0-9])/g, '$1 `$2` $3');
asciidoc = asciidoc.replace(/([a-zA-Z0-9])`([^`\n]+)`\s*\(/g, '$1 `$2` ('); asciidoc = asciidoc.replace(/([a-zA-Z0-9])`([^`\n]+)`\s*\(/g, '$1 `$2` (');
asciidoc = asciidoc.replace(/\)`([^`\n]+)`([a-zA-Z0-9])/g, ') `$1` $2'); asciidoc = asciidoc.replace(/\)`([^`\n]+)`([a-zA-Z0-9])/g, ') `$1` $2');
asciidoc = asciidoc.replace(/([a-zA-Z0-9])\)([a-zA-Z0-9])/g, '$1) $2'); asciidoc = asciidoc.replace(/([a-zA-Z0-9])\)([a-zA-Z0-9])/g, '$1) $2');
asciidoc = asciidoc.replace(/([a-zA-Z0-9])==/g, '$1 =='); // Add space before == but not if it's part of a markdown link pattern
// Check that == is not immediately after ]( which would be a link
asciidoc = asciidoc.replace(/([a-zA-Z0-9])(?<!\]\()==/g, '$1 ==');
// Note: nostr: addresses are processed later in processNostrAddresses // Note: nostr: addresses are processed later in processNostrAddresses
@ -155,12 +168,41 @@ function convertMarkdownToAsciidoc(content: string): string {
asciidoc = asciidoc.replace(/`([^`]+)`/g, '`$1`'); // Inline code asciidoc = asciidoc.replace(/`([^`]+)`/g, '`$1`'); // Inline code
asciidoc = asciidoc.replace(/`\$([^$]+)\$`/g, '`$\\$1\\$$`'); // Preserve LaTeX in code asciidoc = asciidoc.replace(/`\$([^$]+)\$`/g, '`$\\$1\\$$`'); // Preserve LaTeX in code
// Convert images // Convert images first (before links, since images are links with ! prefix)
asciidoc = asciidoc.replace(/!\[([^\]]*)\]\(([^)]+)\)/g, 'image::$2[$1,width=100%]'); // Match: ![alt text](url) or ![](url) - handle empty alt text
asciidoc = asciidoc.replace(/image::([^\[]+)\[([^\]]+),width=100%\]/g, 'image::$1[$2,width=100%]'); // Use non-greedy matching to stop at first closing paren
asciidoc = asciidoc.replace(/!\[([^\]]*)\]\(([^)]+?)\)/g, (match, alt, url) => {
const cleanUrl = url.trim();
const cleanAlt = alt.trim();
// Check if it's already a MEDIA: placeholder (processed by processMediaUrlsInMarkdown)
if (cleanUrl.startsWith('MEDIA:')) {
return cleanUrl; // Return the placeholder as-is
}
// Regular image - escape special characters in URL for AsciiDoc
const escapedUrl = cleanUrl.replace(/([\[\]])/g, '\\$1');
return `image::${escapedUrl}[${cleanAlt ? cleanAlt + ', ' : ''}width=100%]`;
});
// Convert links // Convert links (but not images, which we already processed)
asciidoc = asciidoc.replace(/\[([^\]]+)\]\(([^)]+)\)/g, 'link:$2[$1]'); // Match: [text](url) - use negative lookbehind to avoid matching images
// Use non-greedy matching for URL to stop at first closing paren
// This ensures we don't capture trailing punctuation
asciidoc = asciidoc.replace(/(?<!!)\[([^\]]+)\]\(([^)]+?)\)/g, (match, text, url) => {
const cleanUrl = url.trim();
const cleanText = text.trim();
// Check if it's already a MEDIA: placeholder (processed by processMediaUrlsInMarkdown)
if (cleanUrl.startsWith('MEDIA:')) {
return cleanUrl; // Return the placeholder as-is
}
// Regular link - escape special AsciiDoc characters in both URL and text
const escapedUrl = cleanUrl.replace(/([\[\]])/g, '\\$1');
const escapedText = cleanText.replace(/([\[\]])/g, '\\$1');
return `link:${escapedUrl}[${escapedText}]`;
});
// Convert horizontal rules // Convert horizontal rules
asciidoc = asciidoc.replace(/^---$/gm, '\'\'\''); asciidoc = asciidoc.replace(/^---$/gm, '\'\'\'');
@ -319,14 +361,108 @@ function processNostrAddresses(content: string, linkBaseURL: string): string {
}); });
} }
/**
 * Replaces markdown links and images that point at embeddable media with
 * `MEDIA:` placeholders, so the generic markdown link/image conversion never
 * sees them.
 *
 * Placeholders produced:
 * - `MEDIA:youtube:<videoId>`
 * - `MEDIA:spotify:<type>:<id>`
 * - `MEDIA:video:<url>` and `MEDIA:audio:<url>` (query string / fragment dropped)
 *
 * The link text or alt text is discarded. Both the link form `[text](url)` and
 * the image form `![alt](url)` are accepted for every media kind, so no stray
 * `!` is left in front of a placeholder.
 *
 * @param content - Markdown source text.
 * @returns The text with media links/images replaced by placeholders.
 */
function processMediaUrlsInMarkdown(content: string): string {
  let processed = content;

  // YouTube: watch?v=, embed/, v/ and youtu.be/ forms; the id is always 11 chars.
  processed = processed.replace(
    /!?\[[^\]]*\]\((?:https?:\/\/)?(?:www\.)?(?:youtube\.com\/(?:watch\?v=|embed\/|v\/)|youtu\.be\/)([a-zA-Z0-9_-]{11})(?:[?&][^?\s<>"{}|\\^`\[\]()]*)?\)/gi,
    (_match: string, videoId: string) => `MEDIA:youtube:${videoId}`,
  );

  // Spotify: track/album/playlist/artist/episode/show.
  processed = processed.replace(
    /!?\[[^\]]*\]\((?:https?:\/\/)?(?:open\.)?spotify\.com\/(track|album|playlist|artist|episode|show)\/([a-zA-Z0-9]+)(?:[?&][^?\s<>"{}|\\^`\[\]()]*)?\)/gi,
    (_match: string, type: string, id: string) => `MEDIA:spotify:${type}:${id}`,
  );

  // Video files. The captured URL stops at the extension: `?` and `#` are
  // excluded from the path part, so a trailing query/fragment is matched
  // separately and dropped, and a URL whose extension only appears inside the
  // query string (e.g. `get?file=a.mp4`) is left alone instead of truncated.
  // `.ogg` is listed for both video and audio; video runs first and wins.
  processed = processed.replace(
    /!?\[[^\]]*\]\((https?:\/\/[^\s<>"{}|\\^`\[\]()?#]+\.(?:mp4|webm|ogg|m4v|mov|avi|mkv|flv|wmv))(?:[?#][^\s<>"{}|\\^`\[\]()]*)?\)/gi,
    (_match: string, url: string) => `MEDIA:video:${url}`,
  );

  // Audio files, same shape as the video rule.
  processed = processed.replace(
    /!?\[[^\]]*\]\((https?:\/\/[^\s<>"{}|\\^`\[\]()?#]+\.(?:mp3|m4a|ogg|wav|flac|aac|opus|wma))(?:[?#][^\s<>"{}|\\^`\[\]()]*)?\)/gi,
    (_match: string, url: string) => `MEDIA:audio:${url}`,
  );

  return processed;
}
/**
 * Replaces bare media URLs (YouTube, Spotify, video files, audio files) with
 * `MEDIA:` placeholders.
 *
 * Placeholders produced:
 * - `MEDIA:youtube:<videoId>`
 * - `MEDIA:spotify:<type>:<id>`
 * - `MEDIA:video:<url>` and `MEDIA:audio:<url>` (query string / fragment dropped)
 *
 * File URLs are only matched when:
 * - they are not already inside a `MEDIA:video:` / `MEDIA:audio:` placeholder,
 *   so running this over already-processed text, or having an extension such
 *   as `.ogg` in both lists, never nests placeholders;
 * - the extension really ends the path, so hosts like `www.aviation.com` or
 *   paths like `a.mp4/poster.jpg` are not mistaken for media files.
 *
 * `.ogg` appears in both lists; the video rule runs first and wins.
 *
 * @param content - Text that may contain bare media URLs.
 * @returns The text with media URLs replaced by placeholders.
 */
function processMediaUrls(content: string): string {
  let processed = content;

  // YouTube: youtube.com/watch?v=, youtu.be/, youtube.com/embed/, youtube.com/v/
  processed = processed.replace(
    /(?:https?:\/\/)?(?:www\.)?(?:youtube\.com\/(?:watch\?v=|embed\/|v\/)|youtu\.be\/)([a-zA-Z0-9_-]{11})(?:[?&][^?\s<>"{}|\\^`\[\]()]*)?/gi,
    (_match: string, videoId: string) => `MEDIA:youtube:${videoId}`,
  );

  // Spotify: track/album/playlist/artist/episode/show
  processed = processed.replace(
    /(?:https?:\/\/)?(?:open\.)?spotify\.com\/(track|album|playlist|artist|episode|show)\/([a-zA-Z0-9]+)(?:[?&][^?\s<>"{}|\\^`\[\]()]*)?/gi,
    (_match: string, type: string, id: string) => `MEDIA:spotify:${type}:${id}`,
  );

  // Video files.
  //  - lookbehind: skip URLs that already belong to a file placeholder
  //  - group 1: URL up to and including the extension (no `?` / `#` allowed)
  //  - optional query/fragment is consumed and dropped
  //  - lookahead: the extension must end the path; trailing sentence
  //    punctuation such as "." or "," is still allowed after it
  processed = processed.replace(
    /(?<!MEDIA:(?:video|audio):)(https?:\/\/[^\s<>"{}|\\^`\[\]()?#]+\.(?:mp4|webm|ogg|m4v|mov|avi|mkv|flv|wmv))(?:[?#][^\s<>"{}|\\^`\[\]()]*)?(?![\w\/%-]|\.\w)/gi,
    (_match: string, url: string) => `MEDIA:video:${url}`,
  );

  // Audio files, same shape as the video rule.
  processed = processed.replace(
    /(?<!MEDIA:(?:video|audio):)(https?:\/\/[^\s<>"{}|\\^`\[\]()?#]+\.(?:mp3|m4a|ogg|wav|flac|aac|opus|wma))(?:[?#][^\s<>"{}|\\^`\[\]()]*)?(?![\w\/%-]|\.\w)/gi,
    (_match: string, url: string) => `MEDIA:audio:${url}`,
  );

  return processed;
}
/**
 * Converts bare `http://`, `https://` and `www.` URLs into AsciiDoc link
 * macros of the form `link:<url>[<text>]`.
 *
 * URLs that are already part of another construct are left untouched:
 * - markdown link/image targets (`](url)`)
 * - AsciiDoc `link:` and `image:` / `image::` macros
 * - `MEDIA:video:` / `MEDIA:audio:` placeholders
 *
 * The context prefix is matched together with the URL and the whole match is
 * returned unchanged. Consuming the URL this way also prevents the `www.`
 * alternative from re-matching the host part of a skipped `https://www...`.
 *
 * Trailing sentence punctuation (`.,;:!?'`) is kept outside the link.
 *
 * @param content - Text that may contain bare URLs.
 * @returns The text with bare URLs wrapped in `link:` macros.
 */
function processBareUrls(content: string): string {
  const urlPattern =
    /(\]\(|link:|image::?|MEDIA:(?:video|audio):)?\b((?:https?:\/\/|www\.)[^\s<>"{}|\\^`\[\]()]+)/gi;

  return content.replace(
    urlPattern,
    (match: string, prefix: string | undefined, rawUrl: string) => {
      // Already inside a link, image or media placeholder: leave as-is.
      if (prefix !== undefined) {
        return match;
      }

      // Split off punctuation that belongs to the sentence, not the URL.
      const trailing = /[.,;:!?']+$/.exec(rawUrl)?.[0] ?? '';
      const url = trailing ? rawUrl.slice(0, -trailing.length) : rawUrl;

      // Nothing meaningful left after trimming (e.g. "www..."): do not link.
      if (!/^(?:https?:\/\/|www\.)./i.test(url)) {
        return match;
      }

      // The pattern is case-insensitive, so the scheme check must be too.
      const fullUrl = /^www\./i.test(url) ? 'https://' + url : url;

      // No bracket escaping is needed: the pattern excludes `[` and `]`.
      return `link:${fullUrl}[${url}]${trailing}`;
    },
  );
}
/** /**
* Processes hashtags * Processes hashtags
* Converts to hashtag:tag[#tag] format * Converts to hashtag:tag[#tag] format
* Handles hashtags at the beginning of lines to prevent line breaks
*/ */
function processHashtags(content: string): string { function processHashtags(content: string): string {
// Match # followed by word characters, avoiding those in URLs, code blocks, etc. // Match # followed by word characters
return content.replace(/\B#([a-zA-Z0-9_]+)/g, (_match, hashtag) => { // Match at word boundary OR at start of line OR after whitespace
// This ensures we don't match # in URLs or code, but do match at line start
return content.replace(/(^|\s|>)#([a-zA-Z0-9_]+)(?![a-zA-Z0-9_])/g, (match, before, hashtag) => {
const normalizedHashtag = hashtag.toLowerCase(); const normalizedHashtag = hashtag.toLowerCase();
return `hashtag:${normalizedHashtag}[#${hashtag}]`; // Preserve the space or line start before the hashtag to prevent line breaks
// Add a zero-width space or ensure proper spacing
const prefix = before === '' ? '' : before;
return `${prefix}hashtag:${normalizedHashtag}[#${hashtag}]`;
}); });
} }

187
src/processors/html-postprocess.ts

@ -18,10 +18,8 @@ export function postProcessHtml(html: string, options: PostProcessOptions = {}):
return `<span data-bookstr="${escaped}" class="bookstr-placeholder"></span>`; return `<span data-bookstr="${escaped}" class="bookstr-placeholder"></span>`;
}); });
// Convert hashtag links to HTML // Convert hashtag links to HTML (styled like links but not clickable)
processed = processed.replace(/hashtag:([^[]+)\[([^\]]+)\]/g, (_match, normalizedHashtag, displayText) => { processed = processed.replace(/hashtag:([^[]+)\[([^\]]+)\]/g, (_match, normalizedHashtag, displayText) => {
// URL encode the hashtag to prevent XSS
const encodedHashtag = encodeURIComponent(normalizedHashtag);
// HTML escape the display text // HTML escape the display text
const escapedDisplay = displayText const escapedDisplay = displayText
.replace(/&/g, '&amp;') .replace(/&/g, '&amp;')
@ -29,7 +27,8 @@ export function postProcessHtml(html: string, options: PostProcessOptions = {}):
.replace(/>/g, '&gt;') .replace(/>/g, '&gt;')
.replace(/"/g, '&quot;') .replace(/"/g, '&quot;')
.replace(/'/g, '&#39;'); .replace(/'/g, '&#39;');
return `<a href="/notes?t=${encodedHashtag}" class="hashtag-link text-green-600 dark:text-green-400 hover:text-green-700 dark:hover:text-green-300 hover:underline">${escapedDisplay}</a>`; // Use span instead of <a> tag - same color as links but no underline and not clickable
return `<span class="hashtag-link">${escapedDisplay}</span>`;
}); });
// Convert WIKILINK:dtag|display placeholder format to HTML // Convert WIKILINK:dtag|display placeholder format to HTML
@ -68,6 +67,12 @@ export function postProcessHtml(html: string, options: PostProcessOptions = {}):
} }
}); });
// Process media URLs (YouTube, Spotify, video, audio)
processed = processMedia(processed);
// Process OpenGraph links (external links that should have rich previews)
processed = processOpenGraphLinks(processed, options.linkBaseURL);
// Process images: add max-width styling and data attributes // Process images: add max-width styling and data attributes
processed = processImages(processed); processed = processImages(processed);
@ -100,6 +105,180 @@ function getNostrType(id: string): 'npub' | 'nprofile' | 'nevent' | 'naddr' | 'n
return null; return null;
} }
/**
 * Converts `MEDIA:` placeholders into HTML embeds/players.
 *
 * Handled placeholders:
 * - `MEDIA:youtube:<id>`           -> responsive YouTube `<iframe>`
 * - `MEDIA:spotify:<type>:<id>`    -> Spotify embed `<iframe>`
 * - `MEDIA:video:<url>`            -> `<video>` player
 * - `MEDIA:audio:<url>`            -> `<audio>` player
 *
 * For video, the `<source type>` attribute is only emitted when the container
 * is known from the file extension (mp4/m4v, webm, ogg). For other extensions
 * the attribute is omitted so the browser decides from the response, instead
 * of being told every file is `video/mp4`.
 *
 * @param html - HTML that may contain `MEDIA:` placeholders.
 * @returns The HTML with placeholders replaced by embed markup.
 */
function processMedia(html: string): string {
  // Escape a value for use inside a double-quoted HTML attribute.
  const escapeAttr = (value: string): string =>
    value
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');

  // Build the ` type="..."` attribute for a video URL, or '' when unknown.
  const videoTypeAttr = (url: string): string => {
    const ext = /\.([a-z0-9]+)(?:[?#].*)?$/i.exec(url)?.[1]?.toLowerCase();
    switch (ext) {
      case 'mp4':
      case 'm4v':
        return ' type="video/mp4"';
      case 'webm':
        return ' type="video/webm"';
      case 'ogg':
        return ' type="video/ogg"';
      default:
        return '';
    }
  };

  let processed = html;

  // YouTube embeds. The id pattern only admits [a-zA-Z0-9_-], so it needs no
  // attribute escaping.
  processed = processed.replace(/MEDIA:youtube:([a-zA-Z0-9_-]+)/g, (_match: string, videoId: string) =>
    [
      '<div class="media-embed youtube-embed" style="position: relative; padding-bottom: 56.25%; height: 0; overflow: hidden; max-width: 100%; margin: 1rem 0;">',
      '<iframe',
      'style="position: absolute; top: 0; left: 0; width: 100%; height: 100%;"',
      `src="https://www.youtube.com/embed/${videoId}"`,
      'frameborder="0"',
      'allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture"',
      'allowfullscreen',
      'loading="lazy">',
      '</iframe>',
      '</div>',
    ].join('\n'),
  );

  // Spotify embeds. Type and id are constrained by the pattern, so they need
  // no attribute escaping either.
  processed = processed.replace(
    /MEDIA:spotify:(track|album|playlist|artist|episode|show):([a-zA-Z0-9]+)/g,
    (_match: string, type: string, id: string) =>
      [
        '<div class="media-embed spotify-embed" style="margin: 1rem 0;">',
        '<iframe',
        'style="border-radius: 12px; width: 100%; max-width: 100%;"',
        `src="https://open.spotify.com/embed/${type}/${id}?utm_source=generator"`,
        'width="100%"',
        'height="352"',
        'frameborder="0"',
        'allowfullscreen=""',
        'allow="autoplay; clipboard-write; encrypted-media; fullscreen; picture-in-picture"',
        'loading="lazy">',
        '</iframe>',
        '</div>',
      ].join('\n'),
  );

  // Video files.
  processed = processed.replace(
    /MEDIA:video:(https?:\/\/[^\s<>"{}|\\^`\[\]()]+)/g,
    (_match: string, url: string) =>
      [
        '<div class="media-embed video-embed" style="margin: 1rem 0;">',
        '<video',
        'controls',
        'preload="metadata"',
        'style="width: 100%; max-width: 100%; height: auto; border-radius: 8px;"',
        'class="media-player">',
        `<source src="${escapeAttr(url)}"${videoTypeAttr(url)}>`,
        'Your browser does not support the video tag.',
        '</video>',
        '</div>',
      ].join('\n'),
  );

  // Audio files.
  processed = processed.replace(
    /MEDIA:audio:(https?:\/\/[^\s<>"{}|\\^`\[\]()]+)/g,
    (_match: string, url: string) =>
      [
        '<div class="media-embed audio-embed" style="margin: 1rem 0;">',
        '<audio',
        'controls',
        'preload="metadata"',
        'style="width: 100%; max-width: 100%;"',
        'class="media-player">',
        `<source src="${escapeAttr(url)}">`,
        'Your browser does not support the audio tag.',
        '</audio>',
        '</div>',
      ].join('\n'),
  );

  return processed;
}
/**
 * Marks external links for client-side OpenGraph preview fetching.
 *
 * Every `<a href="http(s)://...">` that is considered external is replaced by
 * a `<span class="opengraph-link-container" data-og-url="...">` wrapping a new
 * anchor (opens in a new tab) and an initially hidden preview skeleton. The
 * original anchor's other attributes are not carried over.
 *
 * Links are left untouched when they:
 * - carry one of the wikilink / nostr / opengraph / media marker strings,
 * - point at a media or image file (by extension at the end of the URL),
 * - point at YouTube or Spotify,
 * - have the same host as `linkBaseURL` (compared case-insensitively).
 *
 * @param html - HTML to scan for anchors.
 * @param linkBaseURL - Optional base URL of the current site; links to the
 *   same host are treated as internal.
 * @returns The HTML with external links wrapped for OpenGraph previews.
 */
function processOpenGraphLinks(html: string, linkBaseURL?: string): string {
  const hostOf = (url: string): string | null =>
    /^https?:\/\/([^\/]+)/i.exec(url)?.[1]?.toLowerCase() ?? null;

  const baseDomain = linkBaseURL ? hostOf(linkBaseURL) : null;

  // Marker strings identifying anchors that already have special handling.
  const skipMarkers = [
    'class="wikilink"',
    'class="nostr-link"',
    'class="opengraph-link"',
    'data-embedded-note',
    'youtube-embed',
    'spotify-embed',
    'media-embed',
    'opengraph-link-container',
  ];

  return html.replace(
    /<a\s+([^>]*?)href\s*=\s*["'](https?:\/\/[^"']+)["']([^>]*?)>(.*?)<\/a>/gis,
    (match: string, _before: string, href: string, _after: string, linkText: string) => {
      if (skipMarkers.some((marker) => match.includes(marker))) {
        return match;
      }

      // Media and image files are rendered by other processors.
      if (/\.(mp4|webm|ogg|m4v|mov|avi|mkv|flv|wmv|mp3|m4a|wav|flac|aac|opus|wma|jpeg|jpg|png|gif|webp|svg)$/i.test(href)) {
        return match;
      }

      // YouTube and Spotify are handled as media embeds.
      if (/youtube\.com|youtu\.be|spotify\.com/i.test(href)) {
        return match;
      }

      // Same host as the site itself: internal link, no preview.
      if (baseDomain !== null && hostOf(href) === baseDomain) {
        return match;
      }

      // `href` was read out of HTML, so it may already contain entities such
      // as `&amp;`. Only escape ampersands that do not start an entity, so
      // query strings are not double-encoded into `&amp;amp;`.
      const escapedUrl = href
        .replace(/&(?!(?:[a-z][a-z0-9]*|#\d+|#x[0-9a-f]+);)/gi, '&amp;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');

      // The preview itself is filled in client-side via JavaScript.
      return [
        `<span class="opengraph-link-container" data-og-url="${escapedUrl}">`,
        `<a href="${escapedUrl}" target="_blank" rel="noreferrer noopener" class="opengraph-link break-words inline-flex items-baseline gap-1">${linkText} <svg class="size-3" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 6H6a2 2 0 00-2 2v10a2 2 0 002 2h10a2 2 0 002-2v-4M14 4h6m0 0v6m0-6L10 14" /></svg></a>`,
        '<div class="opengraph-preview" data-og-loading="true" style="display: none;">',
        '<div class="opengraph-card">',
        '<div class="opengraph-image-container">',
        '<img class="opengraph-image" src="" alt="" style="display: none;" />',
        '</div>',
        '<div class="opengraph-content">',
        '<div class="opengraph-site"></div>',
        '<div class="opengraph-title"></div>',
        '<div class="opengraph-description"></div>',
        '</div>',
        '</div>',
        '</div>',
        '</span>',
      ].join('\n');
    },
  );
}
/** /**
* Process images: add max-width styling and data attributes * Process images: add max-width styling and data attributes
*/ */

Loading…
Cancel
Save