import { URL_REGEX } from '@/constants'
import logger from '@/lib/logger'

/**
 * A comma after the host (easy typo next to `.`) is not valid in a hostname, but `new URL()` still
 * parses it and then serializes to a bogus `,/` before the path (e.g. `https://a.com,` → `https://a.com,/`).
 * Strip trailing commas from the parsed hostname before further normalization.
 *
 * Mutates `url` in place; does nothing when the hostname contains no comma.
 */
function stripTrailingCommasFromHostname(url: URL): void {
  const h = url.hostname
  if (!h.includes(',')) return
  url.hostname = h.replace(/,+$/g, '')
}

/** True when `url` starts with `ws://` or `wss://` followed by at least one character. */
export function isWebsocketUrl(url: string): boolean {
  return /^wss?:\/\/.+$/.test(url)
}

/** Nostr relay over HTTPS (index relay JSON API), not WebSocket. */
export function isHttpRelayUrl(url: string): boolean {
  const u = url.trim()
  return /^https?:\/\/.+/i.test(u)
}

/**
 * Normalize https/http relay base URL without converting to WebSocket.
 * Use for kind 10243 and index-relay HTTP API calls (not for NIP-01 WS pool).
 */
export function normalizeHttpRelayUrl(url: string): string {
  return normalizeHttpUrl(url)
}

/**
 * In dev, loopback HTTP relay bases (`http://localhost:*` / `http://127.0.0.1:*`) use the Vite
 * same-origin `/dev-index-relay` proxy (see `vite.config.ts`) so JSON APIs and NIP-11 avoid CORS.
 *
 * Returns `normalizedBase` unchanged in production builds, outside a browser (`window` undefined),
 * for unparseable input, for non-`http:` schemes, and for non-loopback hosts.
 */
export function devProxyLoopbackHttpRelayBase(normalizedBase: string): string {
  if (import.meta.env.PROD || typeof window === 'undefined') return normalizedBase
  let u: URL
  try {
    u = new URL(normalizedBase)
  } catch {
    return normalizedBase
  }
  if (u.protocol !== 'http:') return normalizedBase
  const h = u.hostname
  if (h !== 'localhost' && h !== '127.0.0.1') return normalizedBase
  return `${window.location.origin}/dev-index-relay`
}

/**
 * Normalize relay URL for deduplication: WebSocket URLs via {@link normalizeUrl}, HTTPS index relays via {@link normalizeHttpRelayUrl}.
 * Returns `''` when normalization fails.
 */
export function normalizeAnyRelayUrl(url: string): string {
  if (isHttpRelayUrl(url)) return normalizeHttpRelayUrl(url) || ''
  return normalizeUrl(url) || ''
}

/**
 * Normalize a relay URL to canonical `ws://` / `wss://` form (copy from nostr-tools/utils, extended).
 *
 * - Scheme-less input gets `ws://` for `localhost:` / `localhost/` prefixes and `wss://` otherwise.
 * - `https:` → `wss:` and `http:` → `ws:`; local-network hosts are forced to `ws:`.
 * - Repeated slashes in the path are collapsed, a trailing slash is dropped, default ports are
 *   removed and query parameters are sorted.
 *
 * @returns the normalized URL, or `''` when the input is unparseable, contains `#`, or does not
 *   end up as a WebSocket URL.
 */
export function normalizeUrl(url: string): string {
  try {
    if (url.indexOf('://') === -1) {
      if (url.startsWith('localhost:') || url.startsWith('localhost/')) {
        url = 'ws://' + url
      } else {
        url = 'wss://' + url
      }
    }

    // Parse the URL first to validate it
    const p = new URL(url)
    stripTrailingCommasFromHostname(p)

    // Hash fragments are not valid for relay URLs.
    // Note: Query parameters are allowed (e.g., filter.nostr.wine uses ?broadcast=true/false)
    const hasHashFragment = url.includes('#')
    if (hasHashFragment) {
      logger.warn('Skipping URL with hash fragment (not a relay)', { url })
      return ''
    }

    p.pathname = p.pathname.replace(/\/+/g, '/')
    if (p.pathname.endsWith('/')) p.pathname = p.pathname.slice(0, -1)

    if (p.protocol === 'https:') {
      p.protocol = 'wss:'
    } else if (p.protocol === 'http:') {
      p.protocol = 'ws:'
    }

    // After protocol normalization, validate it's actually a websocket URL
    if (!isWebsocketUrl(p.toString())) {
      logger.warn('Skipping non-websocket URL', { url })
      return ''
    }

    // Normalize localhost and local network addresses to always use ws:// instead of wss://
    // This fixes the common typo where people use wss:// for local relays
    if (isLocalNetworkUrl(p.toString())) {
      p.protocol = 'ws:'
    }

    if ((p.port === '80' && p.protocol === 'ws:') || (p.port === '443' && p.protocol === 'wss:')) {
      p.port = ''
    }
    p.searchParams.sort()
    p.hash = ''

    // Final validation: ensure we have a proper websocket URL
    const finalUrl = p.toString()
    if (!isWebsocketUrl(finalUrl)) {
      logger.warn('Normalization resulted in invalid websocket URL', { url: finalUrl })
      return ''
    }

    return finalUrl
  } catch (error) {
    logger.error('Invalid URL', { error, url })
    return ''
  }
}

/**
 * Normalize an HTTP(S) URL: scheme-less input gets `https://`, `wss:` → `https:` and `ws:` → `http:`,
 * repeated slashes in the path are collapsed, a trailing slash is dropped, default ports are removed,
 * query parameters are sorted and the hash is cleared.
 *
 * @returns the normalized URL, or `''` when the input cannot be parsed.
 */
export function normalizeHttpUrl(url: string): string {
  try {
    if (url.indexOf('://') === -1) url = 'https://' + url
    const p = new URL(url)
    stripTrailingCommasFromHostname(p)
    p.pathname = p.pathname.replace(/\/+/g, '/')
    if (p.pathname.endsWith('/')) p.pathname = p.pathname.slice(0, -1)

    if (p.protocol === 'wss:') {
      p.protocol = 'https:'
    } else if (p.protocol === 'ws:') {
      p.protocol = 'http:'
    }

    if (
      (p.port === '80' && p.protocol === 'http:') ||
      (p.port === '443' && p.protocol === 'https:')
    ) {
      p.port = ''
    }
    p.searchParams.sort()
    p.hash = ''
    return p.toString()
  } catch (error) {
    logger.error('Invalid URL', { error, url })
    return ''
  }
}

/**
 * Shorten a URL for display: drop a leading `wss://`, `ws://`, `https://` or `http://` and one
 * trailing slash.
 *
 * Only a scheme at the start of the string (optionally after whitespace, which is kept) is removed,
 * so scheme-like text inside the path or query (e.g. `?r=wss://other`) is left intact.
 */
export function simplifyUrl(url: string): string {
  return url.replace(/^(\s*)(?:wss?|https?):\/\//, '$1').replace(/\/$/, '')
}

/**
 * Some events use r-tags like `https://nostr:nevent1…` — not a real http(s) URL (the `nostr:` NIP-21
 * scheme is pasted after `https://`). Those strings pass a naive `https://` check and break WebPreview.
 */
export function isPseudoNostrHttpsUrl(url: string): boolean {
  return /^https?:\/\/nostr:/i.test(url.trim())
}

/**
 * True when the URL's host is `localhost`, the IPv4 loopback `127.0.0.1`, a private IPv4 range
 * (`10.0.0.0/8`, `172.16.0.0/12`, `192.168.0.0/16`), the IPv6 loopback `::1`, an IPv6 address
 * starting with `fe80:` (link-local) or an IPv6 unique local address (`fc00::/7`).
 *
 * @returns `false` for unparseable input.
 */
export function isLocalNetworkUrl(urlString: string): boolean {
  let hostname: string
  try {
    hostname = new URL(urlString).hostname
  } catch {
    return false // Invalid URLs are never local
  }

  if (hostname === 'localhost') return true

  // IPv4 local network addresses
  const ipv4Match = hostname.match(/^(\d+)\.(\d+)\.(\d+)\.(\d+)$/)
  if (ipv4Match) {
    const [, a, b, c, d] = ipv4Match.map(Number)
    return (
      a === 10 ||
      (a === 172 && b >= 16 && b <= 31) ||
      (a === 192 && b === 168) ||
      (a === 127 && b === 0 && c === 0 && d === 1)
    )
  }

  // `URL.hostname` returns IPv6 literals wrapped in brackets (`[::1]`); unwrap before comparing,
  // otherwise none of the IPv6 checks below can ever match.
  const ipv6 =
    hostname.startsWith('[') && hostname.endsWith(']')
      ? hostname.slice(1, -1).toLowerCase()
      : hostname.toLowerCase()
  if (ipv6.includes(':')) {
    if (ipv6 === '::1') return true // IPv6 loopback address
    if (ipv6.startsWith('fe80:')) return true // Link-local address
    // Unique local address (ULA): the first 16-bit group is fcxx or fdxx. Requiring four hex
    // digits avoids matching e.g. `fc::1`, whose first group is 0x00fc.
    if (/^f[cd][0-9a-f]{2}:/.test(ipv6)) return true
  }

  return false
}

/** File extensions treated as images by {@link isImage}. */
const IMAGE_EXTENSIONS: readonly string[] = [
  '.jpg',
  '.jpeg',
  '.png',
  '.gif',
  '.webp',
  '.heic',
  '.svg'
]

/** Values of the `output` / `format` query parameters that {@link isImage} treats as image output. */
const IMAGE_OUTPUT_FORMATS: readonly string[] = ['webp', 'jpg', 'jpeg', 'png', 'gif']

/**
 * One case-insensitive pattern per image extension: the extension directly preceded by `/`, `?`,
 * `=` or `&` and followed by `?`, `&`, `#` or the end of the string. Built once at module load
 * instead of on every {@link isImage} call.
 */
const IMAGE_EXTENSION_MATCHERS: ReadonlyArray<{ ext: string; pattern: RegExp }> =
  IMAGE_EXTENSIONS.map((ext) => ({
    ext,
    pattern: new RegExp(`[/?=&]${ext.replace('.', '\\.')}(?:[?&#]|$)`, 'i')
  }))

/** True when `path`, lowercased, ends with one of `extensions`. */
function endsWithAnyExtension(path: string, extensions: readonly string[]): boolean {
  const lower = path.toLowerCase()
  return extensions.some((ext) => lower.endsWith(ext))
}

/**
 * Heuristically decide whether `url` points at an image.
 *
 * Checks, in order: the pathname extension; a `url` query parameter (common in proxy services like
 * wsrv.nl, images.weserv.nl) that ends with or embeds an image extension; an `output` / `format`
 * query parameter naming an image format; and finally the extension pattern against the full URL string.
 *
 * @returns `false` when `url` cannot be parsed.
 */
export function isImage(url: string): boolean {
  try {
    const parsedUrl = new URL(url)

    // Check pathname for image extensions
    if (endsWithAnyExtension(parsedUrl.pathname, IMAGE_EXTENSIONS)) return true

    // Note: searchParams.get() automatically decodes URL-encoded values
    const urlParam = parsedUrl.searchParams.get('url')
    if (urlParam) {
      const urlParamLower = urlParam.toLowerCase()
      const paramLooksLikeImage = IMAGE_EXTENSION_MATCHERS.some(
        ({ ext, pattern }) => pattern.test(urlParam) || urlParamLower.endsWith(ext)
      )
      if (paramLooksLikeImage) return true

      // Also try to parse it as a URL and check the pathname
      try {
        if (endsWithAnyExtension(new URL(urlParam).pathname, IMAGE_EXTENSIONS)) return true
      } catch {
        // If it's not a valid URL, that's fine - we already checked for extensions above
      }
    }

    // Check for image-related query parameters (common in image proxy services)
    // e.g., output=webp, format=webp, etc.
    const outputParam = parsedUrl.searchParams.get('output') || parsedUrl.searchParams.get('format')
    if (outputParam && IMAGE_OUTPUT_FORMATS.includes(outputParam.toLowerCase())) return true

    // Fallback: run the extension pattern against the full URL string (query and fragment included)
    return IMAGE_EXTENSION_MATCHERS.some(({ pattern }) => pattern.test(url))
  } catch {
    return false
  }
}

/** True when the pathname of `url` ends with one of `extensions`; `false` for unparseable input. */
function urlPathHasExtension(url: string, extensions: readonly string[]): boolean {
  try {
    // Parse once instead of once per candidate extension
    return endsWithAnyExtension(new URL(url).pathname, extensions)
  } catch {
    return false
  }
}

const MEDIA_EXTENSIONS: readonly string[] = [
  '.mp4',
  '.webm',
  '.ogg',
  '.mov',
  '.mp3',
  '.wav',
  '.flac',
  '.aac',
  '.m4a',
  '.opus',
  '.wma'
]

const AUDIO_EXTENSIONS: readonly string[] = [
  '.mp3',
  '.wav',
  '.flac',
  '.aac',
  '.m4a',
  '.opus',
  '.wma',
  '.ogg', // ogg can be audio
  '.webm', // webm can be audio (when uploaded via microphone button)
  '.mp4' // mp4 can be audio (m4a files)
]

const VIDEO_EXTENSIONS: readonly string[] = [
  '.mp4',
  '.webm',
  '.mov',
  '.avi',
  '.wmv',
  '.flv',
  '.mkv',
  '.m4v',
  '.3gp'
]

/** True when the URL's pathname ends with a known audio or video extension. */
export function isMedia(url: string): boolean {
  return urlPathHasExtension(url, MEDIA_EXTENSIONS)
}

/** True when the URL's pathname ends with an extension that can hold audio (includes `.ogg`, `.webm`, `.mp4`). */
export function isAudio(url: string): boolean {
  return urlPathHasExtension(url, AUDIO_EXTENSIONS)
}

/** True when the URL's pathname ends with a known video extension. */
export function isVideo(url: string): boolean {
  return urlPathHasExtension(url, VIDEO_EXTENSIONS)
}

/**
 * Return true if the URL looks like a fetchable web page (http(s) with a plausible host).
 * Used to skip OG metadata fetch for invalid or non-http URLs (e.g. "https://1.4ghz/").
 */
export function isLikelyWebPageUrl(url: string): boolean {
  try {
    const parsed = new URL(url)
    if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') return false
    const host = parsed.hostname || ''
    if (!host) return false
    // Require a dot (e.g. example.com) or localhost so we skip bare hostnames like "1.4ghz"
    return host.includes('.') || host === 'localhost'
  } catch {
    return false
  }
}

/** Return true if the string looks like a safe absolute HTTP(S) URL for use as img/video src. */
export function isSafeMediaUrl(url: string): boolean {
  if (!url || typeof url !== 'string') return false
  const t = url.trim()
  return t.startsWith('http://') || t.startsWith('https://')
}

/**
 * True if the URL may be used as an image/media `src` in-app: http(s), `data:image/…` (e.g. pubkey
 * placeholders), `blob:`, or `file:` (Electron). Use {@link isSafeMediaUrl} for user-openable links only.
 */
export function isRenderableMediaUrl(url: string): boolean {
  if (!url || typeof url !== 'string') return false
  const t = url.trim()
  if (isSafeMediaUrl(t)) return true
  if (t.startsWith('data:image/')) return true
  if (t.startsWith('blob:')) return true
  if (t.startsWith('file:')) return true
  return false
}

/**
 * Primal R2A CDN URL for media keyed by SHA-256 (same object as `https://blossom.primal.net/{hash}.ext`).
 * Used when the blossom host fails in-browser; aligns with NIP-B7-style alternate retrieval.
 *
 * @param hash 64 hex characters (any case); lowercased before use.
 * @param extensionWithDot optional extension; ignored unless it starts with `.`.
 * @returns the CDN URL, or `null` when `hash` is not 64 hex characters.
 */
export function primalR2aUploads2UrlFromSha256(hash: string, extensionWithDot?: string): string | null {
  const h = hash.toLowerCase()
  if (!/^[0-9a-f]{64}$/.test(h)) return null
  const ext =
    extensionWithDot && extensionWithDot.startsWith('.') ? extensionWithDot.toLowerCase() : ''
  // The object path is sharded by the first 1, next 2 and next 2 hex characters of the hash
  const a = h.slice(0, 1)
  const b = h.slice(1, 3)
  const c = h.slice(3, 5)
  return `https://r2a.primal.net/uploads2/${a}/${b}/${c}/${h}${ext}`
}

/**
 * If `url` is on `blossom.primal.net` with a 64-hex blob id in the path, return the r2a CDN mirror URL.
 * Returns `null` for other hosts, non-matching paths and unparseable input.
 */
export function primalR2aMirrorForBlossomPrimalUrl(url: string | URL): string | null {
  try {
    const u = url instanceof URL ? url : new URL(url)
    if (u.hostname !== 'blossom.primal.net') return null
    const m = u.pathname.match(/([0-9a-f]{64})(\.\w+)?$/i)
    if (!m) return null
    return primalR2aUploads2UrlFromSha256(m[1].toLowerCase(), m[2] || '')
  } catch {
    return null
  }
}

/**
 * Display URL for note/imeta image `src`. Keep `https://blossom.primal.net/{sha256}.ext` as-is: it is the
 * canonical URL in events and usually loads reliably. Use {@link primalR2aMirrorForBlossomPrimalUrl} only
 * as a fallback in {@link Image} `onError` when the blossom host fails.
 */
export function preferBlossomPrimalDisplayUrl(url: string): string {
  return url
}

/** Exact query-parameter names removed by {@link cleanUrl}. */
const TRACKING_PARAMS: readonly string[] = [
  // Google Analytics & Ads
  'utm_source',
  'utm_medium',
  'utm_campaign',
  'utm_term',
  'utm_content',
  'utm_id',
  'utm_source_platform',
  'utm_creative_format',
  'utm_marketing_tactic',
  'gclid',
  'gclsrc',
  'dclid',
  'gbraid',
  'wbraid',
  // Facebook
  'fbclid',
  'fb_action_ids',
  'fb_action_types',
  'fb_source',
  'fb_ref',
  // Twitter/X
  'twclid',
  'twsrc',
  // Microsoft/Bing
  'msclkid',
  // Mailchimp
  'mc_cid',
  'mc_eid',
  // Adobe
  'adobe_mc',
  'adobe_mc_ref',
  'adobe_mc_sdid',
  // HubSpot
  'hsCtaTracking',
  'hsa_acc',
  'hsa_cam',
  'hsa_grp',
  'hsa_ad',
  'hsa_src',
  'hsa_tgt',
  'hsa_kw',
  'hsa_mt',
  'hsa_net',
  'hsa_ver',
  // Marketo
  'mkt_tok',
  // YouTube
  'si',
  'feature',
  'kw',
  'pp',
  // Other common tracking
  'ref',
  'referrer',
  'source',
  'campaign',
  'medium',
  'content',
  'yclid',
  'srsltid',
  '_ga',
  '_gl',
  'igshid',
  'epik',
  'pk_campaign',
  'pk_kwd',
  // Mobile app tracking
  'adjust_tracker',
  'adjust_campaign',
  'adjust_adgroup',
  'adjust_creative',
  // Amazon
  'tag',
  'linkCode',
  'creative',
  'creativeASIN',
  'linkId',
  'ascsubtag',
  // Affiliate tracking
  'aff_id',
  'affiliate_id',
  'aff',
  'ref_',
  'refer',
  // Social media share tracking
  'share',
  'shared',
  'sharesource',
  // Mail Online / Associated Newspapers RSS (e.g. ?ns_mchannel=rss&ito=1490&ns_campaign=1490)
  'ns_mchannel',
  'ns_campaign',
  'ito'
]

/** Query-parameter name prefixes removed by {@link cleanUrl} (Mail-style `ns_*`, `utm_*`, and any `_*`). */
const TRACKING_PARAM_PREFIXES: readonly string[] = ['ns_', 'utm_', '_']

/**
 * Remove tracking parameters from URLs.
 * Removes the exact names in `TRACKING_PARAMS` (utm_*, fbclid, gclid, etc.) and every parameter whose
 * name starts with `ns_`, `utm_` or `_`.
 *
 * @returns the re-serialized URL, or the original string unchanged when it cannot be parsed.
 */
export function cleanUrl(url: string): string {
  try {
    const parsedUrl = new URL(url)
    stripTrailingCommasFromHostname(parsedUrl)

    for (const param of TRACKING_PARAMS) {
      parsedUrl.searchParams.delete(param)
    }

    // Snapshot the keys first: deleting while iterating the live iterator would skip entries
    for (const key of Array.from(parsedUrl.searchParams.keys())) {
      if (TRACKING_PARAM_PREFIXES.some((prefix) => key.startsWith(prefix))) {
        parsedUrl.searchParams.delete(key)
      }
    }

    return parsedUrl.toString()
  } catch {
    // If URL parsing fails, return original URL
    return url
  }
}

/**
 * Rewrite http(s) URLs in a plain string using {@link URL_REGEX} (same boundary rules as the feed parser), then
 * {@link cleanUrl}. Avoids greedy `https?:\\/\\/[^\\s]+`, which swallows trailing punctuation like `https://a.com, and`.
 *
 * A match is left untouched when `transform` throws for it.
 */
export function rewritePlainTextHttpUrls(
  content: string,
  transform: (url: string) => string = cleanUrl
): string {
  // Fresh RegExp instance so the shared URL_REGEX object is not mutated by this call
  const re = new RegExp(URL_REGEX.source, URL_REGEX.flags)
  return content.replace(re, (match) => {
    try {
      return transform(match)
    } catch {
      return match
    }
  })
}

/**
 * Relays in `full` whose normalized URL is not in `provisional` (by {@link normalizeUrl}), preserving first-seen order.
 * Entries that fail normalization are compared by their trimmed raw string; duplicates within `full`
 * are dropped after the first occurrence.
 */
export function subtractNormalizedRelayUrls(full: string[], provisional: string[]): string[] {
  const prov = new Set(provisional.map((u) => normalizeUrl(u) || u.trim()).filter(Boolean))
  const seen = new Set<string>()
  const out: string[] = []
  for (const u of full) {
    const n = normalizeUrl(u) || u.trim()
    if (!n || prov.has(n) || seen.has(n)) continue
    seen.add(n)
    out.push(u)
  }
  return out
}