// jumble/src/lib/url.ts

import { URL_REGEX } from '@/constants'
import logger from '@/lib/logger'

/**
 * Drops commas that trail the hostname of an already-parsed URL, in place.
 *
 * `new URL()` accepts a comma directly after the host (an easy typo for `.`), and the URL then
 * serializes with a stray `,/` before the path (`https://a.com,` → `https://a.com,/`).
 * Call this right after parsing, before any further normalization.
 *
 * @param url Parsed URL whose `hostname` is rewritten when it contains a comma.
 */
function stripTrailingCommasFromHostname(url: URL): void {
  const { hostname } = url
  // Only touch the URL when a comma is present; only the trailing run is removed.
  if (hostname.indexOf(',') !== -1) {
    url.hostname = hostname.replace(/,+$/g, '')
  }
}

/**
 * True when `url` starts with a lowercase `ws://` or `wss://` followed by at least one character.
 * The check is case-sensitive and does not trim the input.
 */
export function isWebsocketUrl(url: string): boolean {
  const websocketUrlPattern = /^wss?:\/\/.+$/
  return websocketUrlPattern.test(url)
}

/**
 * True when the trimmed input begins with a `ws://` or `wss://` scheme (case-insensitive).
 * Only the scheme prefix is inspected; nothing after `//` is required.
 */
export function isWebSocketRelayScheme(url: string): boolean {
  const candidate = url.trim()
  const websocketSchemePrefix = /^wss?:\/\//i
  return websocketSchemePrefix.test(candidate)
}

/**
 * True when the trimmed input begins with an `http://` or `https://` scheme (case-insensitive).
 * Only the scheme prefix is inspected; nothing after `//` is required.
 */
export function isHttpOrHttpsScheme(url: string): boolean {
  const candidate = url.trim()
  const httpSchemePrefix = /^https?:\/\//i
  return httpSchemePrefix.test(candidate)
}

/**
 * Shape check for kind **10243** `r` tag values, which use http(s) for the index-relay JSON API.
 * Passes when the trimmed value is `http://` or `https://` (any case) followed by at least one
 * character. Do not use this to classify arbitrary https:// URLs (profile websites, etc.).
 */
export function isKind10243HttpRelayTagUrl(url: string): boolean {
  const httpUrlWithBody = /^https?:\/\/.+/i
  return httpUrlWithBody.test(url.trim())
}

/**
 * Legacy alias that forwards to {@link isKind10243HttpRelayTagUrl}.
 *
 * @deprecated Prefer {@link isKind10243HttpRelayTagUrl} only when parsing kind 10243.
 */
export function isHttpRelayUrl(url: string): boolean {
  const matchesKind10243Shape = isKind10243HttpRelayTagUrl(url)
  return matchesKind10243Shape
}

/**
 * Normalizes an http(s) relay base URL and keeps its scheme (no conversion to WebSocket).
 * Intended for kind 10243 entries and index-relay HTTP API calls, not for the NIP-01 WS pool.
 * Delegates to {@link normalizeHttpUrl}.
 */
export function normalizeHttpRelayUrl(url: string): string {
  const normalizedBase = normalizeHttpUrl(url)
  return normalizedBase
}

/**
 * Dev-only rewrite of loopback HTTP relay bases to the same-origin Vite proxy.
 *
 * When not in a production build and `window` exists, `http://localhost:*` and
 * `http://127.0.0.1:*` become `<origin>/dev-index-relay` (see `vite.config.ts`) so JSON APIs and
 * NIP-11 avoid CORS. Every other input — including unparsable strings — is returned unchanged.
 * Only used for the configured HTTP index relay; WS relay NIP-11 fetches bypass this proxy.
 *
 * @param normalizedBase Relay base URL, expected to be already normalized.
 * @returns The proxy base, or `normalizedBase` as given.
 */
export function devProxyLoopbackHttpRelayBase(normalizedBase: string): string {
  const proxyUnavailable = import.meta.env.PROD || typeof window === 'undefined'
  if (proxyUnavailable) return normalizedBase

  let parsed: URL
  try {
    parsed = new URL(normalizedBase)
  } catch {
    return normalizedBase
  }

  const isLoopbackHost = parsed.hostname === 'localhost' || parsed.hostname === '127.0.0.1'
  if (parsed.protocol === 'http:' && isLoopbackHost) {
    return `${window.location.origin}/dev-index-relay`
  }
  return normalizedBase
}

/**
 * Lowercase hostnames whose HTTPS index API breaks in the browser (CORS preflight rejects
 * `Content-Type`, etc.). In dev, `index-relay-http` rewrites these bases to the same-origin
 * `/dev-cors-index-relay` path (see `vite.config.ts`).
 * Keep this list tiny — each entry needs a matching Vite `proxy` target.
 */
const DEV_HTTPS_INDEX_RELAY_CORS_PROXY_HOSTS = new Set<string>([
  'nos.lol',
  'mercury-relay.imwald.eu'
])

/**
 * Lowercased hostname taken from the `VITE_DEV_INDEX_RELAY_TARGET` env value.
 * A value without an `http://` / `https://` prefix is parsed as if it started with `https://`.
 *
 * @returns The hostname, or `null` when the value is missing, not a string, blank, or unparsable.
 */
function devIndexRelayTargetHostname(): string | null {
  const configured = import.meta.env.VITE_DEV_INDEX_RELAY_TARGET
  if (typeof configured !== 'string') return null

  const target = configured.trim()
  if (target === '') return null

  const absolute = /^https?:\/\//i.test(target) ? target : `https://${target}`
  try {
    return new URL(absolute).hostname.toLowerCase()
  } catch {
    return null
  }
}

/**
 * Dev-only rewrite of HTTPS index relay bases to a same-origin Vite proxy so that
 * POST /api/events/filter works in the browser.
 * Chain after `devProxyLoopbackHttpRelayBase`:
 * `devProxyCorsProblematicHttpsIndexRelayBase(devProxyLoopbackHttpRelayBase(url))`.
 *
 * - Host equal to the `VITE_DEV_INDEX_RELAY_TARGET` host → `<origin>/dev-index-relay`
 * - Allowlisted hosts (e.g. nos.lol, mercury-relay.imwald.eu) → `<origin>/dev-cors-index-relay`
 *
 * Production builds, environments without `window`, unparsable input, non-`https:` URLs and
 * hosts matching neither rule get `normalizedBase` back unchanged.
 */
export function devProxyCorsProblematicHttpsIndexRelayBase(normalizedBase: string): string {
  const proxyUnavailable = import.meta.env.PROD || typeof window === 'undefined'
  if (proxyUnavailable) return normalizedBase

  let parsed: URL
  try {
    parsed = new URL(normalizedBase)
  } catch {
    return normalizedBase
  }
  if (parsed.protocol !== 'https:') return normalizedBase

  const hostname = parsed.hostname.toLowerCase()
  const configuredTarget = devIndexRelayTargetHostname()
  // The explicitly configured dev target takes precedence over the static allowlist.
  if (configuredTarget && hostname === configuredTarget) {
    return `${window.location.origin}/dev-index-relay`
  }
  return DEV_HTTPS_INDEX_RELAY_CORS_PROXY_HOSTS.has(hostname)
    ? `${window.location.origin}/dev-cors-index-relay`
    : normalizedBase
}

/**
 * True when the trimmed input starts with an explicit `http://`, `https://`, `ws://`, or
 * `wss://` scheme (case-insensitive). Bare hostnames do not qualify.
 */
export function relayUrlHasExplicitScheme(url: string): boolean {
  const explicitRelayScheme = /^(https?|wss?):\/\//i
  return explicitRelayScheme.test(url.trim())
}

/**
 * Normalizes a WebSocket relay URL (`ws:` / `wss:`) for REQ pools and feed layers.
 * Delegates to {@link normalizeUrl}.
 */
export function normalizeAnyRelayUrl(url: string): string {
  const normalizedRelay = normalizeUrl(url)
  return normalizedRelay
}

/**
 * Normalizes a relay URL through the route that matches its scheme: `http(s)` inputs go to
 * {@link normalizeHttpRelayUrl} (kind 10243 index relays), everything else goes to
 * {@link normalizeAnyRelayUrl} (kind 10002 NIP-01 relays). Bare hostnames are rejected by both.
 *
 * @returns The normalized URL, or `''` for blank input.
 */
export function normalizeRelayUrlByScheme(url: string): string {
  const candidate = url.trim()
  if (candidate === '') return ''
  return isHttpOrHttpsScheme(candidate)
    ? normalizeHttpRelayUrl(candidate)
    : normalizeAnyRelayUrl(candidate)
}

/**
 * Normalization for relay explore/detail routes, which accept WebSocket relays as well as
 * kind-10243 HTTP index bases. Delegates to {@link normalizeRelayUrlByScheme}.
 */
export function normalizeRelayUrlForPage(url: string): string {
  const normalizedForRoute = normalizeRelayUrlByScheme(url)
  return normalizedForRoute
}

/**
 * Stable, lowercased key for per-relay session stats. The scheme is preserved, so an `https:`
 * base and a `wss:` relay on the same host get different keys (no https→wss aliasing).
 *
 * The input is normalized by scheme; when normalization yields `''` or the scheme is neither
 * ws(s) nor http(s), the trimmed input itself is lowercased and used.
 *
 * @returns The key, or `''` for blank input.
 */
export function canonicalRelaySessionKey(url: string): string {
  const candidate = url.trim()
  if (candidate === '') return ''

  let normalized = ''
  if (isWebSocketRelayScheme(candidate)) {
    normalized = normalizeUrl(candidate)
  } else if (isHttpOrHttpsScheme(candidate)) {
    normalized = normalizeHttpRelayUrl(candidate)
  }
  return (normalized || candidate).toLowerCase()
}

/**
 * Picks the entries of `urls` that are also listed in kind **10243** storage
 * (`httpRead` / `httpWrite`). URLs that are not configured are ignored.
 *
 * Both sides are compared case-insensitively after {@link normalizeHttpRelayUrl}; when that
 * returns `''` the trimmed raw string is compared instead.
 *
 * @param urls Candidate relay URLs of a batch.
 * @param configuredHttpIndexBases HTTP index bases the viewer has configured.
 * @returns Matching bases in normalized form, de-duplicated, in first-seen order.
 */
export function httpIndexRelayBasesInUrlBatch(
  urls: readonly string[],
  configuredHttpIndexBases: readonly string[]
): string[] {
  const configuredKeys = new Set<string>()
  for (const base of configuredHttpIndexBases) {
    const normalizedBase = normalizeHttpRelayUrl(base) || base.trim()
    if (normalizedBase) configuredKeys.add(normalizedBase.toLowerCase())
  }
  if (configuredKeys.size === 0) return []

  const matches = new Set<string>()
  for (const candidate of urls) {
    const normalized = normalizeHttpRelayUrl(candidate) || candidate.trim()
    if (normalized && configuredKeys.has(normalized.toLowerCase())) {
      matches.add(normalized)
    }
  }
  return Array.from(matches)
}

/**
 * HTTP index bases to poll for a REQ batch: every explicit http(s) relay URL in `urls`, plus
 * any entry matching the viewer's kind-10243 list. Unlike {@link httpIndexRelayBasesInUrlBatch}
 * alone, no configuration is required when the batch already names an HTTP index relay
 * (e.g. relay detail page).
 *
 * @param urls Relay URLs of the batch.
 * @param configuredHttpIndexBases Configured HTTP index bases (defaults to none).
 * @returns Normalized bases, de-duplicated case-insensitively, in first-seen order.
 */
export function httpIndexBasesForRelayQuery(
  urls: readonly string[],
  configuredHttpIndexBases: readonly string[] = []
): string[] {
  // Lowercased URL → first normalized spelling seen; a Map keeps insertion order.
  const basesByKey = new Map<string, string>()
  const consider = (candidate: string): void => {
    const normalized = normalizeHttpRelayUrl(candidate)
    if (normalized === '' || !isKind10243HttpRelayTagUrl(normalized)) return
    const key = normalized.toLowerCase()
    if (!basesByKey.has(key)) basesByKey.set(key, normalized)
  }

  for (const candidate of urls) {
    if (isHttpOrHttpsScheme(candidate.trim())) consider(candidate)
  }
  for (const configuredMatch of httpIndexRelayBasesInUrlBatch(urls, configuredHttpIndexBases)) {
    consider(configuredMatch)
  }
  return [...basesByKey.values()]
}

/**
 * True when `url` equals one of the configured HTTP index bases.
 * Both sides are compared case-insensitively after {@link normalizeHttpRelayUrl}, falling back
 * to the trimmed raw string when normalization returns `''`.
 *
 * @returns `false` for blank input or when nothing matches.
 */
export function urlMatchesConfiguredHttpIndexRelay(
  url: string,
  configuredHttpIndexBases: readonly string[]
): boolean {
  const target = (normalizeHttpRelayUrl(url) || url.trim()).toLowerCase()
  if (target === '') return false

  for (const base of configuredHttpIndexBases) {
    const normalizedBase = normalizeHttpRelayUrl(base) || base.trim()
    if (normalizedBase !== '' && normalizedBase.toLowerCase() === target) return true
  }
  return false
}

/**
 * Normalizes a WebSocket relay URL. Adapted from nostr-tools/utils — WebSocket relays only
 * (`ws:` / `wss:`); http(s) schemes are never rewritten.
 *
 * Steps: trim, require `://`, parse, strip trailing hostname commas, reject non-ws(s) protocols
 * and inputs containing `#`, collapse repeated slashes and drop one trailing slash from the
 * path, force `ws:` for local-network hosts, clear an explicit default port, sort the query
 * parameters, and clear the hash.
 *
 * @returns The normalized URL, or `''` when the input is blank, rejected, or fails to parse.
 */
export function normalizeUrl(url: string): string {
  try {
    const input = url.trim()
    if (input === '') return ''
    if (input.indexOf('://') === -1) {
      logger.warn('WebSocket relay URL requires ws: or wss: prefix', { url: input })
      return ''
    }

    const parsed = new URL(input)
    stripTrailingCommasFromHostname(parsed)

    const isWebSocketProtocol = parsed.protocol === 'ws:' || parsed.protocol === 'wss:'
    if (!isWebSocketProtocol) return ''

    // Any `#` in the raw input disqualifies it as a relay URL.
    if (input.includes('#')) {
      logger.warn('Skipping URL with hash fragment (not a relay)', { url: input })
      return ''
    }

    parsed.pathname = parsed.pathname.replace(/\/+/g, '/')
    if (parsed.pathname.endsWith('/')) {
      parsed.pathname = parsed.pathname.slice(0, -1)
    }

    if (!isWebsocketUrl(parsed.toString())) {
      logger.warn('Skipping non-websocket URL', { url: input })
      return ''
    }

    // Local relays are always addressed with ws://; wss:// on localhost or a LAN address is a
    // common typo.
    if (isLocalNetworkUrl(parsed.toString())) {
      parsed.protocol = 'ws:'
    }

    // The protocol is ws: or wss: at this point, so exactly one default port applies.
    const defaultPort = parsed.protocol === 'ws:' ? '80' : '443'
    if (parsed.port === defaultPort) {
      parsed.port = ''
    }
    parsed.searchParams.sort()
    parsed.hash = ''

    // Last guard: the serialized result must still look like a websocket URL.
    const normalized = parsed.toString()
    if (isWebsocketUrl(normalized)) return normalized

    logger.warn('Normalization resulted in invalid websocket URL', { url: normalized })
    return ''
  } catch (error) {
    logger.error('Invalid URL', { error, url })
    return ''
  }
}

/**
 * Normalizes an `http:` / `https:` URL without changing its scheme.
 *
 * Steps: trim, require `://`, parse, strip trailing hostname commas, reject non-http(s)
 * protocols, collapse repeated slashes and drop one trailing slash from the path, clear an
 * explicit default port, sort the query parameters, and clear the hash.
 *
 * @returns The normalized URL, or `''` when the input is blank, rejected, or fails to parse.
 */
export function normalizeHttpUrl(url: string): string {
  try {
    const input = url.trim()
    if (input === '') return ''
    if (input.indexOf('://') === -1) {
      logger.debug('HTTP URL requires http: or https: prefix', { url: input })
      return ''
    }

    const parsed = new URL(input)
    stripTrailingCommasFromHostname(parsed)

    const isHttpProtocol = parsed.protocol === 'http:' || parsed.protocol === 'https:'
    if (!isHttpProtocol) return ''

    parsed.pathname = parsed.pathname.replace(/\/+/g, '/')
    if (parsed.pathname.endsWith('/')) {
      parsed.pathname = parsed.pathname.slice(0, -1)
    }

    // The protocol is http: or https: at this point, so exactly one default port applies.
    const defaultPort = parsed.protocol === 'http:' ? '80' : '443'
    if (parsed.port === defaultPort) {
      parsed.port = ''
    }
    parsed.searchParams.sort()
    parsed.hash = ''
    return parsed.toString()
  } catch (error) {
    logger.error('Invalid URL', { error, url })
    return ''
  }
}

/**
 * Shortens a URL for display: removes one leading `wss://`, `ws://`, `https://`, or `http://`
 * scheme and one trailing `/`.
 *
 * Only a scheme at the start of the string (after optional leading whitespace, which is kept)
 * is removed. Schemes embedded later — for example inside a query parameter such as
 * `?u=wss://relay.example` — are left intact, so the displayed text still describes the same
 * URL.
 *
 * @param url URL or URL-like string.
 * @returns The input without its leading scheme and trailing slash.
 */
export function simplifyUrl(url: string): string {
  return url.replace(/^(\s*)(?:wss?|https?):\/\//, '$1').replace(/\/$/, '')
}

/**
 * Detects r-tag values such as `https://nostr:nevent1…`, where a NIP-21 `nostr:` URI was pasted
 * after `http(s)://`. They are not real http(s) URLs, yet pass a naive `https://` check and
 * break WebPreview. The input is trimmed and matched case-insensitively.
 */
export function isPseudoNostrHttpsUrl(url: string): boolean {
  const pseudoNostrPrefix = /^https?:\/\/nostr:/i
  return pseudoNostrPrefix.test(url.trim())
}

/**
 * True when the URL's host is `localhost`, a loopback/private IPv4 address, or a
 * loopback/link-local/unique-local IPv6 address.
 *
 * Recognized ranges:
 * - IPv4: `10.0.0.0/8`, `172.16.0.0/12`, `192.168.0.0/16`, and `127.0.0.1`
 * - IPv6: `::1`, `fe80::/10` (link-local), `fc00::/7` (unique local)
 *
 * `URL.hostname` returns IPv6 literals wrapped in brackets (`[::1]`), so the brackets are
 * removed before the address is inspected; comparing the bracketed form never matches.
 *
 * @param urlString Absolute URL to inspect.
 * @returns `false` for non-local hosts and for strings that do not parse as a URL.
 */
export function isLocalNetworkUrl(urlString: string): boolean {
  let hostname: string
  try {
    hostname = new URL(urlString).hostname
  } catch {
    return false // Invalid URLs are never local
  }

  if (hostname === 'localhost') return true

  const ipv4 = /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/.exec(hostname)
  if (ipv4) {
    const a = Number(ipv4[1])
    const b = Number(ipv4[2])
    const c = Number(ipv4[3])
    const d = Number(ipv4[4])
    return (
      a === 10 ||
      (a === 172 && b >= 16 && b <= 31) ||
      (a === 192 && b === 168) ||
      (a === 127 && b === 0 && c === 0 && d === 1)
    )
  }

  // Only IPv6 literals contain ':' in a parsed hostname.
  if (hostname.includes(':')) {
    const address = hostname.replace(/^\[|\]$/g, '').toLowerCase()
    if (address === '::1') return true // Loopback
    // The serializer drops leading zeros per group, so these first groups are always 4 digits.
    if (/^fe[89ab][0-9a-f]:/.test(address)) return true // Link-local, fe80::/10
    if (/^f[cd][0-9a-f]{2}:/.test(address)) return true // Unique local (ULA), fc00::/7
  }

  return false
}

/**
 * Heuristic: does `url` point at an image?
 *
 * Checks run in this order; the first hit returns `true`:
 * 1. The URL path ends with a known image extension.
 * 2. A `url` query parameter (image proxies such as wsrv.nl / images.weserv.nl) either ends
 *    with an image extension, contains one in "bare" position (right after `/`, `?`, `=`, or
 *    `&` and followed by `?`, `&`, `#`, or the end), or parses as a URL whose path ends with one.
 * 3. An `output` (or, if absent/empty, `format`) query parameter names an image format.
 * 4. The raw URL string contains an image extension in "bare" position.
 *
 * @returns `false` when nothing matches or `url` does not parse.
 */
export function isImage(url: string) {
  const extensions = ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.avif', '.apng', '.heic', '.svg']
  const proxyOutputFormats = ['webp', 'jpg', 'jpeg', 'png', 'gif', 'avif', 'svg', 'apng']

  const endsWithImageExtension = (text: string): boolean =>
    extensions.some((ext) => text.endsWith(ext))
  // Matches the extension directly after a separator and before a delimiter or the end.
  const bareExtensionPattern = (ext: string): RegExp =>
    new RegExp(`[/?=&]${ext.replace('.', '\\.')}(?:[?&#]|$)`, 'i')

  try {
    const parsed = new URL(url)
    if (endsWithImageExtension(parsed.pathname.toLowerCase())) return true

    // searchParams.get() already returns the percent-decoded value.
    const proxied = parsed.searchParams.get('url')
    if (proxied) {
      const proxiedLower = proxied.toLowerCase()
      const extensionInProxied = extensions.some(
        (ext) =>
          proxiedLower.includes(ext) &&
          (bareExtensionPattern(ext).test(proxied) || proxiedLower.endsWith(ext))
      )
      if (extensionInProxied) return true

      try {
        if (endsWithImageExtension(new URL(proxied).pathname.toLowerCase())) return true
      } catch {
        // Not an absolute URL — the plain extension checks above already ran.
      }
    }

    const requestedFormat = parsed.searchParams.get('output') || parsed.searchParams.get('format')
    if (requestedFormat && proxyOutputFormats.includes(requestedFormat.toLowerCase())) {
      return true
    }

    // Fallback over the raw string, which also covers query strings and fragments.
    return extensions.some((ext) => bareExtensionPattern(ext).test(url))
  } catch {
    return false
  }
}

/**
 * True when the URL path ends with a known audio or video file extension (case-insensitive).
 *
 * The list covers every extension accepted by `isAudio` and `isVideo`, including `.avi`,
 * `.wmv`, `.flv`, and `.m4v`, so a URL classified as video is always classified as media too.
 *
 * @param url Absolute URL to inspect.
 * @returns `false` when no extension matches or `url` does not parse.
 */
export function isMedia(url: string) {
  const mediaExtensions = [
    '.mp4',
    '.webm',
    '.ogg',
    '.ogv',
    '.mov',
    '.avi',
    '.wmv',
    '.flv',
    '.mkv',
    '.mka',
    '.m4v',
    '.3gp',
    '.3g2',
    '.mp3',
    '.wav',
    '.flac',
    '.aac',
    '.m4a',
    '.opus',
    '.wma'
  ]
  try {
    // Parse once; only the path is compared against the extension list.
    const path = new URL(url).pathname.toLowerCase()
    return mediaExtensions.some((ext) => path.endsWith(ext))
  } catch {
    return false
  }
}

/**
 * Extracts the SHA-256 hex digest from a Blossom (BUD) blob URL of the form
 * `https://host/<64-hex>`.
 *
 * The URL must be http(s) and its path must consist of exactly one non-empty segment made of
 * 64 hex digits (any case, no file extension).
 *
 * @returns The digest in lowercase, or `null` when the URL does not have that shape.
 */
export function blossomSha256FromBlobUrl(url: string): string | null {
  let parsed: URL
  try {
    parsed = new URL(url.trim())
  } catch {
    return null
  }

  const isHttp = parsed.protocol === 'http:' || parsed.protocol === 'https:'
  if (!isHttp) return null

  const [digest, ...extraSegments] = parsed.pathname.split('/').filter((seg) => seg !== '')
  if (digest === undefined || extraSegments.length > 0) return null

  return /^[a-f0-9]{64}$/i.test(digest) ? digest.toLowerCase() : null
}

/**
 * True for Blossom (BUD) blob URLs: `https://host/<64-hex-sha256>` with no file extension.
 * Such URLs carry no MIME hint; it comes from the server or from NIP-94 `imeta` (`m`, `x`, etc.).
 * Implemented as "{@link blossomSha256FromBlobUrl} finds a digest".
 */
export function isBlossomBudBlobUrl(url: string): boolean {
  const digest = blossomSha256FromBlobUrl(url)
  return digest !== null
}

/**
 * True when the URL path ends with an extension that may hold audio (case-insensitive).
 *
 * Container formats that can also hold video (`.ogg`, `.webm`, `.mp4`) are included.
 * Matroska: `.mka` is audio-only; `.mkv` is video, so it is not treated as audio by extension.
 *
 * @returns `false` when no extension matches or `url` does not parse.
 */
export function isAudio(url: string) {
  const audioExtensions = new Set([
    '.mp3',
    '.wav',
    '.flac',
    '.aac',
    '.m4a',
    '.opus',
    '.wma',
    '.mka',
    '.ogg',
    '.webm',
    '.mp4'
  ])
  try {
    const path = new URL(url).pathname.toLowerCase()
    // Everything from the last '.' on is the candidate extension.
    const dotIndex = path.lastIndexOf('.')
    return dotIndex !== -1 && audioExtensions.has(path.slice(dotIndex))
  } catch {
    return false
  }
}

/**
 * True when the URL path ends with a known video file extension (case-insensitive).
 *
 * @returns `false` when no extension matches or `url` does not parse.
 */
export function isVideo(url: string) {
  const videoExtensions = new Set([
    '.mp4',
    '.webm',
    '.mov',
    '.avi',
    '.wmv',
    '.flv',
    '.mkv',
    '.m4v',
    '.3gp',
    '.3g2',
    '.ogv'
  ])
  try {
    const path = new URL(url).pathname.toLowerCase()
    // Everything from the last '.' on is the candidate extension.
    const dotIndex = path.lastIndexOf('.')
    return dotIndex !== -1 && videoExtensions.has(path.slice(dotIndex))
  } catch {
    return false
  }
}

/**
 * True when the URL path ends with `.m3u8` or `.m3u` (case-insensitive), i.e. an HLS playlist.
 * Such manifests are often tagged as `streaming` on NIP-53 live events.
 *
 * @returns `false` for any other path or when `url` does not parse.
 */
export function isHlsPlaylistUrl(url: string): boolean {
  let pathname: string
  try {
    pathname = new URL(url).pathname
  } catch {
    return false
  }
  return /\.m3u8?$/.test(pathname.toLowerCase())
}

/** Lowercase hostnames served by the zap.stream web app. */
const ZAP_STREAM_EMBED_HOSTS = new Set<string>(['zap.stream', 'www.zap.stream'])

/**
 * True for [zap.stream](https://zap.stream) watch URLs, i.e. a zap.stream host whose first path
 * segment starts with `naddr1` or `nevent1`. Those are HTML apps — not `<video src>` — so the
 * in-app player embeds an iframe instead of probing metadata on a document URL.
 *
 * @returns `false` for other hosts/paths or when the trimmed input does not parse.
 */
export function isZapStreamWatchPageUrl(url: string): boolean {
  let parsed: URL
  try {
    parsed = new URL(url.trim())
  } catch {
    return false
  }
  if (!ZAP_STREAM_EMBED_HOSTS.has(parsed.hostname.toLowerCase())) return false

  const leadingSegment = parsed.pathname.split('/').find((seg) => seg.length > 0) ?? ''
  return /^(?:naddr1|nevent1)/.test(leadingSegment)
}

/**
 * True when `url` looks like a fetchable web page: it parses, uses http(s), has a host that is
 * `localhost` or contains a dot, and is not a direct image/media link.
 * Used to skip the OG metadata fetch for invalid or non-http URLs.
 *
 * Direct image/video/audio URLs (e.g. nostr.build `…/file.jpg`) are not HTML; OG metadata fetch
 * uses `fetch` with `mode: "cors"`, which fails on 301/CDN responses without ACAO and spams the
 * console.
 */
export function isLikelyWebPageUrl(url: string): boolean {
  try {
    const { protocol, hostname } = new URL(url)
    const isHttp = protocol === 'http:' || protocol === 'https:'
    // An empty host fails both alternatives.
    const hasPlausibleHost = hostname === 'localhost' || hostname.includes('.')
    return isHttp && hasPlausibleHost && !(isImage(url) || isMedia(url))
  } catch {
    return false
  }
}

/**
 * True when the value is a non-empty string that, once trimmed, starts with a lowercase
 * `http://` or `https://` — an absolute HTTP(S) URL usable as img/video src.
 */
export function isSafeMediaUrl(url: string): boolean {
  if (typeof url !== 'string' || url.length === 0) return false
  return /^https?:\/\//.test(url.trim())
}

/**
 * True if the URL may be used as an `<img src>` in-app: http(s), `data:image/…` (e.g. pubkey
 * placeholders), `blob:`, or `file:` (Electron). The input is trimmed before the prefix checks.
 * Use {@link isSafeMediaUrl} for user-openable links only.
 */
export function isRenderableMediaUrl(url: string): boolean {
  if (!url || typeof url !== 'string') return false
  const candidate = url.trim()
  const inAppPrefixes = ['data:image/', 'blob:', 'file:']
  return isSafeMediaUrl(candidate) || inAppPrefixes.some((prefix) => candidate.startsWith(prefix))
}

/**
 * Builds the Primal R2A CDN URL for media keyed by SHA-256 (same object as
 * `https://blossom.primal.net/{hash}.ext`). Used when the blossom host fails in-browser; aligns
 * with NIP-B7-style alternate retrieval.
 *
 * The path is sharded by the digest's 1st character, characters 2–3, and characters 4–5:
 * `uploads2/{a}/{bb}/{cc}/{hash}{ext}`.
 *
 * @param hash SHA-256 hex digest; any case, lowercased in the result.
 * @param extensionWithDot Optional extension; used (lowercased) only when it starts with `.`.
 * @returns The CDN URL, or `null` when `hash` is not exactly 64 hex digits.
 */
export function primalR2aUploads2UrlFromSha256(hash: string, extensionWithDot?: string): string | null {
  const digest = hash.toLowerCase()
  if (!/^[0-9a-f]{64}$/.test(digest)) return null

  const suffix = extensionWithDot?.startsWith('.') ? extensionWithDot.toLowerCase() : ''
  const shards = [digest.slice(0, 1), digest.slice(1, 3), digest.slice(3, 5)].join('/')
  return `https://r2a.primal.net/uploads2/${shards}/${digest}${suffix}`
}

/**
 * If `url` is on `blossom.primal.net` and its path ends with a 64-hex blob id (optionally
 * followed by a `.ext`), returns the r2a CDN mirror URL built by
 * {@link primalR2aUploads2UrlFromSha256}.
 *
 * @param url URL string or an already-parsed `URL`.
 * @returns The mirror URL, or `null` for other hosts, other paths, or unparsable input.
 */
export function primalR2aMirrorForBlossomPrimalUrl(url: string | URL): string | null {
  try {
    const parsed = url instanceof URL ? url : new URL(url)
    if (parsed.hostname !== 'blossom.primal.net') return null

    const blob = /([0-9a-f]{64})(\.\w+)?$/i.exec(parsed.pathname)
    if (blob === null) return null

    // Group 2 (the extension) is undefined when the path has none.
    const [, sha256, extension = ''] = blob
    return primalR2aUploads2UrlFromSha256(sha256.toLowerCase(), extension)
  } catch {
    return null
  }
}

/**
 * URL to put into `<img src>` / `<video src>` / `<audio src>`.
 *
 * For `https://blossom.primal.net/{sha256}.ext` the `r2a.primal.net/uploads2/…` mirror is
 * returned when known so the browser loads bytes directly. The blossom host often answers with
 * redirects; following those cross-origin responses commonly hits ORB / hotlink rules and fails
 * to decode in-app even though the file exists.
 *
 * @returns The mirror URL, or the trimmed input when no mirror applies (including `''`).
 */
export function resolvePrimalBlossomPlayableUrl(url: string): string {
  const trimmed = url.trim()
  if (trimmed === '') return trimmed

  const mirror = primalR2aMirrorForBlossomPrimalUrl(trimmed)
  return mirror ?? trimmed
}

/**
 * Display / lightbox URL for note media.
 * Forwards to {@link resolvePrimalBlossomPlayableUrl}, so Primal blossom links resolve the same
 * way for display as for playback.
 */
export function preferBlossomPrimalDisplayUrl(url: string): string {
  const displayUrl = resolvePrimalBlossomPlayableUrl(url)
  return displayUrl
}

/**
 * Removes tracking parameters from a URL.
 *
 * Deletes a fixed list of query parameter names (utm_*, fbclid, gclid, …) and then every
 * remaining parameter whose name starts with `ns_`, `utm_`, or `_`. Trailing commas on the
 * hostname are stripped as well.
 *
 * @param url Absolute URL to clean.
 * @returns The re-serialized URL, or `url` unchanged when it does not parse.
 */
export function cleanUrl(url: string): string {
  // Exact parameter names to drop, grouped by origin.
  const trackingParamNames = [
    // Google Analytics & Ads
    'utm_source',
    'utm_medium',
    'utm_campaign',
    'utm_term',
    'utm_content',
    'utm_id',
    'utm_source_platform',
    'utm_creative_format',
    'utm_marketing_tactic',
    'gclid',
    'gclsrc',
    'dclid',
    'gbraid',
    'wbraid',
    // Facebook
    'fbclid',
    'fb_action_ids',
    'fb_action_types',
    'fb_source',
    'fb_ref',
    // Twitter/X
    'twclid',
    'twsrc',
    // Microsoft/Bing and Mailchimp
    'msclkid',
    'mc_cid',
    'mc_eid',
    // Adobe
    'adobe_mc',
    'adobe_mc_ref',
    'adobe_mc_sdid',
    // HubSpot
    'hsCtaTracking',
    'hsa_acc',
    'hsa_cam',
    'hsa_grp',
    'hsa_ad',
    'hsa_src',
    'hsa_tgt',
    'hsa_kw',
    'hsa_mt',
    'hsa_net',
    'hsa_ver',
    // Marketo
    'mkt_tok',
    // YouTube
    'si',
    'feature',
    'kw',
    'pp',
    // Other common tracking
    'ref',
    'referrer',
    'source',
    'campaign',
    'medium',
    'content',
    'yclid',
    'srsltid',
    '_ga',
    '_gl',
    'igshid',
    'epik',
    'pk_campaign',
    'pk_kwd',
    // Mobile app tracking
    'adjust_tracker',
    'adjust_campaign',
    'adjust_adgroup',
    'adjust_creative',
    // Amazon
    'tag',
    'linkCode',
    'creative',
    'creativeASIN',
    'linkId',
    'ascsubtag',
    // Affiliate tracking
    'aff_id',
    'affiliate_id',
    'aff',
    'ref_',
    'refer',
    // Social media share tracking
    'share',
    'shared',
    'sharesource',
    // Mail Online / Associated Newspapers RSS (e.g. ?ns_mchannel=rss&ito=1490&ns_campaign=1490)
    'ns_mchannel',
    'ns_campaign',
    'ito'
  ]
  // Name prefixes to drop: Mail-style `ns_*`, any `utm_*`, and anything starting with `_`.
  const trackingPrefixes = ['ns_', 'utm_', '_']

  try {
    const parsed = new URL(url)
    stripTrailingCommasFromHostname(parsed)

    const { searchParams } = parsed
    for (const name of trackingParamNames) {
      searchParams.delete(name)
    }

    // Snapshot the keys first: deleting while iterating the live iterator would skip entries.
    for (const key of Array.from(searchParams.keys())) {
      if (trackingPrefixes.some((prefix) => key.startsWith(prefix))) {
        searchParams.delete(key)
      }
    }

    return parsed.toString()
  } catch {
    // Unparsable input is handed back as-is.
    return url
  }
}

/**
 * Rewrites the URLs found in a plain string. Matches come from {@link URL_REGEX} (same boundary
 * rules as the feed parser) and each one is replaced by `transform(match)`, which defaults to
 * {@link cleanUrl}. Avoids greedy `https?:\\/\\/[^\\s]+`, which swallows trailing punctuation
 * like `https://a.com, and`.
 *
 * @param content Text to scan.
 * @param transform Maps a matched URL to its replacement; if it throws, the match is kept.
 * @returns `content` with the matches replaced.
 */
export function rewritePlainTextHttpUrls(
  content: string,
  transform: (url: string) => string = cleanUrl
): string {
  // Work on a fresh RegExp built from the shared pattern's source and flags.
  const { source, flags } = URL_REGEX
  const pattern = new RegExp(source, flags)

  const transformOrKeep = (candidate: string): string => {
    try {
      return transform(candidate)
    } catch {
      return candidate
    }
  }
  return content.replace(pattern, (match) => transformOrKeep(match))
}

/**
 * Returns the relays of `full` that are not in `provisional`, preserving first-seen order.
 * Entries are compared by {@link normalizeUrl}, falling back to the trimmed raw string when
 * normalization returns `''`. Duplicates within `full` are emitted once, and the returned
 * entries keep their original (un-normalized) spelling.
 */
export function subtractNormalizedRelayUrls(full: string[], provisional: string[]): string[] {
  const comparisonKey = (relay: string): string => normalizeUrl(relay) || relay.trim()

  // Keys that must not be emitted: everything provisional, plus whatever was already emitted.
  const excluded = new Set<string>()
  for (const relay of provisional) {
    const key = comparisonKey(relay)
    if (key) excluded.add(key)
  }

  return full.filter((relay) => {
    const key = comparisonKey(relay)
    if (!key || excluded.has(key)) return false
    excluded.add(key)
    return true
  })
}