You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

250 lines
7.7 KiB

import { Event } from 'nostr-tools'
import { getImetaInfosFromEvent } from '@/lib/event'
import {
blossomSha256FromBlobUrl,
cleanUrl,
isImage,
isMedia,
isAudio,
isVideo,
isHlsPlaylistUrl,
isBlossomBudBlobUrl
} from '@/lib/url'
import { TImetaInfo } from '@/types'
import mediaUpload from './media-upload.service'
import { getImetaInfoFromImetaTag } from '@/lib/tag'
/**
 * Tells whether an already-cleaned URL points at something we may embed or pull out of a note body.
 * Covers images, generic media, video (incl. video-only extensions like .3gp), audio, HLS manifests
 * and Blossom blob URLs.
 */
function isEmbeddableMediaUrl(cleaned: string): boolean {
  // Evaluated in order; `.some` stops at the first detector that accepts the URL.
  const detectors = [isImage, isMedia, isVideo, isAudio, isHlsPlaylistUrl, isBlossomBudBlobUrl]
  return detectors.some((accepts) => accepts(cleaned))
}
/**
 * Result of `extractAllMediaFromEvent`: the media found for an event, both as one
 * combined list and split by kind.
 */
export interface ExtractedMedia {
/** Items classified as images. */
images: TImetaInfo[]
/** Items classified as videos. */
videos: TImetaInfo[]
/** Items classified as audio. */
audio: TImetaInfo[]
/** Every extracted item; an item that fits none of the three kinds appears only here. */
all: TImetaInfo[]
}
/** MIME type that marks an HLS playlist even when the URL has no playlist extension. */
const HLS_PLAYLIST_MIME = 'application/vnd.apple.mpegurl'

/** Picks a wildcard MIME type for a cleaned URL from what the URL helpers recognise. */
function guessMimeFromUrl(cleaned: string): string {
  if (isImage(cleaned)) return 'image/*'
  // Blossom blob URLs without a recognised image extension default to image.
  if (isBlossomBudBlobUrl(cleaned)) return 'image/*'
  if (isHlsPlaylistUrl(cleaned)) return 'video/*'
  if (isAudio(cleaned)) return 'audio/*'
  if (isVideo(cleaned)) return 'video/*'
  return 'media/*'
}

/**
 * Builds the key under which two URLs count as the same blob: the Blossom sha256 when the URL
 * carries one, else a hash-named image filename (32+ hex chars plus image extension), else the
 * cleaned URL itself. Returns null when the URL cannot be cleaned.
 */
function mediaIdentityKey(url: string): string | null {
  try {
    const cleaned = cleanUrl(url)
    if (!cleaned) return null
    const sha256 = blossomSha256FromBlobUrl(cleaned)
    if (sha256) return `blossom-sha256:${sha256}`
    const filename = new URL(cleaned).pathname.split('/').pop() || ''
    if (filename && /^[a-f0-9]{32,}\.(png|jpg|jpeg|gif|webp|svg|avif|apng)$/i.test(filename)) {
      return filename.toLowerCase()
    }
    return cleaned
  } catch {
    // `new URL` rejected the value; fall back to plain cleaned-URL identity.
    return cleanUrl(url) || null
  }
}

/** Splits the collected media into images / videos / audio; unclassifiable items stay in `all` only. */
function categorizeMedia(allMedia: TImetaInfo[]): ExtractedMedia {
  const images: TImetaInfo[] = []
  const videos: TImetaInfo[] = []
  const audio: TImetaInfo[] = []
  for (const media of allMedia) {
    const mime = media.m
    if (mime?.startsWith('image/') || isImage(media.url)) {
      images.push(media)
    } else if (
      mime?.startsWith('video/') ||
      // HLS playlists are admitted by mime alone, so they must also be classified by mime alone.
      mime === HLS_PLAYLIST_MIME ||
      isVideo(media.url) ||
      isHlsPlaylistUrl(media.url)
    ) {
      videos.push(media)
    } else if (mime?.startsWith('audio/') || isAudio(media.url)) {
      audio.push(media)
    } else if (isBlossomBudBlobUrl(media.url)) {
      // Blossom blob with no decisive mime or extension: default to image.
      images.push(media)
    }
  }
  return { images, videos, audio, all: allMedia }
}

/**
 * Unified extraction of all media (images, videos, audio) referenced by an event.
 *
 * Sources, in discovery order (the first occurrence of a cleaned URL wins):
 *  1. well-formed `imeta` tags — full metadata (alt, dim, blurHash, ...) is kept;
 *  2. URLs found inside `imeta` tags that the regular imeta parser rejects;
 *  3. the first `image` tag;
 *  4. `r` tags, but only those pointing at an HLS playlist — other `r` tags are treated as
 *     references, not media embeds;
 *  5. the note body: markdown images first, then bare http(s) URLs.
 *
 * After collection every item is enriched with metadata from the event's `imeta` tags that
 * identify the same blob (same URL, same Blossom hash, same hash-named file, or matching `x`).
 * Only when no event tag matches is the media-upload service asked for a tag for that URL, so
 * metadata from the event always takes precedence over the upload service.
 *
 * @param event   Event whose tags (and, by default, content) are scanned.
 * @param content Optional text to scan instead of `event.content`.
 * @returns The media split by kind plus the complete list in `all`.
 */
export function extractAllMediaFromEvent(
  event: Event,
  content?: string
): ExtractedMedia {
  const textBody = content ?? event.content ?? ''
  const seenUrls = new Set<string>()
  const allMedia: TImetaInfo[] = []

  // Adds a URL once (keyed by its cleaned form) if it is embeddable media.
  const addMedia = (url: string, pubkey?: string, mimeType?: string): void => {
    if (!url) return
    const cleaned = cleanUrl(url)
    if (!cleaned || seenUrls.has(cleaned)) return
    if (!isEmbeddableMediaUrl(cleaned)) return
    seenUrls.add(cleaned)
    allMedia.push({
      url: cleaned,
      pubkey: pubkey || event.pubkey,
      m: mimeType || guessMimeFromUrl(cleaned)
    })
  }

  // 1. Well-formed imeta tags (keep full metadata: alt, dim, blurHash, etc.)
  const imetaInfos = getImetaInfosFromEvent(event)
  for (const info of imetaInfos) {
    const cleaned = cleanUrl(info.url)
    if (!cleaned || seenUrls.has(cleaned)) continue
    const hasNip94Signals = Boolean(info.blurHash || info.dim || info.x)
    const mimeSaysMedia =
      info.m?.startsWith('image/') ||
      info.m?.startsWith('video/') ||
      info.m?.startsWith('audio/') ||
      info.m === HLS_PLAYLIST_MIME
    const urlSaysMedia =
      isImage(info.url) ||
      isMedia(info.url) ||
      isVideo(info.url) ||
      isAudio(info.url) ||
      isHlsPlaylistUrl(info.url)
    // Blossom / NIP-94 URLs often have no file extension; metadata still identifies the blob.
    if (mimeSaysMedia || urlSaysMedia || (hasNip94Signals && !!info.url)) {
      seenUrls.add(cleaned)
      allMedia.push({ ...info, url: cleaned })
    }
  }

  // 2. Non-standard imeta layouts (no `url ` prefix, concatenated fields, etc.):
  //    scan every value of tags the regular parser rejects for http(s) URLs.
  for (const tag of event.tags) {
    if (tag[0] !== 'imeta') continue
    if (getImetaInfoFromImetaTag(tag, event.pubkey)) continue
    for (const part of tag.slice(1)) {
      if (typeof part !== 'string') continue
      for (const found of part.matchAll(/https?:\/\/[^\s<>"'[\]()]+/gi)) {
        addMedia(found[0], event.pubkey)
      }
    }
  }

  // 3. First `image` tag
  const imageTag = event.tags.find((tag) => tag[0] === 'image' && tag[1])
  if (imageTag?.[1]) {
    addMedia(imageTag[1])
  }

  // 4. Live streams in `r` tags (often next to imeta for poster / blurhash)
  for (const tag of event.tags) {
    if (tag[0] !== 'r' || !tag[1]) continue
    if (isHlsPlaylistUrl(cleanUrl(tag[1]) || tag[1])) {
      addMedia(tag[1], event.pubkey, 'video/*')
    }
  }

  // 5. Note content — callers may omit `content`; it defaults to `event.content`.
  //    `addMedia` already rejects anything that is not embeddable, so no pre-filter is needed.
  if (textBody) {
    // Markdown image syntax first: ![alt](url) or [![](url)](link), i.e. images inside links too.
    for (const found of textBody.matchAll(/!\[[^\]]*\]\(([^)]+)\)/g)) {
      if (found[1]) addMedia(found[1])
    }
    // Then any bare URL in the raw text.
    for (const found of textBody.matchAll(/https?:\/\/[^\s<>"']+/g)) {
      addMedia(found[0])
    }
  }

  // 6. Enrich collected media with imeta metadata (alt, dim, blurHash, m).
  //    Identity keys of the event's imeta tags are computed once up front.
  const imetaMatchers = imetaInfos.map((imeta) => {
    const url = cleanUrl(imeta.url)
    const x = imeta.x?.trim()
    return {
      imeta,
      url,
      key: url ? mediaIdentityKey(url) : null,
      keyFromX: x && /^[a-f0-9]{64}$/i.test(x) ? `blossom-sha256:${x.toLowerCase()}` : null
    }
  })

  allMedia.forEach((media, index) => {
    const mediaUrl = media.url
    const mediaKey = mediaIdentityKey(mediaUrl)
    let enriched = media
    let matchedEventImeta = false

    // Every matching event tag is merged in tag order, so a later tag overrides an earlier one.
    for (const { imeta, url, key, keyFromX } of imetaMatchers) {
      const sameBlob =
        (url && url === mediaUrl) ||
        (key && key === mediaKey) ||
        (keyFromX && keyFromX === mediaKey)
      if (sameBlob) {
        enriched = { ...enriched, ...imeta, url: mediaUrl }
        matchedEventImeta = true
      }
    }

    // Fall back to the media-upload service only when the event itself has nothing for this URL.
    if (!matchedEventImeta) {
      const uploadTag = mediaUpload.getImetaTagByUrl(mediaUrl)
      const uploadImeta = uploadTag ? getImetaInfoFromImetaTag(uploadTag, event.pubkey) : null
      if (uploadImeta) {
        enriched = { ...enriched, ...uploadImeta, url: mediaUrl }
      }
    }

    allMedia[index] = enriched
  })

  // 7. Split by kind
  return categorizeMedia(allMedia)
}