3 changed files with 299 additions and 125 deletions
@ -0,0 +1,70 @@ |
|||||||
|
import { describe, expect, it } from 'vitest' |
||||||
|
import { |
||||||
|
htmlLooksLikeImwaldAppShell, |
||||||
|
isImwaldDefaultOpenGraphDescription, |
||||||
|
isImwaldDefaultOpenGraphTitle, |
||||||
|
parseOpenGraphFromHtml |
||||||
|
} from './open-graph' |
||||||
|
|
||||||
|
/** Imwald's own SPA index: default title/OG tags plus the boot-splash element inside #root. */
const IMWALD_INDEX_SNIPPET = `<!doctype html>
<html><head>
<title>Imwald</title>
<meta property="og:title" content="Imwald" />
<meta property="og:description" content="Imwald — a user-friendly Nostr client focused on relay feed browsing, publications, and relay discovery." />
<meta property="og:image" content="https://jumble.imwald.eu/og-image.png" />
</head><body><div id="root"><div id="imwald-boot-splash"></div></div></body></html>`

/** A third-party page that exposes og:title, og:description, og:image and og:audio. */
const FOUNTAIN_SNIPPET = `<!doctype html>
<html><head>
<meta property="og:title" content="Episode Title | Fountain" />
<meta property="og:description" content="A podcast episode" />
<meta property="og:image" content="https://fountain.fm/cover.jpg" />
<meta property="og:audio" content="https://fountain.fm/audio.mp3" />
</head><body></body></html>`

describe('open-graph', () => {
  it('detects Imwald app shell HTML', () => {
    expect(htmlLooksLikeImwaldAppShell(IMWALD_INDEX_SNIPPET)).toBe(true)
    expect(htmlLooksLikeImwaldAppShell(FOUNTAIN_SNIPPET)).toBe(false)
  })

  it('returns empty metadata for app shell on external URLs', () => {
    expect(parseOpenGraphFromHtml(IMWALD_INDEX_SNIPPET, 'https://fountain.fm/episode/x')).toEqual({})
  })

  // The fixture only carries og:* tags, so the title says exactly that.
  it('parses og tags from a normal page', () => {
    expect(parseOpenGraphFromHtml(FOUNTAIN_SNIPPET, 'https://fountain.fm/episode/x')).toEqual({
      title: 'Episode Title | Fountain',
      description: 'A podcast episode',
      image: 'https://fountain.fm/cover.jpg',
      audio: 'https://fountain.fm/audio.mp3'
    })
  })

  // Covers the twitter:* selectors, which no other fixture exercises.
  it('falls back to twitter tags when og tags are absent', () => {
    const html = `<html><head>
<meta name="twitter:title" content="Tweet Title" />
<meta name="twitter:description" content="Tweet description" />
<meta name="twitter:image" content="https://example.com/card.png" />
</head></html>`
    expect(parseOpenGraphFromHtml(html, 'https://example.com/page')).toEqual({
      title: 'Tweet Title',
      description: 'Tweet description',
      image: 'https://example.com/card.png',
      audio: undefined
    })
  })

  it('strips Imwald default title even without trailing space', () => {
    expect(isImwaldDefaultOpenGraphTitle('Imwald')).toBe(true)
    expect(isImwaldDefaultOpenGraphTitle('Episode Title')).toBe(false)
  })

  it('strips Imwald default description case-insensitively', () => {
    expect(
      isImwaldDefaultOpenGraphDescription(
        'Imwald — a user-friendly Nostr client focused on relay feed browsing.'
      )
    ).toBe(true)
    // Different casing throughout, so the `i` flag is genuinely exercised.
    expect(
      isImwaldDefaultOpenGraphDescription(
        'IMWALD — A USER-FRIENDLY NOSTR CLIENT FOCUSED ON RELAY FEED BROWSING.'
      )
    ).toBe(true)
    // Unrelated descriptions must survive.
    expect(isImwaldDefaultOpenGraphDescription('A podcast episode')).toBe(false)
  })

  it('filters jumble og-image on external hosts while keeping other fields', () => {
    const html = `<html><head>
<meta property="og:title" content="Real Site" />
<meta property="og:description" content="About the site" />
<meta property="og:image" content="https://jumble.imwald.eu/og-image.png" />
</head></html>`
    expect(parseOpenGraphFromHtml(html, 'https://example.com/page')).toEqual({
      title: 'Real Site',
      description: 'About the site',
      image: undefined,
      audio: undefined
    })
  })
})
||||||
@ -0,0 +1,171 @@ |
|||||||
|
import { TWebMetadata } from '@/types' |
||||||
|
import logger from '@/lib/logger' |
||||||
|
|
||||||
|
/**
 * Reports whether the HTML contains markers of a Vite/React development shell
 * instead of the page that was actually requested.
 *
 * Only the first 8000 characters are inspected.
 *
 * @param html - Raw HTML text of the fetched response.
 * @returns `true` when any of the dev-server markers occurs in the inspected prefix.
 */
export function htmlLooksLikeLocalDevAppShell(html: string): boolean {
  const prefix = html.slice(0, 8000)
  const devMarkers = ['injectIntoGlobalHook', '/@vite/', '@vite/client', '@react-refresh']
  return devMarkers.some((marker) => prefix.includes(marker))
}
||||||
|
|
||||||
|
/**
 * Reports whether the HTML is Imwald's own SPA index rather than the target page
 * (per the original author's note, this is what comes back when the OG proxy is
 * missing or misrouted).
 *
 * The HTML counts as the app shell when any of the following holds:
 * - `htmlLooksLikeLocalDevAppShell` recognises it as a dev shell;
 * - within the first 16000 characters, both `imwald-boot-splash` and
 *   `<title>Imwald</title>` appear;
 * - within the first 16000 characters, the default `jumble.imwald.eu/og-image`
 *   reference appears together with an `og:title` whose content is `Imwald`.
 *
 * @param html - Raw HTML text of the fetched response.
 * @returns `true` when the HTML looks like the Imwald app shell.
 */
export function htmlLooksLikeImwaldAppShell(html: string): boolean {
  if (htmlLooksLikeLocalDevAppShell(html)) {
    return true
  }

  const prefix = html.slice(0, 16_000)

  const hasSplashAndTitle =
    prefix.includes('imwald-boot-splash') && prefix.includes('<title>Imwald</title>')

  return (
    hasSplashAndTitle ||
    (prefix.includes('jumble.imwald.eu/og-image') &&
      /property="og:title"[^>]*content="Imwald"/i.test(prefix))
  )
}
||||||
|
|
||||||
|
/**
 * Reports whether a title is one of Imwald's default Open Graph titles.
 *
 * After trimming, a title matches when it is exactly `imwald` (any casing),
 * contains the case-sensitive substring `Imwald ` (with the trailing space), or
 * contains a "Jumble - Imwald Edition" / "Jumble Imwald Edition" variant (any casing).
 *
 * @param title - Candidate title; `null`, `undefined` and empty values never match.
 * @returns `true` when the title should be treated as the Imwald default.
 */
export function isImwaldDefaultOpenGraphTitle(title: string | null | undefined): boolean {
  if (!title) return false

  const trimmed = title.trim()
  if (/^imwald$/i.test(trimmed)) return true
  if (trimmed.includes('Imwald ')) return true

  const editionPatterns = [/jumble\s*-\s*imwald edition/i, /jumble imwald edition/i]
  return editionPatterns.some((pattern) => pattern.test(trimmed))
}
||||||
|
|
||||||
|
/**
 * Reports whether a description contains Imwald's default Open Graph blurb.
 *
 * The match is a case-insensitive substring test, so surrounding text does not matter.
 *
 * @param description - Candidate description; `null`, `undefined` and `''` never match.
 * @returns `true` when the default blurb is present.
 */
export function isImwaldDefaultOpenGraphDescription(description: string | null | undefined): boolean {
  const defaultBlurb = /user-friendly nostr client focused on relay feed browsing/i
  return description ? defaultBlurb.test(description) : false
}
||||||
|
|
||||||
|
/**
 * Returns the trimmed `content` of the first selector that yields a non-blank value.
 *
 * Selectors are tried in order, and only the first element matching each selector
 * is examined. The `content` attribute is read first; the element's `content`
 * property is used when the attribute is absent.
 *
 * @param doc - Parsed document to query.
 * @param selectors - CSS selectors in priority order.
 * @returns The trimmed content, or `undefined` when no selector produces a non-blank value.
 */
function metaContent(doc: Document, selectors: string[]): string | undefined {
  for (const selector of selectors) {
    const node = doc.querySelector(selector)
    if (!node) continue

    const raw = node.getAttribute('content') ?? (node as HTMLMetaElement).content
    const trimmed = raw?.trim()
    if (trimmed) {
      return trimmed
    }
  }
  return undefined
}
||||||
|
|
||||||
|
/**
 * Resolves a possibly relative URL taken from a meta tag against the page URL.
 *
 * - Values that already start with `http://` or `https://` are returned unchanged.
 * - Everything else is resolved with the WHATWG `URL` constructor, so path-absolute
 *   (`/img.png`), path-relative (`img.png`, `../img.png`) and protocol-relative
 *   (`//cdn.example/img.png`) references all resolve correctly.
 * - If the resolved URL is not http(s) (for example a `data:` URI), or if either
 *   input cannot be parsed, the original `value` is returned as-is so callers can
 *   decide what to do with it.
 *
 * @param value - URL or URL reference read from the document.
 * @param pageUrl - Absolute URL of the page the document was fetched from.
 * @returns An absolute http(s) URL when resolvable; otherwise `value` unchanged.
 */
function resolveMaybeRelativeUrl(value: string, pageUrl: string): string {
  // Keep already-absolute values byte-identical instead of re-serialising them.
  if (/^https?:\/\//.test(value)) return value

  try {
    const base = new URL(pageUrl)
    // Never propagate credentials embedded in the page URL into derived asset URLs.
    base.username = ''
    base.password = ''

    const resolved = new URL(value, base)
    if (resolved.protocol !== 'http:' && resolved.protocol !== 'https:') {
      return value
    }
    return resolved.href
  } catch {
    // Unparseable page URL or value: best effort, hand back the raw value.
    return value
  }
}
||||||
|
|
||||||
|
/**
 * Reports whether an image URL points at a favicon.
 *
 * The check is a case-insensitive search for the substring `/favicon`. This
 * already covers URLs ending in `/favicon.ico` or `/favicon.svg`, so no separate
 * suffix checks are needed.
 *
 * @param image - Image URL to inspect.
 * @returns `true` when the URL contains `/favicon` in any casing.
 */
function isFaviconOgImage(image: string): boolean {
  return image.toLowerCase().includes('/favicon')
}
||||||
|
|
||||||
|
/**
 * Parse Open Graph / Twitter / description meta tags from fetched HTML.
 *
 * Processing steps:
 * 1. If the HTML looks like the Imwald app shell, return an empty object.
 * 2. Read title, description, image and audio from the first matching meta tag
 *    (Open Graph selectors first, then Twitter, then generic ones); the title
 *    falls back to the `<title>` element.
 * 3. Drop placeholder titles such as "Loading..." or "Redirecting…".
 * 4. Resolve relative image/audio URLs against `pageUrl`; drop favicon images and
 *    audio URLs that are still not http(s) after resolution.
 * 5. For pages not hosted on `jumble.imwald.eu`, strip Imwald's default title,
 *    description and og-image so they are not attributed to the external page.
 *
 * @param html - Raw HTML of the fetched page.
 * @param pageUrl - URL the HTML was fetched for; used for URL resolution and the host check.
 * @returns The extracted metadata; fields that were not found or were filtered are `undefined`.
 */
export function parseOpenGraphFromHtml(html: string, pageUrl: string): TWebMetadata {
  if (htmlLooksLikeImwaldAppShell(html)) {
    logger.debug('[OpenGraph] Ignoring Imwald app shell HTML', { pageUrl })
    return {}
  }

  const parser = new DOMParser()
  const doc = parser.parseFromString(html, 'text/html')

  let title = metaContent(doc, [
    'meta[property="og:title"]',
    'meta[name="og:title"]',
    'meta[name="twitter:title"]',
    'meta[property="twitter:title"]'
  ])
  if (!title) {
    const titleTag = doc.querySelector('title')?.textContent?.trim()
    if (titleTag) title = titleTag
  }
  if (title) {
    // Interstitial/placeholder titles carry no information about the page.
    if (
      /^(Redirecting|Loading|Please wait|Redirect)(\.\.\.|…)?$/i.test(title) ||
      title === '...' ||
      title === '…'
    ) {
      title = undefined
    }
  }

  let description = metaContent(doc, [
    'meta[property="og:description"]',
    'meta[name="og:description"]',
    'meta[name="twitter:description"]',
    'meta[property="twitter:description"]',
    'meta[name="description"]'
  ])

  let image = metaContent(doc, [
    'meta[property="og:image"]',
    'meta[name="og:image"]',
    'meta[property="og:image:url"]',
    'meta[property="og:image:secure_url"]',
    'meta[name="twitter:image"]',
    'meta[property="twitter:image"]'
  ])

  let audio = metaContent(doc, [
    'meta[property="og:audio"]',
    'meta[property="og:audio:url"]',
    'meta[property="og:audio:secure_url"]',
    'meta[name="og:audio"]'
  ])

  if (image) {
    try {
      image = resolveMaybeRelativeUrl(image, pageUrl)
      if (isFaviconOgImage(image)) {
        logger.warn('[OpenGraph] Filtered favicon from OG image', { pageUrl, image })
        image = undefined
      }
    } catch (error) {
      logger.warn('[OpenGraph] Failed to resolve image URL', { image, pageUrl, error })
      image = undefined
    }
  }

  if (audio && !audio.match(/^https?:\/\//)) {
    try {
      audio = resolveMaybeRelativeUrl(audio, pageUrl)
      // Resolution may hand back the raw value; only keep real http(s) URLs.
      if (!audio.match(/^https?:\/\//)) audio = undefined
    } catch {
      audio = undefined
    }
  }

  try {
    const urlObj = new URL(pageUrl)
    const isAppCanonicalHost = urlObj.hostname === 'jumble.imwald.eu'
    if (!isAppCanonicalHost) {
      // Track whether anything was actually removed so the log below is truthful.
      let strippedDefaults = false
      if (isImwaldDefaultOpenGraphTitle(title)) {
        title = undefined
        strippedDefaults = true
      }
      if (isImwaldDefaultOpenGraphDescription(description)) {
        description = undefined
        strippedDefaults = true
      }
      if (image?.includes('jumble.imwald.eu/og-image')) {
        image = undefined
        strippedDefaults = true
      }
      // Previously this fired for any external page without metadata, even when
      // nothing had been stripped.
      if (strippedDefaults && !title && !description && !image && !audio) {
        logger.debug('[OpenGraph] Stripped Imwald default tags for external URL', {
          url: pageUrl,
          hostname: urlObj.hostname
        })
      }
    }
  } catch {
    // pageUrl is not a parseable URL: the host check cannot run, so the
    // metadata is returned without default-stripping.
  }

  return { title, description, image, audio }
}
||||||
Loading…
Reference in new issue