3 changed files with 299 additions and 125 deletions
@ -0,0 +1,70 @@
@@ -0,0 +1,70 @@
|
||||
import { describe, expect, it } from 'vitest' |
||||
import { |
||||
htmlLooksLikeImwaldAppShell, |
||||
isImwaldDefaultOpenGraphDescription, |
||||
isImwaldDefaultOpenGraphTitle, |
||||
parseOpenGraphFromHtml |
||||
} from './open-graph' |
||||
|
||||
// Fixture: the markup Imwald's own SPA index serves (default title, default OG tags, boot splash).
const IMWALD_INDEX_SNIPPET = [
  '<!doctype html>',
  '<html><head>',
  '<title>Imwald</title>',
  '<meta property="og:title" content="Imwald" />',
  '<meta property="og:description" content="Imwald — a user-friendly Nostr client focused on relay feed browsing, publications, and relay discovery." />',
  '<meta property="og:image" content="https://jumble.imwald.eu/og-image.png" />',
  '</head><body><div id="root"><div id="imwald-boot-splash"></div></div></body></html>'
].join('\n')
||||
|
||||
// Fixture: an ordinary third-party page carrying its own title, description, image and audio OG tags.
const FOUNTAIN_SNIPPET = [
  '<!doctype html>',
  '<html><head>',
  '<meta property="og:title" content="Episode Title | Fountain" />',
  '<meta property="og:description" content="A podcast episode" />',
  '<meta property="og:image" content="https://fountain.fm/cover.jpg" />',
  '<meta property="og:audio" content="https://fountain.fm/audio.mp3" />',
  '</head><body></body></html>'
].join('\n')
||||
|
||||
// Behavioral tests for the Open Graph helpers exported from './open-graph'.
describe('open-graph', () => {
  // A page URL on a host other than the app's own; used wherever the fetched page is "external".
  const externalEpisodeUrl = 'https://fountain.fm/episode/x'

  it('detects Imwald app shell HTML', () => {
    const shellVerdict = htmlLooksLikeImwaldAppShell(IMWALD_INDEX_SNIPPET)
    expect(shellVerdict).toBe(true)

    const regularPageVerdict = htmlLooksLikeImwaldAppShell(FOUNTAIN_SNIPPET)
    expect(regularPageVerdict).toBe(false)
  })

  it('returns empty metadata for app shell on external URLs', () => {
    const metadata = parseOpenGraphFromHtml(IMWALD_INDEX_SNIPPET, externalEpisodeUrl)
    expect(metadata).toEqual({})
  })

  it('parses og and twitter tags from a normal page', () => {
    const expected = {
      title: 'Episode Title | Fountain',
      description: 'A podcast episode',
      image: 'https://fountain.fm/cover.jpg',
      audio: 'https://fountain.fm/audio.mp3'
    }
    const metadata = parseOpenGraphFromHtml(FOUNTAIN_SNIPPET, externalEpisodeUrl)
    expect(metadata).toEqual(expected)
  })

  it('strips Imwald default title even without trailing space', () => {
    const bareDefault = isImwaldDefaultOpenGraphTitle('Imwald')
    expect(bareDefault).toBe(true)

    const unrelated = isImwaldDefaultOpenGraphTitle('Episode Title')
    expect(unrelated).toBe(false)
  })

  it('strips Imwald default description case-insensitively', () => {
    const blurb = 'Imwald — a user-friendly Nostr client focused on relay feed browsing.'
    expect(isImwaldDefaultOpenGraphDescription(blurb)).toBe(true)
  })

  it('filters jumble og-image on external hosts while keeping other fields', () => {
    // Genuine title/description, but the image is the app's default OG image.
    const html = [
      '<html><head>',
      '<meta property="og:title" content="Real Site" />',
      '<meta property="og:description" content="About the site" />',
      '<meta property="og:image" content="https://jumble.imwald.eu/og-image.png" />',
      '</head></html>'
    ].join('\n')

    const metadata = parseOpenGraphFromHtml(html, 'https://example.com/page')

    expect(metadata).toEqual({
      title: 'Real Site',
      description: 'About the site',
      image: undefined,
      audio: undefined
    })
  })
})
||||
@ -0,0 +1,171 @@
@@ -0,0 +1,171 @@
|
||||
import { TWebMetadata } from '@/types' |
||||
import logger from '@/lib/logger' |
||||
|
||||
/**
 * Reports whether fetched HTML looks like a local dev-server app shell rather than the target page.
 *
 * Only the first 8000 characters are scanned, looking for any of a fixed set of
 * dev-tooling marker strings (Vite client paths, React refresh hooks).
 *
 * @param html - Raw HTML text of the fetched document.
 * @returns `true` when at least one marker occurs in the scanned prefix.
 */
export function htmlLooksLikeLocalDevAppShell(html: string): boolean {
  const devMarkers = ['injectIntoGlobalHook', '/@vite/', '@vite/client', '@react-refresh']
  const scanned = html.slice(0, 8000)
  return devMarkers.some((marker) => scanned.includes(marker))
}
||||
|
||||
/**
 * Reports whether fetched HTML is Imwald's own SPA index (served when the OG proxy is missing or
 * misrouted) instead of the page that was requested.
 *
 * A local dev app shell counts as a match. Otherwise the first 16000 characters must contain
 * either the boot-splash id together with the literal `<title>Imwald</title>`, or the default
 * og-image URL together with an `og:title` meta tag whose content is "Imwald".
 *
 * @param html - Raw HTML text of the fetched document.
 * @returns `true` when the document looks like the app shell.
 */
export function htmlLooksLikeImwaldAppShell(html: string): boolean {
  if (htmlLooksLikeLocalDevAppShell(html)) {
    return true
  }

  const scanned = html.slice(0, 16_000)

  const hasSplashAndTitle =
    scanned.includes('imwald-boot-splash') && scanned.includes('<title>Imwald</title>')
  const hasDefaultOgTags =
    scanned.includes('jumble.imwald.eu/og-image') &&
    /property="og:title"[^>]*content="Imwald"/i.test(scanned)

  return hasSplashAndTitle || hasDefaultOgTags
}
||||
|
||||
/**
 * Reports whether a title is one of the app's own default titles rather than the target page's.
 *
 * After trimming, a title matches when it is exactly "imwald" (any case), contains the
 * case-sensitive substring "Imwald " (with trailing space), or contains a
 * "Jumble - Imwald Edition" / "Jumble Imwald Edition" variant (any case).
 *
 * @param title - Candidate title; `null`, `undefined` and empty strings never match.
 * @returns `true` when the title should be treated as an Imwald default.
 */
export function isImwaldDefaultOpenGraphTitle(title: string | null | undefined): boolean {
  const trimmed = title?.trim()
  if (!trimmed) {
    return false
  }

  if (trimmed.includes('Imwald ')) {
    return true
  }

  const defaultTitlePatterns = [
    /^imwald$/i,
    /jumble\s*-\s*imwald edition/i,
    /jumble imwald edition/i
  ]
  return defaultTitlePatterns.some((pattern) => pattern.test(trimmed))
}
||||
|
||||
/**
 * Reports whether a description is the app's own default blurb rather than the target page's.
 *
 * The check is a case-insensitive search for the distinctive middle part of the default text,
 * so leading/trailing wording may differ.
 *
 * @param description - Candidate description; `null`, `undefined` and empty strings never match.
 * @returns `true` when the description contains the default blurb.
 */
export function isImwaldDefaultOpenGraphDescription(description: string | null | undefined): boolean {
  const defaultBlurb = /user-friendly nostr client focused on relay feed browsing/i
  return description ? defaultBlurb.test(description) : false
}
||||
|
||||
/**
 * Returns the first non-blank `content` value among the elements matched by `selectors`.
 *
 * Selectors are tried in order and only the first element matching each selector is inspected.
 * The `content` attribute is preferred; the element's `content` property is used when the
 * attribute is absent.
 *
 * @param doc - Parsed document to query.
 * @param selectors - CSS selectors in priority order.
 * @returns The trimmed content, or `undefined` when no selector yields a non-blank value.
 */
function metaContent(doc: Document, selectors: string[]): string | undefined {
  for (const selector of selectors) {
    const node = doc.querySelector(selector) as HTMLMetaElement | null
    if (!node) continue

    const raw = node.getAttribute('content') ?? node.content
    // Optional chaining kept on purpose: a matched non-<meta> element may lack `content` at runtime.
    const trimmed = raw?.trim()
    if (trimmed) return trimmed
  }
  return undefined
}
||||
|
||||
/**
 * Resolves a possibly-relative URL taken from page metadata against the page's own URL.
 *
 * Values already starting with `http://` or `https://` are returned untouched. Everything else
 * is resolved by the standard URL parser, so root-relative (`/a.png`), document-relative
 * (`a.png`, `../a.png`), query-only (`?v=2`) and protocol-relative (`//cdn.example.com/a.png`)
 * references are all handled. The previous string concatenation turned protocol-relative
 * values into `https://host//cdn.example.com/a.png` and left `../` segments unresolved.
 *
 * Absolute values using another scheme come back as the parser serializes them; callers that
 * require http(s) must check the result themselves.
 *
 * @param value - URL string found in a meta tag.
 * @param pageUrl - URL of the page the tag came from, used as the base.
 * @returns The absolute URL, or `value` unchanged when it cannot be resolved
 *   (for example when `pageUrl` is not a valid URL).
 */
function resolveMaybeRelativeUrl(value: string, pageUrl: string): string {
  // Keep already-absolute http(s) values byte-for-byte (no normalization by the URL parser).
  if (/^https?:\/\//.test(value)) {
    return value
  }
  try {
    return new URL(value, pageUrl).href
  } catch {
    // Best effort: an unparsable base or reference leaves the value as it was found.
    return value
  }
}
||||
|
||||
/**
 * Reports whether an image URL points at a favicon rather than a real preview image.
 *
 * Matching is case-insensitive: any URL containing a path segment that starts with `favicon`,
 * or ending in `/favicon.ico` or `/favicon.svg`, counts as a favicon.
 *
 * @param image - Image URL to inspect.
 * @returns `true` when the URL looks like a favicon.
 */
function isFaviconOgImage(image: string): boolean {
  const normalized = image.toLowerCase()
  if (normalized.includes('/favicon')) {
    return true
  }
  return ['/favicon.ico', '/favicon.svg'].some((suffix) => normalized.endsWith(suffix))
}
||||
|
||||
/**
 * Parses Open Graph / Twitter / plain description meta tags out of fetched HTML.
 *
 * Steps, in order:
 * 1. HTML recognised as the Imwald app shell yields `{}` immediately.
 * 2. Title, description, image and audio are read from meta tags (title falls back to `<title>`);
 *    placeholder titles such as "Loading..." are discarded.
 * 3. Image and audio URLs are resolved against `pageUrl`; favicon images and audio that is
 *    still not http(s) after resolution are discarded.
 * 4. When `pageUrl` is not on `jumble.imwald.eu`, Imwald's own default title, description and
 *    og-image are discarded.
 *
 * @param html - Raw HTML text of the fetched document.
 * @param pageUrl - URL the HTML was fetched for; used as base URL and for the host check.
 * @returns The extracted metadata; fields that were missing or filtered out are `undefined`.
 */
export function parseOpenGraphFromHtml(html: string, pageUrl: string): TWebMetadata {
  if (htmlLooksLikeImwaldAppShell(html)) {
    logger.debug('[OpenGraph] Ignoring Imwald app shell HTML', { pageUrl })
    return {}
  }

  const doc = new DOMParser().parseFromString(html, 'text/html')
  const pick = (selectors: string[]): string | undefined => metaContent(doc, selectors)
  const isHttpUrl = (candidate: string): boolean => /^https?:\/\//.test(candidate)

  // Title: meta tags first, then the <title> element (`||` so an empty <title> becomes undefined).
  let title =
    pick([
      'meta[property="og:title"]',
      'meta[name="og:title"]',
      'meta[name="twitter:title"]',
      'meta[property="twitter:title"]'
    ]) ?? (doc.querySelector('title')?.textContent?.trim() || undefined)

  // Interstitial pages ("Loading...", "Redirecting…") carry no useful title.
  const placeholderTitle = /^(Redirecting|Loading|Please wait|Redirect)(\.\.\.|…)?$/i
  if (title && (placeholderTitle.test(title) || title === '...' || title === '…')) {
    title = undefined
  }

  let description = pick([
    'meta[property="og:description"]',
    'meta[name="og:description"]',
    'meta[name="twitter:description"]',
    'meta[property="twitter:description"]',
    'meta[name="description"]'
  ])

  let image = pick([
    'meta[property="og:image"]',
    'meta[name="og:image"]',
    'meta[property="og:image:url"]',
    'meta[property="og:image:secure_url"]',
    'meta[name="twitter:image"]',
    'meta[property="twitter:image"]'
  ])

  let audio = pick([
    'meta[property="og:audio"]',
    'meta[property="og:audio:url"]',
    'meta[property="og:audio:secure_url"]',
    'meta[name="og:audio"]'
  ])

  if (image) {
    try {
      image = resolveMaybeRelativeUrl(image, pageUrl)
      const looksLikeFavicon = isFaviconOgImage(image)
      if (looksLikeFavicon) {
        logger.warn('[OpenGraph] Filtered favicon from OG image', { pageUrl, image })
        image = undefined
      }
    } catch (error) {
      logger.warn('[OpenGraph] Failed to resolve image URL', { image, pageUrl, error })
      image = undefined
    }
  }

  // Audio must end up as an absolute http(s) URL or it is dropped.
  if (audio && !isHttpUrl(audio)) {
    try {
      const resolvedAudio = resolveMaybeRelativeUrl(audio, pageUrl)
      audio = isHttpUrl(resolvedAudio) ? resolvedAudio : undefined
    } catch {
      audio = undefined
    }
  }

  try {
    const { hostname } = new URL(pageUrl)
    if (hostname !== 'jumble.imwald.eu') {
      // On any other host, the app's own defaults mean the wrong document was served.
      if (isImwaldDefaultOpenGraphTitle(title)) title = undefined
      if (isImwaldDefaultOpenGraphDescription(description)) description = undefined
      if (image?.includes('jumble.imwald.eu/og-image')) image = undefined

      // NOTE(review): this also fires when the page had no metadata to begin with.
      const nothingLeft = [title, description, image, audio].every((field) => !field)
      if (nothingLeft) {
        logger.debug('[OpenGraph] Stripped Imwald default tags for external URL', {
          url: pageUrl,
          hostname
        })
      }
    }
  } catch {
    /* ignore */
  }

  return { title, description, image, audio }
}
||||
Loading…
Reference in new issue