diff --git a/src/lib/open-graph.test.ts b/src/lib/open-graph.test.ts
new file mode 100644
index 00000000..99b677c8
--- /dev/null
+++ b/src/lib/open-graph.test.ts
@@ -0,0 +1,70 @@
+import { describe, expect, it } from 'vitest'
+import {
+ htmlLooksLikeImwaldAppShell,
+ isImwaldDefaultOpenGraphDescription,
+ isImwaldDefaultOpenGraphTitle,
+ parseOpenGraphFromHtml
+} from './open-graph'
+
+const IMWALD_INDEX_SNIPPET = `
+<!doctype html><html><head>
+<title>Imwald</title>
+<meta property="og:title" content="Imwald" />
+<meta property="og:image" content="https://jumble.imwald.eu/og-image.png" />
+</head><body><div id="imwald-boot-splash"></div></body></html>
+`
+
+const FOUNTAIN_SNIPPET = `
+<meta property="og:title" content="Episode Title | Fountain" />
+<meta name="twitter:title" content="Episode Title" />
+<meta name="twitter:description" content="A podcast episode" />
+<meta property="og:image" content="https://fountain.fm/cover.jpg" />
+<meta property="og:audio" content="https://fountain.fm/audio.mp3" />
+`
+
+describe('open-graph', () => {
+  it('detects Imwald app shell HTML', () => {
+    expect(htmlLooksLikeImwaldAppShell(IMWALD_INDEX_SNIPPET)).toBe(true)
+    expect(htmlLooksLikeImwaldAppShell(FOUNTAIN_SNIPPET)).toBe(false)
+  })
+
+  it('returns empty metadata for app shell on external URLs', () => {
+    expect(parseOpenGraphFromHtml(IMWALD_INDEX_SNIPPET, 'https://fountain.fm/episode/x')).toEqual({})
+  })
+
+  it('parses og and twitter tags from a normal page', () => {
+    expect(parseOpenGraphFromHtml(FOUNTAIN_SNIPPET, 'https://fountain.fm/episode/x')).toEqual({
+      title: 'Episode Title | Fountain',
+      description: 'A podcast episode',
+      image: 'https://fountain.fm/cover.jpg',
+      audio: 'https://fountain.fm/audio.mp3'
+    })
+  })
+
+  it('strips Imwald default title even without trailing space', () => {
+    expect(isImwaldDefaultOpenGraphTitle('Imwald')).toBe(true)
+    expect(isImwaldDefaultOpenGraphTitle('Episode Title')).toBe(false)
+  })
+
+  it('strips Imwald default description case-insensitively', () => {
+    expect(
+      isImwaldDefaultOpenGraphDescription(
+        'Imwald — a user-friendly Nostr client focused on relay feed browsing.'
+      )
+    ).toBe(true)
+  })
+
+  it('filters jumble og-image on external hosts while keeping other fields', () => {
+    const html = `
+<meta property="og:title" content="Real Site" />
+<meta property="og:description" content="About the site" />
+<meta property="og:image" content="https://jumble.imwald.eu/og-image.png" />
+`
+    expect(parseOpenGraphFromHtml(html, 'https://example.com/page')).toEqual({
+      title: 'Real Site',
+      description: 'About the site',
+      image: undefined,
+      audio: undefined
+    })
+  })
+})
diff --git a/src/lib/open-graph.ts b/src/lib/open-graph.ts
new file mode 100644
index 00000000..e285ee9f
--- /dev/null
+++ b/src/lib/open-graph.ts
@@ -0,0 +1,171 @@
+import { TWebMetadata } from '@/types'
+import logger from '@/lib/logger'
+
+/**
+ * Reports whether `html` carries Vite / React-refresh dev-server markers, meaning it is
+ * the local dev shell (or a similar SPA stub) instead of the requested page.
+ * Only the first 8000 characters are inspected.
+ */
+export function htmlLooksLikeLocalDevAppShell(html: string): boolean {
+  const prefix = html.slice(0, 8000)
+  const devMarkers = ['injectIntoGlobalHook', '/@vite/', '@vite/client', '@react-refresh']
+  return devMarkers.some((marker) => prefix.includes(marker))
+}
+
+/** True when `html` is a local dev shell or Imwald's SPA index (served when OG proxy is missing or misrouted). */
+export function htmlLooksLikeImwaldAppShell(html: string): boolean {
+  if (htmlLooksLikeLocalDevAppShell(html)) return true
+  // Imwald markers are searched in the first 16,000 characters only.
+  const prefix = html.slice(0, 16_000)
+  const hasBootSplash = prefix.includes('imwald-boot-splash') && prefix.includes('Imwald')
+  const hasStockOgTags =
+    prefix.includes('jumble.imwald.eu/og-image') && /property="og:title"[^>]*content="Imwald"/i.test(prefix)
+  return hasBootSplash || hasStockOgTags
+}
+
+/**
+ * True when `title` is one of Imwald's stock page titles: exactly "Imwald" (any case),
+ * text containing "Imwald " (case-sensitive), or a "Jumble - Imwald Edition" variant.
+ */
+export function isImwaldDefaultOpenGraphTitle(title: string | null | undefined): boolean {
+  const trimmed = title?.trim()
+  if (!trimmed) return false
+  const stockPatterns = [/^imwald$/i, /jumble\s*-\s*imwald edition/i, /jumble imwald edition/i]
+  return trimmed.includes('Imwald ') || stockPatterns.some((pattern) => pattern.test(trimmed))
+}
+
+/** True when `description` contains Imwald's stock tagline (case-insensitive match). */
+export function isImwaldDefaultOpenGraphDescription(description: string | null | undefined): boolean {
+  return description ? /user-friendly nostr client focused on relay feed browsing/i.test(description) : false
+}
+
+function metaContent(doc: Document, selectors: string[]): string | undefined {
+  for (const selector of selectors) { // tried in order; the first non-blank content wins
+    const node = doc.querySelector(selector) as HTMLMetaElement | null
+    const text = (node?.getAttribute('content') ?? node?.content)?.trim()
+    if (text) return text // returned trimmed
+  }
+  return undefined // no selector matched an element with usable content
+}
+
+/**
+ * Resolves a URL taken from a meta tag against the page it was found on.
+ * Absolute http(s) values are returned untouched; anything else (`/x`, `x`, `../x`,
+ * protocol-relative `//cdn.example/x`) is resolved with WHATWG `new URL(value, pageUrl)`.
+ * If that fails (e.g. `pageUrl` is not a valid URL), `value` is returned unchanged.
+ */
+function resolveMaybeRelativeUrl(value: string, pageUrl: string): string {
+  if (/^https?:\/\//.test(value)) return value
+  try {
+    // Manual concatenation would turn `//cdn.example/x.png` into `https://<page-host>//cdn.example/x.png`.
+    return new URL(value, pageUrl).href
+  } catch {
+    return value
+  }
+}
+
+/**
+ * True when an OG image URL points at a favicon: any path segment that starts with
+ * "favicon", compared case-insensitively.
+ */
+function isFaviconOgImage(image: string): boolean {
+  // `/favicon.ico` and `/favicon.svg` suffixes are already covered by this substring test.
+  return image.toLowerCase().includes('/favicon')
+}
+
+/**
+ * Extracts link-preview metadata (title, description, image, audio) from fetched HTML,
+ * reading Open Graph / Twitter meta tags with `<title>` and `meta[name="description"]`
+ * as fallbacks. Returns `{}` when `html` is detected as an app shell.
+ * `pageUrl` is used to resolve relative image/audio URLs and to decide whether
+ * Imwald's stock tags are kept (they are only kept for host `jumble.imwald.eu`).
+ */
+export function parseOpenGraphFromHtml(html: string, pageUrl: string): TWebMetadata {
+  if (htmlLooksLikeImwaldAppShell(html)) {
+    logger.debug('[OpenGraph] Ignoring Imwald app shell HTML', { pageUrl })
+    return {}
+  }
+
+  const doc = new DOMParser().parseFromString(html, 'text/html')
+  const absoluteHttp = /^https?:\/\//
+  const placeholderTitle = /^(Redirecting|Loading|Please wait|Redirect)(\.\.\.|…)?$/i
+
+  let title =
+    metaContent(doc, [
+      'meta[property="og:title"]',
+      'meta[name="og:title"]',
+      'meta[name="twitter:title"]',
+      'meta[property="twitter:title"]'
+    ]) ||
+    doc.querySelector('title')?.textContent?.trim() ||
+    undefined
+  if (title && (placeholderTitle.test(title) || title === '...' || title === '…')) {
+    title = undefined
+  }
+
+  let description = metaContent(doc, [
+    'meta[property="og:description"]',
+    'meta[name="og:description"]',
+    'meta[name="twitter:description"]',
+    'meta[property="twitter:description"]',
+    'meta[name="description"]'
+  ])
+
+  let image = metaContent(doc, [
+    'meta[property="og:image"]',
+    'meta[name="og:image"]',
+    'meta[property="og:image:url"]',
+    'meta[property="og:image:secure_url"]',
+    'meta[name="twitter:image"]',
+    'meta[property="twitter:image"]'
+  ])
+
+  let audio = metaContent(doc, [
+    'meta[property="og:audio"]',
+    'meta[property="og:audio:url"]',
+    'meta[property="og:audio:secure_url"]',
+    'meta[name="og:audio"]'
+  ])
+
+  if (image) {
+    try {
+      image = resolveMaybeRelativeUrl(image, pageUrl)
+      if (isFaviconOgImage(image)) {
+        logger.warn('[OpenGraph] Filtered favicon from OG image', { pageUrl, image })
+        image = undefined
+      }
+    } catch (error) {
+      logger.warn('[OpenGraph] Failed to resolve image URL', { image, pageUrl, error })
+      image = undefined
+    }
+  }
+
+  // Audio must resolve to an absolute http(s) URL, otherwise it is dropped.
+  if (audio && !absoluteHttp.test(audio)) {
+    try {
+      const resolvedAudio = resolveMaybeRelativeUrl(audio, pageUrl)
+      audio = absoluteHttp.test(resolvedAudio) ? resolvedAudio : undefined
+    } catch {
+      audio = undefined
+    }
+  }
+
+  try {
+    const { hostname } = new URL(pageUrl)
+    if (hostname !== 'jumble.imwald.eu') {
+      if (isImwaldDefaultOpenGraphTitle(title)) title = undefined
+      if (isImwaldDefaultOpenGraphDescription(description)) description = undefined
+      if (image?.includes('jumble.imwald.eu/og-image')) image = undefined
+      if (!(title || description || image || audio)) {
+        logger.debug('[OpenGraph] Stripped Imwald default tags for external URL', {
+          url: pageUrl,
+          hostname
+        })
+      }
+    }
+  } catch {
+    /* ignore */
+  }
+
+  return { title, description, image, audio }
+}
diff --git a/src/services/web.service.ts b/src/services/web.service.ts
index 41e42d59..b03902d8 100644
--- a/src/services/web.service.ts
+++ b/src/services/web.service.ts
@@ -4,6 +4,7 @@ import {
isSitesProxyUnavailableThisSession,
markSitesProxyUnavailableFromHttpStatus
} from '@/lib/optional-proxy-session'
+import { htmlLooksLikeImwaldAppShell, parseOpenGraphFromHtml } from '@/lib/open-graph'
import {
buildDevLocalSitesFetchUrl,
buildViteProxySitesFetchUrl,
@@ -13,17 +14,6 @@ import { TWebMetadata } from '@/types'
import DataLoader from 'dataloader'
import logger from '@/lib/logger'
-/** True when HTML is the Vite/React dev shell or another SPA stub, not the target page. */
-function htmlLooksLikeLocalDevAppShell(html: string): boolean {
- const head = html.slice(0, 8000)
- return (
- head.includes('injectIntoGlobalHook') ||
- head.includes('/@vite/') ||
- head.includes('@vite/client') ||
- head.includes('@react-refresh')
- )
-}
-
const HTML_FETCH_HEADERS = {
Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'User-Agent': 'Mozilla/5.0 (compatible; Imwald/1.0; +https://jumble.imwald.eu)'
@@ -49,146 +39,89 @@ async function tryFetchHtml(
if (!res.ok) return { html: null, status: res.status }
const html = await res.text()
if (html.length < 50) return { html: null, status: res.status }
- if (htmlLooksLikeLocalDevAppShell(html)) return { html: null, status: res.status }
+ if (htmlLooksLikeImwaldAppShell(html)) {
+ logger.debug('[WebService] Ignoring app-shell HTML from fetch', { fetchUrl })
+ return { html: null, status: res.status }
+ }
return { html }
} catch {
return { html: null }
}
}
-/**
- * OG HTML: always use `VITE_PROXY_SERVER` first when set; if that fails or is unset, fetch the page directly.
- */
-async function fetchHtmlForOpenGraph(originalUrl: string): Promise<{ html: string; via: string } | null> {
- const isAlreadyProxyRequest = urlLooksLikeViteProxyRequest(originalUrl)
-
- if (isAlreadyProxyRequest) {
- const { html } = await tryFetchHtml(originalUrl, 35_000)
- return html ? { html, via: originalUrl } : null
- }
+type OgFetchAttempt = { label: string; url: string; timeoutMs: number; direct?: boolean }
+function buildOgFetchAttempts(originalUrl: string): OgFetchAttempt[] {
+ const attempts: OgFetchAttempt[] = []
const proxyServer = import.meta.env.VITE_PROXY_SERVER?.trim()
+ const proxyDown = isSitesProxyUnavailableThisSession()
- if (proxyServer && !isSitesProxyUnavailableThisSession()) {
- const proxyFetchUrl = buildViteProxySitesFetchUrl(originalUrl, proxyServer)
- logger.debug('[WebService] OG fetch via VITE_PROXY_SERVER', { originalUrl, proxyFetchUrl })
- const proxyTry = await tryFetchHtml(proxyFetchUrl, 35_000)
- if (proxyTry.html) {
- clearSitesProxyUnavailableThisSession()
- return { html: proxyTry.html, via: proxyFetchUrl }
- }
- if (typeof proxyTry.status === 'number') {
- markSitesProxyUnavailableFromHttpStatus(proxyTry.status)
- }
- logger.debug('[WebService] OG proxy unavailable or bad response', { originalUrl, status: proxyTry.status })
+ if (proxyServer && !proxyDown && !urlLooksLikeViteProxyRequest(originalUrl)) {
+ attempts.push({
+ label: 'vite-proxy',
+ url: buildViteProxySitesFetchUrl(originalUrl, proxyServer),
+ timeoutMs: 35_000
+ })
}
if (import.meta.env.DEV) {
const devSitesUrl = buildDevLocalSitesFetchUrl(originalUrl)
- if (devSitesUrl && !isSitesProxyUnavailableThisSession()) {
- const devTry = await tryFetchHtml(devSitesUrl, 35_000)
- if (devTry.html) {
- clearSitesProxyUnavailableThisSession()
- return { html: devTry.html, via: devSitesUrl }
- }
- if (typeof devTry.status === 'number') {
- markSitesProxyUnavailableFromHttpStatus(devTry.status)
- }
+ if (devSitesUrl && !proxyDown) {
+ attempts.push({ label: 'dev-sites', url: devSitesUrl, timeoutMs: 35_000 })
}
- const direct = await tryFetchHtml(originalUrl, 15_000, { direct: true })
- return direct.html ? { html: direct.html, via: 'direct' } : null
+ attempts.push({ label: 'direct', url: originalUrl, timeoutMs: 15_000, direct: true })
+ } else if (!proxyServer || proxyDown) {
+ attempts.push({ label: 'direct', url: originalUrl, timeoutMs: 15_000, direct: true })
}
- // In production with a configured proxy, skip direct fetch: random sites rarely allow browser CORS,
- // and the attempt spams DevTools with cross-origin errors without improving OG success.
- if (proxyServer) {
- return null
- }
-
- const directOnly = await tryFetchHtml(originalUrl, 15_000, { direct: true })
- return directOnly.html ? { html: directOnly.html, via: 'direct' } : null
-}
-
-function parseOpenGraphFromHtml(html: string, pageUrl: string): TWebMetadata {
- const parser = new DOMParser()
- const doc = parser.parseFromString(html, 'text/html')
-
- const ogTitleMeta = doc.querySelector('meta[property="og:title"]')
- const titleTag = doc.querySelector('title')
-
- let title = ogTitleMeta?.getAttribute('content') || titleTag?.textContent
- if (title) {
- const trimmedTitle = title.trim()
- if (
- /^(Redirecting|Loading|Please wait|Redirect)(\.\.\.|…)?$/i.test(trimmedTitle) ||
- trimmedTitle === '...' ||
- trimmedTitle === '…'
- ) {
- title = undefined
+ attempts.push(
+ {
+ label: 'allorigins',
+ url: `https://api.allorigins.win/raw?url=${encodeURIComponent(originalUrl)}`,
+ timeoutMs: 25_000
+ },
+ {
+ label: 'corsproxy',
+ url: `https://corsproxy.io/?${encodeURIComponent(originalUrl)}`,
+ timeoutMs: 25_000
}
- }
-
- const description =
- doc.querySelector('meta[property="og:description"]')?.getAttribute('content') ||
- (doc.querySelector('meta[name="description"]') as HTMLMetaElement | null)?.content
+ )
- let image = (doc.querySelector('meta[property="og:image"]') as HTMLMetaElement | null)?.content
+ return attempts
+}
- let audio =
- doc.querySelector('meta[property="og:audio"]')?.getAttribute('content') ||
- doc.querySelector('meta[property="og:audio:url"]')?.getAttribute('content') ||
- doc.querySelector('meta[property="og:audio:secure_url"]')?.getAttribute('content') ||
- null
- if (audio && !audio.match(/^https?:\/\//)) {
- audio = null
+/**
+ * OG HTML: configured `/sites/?url=…` proxy first; then direct (dev or when proxy is down);
+ * then public CORS proxies as last resort.
+ */
+async function fetchHtmlForOpenGraph(originalUrl: string): Promise<{ html: string; via: string } | null> {
+ if (urlLooksLikeViteProxyRequest(originalUrl)) {
+ const { html } = await tryFetchHtml(originalUrl, 35_000)
+ return html ? { html, via: originalUrl } : null
}
- if (image) {
- try {
- const urlObj = new URL(pageUrl)
- if (image.startsWith('/')) {
- image = `${urlObj.protocol}//${urlObj.host}${image}`
- } else if (!image.match(/^https?:\/\//)) {
- const basePath = urlObj.pathname.substring(0, urlObj.pathname.lastIndexOf('/') + 1)
- image = `${urlObj.protocol}//${urlObj.host}${basePath}${image}`
- }
-
- const imageLower = image.toLowerCase()
- if (
- imageLower.includes('/favicon') ||
- imageLower.endsWith('/favicon.ico') ||
- imageLower.endsWith('/favicon.svg')
- ) {
- logger.warn('[WebService] Filtered out favicon URL from OG image', { url: pageUrl, image })
- image = undefined
+ for (const attempt of buildOgFetchAttempts(originalUrl)) {
+ logger.debug('[WebService] OG fetch attempt', {
+ originalUrl,
+ label: attempt.label,
+ fetchUrl: attempt.url
+ })
+ const result = await tryFetchHtml(attempt.url, attempt.timeoutMs, { direct: attempt.direct })
+ if (result.html) {
+ if (attempt.label === 'vite-proxy' || attempt.label === 'dev-sites') {
+ clearSitesProxyUnavailableThisSession()
}
- } catch (error) {
- logger.warn('[WebService] Failed to convert relative image URL', { image, url: pageUrl, error })
+ return { html: result.html, via: attempt.label }
}
- }
-
- try {
- const urlObj = new URL(pageUrl)
- const isAppCanonicalHost = urlObj.hostname === 'jumble.imwald.eu'
- const isAppDefaultTitle =
- title?.includes('Imwald ') ||
- title?.includes('Jumble - Imwald Edition') ||
- title?.includes('Jumble Imwald Edition')
- const isAppDefaultDesc = description?.includes(
- 'A user-friendly Nostr client focused on relay feed browsing'
- )
- if (!isAppCanonicalHost && (isAppDefaultTitle || isAppDefaultDesc)) {
- logger.debug('[WebService] Filtered out Imwald default OG tags for external domain', {
- url: pageUrl,
- hostname: urlObj.hostname
- })
- return {}
+ if (
+ (attempt.label === 'vite-proxy' || attempt.label === 'dev-sites') &&
+ typeof result.status === 'number'
+ ) {
+ markSitesProxyUnavailableFromHttpStatus(result.status)
}
- } catch {
- /* ignore */
}
- return { title, description, image, audio }
+ return null
}
class WebService {