You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

217 lines
6.2 KiB

/**
* OpenGraph metadata fetcher service
* Fetches OpenGraph metadata from URLs and caches results
*/
/**
* Metadata extracted from a page, as stored in the localStorage cache.
* Every field except `cachedAt` is optional because a page may omit it.
*/
export interface OpenGraphData {
/** Page title: `og:title`, or the `<title>` element when that tag is absent. */
title?: string;
/** Page summary: `og:description`, or the plain `description` meta tag as a fallback. */
description?: string;
/** Preview image from `og:image`; relative values are resolved against the page URL. */
image?: string;
/** Value of `og:url`, or the URL that was requested when the tag is absent. */
url?: string;
/** Value of `og:site_name`. */
siteName?: string;
/** Value of `og:type`. */
type?: string;
/** `Date.now()` timestamp (milliseconds) taken when the HTML was parsed; used to expire cache entries. */
cachedAt: number;
}
/** Age, in milliseconds, below which a cached entry is served without refetching. */
const CACHE_DURATION = 7 * 24 * 60 * 60 * 1000; // 7 days
/** Prefix prepended to the page URL to build the localStorage key of its cache entry. */
const CACHE_KEY_PREFIX = 'opengraph_';
/**
 * Fetch OpenGraph metadata for a URL.
 *
 * A cache entry younger than CACHE_DURATION is returned without touching the
 * network. Otherwise the page is requested directly (no proxy is involved), so
 * in a browser the request only succeeds when the target server permits
 * cross-origin reads. In production a backend proxy or service is usually
 * needed to cover the remaining sites.
 *
 * @param url - Absolute URL of the page to inspect.
 * @returns The parsed metadata; the (possibly expired) cache entry when the
 *   request fails; or null when the page yields no usable metadata and
 *   nothing is cached.
 */
export async function fetchOpenGraph(url: string): Promise<OpenGraphData | null> {
  // Check cache first
  const cached = getCachedOpenGraph(url);
  if (cached && Date.now() - cached.cachedAt < CACHE_DURATION) {
    return cached;
  }

  const headers: Record<string, string> = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
  };
  // User-Agent is not a CORS-safelisted request header: browsers that honour a
  // custom value send a preflight request, which servers that would otherwise
  // allow a simple GET commonly reject. Only identify ourselves outside the
  // browser, where no CORS check applies.
  if (typeof window === 'undefined') {
    headers['User-Agent'] = 'Mozilla/5.0 (compatible; aitherboard/1.0)';
  }

  try {
    const response = await fetch(url, {
      method: 'GET',
      headers,
      mode: 'cors',
      cache: 'no-cache'
    });
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`);
    }

    // A link may point at a video, archive or other large non-HTML resource.
    // Such a body can never contain meta tags, so skip downloading it. A
    // missing Content-Type header is given the benefit of the doubt.
    const contentType = response.headers.get('content-type');
    if (contentType !== null && !/html|xml/i.test(contentType)) {
      void response.body?.cancel().catch(() => undefined);
      return null;
    }

    const html = await response.text();
    const ogData = parseOpenGraph(html, url);
    // Cache the result
    if (ogData) {
      cacheOpenGraph(url, ogData);
    }
    return ogData;
  } catch (error) {
    console.warn('Failed to fetch OpenGraph data:', error);
    // Stale data beats no data: return the cached entry even if expired, or null
    return cached;
  }
}
/**
 * Parse OpenGraph metadata from HTML.
 *
 * Every `<meta>` tag is inspected regardless of attribute order, extra
 * attributes or quoting style, and its `content` is indexed by the tag's
 * `property` and `name` attributes. When `og:title` / `og:description` are
 * missing, the `<title>` element and the plain `description` meta tag are used
 * instead.
 *
 * @param html - Raw HTML source of the page.
 * @param url - URL the HTML was loaded from; used to resolve a relative
 *   `og:image` and as the default for `url`.
 * @returns The extracted metadata, or null when no title, description or
 *   image could be found.
 */
function parseOpenGraph(html: string, url: string): OpenGraphData | null {
  const metaContent = collectMetaContent(html);

  // Attribute values are entity-encoded in HTML source (e.g. `&amp;` inside an
  // image query string), so every value is decoded before use.
  const read = (key: string): string | undefined => {
    const raw = metaContent.get(key);
    return raw === undefined ? undefined : decodeHtmlEntities(raw);
  };

  const og: OpenGraphData = {
    cachedAt: Date.now()
  };

  const title = read('og:title');
  if (title) {
    og.title = title;
  }

  const description = read('og:description');
  if (description) {
    og.description = description;
  }

  const image = read('og:image');
  if (image) {
    og.image = image;
    // Make image URL absolute if relative
    if (!/^https?:\/\//i.test(image)) {
      try {
        og.image = new URL(image, url).href;
      } catch {
        // Invalid URL, keep as is
      }
    }
  }

  og.url = read('og:url') || url;

  const siteName = read('og:site_name');
  if (siteName) {
    og.siteName = siteName;
  }

  const type = read('og:type');
  if (type) {
    og.type = type;
  }

  // Fallback to regular tags if OpenGraph not available
  if (!og.title) {
    const titleMatch = html.match(/<title[^>]*>([^<]+)<\/title>/i);
    if (titleMatch) {
      og.title = decodeHtmlEntities((titleMatch[1] ?? '').trim());
    }
  }
  if (!og.description) {
    const metaDescription = read('description');
    if (metaDescription) {
      og.description = metaDescription;
    }
  }

  // Return null if we have no useful data
  if (!og.title && !og.description && !og.image) {
    return null;
  }
  return og;
}

/**
 * Index the `content` of every `<meta>` tag by its lower-cased `property`
 * and `name` attribute values. The first non-empty content seen for a key wins.
 */
function collectMetaContent(html: string): Map<string, string> {
  const contentByKey = new Map<string, string>();
  // Quoted values are consumed whole so that a '>' inside a value does not
  // terminate the tag early.
  const metaTagPattern = /<meta\b((?:"[^"]*"|'[^']*'|[^'">])*)>/gi;
  let tag: RegExpExecArray | null;
  while ((tag = metaTagPattern.exec(html)) !== null) {
    const attributes = parseTagAttributes(tag[1] ?? '');
    const content = attributes.get('content');
    if (!content) {
      continue;
    }
    for (const keyAttribute of ['property', 'name']) {
      const key = attributes.get(keyAttribute)?.trim().toLowerCase();
      if (key && !contentByKey.has(key)) {
        contentByKey.set(key, content);
      }
    }
  }
  return contentByKey;
}

/**
 * Split the attribute section of a tag into a name -> raw value map.
 * A value ends only at the quote character that opened it, so an apostrophe
 * inside a double-quoted value (and vice versa) is preserved.
 */
function parseTagAttributes(source: string): Map<string, string> {
  const attributes = new Map<string, string>();
  const attributePattern = /([^\s"'<>\/=]+)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'>]+))/g;
  let match: RegExpExecArray | null;
  while ((match = attributePattern.exec(source)) !== null) {
    const name = (match[1] ?? '').toLowerCase();
    const value = match[2] ?? match[3] ?? match[4] ?? '';
    // As in HTML parsing, a repeated attribute keeps its first value.
    if (!attributes.has(name)) {
      attributes.set(name, value);
    }
  }
  return attributes;
}
/**
 * Decode HTML entities (e.g. `&amp;`, `&#39;`, `&#x27;`) in a string.
 *
 * When a DOM is available the browser's own HTML parser does the work via a
 * detached <textarea>, which covers the full named-entity table. Without a DOM
 * (server-side rendering, Node) the function falls back to decoding numeric
 * references and a small set of common named entities instead of throwing.
 *
 * @param text - Text that may contain HTML entities.
 * @returns The text with recognised entities replaced by their characters.
 */
function decodeHtmlEntities(text: string): string {
  if (typeof document !== 'undefined') {
    const textarea = document.createElement('textarea');
    textarea.innerHTML = text;
    return textarea.value;
  }

  const namedEntities = new Map<string, string>([
    ['amp', '&'],
    ['lt', '<'],
    ['gt', '>'],
    ['quot', '"'],
    ['apos', "'"],
    ['nbsp', '\u00a0']
  ]);

  // A single pass guarantees that already-decoded output is never decoded
  // again (`&amp;lt;` becomes `&lt;`, not `<`).
  return text.replace(
    /&(#[xX][0-9a-fA-F]+|#[0-9]+|[a-zA-Z][a-zA-Z0-9]*);/g,
    (entity: string, body: string): string => {
      if (body.charAt(0) !== '#') {
        return namedEntities.get(body) ?? entity;
      }
      const isHex = body.charAt(1).toLowerCase() === 'x';
      const codePoint = isHex ? parseInt(body.slice(2), 16) : parseInt(body.slice(1), 10);
      // Leave out-of-range references untouched rather than throwing a RangeError.
      return codePoint > 0 && codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
    }
  );
}
/**
 * Get cached OpenGraph data for a URL from localStorage.
 *
 * The entry is returned regardless of its age; the caller decides whether it
 * is still fresh. Entries that do not parse to an object with a finite numeric
 * `cachedAt` are treated as missing, so corrupted or foreign values never
 * reach callers typed as OpenGraphData.
 *
 * @param url - Page URL whose cache entry should be read.
 * @returns The cached entry, or null when there is no window, no entry, or
 *   the entry is unreadable or malformed.
 */
function getCachedOpenGraph(url: string): OpenGraphData | null {
  if (typeof window === 'undefined') return null;
  try {
    const raw = localStorage.getItem(CACHE_KEY_PREFIX + url);
    if (!raw) {
      return null;
    }
    const parsed: unknown = JSON.parse(raw);
    if (typeof parsed === 'object' && parsed !== null) {
      const cachedAt = (parsed as { cachedAt?: unknown }).cachedAt;
      if (typeof cachedAt === 'number' && Number.isFinite(cachedAt)) {
        return parsed as OpenGraphData;
      }
    }
  } catch (error) {
    // Covers both storage access failures and invalid JSON.
    console.warn('Error reading cached OpenGraph data:', error);
  }
  return null;
}
/**
 * Persist OpenGraph data for a URL in localStorage.
 *
 * Does nothing when no window exists. If the first write fails, a warning is
 * logged, old cache entries are cleared and the write is attempted once more;
 * a second failure is ignored.
 */
function cacheOpenGraph(url: string, data: OpenGraphData): void {
  if (typeof window === 'undefined') {
    return;
  }

  const storageKey = `${CACHE_KEY_PREFIX}${url}`;
  const persist = (): void => {
    localStorage.setItem(storageKey, JSON.stringify(data));
  };

  try {
    persist();
    return;
  } catch (error) {
    console.warn('Error caching OpenGraph data:', error);
  }

  // If storage is full, try to clear old entries and write again
  try {
    clearOldCacheEntries();
    persist();
  } catch {
    // Give up
  }
}
/**
 * Clear old cache entries to free up space.
 *
 * Removes every localStorage key starting with CACHE_KEY_PREFIX whose entry is
 * no longer fresh. An entry counts as stale when its age has reached
 * CACHE_DURATION (the complement of the `<` freshness test used when reading),
 * when it is not valid JSON, or when it lacks a numeric `cachedAt`. Such
 * malformed entries can never be served as fresh, so keeping them only wastes
 * quota.
 */
function clearOldCacheEntries(): void {
  if (typeof window === 'undefined') return;
  const now = Date.now();
  const keysToRemove: string[] = [];

  // Collect first and remove afterwards: removing while iterating would shift
  // the indices returned by localStorage.key().
  for (let i = 0; i < localStorage.length; i++) {
    const key = localStorage.key(i);
    if (!key || !key.startsWith(CACHE_KEY_PREFIX)) {
      continue;
    }

    let stale = true;
    try {
      const parsed: unknown = JSON.parse(localStorage.getItem(key) ?? 'null');
      const cachedAt =
        typeof parsed === 'object' && parsed !== null
          ? (parsed as { cachedAt?: unknown }).cachedAt
          : undefined;
      stale = typeof cachedAt !== 'number' || !(now - cachedAt < CACHE_DURATION);
    } catch {
      // Unparseable entry: treat as stale.
      stale = true;
    }

    if (stale) {
      keysToRemove.push(key);
    }
  }

  for (const key of keysToRemove) {
    localStorage.removeItem(key);
  }
}