// NOTE: removed non-code repository-browser page scaffolding ("topics" notice,
// line/size counters) that was accidentally captured above the imports.
import { DEFAULT_RSS_FEEDS } from '@/constants'
import { canonicalizeRssArticleUrl } from '@/lib/rss-article'
import { cleanUrl } from '@/lib/url'
import logger from '@/lib/logger'
import { buildViteProxySitesFetchUrl, urlLooksLikeViteProxyRequest } from '@/lib/vite-proxy-url'
import indexedDb from '@/services/indexed-db.service'
/** One Media RSS attachment (media:content / media:thumbnail) extracted from a feed item. */
export interface RssFeedItemMedia {
  /** Media file URL (taken from the element's `url` attribute). */
  url: string
  /** MIME type from the `type` attribute, when present. */
  type?: string
  /** Attribution text from a nested media:credit element. */
  credit?: string
  /** Thumbnail URL from a nested media:thumbnail element. */
  thumbnail?: string
  /** Raw width attribute string (not parsed to a number). */
  width?: string
  /** Raw height attribute string (not parsed to a number). */
  height?: string
}
/** An RSS enclosure (or Atom link[rel="enclosure"]) — typically podcast audio/video. */
export interface RssFeedItemEnclosure {
  /** Media file URL (`url` attribute in RSS, `href` in Atom). */
  url: string
  /** MIME type; required — enclosures without a type are dropped during parsing. */
  type: string
  /** Raw byte-length attribute string, when provided. */
  length?: string
  /** Duration taken from a sibling itunes:duration element, when present. */
  duration?: string
}
/** A single parsed RSS/Atom item, flattened with its source feed's metadata for display. */
export interface RssFeedItem {
  title: string
  /** Absolute, cleaned article link ('' when the item had none). */
  link: string
  /** HTML body: content:encoded (RSS) or content/summary (Atom), falling back to description. */
  description: string
  /** Publication date, or null when missing or unparseable.
   *  NOTE(review): items revived from IndexedDB may carry a serialized string/number here
   *  until re-parsed — see parseItemPubDate. */
  pubDate: Date | null
  /** Item guid/id, falling back to the link; URL guids are cleaned. */
  guid: string
  /** URL of the feed this item came from (or WEB_ONLY_FAUX_FEED_URL for synthetic rows). */
  feedUrl: string
  /** Title of the owning feed, copied onto the item at parse time. */
  feedTitle?: string
  /** Image URL of the owning feed, copied onto the item at parse time. */
  feedImage?: string
  /** Description of the owning feed, copied onto the item at parse time. */
  feedDescription?: string
  /** Media RSS attachments; omitted (not []) when none were found. */
  media?: RssFeedItemMedia[]
  /** First enclosure found on the item, when any. */
  enclosure?: RssFeedItemEnclosure
}
/** A fully parsed RSS 2.0 or Atom feed (channel metadata plus its items). */
export interface RssFeed {
  title: string
  /** Feed's own site link, falling back to the feed URL itself. */
  link: string
  description: string
  items: RssFeedItem[]
  /** The URL the feed was fetched from (cache key). */
  feedUrl: string
  /** Channel image: standard RSS <image>, itunes:image href, or Atom logo/icon. */
  image?: {
    url?: string
    title?: string
    link?: string
    /** Raw attribute/element string, not parsed to a number. */
    width?: string
    height?: string
    description?: string
  }
  /** RSS <language> or Atom xml:lang. */
  language?: string
  /** RSS <copyright> or Atom <rights>. */
  copyright?: string
  generator?: string
  /** RSS <lastBuildDate> or Atom <updated>, when parseable. */
  lastBuildDate?: Date
}
/** Synthetic row for URL-only threads (Nostr activity on a link without an RSS cache hit). */
export const WEB_ONLY_FAUX_FEED_URL = 'nostr:jumble/web-faux-rss-item'

/** True when the item is a synthetic web-only placeholder rather than a real RSS row. */
export function isWebOnlyFauxRssItem(item: Pick<RssFeedItem, 'feedUrl' | 'guid'>): boolean {
  if (item.feedUrl === WEB_ONLY_FAUX_FEED_URL) {
    return true
  }
  return item.guid.startsWith('web-only:')
}
/**
 * Build a synthetic RssFeedItem for a bare article URL that has no backing RSS feed.
 * The canonicalized URL doubles as title, link, and the `web-only:` guid; the row is
 * tagged with WEB_ONLY_FAUX_FEED_URL so isWebOnlyFauxRssItem() recognizes it.
 */
export function createWebOnlyRssFeedItem(articleUrl: string): RssFeedItem {
  const canonical = canonicalizeRssArticleUrl(articleUrl.trim())
  const fauxItem: RssFeedItem = {
    title: canonical,
    link: canonical,
    description: '',
    pubDate: null,
    guid: `web-only:${canonical}`,
    feedUrl: WEB_ONLY_FAUX_FEED_URL,
    feedTitle: undefined
  }
  return fauxItem
}
// IndexedDB settings key under which the attempted-feed-URL set is persisted (JSON string array).
const RSS_FEED_FETCH_ATTEMPTED_KEYS_SETTING = 'rssFeedFetchAttemptedKeys'
class RssFeedService {
  /** Shared singleton instance; assigned on first construction (see constructor). */
  static instance: RssFeedService
  /** In-memory per-URL feed cache, stamped with the fetch time for expiry checks. */
  private feedCache: Map<string, { feed: RssFeed; timestamp: number }> = new Map()
  /** How long an in-memory cached feed is served before being re-fetched. */
  private readonly CACHE_DURATION = 5 * 60 * 1000 // 5 minutes
  /** Controller for an in-progress background refresh, when one is running. */
  private backgroundRefreshController: AbortController | null = null
  /** Lazily built lookup cache — presumably month-name mapping for date parsing; confirm against parseDate. */
  private monthMapCache: Record<string, string> | null = null
  private activeFetchPromises: Map<string, Promise<RssFeed>> = new Map() // Track active fetches by URL
  /** Global RSS item cap in IndexedDB; oldest by pubDate are removed when exceeded. */
  private readonly MAX_CACHED_RSS_ITEMS = 5000
  /**
   * Feed URLs we already tried to hydrate (success or hard failure). Without this, a feed that never
   * yields items (CORS, dead host) stays "missing" forever and blocks every load / retriggers refresh.
   * Persisted so a full reload does not repeat a 30s wait for the same dead URL.
   */
  private rssFeedAttemptedKeys = new Set<string>()
  /** Guards the one-time IndexedDB hydration of rssFeedAttemptedKeys. */
  private rssFeedAttemptedKeysLoaded = false
  /** Same feed list + overlapping time: one network refresh (Strict Mode / remount / HMR). */
  private rssMultiFeedRefreshInFlight = new Map<string, Promise<void>>()
/** Order-insensitive dedupe key for a feed-URL set: normalized, sorted, '\u0001'-joined. */
private rssMultiFeedRefreshKey(feedUrls: string[]): string {
  const normalized = feedUrls.map((feedUrl) => this.normalizeRssFeedKeyUrl(feedUrl))
  normalized.sort()
  return normalized.join('\u0001')
}
/** Singleton: every `new RssFeedService()` returns the same shared instance. */
constructor() {
  if (RssFeedService.instance) {
    return RssFeedService.instance
  }
  RssFeedService.instance = this
}
/** Canonical cache-key form of a feed URL: trimmed, with one trailing slash removed. */
private normalizeRssFeedKeyUrl(url: string): string {
  const trimmed = url.trim()
  return trimmed.endsWith('/') ? trimmed.slice(0, -1) : trimmed
}
/**
 * One-time lazy hydration of `rssFeedAttemptedKeys` from the persisted IndexedDB setting.
 * Missing or malformed data is tolerated silently (the set just stays empty);
 * read failures are only logged.
 */
private async ensureRssFeedAttemptedKeysLoaded(): Promise<void> {
  if (this.rssFeedAttemptedKeysLoaded) return
  // Mark loaded up-front so concurrent callers don't re-read the setting.
  this.rssFeedAttemptedKeysLoaded = true
  try {
    const raw = await indexedDb.getSetting(RSS_FEED_FETCH_ATTEMPTED_KEYS_SETTING)
    if (!raw) return
    const parsed = JSON.parse(raw) as unknown
    if (!Array.isArray(parsed)) return
    parsed
      .filter((entry): entry is string => typeof entry === 'string' && entry.trim() !== '')
      .forEach((entry) => this.rssFeedAttemptedKeys.add(this.normalizeRssFeedKeyUrl(entry)))
  } catch (e) {
    logger.warn('[RssFeedService] Failed to load attempted feed URL keys', { error: e })
  }
}
/** Write the attempted-feed-URL set back to IndexedDB (best effort; failures only logged). */
private async persistRssFeedAttemptedKeys(): Promise<void> {
  const serialized = JSON.stringify(Array.from(this.rssFeedAttemptedKeys))
  try {
    await indexedDb.setSetting(RSS_FEED_FETCH_ATTEMPTED_KEYS_SETTING, serialized)
  } catch (e) {
    logger.warn('[RssFeedService] Failed to persist attempted feed URL keys', { error: e })
  }
}
/** Record each URL (normalized) as already attempted, so dead feeds are not retried forever. */
private markFeedKeysAttempted(urls: string[]): void {
  urls.forEach((url) => this.rssFeedAttemptedKeys.add(this.normalizeRssFeedKeyUrl(url)))
}
/**
 * Revive an item's pubDate into a real Date (or null).
 * Although the declared type is `Date | null`, rows read back from IndexedDB can carry
 * a serialized number or string — those are converted here.
 */
private parseItemPubDate(item: RssFeedItem): Date | null {
  const raw = item.pubDate
  if (!raw) return null
  if (raw instanceof Date) return raw
  if (typeof raw === 'number' || typeof raw === 'string') {
    return new Date(raw)
  }
  return null
}
/**
 * Merge refreshed feeds into the full IndexedDB cache, trim oldest items when over the cap,
 * and rewrite the store so pruned rows are removed (put-only would leave stale keys).
 */
private async persistGlobalRssCacheAfterMerge(
  mergedFromRefresh: RssFeedItem[],
  refreshedFeedUrls: string[]
): Promise<void> {
  const refreshedSet = new Set(refreshedFeedUrls.map((u) => this.normalizeRssFeedKeyUrl(u)))
  let existing: RssFeedItem[] = []
  try {
    existing = await indexedDb.getRssFeedItems()
  } catch (e) {
    logger.warn('[RssFeedService] persistGlobalRssCacheAfterMerge: read cache failed', { error: e })
  }
  // Keep cached rows only for feeds that were NOT refreshed (their pubDates revived);
  // refreshed feeds are replaced wholesale by the merged items.
  const byKey = new Map<string, RssFeedItem>()
  for (const cached of existing) {
    if (refreshedSet.has(this.normalizeRssFeedKeyUrl(cached.feedUrl))) continue
    byKey.set(`${cached.feedUrl}:${cached.guid}`, {
      ...cached,
      pubDate: this.parseItemPubDate(cached)
    })
  }
  for (const fresh of mergedFromRefresh) {
    byKey.set(`${fresh.feedUrl}:${fresh.guid}`, fresh)
  }
  // Newest first; undated items (treated as epoch 0) sink to the end, then apply the global cap.
  const combined = Array.from(byKey.values()).sort(
    (a, b) => (b.pubDate?.getTime() || 0) - (a.pubDate?.getTime() || 0)
  )
  const trimmed =
    combined.length > this.MAX_CACHED_RSS_ITEMS
      ? combined.slice(0, this.MAX_CACHED_RSS_ITEMS)
      : combined
  try {
    // Clear-then-put so rows pruned by the cap are actually deleted from the store.
    await indexedDb.clearRssFeedItems()
    await indexedDb.putRssFeedItems(trimmed)
  } catch (error) {
    logger.error('[RssFeedService] persistGlobalRssCacheAfterMerge failed', { error })
  }
}
/**
 * Fetch and parse an RSS/Atom feed from a URL.
 *
 * Order here is deliberate:
 * 1. serve from the in-memory cache while fresh (CACHE_DURATION),
 * 2. fail fast if the caller's signal is already aborted,
 * 3. reuse an in-flight fetch for the same URL (deduplicates simultaneous callers),
 * 4. otherwise try each fetch strategy in turn until one yields parseable XML.
 *
 * @throws DOMException('AbortError') when `signal` aborts; Error when every strategy fails.
 */
async fetchFeed(url: string, signal?: AbortSignal): Promise<RssFeed> {
  // Check cache first
  const cached = this.feedCache.get(url)
  if (cached && Date.now() - cached.timestamp < this.CACHE_DURATION) {
    logger.debug('[RssFeedService] Returning cached feed', { url })
    return cached.feed
  }
  // Check if already aborted
  if (signal?.aborted) {
    logger.warn('[RssFeedService] Signal already aborted before fetchFeed', { url })
    throw new DOMException('The operation was aborted.', 'AbortError')
  }
  // Check if there's already an active fetch for this URL (deduplicate simultaneous requests).
  // NOTE(review): a deduplicated caller's own `signal` is ignored here — the shared fetch is
  // governed by the FIRST caller's signal only; confirm this is intended.
  const activeFetch = this.activeFetchPromises.get(url)
  if (activeFetch) {
    logger.debug('[RssFeedService] Reusing active fetch for URL', { url })
    return activeFetch
  }
  // Create fetch promise and track it
  const fetchPromise = (async () => {
    try {
      // Try multiple fetch strategies in order
      const strategies = this.getFetchStrategies(url)
      for (const strategy of strategies) {
        // Check if aborted before trying next strategy
        if (signal?.aborted) {
          logger.warn('[RssFeedService] Signal aborted during fetch strategies', { url, strategy: strategy.name })
          throw new DOMException('The operation was aborted.', 'AbortError')
        }
        try {
          logger.debug('[RssFeedService] Trying fetch strategy', { url, strategy: strategy.name })
          const xmlText = await this.fetchWithStrategy(url, strategy, signal)
          if (xmlText) {
            const feed = this.parseFeed(xmlText, url)
            // Cache the feed
            this.feedCache.set(url, { feed, timestamp: Date.now() })
            logger.info('[RssFeedService] Successfully fetched and parsed feed', {
              url,
              itemCount: feed.items.length,
              strategy: strategy.name
            })
            return feed
          }
        } catch (error) {
          // Don't log abort errors as warnings - they're expected during cleanup
          if (error instanceof DOMException && error.name === 'AbortError') {
            logger.warn('[RssFeedService] Fetch aborted', { url, strategy: strategy.name })
            throw error // Re-throw abort errors immediately
          }
          logger.warn('[RssFeedService] Strategy failed', { url, strategy: strategy.name, error })
          // Continue to next strategy
          continue
        }
      }
      // All strategies failed
      throw new Error(`Failed to fetch RSS feed from ${url} after trying all available methods`)
    } finally {
      // Remove from active fetches when done (success, failure, or abort)
      this.activeFetchPromises.delete(url)
    }
  })()
  // Store the promise to deduplicate simultaneous requests.
  // Safe ordering: the async IIFE cannot reach its finally-delete before this set runs,
  // because it suspends at the first await.
  this.activeFetchPromises.set(url, fetchPromise)
  return fetchPromise
}
/**
 * Build the ordered list of fetch strategies for a feed URL: the configured Vite proxy
 * (when set and the URL is not already a proxy request), two public CORS proxies, and
 * finally a direct request for feeds that serve CORS headers themselves.
 */
private getFetchStrategies(url: string): Array<{ name: string; getUrl: (url: string) => string }> {
  const proxyServer = import.meta.env.VITE_PROXY_SERVER?.trim()
  const strategies: Array<{ name: string; getUrl: (url: string) => string }> = []
  // Strategy 1: Same `VITE_PROXY_SERVER` contract as OG/link preview (`sites/?url=…`), not path-encoded `/sites/{url}`.
  if (proxyServer && !urlLooksLikeViteProxyRequest(url)) {
    strategies.push({
      name: 'configured-proxy',
      getUrl: (u) => buildViteProxySitesFetchUrl(u, proxyServer)
    })
  }
  strategies.push(
    // Strategy 2: public CORS proxy (allorigins.win)
    {
      name: 'allorigins-proxy',
      getUrl: (u) => `https://api.allorigins.win/raw?url=${encodeURIComponent(u)}`
    },
    // Strategy 3: alternative CORS proxy (corsproxy.io)
    {
      name: 'corsproxy-proxy',
      getUrl: (u) => `https://corsproxy.io/?${encodeURIComponent(u)}`
    },
    // Strategy 4: direct fetch — only works for feeds with CORS enabled
    {
      name: 'direct',
      getUrl: (u) => u
    }
  )
  return strategies
}
/**
 * Fetch `originalUrl` through one strategy's rewritten URL and return the raw XML text.
 *
 * - Honors `externalSignal`: an external abort cancels the in-flight fetch.
 * - A 30s timer only LOGS slowness; it deliberately never aborts (slow feeds may finish).
 * - Rejects on HTTP errors, empty bodies, and responses that do not look like XML.
 *
 * Fix vs. previous version: the catch block was dead code (both branches rethrew the
 * error unchanged) and `clearTimeout` was duplicated across try/catch — cleanup is now
 * centralized in `finally`, with identical observable behavior.
 *
 * @throws DOMException('AbortError') when aborted; Error for HTTP / empty / non-XML responses.
 */
private async fetchWithStrategy(originalUrl: string, strategy: { name: string; getUrl: (url: string) => string }, externalSignal?: AbortSignal): Promise<string> {
  const fetchUrl = strategy.getUrl(originalUrl)
  // Fail fast if the caller already aborted.
  if (externalSignal?.aborted) {
    throw new DOMException('The operation was aborted.', 'AbortError')
  }
  const controller = new AbortController()
  // Use a longer window for RSS feeds (30 seconds) since they can be slow.
  // Don't abort on timeout — just log a warning and let the fetch continue.
  const timeoutId = setTimeout(() => {
    logger.warn('[RssFeedService] Fetch taking longer than expected', {
      url: originalUrl,
      strategy: strategy.name,
      elapsed: '30s'
    })
  }, 30000) // 30 second warning (but don't abort)
  // Propagate an external abort to our controller so the in-flight fetch is cancelled.
  if (externalSignal) {
    externalSignal.addEventListener('abort', () => {
      clearTimeout(timeoutId)
      controller.abort()
    }, { once: true })
  }
  try {
    const res = await fetch(fetchUrl, {
      signal: controller.signal,
      mode: 'cors',
      credentials: 'omit',
      headers: {
        'Accept': 'application/rss+xml, application/xml, application/atom+xml, text/xml, */*'
      }
    })
    // Headers are in — the "slow fetch" warning is no longer useful during body download.
    clearTimeout(timeoutId)
    if (!res.ok) {
      throw new Error(`HTTP ${res.status}: ${res.statusText}`)
    }
    const xmlText = await res.text()
    // Validate that we got XML content
    if (!xmlText || xmlText.trim().length === 0) {
      throw new Error('Empty response')
    }
    // Cheap sanity check — RSS/Atom payloads must at least start like markup
    if (!xmlText.trim().startsWith('<')) {
      throw new Error('Response does not appear to be XML')
    }
    return xmlText
  } finally {
    // Single cleanup point (idempotent if already cleared above); all errors —
    // including AbortError — propagate to the caller unchanged.
    clearTimeout(timeoutId)
  }
}
/**
 * Parse raw RSS/Atom XML into a structured RssFeed, dispatching on document shape.
 * @throws Error when the XML does not parse.
 */
private parseFeed(xmlText: string, feedUrl: string): RssFeed {
  const doc = new DOMParser().parseFromString(xmlText, 'text/xml')
  // DOMParser reports XML errors via a <parsererror> element rather than throwing.
  if (doc.querySelector('parsererror')) {
    throw new Error('Failed to parse XML feed')
  }
  // Atom documents have a root <feed> element (or the Atom namespace); anything else is treated as RSS.
  const root = doc.documentElement
  const isAtom = root.tagName === 'feed' || root.namespaceURI === 'http://www.w3.org/2005/Atom'
  return isAtom ? this.parseAtomFeed(doc, feedUrl) : this.parseRssFeed(doc, feedUrl)
}
/**
 * Parse an RSS 2.0 document into an RssFeed.
 *
 * Feed level: channel metadata, standard <image> (with itunes:image fallback for podcasts).
 * Per item: relative→absolute link resolution, content:encoded preferred over description,
 * CDATA/XML-artifact cleanup, enclosures (with itunes:duration), and Media RSS
 * (media:content / media:thumbnail). Namespaced elements are matched by scanning child
 * elements (localName/nodeName/namespaceURI) because querySelector cannot address
 * namespace prefixes reliably.
 *
 * @param doc     parsed XML document (already validated by parseFeed)
 * @param feedUrl the feed's own URL; used for relative-link resolution and stamped on items
 * @throws Error when the document has no <channel> element
 */
private parseRssFeed(doc: Document, feedUrl: string): RssFeed {
  const channel = doc.querySelector('channel')
  if (!channel) {
    throw new Error('Invalid RSS feed: no channel element found')
  }
  const title = this.getTextContent(channel, 'title') || 'Untitled Feed'
  const link = this.getTextContent(channel, 'link') || feedUrl
  const description = this.getTextContent(channel, 'description') || ''
  // Extract feed metadata
  const language = this.getTextContent(channel, 'language') || undefined
  const copyright = this.getTextContent(channel, 'copyright') || undefined
  const generator = this.getTextContent(channel, 'generator') || undefined
  const lastBuildDateStr = this.getTextContent(channel, 'lastBuildDate')
  const lastBuildDate = lastBuildDateStr ? (this.parseDate(lastBuildDateStr) || undefined) : undefined
  // Extract feed image
  // Check all channel children for image elements (both standard RSS and namespaced)
  let feedImage: RssFeed['image'] | undefined
  const allChannelChildren = Array.from(channel.children)
  // First, try to find standard RSS 2.0 <image> element
  const standardImageElements = allChannelChildren.filter(child => {
    const nodeName = child.nodeName.toLowerCase()
    const localName = child.localName || nodeName
    const namespaceURI = child.namespaceURI
    // Standard RSS image element has nodeName "image" with no namespace prefix
    return localName === 'image' &&
      !nodeName.includes(':') &&
      (!namespaceURI || (!namespaceURI.includes('itunes') && !namespaceURI.includes('media')))
  })
  if (standardImageElements.length > 0) {
    const imageElement = standardImageElements[0]
    logger.debug('[RssFeedService] Processing standard image element', {
      url: feedUrl,
      nodeName: imageElement.nodeName,
      localName: imageElement.localName,
      childrenCount: imageElement.children.length,
      innerHTML: imageElement.innerHTML?.substring(0, 200)
    })
    const imageUrl = this.getTextContent(imageElement, 'url')
    logger.debug('[RssFeedService] Extracted image URL', { url: feedUrl, imageUrl })
    if (imageUrl) {
      const imageTitle = this.getTextContent(imageElement, 'title')
      const imageLink = this.getTextContent(imageElement, 'link')
      const imageWidth = this.getTextContent(imageElement, 'width')
      const imageHeight = this.getTextContent(imageElement, 'height')
      const imageDescription = this.getTextContent(imageElement, 'description')
      feedImage = {
        url: imageUrl,
        title: imageTitle || undefined,
        link: imageLink || undefined,
        width: imageWidth || undefined,
        height: imageHeight || undefined,
        description: imageDescription || undefined
      }
      logger.debug('[RssFeedService] Found standard RSS feed image element', { url: feedUrl, imageUrl, feedImage })
    } else {
      logger.warn('[RssFeedService] Standard image element found but no URL extracted', {
        url: feedUrl,
        imageElementHTML: imageElement.outerHTML?.substring(0, 300)
      })
    }
  }
  // If no standard image found, check for itunes:image (common in podcast feeds)
  if (!feedImage) {
    const itunesImageElements = allChannelChildren.filter(child => {
      const localName = child.localName || child.nodeName.toLowerCase()
      const nodeName = child.nodeName.toLowerCase()
      const namespaceURI = child.namespaceURI
      // Check if it's itunes:image by namespace or nodeName
      return (localName === 'image' && namespaceURI && namespaceURI.includes('itunes')) ||
        nodeName === 'itunes:image' ||
        (nodeName.includes('itunes') && nodeName.includes('image'))
    })
    if (itunesImageElements.length > 0) {
      const itunesImage = itunesImageElements[0]
      // itunes:image uses href attribute, not nested url element
      const href = itunesImage.getAttribute('href')
      if (href) {
        feedImage = { url: href }
        logger.debug('[RssFeedService] Found itunes:image', { url: feedUrl, imageUrl: href })
      }
    }
  }
  logger.debug('[RssFeedService] Feed image extraction result', {
    url: feedUrl,
    hasImage: !!feedImage,
    imageUrl: feedImage?.url,
    channelChildrenCount: allChannelChildren.length,
    standardImageCount: standardImageElements.length
  })
  const items: RssFeedItem[] = []
  const itemElements = channel.querySelectorAll('item')
  itemElements.forEach((item) => {
    const itemTitle = this.getTextContent(item, 'title') || ''
    let itemLink = this.getTextContent(item, 'link') || ''
    // Convert relative URLs to absolute (resolved against the feed's origin)
    if (itemLink && !itemLink.startsWith('http://') && !itemLink.startsWith('https://')) {
      try {
        const baseUrl = new URL(feedUrl)
        itemLink = new URL(itemLink, baseUrl.origin).href
      } catch {
        // If URL parsing fails, keep the original link
      }
    }
    if (itemLink) {
      const cleanedLink = cleanUrl(itemLink)
      if (cleanedLink) itemLink = cleanedLink
    }
    // For description, prefer content:encoded (WordPress full content) over description (truncated)
    // Check for content:encoded first, then fall back to description
    let itemDescription = ''
    // Try to find content:encoded element (WordPress namespace extension)
    // Iterate through all direct children to find it (most reliable method for namespaced XML)
    const children = Array.from(item.children)
    let contentEncoded: Element | null = null
    for (const child of children) {
      // Check if this is the content:encoded element
      // The tagName might be "content:encoded" or just "encoded" depending on parser
      const tagName = child.tagName || child.nodeName
      if (tagName && (
        tagName.toLowerCase() === 'encoded' ||
        tagName.toLowerCase() === 'content:encoded' ||
        tagName.includes('encoded') ||
        (child.localName && child.localName.toLowerCase() === 'encoded')
      )) {
        contentEncoded = child
        break
      }
    }
    if (contentEncoded) {
      // For CDATA sections in XML, we need to get the content carefully
      // The content:encoded element contains CDATA with HTML
      // Get textContent first (this properly extracts CDATA content)
      // textContent will contain the HTML as a string from CDATA sections
      const rawContent = contentEncoded.textContent?.trim() || contentEncoded.innerHTML?.trim() || ''
      if (rawContent) {
        // Clean up the content - remove any XML artifacts that might have leaked through
        // Remove XML closing tags that might appear at the end (like ]]>)
        itemDescription = rawContent
          .replace(/\]\]\s*>\s*$/g, '') // Remove trailing ]]> from CDATA
          .replace(/^\s*<!\[CDATA\[/g, '') // Remove leading CDATA declaration
          .trim()
        // If the content looks like it has HTML tags, use it as-is
        // Otherwise, it might be plain text that needs HTML entity decoding
        if (itemDescription && itemDescription.includes('<')) {
          // It's HTML - ensure it's clean
          // Remove any stray XML/namespace declarations that might appear
          itemDescription = itemDescription
            .replace(/<\?xml[^>]*\?>/gi, '') // Remove XML declarations
            .replace(/<\!DOCTYPE[^>]*>/gi, '') // Remove DOCTYPE declarations
            .trim()
        }
      }
      // Log for debugging
      if (itemDescription) {
        logger.debug('[RssFeedService] Found content:encoded', {
          url: feedUrl,
          hasHtml: itemDescription.includes('<'),
          length: itemDescription.length,
          preview: itemDescription.substring(0, 100)
        })
      }
    } else {
      logger.debug('[RssFeedService] content:encoded not found, using description', { url: feedUrl })
    }
    // Fall back to description if content:encoded is not found or empty
    if (!itemDescription) {
      // Try getting HTML content from description tag
      itemDescription = this.getHtmlContent(item, 'description') || ''
      // If that doesn't work, try getting text content and decode HTML entities
      // This handles cases where HTML entities are in the text content
      if (!itemDescription) {
        const descElement = item.querySelector('description')
        if (descElement) {
          // Get raw text content (which may contain HTML entities)
          const rawText = descElement.textContent?.trim() || descElement.innerHTML?.trim() || ''
          if (rawText) {
            // Decode HTML entities using a temporary element
            // The browser will automatically decode entities when setting innerHTML
            const temp = document.createElement('textarea')
            temp.innerHTML = rawText
            itemDescription = temp.value
          }
        }
      }
      // Clean description as well
      if (itemDescription) {
        itemDescription = itemDescription
          .replace(/\]\]\s*>\s*$/g, '')
          .replace(/^\s*<!\[CDATA\[/g, '')
          .trim()
      }
    }
    const pubDateText = this.getTextContent(item, 'pubDate')
    const itemPubDate = this.parseDate(pubDateText)
    let itemGuid = this.getTextContent(item, 'guid') || itemLink || ''
    // URL-shaped guids are cleaned so they match cleaned links when deduping
    if (itemGuid && (itemGuid.startsWith('http://') || itemGuid.startsWith('https://'))) {
      const cleanedGuid = cleanUrl(itemGuid)
      if (cleanedGuid) itemGuid = cleanedGuid
    }
    // Log item parsing for debugging
    if (!itemPubDate && pubDateText) {
      logger.warn('[RssFeedService] Failed to parse pubDate for item', {
        url: feedUrl,
        title: itemTitle.substring(0, 50),
        pubDateText,
        link: itemLink
      })
    }
    // Extract enclosure element (for audio/video files)
    let enclosure: RssFeedItemEnclosure | undefined
    const enclosureElement = item.querySelector('enclosure')
    if (enclosureElement) {
      const enclosureUrl = enclosureElement.getAttribute('url') || ''
      const enclosureType = enclosureElement.getAttribute('type') || ''
      const enclosureLength = enclosureElement.getAttribute('length') || undefined
      // Both url AND type are required; enclosures missing either are dropped
      if (enclosureUrl && enclosureType) {
        // Try to get duration from itunes:duration
        let duration: string | undefined
        const allItemChildren = Array.from(item.children)
        const durationElements = allItemChildren.filter(child => {
          const localName = child.localName || child.nodeName.toLowerCase()
          const nodeName = child.nodeName.toLowerCase()
          const namespaceURI = child.namespaceURI
          return (localName === 'duration' && (nodeName.includes('itunes:duration') || namespaceURI?.includes('itunes'))) ||
            nodeName === 'itunes:duration'
        })
        if (durationElements.length > 0) {
          duration = durationElements[0].textContent?.trim() || undefined
        }
        enclosure = {
          url: enclosureUrl,
          type: enclosureType,
          length: enclosureLength,
          duration: duration
        }
        logger.debug('[RssFeedService] Found enclosure', {
          url: feedUrl,
          itemTitle: itemTitle.substring(0, 50),
          enclosureType: enclosureType,
          enclosureUrl: enclosureUrl,
          duration: duration
        })
      }
    }
    // Extract media:content elements (Media RSS)
    // Handle namespaced elements by checking all elements and filtering by localName and namespace
    const media: RssFeedItemMedia[] = []
    // Get all child elements and filter for media:content
    // media:content has localName "content" but is in the media namespace
    // Regular RSS content:encoded has localName "encoded" and is in the content namespace (different!)
    const allChildren = Array.from(item.children)
    const mediaContentElements = allChildren.filter(child => {
      const localName = child.localName || child.nodeName.toLowerCase()
      const nodeName = child.nodeName.toLowerCase()
      const namespaceURI = child.namespaceURI
      // media:content elements have:
      // 1. localName "content" AND a "url" attribute (media:content has url attribute)
      // 2. nodeName includes "media:content"
      // 3. namespaceURI includes "media"
      // We exclude content:encoded which has localName "encoded" (not "content")
      if (localName === 'content') {
        // If it has a url attribute, it's likely media:content (content:encoded doesn't have url)
        if (child.getAttribute('url')) {
          return true
        }
        // Check namespace - media:content is in media namespace
        if (namespaceURI && namespaceURI.includes('media')) {
          return true
        }
        // Check nodeName for media: prefix
        if (nodeName.includes('media:content') || nodeName.startsWith('media:')) {
          return true
        }
      }
      return false
    })
    logger.debug('[RssFeedService] Found media:content elements', {
      url: feedUrl,
      itemTitle: itemTitle.substring(0, 50),
      mediaCount: mediaContentElements.length,
      allChildrenCount: allChildren.length
    })
    mediaContentElements.forEach((mediaEl) => {
      const url = mediaEl.getAttribute('url') || ''
      const type = mediaEl.getAttribute('type') || undefined
      const width = mediaEl.getAttribute('width') || undefined
      const height = mediaEl.getAttribute('height') || undefined
      if (url) {
        // Get media:credit (attribution) - check children for credit element
        let credit: string | undefined
        const creditElements = Array.from(mediaEl.children).filter(child => {
          const localName = child.localName || child.nodeName
          return localName === 'credit' || child.nodeName === 'media:credit'
        })
        if (creditElements.length > 0) {
          credit = creditElements[0].textContent?.trim() || creditElements[0].getAttribute('scheme') || undefined
        }
        // Get media:thumbnail - check children for thumbnail element
        let thumbnail: string | undefined
        const thumbnailElements = Array.from(mediaEl.children).filter(child => {
          const localName = child.localName || child.nodeName
          return localName === 'thumbnail' || child.nodeName === 'media:thumbnail'
        })
        if (thumbnailElements.length > 0) {
          thumbnail = thumbnailElements[0].getAttribute('url') || undefined
        }
        media.push({
          url,
          type,
          credit,
          thumbnail,
          width,
          height
        })
      }
    })
    // Also check for media:thumbnail at item level (if no media:content found)
    if (media.length === 0) {
      const thumbnailElementsAtItemLevel = Array.from(item.children).filter(child => {
        const localName = child.localName || child.nodeName.toLowerCase()
        const nodeName = child.nodeName.toLowerCase()
        return (localName === 'thumbnail' && (nodeName.includes('media:thumbnail') || child.namespaceURI?.includes('media'))) ||
          nodeName === 'media:thumbnail'
      })
      thumbnailElementsAtItemLevel.forEach((thumbEl) => {
        const url = thumbEl.getAttribute('url') || ''
        if (url) {
          media.push({
            url,
            type: 'image',
            thumbnail: url
          })
        }
      })
    }
    items.push({
      title: itemTitle,
      link: itemLink,
      description: itemDescription,
      pubDate: itemPubDate,
      guid: itemGuid,
      feedUrl,
      feedTitle: title,
      feedImage: feedImage?.url,
      feedDescription: description,
      media: media.length > 0 ? media : undefined,
      enclosure: enclosure || undefined
    })
  })
  return {
    title,
    link,
    description,
    items,
    feedUrl,
    image: feedImage,
    language,
    copyright,
    generator,
    lastBuildDate
  }
}
/**
 * Parse an Atom 1.0 document into an RssFeed (mapped onto the RSS-shaped result type).
 *
 * Mapping: subtitle→description, rights→copyright, updated→lastBuildDate,
 * logo/icon→image, entry content/summary→item description,
 * link[rel="enclosure"]→enclosure. Namespaced Media RSS elements are matched by
 * scanning child elements, taking care to distinguish media:content from Atom's own
 * <content> element.
 *
 * @param doc     parsed XML document (already validated by parseFeed)
 * @param feedUrl the feed's own URL; used for relative-link resolution and stamped on items
 */
private parseAtomFeed(doc: Document, feedUrl: string): RssFeed {
  const feed = doc.documentElement
  const title = this.getTextContent(feed, 'title') || 'Untitled Feed'
  // Prefer the rel="alternate" link; fall back to a rel-less link, then the feed URL
  const linkElement = feed.querySelector('link[rel="alternate"], link:not([rel])')
  const link = linkElement?.getAttribute('href') || feedUrl
  const description = this.getTextContent(feed, 'subtitle') || this.getTextContent(feed, 'description') || ''
  // Extract feed metadata for Atom feeds
  const language = feed.getAttribute('xml:lang') || undefined
  const rights = this.getTextContent(feed, 'rights') || undefined
  const generator = this.getTextContent(feed, 'generator') || undefined
  const updatedStr = this.getTextContent(feed, 'updated')
  const lastBuildDate = updatedStr ? (this.parseDate(updatedStr) || undefined) : undefined
  // Extract feed image/logo for Atom feeds (logo preferred over icon)
  let feedImage: RssFeed['image'] | undefined
  const logoElement = feed.querySelector('logo')
  const iconElement = feed.querySelector('icon')
  if (logoElement) {
    const logoUrl = this.getTextContent(feed, 'logo')
    if (logoUrl) {
      feedImage = { url: logoUrl }
    }
  } else if (iconElement) {
    const iconUrl = this.getTextContent(feed, 'icon')
    if (iconUrl) {
      feedImage = { url: iconUrl }
    }
  }
  const items: RssFeedItem[] = []
  const entryElements = feed.querySelectorAll('entry')
  entryElements.forEach((entry) => {
    const entryTitle = this.getTextContent(entry, 'title') || ''
    const entryLinkElement = entry.querySelector('link[rel="alternate"], link:not([rel])')
    let entryLink = entryLinkElement?.getAttribute('href') || ''
    // Convert relative URLs to absolute (resolved against the feed's origin)
    if (entryLink && !entryLink.startsWith('http://') && !entryLink.startsWith('https://')) {
      try {
        const baseUrl = new URL(feedUrl)
        entryLink = new URL(entryLink, baseUrl.origin).href
      } catch {
        // If URL parsing fails, keep the original link
      }
    }
    if (entryLink) {
      const cleanedEntryLink = cleanUrl(entryLink)
      if (cleanedEntryLink) entryLink = cleanedEntryLink
    }
    // For content/summary, preserve HTML content (full content preferred over summary)
    let entryContent = this.getHtmlContent(entry, 'content') || this.getHtmlContent(entry, 'summary') || ''
    // Additional cleaning for Atom feeds (getHtmlContent already does basic cleaning)
    // This ensures any remaining XML artifacts are removed
    if (entryContent) {
      entryContent = entryContent
        .replace(/\]\]\s*>\s*$/gm, '')
        .replace(/^\s*<!\[CDATA\[/gm, '')
        .trim()
    }
    const entryPublished = this.getTextContent(entry, 'published') || this.getTextContent(entry, 'updated')
    const entryPubDate = this.parseDate(entryPublished)
    let entryId = this.getTextContent(entry, 'id') || entryLink || ''
    // URL-shaped ids are cleaned so they match cleaned links when deduping
    if (entryId && (entryId.startsWith('http://') || entryId.startsWith('https://'))) {
      const cleanedId = cleanUrl(entryId)
      if (cleanedId) entryId = cleanedId
    }
    // Extract enclosure/link elements for Atom feeds (Atom uses <link rel="enclosure">)
    let enclosure: RssFeedItemEnclosure | undefined
    const enclosureLinkElements = entry.querySelectorAll('link[rel="enclosure"]')
    if (enclosureLinkElements.length > 0) {
      const enclosureLink = enclosureLinkElements[0]
      const enclosureUrl = enclosureLink.getAttribute('href') || ''
      const enclosureType = enclosureLink.getAttribute('type') || ''
      const enclosureLength = enclosureLink.getAttribute('length') || undefined
      // Both href AND type are required; enclosures missing either are dropped
      if (enclosureUrl && enclosureType) {
        // Try to get duration from itunes:duration
        let duration: string | undefined
        const allEntryChildren = Array.from(entry.children)
        const durationElements = allEntryChildren.filter(child => {
          const localName = child.localName || child.nodeName.toLowerCase()
          const nodeName = child.nodeName.toLowerCase()
          const namespaceURI = child.namespaceURI
          return (localName === 'duration' && (nodeName.includes('itunes:duration') || namespaceURI?.includes('itunes'))) ||
            nodeName === 'itunes:duration'
        })
        if (durationElements.length > 0) {
          duration = durationElements[0].textContent?.trim() || undefined
        }
        enclosure = {
          url: enclosureUrl,
          type: enclosureType,
          length: enclosureLength,
          duration: duration
        }
      }
    }
    // Extract media:content elements (Media RSS) for Atom feeds
    // In Atom feeds, we need to distinguish between media:content (media) and content (entry content)
    // Handle namespaced elements by checking all elements and filtering by namespace
    const media: RssFeedItemMedia[] = []
    // Get all child elements and filter for media:content
    // media:content has localName "content" but is in the media namespace (not Atom namespace)
    const allChildren = Array.from(entry.children)
    const mediaContentElements = allChildren.filter(child => {
      const localName = child.localName || child.nodeName.toLowerCase()
      const nodeName = child.nodeName.toLowerCase()
      const namespaceURI = child.namespaceURI
      // Check if it's media:content - must have localName "content" but NOT be in Atom namespace
      // Atom content element is in Atom namespace, media:content is in media namespace
      if (localName === 'content') {
        // If it has a url attribute, it's likely media:content (Atom content uses src or type="xhtml")
        if (child.getAttribute('url')) {
          return true
        }
        // Check namespace - media:content is in media namespace, not Atom namespace
        if (namespaceURI && namespaceURI.includes('media') && !namespaceURI.includes('atom')) {
          return true
        }
        // Check nodeName for media: prefix
        if (nodeName.includes('media:content')) {
          return true
        }
      }
      return false
    })
    mediaContentElements.forEach((mediaEl) => {
      const url = mediaEl.getAttribute('url') || ''
      const type = mediaEl.getAttribute('type') || undefined
      const width = mediaEl.getAttribute('width') || undefined
      const height = mediaEl.getAttribute('height') || undefined
      if (url) {
        // Get media:credit (attribution) - check children for credit element
        let credit: string | undefined
        const creditElements = Array.from(mediaEl.children).filter(child => {
          const localName = child.localName || child.nodeName.toLowerCase()
          const nodeName = child.nodeName.toLowerCase()
          return (localName === 'credit' && (nodeName.includes('media:credit') || child.namespaceURI?.includes('media'))) ||
            nodeName === 'media:credit'
        })
        if (creditElements.length > 0) {
          credit = creditElements[0].textContent?.trim() || creditElements[0].getAttribute('scheme') || undefined
        }
        // Get media:thumbnail - check children for thumbnail element
        let thumbnail: string | undefined
        const thumbnailElements = Array.from(mediaEl.children).filter(child => {
          const localName = child.localName || child.nodeName.toLowerCase()
          const nodeName = child.nodeName.toLowerCase()
          return (localName === 'thumbnail' && (nodeName.includes('media:thumbnail') || child.namespaceURI?.includes('media'))) ||
            nodeName === 'media:thumbnail'
        })
        if (thumbnailElements.length > 0) {
          thumbnail = thumbnailElements[0].getAttribute('url') || undefined
        }
        media.push({
          url,
          type,
          credit,
          thumbnail,
          width,
          height
        })
      }
    })
    // Also check for media:thumbnail at entry level (if no media:content found)
    if (media.length === 0) {
      const thumbnailElementsAtEntryLevel = Array.from(entry.children).filter(child => {
        const localName = child.localName || child.nodeName.toLowerCase()
        const nodeName = child.nodeName.toLowerCase()
        return (localName === 'thumbnail' && (nodeName.includes('media:thumbnail') || child.namespaceURI?.includes('media'))) ||
          nodeName === 'media:thumbnail'
      })
      thumbnailElementsAtEntryLevel.forEach((thumbEl) => {
        const url = thumbEl.getAttribute('url') || ''
        if (url) {
          media.push({
            url,
            type: 'image',
            thumbnail: url
          })
        }
      })
    }
    items.push({
      title: entryTitle,
      link: entryLink,
      description: entryContent,
      pubDate: entryPubDate,
      guid: entryId,
      feedUrl,
      feedTitle: title,
      feedImage: feedImage?.url,
      feedDescription: description,
      media: media.length > 0 ? media : undefined,
      enclosure: enclosure
    })
  })
  return {
    title,
    link,
    description,
    items,
    feedUrl,
    image: feedImage,
    language,
    copyright: rights,
    generator,
    lastBuildDate
  }
}
/**
 * Get the trimmed text content of the first child element matching `tagName`.
 *
 * `textContent` on an XML-parsed node already unwraps CDATA sections and
 * decodes XML entities, so no extra decoding is needed here.
 *
 * @param element Parent element to search under (null-safe).
 * @param tagName Tag to look up. May be a namespaced name like "dc:creator":
 *   `querySelector` throws a SyntaxError for those (":" reads as a
 *   pseudo-class in selector syntax), so we fall back to a qualified-name
 *   lookup via `getElementsByTagName` in that case.
 * @returns The trimmed text, or '' when the element/tag is absent.
 */
private getTextContent(element: Element | null, tagName: string): string {
  if (!element) return ''
  let child: Element | null
  try {
    child = element.querySelector(tagName)
  } catch {
    // Invalid selector (namespaced tag) — match by qualified name instead.
    child = element.getElementsByTagName(tagName)[0] ?? null
  }
  if (!child) return ''
  // Get text content which automatically decodes HTML entities
  return child.textContent?.trim() || ''
}
/**
 * Get HTML content from an element (for descriptions that may contain HTML).
 *
 * Resolution order for `tagName` (e.g. plain "description" or namespaced
 * "content:encoded"):
 *   1. `querySelector(tagName)` — works for simple names.
 *   2. `querySelector` with the colon escaped ("content\:encoded").
 *   3. `getElementsByTagName(tagName)` — matches the literal qualified name.
 *
 * BUG FIX: the first `querySelector` call was previously unguarded; for
 * namespaced names it throws a SyntaxError (":" starts a pseudo-class in
 * selector syntax), which aborted the method before the fallbacks — the very
 * fallbacks added for namespaced tags — could ever run. Each probe is now
 * individually guarded.
 *
 * @returns Cleaned HTML string, or '' when nothing was found.
 */
private getHtmlContent(element: Element | null, tagName: string): string {
  if (!element) return ''
  // Handle namespaced tags like content:encoded — each selector probe may
  // throw on invalid selector syntax, so try them one at a time.
  let child: Element | null = null
  try {
    child = element.querySelector(tagName)
  } catch {
    // Invalid selector for this tagName — fall through to the next strategy.
  }
  if (!child) {
    try {
      child = element.querySelector(tagName.replace(':', '\\:'))
    } catch {
      // Escaped form also rejected — fall through to qualified-name lookup.
    }
  }
  if (!child) {
    child = element.getElementsByTagName(tagName)[0] ?? null
  }
  if (!child) return ''
  // Get innerHTML to preserve HTML formatting and CDATA content
  // CDATA sections are automatically included in innerHTML/textContent
  let html = child.innerHTML?.trim() || child.textContent?.trim() || ''
  if (!html) return ''
  // Decode HTML entities that might be encoded (like &lt; &gt; &amp; etc.)
  // The browser's XML parser should decode entities automatically when accessing textContent/innerHTML
  // However, if entities are still present, decode them using textarea trick
  // This handles cases where entities are double-encoded or in raw XML text
  // NOTE(review): decoding turns escaped markup (e.g. "&lt;script&gt;") into
  // live HTML — callers rendering this string must sanitize it. Verify the
  // render path escapes/sanitizes before trusting feed content.
  if (html.includes('&lt;') || html.includes('&gt;') || html.includes('&amp;')) {
    // HTML entities are present, decode them
    const decoder = document.createElement('textarea')
    decoder.innerHTML = html
    html = decoder.value
  }
  // Also decode numeric entities (like &#8212;) using the same method
  // The textarea approach handles both named and numeric entities
  const temp = document.createElement('textarea')
  temp.innerHTML = html
  html = temp.value || html
  // Clean up any XML artifacts that might have leaked through
  // Do this AFTER entity decoding, as entities might encode XML artifacts
  html = html
    .replace(/\]\]\s*>\s*$/gm, '') // Remove trailing ]]> from CDATA (multiline, end of string)
    .replace(/\]\]\s*>/g, '') // Remove any ]]> anywhere in the content
    .replace(/^\s*<!\[CDATA\[/gm, '') // Remove leading CDATA declaration (multiline, start of string)
    .replace(/<!\[CDATA\[/g, '') // Remove any CDATA declaration anywhere
    .replace(/<\?xml[^>]*\?>/gi, '') // Remove XML declarations
    .replace(/<\!DOCTYPE[^>]*>/gi, '') // Remove DOCTYPE declarations
    .replace(/xmlns[=:][^=]*=["'][^"']*["']/gi, '') // Remove xmlns attributes
    .trim()
  return html
}
/**
 * Build a lookup table from localized short month names to English
 * three-letter abbreviations using Intl.DateTimeFormat, so month names in
 * many languages are handled without a hand-maintained table.
 *
 * @returns Map from foreign month token (exact, lowercase, and 3-letter
 *          prefix variants) to its English abbreviation.
 */
private buildMonthMap(): Record<string, string> {
  // Locales that commonly show up in RSS pubDate strings.
  const locales = ['de', 'fr', 'es', 'it', 'pt', 'pt-BR', 'ru', 'pl', 'nl', 'sv', 'no', 'da', 'fi', 'cs', 'hu', 'ro', 'sk', 'sl', 'hr', 'bg', 'el', 'tr', 'ja', 'ko', 'zh', 'ar', 'he', 'th', 'vi', 'hi', 'fa']
  // English month abbreviations, indexed 0-11.
  const englishMonths = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
  const monthMap: Record<string, string> = {}
  const sampleYear = new Date().getFullYear()
  for (const locale of locales) {
    try {
      const shortMonth = new Intl.DateTimeFormat(locale, { month: 'short' })
      englishMonths.forEach((english, monthIndex) => {
        const localized = shortMonth.format(new Date(sampleYear, monthIndex))
        if (!localized || !english) return
        // Some locales emit purely numeric tokens; mapping those would later
        // let "\b12\b" match inside times like "12:01:00" — never map them.
        if (/^\d+$/.test(localized.trim())) {
          return
        }
        // Register the exact token and its lowercase form.
        monthMap[localized] = english
        monthMap[localized.toLowerCase()] = english
        // Also register a 3-letter prefix to catch abbreviated variants.
        if (localized.length >= 3) {
          const prefix = localized.substring(0, 3).trim()
          if (prefix.length >= 2 && !/^\d/.test(prefix)) {
            monthMap[prefix] = english
            monthMap[prefix.toLowerCase()] = english
          }
        }
      })
    } catch {
      // Intl may reject a locale or formatting in some runtimes — skip it.
      continue
    }
  }
  return monthMap
}
/**
 * Parse an RSS/Atom date string into a Date object.
 *
 * Strategy:
 *  1. Try the native Date parser (covers RFC 822 / ISO 8601).
 *  2. Otherwise strip the (possibly localized) weekday prefix, translate a
 *     foreign month name to its English abbreviation via the Intl-built map,
 *     assume UTC when no timezone is present, and retry.
 *
 * @param dateString Raw date text from the feed (may be null/empty).
 * @returns Parsed Date, or null when the string cannot be interpreted.
 */
private parseDate(dateString: string | null): Date | null {
  if (!dateString) return null
  // First, try standard Date parsing
  try {
    const standardDate = new Date(dateString)
    if (!isNaN(standardDate.getTime())) {
      return standardDate
    }
  } catch {
    // Continue to fallback parsing
  }
  // Handle non-standard formats (e.g., "Don, 06 Nov 2025 15:24:25")
  // Skip the weekday part (everything up to and including the first comma)
  let dateToParse = dateString.trim()
  const commaIndex = dateToParse.indexOf(',')
  if (commaIndex > 0) {
    // Skip weekday and comma, keep the rest
    dateToParse = dateToParse.substring(commaIndex + 1).trim()
  }
  // Build month map using Intl.DateTimeFormat (lazy initialization)
  if (!this.monthMapCache) {
    this.monthMapCache = this.buildMonthMap()
    logger.debug('[RssFeedService] Built month map', {
      monthCount: Object.keys(this.monthMapCache).length,
      sampleMonths: Object.entries(this.monthMapCache).slice(0, 5)
    })
  }
  // Replace foreign month names with English equivalents (longest key first so "September" beats "Sep";
  // skip pure-numeric keys so "12:01:00" is never touched by a spurious "12" → "Dec" map entry).
  let monthReplaced = false
  const monthEntries = Object.entries(this.monthMapCache)
    .filter(([foreign]) => !/^\d+$/.test(foreign.trim()))
    .sort((a, b) => b[0].length - a[0].length)
  for (const [foreign, english] of monthEntries) {
    const regex = new RegExp(`\\b${this.escapeRegex(foreign)}\\b`, 'i')
    if (regex.test(dateToParse)) {
      dateToParse = dateToParse.replace(regex, english)
      monthReplaced = true
      logger.debug('[RssFeedService] Replaced month name', { foreign, english, original: dateString, afterReplace: dateToParse })
      break
    }
  }
  // If no timezone is specified, assume UTC (common for RSS feeds).
  // FIX: previously the trailing "$" anchor bound only to the final "PDT"
  // alternative (`...|PDT$`), so PDT was the one token required to sit at the
  // end of the string; all zone tokens are now treated uniformly.
  const hasTimezone = /[+-]\d{4}|GMT|UTC|EST|PST|CET|CEST|CST|EDT|PDT/i.test(dateToParse)
  // FIX: accept minute-precision times ("15:24") as well as "15:24:25" —
  // seconds were previously required, leaving HH:MM times parsed in the
  // local timezone contrary to the assume-UTC intent above.
  if (!hasTimezone && dateToParse.match(/\d{2}:\d{2}(?::\d{2})?$/)) {
    // Add UTC timezone if time is present but no timezone
    dateToParse += ' UTC'
  }
  try {
    const parsedDate = new Date(dateToParse)
    if (!isNaN(parsedDate.getTime())) {
      logger.debug('[RssFeedService] Successfully parsed date', {
        original: dateString,
        parsed: dateToParse,
        result: parsedDate.toISOString()
      })
      return parsedDate
    } else {
      logger.warn('[RssFeedService] Date parsing resulted in invalid date', {
        original: dateString,
        parsed: dateToParse,
        monthReplaced
      })
    }
  } catch (error) {
    logger.warn('[RssFeedService] Date parsing threw error', {
      original: dateString,
      parsed: dateToParse,
      error: error instanceof Error ? error.message : String(error),
      monthReplaced
    })
  }
  return null
}
/**
 * Escape regex metacharacters in a string so it can be embedded literally in
 * a RegExp pattern (used when building month-name replacement regexes).
 */
private escapeRegex(str: string): string {
  // Prefix every metacharacter with a backslash; other characters pass through.
  return str.replace(/[.*+?^${}()|[\]\\]/g, (match) => `\\${match}`)
}
/**
 * Resolve the effective feed URL list.
 *
 * An explicit array — even an empty one — means the user has a feed-list
 * event, so their choice is honoured verbatim. null/undefined means no event
 * exists yet, in which case the demo defaults are used.
 *
 * @param eventFeedUrls Feed URLs from the user's event, or null/undefined.
 * @returns The list to actually fetch from.
 */
getFeedUrls(eventFeedUrls: string[] | null | undefined): string[] {
  return Array.isArray(eventFeedUrls) ? eventFeedUrls : DEFAULT_RSS_FEEDS
}
/**
 * Fetch multiple feeds and merge items
 * Cache-first: reads from IndexedDB, displays immediately, then background-refreshes to merge new items
 *
 * @param feedUrls Feed URLs to load (already resolved via getFeedUrls).
 * @param signal Caller's abort signal. Aborting it ends the *wait* for the
 *   refresh early; the network refresh itself always runs on its own
 *   controller so React Strict Mode / HMR remounts cannot cancel it.
 * @returns Items for the requested feeds, newest first — cached rows, plus
 *   freshly fetched ones when some feeds were missing from the cache.
 * @throws DOMException('AbortError') only when `signal` is already aborted on entry.
 */
async fetchMultipleFeeds(feedUrls: string[], signal?: AbortSignal): Promise<RssFeedItem[]> {
  if (feedUrls.length === 0) {
    return []
  }
  // Check if already aborted
  if (signal?.aborted) {
    throw new DOMException('The operation was aborted.', 'AbortError')
  }
  await this.ensureRssFeedAttemptedKeysLoaded()
  // Step 1: Read from IndexedDB cache first (cache-first strategy)
  let cachedItems: RssFeedItem[] = []
  try {
    const allCachedItems = await indexedDb.getRssFeedItems()
    logger.info('[RssFeedService] Retrieved all cached items from IndexedDB', {
      totalCached: allCachedItems.length
    })
    // Filter to only items from the requested feeds.
    // Comparison uses normalized feed keys on both sides so URL variants of
    // the same feed still match.
    const normalizedRequestedUrls = new Set(feedUrls.map((u) => this.normalizeRssFeedKeyUrl(u)))
    cachedItems = allCachedItems.filter(item => {
      const normalizedItemUrl = this.normalizeRssFeedKeyUrl(item.feedUrl)
      const matches = normalizedRequestedUrls.has(normalizedItemUrl)
      if (!matches && allCachedItems.length > 0 && allCachedItems.length < 10) {
        // Only log for small sets to avoid spam
        logger.debug('[RssFeedService] Item filtered out (feed URL not in requested list)', {
          itemFeedUrl: item.feedUrl,
          normalizedItemUrl,
          requestedFeeds: feedUrls,
          normalizedRequestedUrls: Array.from(normalizedRequestedUrls),
          itemGuid: item.guid?.substring(0, 20)
        })
      }
      return matches
    })
    logger.info('[RssFeedService] Filtered cached items by feed URLs', {
      beforeFilter: allCachedItems.length,
      afterFilter: cachedItems.length,
      requestedFeedCount: feedUrls.length,
      uniqueCachedFeedUrls: [...new Set(allCachedItems.map(i => i.feedUrl))],
      requestedFeedUrls: feedUrls
    })
    // Convert pubDate back to Date objects (handle both Date objects and timestamps/strings)
    // — IndexedDB rows may carry the serialized form depending on how they were stored.
    cachedItems = cachedItems.map(item => {
      let pubDate: Date | null = null
      if (item.pubDate) {
        if (item.pubDate instanceof Date) {
          pubDate = item.pubDate
        } else if (typeof item.pubDate === 'number') {
          pubDate = new Date(item.pubDate)
        } else if (typeof item.pubDate === 'string') {
          pubDate = new Date(item.pubDate)
        }
      }
      return {
        ...item,
        pubDate
      }
    })
    logger.info('[RssFeedService] Loaded cached items from IndexedDB', {
      cachedCount: cachedItems.length,
      feedCount: feedUrls.length,
      filteredCount: cachedItems.length,
      feedUrls: feedUrls
    })
  } catch (error) {
    // Cache read failure is non-fatal: fall through with an empty cache and
    // rely on the refresh below.
    logger.warn('[RssFeedService] Failed to load cached items from IndexedDB', { error })
  }
  const cacheWasEmpty = cachedItems.length === 0
  // Missing = no cached rows for this feed URL and we have not yet completed a fetch pass for it
  const cachedFeedUrls = new Set(cachedItems.map((item) => this.normalizeRssFeedKeyUrl(item.feedUrl)))
  const missingFeeds = feedUrls.filter(
    (url) =>
      !cachedFeedUrls.has(this.normalizeRssFeedKeyUrl(url)) &&
      !this.rssFeedAttemptedKeys.has(this.normalizeRssFeedKeyUrl(url))
  )
  if (missingFeeds.length > 0) {
    logger.info('[RssFeedService] Some feeds are missing from cache, will fetch them', {
      missingFeeds,
      cachedFeedUrls: Array.from(cachedFeedUrls),
      requestedFeeds: feedUrls
    })
  }
  // Step 2: Background refresh — never tied to React's AbortSignal (Strict Mode / HMR / remount would cancel network).
  const refreshAc = new AbortController()
  const refreshSignal = refreshAc.signal
  const backgroundRefresh = async (): Promise<void> => {
    // Deduplicate concurrent refreshes for the same feed-URL set: a second
    // caller awaits the in-flight promise instead of starting new fetches.
    const dedupeKey = this.rssMultiFeedRefreshKey(feedUrls)
    const inflight = this.rssMultiFeedRefreshInFlight.get(dedupeKey)
    if (inflight) {
      await inflight
      return
    }
    const run = async (): Promise<void> => {
      if (refreshSignal.aborted) {
        return
      }
      logger.info('[RssFeedService] Starting background refresh', {
        feedCount: feedUrls.length,
        feedUrls,
        cacheWasEmpty,
        cachedItemCount: cachedItems.length
      })
      if (refreshSignal.aborted) {
        logger.warn('[RssFeedService] Background refresh aborted before starting', {
          feedCount: feedUrls.length
        })
        return
      }
      try {
        logger.info('[RssFeedService] Starting to fetch feeds', {
          feedCount: feedUrls.length,
          feedUrls,
          signalAborted: refreshSignal.aborted
        })
        // allSettled: one failing feed must not sink the others.
        const results = await Promise.allSettled(
          feedUrls.map((url) => {
            if (refreshSignal.aborted) {
              logger.warn('[RssFeedService] Signal aborted before fetching feed', { url })
              return Promise.reject(new DOMException('The operation was aborted.', 'AbortError'))
            }
            logger.debug('[RssFeedService] Fetching feed', { url, signalAborted: refreshSignal.aborted })
            return this.fetchFeed(url, refreshSignal)
          })
        )
        if (refreshSignal.aborted) {
          logger.warn('[RssFeedService] Signal aborted after fetching feeds', {
            feedCount: feedUrls.length
          })
          return
        }
        const newItems: RssFeedItem[] = []
        let successCount = 0
        let failureCount = 0
        let abortCount = 0
        results.forEach((result, index) => {
          if (result.status === 'fulfilled') {
            newItems.push(...result.value.items)
            successCount++
            logger.info('[RssFeedService] Successfully fetched feed', {
              url: feedUrls[index],
              itemCount: result.value.items.length,
              feedTitle: result.value.title
            })
          } else {
            failureCount++
            const error = result.reason
            if (error instanceof DOMException && error.name === 'AbortError') {
              abortCount++
              logger.warn('[RssFeedService] Feed fetch was aborted', {
                url: feedUrls[index],
                reason: error.message || 'AbortError'
              })
              return
            }
            const errorMessage = error instanceof Error ? error.message : String(error)
            logger.warn('[RssFeedService] Failed to fetch feed after trying all strategies', {
              url: feedUrls[index],
              error: errorMessage,
              errorStack: error instanceof Error ? error.stack : undefined,
              errorType: error?.constructor?.name
            })
          }
        })
        logger.info('[RssFeedService] Background refresh completed', {
          successCount,
          failureCount,
          abortCount,
          newItemCount: newItems.length,
          totalFeeds: feedUrls.length
        })
        // Record that a fetch pass completed for these feeds (even all-failed),
        // so empty feeds aren't treated as "missing" forever.
        if (!refreshSignal.aborted) {
          this.markFeedKeysAttempted(feedUrls)
          await this.persistRssFeedAttemptedKeys()
        }
        if (!refreshSignal.aborted && successCount > 0) {
          // Merge cached + new, deduplicated by "feedUrl:guid"; a new item
          // only replaces an existing one when its pubDate is strictly newer.
          const itemMap = new Map<string, RssFeedItem>()
          cachedItems.forEach((item) => {
            const key = `${item.feedUrl}:${item.guid}`
            itemMap.set(key, item)
          })
          newItems.forEach((item) => {
            const key = `${item.feedUrl}:${item.guid}`
            const existing = itemMap.get(key)
            if (!existing || (item.pubDate && existing.pubDate && item.pubDate > existing.pubDate)) {
              itemMap.set(key, item)
            }
          })
          const mergedItems = Array.from(itemMap.values())
          mergedItems.sort((a, b) => {
            // Undated items sort last (treated as epoch 0).
            const dateA = a.pubDate?.getTime() || 0
            const dateB = b.pubDate?.getTime() || 0
            return dateB - dateA
          })
          try {
            await this.persistGlobalRssCacheAfterMerge(mergedItems, feedUrls)
            logger.info('[RssFeedService] Updated IndexedDB cache with merged items', {
              mergedFromThisRefresh: mergedItems.length,
              newItems: newItems.length,
              cachedItems: cachedItems.length
            })
          } catch (error) {
            logger.error('[RssFeedService] Failed to update IndexedDB cache', { error })
          }
        }
      } catch (error) {
        if (!(error instanceof DOMException && error.name === 'AbortError')) {
          logger.error('[RssFeedService] Background refresh failed', { error })
        }
      }
    }
    const p = run()
    this.rssMultiFeedRefreshInFlight.set(dedupeKey, p)
    try {
      await p
    } finally {
      // Only clear our own entry — a different promise may have replaced it.
      if (this.rssMultiFeedRefreshInFlight.get(dedupeKey) === p) {
        this.rssMultiFeedRefreshInFlight.delete(dedupeKey)
      }
    }
  }
  // Wait only while some requested feeds are still unknown (no cache rows and no completed fetch pass)
  const shouldWaitForRefresh = missingFeeds.length > 0
  if (shouldWaitForRefresh) {
    logger.info('[RssFeedService] Waiting for background refresh to complete', {
      feedCount: feedUrls.length,
      cacheWasEmpty,
      missingFeedsCount: missingFeeds.length,
      missingFeeds
    })
    try {
      // Promise that resolves when the caller aborts; never resolves when no
      // signal was supplied.
      const callerGone = signal
        ? new Promise<void>((resolve) => {
            if (signal.aborted) resolve()
            else signal.addEventListener('abort', () => resolve(), { once: true })
          })
        : new Promise<void>(() => {
            /* never */
          })
      // Caller abort ends the wait early; refresh keeps running on refreshAc.signal.
      // The 30s timeout caps how long a first load can block on slow feeds.
      await Promise.race([
        backgroundRefresh(),
        new Promise<void>((resolve) => setTimeout(() => resolve(), 30000)),
        callerGone
      ])
      // Re-read from cache after background refresh
      try {
        const refreshedItems = await indexedDb.getRssFeedItems()
        const feedUrlSet = new Set(feedUrls.map((u) => this.normalizeRssFeedKeyUrl(u)))
        cachedItems = refreshedItems
          .filter((item) => feedUrlSet.has(this.normalizeRssFeedKeyUrl(item.feedUrl)))
          .map(item => ({
            ...item,
            pubDate: item.pubDate ? new Date(item.pubDate) : null
          }))
        logger.info('[RssFeedService] Loaded items after background refresh', {
          itemCount: cachedItems.length,
          feedCount: feedUrls.length
        })
      } catch (error) {
        logger.warn('[RssFeedService] Failed to reload cached items after background refresh', { error })
      }
    } catch (error) {
      if (!(error instanceof DOMException && error.name === 'AbortError')) {
        logger.error('[RssFeedService] Background refresh error during initial load', { error })
      }
    }
  } else {
    // Cache has all requested feeds, start background refresh in background (don't wait)
    logger.debug('[RssFeedService] All feeds in cache, starting background refresh without waiting')
    void backgroundRefresh().catch((err) => {
      if (!(err instanceof DOMException && err.name === 'AbortError')) {
        logger.error('[RssFeedService] Background refresh error', { error: err })
      }
    })
  }
  // Return cached items (now potentially updated from background refresh)
  // Sort by publication date (newest first)
  cachedItems.sort((a, b) => {
    const dateA = a.pubDate?.getTime() || 0
    const dateB = b.pubDate?.getTime() || 0
    return dateB - dateA
  })
  return cachedItems
}
/**
 * Trigger a background refresh for specific feed URLs (without returning cached items).
 * This is useful when you want to force a refresh after updating the feed list.
 * Aborts any existing background refresh before starting a new one.
 *
 * @param feedUrls Feed URLs to refresh; no-op when empty.
 * @param signal Optional external signal; aborting it cancels this refresh.
 */
async backgroundRefreshFeeds(feedUrls: string[], signal?: AbortSignal): Promise<void> {
  if (feedUrls.length === 0) {
    return
  }
  await this.ensureRssFeedAttemptedKeysLoaded()
  // Forget the "already attempted" state for these feeds so they are re-fetched.
  for (const u of feedUrls) {
    this.rssFeedAttemptedKeys.delete(this.normalizeRssFeedKeyUrl(u))
  }
  await this.persistRssFeedAttemptedKeys()
  // Abort any existing background refresh
  if (this.backgroundRefreshController) {
    logger.info('[RssFeedService] Aborting existing background refresh before starting new one')
    this.backgroundRefreshController.abort()
    this.backgroundRefreshController = null
  }
  this.rssMultiFeedRefreshInFlight.clear()
  // Create a new AbortController for this refresh
  const controller = new AbortController()
  this.backgroundRefreshController = controller
  // FIX: release the shared controller slot only if it still belongs to THIS
  // run. Previously the abort listener and the completion/error paths nulled
  // `backgroundRefreshController` unconditionally, so a stale abort or a
  // slow finishing run could clobber a newer refresh's controller.
  const releaseController = () => {
    if (this.backgroundRefreshController === controller) {
      this.backgroundRefreshController = null
    }
  }
  // Combine with external signal if provided
  if (signal) {
    if (signal.aborted) {
      controller.abort()
      releaseController()
      return
    }
    signal.addEventListener('abort', () => {
      controller.abort()
      releaseController()
    }, { once: true })
  }
  const combinedSignal = signal ? (() => {
    const combined = new AbortController()
    const abort = () => combined.abort()
    signal.addEventListener('abort', abort, { once: true })
    controller.signal.addEventListener('abort', abort, { once: true })
    return combined.signal
  })() : controller.signal
  try {
    // One failing feed must not sink the others.
    const results = await Promise.allSettled(
      feedUrls.map(url => this.fetchFeed(url, combinedSignal))
    )
    if (combinedSignal.aborted || controller.signal.aborted) {
      releaseController()
      return
    }
    const newItems: RssFeedItem[] = []
    let successCount = 0
    results.forEach((result, index) => {
      if (result.status === 'fulfilled') {
        newItems.push(...result.value.items)
        successCount++
        logger.debug('[RssFeedService] Background refresh: successfully fetched feed', {
          url: feedUrls[index],
          itemCount: result.value.items.length
        })
      }
    })
    if (!combinedSignal.aborted && !controller.signal.aborted) {
      this.markFeedKeysAttempted(feedUrls)
      await this.persistRssFeedAttemptedKeys()
    }
    if (!combinedSignal.aborted && !controller.signal.aborted && successCount > 0) {
      // Get existing cached items
      let cachedItems: RssFeedItem[] = []
      try {
        cachedItems = await indexedDb.getRssFeedItems()
        // FIX: compare normalized feed keys (the same normalization
        // fetchMultipleFeeds uses for its cache filter) instead of raw
        // strings, so URL variants of the same feed are not dropped here.
        const feedUrlSet = new Set(feedUrls.map((u) => this.normalizeRssFeedKeyUrl(u)))
        cachedItems = cachedItems.filter(item => feedUrlSet.has(this.normalizeRssFeedKeyUrl(item.feedUrl)))
        cachedItems = cachedItems.map(item => ({
          ...item,
          pubDate: item.pubDate ? new Date(item.pubDate) : null
        }))
      } catch (error) {
        logger.warn('[RssFeedService] Failed to load cached items for background refresh', { error })
      }
      // Merge new items with cached items (deduplicate by feedUrl:guid)
      const itemMap = new Map<string, RssFeedItem>()
      // Add cached items first
      cachedItems.forEach(item => {
        const key = `${item.feedUrl}:${item.guid}`
        itemMap.set(key, item)
      })
      // Add/update with new items (newer items replace older ones)
      newItems.forEach(item => {
        const key = `${item.feedUrl}:${item.guid}`
        const existing = itemMap.get(key)
        if (!existing || (item.pubDate && existing.pubDate && item.pubDate > existing.pubDate)) {
          itemMap.set(key, item)
        }
      })
      const mergedItems = Array.from(itemMap.values())
      mergedItems.sort((a, b) => {
        // Undated items sort last (treated as epoch 0).
        const dateA = a.pubDate?.getTime() || 0
        const dateB = b.pubDate?.getTime() || 0
        return dateB - dateA
      })
      try {
        await this.persistGlobalRssCacheAfterMerge(mergedItems, feedUrls)
        logger.info('[RssFeedService] Background refresh: updated IndexedDB cache', {
          mergedFromThisRefresh: mergedItems.length,
          newItems: newItems.length,
          cachedItems: cachedItems.length
        })
      } catch (error) {
        logger.error('[RssFeedService] Background refresh: failed to update IndexedDB cache', { error })
      }
    }
    // Clear the controller when done (only if it is still ours)
    releaseController()
  } catch (error) {
    // Clear the controller on error (only if it is still ours)
    releaseController()
    if (!(error instanceof DOMException && error.name === 'AbortError')) {
      logger.error('[RssFeedService] Background refresh failed', { error })
    }
  }
}
/**
 * Clear cached data for a specific feed, or for all feeds when no URL is given.
 *
 * Clears the in-memory feed cache, the "fetch attempted" bookkeeping, and the
 * persisted IndexedDB rows. IndexedDB work is fire-and-forget; failures are
 * only logged.
 *
 * @param url Optional feed URL; omit to clear everything.
 */
clearCache(url?: string) {
  if (url) {
    this.feedCache.delete(url)
    this.rssFeedAttemptedKeys.delete(this.normalizeRssFeedKeyUrl(url))
    void this.persistRssFeedAttemptedKeys()
    // Also clear from IndexedDB (filter by feedUrl).
    // FIX: compare normalized keys on both sides — the raw string comparison
    // used previously could leave equivalent-but-differently-written URLs
    // behind, inconsistent with how rssFeedAttemptedKeys is keyed above.
    const targetKey = this.normalizeRssFeedKeyUrl(url)
    indexedDb.getRssFeedItems().then(items => {
      const filtered = items.filter(item => this.normalizeRssFeedKeyUrl(item.feedUrl) !== targetKey)
      indexedDb.putRssFeedItems(filtered).catch(err => {
        logger.error('[RssFeedService] Failed to clear feed from IndexedDB', { url, error: err })
      })
    }).catch(err => {
      logger.error('[RssFeedService] Failed to get items for cache clear', { url, error: err })
    })
  } else {
    this.feedCache.clear()
    this.rssFeedAttemptedKeys.clear()
    void this.persistRssFeedAttemptedKeys()
    // Clear all from IndexedDB
    indexedDb.clearRssFeedItems().catch(err => {
      logger.error('[RssFeedService] Failed to clear IndexedDB cache', { error: err })
    })
  }
}
}
const instance = new RssFeedService()
export default instance