|
|
|
|
@ -1,6 +1,22 @@
@@ -1,6 +1,22 @@
|
|
|
|
|
import { DEFAULT_RSS_FEEDS } from '@/constants' |
|
|
|
|
import logger from '@/lib/logger' |
|
|
|
|
|
|
|
|
|
/**
 * A media attachment belonging to a single feed item.
 *
 * Entries are built from Media RSS `media:content` elements or, when an item
 * has none, from item-level `media:thumbnail` elements. Every value is the raw
 * string found in the feed; nothing is parsed, converted or validated.
 */
export interface RssFeedItemMedia {
  /** Location of the media resource, read from the element's `url` attribute. */
  url: string
  /**
   * Value of the element's `type` attribute. Entries created from an
   * item-level `media:thumbnail` carry the literal `'image'` instead.
   */
  type?: string
  /**
   * Attribution: the text of a child `media:credit` element, falling back to
   * that element's `scheme` attribute when the text is empty.
   */
  credit?: string
  /** `url` attribute of a child `media:thumbnail` element. */
  thumbnail?: string
  /** Raw `width` attribute, kept as a string rather than a number. */
  width?: string
  /** Raw `height` attribute, kept as a string rather than a number. */
  height?: string
}
|
|
|
|
|
|
|
|
|
/**
 * The downloadable attachment (typically audio or video) of a feed item.
 *
 * For RSS items it comes from the `<enclosure>` element; for Atom entries it
 * comes from the first `<link rel="enclosure">`. The parser only produces an
 * enclosure when both a URL and a type are present, which is why those two
 * fields are required here.
 */
export interface RssFeedItemEnclosure {
  /** Location of the file: the `url` attribute (RSS) or `href` attribute (Atom). */
  url: string
  /** Value of the `type` attribute, copied verbatim from the feed. */
  type: string
  /** Raw `length` attribute, kept as a string rather than a number. */
  length?: string
  /** Trimmed text of the item's `itunes:duration` element, left in the feed's own format. */
  duration?: string
}
|
|
|
|
|
|
|
|
|
export interface RssFeedItem { |
|
|
|
|
title: string |
|
|
|
|
link: string |
|
|
|
|
@ -9,6 +25,10 @@ export interface RssFeedItem {
@@ -9,6 +25,10 @@ export interface RssFeedItem {
|
|
|
|
|
guid: string |
|
|
|
|
feedUrl: string |
|
|
|
|
feedTitle?: string |
|
|
|
|
feedImage?: string |
|
|
|
|
feedDescription?: string |
|
|
|
|
media?: RssFeedItemMedia[] |
|
|
|
|
enclosure?: RssFeedItemEnclosure |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
export interface RssFeed { |
|
|
|
|
@ -17,6 +37,18 @@ export interface RssFeed {
@@ -17,6 +37,18 @@ export interface RssFeed {
|
|
|
|
|
description: string |
|
|
|
|
items: RssFeedItem[] |
|
|
|
|
feedUrl: string |
|
|
|
|
image?: { |
|
|
|
|
url?: string |
|
|
|
|
title?: string |
|
|
|
|
link?: string |
|
|
|
|
width?: string |
|
|
|
|
height?: string |
|
|
|
|
description?: string |
|
|
|
|
} |
|
|
|
|
language?: string |
|
|
|
|
copyright?: string |
|
|
|
|
generator?: string |
|
|
|
|
lastBuildDate?: Date |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
class RssFeedService { |
|
|
|
|
@ -179,6 +211,97 @@ class RssFeedService {
@@ -179,6 +211,97 @@ class RssFeedService {
|
|
|
|
|
const title = this.getTextContent(channel, 'title') || 'Untitled Feed' |
|
|
|
|
const link = this.getTextContent(channel, 'link') || feedUrl |
|
|
|
|
const description = this.getTextContent(channel, 'description') || '' |
|
|
|
|
|
|
|
|
|
// Extract feed metadata
|
|
|
|
|
const language = this.getTextContent(channel, 'language') || undefined |
|
|
|
|
const copyright = this.getTextContent(channel, 'copyright') || undefined |
|
|
|
|
const generator = this.getTextContent(channel, 'generator') || undefined |
|
|
|
|
const lastBuildDateStr = this.getTextContent(channel, 'lastBuildDate') |
|
|
|
|
const lastBuildDate = lastBuildDateStr ? (this.parseDate(lastBuildDateStr) || undefined) : undefined |
|
|
|
|
|
|
|
|
|
// Extract feed image
|
|
|
|
|
// Check all channel children for image elements (both standard RSS and namespaced)
|
|
|
|
|
let feedImage: RssFeed['image'] | undefined |
|
|
|
|
const allChannelChildren = Array.from(channel.children) |
|
|
|
|
|
|
|
|
|
// First, try to find standard RSS 2.0 <image> element
|
|
|
|
|
const standardImageElements = allChannelChildren.filter(child => { |
|
|
|
|
const nodeName = child.nodeName.toLowerCase() |
|
|
|
|
const localName = child.localName || nodeName |
|
|
|
|
const namespaceURI = child.namespaceURI |
|
|
|
|
// Standard RSS image element has nodeName "image" with no namespace prefix
|
|
|
|
|
return localName === 'image' &&
|
|
|
|
|
!nodeName.includes(':') &&
|
|
|
|
|
(!namespaceURI || (!namespaceURI.includes('itunes') && !namespaceURI.includes('media'))) |
|
|
|
|
}) |
|
|
|
|
|
|
|
|
|
if (standardImageElements.length > 0) { |
|
|
|
|
const imageElement = standardImageElements[0] |
|
|
|
|
logger.debug('[RssFeedService] Processing standard image element', { |
|
|
|
|
url: feedUrl, |
|
|
|
|
nodeName: imageElement.nodeName, |
|
|
|
|
localName: imageElement.localName, |
|
|
|
|
childrenCount: imageElement.children.length, |
|
|
|
|
innerHTML: imageElement.innerHTML?.substring(0, 200) |
|
|
|
|
}) |
|
|
|
|
|
|
|
|
|
const imageUrl = this.getTextContent(imageElement, 'url') |
|
|
|
|
logger.debug('[RssFeedService] Extracted image URL', { url: feedUrl, imageUrl }) |
|
|
|
|
|
|
|
|
|
if (imageUrl) { |
|
|
|
|
const imageTitle = this.getTextContent(imageElement, 'title') |
|
|
|
|
const imageLink = this.getTextContent(imageElement, 'link') |
|
|
|
|
const imageWidth = this.getTextContent(imageElement, 'width') |
|
|
|
|
const imageHeight = this.getTextContent(imageElement, 'height') |
|
|
|
|
const imageDescription = this.getTextContent(imageElement, 'description') |
|
|
|
|
|
|
|
|
|
feedImage = { |
|
|
|
|
url: imageUrl, |
|
|
|
|
title: imageTitle || undefined, |
|
|
|
|
link: imageLink || undefined, |
|
|
|
|
width: imageWidth || undefined, |
|
|
|
|
height: imageHeight || undefined, |
|
|
|
|
description: imageDescription || undefined |
|
|
|
|
} |
|
|
|
|
logger.debug('[RssFeedService] Found standard RSS feed image element', { url: feedUrl, imageUrl, feedImage }) |
|
|
|
|
} else { |
|
|
|
|
logger.warn('[RssFeedService] Standard image element found but no URL extracted', { |
|
|
|
|
url: feedUrl, |
|
|
|
|
imageElementHTML: imageElement.outerHTML?.substring(0, 300) |
|
|
|
|
}) |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// If no standard image found, check for itunes:image (common in podcast feeds)
|
|
|
|
|
if (!feedImage) { |
|
|
|
|
const itunesImageElements = allChannelChildren.filter(child => { |
|
|
|
|
const localName = child.localName || child.nodeName.toLowerCase() |
|
|
|
|
const nodeName = child.nodeName.toLowerCase() |
|
|
|
|
const namespaceURI = child.namespaceURI |
|
|
|
|
// Check if it's itunes:image by namespace or nodeName
|
|
|
|
|
return (localName === 'image' && namespaceURI && namespaceURI.includes('itunes')) || |
|
|
|
|
nodeName === 'itunes:image' || |
|
|
|
|
(nodeName.includes('itunes') && nodeName.includes('image')) |
|
|
|
|
}) |
|
|
|
|
|
|
|
|
|
if (itunesImageElements.length > 0) { |
|
|
|
|
const itunesImage = itunesImageElements[0] |
|
|
|
|
// itunes:image uses href attribute, not nested url element
|
|
|
|
|
const href = itunesImage.getAttribute('href') |
|
|
|
|
if (href) { |
|
|
|
|
feedImage = { url: href } |
|
|
|
|
logger.debug('[RssFeedService] Found itunes:image', { url: feedUrl, imageUrl: href }) |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
logger.debug('[RssFeedService] Feed image extraction result', { |
|
|
|
|
url: feedUrl, |
|
|
|
|
hasImage: !!feedImage, |
|
|
|
|
imageUrl: feedImage?.url, |
|
|
|
|
channelChildrenCount: allChannelChildren.length, |
|
|
|
|
standardImageCount: standardImageElements.length |
|
|
|
|
}) |
|
|
|
|
|
|
|
|
|
const items: RssFeedItem[] = [] |
|
|
|
|
const itemElements = channel.querySelectorAll('item') |
|
|
|
|
@ -262,7 +385,26 @@ class RssFeedService {
@@ -262,7 +385,26 @@ class RssFeedService {
|
|
|
|
|
|
|
|
|
|
// Fall back to description if content:encoded is not found or empty
|
|
|
|
|
if (!itemDescription) { |
|
|
|
|
// Try getting HTML content from description tag
|
|
|
|
|
itemDescription = this.getHtmlContent(item, 'description') || '' |
|
|
|
|
|
|
|
|
|
// If that doesn't work, try getting text content and decode HTML entities
|
|
|
|
|
// This handles cases where HTML entities are in the text content
|
|
|
|
|
if (!itemDescription) { |
|
|
|
|
const descElement = item.querySelector('description') |
|
|
|
|
if (descElement) { |
|
|
|
|
// Get raw text content (which may contain HTML entities)
|
|
|
|
|
const rawText = descElement.textContent?.trim() || descElement.innerHTML?.trim() || '' |
|
|
|
|
if (rawText) { |
|
|
|
|
// Decode HTML entities using a temporary element
|
|
|
|
|
// The browser will automatically decode entities when setting innerHTML
|
|
|
|
|
const temp = document.createElement('textarea') |
|
|
|
|
temp.innerHTML = rawText |
|
|
|
|
itemDescription = temp.value |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Clean description as well
|
|
|
|
|
if (itemDescription) { |
|
|
|
|
itemDescription = itemDescription |
|
|
|
|
@ -273,6 +415,148 @@ class RssFeedService {
@@ -273,6 +415,148 @@ class RssFeedService {
|
|
|
|
|
} |
|
|
|
|
const itemPubDate = this.parseDate(this.getTextContent(item, 'pubDate')) |
|
|
|
|
const itemGuid = this.getTextContent(item, 'guid') || itemLink || '' |
|
|
|
|
|
|
|
|
|
// Extract enclosure element (for audio/video files)
|
|
|
|
|
let enclosure: RssFeedItemEnclosure | undefined |
|
|
|
|
const enclosureElement = item.querySelector('enclosure') |
|
|
|
|
if (enclosureElement) { |
|
|
|
|
const enclosureUrl = enclosureElement.getAttribute('url') || '' |
|
|
|
|
const enclosureType = enclosureElement.getAttribute('type') || '' |
|
|
|
|
const enclosureLength = enclosureElement.getAttribute('length') || undefined |
|
|
|
|
|
|
|
|
|
if (enclosureUrl && enclosureType) { |
|
|
|
|
// Try to get duration from itunes:duration
|
|
|
|
|
let duration: string | undefined |
|
|
|
|
const allItemChildren = Array.from(item.children) |
|
|
|
|
const durationElements = allItemChildren.filter(child => { |
|
|
|
|
const localName = child.localName || child.nodeName.toLowerCase() |
|
|
|
|
const nodeName = child.nodeName.toLowerCase() |
|
|
|
|
const namespaceURI = child.namespaceURI |
|
|
|
|
return (localName === 'duration' && (nodeName.includes('itunes:duration') || namespaceURI?.includes('itunes'))) || |
|
|
|
|
nodeName === 'itunes:duration' |
|
|
|
|
}) |
|
|
|
|
|
|
|
|
|
if (durationElements.length > 0) { |
|
|
|
|
duration = durationElements[0].textContent?.trim() || undefined |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
enclosure = { |
|
|
|
|
url: enclosureUrl, |
|
|
|
|
type: enclosureType, |
|
|
|
|
length: enclosureLength, |
|
|
|
|
duration: duration |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
logger.debug('[RssFeedService] Found enclosure', { |
|
|
|
|
url: feedUrl, |
|
|
|
|
itemTitle: itemTitle.substring(0, 50), |
|
|
|
|
enclosureType: enclosureType, |
|
|
|
|
enclosureUrl: enclosureUrl, |
|
|
|
|
duration: duration |
|
|
|
|
}) |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Extract media:content elements (Media RSS)
|
|
|
|
|
// Handle namespaced elements by checking all elements and filtering by localName and namespace
|
|
|
|
|
const media: RssFeedItemMedia[] = [] |
|
|
|
|
|
|
|
|
|
// Get all child elements and filter for media:content
|
|
|
|
|
// media:content has localName "content" but is in the media namespace
|
|
|
|
|
// Regular RSS content:encoded has localName "encoded" and is in the content namespace (different!)
|
|
|
|
|
const allChildren = Array.from(item.children) |
|
|
|
|
const mediaContentElements = allChildren.filter(child => { |
|
|
|
|
const localName = child.localName || child.nodeName.toLowerCase() |
|
|
|
|
const nodeName = child.nodeName.toLowerCase() |
|
|
|
|
const namespaceURI = child.namespaceURI |
|
|
|
|
|
|
|
|
|
// media:content elements have:
|
|
|
|
|
// 1. localName "content" AND a "url" attribute (media:content has url attribute)
|
|
|
|
|
// 2. nodeName includes "media:content"
|
|
|
|
|
// 3. namespaceURI includes "media"
|
|
|
|
|
// We exclude content:encoded which has localName "encoded" (not "content")
|
|
|
|
|
if (localName === 'content') { |
|
|
|
|
// If it has a url attribute, it's likely media:content (content:encoded doesn't have url)
|
|
|
|
|
if (child.getAttribute('url')) { |
|
|
|
|
return true |
|
|
|
|
} |
|
|
|
|
// Check namespace - media:content is in media namespace
|
|
|
|
|
if (namespaceURI && namespaceURI.includes('media')) { |
|
|
|
|
return true |
|
|
|
|
} |
|
|
|
|
// Check nodeName for media: prefix
|
|
|
|
|
if (nodeName.includes('media:content') || nodeName.startsWith('media:')) { |
|
|
|
|
return true |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
return false |
|
|
|
|
}) |
|
|
|
|
|
|
|
|
|
logger.debug('[RssFeedService] Found media:content elements', { |
|
|
|
|
url: feedUrl, |
|
|
|
|
itemTitle: itemTitle.substring(0, 50), |
|
|
|
|
mediaCount: mediaContentElements.length, |
|
|
|
|
allChildrenCount: allChildren.length |
|
|
|
|
}) |
|
|
|
|
|
|
|
|
|
mediaContentElements.forEach((mediaEl) => { |
|
|
|
|
const url = mediaEl.getAttribute('url') || '' |
|
|
|
|
const type = mediaEl.getAttribute('type') || undefined |
|
|
|
|
const width = mediaEl.getAttribute('width') || undefined |
|
|
|
|
const height = mediaEl.getAttribute('height') || undefined |
|
|
|
|
|
|
|
|
|
if (url) { |
|
|
|
|
// Get media:credit (attribution) - check children for credit element
|
|
|
|
|
let credit: string | undefined |
|
|
|
|
const creditElements = Array.from(mediaEl.children).filter(child => { |
|
|
|
|
const localName = child.localName || child.nodeName |
|
|
|
|
return localName === 'credit' || child.nodeName === 'media:credit' |
|
|
|
|
}) |
|
|
|
|
if (creditElements.length > 0) { |
|
|
|
|
credit = creditElements[0].textContent?.trim() || creditElements[0].getAttribute('scheme') || undefined |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Get media:thumbnail - check children for thumbnail element
|
|
|
|
|
let thumbnail: string | undefined |
|
|
|
|
const thumbnailElements = Array.from(mediaEl.children).filter(child => { |
|
|
|
|
const localName = child.localName || child.nodeName |
|
|
|
|
return localName === 'thumbnail' || child.nodeName === 'media:thumbnail' |
|
|
|
|
}) |
|
|
|
|
if (thumbnailElements.length > 0) { |
|
|
|
|
thumbnail = thumbnailElements[0].getAttribute('url') || undefined |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
media.push({ |
|
|
|
|
url, |
|
|
|
|
type, |
|
|
|
|
credit, |
|
|
|
|
thumbnail, |
|
|
|
|
width, |
|
|
|
|
height |
|
|
|
|
}) |
|
|
|
|
} |
|
|
|
|
}) |
|
|
|
|
|
|
|
|
|
// Also check for media:thumbnail at item level (if no media:content found)
|
|
|
|
|
if (media.length === 0) { |
|
|
|
|
const thumbnailElementsAtItemLevel = Array.from(item.children).filter(child => { |
|
|
|
|
const localName = child.localName || child.nodeName.toLowerCase() |
|
|
|
|
const nodeName = child.nodeName.toLowerCase() |
|
|
|
|
return (localName === 'thumbnail' && (nodeName.includes('media:thumbnail') || child.namespaceURI?.includes('media'))) || |
|
|
|
|
nodeName === 'media:thumbnail' |
|
|
|
|
}) |
|
|
|
|
|
|
|
|
|
thumbnailElementsAtItemLevel.forEach((thumbEl) => { |
|
|
|
|
const url = thumbEl.getAttribute('url') || '' |
|
|
|
|
if (url) { |
|
|
|
|
media.push({ |
|
|
|
|
url, |
|
|
|
|
type: 'image', |
|
|
|
|
thumbnail: url |
|
|
|
|
}) |
|
|
|
|
} |
|
|
|
|
}) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
items.push({ |
|
|
|
|
title: itemTitle, |
|
|
|
|
@ -281,7 +565,11 @@ class RssFeedService {
@@ -281,7 +565,11 @@ class RssFeedService {
|
|
|
|
|
pubDate: itemPubDate, |
|
|
|
|
guid: itemGuid, |
|
|
|
|
feedUrl, |
|
|
|
|
feedTitle: title |
|
|
|
|
feedTitle: title, |
|
|
|
|
feedImage: feedImage?.url, |
|
|
|
|
feedDescription: description, |
|
|
|
|
media: media.length > 0 ? media : undefined, |
|
|
|
|
enclosure: enclosure || undefined |
|
|
|
|
}) |
|
|
|
|
}) |
|
|
|
|
|
|
|
|
|
@ -290,7 +578,12 @@ class RssFeedService {
@@ -290,7 +578,12 @@ class RssFeedService {
|
|
|
|
|
link, |
|
|
|
|
description, |
|
|
|
|
items, |
|
|
|
|
feedUrl |
|
|
|
|
feedUrl, |
|
|
|
|
image: feedImage, |
|
|
|
|
language, |
|
|
|
|
copyright, |
|
|
|
|
generator, |
|
|
|
|
lastBuildDate |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
@ -304,6 +597,29 @@ class RssFeedService {
@@ -304,6 +597,29 @@ class RssFeedService {
|
|
|
|
|
const linkElement = feed.querySelector('link[rel="alternate"], link:not([rel])') |
|
|
|
|
const link = linkElement?.getAttribute('href') || feedUrl |
|
|
|
|
const description = this.getTextContent(feed, 'subtitle') || this.getTextContent(feed, 'description') || '' |
|
|
|
|
|
|
|
|
|
// Extract feed metadata for Atom feeds
|
|
|
|
|
const language = feed.getAttribute('xml:lang') || undefined |
|
|
|
|
const rights = this.getTextContent(feed, 'rights') || undefined |
|
|
|
|
const generator = this.getTextContent(feed, 'generator') || undefined |
|
|
|
|
const updatedStr = this.getTextContent(feed, 'updated') |
|
|
|
|
const lastBuildDate = updatedStr ? (this.parseDate(updatedStr) || undefined) : undefined |
|
|
|
|
|
|
|
|
|
// Extract feed image/logo for Atom feeds
|
|
|
|
|
let feedImage: RssFeed['image'] | undefined |
|
|
|
|
const logoElement = feed.querySelector('logo') |
|
|
|
|
const iconElement = feed.querySelector('icon') |
|
|
|
|
if (logoElement) { |
|
|
|
|
const logoUrl = this.getTextContent(feed, 'logo') |
|
|
|
|
if (logoUrl) { |
|
|
|
|
feedImage = { url: logoUrl } |
|
|
|
|
} |
|
|
|
|
} else if (iconElement) { |
|
|
|
|
const iconUrl = this.getTextContent(feed, 'icon') |
|
|
|
|
if (iconUrl) { |
|
|
|
|
feedImage = { url: iconUrl } |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
const items: RssFeedItem[] = [] |
|
|
|
|
const entryElements = feed.querySelectorAll('entry') |
|
|
|
|
@ -334,6 +650,134 @@ class RssFeedService {
@@ -334,6 +650,134 @@ class RssFeedService {
|
|
|
|
|
const entryPublished = this.getTextContent(entry, 'published') || this.getTextContent(entry, 'updated') |
|
|
|
|
const entryPubDate = this.parseDate(entryPublished) |
|
|
|
|
const entryId = this.getTextContent(entry, 'id') || entryLink || '' |
|
|
|
|
|
|
|
|
|
// Extract enclosure/link elements for Atom feeds (Atom uses <link rel="enclosure">)
|
|
|
|
|
let enclosure: RssFeedItemEnclosure | undefined |
|
|
|
|
const enclosureLinkElements = entry.querySelectorAll('link[rel="enclosure"]') |
|
|
|
|
if (enclosureLinkElements.length > 0) { |
|
|
|
|
const enclosureLink = enclosureLinkElements[0] |
|
|
|
|
const enclosureUrl = enclosureLink.getAttribute('href') || '' |
|
|
|
|
const enclosureType = enclosureLink.getAttribute('type') || '' |
|
|
|
|
const enclosureLength = enclosureLink.getAttribute('length') || undefined |
|
|
|
|
|
|
|
|
|
if (enclosureUrl && enclosureType) { |
|
|
|
|
// Try to get duration from itunes:duration
|
|
|
|
|
let duration: string | undefined |
|
|
|
|
const allEntryChildren = Array.from(entry.children) |
|
|
|
|
const durationElements = allEntryChildren.filter(child => { |
|
|
|
|
const localName = child.localName || child.nodeName.toLowerCase() |
|
|
|
|
const nodeName = child.nodeName.toLowerCase() |
|
|
|
|
const namespaceURI = child.namespaceURI |
|
|
|
|
return (localName === 'duration' && (nodeName.includes('itunes:duration') || namespaceURI?.includes('itunes'))) || |
|
|
|
|
nodeName === 'itunes:duration' |
|
|
|
|
}) |
|
|
|
|
|
|
|
|
|
if (durationElements.length > 0) { |
|
|
|
|
duration = durationElements[0].textContent?.trim() || undefined |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
enclosure = { |
|
|
|
|
url: enclosureUrl, |
|
|
|
|
type: enclosureType, |
|
|
|
|
length: enclosureLength, |
|
|
|
|
duration: duration |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Extract media:content elements (Media RSS) for Atom feeds
|
|
|
|
|
// In Atom feeds, we need to distinguish between media:content (media) and content (entry content)
|
|
|
|
|
// Handle namespaced elements by checking all elements and filtering by namespace
|
|
|
|
|
const media: RssFeedItemMedia[] = [] |
|
|
|
|
|
|
|
|
|
// Get all child elements and filter for media:content
|
|
|
|
|
// media:content has localName "content" but is in the media namespace (not Atom namespace)
|
|
|
|
|
const allChildren = Array.from(entry.children) |
|
|
|
|
const mediaContentElements = allChildren.filter(child => { |
|
|
|
|
const localName = child.localName || child.nodeName.toLowerCase() |
|
|
|
|
const nodeName = child.nodeName.toLowerCase() |
|
|
|
|
const namespaceURI = child.namespaceURI |
|
|
|
|
// Check if it's media:content - must have localName "content" but NOT be in Atom namespace
|
|
|
|
|
// Atom content element is in Atom namespace, media:content is in media namespace
|
|
|
|
|
if (localName === 'content') { |
|
|
|
|
// If it has a url attribute, it's likely media:content (Atom content uses src or type="xhtml")
|
|
|
|
|
if (child.getAttribute('url')) { |
|
|
|
|
return true |
|
|
|
|
} |
|
|
|
|
// Check namespace - media:content is in media namespace, not Atom namespace
|
|
|
|
|
if (namespaceURI && namespaceURI.includes('media') && !namespaceURI.includes('atom')) { |
|
|
|
|
return true |
|
|
|
|
} |
|
|
|
|
// Check nodeName for media: prefix
|
|
|
|
|
if (nodeName.includes('media:content')) { |
|
|
|
|
return true |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
return false |
|
|
|
|
}) |
|
|
|
|
|
|
|
|
|
mediaContentElements.forEach((mediaEl) => { |
|
|
|
|
const url = mediaEl.getAttribute('url') || '' |
|
|
|
|
const type = mediaEl.getAttribute('type') || undefined |
|
|
|
|
const width = mediaEl.getAttribute('width') || undefined |
|
|
|
|
const height = mediaEl.getAttribute('height') || undefined |
|
|
|
|
|
|
|
|
|
if (url) { |
|
|
|
|
// Get media:credit (attribution) - check children for credit element
|
|
|
|
|
let credit: string | undefined |
|
|
|
|
const creditElements = Array.from(mediaEl.children).filter(child => { |
|
|
|
|
const localName = child.localName || child.nodeName.toLowerCase() |
|
|
|
|
const nodeName = child.nodeName.toLowerCase() |
|
|
|
|
return (localName === 'credit' && (nodeName.includes('media:credit') || child.namespaceURI?.includes('media'))) || |
|
|
|
|
nodeName === 'media:credit' |
|
|
|
|
}) |
|
|
|
|
if (creditElements.length > 0) { |
|
|
|
|
credit = creditElements[0].textContent?.trim() || creditElements[0].getAttribute('scheme') || undefined |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Get media:thumbnail - check children for thumbnail element
|
|
|
|
|
let thumbnail: string | undefined |
|
|
|
|
const thumbnailElements = Array.from(mediaEl.children).filter(child => { |
|
|
|
|
const localName = child.localName || child.nodeName.toLowerCase() |
|
|
|
|
const nodeName = child.nodeName.toLowerCase() |
|
|
|
|
return (localName === 'thumbnail' && (nodeName.includes('media:thumbnail') || child.namespaceURI?.includes('media'))) || |
|
|
|
|
nodeName === 'media:thumbnail' |
|
|
|
|
}) |
|
|
|
|
if (thumbnailElements.length > 0) { |
|
|
|
|
thumbnail = thumbnailElements[0].getAttribute('url') || undefined |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
media.push({ |
|
|
|
|
url, |
|
|
|
|
type, |
|
|
|
|
credit, |
|
|
|
|
thumbnail, |
|
|
|
|
width, |
|
|
|
|
height |
|
|
|
|
}) |
|
|
|
|
} |
|
|
|
|
}) |
|
|
|
|
|
|
|
|
|
// Also check for media:thumbnail at entry level (if no media:content found)
|
|
|
|
|
if (media.length === 0) { |
|
|
|
|
const thumbnailElementsAtEntryLevel = Array.from(entry.children).filter(child => { |
|
|
|
|
const localName = child.localName || child.nodeName.toLowerCase() |
|
|
|
|
const nodeName = child.nodeName.toLowerCase() |
|
|
|
|
return (localName === 'thumbnail' && (nodeName.includes('media:thumbnail') || child.namespaceURI?.includes('media'))) || |
|
|
|
|
nodeName === 'media:thumbnail' |
|
|
|
|
}) |
|
|
|
|
|
|
|
|
|
thumbnailElementsAtEntryLevel.forEach((thumbEl) => { |
|
|
|
|
const url = thumbEl.getAttribute('url') || '' |
|
|
|
|
if (url) { |
|
|
|
|
media.push({ |
|
|
|
|
url, |
|
|
|
|
type: 'image', |
|
|
|
|
thumbnail: url |
|
|
|
|
}) |
|
|
|
|
} |
|
|
|
|
}) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
items.push({ |
|
|
|
|
title: entryTitle, |
|
|
|
|
@ -342,7 +786,11 @@ class RssFeedService {
@@ -342,7 +786,11 @@ class RssFeedService {
|
|
|
|
|
pubDate: entryPubDate, |
|
|
|
|
guid: entryId, |
|
|
|
|
feedUrl, |
|
|
|
|
feedTitle: title |
|
|
|
|
feedTitle: title, |
|
|
|
|
feedImage: feedImage?.url, |
|
|
|
|
feedDescription: description, |
|
|
|
|
media: media.length > 0 ? media : undefined, |
|
|
|
|
enclosure: enclosure |
|
|
|
|
}) |
|
|
|
|
}) |
|
|
|
|
|
|
|
|
|
@ -351,7 +799,12 @@ class RssFeedService {
@@ -351,7 +799,12 @@ class RssFeedService {
|
|
|
|
|
link, |
|
|
|
|
description, |
|
|
|
|
items, |
|
|
|
|
feedUrl |
|
|
|
|
feedUrl, |
|
|
|
|
image: feedImage, |
|
|
|
|
language, |
|
|
|
|
copyright: rights, |
|
|
|
|
generator, |
|
|
|
|
lastBuildDate |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
@ -383,11 +836,22 @@ class RssFeedService {
@@ -383,11 +836,22 @@ class RssFeedService {
|
|
|
|
|
|
|
|
|
|
if (!html) return '' |
|
|
|
|
|
|
|
|
|
// Decode HTML entities that might be encoded (like &mdash; for em dash)
|
|
|
|
|
// Create a temporary element to decode entities
|
|
|
|
|
const temp = document.createElement('div') |
|
|
|
|
// Decode HTML entities that might be encoded (like &lt; &gt; &amp; etc.)
|
|
|
|
|
// The browser's XML parser should decode entities automatically when accessing textContent/innerHTML
|
|
|
|
|
// However, if entities are still present, decode them using textarea trick
|
|
|
|
|
// This handles cases where entities are double-encoded or in raw XML text
|
|
|
|
|
if (html.includes('<') || html.includes('>') || html.includes('&')) { |
|
|
|
|
// HTML entities are present, decode them
|
|
|
|
|
const decoder = document.createElement('textarea') |
|
|
|
|
decoder.innerHTML = html |
|
|
|
|
html = decoder.value |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Also decode numeric entities (like &#8212;) using the same method
|
|
|
|
|
// The textarea approach handles both named and numeric entities
|
|
|
|
|
const temp = document.createElement('textarea') |
|
|
|
|
temp.innerHTML = html |
|
|
|
|
html = temp.innerHTML || html |
|
|
|
|
html = temp.value || html |
|
|
|
|
|
|
|
|
|
// Clean up any XML artifacts that might have leaked through
|
|
|
|
|
// Do this AFTER entity decoding, as entities might encode XML artifacts
|
|
|
|
|
|