From 08bdcaae833abc5247294315bcff65a599851fee Mon Sep 17 00:00:00 2001 From: Silberengel Date: Tue, 11 Nov 2025 21:12:55 +0100 Subject: [PATCH] make the rss feed more attractive --- src/components/RssFeedItem/index.tsx | 111 ++++++- src/services/rss-feed.service.ts | 480 ++++++++++++++++++++++++++- 2 files changed, 577 insertions(+), 14 deletions(-) diff --git a/src/components/RssFeedItem/index.tsx b/src/components/RssFeedItem/index.tsx index 6c0e242..f4facda 100644 --- a/src/components/RssFeedItem/index.tsx +++ b/src/components/RssFeedItem/index.tsx @@ -8,6 +8,7 @@ import { useNostr } from '@/providers/NostrProvider' import PostEditor from '@/components/PostEditor' import { HighlightData } from '@/components/PostEditor/HighlightEditor' import { cn } from '@/lib/utils' +import MediaPlayer from '@/components/MediaPlayer' /** * Convert HTML to plain text by extracting text content and cleaning up whitespace @@ -249,10 +250,19 @@ export default function RssFeedItem({ item, className }: { item: TRssFeedItem; c }, [item.feedUrl, item.feedTitle]) // Clean and parse HTML description safely - // Remove any XML artifacts that might have leaked through + // Decode HTML entities and remove any XML artifacts that might have leaked through const descriptionHtml = useMemo(() => { let html = item.description || '' + if (!html) return '' + + // Decode HTML entities (like < > & " etc.) 
+ // Use textarea element which automatically decodes HTML entities when setting innerHTML + // This is the most reliable way to decode entities in the browser + const decoder = document.createElement('textarea') + decoder.innerHTML = html + html = decoder.value + // Remove any trailing XML/CDATA artifacts html = html .replace(/\]\]\s*>\s*$/g, '') // Remove trailing ]]> from CDATA @@ -261,6 +271,16 @@ export default function RssFeedItem({ item, className }: { item: TRssFeedItem; c .replace(/<\!DOCTYPE[^>]*>/gi, '') // Remove DOCTYPE declarations .trim() + // Basic sanitization: remove script tags and dangerous attributes + // Remove script tags and their content (including nested tags) + html = html.replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, '') + // Remove event handlers (onclick, onerror, etc.) + html = html.replace(/\s*on\w+\s*=\s*["'][^"']*["']/gi, '') + // Remove javascript: URLs in href and src attributes + html = html.replace(/javascript:/gi, '') + // Remove data: URLs that might contain javascript (basic protection) + html = html.replace(/data:\s*text\/html/gi, '') + return html }, [item.description]) @@ -324,12 +344,39 @@ export default function RssFeedItem({ item, className }: { item: TRssFeedItem; c return (
- {/* Feed Source and Date */} -
- {feedSourceName} - {pubDateTimestamp && ( - + {/* Feed Header with Metadata */} +
+ {/* Feed Image/Logo */} + {item.feedImage && ( + {item.feedTitle { + // Hide image on error + e.currentTarget.style.display = 'none' + }} + /> )} + + {/* Feed Info */} +
+
+
+

+ {item.feedTitle || feedSourceName} +

+ {item.feedDescription && ( +

+ {item.feedDescription} +

+ )} +
+ {pubDateTimestamp && ( + + )} +
+
{/* Title */} @@ -346,6 +393,58 @@ export default function RssFeedItem({ item, className }: { item: TRssFeedItem; c
+ {/* Media (Images) */} + {item.media && item.media.length > 0 && ( +
+ {item.media + .filter(m => m.type?.startsWith('image/') || !m.type || m.type === 'image') + .map((media, index) => ( +
+ {item.title} { + e.stopPropagation() + // Open image in new tab + window.open(media.url, '_blank', 'noopener,noreferrer') + }} + onError={(e) => { + // Hide image on error + e.currentTarget.style.display = 'none' + }} + /> + {media.credit && ( +
+ {t('Photo')}: {media.credit} +
+ )} +
+ ))} +
+ )} + + {/* Audio/Video Enclosure */} + {item.enclosure && (item.enclosure.type.startsWith('audio/') || item.enclosure.type.startsWith('video/')) && ( +
+
+
+
+ {item.enclosure.type.startsWith('audio/') ? t('Audio') : t('Video')} + {item.enclosure.duration && ( + ({item.enclosure.duration}) + )} +
+
+ +
+
+ )} + {/* Description with text selection support and collapse/expand */}
element + const standardImageElements = allChannelChildren.filter(child => { + const nodeName = child.nodeName.toLowerCase() + const localName = child.localName || nodeName + const namespaceURI = child.namespaceURI + // Standard RSS image element has nodeName "image" with no namespace prefix + return localName === 'image' && + !nodeName.includes(':') && + (!namespaceURI || (!namespaceURI.includes('itunes') && !namespaceURI.includes('media'))) + }) + + if (standardImageElements.length > 0) { + const imageElement = standardImageElements[0] + logger.debug('[RssFeedService] Processing standard image element', { + url: feedUrl, + nodeName: imageElement.nodeName, + localName: imageElement.localName, + childrenCount: imageElement.children.length, + innerHTML: imageElement.innerHTML?.substring(0, 200) + }) + + const imageUrl = this.getTextContent(imageElement, 'url') + logger.debug('[RssFeedService] Extracted image URL', { url: feedUrl, imageUrl }) + + if (imageUrl) { + const imageTitle = this.getTextContent(imageElement, 'title') + const imageLink = this.getTextContent(imageElement, 'link') + const imageWidth = this.getTextContent(imageElement, 'width') + const imageHeight = this.getTextContent(imageElement, 'height') + const imageDescription = this.getTextContent(imageElement, 'description') + + feedImage = { + url: imageUrl, + title: imageTitle || undefined, + link: imageLink || undefined, + width: imageWidth || undefined, + height: imageHeight || undefined, + description: imageDescription || undefined + } + logger.debug('[RssFeedService] Found standard RSS feed image element', { url: feedUrl, imageUrl, feedImage }) + } else { + logger.warn('[RssFeedService] Standard image element found but no URL extracted', { + url: feedUrl, + imageElementHTML: imageElement.outerHTML?.substring(0, 300) + }) + } + } + + // If no standard image found, check for itunes:image (common in podcast feeds) + if (!feedImage) { + const itunesImageElements = allChannelChildren.filter(child => { + 
const localName = child.localName || child.nodeName.toLowerCase() + const nodeName = child.nodeName.toLowerCase() + const namespaceURI = child.namespaceURI + // Check if it's itunes:image by namespace or nodeName + return (localName === 'image' && namespaceURI && namespaceURI.includes('itunes')) || + nodeName === 'itunes:image' || + (nodeName.includes('itunes') && nodeName.includes('image')) + }) + + if (itunesImageElements.length > 0) { + const itunesImage = itunesImageElements[0] + // itunes:image uses href attribute, not nested url element + const href = itunesImage.getAttribute('href') + if (href) { + feedImage = { url: href } + logger.debug('[RssFeedService] Found itunes:image', { url: feedUrl, imageUrl: href }) + } + } + } + + logger.debug('[RssFeedService] Feed image extraction result', { + url: feedUrl, + hasImage: !!feedImage, + imageUrl: feedImage?.url, + channelChildrenCount: allChannelChildren.length, + standardImageCount: standardImageElements.length + }) const items: RssFeedItem[] = [] const itemElements = channel.querySelectorAll('item') @@ -262,7 +385,26 @@ class RssFeedService { // Fall back to description if content:encoded is not found or empty if (!itemDescription) { + // Try getting HTML content from description tag itemDescription = this.getHtmlContent(item, 'description') || '' + + // If that doesn't work, try getting text content and decode HTML entities + // This handles cases where HTML entities are in the text content + if (!itemDescription) { + const descElement = item.querySelector('description') + if (descElement) { + // Get raw text content (which may contain HTML entities) + const rawText = descElement.textContent?.trim() || descElement.innerHTML?.trim() || '' + if (rawText) { + // Decode HTML entities using a temporary element + // The browser will automatically decode entities when setting innerHTML + const temp = document.createElement('textarea') + temp.innerHTML = rawText + itemDescription = temp.value + } + } + } + // Clean 
description as well if (itemDescription) { itemDescription = itemDescription @@ -273,6 +415,148 @@ class RssFeedService { } const itemPubDate = this.parseDate(this.getTextContent(item, 'pubDate')) const itemGuid = this.getTextContent(item, 'guid') || itemLink || '' + + // Extract enclosure element (for audio/video files) + let enclosure: RssFeedItemEnclosure | undefined + const enclosureElement = item.querySelector('enclosure') + if (enclosureElement) { + const enclosureUrl = enclosureElement.getAttribute('url') || '' + const enclosureType = enclosureElement.getAttribute('type') || '' + const enclosureLength = enclosureElement.getAttribute('length') || undefined + + if (enclosureUrl && enclosureType) { + // Try to get duration from itunes:duration + let duration: string | undefined + const allItemChildren = Array.from(item.children) + const durationElements = allItemChildren.filter(child => { + const localName = child.localName || child.nodeName.toLowerCase() + const nodeName = child.nodeName.toLowerCase() + const namespaceURI = child.namespaceURI + return (localName === 'duration' && (nodeName.includes('itunes:duration') || namespaceURI?.includes('itunes'))) || + nodeName === 'itunes:duration' + }) + + if (durationElements.length > 0) { + duration = durationElements[0].textContent?.trim() || undefined + } + + enclosure = { + url: enclosureUrl, + type: enclosureType, + length: enclosureLength, + duration: duration + } + + logger.debug('[RssFeedService] Found enclosure', { + url: feedUrl, + itemTitle: itemTitle.substring(0, 50), + enclosureType: enclosureType, + enclosureUrl: enclosureUrl, + duration: duration + }) + } + } + + // Extract media:content elements (Media RSS) + // Handle namespaced elements by checking all elements and filtering by localName and namespace + const media: RssFeedItemMedia[] = [] + + // Get all child elements and filter for media:content + // media:content has localName "content" but is in the media namespace + // Regular RSS 
content:encoded has localName "encoded" and is in the content namespace (different!) + const allChildren = Array.from(item.children) + const mediaContentElements = allChildren.filter(child => { + const localName = child.localName || child.nodeName.toLowerCase() + const nodeName = child.nodeName.toLowerCase() + const namespaceURI = child.namespaceURI + + // media:content elements have: + // 1. localName "content" AND a "url" attribute (media:content has url attribute) + // 2. nodeName includes "media:content" + // 3. namespaceURI includes "media" + // We exclude content:encoded which has localName "encoded" (not "content") + if (localName === 'content') { + // If it has a url attribute, it's likely media:content (content:encoded doesn't have url) + if (child.getAttribute('url')) { + return true + } + // Check namespace - media:content is in media namespace + if (namespaceURI && namespaceURI.includes('media')) { + return true + } + // Check nodeName for media: prefix + if (nodeName.includes('media:content') || nodeName.startsWith('media:')) { + return true + } + } + return false + }) + + logger.debug('[RssFeedService] Found media:content elements', { + url: feedUrl, + itemTitle: itemTitle.substring(0, 50), + mediaCount: mediaContentElements.length, + allChildrenCount: allChildren.length + }) + + mediaContentElements.forEach((mediaEl) => { + const url = mediaEl.getAttribute('url') || '' + const type = mediaEl.getAttribute('type') || undefined + const width = mediaEl.getAttribute('width') || undefined + const height = mediaEl.getAttribute('height') || undefined + + if (url) { + // Get media:credit (attribution) - check children for credit element + let credit: string | undefined + const creditElements = Array.from(mediaEl.children).filter(child => { + const localName = child.localName || child.nodeName + return localName === 'credit' || child.nodeName === 'media:credit' + }) + if (creditElements.length > 0) { + credit = creditElements[0].textContent?.trim() || 
creditElements[0].getAttribute('scheme') || undefined + } + + // Get media:thumbnail - check children for thumbnail element + let thumbnail: string | undefined + const thumbnailElements = Array.from(mediaEl.children).filter(child => { + const localName = child.localName || child.nodeName + return localName === 'thumbnail' || child.nodeName === 'media:thumbnail' + }) + if (thumbnailElements.length > 0) { + thumbnail = thumbnailElements[0].getAttribute('url') || undefined + } + + media.push({ + url, + type, + credit, + thumbnail, + width, + height + }) + } + }) + + // Also check for media:thumbnail at item level (if no media:content found) + if (media.length === 0) { + const thumbnailElementsAtItemLevel = Array.from(item.children).filter(child => { + const localName = child.localName || child.nodeName.toLowerCase() + const nodeName = child.nodeName.toLowerCase() + return (localName === 'thumbnail' && (nodeName.includes('media:thumbnail') || child.namespaceURI?.includes('media'))) || + nodeName === 'media:thumbnail' + }) + + thumbnailElementsAtItemLevel.forEach((thumbEl) => { + const url = thumbEl.getAttribute('url') || '' + if (url) { + media.push({ + url, + type: 'image', + thumbnail: url + }) + } + }) + } items.push({ title: itemTitle, @@ -281,7 +565,11 @@ class RssFeedService { pubDate: itemPubDate, guid: itemGuid, feedUrl, - feedTitle: title + feedTitle: title, + feedImage: feedImage?.url, + feedDescription: description, + media: media.length > 0 ? 
media : undefined, + enclosure: enclosure || undefined }) }) @@ -290,7 +578,12 @@ class RssFeedService { link, description, items, - feedUrl + feedUrl, + image: feedImage, + language, + copyright, + generator, + lastBuildDate } } @@ -304,6 +597,29 @@ class RssFeedService { const linkElement = feed.querySelector('link[rel="alternate"], link:not([rel])') const link = linkElement?.getAttribute('href') || feedUrl const description = this.getTextContent(feed, 'subtitle') || this.getTextContent(feed, 'description') || '' + + // Extract feed metadata for Atom feeds + const language = feed.getAttribute('xml:lang') || undefined + const rights = this.getTextContent(feed, 'rights') || undefined + const generator = this.getTextContent(feed, 'generator') || undefined + const updatedStr = this.getTextContent(feed, 'updated') + const lastBuildDate = updatedStr ? (this.parseDate(updatedStr) || undefined) : undefined + + // Extract feed image/logo for Atom feeds + let feedImage: RssFeed['image'] | undefined + const logoElement = feed.querySelector('logo') + const iconElement = feed.querySelector('icon') + if (logoElement) { + const logoUrl = this.getTextContent(feed, 'logo') + if (logoUrl) { + feedImage = { url: logoUrl } + } + } else if (iconElement) { + const iconUrl = this.getTextContent(feed, 'icon') + if (iconUrl) { + feedImage = { url: iconUrl } + } + } const items: RssFeedItem[] = [] const entryElements = feed.querySelectorAll('entry') @@ -334,6 +650,134 @@ class RssFeedService { const entryPublished = this.getTextContent(entry, 'published') || this.getTextContent(entry, 'updated') const entryPubDate = this.parseDate(entryPublished) const entryId = this.getTextContent(entry, 'id') || entryLink || '' + + // Extract enclosure/link elements for Atom feeds (Atom uses ) + let enclosure: RssFeedItemEnclosure | undefined + const enclosureLinkElements = entry.querySelectorAll('link[rel="enclosure"]') + if (enclosureLinkElements.length > 0) { + const enclosureLink = 
enclosureLinkElements[0] + const enclosureUrl = enclosureLink.getAttribute('href') || '' + const enclosureType = enclosureLink.getAttribute('type') || '' + const enclosureLength = enclosureLink.getAttribute('length') || undefined + + if (enclosureUrl && enclosureType) { + // Try to get duration from itunes:duration + let duration: string | undefined + const allEntryChildren = Array.from(entry.children) + const durationElements = allEntryChildren.filter(child => { + const localName = child.localName || child.nodeName.toLowerCase() + const nodeName = child.nodeName.toLowerCase() + const namespaceURI = child.namespaceURI + return (localName === 'duration' && (nodeName.includes('itunes:duration') || namespaceURI?.includes('itunes'))) || + nodeName === 'itunes:duration' + }) + + if (durationElements.length > 0) { + duration = durationElements[0].textContent?.trim() || undefined + } + + enclosure = { + url: enclosureUrl, + type: enclosureType, + length: enclosureLength, + duration: duration + } + } + } + + // Extract media:content elements (Media RSS) for Atom feeds + // In Atom feeds, we need to distinguish between media:content (media) and content (entry content) + // Handle namespaced elements by checking all elements and filtering by namespace + const media: RssFeedItemMedia[] = [] + + // Get all child elements and filter for media:content + // media:content has localName "content" but is in the media namespace (not Atom namespace) + const allChildren = Array.from(entry.children) + const mediaContentElements = allChildren.filter(child => { + const localName = child.localName || child.nodeName.toLowerCase() + const nodeName = child.nodeName.toLowerCase() + const namespaceURI = child.namespaceURI + // Check if it's media:content - must have localName "content" but NOT be in Atom namespace + // Atom content element is in Atom namespace, media:content is in media namespace + if (localName === 'content') { + // If it has a url attribute, it's likely media:content (Atom 
content uses src or type="xhtml") + if (child.getAttribute('url')) { + return true + } + // Check namespace - media:content is in media namespace, not Atom namespace + if (namespaceURI && namespaceURI.includes('media') && !namespaceURI.includes('atom')) { + return true + } + // Check nodeName for media: prefix + if (nodeName.includes('media:content')) { + return true + } + } + return false + }) + + mediaContentElements.forEach((mediaEl) => { + const url = mediaEl.getAttribute('url') || '' + const type = mediaEl.getAttribute('type') || undefined + const width = mediaEl.getAttribute('width') || undefined + const height = mediaEl.getAttribute('height') || undefined + + if (url) { + // Get media:credit (attribution) - check children for credit element + let credit: string | undefined + const creditElements = Array.from(mediaEl.children).filter(child => { + const localName = child.localName || child.nodeName.toLowerCase() + const nodeName = child.nodeName.toLowerCase() + return (localName === 'credit' && (nodeName.includes('media:credit') || child.namespaceURI?.includes('media'))) || + nodeName === 'media:credit' + }) + if (creditElements.length > 0) { + credit = creditElements[0].textContent?.trim() || creditElements[0].getAttribute('scheme') || undefined + } + + // Get media:thumbnail - check children for thumbnail element + let thumbnail: string | undefined + const thumbnailElements = Array.from(mediaEl.children).filter(child => { + const localName = child.localName || child.nodeName.toLowerCase() + const nodeName = child.nodeName.toLowerCase() + return (localName === 'thumbnail' && (nodeName.includes('media:thumbnail') || child.namespaceURI?.includes('media'))) || + nodeName === 'media:thumbnail' + }) + if (thumbnailElements.length > 0) { + thumbnail = thumbnailElements[0].getAttribute('url') || undefined + } + + media.push({ + url, + type, + credit, + thumbnail, + width, + height + }) + } + }) + + // Also check for media:thumbnail at entry level (if no 
media:content found) + if (media.length === 0) { + const thumbnailElementsAtEntryLevel = Array.from(entry.children).filter(child => { + const localName = child.localName || child.nodeName.toLowerCase() + const nodeName = child.nodeName.toLowerCase() + return (localName === 'thumbnail' && (nodeName.includes('media:thumbnail') || child.namespaceURI?.includes('media'))) || + nodeName === 'media:thumbnail' + }) + + thumbnailElementsAtEntryLevel.forEach((thumbEl) => { + const url = thumbEl.getAttribute('url') || '' + if (url) { + media.push({ + url, + type: 'image', + thumbnail: url + }) + } + }) + } items.push({ title: entryTitle, @@ -342,7 +786,11 @@ class RssFeedService { pubDate: entryPubDate, guid: entryId, feedUrl, - feedTitle: title + feedTitle: title, + feedImage: feedImage?.url, + feedDescription: description, + media: media.length > 0 ? media : undefined, + enclosure: enclosure }) }) @@ -351,7 +799,12 @@ class RssFeedService { link, description, items, - feedUrl + feedUrl, + image: feedImage, + language, + copyright: rights, + generator, + lastBuildDate } } @@ -383,11 +836,22 @@ class RssFeedService { if (!html) return '' - // Decode HTML entities that might be encoded (like — for em dash) - // Create a temporary element to decode entities - const temp = document.createElement('div') + // Decode HTML entities that might be encoded (like < > & etc.) 
+ // The browser's XML parser should decode entities automatically when accessing textContent/innerHTML + // However, if entities are still present, decode them using textarea trick + // This handles cases where entities are double-encoded or in raw XML text + if (html.includes('&lt;') || html.includes('&gt;') || html.includes('&amp;')) { + // HTML entities are present, decode them + const decoder = document.createElement('textarea') + decoder.innerHTML = html + html = decoder.value + } + + // Also decode numeric entities (like &#8212;) using the same method + // The textarea approach handles both named and numeric entities + const temp = document.createElement('textarea') temp.innerHTML = html - html = temp.innerHTML || html + html = temp.value || html // Clean up any XML artifacts that might have leaked through // Do this AFTER entity decoding, as entities might encode XML artifacts