Browse Source

get the rss feed working

imwald
Silberengel 4 months ago
parent
commit
ca5490d453
  1. 128
      src/components/RssFeedItem/index.tsx
  2. 16
      src/components/RssFeedList/index.tsx
  3. 240
      src/services/rss-feed.service.ts

128
src/components/RssFeedItem/index.tsx

@ -1,12 +1,13 @@ @@ -1,12 +1,13 @@
import { RssFeedItem as TRssFeedItem } from '@/services/rss-feed.service'
import { FormattedTimestamp } from '../FormattedTimestamp'
import { ExternalLink, Highlighter } from 'lucide-react'
import { ExternalLink, Highlighter, ChevronDown, ChevronUp } from 'lucide-react'
import { useState, useRef, useEffect, useMemo } from 'react'
import { useTranslation } from 'react-i18next'
import { Button } from '@/components/ui/button'
import { useNostr } from '@/providers/NostrProvider'
import PostEditor from '@/components/PostEditor'
import { HighlightData } from '@/components/PostEditor/HighlightEditor'
import { cn } from '@/lib/utils'
export default function RssFeedItem({ item, className }: { item: TRssFeedItem; className?: string }) {
const { t } = useTranslation()
@ -17,6 +18,7 @@ export default function RssFeedItem({ item, className }: { item: TRssFeedItem; c @@ -17,6 +18,7 @@ export default function RssFeedItem({ item, className }: { item: TRssFeedItem; c
const [selectionPosition, setSelectionPosition] = useState<{ x: number; y: number } | null>(null)
const [isPostEditorOpen, setIsPostEditorOpen] = useState(false)
const [highlightData, setHighlightData] = useState<HighlightData | undefined>(undefined)
const [isExpanded, setIsExpanded] = useState(false)
const contentRef = useRef<HTMLDivElement>(null)
const selectionTimeoutRef = useRef<NodeJS.Timeout>()
@ -138,12 +140,80 @@ export default function RssFeedItem({ item, className }: { item: TRssFeedItem; c @@ -138,12 +140,80 @@ export default function RssFeedItem({ item, className }: { item: TRssFeedItem; c
}
}, [item.feedUrl, item.feedTitle])
// Parse HTML description safely
const descriptionHtml = item.description
// Clean and parse HTML description safely
// Remove any XML artifacts that might have leaked through
const descriptionHtml = useMemo(() => {
let html = item.description || ''
// Remove any trailing XML/CDATA artifacts
html = html
.replace(/\]\]\s*>\s*$/g, '') // Remove trailing ]]> from CDATA
.replace(/^\s*<!\[CDATA\[/g, '') // Remove leading CDATA declaration
.replace(/<\?xml[^>]*\?>/gi, '') // Remove XML declarations
.replace(/<\!DOCTYPE[^>]*>/gi, '') // Remove DOCTYPE declarations
.trim()
return html
}, [item.description])
// Format publication date
const pubDateTimestamp = item.pubDate ? Math.floor(item.pubDate.getTime() / 1000) : null
// Check if content exceeds 400px height
const [needsCollapse, setNeedsCollapse] = useState(false)
useEffect(() => {
if (!contentRef.current || !descriptionHtml) return
const checkHeight = () => {
const element = contentRef.current
if (!element) return
// Temporarily remove max-height to measure full content height
const hadMaxHeight = element.classList.contains('max-h-[400px]')
if (hadMaxHeight) {
element.classList.remove('max-h-[400px]')
element.style.maxHeight = 'none'
}
// Force a reflow to get accurate measurements
void element.offsetHeight
// Measure the actual content height
const fullHeight = element.scrollHeight
// Restore original state
if (hadMaxHeight) {
element.classList.add('max-h-[400px]')
element.style.maxHeight = ''
}
setNeedsCollapse(fullHeight > 400)
}
// Check height after content is rendered (multiple checks for dynamic content)
const timeoutId1 = setTimeout(checkHeight, 100)
const timeoutId2 = setTimeout(checkHeight, 500)
// Use ResizeObserver to detect when content changes
const resizeObserver = new ResizeObserver(() => {
// Only check if not currently expanded (to avoid unnecessary checks)
if (!isExpanded) {
checkHeight()
}
})
if (contentRef.current) {
resizeObserver.observe(contentRef.current)
}
return () => {
clearTimeout(timeoutId1)
clearTimeout(timeoutId2)
resizeObserver.disconnect()
}
}, [descriptionHtml, isExpanded])
return (
<div className={`border rounded-lg bg-background p-4 space-y-3 ${className || ''}`}>
{/* Feed Source and Date */}
@ -168,23 +238,61 @@ export default function RssFeedItem({ item, className }: { item: TRssFeedItem; c @@ -168,23 +238,61 @@ export default function RssFeedItem({ item, className }: { item: TRssFeedItem; c
</a>
</div>
{/* Description with text selection support */}
{/* Description with text selection support and collapse/expand */}
<div className="relative">
<div
ref={contentRef}
className="prose prose-sm dark:prose-invert max-w-none break-words rss-feed-content"
dangerouslySetInnerHTML={{ __html: descriptionHtml }}
onMouseUp={(e) => {
// Allow text selection
e.stopPropagation()
}}
className={cn(
'prose prose-sm dark:prose-invert max-w-none break-words rss-feed-content transition-all duration-200',
needsCollapse && !isExpanded && 'max-h-[400px] overflow-hidden'
)}
style={{
userSelect: 'text',
WebkitUserSelect: 'text',
MozUserSelect: 'text',
msUserSelect: 'text'
}}
>
<div
dangerouslySetInnerHTML={{ __html: descriptionHtml }}
onMouseUp={(e) => {
// Allow text selection
e.stopPropagation()
}}
/>
</div>
{/* Gradient overlay when collapsed */}
{needsCollapse && !isExpanded && (
<div className="absolute bottom-0 left-0 right-0 h-16 bg-gradient-to-b from-transparent via-background/60 to-background pointer-events-none" />
)}
{/* Collapse/Expand Button */}
{needsCollapse && (
<div className="flex justify-center mt-2 relative z-10">
<Button
variant="ghost"
size="sm"
onClick={(e) => {
e.stopPropagation()
setIsExpanded(!isExpanded)
}}
className="text-muted-foreground hover:text-foreground"
>
{isExpanded ? (
<>
<ChevronUp className="h-4 w-4 mr-1" />
{t('Show less')}
</>
) : (
<>
<ChevronDown className="h-4 w-4 mr-1" />
{t('Show more')}
</>
)}
</Button>
</div>
)}
{/* Highlight Button */}
{showHighlightButton && selectedText && selectionPosition && (

16
src/components/RssFeedList/index.tsx

@ -44,12 +44,26 @@ export default function RssFeedList() { @@ -44,12 +44,26 @@ export default function RssFeedList() {
}
}
// Fetch and merge feeds
// Fetch and merge feeds (this handles errors gracefully and returns partial results)
const fetchedItems = await rssFeedService.fetchMultipleFeeds(feedUrls)
if (fetchedItems.length === 0) {
// No items were successfully fetched, but don't show error if we tried
// The fetchMultipleFeeds already logs warnings for failed feeds
setError(null) // Clear any previous error
}
setItems(fetchedItems)
} catch (err) {
logger.error('[RssFeedList] Error loading RSS feeds', { error: err })
// Don't set error state - fetchMultipleFeeds handles individual feed failures gracefully
// Only set error if there's a critical issue (like network completely down)
if (err instanceof TypeError && err.message.includes('Failed to fetch')) {
// Network error - might be temporary, don't show persistent error
setError(null)
} else {
setError(err instanceof Error ? err.message : t('Failed to load RSS feeds'))
}
} finally {
setLoading(false)
}

240
src/services/rss-feed.service.ts

@ -41,20 +41,75 @@ class RssFeedService { @@ -41,20 +41,75 @@ class RssFeedService {
return cached.feed
}
// Try multiple fetch strategies in order
const strategies = this.getFetchStrategies(url)
for (const strategy of strategies) {
try {
// Check if we should use proxy server to avoid CORS issues
const xmlText = await this.fetchWithStrategy(url, strategy)
if (xmlText) {
const feed = this.parseFeed(xmlText, url)
// Cache the feed
this.feedCache.set(url, { feed, timestamp: Date.now() })
return feed
}
} catch (error) {
logger.warn('[RssFeedService] Strategy failed', { url, strategy: strategy.name, error })
// Continue to next strategy
continue
}
}
// All strategies failed
throw new Error(`Failed to fetch RSS feed from ${url} after trying all available methods`)
}
/**
 * Build the ordered list of fetch strategies to try for a feed URL.
 *
 * Order of attempts:
 *   1. `configured-proxy` – only when `VITE_PROXY_SERVER` is set and the URL does
 *      not already contain `/sites/` (i.e. is not already routed through the proxy)
 *   2. `allorigins-proxy` – https://api.allorigins.win
 *   3. `corsproxy-proxy`  – https://corsproxy.io
 *   4. `direct`           – the feed URL itself (works only if the feed allows CORS)
 *
 * @param url - The original feed URL.
 * @returns The strategies in the order they should be attempted. Each `getUrl`
 *   maps the original feed URL to the URL that is actually requested.
 */
private getFetchStrategies(url: string): Array<{ name: string; getUrl: (url: string) => string }> {
  const strategies: Array<{ name: string; getUrl: (url: string) => string }> = []

  // Strategy 1: Use configured proxy server (if available).
  // Skipped when the URL already points at the proxy, to avoid double-proxying.
  const proxyServer = import.meta.env.VITE_PROXY_SERVER
  const isProxyUrl = url.includes('/sites/')
  if (proxyServer && !isProxyUrl) {
    strategies.push({
      name: 'configured-proxy',
      getUrl: (target) => `${proxyServer}/sites/${encodeURIComponent(target)}`
    })
  }

  // Strategy 2: Use public CORS proxy (allorigins.win)
  strategies.push({
    name: 'allorigins-proxy',
    getUrl: (target) => `https://api.allorigins.win/raw?url=${encodeURIComponent(target)}`
  })

  // Strategy 3: Alternative CORS proxy (corsproxy.io)
  strategies.push({
    name: 'corsproxy-proxy',
    getUrl: (target) => `https://corsproxy.io/?${encodeURIComponent(target)}`
  })

  // Strategy 4: Try direct fetch (may work for some feeds with CORS enabled)
  strategies.push({
    name: 'direct',
    getUrl: (target) => target
  })

  return strategies
}
/**
* Fetch feed using a specific strategy
*/
private async fetchWithStrategy(originalUrl: string, strategy: { name: string; getUrl: (url: string) => string }): Promise<string> {
const fetchUrl = strategy.getUrl(originalUrl)
const controller = new AbortController()
const timeoutId = setTimeout(() => controller.abort(), 10000) // 10 second timeout
const timeoutId = setTimeout(() => controller.abort(), 15000) // 15 second timeout
try {
const res = await fetch(fetchUrl, {
signal: controller.signal,
mode: 'cors',
@ -67,18 +122,24 @@ class RssFeedService { @@ -67,18 +122,24 @@ class RssFeedService {
clearTimeout(timeoutId)
if (!res.ok) {
throw new Error(`Failed to fetch feed: ${res.status} ${res.statusText}`)
throw new Error(`HTTP ${res.status}: ${res.statusText}`)
}
const xmlText = await res.text()
const feed = this.parseFeed(xmlText, url)
// Cache the feed
this.feedCache.set(url, { feed, timestamp: Date.now() })
// Validate that we got XML content
if (!xmlText || xmlText.trim().length === 0) {
throw new Error('Empty response')
}
return feed
// Basic validation - check if it looks like XML
if (!xmlText.trim().startsWith('<')) {
throw new Error('Response does not appear to be XML')
}
return xmlText
} catch (error) {
logger.error('[RssFeedService] Error fetching feed', { url, error })
clearTimeout(timeoutId)
throw error
}
}
@ -134,8 +195,82 @@ class RssFeedService { @@ -134,8 +195,82 @@ class RssFeedService {
// If URL parsing fails, keep the original link
}
}
// For description, preserve HTML content
const itemDescription = this.getHtmlContent(item, 'description') || ''
// For description, prefer content:encoded (WordPress full content) over description (truncated)
// Check for content:encoded first, then fall back to description
let itemDescription = ''
// Try to find content:encoded element (WordPress namespace extension)
// Iterate through all direct children to find it (most reliable method for namespaced XML)
const children = Array.from(item.children)
let contentEncoded: Element | null = null
for (const child of children) {
// Check if this is the content:encoded element
// The tagName might be "content:encoded" or just "encoded" depending on parser
const tagName = child.tagName || child.nodeName
if (tagName && (
tagName.toLowerCase() === 'encoded' ||
tagName.toLowerCase() === 'content:encoded' ||
tagName.includes('encoded') ||
(child.localName && child.localName.toLowerCase() === 'encoded')
)) {
contentEncoded = child
break
}
}
if (contentEncoded) {
// For CDATA sections in XML, we need to get the content carefully
// The content:encoded element contains CDATA with HTML
// Get textContent first (this properly extracts CDATA content)
// textContent will contain the HTML as a string from CDATA sections
const rawContent = contentEncoded.textContent?.trim() || contentEncoded.innerHTML?.trim() || ''
if (rawContent) {
// Clean up the content - remove any XML artifacts that might have leaked through
// Remove XML closing tags that might appear at the end (like ]]>)
itemDescription = rawContent
.replace(/\]\]\s*>\s*$/g, '') // Remove trailing ]]> from CDATA
.replace(/^\s*<!\[CDATA\[/g, '') // Remove leading CDATA declaration
.trim()
// If the content looks like it has HTML tags, use it as-is
// Otherwise, it might be plain text that needs HTML entity decoding
if (itemDescription && itemDescription.includes('<')) {
// It's HTML - ensure it's clean
// Remove any stray XML/namespace declarations that might appear
itemDescription = itemDescription
.replace(/<\?xml[^>]*\?>/gi, '') // Remove XML declarations
.replace(/<\!DOCTYPE[^>]*>/gi, '') // Remove DOCTYPE declarations
.trim()
}
}
// Log for debugging
if (itemDescription) {
logger.debug('[RssFeedService] Found content:encoded', {
url: feedUrl,
hasHtml: itemDescription.includes('<'),
length: itemDescription.length,
preview: itemDescription.substring(0, 100)
})
}
} else {
logger.debug('[RssFeedService] content:encoded not found, using description', { url: feedUrl })
}
// Fall back to description if content:encoded is not found or empty
if (!itemDescription) {
itemDescription = this.getHtmlContent(item, 'description') || ''
// Clean description as well
if (itemDescription) {
itemDescription = itemDescription
.replace(/\]\]\s*>\s*$/g, '')
.replace(/^\s*<!\[CDATA\[/g, '')
.trim()
}
}
const itemPubDate = this.parseDate(this.getTextContent(item, 'pubDate'))
const itemGuid = this.getTextContent(item, 'guid') || itemLink || ''
@ -187,7 +322,15 @@ class RssFeedService { @@ -187,7 +322,15 @@ class RssFeedService {
}
}
// For content/summary, preserve HTML content
const entryContent = this.getHtmlContent(entry, 'content') || this.getHtmlContent(entry, 'summary') || ''
let entryContent = this.getHtmlContent(entry, 'content') || this.getHtmlContent(entry, 'summary') || ''
// Additional cleaning for Atom feeds (getHtmlContent already does basic cleaning)
// This ensures any remaining XML artifacts are removed
if (entryContent) {
entryContent = entryContent
.replace(/\]\]\s*>\s*$/gm, '')
.replace(/^\s*<!\[CDATA\[/gm, '')
.trim()
}
const entryPublished = this.getTextContent(entry, 'published') || this.getTextContent(entry, 'updated')
const entryPubDate = this.parseDate(entryPublished)
const entryId = this.getTextContent(entry, 'id') || entryLink || ''
@ -228,10 +371,37 @@ class RssFeedService { @@ -228,10 +371,37 @@ class RssFeedService {
*/
private getHtmlContent(element: Element | null, tagName: string): string {
  // Returns the markup of the first descendant of `element` named `tagName`,
  // with numeric/named entities normalised and XML wrapper artifacts removed.
  // Returns '' when `element` is null, the child is missing, or it is empty.
  // NOTE: this only normalises the markup; it does not sanitise it. The result
  // is still untrusted feed HTML.
  if (!element) return ''

  // An unescaped ':' in a selector is parsed as a pseudo-class and makes
  // querySelector throw, so always escape it (e.g. content:encoded).
  // getElementsByTagName matches the qualified name and needs no escaping.
  const selector = tagName.replace(/:/g, '\\:')
  const child = element.querySelector(selector) ?? element.getElementsByTagName(tagName)[0]
  if (!child) return ''

  // Prefer innerHTML to preserve HTML formatting; fall back to textContent.
  const raw = child.innerHTML?.trim() || child.textContent?.trim() || ''
  if (!raw) return ''

  // Removes CDATA markers, XML/DOCTYPE declarations and xmlns attributes.
  const stripXmlArtifacts = (input: string): string =>
    input
      .replace(/\]\]\s*>/g, '') // Remove any ]]> (CDATA terminator)
      .replace(/<!\[CDATA\[/g, '') // Remove any CDATA opener
      .replace(/<\?xml[^>]*\?>/gi, '') // Remove XML declarations
      .replace(/<\!DOCTYPE[^>]*>/gi, '') // Remove DOCTYPE declarations
      // Remove exactly one xmlns / xmlns:prefix attribute (name, '=', quoted value)
      // without running on into neighbouring attributes.
      .replace(/\s*\bxmlns(?::[\w.-]+)?\s*=\s*(?:"[^"]*"|'[^']*')/gi, '')
      .trim()

  // Strip XML-only constructs BEFORE the HTML round-trip: the HTML parser would
  // otherwise turn <![CDATA[ ... and <?xml ...?> into bogus comments and mangle
  // the content that follows them.
  let html = stripXmlArtifacts(raw)
  if (!html) return ''

  // Round-trip through the HTML parser to decode entities (like &#8212; for em dash).
  // A <template> is used because its content is inert: parsing untrusted feed
  // markup there does not load resources or fire inline event handlers.
  const temp = document.createElement('template')
  temp.innerHTML = html
  html = temp.innerHTML || html

  // Clean once more in case decoding exposed further artifacts.
  return stripXmlArtifacts(html)
}
/**
@ -258,22 +428,52 @@ class RssFeedService { @@ -258,22 +428,52 @@ class RssFeedService {
/**
* Fetch multiple feeds and merge items
* This method gracefully handles failures - if some feeds fail, it returns items from successful feeds
*/
async fetchMultipleFeeds(feedUrls: string[]): Promise<RssFeedItem[]> {
if (feedUrls.length === 0) {
return []
}
const results = await Promise.allSettled(
feedUrls.map(url => this.fetchFeed(url))
)
const allItems: RssFeedItem[] = []
let successCount = 0
let failureCount = 0
results.forEach((result, index) => {
if (result.status === 'fulfilled') {
allItems.push(...result.value.items)
successCount++
logger.debug('[RssFeedService] Successfully fetched feed', { url: feedUrls[index], itemCount: result.value.items.length })
} else {
logger.warn('[RssFeedService] Failed to fetch feed', { url: feedUrls[index], error: result.reason })
failureCount++
// Log warning but don't throw - we want to return partial results
const errorMessage = result.reason instanceof Error ? result.reason.message : String(result.reason)
logger.warn('[RssFeedService] Failed to fetch feed after trying all strategies', {
url: feedUrls[index],
error: errorMessage
})
}
})
// Log summary
if (successCount > 0) {
logger.info('[RssFeedService] Feed fetch summary', {
total: feedUrls.length,
successful: successCount,
failed: failureCount,
itemsFound: allItems.length
})
} else if (failureCount > 0) {
logger.error('[RssFeedService] All feeds failed to fetch', {
total: feedUrls.length,
urls: feedUrls
})
}
// Sort by publication date (newest first)
allItems.sort((a, b) => {
const dateA = a.pubDate?.getTime() || 0

Loading…
Cancel
Save