Browse Source

RSS feed parser. Minor fixes and cleanup.

imwald
Nuša Pukšič 3 months ago
parent
commit
d3f2564b04
  1. 7
      assets/styles/03-components/card.css
  2. 268
      src/Command/NzineSortArticlesCommand.php
  3. 759
      src/Command/RssFetchCommand.php
  4. 3
      src/Controller/ArticleController.php
  5. 5
      src/Controller/DefaultController.php
  6. 25
      src/Service/NzineCategoryIndexService.php
  7. 16
      src/Service/RedisCacheService.php
  8. 148
      src/Service/RssFeedService.php
  9. 5
      src/Twig/Components/Organisms/FeaturedList.php
  10. 2
      templates/components/ReadingListQuickAddComponent.html.twig

7
assets/styles/03-components/card.css

@ -60,6 +60,13 @@ h2.card-title {
object-fit: cover; object-fit: cover;
} }
@media screen and (min-width: 1200px) {
.featured-list .card-header img {
max-height: initial !important;
aspect-ratio: 1.4;
}
}
.card.comment { .card.comment {
display: flex; display: flex;
flex-direction: column; flex-direction: column;

268
src/Command/NzineSortArticlesCommand.php

@ -0,0 +1,268 @@
<?php
namespace App\Command;
use App\Entity\Article;
use App\Entity\Event as DbEvent; // your Doctrine entity
use App\Entity\NzineBot;
use App\Enum\KindsEnum;
use App\Repository\ArticleRepository;
use App\Repository\NzineRepository;
use App\Service\EncryptionService;
use Doctrine\ORM\EntityManagerInterface;
use swentel\nostr\Event\Event as WireEvent;
use swentel\nostr\Key\Key;
use swentel\nostr\Sign\Sign;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Style\SymfonyStyle;
#[AsCommand(
name: 'nzine:sort:articles',
description: 'Update 30040 index events with matching 30023 articles based on tags',
)]
class NzineSortArticlesCommand extends Command
{
/**
 * Receives the collaborators this command works with and then runs the base Command constructor.
 *
 * @param NzineRepository        $nzineRepository   Used by execute() to load the NZine to process.
 * @param ArticleRepository      $articleRepository Used by execute() to load the articles stored under the bot's pubkey.
 * @param EntityManagerInterface $em                Used to look up index events and to persist/flush updated ones.
 * @param EncryptionService      $encryptionService Handed to the NZine bot via setEncryptionService() before its nsec is read.
 */
public function __construct(
private readonly NzineRepository $nzineRepository,
private readonly ArticleRepository $articleRepository,
private readonly EntityManagerInterface $em,
private readonly EncryptionService $encryptionService,
) {
parent::__construct();
}
/**
 * Append matching long-form articles to the bot's publication indexes and re-sign the changed indexes.
 *
 * Steps:
 *  1. Load the first NZine and derive the bot's hex public key from its nsec.
 *  2. Load the bot's articles and its publication index events, keeping the newest index per `d` tag.
 *  3. For every main category that has an index, add an `a` tag for each article sharing at least
 *     one topic with the index's `t` tags (skipping coordinates that are already referenced).
 *  4. Re-sign every modified index and flush once at the end.
 *
 * Topics on both sides are compared after normTag() normalization, so "Bitcoin", "#bitcoin" and
 * "bitcoin" are treated as the same topic.
 *
 * @param InputInterface  $input  Console input; no arguments or options are read.
 * @param OutputInterface $output Console output used for progress messages.
 *
 * @return int Command::FAILURE when no NZine or no bot is available, Command::SUCCESS otherwise.
 */
protected function execute(InputInterface $input, OutputInterface $output): int
{
    $io = new SymfonyStyle($input, $output);

    $nzine = $this->nzineRepository->findOneBy([]);
    if (!$nzine) {
        $io->error('No NZine entity found.');
        return Command::FAILURE;
    }

    /** @var NzineBot|null $bot */
    $bot = $nzine->getNzineBot();
    if (!$bot) {
        // Without a bot there is no key to derive the pubkey from or to sign with.
        $io->error('NZine has no bot attached; cannot sign index updates.');
        return Command::FAILURE;
    }
    $bot->setEncryptionService($this->encryptionService);

    $key = new Key();
    $signer = new Sign();
    $publicKey = strtolower($key->getPublicKey($bot->getNsec())); // hex

    /** @var Article[] $articles */
    $articles = $this->articleRepository->findBy(['pubkey' => $publicKey]);
    $io->writeln('Articles for bot: ' . count($articles));

    /** @var DbEvent[] $indexes */
    $indexes = $this->em->getRepository(DbEvent::class)->findBy([
        'pubkey' => $publicKey,
        'kind' => KindsEnum::PUBLICATION_INDEX,
    ]);
    $io->writeln('Found ' . count($indexes) . ' existing indexes for bot ' . $publicKey);
    if (!$indexes) {
        $io->warning('No existing publication indexes found; nothing to update.');
        return Command::SUCCESS;
    }

    // newest index per d-tag (slug)
    $newestIndexBySlug = [];
    foreach ($indexes as $idx) {
        $d = $this->firstTagValue($idx->getTags() ?? [], 'd');
        if ($d === null) {
            continue;
        }
        if (!isset($newestIndexBySlug[$d]) || $idx->getCreatedAt() > $newestIndexBySlug[$d]->getCreatedAt()) {
            $newestIndexBySlug[$d] = $idx;
        }
    }

    // Per-article work does not depend on the category, so do it once up front:
    // each candidate is [coordinate, normalized topics].
    $candidates = [];
    foreach ($articles as $article) {
        if (strtolower((string)$article->getPubkey()) !== $publicKey) {
            continue;
        }
        $slugArticle = (string)$article->getSlug();
        if ($slugArticle === '') {
            continue;
        }
        // Normalize exactly like the tracked 't' tags below; otherwise case or a leading '#'
        // would prevent an otherwise identical topic from matching.
        $articleTopics = array_filter(array_map(
            fn($t) => $this->normTag((string)$t),
            $article->getTopics() ?? []
        ));
        if (!$articleTopics) {
            continue;
        }
        $candidates[] = [
            sprintf('%s:%s:%s', KindsEnum::LONGFORM->value, $publicKey, $slugArticle),
            $articleTopics,
        ];
    }

    $mainCategories = $nzine->getMainCategories() ?? [];
    $totalUpdated = 0;
    foreach ($mainCategories as $category) {
        $slug = (string)($category['slug'] ?? '');
        if ($slug === '') {
            continue;
        }
        $index = $newestIndexBySlug[$slug] ?? null;
        if (!$index) {
            $io->writeln(" - Skip category '{$slug}': no index found for this slug.");
            continue;
        }
        $tags = $index->getTags() ?? [];

        // topics tracked by this index (t-tags)
        $trackedTopics = array_values(array_unique(array_filter(array_map(
            fn($t) => $this->normTag($t),
            $this->allTagValues($tags, 't')
        ))));
        if (!$trackedTopics) {
            $io->writeln(" - Index d='{$slug}': no tracked 't' tags, skipping.");
            continue;
        }

        // existing a-tags for dedupe (keys are lower-cased coordinates)
        $existingA = [];
        foreach ($tags as $t) {
            if (($t[0] ?? null) === 'a' && isset($t[1])) {
                $existingA[strtolower($t[1])] = true;
            }
        }

        $added = 0;
        foreach ($candidates as [$coord, $articleTopics]) {
            if (!$this->intersects($articleTopics, $trackedTopics)) {
                continue;
            }
            $coordKey = strtolower($coord);
            if (isset($existingA[$coordKey])) {
                continue;
            }
            $tags[] = ['a', $coord];
            $existingA[$coordKey] = true;
            $added++;
        }

        if ($added > 0) {
            $tags = $this->sortedATagsLast($tags);
            $index->setTags($tags);

            // ---- SIGN USING SWENTEL EVENT ----
            // NOTE(review): toWireEvent() reuses the entity's existing created_at; confirm whether
            // a re-signed replaceable index should carry a fresh timestamp instead.
            $wire = $this->toWireEvent($index, $publicKey);
            $wire->setTags($tags);
            $signer->signEvent($wire, $bot->getNsec());
            $this->applySignedWireToEntity($wire, $index);
            // -----------------------------------

            $this->em->persist($index);
            $io->writeln(" + Updated index d='{$slug}': added {$added} article(s).");
            $totalUpdated++;
        } else {
            $io->writeln(" - Index d='{$slug}': no new matches.");
        }
    }

    // Single flush for all modified indexes.
    if ($totalUpdated > 0) {
        $this->em->flush();
    }
    $io->success("Done. Updated {$totalUpdated} index(es).");
    return Command::SUCCESS;
}
/**
 * Return the value (second element) of the first tag whose name (first element) equals $name.
 *
 * Tags without a second element are ignored.
 *
 * @param array  $tags List of tag arrays, e.g. [['d', 'slug'], ['t', 'topic']].
 * @param string $name Tag name to look for.
 *
 * @return string|null The value cast to string, or null when no such tag exists.
 */
private function firstTagValue(array $tags, string $name): ?string
{
    foreach ($tags as $tag) {
        $isNamed = ($tag[0] ?? null) === $name;
        if (!$isNamed || !isset($tag[1])) {
            continue;
        }

        return (string)$tag[1];
    }

    return null;
}
/**
 * Collect the values (second elements) of every tag whose name (first element) equals $name.
 *
 * Tags without a second element are ignored; order of appearance is kept.
 *
 * @param array  $tags List of tag arrays, e.g. [['t', 'a'], ['t', 'b']].
 * @param string $name Tag name to look for.
 *
 * @return array List of values, each cast to string.
 */
private function allTagValues(array $tags, string $name): array
{
    $values = [];
    foreach ($tags as $tag) {
        $isNamed = ($tag[0] ?? null) === $name;
        if (!$isNamed || !isset($tag[1])) {
            continue;
        }
        $values[] = (string)$tag[1];
    }

    return $values;
}
/**
 * Normalize a topic for comparison: trim whitespace, drop one leading '#', lower-case (multibyte aware).
 *
 * @param string|null $t Raw topic; null is treated as an empty string.
 *
 * @return string The normalized topic (may be empty).
 */
private function normTag(?string $t): string
{
    $clean = trim((string)$t);
    if (str_starts_with($clean, '#')) {
        // only a single leading hash is removed
        $clean = substr($clean, 1);
    }

    return mb_strtolower($clean);
}
/**
 * Tell whether the two lists share at least one value.
 *
 * @param array $a Values to test.
 * @param array $b Values to test against (turned into a key lookup).
 *
 * @return bool True when some element of $a is also present in $b; false if either list is empty.
 */
private function intersects(array $a, array $b): bool
{
    if ($a === [] || $b === []) {
        return false;
    }

    // Keyed lookup so each membership check is a single isset().
    $lookup = array_fill_keys($b, true);
    foreach ($a as $candidate) {
        if (isset($lookup[$candidate])) {
            return true;
        }
    }

    return false;
}
/**
 * Reorder tags so that all 'a' tags come last, sorted by their lower-cased value.
 *
 * Every other tag keeps its relative order and is placed first. An 'a' tag without a value
 * is treated like any other tag.
 *
 * @param array $tags List of tag arrays.
 *
 * @return array Re-indexed list: non-'a' tags followed by the sorted 'a' tags.
 */
private function sortedATagsLast(array $tags): array
{
    $references = [];
    $rest = [];
    foreach ($tags as $tag) {
        $isReference = ($tag[0] ?? null) === 'a' && isset($tag[1]);
        if ($isReference) {
            $references[] = $tag;
            continue;
        }
        $rest[] = $tag;
    }

    usort(
        $references,
        static function ($left, $right): int {
            return strcmp(strtolower($left[1]), strtolower($right[1]));
        }
    );

    return array_merge($rest, $references);
}
/**
 * Create an unsigned swentel event mirroring the given DB entity, ready to be signed.
 *
 * The entity's created-at may be a DateTimeInterface or an integer-like value; when it
 * casts to 0 the current time is used instead.
 *
 * @param DbEvent $e      Entity supplying kind, created-at, content and tags.
 * @param string  $pubkey Public key to set on the wire event.
 *
 * @return WireEvent The populated, not yet signed event.
 */
private function toWireEvent(DbEvent $e, string $pubkey): WireEvent
{
    $stamp = $e->getCreatedAt();
    $seconds = $stamp instanceof \DateTimeInterface
        ? $stamp->getTimestamp()
        : ((int)$stamp ?: time());

    $wire = new WireEvent();
    $wire->setKind($e->getKind());
    $wire->setCreatedAt($seconds);
    $wire->setContent((string)($e->getContent() ?? ''));
    $wire->setTags($e->getTags() ?? []);
    // the public key must be present before the event id is computed
    $wire->setPublicKey($pubkey);

    return $wire;
}
/**
 * Copy the signed wire event's fields back onto the DB entity.
 *
 * Each setter is called only if the entity actually defines it. Id, signature and public key
 * are copied only when the wire event has a truthy value for them; tags, content and kind are
 * always copied so the entity matches exactly what was signed.
 *
 * created_at is first passed as the integer timestamp; if the entity's setter rejects an int
 * with a TypeError, it is retried with a DateTimeImmutable built from that timestamp.
 *
 * @param WireEvent $w Signed event to read from.
 * @param DbEvent   $e Entity to update in place.
 */
private function applySignedWireToEntity(WireEvent $w, DbEvent $e): void
{
    if (method_exists($e, 'setId') && $w->getId()) {
        $e->setId($w->getId());
    }
    if (method_exists($e, 'setSig') && $w->getSignature()) {
        $e->setSig($w->getSignature());
    }
    if (method_exists($e, 'setPubkey') && $w->getPublicKey()) {
        $e->setPubkey($w->getPublicKey());
    }

    // keep tags/content in sync (in case swentel normalized)
    if (method_exists($e, 'setTags')) {
        $e->setTags($w->getTags());
    }
    if (method_exists($e, 'setContent')) {
        $e->setContent($w->getContent());
    }

    $createdAt = $w->getCreatedAt();
    if (method_exists($e, 'setCreatedAt') && is_int($createdAt)) {
        try {
            $e->setCreatedAt($createdAt);
        } catch (\TypeError) {
            // The setter does not accept an int: hand it the DateTimeImmutable itself.
            // (Converting back with getTimestamp() would just repeat the failing int call.)
            if ($createdAt) {
                $e->setCreatedAt((new \DateTimeImmutable())->setTimestamp($createdAt));
            }
        }
    }

    // also ensure kind stays set
    if (method_exists($e, 'setKind')) {
        $e->setKind($w->getKind());
    }
}
}

759
src/Command/RssFetchCommand.php

@ -2,544 +2,385 @@
namespace App\Command; namespace App\Command;
use App\Entity\Article;
use App\Entity\NzineBot;
use App\Factory\ArticleFactory; use App\Factory\ArticleFactory;
use App\Repository\ArticleRepository;
use App\Repository\NzineRepository; use App\Repository\NzineRepository;
use App\Service\EncryptionService; use App\Service\EncryptionService;
use App\Service\NostrClient; use App\Service\NostrClient;
use App\Service\NzineCategoryIndexService;
use App\Service\RssFeedService; use App\Service\RssFeedService;
use App\Service\RssToNostrConverter;
use App\Service\TagMatchingService;
use Doctrine\ORM\EntityManagerInterface; use Doctrine\ORM\EntityManagerInterface;
use Psr\Log\LoggerInterface; use League\HTMLToMarkdown\HtmlConverter;
use swentel\nostr\Event\Event;
use swentel\nostr\Key\Key;
use swentel\nostr\Sign\Sign;
use Symfony\Component\Console\Attribute\AsCommand; use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command; use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputInterface; use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface; use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Style\SymfonyStyle; use Symfony\Component\Console\Style\SymfonyStyle;
use Symfony\Component\String\Slugger\AsciiSlugger;
#[AsCommand( #[AsCommand(
name: 'nzine:rss:fetch', name: 'nzine:rss:fetch',
description: 'Fetch RSS feeds and publish as Nostr events for configured nzines', description: 'Fetch RSS feeds and save new articles for configured nzines',
)] )]
class RssFetchCommand extends Command class RssFetchCommand extends Command
{ {
private SymfonyStyle $io;
public function __construct( public function __construct(
private readonly NzineRepository $nzineRepository, private readonly NzineRepository $nzineRepository,
private readonly ArticleRepository $articleRepository, private readonly ArticleFactory $factory,
private readonly RssFeedService $rssFeedService, private readonly RssFeedService $rssFeedService,
private readonly TagMatchingService $tagMatchingService,
private readonly RssToNostrConverter $rssToNostrConverter,
private readonly ArticleFactory $articleFactory,
private readonly NostrClient $nostrClient,
private readonly EntityManagerInterface $entityManager, private readonly EntityManagerInterface $entityManager,
private readonly EncryptionService $encryptionService, private readonly NostrClient $nostrClient,
private readonly LoggerInterface $logger, private readonly EncryptionService $encryptionService
private readonly NzineCategoryIndexService $categoryIndexService
) { ) {
parent::__construct(); parent::__construct();
} }
protected function configure(): void
{
$this
->addOption('nzine-id', null, InputOption::VALUE_OPTIONAL, 'Process only this specific nzine ID')
->addOption('dry-run', null, InputOption::VALUE_NONE, 'Test without actually publishing events')
->addOption('limit', null, InputOption::VALUE_OPTIONAL, 'Limit number of items to process per feed', 50);
}
protected function execute(InputInterface $input, OutputInterface $output): int protected function execute(InputInterface $input, OutputInterface $output): int
{ {
$this->io = new SymfonyStyle($input, $output); $io = new SymfonyStyle($input, $output);
$slugger = new AsciiSlugger();
$nzineId = $input->getOption('nzine-id'); $nzines = $this->nzineRepository->findAll();
$isDryRun = $input->getOption('dry-run'); foreach ($nzines as $nzine) {
$limit = (int) $input->getOption('limit'); if (!$nzine->getFeedUrl()) {
continue;
}
$this->io->title('RSS Feed to Nostr Aggregator'); /** @var NzineBot $bot */
$bot = $nzine->getNzineBot();
$bot->setEncryptionService($this->encryptionService);
if ($isDryRun) { $key = new Key();
$this->io->warning('Running in DRY-RUN mode - no events will be published'); $npub = $key->getPublicKey($bot->getNsec());
} $articles = $this->entityManager->getRepository(Article::class)->findBy(['pubkey' => $npub]);
$io->writeln('Found ' . count($articles) . ' existing articles for bot ' . $npub);
// Get nzines to process $io->section('Fetching RSS for: ' . $nzine->getFeedUrl());
$nzines = $nzineId
? [$this->nzineRepository->findRssNzineById((int) $nzineId)]
: $this->nzineRepository->findActiveRssNzines();
$nzines = array_filter($nzines); // Remove nulls try {
$feed = $this->rssFeedService->fetchFeed($nzine->getFeedUrl());
} catch (\Throwable $e) {
$io->warning('Failed to fetch ' . $nzine->getFeedUrl() . ': ' . $e->getMessage());
continue;
}
if (empty($nzines)) { foreach ($feed['items'] as $item) {
$this->io->warning('No RSS-enabled nzines found'); try {
return Command::SUCCESS; $event = new Event();
} $event->setKind(30023); // NIP-23 Long-form content
// created_at — use parsed pubDate (timestamp int) or now
$createdAt = isset($item['pubDate']) && is_numeric($item['pubDate'])
? (int)$item['pubDate']
: time();
$event->setCreatedAt($createdAt);
// slug (NIP-33 'd' tag) — stable per source item
$base = trim(($nzine->getSlug() ?? 'nzine') . '-' . ($item['title'] ?? ''));
$slug = (string) $slugger->slug($base)->lower();
// HTML → Markdown
$raw = trim($item['content'] ?? '') ?: trim($item['description'] ?? '');
$rawHtml = $this->normalizeWeirdHtml($raw);
$cleanHtml = $this->sanitizeHtml($rawHtml);
$markdown = $this->htmlToMarkdown($cleanHtml);
$event->setContent($markdown);
// Tags
$tags = [
['title', $this->safeStr($item['title'] ?? '')],
['d', $slug],
['source', $this->safeStr($item['link'] ?? '')],
];
// summary (short description)
$summary = $this->ellipsis($this->plainText($item['description'] ?? ''), 280);
if ($summary !== '') {
$tags[] = ['summary', $summary];
}
$this->io->info(sprintf('Processing %d nzine(s)', count($nzines))); // image
if (!empty($item['image'])) {
$tags[] = ['image', $this->safeStr($item['image'])];
} else {
// try to sniff first <img> from content if media tag was missing
if (preg_match('~<img[^>]+src="([^"]+)"~i', $rawHtml, $m)) {
$tags[] = ['image', $m[1]];
}
}
$totalStats = [ // categories → "t" tags
'nzines_processed' => 0, if (!empty($item['categories']) && is_array($item['categories'])) {
'items_fetched' => 0, foreach ($item['categories'] as $category) {
'items_matched' => 0, $cat = trim((string)$category);
'items_skipped_duplicate' => 0, if ($cat !== '') {
'items_skipped_unmatched' => 0, $event->addTag(['t', $cat]);
'events_created' => 0, }
'events_updated' => 0, }
'errors' => 0, }
];
foreach ($nzines as $nzine) { $event->setTags($tags);
try {
$stats = $this->processNzine($nzine, $isDryRun, $limit); // Sign event
$signer = new Sign();
$signer->signEvent($event, $bot->getNsec());
// Publish (add/adjust relays as you like)
try {
$this->nostrClient->publishEvent($event, [
'wss://purplepag.es',
'wss://relay.damus.io',
'wss://nos.lol',
]);
$io->writeln('Published long-form event: ' . ($item['title'] ?? '(no title)'));
} catch (\Throwable $e) {
$io->warning('Publish failed: ' . $e->getMessage());
}
// Aggregate stats // Persist locally
foreach ($stats as $key => $value) { $article = $this->factory->createFromLongFormContentEvent((object)$event->toArray());
$totalStats[$key] = ($totalStats[$key] ?? 0) + $value; $this->entityManager->persist($article);
} catch (\Throwable $e) {
// keep going on item errors
$io->warning('Item failed: ' . ($item['title'] ?? '(no title)') . ' — ' . $e->getMessage());
} }
}
$totalStats['nzines_processed']++; $this->entityManager->flush();
} catch (\Exception $e) { $io->success('RSS fetch complete for: ' . $nzine->getFeedUrl());
$this->io->error(sprintf(
'Error processing nzine #%d: %s', // --- Update bot profile (kind 0) using feed metadata ---
$nzine->getId(), $feedMeta = $feed['feed'] ?? null;
$e->getMessage() if ($feedMeta) {
)); $profile = [
$this->logger->error('Nzine processing error', [ 'name' => $feedMeta['title'] ?? $nzine->getTitle(),
'nzine_id' => $nzine->getId(), 'about' => $feedMeta['description'] ?? '',
'error' => $e->getMessage(), 'picture' => $feedMeta['image'] ?? null,
'trace' => $e->getTraceAsString(), 'website' => $feedMeta['link'] ?? null,
]); ];
$totalStats['errors']++; $p = new Event();
$p->setKind(0);
$p->setCreatedAt(time());
$p->setContent(json_encode($profile, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE));
$signer = new Sign();
$signer->signEvent($p, $bot->getNsec());
try {
$this->nostrClient->publishEvent($p, ['wss://purplepag.es']);
$io->success('Published bot profile (kind 0) with feed metadata');
} catch (\Throwable $e) {
$io->warning('Failed to publish bot profile event: ' . $e->getMessage());
}
} }
} }
// Display final statistics return Command::SUCCESS;
$this->io->success('RSS feed processing completed');
$this->io->table(
['Metric', 'Count'],
[
['Nzines processed', $totalStats['nzines_processed']],
['Items fetched', $totalStats['items_fetched']],
['Items matched', $totalStats['items_matched']],
['Events created', $totalStats['events_created']],
['Events updated', $totalStats['events_updated']],
['Duplicates skipped', $totalStats['items_skipped_duplicate']],
['Unmatched skipped', $totalStats['items_skipped_unmatched']],
['Errors', $totalStats['errors']],
]
);
return $totalStats['errors'] > 0 ? Command::FAILURE : Command::SUCCESS;
} }
/** /** -------- Helpers: HTML prep + converter + small utils -------- */
* Process a single nzine's RSS feed
*/ private function normalizeWeirdHtml(string $html): string
private function processNzine($nzine, bool $isDryRun, int $limit): array
{ {
$stats = [ // 1) Unwrap Ghost "HTML cards": keep only the <body> content, drop <html>/<head> wrappers and scripts
'items_fetched' => 0, $html = preg_replace_callback('/<!--\s*kg-card-begin:\s*html\s*-->.*?<!--\s*kg-card-end:\s*html\s*-->/si', function ($m) {
'items_matched' => 0, $block = $m[0];
'items_skipped_duplicate' => 0, // Extract inner <body></body> if present
'items_skipped_unmatched' => 0, if (preg_match('/<body\b[^>]*>(.*?)<\/body>/si', $block, $mm)) {
'events_created' => 0, $inner = $mm[1];
'events_updated' => 0, } else {
]; // No explicit body; just strip the markers
$inner = preg_replace('/<!--\s*kg-card-(?:begin|end):\s*html\s*-->/', '', $block);
$this->io->section(sprintf('Processing Nzine #%d: %s', $nzine->getId(), $nzine->getSlug())); }
return $inner;
$feedUrl = $nzine->getFeedUrl(); }, $html);
if (empty($feedUrl)) {
$this->io->warning('No feed URL configured');
return $stats;
}
// Fetch RSS feed // 2) Nuke any remaining document wrappers that would cut DOM parsing short
try { $html = preg_replace([
$feedItems = $this->rssFeedService->fetchFeed($feedUrl); '/<\/?html[^>]*>/i',
$stats['items_fetched'] = count($feedItems); '/<\/?body[^>]*>/i',
'/<head\b[^>]*>.*?<\/head>/si',
], '', $html);
$this->io->text(sprintf('Fetched %d items from feed', count($feedItems))); dump($html);
} catch (\Exception $e) {
$this->io->error(sprintf('Failed to fetch feed: %s', $e->getMessage()));
throw $e;
}
// Limit items if specified return $html;
if ($limit > 0 && count($feedItems) > $limit) { }
$feedItems = array_slice($feedItems, 0, $limit);
$this->io->text(sprintf('Limited to %d items', $limit));
}
// Get nzine categories
$categories = $nzine->getMainCategories();
if (empty($categories)) {
$this->io->warning('No categories configured - skipping all items');
$stats['items_skipped_unmatched'] = count($feedItems);
return $stats;
}
// Ensure category index events exist in the database private function sanitizeHtml(string $html): string
$categoryIndices = []; {
if (!$isDryRun) { if ($html === '') return $html;
$this->io->text('Ensuring category index events exist...');
// 0) quick pre-clean: kill scripts/styles early to avoid DOM bloat
$html = preg_replace('~<(script|style)\b[^>]*>.*?</\1>~is', '', $html);
$html = preg_replace('~<!--.*?-->~s', '', $html); // comments
// 1) Normalize weird widgets and wrappers BEFORE DOM parse
// lightning-widget → simple text
$html = preg_replace_callback(
'~<lightning-widget[^>]*\bto="([^"]+)"[^>]*>.*?</lightning-widget>~is',
fn($m) => '<p>⚡ Tips: ' . htmlspecialchars($m[1]) . '</p>',
$html
);
// Ghost/Koenig wrappers: keep useful inner content
$html = preg_replace('~<figure[^>]*\bkg-image-card\b[^>]*>\s*(<img[^>]+>)\s*</figure>~i', '$1', $html);
$html = preg_replace('~<div[^>]*\bkg-callout-card\b[^>]*>(.*?)</div>~is', '<blockquote>$1</blockquote>', $html);
// YouTube iframes → links
$html = preg_replace_callback(
'~<iframe[^>]+src="https?://www\.youtube\.com/embed/([A-Za-z0-9_\-]+)[^"]*"[^>]*></iframe>~i',
fn($m) => '<p><a href="https://youtu.be/' . $m[1] . '">Watch on YouTube</a></p>',
$html
);
// 2) Try to pretty up malformed markup via Tidy (if available)
if (function_exists('tidy_parse_string')) {
try { try {
$categoryIndices = $this->categoryIndexService->ensureCategoryIndices($nzine); $tidy = tidy_parse_string($html, [
$this->io->text(sprintf('Category indices ready: %d', count($categoryIndices))); 'clean' => true,
} catch (\Exception $e) { 'output-xhtml' => true,
$this->io->warning(sprintf('Could not create category indices: %s', $e->getMessage())); 'show-body-only' => false,
$this->logger->warning('Category index creation failed', [ 'wrap' => 0,
'nzine_id' => $nzine->getId(), 'drop-empty-paras' => true,
'error' => $e->getMessage(), 'merge-divs' => true,
]); 'merge-spans' => true,
// Continue processing even if category indices fail 'numeric-entities' => false,
'quote-ampersand' => true,
], 'utf8');
$tidy->cleanRepair();
$html = (string)$tidy;
} catch (\Throwable $e) {
// ignore tidy failures
} }
} }
// Process each feed item // 3) DOM sanitize: remove junk, unwrap html/body/head, allowlist elements/attrs
$this->io->progressStart(count($feedItems)); $dom = new \DOMDocument('1.0', 'UTF-8');
libxml_use_internal_errors(true);
foreach ($feedItems as $item) { $loaded = $dom->loadHTML(
$this->io->progressAdvance(); // force UTF-8 meta so DOMDocument doesn't mangle
'<!DOCTYPE html><meta http-equiv="Content-Type" content="text/html; charset=utf-8">'.$html,
try { LIBXML_NOWARNING | LIBXML_NOERROR
$result = $this->processRssItem($item, $nzine, $categories, $isDryRun, $categoryIndices); );
libxml_clear_errors();
if ($result === 'created') { if (!$loaded) {
$stats['events_created']++; // fallback: as-is minus tags we already stripped
$stats['items_matched']++; return $html;
} elseif ($result === 'updated') {
$stats['events_updated']++;
$stats['items_matched']++;
} elseif ($result === 'duplicate') {
$stats['items_skipped_duplicate']++;
} elseif ($result === 'unmatched') {
$stats['items_skipped_unmatched']++;
}
} catch (\Exception $e) {
$this->io->error(sprintf(
'Error processing RSS item "%s": %s',
$item['title'] ?? 'unknown',
$e->getMessage()
));
$this->logger->error('Error processing RSS item', [
'nzine_id' => $nzine->getId(),
'item_title' => $item['title'] ?? 'unknown',
'error' => $e->getMessage(),
'trace' => $e->getTraceAsString(),
]);
}
} }
$this->io->progressFinish(); $xpath = new \DOMXPath($dom);
// Re-sign all category indices after articles have been added // Remove <head>, <script>, <style>, <link>, <meta>, <noscript>, <object>, <embed>
if (!$isDryRun && !empty($categoryIndices)) { foreach (['//head','//script','//style','//link','//meta','//noscript','//object','//embed'] as $q) {
$this->io->text('Re-signing category indices...'); foreach ($xpath->query($q) as $n) {
try { $n->parentNode?->removeChild($n);
$this->categoryIndexService->resignCategoryIndices($categoryIndices, $nzine);
$this->io->text(sprintf('✓ Re-signed %d category indices', count($categoryIndices)));
} catch (\Exception $e) {
$this->io->warning(sprintf('Failed to re-sign category indices: %s', $e->getMessage()));
$this->logger->error('Category index re-signing failed', [
'nzine_id' => $nzine->getId(),
'error' => $e->getMessage(),
]);
} }
} }
// Update last fetched timestamp // Remove iframes that survived (non-YouTube or any at this point)
if (!$isDryRun) { foreach ($xpath->query('//iframe') as $n) {
$nzine->setLastFetchedAt(new \DateTimeImmutable()); $n->parentNode?->removeChild($n);
$this->entityManager->flush();
} }
$this->io->table( // Remove any custom elements we don’t want (e.g., <lightning-widget>, <amp-*>)
['Metric', 'Count'], foreach ($xpath->query('//*[starts-with(name(), "amp-") or local-name()="lightning-widget"]') as $n) {
[ $n->parentNode?->removeChild($n);
['Items fetched', $stats['items_fetched']], }
['Items matched', $stats['items_matched']],
['Events created', $stats['events_created']],
['Events updated', $stats['events_updated']],
['Duplicates skipped', $stats['items_skipped_duplicate']],
['Unmatched skipped', $stats['items_skipped_unmatched']],
]
);
return $stats;
}
/**
* Process a single RSS item
*
* @return string Result: 'created', 'duplicate', or 'unmatched'
*/
private function processRssItem(array $item, $nzine, array $categories, bool $isDryRun, array $categoryIndices): string
{
// Generate slug for duplicate detection
$slug = $this->rssToNostrConverter->generateSlugForItem($item);
// Check if already exists
$existing = $this->articleRepository->findOneBy(['slug' => $slug]);
if ($existing) {
if ($isDryRun) {
$this->io->text(sprintf(
' 🔄 Would update: "%s"',
$item['title'] ?? 'unknown'
));
return 'updated';
}
$this->io->text(sprintf(
' 🔄 Updating existing article: "%s"',
$item['title'] ?? 'unknown'
));
$this->logger->debug('Found existing article - updating', [
'slug' => $slug,
'title' => $item['title'],
]);
// Match to category for fresh data
$matchedCategory = $this->tagMatchingService->findMatchingCategory(
$item['categories'] ?? [],
$categories
);
// Convert to Nostr event to get fresh data with all processing applied
$nostrEvent = $this->rssToNostrConverter->convertToNostrEvent(
$item,
$matchedCategory,
$nzine
);
// Add original RSS categories as additional tags
if (!empty($item['categories'])) {
foreach ($item['categories'] as $rssCategory) {
$categorySlug = strtolower(trim($rssCategory));
$tagExists = false;
foreach ($nostrEvent->getTags() as $existingTag) {
if (is_array($existingTag) && $existingTag[0] === 't' && isset($existingTag[1]) && $existingTag[1] === $categorySlug) {
$tagExists = true;
break;
}
}
if (!$tagExists) {
$nostrEvent->addTag(['t', $categorySlug]);
}
}
}
// Convert to stdClass for processing
$eventObject = json_decode($nostrEvent->toJson());
// Update all fields from the fresh event data
$existing->setContent($eventObject->content);
$existing->setTitle($item['title'] ?? '');
// Set createdAt and publishedAt from RSS pubDate if available // Allowlist basic attributes; drop event handlers/javascript: urls
if (isset($item['pubDate']) && $item['pubDate'] instanceof \DateTimeImmutable) { $allowedAttrs = ['href','src','alt','title','width','height','class'];
$existing->setCreatedAt($item['pubDate']); foreach ($xpath->query('//@*') as $attr) {
$existing->setPublishedAt($item['pubDate']); $name = $attr->nodeName;
$val = $attr->nodeValue ?? '';
if (!in_array($name, $allowedAttrs, true)) {
$attr->ownerElement?->removeAttributeNode($attr);
continue;
} }
// kill javascript: and data: except images
// Extract and set image from tags if ($name === 'href' || $name === 'src') {
foreach ($eventObject->tags as $tag) { $valTrim = trim($val);
if ($tag[0] === 'image' && isset($tag[1])) { $lower = strtolower($valTrim);
$existing->setImage($tag[1]); $isDataImg = str_starts_with($lower, 'data:image/');
break; if (str_starts_with($lower, 'javascript:') || (str_starts_with($lower, 'data:') && !$isDataImg)) {
$attr->ownerElement?->removeAttribute($name);
} else {
$attr->nodeValue = $valTrim;
} }
} }
}
// Extract and set summary from tags (now with HTML stripped) // Unwrap <html> and <body> → gather innerHTML
foreach ($eventObject->tags as $tag) { $body = $dom->getElementsByTagName('body')->item(0);
if ($tag[0] === 'summary' && isset($tag[1])) { $container = $body ?: $dom; // fallback
$existing->setSummary($tag[1]);
break;
}
}
// Clear existing topics and re-add from fresh data // Drop empty spans/divs that are just whitespace
$existing->clearTopics(); foreach ($xpath->query('.//span|.//div', $container) as $n) {
foreach ($eventObject->tags as $tag) { if (!trim($n->textContent ?? '') && !$n->getElementsByTagName('*')->length) {
if ($tag[0] === 't' && isset($tag[1])) { $n->parentNode?->removeChild($n);
$existing->addTopic($tag[1]);
}
} }
$this->entityManager->persist($existing);
$this->entityManager->flush();
$this->logger->info('Article updated with fresh RSS data', [
'slug' => $slug,
'title' => $item['title'],
]);
return 'updated';
} }
// Match to category // Serialize inner HTML of container
$matchedCategory = $this->tagMatchingService->findMatchingCategory( $cleanHtml = '';
$item['categories'] ?? [], foreach ($container->childNodes as $child) {
$categories $cleanHtml .= $dom->saveHTML($child);
);
if (!$matchedCategory) {
$this->io->text(sprintf(
' ℹ No category match: "%s" [categories: %s] - importing as standalone',
$item['title'] ?? 'unknown',
implode(', ', $item['categories'] ?? ['none'])
));
$this->logger->debug('No category match for item - importing as standalone', [
'title' => $item['title'],
'categories' => $item['categories'] ?? [],
]);
// Don't return - continue processing without a category
} }
// Ensure matched category has a slug field // Final tiny cleanups
if ($matchedCategory && empty($matchedCategory['slug'])) { $cleanHtml = preg_replace('~\s+</p>~', '</p>', $cleanHtml);
// Generate slug from title if not present $cleanHtml = preg_replace('~<p>\s+</p>~', '', $cleanHtml);
$slugger = new \Symfony\Component\String\Slugger\AsciiSlugger();
$matchedCategory['slug'] = $slugger->slug($matchedCategory['title'] ?? $matchedCategory['name'] ?? '')->lower()->toString();
$this->logger->debug('Generated slug for matched category', [ return trim($cleanHtml);
'category_title' => $matchedCategory['title'] ?? $matchedCategory['name'] ?? 'unknown', }
'generated_slug' => $matchedCategory['slug'],
]);
}
if ($isDryRun) { private function htmlToMarkdown(string $html): string
$categoryLabel = $matchedCategory {
? ($matchedCategory['name'] ?? $matchedCategory['title'] ?? $matchedCategory['slug'] ?? 'unknown') $converter = $this->makeConverter();
: 'standalone'; $md = trim($converter->convert($html));
$this->io->text(sprintf(
' ✓ Would create: "%s" → %s',
$item['title'] ?? 'unknown',
$categoryLabel
));
$this->logger->info('[DRY RUN] Would create event', [
'title' => $item['title'],
'category' => $categoryLabel,
'slug' => $slug,
]);
return 'created';
}
// Convert to Nostr event (with or without category) // ensure there's a blank line after images
$nostrEvent = $this->rssToNostrConverter->convertToNostrEvent( // 1) images that already sit alone on a line
$item, $md = preg_replace('/^(>?\s*)!\[[^\]]*]\([^)]*\)\s*$/m', "$0\n", $md);
$matchedCategory, // 2) inline images: add a newline after the token (optional — comment out if you only want #1)
$nzine $md = preg_replace('/!\[[^\]]*]\([^)]*\)/', "$0\n", $md);
);
// Add original RSS categories as additional tags (topics) // collapse any excessive blank lines to max two
// This ensures RSS feed categories are preserved even if they don't match nzine categories $md = preg_replace("/\n{3,}/", "\n\n", $md);
if (!empty($item['categories'])) {
foreach ($item['categories'] as $rssCategory) {
// Add as 't' tag if not already present
$categorySlug = strtolower(trim($rssCategory));
$tagExists = false;
foreach ($nostrEvent->getTags() as $existingTag) {
if (is_array($existingTag) && $existingTag[0] === 't' && isset($existingTag[1]) && $existingTag[1] === $categorySlug) {
$tagExists = true;
break;
}
}
if (!$tagExists) { // Optional: coalesce too many blank lines caused by sanitization/conversion
$nostrEvent->addTag(['t', $categorySlug]); $md = preg_replace("~\n{3,}~", "\n\n", $md);
}
}
}
// Convert Nostr Event to stdClass object for ArticleFactory return $md;
$eventObject = json_decode($nostrEvent->toJson()); }
// Create Article entity from the event object
$article = $this->articleFactory->createFromLongFormContentEvent($eventObject);
$this->entityManager->persist($article);
$this->entityManager->flush();
// Add article to category index if category matched
if ($matchedCategory && isset($matchedCategory['slug']) && !empty($categoryIndices)) {
$categorySlug = $matchedCategory['slug'];
if (isset($categoryIndices[$categorySlug])) {
$articleCoordinate = sprintf(
'%d:%s:%s',
$article->getKind()->value,
$article->getPubkey(),
$article->getSlug()
);
try { private function makeConverter(): HtmlConverter
// addArticleToCategoryIndex now returns a NEW event entity {
$updatedCategoryIndex = $this->categoryIndexService->addArticleToCategoryIndex( return new HtmlConverter([
$categoryIndices[$categorySlug], 'header_style' => 'atx',
$articleCoordinate, 'bold_style' => '**',
$nzine 'italic_style' => '*',
); 'hard_break' => true,
'strip_tags' => true,
// Update the reference in the array to point to the new event 'remove_nodes' => 'script style',
$categoryIndices[$categorySlug] = $updatedCategoryIndex; ]);
}
// Flush to ensure the category index is saved to the database
$this->entityManager->flush();
$this->logger->debug('Added article to category index', [
'article_slug' => $article->getSlug(),
'category_slug' => $categorySlug,
'coordinate' => $articleCoordinate,
]);
} catch (\Exception $e) {
$this->logger->warning('Failed to add article to category index', [
'article_slug' => $article->getSlug(),
'category_slug' => $categorySlug,
'error' => $e->getMessage(),
]);
}
} else {
$this->logger->warning('Category index not found for matched category', [
'category_slug' => $categorySlug,
'available_indices' => array_keys($categoryIndices),
]);
}
}
$categoryLabel = $matchedCategory private function plainText(string $html): string
? ($matchedCategory['name'] ?? $matchedCategory['title'] ?? $matchedCategory['slug'] ?? 'unknown') {
: 'standalone'; return trim(html_entity_decode(strip_tags($html)));
}
$this->io->text(sprintf(
' ✓ Created: "%s" → %s',
$item['title'] ?? 'unknown',
$categoryLabel
));
// Publish to relays (async/background in production)
try {
// TODO: Get configured relays from nzine or use default
// $this->nostrClient->publishEvent($nostrEvent, $relays);
$this->logger->info('Event created and saved', [
'event_id' => $nostrEvent->getId() ?? 'unknown',
'title' => $item['title'],
'category' => $categoryLabel,
]);
} catch (\Exception $e) {
$this->logger->warning('Failed to publish to relays', [
'event_id' => $nostrEvent->getId() ?? 'unknown',
'error' => $e->getMessage(),
]);
// Continue even if relay publishing fails
}
return 'created'; private function ellipsis(string $text, int $max): string
{
$text = trim($text);
if ($text === '' || mb_strlen($text) <= $max) return $text;
return rtrim(mb_substr($text, 0, $max - 1)) . '…';
} }
}
/**
 * Normalise an optional string for safe use downstream.
 *
 * @param string|null $s Value that may be absent.
 * @return string Empty string when $s is null, otherwise $s with surrounding whitespace trimmed.
 */
private function safeStr(?string $s): string
{
    // Coalescing null to '' first gives the same result as the explicit null check.
    return trim($s ?? '');
}
}

3
src/Controller/ArticleController.php

@ -10,7 +10,6 @@ use App\Service\RedisCacheService;
use App\Util\CommonMark\Converter; use App\Util\CommonMark\Converter;
use Doctrine\ORM\EntityManagerInterface; use Doctrine\ORM\EntityManagerInterface;
use League\CommonMark\Exception\CommonMarkException; use League\CommonMark\Exception\CommonMarkException;
use Mdanter\Ecc\Crypto\Signature\SchnorrSignature;
use nostriphant\NIP19\Bech32; use nostriphant\NIP19\Bech32;
use nostriphant\NIP19\Data\NAddr; use nostriphant\NIP19\Data\NAddr;
use Psr\Cache\CacheItemPoolInterface; use Psr\Cache\CacheItemPoolInterface;
@ -23,9 +22,7 @@ use Symfony\Component\HttpFoundation\Request;
use Symfony\Component\HttpFoundation\Response; use Symfony\Component\HttpFoundation\Response;
use Symfony\Component\Routing\Attribute\Route; use Symfony\Component\Routing\Attribute\Route;
use Symfony\Component\Security\Csrf\CsrfToken; use Symfony\Component\Security\Csrf\CsrfToken;
use Symfony\Component\String\Slugger\AsciiSlugger;
use Symfony\Component\Security\Csrf\CsrfTokenManagerInterface; use Symfony\Component\Security\Csrf\CsrfTokenManagerInterface;
use Symfony\Component\Workflow\WorkflowInterface;
class ArticleController extends AbstractController class ArticleController extends AbstractController
{ {

5
src/Controller/DefaultController.php

@ -19,10 +19,10 @@ use Psr\Cache\CacheItemPoolInterface;
use Psr\Cache\InvalidArgumentException; use Psr\Cache\InvalidArgumentException;
use swentel\nostr\Key\Key; use swentel\nostr\Key\Key;
use Symfony\Bundle\FrameworkBundle\Controller\AbstractController; use Symfony\Bundle\FrameworkBundle\Controller\AbstractController;
use Symfony\Component\HttpFoundation\ParameterBag;
use Symfony\Component\HttpFoundation\RequestStack; use Symfony\Component\HttpFoundation\RequestStack;
use Symfony\Component\HttpFoundation\Response; use Symfony\Component\HttpFoundation\Response;
use Symfony\Component\Routing\Attribute\Route; use Symfony\Component\Routing\Attribute\Route;
use Symfony\Contracts\Cache\CacheInterface;
use Psr\Log\LoggerInterface; use Psr\Log\LoggerInterface;
class DefaultController extends AbstractController class DefaultController extends AbstractController
@ -52,7 +52,8 @@ class DefaultController extends AbstractController
#[Route('/latest-articles', name: 'latest_articles')] #[Route('/latest-articles', name: 'latest_articles')]
public function latestArticles(FinderInterface $finder, CacheItemPoolInterface $articlesCache): Response public function latestArticles(FinderInterface $finder, CacheItemPoolInterface $articlesCache): Response
{ {
$cacheKey = 'latest_articles_list'; $env = $this->getParameter('kernel.environment');
$cacheKey = 'latest_articles_list_' . $env ; // Use env to differentiate cache between environments
$cacheItem = $articlesCache->getItem($cacheKey); $cacheItem = $articlesCache->getItem($cacheKey);
if (!$cacheItem->isHit()) { if (!$cacheItem->isHit()) {

25
src/Service/NzineCategoryIndexService.php

@ -12,7 +12,6 @@ use swentel\nostr\Sign\Sign;
use Symfony\Component\Serializer\Encoder\JsonEncoder; use Symfony\Component\Serializer\Encoder\JsonEncoder;
use Symfony\Component\Serializer\Normalizer\ObjectNormalizer; use Symfony\Component\Serializer\Normalizer\ObjectNormalizer;
use Symfony\Component\Serializer\Serializer; use Symfony\Component\Serializer\Serializer;
use Symfony\Component\String\Slugger\AsciiSlugger;
/** /**
* Service for managing category index events for nzines * Service for managing category index events for nzines
@ -59,7 +58,6 @@ class NzineCategoryIndexService
return []; return [];
} }
$slugger = new AsciiSlugger();
$categoryIndices = []; $categoryIndices = [];
// Load all existing category indices for this nzine at once // Load all existing category indices for this nzine at once
@ -72,28 +70,22 @@ class NzineCategoryIndexService
// Index existing events by their d-tag (slug) // Index existing events by their d-tag (slug)
$existingBySlug = []; $existingBySlug = [];
foreach ($existingIndices as $existingIndex) { foreach ($existingIndices as $existingIndex) {
$slug = $this->extractSlugFromTags($existingIndex->getTags()); $slug = $existingIndex->getSlug();
if ($slug) { if ($slug) {
$existingBySlug[$slug] = $existingIndex; $existingBySlug[$slug] = $existingIndex;
} }
} }
foreach ($categories as $category) { foreach ($categories as $category) {
if (empty($category['title'])) {
continue;
}
$title = $category['title'];
$slug = $category['slug']; $slug = $category['slug'];
// Check if category index already exists // Check if category index already exists
if (isset($existingBySlug[$slug])) { if (isset($existingBySlug[$slug])) {
// FIX: Add existing index to return array
$categoryIndices[$slug] = $existingBySlug[$slug]; $categoryIndices[$slug] = $existingBySlug[$slug];
$this->logger->debug('Using existing category index', [ $this->logger->debug('Using existing category index', [
'category_slug' => $slug, 'category_slug' => $slug,
'title' => $title, 'title' => $category['title'],
]); ]);
continue; continue;
} }
@ -102,7 +94,7 @@ class NzineCategoryIndexService
$event = new Event(); $event = new Event();
$event->setKind(KindsEnum::PUBLICATION_INDEX->value); $event->setKind(KindsEnum::PUBLICATION_INDEX->value);
$event->addTag(['d', $slug]); $event->addTag(['d', $slug]);
$event->addTag(['title', $title]); $event->addTag(['title', $category['title']]);
$event->addTag(['auto-update', 'yes']); $event->addTag(['auto-update', 'yes']);
$event->addTag(['type', 'magazine']); $event->addTag(['type', 'magazine']);
@ -121,14 +113,16 @@ class NzineCategoryIndexService
// Convert to EventEntity and save // Convert to EventEntity and save
$serializer = new Serializer([new ObjectNormalizer()], [new JsonEncoder()]); $serializer = new Serializer([new ObjectNormalizer()], [new JsonEncoder()]);
// Move id to eventId before persisting
$eventId = $event->getId();
$event->setId(null);
$eventEntity = $serializer->deserialize($event->toJson(), EventEntity::class, 'json'); $eventEntity = $serializer->deserialize($event->toJson(), EventEntity::class, 'json');
$eventEntity->setEventId($eventId);
$this->entityManager->persist($eventEntity); $this->entityManager->persist($eventEntity);
$categoryIndices[$slug] = $eventEntity; $categoryIndices[$slug] = $eventEntity;
$this->logger->info('Created category index event', [ $this->logger->info('Created category index event', [
'nzine_id' => $nzine->getId(), 'nzine_id' => $nzine->getId(),
'category_title' => $title,
'category_slug' => $slug, 'category_slug' => $slug,
]); ]);
} }
@ -215,10 +209,13 @@ class NzineCategoryIndexService
// Convert to JSON and deserialize to NEW EventEntity // Convert to JSON and deserialize to NEW EventEntity
$serializer = new Serializer([new ObjectNormalizer()], [new JsonEncoder()]); $serializer = new Serializer([new ObjectNormalizer()], [new JsonEncoder()]);
// Move id to eventId before persisting
$eventId = $event->getId();
$newEventEntity = $serializer->deserialize($event->toJson(), EventEntity::class, 'json'); $newEventEntity = $serializer->deserialize($event->toJson(), EventEntity::class, 'json');
$newEventEntity->setEventId($eventId);
// Persist the NEW event entity // Persist the NEW event entity
$this->entityManager->persist($newEventEntity); $this->entityManager->persist($newEventEntity);
$this->entityManager->flush();
$articleCount = count(array_filter($newEventEntity->getTags(), fn($tag) => $tag[0] === 'a')); $articleCount = count(array_filter($newEventEntity->getTags(), fn($tag) => $tag[0] === 'a'));

16
src/Service/RedisCacheService.php

@ -7,6 +7,7 @@ use App\Enum\KindsEnum;
use Doctrine\ORM\EntityManagerInterface; use Doctrine\ORM\EntityManagerInterface;
use Psr\Cache\InvalidArgumentException; use Psr\Cache\InvalidArgumentException;
use Psr\Log\LoggerInterface; use Psr\Log\LoggerInterface;
use swentel\nostr\Key\Key;
use Symfony\Component\HttpFoundation\Response; use Symfony\Component\HttpFoundation\Response;
use Symfony\Contracts\Cache\CacheInterface; use Symfony\Contracts\Cache\CacheInterface;
use Symfony\Contracts\Cache\ItemInterface; use Symfony\Contracts\Cache\ItemInterface;
@ -455,4 +456,19 @@ readonly class RedisCacheService
return null; return null;
} }
} }
/**
 * Store the decoded JSON content of an event in the Redis cache, keyed by the author's npub.
 *
 * The event's public key is converted to bech32 and the cache key is "0_" followed by that npub.
 * The entry expires after one hour. Errors raised while decoding or writing to the cache are
 * logged and not rethrown; errors from the key conversion itself still propagate to the caller.
 *
 * @param \swentel\nostr\Event\Event $event Event whose content is a JSON document.
 */
public function setMetadata(\swentel\nostr\Event\Event $event): void
{
    $key = new Key();
    $npub = $key->convertPublicKeyToBech32($event->getPublicKey());
    $cacheKey = '0_' . $npub;
    try {
        // Throw on malformed JSON so a null decode result is never cached as if it were metadata;
        // JsonException extends \Exception and is handled by the catch below.
        $metadata = json_decode($event->getContent(), flags: JSON_THROW_ON_ERROR);

        $item = $this->redisCache->getItem($cacheKey);
        $item->set($metadata);
        $item->expiresAfter(3600); // 1 hour
        $this->redisCache->save($item);
    } catch (\Exception $e) {
        $this->logger->error('Error setting user metadata.', ['exception' => $e]);
    }
}
} }

148
src/Service/RssFeedService.php

@ -5,24 +5,13 @@ namespace App\Service;
use Psr\Log\LoggerInterface; use Psr\Log\LoggerInterface;
use Symfony\Contracts\HttpClient\HttpClientInterface; use Symfony\Contracts\HttpClient\HttpClientInterface;
/**
* Service for fetching and parsing RSS feeds
*/
class RssFeedService class RssFeedService
{ {
public function __construct( public function __construct(
private readonly HttpClientInterface $httpClient, private readonly HttpClientInterface $httpClient,
private readonly LoggerInterface $logger private readonly LoggerInterface $logger
) { ) {}
}
/**
* Fetch and parse an RSS feed from a URL
*
* @param string $feedUrl The URL of the RSS feed
* @return array Array of feed items, each containing: title, link, pubDate, description, content, categories
* @throws \Exception if feed cannot be fetched or parsed
*/
public function fetchFeed(string $feedUrl): array public function fetchFeed(string $feedUrl): array
{ {
try { try {
@ -30,9 +19,7 @@ class RssFeedService
$response = $this->httpClient->request('GET', $feedUrl, [ $response = $this->httpClient->request('GET', $feedUrl, [
'timeout' => 30, 'timeout' => 30,
'headers' => [ 'headers' => ['User-Agent' => 'Newsroom RSS Aggregator/1.0'],
'User-Agent' => 'Newsroom RSS Aggregator/1.0',
],
]); ]);
if ($response->getStatusCode() !== 200) { if ($response->getStatusCode() !== 200) {
@ -40,14 +27,14 @@ class RssFeedService
} }
$xmlContent = $response->getContent(); $xmlContent = $response->getContent();
$items = $this->parseRssFeed($xmlContent); $parsed = $this->parseRssFeed($xmlContent);
$this->logger->info('RSS feed fetched successfully', [ $this->logger->info('RSS feed fetched successfully', [
'url' => $feedUrl, 'url' => $feedUrl,
'items' => count($items), 'items' => count($parsed['items']),
]); ]);
return $items; return $parsed;
} catch (\Exception $e) { } catch (\Exception $e) {
$this->logger->error('Failed to fetch RSS feed', [ $this->logger->error('Failed to fetch RSS feed', [
'url' => $feedUrl, 'url' => $feedUrl,
@ -57,13 +44,6 @@ class RssFeedService
} }
} }
/**
* Parse RSS XML content into structured array
*
* @param string $xmlContent Raw XML content
* @return array Array of parsed feed items
* @throws \Exception if XML parsing fails
*/
private function parseRssFeed(string $xmlContent): array private function parseRssFeed(string $xmlContent): array
{ {
libxml_use_internal_errors(true); libxml_use_internal_errors(true);
@ -76,44 +56,56 @@ class RssFeedService
} }
$items = []; $items = [];
$feedMeta = [
'title' => null,
'description' => null,
'link' => null,
'image' => null,
];
// Handle both RSS 2.0 and Atom feeds // RSS 2.0
if (isset($xml->channel->item)) { if (isset($xml->channel->item)) {
// RSS 2.0
foreach ($xml->channel->item as $item) { foreach ($xml->channel->item as $item) {
$items[] = $this->parseRssItem($item); $items[] = $this->parseRssItem($item);
} }
} elseif (isset($xml->entry)) { $feedMeta['title'] = (string)($xml->channel->title ?? '');
// Atom feed $feedMeta['description'] = (string)($xml->channel->description ?? '');
$feedMeta['link'] = (string)($xml->channel->link ?? '');
if (isset($xml->channel->image->url)) {
$feedMeta['image'] = (string)$xml->channel->image->url;
}
}
// Atom
elseif (isset($xml->entry)) {
foreach ($xml->entry as $entry) { foreach ($xml->entry as $entry) {
$items[] = $this->parseAtomEntry($entry); $items[] = $this->parseAtomEntry($entry);
} }
$feedMeta['title'] = (string)($xml->title ?? '');
$feedMeta['description'] = (string)($xml->subtitle ?? '');
$feedMeta['link'] = (string)($xml->link['href'] ?? '');
$feedMeta['image'] = (string)($xml->logo ?? '');
} }
return $items; return ['feed' => $feedMeta, 'items' => $items];
} }
/**
* Parse a single RSS 2.0 item
*/
private function parseRssItem(\SimpleXMLElement $item): array private function parseRssItem(\SimpleXMLElement $item): array
{ {
$namespaces = $item->getNamespaces(true); $namespaces = $item->getNamespaces(true);
$content = ''; $content = '';
// Try to get full content from content:encoded or description // content:encoded
if (isset($namespaces['content'])) { if (isset($namespaces['content'])) {
$contentChildren = $item->children($namespaces['content']); $contentChildren = $item->children($namespaces['content']);
if (isset($contentChildren->encoded)) { if (isset($contentChildren->encoded)) {
$content = (string) $contentChildren->encoded; $content = (string) $contentChildren->encoded;
} }
} }
if ($content === '') {
if (empty($content)) {
$content = (string) ($item->description ?? ''); $content = (string) ($item->description ?? '');
} }
// Extract categories // categories
$categories = []; $categories = [];
if (isset($item->category)) { if (isset($item->category)) {
foreach ($item->category as $category) { foreach ($item->category as $category) {
@ -121,61 +113,67 @@ class RssFeedService
} }
} }
// Extract image from media:content // media:content image
$imageUrl = null; $imageUrl = null;
if (isset($namespaces['media'])) { if (isset($namespaces['media'])) {
$mediaChildren = $item->children($namespaces['media']); $mediaChildren = $item->children($namespaces['media']);
if (isset($mediaChildren->content)) { if (isset($mediaChildren->content)) {
foreach ($mediaChildren->content as $mediaContent) { foreach ($mediaChildren->content as $mediaContent) {
$medium = (string) $mediaContent['medium']; $medium = (string) $mediaContent['medium'];
if ($medium === 'image' || empty($medium)) { if ($medium === 'image' || $medium === '') {
$imageUrl = (string) $mediaContent['url']; $imageUrl = (string) $mediaContent['url'];
break; break;
} }
} }
} }
} }
// ghost/bitnami quirk
if (!$imageUrl && isset($item->content) && isset($item->content->_url)) {
$medium = isset($item->content->_medium) ? (string)$item->content->_medium : '';
if ($medium === 'image' || $medium === '') {
$imageUrl = (string)$item->content->_url;
}
}
// Parse publication date // pubDate → timestamp int
$pubDate = null; $pubTs = null;
if (isset($item->pubDate)) { if (isset($item->pubDate)) {
$pubDate = new \DateTimeImmutable((string) $item->pubDate); try {
$pubTs = (new \DateTimeImmutable((string)$item->pubDate))->getTimestamp();
} catch (\Throwable $e) {
$pubTs = null;
}
} }
return [ return [
'title' => (string) ($item->title ?? ''), 'title' => (string) ($item->title ?? ''),
'link' => (string) ($item->link ?? ''), 'link' => (string) ($item->link ?? ''),
'pubDate' => $pubDate, 'pubDate' => $pubTs,
'description' => (string) ($item->description ?? ''), 'description' => html_entity_decode(strip_tags((string)($item->description ?? ''))),
'content' => $content, 'content' => (string)$content,
'categories' => $categories, 'categories' => $categories,
'guid' => (string) ($item->guid ?? ''), 'guid' => (string) ($item->guid ?? ''),
'image' => $imageUrl, 'image' => $imageUrl,
]; ];
} }
/**
* Parse a single Atom entry
*/
private function parseAtomEntry(\SimpleXMLElement $entry): array private function parseAtomEntry(\SimpleXMLElement $entry): array
{ {
$namespaces = $entry->getNamespaces(true); // link
// Get link
$link = ''; $link = '';
if (isset($entry->link)) { if (isset($entry->link)) {
foreach ($entry->link as $l) { foreach ($entry->link as $l) {
if ((string) $l['rel'] === 'alternate' || !isset($l['rel'])) { if ((string)$l['rel'] === 'alternate' || !isset($l['rel'])) {
$link = (string) $l['href']; $link = (string)$l['href'];
break; break;
} }
} }
} }
// Get content // content
$content = (string) ($entry->content ?? $entry->summary ?? ''); $content = (string) ($entry->content ?? $entry->summary ?? '');
// Get categories/tags // categories
$categories = []; $categories = [];
if (isset($entry->category)) { if (isset($entry->category)) {
foreach ($entry->category as $category) { foreach ($entry->category as $category) {
@ -183,22 +181,26 @@ class RssFeedService
} }
} }
// Parse publication date // pubDate → timestamp int
$pubDate = null; $pubTs = null;
if (isset($entry->published)) { try {
$pubDate = new \DateTimeImmutable((string) $entry->published); if (isset($entry->published)) {
} elseif (isset($entry->updated)) { $pubTs = (new \DateTimeImmutable((string)$entry->published))->getTimestamp();
$pubDate = new \DateTimeImmutable((string) $entry->updated); } elseif (isset($entry->updated)) {
$pubTs = (new \DateTimeImmutable((string)$entry->updated))->getTimestamp();
}
} catch (\Throwable $e) {
$pubTs = null;
} }
return [ return [
'title' => (string) ($entry->title ?? ''), 'title' => (string) ($entry->title ?? ''),
'link' => $link, 'link' => $link,
'pubDate' => $pubDate, 'pubDate' => $pubTs,
'description' => (string) ($entry->summary ?? ''), 'description' => html_entity_decode(strip_tags((string)($entry->summary ?? ''))),
'content' => $content, 'content' => $content,
'categories' => $categories, 'categories' => $categories,
'guid' => (string) ($entry->id ?? ''), 'guid' => (string) ($entry->id ?? ''),
]; ];
} }
} }

5
src/Twig/Components/Organisms/FeaturedList.php

@ -28,12 +28,13 @@ final class FeaturedList
*/ */
public function mount($category): void public function mount($category): void
{ {
$parts = explode(':', $category[1]); $parts = explode(':', $category[1], 3);
$categorySlug = $parts[2] ?? ''; $categorySlug = $parts[2] ?? '';
// Query the database for the category event by slug using native SQL // Query the database for the latest category event by slug using native SQL
$sql = "SELECT e.* FROM event e $sql = "SELECT e.* FROM event e
WHERE e.tags::jsonb @> ?::jsonb WHERE e.tags::jsonb @> ?::jsonb
ORDER BY e.created_at DESC
LIMIT 1"; LIMIT 1";
$conn = $this->entityManager->getConnection(); $conn = $this->entityManager->getConnection();

2
templates/components/ReadingListQuickAddComponent.html.twig

@ -1,7 +1,7 @@
<div {{ attributes.defaults({class: 'reading-list-quick-add'}) }}> <div {{ attributes.defaults({class: 'reading-list-quick-add'}) }}>
<div class="quick-add-toggle" data-action="live#action" data-live-action-param="toggleExpanded"> <div class="quick-add-toggle" data-action="live#action" data-live-action-param="toggleExpanded">
<span class="badge bg-primary"> <span class="badge bg-primary">
📚 Reading List Reading List
{% if itemCount > 0 %} {% if itemCount > 0 %}
<span class="badge bg-secondary ms-1">{{ itemCount }}</span> <span class="badge bg-secondary ms-1">{{ itemCount }}</span>
{% endif %} {% endif %}

Loading…
Cancel
Save