diff --git a/docker/cron/crontab b/docker/cron/crontab
index aadf467..6b853d8 100644
--- a/docker/cron/crontab
+++ b/docker/cron/crontab
@@ -1,3 +1,25 @@
-0 */6 * * * /index_articles.sh >> /var/log/cron.log 2>&1
-2 */2 * * * /media_discovery.sh >> /var/log/cron.log 2>&1
-0 */2 * * * /article_discovery.sh >> /var/log/cron.log 2>&1
+# Article Processing Cron Jobs
+# ========================================
+
+# cron's default PATH may not include /usr/local/bin; mirror the PATH that the
+# cron scripts export so the bare "php" invocations below resolve as well.
+PATH=/usr/local/bin:/usr/bin:/bin
+
+# Post-process articles (QA, indexing, mark as indexed)
+# Runs every 5 minutes to process articles ingested by hydration worker
+# flock -n skips a run while the previous one (up to 3 x 10 min) is still going
+*/5 * * * * flock -n /tmp/post_process_articles.lock /var/www/html/docker/cron/post_process_articles.sh >> /var/log/cron-post-process.log 2>&1
+
+# Backfill historical articles
+# Runs once daily at 2 AM for historical data
+0 2 * * * /var/www/html/docker/cron/index_articles.sh >> /var/log/cron-backfill.log 2>&1
+
+# Cache latest articles for Redis views
+# Runs every 15 minutes to keep cache fresh
+# NOTE(review): underscores here vs hyphens in app:cache-latest-highlights - confirm the command name
+*/15 * * * * php /var/www/html/bin/console app:cache_latest_articles >> /var/log/cron-cache-articles.log 2>&1
+
+# Cache latest highlights for Redis views
+# Runs every 30 minutes
+*/30 * * * * php /var/www/html/bin/console app:cache-latest-highlights >> /var/log/cron-cache-highlights.log 2>&1
+
diff --git a/docker/cron/index_articles.sh b/docker/cron/index_articles.sh
index d161442..8228300 100644
--- a/docker/cron/index_articles.sh
+++ b/docker/cron/index_articles.sh
@@ -2,8 +2,21 @@
 set -e
 export PATH="/usr/local/bin:/usr/bin:/bin"
 
-# Run Symfony commands sequentially
+# ========================================
+# BACKFILL ONLY - FOR HISTORICAL ARTICLES
+# ========================================
+# This cron ONLY fetches historical articles for backfill purposes.
+# Run once daily or as needed for historical data.
+#
+# NEW: Post-processing (QA, indexing) now runs via separate cron
+# See: docker/cron/post_process_articles.sh (runs every 5 minutes)
+# ========================================
+
+echo "$(date '+%Y-%m-%d %H:%M:%S') - Starting article backfill..."
+
+# Backfill: Fetch articles from last week (only needed for historical data)
 php /var/www/html/bin/console articles:get -- '-1 week' 'now'
-php /var/www/html/bin/console articles:qa
-php /var/www/html/bin/console articles:index
-php /var/www/html/bin/console articles:indexed
+
+echo "$(date '+%Y-%m-%d %H:%M:%S') - Article backfill completed"
+
+# Note: QA and indexing are handled by post_process_articles.sh (separate cron)
diff --git a/docker/cron/post_process_articles.sh b/docker/cron/post_process_articles.sh
new file mode 100644
index 0000000..400ba8b
--- /dev/null
+++ b/docker/cron/post_process_articles.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+set -e
+export PATH="/usr/local/bin:/usr/bin:/bin"
+
+# ========================================
+# ARTICLE POST-PROCESSING CRON
+# ========================================
+# Scheduled every 5 minutes (see docker/cron/crontab) to run QA and indexing
+# Takes over the articles:qa, articles:index and articles:indexed steps that index_articles.sh used to run
+# Ingestion itself is not done here: the hydration worker handles it
+# ========================================
+
+echo "$(date '+%Y-%m-%d %H:%M:%S') - Starting article post-processing..."
+
+# Run post-processing commands (QA, index, mark as indexed)
+php /var/www/html/bin/console articles:post-process
+
+echo "$(date '+%Y-%m-%d %H:%M:%S') - Article post-processing completed"
+
diff --git a/src/Command/ArticlePostProcessCommand.php b/src/Command/ArticlePostProcessCommand.php
new file mode 100644
index 0000000..9e4029e
--- /dev/null
+++ b/src/Command/ArticlePostProcessCommand.php
@@ -0,0 +1,135 @@
+addOption(
+                'skip-qa',
+                null,
+                InputOption::VALUE_NONE,
+                'Skip the QA step'
+            )
+            ->addOption(
+                'skip-index',
+                null,
+                InputOption::VALUE_NONE,
+                'Skip the ElasticSearch indexing step'
+            )
+            ->addOption(
+                'skip-indexed',
+                null,
+                InputOption::VALUE_NONE,
+                'Skip marking articles as indexed'
+            );
+    }
+
+    protected function execute(InputInterface $input, OutputInterface $output): int
+    {
+        $io = new SymfonyStyle($input, $output);
+
+        $io->title('Article Post-Processing');
+        $io->text('Running QA and indexing commands sequentially...');
+        $io->newLine();
+
+        $skipQa = $input->getOption('skip-qa');
+        $skipIndex = $input->getOption('skip-index');
+        $skipIndexed = $input->getOption('skip-indexed');
+
+        $commands = [];
+        if (!$skipQa) {
+            $commands[] = [
+                'name' => 'articles:qa',
+                'description' => 'Quality Assurance',
+            ];
+        }
+        if (!$skipIndex) {
+            $commands[] = [
+                'name' => 'articles:index',
+                'description' => 'ElasticSearch Indexing',
+            ];
+        }
+        if (!$skipIndexed) {
+            $commands[] = [
+                'name' => 'articles:indexed',
+                'description' => 'Mark as Indexed',
+            ];
+        }
+
+        if (empty($commands)) {
+            $io->warning('All steps skipped - nothing to do!');
+            return Command::SUCCESS;
+        }
+
+        foreach ($commands as $cmd) {
+            $io->section(sprintf('Running: %s', $cmd['description']));
+
+            try {
+                // Absolute console path: cron does not start this command from the project root
+                $process = new Process([
+                    PHP_BINARY,
+                    dirname(__DIR__, 2) . '/bin/console',
+                    $cmd['name'],
+                    '--no-interaction'
+                ]);
+                $process->setTimeout(600); // 10 minutes timeout
+
+                // Run and stream output in real-time
+                $process->run(function ($type, $buffer) use ($output) {
+                    $output->write($buffer);
+                });
+
+                if (!$process->isSuccessful()) {
+                    $io->error(sprintf(
+                        '%s failed with exit code: %d',
+                        $cmd['description'],
+                        $process->getExitCode()
+                    ));
+
+                    $errorOutput = $process->getErrorOutput();
+                    if ($errorOutput) {
+                        $io->text('Error output:');
+                        $io->text($errorOutput);
+                    }
+
+                    return Command::FAILURE;
+                }
+
+                $io->success(sprintf('✓ %s completed', $cmd['description']));
+                $io->newLine();
+
+            } catch (\Exception $e) {
+                $io->error(sprintf('Failed to run %s: %s', $cmd['name'], $e->getMessage()));
+                return Command::FAILURE;
+            }
+        }
+
+        $io->success('✓ All post-processing commands completed successfully!');
+
+        $io->newLine();
+        $io->text([
+            'Commands executed:',
+            sprintf(' • articles:qa: %s', $skipQa ? 'skipped' : 'completed'),
+            sprintf(' • articles:index: %s', $skipIndex ? 'skipped' : 'completed'),
+            sprintf(' • articles:indexed: %s', $skipIndexed ? 'skipped' : 'completed'),
+        ]);
+
+        return Command::SUCCESS;
+    }
+}
+
diff --git a/src/Controller/HighlightsController.php b/src/Controller/HighlightsController.php
index 1778978..8c02b63 100644
--- a/src/Controller/HighlightsController.php
+++ b/src/Controller/HighlightsController.php
@@ -6,6 +6,8 @@ namespace App\Controller;
 
 use App\Service\HighlightService;
 use App\Service\NostrClient;
+use App\Service\NostrLinkParser;
+use App\Service\RedisViewStore;
 use nostriphant\NIP19\Bech32;
 use Psr\Log\LoggerInterface;
 use Symfony\Bundle\FrameworkBundle\Controller\AbstractController;
@@ -23,16 +25,58 @@ class HighlightsController extends AbstractController
         private readonly NostrClient $nostrClient,
         private readonly HighlightService $highlightService,
         private readonly LoggerInterface $logger,
-        private readonly \App\Service\NostrLinkParser $nostrLinkParser,
+        private readonly NostrLinkParser $nostrLinkParser,
+        private readonly RedisViewStore $viewStore,
     ) {}
 
     #[Route('/highlights', name: 'highlights')]
     public function index(CacheInterface $cache): Response
     {
         try {
+            // Fast path: Try Redis views first (single GET)
+            $cachedView = $this->viewStore->fetchLatestHighlights();
+
+            if ($cachedView !== null) {
+                // Use Redis view - extract highlights data
+                $highlights = [];
+
+                foreach ($cachedView as $baseObject) {
+                    if (isset($baseObject['highlight']) && isset($baseObject['article'])) {
+                        // Transform Redis view format to legacy highlight format (missing fields fall back to null/now)
+                        $highlight = [
+                            'id' => $baseObject['highlight']['eventId'] ?? null,
+                            'content' => $baseObject['highlight']['content'] ?? '',
+                            'created_at' => isset($baseObject['highlight']['createdAt'])
+                                ? (strtotime($baseObject['highlight']['createdAt']) ?: time()) // strtotime() is false on unparsable dates
+                                : time(),
+                            'pubkey' => $baseObject['highlight']['pubkey'] ?? null,
+                            'context' => $baseObject['highlight']['context'] ?? null,
+                            'article_ref' => $baseObject['article']['eventId'] ?? null,
+                            'article_title' => $baseObject['article']['title'] ?? null,
+                            'article_author' => $baseObject['article']['pubkey'] ?? null,
+                            'article_slug' => $baseObject['article']['slug'] ?? null,
+                            'profile' => $baseObject['author'] ?? null, // Highlight author profile
+                            'article_author_profile' => $baseObject['profiles'][$baseObject['article']['pubkey'] ?? ''] ?? null,
+                        ];
+
+                        $highlights[] = $highlight;
+                    }
+                }
+
+                $this->logger->info('Loaded highlights from Redis view', ['count' => count($highlights)]);
+
+                return $this->render('pages/highlights.html.twig', [
+                    'highlights' => $highlights,
+                    'total' => count($highlights),
+                    'from_redis_view' => true,
+                ]);
+            }
+
+            // Fallback path: Use old cache system if Redis view not available
+            $this->logger->debug('Redis view not found, falling back to Nostr relay fetch');
+
             // Cache key for highlights
             $cacheKey = 'global_article_highlights';
-            $cache->delete($cacheKey);
             // Get highlights from cache or fetch fresh
             $highlights = $cache->get($cacheKey, function (ItemInterface $item) {
                 $item->expiresAfter(self::CACHE_TTL);
@@ -41,9 +85,6 @@ class HighlightsController extends AbstractController
                     // Fetch highlights that reference articles (kind 30023)
                     $events = $this->nostrClient->getArticleHighlights(self::MAX_DISPLAY_HIGHLIGHTS);
 
-                    // Save raw events to database first (group by article)
-                    //$this->saveHighlightsToDatabase($events);
-
                     // Process and enrich the highlights for display
                     return $this->processHighlights($events);
                 } catch (\Exception $e) {
@@ -57,6 +98,7 @@ class HighlightsController extends AbstractController
             return $this->render('pages/highlights.html.twig', [
                 'highlights' => $highlights,
                 'total' => count($highlights),
+                'from_redis_view' => false,
             ]);
 
         } catch (\Exception $e) {
diff --git a/src/Service/ArticleEventProjector.php b/src/Service/ArticleEventProjector.php
index 9cb8f6f..1f1deaa 100644
--- a/src/Service/ArticleEventProjector.php
+++ b/src/Service/ArticleEventProjector.php
@@ -13,6 +13,8 @@ use Psr\Log\LoggerInterface;
  * Projects Nostr article events into the database
  * Handles the conversion from event format to Article entity and persistence
  * Also processes markdown content to HTML for performance optimization
+ *
+ * Note: Post-processing (QA, indexing) is handled by cron job running articles:post-process
  */
 class ArticleEventProjector
 {
@@ -21,7 +23,7 @@ class ArticleEventProjector
         private readonly EntityManagerInterface $entityManager,
         private readonly ManagerRegistry $managerRegistry,
         private readonly LoggerInterface $logger,
-        private readonly Converter $converter
+        private readonly Converter $converter,
     ) {
     }
 
@@ -82,6 +84,10 @@ class ArticleEventProjector
                     'event_id' => $article->getEventId(),
                     'db_id' => $article->getId()
                 ]);
+
+                // Note: Post-processing (QA, indexing) will be handled by cron job
+                // See: docker/cron/post_process_articles.sh (runs every 5 minutes)
+
             } else {
                 $this->logger->debug('Article already exists in database, skipping', [
                     'event_id' => $article->getEventId(),
diff --git a/templates/pages/article.html.twig b/templates/pages/article.html.twig
index ae566c0..e8f5f7d 100644
--- a/templates/pages/article.html.twig
+++ b/templates/pages/article.html.twig
@@ -136,7 +136,7 @@