From 7143a816dd06f34cb92513b8d398bb6bf8aa58d7 Mon Sep 17 00:00:00 2001 From: Silberengel Date: Thu, 23 Apr 2026 22:24:03 +0200 Subject: [PATCH] update categories --- .env.dist | 2 + README.md | 3 +- config/services.yaml | 2 + config/unfold.yaml | 6 + src/Command/PrewarmCommand.php | 4 +- src/Controller/SeoController.php | 23 +- src/Service/MagazineContentService.php | 127 +++- src/Service/MagazineRefresher.php | 99 ++- src/Service/NostrClient.php | 658 ++++++++++++++---- .../Components/Molecules/CategoryLink.php | 3 + src/Util/NostrEventTags.php | 45 ++ 11 files changed, 820 insertions(+), 152 deletions(-) create mode 100644 src/Util/NostrEventTags.php diff --git a/.env.dist b/.env.dist index de71349..0e1c807 100644 --- a/.env.dist +++ b/.env.dist @@ -44,6 +44,8 @@ MYSQL_ROOT_PASSWORD=root_password # After changing, recreate: `docker compose up -d --force-recreate cron` (dev) or # `docker compose -f compose.hub.yaml up -d --force-recreate prewarm` (hub). # PREWARM_FLAGS= +# Comma-separated magazine category #d slugs to refresh first when app:prewarm runs out of time before all categories (see MagazineRefresher). +# MAGAZINE_PREWARM_PREFER_SLUGS= # compose.hub.yaml: default host port is 9080. Use 80 only if nothing else binds it. Loopback-only example: # HTTP_PUBLISH=127.0.0.1:9080 # HTTP_PUBLISH=80 diff --git a/README.md b/README.md index 80d4d86..bc92d68 100644 --- a/README.md +++ b/README.md @@ -81,7 +81,7 @@ make prewarm | `--metadata-batch` | `50` | Pubkeys per batched Nostr `REQ` | | `--comments-max` | `10` | Newest **N** articles (by `createdAt` **DESC**); `0` = all (still bounded by budget) | | `--comments-budget` | `600` | Max wall seconds for the whole comments phase (Nostr is slow; raise e.g. `1200` if you need more articles in one run) | -| `--magazine-budget` | `30` | Max wall seconds for magazine refresh | +| `--magazine-budget` | `90` | Max wall seconds for magazine root + per-category 30040 fetches (hard-capped at 600s in code). 
If you have many categories, a **low** budget can stop before the last slug is refreshed—**stale home/category pages** until the next run. Set `MAGAZINE_PREWARM_PREFER_SLUGS` (comma-separated category `#d` slugs) to fetch those first after the root. | Prewarm clears the PHP **CLI** execution time limit for that run; relay work can be slow. @@ -100,6 +100,7 @@ For a full **Nostr backfill** + one-shot prewarm, use **`make prewarm`** (or a h | What | File | |------|------| | Site title, `npub`, `d_tag`, **relays** (`default_relay`, `article_relays`, `profile_relays`), theme | `config/unfold.yaml` (imported as Symfony parameters) | +| `MAGAZINE_PREWARM_PREFER_SLUGS` | `.env` / `.env.local` — optional comma-separated category slugs to prioritize in `app:prewarm` magazine phase (after the root). Use when the relay time budget would otherwise skip your updated category. | | `DATABASE_URL`, `APP_SECRET`, `HTTP_PORT`, `MYSQL_*`, optional **`PREWARM_FLAGS`** (for the Docker `cron` service) | `.env` / `.env.local` (see `.env.dist`) | | Service wiring (e.g. cache, `NostrClient` args) | `config/services.yaml` | diff --git a/config/services.yaml b/config/services.yaml index 2ef8a56..0777676 100644 --- a/config/services.yaml +++ b/config/services.yaml @@ -52,6 +52,8 @@ services: App\Service\MagazineRefresher: arguments: $appCache: '@cache.app' + $magazinePrewarmPreferSlugs: '%magazine_prewarm_prefer_slugs%' + $magazinePrewarmAlsoSlugs: '%magazine_prewarm_also_slugs%' App\Service\CacheService: arguments: $appCache: '@cache.app' diff --git a/config/unfold.yaml b/config/unfold.yaml index 739ce3a..8e2cf84 100644 --- a/config/unfold.yaml +++ b/config/unfold.yaml @@ -35,6 +35,12 @@ parameters: nip05_domain: 'blog.imwald.eu' # Base URL for "Open in Jumble" on author profile (trailing slash optional; npub is appended as /{npub}). 
jumble_profile_users_base: 'https://jumble.imwald.eu/users' + # Comma-separated category #d slugs to fetch first in app:prewarm after the root (see MagazineRefresher). + magazine_prewarm_prefer_slugs_empty: '' + magazine_prewarm_prefer_slugs: '%env(default:magazine_prewarm_prefer_slugs_empty:MAGAZINE_PREWARM_PREFER_SLUGS)%' + # Extra category #d slugs to 30040-fetch in prewarm right after prefer (before the rest of root’s a tags), so budget runs still hit new categories. + magazine_prewarm_also_slugs_empty: '' + magazine_prewarm_also_slugs: '%env(default:magazine_prewarm_also_slugs_empty:MAGAZINE_PREWARM_ALSO_SLUGS)%' external_links: - title: "Unfold" url: "https://github.com/decent-newsroom/unfold" diff --git a/src/Command/PrewarmCommand.php b/src/Command/PrewarmCommand.php index cdbc183..f86ce8f 100644 --- a/src/Command/PrewarmCommand.php +++ b/src/Command/PrewarmCommand.php @@ -65,7 +65,7 @@ final class PrewarmCommand extends Command ->addOption('deletion-since', null, InputOption::VALUE_REQUIRED, 'strtotime() window start for kind 5 fetch', '-2 month') ->addOption('no-metadata', null, InputOption::VALUE_NONE, 'Skip Nostr profile metadata cache') ->addOption('no-comments', null, InputOption::VALUE_NONE, 'Skip comment thread cache') - ->addOption('magazine-budget', null, InputOption::VALUE_REQUIRED, 'Seconds wall time for magazine relay refresh', '30') + ->addOption('magazine-budget', null, InputOption::VALUE_REQUIRED, 'Seconds wall time for magazine relay refresh (capped at 600s; if many category indices, raise this or set MAGAZINE_PREWARM_PREFER_SLUGS for hot slugs first)', '90') ->addOption('metadata-limit', null, InputOption::VALUE_REQUIRED, 'Max distinct author pubkeys to warm (0 = all)', '0') ->addOption('metadata-batch', null, InputOption::VALUE_REQUIRED, 'Kind-0 metadata: pubkeys per Nostr REQ (batched)', '50') ->addOption('comments-max', null, InputOption::VALUE_REQUIRED, 'Newest N magazine category articles to warm comment cache for (0 = all, order: 
createdAt DESC; excludes generic /articles feed-only rows)', '10') @@ -198,7 +198,7 @@ final class PrewarmCommand extends Command $io->warning('Long-form backfill failed: '.$e->getMessage()); } - // MagazineRefresher sets max_execution_time (e.g. 60 for budget 30); restore before metadata. + // MagazineRefresher sets max_execution_time (budget + headroom); restore before metadata. $this->disableCliExecutionTimeLimit(); if (!$input->getOption('no-deletions')) { diff --git a/src/Controller/SeoController.php b/src/Controller/SeoController.php index 144551d..b731773 100644 --- a/src/Controller/SeoController.php +++ b/src/Controller/SeoController.php @@ -285,11 +285,11 @@ final class SeoController extends AbstractController $plain = preg_replace('/\s+/', ' ', (string) $article->getContent()) ?? ''; $sum = (string) mb_substr($plain, 0, 500); } - $eId = (string) ($article->getEventId() ?? ''); - if ($eId === '') { - $eId = (string) ($article->getId() ?? 'item'); - } - $entryId = 'urn:web:'.$this->urlHostId($request).":article:{$eId}"; + // One stable Atom per row. Nostr eventId can repeat (revisions, duplicates); readers + // merge on and would only show a single entry if ids collided. + $dbId = $article->getId(); + $entryId = 'urn:web:'.$this->urlHostId($request) + .':db-article:'.($dbId !== null && $dbId !== '' ? (string) $dbId : \spl_object_id($article)); $pub = $article->getPublishedAt() ?? $article->getCreatedAt() ?? 
$tArticle; $out = "\n "; @@ -367,12 +367,21 @@ final class SeoController extends AbstractController private function xmlText(string $s): string { - return htmlspecialchars($s, \ENT_XML1 | \ENT_QUOTES, 'UTF-8'); + return htmlspecialchars($this->stripInvalidXml1Chars($s), \ENT_XML1 | \ENT_QUOTES, 'UTF-8'); } private function xmlAttr(string $s): string { - return htmlspecialchars($s, \ENT_XML1 | \ENT_QUOTES, 'UTF-8'); + return htmlspecialchars($this->stripInvalidXml1Chars($s), \ENT_XML1 | \ENT_QUOTES, 'UTF-8'); + } + + /** + * XML 1.0 disallows C0 control chars other than tab, CR, LF; they can make feeds appear truncated + * after the first entry that used only “clean” text. + */ + private function stripInvalidXml1Chars(string $s): string + { + return preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F]/', '', $s) ?? $s; } private function xmlResponse(string $body): Response diff --git a/src/Service/MagazineContentService.php b/src/Service/MagazineContentService.php index 89fd941..8ba5c07 100644 --- a/src/Service/MagazineContentService.php +++ b/src/Service/MagazineContentService.php @@ -8,12 +8,13 @@ use App\Entity\Article; use App\Entity\Event; use App\Enum\EventStatusEnum; use App\Repository\ArticleRepository; +use App\Util\NostrEventTags; use Symfony\Component\DependencyInjection\ParameterBag\ParameterBagInterface; use Symfony\Component\HttpFoundation\RequestStack; /** - * Magazine index for templates. Reads {@see MagazineIndexStore} only on HTTP; relay refresh and DB - * backfill for category long-form are done by `app:prewarm` (cron) / CLI. + * Magazine index for templates. The store is filled by `app:prewarm` (cron) / CLI; missing 30040 + * snapshots can be loaded once per request from relays (see ensure* methods). 
*/ final class MagazineContentService { @@ -65,6 +66,10 @@ final class MagazineContentService $npub = (string) $this->params->get('npub'); $dTag = (string) $this->params->get('d_tag'); $mag = $this->store->getRoot($npub, $dTag); + if ($mag === null) { + $this->ensureRoot30040FromRelays($npub, $dTag); + $mag = $this->store->getRoot($npub, $dTag); + } return $this->categoryATagsFromMag($mag); } @@ -78,11 +83,19 @@ final class MagazineContentService return []; } $tags = $mag->getTags(); - $cats = array_filter($tags, static function (mixed $tag): bool { - return \is_array($tag) && ($tag[0] ?? null) === 'a'; - }); + $cats = []; + foreach ($tags as $tag) { + if (!NostrEventTags::tagNameMatches($tag, 'a')) { + continue; + } + $seq = NostrEventTags::rowToStringList($tag); + if ($seq === null || !isset($seq[1]) || (string) $seq[1] === '') { + continue; + } + $cats[] = ['a', (string) $seq[1]]; + } - return array_values($cats); + return $cats; } /** @@ -127,10 +140,14 @@ final class MagazineContentService continue; } foreach ($catIndex->getTags() as $tag) { - if (!\is_array($tag) || ($tag[0] ?? null) !== 'a' || !isset($tag[1])) { + if (!NostrEventTags::tagNameMatches($tag, 'a')) { + continue; + } + $seq = NostrEventTags::rowToStringList($tag); + if ($seq === null || !isset($seq[1])) { continue; } - $parts = explode(':', (string) $tag[1], 3); + $parts = explode(':', (string) $seq[1], 3); if (\count($parts) < 2) { continue; } @@ -157,13 +174,18 @@ final class MagazineContentService if ($slug === '') { return ''; } + $this->warmCategoryIndexIfMissing($slug); $catIndex = $this->store->getCategory($slug); if ($catIndex === null) { return $slug; } foreach ($catIndex->getTags() as $tag) { - if (($tag[0] ?? 
null) === 'title' && isset($tag[1])) { - return (string) $tag[1]; + if (!NostrEventTags::tagNameMatches($tag, 'title')) { + continue; + } + $seq = NostrEventTags::rowToStringList($tag); + if ($seq !== null && isset($seq[1])) { + return (string) $seq[1]; } } @@ -178,20 +200,26 @@ final class MagazineContentService */ public function getCategoryPageData(string $slug): array { + $this->warmCategoryIndexIfMissing($slug); $catIndex = $this->store->getCategory($slug); $list = []; $coordinates = []; $category = []; if ($catIndex) { foreach ($catIndex->getTags() as $tag) { - if ($tag[0] === 'title') { - $category['title'] = (string) $tag[1]; + $seq = NostrEventTags::rowToStringList($tag); + if ($seq === null) { + continue; } - if ($tag[0] === 'summary') { - $category['summary'] = (string) $tag[1]; + $name = strtolower($seq[0] ?? ''); + if ($name === 'title' && isset($seq[1])) { + $category['title'] = (string) $seq[1]; } - if ($tag[0] === 'a') { - $coordinates[] = $tag[1]; + if ($name === 'summary' && isset($seq[1])) { + $category['summary'] = (string) $seq[1]; + } + if ($name === 'a' && isset($seq[1])) { + $coordinates[] = (string) $seq[1]; } } } @@ -264,8 +292,11 @@ final class MagazineContentService } $coordinates = []; foreach ($catIndex->getTags() as $tag) { - if (($tag[0] ?? null) === 'a' && isset($tag[1])) { - $coordinates[] = (string) $tag[1]; + if (NostrEventTags::tagNameMatches($tag, 'a')) { + $seq = NostrEventTags::rowToStringList($tag); + if ($seq !== null && isset($seq[1]) && (string) $seq[1] !== '') { + $coordinates[] = (string) $seq[1]; + } } } if ($coordinates === []) { @@ -358,4 +389,64 @@ final class MagazineContentService return $list; } + + /** + * Ensures the category 30040 is in the store for this HTTP request (one relay pass per slug). + * Safe to call from e.g. {@see \App\Twig\Components\Molecules\CategoryLink} before reading titles. 
+ */ + public function warmCategoryIndexIfMissing(string $slug): void + { + if ($this->store->getCategory($slug) !== null) { + return; + } + $this->ensureCategory30040FromRelays($slug); + } + + private function ensureRoot30040FromRelays(string $npub, string $dTag): void + { + $r = $this->requestStack->getCurrentRequest(); + if ($r !== null && $r->attributes->get('_magazine_root_ensured')) { + return; + } + try { + $e = $this->nostrClient->getMagazineIndex($npub, $dTag); + if ($e !== null) { + $this->store->putRoot($npub, $dTag, $e); + } + } catch (\Throwable) { + } + if ($r !== null) { + $r->attributes->set('_magazine_root_ensured', true); + } + } + + private function ensureCategory30040FromRelays(string $slug): void + { + if (trim($slug) === '') { + return; + } + if ($this->store->getCategory($slug) !== null) { + return; + } + $r = $this->requestStack->getCurrentRequest(); + if ($r !== null) { + $tried = $r->attributes->get('_magazine_category_fetch_tried', []); + if (!\is_array($tried)) { + $tried = []; + } + if (\in_array($slug, $tried, true)) { + return; + } + $tried[] = $slug; + $r->attributes->set('_magazine_category_fetch_tried', $tried); + } + $npub = (string) $this->params->get('npub'); + try { + $e = $this->nostrClient->getMagazineIndex($npub, $slug); + if ($e !== null) { + $this->store->putCategory($slug, $e); + } + } catch (\Throwable) { + } + } } diff --git a/src/Service/MagazineRefresher.php b/src/Service/MagazineRefresher.php index 1cfdfd0..d8c9cfd 100644 --- a/src/Service/MagazineRefresher.php +++ b/src/Service/MagazineRefresher.php @@ -5,6 +5,7 @@ declare(strict_types=1); namespace App\Service; use App\Entity\Event; +use App\Util\NostrEventTags; use Psr\Cache\CacheItemPoolInterface; use Psr\Cache\InvalidArgumentException; use Psr\Log\LoggerInterface; @@ -24,6 +25,16 @@ final class MagazineRefresher private readonly LoggerInterface $logger, private readonly CacheItemPoolInterface $appCache, private readonly FeaturedAuthorSync $featuredAuthorSync, + 
/** + * Comma-separated category #d slugs (from the root index `a` tags) to fetch first after the root + * when the magazine relay phase is time-bounded; see MAGAZINE_PREWARM_PREFER_SLUGS in .env. + */ + private readonly string $magazinePrewarmPreferSlugs = '', + /** + * Comma-separated category #d slugs to always run a 30040 fetch for in prewarm, after the + * slugs from the live root (e.g. politics while the cached root has not yet listed that `a` tag). + */ + private readonly string $magazinePrewarmAlsoSlugs = '', ) { } @@ -37,10 +48,12 @@ final class MagazineRefresher */ public function refreshFromRelays(int $budgetSeconds = 8, array $preferSlugs = [], ?callable $onProgress = null): void { - $budgetSeconds = max(1, min(30, $budgetSeconds)); + // Allow large budgets (PrewarmCommand --magazine-budget). Hard cap only to avoid runaway PHP time. + $budgetSeconds = max(1, min(600, $budgetSeconds)); $deadline = microtime(true) + $budgetSeconds; $npub = (string) $this->params->get('npub'); $dTag = (string) $this->params->get('d_tag'); + $preferFromEnv = $this->parseCommaSeparatedSlugs($this->magazinePrewarmPreferSlugs); // Do not set max_execution_time to the *remaining* soft budget: PHP resets the timer, so // after a 6s root fetch, "2s left" would become a 2s hard cap for the *next* relay I/O @@ -50,6 +63,12 @@ final class MagazineRefresher $defaultRelay = (string) $this->params->get('default_relay'); $relayLabel = (string) (parse_url($defaultRelay, \PHP_URL_HOST) ?: $defaultRelay); + if ($preferFromEnv !== []) { + $this->logger->info('MagazineRefresher: prefer slugs (env) merged into fetch order', [ + 'prefer' => $preferFromEnv, + ]); + } + $onProgress?->__invoke('before_root', []); $root = $this->nostrClient->getMagazineIndex($npub, $dTag); if ($root === null) { @@ -67,7 +86,18 @@ final class MagazineRefresher $this->store->putRoot($npub, $dTag, $root); - $slugs = $this->orderedCategorySlugs($this->categorySlugsFromRoot($root), $preferSlugs); + $mergedPrefer = 
$this->mergePreferSlugsInOrder($preferSlugs, $preferFromEnv); + $alsoFromEnv = $this->parseCommaSeparatedSlugs($this->magazinePrewarmAlsoSlugs); + if ($alsoFromEnv !== []) { + $this->logger->info('MagazineRefresher: also slugs (env) merged into 30040 fetch list', [ + 'also' => $alsoFromEnv, + ]); + } + $slugs = $this->orderedCategorySlugs( + $this->categorySlugsFromRoot($root), + $mergedPrefer, + $alsoFromEnv + ); $totalSteps = 1 + \count($slugs); $onProgress?->__invoke('after_root', [ 'total_steps' => $totalSteps, @@ -152,14 +182,18 @@ final class MagazineRefresher { $slugs = []; foreach ($root->getTags() as $tag) { - if (($tag[0] ?? null) !== 'a' || !isset($tag[1])) { + if (!NostrEventTags::tagNameMatches($tag, 'a')) { continue; } - $parts = explode(':', (string) $tag[1], 3); + $seq = NostrEventTags::rowToStringList($tag); + if ($seq === null || !isset($seq[1]) || (string) $seq[1] === '') { + continue; + } + $parts = explode(':', (string) $seq[1], 3); if (\count($parts) < 3) { continue; } - $s = trim((string) end($parts)); + $s = trim((string) $parts[2]); if ($s !== '' && !\in_array($s, $slugs, true)) { $slugs[] = $s; } @@ -169,16 +203,29 @@ final class MagazineRefresher } /** + * Order: prefer (incl. MAGAZINE_PREWARM_PREFER_SLUGS), then MAGAZINE_PREWARM_ALSO_SLUGS, then + * each remaining category from the live root 30040. "Also" runs before the root tail so a + * time-bounded prewarm still fetches e.g. a new politics category 30040 even if the slug list + * from the root is long and the soft budget would stop before the former end of the list. 
+ * * @param list $allFromRoot * @param list $prefer + * @param list $also + * * @return list */ - private function orderedCategorySlugs(array $allFromRoot, array $prefer): array + private function orderedCategorySlugs(array $allFromRoot, array $prefer, array $also): array { $prefer = array_values(array_filter($prefer, static function (string $s): bool { return $s !== ''; })); $out = $prefer; + foreach ($also as $s) { + $s = trim($s); + if ($s !== '' && !\in_array($s, $out, true)) { + $out[] = $s; + } + } foreach ($allFromRoot as $s) { if (!\in_array($s, $out, true)) { $out[] = $s; @@ -205,8 +252,46 @@ final class MagazineRefresher */ private function applyExecutionTimeCap(int $budgetSeconds): void { - $sec = max(30, min(120, $budgetSeconds + 30)); + $sec = max(30, min(700, $budgetSeconds + 30)); @set_time_limit($sec); @ini_set('max_execution_time', (string) $sec); } + + /** + * @return list + */ + private function parseCommaSeparatedSlugs(string $raw): array + { + if (trim($raw) === '') { + return []; + } + $out = []; + foreach (explode(',', $raw) as $part) { + $s = trim($part); + if ($s !== '' && !\in_array($s, $out, true)) { + $out[] = $s; + } + } + + return $out; + } + + /** + * @param list $fromCaller e.g. 
current /cat route (first) + * @param list $fromEnv MAGAZINE_PREWARM_PREFER_SLUGS (next) + * + * @return list + */ + private function mergePreferSlugsInOrder(array $fromCaller, array $fromEnv): array + { + $out = []; + foreach (array_merge($fromCaller, $fromEnv) as $s) { + $s = trim((string) $s); + if ($s !== '' && !\in_array($s, $out, true)) { + $out[] = $s; + } + } + + return $out; + } } diff --git a/src/Service/NostrClient.php b/src/Service/NostrClient.php index 971a000..a88831f 100644 --- a/src/Service/NostrClient.php +++ b/src/Service/NostrClient.php @@ -15,6 +15,7 @@ use swentel\nostr\Event\Event; use swentel\nostr\Filter\Filter; use swentel\nostr\Message\EventMessage; use swentel\nostr\Message\RequestMessage; +use swentel\nostr\Key\Key; use swentel\nostr\Relay\Relay; use swentel\nostr\Relay\RelaySet; use swentel\nostr\Request\Request; @@ -171,9 +172,9 @@ class NostrClient } /** - * One relay for magazine 30040 lookups. {@see Request::send()} iterates every relay in the set - * sequentially; the full default set (5–6 wss) multiplies wall time — often 10s+ while a single - * relay returns in under 2s for the same filter. + * Single-relay set for I/O that intentionally hits one wss (e.g. longform ingest). Magazine + * 30040 resolution uses the full article relay set so all relays can contribute the latest + * NIP-33 replaceable per address. */ private function buildSingleRelaySet(string $wssUrl): RelaySet { @@ -472,6 +473,119 @@ class NostrClient return $relaySet; } + /** + * NIP kind-range convention: kind 0, 3, and 10_000–19_999 are replaceable by (kind, pubkey) only; + * 30_000–39_999 are addressable by (kind, pubkey, d). On equal {@see created_at}, the + * lexicographically lowest id is kept. 
+ */ + private static function isReplaceableByKindAndPubkeyNip(int $kind): bool + { + return $kind === 0 + || $kind === 3 + || ($kind >= 10_000 && $kind < 20_000); + } + + private static function replaceableKindPubkeyAddressFromWire(mixed $e): ?string + { + if (!\is_object($e)) { + return null; + } + $k = (int) ($e->kind ?? 0); + if (!self::isReplaceableByKindAndPubkeyNip($k)) { + return null; + } + $pk = (string) ($e->pubkey ?? ''); + if (64 !== \strlen($pk) || !ctype_xdigit($pk)) { + return null; + } + + return (string) $k.':'.strtolower($pk); + } + + private static function isValidNostrEventIdString(string $id): bool + { + return 64 === \strlen($id) && ctype_xdigit($id); + } + + /** + * Whether $candidate is the NIP-preferred live revision over $incumbent: higher created_at, or + * same created_at and lower (lexicographically first) id. Events without a valid 64-hex id + * lose to valid ones (avoids an empty id “winning” a tie and hiding real content). + */ + private static function wireEventSupersedes(mixed $candidate, mixed $incumbent): bool + { + $c = self::magazineEventCreatedAt($candidate); + $i = self::magazineEventCreatedAt($incumbent); + if ($c !== $i) { + return $c > $i; + } + $idC = self::magazineEventId($candidate); + $idI = self::magazineEventId($incumbent); + $vC = self::isValidNostrEventIdString($idC); + $vI = self::isValidNostrEventIdString($idI); + if ($vC && !$vI) { + return true; + } + if (!$vC && $vI) { + return false; + } + if (!$vC && !$vI) { + if ($idC === $idI) { + return false; + } + + return $idC < $idI; + } + if ($idC === $idI) { + return false; + } + + return $idC < $idI; + } + + /** + * NIP-01: kind-0 profile metadata is replaceable; the live document is addressed by `0:pubkey` + * (not by event id). Multiple relay copies collapse per {@see wireEventSupersedes}. + */ + private static function kind0Nip01ReplaceableAddress(mixed $ev): ?string + { + if (!\is_object($ev) || (int) ($ev->kind ?? 
-1) !== KindsEnum::METADATA->value) { + return null; + } + $pk = (string) ($ev->pubkey ?? ''); + if (64 !== \strlen($pk) || !ctype_xdigit($pk)) { + return null; + } + + return '0:'.strtolower($pk); + } + + private static function kind0ReplaceableIsNewer(mixed $candidate, mixed $incumbent): bool + { + return self::wireEventSupersedes($candidate, $incumbent); + } + + /** + * @param list $events + * + * @return array Keyed by `0:` + 64 hex (lowercase); one winning kind-0 event per key + */ + private static function mergeKind0EventsByReplaceableAddress(array $events): array + { + $byAddress = []; + foreach ($events as $ev) { + $addr = self::kind0Nip01ReplaceableAddress($ev); + if ($addr === null) { + continue; + } + if (!isset($byAddress[$addr]) || self::kind0ReplaceableIsNewer($ev, $byAddress[$addr])) { + $byAddress[$addr] = $ev; + } + } + + return $byAddress; + } + /** * Batched kind-0 profile fetch: one Nostr REQ per chunk with multiple "authors" (hex pubkeys). * @@ -512,23 +626,11 @@ class NostrClient 'relays' => $relaysTriedStr, 'ms' => (int) round((microtime(true) - $t0) * 1000), ]); - $newest = []; - foreach ($events as $ev) { - if (!\is_object($ev) || !isset($ev->pubkey, $ev->content)) { + foreach (self::mergeKind0EventsByReplaceableAddress($events) as $addr => $ev) { + if (!\is_object($ev) || !isset($ev->content)) { continue; } - $pk = (string) $ev->pubkey; - if (64 !== \strlen($pk)) { - continue; - } - $ts = (int) ($ev->created_at ?? 0); - if (isset($newest[$pk]) && $ts <= $newest[$pk]['t']) { - continue; - } - $newest[$pk] = ['ev' => $ev, 't' => $ts]; - } - foreach ($newest as $pk => $row) { - $ev = $row['ev']; + $pk = \substr($addr, 2); try { $data = \json_decode((string) $ev->content, false, 512, \JSON_THROW_ON_ERROR); } catch (\JsonException) { @@ -636,14 +738,23 @@ class NostrClient if (empty($events)) { throw new \Exception('No metadata for npub '.$npub.' 
(relays: '.$relaysTriedStr.')'); } - // Sort by date and return newest - usort($events, static fn ($a, $b) => (int) ($b->created_at ?? 0) <=> (int) ($a->created_at ?? 0)); + $byAddr = self::mergeKind0EventsByReplaceableAddress($events); + $authorHex = self::npubToHexPubkey($npub); + if ($authorHex === null) { + throw new \Exception('Invalid npub for metadata: '.$npub); + } + $key = '0:'.$authorHex; + if (!isset($byAddr[$key])) { + throw new \Exception('No kind-0 metadata for npub '.$npub.' (relays: '.$relaysTriedStr.')'); + } - return $events[0]; + return $byAddr[$key]; } /** - * NIP-A3 kind 10133: payment target events (replaceable) with `["payto", type, authority, ...]` tags. + * NIP-A3 kind 10133: payment target events; NIP kind-range 10_000–19_999 is replaceable by + * (kind, pubkey), so multi-relay results are merged to the live revision per + * {@see wireEventSupersedes} (at most one event for this author). * * @return list */ @@ -674,9 +785,8 @@ class NostrClient if (!\is_array($events) || $events === []) { return []; } - usort($events, static fn ($a, $b) => (int) ($b->created_at ?? 0) <=> (int) ($a->created_at ?? 0)); - return array_values($events); + return self::mergeNip33ParameterizedWireEvents($events); } public function getNpubLongForm($npub): void @@ -811,8 +921,14 @@ class NostrClient }); if (!empty($events)) { - // Save only the first event (most recent) - $event = $events[0]; + $kindI = (int) $kind; + $authorH = self::authorIdentToHexLower($author); + $event = self::isNip33ParameterizedKind($kindI) && $authorH !== null + ? 
self::pickLatestNip33ParameterizedForQuery($events, $kindI, $authorH, (string) $slug) + : null; + if ($event === null) { + $event = $events[0]; + } $wrapper = new \stdClass(); $wrapper->type = 'EVENT'; $wrapper->event = $event; @@ -937,9 +1053,14 @@ class NostrClient private function saveLongFormContent(mixed $filtered): void { + $events = []; foreach ($filtered as $wrapper) { - $article = $this->articleFactory->createFromLongFormContentEvent($wrapper->event); - // check if event with same eventId already in DB + if (isset($wrapper->event) && \is_object($wrapper->event)) { + $events[] = $wrapper->event; + } + } + foreach (self::mergeNip33ParameterizedWireEvents($events) as $event) { + $article = $this->articleFactory->createFromLongFormContentEvent($event); $this->saveEachArticleToTheDatabase($article); } } @@ -961,11 +1082,20 @@ class NostrClient if (empty($response)) { return []; } - // Sort by date and use newest - usort($response, fn($a, $b) => $b->created_at <=> $a->created_at); - // Process tags of the $response[0] and extract relays + $merged = self::mergeNip33ParameterizedWireEvents($response); + $use = null; + $k10002 = (int) KindsEnum::RELAY_LIST->value; + foreach ($merged as $e) { + if (\is_object($e) && (int) ($e->kind ?? 0) === $k10002) { + $use = $e; + break; + } + } + if ($use === null) { + return []; + } $relays = []; - foreach ($response[0]->tags as $tag) { + foreach ($use->tags ?? 
[] as $tag) { if ($tag[0] === 'r') { $relays[] = $tag[1]; } @@ -1577,12 +1707,19 @@ class NostrClient relaySet: $relaySet ); - // Process the response using the helper method - return $this->processResponse($request->send(), function($event) { + $events = $this->processResponse( + $request->send(), + static fn (object $event) => $event, + ); + foreach (self::mergeNip33ParameterizedWireEvents($events) as $event) { + if (!\is_object($event)) { + continue; + } $article = $this->articleFactory->createFromLongFormContentEvent($event); - // Save each article to the database $this->saveEachArticleToTheDatabase($article); - }); + } + + return []; } public function getArticles(array $slugs): array @@ -1690,7 +1827,7 @@ class NostrClient } } - return $articles; + return self::mergeNip33ParameterizedWireEvents(array_values($articles)); } /** @@ -1753,60 +1890,27 @@ class NostrClient try { $request = $this->newTimedRequest($relaySet, $requestMessage); - $response = $request->send(); - $found = false; - - // Check responses from each relay - foreach ($response as $relayUrl => $value) { - if ($value instanceof \Throwable) { - $this->logger->warning(sprintf( - '[%s] getArticlesByCoordinates: %s', - self::relayLogLabel($relayUrl), - $value->getMessage() - ), ['coordinate' => $coordinate, 'relay' => $relayUrl]); - - continue; - } - if (!\is_iterable($value)) { - continue; - } - foreach ($value as $item) { - if ($item->type === 'EVENT') { - $articlesMap[$coordinate] = $item->event; - $found = true; - break 2; // Found what we need, exit both loops - } - } + $events = $this->processResponse( + $request->send(), + static fn (object $event) => $event, + ); + $ev = $this->pickEventForNip33OrFirst($events, $kind, (string) $pubkey, (string) $slug); + if ($ev !== null) { + $articlesMap[$coordinate] = $ev; } - // If still not found, try with default relay set as fallback - if (!$found) { + if (!isset($articlesMap[$coordinate])) { $this->logger->info('Article not found in author relays, 
trying default relays', [ 'coordinate' => $coordinate ]); - - $request = $this->newTimedRequest($this->defaultRelaySet, $requestMessage); - $response = $request->send(); - - foreach ($response as $relayUrl => $value) { - if ($value instanceof \Throwable) { - $this->logger->warning(sprintf( - '[%s] getArticlesByCoordinates: %s', - self::relayLogLabel($relayUrl), - $value->getMessage() - ), ['coordinate' => $coordinate, 'relay' => $relayUrl]); - - continue; - } - if (!\is_iterable($value)) { - continue; - } - foreach ($value as $item) { - if ($item->type === 'EVENT') { - $articlesMap[$coordinate] = $item->event; - break 2; - } - } + $request2 = $this->newTimedRequest($this->defaultRelaySet, $requestMessage); + $events2 = $this->processResponse( + $request2->send(), + static fn (object $event) => $event, + ); + $ev2 = $this->pickEventForNip33OrFirst($events2, $kind, (string) $pubkey, (string) $slug); + if ($ev2 !== null) { + $articlesMap[$coordinate] = $ev2; } } } catch (\Exception $e) { @@ -2026,6 +2130,14 @@ class NostrClient } $wantD = (string) ($data->identifier ?? ''); + $kindI = (int) ($data->kind ?? KindsEnum::LONGFORM->value); + $authorH = self::authorIdentToHexLower($data->pubkey ?? null); + if (self::isNip33ParameterizedKind($kindI) && $authorH !== null) { + $picked = self::pickLatestNip33ParameterizedForQuery($events, $kindI, $authorH, $wantD); + if ($picked !== null) { + return $picked; + } + } foreach ($events as $event) { if ($this->eventHasDTag($event, $wantD)) { return $event; @@ -2053,6 +2165,200 @@ class NostrClient return false; } + /** + * One wire event for a (kind, author, #d) coordinate after merging relay results. 
+ * + * @param list $events + */ + private function pickEventForNip33OrFirst(array $events, int $kind, string $authorIdent, string $dTag): ?object + { + if ($events === []) { + return null; + } + if (self::isNip33ParameterizedKind($kind)) { + $h = self::authorIdentToHexLower($authorIdent); + if ($h !== null) { + $picked = self::pickLatestNip33ParameterizedForQuery($events, $kind, $h, $dTag); + if ($picked !== null && \is_object($picked)) { + return $picked; + } + } + $merged = self::mergeNip33ParameterizedWireEvents($events); + $first = $merged[0] ?? null; + + return \is_object($first) ? $first : null; + } + if (self::isReplaceableByKindAndPubkeyNip($kind)) { + $h = self::authorIdentToHexLower($authorIdent); + if ($h !== null) { + $best = null; + foreach ($events as $e) { + if (!\is_object($e) || (int) ($e->kind ?? 0) !== $kind) { + continue; + } + if (strtolower((string) ($e->pubkey ?? '')) !== $h) { + continue; + } + if ($best === null || self::wireEventSupersedes($e, $best)) { + $best = $e; + } + } + if ($best !== null) { + return $best; + } + } + foreach (self::mergeNip33ParameterizedWireEvents($events) as $e) { + if (\is_object($e) && (int) ($e->kind ?? 0) === $kind) { + return $e; + } + } + + return null; + } + $e0 = $events[0] ?? null; + + return \is_object($e0) ? $e0 : null; + } + + /** NIP-33: kinds 30_000–39_999 (parameterized replaceable) use `kind:pubkey:d` as address. */ + private const NIP33_PARAMETERIZED_KIND_MIN = 30_000; + private const NIP33_PARAMETERIZED_KIND_MAX = 39_999; + + private static function isNip33ParameterizedKind(int $kind): bool + { + return $kind >= self::NIP33_PARAMETERIZED_KIND_MIN + && $kind <= self::NIP33_PARAMETERIZED_KIND_MAX; + } + + /** + * NIP-33: `kind:pubkey_hex:d` (d from tags; d may include colons). Kinds 30000–39999 only. 
+     *
+     * @return string|null The address, or null when the kind is outside the NIP-33 range, the pubkey is
+     *                     not 64 hex characters, or the event has no non-empty "d" value.
+     */
+    private static function nip33ParameterizedReplaceableAddress(mixed $event): ?string
+    {
+        $kind = self::magazineEventKind($event);
+        if (!self::isNip33ParameterizedKind($kind)) {
+            return null;
+        }
+        $pubkey = self::magazineEventPubkeyHex($event);
+        // The length check also rejects the empty string.
+        if (64 !== \strlen($pubkey) || !ctype_xdigit($pubkey)) {
+            return null;
+        }
+        $d = self::eventDTagValue($event);
+        if ($d === null || $d === '') {
+            return null;
+        }
+
+        return (string) $kind.':'.strtolower($pubkey).':'.$d;
+    }
+
+    /**
+     * NIP-33: among relay results for a single (kind, author, d) filter, keep the live revision per
+     * {@see wireEventSupersedes}.
+     *
+     * Only events that have a valid NIP-33 address and whose kind, lower-cased pubkey and "d" value
+     * all equal the requested ones are considered. They therefore share one address, so a single
+     * running winner is enough.
+     *
+     * @param list<mixed> $events         Wire events or {@see PublicationEventEntity} instances.
+     * @param int         $expectedKind   Requested kind; must be in the NIP-33 range.
+     * @param string      $authorHexLower Requested author pubkey; compared against the lower-cased event pubkey.
+     * @param string      $dTag           Requested "d" value (trimmed before comparison).
+     *
+     * @return mixed The winning event, or null when the kind is not NIP-33 or nothing matches.
+     */
+    private static function pickLatestNip33ParameterizedForQuery(
+        array $events,
+        int $expectedKind,
+        string $authorHexLower,
+        string $dTag
+    ): mixed {
+        if (!self::isNip33ParameterizedKind($expectedKind)) {
+            return null;
+        }
+        $wantD = trim($dTag);
+        $winner = null;
+        foreach ($events as $candidate) {
+            // A null address means the candidate is not a well-formed NIP-33 event.
+            if (self::nip33ParameterizedReplaceableAddress($candidate) === null) {
+                continue;
+            }
+            if (strtolower(self::magazineEventPubkeyHex($candidate)) !== $authorHexLower) {
+                continue;
+            }
+            if (self::eventDTagValue($candidate) !== $wantD) {
+                continue;
+            }
+            if (self::magazineEventKind($candidate) !== $expectedKind) {
+                continue;
+            }
+            if ($winner === null || self::wireEventSupersedes($candidate, $winner)) {
+                $winner = $candidate;
+            }
+        }
+
+        return $winner;
+    }
+
+    /**
+     * Merge relay results: 30_000–39_999 by `kind:pubkey:d`; kind 0, 3, 10_000–19_999 by `kind:pubkey`;
+     * others by event id.
Uses {@see wireEventSupersedes} for the winning revision in each bucket.
+     *
+     * Non-object entries, NIP-33 or kind+pubkey events without an address, and other events
+     * without an id are dropped. The result lists id-keyed events first, then kind+pubkey
+     * events, then NIP-33 events.
+     *
+     * @param list<mixed> $events
+     *
+     * @return list<object>
+     */
+    private static function mergeNip33ParameterizedWireEvents(array $events): array
+    {
+        $buckets = ['id' => [], 'kindPubkey' => [], 'nip33' => []];
+        foreach ($events as $wire) {
+            if (!\is_object($wire)) {
+                continue;
+            }
+            $kind = (int) ($wire->kind ?? 0);
+
+            // Decide which bucket the event belongs to and under which key it is deduplicated.
+            if (self::isNip33ParameterizedKind($kind)) {
+                $bucket = 'nip33';
+                $key = self::nip33ParameterizedReplaceableAddress($wire);
+            } elseif (self::isReplaceableByKindAndPubkeyNip($kind)) {
+                $bucket = 'kindPubkey';
+                $key = self::replaceableKindPubkeyAddressFromWire($wire);
+            } else {
+                $bucket = 'id';
+                $id = (string) ($wire->id ?? '');
+                $key = $id === '' ? null : $id;
+            }
+            if ($key === null) {
+                continue;
+            }
+
+            // Stored values are always objects, so a null lookup means the key is not taken yet.
+            $current = $buckets[$bucket][$key] ?? null;
+            if ($current === null || self::wireEventSupersedes($wire, $current)) {
+                $buckets[$bucket][$key] = $wire;
+            }
+        }
+
+        return array_values(array_merge($buckets['id'], $buckets['kindPubkey'], $buckets['nip33']));
+    }
+
+    /** Resolves an author identifier to a hex pubkey by delegating to {@see npubToHexPubkey}. */
+    private static function authorIdentToHexLower(mixed $ident): ?string
+    {
+        return self::npubToHexPubkey($ident);
+    }
+
     /**
      * Latest kind 30040 index for this author and #d tag, as {@see PublicationEventEntity}
      * so callers can use {@see PublicationEventEntity::getTags()} (relay payloads are otherwise stdClass).
@@ -2063,33 +2369,23 @@ class NostrClient */ public function getMagazineIndex(mixed $npub, mixed $dTag): ?PublicationEventEntity { - $entity = $this->queryMagazineIndex( - $npub, - $dTag, - $this->buildSingleRelaySet($this->defaultRelayUrl), - self::relayLogLabel($this->defaultRelayUrl) - ); - if ($entity !== null) { - return $entity; - } - if (\count($this->configuredArticleRelayUrlList()) <= 1) { - $this->logger->warning(sprintf( - 'No magazine index found (tried %s)', - self::relayLogLabel($this->defaultRelayUrl) - ), ['npub' => $npub, 'dTag' => $dTag, 'relay' => $this->defaultRelayUrl]); - - return null; - } - $this->logger->notice('Magazine index not on default relay, falling back to full relay set', [ - 'dTag' => $dTag, - ]); - $fullListStr = implode(', ', array_map(self::relayLogLabel(...), $this->configuredArticleRelayUrlList())); + $urls = $this->configuredArticleRelayUrlList(); + $relaysForLog = implode(', ', array_map(self::relayLogLabel(...), $urls)); - return $this->queryMagazineIndex($npub, $dTag, $this->defaultRelaySet, $fullListStr); + return $this->queryMagazineIndex($npub, $dTag, $this->defaultRelaySet, $relaysForLog); } private function queryMagazineIndex(mixed $npub, mixed $dTag, RelaySet $relaySet, string $relaysForLog): ?PublicationEventEntity { + $authorHex = self::npubToHexPubkey($npub); + if ($authorHex === null) { + $this->logger->warning('Magazine index: could not resolve npub to hex pubkey', [ + 'npub' => $npub, + 'dTag' => $dTag, + ]); + + return null; + } $request = $this->createNostrRequest( [KindsEnum::PUBLICATION_INDEX], ['authors' => [(string) $npub], 'tag' => ['#d', [(string) $dTag]]], @@ -2107,16 +2403,29 @@ class NostrClient if (empty($events)) { return null; } - usort($events, static function ($a, $b): int { - return self::magazineEventCreatedAt($b) <=> self::magazineEventCreatedAt($a); - }); + $raw = self::pickLatestNip33ParameterizedForQuery( + $events, + KindsEnum::PUBLICATION_INDEX->value, + $authorHex, + (string) $dTag + 
); + if ($raw === null) { + $this->logger->warning('Magazine index: no event matched NIP-33 address (kind:pubkey:d) after merge', [ + 'npub' => $npub, + 'dTag' => $dTag, + 'relays' => $relaysForLog, + 'event_count' => \count($events), + ]); - return self::magazineEventToPublicationEntity($events[0]); + return null; + } + + return self::magazineEventToPublicationEntity($raw); } /** * Batch-fetch longform for category `a` coordinates that are not in the DB; one Nostr call per - * (author × kind) group, only the default relay (see {@see getMagazineIndex} rationale). + * (author × kind) group, only the default relay (faster; magazine 30040 uses the full relay set). * * @param list $addresses kind:pubkey:identifier */ @@ -2159,12 +2468,17 @@ class NostrClient $this->buildSingleRelaySet($this->defaultRelayUrl), ); try { - $this->processResponse($request->send(), function ($event) { + $events = $this->processResponse( + $request->send(), + static fn (object $event) => $event, + ); + foreach (self::mergeNip33ParameterizedWireEvents($events) as $event) { + if (!\is_object($event)) { + continue; + } $article = $this->articleFactory->createFromLongFormContentEvent($event); $this->saveEachArticleToTheDatabase($article); - - return null; - }); + } } catch (\Throwable $e) { $this->logger->error(sprintf( 'ingestMissingLongformForCategoryCoordinates [%s]: %s', @@ -2191,6 +2505,116 @@ class NostrClient return 0; } + private static function magazineEventId(mixed $event): string + { + if ($event instanceof PublicationEventEntity) { + return $event->getId(); + } + if (\is_object($event) && isset($event->id)) { + return (string) $event->id; + } + + return ''; + } + + private static function magazineEventKind(mixed $event): int + { + if ($event instanceof PublicationEventEntity) { + return $event->getKind(); + } + if (\is_object($event) && isset($event->kind)) { + return (int) $event->kind; + } + + return 0; + } + + private static function magazineEventPubkeyHex(mixed $event): string + 
{
+        if ($event instanceof PublicationEventEntity) {
+            return (string) $event->getPubkey();
+        }
+        if (\is_object($event) && isset($event->pubkey)) {
+            return (string) $event->pubkey;
+        }
+
+        return '';
+    }
+
+    /**
+     * Converts one Nostr tag row into a list of strings with the tag name first, e.g. ["d", "ident"].
+     * Accepts indexed arrays as well as object-shaped rows decoded from JSON.
+     *
+     * @return list<string>|null Null for non-array/non-object rows, empty rows, or an empty tag name.
+     */
+    private static function normalizeNostrTagRowToSequence(mixed $row): ?array
+    {
+        if (\is_object($row)) {
+            $row = get_object_vars($row);
+        }
+        // null, scalars and empty rows carry no tag name.
+        if (!\is_array($row) || $row === []) {
+            return null;
+        }
+
+        $cells = [];
+        foreach ($row as $cell) {
+            $cells[] = (string) $cell;
+        }
+
+        return $cells[0] === '' ? null : $cells;
+    }
+
+    /**
+     * Value of the first "d" tag that has a non-empty value, trimmed. Tags are read from
+     * {@see PublicationEventEntity::getTags()} or from the `tags` array of a raw relay object.
+     *
+     * @return string|null Null when the event exposes no tag array or no usable "d" tag.
+     */
+    private static function eventDTagValue(mixed $event): ?string
+    {
+        if ($event instanceof PublicationEventEntity) {
+            $tags = $event->getTags();
+        } elseif (\is_object($event) && isset($event->tags) && \is_array($event->tags)) {
+            $tags = $event->tags;
+        } else {
+            return null;
+        }
+        if (!\is_array($tags)) {
+            return null;
+        }
+
+        foreach ($tags as $row) {
+            $cells = self::normalizeNostrTagRowToSequence($row);
+            if ($cells === null || $cells[0] !== 'd') {
+                continue;
+            }
+            $value = $cells[1] ?? '';
+            // The emptiness test runs on the untrimmed value; trimming happens only on return.
+            if ($value === '') {
+                continue;
+            }
+
+            return trim($value);
+        }
+
+        return null;
+    }
+
+    /**
+     * Resolves an author identifier to a lower-case 64-character hex pubkey.
+     *
+     * A 64-character hex string is lower-cased and returned. A value starting with "npub" is
+     * converted with {@see Key::convertToHex()} and returned only if the result is 64 hex
+     * characters. Anything else yields null.
+     */
+    private static function npubToHexPubkey(mixed $npub): ?string
+    {
+        $ident = trim((string) $npub);
+
+        if (64 === \strlen($ident) && ctype_xdigit($ident)) {
+            return strtolower($ident);
+        }
+        // The empty string does not start with "npub", so it is rejected here as well.
+        if (!str_starts_with($ident, 'npub')) {
+            return null;
+        }
+
+        $hex = (new Key())->convertToHex($ident);
+        if ($hex !== '' && 64 === \strlen($hex) && ctype_xdigit($hex)) {
+            return strtolower($hex);
+        }
+
+        return null;
+    }
+
     /**
      * Normalize relay / library event objects to the app's Event entity (not persisted).
*/ diff --git a/src/Twig/Components/Molecules/CategoryLink.php b/src/Twig/Components/Molecules/CategoryLink.php index c75d88d..5f7bca0 100644 --- a/src/Twig/Components/Molecules/CategoryLink.php +++ b/src/Twig/Components/Molecules/CategoryLink.php @@ -2,6 +2,7 @@ namespace App\Twig\Components\Molecules; +use App\Service\MagazineContentService; use App\Service\MagazineIndexStore; use Symfony\UX\TwigComponent\Attribute\AsTwigComponent; @@ -14,6 +15,7 @@ final class CategoryLink public function __construct( private readonly MagazineIndexStore $store, + private readonly MagazineContentService $magazineContent, ) { } @@ -29,6 +31,7 @@ final class CategoryLink } $this->title = $this->slug; + $this->magazineContent->warmCategoryIndexIfMissing($this->slug); $cat = $this->store->getCategory($this->slug); if (!\is_object($cat) || !\method_exists($cat, 'getTags')) { return; diff --git a/src/Util/NostrEventTags.php b/src/Util/NostrEventTags.php new file mode 100644 index 0000000..42b9f25 --- /dev/null +++ b/src/Util/NostrEventTags.php @@ -0,0 +1,45 @@ +|null + */ + public static function rowToStringList(mixed $row): ?array + { + if ($row === null) { + return null; + } + if (\is_object($row)) { + $row = get_object_vars($row); + } + if (!\is_array($row) || $row === []) { + return null; + } + + return array_values( + array_map( + static fn (mixed $v): string => (string) $v, + $row + ) + ); + } + + public static function tagNameMatches(mixed $row, string $name): bool + { + $seq = self::rowToStringList($row); + if ($seq === null || $seq === []) { + return false; + } + + return strtolower($seq[0] ?? '') === strtolower($name); + } +}