diff --git a/src/Command/PrewarmCommand.php b/src/Command/PrewarmCommand.php index f86ce8f..fbc7a79 100644 --- a/src/Command/PrewarmCommand.php +++ b/src/Command/PrewarmCommand.php @@ -185,13 +185,13 @@ final class PrewarmCommand extends Command } } - $io->section('Long-form in DB (category `a` tags missing from MySQL)'); + $io->section('Long-form in DB (category `a` tags — refresh from Nostr)'); try { - $n = $this->magazineContent->ingestMissingLongformForAllMagazineCategories(); + $n = $this->magazineContent->ingestLongformForAllMagazineCategories(); if ($n === 0) { - $io->note('No missing long-form rows for category `a` coordinates (or empty magazine store).'); + $io->note('No category `a` coordinates in the magazine store (or empty category indices).'); } else { - $io->writeln(sprintf('Fetched or attempted ingest for %d missing coordinate(s).', $n)); + $io->writeln(sprintf('Fetched latest long-form for %d coordinate(s) (new rows + NIP-33 updates).', $n)); } } catch (\Throwable $e) { $this->logger->error('app:prewarm longform ingest failed', ['e' => $e]); diff --git a/src/Service/MagazineContentService.php b/src/Service/MagazineContentService.php index 8ba5c07..51c507c 100644 --- a/src/Service/MagazineContentService.php +++ b/src/Service/MagazineContentService.php @@ -194,7 +194,7 @@ final class MagazineContentService /** * Category listing from the persisted 30040 index and DB only. Does not call relays. - * Missing `Article` rows (not yet in MySQL) appear until `app:prewarm` backfills. + * Rows come from MySQL only; run `app:prewarm` to sync new `a` tags and replaceable revisions. * * @return array{list: list
, category: array{title: string, summary: string}} */ @@ -263,19 +263,20 @@ final class MagazineContentService } /** - * For every category in the root index, fetch Nostr long-form for `a` tags missing in MySQL. - * Nostr I/O; intended for {@see PrewarmCommand} / cron only. + * For every category in the store, fetch the latest Nostr long-form for each `a` tag so new + * posts are ingested and NIP-33 replaceable updates refresh existing MySQL rows. Nostr I/O; + * intended for {@see PrewarmCommand} / cron only. */ - public function ingestMissingLongformForAllMagazineCategories(): int + public function ingestLongformForAllMagazineCategories(): int { $n = 0; foreach ($this->getCategorySlugsFromStore() as $catSlug) { - $missing = $this->findMissingLongformCoordinatesForCategory($catSlug); - if ($missing === []) { + $all = $this->findAllLongformCoordinatesForCategory($catSlug); + if ($all === []) { continue; } - $this->nostrClient->ingestMissingLongformForCategoryCoordinates($missing); - $n += \count($missing); + $this->nostrClient->ingestLongformForCategoryCoordinates($all); + $n += \count($all); } return $n; @@ -284,56 +285,30 @@ final class MagazineContentService /** * @return list Nostr coordinates kind:pubkey:identifier */ - private function findMissingLongformCoordinatesForCategory(string $slug): array + private function findAllLongformCoordinatesForCategory(string $slug): array { $catIndex = $this->store->getCategory($slug); if ($catIndex === null) { return []; } - $coordinates = []; + $out = []; foreach ($catIndex->getTags() as $tag) { - if (NostrEventTags::tagNameMatches($tag, 'a')) { - $seq = NostrEventTags::rowToStringList($tag); - if ($seq !== null && isset($seq[1]) && (string) $seq[1] !== '') { - $coordinates[] = (string) $seq[1]; - } - } - } - if ($coordinates === []) { - return []; - } - $pairs = []; - foreach ($coordinates as $coordinate) { - $parts = explode(':', (string) $coordinate, 3); - if (\count($parts) < 3) { + if (!NostrEventTags::tagNameMatches($tag, 'a')) { continue; } - $slugPart = trim((string) $parts[2]); - if ($slugPart === '') { + $seq = NostrEventTags::rowToStringList($tag); + if ($seq === null || !isset($seq[1]) || (string) $seq[1] === '') { continue; } - $pairs[] = [ - 'pubkey' => (string) $parts[1], - 'slug' => $slugPart, - ]; - } - if ($pairs === []) { - return []; - } - $byAddress = $this->articleRepository->findByAuthorAndSlugIndexed($pairs); - $missing = []; - foreach ($coordinates as $coordinate) { - $parts = explode(':', (string) $coordinate, 3); - if (\count($parts) < 3) { + $coordinate = (string) $seq[1]; + $parts = explode(':', $coordinate, 3); + if (\count($parts) < 3 || trim((string) $parts[2]) === '') { continue; } - $k = (string) $parts[1]."\0".trim((string) $parts[2]); - if (!isset($byAddress[$k])) { - $missing[] = (string) $coordinate; - } + $out[] = $coordinate; } - return $missing; + return $out; } /** diff --git a/src/Service/NostrClient.php b/src/Service/NostrClient.php index a88831f..33c4565 100644 --- a/src/Service/NostrClient.php +++ b/src/Service/NostrClient.php @@ -5,6 +5,7 @@ namespace App\Service; use App\Entity\Article; use App\Entity\User; use App\Entity\Event as PublicationEventEntity; +use App\Enum\EventStatusEnum; use App\Enum\KindsEnum; use App\Factory\ArticleFactory; use Doctrine\ORM\EntityManagerInterface; @@ -2046,17 +2047,205 @@ class NostrClient */ public function saveEachArticleToTheDatabase(Article $article): void { - $saved = $this->entityManager->getRepository(Article::class)->findOneBy(['eventId' => $article->getEventId()]); - if (!$saved) { + $newId = (string) ($article->getEventId() ?? ''); + if ($newId === '') { + $this->logger->info('[longform_ingest] saveEachArticle: skip, empty eventId on Article', [ + 'title' => $article->getTitle(), + ]); + + return; + } + $repo = $this->entityManager->getRepository(Article::class); + if ($repo->findOneBy(['eventId' => $newId]) !== null) { + $this->logger->info('[longform_ingest] saveEachArticle: skip, DB already has this exact event id (no work)', [ + 'eventId' => $newId, + 'slug' => $article->getSlug(), + ]); + + return; + } + $pubkey = strtolower((string) ($article->getPubkey() ?? '')); + $slug = trim((string) ($article->getSlug() ?? '')); + if ($pubkey === '' || $slug === '') { + $this->logger->info('[longform_ingest] saveEachArticle: persist new (missing pubkey or slug on entity)', [ + 'eventId' => $newId, + 'pubkey_empty' => $pubkey === '', + 'slug' => $slug, + ]); + $this->persistNewArticle($article, 'missing_pubkey_or_slug_on_entity'); + + return; + } + $incumbent = $this->findLatestLongFormArticleByAuthorAndSlug($pubkey, $slug); + if ($incumbent === null) { + $this->logger->info('[longform_ingest] saveEachArticle: persist new row (no DB row for author+slug)', [ + 'eventId' => $newId, + 'address' => $pubkey.':…:'.self::longformIngestShortSlug($slug), + ]); + $this->persistNewArticle($article, 'no_db_row_for_nip33_address'); + + return; + } + $candidate = $article->getRaw(); + if (!\is_object($candidate)) { + $this->logger->warning('[longform_ingest] saveEachArticle: new Article has no raw wire; trying insert as new', [ + 'eventId' => $newId, + ]); + $this->persistNewArticle($article, 'no_raw_on_incoming_article'); + + return; + } + $iWire = self::longFormWireStubFromArticle($incumbent); + $cTs = self::magazineEventCreatedAt($candidate); + $iTs = self::magazineEventCreatedAt($iWire); + if (self::wireEventSupersedes($candidate, $iWire)) { + $this->logger->info('[longform_ingest] saveEachArticle: NIP-33 update — candidate wins, flushing DB row', [ + 'address' => $pubkey.':…:'.self::longformIngestShortSlug($slug), + 'from_event_id' => $incumbent->getEventId(), + 'to_event_id' => $newId, + 'db_row_id' => $incumbent->getId(), + 'incumbent_created_at' => $iTs, + 'candidate_created_at' => $cTs, + ]); + $this->applyLongFormArticleOnto($article, $incumbent); + if ($incumbent->getPubkey() !== $pubkey) { + $incumbent->setPubkey($pubkey); + } try { - $this->logger->info('Saving article', ['article' => $article]); - $this->entityManager->persist($article); $this->entityManager->flush(); } catch (\Exception $e) { - $this->logger->error($e->getMessage()); + $this->logger->error('[longform_ingest] saveEachArticle: flush after update failed: '.$e->getMessage()); $this->managerRegistry->resetManager(); } + + return; + } + if (self::wireEventSupersedes($iWire, $candidate)) { + $this->logger->info('[longform_ingest] saveEachArticle: keep DB — merged relay result is not newer (incumbent wins)', [ + 'address' => $pubkey.':…:'.self::longformIngestShortSlug($slug), + 'dbEventId' => $incumbent->getEventId(), + 'seenEventId' => $newId, + 'db_row_id' => $incumbent->getId(), + 'dbCreatedAt' => $iTs, + 'seenCreatedAt' => $cTs, + ]); + } elseif ((string) $incumbent->getEventId() !== $newId) { + $this->logger->notice('[longform_ingest] saveEachArticle: inconclusive supersedes (different ids) — check relays / d-tag match', [ + 'address' => $pubkey.':…:'.self::longformIngestShortSlug($slug), + 'dbEventId' => $incumbent->getEventId(), + 'seenEventId' => $newId, + 'db_row_id' => $incumbent->getId(), + 'dbCreatedAt' => $iTs, + 'seenCreatedAt' => $cTs, + ]); + } + } + + private function persistNewArticle(Article $article, string $reason = 'unspecified'): void + { + try { + $this->logger->info('[longform_ingest] persistNewArticle', [ + 'reason' => $reason, + 'eventId' => $article->getEventId(), + 'slug' => self::longformIngestShortSlug((string) ($article->getSlug() ?? '')), + ]); + $this->entityManager->persist($article); + $this->entityManager->flush(); + } catch (\Exception $e) { + $this->logger->error('[longform_ingest] persistNewArticle failed: '.$e->getMessage(), [ + 'reason' => $reason, + 'eventId' => $article->getEventId(), + ]); + $this->managerRegistry->resetManager(); + } + } + + private static function longformIngestShortSlug(string $slug, int $max = 100): string + { + $t = trim($slug); + if (strlen($t) > $max) { + return substr($t, 0, $max - 1).'…'; + } + + return $t; + } + + /** + * @return array{kind: int, id: string, created_at: int, d: string, nip33: ?string} + */ + private static function longformIngestEventWireSummary(object $e): array + { + $d = self::eventDTagValue($e); + $nip = self::nip33ParameterizedReplaceableAddress($e); + + return [ + 'kind' => (int) ($e->kind ?? 0), + 'id' => (string) ($e->id ?? ''), + 'created_at' => (int) ($e->created_at ?? 0), + 'd' => $d !== null && $d !== '' ? self::longformIngestShortSlug($d, 80) : '', + 'nip33' => $nip, + ]; + } + + private function findLatestLongFormArticleByAuthorAndSlug(string $pubkey, string $slug): ?Article + { + $pubkey = strtolower($pubkey); + /** @var ?Article $row */ + $row = $this->entityManager->getRepository(Article::class)->createQueryBuilder('a') + ->where('LOWER(a.pubkey) = :pk') + ->andWhere('a.slug = :sl') + ->setParameter('pk', $pubkey) + ->setParameter('sl', $slug) + ->orderBy('a.createdAt', 'DESC') + ->setMaxResults(1) + ->getQuery() + ->getOneOrNullResult(); + + return $row; + } + + /** + * Minimal Nostr event shape for {@see self::wireEventSupersedes} when `raw` is not a full wire object. + */ + private static function longFormWireStubFromArticle(Article $a): object + { + $raw = $a->getRaw(); + if (\is_object($raw) && isset($raw->id) && (isset($raw->created_at) || isset($raw->createdAt))) { + return $raw; } + $o = new \stdClass(); + $o->id = (string) ($a->getEventId() ?? ''); + $ca = $a->getCreatedAt(); + $o->created_at = $ca !== null ? $ca->getTimestamp() : 0; + $o->pubkey = (string) ($a->getPubkey() ?? ''); + $k = $a->getKind(); + + $o->kind = $k !== null ? $k->value : KindsEnum::LONGFORM->value; + + return $o; + } + + private function applyLongFormArticleOnto(Article $source, Article $target): void + { + $target->setEventId((string) $source->getEventId()); + $target->setContent($source->getContent()); + $target->setTitle($source->getTitle()); + $target->setSummary($source->getSummary()); + $target->setImage($source->getImage()); + if ($source->getCreatedAt() !== null) { + $target->setCreatedAt($source->getCreatedAt()); + } + $target->setSig($source->getSig()); + if ($source->getPublishedAt() !== null) { + $target->setPublishedAt($source->getPublishedAt()); + } + $target->setTopics($source->getTopics()); + if ($source->getKind() !== null) { + $target->setKind($source->getKind()); + } + $es = $source->getEventStatus(); + $target->setEventStatus($es ?? EventStatusEnum::PUBLISHED); + $target->setRaw($source->getRaw()); } /** @@ -2424,24 +2613,38 @@ class NostrClient } /** - * Batch-fetch longform for category `a` coordinates that are not in the DB; one Nostr call per - * (author × kind) group, only the default relay (faster; magazine 30040 uses the full relay set). + * Batch-fetch latest longform for category `a` coordinates; one Nostr call per (author × kind) + * group. Uses the same full article {@see $defaultRelaySet} as kind 30040 index queries so merged + * NIP-33 results are not stuck on a single relay’s copy. {@see saveEachArticleToTheDatabase} + * upserts by NIP-33 address. * * @param list $addresses kind:pubkey:identifier */ - public function ingestMissingLongformForCategoryCoordinates(array $addresses): void + public function ingestLongformForCategoryCoordinates(array $addresses): void { if ($addresses === []) { + $this->logger->info('[longform_ingest] ingestLongform: no addresses, exit'); + return; } + $relaysForLog = implode(', ', array_map(self::relayLogLabel(...), $this->configuredArticleRelayUrlList())); + $this->logger->info('[longform_ingest] ingestLongform: start', [ + 'address_count' => \count($addresses), + 'relays' => $relaysForLog, + 'addresses_sample' => \array_values(\array_slice($addresses, 0, 15)), + ]); $groups = []; foreach ($addresses as $c) { $parts = explode(':', (string) $c, 3); if (\count($parts) < 3) { + $this->logger->notice('[longform_ingest] ingestLongform: skip malformed coordinate (not kind:pubkey:rest)', [ + 'coordinate' => $c, + ]); + continue; } $kind = (int) $parts[0]; - $pubkey = $parts[1]; + $pubkey = strtolower($parts[1]); $d = trim((string) $parts[2]); if ($d === '' || $kind <= 0) { continue; @@ -2451,28 +2654,75 @@ class NostrClient $groups[$gkey]['kind'] = $kind; $groups[$gkey]['dTags'][] = $d; } - foreach ($groups as $g) { + $this->logger->info('[longform_ingest] ingestLongform: request groups (batched by author+kind)', [ + 'group_count' => \count($groups), + ]); + foreach ($groups as $gkey => $g) { $dTags = array_values(array_unique($g['dTags'] ?? [])); if ($dTags === [] || !isset($g['pubkey'], $g['kind'])) { continue; } $kindEnum = KindsEnum::tryFrom((int) $g['kind']); if ($kindEnum === null) { - $this->logger->notice('Skipping category coordinate with unknown kind', ['kind' => $g['kind']]); + $this->logger->notice('[longform_ingest] skip group: unknown kind', ['kind' => $g['kind']]); continue; } + $this->logger->info('[longform_ingest] ingestLongform: REQ group', [ + 'group_key' => $gkey, + 'filter_kind' => (int) $g['kind'], + 'author_hex64_prefix' => substr((string) $g['pubkey'], 0, 12), + 'd_tag_count' => \count($dTags), + 'd_tags' => array_map( + fn (string $dt): string => self::longformIngestShortSlug($dt, 72), + $dTags + ), + ]); $request = $this->createNostrRequest( [$kindEnum], ['authors' => [(string) $g['pubkey']], 'tag' => ['#d', $dTags]], - $this->buildSingleRelaySet($this->defaultRelayUrl), + $this->defaultRelaySet, ); try { $events = $this->processResponse( $request->send(), static fn (object $event) => $event, ); - foreach (self::mergeNip33ParameterizedWireEvents($events) as $event) { + $rawCount = \count($events); + $rawSample = []; + $si = 0; + foreach ($events as $ev) { + if (!\is_object($ev)) { + continue; + } + if ($si < 25) { + $rawSample[] = self::longformIngestEventWireSummary($ev); + } + ++$si; + } + $this->logger->info('[longform_ingest] ingestLongform: responses merged from relays (pre-NIP-33 per-address merge)', [ + 'raw_wire_count' => $rawCount, + 'sample_up_to_25' => $rawSample, + ]); + if ($rawCount === 0) { + $this->logger->warning('[longform_ingest] ingestLongform: no EVENT rows returned for this filter (check relay index / author filter / #d list)', [ + 'group_key' => $gkey, + 'authors_filter' => $g['pubkey'], + ]); + } + $merged = self::mergeNip33ParameterizedWireEvents($events); + $mergedDetail = []; + foreach ($merged as $ev) { + if (!\is_object($ev)) { + continue; + } + $mergedDetail[] = self::longformIngestEventWireSummary($ev); + } + $this->logger->info('[longform_ingest] ingestLongform: after mergeNip33ParameterizedWireEvents', [ + 'merged_count' => \count($merged), + 'one_row_per_nip33_address' => $mergedDetail, + ]); + foreach ($merged as $event) { if (!\is_object($event)) { continue; } @@ -2480,17 +2730,18 @@ class NostrClient $this->saveEachArticleToTheDatabase($article); } } catch (\Throwable $e) { - $this->logger->error(sprintf( - 'ingestMissingLongformForCategoryCoordinates [%s]: %s', - self::relayLogLabel($this->defaultRelayUrl), - $e->getMessage() - ), [ - 'message' => $e->getMessage(), - 'pubkey' => $g['pubkey'] ?? null, - 'relay' => $this->defaultRelayUrl, - ]); + $this->logger->error( + sprintf('[longform_ingest] ingestLongform: exception in group %s: %s', (string) $gkey, $e->getMessage()), + [ + 'message' => $e->getMessage(), + 'pubkey' => $g['pubkey'] ?? null, + 'trace' => $e->getTraceAsString(), + 'relays' => $relaysForLog, + ], + ); } } + $this->logger->info('[longform_ingest] ingestLongform: done (all groups)'); } private static function magazineEventCreatedAt(mixed $event): int