Browse Source

replace articles

imwald
Silberengel 5 days ago
parent
commit
8e12daf8bb
  1. 8
      src/Command/PrewarmCommand.php
  2. 63
      src/Service/MagazineContentService.php
  3. 295
      src/Service/NostrClient.php

8
src/Command/PrewarmCommand.php

@ -185,13 +185,13 @@ final class PrewarmCommand extends Command @@ -185,13 +185,13 @@ final class PrewarmCommand extends Command
}
}
$io->section('Long-form in DB (category `a` tags missing from MySQL)');
$io->section('Long-form in DB (category `a` tags — refresh from Nostr)');
try {
$n = $this->magazineContent->ingestMissingLongformForAllMagazineCategories();
$n = $this->magazineContent->ingestLongformForAllMagazineCategories();
if ($n === 0) {
$io->note('No missing long-form rows for category `a` coordinates (or empty magazine store).');
$io->note('No category `a` coordinates in the magazine store (or empty category indices).');
} else {
$io->writeln(sprintf('Fetched or attempted ingest for <info>%d</info> missing coordinate(s).', $n));
$io->writeln(sprintf('Fetched latest long-form for <info>%d</info> coordinate(s) (new rows + NIP-33 updates).', $n));
}
} catch (\Throwable $e) {
$this->logger->error('app:prewarm longform ingest failed', ['e' => $e]);

63
src/Service/MagazineContentService.php

@ -194,7 +194,7 @@ final class MagazineContentService @@ -194,7 +194,7 @@ final class MagazineContentService
/**
* Category listing from the persisted 30040 index and DB only. Does not call relays.
* Missing `Article` rows (not yet in MySQL) appear until `app:prewarm` backfills.
* Rows come from MySQL only; run `app:prewarm` to sync new `a` tags and replaceable revisions.
*
* @return array{list: list<Article>, category: array{title: string, summary: string}}
*/
@ -263,19 +263,20 @@ final class MagazineContentService @@ -263,19 +263,20 @@ final class MagazineContentService
}
/**
* For every category in the root index, fetch Nostr long-form for `a` tags missing in MySQL.
* Nostr I/O; intended for {@see PrewarmCommand} / cron only.
* For every category in the store, fetch the latest Nostr long-form for each `a` tag so new
* posts are ingested and NIP-33 replaceable updates refresh existing MySQL rows. Nostr I/O;
* intended for {@see PrewarmCommand} / cron only.
*/
public function ingestMissingLongformForAllMagazineCategories(): int
public function ingestLongformForAllMagazineCategories(): int
{
$n = 0;
foreach ($this->getCategorySlugsFromStore() as $catSlug) {
$missing = $this->findMissingLongformCoordinatesForCategory($catSlug);
if ($missing === []) {
$all = $this->findAllLongformCoordinatesForCategory($catSlug);
if ($all === []) {
continue;
}
$this->nostrClient->ingestMissingLongformForCategoryCoordinates($missing);
$n += \count($missing);
$this->nostrClient->ingestLongformForCategoryCoordinates($all);
$n += \count($all);
}
return $n;
@ -284,56 +285,30 @@ final class MagazineContentService @@ -284,56 +285,30 @@ final class MagazineContentService
/**
* @return list<string> Nostr coordinates kind:pubkey:identifier
*/
private function findMissingLongformCoordinatesForCategory(string $slug): array
private function findAllLongformCoordinatesForCategory(string $slug): array
{
$catIndex = $this->store->getCategory($slug);
if ($catIndex === null) {
return [];
}
$coordinates = [];
$out = [];
foreach ($catIndex->getTags() as $tag) {
if (NostrEventTags::tagNameMatches($tag, 'a')) {
$seq = NostrEventTags::rowToStringList($tag);
if ($seq !== null && isset($seq[1]) && (string) $seq[1] !== '') {
$coordinates[] = (string) $seq[1];
}
}
}
if ($coordinates === []) {
return [];
}
$pairs = [];
foreach ($coordinates as $coordinate) {
$parts = explode(':', (string) $coordinate, 3);
if (\count($parts) < 3) {
if (!NostrEventTags::tagNameMatches($tag, 'a')) {
continue;
}
$slugPart = trim((string) $parts[2]);
if ($slugPart === '') {
$seq = NostrEventTags::rowToStringList($tag);
if ($seq === null || !isset($seq[1]) || (string) $seq[1] === '') {
continue;
}
$pairs[] = [
'pubkey' => (string) $parts[1],
'slug' => $slugPart,
];
}
if ($pairs === []) {
return [];
}
$byAddress = $this->articleRepository->findByAuthorAndSlugIndexed($pairs);
$missing = [];
foreach ($coordinates as $coordinate) {
$parts = explode(':', (string) $coordinate, 3);
if (\count($parts) < 3) {
$coordinate = (string) $seq[1];
$parts = explode(':', $coordinate, 3);
if (\count($parts) < 3 || trim((string) $parts[2]) === '') {
continue;
}
$k = (string) $parts[1]."\0".trim((string) $parts[2]);
if (!isset($byAddress[$k])) {
$missing[] = (string) $coordinate;
}
$out[] = $coordinate;
}
return $missing;
return $out;
}
/**

295
src/Service/NostrClient.php

@ -5,6 +5,7 @@ namespace App\Service; @@ -5,6 +5,7 @@ namespace App\Service;
use App\Entity\Article;
use App\Entity\User;
use App\Entity\Event as PublicationEventEntity;
use App\Enum\EventStatusEnum;
use App\Enum\KindsEnum;
use App\Factory\ArticleFactory;
use Doctrine\ORM\EntityManagerInterface;
@ -2046,17 +2047,205 @@ class NostrClient @@ -2046,17 +2047,205 @@ class NostrClient
*/
public function saveEachArticleToTheDatabase(Article $article): void
{
$saved = $this->entityManager->getRepository(Article::class)->findOneBy(['eventId' => $article->getEventId()]);
if (!$saved) {
$newId = (string) ($article->getEventId() ?? '');
if ($newId === '') {
$this->logger->info('[longform_ingest] saveEachArticle: skip, empty eventId on Article', [
'title' => $article->getTitle(),
]);
return;
}
$repo = $this->entityManager->getRepository(Article::class);
if ($repo->findOneBy(['eventId' => $newId]) !== null) {
$this->logger->info('[longform_ingest] saveEachArticle: skip, DB already has this exact event id (no work)', [
'eventId' => $newId,
'slug' => $article->getSlug(),
]);
return;
}
$pubkey = strtolower((string) ($article->getPubkey() ?? ''));
$slug = trim((string) ($article->getSlug() ?? ''));
if ($pubkey === '' || $slug === '') {
$this->logger->info('[longform_ingest] saveEachArticle: persist new (missing pubkey or slug on entity)', [
'eventId' => $newId,
'pubkey_empty' => $pubkey === '',
'slug' => $slug,
]);
$this->persistNewArticle($article, 'missing_pubkey_or_slug_on_entity');
return;
}
$incumbent = $this->findLatestLongFormArticleByAuthorAndSlug($pubkey, $slug);
if ($incumbent === null) {
$this->logger->info('[longform_ingest] saveEachArticle: persist new row (no DB row for author+slug)', [
'eventId' => $newId,
'address' => $pubkey.':…:'.self::longformIngestShortSlug($slug),
]);
$this->persistNewArticle($article, 'no_db_row_for_nip33_address');
return;
}
$candidate = $article->getRaw();
if (!\is_object($candidate)) {
$this->logger->warning('[longform_ingest] saveEachArticle: new Article has no raw wire; trying insert as new', [
'eventId' => $newId,
]);
$this->persistNewArticle($article, 'no_raw_on_incoming_article');
return;
}
$iWire = self::longFormWireStubFromArticle($incumbent);
$cTs = self::magazineEventCreatedAt($candidate);
$iTs = self::magazineEventCreatedAt($iWire);
if (self::wireEventSupersedes($candidate, $iWire)) {
$this->logger->info('[longform_ingest] saveEachArticle: NIP-33 update — candidate wins, flushing DB row', [
'address' => $pubkey.':…:'.self::longformIngestShortSlug($slug),
'from_event_id' => $incumbent->getEventId(),
'to_event_id' => $newId,
'db_row_id' => $incumbent->getId(),
'incumbent_created_at' => $iTs,
'candidate_created_at' => $cTs,
]);
$this->applyLongFormArticleOnto($article, $incumbent);
if ($incumbent->getPubkey() !== $pubkey) {
$incumbent->setPubkey($pubkey);
}
try {
$this->logger->info('Saving article', ['article' => $article]);
$this->entityManager->persist($article);
$this->entityManager->flush();
} catch (\Exception $e) {
$this->logger->error($e->getMessage());
$this->logger->error('[longform_ingest] saveEachArticle: flush after update failed: '.$e->getMessage());
$this->managerRegistry->resetManager();
}
return;
}
if (self::wireEventSupersedes($iWire, $candidate)) {
$this->logger->info('[longform_ingest] saveEachArticle: keep DB — merged relay result is not newer (incumbent wins)', [
'address' => $pubkey.':…:'.self::longformIngestShortSlug($slug),
'dbEventId' => $incumbent->getEventId(),
'seenEventId' => $newId,
'db_row_id' => $incumbent->getId(),
'dbCreatedAt' => $iTs,
'seenCreatedAt' => $cTs,
]);
} elseif ((string) $incumbent->getEventId() !== $newId) {
$this->logger->notice('[longform_ingest] saveEachArticle: inconclusive supersedes (different ids) — check relays / d-tag match', [
'address' => $pubkey.':…:'.self::longformIngestShortSlug($slug),
'dbEventId' => $incumbent->getEventId(),
'seenEventId' => $newId,
'db_row_id' => $incumbent->getId(),
'dbCreatedAt' => $iTs,
'seenCreatedAt' => $cTs,
]);
}
}
/**
 * Insert an Article as a new row and flush immediately.
 *
 * Best-effort: an \Exception raised while logging, persisting or flushing is
 * logged and the Doctrine manager is reset; nothing is rethrown to the caller.
 *
 * @param Article $article entity to persist
 * @param string  $reason  short tag recorded in the log context to say why an insert was chosen
 */
private function persistNewArticle(Article $article, string $reason = 'unspecified'): void
{
    try {
        $insertContext = [
            'reason' => $reason,
            'eventId' => $article->getEventId(),
            'slug' => self::longformIngestShortSlug((string) ($article->getSlug() ?? '')),
        ];
        $this->logger->info('[longform_ingest] persistNewArticle', $insertContext);

        $manager = $this->entityManager;
        $manager->persist($article);
        $manager->flush();
    } catch (\Exception $failure) {
        $failureContext = [
            'reason' => $reason,
            'eventId' => $article->getEventId(),
        ];
        $this->logger->error(
            '[longform_ingest] persistNewArticle failed: '.$failure->getMessage(),
            $failureContext,
        );
        // Reset the manager so later persists in the same run are not blocked by this failure.
        $this->managerRegistry->resetManager();
    }
}
/**
 * Trim a slug / d-tag and shorten it for log output.
 *
 * Length is measured in UTF-8 characters, not bytes, so a multi-byte slug is
 * never cut in the middle of a character (which would put invalid UTF-8 into
 * the log context) and a slug that fits within $max characters is not
 * truncated just because it occupies more bytes.
 *
 * @param string $slug raw slug or d-tag value
 * @param int    $max  maximum number of characters in the result, including the trailing ellipsis
 *
 * @return string the trimmed slug, or its first ($max - 1) characters followed by '…'
 */
private static function longformIngestShortSlug(string $slug, int $max = 100): string
{
    $trimmed = trim($slug);
    if (mb_strlen($trimmed, 'UTF-8') <= $max) {
        return $trimmed;
    }

    // Clamp so a non-positive $max cannot turn into a negative (count-from-end) length.
    return mb_substr($trimmed, 0, max(0, $max - 1), 'UTF-8').'…';
}
/**
 * Compact, log-friendly description of one wire event.
 *
 * The `d` entry is the event's d-tag value shortened to 80 via
 * {@see self::longformIngestShortSlug}, or '' when the tag is absent or empty.
 *
 * @return array{kind: int, id: string, created_at: int, d: string, nip33: ?string}
 */
private static function longformIngestEventWireSummary(object $e): array
{
    $dTag = self::eventDTagValue($e);
    $address = self::nip33ParameterizedReplaceableAddress($e);

    if ($dTag === null || $dTag === '') {
        $shortD = '';
    } else {
        $shortD = self::longformIngestShortSlug($dTag, 80);
    }

    $summary = [];
    $summary['kind'] = (int) ($e->kind ?? 0);
    $summary['id'] = (string) ($e->id ?? '');
    $summary['created_at'] = (int) ($e->created_at ?? 0);
    $summary['d'] = $shortD;
    $summary['nip33'] = $address;

    return $summary;
}
/**
 * Look up the most recently created Article row for an author + slug pair.
 *
 * The pubkey comparison is case-insensitive (both sides are lower-cased);
 * the slug must match exactly. Rows are ordered by createdAt descending and
 * only the first one is returned.
 *
 * @return ?Article the newest matching row, or null when none exists
 */
private function findLatestLongFormArticleByAuthorAndSlug(string $pubkey, string $slug): ?Article
{
    $articles = $this->entityManager->getRepository(Article::class);

    $query = $articles->createQueryBuilder('a')
        ->where('LOWER(a.pubkey) = :pk')
        ->andWhere('a.slug = :sl')
        ->setParameter('pk', strtolower($pubkey))
        ->setParameter('sl', $slug)
        ->orderBy('a.createdAt', 'DESC')
        ->setMaxResults(1)
        ->getQuery();

    /** @var ?Article $latest */
    $latest = $query->getOneOrNullResult();

    return $latest;
}
/**
 * Produce an event-shaped object for a stored Article so it can be passed to
 * {@see self::wireEventSupersedes}.
 *
 * When the Article's `raw` value is an object carrying an `id` plus either
 * `created_at` or `createdAt`, that object is returned unchanged. Otherwise a
 * stdClass stub is built from the entity's own fields: id, created_at
 * (0 when the entity has no createdAt), pubkey and kind (long-form kind when
 * the entity has none).
 */
private static function longFormWireStubFromArticle(Article $a): object
{
    $stored = $a->getRaw();
    if (\is_object($stored) && isset($stored->id)) {
        if (isset($stored->created_at) || isset($stored->createdAt)) {
            return $stored;
        }
    }

    $eventId = (string) ($a->getEventId() ?? '');
    $createdAt = $a->getCreatedAt()?->getTimestamp() ?? 0;
    $author = (string) ($a->getPubkey() ?? '');
    $kindValue = $a->getKind()?->value ?? KindsEnum::LONGFORM->value;

    return (object) [
        'id' => $eventId,
        'created_at' => $createdAt,
        'pubkey' => $author,
        'kind' => $kindValue,
    ];
}
/**
 * Copy the revision-specific fields of $source onto $target.
 *
 * Event id, content, title, summary, image, signature, topics, event status
 * and raw payload are always overwritten. createdAt, publishedAt and kind are
 * only overwritten when $source has a non-null value. A null event status on
 * $source becomes EventStatusEnum::PUBLISHED. Nothing is persisted or flushed
 * here.
 */
private function applyLongFormArticleOnto(Article $source, Article $target): void
{
    $target->setEventId((string) $source->getEventId());
    $target->setContent($source->getContent());
    $target->setTitle($source->getTitle());
    $target->setSummary($source->getSummary());
    $target->setImage($source->getImage());

    $createdAt = $source->getCreatedAt();
    if (null !== $createdAt) {
        $target->setCreatedAt($createdAt);
    }

    $target->setSig($source->getSig());

    $publishedAt = $source->getPublishedAt();
    if (null !== $publishedAt) {
        $target->setPublishedAt($publishedAt);
    }

    $target->setTopics($source->getTopics());

    $kind = $source->getKind();
    if (null !== $kind) {
        $target->setKind($kind);
    }

    $target->setEventStatus($source->getEventStatus() ?? EventStatusEnum::PUBLISHED);
    $target->setRaw($source->getRaw());
}
/**
@ -2424,24 +2613,38 @@ class NostrClient @@ -2424,24 +2613,38 @@ class NostrClient
}
/**
* Batch-fetch longform for category `a` coordinates that are not in the DB; one Nostr call per
* (author × kind) group, only the default relay (faster; magazine 30040 uses the full relay set).
* Batch-fetch latest longform for category `a` coordinates; one Nostr call per (author × kind)
* group. Uses the same full article {@see $defaultRelaySet} as kind 30040 index queries so merged
* NIP-33 results are not stuck on a single relay’s copy. {@see saveEachArticleToTheDatabase}
* upserts by NIP-33 address.
*
* @param list<string> $addresses kind:pubkey:identifier
*/
public function ingestMissingLongformForCategoryCoordinates(array $addresses): void
public function ingestLongformForCategoryCoordinates(array $addresses): void
{
if ($addresses === []) {
$this->logger->info('[longform_ingest] ingestLongform: no addresses, exit');
return;
}
$relaysForLog = implode(', ', array_map(self::relayLogLabel(...), $this->configuredArticleRelayUrlList()));
$this->logger->info('[longform_ingest] ingestLongform: start', [
'address_count' => \count($addresses),
'relays' => $relaysForLog,
'addresses_sample' => \array_values(\array_slice($addresses, 0, 15)),
]);
$groups = [];
foreach ($addresses as $c) {
$parts = explode(':', (string) $c, 3);
if (\count($parts) < 3) {
$this->logger->notice('[longform_ingest] ingestLongform: skip malformed coordinate (not kind:pubkey:rest)', [
'coordinate' => $c,
]);
continue;
}
$kind = (int) $parts[0];
$pubkey = $parts[1];
$pubkey = strtolower($parts[1]);
$d = trim((string) $parts[2]);
if ($d === '' || $kind <= 0) {
continue;
@ -2451,28 +2654,75 @@ class NostrClient @@ -2451,28 +2654,75 @@ class NostrClient
$groups[$gkey]['kind'] = $kind;
$groups[$gkey]['dTags'][] = $d;
}
foreach ($groups as $g) {
$this->logger->info('[longform_ingest] ingestLongform: request groups (batched by author+kind)', [
'group_count' => \count($groups),
]);
foreach ($groups as $gkey => $g) {
$dTags = array_values(array_unique($g['dTags'] ?? []));
if ($dTags === [] || !isset($g['pubkey'], $g['kind'])) {
continue;
}
$kindEnum = KindsEnum::tryFrom((int) $g['kind']);
if ($kindEnum === null) {
$this->logger->notice('Skipping category coordinate with unknown kind', ['kind' => $g['kind']]);
$this->logger->notice('[longform_ingest] skip group: unknown kind', ['kind' => $g['kind']]);
continue;
}
$this->logger->info('[longform_ingest] ingestLongform: REQ group', [
'group_key' => $gkey,
'filter_kind' => (int) $g['kind'],
'author_hex64_prefix' => substr((string) $g['pubkey'], 0, 12),
'd_tag_count' => \count($dTags),
'd_tags' => array_map(
fn (string $dt): string => self::longformIngestShortSlug($dt, 72),
$dTags
),
]);
$request = $this->createNostrRequest(
[$kindEnum],
['authors' => [(string) $g['pubkey']], 'tag' => ['#d', $dTags]],
$this->buildSingleRelaySet($this->defaultRelayUrl),
$this->defaultRelaySet,
);
try {
$events = $this->processResponse(
$request->send(),
static fn (object $event) => $event,
);
foreach (self::mergeNip33ParameterizedWireEvents($events) as $event) {
$rawCount = \count($events);
$rawSample = [];
$si = 0;
foreach ($events as $ev) {
if (!\is_object($ev)) {
continue;
}
if ($si < 25) {
$rawSample[] = self::longformIngestEventWireSummary($ev);
}
++$si;
}
$this->logger->info('[longform_ingest] ingestLongform: responses merged from relays (pre-NIP-33 per-address merge)', [
'raw_wire_count' => $rawCount,
'sample_up_to_25' => $rawSample,
]);
if ($rawCount === 0) {
$this->logger->warning('[longform_ingest] ingestLongform: no EVENT rows returned for this filter (check relay index / author filter / #d list)', [
'group_key' => $gkey,
'authors_filter' => $g['pubkey'],
]);
}
$merged = self::mergeNip33ParameterizedWireEvents($events);
$mergedDetail = [];
foreach ($merged as $ev) {
if (!\is_object($ev)) {
continue;
}
$mergedDetail[] = self::longformIngestEventWireSummary($ev);
}
$this->logger->info('[longform_ingest] ingestLongform: after mergeNip33ParameterizedWireEvents', [
'merged_count' => \count($merged),
'one_row_per_nip33_address' => $mergedDetail,
]);
foreach ($merged as $event) {
if (!\is_object($event)) {
continue;
}
@ -2480,17 +2730,18 @@ class NostrClient @@ -2480,17 +2730,18 @@ class NostrClient
$this->saveEachArticleToTheDatabase($article);
}
} catch (\Throwable $e) {
$this->logger->error(sprintf(
'ingestMissingLongformForCategoryCoordinates [%s]: %s',
self::relayLogLabel($this->defaultRelayUrl),
$e->getMessage()
), [
'message' => $e->getMessage(),
'pubkey' => $g['pubkey'] ?? null,
'relay' => $this->defaultRelayUrl,
]);
$this->logger->error(
sprintf('[longform_ingest] ingestLongform: exception in group %s: %s', (string) $gkey, $e->getMessage()),
[
'message' => $e->getMessage(),
'pubkey' => $g['pubkey'] ?? null,
'trace' => $e->getTraceAsString(),
'relays' => $relaysForLog,
],
);
}
}
$this->logger->info('[longform_ingest] ingestLongform: done (all groups)');
}
private static function magazineEventCreatedAt(mixed $event): int

Loading…
Cancel
Save