injections succeed (debugging)',
)]
final class ArticleHighlightsAuditCommand extends Command
{
/**
 * Receives the collaborators used by the audit and stores them as readonly properties.
 *
 * @param ArticleRepository           $articleRepository           Queried with findLatestBySlug() to locate the article to audit.
 * @param ArticleHighlightRepository  $articleHighlightRepository  Queried with findByArticle() for the article's stored highlights.
 * @param Converter                   $converter                   Turns the article content into HTML via convertToHTML().
 * @param ArticleBodyHighlightInjector $articleBodyHighlightInjector Injects highlights into that HTML via inject().
 */
public function __construct(
private readonly ArticleRepository $articleRepository,
private readonly ArticleHighlightRepository $articleHighlightRepository,
private readonly Converter $converter,
private readonly ArticleBodyHighlightInjector $articleBodyHighlightInjector,
) {
// No command name is passed to the parent constructor here.
parent::__construct();
}
/**
 * Declares the command-line interface: a required `slug` argument and an
 * optional `--npub` option used to cross-check the article author.
 */
protected function configure(): void
{
    $this->addArgument(
        'slug',
        InputArgument::REQUIRED,
        'Article d-identifier (slug), e.g. bitcoin-is-time',
    );

    $this->addOption(
        'npub',
        null,
        InputOption::VALUE_OPTIONAL,
        'If set, must match the article author (npub1…)',
    );
}
/**
 * Audits highlight injection for a single article and prints a report.
 *
 * Steps:
 *  1. Load the latest article row for the given slug.
 *  2. If `--npub` was given, check that it matches the article author.
 *  3. Convert the article content to HTML.
 *  4. Run the injector once with all highlights, then once per highlight in
 *     isolation, and report which highlights end up in the HTML.
 *
 * @return int Command::FAILURE for an empty slug, a missing article, an npub
 *             mismatch or a CommonMark conversion error; Command::SUCCESS once
 *             the report has been printed (even if no highlight matched).
 */
protected function execute(InputInterface $input, OutputInterface $output): int
{
    $io = new SymfonyStyle($input, $output);

    $slug = trim((string) $input->getArgument('slug'));
    if ($slug === '') {
        $io->error('Empty slug.');

        return Command::FAILURE;
    }

    $article = $this->articleRepository->findLatestBySlug($slug);
    if (null === $article) {
        $io->error('No article row for this slug.');

        return Command::FAILURE;
    }

    $key = new Key();
    $authorHex = (string) $article->getPubkey();
    $expectedNpub = $key->convertPublicKeyToBech32($authorHex);

    // The option is VALUE_OPTIONAL: a bare `--npub` (null) or an empty value skips the check.
    $optNpub = $input->getOption('npub');
    if (\is_string($optNpub) && $optNpub !== '') {
        // Compare in hex; the stored pubkey is lowercased so casing cannot cause a false mismatch.
        if ($key->convertToHex($optNpub) !== strtolower($authorHex)) {
            $io->error('npub does not match this article’s author (expected: '.$expectedNpub.').');

            return Command::FAILURE;
        }
    }

    $io->title('Article highlights audit: '.$slug);
    $io->writeln('Author npub: '.$expectedNpub);
    $io->writeln(
        'Article id: '.(string) $article->getId().' · kind: '.($article->getKind()?->value ?? 'null')
    );

    $highlights = $this->articleHighlightRepository->findByArticle($article);
    $highlightCount = \count($highlights);
    $io->writeln('Rows from findByArticle: '.$highlightCount);

    try {
        $html = $this->converter->convertToHTML((string) $article->getContent());
    } catch (CommonMarkException $e) {
        $io->error('CommonMark: '.$e->getMessage());

        return Command::FAILURE;
    }

    // Batch pass: every highlight handed to the injector at once.
    $out = $this->articleBodyHighlightInjector->inject($html, $highlights);
    $injected = $out['injectedEventIds'];
    $markCount = \substr_count($out['html'], 'user-highlight__marker');
    $io->writeln(
        'Injected event ids with all highlights together (duplicates = same passage): '.\count($injected)
    );
    $io->writeln(' count in body: '.$markCount);

    // Isolation pass: one highlight at a time against the same pristine HTML.
    $io->section('Each highlight in isolation (same HTML, one 9802 at a time)');
    $rows = [];
    $isolatedOk = 0;
    foreach ($highlights as $h) {
        if (! $h instanceof ArticleHighlight) {
            continue;
        }

        $eid = \strtolower($h->getEventId());
        $one = $this->articleBodyHighlightInjector->inject($html, [$h]);

        // A highlight counts as injected when an element with id="highlight-<event id>" is present.
        $found = 1 === \preg_match(
            '/\bid=([\'"])highlight-'.preg_quote($eid, '/').'\1/i',
            $one['html']
        );
        if ($found) {
            ++$isolatedOk;
        }

        $rows[] = [
            $found ? 'yes' : 'no',
            $eid,
            $this->excerptOneLine((string) $h->getContent(), 72),
        ];
    }
    $io->table(['Match', 'event id', 'stored `content` (excerpt)'], $rows);

    // Compare against the rows actually audited: entries skipped by the instanceof
    // guard above never get a table row, so they must not count as "Match: no".
    if ($isolatedOk < \count($rows)) {
        $io->writeln(
            '‘Match: no’ means the stored passage is absent from the flattened body text, or it diverges '.
            '(soft hyphens, smart quotes, edits, footnotes, etc.). Re-sync kind 9802 from relays, or adjust matching in ArticleBodyHighlightInjector.'
        );
    }

    if ($markCount < 1 && $highlightCount > 0) {
        $io->warning('With all highlights together, nothing was injected. Per-row check above still shows if any row matches in isolation.');
    } elseif ($highlightCount < 1) {
        $io->note('No article_highlight rows for this slug+author. Run prewarm highlight sync or check MySQL.');
    } elseif ($markCount > 0) {
        $io->success('At least one was produced when all rows were passed to the injector together.');
    }

    if ($io->isVerbose() && $injected !== []) {
        $io->section('Injected event ids (batch, may include several per passage)');
        $io->listing($injected);
    }

    return Command::SUCCESS;
}
/**
 * Builds the one-line excerpt shown in the audit table, reflecting the bytes of
 * {@see ArticleHighlight::getContent()} as faithfully as possible.
 *
 * Only line breaks (anything matched by the regex `\R`) are folded to a single
 * space so the row stays on one line. Other Unicode separators (`\p{Z}`) are not
 * collapsed and U+00AD (soft hyphen) is not removed; doing that made passages
 * look like they contained ASCII spaces the Nostr `content` never had.
 *
 * @param string $s   Raw stored content.
 * @param int    $max Maximum excerpt length in characters, ellipsis included;
 *                    values below 1 yield an empty string.
 *
 * @return string The excerpt, ending in `…` when it was truncated.
 */
private function excerptOneLine(string $s, int $max): string
{
    if ($max < 1) {
        // A non-positive budget would otherwise hand mb_substr() a negative length.
        return '';
    }

    // With the `u` modifier preg_replace() returns null on malformed UTF-8. Fall back
    // to a byte-wise fold so such content is still shown instead of becoming ''.
    $oneLine = \preg_replace('/\R/u', ' ', $s)
        ?? \str_replace(["\r\n", "\n", "\r"], ' ', $s);

    if (\mb_strlen($oneLine, 'UTF-8') <= $max) {
        return $oneLine;
    }

    // Reserve one character of the budget for the ellipsis.
    return \mb_substr($oneLine, 0, $max - 1, 'UTF-8').'…';
}
}