nzineRepository->findAll();
foreach ($nzines as $nzine) {
if (!$nzine->getFeedUrl()) {
continue;
}
/** @var NzineBot $bot */
$bot = $nzine->getNzineBot();
$bot->setEncryptionService($this->encryptionService);
$key = new Key();
$npub = $key->getPublicKey($bot->getNsec());
$articles = $this->entityManager->getRepository(Article::class)->findBy(['pubkey' => $npub]);
$io->writeln('Found ' . count($articles) . ' existing articles for bot ' . $npub);
$io->section('Fetching RSS for: ' . $nzine->getFeedUrl());
try {
$feed = $this->rssFeedService->fetchFeed($nzine->getFeedUrl());
} catch (\Throwable $e) {
$io->warning('Failed to fetch ' . $nzine->getFeedUrl() . ': ' . $e->getMessage());
continue;
}
foreach ($feed['items'] as $item) {
try {
$event = new Event();
$event->setKind(30023); // NIP-23 Long-form content
// created_at — use parsed pubDate (timestamp int) or now
$createdAt = isset($item['pubDate']) && is_numeric($item['pubDate'])
? (int)$item['pubDate']
: time();
$event->setCreatedAt($createdAt);
// slug (NIP-33 'd' tag) — stable per source item
$base = trim(($nzine->getSlug() ?? 'nzine') . '-' . ($item['title'] ?? ''));
$slug = (string) $slugger->slug($base)->lower();
// HTML → Markdown
$raw = trim($item['content'] ?? '') ?: trim($item['description'] ?? '');
$rawHtml = $this->normalizeWeirdHtml($raw);
$cleanHtml = $this->sanitizeHtml($rawHtml);
$markdown = $this->htmlToMarkdown($cleanHtml);
$event->setContent($markdown);
// Tags
$tags = [
['title', $this->safeStr($item['title'] ?? '')],
['d', $slug],
['source', $this->safeStr($item['link'] ?? '')],
];
// summary (short description)
$summary = $this->ellipsis($this->plainText($item['description'] ?? ''), 280);
if ($summary !== '') {
$tags[] = ['summary', $summary];
}
// image: prefer the image supplied by the feed item; otherwise fall back to
// the src of the first <img> element found in the normalized item HTML.
if (!empty($item['image'])) {
    $tags[] = ['image', $this->safeStr($item['image'])];
} elseif (preg_match('~<img[^>]+src="([^"]+)"~i', $rawHtml, $m)) {
    // The pattern has to stay on a single line: a line break inside the
    // literal becomes part of the regex and prevents it from matching a tag.
    $tags[] = ['image', $m[1]];
}
// categories → "t" tags
// Append them to $tags instead of calling $event->addTag(): all tags are
// handed to the event through the single setTags($tags) call below, and a
// setter invoked after addTag() would overwrite those earlier additions.
if (!empty($item['categories']) && is_array($item['categories'])) {
    foreach ($item['categories'] as $category) {
        $cat = trim((string)$category);
        // Skip blank categories so no empty "t" tag is emitted.
        if ($cat !== '') {
            $tags[] = ['t', $cat];
        }
    }
}
$event->setTags($tags);
// Sign event
$signer = new Sign();
$signer->signEvent($event, $bot->getNsec());
// Publish (add/adjust relays as you like)
try {
$this->nostrClient->publishEvent($event, [
'wss://purplepag.es',
'wss://relay.damus.io',
'wss://nos.lol',
]);
$io->writeln('Published long-form event: ' . ($item['title'] ?? '(no title)'));
} catch (\Throwable $e) {
$io->warning('Publish failed: ' . $e->getMessage());
}
// Persist locally
$article = $this->factory->createFromLongFormContentEvent((object)$event->toArray());
$this->entityManager->persist($article);
} catch (\Throwable $e) {
// keep going on item errors
$io->warning('Item failed: ' . ($item['title'] ?? '(no title)') . ' — ' . $e->getMessage());
}
}
$this->entityManager->flush();
$io->success('RSS fetch complete for: ' . $nzine->getFeedUrl());
// --- Update bot profile (kind 0) using feed metadata ---
$feedMeta = $feed['feed'] ?? null;
if ($feedMeta) {
$profile = [
'name' => $feedMeta['title'] ?? $nzine->getTitle(),
'about' => $feedMeta['description'] ?? '',
'picture' => $feedMeta['image'] ?? null,
'website' => $feedMeta['link'] ?? null,
];
$p = new Event();
$p->setKind(0);
$p->setCreatedAt(time());
$p->setContent(json_encode($profile, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE));
$signer = new Sign();
$signer->signEvent($p, $bot->getNsec());
try {
$this->nostrClient->publishEvent($p, ['wss://purplepag.es']);
$io->success('Published bot profile (kind 0) with feed metadata');
} catch (\Throwable $e) {
$io->warning('Failed to publish bot profile event: ' . $e->getMessage());
}
}
}
return Command::SUCCESS;
}
/** -------- Helpers: HTML prep + converter + small utils -------- */
private function normalizeWeirdHtml(string $html): string
{
// 1) Unwrap Ghost "HTML cards": keep only the
⚡ Tips: ' . htmlspecialchars($m[1]) . '
', $html ); // Ghost/Koenig wrappers: keep useful inner content $html = preg_replace('~$1', $html); // YouTube iframes → links $html = preg_replace_callback( '~~i', fn($m) => '', $html ); // 2) Try to pretty up malformed markup via Tidy (if available) if (function_exists('tidy_parse_string')) { try { $tidy = tidy_parse_string($html, [ 'clean' => true, 'output-xhtml' => true, 'show-body-only' => false, 'wrap' => 0, 'drop-empty-paras' => true, 'merge-divs' => true, 'merge-spans' => true, 'numeric-entities' => false, 'quote-ampersand' => true, ], 'utf8'); $tidy->cleanRepair(); $html = (string)$tidy; } catch (\Throwable $e) { // ignore tidy failures } } // 3) DOM sanitize: remove junk, unwrap html/body/head, allowlist elements/attrs $dom = new \DOMDocument('1.0', 'UTF-8'); libxml_use_internal_errors(true); $loaded = $dom->loadHTML( // force UTF-8 meta so DOMDocument doesn't mangle ''.$html, LIBXML_NOWARNING | LIBXML_NOERROR ); libxml_clear_errors(); if (!$loaded) { // fallback: as-is minus tags we already stripped return $html; } $xpath = new \DOMXPath($dom); // Remove ,