6 changed files with 476 additions and 1 deletions
@ -0,0 +1,354 @@
@@ -0,0 +1,354 @@
|
||||
<?php |
||||
|
||||
declare(strict_types=1); |
||||
|
||||
namespace App\Controller; |
||||
|
||||
use App\Entity\Article; |
||||
use App\Enum\EventStatusEnum; |
||||
use App\Repository\ArticleRepository; |
||||
use App\Service\MagazineContentService; |
||||
use App\Service\MagazineIndexStore; |
||||
use Symfony\Bundle\FrameworkBundle\Controller\AbstractController; |
||||
use Symfony\Component\DependencyInjection\ParameterBag\ParameterBagInterface; |
||||
use Symfony\Component\HttpFoundation\Request; |
||||
use Symfony\Component\HttpFoundation\Response; |
||||
use Symfony\Component\Routing\Attribute\Route; |
||||
use Symfony\Component\Routing\Generator\UrlGeneratorInterface; |
||||
|
||||
/** |
||||
* Sitemap, robots.txt, and Atom feeds for the magazine and each category. |
||||
*/ |
||||
final class SeoController extends AbstractController |
||||
{ |
||||
private const FEED_MAX_ITEMS = 100; |
||||
|
||||
/**
 * Wires the collaborators used to build the sitemap, robots.txt and Atom feeds.
 *
 * All dependencies are promoted to private readonly properties.
 */
public function __construct(
    private readonly ArticleRepository $articleRepository,
    private readonly MagazineContentService $magazineContent,
    private readonly MagazineIndexStore $magazineIndexStore,
    private readonly ParameterBagInterface $params,
) {}
||||
|
||||
/**
 * Renders the XML sitemap.
 *
 * Lists, in order: the home page, the community articles index (only when the
 * "community_articles" parameter is truthy), one URL per magazine category
 * slug, and one URL per distinct article slug. Only article URLs carry a
 * <lastmod> element.
 *
 * @return Response an "application/xml" response (see xmlResponse())
 */
#[Route('/sitemap.xml', name: 'sitemap', methods: ['GET'])]
public function sitemap(): Response
{
    $urls = [
        ['loc' => $this->absoluteUrlForRoute('home'), 'lastmod' => null],
    ];

    if ((bool) $this->params->get('community_articles')) {
        $urls[] = ['loc' => $this->absoluteUrlForRoute('articles'), 'lastmod' => null];
    }

    foreach ($this->magazineContent->getCategorySlugsFromStore() as $categorySlug) {
        $urls[] = [
            'loc' => $this->absoluteUrlForRoute('magazine-category', ['slug' => $categorySlug]),
            'lastmod' => null,
        ];
    }

    // NOTE(review): 8000 is passed straight to the repository; presumably a row limit — confirm.
    $articles = $this->articleRepository->findPublishedForSyndication(8000);
    foreach ($this->dedupeArticlesByLatestRevision($articles) as $slug => $article) {
        $urls[] = [
            // Use the dedupe key (the trimmed slug) so the sitemap URL matches the
            // permalink emitted by atomEntryForArticle(), which also trims the slug.
            // The cast restores the string form: PHP stores numeric-string keys as ints.
            'loc' => $this->absoluteUrlForRoute('article-slug', ['slug' => (string) $slug]),
            'lastmod' => $this->articleLastMod($article),
        ];
    }

    $body = '<?xml version="1.0" encoding="UTF-8"?>'
        ."\n"
        .'<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">';

    foreach ($urls as $row) {
        $body .= "\n <url>\n <loc>".$this->xmlText($row['loc']).'</loc>';
        // Static pages have a null lastmod and therefore emit no <lastmod> element.
        if ($row['lastmod'] instanceof \DateTimeInterface) {
            $body .= "\n <lastmod>".$row['lastmod']->format('Y-m-d').'</lastmod>';
        }
        $body .= "\n </url>";
    }
    $body .= "\n</urlset>\n";

    return $this->xmlResponse($body);
}
||||
|
||||
/**
 * Serves robots.txt: allows every user agent everywhere and advertises the
 * absolute URL of the sitemap route.
 *
 * @return Response a plain-text response cacheable for one hour
 */
#[Route('/robots.txt', name: 'robots_txt', methods: ['GET'])]
public function robots(): Response
{
    $content = sprintf(
        "User-agent: *\nAllow: /\n\nSitemap: %s\n",
        $this->absoluteUrlForRoute('sitemap'),
    );

    $headers = [
        'Content-Type' => 'text/plain; charset=UTF-8',
        'Cache-Control' => 'public, max-age=3600',
    ];

    return new Response($content, Response::HTTP_OK, $headers);
}
||||
|
||||
/**
 * Atom feed covering all articles returned by the repository for syndication.
 *
 * Articles are deduplicated per slug, ordered by creation date (newest first,
 * articles without a creation date last) and capped at FEED_MAX_ITEMS entries.
 *
 * @param Request $request current request; its host is used to build URN ids
 *
 * @return Response an "application/atom+xml" response (see atomResponse())
 */
#[Route('/feeds/magazine.xml', name: 'feed_magazine', methods: ['GET'])]
public function feedMagazine(Request $request): Response
{
    $siteName = (string) $this->params->get('name');

    $entries = \array_values(
        $this->dedupeArticlesByLatestRevision(
            $this->articleRepository->findPublishedForSyndication(8000),
        ),
    );

    usort($entries, static function (Article $left, Article $right): int {
        $leftCreated = $left->getCreatedAt();
        $rightCreated = $right->getCreatedAt();

        return match (true) {
            $leftCreated === null && $rightCreated === null => 0,
            // Undated articles sink to the end of the list.
            $leftCreated === null => 1,
            $rightCreated === null => -1,
            // Descending order: newest creation date first.
            default => $rightCreated <=> $leftCreated,
        };
    });

    $entries = \array_slice($entries, 0, self::FEED_MAX_ITEMS);

    $feedUrl = $this->absoluteUrlForRoute('feed_magazine');
    $homeUrl = $this->absoluteUrlForRoute('home');
    $feedId = 'urn:web:'.$this->urlHostId($request).':feed:magazine';
    $lastUpdated = $this->newestArticleUpdate($entries);

    return $this->atomResponse(
        $this->buildAtomFeed(
            $siteName.': all articles',
            (string) $this->params->get('description'),
            $feedId,
            $feedUrl,
            $homeUrl,
            $lastUpdated,
            $request,
            $entries,
        ),
    );
}
||||
|
||||
/**
 * Atom feed for a single magazine category.
 *
 * Only articles whose event status is PUBLISHED or ARCHIVED are included, and
 * at most FEED_MAX_ITEMS of them, in the order the category page data provides.
 *
 * @param Request $request current request; its host is used to build URN ids
 * @param string  $slug    category slug taken from the route
 *
 * @return Response an "application/atom+xml" response (see atomResponse())
 *
 * @throws \Symfony\Component\HttpKernel\Exception\NotFoundHttpException when the index store has no such category
 */
#[Route('/feeds/cat/{slug}.xml', name: 'feed_category', methods: ['GET'])]
public function feedCategory(Request $request, string $slug): Response
{
    if ($this->magazineIndexStore->getCategory($slug) === null) {
        throw $this->createNotFoundException('Unknown category');
    }

    $siteName = (string) $this->params->get('name');
    $pageData = $this->magazineContent->getCategoryPageData($slug);
    $candidates = $pageData['list'] ?? [];
    $categoryTitle = (string) (
        $pageData['category']['title'] ?? $this->magazineContent->getCategoryDisplayTitle($slug)
    );
    $categorySummary = (string) ($pageData['category']['summary'] ?? '');

    $isSyndicated = static fn (Article $article): bool => \in_array(
        $article->getEventStatus(),
        [EventStatusEnum::PUBLISHED, EventStatusEnum::ARCHIVED],
        true,
    );

    // array_values() re-indexes the filtered list; slicing a shorter list is a no-op.
    $entries = \array_slice(
        array_values(array_filter($candidates, $isSyndicated)),
        0,
        self::FEED_MAX_ITEMS,
    );

    $feedUrl = $this->absoluteUrlForRoute('feed_category', ['slug' => $slug]);
    $categoryUrl = $this->absoluteUrlForRoute('magazine-category', ['slug' => $slug]);
    $feedId = 'urn:web:'.$this->urlHostId($request).':feed:cat:'.rawurlencode($slug);

    // Fall back to the bare site name / site description when the category has none.
    $feedTitle = $categoryTitle === '' ? $siteName : $categoryTitle.' — '.$siteName;
    $feedSubtitle = $categorySummary === ''
        ? (string) $this->params->get('description')
        : $categorySummary;

    $lastUpdated = $this->newestArticleUpdate($entries);

    return $this->atomResponse(
        $this->buildAtomFeed(
            $feedTitle,
            $feedSubtitle,
            $feedId,
            $feedUrl,
            $categoryUrl,
            $lastUpdated,
            $request,
            $entries,
        ),
    );
}
||||
|
||||
/**
 * Generates the absolute URL for a named route.
 *
 * @param string               $name   route name
 * @param array<string, mixed> $params route parameters
 */
private function absoluteUrlForRoute(string $name, array $params = []): string
{
    $referenceType = UrlGeneratorInterface::ABSOLUTE_URL;

    return $this->generateUrl($name, $params, $referenceType);
}
||||
|
||||
/**
 * Derives the host component used inside the feed and entry "urn:web:…" ids.
 *
 * Every run of characters other than ASCII letters, digits, "." and "-" in
 * the request host is replaced by a single "-".
 *
 * @param Request $request request whose host is used
 *
 * @return string the sanitised host, or "site" when no usable host is available
 */
private function urlHostId(Request $request): string
{
    $sanitized = preg_replace('/[^a-zA-Z0-9.\\-]+/', '-', $request->getHost());

    // preg_replace() yields null on a PCRE failure. An empty result (empty host)
    // would otherwise produce malformed ids such as "urn:web::feed:magazine".
    if ($sanitized === null || $sanitized === '') {
        return 'site';
    }

    return $sanitized;
}
||||
|
||||
/**
 * Serialises a complete Atom feed document as a string.
 *
 * The feed author is the "name" parameter; every text node and attribute value
 * is escaped through xmlText() / xmlAttr().
 *
 * @param string             $title            feed title
 * @param string             $subtitle         feed subtitle
 * @param string             $id               feed id
 * @param string             $selfUrl          URL of the feed itself (rel="self")
 * @param string             $alternateHtmlUrl URL of the matching HTML page (rel="alternate")
 * @param \DateTimeImmutable $updated          feed-level update timestamp
 * @param Request            $request          forwarded to atomEntryForArticle()
 * @param list<Article>      $list             articles rendered as entries, in order
 */
private function buildAtomFeed(
    string $title,
    string $subtitle,
    string $id,
    string $selfUrl,
    string $alternateHtmlUrl,
    \DateTimeImmutable $updated,
    Request $request,
    array $list,
): string {
    $header = [
        '<?xml version="1.0" encoding="utf-8"?>',
        "\n",
        '<feed xmlns="http://www.w3.org/2005/Atom">',
        "\n <title>".$this->xmlText($title)."</title>",
        "\n <subtitle>".$this->xmlText($subtitle)."</subtitle>",
        "\n <id>".$this->xmlText($id).'</id>',
        "\n <link href=\"".$this->xmlAttr($selfUrl)."\" rel=\"self\" type=\"application/atom+xml\"/>",
        "\n <link href=\"".$this->xmlAttr($alternateHtmlUrl)."\" rel=\"alternate\" type=\"text/html\"/>",
        "\n <updated>".$this->xmlText($updated->format('c')).'</updated>',
        "\n <author><name>".$this->xmlText((string) $this->params->get('name'))."</name></author>",
        "\n <generator uri=\"https://github.com/decent-newsroom/unfold\" version=\"1\">unfold</generator>",
    ];

    // Articles with an empty slug render as an empty string and simply vanish here.
    $entries = array_map(
        fn (Article $article): string => $this->atomEntryForArticle($request, $article),
        $list,
    );

    return implode('', $header).implode('', $entries)."\n</feed>\n";
}
||||
|
||||
/**
 * Renders one article as an Atom <entry> element.
 *
 * @param Request $request used for the host part of the entry id
 * @param Article $article article to render
 *
 * @return string the entry markup, or an empty string when the article's trimmed slug is empty
 */
private function atomEntryForArticle(Request $request, Article $article): string
{
    $slug = \trim((string) $article->getSlug());
    if ($slug === '') {
        return '';
    }

    $link = $this->absoluteUrlForRoute('article-slug', ['slug' => $slug]);
    $heading = (string) ($article->getTitle() ?? 'Untitled');
    $modifiedAt = $this->articleLastMod($article);

    // Prefer the explicit summary; otherwise use the first 500 characters of
    // the content with all whitespace runs collapsed to single spaces.
    $summary = (string) ($article->getSummary() ?? '');
    if ($summary === '' && $article->getContent() !== null) {
        $flattened = preg_replace('/\s+/', ' ', (string) $article->getContent()) ?? '';
        $summary = (string) mb_substr($flattened, 0, 500);
    }

    // Identify the entry by event id, then entity id, then the literal "item".
    $identifier = (string) ($article->getEventId() ?? '');
    if ($identifier === '') {
        $identifier = (string) ($article->getId() ?? 'item');
    }
    $entryUrn = 'urn:web:'.$this->urlHostId($request).":article:{$identifier}";

    $publishedAt = $article->getPublishedAt() ?? $article->getCreatedAt() ?? $modifiedAt;

    $lines = [
        "\n <entry>",
        "\n <title>".$this->xmlText($heading)."</title>",
        "\n <link href=\"".$this->xmlAttr($link)."\" rel=\"alternate\" type=\"text/html\"/>",
        "\n <id>".$this->xmlText($entryUrn).'</id>',
        "\n <updated>".$this->xmlText($modifiedAt->format('c'))."</updated>",
        "\n <published>".$this->xmlText($publishedAt->format('c')).'</published>',
        "\n <summary type=\"text\">".$this->xmlText($this->oneLine($summary))."</summary>",
        "\n </entry>",
    ];

    return implode('', $lines);
}
||||
|
||||
/**
 * Collapses every run of CR/LF characters into one space and trims the result.
 *
 * @return string the single-line text, or an empty string if the regex replacement fails
 */
private function oneLine(string $s): string
{
    $singleLine = preg_replace("/[\r\n]+/", ' ', $s);
    if ($singleLine === null) {
        return '';
    }

    return trim($singleLine);
}
||||
|
||||
/**
 * Keeps one article per trimmed slug, preferring the latest creation date.
 *
 * Articles whose trimmed slug is empty are dropped. The first article seen for
 * a slug is kept unless a later one has a creation date and the kept one has
 * none or an older one.
 *
 * @param list<Article> $articles
 *
 * @return array<string, Article> articles keyed by trimmed slug, in first-seen order
 */
private function dedupeArticlesByLatestRevision(array $articles): array
{
    $latest = [];

    foreach ($articles as $candidate) {
        $key = \trim((string) $candidate->getSlug());
        if ($key === '') {
            continue;
        }

        $kept = $latest[$key] ?? null;
        if ($kept === null) {
            $latest[$key] = $candidate;

            continue;
        }

        // An undated candidate never displaces an article that is already kept.
        $candidateCreated = $candidate->getCreatedAt();
        if ($candidateCreated === null) {
            continue;
        }

        $keptCreated = $kept->getCreatedAt();
        if ($keptCreated === null || $candidateCreated > $keptCreated) {
            $latest[$key] = $candidate;
        }
    }

    return $latest;
}
||||
|
||||
/**
 * Returns the most recent articleLastMod() value among the given articles.
 *
 * The scan starts from the Unix epoch; if the result is still at the epoch
 * (for instance because the list is empty) the current time is returned.
 *
 * @param list<Article> $list
 */
private function newestArticleUpdate(array $list): \DateTimeImmutable
{
    $newest = new \DateTimeImmutable('@0');

    foreach ($list as $article) {
        $modified = $this->articleLastMod($article);
        $newest = $modified > $newest ? $modified : $newest;
    }

    // A timestamp of 0 means nothing later than the epoch sentinel was found.
    return (int) $newest->format('U') === 0 ? new \DateTimeImmutable() : $newest;
}
||||
|
||||
/**
 * Picks the later of an article's published and created timestamps.
 *
 * When only one of the two is set, that one is returned; when neither is set,
 * the current time is returned. On a tie the creation date is returned.
 */
private function articleLastMod(Article $a): \DateTimeImmutable
{
    $published = $a->getPublishedAt();
    $created = $a->getCreatedAt();

    if ($published === null) {
        return $created ?? new \DateTimeImmutable();
    }

    if ($created === null) {
        return $published;
    }

    return $published > $created ? $published : $created;
}
||||
|
||||
/**
 * Escapes a string for use as an XML text node.
 *
 * Escapes &, <, > and both quote styles using XML 1.0 entities. Invalid UTF-8
 * byte sequences and code points that XML 1.0 forbids (e.g. most C0 control
 * characters) are replaced with U+FFFD so the output stays well-formed.
 *
 * @param string $s raw text, expected to be UTF-8
 *
 * @return string escaped text safe to embed between XML tags
 */
private function xmlText(string $s): string
{
    // ENT_SUBSTITUTE: without it, one invalid byte makes htmlspecialchars() return ''.
    // ENT_DISALLOWED: replaces characters that are illegal in XML 1.0 documents.
    return htmlspecialchars(
        $s,
        \ENT_XML1 | \ENT_QUOTES | \ENT_SUBSTITUTE | \ENT_DISALLOWED,
        'UTF-8',
    );
}
||||
|
||||
/**
 * Escapes a string for use inside a quoted XML attribute value.
 *
 * Escapes &, <, > and both quote styles using XML 1.0 entities. Invalid UTF-8
 * byte sequences and code points that XML 1.0 forbids are replaced with
 * U+FFFD so the output stays well-formed.
 *
 * @param string $s raw attribute value, expected to be UTF-8
 *
 * @return string escaped value safe to embed between attribute quotes
 */
private function xmlAttr(string $s): string
{
    // ENT_SUBSTITUTE: without it, one invalid byte makes htmlspecialchars() return ''.
    // ENT_DISALLOWED: replaces characters that are illegal in XML 1.0 documents.
    return htmlspecialchars(
        $s,
        \ENT_XML1 | \ENT_QUOTES | \ENT_SUBSTITUTE | \ENT_DISALLOWED,
        'UTF-8',
    );
}
||||
|
||||
/**
 * Wraps an XML body in a 200 response served as "application/xml" and
 * publicly cacheable for 600 seconds.
 */
private function xmlResponse(string $body): Response
{
    $headers = [
        'Content-Type' => 'application/xml; charset=UTF-8',
        'Cache-Control' => 'public, max-age=600',
    ];

    return new Response($body, Response::HTTP_OK, $headers);
}
||||
|
||||
/**
 * Wraps an Atom body in a 200 response served as "application/atom+xml" and
 * publicly cacheable for 300 seconds.
 */
private function atomResponse(string $body): Response
{
    $headers = [
        'Content-Type' => 'application/atom+xml; charset=UTF-8',
        'Cache-Control' => 'public, max-age=300',
    ];

    return new Response($body, Response::HTTP_OK, $headers);
}
||||
} |
||||
Loading…
Reference in new issue