|
|
|
@ -7,8 +7,9 @@ use Psr\Log\LoggerInterface; |
|
|
|
|
|
|
|
|
|
|
|
readonly class NostrLinkParser |
|
|
|
readonly class NostrLinkParser |
|
|
|
{ |
|
|
|
{ |
|
|
|
private const string NOSTR_LINK_PATTERN = '/(?:nostr:)?(nevent1[a-z0-9]+|naddr1[a-z0-9]+|nprofile1[a-z0-9]+|note1[a-z0-9]+|npub1[a-z0-9]+)/'; |
|
|
|
private const string NOSTR_LINK_PATTERN = '/(?:nostr:)(nevent1[a-z0-9]+|naddr1[a-z0-9]+|nprofile1[a-z0-9]+|note1[a-z0-9]+|npub1[a-z0-9]+)/'; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
private const URL_PATTERN = '/https?:\/\/[\w\-\.\?\,\'\/\\\+&%@\?\$#_=:\(\)~;]+/i'; |
|
|
|
|
|
|
|
|
|
|
|
public function __construct( |
|
|
|
public function __construct( |
|
|
|
private LoggerInterface $logger |
|
|
|
private LoggerInterface $logger |
|
|
|
@ -23,36 +24,95 @@ readonly class NostrLinkParser |
|
|
|
/**
 * Extracts every link found in the given content, in order of appearance.
 *
 * Merges the entries produced by parseUrlsWithNostrIds() and
 * parseBareNostrIdentifiers() into a single list and orders that list by
 * each entry's 'position' value.
 *
 * @param string $content Text to scan for links.
 *
 * @return array List of link entries (each an array with a 'position' key),
 *               sorted ascending by 'position'.
 */
public function parseLinks(string $content): array
{
    // array_merge() always returns the combined list, so no separate
    // empty-array initialisation is needed beforehand.
    $links = array_merge(
        $this->parseUrlsWithNostrIds($content),
        $this->parseBareNostrIdentifiers($content),
    );

    // Sort by position to maintain the original order in the text
    usort($links, static fn (array $a, array $b): int => $a['position'] <=> $b['position']);

    return $links;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Collects every URL matched by URL_PATTERN and, where the URL also matches
 * NOSTR_LINK_PATTERN, attaches the decoded Nostr identifier to the entry.
 *
 * Each entry has the keys 'type', 'identifier', 'full_match', 'position',
 * 'data' and 'is_url' (always true here). 'position' is the offset reported
 * by preg_match_all() with PREG_OFFSET_CAPTURE. When no identifier is found,
 * or decoding does not yield a type, 'type' is 'url'. A decoding failure is
 * logged at info level and the URL is still returned.
 *
 * @param string $content Text to scan for URLs.
 *
 * @return array List of link entries, in match order.
 */
private function parseUrlsWithNostrIds(string $content): array
{
    $found = [];

    $urlCount = preg_match_all(self::URL_PATTERN, $content, $urlHits, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
    if (!$urlCount) {
        // Nothing matched (or the match failed): there are no URL entries.
        return $found;
    }

    foreach ($urlHits as $hit) {
        // With PREG_OFFSET_CAPTURE each group is a [text, offset] pair.
        [$address, $offset] = $hit[0];

        // Start from a plain-URL entry; Nostr details are filled in below.
        $entry = [
            'type' => 'url',
            'identifier' => null,
            'full_match' => $address,
            'position' => $offset,
            'data' => null,
            'is_url' => true,
        ];

        if (preg_match(self::NOSTR_LINK_PATTERN, $address, $idHit)) {
            $candidate = $idHit[1];
            // The identifier is kept on the entry even if decoding fails.
            $entry['identifier'] = $candidate;

            try {
                $bech = new Bech32($candidate);
                $kind = $bech->type;
                $entry['type'] = $kind ?? 'url';
                $entry['data'] = $bech->data;
            } catch (\Exception $e) {
                $this->logger->info('Failed to decode Nostr identifier in URL', [
                    'identifier' => $candidate,
                    'error' => $e->getMessage(),
                ]);
            }
        }

        $found[] = $entry;
    }

    return $found;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
private function parseBareNostrIdentifiers(string $content): array |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
$links = []; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Improved regular expression to match all nostr: links |
|
|
|
|
|
|
|
// This will find all occurrences including multiple links in the same text |
|
|
|
|
|
|
|
if (preg_match_all(self::NOSTR_LINK_PATTERN, $content, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE)) { |
|
|
|
if (preg_match_all(self::NOSTR_LINK_PATTERN, $content, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE)) { |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// If match starts with nostr:, continue otherwise check if part of URL |
|
|
|
|
|
|
|
if (!(str_starts_with($matches[0][0][0], 'nostr:'))) { |
|
|
|
|
|
|
|
// Check if the match is part of a URL, as path or query parameter |
|
|
|
|
|
|
|
$urlPattern = '/https?:\/\/[\w\-.?,\'\/+&%$#@_=:()~;]+/i'; |
|
|
|
|
|
|
|
foreach ($matches as $key => $match) { |
|
|
|
|
|
|
|
$position = $match[0][1]; |
|
|
|
|
|
|
|
// Check if the match is preceded by a URL |
|
|
|
|
|
|
|
$precedingContent = substr($content, 0, $position); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (preg_match($urlPattern, $precedingContent)) { |
|
|
|
|
|
|
|
// If the match is preceded by a URL, skip it |
|
|
|
|
|
|
|
unset($matches[$key]); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
foreach ($matches as $match) { |
|
|
|
foreach ($matches as $match) { |
|
|
|
$fullMatch = $match[0][0]; |
|
|
|
|
|
|
|
$identifier = $match[1][0]; |
|
|
|
$identifier = $match[1][0]; |
|
|
|
$position = $match[0][1]; // Position in the text |
|
|
|
$position = $match[0][1]; |
|
|
|
|
|
|
|
// This check will be handled in parseLinks by sorting and merging |
|
|
|
try { |
|
|
|
try { |
|
|
|
$decoded = new Bech32($identifier); |
|
|
|
$decoded = new Bech32($identifier); |
|
|
|
$links[] = [ |
|
|
|
$links[] = [ |
|
|
|
'type' => $decoded->type, |
|
|
|
'type' => $decoded->type, |
|
|
|
'identifier' => $identifier, |
|
|
|
'identifier' => $identifier, |
|
|
|
'full_match' => $fullMatch, |
|
|
|
'full_match' => $match[0][0], |
|
|
|
'position' => $position, |
|
|
|
'position' => $position, |
|
|
|
'data' => $decoded->data |
|
|
|
'data' => $decoded->data, |
|
|
|
|
|
|
|
'is_url' => false |
|
|
|
]; |
|
|
|
]; |
|
|
|
} catch (\Exception $e) { |
|
|
|
} catch (\Exception $e) { |
|
|
|
// If decoding fails, skip this identifier |
|
|
|
|
|
|
|
$this->logger->info('Failed to decode Nostr identifier', [ |
|
|
|
$this->logger->info('Failed to decode Nostr identifier', [ |
|
|
|
'identifier' => $identifier, |
|
|
|
'identifier' => $identifier, |
|
|
|
'error' => $e->getMessage() |
|
|
|
'error' => $e->getMessage() |
|
|
|
]); |
|
|
|
]); |
|
|
|
continue; |
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// Sort by position to maintain the original order in the text |
|
|
|
|
|
|
|
usort($links, fn($a, $b) => $a['position'] <=> $b['position']); |
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
return $links; |
|
|
|
return $links; |
|
|
|
|