@ -28,8 +28,8 @@ function extractNostrLinks(content: string): NostrLink[] {
const nostrLinks : NostrLink [ ] = [ ] ;
const nostrLinks : NostrLink [ ] = [ ] ;
const seen = new Set < string > ( ) ;
const seen = new Set < string > ( ) ;
// Extract nostr: prefixed links
// Extract nostr: prefixed links (valid bech32 format)
const nostrMatches = content . match ( /nostr:([a-z0-9]+[a-z0-9]{6,})/g ) || [ ] ;
const nostrMatches = content . match ( /nostr:((?:npub|nprofile|nevent|naddr|note)1[a-z0-9]{6,})/gi ) || [ ] ;
nostrMatches . forEach ( match = > {
nostrMatches . forEach ( match = > {
const id = match . substring ( 6 ) ; // Remove 'nostr:'
const id = match . substring ( 6 ) ; // Remove 'nostr:'
const type = getNostrType ( id ) ;
const type = getNostrType ( id ) ;
@ -79,20 +79,33 @@ function extractWikilinks(content: string): Wikilink[] {
/**
 * Extract hashtags from content.
 *
 * Text inside fenced code blocks (``` ... ```), inline code spans (`...`),
 * and http(s) URLs is ignored, so `#` characters that appear there (for
 * example a URL fragment) are not reported as hashtags.
 *
 * @param content - Raw text to scan.
 * @returns Unique hashtags, lower-cased and without the leading `#`, in
 *   order of first appearance.
 */
function extractHashtags(content: string): string[] {
  const hashtags: string[] = [];
  const seen = new Set<string>();

  const codeBlockPattern = /```[\s\S]*?```/g;
  const inlineCodePattern = /`[^`]+`/g;
  const urlPattern = /https?:\/\/[^\s<>"']+/g;

  // Replace each excluded span with a space instead of deleting it, so the
  // text on either side is not fused into one token (e.g. "#foo`x`bar" must
  // yield "foo", not "foobar"). Fenced blocks are stripped before inline
  // code so their backticks are not consumed by the inline pattern.
  const processedContent = content
    .replace(codeBlockPattern, ' ')
    .replace(inlineCodePattern, ' ')
    .replace(urlPattern, ' ');

  // \B requires that '#' is not directly preceded by a word character, so
  // "foo#bar" is not treated as containing a hashtag.
  const hashtagPattern = /\B#([a-zA-Z0-9_]+)/g;
  let match: RegExpExecArray | null;
  while ((match = hashtagPattern.exec(processedContent)) !== null) {
    const tag = match[1].toLowerCase();
    if (!seen.has(tag)) {
      seen.add(tag);
      hashtags.push(tag);
    }
  }

  return hashtags;
}
@ -104,39 +117,35 @@ function extractLinks(content: string, linkBaseURL: string): Array<{ url: string
const links : Array < { url : string ; text : string ; isExternal : boolean } > = [ ] ;
const links : Array < { url : string ; text : string ; isExternal : boolean } > = [ ] ;
const seen = new Set < string > ( ) ;
const seen = new Set < string > ( ) ;
// Extract markdown links: [text](url)
// Extract markdown links: [text](url) - optimized to avoid double matching
const markdownLinks = content . match ( /\[([^\]]+)\]\(([^)]+)\)/g ) || [ ] ;
const markdownLinkPattern = /\[([^\]]+)\]\(([^)]+)\)/g ;
markdownLinks . forEach ( match = > {
let markdownMatch ;
const linkMatch = match . match ( /\[([^\]]+)\]\(([^)]+)\)/ ) ;
while ( ( markdownMatch = markdownLinkPattern . exec ( content ) ) !== null ) {
if ( linkMatch ) {
const [ , text , url ] = markdownMatch ;
const [ , text , url ] = linkMatch ;
if ( ! seen . has ( url ) && ! isNostrUrl ( url ) ) {
if ( ! seen . has ( url ) && ! isNostrUrl ( url ) ) {
seen . add ( url ) ;
seen . add ( url ) ;
links . push ( {
links . push ( {
url ,
url ,
text ,
text ,
isExternal : isExternalUrl ( url , linkBaseURL ) ,
isExternal : isExternalUrl ( url , linkBaseURL ) ,
} ) ;
} ) ;
}
}
}
} ) ;
}
// Extract asciidoc links: link:url[text]
// Extract asciidoc links: link:url[text] - optimized to avoid double matching
const asciidocLinks = content . match ( /link:([^\[]+)\[([^\]]+)\]/g ) || [ ] ;
const asciidocLinkPattern = /link:([^\[]+)\[([^\]]+)\]/g ;
asciidocLinks . forEach ( match = > {
let asciidocMatch ;
const linkMatch = match . match ( /link:([^\[]+)\[([^\]]+)\]/ ) ;
while ( ( asciidocMatch = asciidocLinkPattern . exec ( content ) ) !== null ) {
if ( linkMatch ) {
const [ , url , text ] = asciidocMatch ;
const [ , url , text ] = linkMatch ;
if ( ! seen . has ( url ) && ! isNostrUrl ( url ) ) {
if ( ! seen . has ( url ) && ! isNostrUrl ( url ) ) {
seen . add ( url ) ;
seen . add ( url ) ;
links . push ( {
links . push ( {
url ,
url ,
text ,
text ,
isExternal : isExternalUrl ( url , linkBaseURL ) ,
isExternal : isExternalUrl ( url , linkBaseURL ) ,
} ) ;
} ) ;
}
}
}
} ) ;
}
// Extract raw URLs (basic pattern)
// Extract raw URLs (basic pattern)
const urlPattern = /https?:\/\/[^\s<>"']+/g ;
const urlPattern = /https?:\/\/[^\s<>"']+/g ;
@ -162,29 +171,31 @@ function extractMedia(content: string): string[] {
const media : string [ ] = [ ] ;
const media : string [ ] = [ ] ;
const seen = new Set < string > ( ) ;
const seen = new Set < string > ( ) ;
// Extract markdown images: 
// Extract markdown images:  - optimized to avoid double matching
const imageMatches = content . match ( /!\[[^\]]*\]\(([^)]+)\)/g ) || [ ] ;
const markdownImagePattern = /!\[[^\]]*\]\(([^)]+)\)/g ;
imageMatches . forEach ( match = > {
let markdownImageMatch ;
const url = match . match ( /!\[[^\]]*\]\(([^)]+)\)/ ) ? . [ 1 ] ;
while ( ( markdownImageMatch = markdownImagePattern . exec ( content ) ) !== null ) {
const url = markdownImageMatch [ 1 ] ;
if ( url && ! seen . has ( url ) ) {
if ( url && ! seen . has ( url ) ) {
if ( isImageUrl ( url ) || isVideoUrl ( url ) ) {
if ( isImageUrl ( url ) || isVideoUrl ( url ) ) {
media . push ( url ) ;
media . push ( url ) ;
seen . add ( url ) ;
seen . add ( url ) ;
}
}
}
}
} ) ;
}
// Extract asciidoc images: image::url[alt]
// Extract asciidoc images: image::url[alt] - optimized to avoid double matching
const asciidocImageMatches = content . match ( /image::([^\[]+)\[/g ) || [ ] ;
const asciidocImagePattern = /image::([^\[]+)\[/g ;
asciidocImageMatches . forEach ( match = > {
let asciidocImageMatch ;
const url = match . match ( /image::([^\[]+)\[/ ) ? . [ 1 ] ;
while ( ( asciidocImageMatch = asciidocImagePattern . exec ( content ) ) !== null ) {
const url = asciidocImageMatch [ 1 ] ;
if ( url && ! seen . has ( url ) ) {
if ( url && ! seen . has ( url ) ) {
if ( isImageUrl ( url ) || isVideoUrl ( url ) ) {
if ( isImageUrl ( url ) || isVideoUrl ( url ) ) {
media . push ( url ) ;
media . push ( url ) ;
seen . add ( url ) ;
seen . add ( url ) ;
}
}
}
}
} ) ;
}
// Extract raw image/video URLs
// Extract raw image/video URLs
const urlPattern = /https?:\/\/[^\s<>"']+/g ;
const urlPattern = /https?:\/\/[^\s<>"']+/g ;