You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

250 lines
9.5 KiB

import { NOSTR_URI_INLINE_REGEX } from '@/lib/content-patterns'
import { isImage, isVideo, isAudio } from '@/lib/url'
import { URL_REGEX, YOUTUBE_URL_REGEX } from '@/constants'
/**
 * Report whether `url` matches the shared YouTube URL pattern.
 *
 * The shared pattern may carry the global flag, which makes `test()` stateful
 * through `lastIndex`; a throwaway copy without `g` is used for every call.
 */
function isYouTubeUrl(url: string): boolean {
  const { source, flags } = YOUTUBE_URL_REGEX
  const statelessPattern = new RegExp(source, flags.replace('g', ''))
  return statelessPattern.test(url)
}
/**
 * Preprocess content to convert raw media URLs and hyperlinks to markdown syntax
 * - Images: https://example.com/image.png -> ![](https://example.com/image.png)
 * - Videos: https://example.com/video.mp4 -> ![](https://example.com/video.mp4)
 * - Audio: https://example.com/audio.mp3 -> ![](https://example.com/audio.mp3)
 * - Hyperlinks: https://example.com/page -> [https://example.com/page](https://example.com/page)
 *
 * URLs are left untouched when they:
 * - already sit inside markdown link/image syntax,
 * - are inside a fenced code block or an inline code span,
 * - are YouTube URLs (kept as plain text so they can be rendered as embeds).
 *
 * @param content Raw markdown text.
 * @returns The text with bare URLs rewritten as markdown links/images.
 */
export function preprocessMarkdownMediaLinks(content: string): string {
  // Unwrap angle-bracket autolinks (<https://example.com> -> https://example.com)
  // so the main URL pass below handles them like any other bare URL.
  let processed = content.replace(
    /<((?:https?|ftp):\/\/[^\s<>"']+)>/g,
    (_match, url: string) => url
  )

  // Pass 1: collect every URL that is not already part of markdown syntax.
  const allMatches: Array<{ url: string; index: number }> = []
  const regex = new RegExp(URL_REGEX.source, URL_REGEX.flags)
  let match: RegExpExecArray | null
  while ((match = regex.exec(processed)) !== null) {
    const index = match.index
    const url = match[0]
    const before = processed.substring(Math.max(0, index - 20), index)
    // Skip when preceded by an unclosed "[" (link or image text; this also
    // covers "![") or by "](" (link/image destination).
    if (/\[[^\]]*$/.test(before) || /\]\([^)]*$/.test(before)) {
      continue
    }
    allMatches.push({ url, index })
  }

  // Pass 2: rewrite from the last match to the first so that earlier offsets
  // stay valid while the string grows.
  for (let i = allMatches.length - 1; i >= 0; i--) {
    const { url, index } = allMatches[i]
    const beforeUrl = processed.substring(0, index)

    // An odd number of ``` fences before the URL means we are inside a fenced block.
    const fenceCount = (beforeUrl.match(/```/g) || []).length
    if (fenceCount % 2 === 1) {
      continue
    }

    // Inline code: the URL is inside a code span only when an odd number of
    // backticks precede it on its own line (i.e. a span is still open) and a
    // closing backtick follows on the same line. Checking parity avoids
    // mistaking a URL that sits between two separate code spans
    // (`a` https://x `b`) for code.
    const lineBeforeUrl = beforeUrl.substring(beforeUrl.lastIndexOf('\n') + 1)
    const backticksOnLine = (lineBeforeUrl.match(/`/g) || []).length
    if (backticksOnLine % 2 === 1) {
      const afterUrl = processed.substring(index + url.length)
      const nextBacktick = afterUrl.indexOf('`')
      const nextNewline = afterUrl.indexOf('\n')
      if (nextBacktick !== -1 && (nextNewline === -1 || nextBacktick < nextNewline)) {
        continue
      }
    }

    // YouTube URLs stay as plain text so they can be detected and rendered as embeds.
    if (isYouTubeUrl(url)) {
      continue
    }

    // Media becomes ![](url); everything else becomes [url](url).
    const isMedia = isImage(url) || isVideo(url) || isAudio(url)
    const replacement = isMedia ? `![](${url})` : `[${url}](${url})`

    processed = processed.substring(0, index) + replacement + processed.substring(index + url.length)
  }

  return processed
}
/**
 * Preprocess content to convert raw media URLs and hyperlinks to AsciiDoc syntax
 * - Images: https://example.com/image.png -> image::https://example.com/image.png[]
 * - Videos: https://example.com/video.mp4 -> video::https://example.com/video.mp4[]
 * - Audio: https://example.com/audio.mp3 -> audio::https://example.com/audio.mp3[]
 * - Hyperlinks (including YouTube): https://example.com/page -> link:https://example.com/page[https://example.com/page]
 * - Wikilinks (fallback only): [[link]] or [[link|display]] -> +++WIKILINK:link|display+++
 *   and [[book::ref]] -> +++BOOKSTR_MARKER:ref:BOOKSTR_END+++ (passthrough for post-processing)
 *
 * URLs are left untouched when they sit near a passthrough marker, are already
 * part of AsciiDoc link/media syntax, or are inside a `----` delimited block.
 *
 * @param content Raw AsciiDoc text.
 * @returns The text with bare URLs rewritten as AsciiDoc macros.
 */
export function preprocessAsciidocMediaLinks(content: string): string {
  let processed = content

  // Wikilinks are expected to be converted to passthrough markers before this
  // function is called. If no marker is present, protect them here as a fallback
  // so AsciiDoc does not turn them into regular links.
  // NOTE(review): the fallback below emits "BOOKSTR_MARKER:" while the checks in
  // this function look for "BOOKSTR_START:" — confirm which prefix the
  // post-processor expects.
  const alreadyProtected = processed.includes('BOOKSTR_START:') || processed.includes('WIKILINK:')
  if (!alreadyProtected) {
    // Bookstr wikilinks first, so the generic pattern below never sees them.
    processed = processed.replace(/\[\[book::([^\]]+)\]\]/g, (_match, bookContent: string) => {
      const cleanContent = bookContent.trim()
      return `+++BOOKSTR_MARKER:${cleanContent}:BOOKSTR_END+++`
    })
    processed = processed.replace(/\[\[([^\]]+)\]\]/g, (wikilink, linkContent: string) => {
      // Safety net: leave anything that still looks like a bookstr wikilink alone.
      if (linkContent.startsWith('book::')) {
        return wikilink
      }
      return `+++WIKILINK:${linkContent}+++`
    })
  }

  // Pass 1: collect candidate URLs. The scan MUST run on `processed` (not on the
  // original `content`): the wikilink fallback above can change the string
  // length, and the offsets collected here are later used to splice `processed`.
  const allMatches: Array<{ url: string; index: number }> = []
  const regex = new RegExp(URL_REGEX.source, URL_REGEX.flags)
  let match: RegExpExecArray | null
  while ((match = regex.exec(processed)) !== null) {
    const index = match.index
    const url = match[0]
    const urlEnd = index + url.length

    // Heuristic: skip URLs that have a passthrough marker within 100 characters
    // on either side, since they are likely inside a protected wikilink.
    const beforeUrl = processed.substring(Math.max(0, index - 100), index)
    const afterUrl = processed.substring(urlEnd, urlEnd + 100)
    if (
      beforeUrl.includes('BOOKSTR_START:') ||
      beforeUrl.includes('WIKILINK:') ||
      afterUrl.includes(':BOOKSTR_END') ||
      afterUrl.includes('+++')
    ) {
      continue
    }

    // url[text] is already an AsciiDoc link.
    const contextAfter = processed.substring(urlEnd, urlEnd + 50)
    if (/^\s*\[[^\]]+\]/.test(contextAfter)) {
      continue
    }

    // Skip URLs that are already the target of a media/link macro, or that
    // appear after an opening "[" of a preceding link (i.e. inside link text).
    const before = processed.substring(Math.max(0, index - 30), index)
    if (
      /image::\s*$/.test(before) ||
      /video::\s*$/.test(before) ||
      /audio::\s*$/.test(before) ||
      // "link:" directly in front of the URL (covers link:url[] with empty text,
      // which the url[text] check above does not catch). No whitespace is
      // allowed so prose such as "the link: https://…" is still converted.
      /link:$/.test(before) ||
      /link:\S+\[/.test(before) ||
      /https?:\/\/[^\s]*\[/.test(before)
    ) {
      continue
    }

    allMatches.push({ url, index })
  }

  // Pass 2: rewrite from the last match to the first so earlier offsets stay valid.
  for (let i = allMatches.length - 1; i >= 0; i--) {
    const { url, index } = allMatches[i]

    // An odd number of ---- delimiters before the URL means we are inside a block.
    const delimiterCount = (processed.substring(0, index).match(/----/g) || []).length
    if (delimiterCount % 2 === 1) {
      continue
    }

    let replacement: string
    if (isImage(url)) {
      replacement = `image::${url}[]`
    } else if (isVideo(url)) {
      replacement = `video::${url}[]`
    } else if (isAudio(url)) {
      replacement = `audio::${url}[]`
    } else {
      // Regular hyperlinks and YouTube URLs both become link:url[url]; YouTube
      // links are swapped for a player in post-processing.
      replacement = `link:${url}[${url}]`
    }

    processed = processed.substring(0, index) + replacement + processed.substring(index + url.length)
  }

  return processed
}
/**
 * Post-processing hook for nostr: URIs and hashtags, meant to run AFTER the
 * markup has been processed.
 *
 * At present this is an identity transform: every nostr: URI found is put back
 * unchanged (the parser handles them later), and hashtag linking is deferred to
 * the rendering phase so it cannot break the markup.
 *
 * @param content Text produced by the markup processing step.
 * @returns The same text, unchanged.
 */
export function postProcessNostrLinks(content: string): string {
  // Work on a private copy of the shared pattern so its lastIndex is never touched.
  const nostrUriPattern = new RegExp(NOSTR_URI_INLINE_REGEX.source, NOSTR_URI_INLINE_REGEX.flags)

  // Each nostr:npub1… / nostr:note1… URI is emitted as-is for now; wrapping is
  // left to the parser so existing links are never double-wrapped.
  const withNostrLinks = content.replace(nostrUriPattern, (uri) => uri)

  // Hashtags (#tag) are intentionally not rewritten here; that happens at render time.
  return withNostrLinks
}