Browse Source

interim state

master
Silberengel 11 months ago
parent
commit
dddc154b09
  1. 2
      src/lib/consts.ts
  2. 50
      src/lib/utils/advancedMarkdownParser.ts
  3. 287
      src/lib/utils/basicMarkdownParser.ts
  4. 1
      src/lib/utils/nostrUtils.ts

2
src/lib/consts.ts

@ -1,7 +1,7 @@ @@ -1,7 +1,7 @@
export const wikiKind = 30818;
export const indexKind = 30040;
export const zettelKinds = [ 30041, 30818 ];
export const standardRelays = [ 'wss://thecitadel.nostr1.com', 'wss://relay.noswhere.com' ];
export const standardRelays = [ 'wss://thecitadel.nostr1.com', 'wss://theforest.nostr1.com' ];
export const bootstrapRelays = [ 'wss://purplepag.es', 'wss://relay.noswhere.com' ];
export enum FeedType {

50
src/lib/utils/advancedMarkdownParser.ts

@ -2,7 +2,6 @@ import { parseBasicMarkdown } from './basicMarkdownParser'; @@ -2,7 +2,6 @@ import { parseBasicMarkdown } from './basicMarkdownParser';
import hljs from 'highlight.js';
import 'highlight.js/lib/common'; // Import common languages
import 'highlight.js/styles/github-dark.css'; // Dark theme only
import { processNostrIdentifiers } from './nostrUtils';
// Register common languages
hljs.configure({
@ -13,23 +12,10 @@ hljs.configure({ @@ -13,23 +12,10 @@ hljs.configure({
// ATX-style heading: one to six '#' at the start of a line, whitespace, then the heading text.
const HEADING_REGEX = /^(#{1,6})\s+(.+)$/gm;
// Setext-style heading: a line of text followed by a line made only of '=' or only of '-'.
const ALTERNATE_HEADING_REGEX = /^([^\n]+)\n(=+|-+)\n/gm;
// Inline code: a backtick-delimited span with no backtick or newline inside.
const INLINE_CODE_REGEX = /`([^`\n]+)`/g;
// Markdown link: [text](url). Group 1 is the text, group 2 the target.
const LINK_REGEX = /\[([^\]]+)\]\(([^)]+)\)/g;
// Markdown image: ![alt](url). Group 1 is the (possibly empty) alt text, group 2 the source.
const IMAGE_REGEX = /!\[([^\]]*)\]\(([^)]+)\)/g;
// Horizontal rule: a whole line of three or more '-', '*' or '_' characters, each optionally
// followed by whitespace. The character class allows the three markers to be mixed.
const HORIZONTAL_RULE_REGEX = /^(?:[-*_]\s*){3,}$/gm;
// Footnote reference: [^id] anywhere in the text. Group 1 is the id.
const FOOTNOTE_REFERENCE_REGEX = /\[\^([^\]]+)\]/g;
// Footnote definition: a line starting with [^id]: followed by the footnote text.
// Group 1 is the id, group 2 the text.
const FOOTNOTE_DEFINITION_REGEX = /^\[\^([^\]]+)\]:\s*(.+)$/gm;
/**
 * Shape of a single footnote record: an id, its text, and a reference counter.
 * NOTE(review): the code that populates this type is not visible in this excerpt;
 * presumably `id` and `text` come from FOOTNOTE_DEFINITION_REGEX matches — confirm.
 */
interface Footnote {
id: string;
text: string;
referenceCount: number;
}
/**
 * Shape pairing a footnote id with a numeric count.
 * NOTE(review): what `count` measures (occurrence index vs. total) is not visible
 * in this excerpt — confirm against the footnote-processing code.
 */
interface FootnoteReference {
id: string;
count: number;
}
/**
* Process headings (both styles)
*/
@ -123,23 +109,6 @@ function processTables(content: string): string { @@ -123,23 +109,6 @@ function processTables(content: string): string {
}
}
/**
 * Rewrite Markdown image and link syntax as HTML tags.
 *
 * Images are substituted before links so that the `[alt](url)` portion of an
 * image is not picked up by the link pattern.
 *
 * @param content - Markdown text to transform.
 * @returns The text with `![alt](url)` turned into `<img>` and `[text](url)` into `<a>`.
 */
function processLinksAndImages(content: string): string {
  // `$1` / `$2` are replacement-pattern references to the regex capture groups.
  const imageTemplate =
    '<img src="$2" alt="$1" class="max-w-full h-auto rounded-lg shadow-lg my-4" loading="lazy">';
  const anchorTemplate = '<a href="$2" class="text-primary-600 hover:underline">$1</a>';

  return content
    .replace(IMAGE_REGEX, imageTemplate)
    .replace(LINK_REGEX, anchorTemplate);
}
/**
* Process horizontal rules
*/
@ -367,21 +336,18 @@ function restoreCodeBlocks(text: string, blocks: Map<string, string>): string { @@ -367,21 +336,18 @@ function restoreCodeBlocks(text: string, blocks: Map<string, string>): string {
* Parse markdown text with advanced formatting
*/
export async function parseAdvancedMarkdown(text: string): Promise<string> {
try {
if (!text) return '';
if (!text) return '';
try {
// Step 1: Extract and save code blocks first
const { text: withoutCode, blocks } = processCodeBlocks(text);
// Step 2: Process all other markdown
let processedText = withoutCode;
// Process block-level elements
// Step 2: Process block-level elements
processedText = processTables(processedText);
processedText = processBlockquotes(processedText);
processedText = processHeadings(processedText);
processedText = processHorizontalRules(processedText);
processedText = processLinksAndImages(processedText);
// Process inline elements
processedText = processedText.replace(INLINE_CODE_REGEX, (_, code) => {
@ -395,11 +361,10 @@ export async function parseAdvancedMarkdown(text: string): Promise<string> { @@ -395,11 +361,10 @@ export async function parseAdvancedMarkdown(text: string): Promise<string> {
return `<code class="px-1.5 py-0.5 bg-white dark:bg-gray-900 border border-gray-200 dark:border-gray-700 rounded text-sm font-mono">${escapedCode}</code>`;
});
// Process footnotes before basic markdown to prevent unwanted paragraph tags
// Process footnotes
processedText = processFootnotes(processedText);
// Process async elements
processedText = await processNostrIdentifiers(processedText);
// Process basic markdown (which will also handle Nostr identifiers)
processedText = await parseBasicMarkdown(processedText);
// Step 3: Restore code blocks
@ -408,9 +373,6 @@ export async function parseAdvancedMarkdown(text: string): Promise<string> { @@ -408,9 +373,6 @@ export async function parseAdvancedMarkdown(text: string): Promise<string> {
return processedText;
} catch (error) {
console.error('Error in parseAdvancedMarkdown:', error);
if (error instanceof Error) {
return `<div class="text-red-500">Error processing markdown: ${error.message}</div>`;
}
return '<div class="text-red-500">An error occurred while processing the markdown</div>';
return `<div class="text-red-500">Error processing markdown: ${error instanceof Error ? error.message : 'Unknown error'}</div>`;
}
}

287
src/lib/utils/basicMarkdownParser.ts

@ -7,98 +7,122 @@ const STRIKETHROUGH_REGEX = /~~([^~\n]+)~~|~([^~\n]+)~/g; @@ -7,98 +7,122 @@ const STRIKETHROUGH_REGEX = /~~([^~\n]+)~~|~([^~\n]+)~/g;
// Hashtag: '#' followed by letters, digits or underscores. The lookbehind requires the '#'
// to sit at the start of the input or right after whitespace; the lookahead forbids a
// trailing word character. Group 1 is the tag without the '#'.
const HASHTAG_REGEX = /(?<![^\s])#([a-zA-Z0-9_]+)(?!\w)/g;
// Blockquote: a line that starts with optional spaces/tabs and '>' (captured whole as group 1),
// optionally followed by further lines that begin with the exact text of group 1 (backreference).
// NOTE(review): the backreference makes continuation lines repeat the first line verbatim —
// confirm this is the intended matching rule.
const BLOCKQUOTE_REGEX = /^([ \t]*>[ \t]?.*)(?:\n\1[ \t]*(?!>).*)*$/gm;
/**
 * Shape of a parsed list item: list kind ('ul' or 'ol'), an indent value,
 * the item content, and the marker string.
 * NOTE(review): no code in this excerpt constructs a ListItem; the unit of
 * `indent` (spaces vs. nesting level) should be confirmed at the use site.
 */
interface ListItem {
type: 'ul' | 'ol';
indent: number;
content: string;
marker: string;
}
// List regex patterns.
// Both capture the marker including its leading indentation and trailing whitespace
// as group 1, and the rest of the line as group 2.
// Unordered: marker is '-', '*' or '+'.
const UNORDERED_LIST_REGEX = /^(\s*[-*+]\s+)(.*?)$/gm;
// Ordered: marker is one or more digits followed by '.'.
const ORDERED_LIST_REGEX = /^(\s*\d+\.\s+)(.*?)$/gm;
/**
 * Replace the five HTML-significant characters with their entity forms.
 *
 * @param text - Raw text that may contain `&`, `<`, `>`, `"` or `'`.
 * @returns The text with each of those characters replaced by an entity;
 *          every other character is copied through unchanged.
 */
function escapeHtml(text: string): string {
  let escaped = '';
  for (const ch of text) {
    switch (ch) {
      case '&':
        escaped += '&amp;';
        break;
      case '<':
        escaped += '&lt;';
        break;
      case '>':
        escaped += '&gt;';
        break;
      case '"':
        escaped += '&quot;';
        break;
      case "'":
        escaped += '&#39;';
        break;
      default:
        escaped += ch;
    }
  }
  return escaped;
}
// Markdown patterns.
// Link: [text](url) — group 1 is the text, group 2 the target.
const MARKDOWN_LINK = /\[([^\]]+)\]\(([^)]+)\)/g;
// Image: ![alt](url) — group 1 is the (possibly empty) alt text, group 2 the source.
const MARKDOWN_IMAGE = /!\[([^\]]*)\]\(([^)]+)\)/g;
/**
* Process paragraphs and line breaks
*/
function processParagraphs(content: string): string {
try {
if (!content) return '';
// URL patterns.
// WebSocket URL: 'wss://' followed by a run of characters that are not whitespace, '<', '>' or '"'.
const WSS_URL = /wss:\/\/[^\s<>"]+/g;
// Bare http(s) URL that is not immediately preceded by a quote or '=' and not immediately
// followed by a quote (the lookarounds skip URLs that sit inside quoted attribute values).
const DIRECT_LINK = /(?<!["'=])(https?:\/\/[^\s<>"]+)(?!["'])/g;
// Split content into paragraphs (double line breaks)
const paragraphs = content.split(/\n\s*\n/);
// Media URL patterns.
// All are case-insensitive and non-global, so `.test()` keeps no lastIndex state between calls.
// The first three match an http(s) URL containing a dot plus one of the listed file
// extensions, optionally followed by more non-space, non-'<' characters.
const IMAGE_URL_REGEX = /https?:\/\/[^\s<]+\.(?:jpg|jpeg|gif|png|webp)(?:[^\s<]*)?/i;
const VIDEO_URL_REGEX = /https?:\/\/[^\s<]+\.(?:mp4|webm|mov|avi)(?:[^\s<]*)?/i;
const AUDIO_URL_REGEX = /https?:\/\/[^\s<]+\.(?:mp3|wav|ogg|m4a)(?:[^\s<]*)?/i;
// YouTube URL in watch?v=, embed/, youtu.be/ or youtube-nocookie.com/embed/ form;
// group 1 is the 11-character video id.
const YOUTUBE_URL_REGEX = /https?:\/\/(?:www\.)?(?:youtube\.com\/(?:watch\?v=|embed\/)|youtu\.be\/|youtube-nocookie\.com\/embed\/)([a-zA-Z0-9_-]{11})(?:[^\s<]*)?/i;
// Process each paragraph
return paragraphs.map(para => {
if (!para.trim()) return '';
// Handle single line breaks within paragraphs
const lines = para.split('\n');
function processBasicFormatting(content: string): string {
if (!content) return '';
// Join lines with normal line breaks and add br after paragraph
return `<p>${lines.join('\n')}</p><br>`;
}).filter(Boolean).join('\n');
} catch (error) {
console.error('Error in processParagraphs:', error);
return content;
}
}
let processedText = content;
/**
* Process basic text formatting (bold, italic, strikethrough, hashtags, inline code)
*/
function processBasicFormatting(content: string): string {
try {
if (!content) return '';
// Process Markdown images first
processedText = processedText.replace(MARKDOWN_IMAGE, (match, alt, url) => {
if (YOUTUBE_URL_REGEX.test(url)) {
const videoId = extractYouTubeVideoId(url);
if (videoId) {
return `<iframe class="w-full aspect-video rounded-lg shadow-lg my-4" src="https://www.youtube-nocookie.com/embed/${videoId}" title="${alt || 'YouTube video'}" frameborder="0" allow="fullscreen" sandbox="allow-scripts allow-same-origin allow-presentation"></iframe>`;
}
}
if (VIDEO_URL_REGEX.test(url)) {
return `<video controls class="max-w-full rounded-lg shadow-lg my-4" preload="none" playsinline><source src="${url}">${alt || 'Video'}</video>`;
}
if (AUDIO_URL_REGEX.test(url)) {
return `<audio controls class="w-full my-4" preload="none"><source src="${url}">${alt || 'Audio'}</audio>`;
}
// Process bold first to avoid conflicts
content = content.replace(BOLD_REGEX, '<strong>$2</strong>');
return `<img src="${url}" alt="${alt}" class="max-w-full h-auto rounded-lg shadow-lg my-4" loading="lazy" decoding="async">`;
});
// Process Markdown links
processedText = processedText.replace(MARKDOWN_LINK, (match, text, url) =>
`<a href="${url}" class="text-primary-600 dark:text-primary-500 hover:underline" target="_blank" rel="noopener noreferrer">${text}</a>`
);
// Process WebSocket URLs
processedText = processedText.replace(WSS_URL, match => {
// Remove 'wss://' from the start and any trailing slashes
const cleanUrl = match.slice(6).replace(/\/+$/, '');
return `<a href="https://nostrudel.ninja/#/r/wss%3A%2F%2F${cleanUrl}%2F" target="_blank" rel="noopener noreferrer" class="text-primary-600 dark:text-primary-500 hover:underline">${match}</a>`;
});
// Process direct media URLs
processedText = processedText.replace(DIRECT_LINK, match => {
if (YOUTUBE_URL_REGEX.test(match)) {
const videoId = extractYouTubeVideoId(match);
if (videoId) {
return `<iframe class="w-full aspect-video rounded-lg shadow-lg my-4" src="https://www.youtube-nocookie.com/embed/${videoId}" title="YouTube video" frameborder="0" allow="fullscreen" sandbox="allow-scripts allow-same-origin allow-presentation" class="text-primary-600 dark:text-primary-500 hover:underline"></iframe>`;
}
}
if (VIDEO_URL_REGEX.test(match)) {
return `<video controls class="max-w-full rounded-lg shadow-lg my-4" preload="none" playsinline><source src="${match}">Your browser does not support the video tag.</video>`;
}
// Then process italic, handling both single and double underscores
content = content.replace(ITALIC_REGEX, match => {
if (AUDIO_URL_REGEX.test(match)) {
return `<audio controls class="w-full my-4" preload="none"><source src="${match}">Your browser does not support the audio tag.</audio>`;
}
if (IMAGE_URL_REGEX.test(match)) {
return `<img src="${match}" alt="Embedded media" class="max-w-full h-auto rounded-lg shadow-lg my-4" loading="lazy" decoding="async">`;
}
return `<a href="${match}" target="_blank" rel="noopener noreferrer" class="text-blue-500 hover:text-blue-600 dark:text-blue-400 dark:hover:text-blue-300">${match}</a>`;
});
// Process text formatting
processedText = processedText.replace(BOLD_REGEX, '<strong>$2</strong>');
processedText = processedText.replace(ITALIC_REGEX, match => {
const text = match.replace(/^_+|_+$/g, '');
return `<em>${text}</em>`;
});
// Then process strikethrough, handling both single and double tildes
content = content.replace(STRIKETHROUGH_REGEX, (match, doubleText, singleText) => {
processedText = processedText.replace(STRIKETHROUGH_REGEX, (match, doubleText, singleText) => {
const text = doubleText || singleText;
return `<del class="line-through">${text}</del>`;
});
// Finally process hashtags - style them with a lighter color
content = content.replace(HASHTAG_REGEX, '<span class="text-gray-500 dark:text-gray-400">#$1</span>');
return content;
// Process hashtags
processedText = processedText.replace(HASHTAG_REGEX, '<span class="text-gray-500 dark:text-gray-400">#$1</span>');
} catch (error) {
console.error('Error in processBasicFormatting:', error);
return content;
}
return processedText;
}
/**
 * Pull the 11-character video id out of a YouTube URL.
 *
 * Recognised forms: `youtube.com/watch?v=ID`, `youtube.com/embed/ID`,
 * `youtu.be/ID` and `youtube-nocookie.com/embed/ID`.
 *
 * @param url - Text that may contain a YouTube URL.
 * @returns The video id, or `null` when no recognised form is present.
 */
function extractYouTubeVideoId(url: string): string | null {
  const idPattern =
    /(?:youtube\.com\/(?:watch\?v=|embed\/)|youtu\.be\/|youtube-nocookie\.com\/embed\/)([a-zA-Z0-9_-]{11})/;
  const found = idPattern.exec(url);
  if (found === null) {
    return null;
  }
  return found[1];
}
/**
* Process blockquotes
*/
function processBlockquotes(content: string): string {
try {
if (!content) return '';
return content.replace(BLOCKQUOTE_REGEX, match => {
// Split into lines and process each line
const lines = match.split('\n').map(line => {
// Remove the '>' marker and trim any whitespace after it
return line.replace(/^[ \t]*>[ \t]?/, '').trim();
});
// Join the lines with proper spacing and wrap in blockquote
return `<blockquote class="pl-4 border-l-4 border-gray-300 dark:border-gray-600 my-4">${
lines.join('\n')
}</blockquote>`;
@ -109,113 +133,43 @@ function processBlockquotes(content: string): string { @@ -109,113 +133,43 @@ function processBlockquotes(content: string): string {
}
}
/**
 * Convert a run of leading whitespace into a nesting depth.
 *
 * Every two characters count as one level; an odd trailing character is ignored.
 *
 * @param spaces - The leading-whitespace string of a line.
 * @returns The whole number of two-character steps in `spaces`.
 */
function getIndentLevel(spaces: string): number {
  const width = spaces.length;
  // Drop the odd remainder first so the division is exact.
  return (width - (width % 2)) / 2;
}
/**
 * Convert Markdown list lines into nested `<ol>` / `<ul>` HTML.
 *
 * The input is scanned line by line. A line whose trimmed form starts with
 * `N. ` is an ordered item; one starting with `- ` or `* ` is an unordered
 * item. Nesting depth is the count of leading whitespace characters divided
 * by two (rounded down). Any other line closes all open lists and is passed
 * through unchanged.
 *
 * @param content - Markdown text.
 * @returns The text with each run of list lines replaced by list markup.
 */
function processLists(content: string): string {
  type OpenList = { type: 'ol' | 'ul'; items: string[]; level: number };

  const output: string[] = [];
  // Stack of lists currently being built; the innermost list is last.
  const open: OpenList[] = [];

  // Render the innermost open list and hand the markup to its parent
  // (appended to the parent's most recent item) or to the output.
  const closeList = (): void => {
    const finished = open.pop();
    if (finished === undefined) {
      return;
    }
    const tag = finished.type;
    const markerClass = tag === 'ol' ? 'list-decimal' : 'list-disc';
    const marginClass = finished.level > 0 ? 'ml-6' : 'ml-4';
    const renderedItems = finished.items
      .map(item => `\n <li class="pl-1">${item}</li>`)
      .join('');
    const html = `<${tag} class="${markerClass} ${marginClass} my-2 space-y-2">${renderedItems}\n</${tag}>`;

    const parent = open[open.length - 1];
    if (parent === undefined) {
      output.push(html);
      return;
    }
    const lastItem = parent.items.pop()!;
    parent.items.push(lastItem + '\n' + html);
  };

  const closeAll = (): void => {
    while (open.length > 0) {
      closeList();
    }
  };

  for (const line of content.split('\n')) {
    // Two leading whitespace characters make one nesting level.
    const leading = line.match(/^(\s*)/)?.[0]?.length ?? 0;
    const level = Math.floor(leading / 2);

    const trimmed = line.trim();
    const ordered = /^(\d+)\.[ \t]+(.+)$/.exec(trimmed);
    const unordered = /^[-*][ \t]+(.+)$/.exec(trimmed);

    if (!ordered && !unordered) {
      // Plain line: finish every open list, then emit the line untouched.
      closeAll();
      output.push(line);
      continue;
    }

    const text = ordered ? ordered[2] : (unordered && unordered[1]) || '';
    const kind: 'ol' | 'ul' = ordered ? 'ol' : 'ul';

    // Unwind lists that are nested deeper than this item.
    while (open.length > 0 && open[open.length - 1].level > level) {
      closeList();
    }

    let current: OpenList | undefined = open[open.length - 1];
    if (current === undefined || current.level < level) {
      // Deeper than anything open (or nothing open): start a new list.
      current = { type: kind, items: [], level };
      open.push(current);
    } else if (current.type !== kind && current.level === level) {
      // Same depth but the marker style changed: swap to a fresh list.
      closeList();
      current = { type: kind, items: [], level };
      open.push(current);
    }

    current.items.push(text);
  }

  closeAll();
  return output.join('\n');
}
/**
* Parse markdown text with basic formatting
*/
export async function parseBasicMarkdown(text: string): Promise<string> {
try {
if (!text) return '';
let processedText = text;
if (!text) return '';
// Process lists first to handle indentation properly
processedText = processLists(processedText);
// Process blockquotes next
try {
// Process basic text formatting first
let processedText = processBasicFormatting(text);
// Process lists - handle ordered lists first
processedText = processedText
// Process ordered lists
.replace(ORDERED_LIST_REGEX, (match, marker, content) => {
// Count leading spaces to determine nesting level
const indent = marker.match(/^\s*/)[0].length;
const extraIndent = indent > 0 ? ` ml-${indent * 4}` : '';
return `<li class="py-2${extraIndent}">${content}</li>`;
})
.replace(/<li.*?>.*?<\/li>\n?/gs, '<ol class="list-decimal my-4 ml-8">$&</ol>')
// Process unordered lists
.replace(UNORDERED_LIST_REGEX, (match, marker, content) => {
// Count leading spaces to determine nesting level
const indent = marker.match(/^\s*/)[0].length;
const extraIndent = indent > 0 ? ` ml-${indent * 4}` : '';
return `<li class="py-2${extraIndent}">${content}</li>`;
})
.replace(/<li.*?>.*?<\/li>\n?/gs, '<ul class="list-disc my-4 ml-8">$&</ul>');
// Process blockquotes
processedText = processBlockquotes(processedText);
// Process paragraphs
processedText = processParagraphs(processedText);
// Process basic text formatting
processedText = processBasicFormatting(processedText);
// Process paragraphs - split by double newlines and wrap in p tags
processedText = processedText
.split(/\n\n+/)
.map(para => para.trim())
.filter(para => para.length > 0)
.map(para => `<p class="my-4">${para}</p>`)
.join('\n');
// Process Nostr identifiers last
processedText = await processNostrIdentifiers(processedText);
@ -223,9 +177,6 @@ export async function parseBasicMarkdown(text: string): Promise<string> { @@ -223,9 +177,6 @@ export async function parseBasicMarkdown(text: string): Promise<string> {
return processedText;
} catch (error) {
console.error('Error in parseBasicMarkdown:', error);
if (error instanceof Error) {
return `<div class="text-red-500">Error processing markdown: ${error.message}</div>`;
}
return '<div class="text-red-500">An error occurred while processing the markdown</div>';
return `<div class="text-red-500">Error processing markdown: ${error instanceof Error ? error.message : 'Unknown error'}</div>`;
}
}

1
src/lib/utils/nostrUtils.ts

@ -118,6 +118,7 @@ function createNoteLink(identifier: string): string { @@ -118,6 +118,7 @@ function createNoteLink(identifier: string): string {
* Process Nostr identifiers in text
*/
export async function processNostrIdentifiers(content: string): Promise<string> {
console.log('Processing Nostr identifiers:', { input: content });
let processedContent = content;
// Process profiles (npub and nprofile)

Loading…
Cancel
Save