Browse Source

implementation

master
Silberengel 2 weeks ago
parent
commit
e10cf615a0
  1. 4
      package.json
  2. 275
      src/converters/to-asciidoc.ts
  3. 4
      src/detector.ts
  4. 263
      src/extractors/metadata.ts
  5. 1
      src/index.ts
  6. 88
      src/parser.ts
  7. 66
      src/processors/asciidoc-links.ts
  8. 145
      src/processors/asciidoc.ts
  9. 52
      src/processors/code.ts
  10. 192
      src/processors/html-postprocess.ts
  11. 37
      src/processors/latex.ts
  12. 49
      src/processors/markdown-links.ts
  13. 36
      src/processors/markdown.ts
  14. 27
      src/processors/music.ts
  15. 28
      src/processors/nostr.ts
  16. 42
      src/processors/plain.ts
  17. 29
      src/types.ts
  18. 20
      src/types/asciidoctor.d.ts

4
package.json

@ -21,9 +21,7 @@
"author": "", "author": "",
"license": "MIT", "license": "MIT",
"dependencies": { "dependencies": {
"@asciidoctor/core": "^3.0.4", "@asciidoctor/core": "^3.0.4"
"highlight.js": "^11.10.0",
"marked": "^12.0.0"
}, },
"devDependencies": { "devDependencies": {
"@types/node": "^20.11.0", "@types/node": "^20.11.0",

275
src/converters/to-asciidoc.ts

@ -0,0 +1,275 @@
import { ContentFormat } from '../types';
/**
 * Options accepted by convertToAsciidoc.
 */
export interface ConvertOptions {
/** `nostr:` addresses are rewritten unless this is explicitly `false`. */
enableNostrAddresses?: boolean;
}
/**
 * Single entry point that turns content of any detected format into AsciiDoc.
 *
 * The text is first normalised according to `format`; afterwards wikilinks,
 * `nostr:` addresses (unless `options.enableNostrAddresses` is `false`) and
 * hashtags are rewritten, in that order.
 *
 * @param content Raw event content.
 * @param format Detected format of `content`.
 * @param linkBaseURL Forwarded to the wikilink and nostr passes.
 * @param options Optional feature switches.
 * @returns The AsciiDoc text.
 */
export function convertToAsciidoc(
  content: string,
  format: ContentFormat,
  linkBaseURL: string,
  options: ConvertOptions = {}
): string {
  let converted: string;
  if (format === ContentFormat.AsciiDoc) {
    // Literal "\n" sequences become real newlines, then every section title
    // that directly follows a non-empty line gets a blank line in front of it.
    converted = content
      .replace(/\\n/g, '\n')
      .replace(
        /(\S[^\n]*)\n(={1,6}\s+[^\n]+)/g,
        (_whole, preceding, heading) => `${preceding}\n\n${heading}`
      );
  } else if (format === ContentFormat.Markdown) {
    converted = convertMarkdownToAsciidoc(content);
  } else {
    // Plain text and any unrecognised format share the same path.
    converted = convertPlainTextToAsciidoc(content);
  }

  const withWikilinks = processWikilinks(converted, linkBaseURL);
  const nostrEnabled = options.enableNostrAddresses !== false;
  const withNostr = nostrEnabled
    ? processNostrAddresses(withWikilinks, linkBaseURL)
    : withWikilinks;
  return processHashtags(withNostr);
}
/**
 * Converts Markdown to AsciiDoc through an ordered series of regex rewrites.
 *
 * The order of the passes matters: each pass runs on the output of the
 * previous ones (for example, links are already `link:url[text]` by the time
 * blockquote attributions are parsed).
 *
 * @param content Markdown source; literal backslash-n sequences are treated as newlines.
 * @returns The AsciiDoc rendition of `content`.
 */
function convertMarkdownToAsciidoc(content: string): string {
  let asciidoc = content.replace(/\\n/g, '\n');

  // Fix spacing issues around inline code, parentheses and `==`.
  asciidoc = asciidoc.replace(/`([^`\n]+)`\s*\(([^)]+)\)/g, '`$1` ($2)');
  asciidoc = asciidoc.replace(/([a-zA-Z0-9])`([^`\n]+)`([a-zA-Z0-9])/g, '$1 `$2` $3');
  asciidoc = asciidoc.replace(/([a-zA-Z0-9])`([^`\n]+)`\s*\(/g, '$1 `$2` (');
  asciidoc = asciidoc.replace(/\)`([^`\n]+)`([a-zA-Z0-9])/g, ') `$1` $2');
  asciidoc = asciidoc.replace(/([a-zA-Z0-9])\)([a-zA-Z0-9])/g, '$1) $2');
  asciidoc = asciidoc.replace(/([a-zA-Z0-9])==/g, '$1 ==');

  // Convert headers (deepest level first so shorter `#` runs cannot match early).
  asciidoc = asciidoc.replace(/^#{6}\s+(.+)$/gm, '====== $1 ======');
  asciidoc = asciidoc.replace(/^#{5}\s+(.+)$/gm, '===== $1 =====');
  asciidoc = asciidoc.replace(/^#{4}\s+(.+)$/gm, '==== $1 ====');
  asciidoc = asciidoc.replace(/^#{3}\s+(.+)$/gm, '=== $1 ===');
  asciidoc = asciidoc.replace(/^#{2}\s+(.+)$/gm, '== $1 ==');
  asciidoc = asciidoc.replace(/^#{1}\s+(.+)$/gm, '= $1 =');
  // Normalise whitespace inside level-2 titles.
  asciidoc = asciidoc.replace(/^==\s+(.+?)\s+==$/gm, '== $1 ==');
  asciidoc = asciidoc.replace(/\s==\s+([^=]+?)\s+==\s/g, ' == $1 == ');

  // Convert emphasis in a single pass. Doing bold first and italic afterwards
  // (as separate passes) re-matched the freshly produced `*bold*` and turned
  // every bold span into `_italic_`.
  asciidoc = asciidoc.replace(
    /\*\*(.+?)\*\*|__(.+?)__|\*(.+?)\*/g,
    (_match, starBold, underscoreBold, italic) => {
      const bold = starBold ?? underscoreBold;
      return bold !== undefined ? `*${bold}*` : `_${italic}_`;
    }
  );
  asciidoc = asciidoc.replace(/~~(.+?)~~/g, '[line-through]#$1#'); // Strikethrough
  asciidoc = asciidoc.replace(/~(.+?)~/g, '[subscript]#$1#'); // Subscript
  asciidoc = asciidoc.replace(/\^(.+?)\^/g, '[superscript]#$1#'); // Superscript

  // Convert fenced code blocks. Fences whose body looks like prose or
  // Markdown rather than code are left untouched.
  asciidoc = asciidoc.replace(/```(\w+)?\n([\s\S]*?)\n```/g, (fence, lang, code) => {
    const trimmedCode = code.trim();
    if (trimmedCode.length === 0) return '';
    const hasCodePatterns = /[{}();=<>]|function|class|import|export|def |if |for |while |return |const |let |var |public |private |static |console\.log/.test(trimmedCode);
    const isLikelyText = /^[A-Za-z\s.,!?\-'"]+$/.test(trimmedCode) && trimmedCode.length > 50;
    const hasTooManySpaces = (trimmedCode.match(/\s{3,}/g) || []).length > 3;
    const hasMarkdownPatterns = /^#{1,6}\s|^\*\s|^\d+\.\s|^\>\s|^\|.*\|/.test(trimmedCode);
    if ((!hasCodePatterns && trimmedCode.length > 100) || isLikelyText || hasTooManySpaces || hasMarkdownPatterns) {
      return fence;
    }
    return `[source${lang ? ',' + lang : ''}]\n----\n${trimmedCode}\n----`;
  });
  // Inline code already uses the same backtick syntax in AsciiDoc; only
  // `$...$` inside inline code is rewritten.
  asciidoc = asciidoc.replace(/`\$([^$]+)\$`/g, '`$\\$1\\$$`'); // Preserve LaTeX in code

  // Convert images
  asciidoc = asciidoc.replace(/!\[([^\]]*)\]\(([^)]+)\)/g, 'image::$2[$1,width=100%]');

  // Convert links
  asciidoc = asciidoc.replace(/\[([^\]]+)\]\(([^)]+)\)/g, 'link:$2[$1]');

  // Convert horizontal rules
  asciidoc = asciidoc.replace(/^---$/gm, '\'\'\'');

  // Convert unordered lists (all three Markdown markers become `*`).
  asciidoc = asciidoc.replace(/^(\s*)\*\s+(.+)$/gm, '$1* $2');
  asciidoc = asciidoc.replace(/^(\s*)-\s+(.+)$/gm, '$1* $2');
  asciidoc = asciidoc.replace(/^(\s*)\+\s+(.+)$/gm, '$1* $2');

  // Convert ordered lists
  asciidoc = asciidoc.replace(/^(\s*)\d+\.\s+(.+)$/gm, '$1. $2');

  // Convert blockquotes, with an optional trailing attribution line that
  // starts with an em dash or `--`.
  asciidoc = asciidoc.replace(/^(>\s+.+(?:\n>\s+.+)*)/gm, (match) => {
    const lines = match.split('\n').map(line => line.replace(/^>\s*/, ''));
    // Default to the whole quote: without an attribution every line is body.
    // (Previously the body stayed empty in that case and the quote was lost.)
    let quoteBodyLines: string[] = lines;
    let attributionLine: string | undefined;
    for (let i = lines.length - 1; i >= 0; i--) {
      const line = lines[i].trim();
      if (line.startsWith('—') || line.startsWith('--')) {
        attributionLine = line;
        quoteBodyLines = lines.slice(0, i);
        break;
      }
    }
    const quoteContent = quoteBodyLines.filter(l => l.trim() !== '').join('\n').trim();
    if (!attributionLine) {
      return `____\n${quoteContent}\n____`;
    }

    const cleanedAttribution = attributionLine.replace(/^[—-]+/, '').trim();
    let author = '';
    let source = '';
    // Links were converted above, so a linked source looks like `link:url[text]`.
    const linkMatch = cleanedAttribution.match(/^(.*?),?\s*link:([^\[\]]+)\[([^\]]+)\]$/);
    if (linkMatch) {
      author = linkMatch[1].trim();
      source = `link:${linkMatch[2].trim()}[${linkMatch[3].trim()}]`;
    } else {
      const parts = cleanedAttribution.split(',').map(p => p.trim());
      author = parts[0];
      if (parts.length > 1) {
        source = parts.slice(1).join(', ').trim();
      }
    }
    return `[quote, ${author}, ${source}]\n____\n${quoteContent}\n____`;
  });

  // Convert tables.
  // NOTE(review): the column spec is hard-coded to two columns and the
  // Markdown rows are copied verbatim — confirm this renders as intended for
  // wider tables.
  asciidoc = asciidoc.replace(/(\|.*\|[\r\n]+\|[\s\-\|]*[\r\n]+(\|.*\|[\r\n]+)*)/g, (match) => {
    const lines = match.trim().split('\n').filter(line => line.trim());
    if (lines.length < 2) return match;
    const headerRow = lines[0];
    const separatorRow = lines[1];
    const dataRows = lines.slice(2);
    if (!separatorRow.includes('-')) return match;
    let tableAsciidoc = '[cols="1,1"]\n|===\n';
    tableAsciidoc += headerRow + '\n';
    dataRows.forEach(row => {
      tableAsciidoc += row + '\n';
    });
    tableAsciidoc += '|===';
    return tableAsciidoc;
  });

  // Convert footnotes: collect and strip the definitions first, then inline
  // them at each reference. A Map is used so ids such as "constructor" do not
  // resolve to inherited object properties.
  const footnoteDefinitions = new Map<string, string>();
  const withoutDefinitions = asciidoc.replace(
    /^\[\^([^\]]+)\]:\s*([\s\S]*?)(?=\n\[\^|\n---|\n##|\n###|\n####|\n#####|\n######|$)/gm,
    (_, id, text) => {
      footnoteDefinitions.set(id, text.trim());
      return '';
    }
  );
  return withoutDefinitions.replace(/\[\^([^\]]+)\]/g, (match, id) => {
    const definition = footnoteDefinitions.get(id);
    return definition ? `footnote:[${definition}]` : match;
  });
}
/**
 * Converts plain text to AsciiDoc.
 *
 * A single newline becomes an AsciiDoc hard line break (` +` at the end of the
 * line). Runs of two or more newlines are left untouched so blank-line
 * paragraph breaks survive instead of turning into lines that contain only
 * ` +`.
 *
 * @param content Plain text.
 * @returns Text with hard line breaks inserted.
 */
function convertPlainTextToAsciidoc(content: string): string {
  // Only newlines with no neighbouring newline are soft breaks.
  return content.replace(/(?<!\n)\n(?!\n)/g, ' +\n');
}
/**
 * Normalizes text to d-tag format.
 *
 * The text is lower-cased, every run of characters that are neither letters
 * nor digits is collapsed into a single `-`, and leading/trailing hyphens are
 * removed. Letters and digits are matched with Unicode properties so
 * non-Latin titles keep their characters instead of collapsing to an empty
 * tag; ASCII input yields the same result as an `a-z0-9` class would.
 *
 * @param text Wikilink target or title.
 * @returns The normalized d-tag (possibly empty).
 */
function normalizeDtag(text: string): string {
  return text
    .toLowerCase()
    .replace(/[^\p{L}\p{N}]+/gu, '-')
    .replace(/^-+|-+$/g, '');
}
/**
 * Rewrites wikilink syntax into intermediate macros.
 *
 * - `[[book::...]]` becomes `BOOKSTR:<trimmed content>`.
 * - `[[target]]` / `[[target|display]]` becomes `wikilink:<dtag>[<display>]`,
 *   where the d-tag comes from normalizeDtag and the display text defaults to
 *   the trimmed target.
 *
 * `linkBaseURL` is accepted but not used by this function.
 */
function processWikilinks(content: string, linkBaseURL: string): string {
  const bookstrPattern = /\[\[book::([^\]]+)\]\]/g;
  const wikilinkPattern = /\[\[([^|\]]+)(?:\|([^\]]+))?\]\]/g;

  // Bookstr links go first so the generic pattern never sees them.
  const withBookstr = content.replace(
    bookstrPattern,
    (_whole, inner) => 'BOOKSTR:' + inner.trim()
  );

  return withBookstr.replace(wikilinkPattern, (_whole, rawTarget, rawLabel) => {
    const target = rawTarget.trim();
    const label = rawLabel ? rawLabel.trim() : target;
    return `wikilink:${normalizeDtag(target)}[${label}]`;
  });
}
/**
 * Wraps `nostr:` addresses in a link macro: `nostr:<id>` becomes
 * `link:nostr:<id>[<id>]`.
 *
 * An id is a run of at least seven lowercase letters or digits; shorter runs
 * are left alone. `linkBaseURL` is accepted but not used by this function.
 */
function processNostrAddresses(content: string, linkBaseURL: string): string {
  const nostrAddressPattern = /nostr:([a-z0-9]+[a-z0-9]{6,})/g;
  // The captured id only contains [a-z0-9], so a `$1` template is safe here.
  return content.replace(nostrAddressPattern, 'link:nostr:$1[$1]');
}
/**
 * Rewrites hashtags into `hashtag:<lowercased tag>[#<tag as written>]`.
 *
 * A `#` only counts when it is not directly preceded by a word character
 * (`\B`), so `foo#bar` is left alone.
 * NOTE(review): a `#` preceded by punctuation such as `/` or `:` still
 * matches — confirm that is acceptable for URLs like `https://host/#top`.
 */
function processHashtags(content: string): string {
  const hashtagPattern = /\B#([a-zA-Z0-9_]+)/g;
  return content.replace(
    hashtagPattern,
    (_whole, tag) => 'hashtag:' + tag.toLowerCase() + '[#' + tag + ']'
  );
}

4
src/detector.ts

@ -15,7 +15,9 @@ export function detectFormat(content: string): ContentFormat {
'----', // Listing block '----', // Listing block
'....', // Literal block '....', // Literal block
'|===', // Table '|===', // Table
':', // Attribute (common in AsciiDoc) 'link:', // AsciiDoc link format
'wikilink:', // Wikilink macro
'hashtag:', // Hashtag macro
]; ];
let asciidocScore = 0; let asciidocScore = 0;

263
src/extractors/metadata.ts

@ -0,0 +1,263 @@
import { NostrLink, Wikilink } from '../types';
/**
 * Everything extractMetadata collects from a piece of content.
 */
export interface ExtractedMetadata {
/** `nostr:`-prefixed identifiers whose prefix is a recognised type, de-duplicated by id. */
nostrLinks: NostrLink[];
/** `[[target]]` / `[[target|display]]` links, de-duplicated by d-tag plus display text. */
wikilinks: Wikilink[];
/** Lower-cased hashtag names without the leading `#`, de-duplicated. */
hashtags: string[];
/** Markdown links, AsciiDoc `link:` macros and raw http(s) URLs, excluding nostr URLs. */
links: Array<{ url: string; text: string; isExternal: boolean }>;
/** URLs that look like images or videos by file extension. */
media: string[];
}
/**
 * Runs every extractor over the raw content and bundles the results.
 *
 * @param content Raw content.
 * @param linkBaseURL Base URL used to classify links as external.
 */
export function extractMetadata(content: string, linkBaseURL: string): ExtractedMetadata {
  const nostrLinks = extractNostrLinks(content);
  const wikilinks = extractWikilinks(content);
  const hashtags = extractHashtags(content);
  const links = extractLinks(content, linkBaseURL);
  const media = extractMedia(content);
  return { nostrLinks, wikilinks, hashtags, links, media };
}
/**
 * Collects `nostr:` references from the content.
 *
 * Only ids whose prefix getNostrType recognises are kept, and each id is
 * reported once (first occurrence wins).
 */
function extractNostrLinks(content: string): NostrLink[] {
  const found: NostrLink[] = [];
  const knownIds = new Set<string>();
  const prefixed = content.match(/nostr:([a-z0-9]+[a-z0-9]{6,})/g) ?? [];

  for (const text of prefixed) {
    // Drop the leading "nostr:" to get the bare identifier.
    const id = text.slice('nostr:'.length);
    const type = getNostrType(id);
    if (!type || knownIds.has(id)) continue;
    knownIds.add(id);
    found.push({ type, id, text, bech32: id });
  }
  return found;
}
/**
 * Collects `[[target]]` and `[[target|display]]` wikilinks.
 *
 * Entries are de-duplicated on the pair (normalized d-tag, display text); the
 * display text defaults to the trimmed target.
 */
function extractWikilinks(content: string): Wikilink[] {
  const collected: Wikilink[] = [];
  const seenKeys = new Set<string>();

  for (const hit of content.matchAll(/\[\[([^|\]]+)(?:\|([^\]]+))?\]\]/g)) {
    const target = hit[1].trim();
    const display = hit[2] ? hit[2].trim() : target;
    const dtag = normalizeDtag(target);
    const key = `${dtag}|${display}`;
    if (seenKeys.has(key)) continue;
    seenKeys.add(key);
    collected.push({ dtag, display, original: hit[0] });
  }
  return collected;
}
/**
 * Collects the distinct hashtags in the content, lower-cased and without the
 * leading `#`, in order of first appearance.
 *
 * A `#` directly preceded by a word character is ignored (`\B`), so URL
 * fragments such as `page#section` and mid-word `#` are not reported. This is
 * the same boundary rule the converter uses when it rewrites hashtags.
 */
function extractHashtags(content: string): string[] {
  const unique = new Set<string>();
  for (const [, tag] of content.matchAll(/\B#([a-zA-Z0-9_]+)/g)) {
    unique.add(tag.toLowerCase());
  }
  // A Set iterates in insertion order, which preserves first-seen ordering.
  return [...unique];
}
/**
 * Collects regular (non-nostr) links from the content.
 *
 * Three sources are scanned, in this order: Markdown `[text](url)`, AsciiDoc
 * `link:url[text]`, and raw http(s) URLs. Each URL is reported once.
 *
 * The raw-URL scan runs on a copy of the content with the Markdown and
 * AsciiDoc links blanked out. Otherwise the URL inside `[text](url)` would be
 * matched again together with its trailing `)` (or `[text]`), producing a
 * bogus duplicate entry.
 *
 * @param content Raw content.
 * @param linkBaseURL Base URL used to decide whether a link is external.
 */
function extractLinks(content: string, linkBaseURL: string): Array<{ url: string; text: string; isExternal: boolean }> {
  const links: Array<{ url: string; text: string; isExternal: boolean }> = [];
  const seen = new Set<string>();

  const addLink = (url: string, text: string): void => {
    if (seen.has(url) || isNostrUrl(url)) return;
    seen.add(url);
    links.push({ url, text, isExternal: isExternalUrl(url, linkBaseURL) });
  };

  const markdownLinkPattern = /\[([^\]]+)\]\(([^)]+)\)/g;
  const asciidocLinkPattern = /link:([^\[]+)\[([^\]]+)\]/g;

  // Markdown links: [text](url)
  for (const [, text, url] of content.matchAll(markdownLinkPattern)) {
    addLink(url, text);
  }

  // AsciiDoc links: link:url[text]
  for (const [, url, text] of content.matchAll(asciidocLinkPattern)) {
    addLink(url, text);
  }

  // Raw URLs, looked up only outside the link syntaxes handled above.
  const remaining = content
    .replace(markdownLinkPattern, ' ')
    .replace(asciidocLinkPattern, ' ');
  for (const [url] of remaining.matchAll(/https?:\/\/[^\s<>"']+/g)) {
    addLink(url, url);
  }

  return links;
}
/**
 * Collects image and video URLs from the content.
 *
 * Markdown images, AsciiDoc `image::` macros and raw http(s) URLs are scanned
 * in that order; a URL is kept once, and only if isImageUrl or isVideoUrl
 * accepts it.
 */
function extractMedia(content: string): string[] {
  const media: string[] = [];
  const seen = new Set<string>();

  const collect = (url: string | undefined): void => {
    if (!url || seen.has(url)) return;
    if (!isImageUrl(url) && !isVideoUrl(url)) return;
    media.push(url);
    seen.add(url);
  };

  // Markdown images: ![alt](url)
  for (const hit of content.matchAll(/!\[[^\]]*\]\(([^)]+)\)/g)) {
    collect(hit[1]);
  }
  // AsciiDoc images: image::url[alt]
  for (const hit of content.matchAll(/image::([^\[]+)\[/g)) {
    collect(hit[1]);
  }
  // Raw URLs
  for (const hit of content.matchAll(/https?:\/\/[^\s<>"']+/g)) {
    collect(hit[0]);
  }
  return media;
}
/**
 * Classifies a Nostr identifier by its prefix.
 *
 * @returns The first matching prefix, or `null` when none matches.
 */
function getNostrType(id: string): 'npub' | 'nprofile' | 'nevent' | 'naddr' | 'note' | null {
  const prefixes = ['npub', 'nprofile', 'nevent', 'naddr', 'note'] as const;
  return prefixes.find(prefix => id.startsWith(prefix)) ?? null;
}
/**
 * Normalize text to d-tag format.
 *
 * The text is lower-cased, every run of characters that are neither letters
 * nor digits is collapsed into a single `-`, and leading/trailing hyphens are
 * removed. Letters and digits are matched with Unicode properties so
 * non-Latin titles keep their characters instead of collapsing to an empty
 * tag; ASCII input yields the same result as an `a-z0-9` class would.
 *
 * @param text Wikilink target or title.
 * @returns The normalized d-tag (possibly empty).
 */
function normalizeDtag(text: string): string {
  return text
    .toLowerCase()
    .replace(/[^\p{L}\p{N}]+/gu, '-')
    .replace(/^-+|-+$/g, '');
}
/**
 * Decides whether `url` points to a different host than `linkBaseURL`.
 *
 * Hosts are extracted with a string pattern (no URL parsing) and compared
 * case-insensitively, since host names are not case-sensitive. The host ends
 * at the first `/`, `?` or `#`, so `https://host?x=1` yields `host`.
 *
 * @returns `true` when `linkBaseURL` is empty, when either argument is not an
 *   absolute http(s) URL, or when the hosts differ.
 */
function isExternalUrl(url: string, linkBaseURL: string): boolean {
  if (!linkBaseURL) return true;

  const hostPattern = /^https?:\/\/([^\/?#]+)/i;
  const urlHost = url.match(hostPattern)?.[1];
  const baseHost = linkBaseURL.match(hostPattern)?.[1];

  // NOTE(review): relative URLs have no host and are therefore reported as
  // external — confirm that is what callers expect.
  if (urlHost === undefined || baseHost === undefined) return true;

  return urlHost.toLowerCase() !== baseHost.toLowerCase();
}
/**
 * Tells whether a URL is a Nostr reference: either it carries the `nostr:`
 * scheme or it starts with one of the prefixes getNostrType recognises.
 */
function isNostrUrl(url: string): boolean {
  if (url.startsWith('nostr:')) {
    return true;
  }
  return getNostrType(url) !== null;
}
/**
 * Check if URL is an image, judged by its file extension
 * (jpeg, jpg, png, gif, webp, svg; case-insensitive).
 *
 * The extension may be followed by a query string or fragment, so
 * `photo.png?w=100` is still recognised.
 */
function isImageUrl(url: string): boolean {
  return /\.(jpeg|jpg|png|gif|webp|svg)(?:[?#].*)?$/i.test(url);
}
/**
 * Check if URL is a video, judged by its file extension
 * (mp4, webm, ogg; case-insensitive).
 *
 * The extension may be followed by a query string or fragment, so
 * `clip.mp4?t=10` is still recognised.
 */
function isVideoUrl(url: string): boolean {
  return /\.(mp4|webm|ogg)(?:[?#].*)?$/i.test(url);
}

1
src/index.ts

@ -1,2 +1,3 @@
export * from './parser'; export * from './parser';
export * from './types'; export * from './types';
export * from './detector';

88
src/parser.ts

@ -1,12 +1,8 @@
import { ParserOptions, ProcessResult, ContentFormat } from './types'; import { ParserOptions, ProcessResult, ContentFormat } from './types';
import { processAsciiDoc } from './processors/asciidoc';
import { processMarkdown } from './processors/markdown';
import { processPlainText } from './processors/plain';
import { processNostrAddresses } from './processors/nostr';
import { detectFormat } from './detector'; import { detectFormat } from './detector';
import { processLaTeX, hasLaTeX } from './processors/latex'; import { convertToAsciidoc } from './converters/to-asciidoc';
import { processMusicalNotation, hasMusicalNotation } from './processors/music'; import { processAsciidoc } from './processors/asciidoc';
import { ensureCodeHighlighting } from './processors/code'; import { extractMetadata } from './extractors/metadata';
/** /**
* Default parser options * Default parser options
@ -27,6 +23,8 @@ export function defaultOptions(): ParserOptions {
* Main parser for Nostr event content * Main parser for Nostr event content
* Handles multiple content formats: AsciiDoc, Markdown, code syntax, * Handles multiple content formats: AsciiDoc, Markdown, code syntax,
* LaTeX, musical notation, and nostr: prefixed addresses * LaTeX, musical notation, and nostr: prefixed addresses
*
* Everything is converted to AsciiDoc first, then processed through AsciiDoctor
*/ */
export class Parser { export class Parser {
private options: Required<ParserOptions>; private options: Required<ParserOptions>;
@ -47,63 +45,45 @@ export class Parser {
/** /**
* Process Nostr event content and return HTML * Process Nostr event content and return HTML
* Automatically detects the content format and processes accordingly * Automatically detects the content format and processes accordingly
* Everything is converted to AsciiDoc first, then processed through AsciiDoctor
*/ */
async process(content: string): Promise<ProcessResult> { async process(content: string): Promise<ProcessResult> {
// First, process nostr: addresses (if enabled) // Extract metadata from original content (before conversion)
if (this.options.enableNostrAddresses) { const metadata = extractMetadata(content, this.options.linkBaseURL);
content = processNostrAddresses(content, this.options.linkBaseURL);
}
// Detect content format // Detect content format
const format = detectFormat(content); const format = detectFormat(content);
let result: ProcessResult; // Convert everything to AsciiDoc format first
const asciidocContent = convertToAsciidoc(
switch (format) { content,
case ContentFormat.AsciiDoc: format,
if (this.options.enableAsciiDoc) { this.options.linkBaseURL,
result = await processAsciiDoc(content, this.options.linkBaseURL); {
} else if (this.options.enableMarkdown) { enableNostrAddresses: this.options.enableNostrAddresses,
// Fallback to markdown if AsciiDoc is disabled
result = await processMarkdown(content, this.options.linkBaseURL);
} else {
result = processPlainText(content);
}
break;
case ContentFormat.Markdown:
if (this.options.enableMarkdown) {
result = await processMarkdown(content, this.options.linkBaseURL);
} else {
// Fallback to plain text
result = processPlainText(content);
}
break;
default:
// Plain text or mixed content
result = processPlainText(content);
}
// Post-process: handle LaTeX and musical notation in the HTML
if (this.options.enableLaTeX) {
result.hasLaTeX = hasLaTeX(result.content);
if (result.hasLaTeX) {
result.content = processLaTeX(result.content);
}
}
if (this.options.enableMusicalNotation) {
result.hasMusicalNotation = hasMusicalNotation(result.content);
if (result.hasMusicalNotation) {
result.content = processMusicalNotation(result.content);
}
} }
);
// Ensure code highlighting is applied if enabled // Process through AsciiDoctor
if (this.options.enableCodeHighlighting) { const result = await processAsciidoc(
result.content = ensureCodeHighlighting(result.content); asciidocContent,
{
enableCodeHighlighting: this.options.enableCodeHighlighting,
enableLaTeX: this.options.enableLaTeX,
enableMusicalNotation: this.options.enableMusicalNotation,
originalContent: content, // Pass original for LaTeX detection
} }
);
return result; // Combine with extracted metadata
return {
...result,
nostrLinks: metadata.nostrLinks,
wikilinks: metadata.wikilinks,
hashtags: metadata.hashtags,
links: metadata.links,
media: metadata.media,
};
} }
} }

66
src/processors/asciidoc-links.ts

@ -1,66 +0,0 @@
/**
 * Normalizes a d tag (per the original comment, according to NIP-54 rules).
 *
 * Steps, in order: lower-case; whitespace runs become `-`; every character
 * that is not `a-z`, `0-9`, `-` or non-ASCII (U+0080–U+FFFF) is dropped;
 * consecutive hyphens collapse; leading and trailing hyphens are removed.
 */
export function normalizeDTag(dTag: string): string {
  return dTag
    .toLowerCase()
    .replace(/\s+/g, '-')
    .replace(/[^a-z0-9\-\u0080-\uFFFF]/g, '')
    .replace(/-+/g, '-')
    .replace(/^-+|-+$/g, '');
}
/**
 * Rewrites wikilinks and `nostr:` links in AsciiDoc content into `link:` macros.
 *
 * - `[[target]]` / `[[target|display]]` becomes
 *   `link:<linkBaseURL>/events?d=<dtag>[display]`, or `link:#<dtag>[display]`
 *   when no base URL is given.
 * - `nostr:naddr1…` / `nostr:nevent1…` becomes
 *   `link:<linkBaseURL>/events?id=<id>[nostr:<id>]`; without a base URL the
 *   text is left as is.
 */
export function rewriteAsciiDocLinks(content: string, linkBaseURL: string): string {
  const withWikilinks = content.replace(/\[\[([^\]]+)\]\]/g, (_whole, inner) => {
    // Only the first two pipe-separated parts are considered.
    const [rawTarget, rawDisplay] = inner.split('|', 2);
    const target = rawTarget.trim();
    const display = rawDisplay === undefined ? target : rawDisplay.trim();
    const normalized = normalizeDTag(target);

    return linkBaseURL
      ? `link:${linkBaseURL}/events?d=${normalized}[${display}]`
      : `link:#${normalized}[${display}]`;
  });

  return withWikilinks.replace(
    /nostr:(naddr1[^\s\]]+|nevent1[^\s\]]+)/g,
    (whole, nostrID) =>
      linkBaseURL ? `link:${linkBaseURL}/events?id=${nostrID}[${whole}]` : whole
  );
}

145
src/processors/asciidoc.ts

@ -1,49 +1,150 @@
import asciidoctor from '@asciidoctor/core'; import asciidoctor from '@asciidoctor/core';
import { ProcessResult } from '../types'; import { ProcessResult } from '../types';
import { rewriteAsciiDocLinks } from './asciidoc-links'; import { extractTOC, sanitizeHTML } from './html-utils';
import { extractTOC, sanitizeHTML, processLinks } from './html-utils'; import { postProcessHtml } from './html-postprocess';
const asciidoctorInstance = asciidoctor(); const asciidoctorInstance = asciidoctor();
export interface ProcessOptions {
enableCodeHighlighting?: boolean;
enableLaTeX?: boolean;
enableMusicalNotation?: boolean;
originalContent?: string; // Original content for LaTeX detection
}
/** /**
* Processes AsciiDoc content to HTML * Processes AsciiDoc content to HTML using AsciiDoctor
* Uses AsciiDoctor's built-in highlight.js and LaTeX support
*/ */
export async function processAsciiDoc(content: string, linkBaseURL: string): Promise<ProcessResult> { export async function processAsciidoc(
// Rewrite links in AsciiDoc content content: string,
const processedContent = rewriteAsciiDocLinks(content, linkBaseURL); options: ProcessOptions = {}
): Promise<ProcessResult> {
const {
enableCodeHighlighting = true,
enableLaTeX = true,
enableMusicalNotation = true,
} = options;
// Check if content starts with level 3+ headers
// Asciidoctor article doctype requires level 1 (=) or level 2 (==) before level 3 (===)
// If content starts with level 3+, use book doctype
const firstHeaderMatch = content.match(/^(={1,6})\s+/m);
let doctype: 'article' | 'book' = 'article';
if (firstHeaderMatch) {
const firstHeaderLevel = firstHeaderMatch[1].length;
if (firstHeaderLevel >= 3) {
doctype = 'book';
}
}
// Convert AsciiDoc to HTML try {
const html = asciidoctorInstance.convert(processedContent, { const result = asciidoctorInstance.convert(content, {
safe: 'safe', safe: 'safe',
backend: 'html5', backend: 'html5',
doctype: 'article', doctype: doctype,
attributes: { attributes: {
showtitle: true, 'showtitle': true,
icons: 'font', 'sectanchors': true,
sectanchors: true, 'sectlinks': true,
sectlinks: true, 'toc': 'left',
toc: 'left', 'toclevels': 6,
toclevels: 3, 'toc-title': 'Table of Contents',
}, 'source-highlighter': enableCodeHighlighting ? 'highlight.js' : 'none',
}) as string; 'stem': enableLaTeX ? 'latexmath' : 'none',
'data-uri': true,
'imagesdir': '',
'linkcss': false,
'stylesheet': '',
'stylesdir': '',
'prewrap': true,
'sectnums': false,
'sectnumlevels': 6,
'experimental': true,
'compat-mode': false,
'attribute-missing': 'warn',
'attribute-undefined': 'warn',
'skip-front-matter': true,
'source-indent': 0,
'indent': 0,
'tabsize': 2,
'tabwidth': 2,
'hardbreaks': false,
'paragraph-rewrite': 'normal',
'sectids': true,
'idprefix': '',
'idseparator': '-',
'sectidprefix': '',
'sectidseparator': '-'
}
});
const htmlString = typeof result === 'string' ? result : result.toString();
// Extract table of contents from HTML // Extract table of contents from HTML
const { toc, contentWithoutTOC } = extractTOC(html); const { toc, contentWithoutTOC } = extractTOC(htmlString);
// Sanitize HTML to prevent XSS // Sanitize HTML to prevent XSS
const sanitized = sanitizeHTML(contentWithoutTOC); const sanitized = sanitizeHTML(contentWithoutTOC);
// Process links: make external links open in new tab, local links in same tab // Post-process HTML: convert macros to HTML, add styling, etc.
const processed = processLinks(sanitized, linkBaseURL); const processed = postProcessHtml(sanitized, {
enableMusicalNotation,
});
// Also sanitize and process links in TOC // Also process TOC
const tocSanitized = sanitizeHTML(toc); const tocSanitized = sanitizeHTML(toc);
const tocProcessed = processLinks(tocSanitized, linkBaseURL); const tocProcessed = postProcessHtml(tocSanitized, {
enableMusicalNotation: false, // Don't process music in TOC
});
// Check for LaTeX in original content (more reliable than checking HTML)
const contentToCheck = options.originalContent || content;
const hasLaTeX = enableLaTeX && hasMathContent(contentToCheck);
// Check for musical notation in processed HTML
const hasMusicalNotation = enableMusicalNotation && (
/class="abc-notation"|class="lilypond-notation"|class="chord"|class="musicxml-notation"/.test(processed)
);
return { return {
content: processed, content: processed,
tableOfContents: tocProcessed, tableOfContents: tocProcessed,
hasLaTeX,
hasMusicalNotation,
nostrLinks: [], // Will be populated by metadata extraction
wikilinks: [],
hashtags: [],
links: [],
media: [],
};
} catch (error) {
// Fallback to plain text
return {
content: `<p>${sanitizeHTML(content)}</p>`,
tableOfContents: '',
hasLaTeX: false, hasLaTeX: false,
hasMusicalNotation: false, hasMusicalNotation: false,
nostrLinks: [],
wikilinks: [],
hashtags: [],
links: [],
media: [],
}; };
}
}
/**
* Check if content has LaTeX math
* Based on jumble's detection pattern
*/
function hasMathContent(content: string): boolean {
// Check for inline math: $...$ or \(...\)
const inlineMath = /\$[^$]+\$|\\\([^)]+\\\)/.test(content);
// Check for block math: $$...$$ or \[...\]
const blockMath = /\$\$[\s\S]*?\$\$|\\\[[\s\S]*?\\\]/.test(content);
return inlineMath || blockMath;
} }

52
src/processors/code.ts

@ -1,52 +0,0 @@
import hljs from 'highlight.js';
/**
 * Applies highlight.js to every `<pre><code>…</code></pre>` block in the HTML.
 *
 * When the block carries a `language-xxx` class that highlight.js knows, that
 * language is used; otherwise the language is auto-detected. If highlighting
 * throws, the block is returned unchanged.
 */
export function ensureCodeHighlighting(html: string): string {
  const codeBlockPattern = /<pre><code(?:\s+class=["']language-([^"']+)["'])?[^>]*>(.*?)<\/code><\/pre>/gs;

  return html.replace(codeBlockPattern, (original, lang, code) => {
    // highlight.js expects raw source, not HTML-escaped text.
    const source = unescapeHTML(code);

    try {
      const useRequested = Boolean(lang) && Boolean(hljs.getLanguage(lang));
      const highlighted: hljs.HighlightResult = useRequested
        ? hljs.highlight(source, { language: lang })
        : hljs.highlightAuto(source);

      const langClass = highlighted.language ? ` class="language-${highlighted.language}"` : '';
      return `<pre><code${langClass}>${highlighted.value}</code></pre>`;
    } catch {
      // Highlighting failed: keep the block as it was.
      return original;
    }
  });
}
/**
 * Unescapes the five basic HTML entities (`&lt;`, `&gt;`, `&quot;`, `&#39;`,
 * `&amp;`).
 *
 * `&amp;` is decoded last. Decoding it earlier would create new entities out
 * of escaped ampersands (`&amp;quot;` → `&quot;`) that a later replacement
 * would then decode a second time.
 */
function unescapeHTML(text: string): string {
  return text
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&quot;/g, '"')
    .replace(/&#39;/g, "'")
    .replace(/&amp;/g, '&');
}

192
src/processors/html-postprocess.ts

@ -0,0 +1,192 @@
import { processMusicalNotation } from './music';
/**
 * Options accepted by postProcessHtml.
 */
export interface PostProcessOptions {
/** When truthy, the HTML is additionally passed through processMusicalNotation. */
enableMusicalNotation?: boolean;
}
/**
* Post-processes HTML output from AsciiDoctor.
*
* Rewrites the textual markers left in the HTML (BOOKSTR:, hashtag:, wikilink:
* and link:nostr:) into HTML elements with data attributes and CSS classes,
* then runs the image, optional musical-notation, markdown-cleanup, styling
* and ToC passes, in that order.
*
* @param html HTML to post-process.
* @param options Optional switches; musical notation is only processed when
*   `enableMusicalNotation` is truthy.
* @returns The post-processed HTML.
*/
export function postProcessHtml(html: string, options: PostProcessOptions = {}): string {
let processed = html;
// Convert bookstr markers to HTML placeholders.
// The marker content ends at the first whitespace, `<` or `>`.
processed = processed.replace(/BOOKSTR:([^<>\s]+)/g, (_match, bookContent) => {
const escaped = bookContent.replace(/"/g, '&quot;').replace(/'/g, '&#39;');
return `<span data-bookstr="${escaped}" class="bookstr-placeholder"></span>`;
});
// Convert hashtag links to HTML
// NOTE(review): the tag is placed in the href as-is (no URL-encoding) and the
// display text is emitted without further escaping — confirm the input is
// already safe at this point.
processed = processed.replace(/hashtag:([^[]+)\[([^\]]+)\]/g, (_match, normalizedHashtag, displayText) => {
return `<a href="/notes?t=${normalizedHashtag}" class="hashtag-link text-green-600 dark:text-green-400 hover:text-green-700 dark:hover:text-green-300 hover:underline">${displayText}</a>`;
});
// Convert wikilink:dtag[display] format to HTML
// Only double quotes are escaped for the data attributes.
processed = processed.replace(/wikilink:([^[]+)\[([^\]]+)\]/g, (_match, dTag, displayText) => {
const escapedDtag = dTag.replace(/"/g, '&quot;');
const escapedDisplay = displayText.replace(/"/g, '&quot;');
return `<span class="wikilink cursor-pointer text-blue-600 hover:text-blue-800 hover:underline border-b border-dotted border-blue-300" data-dtag="${escapedDtag}" data-display="${escapedDisplay}">${displayText}</span>`;
});
// Convert nostr: links to HTML; the output element depends on the identifier type.
processed = processed.replace(/link:nostr:([^[]+)\[([^\]]+)\]/g, (_match, bech32Id, displayText) => {
const nostrType = getNostrType(bech32Id);
if (nostrType === 'nevent' || nostrType === 'naddr' || nostrType === 'note') {
// Render as embedded event placeholder
const escaped = bech32Id.replace(/"/g, '&quot;');
return `<div data-embedded-note="${escaped}" class="embedded-note-container">Loading embedded event...</div>`;
} else if (nostrType === 'npub' || nostrType === 'nprofile') {
// Render as user handle
const escaped = bech32Id.replace(/"/g, '&quot;');
return `<span class="user-handle" data-pubkey="${escaped}">@${displayText}</span>`;
} else {
// Fallback to regular link.
// NOTE(review): every recognised type is handled above, so nostrType is
// null here and data-nostr-type always ends up as "unknown".
const escaped = bech32Id.replace(/"/g, '&quot;');
return `<a href="nostr:${bech32Id}" class="nostr-link text-blue-600 hover:text-blue-800 hover:underline" data-nostr-type="${nostrType || 'unknown'}" data-bech32="${escaped}">${displayText}</a>`;
}
});
// Process images: add max-width styling and data attributes
processed = processImages(processed);
// Process musical notation if enabled (skipped when the option is omitted)
if (options.enableMusicalNotation) {
processed = processMusicalNotation(processed);
}
// Clean up any leftover markdown syntax
processed = cleanupMarkdown(processed);
// Add styling classes
processed = addStylingClasses(processed);
// Hide raw ToC text
processed = hideRawTocText(processed);
return processed;
}
/**
 * Classify a Nostr identifier by its leading prefix.
 *
 * @param id - Identifier text with the `nostr:` scheme already removed.
 * @returns The matching prefix (`npub`, `nprofile`, `nevent`, `naddr` or `note`),
 *          or `null` when the identifier starts with none of them.
 */
function getNostrType(id: string): 'npub' | 'nprofile' | 'nevent' | 'naddr' | 'note' | null {
  // Checked in this order; none of the prefixes is a prefix of another.
  const knownPrefixes = ['npub', 'nprofile', 'nevent', 'naddr', 'note'] as const;
  for (const prefix of knownPrefixes) {
    if (id.startsWith(prefix)) {
      return prefix;
    }
  }
  return null;
}
/**
 * Process images: add max-width styling and data attributes.
 *
 * Every `<img>` tag that carries a quoted `src` attribute gets:
 *  - sizing/zoom utility classes (merged into an existing `class`, replacing any
 *    previous `max-w-*` class, or added as a new `class` attribute);
 *  - `data-asciidoc-image="true"`;
 *  - `data-image-index`, the position of its URL among the distinct image URLs
 *    of the document, in order of first appearance;
 *  - `data-image-src`, a copy of the URL.
 *
 * Tags without a quoted `src` are returned untouched.
 *
 * @param html - HTML fragment to post-process.
 * @returns The fragment with every eligible `<img>` tag rewritten.
 */
function processImages(html: string): string {
  // `src` must open the attribute list or follow whitespace/a closing quote, so that
  // `data-src="…"` (lazy-loading markup) is never mistaken for the real source.
  const srcPattern = /(?:^|[\s"'])src=["']([^"']+)["']/i;

  // First pass: assign each distinct URL an index in order of first appearance.
  // Uses the same attribute parsing as the rewrite pass so both always agree.
  const indexBySrc = new Map<string, number>();
  const imgScanner = /<img([^>]+)>/gi;
  let found: RegExpExecArray | null;
  while ((found = imgScanner.exec(html)) !== null) {
    const scannedSrc = srcPattern.exec(found[1] ?? '')?.[1];
    if (scannedSrc !== undefined && !indexBySrc.has(scannedSrc)) {
      indexBySrc.set(scannedSrc, indexBySrc.size);
    }
  }

  // Second pass: rewrite each tag.
  return html.replace(/<img([^>]+)>/gi, (imgTag: string, attributes: string) => {
    const src = srcPattern.exec(attributes)?.[1];
    if (src === undefined) return imgTag;

    // A trailing "/" (self-closing syntax) must stay at the very end of the tag;
    // detach it here and re-append it after the new attributes.
    const selfClosing = /[\s"']\/\s*$/.test(attributes);
    let updatedAttributes = selfClosing ? attributes.replace(/\s*\/\s*$/, '') : attributes;

    if (updatedAttributes.match(/class=["']/i)) {
      updatedAttributes = updatedAttributes.replace(
        /class=["']([^"']*)["']/i,
        (_match: string, classes: string) => {
          // Drop any existing max-width utility so ours is the only one.
          const cleanedClasses = classes.replace(/max-w-\[?[^\s\]]+\]?/g, '').trim();
          const newClasses = cleanedClasses
            ? `${cleanedClasses} max-w-[400px] object-contain cursor-zoom-in`
            : 'max-w-[400px] object-contain cursor-zoom-in';
          return `class="${newClasses}"`;
        }
      );
    } else {
      updatedAttributes += ` class="max-w-[400px] h-auto object-contain cursor-zoom-in"`;
    }

    const currentIndex = indexBySrc.get(src) ?? -1;
    updatedAttributes += ` data-asciidoc-image="true" data-image-index="${currentIndex}" data-image-src="${src.replace(/"/g, '&quot;')}"`;
    return `<img${updatedAttributes}${selfClosing ? ' /' : ''}>`;
  });
}
/**
 * Clean up leftover markdown syntax.
 *
 * Converts markdown image (`![alt](url)`) and link (`[text](url)`) syntax that is
 * still present in the HTML into `<img>` / `<a>` elements.
 *
 * A markdown link is left as-is when the document (after image conversion)
 * already contains an `href="…"` attribute with the same URL.
 *
 * URL and alt text are written into double-quoted attributes, so any `"` they
 * contain is encoded as `&quot;` to keep it from terminating the attribute.
 *
 * @param html - HTML fragment that may contain leftover markdown.
 * @returns The fragment with leftover markdown images and links converted.
 */
function cleanupMarkdown(html: string): string {
  const escapeAttr = (value: string): string => value.replace(/"/g, '&quot;');

  // Clean up markdown image syntax
  const withImages = html.replace(
    /!\[([^\]]*)\]\(([^)]+)\)/g,
    (_match: string, alt: string, url: string) => {
      const altText = alt || '';
      return `<img src="${escapeAttr(url)}" alt="${escapeAttr(altText)}" class="max-w-[400px] object-contain my-0" />`;
    }
  );

  // Clean up markdown link syntax. The "already linked" check looks at the
  // document as it was before any link in this pass was converted.
  return withImages.replace(
    /\[([^\]]+)\]\(([^)]+)\)/g,
    (match: string, text: string, url: string) => {
      if (withImages.includes(`href="${url}"`)) {
        return match;
      }
      return `<a href="${escapeAttr(url)}" target="_blank" rel="noreferrer noopener" class="break-words inline-flex items-baseline gap-1">${text} <svg class="size-3" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 6H6a2 2 0 00-2 2v10a2 2 0 002 2h10a2 2 0 002-2v-4M14 4h6m0 0v6m0-6L10 14" /></svg></a>`;
    }
  );
}
/**
 * Add proper CSS classes for styling.
 *
 * Applies a fixed list of rewrites, in order:
 * strikethrough, subscript and superscript spans get extra utility classes, and
 * any `highlightjs…` class list on `<pre>` / `<code>` is normalised to
 * `highlightjs hljs`.
 *
 * @param html - HTML fragment to decorate.
 * @returns The fragment with the additional classes applied.
 */
function addStylingClasses(html: string): string {
  const rewrites: Array<[RegExp, string]> = [
    // Strikethrough
    [/<span class="line-through">([^<]+)<\/span>/g, '<span class="line-through line-through-2">$1</span>'],
    // Subscript
    [/<span class="subscript">([^<]+)<\/span>/g, '<span class="subscript text-xs align-sub">$1</span>'],
    // Superscript
    [/<span class="superscript">([^<]+)<\/span>/g, '<span class="superscript text-xs align-super">$1</span>'],
    // Code highlighting
    [/<pre class="highlightjs[^"]*">/g, '<pre class="highlightjs hljs">'],
    [/<code class="highlightjs[^"]*">/g, '<code class="highlightjs hljs">'],
  ];

  return rewrites.reduce(
    (current, [pattern, replacement]) => current.replace(pattern, replacement),
    html
  );
}
/**
 * Hide raw AsciiDoc ToC text.
 *
 * Deletes headings and paragraphs whose text contains "Table of Contents"
 * followed by a parenthesised number, and paragraphs containing "Assumptions"
 * followed by the literal `[n=0]`.
 *
 * @param html - HTML fragment to clean.
 * @returns The fragment with the matching elements removed.
 */
function hideRawTocText(html: string): string {
  const rawTocPatterns: RegExp[] = [
    /<h[1-6][^>]*>.*?Table of Contents.*?\(\d+\).*?<\/h[1-6]>/gi,
    /<p[^>]*>.*?Table of Contents.*?\(\d+\).*?<\/p>/gi,
    /<p[^>]*>.*?Assumptions.*?\[n=0\].*?<\/p>/gi,
  ];

  let result = html;
  for (const pattern of rawTocPatterns) {
    result = result.replace(pattern, '');
  }
  return result;
}

37
src/processors/latex.ts

@ -1,37 +0,0 @@
/**
 * Checks if content contains LaTeX math expressions.
 *
 * Recognised delimiters:
 *  - inline: `$...$` and `\(...\)`
 *  - block:  `$$...$$` and `\[...\]`
 *
 * The backslash-delimited forms are matched lazily up to the first closing
 * delimiter, so expressions that themselves contain parentheses or brackets
 * (`\(f(x)\)`, `\[a[i]\]`) are detected.
 *
 * @param content - Text to inspect.
 * @returns `true` when at least one delimited expression is found.
 */
export function hasLaTeX(content: string): boolean {
  // Check for inline math: $...$ or \(...\)
  const inlineMathPattern = /\$[^$]+\$|\\\([\s\S]+?\\\)/;
  // Check for block math: $$...$$ or \[...\]
  const blockMathPattern = /\$\$[^$]+\$\$|\\\[[\s\S]+?\\\]/;
  return inlineMathPattern.test(content) || blockMathPattern.test(content);
}
/**
 * Processes LaTeX math expressions in HTML content.
 * Wraps LaTeX expressions in appropriate HTML for rendering with MathJax or KaTeX.
 *
 *  - Block math (`$$...$$` or `\[...\]`) becomes
 *    `<div class="math-block">\[...\]</div>`.
 *  - Inline math (`$...$` on a single line, or `\(...\)`) becomes
 *    `<span class="math-inline">\(...\)</span>`.
 *
 * Block math is handled first so `$$...$$` is not consumed by the inline
 * `$...$` rule. The backslash-delimited forms are matched lazily up to the first
 * closing delimiter, so content containing `)` or `]` (e.g. `\(f(x)\)`) is
 * wrapped as well.
 *
 * @param html - HTML (or text) that may contain math expressions.
 * @returns The input with each math expression wrapped; content is trimmed.
 */
export function processLaTeX(html: string): string {
  // Process block math: $$...$$ or \[...\]
  const blockMathPattern = /\$\$([^$]+)\$\$|\\\[([\s\S]+?)\\\]/g;
  const withBlocks = html.replace(
    blockMathPattern,
    (_match: string, dollarContent?: string, bracketContent?: string) => {
      // Exactly one of the two capture groups is set, depending on the delimiter used.
      const mathContent = (dollarContent || bracketContent || '').trim();
      return `<div class="math-block">\\[${mathContent}\\]</div>`;
    }
  );

  // Process inline math: $...$ or \(...\)
  const inlineMathPattern = /\$([^$\n]+)\$|\\\(([\s\S]+?)\\\)/g;
  return withBlocks.replace(
    inlineMathPattern,
    (_match: string, dollarContent?: string, bracketContent?: string) => {
      const mathContent = (dollarContent || bracketContent || '').trim();
      return `<span class="math-inline">\\(${mathContent}\\)</span>`;
    }
  );
}

49
src/processors/markdown-links.ts

@ -1,49 +0,0 @@
import { normalizeDTag } from './asciidoc-links';
/**
 * Rewrites wikilinks and nostr: links in Markdown content.
 *
 * Wikilinks (`[[target]]` or `[[target|display text]]`) become Markdown links.
 * The target is normalised with `normalizeDTag`; with a `linkBaseURL` the link
 * points to `${linkBaseURL}/events?d=<normalized>`, otherwise to the in-page
 * anchor `#<normalized>`. Only the first `|` separates target from display
 * text, so display text may itself contain `|`.
 *
 * `nostr:` references (naddr1/nevent1/note1/npub1/nprofile1) become Markdown
 * links when `linkBaseURL` is set: npub goes to `/profile?pubkey=`, nprofile to
 * `/profile?id=`, everything else to `/events?id=`. Without a base URL they are
 * left unchanged.
 *
 * @param content - Markdown source.
 * @param linkBaseURL - Base URL for generated links; empty string disables absolute links.
 * @returns The Markdown with wikilinks and nostr: references rewritten.
 */
export function rewriteMarkdownLinks(content: string, linkBaseURL: string): string {
  // Rewrite wikilinks: [[target]] or [[target|display text]]
  const wikilinkRegex = /\[\[([^\]]+)\]\]/g;
  const withWikilinks = content.replace(wikilinkRegex, (_match: string, inner: string) => {
    // Split on the first pipe only. `split('|', 2)` would silently drop anything
    // after a second pipe, truncating display text such as "a | b".
    const pipeIndex = inner.indexOf('|');
    const target = (pipeIndex === -1 ? inner : inner.slice(0, pipeIndex)).trim();
    const display = pipeIndex === -1 ? target : inner.slice(pipeIndex + 1).trim();

    const normalized = normalizeDTag(target);
    if (linkBaseURL) {
      const url = `${linkBaseURL}/events?d=${normalized}`;
      return `[${display}](${url})`;
    }
    return `[${display}](#${normalized})`;
  });

  // Rewrite nostr: links in Markdown
  const nostrLinkRegex = /nostr:(naddr1[^\s\]]+|nevent1[^\s\]]+|note1[^\s\]]+|npub1[^\s\]]+|nprofile1[^\s\]]+)/g;
  return withWikilinks.replace(nostrLinkRegex, (match: string, nostrID: string) => {
    if (!linkBaseURL) {
      return match;
    }
    let url: string;
    if (nostrID.startsWith('npub')) {
      url = `${linkBaseURL}/profile?pubkey=${nostrID}`;
    } else if (nostrID.startsWith('nprofile')) {
      url = `${linkBaseURL}/profile?id=${nostrID}`;
    } else {
      url = `${linkBaseURL}/events?id=${nostrID}`;
    }
    return `[${match}](${url})`;
  });
}

36
src/processors/markdown.ts

@ -1,36 +0,0 @@
import { marked } from 'marked';
import { ProcessResult } from '../types';
import { rewriteMarkdownLinks } from './markdown-links';
import { sanitizeHTML, processLinks } from './html-utils';
// Configure marked options.
// This runs once, as a side effect of importing this module, and applies to the
// shared `marked` instance imported above.
// NOTE(review): `breaks` / `gfm` presumably enable <br> on single newlines and
// GitHub-flavoured Markdown, per marked's documented options — confirm.
// NOTE(review): `headerIds` and `mangle` appear to have been removed from marked's
// core options in newer major versions (moved to separate extensions); confirm
// against the installed marked version that they still have any effect.
marked.setOptions({
breaks: true,
gfm: true,
headerIds: true,
mangle: false,
});
/**
 * Processes Markdown content to HTML.
 *
 * Pipeline: rewrite wikilinks / nostr: links in the Markdown source, render it
 * with `marked`, sanitize the resulting HTML, then post-process its links.
 *
 * @param content - Markdown source.
 * @param linkBaseURL - Base URL handed to the link rewriting and link processing steps.
 * @returns A result whose `content` is the processed HTML; the table of contents
 *          is empty and both notation flags are `false`.
 */
export async function processMarkdown(content: string, linkBaseURL: string): Promise<ProcessResult> {
  // Markdown-level link rewriting happens before rendering.
  const rewritten = rewriteMarkdownLinks(content, linkBaseURL);
  const rendered = (await marked.parse(rewritten)) as string;

  // Sanitize first (XSS protection), then adjust link targets on the safe HTML.
  const finalHtml = processLinks(sanitizeHTML(rendered), linkBaseURL);

  return {
    content: finalHtml,
    tableOfContents: '',
    hasLaTeX: false,
    hasMusicalNotation: false,
  };
}

27
src/processors/music.ts

@ -1,57 +1,32 @@
/**
 * Checks if content contains musical notation.
 *
 * The content counts as musical when any one of these heuristics matches:
 * ABC header fields, LilyPond commands, MusicXML-like tags, or a bracketed
 * chord symbol.
 *
 * @param content - Text to inspect.
 * @returns `true` when at least one heuristic matches.
 */
export function hasMusicalNotation(content: string): boolean {
  const notationPatterns: RegExp[] = [
    // ABC notation: X:1, K:C, etc.
    /X:\s*\d+|K:\s*[A-G]|M:\s*\d+\/\d+/i,
    // LilyPond notation: \relative, \clef, etc.
    /\\relative|\\clef|\\key|\\time/,
    // MusicXML-like tags: <note>, <pitch>, etc.
    /<note>|<pitch>|<rest>/i,
    // Simple chord notation: [C], [Am], etc.
    /\[[A-G][#b]?m?[0-9]?\]/,
  ];
  return notationPatterns.some((pattern) => pattern.test(content));
}
/** /**
* Processes musical notation in HTML content * Processes musical notation in HTML content
* Wraps musical notation in appropriate HTML for rendering * Wraps musical notation in appropriate HTML for rendering
*/ */
export function processMusicalNotation(html: string): string { export function processMusicalNotation(html: string): string {
// Process ABC notation blocks // Process ABC notation blocks
// ABC notation typically starts with X:1 and contains multiple lines
const abcBlockPattern = /(X:\s*\d+[^\n]*\n(?:[^\n]+\n)*)/gs; const abcBlockPattern = /(X:\s*\d+[^\n]*\n(?:[^\n]+\n)*)/gs;
html = html.replace(abcBlockPattern, (match) => { html = html.replace(abcBlockPattern, (match) => {
const abcContent = match.trim(); const abcContent = match.trim();
// Wrap in a div for ABC.js or similar renderer
return `<div class="abc-notation" data-abc="${escapeForAttr(abcContent)}">${abcContent}</div>`; return `<div class="abc-notation" data-abc="${escapeForAttr(abcContent)}">${abcContent}</div>`;
}); });
// Process LilyPond notation blocks // Process LilyPond notation blocks
// LilyPond notation is typically in code blocks or between \relative and }
const lilypondPattern = /(\\relative[^}]+})/gs; const lilypondPattern = /(\\relative[^}]+})/gs;
html = html.replace(lilypondPattern, (match) => { html = html.replace(lilypondPattern, (match) => {
const lilypondContent = match.trim(); const lilypondContent = match.trim();
// Wrap in a div for LilyPond rendering
return `<div class="lilypond-notation" data-lilypond="${escapeForAttr(lilypondContent)}">${lilypondContent}</div>`; return `<div class="lilypond-notation" data-lilypond="${escapeForAttr(lilypondContent)}">${lilypondContent}</div>`;
}); });
// Process inline chord notation: [C], [Am], [F#m7], etc. // Process inline chord notation: [C], [Am], [F#m7], etc.
const chordPattern = /\[([A-G][#b]?m?[0-9]?[^\[\]]*)\]/g; const chordPattern = /\[([A-G][#b]?m?[0-9]?[^\[\]]*)\]/g;
html = html.replace(chordPattern, (match, chord) => { html = html.replace(chordPattern, (match, chord) => {
// Wrap in a span for chord rendering
return `<span class="chord" data-chord="${escapeForAttr(chord)}">[${chord}]</span>`; return `<span class="chord" data-chord="${escapeForAttr(chord)}">[${chord}]</span>`;
}); });
// Process MusicXML-like notation (if present in content) // Process MusicXML-like notation
const musicxmlPattern = /(<music[^>]*>.*?<\/music>)/gs; const musicxmlPattern = /(<music[^>]*>.*?<\/music>)/gs;
html = html.replace(musicxmlPattern, (match) => { html = html.replace(musicxmlPattern, (match) => {
const musicxmlContent = match.trim(); const musicxmlContent = match.trim();
// Wrap in a div for MusicXML rendering
return `<div class="musicxml-notation" data-musicxml="${escapeForAttr(musicxmlContent)}">${musicxmlContent}</div>`; return `<div class="musicxml-notation" data-musicxml="${escapeForAttr(musicxmlContent)}">${musicxmlContent}</div>`;
}); });

28
src/processors/nostr.ts

@ -1,28 +0,0 @@
/**
 * Processes nostr: prefixed addresses.
 *
 * Each `nostr:<id>` occurrence (id: two or more lowercase letters/digits) is
 * wrapped in markup with the `nostr-address` class:
 *  - with a `linkBaseURL` and a recognised prefix, an `<a>` pointing to
 *    `/events?id=` (naddr, nevent, note), `/profile?pubkey=` (npub) or
 *    `/profile?id=` (nprofile);
 *  - otherwise a `<span>`.
 *
 * @param content - Text containing nostr: addresses.
 * @param linkBaseURL - Base URL for generated links; empty string yields spans only.
 * @returns The text with every nostr: address wrapped.
 */
export function processNostrAddresses(content: string, linkBaseURL: string): string {
  // Pattern: nostr:naddr1..., nostr:nevent1..., nostr:note1..., nostr:npub1..., nostr:nprofile1...
  const nostrPattern = /nostr:([a-z0-9]+[a-z0-9]{1,})/g;
  const eventPrefixes = ['naddr', 'nevent', 'note'];

  // Route (path + query) for a recognised identifier, or null for an unknown prefix.
  const routeFor = (nostrID: string): string | null => {
    if (eventPrefixes.some((prefix) => nostrID.startsWith(prefix))) {
      return `/events?id=${nostrID}`;
    }
    if (nostrID.startsWith('npub')) {
      return `/profile?pubkey=${nostrID}`;
    }
    if (nostrID.startsWith('nprofile')) {
      return `/profile?id=${nostrID}`;
    }
    return null;
  };

  return content.replace(nostrPattern, (whole: string, nostrID: string) => {
    const route = linkBaseURL ? routeFor(nostrID) : null;
    return route === null
      ? `<span class="nostr-address">${whole}</span>`
      : `<a href="${linkBaseURL}${route}" class="nostr-address">${whole}</a>`;
  });
}

42
src/processors/plain.ts

@ -1,42 +0,0 @@
import { ProcessResult } from '../types';
/**
 * Escapes HTML special characters.
 *
 * Replaces `&`, `<`, `>`, `"` and `'` with their entity forms. The ampersand is
 * handled first so the entities produced by later steps are not escaped again.
 *
 * @param text - Raw text.
 * @returns The text with the five special characters escaped.
 */
function escapeHTML(text: string): string {
  const substitutions: Array<[RegExp, string]> = [
    [/&/g, '&amp;'],
    [/</g, '&lt;'],
    [/>/g, '&gt;'],
    [/"/g, '&quot;'],
    [/'/g, '&#39;'],
  ];

  let escaped = text;
  for (const [pattern, entity] of substitutions) {
    escaped = escaped.replace(pattern, entity);
  }
  return escaped;
}
/**
 * Processes plain text content with basic formatting.
 *
 * The text is HTML-escaped, every newline becomes `<br>` followed by a newline,
 * and the result is split into paragraphs wherever two such breaks occur in a
 * row. Each non-empty, trimmed paragraph is wrapped in `<p>…</p>`.
 *
 * @param text - Raw plain text.
 * @returns A result whose `content` is the paragraphs joined by newlines; the
 *          table of contents is empty and both notation flags are `false`.
 */
export function processPlainText(text: string): ProcessResult {
  const paragraphs = escapeHTML(text)
    .replace(/\n/g, '<br>\n') // single line breaks
    .split('<br>\n<br>\n') // blank line => paragraph boundary
    .map((chunk) => chunk.trim())
    .filter((chunk) => chunk.length > 0)
    .map((chunk) => `<p>${chunk}</p>`);

  return {
    content: paragraphs.join('\n'),
    tableOfContents: '',
    hasLaTeX: false,
    hasMusicalNotation: false,
  };
}

29
src/types.ts

@ -18,6 +18,25 @@ export interface ParserOptions {
enableNostrAddresses?: boolean; enableNostrAddresses?: boolean;
} }
/**
 * Nostr link information
 */
export interface NostrLink {
/** Kind of Nostr identifier the link carries; limited to these five literal values. */
type: 'npub' | 'nprofile' | 'nevent' | 'naddr' | 'note';
/** NOTE(review): how `id` differs from `bech32` is not visible here — confirm against the code that builds these objects. */
id: string;
/** Presumably the display text associated with the link — TODO confirm. */
text: string;
/** Presumably the bech32-encoded identifier as it appeared in the content — TODO confirm. */
bech32: string;
}
/**
 * Wikilink information
 */
export interface Wikilink {
/** Presumably the d-tag the wikilink resolves to (possibly normalized) — TODO confirm. */
dtag: string;
/** Presumably the text shown to the reader for this link — TODO confirm. */
display: string;
/** Presumably the wikilink exactly as written in the source content — TODO confirm. */
original: string;
}
/** /**
* Result of processing content * Result of processing content
*/ */
@ -30,6 +49,16 @@ export interface ProcessResult {
hasLaTeX: boolean; hasLaTeX: boolean;
/** Indicates if musical notation was found */ /** Indicates if musical notation was found */
hasMusicalNotation: boolean; hasMusicalNotation: boolean;
/** Extracted Nostr links */
nostrLinks: NostrLink[];
/** Extracted wikilinks */
wikilinks: Wikilink[];
/** Extracted hashtags */
hashtags: string[];
/** Extracted regular links */
links: Array<{ url: string; text: string; isExternal: boolean }>;
/** Extracted media URLs */
media: string[];
} }
/** /**

20
src/types/asciidoctor.d.ts vendored

@ -0,0 +1,20 @@
/**
 * Type declarations for @asciidoctor/core
 * These are minimal types - the actual types should come from the package
 *
 * Only the surface declared below is typed: a default-exported factory that
 * returns an object with a single `convert` method.
 */
declare module '@asciidoctor/core' {
// Options accepted by `convert`; every field is optional.
// NOTE(review): the set of valid `safe` / `backend` / `doctype` values is not
// constrained here (plain `string`) — check the Asciidoctor documentation.
interface ConvertOptions {
safe?: string;
backend?: string;
doctype?: string;
// Values are typed `any`, so nothing passed here is checked by the compiler.
attributes?: Record<string, any>;
extension_registry?: any;
}
interface Asciidoctor {
// NOTE: `string | any` collapses to `any` for the type checker, so the result
// is effectively untyped; callers should narrow it before using it as a string.
convert(content: string, options?: ConvertOptions): string | any;
}
// Factory function: call it to obtain an `Asciidoctor` object.
function asciidoctor(): Asciidoctor;
export default asciidoctor;
}
Loading…
Cancel
Save