initial commit

3 months ago · d689b1b050
19 changed files with 1171 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,33 @@
				@@ -0,0 +1,33 @@
+# Binaries
+*.exe
+*.exe~
+*.dll
+*.so
+*.dylib
+gc-parser
+
+# Test binary
+*.test
+
+# Output
+*.out
+
+# Go workspace file
+go.work
+
+# Node.js
+node_modules/
+package-lock.json
+dist/
+*.log
+
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+*~
+
+# OS
+.DS_Store
+Thumbs.db
--- a/README.md
+++ b/README.md
@ -0,0 +1,212 @@
				@@ -0,0 +1,212 @@
+# GC Parser
+
+A super-parser for Nostr event content that handles multiple content formats including AsciiDoc, Markdown, code syntax highlighting, LaTeX, musical notation, and `nostr:` prefixed addresses.
+
+Built with TypeScript/JavaScript using:
+- **asciidoctor.js** for AsciiDoc processing
+- **marked** for Markdown processing
+- **highlight.js** for code syntax highlighting
+
+## Features
+
+- **AsciiDoc Processing**: Full AsciiDoc to HTML conversion with table of contents support
+- **Markdown Processing**: Markdown to HTML conversion with GFM support
+- **Code Syntax Highlighting**: Automatic syntax highlighting for code blocks using highlight.js
+- **LaTeX Math**: Support for inline and block LaTeX math expressions (compatible with MathJax/KaTeX)
+- **Musical Notation**: Support for ABC notation, LilyPond, chord notation, and MusicXML
+- **Nostr Addresses**: Automatic processing of `nostr:` prefixed addresses (naddr, nevent, note, npub, nprofile)
+- **Link Rewriting**: Automatic rewriting of wikilinks and nostr addresses to proper URLs
+- **HTML Sanitization**: Built-in XSS protection
+
+## Installation
+
+```bash
+npm install gc-parser
+```
+
+## Usage
+
+### Basic Example
+
+```typescript
+import { Parser, defaultOptions } from 'gc-parser';
+
+// Create parser with default options
+const opts = defaultOptions();
+opts.linkBaseURL = 'https://example.com';
+
+const parser = new Parser(opts);
+
+// Process content
+const content = `# Hello World
+
+This is **markdown** content with a nostr:npub1... address.`;
+
+const result = await parser.process(content);
+console.log(result.content);
+console.log('Has LaTeX:', result.hasLaTeX);
+console.log('Has Musical Notation:', result.hasMusicalNotation);
+```
+
+### Advanced Configuration
+
+```typescript
+import { Parser } from 'gc-parser';
+
+const parser = new Parser({
+  linkBaseURL: 'https://example.com',
+  enableAsciiDoc: true,
+  enableMarkdown: true,
+  enableCodeHighlighting: true,
+  enableLaTeX: true,
+  enableMusicalNotation: true,
+  enableNostrAddresses: true,
+});
+
+const result = await parser.process(content);
+```
+
+### Processing AsciiDoc
+
+```typescript
+const content = `= Document Title
+
+== Section
+
+This is AsciiDoc content with a [[wikilink]] and nostr:naddr1...`;
+
+const result = await parser.process(content);
+// result.content contains the HTML
+// result.tableOfContents contains the extracted TOC
+```
+
+### Processing Markdown
+
+```typescript
+const content = `# Markdown Document
+
+This is **bold** and *italic* text.
+
+\`\`\`go
+func main() {
+    fmt.Println("Hello")
+}
+\`\`\`
+`;
+
+const result = await parser.process(content);
+```
+
+### LaTeX Math
+
+The parser automatically detects and processes LaTeX math expressions:
+
+- Inline math: `$E = mc^2$` or `\(E = mc^2\)`
+- Block math: `$$\int_{-\infty}^{\infty} e^{-x^2} dx = \sqrt{\pi}$$` or `\[...\]`
+
+The output is compatible with MathJax or KaTeX. Include one of these libraries in your HTML:
+
+```html
+<!-- For MathJax -->
+<script src="https://cdnjs.cloudflare.com/polyfill/v3/polyfill.min.js?features=es6"></script>
+<script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+
+<!-- Or for KaTeX -->
+<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/katex.min.css">
+<script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/katex.min.js"></script>
+```
+
+### Musical Notation
+
+The parser supports multiple musical notation formats:
+
+- **ABC Notation**: Automatically detected and wrapped for ABC.js
+- **LilyPond**: Detected and wrapped for LilyPond rendering
+- **Chord Notation**: Inline chords like `[C]`, `[Am]`, `[F#m7]`
+- **MusicXML**: XML-based notation
+
+Example:
+```
+X:1
+K:C
+C D E F | G A B c
+```
+
+### Nostr Addresses
+
+The parser automatically processes `nostr:` prefixed addresses:
+
+- `nostr:naddr1...` - Parameterized replaceable events
+- `nostr:nevent1...` - Event references
+- `nostr:note1...` - Note IDs
+- `nostr:npub1...` - Public keys
+- `nostr:nprofile1...` - Profile references
+
+These are automatically converted to links if `linkBaseURL` is set.
+
+## Integration with gitcitadel-online
+
+This parser is designed to replace the content processing logic in `gitcitadel-online`. 
+
+### Migration Example
+
+**Before (in gitcitadel-online):**
+```go
+// Old way - calling Node.js via exec
+result, err := g.asciidocProc.Process(wiki.Content)
+html := result.Content
+```
+
+**After (using gc-parser):**
+```javascript
+// New way - import the JavaScript/TypeScript module
+// You can call it via Node.js exec or use a Go bridge
+const { Parser } = require('gc-parser');
+const parser = new Parser({ linkBaseURL: 'https://example.com' });
+const result = await parser.process(content);
+```
+
+Or use it directly in a Node.js script that gitcitadel-online can call:
+
+```javascript
+// process-content.js
+const { Parser } = require('gc-parser');
+
+const parser = new Parser({
+  linkBaseURL: process.env.LINK_BASE_URL || '',
+});
+
+const content = process.argv[2] || '';
+parser.process(content).then(result => {
+  console.log(JSON.stringify(result));
+}).catch(err => {
+  console.error(err);
+  process.exit(1);
+});
+```
+
+## Requirements
+
+- Node.js 18+ 
+- TypeScript 5.3+ (for development)
+
+## Development
+
+```bash
+# Install dependencies
+npm install
+
+# Build TypeScript
+npm run build
+
+# Run tests
+npm test
+```
+
+## License
+
+MIT
+
+## Contributing
+
+Contributions are welcome! Please feel free to submit a Pull Request.
--- a/example.js
+++ b/example.js
@ -0,0 +1,55 @@
				@@ -0,0 +1,55 @@
+#!/usr/bin/env node
+
+/**
+ * Example usage of gc-parser
+ * This can be called from Go or used directly in Node.js
+ */
+
+const { Parser, defaultOptions } = require('./dist/index.js');
+
+/**
+ * Reads content from the first CLI argument (or stdin when no argument is
+ * given), runs it through the parser and prints the result as JSON.
+ * Exits with status 1 when no content is provided or processing fails.
+ */
+async function main() {
+  // Create parser with default options; LINK_BASE_URL overrides the demo URL
+  const opts = defaultOptions();
+  opts.linkBaseURL = process.env.LINK_BASE_URL || 'https://example.com';
+
+  const parser = new Parser(opts);
+
+  // Get content from command line argument or stdin
+  let content = '';
+  if (process.argv[2]) {
+    content = process.argv[2];
+  } else {
+    // Read from stdin line by line; every line is re-terminated with '\n'
+    const readline = require('readline');
+    const rl = readline.createInterface({
+      input: process.stdin,
+      output: process.stdout,
+      terminal: false
+    });
+
+    for await (const line of rl) {
+      content += line + '\n';
+    }
+  }
+
+  if (!content) {
+    console.error('No content provided');
+    process.exit(1);
+  }
+
+  try {
+    const result = await parser.process(content);
+
+    // Output as JSON for easy parsing
+    console.log(JSON.stringify(result, null, 2));
+  } catch (error) {
+    console.error('Error processing content:', error);
+    process.exit(1);
+  }
+}
+
+if (require.main === module) {
+  // main() is async: anything thrown outside its try block (option setup,
+  // parser construction, stdin reading) would otherwise surface as an
+  // unhandled promise rejection.
+  main().catch((error) => {
+    console.error('Error processing content:', error);
+    process.exit(1);
+  });
+}
+
+module.exports = { main };
--- a/package.json
+++ b/package.json
@ -0,0 +1,35 @@
				@@ -0,0 +1,35 @@
+{
+  "name": "gc-parser",
+  "version": "1.0.0",
+  "description": "Super-parser for Nostr event content supporting AsciiDoc, Markdown, code syntax highlighting, LaTeX, musical notation, and nostr: addresses",
+  "main": "dist/index.js",
+  "types": "dist/index.d.ts",
+  "scripts": {
+    "build": "tsc",
+    "test": "jest",
+    "prepublishOnly": "npm run build"
+  },
+  "keywords": [
+    "nostr",
+    "parser",
+    "asciidoc",
+    "markdown",
+    "syntax-highlighting",
+    "latex",
+    "music"
+  ],
+  "author": "",
+  "license": "MIT",
+  "dependencies": {
+    "@asciidoctor/core": "^3.0.4",
+    "highlight.js": "^11.10.0",
+    "marked": "^12.0.0"
+  },
+  "devDependencies": {
+    "@types/node": "^20.11.0",
+    "typescript": "^5.3.3",
+    "jest": "^29.7.0",
+    "@types/jest": "^29.5.11",
+    "@types/highlight.js": "^10.1.0"
+  }
+}
--- a/src/detector.ts
+++ b/src/detector.ts
@ -0,0 +1,55 @@
				@@ -0,0 +1,55 @@
+import { ContentFormat } from './types';
+
+/**
+ * Detects the content format based on content patterns.
+ *
+ * Each indicator is a structural pattern (mostly anchored to the start of a
+ * line) rather than a bare substring, so ordinary prose containing a colon,
+ * an asterisk or a bracket no longer counts as markup.
+ *
+ * @param content Raw event content.
+ * @returns AsciiDoc when at least two AsciiDoc indicators are present and they
+ *          outnumber the Markdown indicators; Markdown when any Markdown
+ *          indicator is present; Plain otherwise.
+ */
+export function detectFormat(content: string): ContentFormat {
+  // AsciiDoc indicators
+  const asciidocIndicators: RegExp[] = [
+    /^= +\S/m,                       // Title
+    /^== +\S/m,                      // Section
+    /^={3,6} +\S/m,                  // Subsection
+    /^include::\S/m,                 // Include directive
+    /^image::\S/m,                   // Image block
+    /^\[source\b/m,                  // Source block
+    /^-{4,}[ \t]*$/m,                // Listing block delimiter
+    /^\.{4,}[ \t]*$/m,               // Literal block delimiter
+    /^\|={3,}/m,                     // Table
+    /^:!?[\w-]+!?:(?:[ \t]|$)/m,     // Attribute entry, e.g. ":toc:" or ":author: x"
+  ];
+
+  // Markdown indicators
+  const markdownIndicators: RegExp[] = [
+    /^# +\S/m,                                    // Heading
+    /^#{2,6} +\S/m,                               // Subheading
+    /^ {0,3}(?:```|~~~)/m,                        // Fenced code block
+    /\*\*[^\s*][^*\n]*\*\*/,                      // Bold
+    /(?<![*\w])\*[^\s*][^*\n]*\*(?!\*)/,          // Italic
+    /^[ \t]*[-*+] +\S/m,                          // List item
+    /!\[[^\]\n]*\]\([^)\n]+\)/,                   // Image
+    /\[[^\]\n]+\]\([^)\n]+\)|\[\[[^\]\n]+\]\]/,   // Link or wikilink
+  ];
+
+  // None of the patterns use the g/y flags, so test() is stateless here.
+  const countMatches = (indicators: RegExp[]): number =>
+    indicators.filter((indicator) => indicator.test(content)).length;
+
+  const asciidocScore = countMatches(asciidocIndicators);
+  const markdownScore = countMatches(markdownIndicators);
+
+  // Determine format based on scores
+  if (asciidocScore > markdownScore && asciidocScore >= 2) {
+    return ContentFormat.AsciiDoc;
+  } else if (markdownScore > 0) {
+    return ContentFormat.Markdown;
+  }
+
+  return ContentFormat.Plain;
+}
--- a/src/index.ts
+++ b/src/index.ts
@ -0,0 +1,2 @@
				@@ -0,0 +1,2 @@
+export * from './parser';
+export * from './types';
--- a/src/parser.ts
+++ b/src/parser.ts
@ -0,0 +1,116 @@
				@@ -0,0 +1,116 @@
+import { ParserOptions, ProcessResult, ContentFormat } from './types';
+import { processAsciiDoc } from './processors/asciidoc';
+import { processMarkdown } from './processors/markdown';
+import { processPlainText } from './processors/plain';
+import { processNostrAddresses } from './processors/nostr';
+import { detectFormat } from './detector';
+import { processLaTeX, hasLaTeX } from './processors/latex';
+import { processMusicalNotation, hasMusicalNotation } from './processors/music';
+import { ensureCodeHighlighting } from './processors/code';
+
+/**
+ * Builds a fresh options object with an empty link base URL and every
+ * processing feature switched on. A new object is returned on each call, so
+ * callers may mutate it freely.
+ */
+export function defaultOptions(): ParserOptions {
+  const everyFeatureOn = true;
+  const defaults: ParserOptions = {
+    linkBaseURL: '',
+    enableAsciiDoc: everyFeatureOn,
+    enableMarkdown: everyFeatureOn,
+    enableCodeHighlighting: everyFeatureOn,
+    enableLaTeX: everyFeatureOn,
+    enableMusicalNotation: everyFeatureOn,
+    enableNostrAddresses: everyFeatureOn,
+  };
+  return defaults;
+}
+
+/**
+ * Main parser for Nostr event content
+ * Handles multiple content formats: AsciiDoc, Markdown, code syntax,
+ * LaTeX, musical notation, and nostr: prefixed addresses
+ */
+export class Parser {
+  private options: Required<ParserOptions>;
+
+  /**
+   * @param options Partial options; every omitted (or undefined) field falls
+   *                back to the value from defaultOptions().
+   */
+  constructor(options: ParserOptions = {}) {
+    const defaults = defaultOptions();
+    this.options = {
+      linkBaseURL: options.linkBaseURL ?? defaults.linkBaseURL ?? '',
+      enableAsciiDoc: options.enableAsciiDoc ?? defaults.enableAsciiDoc ?? true,
+      enableMarkdown: options.enableMarkdown ?? defaults.enableMarkdown ?? true,
+      enableCodeHighlighting: options.enableCodeHighlighting ?? defaults.enableCodeHighlighting ?? true,
+      enableLaTeX: options.enableLaTeX ?? defaults.enableLaTeX ?? true,
+      enableMusicalNotation: options.enableMusicalNotation ?? defaults.enableMusicalNotation ?? true,
+      enableNostrAddresses: options.enableNostrAddresses ?? defaults.enableNostrAddresses ?? true,
+    };
+  }
+
+  /**
+   * Process Nostr event content and return HTML
+   * Automatically detects the content format and processes accordingly
+   *
+   * @param content Raw event content.
+   * @returns The rendered HTML plus the flags/TOC reported by the processors.
+   */
+  async process(content: string): Promise<ProcessResult> {
+    // Detect the format on the raw input, before any HTML is injected into it
+    const format = detectFormat(content);
+
+    let result: ProcessResult;
+
+    switch (format) {
+      case ContentFormat.AsciiDoc:
+        if (this.options.enableAsciiDoc) {
+          result = await processAsciiDoc(content, this.options.linkBaseURL);
+        } else if (this.options.enableMarkdown) {
+          // Fallback to markdown if AsciiDoc is disabled
+          result = await processMarkdown(content, this.options.linkBaseURL);
+        } else {
+          result = processPlainText(content);
+        }
+        break;
+      case ContentFormat.Markdown:
+        if (this.options.enableMarkdown) {
+          result = await processMarkdown(content, this.options.linkBaseURL);
+        } else {
+          // Fallback to plain text
+          result = processPlainText(content);
+        }
+        break;
+      default:
+        // Plain text or mixed content
+        result = processPlainText(content);
+    }
+
+    // Link nostr: addresses AFTER conversion. Doing it on the raw input
+    // injected <a>/<span> markup that the plain-text path escaped and that
+    // the Markdown/AsciiDoc link rewriters matched a second time.
+    if (this.options.enableNostrAddresses) {
+      result.content = this.linkNostrAddresses(result.content);
+    }
+
+    // Post-process: handle LaTeX and musical notation in the HTML
+    if (this.options.enableLaTeX) {
+      result.hasLaTeX = hasLaTeX(result.content);
+      if (result.hasLaTeX) {
+        result.content = processLaTeX(result.content);
+      }
+    }
+
+    if (this.options.enableMusicalNotation) {
+      result.hasMusicalNotation = hasMusicalNotation(result.content);
+      if (result.hasMusicalNotation) {
+        result.content = processMusicalNotation(result.content);
+      }
+    }
+
+    // Ensure code highlighting is applied if enabled
+    if (this.options.enableCodeHighlighting) {
+      result.content = ensureCodeHighlighting(result.content);
+    }
+
+    return result;
+  }
+
+  /**
+   * Applies processNostrAddresses to the text nodes of an HTML fragment only.
+   * Tags (and therefore attribute values such as href) are left untouched, as
+   * is any text inside <a>, <code> or <pre>, so addresses that a format
+   * processor already turned into links are not wrapped twice.
+   */
+  private linkNostrAddresses(html: string): string {
+    let protectedDepth = 0;
+
+    // Splitting on a capturing group puts the tags at the odd indices.
+    return html
+      .split(/(<[^>]+>)/)
+      .map((segment, index) => {
+        if (index % 2 === 1) {
+          const tag = /^<(\/?)(?:a|code|pre)\b/i.exec(segment);
+          if (tag) {
+            if (tag[1] === '/') {
+              protectedDepth = Math.max(0, protectedDepth - 1);
+            } else if (!segment.endsWith('/>')) {
+              protectedDepth++;
+            }
+          }
+          return segment;
+        }
+        return protectedDepth === 0
+          ? processNostrAddresses(segment, this.options.linkBaseURL)
+          : segment;
+      })
+      .join('');
+  }
+}
+
+/**
+ * One-shot helper: builds a Parser from the given options (defaults apply
+ * for anything omitted) and processes the content with it.
+ */
+export async function process(content: string, options?: ParserOptions): Promise<ProcessResult> {
+  return new Parser(options).process(content);
+}
--- a/src/processors/asciidoc-links.ts
+++ b/src/processors/asciidoc-links.ts
@ -0,0 +1,66 @@
				@@ -0,0 +1,66 @@
+/**
+ * Normalizes a d tag according to NIP-54 rules:
+ * lowercase, whitespace to hyphens, punctuation and symbols removed,
+ * hyphen runs collapsed, leading/trailing hyphens stripped.
+ *
+ * Letters, combining marks and digits of any script are preserved; unlike a
+ * plain "everything above U+007F" range, non-ASCII punctuation and symbols
+ * (e.g. "¿", "—", emoji) are removed as well.
+ *
+ * @param dTag Raw identifier or wikilink target.
+ * @returns The normalized identifier (may be empty).
+ */
+export function normalizeDTag(dTag: string): string {
+  return dTag
+    // Convert to lowercase
+    .toLowerCase()
+    // Convert whitespace to hyphens
+    .replace(/\s+/g, '-')
+    // Remove punctuation and symbols (keep letters, marks, numbers, hyphens)
+    .replace(/[^\p{L}\p{M}\p{N}-]/gu, '')
+    // Collapse multiple consecutive hyphens
+    .replace(/-+/g, '-')
+    // Remove leading and trailing hyphens
+    .replace(/^-+|-+$/g, '');
+}
+
+/**
+ * Rewrites wikilinks and nostr: links in AsciiDoc content
+ *
+ * - `[[target]]` / `[[target|display text]]` become `link:url[display]`,
+ *   with the target normalized via normalizeDTag.
+ * - `nostr:` addresses (naddr, nevent, note, npub, nprofile) become
+ *   `link:url[nostr:...]` when linkBaseURL is set; otherwise they are left
+ *   untouched.
+ *
+ * @param content     AsciiDoc source.
+ * @param linkBaseURL Base URL for generated links; may be empty.
+ * @returns The AsciiDoc source with links rewritten.
+ */
+export function rewriteAsciiDocLinks(content: string, linkBaseURL: string): string {
+  // Rewrite wikilinks: [[target]] or [[target|display text]]
+  // Format: [[target]] -> link:url[display]
+  const wikilinkRegex = /\[\[([^\]]+)\]\]/g;
+  content = content.replace(wikilinkRegex, (_match: string, inner: string) => {
+    let target: string;
+    let display: string;
+
+    if (inner.includes('|')) {
+      const parts = inner.split('|', 2);
+      target = parts[0].trim();
+      display = parts[1].trim();
+    } else {
+      target = inner.trim();
+      display = target;
+    }
+
+    // Normalize the d tag
+    const normalized = normalizeDTag(target);
+
+    // Create the link
+    if (linkBaseURL) {
+      const url = `${linkBaseURL}/events?d=${normalized}`;
+      return `link:${url}[${display}]`;
+    }
+    return `link:#${normalized}[${display}]`;
+  });
+
+  // Rewrite nostr: links. The identifier is limited to lowercase
+  // alphanumerics (a superset of the bech32 alphabet) so trailing
+  // punctuation or markup is never swallowed into the URL.
+  const nostrLinkRegex = /nostr:((?:naddr|nevent|note|npub|nprofile)1[a-z0-9]+)/g;
+  content = content.replace(nostrLinkRegex, (match: string, nostrID: string) => {
+    if (!linkBaseURL) {
+      return match;
+    }
+
+    let url: string;
+    if (nostrID.startsWith('npub')) {
+      url = `${linkBaseURL}/profile?pubkey=${nostrID}`;
+    } else if (nostrID.startsWith('nprofile')) {
+      url = `${linkBaseURL}/profile?id=${nostrID}`;
+    } else {
+      url = `${linkBaseURL}/events?id=${nostrID}`;
+    }
+    return `link:${url}[${match}]`;
+  });
+
+  return content;
+}
--- a/src/processors/asciidoc.ts
+++ b/src/processors/asciidoc.ts
@ -0,0 +1,49 @@
				@@ -0,0 +1,49 @@
+import asciidoctor from '@asciidoctor/core';
+import { ProcessResult } from '../types';
+import { rewriteAsciiDocLinks } from './asciidoc-links';
+import { extractTOC, sanitizeHTML, processLinks } from './html-utils';
+
+const asciidoctorInstance = asciidoctor();
+
+/**
+ * Converts AsciiDoc content to HTML.
+ *
+ * Pipeline: rewrite wikilinks/nostr links in the source, convert with
+ * Asciidoctor, split the table of contents off the body, then sanitize and
+ * post-process the links of both fragments.
+ *
+ * @param content     AsciiDoc source.
+ * @param linkBaseURL Base URL used for link rewriting and link classification.
+ * @returns The body HTML, the TOC HTML, and both notation flags set to false.
+ */
+export async function processAsciiDoc(content: string, linkBaseURL: string): Promise<ProcessResult> {
+  // Sanitize first, then decide per link whether it opens in a new tab
+  const finalize = (fragment: string): string =>
+    processLinks(sanitizeHTML(fragment), linkBaseURL);
+
+  const conversionOptions = {
+    safe: 'safe',
+    backend: 'html5',
+    doctype: 'article',
+    attributes: {
+      showtitle: true,
+      icons: 'font',
+      sectanchors: true,
+      sectlinks: true,
+      toc: 'left',
+      toclevels: 3,
+    },
+  };
+
+  const source = rewriteAsciiDocLinks(content, linkBaseURL);
+  const html = asciidoctorInstance.convert(source, conversionOptions) as string;
+
+  // Separate the generated table of contents from the document body
+  const { toc, contentWithoutTOC } = extractTOC(html);
+  const body = finalize(contentWithoutTOC);
+  const tableOfContents = finalize(toc);
+
+  return {
+    content: body,
+    tableOfContents,
+    hasLaTeX: false,
+    hasMusicalNotation: false,
+  };
+}
--- a/src/processors/code.ts
+++ b/src/processors/code.ts
@ -0,0 +1,52 @@
				@@ -0,0 +1,52 @@
+import hljs from 'highlight.js';
+
+/**
+ * Ensures code blocks have syntax highlighting using highlight.js
+ *
+ * Matches `<pre ...><code ...>...</code></pre>` blocks. Attributes on the
+ * `<pre>` element are preserved, so blocks such as Asciidoctor's
+ * `<pre class="highlight">` are covered too. The language is taken from a
+ * `language-xxx` class on the `<code>` element when highlight.js knows it;
+ * otherwise the language is auto-detected.
+ *
+ * Blocks that already contain highlight.js markup are returned unchanged so
+ * the function is safe to run more than once. If highlighting throws, the
+ * original block is kept (best effort).
+ *
+ * @param html HTML fragment.
+ * @returns The fragment with code blocks highlighted.
+ */
+export function ensureCodeHighlighting(html: string): string {
+  const codeBlockRegex = /<pre([^>]*)><code([^>]*)>([\s\S]*?)<\/code><\/pre>/g;
+
+  return html.replace(
+    codeBlockRegex,
+    (match: string, preAttrs: string, codeAttrs: string, code: string) => {
+      // Already highlighted: re-highlighting would escape the existing spans
+      if (/<span\s+class=["']hljs-/.test(code)) {
+        return match;
+      }
+
+      const lang = /(?:^|[\s"'])language-([\w+#.-]+)/.exec(codeAttrs)?.[1];
+
+      // The block content is HTML-escaped; highlight.js expects raw source
+      const unescapedCode = unescapeHTML(code);
+
+      try {
+        // Use the declared language when highlight.js knows it, else auto-detect
+        const highlighted =
+          lang && hljs.getLanguage(lang)
+            ? hljs.highlight(unescapedCode, { language: lang })
+            : hljs.highlightAuto(unescapedCode);
+
+        // Return highlighted code with proper classes
+        const langClass = highlighted.language ? ` class="language-${highlighted.language}"` : '';
+        return `<pre${preAttrs}><code${langClass}>${highlighted.value}</code></pre>`;
+      } catch {
+        // If highlighting fails, return original
+        return match;
+      }
+    },
+  );
+}
+
+/**
+ * Unescapes the five basic HTML entities (&lt; &gt; &quot; &#39; &amp;).
+ *
+ * `&amp;` is decoded last: decoding it earlier would turn an escaped entity
+ * such as `&amp;quot;` into `&quot;` and then into `"`, i.e. decode twice.
+ */
+function unescapeHTML(text: string): string {
+  return text
+    .replace(/&lt;/g, '<')
+    .replace(/&gt;/g, '>')
+    .replace(/&quot;/g, '"')
+    .replace(/&#39;/g, "'")
+    .replace(/&amp;/g, '&');
+}
--- a/src/processors/html-utils.ts
+++ b/src/processors/html-utils.ts
@ -0,0 +1,170 @@
				@@ -0,0 +1,170 @@
+/**
+ * Extracts the table of contents from AsciiDoc HTML output
+ * Returns the TOC HTML and the content HTML without the TOC
+ *
+ * The TOC container is located with a prioritized list of patterns, then its
+ * matching closing tag is found by tracking the nesting depth of <div>/<nav>
+ * elements. The returned `toc` is the container's inner HTML with the
+ * "toctitle" div removed. When no TOC container is found, or its closing tag
+ * is missing, `toc` is empty and the HTML is returned unchanged.
+ *
+ * @param html HTML produced by the AsciiDoc converter.
+ */
+export function extractTOC(html: string): { toc: string; contentWithoutTOC: string } {
+  // Find the start of the TOC container - earlier patterns take priority
+  const tocStartPatterns = [
+    /<div\s+id=["']toc["']\s+class=["']toc["'][^>]*>/i,
+    /<div\s+id=["']toc["'][^>]*>/i,
+    /<div\s+class=["']toc["'][^>]*>/i,
+    /<nav\s+id=["']toc["'][^>]*>/i,
+  ];
+
+  let opening: RegExpMatchArray | null = null;
+  for (const pattern of tocStartPatterns) {
+    const match = html.match(pattern);
+    if (match && match.index !== undefined) {
+      opening = match;
+      break;
+    }
+  }
+
+  if (!opening || opening.index === undefined) {
+    // No TOC found
+    return { toc: '', contentWithoutTOC: html };
+  }
+
+  const tocStartIdx = opening.index;
+  const innerStart = tocStartIdx + opening[0].length;
+
+  // Walk every <div>/<nav> open or close tag after the container's opening
+  // tag. Openers and closers of both elements are counted symmetrically.
+  const tagScanner = /<(\/?)(?:div|nav)\b[^>]*>/gi;
+  tagScanner.lastIndex = innerStart;
+
+  let depth = 1;
+  let innerEnd = -1;
+  let tocEndIdx = -1;
+  let tag: RegExpExecArray | null;
+
+  while ((tag = tagScanner.exec(html)) !== null) {
+    if (tag[1] === '/') {
+      depth--;
+      if (depth === 0) {
+        innerEnd = tag.index;              // start of the closing tag
+        tocEndIdx = tagScanner.lastIndex;  // just past the closing tag
+        break;
+      }
+    } else if (!tag[0].endsWith('/>')) {
+      // Self-closing tags do not open a new nesting level
+      depth++;
+    }
+  }
+
+  if (tocEndIdx === -1) {
+    // Unbalanced markup: leave the document untouched
+    return { toc: '', contentWithoutTOC: html };
+  }
+
+  // Inner content of the container (without the outer tags)
+  let tocContent = html.substring(innerStart, innerEnd).trim();
+
+  // Remove the toctitle div if present (AsciiDoc adds "Table of Contents" title)
+  tocContent = tocContent.replace(/<div\s+id=["']toctitle["'][^>]*>.*?<\/div>\s*/gis, '');
+  tocContent = tocContent.trim();
+
+  // Remove the TOC from the content
+  const contentWithoutTOC = html.substring(0, tocStartIdx) + html.substring(tocEndIdx);
+
+  return { toc: tocContent, contentWithoutTOC };
+}
+
+/**
+ * Performs basic HTML sanitization to prevent XSS
+ *
+ * Removes <script> elements (and orphaned script tags), inline event handler
+ * attributes, `javascript:` schemes and `data:text/html` URLs.
+ *
+ * The rules are re-applied until the output stops changing, so input that
+ * only forms a dangerous token after one removal (for example
+ * `javajavascript:script:`) is neutralized as well. Event handlers are only
+ * stripped inside tags, which keeps ordinary attributes such as `content=`
+ * and escaped code samples intact.
+ *
+ * NOTE(review): regex-based filtering is best-effort. For untrusted input a
+ * dedicated, parser-based HTML sanitizer should be considered.
+ *
+ * @param html HTML fragment to clean.
+ * @returns The sanitized fragment.
+ */
+export function sanitizeHTML(html: string): string {
+  // Strips on*="..." / on*='...' / on*=value attributes from a single tag
+  const stripEventHandlers = (tag: string): string =>
+    tag.replace(/\s+on\w+\s*=\s*(?:"[^"]*"|'[^']*'|[^\s>]+)/gi, '');
+
+  let previous: string;
+  do {
+    previous = html;
+    html = html
+      // Remove script tags and their content
+      .replace(/<script\b[^>]*>[\s\S]*?<\/script\s*>/gi, '')
+      // Remove unmatched opening/closing script tags
+      .replace(/<\/?script\b[^>]*>/gi, '')
+      // Remove event handlers (onclick, onerror, etc.)
+      .replace(/<[^>]+>/g, stripEventHandlers)
+      // Remove javascript: protocol in links
+      .replace(/javascript:/gi, '')
+      // Remove data: URLs that could be dangerous
+      .replace(/data:\s*text\/html/gi, '');
+  } while (html !== previous);
+
+  return html;
+}
+
+/**
+ * Processes HTML links to add target="_blank" to external links
+ *
+ * - Relative/local links and absolute links to the site's own host (or one
+ *   of its subdomains) have any `target` attribute removed.
+ * - Other absolute http(s) links without a `target` get `target="_blank"`
+ *   and a `rel` containing `noopener noreferrer`.
+ *
+ * The own-site check compares host names. A plain substring test would treat
+ * `https://evil.example/?u=<our-domain>` as an internal link.
+ *
+ * @param html        HTML fragment.
+ * @param linkBaseURL Base URL of the site; may be empty.
+ * @returns The fragment with link attributes adjusted.
+ */
+export function processLinks(html: string, linkBaseURL: string): string {
+  // Host portion of a URL: scheme, path/query/fragment and userinfo removed
+  const hostOf = (url: string): string => {
+    const authority = url.replace(/^https?:\/\//i, '').split(/[/?#]/, 1)[0] ?? '';
+    return authority.slice(authority.lastIndexOf('@') + 1).toLowerCase();
+  };
+
+  const removeTarget = (tag: string): string =>
+    tag.replace(/\s*target\s*=\s*["'][^"']*["']/gi, '');
+
+  // Appends attributes just before the closing '>' of the opening tag
+  const appendAttrs = (tag: string, attrs: string): string =>
+    `${tag.slice(0, -1)}${attrs}>`;
+
+  const linkBaseDomain = linkBaseURL ? hostOf(linkBaseURL) : '';
+
+  // Regex to match <a> tags with href attributes
+  const linkRegex = /<a\s+([^>]*?)href\s*=\s*["']([^"']+)["']([^>]*?)>/g;
+
+  return html.replace(linkRegex, (match: string, _before: string, href: string) => {
+    const isExternal = href.startsWith('http://') || href.startsWith('https://');
+    if (!isExternal) {
+      // Local/relative link - ensure it opens in same tab
+      return removeTarget(match);
+    }
+
+    const host = hostOf(href);
+    const isOwnSite =
+      linkBaseDomain !== '' &&
+      (host === linkBaseDomain || host.endsWith(`.${linkBaseDomain}`));
+    if (isOwnSite) {
+      // Same site - open in same tab
+      return removeTarget(match);
+    }
+
+    // External link with an explicit target: leave the author's choice alone
+    if (match.includes('target=')) {
+      return match;
+    }
+
+    if (!match.includes('rel=')) {
+      return appendAttrs(match, ' target="_blank" rel="noopener noreferrer"');
+    }
+
+    // Update existing rel attribute to include noopener if not present
+    const withRel = match.replace(
+      /rel\s*=\s*["']([^"']*)["']/gi,
+      (relMatch: string, relValue: string) =>
+        relValue.includes('noopener') ? relMatch : `rel="${relValue} noopener noreferrer"`,
+    );
+    return appendAttrs(withRel, ' target="_blank"');
+  });
+}
--- a/src/processors/latex.ts
+++ b/src/processors/latex.ts
@ -0,0 +1,37 @@
				@@ -0,0 +1,37 @@
+/**
+ * Checks if content contains LaTeX math expressions
+ *
+ * Recognized delimiters: `$...$` and `\(...\)` (inline), `$$...$$` and
+ * `\[...\]` (block).
+ *
+ * For `$...$` the widely used "dollar math" convention is applied: the
+ * opening `$` must not be followed by whitespace, the closing `$` must not be
+ * preceded by whitespace nor followed by a digit, and the expression stays on
+ * one line. This keeps prices such as "$5 and $10" from counting as math.
+ * The backslash-delimited forms may contain `)` or `]` themselves.
+ *
+ * @param content Text or HTML to inspect.
+ * @returns true when at least one math expression is found.
+ */
+export function hasLaTeX(content: string): boolean {
+  // Check for inline math: $...$ or \(...\)
+  const inlineMathPattern = /\$(?!\s)[^$\n]+?(?<!\s)\$(?!\d)|\\\([\s\S]+?\\\)/;
+  // Check for block math: $$...$$ or \[...\]
+  const blockMathPattern = /\$\$[^$]+\$\$|\\\[[\s\S]+?\\\]/;
+
+  return inlineMathPattern.test(content) || blockMathPattern.test(content);
+}
+
+/**
+ * Processes LaTeX math expressions in HTML content
+ * Wraps LaTeX expressions in appropriate HTML for rendering with MathJax or KaTeX
+ *
+ * Block math (`$$...$$`, `\[...\]`) becomes
+ * `<div class="math-block">\[...\]</div>`; inline math (`$...$`, `\(...\)`)
+ * becomes `<span class="math-inline">\(...\)</span>`.
+ *
+ * For `$...$` the opening `$` must not be followed by whitespace and the
+ * closing `$` must not be preceded by whitespace nor followed by a digit, so
+ * currency amounts are left alone. The backslash-delimited forms are matched
+ * lazily up to their closing delimiter and may therefore contain `)` or `]`.
+ *
+ * @param html HTML fragment.
+ * @returns The fragment with math expressions wrapped.
+ */
+export function processLaTeX(html: string): string {
+  // Process block math first so "$$" is never read as two inline delimiters
+  const blockMathPattern = /\$\$([^$]+)\$\$|\\\[([\s\S]+?)\\\]/g;
+  html = html.replace(blockMathPattern, (_match: string, dollarContent?: string, bracketContent?: string) => {
+    const mathContent = (dollarContent || bracketContent || '').trim();
+    // Wrap in appropriate tags for MathJax/KaTeX
+    return `<div class="math-block">\\[${mathContent}\\]</div>`;
+  });
+
+  // Process inline math: $...$ or \(...\)
+  const inlineMathPattern = /\$(?!\s)([^$\n]+?)(?<!\s)\$(?!\d)|\\\(([\s\S]+?)\\\)/g;
+  html = html.replace(inlineMathPattern, (_match: string, dollarContent?: string, parenContent?: string) => {
+    const mathContent = (dollarContent || parenContent || '').trim();
+    // Wrap in appropriate tags for MathJax/KaTeX
+    return `<span class="math-inline">\\(${mathContent}\\)</span>`;
+  });
+
+  return html;
+}
--- a/src/processors/markdown-links.ts
+++ b/src/processors/markdown-links.ts
@ -0,0 +1,49 @@
				@@ -0,0 +1,49 @@
+import { normalizeDTag } from './asciidoc-links';
+
+/**
+ * Rewrites wikilinks and nostr: links in Markdown content
+ *
+ * - `[[target]]` / `[[target|display text]]` become `[display](url)`, with
+ *   the target normalized via normalizeDTag.
+ * - Bare `nostr:` addresses (naddr, nevent, note, npub, nprofile) become
+ *   `[nostr:...](url)` when linkBaseURL is set. Addresses that already are
+ *   the text or destination of a Markdown link are left untouched.
+ *
+ * @param content     Markdown source.
+ * @param linkBaseURL Base URL for generated links; may be empty.
+ * @returns The Markdown source with links rewritten.
+ */
+export function rewriteMarkdownLinks(content: string, linkBaseURL: string): string {
+  // Rewrite wikilinks: [[target]] or [[target|display text]]
+  const wikilinkRegex = /\[\[([^\]]+)\]\]/g;
+  content = content.replace(wikilinkRegex, (_match: string, inner: string) => {
+    let target: string;
+    let display: string;
+
+    if (inner.includes('|')) {
+      const parts = inner.split('|', 2);
+      target = parts[0].trim();
+      display = parts[1].trim();
+    } else {
+      target = inner.trim();
+      display = target;
+    }
+
+    const normalized = normalizeDTag(target);
+
+    if (linkBaseURL) {
+      const url = `${linkBaseURL}/events?d=${normalized}`;
+      return `[${display}](${url})`;
+    }
+    return `[${display}](#${normalized})`;
+  });
+
+  // Rewrite nostr: links in Markdown.
+  // - The identifier is limited to lowercase alphanumerics (a superset of
+  //   the bech32 alphabet) so trailing punctuation is not swallowed.
+  // - The lookbehind skips addresses directly after "[" or "](", i.e. ones
+  //   that already sit inside a Markdown link.
+  const nostrLinkRegex = /(?<!\[|\]\()nostr:((?:naddr|nevent|note|npub|nprofile)1[a-z0-9]+)/g;
+  content = content.replace(nostrLinkRegex, (match: string, nostrID: string) => {
+    if (!linkBaseURL) {
+      return match;
+    }
+
+    let url: string;
+    if (nostrID.startsWith('npub')) {
+      url = `${linkBaseURL}/profile?pubkey=${nostrID}`;
+    } else if (nostrID.startsWith('nprofile')) {
+      url = `${linkBaseURL}/profile?id=${nostrID}`;
+    } else {
+      url = `${linkBaseURL}/events?id=${nostrID}`;
+    }
+    return `[${match}](${url})`;
+  });
+
+  return content;
+}
--- a/src/processors/markdown.ts
+++ b/src/processors/markdown.ts
@ -0,0 +1,36 @@
				@@ -0,0 +1,36 @@
+import { marked } from 'marked';
+import { ProcessResult } from '../types';
+import { rewriteMarkdownLinks } from './markdown-links';
+import { sanitizeHTML, processLinks } from './html-utils';
+
+// Configure marked options.
+// `breaks` turns single newlines into <br>; `gfm` enables GitHub Flavored
+// Markdown. The former `headerIds` and `mangle` options are not passed: they
+// were removed from marked's option set, and this package depends on
+// marked ^12, where they are unknown properties.
+marked.setOptions({
+  breaks: true,
+  gfm: true,
+});
+
+/**
+ * Converts Markdown content to HTML.
+ *
+ * Pipeline: rewrite wikilinks/nostr links in the source, render with marked,
+ * sanitize the HTML, then adjust link targets.
+ *
+ * @param content     Markdown source.
+ * @param linkBaseURL Base URL used for link rewriting and link classification.
+ * @returns The rendered HTML with an empty TOC and both notation flags false.
+ */
+export async function processMarkdown(content: string, linkBaseURL: string): Promise<ProcessResult> {
+  const source = rewriteMarkdownLinks(content, linkBaseURL);
+  const rendered = await marked.parse(source) as string;
+
+  // Sanitize before touching links so link processing only sees safe markup
+  const safeHTML = processLinks(sanitizeHTML(rendered), linkBaseURL);
+
+  return {
+    content: safeHTML,
+    tableOfContents: '',
+    hasLaTeX: false,
+    hasMusicalNotation: false,
+  };
+}
--- a/src/processors/music.ts
+++ b/src/processors/music.ts
@ -0,0 +1,72 @@
				@@ -0,0 +1,72 @@
+/**
+ * Checks if content contains musical notation
+ *
+ * Looks for ABC header fields, LilyPond commands, MusicXML-like tags and
+ * simple bracketed chords.
+ *
+ * ABC header fields (`X:`, `K:`, `M:`) are only recognized in upper case and
+ * at the start of a line or directly after an HTML tag. Matching them
+ * anywhere and case-insensitively flags ordinary prose such as
+ * "Task: Build" or "link:about".
+ *
+ * @param content Text or HTML to inspect.
+ * @returns true when any notation pattern is found.
+ */
+export function hasMusicalNotation(content: string): boolean {
+  // Check for ABC notation: X:1, K:C, M:4/4 at the start of a line / after a tag
+  const abcPattern = /(?:^|>)[ \t]*(?:X:\s*\d+|K:\s*[A-G]|M:\s*\d+\/\d+)/m;
+  // Check for LilyPond notation: \relative, \clef, etc.
+  const lilypondPattern = /\\relative|\\clef|\\key|\\time/;
+  // Check for MusicXML-like tags: <note>, <pitch>, etc.
+  const musicxmlPattern = /<note>|<pitch>|<rest>/i;
+  // Check for simple chord notation: [C], [Am], etc.
+  const chordPattern = /\[[A-G][#b]?m?[0-9]?\]/;
+
+  return [abcPattern, lilypondPattern, musicxmlPattern, chordPattern]
+    .some((pattern) => pattern.test(content));
+}
+
+/**
+ * Processes musical notation in HTML content
+ * Wraps musical notation in appropriate HTML for rendering
+ *
+ * - ABC blocks (starting at an `X:<number>` line) are wrapped in
+ *   `<div class="abc-notation" data-abc="...">`.
+ * - LilyPond `\relative ... }` spans are wrapped in
+ *   `<div class="lilypond-notation" data-lilypond="...">`.
+ * - Bracketed chords such as `[C]`, `[Am]`, `[F#m7]`, `[G/B]` are wrapped in
+ *   `<span class="chord" data-chord="...">`. Only real chord symbols match;
+ *   bracketed prose that merely starts with A-G (e.g. "[Click here]") is
+ *   left alone.
+ * - `<music ...>...</music>` elements are wrapped in
+ *   `<div class="musicxml-notation" data-musicxml="...">`.
+ *
+ * @param html HTML fragment.
+ * @returns The fragment with notation wrapped.
+ */
+export function processMusicalNotation(html: string): string {
+  // Process ABC notation blocks
+  // ABC notation typically starts with X:1 and contains multiple lines
+  const abcBlockPattern = /(X:\s*\d+[^\n]*\n(?:[^\n]+\n)*)/gs;
+  html = html.replace(abcBlockPattern, (match: string) => {
+    const abcContent = match.trim();
+    // Wrap in a div for ABC.js or similar renderer
+    return `<div class="abc-notation" data-abc="${escapeForAttr(abcContent)}">${abcContent}</div>`;
+  });
+
+  // Process LilyPond notation blocks
+  // LilyPond notation is typically in code blocks or between \relative and }
+  const lilypondPattern = /(\\relative[^}]+})/gs;
+  html = html.replace(lilypondPattern, (match: string) => {
+    const lilypondContent = match.trim();
+    // Wrap in a div for LilyPond rendering
+    return `<div class="lilypond-notation" data-lilypond="${escapeForAttr(lilypondContent)}">${lilypondContent}</div>`;
+  });
+
+  // Process inline chord notation: root, optional accidental, optional
+  // quality, optional extension number, optional slash bass note
+  const chordPattern = /\[([A-G][#b]?(?:maj|min|m|dim|aug|sus|add)?[0-9]{0,2}(?:\/[A-G][#b]?)?)\]/g;
+  html = html.replace(chordPattern, (_match: string, chord: string) => {
+    // Wrap in a span for chord rendering
+    return `<span class="chord" data-chord="${escapeForAttr(chord)}">[${chord}]</span>`;
+  });
+
+  // Process MusicXML-like notation (if present in content)
+  const musicxmlPattern = /(<music[^>]*>.*?<\/music>)/gs;
+  html = html.replace(musicxmlPattern, (match: string) => {
+    const musicxmlContent = match.trim();
+    // Wrap in a div for MusicXML rendering
+    return `<div class="musicxml-notation" data-musicxml="${escapeForAttr(musicxmlContent)}">${musicxmlContent}</div>`;
+  });
+
+  return html;
+}
+
+/**
+ * Escapes a string for use in HTML attributes
+ *
+ * Quotes and angle brackets are replaced by entities. Line breaks are
+ * normalized to "\n" and encoded as `&#10;` instead of being flattened to
+ * spaces: ABC notation is line-based, so the attribute value must keep its
+ * line structure. `&` is left as is because the input is HTML source whose
+ * entities are already encoded.
+ */
+function escapeForAttr(text: string): string {
+  return text
+    .replace(/"/g, '&quot;')
+    .replace(/'/g, '&#39;')
+    .replace(/</g, '&lt;')
+    .replace(/>/g, '&gt;')
+    .replace(/\r\n?/g, '\n')
+    .replace(/\n/g, '&#10;');
+}
--- a/src/processors/nostr.ts
+++ b/src/processors/nostr.ts
@ -0,0 +1,28 @@
				@@ -0,0 +1,28 @@
+/**
+ * Wraps every `nostr:` address found in `content` in HTML markup.
+ *
+ * When `linkBaseURL` is non-empty, identifiers starting with `naddr`, `nevent`
+ * or `note` become links to `{linkBaseURL}/events?id={id}`, `npub` identifiers
+ * link to `{linkBaseURL}/profile?pubkey={id}` and `nprofile` identifiers link
+ * to `{linkBaseURL}/profile?id={id}`. Any other match, and every match when
+ * `linkBaseURL` is empty, is wrapped in a `span` with class `nostr-address`.
+ *
+ * @param content - Text to scan for `nostr:` addresses.
+ * @param linkBaseURL - URL prefix for generated links; an empty string disables linking.
+ * @returns `content` with each address replaced by its markup.
+ */
+export function processNostrAddresses(content: string, linkBaseURL: string): string {
+  // Identifier prefix paired with the path and query key of its target page.
+  const targets: ReadonlyArray<readonly [string, string]> = [
+    ['naddr', 'events?id='],
+    ['nevent', 'events?id='],
+    ['note', 'events?id='],
+    ['npub', 'profile?pubkey='],
+    ['nprofile', 'profile?id='],
+  ];
+
+  const toMarkup = (address: string, id: string): string => {
+    // Without a base URL nothing is looked up and the address stays a span.
+    const target = linkBaseURL
+      ? targets.find(([prefix]) => id.startsWith(prefix))
+      : undefined;
+
+    if (target === undefined) {
+      return `<span class="nostr-address">${address}</span>`;
+    }
+    return `<a href="${linkBaseURL}/${target[1]}${id}" class="nostr-address">${address}</a>`;
+  };
+
+  return content.replace(/nostr:([a-z0-9]+[a-z0-9]{1,})/g, toMarkup);
+}
--- a/src/processors/plain.ts
+++ b/src/processors/plain.ts
@ -0,0 +1,42 @@
				@@ -0,0 +1,42 @@
+import { ProcessResult } from '../types';
+
+/**
+ * Replaces the characters `&`, `<`, `>`, `"` and `'` with HTML character
+ * references so the text can be placed into markup as literal text.
+ */
+function escapeHTML(text: string): string {
+  // Applied in order; "&" goes first so the references inserted by the later
+  // steps are not escaped a second time.
+  const substitutions: ReadonlyArray<readonly [RegExp, string]> = [
+    [/&/g, '&amp;'],
+    [/</g, '&lt;'],
+    [/>/g, '&gt;'],
+    [/"/g, '&quot;'],
+    [/'/g, '&#39;'],
+  ];
+
+  return substitutions.reduce(
+    (escaped, [pattern, entity]) => escaped.replace(pattern, entity),
+    text,
+  );
+}
+
+/**
+ * Converts plain text into HTML paragraphs.
+ *
+ * The text is HTML-escaped, one or more blank lines separate paragraphs, and
+ * each line break left inside a paragraph becomes `<br>`. `\r\n` and lone `\r`
+ * line endings are treated like `\n`. Paragraphs are trimmed and empty ones
+ * are dropped, so leading, trailing or repeated blank lines never produce a
+ * stray `<br>` at the edge of a paragraph.
+ *
+ * @param text - Plain text to convert.
+ * @returns A result whose `content` holds the `<p>` elements joined by
+ *   newlines; `tableOfContents` is empty and both notation flags are `false`.
+ */
+export function processPlainText(text: string): ProcessResult {
+  // Normalise line endings before looking for blank lines: with "\r\n" input a
+  // "\r" sat between the two line breaks, so no paragraph boundary was found.
+  const normalized = escapeHTML(text).replace(/\r\n?/g, '\n');
+
+  const paragraphs = normalized
+    .split(/\n{2,}/)
+    .map((block) => block.trim())
+    .filter((block) => block.length > 0)
+    // Only line breaks inside a paragraph are turned into <br>.
+    .map((block) => `<p>${block.replace(/\n/g, '<br>\n')}</p>`);
+
+  return {
+    content: paragraphs.join('\n'),
+    tableOfContents: '',
+    hasLaTeX: false,
+    hasMusicalNotation: false,
+  };
+}
--- a/src/types.ts
+++ b/src/types.ts
@ -0,0 +1,43 @@
				@@ -0,0 +1,43 @@
+/**
+ * Options for configuring the parser behavior.
+ *
+ * Every field is optional. The "default" noted on each flag is the value the
+ * parser is documented to use when the field is omitted; the code that applies
+ * those defaults is not part of this declaration.
+ */
+export interface ParserOptions {
+  /** Base URL for rewriting relative links and nostr: addresses */
+  linkBaseURL?: string;
+  /** Enable AsciiDoc processing (default: true) */
+  enableAsciiDoc?: boolean;
+  /** Enable Markdown processing (default: true) */
+  enableMarkdown?: boolean;
+  /** Enable code syntax highlighting (default: true) */
+  enableCodeHighlighting?: boolean;
+  /** Enable LaTeX math rendering (default: true) */
+  enableLaTeX?: boolean;
+  /** Enable musical notation rendering (default: true) */
+  enableMusicalNotation?: boolean;
+  /** Enable nostr: address processing (default: true) */
+  enableNostrAddresses?: boolean;
+}
+
+/**
+ * Result of processing content.
+ *
+ * All four fields are required; a processor with nothing to report for a field
+ * supplies an empty string or `false` (the plain-text processor does so for
+ * `tableOfContents`, `hasLaTeX` and `hasMusicalNotation`).
+ */
+export interface ProcessResult {
+  /** Main processed HTML content */
+  content: string;
+  /** Extracted table of contents (for AsciiDoc); empty string when there is none */
+  tableOfContents: string;
+  /** Indicates if LaTeX content was found */
+  hasLaTeX: boolean;
+  /** Indicates if musical notation was found */
+  hasMusicalNotation: boolean;
+}
+
+/**
+ * Detected content format.
+ *
+ * String-valued enum: the runtime value of each member is its lowercase name.
+ */
+export enum ContentFormat {
+  Unknown = 'unknown',
+  AsciiDoc = 'asciidoc',
+  Markdown = 'markdown',
+  Plain = 'plain'
+}
--- a/tsconfig.json
+++ b/tsconfig.json
@ -0,0 +1,19 @@
				@@ -0,0 +1,19 @@
+{
+  "compilerOptions": {
+    "target": "ES2020",
+    "module": "commonjs",
+    "lib": ["ES2020"],
+    "outDir": "./dist",
+    "rootDir": "./src",
+    "strict": true,
+    "esModuleInterop": true,
+    "skipLibCheck": true,
+    "forceConsistentCasingInFileNames": true,
+    "declaration": true,
+    "declarationMap": true,
+    "sourceMap": true,
+    "resolveJsonModule": true
+  },
+  "include": ["src/**/*"],
+  "exclude": ["node_modules", "dist", "**/*.test.ts"]
+}