Browse Source

initial commit

master
Silberengel 2 weeks ago
commit
d689b1b050
  1. 33
      .gitignore
  2. 212
      README.md
  3. 55
      example.js
  4. 35
      package.json
  5. 55
      src/detector.ts
  6. 2
      src/index.ts
  7. 116
      src/parser.ts
  8. 66
      src/processors/asciidoc-links.ts
  9. 49
      src/processors/asciidoc.ts
  10. 52
      src/processors/code.ts
  11. 170
      src/processors/html-utils.ts
  12. 37
      src/processors/latex.ts
  13. 49
      src/processors/markdown-links.ts
  14. 36
      src/processors/markdown.ts
  15. 72
      src/processors/music.ts
  16. 28
      src/processors/nostr.ts
  17. 42
      src/processors/plain.ts
  18. 43
      src/types.ts
  19. 19
      tsconfig.json

33
.gitignore vendored

@ -0,0 +1,33 @@ @@ -0,0 +1,33 @@
# Binaries
*.exe
*.exe~
*.dll
*.so
*.dylib
gc-parser
# Test binary
*.test
# Output
*.out
# Go workspace file
go.work
# Node.js
node_modules/
package-lock.json
dist/
*.log
# IDE
.idea/
.vscode/
*.swp
*.swo
*~
# OS
.DS_Store
Thumbs.db

212
README.md

@ -0,0 +1,212 @@ @@ -0,0 +1,212 @@
# GC Parser
A super-parser for Nostr event content that handles multiple content formats including AsciiDoc, Markdown, code syntax highlighting, LaTeX, musical notation, and `nostr:` prefixed addresses.
Built with TypeScript/JavaScript using:
- **asciidoctor.js** for AsciiDoc processing
- **marked** for Markdown processing
- **highlight.js** for code syntax highlighting
## Features
- **AsciiDoc Processing**: Full AsciiDoc to HTML conversion with table of contents support
- **Markdown Processing**: Markdown to HTML conversion with GFM support
- **Code Syntax Highlighting**: Automatic syntax highlighting for code blocks using highlight.js
- **LaTeX Math**: Support for inline and block LaTeX math expressions (compatible with MathJax/KaTeX)
- **Musical Notation**: Support for ABC notation, LilyPond, chord notation, and MusicXML
- **Nostr Addresses**: Automatic processing of `nostr:` prefixed addresses (naddr, nevent, note, npub, nprofile)
- **Link Rewriting**: Automatic rewriting of wikilinks and nostr addresses to proper URLs
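- **HTML Sanitization**: Basic regex-based removal of `<script>` tags, inline event handlers, and `javascript:` URLs (pair it with a dedicated HTML sanitizer when rendering untrusted content in high-risk contexts)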
## Installation
```bash
npm install gc-parser
```
## Usage
### Basic Example
```typescript
import { Parser, defaultOptions } from 'gc-parser';
// Create parser with default options
const opts = defaultOptions();
opts.linkBaseURL = 'https://example.com';
const parser = new Parser(opts);
// Process content
const content = `# Hello World
This is **markdown** content with a nostr:npub1... address.`;
const result = await parser.process(content);
console.log(result.content);
console.log('Has LaTeX:', result.hasLaTeX);
console.log('Has Musical Notation:', result.hasMusicalNotation);
```
### Advanced Configuration
```typescript
import { Parser } from 'gc-parser';
const parser = new Parser({
linkBaseURL: 'https://example.com',
enableAsciiDoc: true,
enableMarkdown: true,
enableCodeHighlighting: true,
enableLaTeX: true,
enableMusicalNotation: true,
enableNostrAddresses: true,
});
const result = await parser.process(content);
```
### Processing AsciiDoc
```typescript
const content = `= Document Title
== Section
This is AsciiDoc content with a [[wikilink]] and nostr:naddr1...`;
const result = await parser.process(content);
// result.content contains the HTML
// result.tableOfContents contains the extracted TOC
```
### Processing Markdown
```typescript
const content = `# Markdown Document
This is **bold** and *italic* text.
\`\`\`go
func main() {
fmt.Println("Hello")
}
\`\`\`
`;
const result = await parser.process(content);
```
### LaTeX Math
The parser automatically detects and processes LaTeX math expressions:
- Inline math: `$E = mc^2$` or `\(E = mc^2\)`
- Block math: `$$\int_{-\infty}^{\infty} e^{-x^2} dx = \sqrt{\pi}$$` or `\[...\]`
The output is compatible with MathJax or KaTeX. Include one of these libraries in your HTML:
```html
<!-- For MathJax (MathJax 3 works in current browsers without the polyfill.io script; do not load third-party polyfill CDNs you do not control) -->
<script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
<!-- Or for KaTeX -->
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/katex.min.css">
<script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/katex.min.js"></script>
```
### Musical Notation
The parser supports multiple musical notation formats:
- **ABC Notation**: Automatically detected and wrapped for ABC.js
- **LilyPond**: Detected and wrapped for LilyPond rendering
- **Chord Notation**: Inline chords like `[C]`, `[Am]`, `[F#m7]`
- **MusicXML**: XML-based notation
Example:
```
X:1
K:C
C D E F | G A B c
```
### Nostr Addresses
The parser automatically processes `nostr:` prefixed addresses:
- `nostr:naddr1...` - Parameterized replaceable events
- `nostr:nevent1...` - Event references
- `nostr:note1...` - Note IDs
- `nostr:npub1...` - Public keys
- `nostr:nprofile1...` - Profile references
These are automatically converted to links if `linkBaseURL` is set.
## Integration with gitcitadel-online
This parser is designed to replace the content processing logic in `gitcitadel-online`.
### Migration Example
**Before (in gitcitadel-online):**
```go
// Old way - calling Node.js via exec
result, err := g.asciidocProc.Process(wiki.Content)
html := result.Content
```
**After (using gc-parser):**
```javascript
// New way - load the JavaScript/TypeScript module from a Node.js script
// (gitcitadel-online can invoke that script via Node.js exec or a Go bridge)
const { Parser } = require('gc-parser');
const parser = new Parser({ linkBaseURL: 'https://example.com' });
const result = await parser.process(content);
```
Or use it directly in a Node.js script that gitcitadel-online can call:
```javascript
// process-content.js
const { Parser } = require('gc-parser');
const parser = new Parser({
linkBaseURL: process.env.LINK_BASE_URL || '',
});
const content = process.argv[2] || '';
parser.process(content).then(result => {
console.log(JSON.stringify(result));
}).catch(err => {
console.error(err);
process.exit(1);
});
```
## Requirements
- Node.js 18+
- TypeScript 5.3+ (for development)
## Development
```bash
# Install dependencies
npm install
# Build TypeScript
npm run build
# Run tests
npm test
```
## License
MIT
## Contributing
Contributions are welcome! Please feel free to submit a Pull Request.

55
example.js

@ -0,0 +1,55 @@ @@ -0,0 +1,55 @@
#!/usr/bin/env node
/**
* Example usage of gc-parser
* This can be called from Go or used directly in Node.js
*/
const { Parser, defaultOptions } = require('./dist/index.js');
/**
 * Command-line driver: parses one piece of content and prints the result
 * object as pretty-printed JSON on stdout.
 *
 * The content is taken from the first CLI argument when present, otherwise
 * it is read line by line from stdin. The link base URL comes from the
 * LINK_BASE_URL environment variable, falling back to https://example.com.
 * Exits with status 1 when no content was supplied or processing fails.
 */
async function main() {
  // Start from the library defaults and only override the link base.
  const parserOptions = defaultOptions();
  parserOptions.linkBaseURL = process.env.LINK_BASE_URL || 'https://example.com';
  const parser = new Parser(parserOptions);

  // Prefer the CLI argument; fall back to draining stdin.
  const cliContent = process.argv[2];
  let content = '';
  if (cliContent) {
    content = cliContent;
  } else {
    const readline = require('readline');
    const stdinLines = readline.createInterface({
      input: process.stdin,
      output: process.stdout,
      terminal: false
    });
    for await (const line of stdinLines) {
      content += line + '\n';
    }
  }

  // Nothing to parse: report and bail out.
  if (!content) {
    console.error('No content provided');
    process.exit(1);
  }

  try {
    const parsed = await parser.process(content);
    // JSON keeps the output trivially consumable by a calling process.
    console.log(JSON.stringify(parsed, null, 2));
  } catch (error) {
    console.error('Error processing content:', error);
    process.exit(1);
  }
}
// Run main() only when this file is executed directly, not when it is require()d.
if (require.main === module) {
main();
}
// Expose main so another script can invoke the example programmatically.
module.exports = { main };

35
package.json

@ -0,0 +1,35 @@ @@ -0,0 +1,35 @@
{
"name": "gc-parser",
"version": "1.0.0",
"description": "Super-parser for Nostr event content supporting AsciiDoc, Markdown, code syntax highlighting, LaTeX, musical notation, and nostr: addresses",
"main": "dist/index.js",
"types": "dist/index.d.ts",
"scripts": {
"build": "tsc",
"test": "jest",
"prepublishOnly": "npm run build"
},
"keywords": [
"nostr",
"parser",
"asciidoc",
"markdown",
"syntax-highlighting",
"latex",
"music"
],
"author": "",
"license": "MIT",
"dependencies": {
"@asciidoctor/core": "^3.0.4",
"highlight.js": "^11.10.0",
"marked": "^12.0.0"
},
"devDependencies": {
"@types/node": "^20.11.0",
"typescript": "^5.3.3",
"jest": "^29.7.0",
"@types/jest": "^29.5.11",
"@types/highlight.js": "^10.1.0"
}
}

55
src/detector.ts

@ -0,0 +1,55 @@ @@ -0,0 +1,55 @@
import { ContentFormat } from './types';
/**
 * Guesses the markup format of raw content from structural indicators.
 *
 * Each indicator is a pattern anchored to the construct it stands for
 * (for example a heading must start a line), so ordinary prose, URLs and
 * `nostr:` addresses no longer count as AsciiDoc attributes or Markdown
 * links, and `== Section` no longer also counts as a `= Title`.
 *
 * @param content Raw text to classify.
 * @returns `ContentFormat.AsciiDoc` when at least two AsciiDoc indicators
 *   match and they outnumber the Markdown indicators; otherwise
 *   `ContentFormat.Markdown` when any Markdown indicator matches;
 *   otherwise `ContentFormat.Plain`.
 */
export function detectFormat(content: string): ContentFormat {
  // None of these patterns use the `g` flag, so `test()` carries no state.
  const asciidocIndicators: readonly RegExp[] = [
    /^= \S/m, // Document title
    /^== \S/m, // Section
    /^={3,6} \S/m, // Subsection
    /^include::/m, // Include directive
    /^image::/m, // Image block
    /^\[source\b/m, // Source block
    /^-{4,}\s*$/m, // Listing block delimiter
    /^\.{4,}\s*$/m, // Literal block delimiter
    /^\|===/m, // Table
    /^:[A-Za-z0-9_][\w-]*!?:(?:\s|$)/m, // Attribute entry such as ":toc:"
  ];
  const markdownIndicators: readonly RegExp[] = [
    /^# \S/m, // Heading
    /^#{2,6} \S/m, // Subheading
    /```/, // Code block fence
    /\*\*\S/, // Bold
    /(?<!\*)\*(?!\*)/, // Single asterisk: italic or bullet
    /^[ \t]*- \S/m, // List item
    /!\[[^\]\n]*\]\(/, // Image
    /(?<!!)\[[^\]\n]+\]\([^)\n]*\)/, // Inline link (not an image)
  ];

  const countMatches = (patterns: readonly RegExp[]): number =>
    patterns.filter((pattern) => pattern.test(content)).length;

  const asciidocScore = countMatches(asciidocIndicators);
  const markdownScore = countMatches(markdownIndicators);

  // AsciiDoc needs a clear lead; Markdown is the fallback for any markup.
  if (asciidocScore > markdownScore && asciidocScore >= 2) {
    return ContentFormat.AsciiDoc;
  }
  if (markdownScore > 0) {
    return ContentFormat.Markdown;
  }
  return ContentFormat.Plain;
}

2
src/index.ts

@ -0,0 +1,2 @@ @@ -0,0 +1,2 @@
export * from './parser';
export * from './types';

116
src/parser.ts

@ -0,0 +1,116 @@ @@ -0,0 +1,116 @@
import { ParserOptions, ProcessResult, ContentFormat } from './types';
import { processAsciiDoc } from './processors/asciidoc';
import { processMarkdown } from './processors/markdown';
import { processPlainText } from './processors/plain';
import { processNostrAddresses } from './processors/nostr';
import { detectFormat } from './detector';
import { processLaTeX, hasLaTeX } from './processors/latex';
import { processMusicalNotation, hasMusicalNotation } from './processors/music';
import { ensureCodeHighlighting } from './processors/code';
/**
 * Builds the default parser configuration: every processing feature is
 * enabled and no link base URL is set.
 *
 * @returns A freshly allocated options object on every call, so callers
 *   may mutate the result without affecting later calls.
 */
export function defaultOptions(): ParserOptions {
  const defaults: ParserOptions = {
    linkBaseURL: '',
    enableAsciiDoc: true,
    enableMarkdown: true,
    enableCodeHighlighting: true,
    enableLaTeX: true,
    enableMusicalNotation: true,
    enableNostrAddresses: true,
  };
  return defaults;
}
/**
 * Main parser for Nostr event content.
 * Handles multiple content formats: AsciiDoc, Markdown, code syntax,
 * LaTeX, musical notation, and nostr: prefixed addresses.
 */
export class Parser {
  /** Fully resolved options: caller values first, then `defaultOptions()`. */
  private options: Required<ParserOptions>;

  /**
   * @param options Partial configuration; omitted fields fall back to
   *   the values returned by `defaultOptions()`.
   */
  constructor(options: ParserOptions = {}) {
    const defaults = defaultOptions();
    this.options = {
      linkBaseURL: options.linkBaseURL ?? defaults.linkBaseURL ?? '',
      enableAsciiDoc: options.enableAsciiDoc ?? defaults.enableAsciiDoc ?? true,
      enableMarkdown: options.enableMarkdown ?? defaults.enableMarkdown ?? true,
      enableCodeHighlighting: options.enableCodeHighlighting ?? defaults.enableCodeHighlighting ?? true,
      enableLaTeX: options.enableLaTeX ?? defaults.enableLaTeX ?? true,
      enableMusicalNotation: options.enableMusicalNotation ?? defaults.enableMusicalNotation ?? true,
      enableNostrAddresses: options.enableNostrAddresses ?? defaults.enableNostrAddresses ?? true,
    };
  }

  /**
   * Processes Nostr event content and returns HTML plus metadata.
   *
   * The format is detected on the raw text and the matching processor is
   * run (AsciiDoc falls back to Markdown, Markdown to plain text, when the
   * respective feature is disabled). `nostr:` addresses are decorated only
   * after conversion, on text outside tags: generating `<a>`/`<span>` markup
   * before conversion made the plain-text path escape that markup, fed
   * HTML into format detection, and let the link rewriters re-match the
   * generated anchors.
   *
   * @param content Raw event content.
   * @returns The processed HTML, the extracted TOC (AsciiDoc only) and the
   *   LaTeX / musical-notation detection flags.
   */
  async process(content: string): Promise<ProcessResult> {
    const { linkBaseURL } = this.options;

    // Detect the format on the untouched source text.
    const format = detectFormat(content);
    let result: ProcessResult;
    switch (format) {
      case ContentFormat.AsciiDoc:
        if (this.options.enableAsciiDoc) {
          result = await processAsciiDoc(content, linkBaseURL);
        } else if (this.options.enableMarkdown) {
          // Fallback to markdown if AsciiDoc is disabled
          result = await processMarkdown(content, linkBaseURL);
        } else {
          result = processPlainText(content);
        }
        break;
      case ContentFormat.Markdown:
        if (this.options.enableMarkdown) {
          result = await processMarkdown(content, linkBaseURL);
        } else {
          // Fallback to plain text
          result = processPlainText(content);
        }
        break;
      default:
        // Plain text or mixed content
        result = processPlainText(content);
    }

    // Decorate nostr: addresses that are still plain text in the HTML.
    if (this.options.enableNostrAddresses) {
      result.content = this.decorateNostrAddresses(result.content);
    }

    // Post-process: handle LaTeX and musical notation in the HTML
    if (this.options.enableLaTeX) {
      result.hasLaTeX = hasLaTeX(result.content);
      if (result.hasLaTeX) {
        result.content = processLaTeX(result.content);
      }
    }
    if (this.options.enableMusicalNotation) {
      result.hasMusicalNotation = hasMusicalNotation(result.content);
      if (result.hasMusicalNotation) {
        result.content = processMusicalNotation(result.content);
      }
    }

    // Ensure code highlighting is applied if enabled
    if (this.options.enableCodeHighlighting) {
      result.content = ensureCodeHighlighting(result.content);
    }
    return result;
  }

  /**
   * Applies `processNostrAddresses` to the text runs of an HTML string,
   * leaving tags (and therefore attribute values) untouched and skipping
   * text nested inside `<a>`, `<code>` or `<pre>` so existing links are not
   * nested and code samples stay verbatim.
   */
  private decorateNostrAddresses(html: string): string {
    // The capture group keeps the tags: odd indices are tags, even are text.
    const segments = html.split(/(<[^>]*>)/);
    let protectedDepth = 0;
    return segments
      .map((segment, index) => {
        if (index % 2 === 1) {
          const tag = /^<(\/?)(?:a|code|pre)\b/i.exec(segment);
          if (tag) {
            protectedDepth = tag[1] ? Math.max(0, protectedDepth - 1) : protectedDepth + 1;
          }
          return segment;
        }
        return protectedDepth === 0
          ? processNostrAddresses(segment, this.options.linkBaseURL)
          : segment;
      })
      .join('');
  }
}
/**
 * One-shot helper: builds a `Parser` from the given options (defaults apply
 * to anything omitted) and processes a single piece of content with it.
 *
 * @param content Raw content to process.
 * @param options Optional parser configuration.
 * @returns The result of `Parser.process` for that content.
 */
export async function process(content: string, options?: ParserOptions): Promise<ProcessResult> {
  return new Parser(options).process(content);
}

66
src/processors/asciidoc-links.ts

@ -0,0 +1,66 @@ @@ -0,0 +1,66 @@
/**
 * Normalizes a d tag (the file header attributes these rules to NIP-54).
 *
 * The input is lower-cased and then passed through an ordered list of
 * rewrite steps; the order matters because later steps clean up after
 * earlier ones.
 *
 * @param dTag Raw tag / wikilink target.
 * @returns The normalized identifier (may be empty).
 */
export function normalizeDTag(dTag: string): string {
  const rewriteSteps: ReadonlyArray<readonly [RegExp, string]> = [
    [/\s+/g, '-'], // whitespace runs become a hyphen
    [/[^a-z0-9\-\u0080-\uFFFF]/g, ''], // drop ASCII punctuation/symbols, keep non-ASCII
    [/-+/g, '-'], // collapse hyphen runs
    [/^-+|-+$/g, ''], // strip leading and trailing hyphens
  ];
  return rewriteSteps.reduce(
    (value, [pattern, replacement]) => value.replace(pattern, replacement),
    dTag.toLowerCase(),
  );
}
/**
 * Rewrites wikilinks and nostr: links in AsciiDoc source into `link:` macros.
 *
 * - `[[target]]` / `[[target|display text]]` become
 *   `link:<base>/events?d=<normalized target>[display]`, or an in-page
 *   `link:#<normalized target>[display]` when no base URL is set. Only the
 *   first `|` separates target from display text, so display text may
 *   itself contain `|`.
 * - `nostr:` addresses (naddr, nevent, note, npub, nprofile) become links
 *   when a base URL is set, using the same URL scheme as the Markdown
 *   rewriter and the generic nostr processor. The identifier is matched
 *   with the lowercase alphanumeric charset so trailing punctuation such
 *   as `.` or `)` is not swallowed into the link.
 *
 * @param content AsciiDoc source text.
 * @param linkBaseURL Base URL for generated links; empty to keep links local.
 * @returns The rewritten AsciiDoc source.
 */
export function rewriteAsciiDocLinks(content: string, linkBaseURL: string): string {
  const wikilinkRegex = /\[\[([^\]]+)\]\]/g;
  const withWikilinks = content.replace(wikilinkRegex, (_match: string, inner: string) => {
    const pipeIdx = inner.indexOf('|');
    const target = (pipeIdx === -1 ? inner : inner.slice(0, pipeIdx)).trim();
    const display = pipeIdx === -1 ? target : inner.slice(pipeIdx + 1).trim();
    const normalized = normalizeDTag(target);
    const url = linkBaseURL ? `${linkBaseURL}/events?d=${normalized}` : `#${normalized}`;
    return `link:${url}[${display}]`;
  });

  const nostrLinkRegex = /nostr:((?:naddr|nevent|note|npub|nprofile)1[a-z0-9]+)/g;
  return withWikilinks.replace(nostrLinkRegex, (match: string, nostrID: string) => {
    if (!linkBaseURL) {
      return match;
    }
    let url: string;
    if (nostrID.startsWith('npub')) {
      url = `${linkBaseURL}/profile?pubkey=${nostrID}`;
    } else if (nostrID.startsWith('nprofile')) {
      url = `${linkBaseURL}/profile?id=${nostrID}`;
    } else {
      url = `${linkBaseURL}/events?id=${nostrID}`;
    }
    return `link:${url}[${match}]`;
  });
}

49
src/processors/asciidoc.ts

@ -0,0 +1,49 @@ @@ -0,0 +1,49 @@
import asciidoctor from '@asciidoctor/core';
import { ProcessResult } from '../types';
import { rewriteAsciiDocLinks } from './asciidoc-links';
import { extractTOC, sanitizeHTML, processLinks } from './html-utils';
// Single shared Asciidoctor processor, created once when the module loads.
const asciidoctorInstance = asciidoctor();

/**
 * Converts AsciiDoc source to HTML.
 *
 * Pipeline: rewrite wikilinks / nostr: links in the source, convert with
 * Asciidoctor, split the generated table of contents from the body, then
 * sanitize each fragment and adjust its link targets.
 *
 * @param content AsciiDoc source text.
 * @param linkBaseURL Base URL used for link rewriting and same-site checks.
 * @returns The body HTML and TOC HTML; the LaTeX / music flags are
 *   initialised to `false` here.
 */
export async function processAsciiDoc(content: string, linkBaseURL: string): Promise<ProcessResult> {
  const asciidocSource = rewriteAsciiDocLinks(content, linkBaseURL);

  const rendered = asciidoctorInstance.convert(asciidocSource, {
    safe: 'safe',
    backend: 'html5',
    doctype: 'article',
    attributes: {
      showtitle: true,
      icons: 'font',
      sectanchors: true,
      sectlinks: true,
      toc: 'left',
      toclevels: 3,
    },
  }) as string;

  const { toc, contentWithoutTOC } = extractTOC(rendered);

  // Body and TOC get the same treatment: sanitize first, then fix link targets.
  const finalize = (fragment: string): string =>
    processLinks(sanitizeHTML(fragment), linkBaseURL);

  const bodyHTML = finalize(contentWithoutTOC);
  const tocHTML = finalize(toc);

  return {
    content: bodyHTML,
    tableOfContents: tocHTML,
    hasLaTeX: false,
    hasMusicalNotation: false,
  };
}

52
src/processors/code.ts

@ -0,0 +1,52 @@ @@ -0,0 +1,52 @@
import hljs from 'highlight.js';
/**
 * Applies highlight.js syntax highlighting to `<pre><code>` blocks.
 *
 * The opening `<pre>` tag may carry attributes (Asciidoctor emits
 * `<pre class="highlight">`), and is written back unchanged so its
 * classes survive. When the `<code>` element names a language that
 * highlight.js knows, that language is used; otherwise the language is
 * auto-detected.
 *
 * @param html HTML that may contain code blocks with entity-escaped code.
 * @returns The HTML with each matched block replaced by its highlighted
 *   form; a block is left untouched if highlighting throws.
 */
export function ensureCodeHighlighting(html: string): string {
  // Groups: 1 = opening <pre ...> tag, 2 = declared language (optional), 3 = escaped code.
  const codeBlockRegex =
    /(<pre(?:\s[^>]*)?>)<code(?:\s+class=["']language-([^"']+)["'])?[^>]*>(.*?)<\/code><\/pre>/gs;
  return html.replace(
    codeBlockRegex,
    (match: string, preTag: string, lang: string | undefined, code: string) => {
      // highlight.js expects raw source and escapes its own output.
      const source = unescapeHTML(code);
      try {
        // The result type is inferred, so no namespace-qualified type name is needed.
        const highlighted =
          lang && hljs.getLanguage(lang)
            ? hljs.highlight(source, { language: lang })
            : hljs.highlightAuto(source);
        const langClass = highlighted.language ? ` class="language-${highlighted.language}"` : '';
        return `${preTag}<code${langClass}>${highlighted.value}</code></pre>`;
      } catch {
        // Best effort: keep the original block if highlighting fails.
        return match;
      }
    },
  );
}
/**
 * Decodes the five entities `&lt;`, `&gt;`, `&amp;`, `&quot;` and `&#39;`.
 *
 * Decoding happens in a single pass so every entity is decoded exactly
 * once: `&amp;quot;` becomes the text `&quot;`, not `"`. (Decoding `&amp;`
 * in a separate step before the others would double-decode such input.)
 *
 * @param text Entity-escaped text.
 * @returns The text with those entities replaced by their characters.
 */
function unescapeHTML(text: string): string {
  const decoded: Record<string, string> = {
    '&lt;': '<',
    '&gt;': '>',
    '&amp;': '&',
    '&quot;': '"',
    '&#39;': "'",
  };
  return text.replace(/&(?:lt|gt|amp|quot|#39);/g, (entity) => decoded[entity] ?? entity);
}

170
src/processors/html-utils.ts

@ -0,0 +1,170 @@ @@ -0,0 +1,170 @@
/**
 * Extracts the table of contents from AsciiDoc HTML output.
 *
 * The TOC container is located by trying the start patterns in order
 * (a `<div>` or `<nav>` identified as "toc"). Its end is found by walking
 * forward and tracking nesting: `<div ...>` opens a level (unless written
 * self-closing), `</div>` and `</nav>` close one. The container's inner
 * HTML, minus the `toctitle` heading div, is returned as the TOC, and the
 * whole container is cut out of the content.
 *
 * @param html Full converted HTML.
 * @returns `toc` (inner TOC HTML, empty when none was found or the
 *   container is never closed) and `contentWithoutTOC` (the input with the
 *   TOC container removed, or the unchanged input in those same cases).
 */
export function extractTOC(html: string): { toc: string; contentWithoutTOC: string } {
  // Tried in order; the first pattern that matches anywhere wins.
  const tocStartPatterns = [
    /<div\s+id=["']toc["']\s+class=["']toc["'][^>]*>/i,
    /<div\s+id=["']toc["'][^>]*>/i,
    /<div\s+class=["']toc["'][^>]*>/i,
    /<nav\s+id=["']toc["'][^>]*>/i,
  ];
  let tocStartIdx = -1;
  let tocStartTag = '';
  for (const pattern of tocStartPatterns) {
    const match = pattern.exec(html);
    if (match) {
      tocStartIdx = match.index;
      tocStartTag = match[0];
      break;
    }
  }
  if (tocStartIdx === -1) {
    // No TOC found
    return { toc: '', contentWithoutTOC: html };
  }

  // Walk forward until the container's own closing tag brings depth to 0.
  const innerStartIdx = tocStartIdx + tocStartTag.length;
  let depth = 1;
  let i = innerStartIdx;
  let closingTagLength = 0; // length of the most recent closing tag seen
  while (i < html.length && depth > 0) {
    const opensDiv = html.startsWith('<div', i);
    const closesContainer = html.startsWith('</div', i) || html.startsWith('</nav', i);
    if (!opensDiv && !closesContainer) {
      i++;
      continue;
    }
    const closeIdx = html.indexOf('>', i);
    if (closeIdx === -1) break; // truncated tag: give up
    if (closesContainer) {
      depth--;
      closingTagLength = closeIdx + 1 - i;
    } else if (html[closeIdx - 1] !== '/') {
      // A self-closing "<div ... />" does not open a level.
      depth++;
    }
    i = closeIdx + 1;
  }
  if (depth !== 0) {
    // Container never closed: treat as "no TOC" rather than guess.
    return { toc: '', contentWithoutTOC: html };
  }

  const tocEndIdx = i;
  // Inner HTML only: drop the opening tag and the actual closing tag,
  // whatever its length ("</div>" and "</nav>" are both six characters).
  let tocContent = html.substring(innerStartIdx, tocEndIdx - closingTagLength).trim();
  // Remove the toctitle div if present (AsciiDoc adds a "Table of Contents" title)
  tocContent = tocContent.replace(/<div\s+id=["']toctitle["'][^>]*>.*?<\/div>\s*/gis, '').trim();

  // Remove the TOC from the content
  const contentWithoutTOC = html.substring(0, tocStartIdx) + html.substring(tocEndIdx);
  return { toc: tocContent, contentWithoutTOC };
}
/**
 * Performs basic, regex-based HTML sanitization to reduce XSS risk.
 *
 * Removes `<script>` elements (and stray or unclosed script tags), inline
 * `on*=` event-handler attributes (quoted anywhere; unquoted only inside a
 * tag and outside quoted attribute values, so ordinary text and URLs are
 * left alone), `javascript:` schemes and `data:text/html` prefixes.
 *
 * The removals are repeated until the string stops changing. A single
 * pass can be defeated by nesting, e.g. `javajavascript:script:` collapses
 * to `javascript:` after one removal. Every step only deletes characters,
 * so the loop always terminates.
 *
 * NOTE(review): this is a deny-list and not a complete sanitizer; content
 * rendered in high-risk contexts should additionally go through a real
 * HTML sanitizer.
 *
 * @param html HTML to clean.
 * @returns The HTML with the patterns above removed.
 */
export function sanitizeHTML(html: string): string {
  const stripOnce = (input: string): string =>
    input
      // Script elements with their content, then any leftover script tags.
      .replace(/<script[^>]*>.*?<\/script\s*>/gis, '')
      .replace(/<\/?script[^>]*>/gi, '')
      // Quoted event handlers (onclick="...", onerror='...').
      .replace(/\s*on\w+\s*=\s*["'][^"']*["']/gi, '')
      // Unquoted event handlers, only inside tags; quoted values are skipped.
      .replace(/<[a-z][^>]*>/gi, (tag) =>
        tag.replace(/"[^"]*"|'[^']*'|\s+on\w+\s*=\s*[^\s>"']+/gi, (part) =>
          /^\s+on/i.test(part) ? '' : part,
        ),
      )
      // Dangerous URL schemes.
      .replace(/javascript:/gi, '')
      .replace(/data:\s*text\/html/gi, '');

  let current = html;
  for (;;) {
    const next = stripOnce(current);
    if (next === current) {
      return current;
    }
    current = next;
  }
}
/**
 * Adjusts `<a href>` tags so external links open in a new tab and
 * same-site or relative links open in the current one.
 *
 * A link is "same site" when the host part of its URL equals the host of
 * `linkBaseURL` or is a subdomain of it. The host is compared, not the
 * whole URL, so an external URL that merely mentions the base domain in
 * its path or query (e.g. `https://evil.example/?r=mysite.com`) is still
 * treated as external.
 *
 * - Relative and same-site links: any `target` attribute is removed.
 * - External links without a `target`: `target="_blank"` is added, and
 *   `rel` is created or extended to include `noopener noreferrer`.
 * - External links that already have a `target` are left unchanged.
 *
 * @param html HTML whose anchors should be adjusted.
 * @param linkBaseURL Base URL of the hosting site; empty disables the
 *   same-site check.
 * @returns The HTML with adjusted anchor tags.
 */
export function processLinks(html: string, linkBaseURL: string): string {
  // Host (including port/userinfo if present) of an http(s) URL, lower-cased.
  const hostOf = (url: string): string =>
    (url.replace(/^https?:\/\//i, '').split(/[/?#]/)[0] ?? '').toLowerCase();
  const stripTarget = (tag: string): string =>
    tag.replace(/\s*target\s*=\s*["'][^"']*["']/gi, '');

  const linkBaseHost = linkBaseURL ? hostOf(linkBaseURL) : '';

  // Regex to match <a> tags with href attributes
  const linkRegex = /<a\s+([^>]*?)href\s*=\s*["']([^"']+)["']([^>]*?)>/g;
  return html.replace(linkRegex, (match: string, _before: string, href: string) => {
    const isExternal = href.startsWith('http://') || href.startsWith('https://');
    if (!isExternal) {
      // Local/relative link: always open in the same tab.
      return stripTarget(match);
    }

    const hrefHost = hostOf(href);
    const isSameSite =
      linkBaseHost !== '' &&
      (hrefHost === linkBaseHost || hrefHost.endsWith(`.${linkBaseHost}`));
    if (isSameSite) {
      return stripTarget(match);
    }

    // External link: respect an explicit target chosen by the author.
    if (match.includes('target=')) {
      return match;
    }
    if (!match.includes('rel=')) {
      return match.replace('>', ' target="_blank" rel="noopener noreferrer">');
    }
    // Extend the existing rel attribute unless it already has noopener.
    const withRel = match.replace(
      /rel\s*=\s*["']([^"']*)["']/gi,
      (relMatch: string, relValue: string) =>
        relValue.includes('noopener') ? relMatch : `rel="${relValue} noopener noreferrer"`,
    );
    return withRel.replace('>', ' target="_blank">');
  });
}

37
src/processors/latex.ts

@ -0,0 +1,37 @@ @@ -0,0 +1,37 @@
/**
 * Checks whether content contains LaTeX math expressions.
 *
 * Recognised delimiters: `$...$` and `\(...\)` for inline math, `$$...$$`
 * and `\[...\]` for block math. The backslash forms are matched lazily up
 * to the first closing delimiter, so expressions that themselves contain
 * `)` or `]` (such as `\(f(x)\)` or `\[a[0]\]`) are detected.
 *
 * @param content Text or HTML to inspect.
 * @returns `true` when at least one math expression is found.
 */
export function hasLaTeX(content: string): boolean {
  // Inline math: $...$ or \(...\)
  const inlineMathPattern = /\$[^$]+\$|\\\([\s\S]+?\\\)/;
  // Block math: $$...$$ or \[...\]
  const blockMathPattern = /\$\$[^$]+\$\$|\\\[[\s\S]+?\\\]/;
  return inlineMathPattern.test(content) || blockMathPattern.test(content);
}
/**
 * Wraps LaTeX math expressions in HTML containers for MathJax or KaTeX.
 *
 * Block math (`$$...$$` or `\[...\]`) becomes
 * `<div class="math-block">\[...\]</div>`; inline math (`$...$` on one
 * line, or `\(...\)`) becomes `<span class="math-inline">\(...\)</span>`.
 * Block math is handled first so `$$` is never read as two inline
 * delimiters. The backslash forms are matched lazily up to the first
 * closing delimiter, so expressions containing `)` or `]` (for example
 * `\(f(x)\)`) are wrapped too.
 *
 * @param html HTML content that may contain math expressions.
 * @returns The HTML with every recognised expression wrapped.
 */
export function processLaTeX(html: string): string {
  // Block math: $$...$$ or \[...\]
  const blockMathPattern = /\$\$([^$]+)\$\$|\\\[([\s\S]+?)\\\]/g;
  const withBlocks = html.replace(
    blockMathPattern,
    (_match: string, dollarContent?: string, bracketContent?: string) => {
      const mathContent = (dollarContent || bracketContent || '').trim();
      return `<div class="math-block">\\[${mathContent}\\]</div>`;
    },
  );

  // Inline math: $...$ (single line) or \(...\)
  const inlineMathPattern = /\$([^$\n]+)\$|\\\(([\s\S]+?)\\\)/g;
  return withBlocks.replace(
    inlineMathPattern,
    (_match: string, dollarContent?: string, parenContent?: string) => {
      const mathContent = (dollarContent || parenContent || '').trim();
      return `<span class="math-inline">\\(${mathContent}\\)</span>`;
    },
  );
}

49
src/processors/markdown-links.ts

@ -0,0 +1,49 @@ @@ -0,0 +1,49 @@
import { normalizeDTag } from './asciidoc-links';
/**
 * Rewrites wikilinks and nostr: links in Markdown source into Markdown links.
 *
 * - `[[target]]` / `[[target|display text]]` become
 *   `[display](<base>/events?d=<normalized target>)`, or an in-page
 *   `[display](#<normalized target>)` when no base URL is set. Only the
 *   first `|` separates target from display text, so display text may
 *   itself contain `|`.
 * - `nostr:` addresses (naddr, nevent, note, npub, nprofile) become links
 *   when a base URL is set. The identifier is matched with the lowercase
 *   alphanumeric charset so trailing punctuation (`.`, `,`, `)`) is not
 *   swallowed into the identifier, which would otherwise corrupt both the
 *   URL and the generated Markdown link syntax.
 *
 * @param content Markdown source text.
 * @param linkBaseURL Base URL for generated links; empty to keep links local.
 * @returns The rewritten Markdown source.
 */
export function rewriteMarkdownLinks(content: string, linkBaseURL: string): string {
  const wikilinkRegex = /\[\[([^\]]+)\]\]/g;
  const withWikilinks = content.replace(wikilinkRegex, (_match: string, inner: string) => {
    const pipeIdx = inner.indexOf('|');
    const target = (pipeIdx === -1 ? inner : inner.slice(0, pipeIdx)).trim();
    const display = pipeIdx === -1 ? target : inner.slice(pipeIdx + 1).trim();
    const normalized = normalizeDTag(target);
    const url = linkBaseURL ? `${linkBaseURL}/events?d=${normalized}` : `#${normalized}`;
    return `[${display}](${url})`;
  });

  const nostrLinkRegex = /nostr:((?:naddr|nevent|note|npub|nprofile)1[a-z0-9]+)/g;
  return withWikilinks.replace(nostrLinkRegex, (match: string, nostrID: string) => {
    if (!linkBaseURL) {
      return match;
    }
    let url: string;
    if (nostrID.startsWith('npub')) {
      url = `${linkBaseURL}/profile?pubkey=${nostrID}`;
    } else if (nostrID.startsWith('nprofile')) {
      url = `${linkBaseURL}/profile?id=${nostrID}`;
    } else {
      url = `${linkBaseURL}/events?id=${nostrID}`;
    }
    return `[${match}](${url})`;
  });
}

36
src/processors/markdown.ts

@ -0,0 +1,36 @@ @@ -0,0 +1,36 @@
import { marked } from 'marked';
import { ProcessResult } from '../types';
import { rewriteMarkdownLinks } from './markdown-links';
import { sanitizeHTML, processLinks } from './html-utils';
// Configure marked options.
// Only `breaks` and `gfm` are set: `headerIds` and `mangle` are no longer
// core marked options in the major version this package depends on (that
// behaviour moved to the marked-gfm-heading-id / marked-mangle extensions),
// so passing them has no effect and is rejected by the option typings.
marked.setOptions({
  breaks: true,
  gfm: true,
});
/**
 * Converts Markdown source to HTML.
 *
 * Pipeline: rewrite wikilinks / nostr: links in the source, render with
 * marked, sanitize the HTML, then adjust link targets.
 *
 * @param content Markdown source text.
 * @param linkBaseURL Base URL used for link rewriting and same-site checks.
 * @returns The rendered HTML; Markdown yields no table of contents and the
 *   LaTeX / music flags are initialised to `false` here.
 */
export async function processMarkdown(content: string, linkBaseURL: string): Promise<ProcessResult> {
  const markdownSource = rewriteMarkdownLinks(content, linkBaseURL);
  const rendered = await marked.parse(markdownSource) as string;
  const safeHTML = sanitizeHTML(rendered);

  return {
    content: processLinks(safeHTML, linkBaseURL),
    tableOfContents: '',
    hasLaTeX: false,
    hasMusicalNotation: false,
  };
}

72
src/processors/music.ts

@ -0,0 +1,72 @@ @@ -0,0 +1,72 @@
/**
 * Reports whether content appears to contain musical notation.
 *
 * Four independent detectors are tried; one hit is enough.
 *
 * @param content Text or HTML to inspect.
 * @returns `true` when any detector matches.
 */
export function hasMusicalNotation(content: string): boolean {
  const detectors: readonly RegExp[] = [
    // ABC header fields: X:1, K:C, M:4/4 (case-insensitive)
    /X:\s*\d+|K:\s*[A-G]|M:\s*\d+\/\d+/i,
    // LilyPond commands
    /\\relative|\\clef|\\key|\\time/,
    // MusicXML-like tags
    /<note>|<pitch>|<rest>/i,
    // Simple bracketed chords: [C], [Am], [F#7]
    /\[[A-G][#b]?m?[0-9]?\]/,
  ];
  return detectors.some((detector) => detector.test(content));
}
/**
 * Wraps musical notation found in HTML content in marker elements that a
 * client-side renderer can pick up.
 *
 * - ABC blocks (an `X:<number>` line plus the following non-empty lines)
 *   become `<div class="abc-notation" data-abc="...">`.
 * - LilyPond snippets (`\relative` up to the next `}`) become
 *   `<div class="lilypond-notation" data-lilypond="...">`.
 * - Bracketed chord symbols such as `[C]`, `[Am]`, `[F#m7]`, `[Gsus4]` or
 *   `[D/F#]` become `<span class="chord" data-chord="...">`. Only real
 *   chord spellings are accepted, so ordinary bracketed text like
 *   `[Example]` is left alone, and tags are skipped so the `data-*`
 *   attributes generated above are never rewritten.
 * - `<music>...</music>` elements become
 *   `<div class="musicxml-notation" data-musicxml="...">`.
 *
 * NOTE(review): the ABC pattern runs until the first empty line, which in
 * generated HTML may be far beyond the tune itself — confirm against real
 * renderer output.
 *
 * @param html HTML content to scan.
 * @returns The HTML with recognised notation wrapped.
 */
export function processMusicalNotation(html: string): string {
  // ABC notation blocks
  const abcBlockPattern = /(X:\s*\d+[^\n]*\n(?:[^\n]+\n)*)/gs;
  html = html.replace(abcBlockPattern, (match: string) => {
    const abcContent = match.trim();
    return `<div class="abc-notation" data-abc="${escapeForAttr(abcContent)}">${abcContent}</div>`;
  });

  // LilyPond notation blocks
  const lilypondPattern = /(\\relative[^}]+})/gs;
  html = html.replace(lilypondPattern, (match: string) => {
    const lilypondContent = match.trim();
    return `<div class="lilypond-notation" data-lilypond="${escapeForAttr(lilypondContent)}">${lilypondContent}</div>`;
  });

  // Inline chords. The first alternative consumes whole tags and returns
  // them unchanged, so brackets inside attribute values are never touched.
  const tagOrChordPattern =
    /<[^>]*>|\[([A-G][#b]?(?:maj|min|dim|aug|sus|add|m)?[0-9]*(?:(?:sus|add)[0-9]+)?(?:\/[A-G][#b]?)?)\]/g;
  html = html.replace(tagOrChordPattern, (match: string, chord: string | undefined) =>
    chord === undefined
      ? match
      : `<span class="chord" data-chord="${escapeForAttr(chord)}">[${chord}]</span>`,
  );

  // MusicXML-like notation
  const musicxmlPattern = /(<music[^>]*>.*?<\/music>)/gs;
  html = html.replace(musicxmlPattern, (match: string) => {
    const musicxmlContent = match.trim();
    return `<div class="musicxml-notation" data-musicxml="${escapeForAttr(musicxmlContent)}">${musicxmlContent}</div>`;
  });
  return html;
}

/**
 * Escapes a string for use inside a double-quoted HTML attribute.
 *
 * Quotes and angle brackets are replaced by entities, newlines become a
 * single space and carriage returns are dropped. `&` is intentionally left
 * as is: the input is already HTML source, so its entities must not be
 * escaped a second time.
 *
 * @param text HTML source fragment.
 * @returns The attribute-safe string.
 */
function escapeForAttr(text: string): string {
  return text
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/\n/g, ' ')
    .replace(/\r/g, '');
}

28
src/processors/nostr.ts

@ -0,0 +1,28 @@ @@ -0,0 +1,28 @@
/**
 * Decorates `nostr:` prefixed addresses with HTML.
 *
 * When a base URL is given and the identifier starts with a known prefix,
 * the address becomes an `<a class="nostr-address">` pointing at the
 * matching route; in every other case it is wrapped in a
 * `<span class="nostr-address">` for styling.
 *
 * @param content Text containing `nostr:` addresses.
 * @param linkBaseURL Base URL for generated links; empty for spans only.
 * @returns The content with each address wrapped.
 */
export function processNostrAddresses(content: string, linkBaseURL: string): string {
  // Identifier prefix -> route (path plus query key) under the base URL.
  const routes: ReadonlyArray<readonly [string, string]> = [
    ['naddr', 'events?id='],
    ['nevent', 'events?id='],
    ['note', 'events?id='],
    ['npub', 'profile?pubkey='],
    ['nprofile', 'profile?id='],
  ];
  const nostrPattern = /nostr:([a-z0-9]+[a-z0-9]{1,})/g;

  return content.replace(nostrPattern, (address: string, nostrID: string) => {
    const route = linkBaseURL
      ? routes.find(([prefix]) => nostrID.startsWith(prefix))
      : undefined;
    if (route === undefined) {
      // No base URL or unrecognised prefix: style only.
      return `<span class="nostr-address">${address}</span>`;
    }
    return `<a href="${linkBaseURL}/${route[1]}${nostrID}" class="nostr-address">${address}</a>`;
  });
}

42
src/processors/plain.ts

@ -0,0 +1,42 @@ @@ -0,0 +1,42 @@
import { ProcessResult } from '../types';
/**
 * Escapes the five HTML special characters `&`, `<`, `>`, `"` and `'`.
 *
 * @param text Raw text.
 * @returns The text with each special character replaced by its entity.
 */
function escapeHTML(text: string): string {
  const entityFor: Record<string, string> = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;',
  };
  // One pass over the input; every special character is mapped exactly once.
  return text.replace(/[&<>"']/g, (ch) => entityFor[ch] ?? ch);
}
/**
 * Converts plain text to minimal HTML.
 *
 * The text is HTML-escaped, line endings are normalised to `\n` (so
 * Windows `\r\n` input is split correctly), runs of two or more newlines
 * separate paragraphs, and the remaining single newlines inside a
 * paragraph become `<br>`. Splitting happens before `<br>` insertion, so
 * three or more consecutive newlines do not leave a stray `<br>` at the
 * start of the next paragraph.
 *
 * @param text Raw plain text.
 * @returns The paragraphs as `<p>` elements joined by newlines; no TOC,
 *   and the LaTeX / music flags are initialised to `false` here.
 */
export function processPlainText(text: string): ProcessResult {
  const paragraphs = escapeHTML(text)
    .replace(/\r\n?/g, '\n')
    .split(/\n{2,}/)
    .map((block) => block.trim())
    .filter((block) => block.length > 0)
    .map((block) => `<p>${block.replace(/\n/g, '<br>\n')}</p>`);

  return {
    content: paragraphs.join('\n'),
    tableOfContents: '',
    hasLaTeX: false,
    hasMusicalNotation: false,
  };
}

43
src/types.ts

@ -0,0 +1,43 @@ @@ -0,0 +1,43 @@
/**
 * Options for configuring the parser behavior.
 * Every field is optional; the Parser constructor fills in anything that
 * is omitted from defaultOptions().
 */
export interface ParserOptions {
/**
 * Base URL prepended to generated links (wikilinks and nostr: addresses)
 * and used to tell same-site links from external ones (default: '').
 */
linkBaseURL?: string;
/** Enable AsciiDoc processing (default: true) */
enableAsciiDoc?: boolean;
/** Enable Markdown processing (default: true) */
enableMarkdown?: boolean;
/** Enable code syntax highlighting (default: true) */
enableCodeHighlighting?: boolean;
/** Enable LaTeX math detection and wrapping (default: true) */
enableLaTeX?: boolean;
/** Enable musical notation detection and wrapping (default: true) */
enableMusicalNotation?: boolean;
/** Enable nostr: address processing (default: true) */
enableNostrAddresses?: boolean;
}
/**
 * Result of processing content.
 * Returned by every processor and by Parser.process().
 */
export interface ProcessResult {
/** Main processed HTML content */
content: string;
/** Extracted table of contents HTML (AsciiDoc only; empty string otherwise) */
tableOfContents: string;
/** Indicates if LaTeX content was found (processors set false; the parser updates it when LaTeX is enabled) */
hasLaTeX: boolean;
/** Indicates if musical notation was found (processors set false; the parser updates it when music is enabled) */
hasMusicalNotation: boolean;
}
/**
 * Detected content format.
 * detectFormat() returns AsciiDoc, Markdown or Plain; Unknown is declared
 * but not produced by the detector in this package.
 */
export enum ContentFormat {
Unknown = 'unknown',
AsciiDoc = 'asciidoc',
Markdown = 'markdown',
Plain = 'plain'
}

19
tsconfig.json

@ -0,0 +1,19 @@ @@ -0,0 +1,19 @@
{
"compilerOptions": {
"target": "ES2020",
"module": "commonjs",
"lib": ["ES2020"],
"outDir": "./dist",
"rootDir": "./src",
"strict": true,
"esModuleInterop": true,
"skipLibCheck": true,
"forceConsistentCasingInFileNames": true,
"declaration": true,
"declarationMap": true,
"sourceMap": true,
"resolveJsonModule": true
},
"include": ["src/**/*"],
"exclude": ["node_modules", "dist", "**/*.test.ts"]
}
Loading…
Cancel
Save