commit
d689b1b050
19 changed files with 1171 additions and 0 deletions
@ -0,0 +1,33 @@
@@ -0,0 +1,33 @@
|
||||
# Binaries |
||||
*.exe |
||||
*.exe~ |
||||
*.dll |
||||
*.so |
||||
*.dylib |
||||
gc-parser |
||||
|
||||
# Test binary |
||||
*.test |
||||
|
||||
# Output |
||||
*.out |
||||
|
||||
# Go workspace file |
||||
go.work |
||||
|
||||
# Node.js |
||||
node_modules/ |
||||
package-lock.json |
||||
dist/ |
||||
*.log |
||||
|
||||
# IDE |
||||
.idea/ |
||||
.vscode/ |
||||
*.swp |
||||
*.swo |
||||
*~ |
||||
|
||||
# OS |
||||
.DS_Store |
||||
Thumbs.db |
||||
@ -0,0 +1,212 @@
@@ -0,0 +1,212 @@
|
||||
# GC Parser |
||||
|
||||
A super-parser for Nostr event content that handles multiple content formats including AsciiDoc, Markdown, code syntax highlighting, LaTeX, musical notation, and `nostr:` prefixed addresses. |
||||
|
||||
Built with TypeScript/JavaScript using: |
||||
- **asciidoctor.js** for AsciiDoc processing |
||||
- **marked** for Markdown processing |
||||
- **highlight.js** for code syntax highlighting |
||||
|
||||
## Features |
||||
|
||||
- **AsciiDoc Processing**: Full AsciiDoc to HTML conversion with table of contents support |
||||
- **Markdown Processing**: Markdown to HTML conversion with GFM support |
||||
- **Code Syntax Highlighting**: Automatic syntax highlighting for code blocks using highlight.js |
||||
- **LaTeX Math**: Support for inline and block LaTeX math expressions (compatible with MathJax/KaTeX) |
||||
- **Musical Notation**: Support for ABC notation, LilyPond, chord notation, and MusicXML |
||||
- **Nostr Addresses**: Automatic processing of `nostr:` prefixed addresses (naddr, nevent, note, npub, nprofile) |
||||
- **Link Rewriting**: Automatic rewriting of wikilinks and nostr addresses to proper URLs |
||||
- **HTML Sanitization**: Basic built-in XSS filtering (regex-based; pair it with a dedicated HTML sanitizer when rendering fully untrusted content) |
||||
|
||||
## Installation |
||||
|
||||
```bash |
||||
npm install gc-parser |
||||
``` |
||||
|
||||
## Usage |
||||
|
||||
### Basic Example |
||||
|
||||
```typescript |
||||
import { Parser, defaultOptions } from 'gc-parser'; |
||||
|
||||
// Create parser with default options |
||||
const opts = defaultOptions(); |
||||
opts.linkBaseURL = 'https://example.com'; |
||||
|
||||
const parser = new Parser(opts); |
||||
|
||||
// Process content |
||||
const content = `# Hello World |
||||
|
||||
This is **markdown** content with a nostr:npub1... address.`; |
||||
|
||||
const result = await parser.process(content); |
||||
console.log(result.content); |
||||
console.log('Has LaTeX:', result.hasLaTeX); |
||||
console.log('Has Musical Notation:', result.hasMusicalNotation); |
||||
``` |
||||
|
||||
### Advanced Configuration |
||||
|
||||
```typescript |
||||
import { Parser } from 'gc-parser'; |
||||
|
||||
const parser = new Parser({ |
||||
linkBaseURL: 'https://example.com', |
||||
enableAsciiDoc: true, |
||||
enableMarkdown: true, |
||||
enableCodeHighlighting: true, |
||||
enableLaTeX: true, |
||||
enableMusicalNotation: true, |
||||
enableNostrAddresses: true, |
||||
}); |
||||
|
||||
const result = await parser.process(content); |
||||
``` |
||||
|
||||
### Processing AsciiDoc |
||||
|
||||
```typescript |
||||
const content = `= Document Title |
||||
|
||||
== Section |
||||
|
||||
This is AsciiDoc content with a [[wikilink]] and nostr:naddr1...`; |
||||
|
||||
const result = await parser.process(content); |
||||
// result.content contains the HTML |
||||
// result.tableOfContents contains the extracted TOC |
||||
``` |
||||
|
||||
### Processing Markdown |
||||
|
||||
```typescript |
||||
const content = `# Markdown Document |
||||
|
||||
This is **bold** and *italic* text. |
||||
|
||||
\`\`\`go |
||||
func main() { |
||||
fmt.Println("Hello") |
||||
} |
||||
\`\`\` |
||||
`; |
||||
|
||||
const result = await parser.process(content); |
||||
``` |
||||
|
||||
### LaTeX Math |
||||
|
||||
The parser automatically detects and processes LaTeX math expressions: |
||||
|
||||
- Inline math: `$E = mc^2$` or `\(E = mc^2\)` |
||||
- Block math: `$$\int_{-\infty}^{\infty} e^{-x^2} dx = \sqrt{\pi}$$` or `\[...\]` |
||||
|
||||
The output is compatible with MathJax or KaTeX. Include one of these libraries in your HTML: |
||||
|
||||
```html |
||||
<!-- For MathJax --> |
||||
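<!-- Note: the polyfill.io loader formerly shown here is intentionally omitted; that domain was compromised in 2024, and MathJax 3 runs in current browsers without an ES6 polyfill --> |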
||||
<script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script> |
||||
|
||||
<!-- Or for KaTeX --> |
||||
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/katex.min.css"> |
||||
<script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/katex.min.js"></script> |
||||
``` |
||||
|
||||
### Musical Notation |
||||
|
||||
The parser supports multiple musical notation formats: |
||||
|
||||
- **ABC Notation**: Automatically detected and wrapped for ABC.js |
||||
- **LilyPond**: Detected and wrapped for LilyPond rendering |
||||
- **Chord Notation**: Inline chords like `[C]`, `[Am]`, `[F#m7]` |
||||
- **MusicXML**: XML-based notation |
||||
|
||||
Example: |
||||
``` |
||||
X:1 |
||||
K:C |
||||
C D E F | G A B c |
||||
``` |
||||
|
||||
### Nostr Addresses |
||||
|
||||
The parser automatically processes `nostr:` prefixed addresses: |
||||
|
||||
- `nostr:naddr1...` - Parameterized replaceable events |
||||
- `nostr:nevent1...` - Event references |
||||
- `nostr:note1...` - Note IDs |
||||
- `nostr:npub1...` - Public keys |
||||
- `nostr:nprofile1...` - Profile references |
||||
|
||||
These are automatically converted to links if `linkBaseURL` is set. |
||||
|
||||
## Integration with gitcitadel-online |
||||
|
||||
This parser is designed to replace the content processing logic in `gitcitadel-online`. |
||||
|
||||
### Migration Example |
||||
|
||||
**Before (in gitcitadel-online):** |
||||
```go |
||||
// Old way - calling Node.js via exec |
||||
result, err := g.asciidocProc.Process(wiki.Content) |
||||
html := result.Content |
||||
``` |
||||
|
||||
**After (using gc-parser):** |
||||
```javascript |
||||
// New way - import the JavaScript/TypeScript module |
||||
// You can call it via Node.js exec or use a Go bridge |
||||
const { Parser } = require('gc-parser'); |
||||
const parser = new Parser({ linkBaseURL: 'https://example.com' }); |
||||
const result = await parser.process(content); |
||||
``` |
||||
|
||||
Or use it directly in a Node.js script that gitcitadel-online can call: |
||||
|
||||
```javascript |
||||
// process-content.js |
||||
const { Parser } = require('gc-parser'); |
||||
|
||||
const parser = new Parser({ |
||||
linkBaseURL: process.env.LINK_BASE_URL || '', |
||||
}); |
||||
|
||||
const content = process.argv[2] || ''; |
||||
parser.process(content).then(result => { |
||||
console.log(JSON.stringify(result)); |
||||
}).catch(err => { |
||||
console.error(err); |
||||
process.exit(1); |
||||
}); |
||||
``` |
||||
|
||||
## Requirements |
||||
|
||||
- Node.js 18+ |
||||
- TypeScript 5.3+ (for development) |
||||
|
||||
## Development |
||||
|
||||
```bash |
||||
# Install dependencies |
||||
npm install |
||||
|
||||
# Build TypeScript |
||||
npm run build |
||||
|
||||
# Run tests |
||||
npm test |
||||
``` |
||||
|
||||
## License |
||||
|
||||
MIT |
||||
|
||||
## Contributing |
||||
|
||||
Contributions are welcome! Please feel free to submit a Pull Request. |
||||
@ -0,0 +1,55 @@
@@ -0,0 +1,55 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
/** |
||||
* Example usage of gc-parser |
||||
* This can be called from Go or used directly in Node.js |
||||
*/ |
||||
|
||||
const { Parser, defaultOptions } = require('./dist/index.js'); |
||||
|
||||
/**
 * Command-line entry point: converts content to HTML with gc-parser and prints
 * the result as pretty-printed JSON on stdout.
 *
 * The content is taken from the first CLI argument when it is non-empty,
 * otherwise it is read line by line from stdin. The link base URL comes from
 * the LINK_BASE_URL environment variable, falling back to https://example.com.
 * Exits with status 1 when no content was supplied or processing fails.
 */
async function main() {
  const parserOptions = defaultOptions();
  parserOptions.linkBaseURL = process.env.LINK_BASE_URL || 'https://example.com';
  const parser = new Parser(parserOptions);

  const cliContent = process.argv[2];
  let content = '';

  if (cliContent) {
    content = cliContent;
  } else {
    // No usable argument: collect stdin, re-appending the newline that
    // readline strips from every line.
    const readline = require('readline');
    const stdinLines = readline.createInterface({
      input: process.stdin,
      output: process.stdout,
      terminal: false
    });

    const collected = [];
    for await (const line of stdinLines) {
      collected.push(line + '\n');
    }
    content = collected.join('');
  }

  if (!content) {
    console.error('No content provided');
    process.exit(1);
  }

  try {
    const result = await parser.process(content);
    // JSON output keeps the result easy to consume from another process.
    console.log(JSON.stringify(result, null, 2));
  } catch (error) {
    console.error('Error processing content:', error);
    process.exit(1);
  }
}
||||
|
||||
if (require.main === module) { |
||||
main(); |
||||
} |
||||
|
||||
module.exports = { main }; |
||||
@ -0,0 +1,35 @@
@@ -0,0 +1,35 @@
|
||||
{ |
||||
"name": "gc-parser", |
||||
"version": "1.0.0", |
||||
"description": "Super-parser for Nostr event content supporting AsciiDoc, Markdown, code syntax highlighting, LaTeX, musical notation, and nostr: addresses", |
||||
"main": "dist/index.js", |
||||
"types": "dist/index.d.ts", |
||||
"scripts": { |
||||
"build": "tsc", |
||||
"test": "jest", |
||||
"prepublishOnly": "npm run build" |
||||
}, |
||||
"keywords": [ |
||||
"nostr", |
||||
"parser", |
||||
"asciidoc", |
||||
"markdown", |
||||
"syntax-highlighting", |
||||
"latex", |
||||
"music" |
||||
], |
||||
"author": "", |
||||
"license": "MIT", |
||||
"dependencies": { |
||||
"@asciidoctor/core": "^3.0.4", |
||||
"highlight.js": "^11.10.0", |
||||
"marked": "^12.0.0" |
||||
}, |
||||
"devDependencies": { |
||||
"@types/node": "^20.11.0", |
||||
"typescript": "^5.3.3", |
||||
"jest": "^29.7.0", |
||||
"@types/jest": "^29.5.11", |
||||
"@types/highlight.js": "^10.1.0" |
||||
} |
||||
} |
||||
@ -0,0 +1,55 @@
@@ -0,0 +1,55 @@
|
||||
import { ContentFormat } from './types'; |
||||
|
||||
/**
 * Guesses the markup format of `content` by counting marker substrings.
 *
 * Each marker contributes one point when it occurs anywhere in the content
 * (plain substring search, not anchored to line starts). AsciiDoc wins only
 * when it scores at least 2 and strictly more than Markdown; otherwise any
 * Markdown hit selects Markdown, and content with no Markdown hit is Plain.
 */
export function detectFormat(content: string): ContentFormat {
  const score = (markers: readonly string[]): number =>
    markers.filter((marker) => content.includes(marker)).length;

  const asciidocScore = score([
    '= ', // Title
    '== ', // Section
    '=== ', // Subsection
    'include::', // Include directive
    'image::', // Image block
    '[source', // Source block
    '----', // Listing block
    '....', // Literal block
    '|===', // Table
    ':', // Attribute (common in AsciiDoc)
  ]);

  const markdownScore = score([
    '# ', // Heading
    '## ', // Subheading
    '```', // Code block
    '**', // Bold
    '*', // Italic or list
    '- ', // List item
    '![', // Image
    '[', // Link
  ]);

  if (asciidocScore >= 2 && asciidocScore > markdownScore) {
    return ContentFormat.AsciiDoc;
  }
  if (markdownScore > 0) {
    return ContentFormat.Markdown;
  }
  return ContentFormat.Plain;
}
||||
@ -0,0 +1,2 @@
@@ -0,0 +1,2 @@
|
||||
export * from './parser'; |
||||
export * from './types'; |
||||
@ -0,0 +1,116 @@
@@ -0,0 +1,116 @@
|
||||
import { ParserOptions, ProcessResult, ContentFormat } from './types'; |
||||
import { processAsciiDoc } from './processors/asciidoc'; |
||||
import { processMarkdown } from './processors/markdown'; |
||||
import { processPlainText } from './processors/plain'; |
||||
import { processNostrAddresses } from './processors/nostr'; |
||||
import { detectFormat } from './detector'; |
||||
import { processLaTeX, hasLaTeX } from './processors/latex'; |
||||
import { processMusicalNotation, hasMusicalNotation } from './processors/music'; |
||||
import { ensureCodeHighlighting } from './processors/code'; |
||||
|
||||
/**
 * Builds a fresh options object with an empty link base URL and every
 * processing feature switched on. A new object is returned on each call, so
 * callers may mutate it freely.
 */
export function defaultOptions(): ParserOptions {
  const options: ParserOptions = {
    linkBaseURL: '',
    enableAsciiDoc: true,
    enableMarkdown: true,
    enableCodeHighlighting: true,
    enableLaTeX: true,
    enableMusicalNotation: true,
    enableNostrAddresses: true,
  };
  return options;
}
||||
|
||||
/**
 * Converts Nostr event content to HTML.
 *
 * The pipeline is: optional nostr: address rewriting, format detection,
 * format-specific conversion (AsciiDoc, Markdown or plain text), then optional
 * LaTeX, musical-notation and code-highlighting passes over the HTML.
 */
export class Parser {
  private options: Required<ParserOptions>;

  /**
   * @param options Partial settings; any field left undefined falls back to
   *   the value from `defaultOptions()`.
   */
  constructor(options: ParserOptions = {}) {
    const fallback = defaultOptions();
    const resolveFlag = (requested: boolean | undefined, preset: boolean | undefined): boolean =>
      requested ?? preset ?? true;

    this.options = {
      linkBaseURL: options.linkBaseURL ?? fallback.linkBaseURL ?? '',
      enableAsciiDoc: resolveFlag(options.enableAsciiDoc, fallback.enableAsciiDoc),
      enableMarkdown: resolveFlag(options.enableMarkdown, fallback.enableMarkdown),
      enableCodeHighlighting: resolveFlag(options.enableCodeHighlighting, fallback.enableCodeHighlighting),
      enableLaTeX: resolveFlag(options.enableLaTeX, fallback.enableLaTeX),
      enableMusicalNotation: resolveFlag(options.enableMusicalNotation, fallback.enableMusicalNotation),
      enableNostrAddresses: resolveFlag(options.enableNostrAddresses, fallback.enableNostrAddresses),
    };
  }

  /**
   * Detects the format of `content` and renders it to HTML.
   *
   * Content detected as AsciiDoc falls back to the Markdown processor when
   * AsciiDoc is disabled, and to plain text when both are disabled. Content
   * detected as Markdown falls back to plain text when Markdown is disabled.
   *
   * @param content Raw event content.
   * @returns The rendered result, with the LaTeX / musical-notation flags
   *   updated when the corresponding feature is enabled.
   */
  async process(content: string): Promise<ProcessResult> {
    const settings = this.options;

    // nostr: addresses are rewritten before detection, so the detector sees
    // the rewritten text.
    const source = settings.enableNostrAddresses
      ? processNostrAddresses(content, settings.linkBaseURL)
      : content;

    const format = detectFormat(source);
    const isMarkup = format === ContentFormat.AsciiDoc || format === ContentFormat.Markdown;

    let result: ProcessResult;
    if (format === ContentFormat.AsciiDoc && settings.enableAsciiDoc) {
      result = await processAsciiDoc(source, settings.linkBaseURL);
    } else if (isMarkup && settings.enableMarkdown) {
      result = await processMarkdown(source, settings.linkBaseURL);
    } else {
      result = processPlainText(source);
    }

    // Post-processing works on the generated HTML, not on the raw source.
    if (settings.enableLaTeX) {
      result.hasLaTeX = hasLaTeX(result.content);
      if (result.hasLaTeX) {
        result.content = processLaTeX(result.content);
      }
    }

    if (settings.enableMusicalNotation) {
      result.hasMusicalNotation = hasMusicalNotation(result.content);
      if (result.hasMusicalNotation) {
        result.content = processMusicalNotation(result.content);
      }
    }

    if (settings.enableCodeHighlighting) {
      result.content = ensureCodeHighlighting(result.content);
    }

    return result;
  }
}
||||
|
||||
/**
 * One-shot helper: builds a `Parser` from `options` (defaults apply when
 * omitted) and processes `content` with it.
 */
export async function process(content: string, options?: ParserOptions): Promise<ProcessResult> {
  return new Parser(options).process(content);
}
||||
@ -0,0 +1,66 @@
@@ -0,0 +1,66 @@
|
||||
/**
 * Normalizes a d tag according to NIP-54 rules.
 *
 * Steps, in order: lowercase; turn each whitespace run into a hyphen; drop
 * every character that is not a-z, 0-9, a hyphen or non-ASCII; collapse
 * repeated hyphens; strip hyphens from both ends.
 */
export function normalizeDTag(dTag: string): string {
  return dTag
    .toLowerCase()
    .replace(/\s+/g, '-')
    .replace(/[^a-z0-9\-\u0080-\uFFFF]/g, '')
    .replace(/-+/g, '-')
    .replace(/^-+|-+$/g, '');
}
||||
|
||||
/**
 * Rewrites wikilinks and nostr: references in AsciiDoc source into AsciiDoc
 * `link:` macros.
 *
 * - `[[target]]` / `[[target|display]]` become `link:<url>[display]`, where the
 *   target is normalized with `normalizeDTag`. With a `linkBaseURL` the URL is
 *   `<base>/events?d=<normalized>`; without one it is the fragment
 *   `#<normalized>`.
 * - `nostr:naddr1…` / `nostr:nevent1…` become `link:<base>/events?id=<id>[…]`
 *   when a `linkBaseURL` is given and are left untouched otherwise.
 */
export function rewriteAsciiDocLinks(content: string, linkBaseURL: string): string {
  const withWikilinks = content.replace(/\[\[([^\]]+)\]\]/g, (_whole: string, inner: string) => {
    // Only the first two '|'-separated parts are used; the second one, when
    // present, is the display text.
    const [rawTarget, rawDisplay] = inner.split('|', 2);
    const target = rawTarget.trim();
    const display = rawDisplay === undefined ? target : rawDisplay.trim();

    const slug = normalizeDTag(target);
    const destination = linkBaseURL ? `${linkBaseURL}/events?d=${slug}` : `#${slug}`;
    return `link:${destination}[${display}]`;
  });

  return withWikilinks.replace(
    /nostr:(naddr1[^\s\]]+|nevent1[^\s\]]+)/g,
    (reference: string, nostrID: string) =>
      linkBaseURL ? `link:${linkBaseURL}/events?id=${nostrID}[${reference}]` : reference,
  );
}
||||
@ -0,0 +1,49 @@
@@ -0,0 +1,49 @@
|
||||
import asciidoctor from '@asciidoctor/core'; |
||||
import { ProcessResult } from '../types'; |
||||
import { rewriteAsciiDocLinks } from './asciidoc-links'; |
||||
import { extractTOC, sanitizeHTML, processLinks } from './html-utils'; |
||||
|
||||
const asciidoctorInstance = asciidoctor(); |
||||
|
||||
/**
 * Renders AsciiDoc source to HTML.
 *
 * Wikilinks and nostr: references are rewritten first; the source is then
 * converted with the shared Asciidoctor instance, the table of contents is
 * split out of the output, and both the body and the TOC are sanitized and
 * have their links post-processed.
 *
 * @param content AsciiDoc source.
 * @param linkBaseURL Base URL passed to the link rewriting and link
 *   post-processing steps.
 * @returns Body HTML and TOC HTML; the LaTeX / music flags are initialised to
 *   false here.
 */
export async function processAsciiDoc(content: string, linkBaseURL: string): Promise<ProcessResult> {
  const source = rewriteAsciiDocLinks(content, linkBaseURL);

  const conversionOptions = {
    safe: 'safe',
    backend: 'html5',
    doctype: 'article',
    attributes: {
      showtitle: true,
      icons: 'font',
      sectanchors: true,
      sectlinks: true,
      toc: 'left',
      toclevels: 3,
    },
  };
  const html = asciidoctorInstance.convert(source, conversionOptions) as string;

  const { toc, contentWithoutTOC } = extractTOC(html);

  // Body and TOC go through the same sanitize-then-link-fixup steps.
  const finalize = (fragment: string): string => processLinks(sanitizeHTML(fragment), linkBaseURL);

  return {
    content: finalize(contentWithoutTOC),
    tableOfContents: finalize(toc),
    hasLaTeX: false,
    hasMusicalNotation: false,
  };
}
||||
@ -0,0 +1,52 @@
@@ -0,0 +1,52 @@
|
||||
import hljs from 'highlight.js'; |
||||
|
||||
/**
 * Applies highlight.js syntax highlighting to `<pre><code>…</code></pre>`
 * blocks in an HTML string.
 *
 * The `<pre>` tag may carry attributes (for example `<pre class="highlight">`);
 * the opening tag is kept as-is. When the `<code>` element starts with a
 * `class="language-xxx"` attribute and highlight.js knows that language, it is
 * used; otherwise the language is auto-detected. The `<code>` tag is
 * re-emitted with a `language-…` class naming the language that was applied.
 *
 * @param html HTML possibly containing code blocks.
 * @returns The HTML with highlighted code blocks; a block that fails to
 *   highlight is returned unchanged.
 */
export function ensureCodeHighlighting(html: string): string {
  const codeBlockRegex =
    /(<pre(?:\s[^>]*)?>)<code(?:\s+class=["']language-([^"']+)["'])?[^>]*>(.*?)<\/code><\/pre>/gs;

  return html.replace(
    codeBlockRegex,
    (match: string, preTag: string, lang: string | undefined, code: string) => {
      // The markup converters emit escaped code; highlight.js expects raw text
      // and escapes its own output.
      const unescapedCode = unescapeHTML(code);

      try {
        // The result type is inferred from the highlight.js calls rather than
        // spelled as `hljs.HighlightResult`, which is not a member of the
        // default export.
        const highlighted =
          lang && hljs.getLanguage(lang)
            ? hljs.highlight(unescapedCode, { language: lang })
            : hljs.highlightAuto(unescapedCode);

        const langClass = highlighted.language ? ` class="language-${highlighted.language}"` : '';
        return `${preTag}<code${langClass}>${highlighted.value}</code></pre>`;
      } catch {
        // Best effort: an unhighlighted block is better than a failed render.
        return match;
      }
    },
  );
}
||||
|
||||
/**
 * Decodes the HTML entities that markup converters emit inside code blocks:
 * `&lt;`, `&gt;`, `&quot;`, the apostrophe forms (`&#39;`, `&#x27;`, `&apos;`)
 * and `&amp;`.
 *
 * `&amp;` is decoded last so that an escaped entity such as `&amp;lt;` becomes
 * the literal text `&lt;` instead of being unescaped twice.
 */
function unescapeHTML(text: string): string {
  return text
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&quot;/g, '"')
    .replace(/&(?:#0*39|#x0*27|apos);/gi, "'")
    .replace(/&amp;/g, '&');
}
||||
@ -0,0 +1,170 @@
@@ -0,0 +1,170 @@
|
||||
/**
 * Splits the table of contents out of Asciidoctor HTML output.
 *
 * The TOC container is located with a prioritized list of opening-tag patterns
 * (`<div id="toc" class="toc">`, `<div id="toc">`, `<div class="toc">`,
 * `<nav id="toc">`). Its matching closing tag is found by counting nested
 * opening and closing tags of the same element name.
 *
 * @param html Full HTML produced by the converter.
 * @returns `toc`: inner HTML of the TOC container with the `toctitle` div
 *   removed; `contentWithoutTOC`: the input with the whole container cut out.
 *   When no TOC container is found, or its closing tag cannot be matched, `toc`
 *   is empty and `contentWithoutTOC` is the unmodified input.
 */
export function extractTOC(html: string): { toc: string; contentWithoutTOC: string } {
  const tocStartPatterns = [
    /<div\s+id=["']toc["']\s+class=["']toc["'][^>]*>/i,
    /<div\s+id=["']toc["'][^>]*>/i,
    /<div\s+class=["']toc["'][^>]*>/i,
    /<nav\s+id=["']toc["'][^>]*>/i,
  ];

  // The first pattern (in list order) that matches anywhere wins.
  let opening: RegExpMatchArray | null = null;
  for (const pattern of tocStartPatterns) {
    const candidate = html.match(pattern);
    if (candidate && candidate.index !== undefined) {
      opening = candidate;
      break;
    }
  }
  if (opening === null || opening.index === undefined) {
    return { toc: '', contentWithoutTOC: html };
  }

  const tocStartIdx = opening.index;
  const tocStartTag = opening[0];
  const innerStartIdx = tocStartIdx + tocStartTag.length;

  // Only tags with the container's own element name ("div" or "nav") affect
  // nesting depth, so opening and closing tags are always counted in pairs.
  const tagName = tocStartTag.slice(1, 4).toLowerCase();
  const tagScanner = new RegExp(`<(/?)${tagName}\\b[^>]*>`, 'gi');
  tagScanner.lastIndex = innerStartIdx;

  let depth = 1;
  let innerEndIdx = -1;
  let tocEndIdx = -1;
  let tag: RegExpExecArray | null;
  while ((tag = tagScanner.exec(html)) !== null) {
    if (tag[1] === '/') {
      depth--;
      if (depth === 0) {
        innerEndIdx = tag.index;
        tocEndIdx = tag.index + tag[0].length;
        break;
      }
    } else if (!tag[0].endsWith('/>')) {
      // Self-closing tags do not open a new nesting level.
      depth++;
    }
  }

  if (tocEndIdx === -1) {
    // Unbalanced markup: leave the document untouched.
    return { toc: '', contentWithoutTOC: html };
  }

  // Slicing by the closing tag's real position avoids hard-coding its length.
  const toc = html
    .substring(innerStartIdx, innerEndIdx)
    .trim()
    // Drop the "Table of Contents" title div that Asciidoctor adds.
    .replace(/<div\s+id=["']toctitle["'][^>]*>.*?<\/div>\s*/gis, '')
    .trim();

  return {
    toc,
    contentWithoutTOC: html.substring(0, tocStartIdx) + html.substring(tocEndIdx),
  };
}
||||
|
||||
/**
 * Performs basic, regex-based HTML sanitization to reduce XSS risk.
 *
 * Removes `<script>…</script>` elements, inline `on*=` event-handler
 * attributes (double-quoted, single-quoted or unquoted), `javascript:`
 * protocol prefixes and `data:text/html` URL prefixes.
 *
 * The filters are re-applied until the string stops changing, so that removing
 * one fragment cannot splice a new dangerous one together (for example
 * `javajavascript:script:`).
 *
 * NOTE(review): this is a best-effort filter, not a full HTML sanitizer; for
 * fully untrusted input consider a parser-based sanitizer in addition.
 *
 * @param html HTML to clean.
 * @returns The cleaned HTML.
 */
export function sanitizeHTML(html: string): string {
  let sanitized = html;
  let previous: string;

  // Every replacement only deletes characters, so the loop always terminates.
  do {
    previous = sanitized;
    sanitized = sanitized
      // Script elements including their content.
      .replace(/<script[^>]*>.*?<\/script>/gis, '')
      // Event handlers. The look-behind keeps names that merely contain "on…"
      // (e.g. data-condition) intact; each quoting style is matched as a whole
      // so nested quotes of the other kind do not cut the value short.
      .replace(/\s*(?<![\w-])on\w+\s*=\s*(?:"[^"]*"|'[^']*'|[^\s>"']+)/gi, '')
      // javascript: protocol.
      .replace(/javascript:/gi, '')
      // data: URLs carrying HTML.
      .replace(/data:\s*text\/html/gi, '');
  } while (sanitized !== previous);

  return sanitized;
}
||||
|
||||
/**
 * Normalizes how `<a href>` links open.
 *
 * - Relative/local links and links to the site's own host (or one of its
 *   subdomains) have any `target` attribute removed so they open in the same
 *   tab.
 * - Other `http://` / `https://` links that have no `target` attribute get
 *   `target="_blank"` plus `rel="noopener noreferrer"` (merged into an existing
 *   `rel` when it lacks `noopener`). External links that already carry a
 *   `target` are left unchanged.
 *
 * The site host is taken from `linkBaseURL` and compared against the host part
 * of each href, so a foreign URL that merely mentions the site's domain in its
 * path or query string is still treated as external.
 *
 * @param html HTML whose anchors should be adjusted.
 * @param linkBaseURL Base URL of the site; may be empty.
 * @returns The HTML with adjusted anchor tags.
 */
export function processLinks(html: string, linkBaseURL: string): string {
  // Host (including any port) of a URL: scheme and userinfo stripped, cut at
  // the first '/', '?' or '#', lowercased.
  const hostOf = (url: string): string =>
    url
      .replace(/^https?:\/\//i, '')
      .split(/[/?#]/, 1)[0]
      .replace(/^.*@/, '')
      .toLowerCase();

  const baseHost = linkBaseURL ? hostOf(linkBaseURL) : '';
  const targetAttribute = /\s*target\s*=\s*["'][^"']*["']/gi;
  const linkRegex = /<a\s+([^>]*?)href\s*=\s*["']([^"']+)["']([^>]*?)>/g;

  return html.replace(linkRegex, (match: string, before: string, href: string, after: string) => {
    const isExternal = href.startsWith('http://') || href.startsWith('https://');
    if (!isExternal) {
      return match.replace(targetAttribute, '');
    }

    const host = hostOf(href);
    const isOwnSite = baseHost !== '' && (host === baseHost || host.endsWith(`.${baseHost}`));
    if (isOwnSite) {
      return match.replace(targetAttribute, '');
    }

    // Inspect only the attributes around href, so "target=" or "rel=" inside
    // the URL itself is not mistaken for an attribute.
    const otherAttributes = `${before} ${after}`;
    if (/(^|\s)target\s*=/i.test(otherAttributes)) {
      return match;
    }

    // The match always ends with the tag's closing '>'; append attributes
    // right before it.
    const appendToTag = (tag: string, attributes: string): string =>
      `${tag.slice(0, -1)}${attributes}>`;

    if (!/(^|\s)rel\s*=/i.test(otherAttributes)) {
      return appendToTag(match, ' target="_blank" rel="noopener noreferrer"');
    }

    const withSafeRel = match.replace(
      /rel\s*=\s*["']([^"']*)["']/gi,
      (relMatch: string, relValue: string) =>
        relValue.includes('noopener') ? relMatch : `rel="${relValue} noopener noreferrer"`,
    );
    return appendToTag(withSafeRel, ' target="_blank"');
  });
}
||||
@ -0,0 +1,37 @@
@@ -0,0 +1,37 @@
|
||||
/**
 * Reports whether `content` contains something that looks like LaTeX math:
 * inline `$...$` or `\(...\)`, or block `$$...$$` or `\[...\]`.
 */
export function hasLaTeX(content: string): boolean {
  const mathPatterns = [
    // Inline math: $...$ or \(...\)
    /\$[^$]+\$|\\\([^)]+\\\)/,
    // Block math: $$...$$ or \[...\]
    /\$\$[^$]+\$\$|\\\[[^\]]+\\\]/,
  ];
  return mathPatterns.some((pattern) => pattern.test(content));
}
||||
|
||||
/**
 * Wraps LaTeX math found in HTML so MathJax or KaTeX can render it.
 *
 * Block math (`$$...$$` or `\[...\]`) becomes
 * `<div class="math-block">\[...\]</div>`; inline math (`$...$` or `\(...\)`)
 * becomes `<span class="math-inline">\(...\)</span>`. Block math is handled
 * first so its `$$` delimiters are not consumed by the inline pass.
 */
export function processLaTeX(html: string): string {
  // Exactly one of the two capture groups is set, depending on which
  // delimiter style matched.
  const mathBody = (dollarContent?: string, bracketContent?: string): string =>
    (dollarContent || bracketContent || '').trim();

  const withBlockMath = html.replace(
    /\$\$([^$]+)\$\$|\\\[([^\]]+)\\\]/gs,
    (_match: string, dollarContent?: string, bracketContent?: string) =>
      `<div class="math-block">\\[${mathBody(dollarContent, bracketContent)}\\]</div>`,
  );

  return withBlockMath.replace(
    /\$([^$\n]+)\$|\\\(([^)]+)\\\)/g,
    (_match: string, dollarContent?: string, bracketContent?: string) =>
      `<span class="math-inline">\\(${mathBody(dollarContent, bracketContent)}\\)</span>`,
  );
}
||||
@ -0,0 +1,49 @@
@@ -0,0 +1,49 @@
|
||||
import { normalizeDTag } from './asciidoc-links'; |
||||
|
||||
/**
 * Rewrites wikilinks and nostr: references in Markdown source into Markdown
 * links.
 *
 * - `[[target]]` / `[[target|display]]` become `[display](<url>)`, where the
 *   target is normalized with `normalizeDTag`. With a `linkBaseURL` the URL is
 *   `<base>/events?d=<normalized>`; without one it is the fragment
 *   `#<normalized>`.
 * - `nostr:` references (naddr, nevent, note, npub, nprofile) become links
 *   when a `linkBaseURL` is given: npub goes to `/profile?pubkey=`, nprofile to
 *   `/profile?id=`, everything else to `/events?id=`. Without a base URL they
 *   are left untouched.
 */
export function rewriteMarkdownLinks(content: string, linkBaseURL: string): string {
  const withWikilinks = content.replace(/\[\[([^\]]+)\]\]/g, (_whole: string, inner: string) => {
    // Only the first two '|'-separated parts are used; the second one, when
    // present, is the display text.
    const [rawTarget, rawDisplay] = inner.split('|', 2);
    const target = rawTarget.trim();
    const display = rawDisplay === undefined ? target : rawDisplay.trim();

    const slug = normalizeDTag(target);
    return linkBaseURL
      ? `[${display}](${linkBaseURL}/events?d=${slug})`
      : `[${display}](#${slug})`;
  });

  const nostrReference =
    /nostr:(naddr1[^\s\]]+|nevent1[^\s\]]+|note1[^\s\]]+|npub1[^\s\]]+|nprofile1[^\s\]]+)/g;

  return withWikilinks.replace(nostrReference, (reference: string, nostrID: string) => {
    if (!linkBaseURL) {
      return reference;
    }

    let path: string;
    if (nostrID.startsWith('npub')) {
      path = `/profile?pubkey=${nostrID}`;
    } else if (nostrID.startsWith('nprofile')) {
      path = `/profile?id=${nostrID}`;
    } else {
      path = `/events?id=${nostrID}`;
    }
    return `[${reference}](${linkBaseURL}${path})`;
  });
}
||||
@ -0,0 +1,36 @@
@@ -0,0 +1,36 @@
|
||||
import { marked } from 'marked'; |
||||
import { ProcessResult } from '../types'; |
||||
import { rewriteMarkdownLinks } from './markdown-links'; |
||||
import { sanitizeHTML, processLinks } from './html-utils'; |
||||
|
||||
// Configure marked: GitHub-flavoured Markdown, with single newlines rendered
// as <br>. The former `headerIds` and `mangle` options are not passed because
// current marked releases no longer define them (that behavior now lives in
// separate extension packages), so they had no effect and are rejected by the
// option types.
marked.setOptions({
  breaks: true,
  gfm: true,
});
||||
|
||||
/**
 * Renders Markdown source to HTML.
 *
 * Wikilinks and nostr: references are rewritten first; the source is then
 * parsed with marked, sanitized, and its links are post-processed.
 *
 * @param content Markdown source.
 * @param linkBaseURL Base URL passed to the link rewriting and link
 *   post-processing steps.
 * @returns Body HTML with an empty table of contents; the LaTeX / music flags
 *   are initialised to false here.
 */
export async function processMarkdown(content: string, linkBaseURL: string): Promise<ProcessResult> {
  const source = rewriteMarkdownLinks(content, linkBaseURL);
  const rendered = await marked.parse(source) as string;

  // Sanitize before touching links so the link pass only sees cleaned markup.
  const safeHTML = sanitizeHTML(rendered);

  return {
    content: processLinks(safeHTML, linkBaseURL),
    tableOfContents: '',
    hasLaTeX: false,
    hasMusicalNotation: false,
  };
}
||||
@ -0,0 +1,72 @@
@@ -0,0 +1,72 @@
|
||||
/**
 * Heuristically detects musical notation in a piece of content.
 *
 * Four notations are probed: ABC header fields (`X:1`, `K:C`, `M:4/4`),
 * LilyPond commands (`\relative`, `\clef`, `\key`, `\time`), MusicXML-like
 * tags (`<note>`, `<pitch>`, `<rest>`) and simple bracketed chords (`[C]`,
 * `[Am]`, `[F#m]`, `[G7]`).
 *
 * @param content - Text or markup to inspect.
 * @returns `true` when at least one pattern matches, otherwise `false`.
 */
export function hasMusicalNotation(content: string): boolean {
  const patterns: readonly RegExp[] = [
    // ABC fields. The leading \b keeps the field letter from matching the
    // tail of an ordinary word: without it "Max: 10", "Ask: anyone" and
    // "Room: 12/14" were all reported as ABC notation.
    /\b(?:X:\s*\d+|K:\s*[A-G]|M:\s*\d+\/\d+)/i,
    // LilyPond commands.
    /\\relative|\\clef|\\key|\\time/,
    // MusicXML-like tags.
    /<note>|<pitch>|<rest>/i,
    // Simple chord notation.
    /\[[A-G][#b]?m?[0-9]?\]/,
  ];

  // None of the expressions is global/sticky, so test() carries no state.
  return patterns.some((pattern) => pattern.test(content));
}
||||
|
||||
/**
 * Wraps musical notation found in HTML content in marker elements so that a
 * client-side renderer can pick it up.
 *
 * - ABC blocks (starting at an `X:<n>` line and running to the next blank
 *   line) become `<div class="abc-notation" data-abc="…">`.
 * - LilyPond fragments (`\relative` up to the next `}`) become
 *   `<div class="lilypond-notation" data-lilypond="…">`.
 * - `<music …>…</music>` fragments become
 *   `<div class="musicxml-notation" data-musicxml="…">`.
 * - Inline chords such as `[C]`, `[Am]`, `[F#m7]`, `[G/B]` become
 *   `<span class="chord" data-chord="…">`.
 *
 * Each wrapper is swapped for a placeholder as soon as it is built and only
 * restored at the end. Later passes therefore never rewrite markup emitted by
 * an earlier one; previously the chord pass injected `<span>` elements into
 * the `data-abc` attribute whenever an ABC block contained `[…]`.
 *
 * @param html - HTML content to scan. NUL characters are removed from it
 *   because they delimit the internal placeholders.
 * @returns The HTML with notation wrapped as described above.
 */
export function processMusicalNotation(html: string): string {
  const parked: string[] = [];

  // Stores finished markup and returns the placeholder that stands in for it.
  const park = (markup: string): string => {
    parked.push(markup);
    return `\u0000${parked.length - 1}\u0000`;
  };

  // Expands every placeholder in `text` back to the markup it stands for.
  const unpark = (text: string): string =>
    text.replace(/\u0000(\d+)\u0000/g, (_token: string, index: string) => parked[Number(index)] ?? '');

  // Builds one block wrapper. Placeholders swallowed by the match are expanded
  // first so the attribute value is escaped as a whole.
  const wrapBlock = (cssClass: string, dataKey: string, raw: string): string => {
    const body = unpark(raw).trim();
    return park(`<div class="${cssClass}" data-${dataKey}="${escapeForAttr(body)}">${body}</div>`);
  };

  let working = html.replace(/\u0000/g, '');

  // ABC: an "X:<number>" line plus all directly following non-empty lines.
  // \b stops the field letter from matching the tail of a word ("MAX: 10").
  working = working.replace(/\bX:\s*\d+[^\n]*\n(?:[^\n]+\n)*/g, (block: string) =>
    wrapBlock('abc-notation', 'abc', block),
  );

  // LilyPond: from \relative up to and including the next closing brace.
  working = working.replace(/\\relative[^}]+}/g, (block: string) =>
    wrapBlock('lilypond-notation', 'lilypond', block),
  );

  // MusicXML-like fragments; handled before chords so bracketed text inside
  // them is left untouched.
  working = working.replace(/<music[^>]*>.*?<\/music>/gs, (block: string) =>
    wrapBlock('musicxml-notation', 'musicxml', block),
  );

  // Inline chords: root, optional accidental, optional quality, optional
  // extension, optional alterations/additions, optional slash bass. The former
  // catch-all tail wrapped any bracketed text starting with A–G, such as
  // "[Click here]" or "[Download]".
  const chordPattern =
    /\[([A-G][#b]?(?:maj|min|dim|aug|sus|add|m|M)?\d{0,2}(?:(?:sus|add|[#b])\d{1,2})*(?:\/[A-G][#b]?)?)\]/g;
  working = working.replace(
    chordPattern,
    (_match: string, chord: string) =>
      `<span class="chord" data-chord="${escapeForAttr(chord)}">[${chord}]</span>`,
  );

  return unpark(working);
}

/**
 * Escapes a string for use inside a double-quoted HTML attribute value.
 *
 * Quotes and angle brackets become character references, newlines become a
 * single space and carriage returns are dropped.
 *
 * NOTE(review): `&` is deliberately left alone here, so entities already
 * present in the (HTML) input pass through unchanged — confirm that consumers
 * of the data-* attributes expect this.
 *
 * @param text - Raw attribute content.
 * @returns The escaped, single-line attribute value.
 */
function escapeForAttr(text: string): string {
  return text
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/\n/g, ' ')
    .replace(/\r/g, '');
}
||||
@ -0,0 +1,28 @@
@@ -0,0 +1,28 @@
|
||||
/**
 * Marks up `nostr:` prefixed addresses in content.
 *
 * With a `linkBaseURL`, recognised bech32 entities become links:
 * `naddr1…`, `nevent1…` and `note1…` point to `/events?id=…`, `npub1…` to
 * `/profile?pubkey=…` and `nprofile1…` to `/profile?id=…`. Every other match —
 * and every match when no base URL is given — is wrapped in
 * `<span class="nostr-address">` for styling.
 *
 * @param content - Text or markup containing `nostr:` references.
 * @param linkBaseURL - Prefix for generated links; empty disables linking.
 * @returns The content with each reference wrapped in an `<a>` or `<span>`.
 */
export function processNostrAddresses(content: string, linkBaseURL: string): string {
  // Each prefix includes the bech32 separator "1". Matching on the bare
  // human-readable part turned e.g. "nostr:notebook" into an event link and
  // "nostr:npublic" into a profile link.
  const routes: ReadonlyArray<readonly [string, string]> = [
    ['naddr1', 'events?id='],
    ['nevent1', 'events?id='],
    ['note1', 'events?id='],
    ['npub1', 'profile?pubkey='],
    ['nprofile1', 'profile?id='],
  ];

  // At least two lowercase alphanumerics after the scheme.
  const nostrPattern = /nostr:([a-z0-9]{2,})/g;

  return content.replace(nostrPattern, (match: string, nostrID: string) => {
    if (linkBaseURL) {
      const route = routes.find(
        ([prefix]) => nostrID.length > prefix.length && nostrID.startsWith(prefix),
      );
      if (route) {
        return `<a href="${linkBaseURL}/${route[1]}${nostrID}" class="nostr-address">${match}</a>`;
      }
    }

    // Unlinked references still get a hook for styling.
    return `<span class="nostr-address">${match}</span>`;
  });
}
||||
@ -0,0 +1,42 @@
@@ -0,0 +1,42 @@
|
||||
import { ProcessResult } from '../types'; |
||||
|
||||
/**
 * Escapes the five HTML-significant characters (`&`, `<`, `>`, `"`, `'`).
 *
 * `&` is replaced first so the ampersands introduced by the other
 * replacements are not escaped a second time.
 *
 * @param text - Untrusted plain text.
 * @returns Text that is safe to place in HTML element content.
 */
function escapeHTML(text: string): string {
  return text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * Converts plain text to minimal HTML.
 *
 * The text is HTML-escaped, split into paragraphs at runs of blank lines and
 * each paragraph is wrapped in `<p>`; single line breaks inside a paragraph
 * become `<br>`.
 *
 * Compared with splitting the already-converted `<br>` markup, this handles
 * CRLF / CR line endings (which previously prevented any paragraph split) and
 * no longer leaves stray `<br>` tags at paragraph edges when the input has
 * three or more consecutive newlines or a trailing newline.
 *
 * @param text - Plain text input.
 * @returns The paragraphs joined with newlines. `tableOfContents` is always
 *   empty and both detection flags are always `false`.
 */
export function processPlainText(text: string): ProcessResult {
  const paragraphs = escapeHTML(text)
    // Normalise CRLF and lone CR so paragraph detection sees plain "\n".
    .replace(/\r\n?/g, '\n')
    // Two or more consecutive newlines separate paragraphs.
    .split(/\n{2,}/)
    .map((block) => block.trim())
    .filter((block) => block !== '')
    .map((block) => `<p>${block.replace(/\n/g, '<br>\n')}</p>`);

  return {
    content: paragraphs.join('\n'),
    tableOfContents: '',
    hasLaTeX: false,
    hasMusicalNotation: false,
  };
}
||||
@ -0,0 +1,43 @@
@@ -0,0 +1,43 @@
|
||||
/**
 * Settings that control which processing steps the parser applies.
 * Every field is optional.
 */
export interface ParserOptions {
  /** Base URL used when rewriting relative links and `nostr:` addresses. */
  linkBaseURL?: string;

  /** Turns AsciiDoc processing on or off. Default: true. */
  enableAsciiDoc?: boolean;

  /** Turns Markdown processing on or off. Default: true. */
  enableMarkdown?: boolean;

  /** Turns syntax highlighting of code on or off. Default: true. */
  enableCodeHighlighting?: boolean;

  /** Turns LaTeX math rendering on or off. Default: true. */
  enableLaTeX?: boolean;

  /** Turns musical notation rendering on or off. Default: true. */
  enableMusicalNotation?: boolean;

  /** Turns processing of `nostr:` addresses on or off. Default: true. */
  enableNostrAddresses?: boolean;
}
||||
|
||||
/**
 * What a content processor hands back to its caller.
 */
export interface ProcessResult {
  /** The processed HTML for the main content. */
  content: string;

  /** Table of contents extracted from the document (used for AsciiDoc). */
  tableOfContents: string;

  /** Whether LaTeX content was found. */
  hasLaTeX: boolean;

  /** Whether musical notation was found. */
  hasMusicalNotation: boolean;
}
||||
|
||||
/**
 * The content formats the parser distinguishes between.
 * Each member's runtime value is its lowercase name.
 */
export enum ContentFormat {
  /** Format could not be determined. */
  Unknown = 'unknown',
  /** AsciiDoc markup. */
  AsciiDoc = 'asciidoc',
  /** Markdown markup. */
  Markdown = 'markdown',
  /** Plain text without markup. */
  Plain = 'plain',
}
||||
@ -0,0 +1,19 @@
@@ -0,0 +1,19 @@
|
||||
{ |
||||
"compilerOptions": { |
||||
"target": "ES2020", |
||||
"module": "commonjs", |
||||
"lib": ["ES2020"], |
||||
"outDir": "./dist", |
||||
"rootDir": "./src", |
||||
"strict": true, |
||||
"esModuleInterop": true, |
||||
"skipLibCheck": true, |
||||
"forceConsistentCasingInFileNames": true, |
||||
"declaration": true, |
||||
"declarationMap": true, |
||||
"sourceMap": true, |
||||
"resolveJsonModule": true |
||||
}, |
||||
"include": ["src/**/*"], |
||||
"exclude": ["node_modules", "dist", "**/*.test.ts"] |
||||
} |
||||
Loading…
Reference in new issue