// Package asciidoc converts AsciiDoc documents to sanitized HTML,
// rewriting wiki-style and nostr: links against a configurable base URL.
package asciidoc

import (
	"bytes"
	"fmt"
	"os/exec"
	"regexp"
	"strings"
)

// Patterns are compiled once at package scope so the conversion path does
// not pay a regexp.MustCompile on every call.
//
// NOTE(review): several of these literals were reconstructed from a garbled
// source dump in which HTML-tag text ("<script", "<div id=\"toc\"", "<a")
// had been stripped out of the string literals; the surrounding comments
// and variable names make the intent unambiguous, but confirm against the
// original file.
var (
	// [[target]] or [[target|display text]]
	wikilinkRe = regexp.MustCompile(`\[\[([^\]]+)\]\]`)
	// nostr:naddr1... or nostr:nevent1...
	nostrLinkRe = regexp.MustCompile(`nostr:(naddr1[^\s\]]+|nevent1[^\s\]]+)`)

	// Sanitization patterns. (?s) lets script bodies span newlines.
	scriptTagRe    = regexp.MustCompile(`(?is)<script[^>]*>.*?</script>`)
	eventHandlerRe = regexp.MustCompile(`(?i)\s*on\w+\s*=\s*["'][^"']*["']`)
	jsProtocolRe   = regexp.MustCompile(`(?i)javascript:`)
	dataHTMLRe     = regexp.MustCompile(`(?i)data:\s*text/html`)

	// <a ... href="..." ...> anchors and the attributes processLinks edits.
	anchorTagRe  = regexp.MustCompile(`<a([^>]*?)href\s*=\s*["']([^"']+)["']([^>]*?)>`)
	hrefAttrRe   = regexp.MustCompile(`href\s*=\s*["']([^"']+)["']`)
	targetAttrRe = regexp.MustCompile(`\s*target\s*=\s*["'][^"']*["']`)
	relAttrRe    = regexp.MustCompile(`rel\s*=\s*["']([^"']*)["']`)

	// The "Table of Contents" title div AsciiDoc inserts inside the TOC.
	tocTitleRe = regexp.MustCompile(`(?is)<div[^>]*id="toctitle"[^>]*>.*?</div>\s*`)

	// Possible opening tags for the generated TOC container, in the order
	// they are tried (id before class, double quotes before single).
	tocStartPatterns = []*regexp.Regexp{
		regexp.MustCompile(`(?i)<div\s+id="toc"[^>]*>`),
		regexp.MustCompile(`(?i)<div\s+id='toc'[^>]*>`),
		regexp.MustCompile(`(?i)<div\s+class="toc"[^>]*>`),
		regexp.MustCompile(`(?i)<div\s+class='toc'[^>]*>`),
	}
)

// Processor handles AsciiDoc to HTML conversion.
type Processor struct {
	linkBaseURL string
}

// ProcessResult contains the processed HTML content and extracted table of contents.
type ProcessResult struct {
	Content         string
	TableOfContents string
}

// NewProcessor creates a new AsciiDoc processor. linkBaseURL is the site
// root that wikilinks and nostr: links are rewritten against.
func NewProcessor(linkBaseURL string) *Processor {
	return &Processor{
		linkBaseURL: linkBaseURL,
	}
}

// Process converts AsciiDoc content to HTML with link rewriting.
// Returns both the content HTML and the extracted table of contents.
func (p *Processor) Process(asciidocContent string) (*ProcessResult, error) {
	// First, rewrite wikilinks and nostr: links in the AsciiDoc source.
	processedContent := p.rewriteLinks(asciidocContent)

	// Convert AsciiDoc to HTML using asciidoctor.js via node.
	html, err := p.convertToHTML(processedContent)
	if err != nil {
		return nil, fmt.Errorf("failed to convert AsciiDoc to HTML: %w", err)
	}

	// Extract the table of contents from the generated HTML.
	toc, contentWithoutTOC := p.extractTOC(html)

	// Sanitize HTML to prevent XSS, then normalize link targets.
	sanitized := p.sanitizeHTML(contentWithoutTOC)
	processed := p.processLinks(sanitized)

	// The TOC goes through the same sanitize/link pipeline.
	tocSanitized := p.sanitizeHTML(toc)
	tocProcessed := p.processLinks(tocSanitized)

	return &ProcessResult{
		Content:         processed,
		TableOfContents: tocProcessed,
	}, nil
}

// rewriteLinks rewrites wikilinks and nostr: links in AsciiDoc content.
//
// [[target]] / [[target|display]]  -> link:<base>/events?d=<normalized>[display]
// nostr:naddr1... / nostr:nevent1... -> <base>/events?id=<nostr-id>
func (p *Processor) rewriteLinks(content string) string {
	content = wikilinkRe.ReplaceAllStringFunc(content, func(match string) string {
		// Strip the surrounding [[ ]].
		inner := match[2 : len(match)-2]

		var target, display string
		if strings.Contains(inner, "|") {
			parts := strings.SplitN(inner, "|", 2)
			target = strings.TrimSpace(parts[0])
			display = strings.TrimSpace(parts[1])
		} else {
			target = strings.TrimSpace(inner)
			display = target
		}

		// Normalize the d tag (lowercase, spaces to hyphens, etc.).
		normalized := normalizeDTag(target)

		url := fmt.Sprintf("%s/events?d=%s", p.linkBaseURL, normalized)
		return fmt.Sprintf("link:%s[%s]", url, display)
	})

	// Bare URLs autolink in AsciiDoc, so nostr: references become plain URLs.
	content = nostrLinkRe.ReplaceAllStringFunc(content, func(match string) string {
		nostrID := strings.TrimPrefix(match, "nostr:")
		return fmt.Sprintf("%s/events?id=%s", p.linkBaseURL, nostrID)
	})

	return content
}

// normalizeDTag normalizes a d tag according to NIP-54 rules: lowercase,
// whitespace to hyphens, strip ASCII punctuation/symbols (non-ASCII runes
// are kept), collapse hyphen runs, and trim edge hyphens.
func normalizeDTag(dTag string) string {
	dTag = strings.ToLower(dTag)

	// Convert whitespace to hyphens.
	dTag = strings.ReplaceAll(dTag, " ", "-")
	dTag = strings.ReplaceAll(dTag, "\t", "-")
	dTag = strings.ReplaceAll(dTag, "\n", "-")

	// Keep alphanumerics, hyphens, and non-ASCII; drop everything else.
	var result strings.Builder
	for _, r := range dTag {
		if (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '-' || r > 127 {
			result.WriteRune(r)
		}
	}
	dTag = result.String()

	// Collapse multiple consecutive hyphens.
	for strings.Contains(dTag, "--") {
		dTag = strings.ReplaceAll(dTag, "--", "-")
	}

	return strings.Trim(dTag, "-")
}

// convertToHTML converts AsciiDoc to HTML using asciidoctor.js via Node.js.
// Content is piped over stdin so special characters need no shell escaping.
func (p *Processor) convertToHTML(asciidocContent string) (string, error) {
	// Fail fast with a clear error if node is not installed.
	cmd := exec.Command("node", "--version")
	if err := cmd.Run(); err != nil {
		return "", fmt.Errorf("node.js not found: %w", err)
	}

	jsCode := `
const asciidoctor = require('@asciidoctor/core')();
let content = '';
process.stdin.setEncoding('utf8');
process.stdin.on('data', (chunk) => { content += chunk; });
process.stdin.on('end', () => {
  try {
    const html = asciidoctor.convert(content, {
      safe: 'safe',
      backend: 'html5',
      doctype: 'article',
      attributes: {
        'showtitle': true,
        'icons': 'font',
        'sectanchors': true,
        'sectlinks': true,
        'toc': 'left',
        'toclevels': 3
      }
    });
    process.stdout.write(html);
  } catch (error) {
    console.error('Error converting AsciiDoc:', error.message);
    process.exit(1);
  }
});
`

	cmd = exec.Command("node", "-e", jsCode)
	cmd.Stdin = strings.NewReader(asciidocContent)

	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr

	if err := cmd.Run(); err != nil {
		return "", fmt.Errorf("asciidoctor.js conversion failed: %w, stderr: %s", err, stderr.String())
	}

	return stdout.String(), nil
}

// sanitizeHTML performs basic HTML sanitization to prevent XSS.
// Note: this is a basic, regex-based implementation. For production,
// consider using a proper HTML sanitizer library.
func (p *Processor) sanitizeHTML(html string) string {
	// Remove script tags and their content.
	html = scriptTagRe.ReplaceAllString(html, "")

	// Remove inline event handlers (onclick, onerror, etc.).
	html = eventHandlerRe.ReplaceAllString(html, "")

	// Remove javascript: protocol in links.
	html = jsProtocolRe.ReplaceAllString(html, "")

	// Remove data:text/html URLs, which can smuggle active content.
	html = dataHTMLRe.ReplaceAllString(html, "")

	return html
}

// extractTOC extracts the table of contents from AsciiDoc HTML output.
// With toc: 'left' AsciiDoc emits the TOC in a div with id="toc" (or
// class="toc"). Divs nest, so the matching close is found by depth
// counting rather than a single regex. Returns the TOC inner HTML (with
// the "Table of Contents" title removed) and the content with the whole
// TOC div cut out. If no TOC is found, returns "" and the input unchanged.
func (p *Processor) extractTOC(html string) (string, string) {
	for _, pat := range tocStartPatterns {
		loc := pat.FindStringIndex(html)
		if loc == nil {
			continue
		}
		tocStartIdx := loc[0]

		tocEndIdx, ok := findDivEnd(html, loc[1])
		if !ok {
			// Unbalanced markup; leave the content untouched.
			break
		}

		tocFullHTML := html[tocStartIdx:tocEndIdx]

		// Inner content sits between the opening tag and the final </div>.
		inner := tocFullHTML[loc[1]-tocStartIdx:]
		inner = strings.TrimSuffix(inner, "</div>")

		// Remove the toctitle div AsciiDoc adds ("Table of Contents").
		inner = tocTitleRe.ReplaceAllString(inner, "")
		tocContent := strings.TrimSpace(inner)

		return tocContent, html[:tocStartIdx] + html[tocEndIdx:]
	}
	return "", html
}

// findDivEnd scans forward from pos (just past an already-open <div> tag),
// counting nested <div>/<\/div> pairs, and returns the index one past the
// </div> that closes the open one. ok is false if the markup is unbalanced.
func findDivEnd(html string, pos int) (end int, ok bool) {
	lower := strings.ToLower(html)
	depth := 1
	i := pos
	for depth > 0 {
		openIdx := strings.Index(lower[i:], "<div")
		closeIdx := strings.Index(lower[i:], "</div>")
		if closeIdx == -1 {
			return 0, false
		}
		if openIdx != -1 && openIdx < closeIdx {
			depth++
			i += openIdx + len("<div")
		} else {
			depth--
			i += closeIdx + len("</div>")
		}
	}
	return i, true
}

// processLinks processes HTML links so external links open in a new tab.
// External links are those starting with http:// or https:// that do not
// point at the linkBaseURL domain; they get target="_blank" and
// rel="noopener noreferrer". Local/relative links (and links to our own
// domain) have any target attribute stripped so they open in the same tab.
func (p *Processor) processLinks(html string) string {
	// Extract the bare domain from linkBaseURL for comparison
	// (e.g. "alexandria.gitcitadel.eu" from "https://alexandria.gitcitadel.eu").
	linkBaseDomain := ""
	if strings.HasPrefix(p.linkBaseURL, "http://") || strings.HasPrefix(p.linkBaseURL, "https://") {
		trimmed := strings.TrimPrefix(strings.TrimPrefix(p.linkBaseURL, "https://"), "http://")
		if parts := strings.Split(trimmed, "/"); len(parts) > 0 {
			linkBaseDomain = parts[0]
		}
	}

	return anchorTagRe.ReplaceAllStringFunc(html, func(match string) string {
		hrefSubmatch := hrefAttrRe.FindStringSubmatch(match)
		if len(hrefSubmatch) < 2 {
			return match // no href found, return as-is
		}
		href := hrefSubmatch[1]

		isExternal := strings.HasPrefix(href, "http://") || strings.HasPrefix(href, "https://")
		if !isExternal {
			// Local/relative link — ensure it opens in the same tab.
			return targetAttrRe.ReplaceAllString(match, "")
		}

		if linkBaseDomain != "" && strings.Contains(href, linkBaseDomain) {
			// Our own domain — open in same tab, dropping any target.
			return targetAttrRe.ReplaceAllString(match, "")
		}

		// External link — add target="_blank" and rel="noopener noreferrer"
		// unless a target is already present.
		if strings.Contains(match, `target=`) {
			return match
		}
		match = strings.TrimSuffix(match, ">")
		if !strings.Contains(match, `rel=`) {
			return match + ` target="_blank" rel="noopener noreferrer">`
		}
		// Update the existing rel attribute to include noopener if absent.
		match = relAttrRe.ReplaceAllStringFunc(match, func(relMatch string) string {
			relValue := relAttrRe.FindStringSubmatch(relMatch)[1]
			if !strings.Contains(relValue, "noopener") {
				relValue += " noopener noreferrer"
			}
			return `rel="` + strings.TrimSpace(relValue) + `"`
		})
		return match + ` target="_blank">`
	})
}