You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

259 lines
8.3 KiB

package asciidoc
import (
"bytes"
"fmt"
"os/exec"
"regexp"
"strings"
)
// Processor converts AsciiDoc content to HTML. Along the way it rewrites
// wikilinks and nostr: references into URLs under linkBaseURL, sanitizes the
// rendered HTML, and adjusts how links open.
type Processor struct {
// linkBaseURL is the prefix for the "/events?..." URLs generated for
// rewritten links. It is also used to decide whether a rendered link points
// at the local site or at an external one.
linkBaseURL string
}
// NewProcessor returns a Processor whose generated links are rooted at
// linkBaseURL. The value is stored as given; no validation or normalization is
// performed here.
func NewProcessor(linkBaseURL string) *Processor {
	proc := new(Processor)
	proc.linkBaseURL = linkBaseURL
	return proc
}
// Process renders asciidocContent to HTML.
//
// The pipeline runs in a fixed order:
//  1. wikilinks and nostr: references in the AsciiDoc source are rewritten,
//  2. the rewritten source is converted to HTML (asciidoctor.js via Node.js),
//  3. the HTML is sanitized,
//  4. anchor tags are adjusted so external links open in a new tab.
//
// On a conversion failure it returns an empty string and the wrapped error.
func (p *Processor) Process(asciidocContent string) (string, error) {
	rewritten := p.rewriteLinks(asciidocContent)

	rendered, err := p.convertToHTML(rewritten)
	if err != nil {
		return "", fmt.Errorf("failed to convert AsciiDoc to HTML: %w", err)
	}

	// Sanitize first so link processing only ever sees cleaned markup.
	return p.processLinks(p.sanitizeHTML(rendered)), nil
}
// Patterns used by rewriteLinks, compiled once at package initialization
// instead of on every call.
var (
	// wikilinkRegex matches [[target]] and [[target|display text]].
	wikilinkRegex = regexp.MustCompile(`\[\[([^\]]+)\]\]`)

	// nostrLinkRegex matches nostr:naddr1... and nostr:nevent1... references.
	// The identifier is limited to alphanumerics (bech32 identifiers contain
	// nothing else) so that trailing punctuation ("see nostr:naddr1xyz.") or a
	// following AsciiDoc label ("nostr:naddr1xyz[label]") is not swallowed
	// into the generated URL.
	nostrLinkRegex = regexp.MustCompile(`nostr:((?:naddr|nevent)1[0-9A-Za-z]+)`)
)

// rewriteLinks rewrites wikilinks and nostr: references in AsciiDoc source so
// they point at p.linkBaseURL.
//
//   - [[target]] or [[target|display text]] becomes
//     link:<linkBaseURL>/events?d=<normalized target>[<display text>]
//   - nostr:naddr1... or nostr:nevent1... becomes
//     <linkBaseURL>/events?id=<identifier>
//
// Text that matches neither pattern is returned unchanged.
func (p *Processor) rewriteLinks(content string) string {
	content = wikilinkRegex.ReplaceAllStringFunc(content, func(match string) string {
		// Strip the surrounding "[[" and "]]".
		inner := match[2 : len(match)-2]

		target, display := inner, inner
		if i := strings.Index(inner, "|"); i >= 0 {
			target, display = inner[:i], inner[i+1:]
		}
		target = strings.TrimSpace(target)
		display = strings.TrimSpace(display)
		if display == "" {
			// "[[target|]]" would otherwise produce a link with no label.
			display = target
		}

		url := fmt.Sprintf("%s/events?d=%s", p.linkBaseURL, normalizeDTag(target))
		return fmt.Sprintf("link:%s[%s]", url, display)
	})

	return nostrLinkRegex.ReplaceAllStringFunc(content, func(match string) string {
		nostrID := strings.TrimPrefix(match, "nostr:")
		return fmt.Sprintf("%s/events?id=%s", p.linkBaseURL, nostrID)
	})
}
// normalizeDTag normalizes a d tag (intended to follow the NIP-54 rules):
//
//   - letters are lowercased,
//   - every run of whitespace becomes a single hyphen,
//   - ASCII punctuation and symbols are removed,
//   - ASCII letters, digits, hyphens and all non-ASCII runes are kept,
//   - consecutive hyphens are collapsed and leading/trailing hyphens removed.
//
// Whitespace is recognized via strings.Fields, so carriage returns, form
// feeds, vertical tabs and Unicode spaces (e.g. U+00A0, U+3000) separate words
// just like a plain space does, instead of being dropped or kept verbatim.
func normalizeDTag(dTag string) string {
	words := strings.Fields(strings.ToLower(dTag))

	var b strings.Builder
	b.Grow(len(dTag))

	// A hyphen is only emitted once a kept rune follows it. That collapses
	// runs of hyphens and trims leading and trailing ones in a single pass.
	pendingHyphen := false
	for _, r := range strings.Join(words, "-") {
		switch {
		case r == '-':
			pendingHyphen = b.Len() > 0
		case (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r > 127:
			if pendingHyphen {
				b.WriteByte('-')
				pendingHyphen = false
			}
			b.WriteRune(r)
		}
	}
	return b.String()
}
// convertToHTML converts AsciiDoc to HTML by running asciidoctor.js
// (@asciidoctor/core) in a Node.js child process.
//
// The AsciiDoc source is passed on stdin so no shell or JavaScript escaping is
// needed; the rendered HTML is read from stdout. It returns an error when the
// node executable cannot be located or when the child process exits
// unsuccessfully (stderr is included in the error message).
func (p *Processor) convertToHTML(asciidocContent string) (string, error) {
	// Resolve the node binary via PATH rather than spawning `node --version`:
	// this gives the same "not found" diagnosis without an extra process per
	// conversion.
	nodePath, err := exec.LookPath("node")
	if err != nil {
		return "", fmt.Errorf("node.js not found: %w", err)
	}

	// JavaScript driver for asciidoctor.js. It buffers stdin, converts once the
	// stream ends, and exits with status 1 on a conversion error.
	jsCode := `
const asciidoctor = require('@asciidoctor/core')();
let content = '';
process.stdin.setEncoding('utf8');
process.stdin.on('data', (chunk) => {
content += chunk;
});
process.stdin.on('end', () => {
try {
const html = asciidoctor.convert(content, {
safe: 'safe',
backend: 'html5',
doctype: 'article',
attributes: {
'showtitle': true,
'icons': 'font',
'sectanchors': true,
'sectlinks': true,
'toc': 'left',
'toclevels': 3
}
});
process.stdout.write(html);
} catch (error) {
console.error('Error converting AsciiDoc:', error.message);
process.exit(1);
}
});
`

	var stdout, stderr bytes.Buffer
	cmd := exec.Command(nodePath, "-e", jsCode)
	cmd.Stdin = strings.NewReader(asciidocContent)
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr
	if err := cmd.Run(); err != nil {
		return "", fmt.Errorf("asciidoctor.js conversion failed: %w, stderr: %s", err, stderr.String())
	}
	return stdout.String(), nil
}
// Patterns used by sanitizeHTML, compiled once at package initialization.
var (
	// scriptTagRegex matches a <script> element including its content. The
	// "s" flag lets "." span newlines so multi-line scripts are matched too.
	scriptTagRegex = regexp.MustCompile(`(?is)<script[^>]*>.*?</script\s*>`)

	// eventHandlerAttrRegex matches quoted on* attributes (onclick, onerror,
	// ...). Quotes must pair up, so a value such as "alert('x')" is removed as
	// a whole. The word boundary keeps names that merely contain "on"
	// (font=, content=) from being mangled.
	eventHandlerAttrRegex = regexp.MustCompile(`(?i)\s*\bon\w+\s*=\s*(?:"[^"]*"|'[^']*')`)

	// javascriptSchemeRegex matches the javascript: URL scheme.
	javascriptSchemeRegex = regexp.MustCompile(`(?i)javascript:`)

	// dataHTMLURLRegex matches the start of a data: URL carrying text/html.
	dataHTMLURLRegex = regexp.MustCompile(`(?i)data:\s*text/html`)
)

// sanitizeHTML performs basic, regex-based HTML sanitization to reduce XSS
// risk. It removes <script> elements, quoted on* event-handler attributes,
// "javascript:" schemes and "data: text/html" prefixes.
//
// The removals are repeated until the input stops changing. Otherwise a
// removal could stitch a new dangerous token together, e.g.
// "javajavascript:script:" or "<scr<script></script>ipt>". Every replacement
// shrinks the string, so the loop always terminates.
//
// NOTE(review): this is not a complete sanitizer. Unquoted handlers
// (onerror=alert(1)) and entity- or whitespace-obfuscated schemes are not
// covered. For production use, prefer a dedicated HTML sanitizer library.
func (p *Processor) sanitizeHTML(html string) string {
	for {
		cleaned := scriptTagRegex.ReplaceAllString(html, "")
		cleaned = eventHandlerAttrRegex.ReplaceAllString(cleaned, "")
		cleaned = javascriptSchemeRegex.ReplaceAllString(cleaned, "")
		cleaned = dataHTMLURLRegex.ReplaceAllString(cleaned, "")
		if cleaned == html {
			return cleaned
		}
		html = cleaned
	}
}
// Patterns used by processLinks, compiled once at package initialization
// rather than once per call or once per matched link.
var (
	// anchorTagRegex matches an opening <a> tag that carries a quoted href.
	anchorTagRegex = regexp.MustCompile(`<a\s+([^>]*?)href\s*=\s*["']([^"']+)["']([^>]*?)>`)

	// hrefAttrRegex captures the value of a quoted href attribute.
	hrefAttrRegex = regexp.MustCompile(`href\s*=\s*["']([^"']+)["']`)

	// targetAttrRegex matches a quoted target attribute and its leading space.
	targetAttrRegex = regexp.MustCompile(`\s*target\s*=\s*["'][^"']*["']`)

	// relAttrRegex matches a quoted rel attribute and captures its value.
	relAttrRegex = regexp.MustCompile(`rel\s*=\s*["']([^"']*)["']`)
)

// hrefHost returns the lowercased host (including any port) of an absolute
// URL: the part after "://", up to the first path, query or fragment
// delimiter, with any userinfo ("user@") removed.
func hrefHost(rawURL string) string {
	rest := rawURL
	if i := strings.Index(rest, "://"); i >= 0 {
		rest = rest[i+3:]
	}
	// A backslash is included because browsers treat it like "/" in http(s)
	// URLs.
	if i := strings.IndexAny(rest, "/?#\\"); i >= 0 {
		rest = rest[:i]
	}
	if i := strings.LastIndex(rest, "@"); i >= 0 {
		rest = rest[i+1:]
	}
	return strings.ToLower(rest)
}

// processLinks adjusts <a> tags in rendered HTML:
//
//   - External links (http:// or https:// whose host differs from the host of
//     p.linkBaseURL) get target="_blank" and rel="noopener noreferrer", unless
//     the tag already has a target attribute.
//   - Relative links and links to the linkBaseURL host have any target
//     attribute removed so they open in the same tab.
//
// The host comparison is exact (case-insensitive). A URL that merely mentions
// the local domain in its path, query, userinfo or as a prefix of another
// host name is treated as external.
func (p *Processor) processLinks(html string) string {
	// Only an absolute http(s) base URL yields a host to compare against.
	baseHost := ""
	if strings.HasPrefix(p.linkBaseURL, "http://") || strings.HasPrefix(p.linkBaseURL, "https://") {
		baseHost = hrefHost(p.linkBaseURL)
	}

	return anchorTagRegex.ReplaceAllStringFunc(html, func(tag string) string {
		hrefMatch := hrefAttrRegex.FindStringSubmatch(tag)
		if len(hrefMatch) < 2 {
			return tag // No href found, leave the tag untouched.
		}
		href := hrefMatch[1]

		absolute := strings.HasPrefix(href, "http://") || strings.HasPrefix(href, "https://")
		sameSite := absolute && baseHost != "" && hrefHost(href) == baseHost
		if !absolute || sameSite {
			// Local link: make sure it opens in the same tab.
			return targetAttrRegex.ReplaceAllString(tag, "")
		}

		// External link that already declares a target: leave it as is.
		if strings.Contains(tag, `target=`) {
			return tag
		}

		// Re-open the tag so attributes can be appended before the final ">".
		tag = strings.TrimSuffix(tag, ">")
		if !strings.Contains(tag, `rel=`) {
			return tag + ` target="_blank" rel="noopener noreferrer">`
		}

		// Extend an existing rel attribute instead of adding a second one.
		tag = relAttrRegex.ReplaceAllStringFunc(tag, func(rel string) string {
			value := relAttrRegex.FindStringSubmatch(rel)[1]
			if !strings.Contains(value, "noopener") {
				value += " noopener noreferrer"
			}
			return `rel="` + strings.TrimSpace(value) + `"`
		})
		return tag + ` target="_blank">`
	})
}