You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
259 lines
8.3 KiB
259 lines
8.3 KiB
package asciidoc |
|
|
|
import (
	"bytes"
	"fmt"
	"os/exec"
	"regexp"
	"strings"
	"unicode"
)
|
|
|
// Processor handles AsciiDoc to HTML conversion.
//
// The pipeline (see Process) is: rewrite wikilinks/nostr links in the raw
// AsciiDoc, convert to HTML via asciidoctor.js, sanitize the HTML, then
// post-process anchor tags so external links open in a new tab.
type Processor struct {
	// linkBaseURL is the base URL prepended to rewritten wikilinks and
	// nostr: links (e.g. "https://alexandria.gitcitadel.eu"); it is also
	// used to decide whether an href points at our own domain.
	linkBaseURL string
}
|
|
|
// NewProcessor creates a new AsciiDoc processor |
|
func NewProcessor(linkBaseURL string) *Processor { |
|
return &Processor{ |
|
linkBaseURL: linkBaseURL, |
|
} |
|
} |
|
|
|
// Process converts AsciiDoc content to HTML with link rewriting |
|
func (p *Processor) Process(asciidocContent string) (string, error) { |
|
// First, rewrite links in the AsciiDoc content |
|
processedContent := p.rewriteLinks(asciidocContent) |
|
|
|
// Convert AsciiDoc to HTML using asciidoctor CLI |
|
html, err := p.convertToHTML(processedContent) |
|
if err != nil { |
|
return "", fmt.Errorf("failed to convert AsciiDoc to HTML: %w", err) |
|
} |
|
|
|
// Sanitize HTML to prevent XSS |
|
sanitized := p.sanitizeHTML(html) |
|
|
|
// Process links: make external links open in new tab, local links in same tab |
|
processed := p.processLinks(sanitized) |
|
|
|
return processed, nil |
|
} |
|
|
|
// rewriteLinks rewrites wikilinks and nostr: links in AsciiDoc content |
|
func (p *Processor) rewriteLinks(content string) string { |
|
// Rewrite wikilinks: [[target]] or [[target|display text]] |
|
// Format: [[target]] -> https://alexandria.gitcitadel.eu/events?d=<normalized-d-tag> |
|
wikilinkRegex := regexp.MustCompile(`\[\[([^\]]+)\]\]`) |
|
content = wikilinkRegex.ReplaceAllStringFunc(content, func(match string) string { |
|
// Extract the content inside [[ ]] |
|
inner := match[2 : len(match)-2] |
|
|
|
var target, display string |
|
if strings.Contains(inner, "|") { |
|
parts := strings.SplitN(inner, "|", 2) |
|
target = strings.TrimSpace(parts[0]) |
|
display = strings.TrimSpace(parts[1]) |
|
} else { |
|
target = strings.TrimSpace(inner) |
|
display = target |
|
} |
|
|
|
// Normalize the d tag (convert to lowercase, replace spaces with hyphens, etc.) |
|
normalized := normalizeDTag(target) |
|
|
|
// Create the link |
|
url := fmt.Sprintf("%s/events?d=%s", p.linkBaseURL, normalized) |
|
return fmt.Sprintf("link:%s[%s]", url, display) |
|
}) |
|
|
|
// Rewrite nostr: links: nostr:naddr1... or nostr:nevent1... |
|
// Format: nostr:naddr1... -> https://alexandria.gitcitadel.eu/events?id=naddr1... |
|
nostrLinkRegex := regexp.MustCompile(`nostr:(naddr1[^\s\]]+|nevent1[^\s\]]+)`) |
|
content = nostrLinkRegex.ReplaceAllStringFunc(content, func(match string) string { |
|
nostrID := strings.TrimPrefix(match, "nostr:") |
|
url := fmt.Sprintf("%s/events?id=%s", p.linkBaseURL, nostrID) |
|
return url |
|
}) |
|
|
|
return content |
|
} |
|
|
|
// normalizeDTag normalizes a d tag according to NIP-54 rules:
// lowercase, any whitespace converted to hyphens, punctuation/symbols
// removed (alphanumerics, hyphens, and non-ASCII runes are kept), runs of
// hyphens collapsed, and leading/trailing hyphens trimmed.
//
// BUGFIX: the previous version only mapped space/tab/newline to hyphens, so
// other whitespace (\r, \v, \f, Unicode spaces) was silently deleted instead
// of hyphenated; unicode.IsSpace now covers all of it. The repeated
// ReplaceAll loop for collapsing "--" (quadratic in the worst case) is
// replaced by a single pass that tracks the previous emitted rune.
func normalizeDTag(dTag string) string {
	dTag = strings.ToLower(dTag)

	var b strings.Builder
	b.Grow(len(dTag))
	lastHyphen := false
	for _, r := range dTag {
		switch {
		case unicode.IsSpace(r) || r == '-':
			// Whitespace and hyphens both normalize to a single hyphen;
			// consecutive ones collapse.
			if !lastHyphen {
				b.WriteRune('-')
				lastHyphen = true
			}
		case (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r > 127:
			// Keep alphanumerics and non-ASCII; drop ASCII punctuation/symbols.
			b.WriteRune(r)
			lastHyphen = false
		}
	}

	// Remove leading and trailing hyphens.
	return strings.Trim(b.String(), "-")
}
|
|
|
// convertToHTML converts AsciiDoc to HTML using asciidoctor.js via Node.js.
//
// It shells out to `node -e <jsCode>`, streaming the AsciiDoc content over
// stdin (rather than interpolating it into the command line) so that quotes,
// backticks, and other special characters survive intact. Requires the
// `node` binary on PATH and the @asciidoctor/core npm package resolvable
// from the working directory — NOTE(review): no explicit check for the npm
// package; if it is missing this surfaces as a conversion failure at run
// time.
//
// Returns the rendered HTML, or an error (including asciidoctor's stderr
// output) if node is missing or the conversion fails.
func (p *Processor) convertToHTML(asciidocContent string) (string, error) {
	// Fail fast with a clear message if node is not installed.
	cmd := exec.Command("node", "--version")
	if err := cmd.Run(); err != nil {
		return "", fmt.Errorf("node.js not found: %w", err)
	}

	// JavaScript driver: buffers all of stdin, converts once the stream
	// ends, and writes the HTML to stdout (exit code 1 on failure so the
	// Go side sees a non-nil error from Run).
	// safe: 'safe' restricts asciidoctor features like include:: directives;
	// the attributes enable section anchors/links and a left-hand TOC.
	jsCode := `
const asciidoctor = require('@asciidoctor/core')();

let content = '';
process.stdin.setEncoding('utf8');

process.stdin.on('data', (chunk) => {
  content += chunk;
});

process.stdin.on('end', () => {
  try {
    const html = asciidoctor.convert(content, {
      safe: 'safe',
      backend: 'html5',
      doctype: 'article',
      attributes: {
        'showtitle': true,
        'icons': 'font',
        'sectanchors': true,
        'sectlinks': true,
        'toc': 'left',
        'toclevels': 3
      }
    });
    process.stdout.write(html);
  } catch (error) {
    console.error('Error converting AsciiDoc:', error.message);
    process.exit(1);
  }
});
`

	// Run node with the JavaScript code, passing content via stdin.
	cmd = exec.Command("node", "-e", jsCode)
	cmd.Stdin = strings.NewReader(asciidocContent)

	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr

	if err := cmd.Run(); err != nil {
		// Include stderr so asciidoctor's own diagnostics reach the caller.
		return "", fmt.Errorf("asciidoctor.js conversion failed: %w, stderr: %s", err, stderr.String())
	}

	return stdout.String(), nil
}
|
|
|
// sanitizeHTML performs basic HTML sanitization to prevent XSS |
|
// Note: This is a basic implementation. For production, consider using a proper HTML sanitizer library |
|
func (p *Processor) sanitizeHTML(html string) string { |
|
// Remove script tags and their content |
|
scriptRegex := regexp.MustCompile(`(?i)<script[^>]*>.*?</script>`) |
|
html = scriptRegex.ReplaceAllString(html, "") |
|
|
|
// Remove event handlers (onclick, onerror, etc.) |
|
eventHandlerRegex := regexp.MustCompile(`(?i)\s*on\w+\s*=\s*["'][^"']*["']`) |
|
html = eventHandlerRegex.ReplaceAllString(html, "") |
|
|
|
// Remove javascript: protocol in links |
|
javascriptRegex := regexp.MustCompile(`(?i)javascript:`) |
|
html = javascriptRegex.ReplaceAllString(html, "") |
|
|
|
// Remove data: URLs that could be dangerous |
|
dataURLRegex := regexp.MustCompile(`(?i)data:\s*text/html`) |
|
html = dataURLRegex.ReplaceAllString(html, "") |
|
|
|
return html |
|
} |
|
|
|
// processLinks processes HTML links to add target="_blank" to external links |
|
// External links are those that start with http:// or https:// and don't point to the linkBaseURL domain |
|
// Local links (including relative links and links to linkBaseURL) open in the same tab |
|
func (p *Processor) processLinks(html string) string { |
|
// Extract domain from linkBaseURL for comparison |
|
linkBaseDomain := "" |
|
if strings.HasPrefix(p.linkBaseURL, "http://") || strings.HasPrefix(p.linkBaseURL, "https://") { |
|
// Extract domain (e.g., "alexandria.gitcitadel.eu" from "https://alexandria.gitcitadel.eu") |
|
parts := strings.Split(strings.TrimPrefix(strings.TrimPrefix(p.linkBaseURL, "https://"), "http://"), "/") |
|
if len(parts) > 0 { |
|
linkBaseDomain = parts[0] |
|
} |
|
} |
|
|
|
// Regex to match <a> tags with href attributes (more flexible pattern) |
|
linkRegex := regexp.MustCompile(`<a\s+([^>]*?)href\s*=\s*["']([^"']+)["']([^>]*?)>`) |
|
|
|
html = linkRegex.ReplaceAllStringFunc(html, func(match string) string { |
|
// Extract href value |
|
hrefMatch := regexp.MustCompile(`href\s*=\s*["']([^"']+)["']`) |
|
hrefSubmatch := hrefMatch.FindStringSubmatch(match) |
|
if len(hrefSubmatch) < 2 { |
|
return match // No href found, return as-is |
|
} |
|
href := hrefSubmatch[1] |
|
|
|
// Check if it's an external link (starts with http:// or https://) |
|
isExternal := strings.HasPrefix(href, "http://") || strings.HasPrefix(href, "https://") |
|
|
|
if isExternal { |
|
// Check if it's pointing to our own domain |
|
if linkBaseDomain != "" && strings.Contains(href, linkBaseDomain) { |
|
// Same domain - open in same tab (remove any existing target attribute) |
|
targetRegex := regexp.MustCompile(`\s*target\s*=\s*["'][^"']*["']`) |
|
match = targetRegex.ReplaceAllString(match, "") |
|
return match |
|
} |
|
|
|
// External link - add target="_blank" and rel="noopener noreferrer" if not already present |
|
if !strings.Contains(match, `target=`) { |
|
// Insert before the closing > |
|
match = strings.TrimSuffix(match, ">") |
|
if !strings.Contains(match, `rel=`) { |
|
match += ` target="_blank" rel="noopener noreferrer">` |
|
} else { |
|
// Update existing rel attribute to include noopener if not present |
|
relRegex := regexp.MustCompile(`rel\s*=\s*["']([^"']*)["']`) |
|
match = relRegex.ReplaceAllStringFunc(match, func(relMatch string) string { |
|
relValue := relRegex.FindStringSubmatch(relMatch)[1] |
|
if !strings.Contains(relValue, "noopener") { |
|
relValue += " noopener noreferrer" |
|
} |
|
return `rel="` + strings.TrimSpace(relValue) + `"` |
|
}) |
|
match += ` target="_blank">` |
|
} |
|
} |
|
} else { |
|
// Local/relative link - ensure it opens in same tab (remove target if present) |
|
targetRegex := regexp.MustCompile(`\s*target\s*=\s*["'][^"']*["']`) |
|
match = targetRegex.ReplaceAllString(match, "") |
|
} |
|
|
|
return match |
|
}) |
|
|
|
return html |
|
}
|
|
|