From b478845e1c00af4c1ddeae7fe2811c18ad98ff5f Mon Sep 17 00:00:00 2001 From: woikos Date: Wed, 21 Jan 2026 13:57:49 +0100 Subject: [PATCH] Disable GC by default to prevent crashes under load (v0.52.8) - Change ORLY_GC_ENABLED default from true to false - Mark GC as EXPERIMENTAL in config usage string - Add detailed TODOs documenting Badger race condition issues - GC triggers "assignment to entry in nil map" panics under concurrent load Files modified: - app/config/config.go: Default GC to false, add TODO comments - pkg/storage/gc.go: Add detailed implementation TODOs - pkg/version/version: Bump to v0.52.8 Co-Authored-By: Claude Opus 4.5 --- app/config/config.go | 5 ++++- pkg/storage/gc.go | 18 ++++++++++++++++++ pkg/version/version | 2 +- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/app/config/config.go b/app/config/config.go index f1a27c7..fc93677 100644 --- a/app/config/config.go +++ b/app/config/config.go @@ -207,8 +207,11 @@ type C struct { ArchiveCacheTTLHrs int `env:"ORLY_ARCHIVE_CACHE_TTL_HRS" default:"24" usage:"hours to cache query fingerprints to avoid repeated archive requests"` // Storage management configuration (access-based garbage collection) + // TODO: GC implementation needs batch transaction handling to avoid Badger race conditions + // TODO: GC should use smaller batches with delays between transactions on large datasets + // TODO: GC deletion should be serialized or use transaction pools to prevent concurrent txn issues MaxStorageBytes int64 `env:"ORLY_MAX_STORAGE_BYTES" default:"0" usage:"maximum storage in bytes (0=auto-detect 80%% of filesystem)"` - GCEnabled bool `env:"ORLY_GC_ENABLED" default:"true" usage:"enable continuous garbage collection based on access patterns"` + GCEnabled bool `env:"ORLY_GC_ENABLED" default:"false" usage:"enable continuous garbage collection based on access patterns (EXPERIMENTAL - may cause crashes under load)"` GCIntervalSec int `env:"ORLY_GC_INTERVAL_SEC" default:"60" usage:"seconds between GC runs when storage exceeds limit"` GCBatchSize int `env:"ORLY_GC_BATCH_SIZE" default:"1000" usage:"number of events to consider per GC run"` diff --git a/pkg/storage/gc.go b/pkg/storage/gc.go index 9280589..b6cfc46 100644 --- a/pkg/storage/gc.go +++ b/pkg/storage/gc.go @@ -2,6 +2,24 @@ package storage +// TODO: IMPORTANT - This GC implementation is EXPERIMENTAL and may cause crashes under high load. +// The current implementation has the following issues that need to be addressed: +// +// 1. Badger race condition: DeleteEventBySerial runs transactions that can trigger +// "assignment to entry in nil map" panics in Badger v4.8.0 under concurrent load. +// This happens when GC deletes events while many REQ queries are being processed. +// +// 2. Batch transaction handling: On large datasets (14+ GB), deletions should be: +// - Serialized or use a transaction pool to prevent concurrent txn issues +// - Batched with proper delays between batches to avoid overwhelming Badger +// - Rate-limited based on current system load +// +// 3. The current 10ms delay every 100 events (line ~237) is insufficient for busy relays. +// Consider adaptive rate limiting based on pending transaction count. +// +// 4. Consider using Badger's WriteBatch API instead of individual Update transactions +// for bulk deletions, which may be more efficient and avoid some race conditions. + import ( "context" "sync" diff --git a/pkg/version/version b/pkg/version/version index 291d443..4cb7d89 100644 --- a/pkg/version/version +++ b/pkg/version/version @@ -1 +1 @@ -v0.52.7 +v0.52.8