You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

232 lines
5.6 KiB

//go:build !(js && wasm)
package bbolt
import (
"bytes"
"context"
"errors"
"runtime/debug"
"sort"
"time"
bolt "go.etcd.io/bbolt"
"lol.mleku.dev/chk"
"lol.mleku.dev/log"
"next.orly.dev/pkg/database"
"next.orly.dev/pkg/database/bufpool"
"git.mleku.dev/mleku/nostr/encoders/event"
)
// SaveEventMinimal stores only the essential event data for fast bulk import.
// It skips all indexes - call BuildIndexes after import completes.
func (b *B) SaveEventMinimal(ev *event.E) error {
if ev == nil {
return errors.New("nil event")
}
// Reject ephemeral events
if ev.Kind >= 20000 && ev.Kind <= 29999 {
return nil
}
// Get the next serial number
serial := b.getNextEventSerial()
// Serialize event in raw binary format (not compact - preserves full pubkey)
// This allows index building to work without pubkey serial resolution
legacyBuf := bufpool.GetMedium()
defer bufpool.PutMedium(legacyBuf)
ev.MarshalBinary(legacyBuf)
eventData := bufpool.CopyBytes(legacyBuf)
// Create minimal batch - only event data and ID mappings
batch := &EventBatch{
Serial: serial,
EventData: eventData,
Indexes: []BatchedWrite{
// Event ID -> Serial (for lookups)
{BucketName: bucketEid, Key: ev.ID[:], Value: makeSerialKey(serial)},
// Serial -> Event ID (for reverse lookups)
{BucketName: bucketSei, Key: makeSerialKey(serial), Value: ev.ID[:]},
},
}
return b.batcher.Add(batch)
}
// BuildIndexes builds all query indexes from stored events.
// Call this after importing events with SaveEventMinimal.
// Processes events in chunks to avoid OOM on large databases.
func (b *B) BuildIndexes(ctx context.Context) error {
log.I.F("bbolt: starting index build...")
startTime := time.Now()
// Force GC before starting to reclaim batch buffer memory
debug.FreeOSMemory()
// Process in small chunks to avoid OOM on memory-constrained systems
// With ~15 indexes per event and ~50 bytes per key, 50k events = ~37.5MB per chunk
const chunkSize = 50000
var totalEvents int
var lastSerial uint64 = 0
var lastLogTime = time.Now()
for {
select {
case <-ctx.Done():
return ctx.Err()
default:
}
// Collect indexes for this chunk
indexesByBucket := make(map[string][][]byte)
var chunkEvents int
var chunkSerial uint64
// Read a chunk of events
err := b.db.View(func(tx *bolt.Tx) error {
cmpBucket := tx.Bucket(bucketCmp)
if cmpBucket == nil {
return errors.New("cmp bucket not found")
}
cursor := cmpBucket.Cursor()
// Seek to start position
var k, v []byte
if lastSerial == 0 {
k, v = cursor.First()
} else {
// Seek past the last processed serial
seekKey := makeSerialKey(lastSerial + 1)
k, v = cursor.Seek(seekKey)
}
for ; k != nil && chunkEvents < chunkSize; k, v = cursor.Next() {
serial := decodeSerialKey(k)
chunkSerial = serial
// Decode event from raw binary format
ev := event.New()
if err := ev.UnmarshalBinary(bytes.NewBuffer(v)); err != nil {
log.W.F("bbolt: failed to unmarshal event at serial %d: %v", serial, err)
continue
}
// Generate indexes for this event
rawIdxs, err := database.GetIndexesForEvent(ev, serial)
if chk.E(err) {
ev.Free()
continue
}
// Group by bucket (first 3 bytes)
for _, idx := range rawIdxs {
if len(idx) < 3 {
continue
}
bucketName := string(idx[:3])
key := idx[3:]
// Skip eid and sei - already stored during import
if bucketName == "eid" || bucketName == "sei" {
continue
}
// Make a copy of the key
keyCopy := make([]byte, len(key))
copy(keyCopy, key)
indexesByBucket[bucketName] = append(indexesByBucket[bucketName], keyCopy)
}
ev.Free()
chunkEvents++
}
return nil
})
if err != nil {
return err
}
// No more events to process
if chunkEvents == 0 {
break
}
totalEvents += chunkEvents
lastSerial = chunkSerial
// Progress logging
if time.Since(lastLogTime) >= 5*time.Second {
log.I.F("bbolt: index build progress: %d events processed", totalEvents)
lastLogTime = time.Now()
}
// Count total keys in this chunk
var totalKeys int
for _, keys := range indexesByBucket {
totalKeys += len(keys)
}
log.I.F("bbolt: writing %d index keys for chunk (%d events)", totalKeys, chunkEvents)
// Write this chunk's indexes
for bucketName, keys := range indexesByBucket {
if len(keys) == 0 {
continue
}
bucketBytes := []byte(bucketName)
// Sort keys for this bucket before writing
sort.Slice(keys, func(i, j int) bool {
return bytes.Compare(keys[i], keys[j]) < 0
})
// Write in batches
const batchSize = 50000
for i := 0; i < len(keys); i += batchSize {
end := i + batchSize
if end > len(keys) {
end = len(keys)
}
batch := keys[i:end]
err := b.db.Update(func(tx *bolt.Tx) error {
bucket := tx.Bucket(bucketBytes)
if bucket == nil {
return nil
}
for _, key := range batch {
if err := bucket.Put(key, nil); err != nil {
return err
}
}
return nil
})
if err != nil {
log.E.F("bbolt: failed to write batch for bucket %s: %v", bucketName, err)
return err
}
}
}
// Clear for next chunk and release memory
indexesByBucket = nil
debug.FreeOSMemory()
}
elapsed := time.Since(startTime)
log.I.F("bbolt: index build complete in %v (%d events)", elapsed.Round(time.Second), totalEvents)
return nil
}
// decodeSerialKey decodes a 5-byte serial key to uint64
func decodeSerialKey(b []byte) uint64 {
if len(b) < 5 {
return 0
}
return uint64(b[0])<<32 | uint64(b[1])<<24 | uint64(b[2])<<16 | uint64(b[3])<<8 | uint64(b[4])
}