19 changed files with 1841 additions and 229 deletions
@@ -0,0 +1,421 @@
//go:build !(js && wasm)

package database

import (
	"bytes"
	"encoding/binary"
	"errors"
	"io"

	"git.mleku.dev/mleku/nostr/crypto/ec/schnorr"
	"git.mleku.dev/mleku/nostr/encoders/event"
	"git.mleku.dev/mleku/nostr/encoders/tag"
	"git.mleku.dev/mleku/nostr/encoders/varint"
	"lol.mleku.dev/chk"
)

// CompactEventFormat defines the binary format for compact event storage.
// This format uses 5-byte serial references instead of 32-byte IDs/pubkeys,
// dramatically reducing storage requirements.
//
// Format:
//   - 1 byte: Version (currently 1)
//   - 5 bytes: Author pubkey serial (reference to spk table)
//   - varint: CreatedAt timestamp
//   - 2 bytes: Kind (uint16 big-endian)
//   - varint: Number of tags
//   - For each tag:
//     - varint: Number of elements in tag
//     - For each element:
//       - 1 byte: Element type flag
//         - 0x00 = raw bytes (followed by varint length + data)
//         - 0x01 = pubkey serial reference (followed by 5-byte serial)
//         - 0x02 = event ID serial reference (followed by 5-byte serial)
//         - 0x03 = unknown event ID (followed by 32-byte full ID)
//       - Element data based on type
//   - varint: Content length
//   - Content bytes
//   - 64 bytes: Signature
//
// Space savings example (event with 3 p-tags, 1 e-tag):
//   - Original: 32 (ID) + 32 (pubkey) + 32*4 (tags) = 192 bytes
//   - Compact: 5 (pubkey serial) + 5*4 (tag serials) = 25 bytes
//   - Savings: 167 bytes per event (87%)

const (
	CompactFormatVersion = 1

	// Tag element type flags
	TagElementRaw          = 0x00 // Raw bytes (varint length + data)
	TagElementPubkeySerial = 0x01 // Pubkey serial reference (5 bytes)
	TagElementEventSerial  = 0x02 // Event ID serial reference (5 bytes)
	TagElementEventIdFull  = 0x03 // Full event ID (32 bytes) - for unknown refs
)
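
// Worked layout example (illustrative, derived from the format above): a tag
// ["p", <known pubkey>] encodes as
//
//	0x00 <varint 1> 'p'         // key element, raw, 3 bytes
//	0x01 <5-byte pubkey serial> // value element, serial reference, 6 bytes
//
// for 9 bytes total, versus 0x00 <varint 32> <32 raw bytes> = 34 bytes for the
// same value stored without a serial reference.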

// SerialResolver is an interface for resolving serials during compact encoding/decoding.
// This allows the encoder/decoder to look up or create serial mappings.
type SerialResolver interface {
	// GetOrCreatePubkeySerial returns the serial for a pubkey, creating one if needed.
	GetOrCreatePubkeySerial(pubkey []byte) (serial uint64, err error)

	// GetPubkeyBySerial returns the full pubkey for a serial.
	GetPubkeyBySerial(serial uint64) (pubkey []byte, err error)

	// GetEventSerialById returns the serial for an event ID, or 0 if not found.
	GetEventSerialById(eventId []byte) (serial uint64, found bool, err error)

	// GetEventIdBySerial returns the full event ID for a serial.
	GetEventIdBySerial(serial uint64) (eventId []byte, err error)
}

// MarshalCompactEvent encodes an event using compact serial references.
// The resolver is used to look up/create serial mappings for pubkeys and event IDs.
func MarshalCompactEvent(ev *event.E, resolver SerialResolver) (data []byte, err error) {
	buf := new(bytes.Buffer)

	// Version byte
	buf.WriteByte(CompactFormatVersion)

	// Author pubkey serial (5 bytes)
	var authorSerial uint64
	if authorSerial, err = resolver.GetOrCreatePubkeySerial(ev.Pubkey); chk.E(err) {
		return nil, err
	}
	writeUint40(buf, authorSerial)

	// CreatedAt (varint)
	varint.Encode(buf, uint64(ev.CreatedAt))

	// Kind (2 bytes big-endian)
	binary.Write(buf, binary.BigEndian, ev.Kind)

	// Tags
	if ev.Tags == nil || ev.Tags.Len() == 0 {
		varint.Encode(buf, 0)
	} else {
		varint.Encode(buf, uint64(ev.Tags.Len()))
		for _, t := range *ev.Tags {
			if err = encodeCompactTag(buf, t, resolver); chk.E(err) {
				return nil, err
			}
		}
	}

	// Content
	varint.Encode(buf, uint64(len(ev.Content)))
	buf.Write(ev.Content)

	// Signature (64 bytes)
	buf.Write(ev.Sig)

	return buf.Bytes(), nil
}

// encodeCompactTag encodes a single tag with serial references for e/p tags.
func encodeCompactTag(w io.Writer, t *tag.T, resolver SerialResolver) (err error) {
	if t == nil || t.Len() == 0 {
		varint.Encode(w, 0)
		return nil
	}

	varint.Encode(w, uint64(t.Len()))

	// Get tag key to determine if we should use serial references
	key := t.Key()
	isPTag := len(key) == 1 && key[0] == 'p'
	isETag := len(key) == 1 && key[0] == 'e'

	for i, elem := range t.T {
		if i == 0 {
			// First element is always the tag key - store as raw
			writeTagElement(w, TagElementRaw, elem)
			continue
		}

		if i == 1 {
			// Second element is the value - potentially a serial reference
			if isPTag && len(elem) == 32 {
				// Binary pubkey - look up serial
				serial, serErr := resolver.GetOrCreatePubkeySerial(elem)
				if serErr == nil {
					writeTagElementSerial(w, TagElementPubkeySerial, serial)
					continue
				}
				// Fall through to raw encoding on error
			} else if isPTag && len(elem) == 64 {
				// Hex pubkey - decode and look up serial
				var pubkey []byte
				if pubkey, err = hexDecode(elem); err == nil && len(pubkey) == 32 {
					serial, serErr := resolver.GetOrCreatePubkeySerial(pubkey)
					if serErr == nil {
						writeTagElementSerial(w, TagElementPubkeySerial, serial)
						continue
					}
				}
				// Fall through to raw encoding on error
			} else if isETag && len(elem) == 32 {
				// Binary event ID - look up serial if exists
				serial, found, serErr := resolver.GetEventSerialById(elem)
				if serErr == nil && found {
					writeTagElementSerial(w, TagElementEventSerial, serial)
					continue
				}
				// Event not found - store full ID
				writeTagElement(w, TagElementEventIdFull, elem)
				continue
			} else if isETag && len(elem) == 64 {
				// Hex event ID - decode and look up serial
				var eventId []byte
				if eventId, err = hexDecode(elem); err == nil && len(eventId) == 32 {
					serial, found, serErr := resolver.GetEventSerialById(eventId)
					if serErr == nil && found {
						writeTagElementSerial(w, TagElementEventSerial, serial)
						continue
					}
					// Event not found - store full ID
					writeTagElement(w, TagElementEventIdFull, eventId)
					continue
				}
				// Fall through to raw encoding on error
			}
		}

		// Default: raw encoding
		writeTagElement(w, TagElementRaw, elem)
	}

	return nil
}

// writeTagElement writes a tag element with type flag.
func writeTagElement(w io.Writer, typeFlag byte, data []byte) {
	w.Write([]byte{typeFlag})
	if typeFlag == TagElementEventIdFull {
		// Full event ID - no length prefix, always 32 bytes
		w.Write(data)
	} else {
		// Raw data - length prefix
		varint.Encode(w, uint64(len(data)))
		w.Write(data)
	}
}

// writeTagElementSerial writes a serial reference tag element.
func writeTagElementSerial(w io.Writer, typeFlag byte, serial uint64) {
	w.Write([]byte{typeFlag})
	writeUint40(w, serial)
}

// writeUint40 writes a 5-byte big-endian unsigned integer.
func writeUint40(w io.Writer, value uint64) {
	buf := []byte{
		byte((value >> 32) & 0xFF),
		byte((value >> 24) & 0xFF),
		byte((value >> 16) & 0xFF),
		byte((value >> 8) & 0xFF),
		byte(value & 0xFF),
	}
	w.Write(buf)
}

// readUint40 reads a 5-byte big-endian unsigned integer.
func readUint40(r io.Reader) (value uint64, err error) {
	buf := make([]byte, 5)
	if _, err = io.ReadFull(r, buf); err != nil {
		return 0, err
	}
	value = (uint64(buf[0]) << 32) |
		(uint64(buf[1]) << 24) |
		(uint64(buf[2]) << 16) |
		(uint64(buf[3]) << 8) |
		uint64(buf[4])
	return value, nil
}
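
// uint40RoundTripExample is an illustrative sketch (not used by the encoder)
// showing that a serial such as 0x0102030405 round-trips through
// writeUint40/readUint40 as the bytes {0x01, 0x02, 0x03, 0x04, 0x05}; the
// largest representable serial is 2^40-1 (about 1.1 trillion entries).
func uint40RoundTripExample() bool {
	buf := new(bytes.Buffer)
	writeUint40(buf, 0x0102030405)
	v, err := readUint40(buf)
	return err == nil && v == 0x0102030405
}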

// UnmarshalCompactEvent decodes a compact event back to a full event.E.
// The resolver is used to look up pubkeys and event IDs from serials.
// The eventId parameter is the full 32-byte event ID (from SerialEventId table).
func UnmarshalCompactEvent(data []byte, eventId []byte, resolver SerialResolver) (ev *event.E, err error) {
	r := bytes.NewReader(data)
	ev = new(event.E)

	// Version byte
	version, err := r.ReadByte()
	if err != nil {
		return nil, err
	}
	if version != CompactFormatVersion {
		return nil, errors.New("unsupported compact event format version")
	}

	// Set the event ID (passed separately from SerialEventId lookup)
	ev.ID = make([]byte, 32)
	copy(ev.ID, eventId)

	// Author pubkey serial (5 bytes) -> full pubkey
	authorSerial, err := readUint40(r)
	if err != nil {
		return nil, err
	}
	if ev.Pubkey, err = resolver.GetPubkeyBySerial(authorSerial); chk.E(err) {
		return nil, err
	}

	// CreatedAt (varint)
	var ca uint64
	if ca, err = varint.Decode(r); chk.E(err) {
		return nil, err
	}
	ev.CreatedAt = int64(ca)

	// Kind (2 bytes big-endian)
	if err = binary.Read(r, binary.BigEndian, &ev.Kind); chk.E(err) {
		return nil, err
	}

	// Tags
	var nTags uint64
	if nTags, err = varint.Decode(r); chk.E(err) {
		return nil, err
	}
	if nTags > 0 {
		ev.Tags = tag.NewSWithCap(int(nTags))
		for i := uint64(0); i < nTags; i++ {
			var t *tag.T
			if t, err = decodeCompactTag(r, resolver); chk.E(err) {
				return nil, err
			}
			*ev.Tags = append(*ev.Tags, t)
		}
	}

	// Content
	var contentLen uint64
	if contentLen, err = varint.Decode(r); chk.E(err) {
		return nil, err
	}
	ev.Content = make([]byte, contentLen)
	if _, err = io.ReadFull(r, ev.Content); chk.E(err) {
		return nil, err
	}

	// Signature (64 bytes)
	ev.Sig = make([]byte, schnorr.SignatureSize)
	if _, err = io.ReadFull(r, ev.Sig); chk.E(err) {
		return nil, err
	}

	return ev, nil
}
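
// compactRoundTripExample is an illustrative sketch (not part of the storage
// path) showing the expected call pattern: encode with MarshalCompactEvent,
// persist the bytes, then decode with UnmarshalCompactEvent using the same
// resolver and the event's full 32-byte ID. Any SerialResolver implementation
// is assumed here (for example, one backed by the database).
func compactRoundTripExample(ev *event.E, resolver SerialResolver) (decoded *event.E, err error) {
	var data []byte
	if data, err = MarshalCompactEvent(ev, resolver); chk.E(err) {
		return nil, err
	}
	// The event ID is not stored in the compact record itself; it lives in the
	// serial -> event ID mapping and is passed back in at decode time.
	return UnmarshalCompactEvent(data, ev.ID, resolver)
}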

// decodeCompactTag decodes a single tag from compact format.
func decodeCompactTag(r io.Reader, resolver SerialResolver) (t *tag.T, err error) {
	var nElems uint64
	if nElems, err = varint.Decode(r); chk.E(err) {
		return nil, err
	}

	t = tag.NewWithCap(int(nElems))

	for i := uint64(0); i < nElems; i++ {
		var elem []byte
		if elem, err = decodeTagElement(r, resolver); chk.E(err) {
			return nil, err
		}
		t.T = append(t.T, elem)
	}

	return t, nil
}

// decodeTagElement decodes a single tag element from compact format.
func decodeTagElement(r io.Reader, resolver SerialResolver) (elem []byte, err error) {
	// Read type flag
	typeBuf := make([]byte, 1)
	if _, err = io.ReadFull(r, typeBuf); err != nil {
		return nil, err
	}
	typeFlag := typeBuf[0]

	switch typeFlag {
	case TagElementRaw:
		// Raw bytes: varint length + data
		var length uint64
		if length, err = varint.Decode(r); chk.E(err) {
			return nil, err
		}
		elem = make([]byte, length)
		if _, err = io.ReadFull(r, elem); err != nil {
			return nil, err
		}
		return elem, nil

	case TagElementPubkeySerial:
		// Pubkey serial: 5 bytes -> lookup full pubkey -> return as 32-byte binary
		serial, err := readUint40(r)
		if err != nil {
			return nil, err
		}
		pubkey, err := resolver.GetPubkeyBySerial(serial)
		if err != nil {
			return nil, err
		}
		// Return as 32-byte binary (nostr library optimized format)
		return pubkey, nil

	case TagElementEventSerial:
		// Event serial: 5 bytes -> lookup full event ID -> return as 32-byte binary
		serial, err := readUint40(r)
		if err != nil {
			return nil, err
		}
		eventId, err := resolver.GetEventIdBySerial(serial)
		if err != nil {
			return nil, err
		}
		// Return as 32-byte binary
		return eventId, nil

	case TagElementEventIdFull:
		// Full event ID: 32 bytes (for unknown/forward references)
		elem = make([]byte, 32)
		if _, err = io.ReadFull(r, elem); err != nil {
			return nil, err
		}
		return elem, nil

	default:
		return nil, errors.New("unknown tag element type flag")
	}
}

// hexDecode decodes hex bytes to binary.
// This is a simple implementation - the real one uses the optimized hex package.
func hexDecode(src []byte) (dst []byte, err error) {
	if len(src)%2 != 0 {
		return nil, errors.New("hex string has odd length")
	}
	dst = make([]byte, len(src)/2)
	for i := 0; i < len(dst); i++ {
		a := unhex(src[i*2])
		b := unhex(src[i*2+1])
		if a == 0xFF || b == 0xFF {
			return nil, errors.New("invalid hex character")
		}
		dst[i] = (a << 4) | b
	}
	return dst, nil
}

func unhex(c byte) byte {
	switch {
	case '0' <= c && c <= '9':
		return c - '0'
	case 'a' <= c && c <= 'f':
		return c - 'a' + 10
	case 'A' <= c && c <= 'F':
		return c - 'A' + 10
	}
	return 0xFF
}

@@ -0,0 +1,195 @@
//go:build !(js && wasm)

package database

import (
	"bytes"
	"sync/atomic"

	"github.com/dgraph-io/badger/v4"
	"lol.mleku.dev/chk"
	"lol.mleku.dev/log"
	"next.orly.dev/pkg/database/indexes"
)

// CompactStorageStats holds statistics about compact vs legacy storage.
type CompactStorageStats struct {
	// Event counts
	CompactEvents int64 // Number of events in compact format (cmp prefix)
	LegacyEvents  int64 // Number of events in legacy format (evt/sev prefixes)
	TotalEvents   int64 // Total events

	// Storage sizes
	CompactBytes int64 // Total bytes used by compact format
	LegacyBytes  int64 // Total bytes used by legacy format (would be used without compact)

	// Savings
	BytesSaved     int64   // Bytes saved by using compact format
	PercentSaved   float64 // Percentage of space saved
	AverageCompact float64 // Average compact event size
	AverageLegacy  float64 // Average legacy event size (estimated)

	// Serial mappings
	SerialEventIdEntries int64 // Number of sei (serial -> event ID) mappings
	SerialEventIdBytes   int64 // Bytes used by sei mappings
}

// CompactStorageStats calculates storage statistics for compact event storage.
// This scans the database to provide accurate metrics on space savings.
func (d *D) CompactStorageStats() (stats CompactStorageStats, err error) {
	if err = d.View(func(txn *badger.Txn) error {
		// Count compact events (cmp prefix)
		cmpPrf := new(bytes.Buffer)
		if err = indexes.CompactEventEnc(nil).MarshalWrite(cmpPrf); chk.E(err) {
			return err
		}

		it := txn.NewIterator(badger.IteratorOptions{Prefix: cmpPrf.Bytes()})
		for it.Rewind(); it.Valid(); it.Next() {
			item := it.Item()
			stats.CompactEvents++
			stats.CompactBytes += int64(len(item.Key())) + int64(item.ValueSize())
		}
		it.Close()

		// Count legacy evt entries
		evtPrf := new(bytes.Buffer)
		if err = indexes.EventEnc(nil).MarshalWrite(evtPrf); chk.E(err) {
			return err
		}

		it = txn.NewIterator(badger.IteratorOptions{Prefix: evtPrf.Bytes()})
		for it.Rewind(); it.Valid(); it.Next() {
			item := it.Item()
			stats.LegacyEvents++
			stats.LegacyBytes += int64(len(item.Key())) + int64(item.ValueSize())
		}
		it.Close()

		// Count legacy sev entries
		sevPrf := new(bytes.Buffer)
		if err = indexes.SmallEventEnc(nil).MarshalWrite(sevPrf); chk.E(err) {
			return err
		}

		it = txn.NewIterator(badger.IteratorOptions{Prefix: sevPrf.Bytes()})
		for it.Rewind(); it.Valid(); it.Next() {
			item := it.Item()
			stats.LegacyEvents++
			stats.LegacyBytes += int64(len(item.Key())) // sev stores data in key
		}
		it.Close()

		// Count SerialEventId mappings (sei prefix)
		seiPrf := new(bytes.Buffer)
		if err = indexes.SerialEventIdEnc(nil).MarshalWrite(seiPrf); chk.E(err) {
			return err
		}

		it = txn.NewIterator(badger.IteratorOptions{Prefix: seiPrf.Bytes()})
		for it.Rewind(); it.Valid(); it.Next() {
			item := it.Item()
			stats.SerialEventIdEntries++
			stats.SerialEventIdBytes += int64(len(item.Key())) + int64(item.ValueSize())
		}
		it.Close()

		return nil
	}); chk.E(err) {
		return
	}

	stats.TotalEvents = stats.CompactEvents + stats.LegacyEvents

	// Calculate averages
	if stats.CompactEvents > 0 {
		stats.AverageCompact = float64(stats.CompactBytes) / float64(stats.CompactEvents)
	}
	if stats.LegacyEvents > 0 {
		stats.AverageLegacy = float64(stats.LegacyBytes) / float64(stats.LegacyEvents)
	}

	// Estimate savings: compare compact size to what legacy size would be.
	// For events that are in compact format, estimate legacy size based on typical ratios.
	// A typical event has:
	// - 32 bytes event ID (saved in compact: stored separately in sei)
	// - 32 bytes pubkey (saved: replaced by 5-byte serial)
	// - For e-tags: 32 bytes each (saved: replaced by 5-byte serial when known)
	// - For p-tags: 32 bytes each (saved: replaced by 5-byte serial)
	// Conservative estimate: compact format is ~60% of legacy size for typical events.
	if stats.CompactEvents > 0 && stats.AverageCompact > 0 {
		// Estimate what the legacy size would have been
		estimatedLegacyForCompact := float64(stats.CompactBytes) / 0.60 // 60% compression ratio
		stats.BytesSaved = int64(estimatedLegacyForCompact) - stats.CompactBytes - stats.SerialEventIdBytes
		if stats.BytesSaved < 0 {
			stats.BytesSaved = 0
		}
		totalWithoutCompact := estimatedLegacyForCompact + float64(stats.LegacyBytes)
		totalWithCompact := float64(stats.CompactBytes + stats.LegacyBytes + stats.SerialEventIdBytes)
		if totalWithoutCompact > 0 {
			stats.PercentSaved = (1.0 - totalWithCompact/totalWithoutCompact) * 100.0
		}
	}

	return stats, nil
}
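
// Worked example of the estimate above (illustrative numbers, not measured):
// 1,000 compact events totalling 300,000 bytes imply an estimated legacy size
// of 300,000 / 0.60 = 500,000 bytes. With, say, 45,000 bytes of sei mappings,
// BytesSaved = 500,000 - 300,000 - 45,000 = 155,000 bytes, and with no legacy
// events PercentSaved = (1 - 345,000/500,000) * 100 = 31%.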

// compactSaveCounter tracks cumulative bytes saved by compact format.
var compactSaveCounter atomic.Int64

// LogCompactSavings logs the storage savings achieved by compact format.
// Call this periodically or after significant operations.
func (d *D) LogCompactSavings() {
	stats, err := d.CompactStorageStats()
	if err != nil {
		log.W.F("failed to get compact storage stats: %v", err)
		return
	}

	if stats.TotalEvents == 0 {
		return
	}

	log.I.F("📊 Compact storage stats: %d compact events, %d legacy events",
		stats.CompactEvents, stats.LegacyEvents)
	log.I.F(" Compact size: %.2f MB, Legacy size: %.2f MB",
		float64(stats.CompactBytes)/(1024.0*1024.0),
		float64(stats.LegacyBytes)/(1024.0*1024.0))
	log.I.F(" Serial mappings (sei): %d entries, %.2f KB",
		stats.SerialEventIdEntries,
		float64(stats.SerialEventIdBytes)/1024.0)

	if stats.CompactEvents > 0 {
		log.I.F(" Average compact event: %.0f bytes, estimated legacy: %.0f bytes",
			stats.AverageCompact, stats.AverageCompact/0.60)
		log.I.F(" Estimated savings: %.2f MB (%.1f%%)",
			float64(stats.BytesSaved)/(1024.0*1024.0),
			stats.PercentSaved)
	}

	// Also log serial cache stats
	cacheStats := d.SerialCacheStats()
	log.I.F(" Serial cache: %d/%d pubkeys, %d/%d event IDs, ~%.2f MB memory",
		cacheStats.PubkeysCached, cacheStats.PubkeysMaxSize,
		cacheStats.EventIdsCached, cacheStats.EventIdsMaxSize,
		float64(cacheStats.TotalMemoryBytes)/(1024.0*1024.0))
}

// TrackCompactSaving records bytes saved for a single event.
// Call this during event save to track cumulative savings.
func TrackCompactSaving(legacySize, compactSize int) {
	saved := legacySize - compactSize
	if saved > 0 {
		compactSaveCounter.Add(int64(saved))
	}
}

// GetCumulativeCompactSavings returns total bytes saved across all compact saves.
func GetCumulativeCompactSavings() int64 {
	return compactSaveCounter.Load()
}

// ResetCompactSavingsCounter resets the cumulative savings counter.
func ResetCompactSavingsCounter() {
	compactSaveCounter.Store(0)
}
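
// trackSavingExample is an illustrative sketch (not part of the save path)
// showing how the counter is intended to be fed: compare the legacy encoding
// size against the compact encoding size for the same event and record the
// difference.
func trackSavingExample(legacyEncoded, compactEncoded []byte) {
	TrackCompactSaving(len(legacyEncoded), len(compactEncoded))
	_ = GetCumulativeCompactSavings() // cumulative bytes saved so far
}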

@@ -0,0 +1,374 @@
//go:build !(js && wasm)

package database

import (
	"bytes"
	"errors"
	"sync"

	"github.com/dgraph-io/badger/v4"
	"lol.mleku.dev/chk"
	"next.orly.dev/pkg/database/indexes"
	"next.orly.dev/pkg/database/indexes/types"
)

// SerialCache provides bounded in-memory caching for pubkey and event ID
// serial lookups (simple bulk eviction rather than strict LRU).
// This is critical for compact event decoding performance since every event
// requires looking up the author pubkey and potentially multiple tag references.
type SerialCache struct {
	// Pubkey serial -> full pubkey (for decoding)
	pubkeyBySerial     map[uint64][]byte
	pubkeyBySerialLock sync.RWMutex

	// Pubkey hash -> serial (for encoding)
	serialByPubkeyHash     map[string]uint64
	serialByPubkeyHashLock sync.RWMutex

	// Event serial -> full event ID (for decoding)
	eventIdBySerial     map[uint64][]byte
	eventIdBySerialLock sync.RWMutex

	// Event ID hash -> serial (for encoding)
	serialByEventIdHash     map[string]uint64
	serialByEventIdHashLock sync.RWMutex

	// Maximum cache sizes
	maxPubkeys  int
	maxEventIds int
}

// NewSerialCache creates a new serial cache with the specified sizes.
func NewSerialCache(maxPubkeys, maxEventIds int) *SerialCache {
	if maxPubkeys <= 0 {
		maxPubkeys = 100000 // Default 100k pubkeys (~3.2MB)
	}
	if maxEventIds <= 0 {
		maxEventIds = 500000 // Default 500k event IDs (~16MB)
	}
	return &SerialCache{
		pubkeyBySerial:      make(map[uint64][]byte, maxPubkeys),
		serialByPubkeyHash:  make(map[string]uint64, maxPubkeys),
		eventIdBySerial:     make(map[uint64][]byte, maxEventIds),
		serialByEventIdHash: make(map[string]uint64, maxEventIds),
		maxPubkeys:          maxPubkeys,
		maxEventIds:         maxEventIds,
	}
}
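
// The defaults above come from straightforward arithmetic: 100,000 pubkeys x
// 32 bytes ≈ 3.2 MB and 500,000 event IDs x 32 bytes ≈ 16 MB of payload data
// (map bucket overhead roughly doubles this; see Stats below). Callers that
// want the defaults can construct the cache as NewSerialCache(0, 0).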

// CachePubkey adds a pubkey to the cache.
func (c *SerialCache) CachePubkey(serial uint64, pubkey []byte) {
	if len(pubkey) != 32 {
		return
	}

	// Cache serial -> pubkey
	c.pubkeyBySerialLock.Lock()
	if len(c.pubkeyBySerial) >= c.maxPubkeys {
		// Simple eviction: clear half the cache.
		// A proper LRU would be better but this is simpler.
		count := 0
		for k := range c.pubkeyBySerial {
			delete(c.pubkeyBySerial, k)
			count++
			if count >= c.maxPubkeys/2 {
				break
			}
		}
	}
	pk := make([]byte, 32)
	copy(pk, pubkey)
	c.pubkeyBySerial[serial] = pk
	c.pubkeyBySerialLock.Unlock()

	// Cache pubkey hash -> serial
	c.serialByPubkeyHashLock.Lock()
	if len(c.serialByPubkeyHash) >= c.maxPubkeys {
		count := 0
		for k := range c.serialByPubkeyHash {
			delete(c.serialByPubkeyHash, k)
			count++
			if count >= c.maxPubkeys/2 {
				break
			}
		}
	}
	c.serialByPubkeyHash[string(pubkey)] = serial
	c.serialByPubkeyHashLock.Unlock()
}

// GetPubkeyBySerial returns the pubkey for a serial from cache.
func (c *SerialCache) GetPubkeyBySerial(serial uint64) (pubkey []byte, found bool) {
	c.pubkeyBySerialLock.RLock()
	pubkey, found = c.pubkeyBySerial[serial]
	c.pubkeyBySerialLock.RUnlock()
	return
}

// GetSerialByPubkey returns the serial for a pubkey from cache.
func (c *SerialCache) GetSerialByPubkey(pubkey []byte) (serial uint64, found bool) {
	c.serialByPubkeyHashLock.RLock()
	serial, found = c.serialByPubkeyHash[string(pubkey)]
	c.serialByPubkeyHashLock.RUnlock()
	return
}

// CacheEventId adds an event ID to the cache.
func (c *SerialCache) CacheEventId(serial uint64, eventId []byte) {
	if len(eventId) != 32 {
		return
	}

	// Cache serial -> event ID
	c.eventIdBySerialLock.Lock()
	if len(c.eventIdBySerial) >= c.maxEventIds {
		count := 0
		for k := range c.eventIdBySerial {
			delete(c.eventIdBySerial, k)
			count++
			if count >= c.maxEventIds/2 {
				break
			}
		}
	}
	eid := make([]byte, 32)
	copy(eid, eventId)
	c.eventIdBySerial[serial] = eid
	c.eventIdBySerialLock.Unlock()

	// Cache event ID hash -> serial
	c.serialByEventIdHashLock.Lock()
	if len(c.serialByEventIdHash) >= c.maxEventIds {
		count := 0
		for k := range c.serialByEventIdHash {
			delete(c.serialByEventIdHash, k)
			count++
			if count >= c.maxEventIds/2 {
				break
			}
		}
	}
	c.serialByEventIdHash[string(eventId)] = serial
	c.serialByEventIdHashLock.Unlock()
}

// GetEventIdBySerial returns the event ID for a serial from cache.
func (c *SerialCache) GetEventIdBySerial(serial uint64) (eventId []byte, found bool) {
	c.eventIdBySerialLock.RLock()
	eventId, found = c.eventIdBySerial[serial]
	c.eventIdBySerialLock.RUnlock()
	return
}

// GetSerialByEventId returns the serial for an event ID from cache.
func (c *SerialCache) GetSerialByEventId(eventId []byte) (serial uint64, found bool) {
	c.serialByEventIdHashLock.RLock()
	serial, found = c.serialByEventIdHash[string(eventId)]
	c.serialByEventIdHashLock.RUnlock()
	return
}

// DatabaseSerialResolver implements SerialResolver using the database and cache.
type DatabaseSerialResolver struct {
	db    *D
	cache *SerialCache
}

// NewDatabaseSerialResolver creates a new resolver.
func NewDatabaseSerialResolver(db *D, cache *SerialCache) *DatabaseSerialResolver {
	return &DatabaseSerialResolver{db: db, cache: cache}
}

// GetOrCreatePubkeySerial implements SerialResolver.
func (r *DatabaseSerialResolver) GetOrCreatePubkeySerial(pubkey []byte) (serial uint64, err error) {
	if len(pubkey) != 32 {
		return 0, errors.New("pubkey must be 32 bytes")
	}

	// Check cache first
	if s, found := r.cache.GetSerialByPubkey(pubkey); found {
		return s, nil
	}

	// Use existing function which handles creation
	ser, err := r.db.GetOrCreatePubkeySerial(pubkey)
	if err != nil {
		return 0, err
	}

	serial = ser.Get()

	// Cache it
	r.cache.CachePubkey(serial, pubkey)

	return serial, nil
}

// GetPubkeyBySerial implements SerialResolver.
func (r *DatabaseSerialResolver) GetPubkeyBySerial(serial uint64) (pubkey []byte, err error) {
	// Check cache first
	if pk, found := r.cache.GetPubkeyBySerial(serial); found {
		return pk, nil
	}

	// Look up in database
	ser := new(types.Uint40)
	if err = ser.Set(serial); err != nil {
		return nil, err
	}

	pubkey, err = r.db.GetPubkeyBySerial(ser)
	if err != nil {
		return nil, err
	}

	// Cache it
	r.cache.CachePubkey(serial, pubkey)

	return pubkey, nil
}

// GetEventSerialById implements SerialResolver.
func (r *DatabaseSerialResolver) GetEventSerialById(eventId []byte) (serial uint64, found bool, err error) {
	if len(eventId) != 32 {
		return 0, false, errors.New("event ID must be 32 bytes")
	}

	// Check cache first
	if s, ok := r.cache.GetSerialByEventId(eventId); ok {
		return s, true, nil
	}

	// Look up in database using existing GetSerialById
	ser, err := r.db.GetSerialById(eventId)
	if err != nil {
		// Not found is not an error - just return found=false
		return 0, false, nil
	}

	serial = ser.Get()

	// Cache it
	r.cache.CacheEventId(serial, eventId)

	return serial, true, nil
}

// GetEventIdBySerial implements SerialResolver.
func (r *DatabaseSerialResolver) GetEventIdBySerial(serial uint64) (eventId []byte, err error) {
	// Check cache first
	if eid, found := r.cache.GetEventIdBySerial(serial); found {
		return eid, nil
	}

	// Look up in database - use SerialEventId index
	ser := new(types.Uint40)
	if err = ser.Set(serial); err != nil {
		return nil, err
	}

	eventId, err = r.db.GetEventIdBySerial(ser)
	if err != nil {
		return nil, err
	}

	// Cache it
	r.cache.CacheEventId(serial, eventId)

	return eventId, nil
}
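
// newResolverExample is an illustrative sketch of how the pieces are wired
// together: one shared SerialCache per database, wrapped in a
// DatabaseSerialResolver, which is what MarshalCompactEvent and
// UnmarshalCompactEvent expect as their SerialResolver. The cache sizes here
// are just the defaults.
func newResolverExample(d *D) SerialResolver {
	cache := NewSerialCache(0, 0) // 0 selects the built-in defaults
	return NewDatabaseSerialResolver(d, cache)
}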

// GetEventIdBySerial looks up an event ID by its serial number.
// Uses the SerialEventId index (sei prefix).
func (d *D) GetEventIdBySerial(ser *types.Uint40) (eventId []byte, err error) {
	keyBuf := new(bytes.Buffer)
	if err = indexes.SerialEventIdEnc(ser).MarshalWrite(keyBuf); chk.E(err) {
		return nil, err
	}

	err = d.View(func(txn *badger.Txn) error {
		item, gerr := txn.Get(keyBuf.Bytes())
		if chk.E(gerr) {
			return gerr
		}

		return item.Value(func(val []byte) error {
			eventId = make([]byte, len(val))
			copy(eventId, val)
			return nil
		})
	})

	if err != nil {
		return nil, errors.New("event ID not found for serial")
	}

	return eventId, nil
}

// StoreEventIdSerial stores the mapping from event serial to full event ID.
// This is called during event save to enable later reconstruction.
func (d *D) StoreEventIdSerial(txn *badger.Txn, serial uint64, eventId []byte) error {
	if len(eventId) != 32 {
		return errors.New("event ID must be 32 bytes")
	}

	ser := new(types.Uint40)
	if err := ser.Set(serial); err != nil {
		return err
	}

	keyBuf := new(bytes.Buffer)
	if err := indexes.SerialEventIdEnc(ser).MarshalWrite(keyBuf); chk.E(err) {
		return err
	}

	return txn.Set(keyBuf.Bytes(), eventId)
}

// SerialCacheStats holds statistics about the serial cache.
type SerialCacheStats struct {
	PubkeysCached      int // Number of pubkeys currently cached
	PubkeysMaxSize     int // Maximum pubkey cache size
	EventIdsCached     int // Number of event IDs currently cached
	EventIdsMaxSize    int // Maximum event ID cache size
	PubkeyMemoryBytes  int // Estimated memory usage for pubkey cache
	EventIdMemoryBytes int // Estimated memory usage for event ID cache
	TotalMemoryBytes   int // Total estimated memory usage
}

// Stats returns statistics about the serial cache.
func (c *SerialCache) Stats() SerialCacheStats {
	c.pubkeyBySerialLock.RLock()
	pubkeysCached := len(c.pubkeyBySerial)
	c.pubkeyBySerialLock.RUnlock()

	c.eventIdBySerialLock.RLock()
	eventIdsCached := len(c.eventIdBySerial)
	c.eventIdBySerialLock.RUnlock()

	// Memory estimation:
	// - Each pubkey entry: 8 bytes (uint64 key) + 32 bytes (pubkey value) = 40 bytes
	// - Each event ID entry: 8 bytes (uint64 key) + 32 bytes (event ID value) = 40 bytes
	// - Map overhead is roughly 2x the entry size for buckets
	pubkeyMemory := pubkeysCached * 40 * 2
	eventIdMemory := eventIdsCached * 40 * 2

	return SerialCacheStats{
		PubkeysCached:      pubkeysCached,
		PubkeysMaxSize:     c.maxPubkeys,
		EventIdsCached:     eventIdsCached,
		EventIdsMaxSize:    c.maxEventIds,
		PubkeyMemoryBytes:  pubkeyMemory,
		EventIdMemoryBytes: eventIdMemory,
		TotalMemoryBytes:   pubkeyMemory + eventIdMemory,
	}
}

// SerialCacheStats returns statistics about the serial cache.
func (d *D) SerialCacheStats() SerialCacheStats {
	if d.serialCache == nil {
		return SerialCacheStats{}
	}
	return d.serialCache.Stats()
}