Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 63 additions & 17 deletions sei-cosmos/types/tracer.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,18 @@ import (
const (
maxStoreTraceIterators = 16
maxStoreTraceIteratorKeys = 64

// maxStoreTraceModuleBytes caps the key/value payload retained in a
// module's access log. Past this the log is truncated (ModuleTrace.Truncated
// set) and further accesses are dropped from it, bounding a pathological tx
// (e.g. a huge iterator scan) that would otherwise grow the response without
// limit — the cause of the debug_traceStateAccess OOM. Running stats stay
// accurate regardless of truncation.
maxStoreTraceModuleBytes = 4 << 20 // 4 MiB
// perAccessOverheadBytes charges each retained access a fixed cost on top of
// its payload so high-count, near-zero-payload patterns (e.g. iterator
// Next() floods) are bounded by the same cap.
perAccessOverheadBytes = 64
)

// StoreTracer collects every KVStore access (Get/Has/Set/Delete/iterator)
Expand All @@ -28,12 +40,16 @@ type StoreTracer struct {
mu *sync.Mutex
}

// ModuleTrace holds every access event for a single module within a trace,
// plus a per-iterator roll-up.
// ModuleTrace holds the access events for a single module within a trace
// (capped at maxStoreTraceModuleBytes of retained payload), plus a per-iterator
// roll-up and running, always-accurate operation stats.
type ModuleTrace struct {
Accesses []Access
Iterators []*IteratorTrace
iteratorIndexBy map[int]int
stats map[OpType]OperationSummary
accessBytes int
Truncated bool
}

// IteratorTrace aggregates one opened iterator: its bounds, direction, the
Expand Down Expand Up @@ -148,7 +164,7 @@ func (st *StoreTracer) StartIterator(start, end []byte, ascending bool, module s
st.nextIteratorID++
iteratorID := st.nextIteratorID

mt.Accesses = append(mt.Accesses, Access{
mt.record(Access{
Op: IteratorOpen,
Key: slices.Clone(start),
Value: slices.Clone(end),
Expand Down Expand Up @@ -179,7 +195,7 @@ func (st *StoreTracer) RecordIteratorValue(iteratorID int, key []byte, value []b
defer st.mu.Unlock()

mt := st.getOrSetModuleTrace(module)
mt.Accesses = append(mt.Accesses, Access{
mt.record(Access{
Op: IteratorValue,
Key: slices.Clone(key),
Value: slices.Clone(value),
Expand All @@ -204,7 +220,7 @@ func (st *StoreTracer) RecordIteratorNext(iteratorID int, module string, duratio
defer st.mu.Unlock()

mt := st.getOrSetModuleTrace(module)
mt.Accesses = append(mt.Accesses, Access{
mt.record(Access{
Op: IteratorNext,
DurationNanos: duration.Nanoseconds(),
})
Expand All @@ -224,6 +240,7 @@ func (st *StoreTracer) getOrSetModuleTrace(module string) (mt *ModuleTrace) {
Accesses: []Access{},
Iterators: []*IteratorTrace{},
iteratorIndexBy: map[int]int{},
stats: map[OpType]OperationSummary{},
}
st.Modules[module] = mt
} else {
Expand All @@ -232,11 +249,36 @@ func (st *StoreTracer) getOrSetModuleTrace(module string) (mt *ModuleTrace) {
return
}

// record updates the module's running op stats and appends the access to the
// log, retaining its cloned key/value only while the module stays under
// maxStoreTraceModuleBytes. Past the cap the log is marked Truncated and the
// access is dropped from it; stats are updated unconditionally so op
// counts/durations remain accurate.
func (mt *ModuleTrace) record(access Access) {
s := mt.stats[access.Op]
s.Count++
s.TotalNanos += access.DurationNanos
mt.stats[access.Op] = s

if mt.Truncated {
return
}
cost := len(access.Key) + len(access.Value) + perAccessOverheadBytes
Comment thread
amir-deris marked this conversation as resolved.
// An access whose own payload exceeds the cap trips truncation on the first
// access (nothing retained); intended — we never retain an oversized blob.
if mt.accessBytes+cost > maxStoreTraceModuleBytes {
mt.Truncated = true
return
}
mt.accessBytes += cost
mt.Accesses = append(mt.Accesses, access)
}
Comment thread
amir-deris marked this conversation as resolved.

func (st *StoreTracer) recordAccess(module string, access Access) {
st.mu.Lock()
defer st.mu.Unlock()
mt := st.getOrSetModuleTrace(module)
mt.Accesses = append(mt.Accesses, access)
mt.record(access)
}

// Clear resets the tracer to its empty state so a single StoreTracer can be
Expand All @@ -263,6 +305,9 @@ type ModuleTraceDump struct {
Has []string `json:"has"`
Stats map[string]OperationSummary `json:"stats,omitempty"`
Iterators []IteratorTraceDump `json:"iterators,omitempty"`
// Truncated is set when the module's access log hit maxStoreTraceModuleBytes;
// Reads/Has then reflect only the retained prefix while Stats stay complete.
Truncated bool `json:"truncated,omitempty"`
}

type IteratorTraceDump struct {
Expand Down Expand Up @@ -294,16 +339,25 @@ func (st *StoreTracer) dumpLocked() StoreTraceDump {
mtd := ModuleTraceDump{
Reads: make(map[string]string),
Has: []string{},
Stats: map[string]OperationSummary{},
Stats: make(map[string]OperationSummary, len(module.stats)),
Iterators: make([]IteratorTraceDump, 0, len(module.Iterators)),
Truncated: module.Truncated,
}
// Stats are accumulated at record time, so they stay complete even when
// the access log below was truncated.
for op, s := range module.stats {
key := op.String()
mtd.Stats[key] = s
agg := d.Stats[key]
agg.Count += s.Count
agg.TotalNanos += s.TotalNanos
d.Stats[key] = agg
}
// any read for key XYZ after a Set/Delete to XYZ is discarded
// because the result doesn't represent prestate.
writtenKey := map[string]struct{}{}
hasMap := map[string]struct{}{}
for _, a := range module.Accesses {
updateSummary(d.Stats, a.Op, a.DurationNanos)
updateSummary(mtd.Stats, a.Op, a.DurationNanos)
switch a.Op {
case Get, IteratorValue:
if _, ok := writtenKey[string(a.Key)]; ok {
Expand Down Expand Up @@ -342,14 +396,6 @@ func (st *StoreTracer) dumpLocked() StoreTraceDump {
return d
}

func updateSummary(stats map[string]OperationSummary, op OpType, durationNanos int64) {
key := op.String()
summary := stats[key]
summary.Count++
summary.TotalNanos += durationNanos
stats[key] = summary
}

// DerivePrestateToJson returns a JSON encoding of the current trace state,
// used by debug_traceTransaction to attach AppState to the response.
func (st *StoreTracer) DerivePrestateToJson() []byte {
Expand Down
116 changes: 116 additions & 0 deletions sei-cosmos/types/tracer_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
package types

import (
"testing"
"time"
)

// retainedBytes sums the key/value payload the tracer holds until the trace
// response is built.
func retainedBytes(st *StoreTracer) int {
total := 0
for _, mt := range st.Modules {
for _, a := range mt.Accesses {
total += len(a.Key) + len(a.Value)
}
for _, it := range mt.Iterators {
total += len(it.Start) + len(it.End)
for _, k := range it.Keys {
total += len(k)
}
}
}
return total
}

// TestStoreTracerBoundsIteratorScanMemory reproduces the blowup: mt.Accesses
// is uncapped, so a large iterator scan clones one key/value per surfaced key
// and retained memory grows linearly with scan size. The 8 MiB budget matches
// the file's "a few MB per module" intent.
func TestStoreTracerBoundsIteratorScanMemory(t *testing.T) {
const (
module = "evm"
numKeys = 100_000 // a large-but-plausible scan over a big store
valueSize = 512 // bytes per surfaced value

maxRetainedBytes = 8 << 20 // 8 MiB
)

st := NewStoreTracer()
iterID := st.StartIterator([]byte("start"), []byte("end"), true, module, 0)

value := make([]byte, valueSize)
for i := range numKeys {
key := []byte{byte(i), byte(i >> 8), byte(i >> 16)}
st.RecordIteratorNext(iterID, module, time.Microsecond)
st.RecordIteratorValue(iterID, key, value, module)
}

got := retainedBytes(st)
if got > maxRetainedBytes {
t.Fatalf("StoreTracer retained %d bytes after scanning %d keys; want <= %d.\n"+
"mt.Accesses is unbounded: each IteratorValue clones key+value into the "+
"access log regardless of the %d-key iterator cap, so memory grows linearly "+
"with the scan size.",
got, numKeys, maxRetainedBytes, maxStoreTraceIteratorKeys)
}
}

// TestStoreTracerStatsAccurateWhenTruncated drives Get past
// maxStoreTraceModuleBytes and asserts that per-op stats (Count/TotalNanos) still
// count every access once the log is Truncated, while Reads keeps only the
// retained prefix. These stats feed historicalLookupNanos (evmrpc/trace_profile.go).
func TestStoreTracerStatsAccurateWhenTruncated(t *testing.T) {
const (
module = "evm"
numGets = 5_000 // each retained Get costs valueSize+keyLen+overhead;
valueSize = 4096 // ~4 KiB per access truncates well before all 5k retain
perGet = time.Microsecond // known per-access duration to verify TotalNanos
)

st := NewStoreTracer()
value := make([]byte, valueSize)
for i := range numGets {
// Distinct keys so each retained Get is a distinct entry in Reads.
key := []byte{byte(i), byte(i >> 8), byte(i >> 16)}
st.Get(key, value, module, perGet)
}

mt, ok := st.Modules[module]
if !ok {
t.Fatalf("module %q missing from tracer", module)
}
if !mt.Truncated {
t.Fatalf("module not Truncated after %d Gets of %d-byte values; cap %d should have been exceeded",
numGets, valueSize, maxStoreTraceModuleBytes)
}
if len(mt.Accesses) >= numGets {
t.Fatalf("retained %d accesses; truncation should drop some of the %d Gets",
len(mt.Accesses), numGets)
}

// Internal running stats must count every access, truncated or not.
if got := mt.stats[Get]; got.Count != numGets || got.TotalNanos != int64(numGets)*perGet.Nanoseconds() {
t.Fatalf("module stats under truncation = {Count:%d TotalNanos:%d}; want {Count:%d TotalNanos:%d}",
got.Count, got.TotalNanos, numGets, int64(numGets)*perGet.Nanoseconds())
}

// The wire dump (what historicalLookupNanos reads) must agree, and Reads
// must reflect only the retained prefix.
d := st.Dump()
mtd := d.Modules[module]
if !mtd.Truncated {
t.Fatal("dump module not flagged Truncated")
}
if got := mtd.Stats[Get.String()]; got.Count != numGets || got.TotalNanos != int64(numGets)*perGet.Nanoseconds() {
t.Fatalf("dump module stats = {Count:%d TotalNanos:%d}; want {Count:%d TotalNanos:%d}",
got.Count, got.TotalNanos, numGets, int64(numGets)*perGet.Nanoseconds())
}
if top := d.Stats[Get.String()]; top.Count != numGets || top.TotalNanos != int64(numGets)*perGet.Nanoseconds() {
t.Fatalf("dump top-level stats = {Count:%d TotalNanos:%d}; want {Count:%d TotalNanos:%d}",
top.Count, top.TotalNanos, numGets, int64(numGets)*perGet.Nanoseconds())
}
if len(mtd.Reads) >= numGets {
t.Fatalf("dump retained %d reads; truncation should bound this below %d", len(mtd.Reads), numGets)
}
}
Loading