From 33a9fd424a6f78883388bce0b45b9882fda47516 Mon Sep 17 00:00:00 2001 From: Amir Deris Date: Fri, 26 Jun 2026 13:50:05 -0700 Subject: [PATCH 1/3] Added cap on module trace access records, added test --- sei-cosmos/types/tracer.go | 78 ++++++++++++++++++++++++++------- sei-cosmos/types/tracer_test.go | 57 ++++++++++++++++++++++++ 2 files changed, 118 insertions(+), 17 deletions(-) create mode 100644 sei-cosmos/types/tracer_test.go diff --git a/sei-cosmos/types/tracer.go b/sei-cosmos/types/tracer.go index ec01928bd1..0796d9c835 100644 --- a/sei-cosmos/types/tracer.go +++ b/sei-cosmos/types/tracer.go @@ -15,6 +15,18 @@ import ( const ( maxStoreTraceIterators = 16 maxStoreTraceIteratorKeys = 64 + + // maxStoreTraceModuleBytes caps the key/value payload retained in a + // module's access log. Past this the log is truncated (ModuleTrace.Truncated + // set) and further accesses are dropped from it, bounding a pathological tx + // (e.g. a huge iterator scan) that would otherwise grow the response without + // limit — the cause of the debug_traceStateAccess OOM. Running stats stay + // accurate regardless of truncation. + maxStoreTraceModuleBytes = 4 << 20 // 4 MiB + // perAccessOverheadBytes charges each retained access a fixed cost on top of + // its payload so high-count, near-zero-payload patterns (e.g. iterator + // Next() floods) are bounded by the same cap. + perAccessOverheadBytes = 64 ) // StoreTracer collects every KVStore access (Get/Has/Set/Delete/iterator) @@ -28,12 +40,16 @@ type StoreTracer struct { mu *sync.Mutex } -// ModuleTrace holds every access event for a single module within a trace, -// plus a per-iterator roll-up. +// ModuleTrace holds the access events for a single module within a trace +// (capped at maxStoreTraceModuleBytes of retained payload), plus a per-iterator +// roll-up and running, always-accurate operation stats. type ModuleTrace struct { Accesses []Access Iterators []*IteratorTrace iteratorIndexBy map[int]int + stats map[OpType]OperationSummary + accessBytes int + Truncated bool } // IteratorTrace aggregates one opened iterator: its bounds, direction, the @@ -148,7 +164,7 @@ func (st *StoreTracer) StartIterator(start, end []byte, ascending bool, module s st.nextIteratorID++ iteratorID := st.nextIteratorID - mt.Accesses = append(mt.Accesses, Access{ + mt.record(Access{ Op: IteratorOpen, Key: slices.Clone(start), Value: slices.Clone(end), @@ -179,7 +195,7 @@ func (st *StoreTracer) RecordIteratorValue(iteratorID int, key []byte, value []b defer st.mu.Unlock() mt := st.getOrSetModuleTrace(module) - mt.Accesses = append(mt.Accesses, Access{ + mt.record(Access{ Op: IteratorValue, Key: slices.Clone(key), Value: slices.Clone(value), @@ -204,7 +220,7 @@ func (st *StoreTracer) RecordIteratorNext(iteratorID int, module string, duratio defer st.mu.Unlock() mt := st.getOrSetModuleTrace(module) - mt.Accesses = append(mt.Accesses, Access{ + mt.record(Access{ Op: IteratorNext, DurationNanos: duration.Nanoseconds(), }) @@ -224,6 +240,7 @@ func (st *StoreTracer) getOrSetModuleTrace(module string) (mt *ModuleTrace) { Accesses: []Access{}, Iterators: []*IteratorTrace{}, iteratorIndexBy: map[int]int{}, + stats: map[OpType]OperationSummary{}, } st.Modules[module] = mt } else { @@ -232,11 +249,34 @@ func (st *StoreTracer) getOrSetModuleTrace(module string) (mt *ModuleTrace) { return } +// record updates the module's running op stats and appends the access to the +// log, retaining its cloned key/value only while the module stays under +// maxStoreTraceModuleBytes. Past the cap the log is marked Truncated and the +// access is dropped from it; stats are updated unconditionally so op +// counts/durations remain accurate. +func (mt *ModuleTrace) record(access Access) { + s := mt.stats[access.Op] + s.Count++ + s.TotalNanos += access.DurationNanos + mt.stats[access.Op] = s + + if mt.Truncated { + return + } + cost := len(access.Key) + len(access.Value) + perAccessOverheadBytes + if mt.accessBytes+cost > maxStoreTraceModuleBytes { + mt.Truncated = true + return + } + mt.accessBytes += cost + mt.Accesses = append(mt.Accesses, access) +} + func (st *StoreTracer) recordAccess(module string, access Access) { st.mu.Lock() defer st.mu.Unlock() mt := st.getOrSetModuleTrace(module) - mt.Accesses = append(mt.Accesses, access) + mt.record(access) } // Clear resets the tracer to its empty state so a single StoreTracer can be @@ -263,6 +303,9 @@ type ModuleTraceDump struct { Has []string `json:"has"` Stats map[string]OperationSummary `json:"stats,omitempty"` Iterators []IteratorTraceDump `json:"iterators,omitempty"` + // Truncated is set when the module's access log hit maxStoreTraceModuleBytes; + // Reads/Has then reflect only the retained prefix while Stats stay complete. + Truncated bool `json:"truncated,omitempty"` } type IteratorTraceDump struct { @@ -294,16 +337,25 @@ func (st *StoreTracer) dumpLocked() StoreTraceDump { mtd := ModuleTraceDump{ Reads: make(map[string]string), Has: []string{}, - Stats: map[string]OperationSummary{}, + Stats: make(map[string]OperationSummary, len(module.stats)), Iterators: make([]IteratorTraceDump, 0, len(module.Iterators)), + Truncated: module.Truncated, + } + // Stats are accumulated at record time, so they stay complete even when + // the access log below was truncated. + for op, s := range module.stats { + key := op.String() + mtd.Stats[key] = s + agg := d.Stats[key] + agg.Count += s.Count + agg.TotalNanos += s.TotalNanos + d.Stats[key] = agg } // any read for key XYZ after a Set/Delete to XYZ is discarded // because the result doesn't represent prestate. writtenKey := map[string]struct{}{} hasMap := map[string]struct{}{} for _, a := range module.Accesses { - updateSummary(d.Stats, a.Op, a.DurationNanos) - updateSummary(mtd.Stats, a.Op, a.DurationNanos) switch a.Op { case Get, IteratorValue: if _, ok := writtenKey[string(a.Key)]; ok { @@ -342,14 +394,6 @@ func (st *StoreTracer) dumpLocked() StoreTraceDump { return d } -func updateSummary(stats map[string]OperationSummary, op OpType, durationNanos int64) { - key := op.String() - summary := stats[key] - summary.Count++ - summary.TotalNanos += durationNanos - stats[key] = summary -} - // DerivePrestateToJson returns a JSON encoding of the current trace state, // used by debug_traceTransaction to attach AppState to the response. func (st *StoreTracer) DerivePrestateToJson() []byte { diff --git a/sei-cosmos/types/tracer_test.go b/sei-cosmos/types/tracer_test.go new file mode 100644 index 0000000000..3d0bbd1c95 --- /dev/null +++ b/sei-cosmos/types/tracer_test.go @@ -0,0 +1,57 @@ +package types + +import ( + "testing" + "time" +) + +// retainedBytes sums the key/value payload the tracer holds until the trace +// response is built. +func retainedBytes(st *StoreTracer) int { + total := 0 + for _, mt := range st.Modules { + for _, a := range mt.Accesses { + total += len(a.Key) + len(a.Value) + } + for _, it := range mt.Iterators { + total += len(it.Start) + len(it.End) + for _, k := range it.Keys { + total += len(k) + } + } + } + return total +} + +// TestStoreTracerBoundsIteratorScanMemory reproduces the blowup: mt.Accesses +// is uncapped, so a large iterator scan clones one key/value per surfaced key +// and retained memory grows linearly with scan size. The 8 MiB budget matches +// the file's "a few MB per module" intent. +func TestStoreTracerBoundsIteratorScanMemory(t *testing.T) { + const ( + module = "evm" + numKeys = 100_000 // a large-but-plausible scan over a big store + valueSize = 512 // bytes per surfaced value + + maxRetainedBytes = 8 << 20 // 8 MiB + ) + + st := NewStoreTracer() + iterID := st.StartIterator([]byte("start"), []byte("end"), true, module, 0) + + value := make([]byte, valueSize) + for i := range numKeys { + key := []byte{byte(i), byte(i >> 8), byte(i >> 16)} + st.RecordIteratorNext(iterID, module, time.Microsecond) + st.RecordIteratorValue(iterID, key, value, module) + } + + got := retainedBytes(st) + if got > maxRetainedBytes { + t.Fatalf("StoreTracer retained %d bytes after scanning %d keys; want <= %d.\n"+ + "mt.Accesses is unbounded: each IteratorValue clones key+value into the "+ + "access log regardless of the %d-key iterator cap, so memory grows linearly "+ + "with the scan size.", + got, numKeys, maxRetainedBytes, maxStoreTraceIteratorKeys) + } +} From b08adc183bb144b45f5d52a854ecd6d1479c88ff Mon Sep 17 00:00:00 2001 From: Amir Deris Date: Fri, 26 Jun 2026 15:15:32 -0700 Subject: [PATCH 2/3] Added test to confirm stats remain exact under truncation --- sei-cosmos/types/tracer_test.go | 59 +++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/sei-cosmos/types/tracer_test.go b/sei-cosmos/types/tracer_test.go index 3d0bbd1c95..b678c2f942 100644 --- a/sei-cosmos/types/tracer_test.go +++ b/sei-cosmos/types/tracer_test.go @@ -55,3 +55,62 @@ func TestStoreTracerBoundsIteratorScanMemory(t *testing.T) { got, numKeys, maxRetainedBytes, maxStoreTraceIteratorKeys) } } + +// TestStoreTracerStatsAccurateWhenTruncated drives Get past +// maxStoreTraceModuleBytes and asserts that per-op stats (Count/TotalNanos) still +// count every access once the log is Truncated, while Reads keeps only the +// retained prefix. These stats feed historicalLookupNanos (evmrpc/trace_profile.go). +func TestStoreTracerStatsAccurateWhenTruncated(t *testing.T) { + const ( + module = "evm" + numGets = 5_000 // each retained Get costs valueSize+keyLen+overhead; + valueSize = 4096 // ~4 KiB per access truncates well before all 5k retain + perGet = time.Microsecond // known per-access duration to verify TotalNanos + ) + + st := NewStoreTracer() + value := make([]byte, valueSize) + for i := range numGets { + // Distinct keys so each retained Get is a distinct entry in Reads. + key := []byte{byte(i), byte(i >> 8), byte(i >> 16)} + st.Get(key, value, module, perGet) + } + + mt, ok := st.Modules[module] + if !ok { + t.Fatalf("module %q missing from tracer", module) + } + if !mt.Truncated { + t.Fatalf("module not Truncated after %d Gets of %d-byte values; cap %d should have been exceeded", + numGets, valueSize, maxStoreTraceModuleBytes) + } + if len(mt.Accesses) >= numGets { + t.Fatalf("retained %d accesses; truncation should drop some of the %d Gets", + len(mt.Accesses), numGets) + } + + // Internal running stats must count every access, truncated or not. + if got := mt.stats[Get]; got.Count != numGets || got.TotalNanos != int64(numGets)*perGet.Nanoseconds() { + t.Fatalf("module stats under truncation = {Count:%d TotalNanos:%d}; want {Count:%d TotalNanos:%d}", + got.Count, got.TotalNanos, numGets, int64(numGets)*perGet.Nanoseconds()) + } + + // The wire dump (what historicalLookupNanos reads) must agree, and Reads + // must reflect only the retained prefix. + d := st.Dump() + mtd := d.Modules[module] + if !mtd.Truncated { + t.Fatal("dump module not flagged Truncated") + } + if got := mtd.Stats[Get.String()]; got.Count != numGets || got.TotalNanos != int64(numGets)*perGet.Nanoseconds() { + t.Fatalf("dump module stats = {Count:%d TotalNanos:%d}; want {Count:%d TotalNanos:%d}", + got.Count, got.TotalNanos, numGets, int64(numGets)*perGet.Nanoseconds()) + } + if top := d.Stats[Get.String()]; top.Count != numGets || top.TotalNanos != int64(numGets)*perGet.Nanoseconds() { + t.Fatalf("dump top-level stats = {Count:%d TotalNanos:%d}; want {Count:%d TotalNanos:%d}", + top.Count, top.TotalNanos, numGets, int64(numGets)*perGet.Nanoseconds()) + } + if len(mtd.Reads) >= numGets { + t.Fatalf("dump retained %d reads; truncation should bound this below %d", len(mtd.Reads), numGets) + } +} From c9db9ee8dd08bc1db00243d3ed9abf698a2d1ba6 Mon Sep 17 00:00:00 2001 From: Amir Deris Date: Fri, 26 Jun 2026 17:17:12 -0700 Subject: [PATCH 3/3] Updated comment --- sei-cosmos/types/tracer.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sei-cosmos/types/tracer.go b/sei-cosmos/types/tracer.go index 0796d9c835..e1d5a1b295 100644 --- a/sei-cosmos/types/tracer.go +++ b/sei-cosmos/types/tracer.go @@ -264,6 +264,8 @@ func (mt *ModuleTrace) record(access Access) { return } cost := len(access.Key) + len(access.Value) + perAccessOverheadBytes + // An access whose own payload exceeds the cap trips truncation on the first + // access (nothing retained); intended — we never retain an oversized blob. if mt.accessBytes+cost > maxStoreTraceModuleBytes { mt.Truncated = true return