sei-protocol · amir-deris · Jun 26, 2026 · Jun 26, 2026 · Jun 27, 2026 · Jun 27, 2026
diff --git a/sei-cosmos/types/tracer.go b/sei-cosmos/types/tracer.go
@@ -15,6 +15,18 @@ import (
 const (
 	maxStoreTraceIterators    = 16
 	maxStoreTraceIteratorKeys = 64
+
+	// maxStoreTraceModuleBytes caps the key/value payload retained in a
+	// module's access log. Past this the log is truncated (ModuleTrace.Truncated
+	// set) and further accesses are dropped from it, bounding a pathological tx
+	// (e.g. a huge iterator scan) that would otherwise grow the response without
+	// limit — the cause of the debug_traceStateAccess OOM. Running stats stay
+	// accurate regardless of truncation.
+	maxStoreTraceModuleBytes = 4 << 20 // 4 MiB
+	// perAccessOverheadBytes charges each retained access a fixed cost on top of
+	// its payload so high-count, near-zero-payload patterns (e.g. iterator
+	// Next() floods) are bounded by the same cap.
+	perAccessOverheadBytes = 64
 )
 
 // StoreTracer collects every KVStore access (Get/Has/Set/Delete/iterator)
@@ -28,12 +40,16 @@ type StoreTracer struct {
 	mu             *sync.Mutex
 }
 
-// ModuleTrace holds every access event for a single module within a trace,
-// plus a per-iterator roll-up.
+// ModuleTrace holds the access events for a single module within a trace
+// (capped at maxStoreTraceModuleBytes of retained payload), plus a per-iterator
+// roll-up and running, always-accurate operation stats.
 type ModuleTrace struct {
 	Accesses        []Access
 	Iterators       []*IteratorTrace
 	iteratorIndexBy map[int]int
+	stats           map[OpType]OperationSummary
+	accessBytes     int
+	Truncated       bool
 }
 
 // IteratorTrace aggregates one opened iterator: its bounds, direction, the
@@ -148,7 +164,7 @@ func (st *StoreTracer) StartIterator(start, end []byte, ascending bool, module s
 	st.nextIteratorID++
 	iteratorID := st.nextIteratorID
 
-	mt.Accesses = append(mt.Accesses, Access{
+	mt.record(Access{
 		Op:            IteratorOpen,
 		Key:           slices.Clone(start),
 		Value:         slices.Clone(end),
@@ -179,7 +195,7 @@ func (st *StoreTracer) RecordIteratorValue(iteratorID int, key []byte, value []b
 	defer st.mu.Unlock()
 
 	mt := st.getOrSetModuleTrace(module)
-	mt.Accesses = append(mt.Accesses, Access{
+	mt.record(Access{
 		Op:    IteratorValue,
 		Key:   slices.Clone(key),
 		Value: slices.Clone(value),
@@ -204,7 +220,7 @@ func (st *StoreTracer) RecordIteratorNext(iteratorID int, module string, duratio
 	defer st.mu.Unlock()
 
 	mt := st.getOrSetModuleTrace(module)
-	mt.Accesses = append(mt.Accesses, Access{
+	mt.record(Access{
 		Op:            IteratorNext,
 		DurationNanos: duration.Nanoseconds(),
 	})
@@ -224,6 +240,7 @@ func (st *StoreTracer) getOrSetModuleTrace(module string) (mt *ModuleTrace) {
 			Accesses:        []Access{},
 			Iterators:       []*IteratorTrace{},
 			iteratorIndexBy: map[int]int{},
+			stats:           map[OpType]OperationSummary{},
 		}
 		st.Modules[module] = mt
 	} else {
@@ -232,11 +249,36 @@ func (st *StoreTracer) getOrSetModuleTrace(module string) (mt *ModuleTrace) {
 	return
 }
 
+// record updates the module's running op stats and appends the access to the
+// log, retaining its cloned key/value only while the module stays under
+// maxStoreTraceModuleBytes. Past the cap the log is marked Truncated and the
+// access is dropped from it; stats are updated unconditionally so op
+// counts/durations remain accurate.
+func (mt *ModuleTrace) record(access Access) {
+	s := mt.stats[access.Op]
+	s.Count++
+	s.TotalNanos += access.DurationNanos
+	mt.stats[access.Op] = s
+
+	if mt.Truncated {
+		return
+	}
+	cost := len(access.Key) + len(access.Value) + perAccessOverheadBytes
+	// An access whose own payload exceeds the cap trips truncation on the first
+	// access (nothing retained); intended — we never retain an oversized blob.
+	if mt.accessBytes+cost > maxStoreTraceModuleBytes {
+		mt.Truncated = true
+		return
+	}
+	mt.accessBytes += cost
+	mt.Accesses = append(mt.Accesses, access)
+}
+
 func (st *StoreTracer) recordAccess(module string, access Access) {
 	st.mu.Lock()
 	defer st.mu.Unlock()
 	mt := st.getOrSetModuleTrace(module)
-	mt.Accesses = append(mt.Accesses, access)
+	mt.record(access)
 }
 
 // Clear resets the tracer to its empty state so a single StoreTracer can be
@@ -263,6 +305,9 @@ type ModuleTraceDump struct {
 	Has       []string                    `json:"has"`
 	Stats     map[string]OperationSummary `json:"stats,omitempty"`
 	Iterators []IteratorTraceDump         `json:"iterators,omitempty"`
+	// Truncated is set when the module's access log hit maxStoreTraceModuleBytes;
+	// Reads/Has then reflect only the retained prefix while Stats stay complete.
+	Truncated bool `json:"truncated,omitempty"`
 }
 
 type IteratorTraceDump struct {
@@ -294,16 +339,25 @@ func (st *StoreTracer) dumpLocked() StoreTraceDump {
 		mtd := ModuleTraceDump{
 			Reads:     make(map[string]string),
 			Has:       []string{},
-			Stats:     map[string]OperationSummary{},
+			Stats:     make(map[string]OperationSummary, len(module.stats)),
 			Iterators: make([]IteratorTraceDump, 0, len(module.Iterators)),
+			Truncated: module.Truncated,
+		}
+		// Stats are accumulated at record time, so they stay complete even when
+		// the access log below was truncated.
+		for op, s := range module.stats {
+			key := op.String()
+			mtd.Stats[key] = s
+			agg := d.Stats[key]
+			agg.Count += s.Count
+			agg.TotalNanos += s.TotalNanos
+			d.Stats[key] = agg
 		}
 		// any read for key XYZ after a Set/Delete to XYZ is discarded
 		// because the result doesn't represent prestate.
 		writtenKey := map[string]struct{}{}
 		hasMap := map[string]struct{}{}
 		for _, a := range module.Accesses {
-			updateSummary(d.Stats, a.Op, a.DurationNanos)
-			updateSummary(mtd.Stats, a.Op, a.DurationNanos)
 			switch a.Op {
 			case Get, IteratorValue:
 				if _, ok := writtenKey[string(a.Key)]; ok {
@@ -342,14 +396,6 @@ func (st *StoreTracer) dumpLocked() StoreTraceDump {
 	return d
 }
 
-func updateSummary(stats map[string]OperationSummary, op OpType, durationNanos int64) {
-	key := op.String()
-	summary := stats[key]
-	summary.Count++
-	summary.TotalNanos += durationNanos
-	stats[key] = summary
-}
-
 // DerivePrestateToJson returns a JSON encoding of the current trace state,
 // used by debug_traceTransaction to attach AppState to the response.
 func (st *StoreTracer) DerivePrestateToJson() []byte {

diff --git a/sei-cosmos/types/tracer_test.go b/sei-cosmos/types/tracer_test.go
@@ -0,0 +1,116 @@
+package types
+
+import (
+	"testing"
+	"time"
+)
+
+// retainedBytes sums the key/value payload the tracer holds until the trace
+// response is built.
+func retainedBytes(st *StoreTracer) int {
+	total := 0
+	for _, mt := range st.Modules {
+		for _, a := range mt.Accesses {
+			total += len(a.Key) + len(a.Value)
+		}
+		for _, it := range mt.Iterators {
+			total += len(it.Start) + len(it.End)
+			for _, k := range it.Keys {
+				total += len(k)
+			}
+		}
+	}
+	return total
+}
+
+// TestStoreTracerBoundsIteratorScanMemory reproduces the blowup: mt.Accesses
+// is uncapped, so a large iterator scan clones one key/value per surfaced key
+// and retained memory grows linearly with scan size. The 8 MiB budget matches
+// the file's "a few MB per module" intent.
+func TestStoreTracerBoundsIteratorScanMemory(t *testing.T) {
+	const (
+		module    = "evm"
+		numKeys   = 100_000 // a large-but-plausible scan over a big store
+		valueSize = 512     // bytes per surfaced value
+
+		maxRetainedBytes = 8 << 20 // 8 MiB
+	)
+
+	st := NewStoreTracer()
+	iterID := st.StartIterator([]byte("start"), []byte("end"), true, module, 0)
+
+	value := make([]byte, valueSize)
+	for i := range numKeys {
+		key := []byte{byte(i), byte(i >> 8), byte(i >> 16)}
+		st.RecordIteratorNext(iterID, module, time.Microsecond)
+		st.RecordIteratorValue(iterID, key, value, module)
+	}
+
+	got := retainedBytes(st)
+	if got > maxRetainedBytes {
+		t.Fatalf("StoreTracer retained %d bytes after scanning %d keys; want <= %d.\n"+
+			"mt.Accesses is unbounded: each IteratorValue clones key+value into the "+
+			"access log regardless of the %d-key iterator cap, so memory grows linearly "+
+			"with the scan size.",
+			got, numKeys, maxRetainedBytes, maxStoreTraceIteratorKeys)
+	}
+}
+
+// TestStoreTracerStatsAccurateWhenTruncated drives Get past
+// maxStoreTraceModuleBytes and asserts that per-op stats (Count/TotalNanos) still
+// count every access once the log is Truncated, while Reads keeps only the
+// retained prefix. These stats feed historicalLookupNanos (evmrpc/trace_profile.go).
+func TestStoreTracerStatsAccurateWhenTruncated(t *testing.T) {
+	const (
+		module    = "evm"
+		numGets   = 5_000            // each retained Get costs valueSize+keyLen+overhead;
+		valueSize = 4096             // ~4 KiB per access truncates well before all 5k retain
+		perGet    = time.Microsecond // known per-access duration to verify TotalNanos
+	)
+
+	st := NewStoreTracer()
+	value := make([]byte, valueSize)
+	for i := range numGets {
+		// Distinct keys so each retained Get is a distinct entry in Reads.
+		key := []byte{byte(i), byte(i >> 8), byte(i >> 16)}
+		st.Get(key, value, module, perGet)
+	}
+
+	mt, ok := st.Modules[module]
+	if !ok {
+		t.Fatalf("module %q missing from tracer", module)
+	}
+	if !mt.Truncated {
+		t.Fatalf("module not Truncated after %d Gets of %d-byte values; cap %d should have been exceeded",
+			numGets, valueSize, maxStoreTraceModuleBytes)
+	}
+	if len(mt.Accesses) >= numGets {
+		t.Fatalf("retained %d accesses; truncation should drop some of the %d Gets",
+			len(mt.Accesses), numGets)
+	}
+
+	// Internal running stats must count every access, truncated or not.
+	if got := mt.stats[Get]; got.Count != numGets || got.TotalNanos != int64(numGets)*perGet.Nanoseconds() {
+		t.Fatalf("module stats under truncation = {Count:%d TotalNanos:%d}; want {Count:%d TotalNanos:%d}",
+			got.Count, got.TotalNanos, numGets, int64(numGets)*perGet.Nanoseconds())
+	}
+
+	// The wire dump (what historicalLookupNanos reads) must agree, and Reads
+	// must reflect only the retained prefix.
+	d := st.Dump()
+	mtd := d.Modules[module]
+	if !mtd.Truncated {
+		t.Fatal("dump module not flagged Truncated")
+	}
+	if got := mtd.Stats[Get.String()]; got.Count != numGets || got.TotalNanos != int64(numGets)*perGet.Nanoseconds() {
+		t.Fatalf("dump module stats = {Count:%d TotalNanos:%d}; want {Count:%d TotalNanos:%d}",
+			got.Count, got.TotalNanos, numGets, int64(numGets)*perGet.Nanoseconds())
+	}
+	if top := d.Stats[Get.String()]; top.Count != numGets || top.TotalNanos != int64(numGets)*perGet.Nanoseconds() {
+		t.Fatalf("dump top-level stats = {Count:%d TotalNanos:%d}; want {Count:%d TotalNanos:%d}",
+			top.Count, top.TotalNanos, numGets, int64(numGets)*perGet.Nanoseconds())
+	}
+	if len(mtd.Reads) >= numGets {
+		t.Fatalf("dump retained %d reads; truncation should bound this below %d", len(mtd.Reads), numGets)
+	}
+}