Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 88 additions & 1 deletion pkg/tui/components/message/message.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,16 @@ type Model interface {
SetSelected(selected bool)
SetHovered(hovered bool)
CodeBlocks() []markdown.CodeBlock
// Finalize releases per-message render state that is only needed while the
// message is actively streaming. The message content and code-block metadata
// are preserved; calling View() afterwards still produces correct output
// without retaining a per-view render cache or IncrementalRenderer.
Finalize()
// HasLiveRenderState reports whether this view currently retains a
// populated renderCache or an IncrementalRenderer instance. Used by tests
// to assert that finalized views have actually released their per-message
// render state without reaching into unexported fields via reflection.
HasLiveRenderState() bool
}

// messageModel implements Model
Expand Down Expand Up @@ -59,6 +69,14 @@ type messageModel struct {
// streamed-in chunks only re-render the trailing block instead of the whole
// accumulated markdown each time.
mdRenderer *markdown.IncrementalRenderer

// finalized is set by Finalize() once the message is no longer the active
// streaming view. After it is set, Render() still produces correct output,
// but does not store anything in renderCache and does not retain an
// IncrementalRenderer between calls — both are pure caches whose memory
// dominates a long session, and they are not worth keeping for messages
// that are unlikely to be re-rendered hot.
finalized bool
}

// renderCache stores the most recent Render result keyed by the inputs that
Expand Down Expand Up @@ -99,6 +117,11 @@ func (mv *messageModel) Init() tea.Cmd {
}

func (mv *messageModel) SetMessage(msg *types.Message) {
// Un-finalize when the underlying message is changed (e.g. streaming
// resumes into this view). Finalize is meant for views that have
// permanently lost their actively-streaming status; mutating the message
// re-arms the per-message caches so subsequent renders are fast again.
mv.finalized = false
// If the new content is not an extension of the previous one (different
// message, or the message was edited), drop the IncrementalRenderer's
// cached prefix so its memory is released immediately rather than on the
Expand Down Expand Up @@ -177,7 +200,11 @@ func (mv *messageModel) Render(width int) string {
// MessageTypeAssistant placeholder) animate on every tick, so the result is
// not cacheable. Everything else is a pure function of the inputs tracked in
// renderCache below.
cacheable := !mv.isSpinnerDriven()
// Spinner-driven messages animate every tick and are not cacheable.
// Finalized messages skip writing into renderCache so the per-view
// retained ANSI string does not pile up across long sessions; the chat
// list's bounded LRU still memoizes their rendered output.
cacheable := !mv.isSpinnerDriven() && !mv.finalized
Comment thread
aheritier marked this conversation as resolved.
if cacheable {
c := &mv.renderCache
if c.valid &&
Expand Down Expand Up @@ -376,10 +403,22 @@ func (mv *messageModel) render(width int) string {
// so each new chunk only re-parses the trailing region. The first render at a
// given width is equivalent to a fresh full render.
//
// For finalized messages we use a transient renderer that is discarded after
// each call. Finalized messages are no longer streamed, so the prefix-cache
// inside an IncrementalRenderer is not earning its keep — keeping it resident
// across the lifetime of every historical message in a session is the
// dominant source of retained memory in long sessions. The parent message
// list's bounded rendered-item LRU can still memoize finalized message output
// without storing an additional per-view copy.
//
// It also returns the list of fenced code blocks emitted by the renderer so
// that callers can map clicks on the per-block copy affordance back to the
// underlying raw code.
func (mv *messageModel) renderAssistantMarkdown(content string, width int) (string, []markdown.CodeBlock, error) {
if mv.finalized {
r := markdown.NewIncrementalRenderer(width)
return r.RenderWithCodeBlocks(content)
}
if mv.mdRenderer == nil {
mv.mdRenderer = markdown.NewIncrementalRenderer(width)
} else {
Expand Down Expand Up @@ -442,6 +481,54 @@ func (mv *messageModel) StopAnimation() {
}
}

// Finalize releases per-message render state that no longer needs to be kept
// resident once the message is no longer the actively streaming view. This is
// called by the parent message list when a new top-level message arrives, and
// for every historical view loaded from a session.
//
// Finalize is a no-op for non-assistant message types: only assistant views
// allocate an IncrementalRenderer and accumulate large rendered ANSI strings
// during streaming, so user messages, tool calls, error/welcome banners and
// the like have nothing to release. Setting `finalized = true` on those views
// would only have the side-effect of permanently disabling renderCache for
// selected/hovered states (which bypass the parent's bounded LRU), forcing a
// fresh re-render on every animation tick. Restricting the disable to
// assistant views keeps the leak fix scoped to the type that actually leaks.
//
// The retained payload of an assistant view is dominated by the renderCache
// (a copy of the rendered ANSI string) and the IncrementalRenderer's internal
// caches (last rendered prefix, glamour AST state). Both are pure render
// state — they can be regenerated from mv.message on demand. We deliberately
// leave mv.message, mv.codeBlocks and the spinner untouched so that View()
// keeps returning correct output, click-targeting on code blocks still works,
// and the spinner-driven types continue to animate.
//
// Finalize is idempotent and durable: subsequent renders do not re-populate
// renderCache or store an IncrementalRenderer on the struct. This is
// important because the parent message list invalidates its own LRU on
// several events (spinner removal, theme change, window resize) and would
// otherwise re-render every previously finalized view, putting the per-
// message render state right back where it was.
func (mv *messageModel) Finalize() {
if mv.message == nil || mv.message.Type != types.MessageTypeAssistant {
return
}
mv.renderCache = renderCache{}
if mv.mdRenderer != nil {
mv.mdRenderer.Reset()
mv.mdRenderer = nil
}
mv.finalized = true
}

// HasLiveRenderState reports whether this view still retains per-message
// render state — either a populated renderCache or an IncrementalRenderer
// instance. Used as a structural assertion in regression tests that verify
// Finalize() actually released what it was supposed to release.
func (mv *messageModel) HasLiveRenderState() bool {
return mv.renderCache.result != "" || mv.mdRenderer != nil
}

// SetSize sets the dimensions of the message view
func (mv *messageModel) SetSize(width, height int) tea.Cmd {
if mv.width != width {
Expand Down
169 changes: 169 additions & 0 deletions pkg/tui/components/messages/leak_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
package messages

import (
"fmt"
"runtime"
"strings"
"testing"

"github.com/docker/docker-agent/pkg/tui/components/message"
"github.com/docker/docker-agent/pkg/tui/core/layout"
"github.com/docker/docker-agent/pkg/tui/service"
)

// TestLongSessionDoesNotRetainPerMessageRenderState streams a long sequence
// of large assistant messages through the same public API the TUI uses
// (AddUserMessage / AddAssistantMessage / AppendToLastMessage), rendering the
// active assistant view between chunks so messageModel.renderCache and
// IncrementalRenderer state are exercised. It asserts two things:
//
// 1. Heap growth stays far below the pre-fix slope. Before the fix every
// assistant view kept its own renderCache (~70 KB rendered ANSI) and an
// IncrementalRenderer (cached prefix + rendered output) for a measured
// ~161 KB of retained state per message. After the fix, per-message render
// state is released as soon as the message stops being the active
// streaming view, so the remaining slope is mostly the raw session content.
//
// 2. The structural invariant: after every non-active assistant view has
// been finalized, only the single actively-streaming view (at most one)
// should be holding a non-empty renderCache or a live IncrementalRenderer.
//
// Run with -short to skip; the test is deterministic but takes a few seconds.
func TestLongSessionDoesNotRetainPerMessageRenderState(t *testing.T) {
if testing.Short() {
t.Skip("skipping leak test in -short mode")
}

// `go test` may reset runtime.MemProfileRate when its own -memprofile flag
// is unset, which would override the value set in init().
runtime.MemProfileRate = 1

const (
warmupMessages = 200
measureMessages = 200
chunkSize = 256
messageBytes = 8192
viewWidth = 120
viewHeight = 40

// Per-message heap budget for the retained slope. Pre-fix slope was
// ~161 KB/msg and did not flatten. The remaining post-fix slope is mostly
// the original message content and small per-view bookkeeping.
maxGrowthPerMessageBytes = 30 * 1024
)

sessionState := &service.SessionState{}
m := NewScrollableView(viewWidth, viewHeight, sessionState).(*model)
m.SetSize(viewWidth, viewHeight)

body := buildMarkdownBody(messageBytes)

heapInuse := func() uint64 {
runtime.GC()
runtime.GC() // second pass to settle finalizers
var ms runtime.MemStats
runtime.ReadMemStats(&ms)
return ms.HeapInuse
}

renderLastView := func() {
if len(m.views) == 0 {
return
}
_ = m.views[len(m.views)-1].View()
}

streamMessage := func(i int) {
m.AddUserMessage(fmt.Sprintf("user message %d", i))
m.AddAssistantMessage()
for off := 0; off < len(body); off += chunkSize {
end := min(off+chunkSize, len(body))
m.AppendToLastMessage("root", body[off:end])
if (off/chunkSize)%4 == 0 {
renderLastView()
}
}
renderLastView()
}

// Warmup: stream enough messages to saturate the chat-list LRU.
for i := range warmupMessages {
streamMessage(i)
}
afterWarmup := heapInuse()

// Measurement: stream another batch and observe the slope.
for i := range measureMessages {
streamMessage(warmupMessages + i)
}
afterMeasure := heapInuse()

growth := max(int64(afterMeasure)-int64(afterWarmup), 0)
perMessage := growth / measureMessages

t.Logf("warmup_heap=%d KB measure_heap=%d KB delta=%d KB perMsg=%d KB (budget=%d KB)",
afterWarmup/1024, afterMeasure/1024, growth/1024, perMessage/1024,
maxGrowthPerMessageBytes/1024)

live := countLiveRenderState(t, m.views)
t.Logf("views=%d live render-state holders=%d (LRU cap=%d, current=%d)",
len(m.views), live, renderedItemsCacheSize, m.renderedItems.Len())

if perMessage > maxGrowthPerMessageBytes {
t.Fatalf("heap growth %d B/message exceeds budget %d B/message — "+
"per-message render state appears to be retained after finalization",
perMessage, maxGrowthPerMessageBytes)
}

if live > 2 {
t.Fatalf("expected at most 2 messageModels to hold render-cache state "+
"(the active streaming view), got %d out of %d views",
live, len(m.views))
}
}

// countLiveRenderState walks the view list and reports how many message.Model
// views still hold per-message render state — a populated renderCache or an
// IncrementalRenderer. Uses the exported HasLiveRenderState() so the test
// does not depend on reflection across package boundaries.
func countLiveRenderState(t *testing.T, views []layout.Model) int {
t.Helper()
var live int
for _, v := range views {
mv, ok := v.(message.Model)
if !ok {
continue
}
if mv.HasLiveRenderState() {
live++
}
}
return live
}

// buildMarkdownBody returns a deterministic markdown blob of approximately
// the requested size. The mix of prose paragraphs and a fenced code block
// exercises both stable-boundary detection and code-block caching in the
// IncrementalRenderer (the two structures called out in the leak hypothesis).
func buildMarkdownBody(targetBytes int) string {
const paragraph = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. " +
"Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. " +
"Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris " +
"nisi ut aliquip ex ea commodo consequat.\n\n"
const codeBlock = "```go\n" +
"func example(x int) int {\n" +
" // some representative code\n" +
" return x * 2\n" +
"}\n" +
"```\n\n"

var b strings.Builder
b.Grow(targetBytes + len(paragraph))
for b.Len() < targetBytes {
b.WriteString(paragraph)
if b.Len()%3 == 0 {
b.WriteString(codeBlock)
}
}
return b.String()
}
Loading
Loading