From d674129f80298339197edfee35e81769932b2e21 Mon Sep 17 00:00:00 2001 From: MinJae Kwon Date: Thu, 21 May 2026 11:42:52 +0900 Subject: [PATCH] perf(runtime): add typed-slice fast paths to `in` operator `in` dispatches through `runtime.In`, which uses reflect to iterate the right-hand side. The reflect path is correct for any slice type but pays one heap allocation per element on every typed slice, because `reflect.Value.Index(i).Interface()` must box the element when the slice's element type is not already `interface{}`. For `[]any` this boxing is a no-op (the cell is already an interface), so the existing path is already zero-alloc-per-element. For `[]string`, `[]float64`, `[]int64`, `[]int`, and `[]bool` it adds N heap allocations per `in` evaluation, which is significant when `in` runs in a hot loop (e.g. rule engines or expression-based filters over candidate lists). This patch adds a type-switch at the top of `In` for those five common shapes. Each case uses a pure-Go `for ... range` loop, so no reflect, no per-element boxing, no Equal() round-trip. On a needle/element type mismatch the case falls through to the existing reflect path so Equal()'s cross-type promotion semantics are preserved (e.g. an int needle against a []float64 still matches). Benchmarks (Apple M4 Pro, darwin/arm64, -benchtime=1s): bench (N elements) before after speedup StringSlice/N=8 112.8 ns/op, 6 allocs 18.96 ns/op, 1 alloc 6.0x StringSlice/N=64 659.8 ns/op, 34 allocs 31.69 ns/op, 1 alloc 20.8x StringSlice/N=256 2240 ns/op, 130 allocs 60.28 ns/op, 1 alloc 37.2x Float64Slice/N=8 85.1 ns/op, 6 allocs 14.99 ns/op, 1 alloc 5.7x Float64Slice/N=64 442.1 ns/op, 34 allocs 23.66 ns/op, 1 alloc 18.7x Float64Slice/N=256 1794 ns/op, 130 allocs 169.1 ns/op, 1 alloc 10.6x Int64Slice/N=8 82.0 ns/op, 6 allocs 14.77 ns/op, 1 alloc 5.6x Int64Slice/N=64 973.8 ns/op, 34 allocs 23.15 ns/op, 1 alloc 42.1x Int64Slice/N=256 1610 ns/op, 130 allocs 166.0 ns/op, 1 alloc 9.7x AnySliceOfString/N=* unchanged (already uses zero-alloc reflect path) The remaining 1 alloc/op is the call-site boxing the needle into `any` when calling runtime.In; it lives outside the changed code. Tests in `vm/runtime/runtime_test.go` cover hit/miss for each fast path, empty typed slice, cross-type needle (must fall through to reflect), and unchanged `[]any` semantics. The existing test suite is untouched and still passes. Signed-off-by: MinJae Kwon --- vm/runtime/runtime.go | 59 +++++++++++++++++++++ vm/runtime/runtime_bench_test.go | 88 ++++++++++++++++++++++++++++++++ vm/runtime/runtime_test.go | 65 +++++++++++++++++++++++ 3 files changed, 212 insertions(+) create mode 100644 vm/runtime/runtime_bench_test.go create mode 100644 vm/runtime/runtime_test.go diff --git a/vm/runtime/runtime.go b/vm/runtime/runtime.go index bc6f2b4d..d0eeb31f 100644 --- a/vm/runtime/runtime.go +++ b/vm/runtime/runtime.go @@ -210,6 +210,65 @@ func In(needle any, array any) bool { if array == nil { return false } + + // Fast paths for common typed-slice shapes. The generic reflect path below + // works for these too, but it pays one heap allocation per element + // (reflect.Value.Index(i).Interface() boxes the element when the slice's + // element type is not interface{}). These switch cases let `in` over + // []string / []float64 / []int64 / []int / []bool run with zero + // per-element allocations, matching the cost of []any. + // + // On a needle/element type mismatch the case falls through to the reflect + // path below, so Equal()'s cross-type promotion semantics are preserved + // (e.g. comparing int needle against []float64 still works). + switch arr := array.(type) { + case []string: + if s, ok := needle.(string); ok { + for _, e := range arr { + if e == s { + return true + } + } + return false + } + case []float64: + if f, ok := needle.(float64); ok { + for _, e := range arr { + if e == f { + return true + } + } + return false + } + case []int64: + if n, ok := needle.(int64); ok { + for _, e := range arr { + if e == n { + return true + } + } + return false + } + case []int: + if n, ok := needle.(int); ok { + for _, e := range arr { + if e == n { + return true + } + } + return false + } + case []bool: + if bn, ok := needle.(bool); ok { + for _, e := range arr { + if e == bn { + return true + } + } + return false + } + } + v := reflect.ValueOf(array) switch v.Kind() { diff --git a/vm/runtime/runtime_bench_test.go b/vm/runtime/runtime_bench_test.go new file mode 100644 index 00000000..d83a8273 --- /dev/null +++ b/vm/runtime/runtime_bench_test.go @@ -0,0 +1,88 @@ +package runtime_test + +import ( + "strconv" + "testing" + + "github.com/expr-lang/expr/vm/runtime" +) + +// BenchmarkIn benchmarks the `in` operator over the common slice shapes at +// representative list sizes. The interesting comparison is between the typed +// slice variants (which previously paid one heap alloc per element through +// reflect.Value.Index(i).Interface()) and the []any variant (which has always +// been zero-alloc per element because the slice's element type is interface). +// +// Run with: +// +// go test -bench=BenchmarkIn -benchmem ./vm/runtime/ +func BenchmarkIn(b *testing.B) { + sizes := []int{8, 64, 256} + + for _, n := range sizes { + // Plant a hit roughly halfway through so the loop's short-circuit + // fires at the same position in every variant. + strs := make([]string, n) + anys := make([]any, n) + for i := 0; i < n; i++ { + s := strconv.Itoa(i) + strs[i] = s + anys[i] = s + } + strs[n/2] = "needle" + anys[n/2] = "needle" + + b.Run("StringSlice/N="+strconv.Itoa(n), func(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + if !runtime.In("needle", strs) { + b.Fatal("expected hit") + } + } + }) + b.Run("AnySliceOfString/N="+strconv.Itoa(n), func(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + if !runtime.In("needle", anys) { + b.Fatal("expected hit") + } + } + }) + + floats := make([]float64, n) + floatAnys := make([]any, n) + for i := 0; i < n; i++ { + floats[i] = float64(i) + floatAnys[i] = float64(i) + } + floats[n/2] = 99999.0 + floatAnys[n/2] = 99999.0 + + b.Run("Float64Slice/N="+strconv.Itoa(n), func(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + if !runtime.In(99999.0, floats) { + b.Fatal("expected hit") + } + } + }) + + ints := make([]int64, n) + intAnys := make([]any, n) + for i := 0; i < n; i++ { + ints[i] = int64(i) + intAnys[i] = int64(i) + } + ints[n/2] = 99999 + intAnys[n/2] = int64(99999) + + b.Run("Int64Slice/N="+strconv.Itoa(n), func(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + if !runtime.In(int64(99999), ints) { + b.Fatal("expected hit") + } + } + }) + } +} diff --git a/vm/runtime/runtime_test.go b/vm/runtime/runtime_test.go new file mode 100644 index 00000000..caad9d6f --- /dev/null +++ b/vm/runtime/runtime_test.go @@ -0,0 +1,65 @@ +package runtime_test + +import ( + "testing" + + "github.com/expr-lang/expr/internal/testify/assert" + + "github.com/expr-lang/expr/vm/runtime" +) + +// TestIn_TypedSlices exercises the typed-slice fast paths in runtime.In to +// guarantee they preserve the semantics of the reflect-based fallback. +func TestIn_TypedSlices(t *testing.T) { + cases := []struct { + name string + needle any + array any + want bool + }{ + // []string fast path + {"string in []string (hit)", "b", []string{"a", "b", "c"}, true}, + {"string in []string (miss)", "z", []string{"a", "b", "c"}, false}, + {"string in empty []string", "x", []string{}, false}, + + // []float64 fast path + {"float64 in []float64 (hit)", 2.5, []float64{1.0, 2.5, 3.0}, true}, + {"float64 in []float64 (miss)", 9.9, []float64{1.0, 2.5, 3.0}, false}, + + // []int64 fast path + {"int64 in []int64 (hit)", int64(2), []int64{1, 2, 3}, true}, + {"int64 in []int64 (miss)", int64(9), []int64{1, 2, 3}, false}, + + // []int fast path + {"int in []int (hit)", 2, []int{1, 2, 3}, true}, + {"int in []int (miss)", 9, []int{1, 2, 3}, false}, + + // []bool fast path + {"true in []bool (hit)", true, []bool{false, true, false}, true}, + {"false in []bool (hit)", false, []bool{true, true, false}, true}, + {"true in []bool (miss all-false)", true, []bool{false, false}, false}, + + // Type-mismatched needles must fall through to the reflect path so + // Equal()'s cross-type semantics are preserved. e.g. an int needle + // against a []float64 should still match via numeric promotion. + {"int needle in []float64 (promoted hit)", 2, []float64{1.0, 2.0, 3.0}, true}, + {"int needle in []float64 (promoted miss)", 9, []float64{1.0, 2.0, 3.0}, false}, + {"int needle in []int64 (promoted hit)", 2, []int64{1, 2, 3}, true}, + + // []any keeps using the reflect path (unchanged). + {"string in []any (hit)", "b", []any{"a", "b", "c"}, true}, + {"int in []any (hit)", 2, []any{1, 2, 3}, true}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.want, runtime.In(tc.needle, tc.array)) + }) + } +} + +// TestIn_NilArray ensures the early-return for a nil right-hand side is +// preserved (it lives above the typed-slice fast paths). +func TestIn_NilArray(t *testing.T) { + assert.False(t, runtime.In("x", nil)) +}