From d674129f80298339197edfee35e81769932b2e21 Mon Sep 17 00:00:00 2001
From: MinJae Kwon <mingrammer@gmail.com>
Date: Thu, 21 May 2026 11:42:52 +0900
Subject: [PATCH] perf(runtime): add typed-slice fast paths to `in` operator

`in` dispatches through `runtime.In`, which uses reflect to iterate the
right-hand side. The reflect path is correct for any slice type but pays
one heap allocation per element on every typed slice, because
`reflect.Value.Index(i).Interface()` must box the element when the slice's
element type is not already `interface{}`.

For `[]any` this boxing is a no-op (the cell is already an interface), so
the existing path is already zero-alloc-per-element. For `[]string`,
`[]float64`, `[]int64`, `[]int`, and `[]bool` it adds one heap allocation
per element visited (up to N per `in` evaluation, fewer when a match
short-circuits the scan), which is significant when `in` runs in a hot
loop (e.g. rule engines or expression-based filters over candidate lists).

This patch adds a type-switch at the top of `In` for those five common
shapes. Each case uses a pure-Go `for ... range` loop, so no reflect, no
per-element boxing, no Equal() round-trip. On a needle/element type
mismatch the case falls through to the existing reflect path so Equal()'s
cross-type promotion semantics are preserved (e.g. an int needle against
a []float64 still matches).

Benchmarks (Apple M4 Pro, darwin/arm64, -benchtime=1s):

  bench (N elements)             before                     after                    speedup
  StringSlice/N=8        112.8 ns/op,   6 allocs    18.96 ns/op, 1 alloc       6.0x
  StringSlice/N=64       659.8 ns/op,  34 allocs    31.69 ns/op, 1 alloc      20.8x
  StringSlice/N=256     2240   ns/op, 130 allocs    60.28 ns/op, 1 alloc      37.2x
  Float64Slice/N=8        85.1 ns/op,   6 allocs    14.99 ns/op, 1 alloc       5.7x
  Float64Slice/N=64      442.1 ns/op,  34 allocs    23.66 ns/op, 1 alloc      18.7x
  Float64Slice/N=256    1794   ns/op, 130 allocs   169.1  ns/op, 1 alloc      10.6x
  Int64Slice/N=8          82.0 ns/op,   6 allocs    14.77 ns/op, 1 alloc       5.6x
  Int64Slice/N=64        973.8 ns/op,  34 allocs    23.15 ns/op, 1 alloc      42.1x
  Int64Slice/N=256      1610   ns/op, 130 allocs   166.0  ns/op, 1 alloc       9.7x
  AnySliceOfString/N=*  unchanged (already uses zero-alloc reflect path)

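The remaining 1 alloc/op is call-site boxing of an argument into `any`
when calling runtime.In (the benchmark needles are compile-time constants,
so this is most likely the slice value rather than the needle); it lives
outside the changed code.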

Tests in `vm/runtime/runtime_test.go` cover hit/miss for each fast path,
empty typed slice, cross-type needle (must fall through to reflect), and
unchanged `[]any` semantics. The existing test suite is untouched and
still passes.

Signed-off-by: MinJae Kwon <mingrammer@gmail.com>
---
 vm/runtime/runtime.go            | 59 +++++++++++++++++++++
 vm/runtime/runtime_bench_test.go | 88 ++++++++++++++++++++++++++++++++
 vm/runtime/runtime_test.go       | 65 +++++++++++++++++++++++
 3 files changed, 212 insertions(+)
 create mode 100644 vm/runtime/runtime_bench_test.go
 create mode 100644 vm/runtime/runtime_test.go

diff --git a/vm/runtime/runtime.go b/vm/runtime/runtime.go
index bc6f2b4d..d0eeb31f 100644
--- a/vm/runtime/runtime.go
+++ b/vm/runtime/runtime.go
@@ -210,6 +210,65 @@ func In(needle any, array any) bool {
 	if array == nil {
 		return false
 	}
+
+	// Fast paths for common typed-slice shapes. The generic reflect path below
+	// works for these too, but it pays one heap allocation per element
+	// (reflect.Value.Index(i).Interface() boxes the element when the slice's
+	// element type is not interface{}). These switch cases let `in` over
+	// []string / []float64 / []int64 / []int / []bool run with zero
+	// per-element allocations, matching the cost of []any.
+	//
+	// On a needle/element type mismatch the case falls through to the reflect
+	// path below, so Equal()'s cross-type promotion semantics are preserved
+	// (e.g. comparing int needle against []float64 still works).
+	switch arr := array.(type) {
+	case []string:
+		if s, ok := needle.(string); ok {
+			for _, e := range arr {
+				if e == s {
+					return true
+				}
+			}
+			return false
+		}
+	case []float64:
+		if f, ok := needle.(float64); ok {
+			for _, e := range arr {
+				if e == f {
+					return true
+				}
+			}
+			return false
+		}
+	case []int64:
+		if n, ok := needle.(int64); ok {
+			for _, e := range arr {
+				if e == n {
+					return true
+				}
+			}
+			return false
+		}
+	case []int:
+		if n, ok := needle.(int); ok {
+			for _, e := range arr {
+				if e == n {
+					return true
+				}
+			}
+			return false
+		}
+	case []bool:
+		if bn, ok := needle.(bool); ok {
+			for _, e := range arr {
+				if e == bn {
+					return true
+				}
+			}
+			return false
+		}
+	}
+
 	v := reflect.ValueOf(array)
 
 	switch v.Kind() {
diff --git a/vm/runtime/runtime_bench_test.go b/vm/runtime/runtime_bench_test.go
new file mode 100644
index 00000000..d83a8273
--- /dev/null
+++ b/vm/runtime/runtime_bench_test.go
@@ -0,0 +1,88 @@
+package runtime_test
+
+import (
+	"strconv"
+	"testing"
+
+	"github.com/expr-lang/expr/vm/runtime"
+)
+
+// BenchmarkIn times runtime.In (the `in` operator) over []string, []float64
+// and []int64 haystacks of 8, 64 and 256 elements, with a []any of strings
+// as the baseline. Typed slices used to pay one heap alloc per visited
+// element via reflect.Value.Index(i).Interface(); []any elements do not.
+// NOTE(review): floatAnys and intAnys are filled below but never benchmarked.
+//
+// Run with:
+//
+//	go test -bench=BenchmarkIn -benchmem ./vm/runtime/
+func BenchmarkIn(b *testing.B) {
+	sizes := []int{8, 64, 256}
+
+	for _, n := range sizes {
+		// Put the match at index n/2 so every variant stops scanning at the
+		// same position and the per-size results are directly comparable.
+		strs := make([]string, n)
+		anys := make([]any, n)
+		for i := 0; i < n; i++ {
+			s := strconv.Itoa(i)
+			strs[i] = s
+			anys[i] = s
+		}
+		strs[n/2] = "needle"
+		anys[n/2] = "needle"
+
+		b.Run("StringSlice/N="+strconv.Itoa(n), func(b *testing.B) {
+			b.ReportAllocs()
+			for i := 0; i < b.N; i++ {
+				if !runtime.In("needle", strs) {
+					b.Fatal("expected hit")
+				}
+			}
+		})
+		b.Run("AnySliceOfString/N="+strconv.Itoa(n), func(b *testing.B) {
+			b.ReportAllocs()
+			for i := 0; i < b.N; i++ {
+				if !runtime.In("needle", anys) {
+					b.Fatal("expected hit")
+				}
+			}
+		})
+
+		floats := make([]float64, n)
+		floatAnys := make([]any, n)
+		for i := 0; i < n; i++ {
+			floats[i] = float64(i)
+			floatAnys[i] = float64(i)
+		}
+		floats[n/2] = 99999.0
+		floatAnys[n/2] = 99999.0
+
+		b.Run("Float64Slice/N="+strconv.Itoa(n), func(b *testing.B) {
+			b.ReportAllocs()
+			for i := 0; i < b.N; i++ {
+				if !runtime.In(99999.0, floats) {
+					b.Fatal("expected hit")
+				}
+			}
+		})
+
+		ints := make([]int64, n)
+		intAnys := make([]any, n)
+		for i := 0; i < n; i++ {
+			ints[i] = int64(i)
+			intAnys[i] = int64(i)
+		}
+		ints[n/2] = 99999
+		intAnys[n/2] = int64(99999)
+
+		b.Run("Int64Slice/N="+strconv.Itoa(n), func(b *testing.B) {
+			b.ReportAllocs()
+			for i := 0; i < b.N; i++ {
+				if !runtime.In(int64(99999), ints) {
+					b.Fatal("expected hit")
+				}
+			}
+		})
+	}
+}
diff --git a/vm/runtime/runtime_test.go b/vm/runtime/runtime_test.go
new file mode 100644
index 00000000..caad9d6f
--- /dev/null
+++ b/vm/runtime/runtime_test.go
@@ -0,0 +1,65 @@
+package runtime_test
+
+import (
+	"testing"
+
+	"github.com/expr-lang/expr/internal/testify/assert"
+
+	"github.com/expr-lang/expr/vm/runtime"
+)
+
+// TestIn_TypedSlices checks runtime.In against a table of needle/array/want
+// cases: typed-slice fast paths, mismatched needle types, and plain []any.
+func TestIn_TypedSlices(t *testing.T) {
+	cases := []struct {
+		name   string
+		needle any
+		array  any
+		want   bool
+	}{
+		// string needle in []string: hit, miss, and empty slice
+		{"string in []string (hit)", "b", []string{"a", "b", "c"}, true},
+		{"string in []string (miss)", "z", []string{"a", "b", "c"}, false},
+		{"string in empty []string", "x", []string{}, false},
+
+		// float64 needle in []float64: hit and miss
+		{"float64 in []float64 (hit)", 2.5, []float64{1.0, 2.5, 3.0}, true},
+		{"float64 in []float64 (miss)", 9.9, []float64{1.0, 2.5, 3.0}, false},
+
+		// int64 needle in []int64: hit and miss
+		{"int64 in []int64 (hit)", int64(2), []int64{1, 2, 3}, true},
+		{"int64 in []int64 (miss)", int64(9), []int64{1, 2, 3}, false},
+
+		// int needle in []int: hit and miss
+		{"int in []int (hit)", 2, []int{1, 2, 3}, true},
+		{"int in []int (miss)", 9, []int{1, 2, 3}, false},
+
+		// bool needle in []bool: both values as hits, plus a miss
+		{"true in []bool (hit)", true, []bool{false, true, false}, true},
+		{"false in []bool (hit)", false, []bool{true, true, false}, true},
+		{"true in []bool (miss all-false)", true, []bool{false, false}, false},
+
+		// Needle type differs from the element type: the fast path is skipped
+		// and the reflect path decides, so numeric promotion still applies
+		// (e.g. an int needle must still match inside a []float64).
+		{"int needle in []float64 (promoted hit)", 2, []float64{1.0, 2.0, 3.0}, true},
+		{"int needle in []float64 (promoted miss)", 9, []float64{1.0, 2.0, 3.0}, false},
+		{"int needle in []int64 (promoted hit)", 2, []int64{1, 2, 3}, true},
+
+		// []any matches no fast-path case and takes the reflect path as before.
+		{"string in []any (hit)", "b", []any{"a", "b", "c"}, true},
+		{"int in []any (hit)", 2, []any{1, 2, 3}, true},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			assert.Equal(t, tc.want, runtime.In(tc.needle, tc.array))
+		})
+	}
+}
+
+// TestIn_NilArray checks that runtime.In returns false for a nil array; that
+// guard sits before the typed-slice switch, so no fast path is reached.
+func TestIn_NilArray(t *testing.T) {
+	assert.False(t, runtime.In("x", nil))
+}