ENTERPILOT · SantiagoDePolonia · Jun 15, 2026 · Jun 15, 2026 · Jun 15, 2026 · Jun 15, 2026
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -114,7 +114,7 @@ Full reference: `.env.template` and `config/config.yaml`
   - `ENABLE_PASSTHROUGH_ROUTES` (true: Enable provider-native passthrough routes under /p/{provider}/...)
   - `ALLOW_PASSTHROUGH_V1_ALIAS` (true: Allow /p/{provider}/v1/... aliases while keeping /p/{provider}/... canonical)
   - `ENABLED_PASSTHROUGH_PROVIDERS` (openai,anthropic,openrouter,zai,vllm: Comma-separated list of enabled passthrough providers)
-  - `REALTIME_ENABLED` (true: Expose the realtime speech-to-speech websocket at `/v1/realtime` and the `/p/{provider}/v1/realtime` upgrade. The canonical `/v1/realtime` route needs only `REALTIME_ENABLED`; the `/p/{provider}/v1/realtime` upgrade additionally requires passthrough routes enabled (`ENABLE_PASSTHROUGH_ROUTES`) with the provider listed in `ENABLED_PASSTHROUGH_PROVIDERS`. The gateway is a transparent websocket reverse proxy — it injects provider credentials and relays the provider's realtime event schema verbatim (no translation), so clients connect without provider API keys. Only providers implementing realtime accept sessions; currently OpenAI and xAI/Grok Voice Agent (both `wss://…/v1/realtime`, OpenAI-realtime-compatible) and Bailian/Qwen-Omni (`wss://dashscope…/api-ws/v1/realtime`). xAI's voice models (e.g. `grok-voice-latest`) are not returned by upstream `/models` discovery, so configure them via `XAI_MODELS`; xAI bills realtime per-minute and reports no token usage. Sessions are gated by the same model-access and budget rules as other model endpoints; usage is tracked per `response.done` event, accepting both the OpenAI singular and Alibaba plural token-detail spellings.)
+  - `REALTIME_ENABLED` (true: Expose the realtime speech-to-speech websocket at `/v1/realtime` and the `/p/{provider}/v1/realtime` upgrade. The canonical `/v1/realtime` route needs only `REALTIME_ENABLED`; the `/p/{provider}/v1/realtime` upgrade additionally requires passthrough routes enabled (`ENABLE_PASSTHROUGH_ROUTES`) with the provider listed in `ENABLED_PASSTHROUGH_PROVIDERS`. The gateway is a transparent websocket reverse proxy — it injects provider credentials and relays the provider's realtime event schema verbatim (no translation), so clients connect without provider API keys. Only providers implementing realtime accept sessions. Currently: OpenAI and xAI/Grok Voice Agent (both `wss://…/v1/realtime`); Z.ai/Zhipu GLM-Realtime (`wss://…/api/paas/v4/realtime`); Bailian/Qwen-Omni (`wss://dashscope…/api-ws/v1/realtime`); and Azure OpenAI (`wss://<resource>/openai/realtime?api-version=…&deployment=…`, `api-key` header). All use OpenAI's realtime event schema (Z.ai adds extensions that relay transparently). Provider-specific notes: xAI voice models (e.g. `grok-voice-latest`) aren't in upstream `/models` discovery, so configure them via `XAI_MODELS`, and xAI bills realtime per-minute (no token usage reported); Azure realtime requires a realtime-capable `AZURE_API_VERSION` (the default may be too old) and the model selects the Azure deployment. (MiniMax was evaluated but skipped — its conversational realtime schema is not OpenAI-compatible.) Sessions are gated by the same model-access and budget rules as other model endpoints; usage is tracked per `response.done` event, accepting both the OpenAI singular and Alibaba plural token-detail spellings.)
 - **Storage:** `STORAGE_TYPE` (sqlite), `SQLITE_PATH` (data/gomodel.db), `POSTGRES_URL`, `MONGODB_URL`
 - **Models:** `MODELS_ENABLED_BY_DEFAULT` (true), `MODEL_OVERRIDES_ENABLED` (true), `KEEP_ONLY_ALIASES_AT_MODELS_ENDPOINT` (false), `CONFIGURED_PROVIDER_MODELS_MODE` (`fallback` or `allowlist`, default `fallback`; `allowlist` skips upstream `/models` for providers with configured lists); persisted overrides restrict/allow selectors with `user_paths`. When alias-only models listing is enabled, `GET /v1/models` returns only model aliases, not full concrete model specs, to operators.
 - **Audit logging:** `LOGGING_ENABLED` (false), `LOGGING_LOG_BODIES` (false), `LOGGING_LOG_AUDIO_BODIES` (false: refines `LOGGING_LOG_BODIES` for audio endpoints — base64 audio for both `/v1/audio/speech` output and `/v1/audio/transcriptions` upload (≤8 MB each, else `too_large`) + dashboard playback, plus transcription upload metadata; no effect unless `LOGGING_LOG_BODIES` is on, in which case audio-off records a placeholder), `LOGGING_LOG_HEADERS` (false), `LOGGING_RETENTION_DAYS` (30)

diff --git a/internal/providers/azure/azure.go b/internal/providers/azure/azure.go
@@ -29,12 +29,13 @@ type Provider struct {
 	resourceProvider       *openai.CompatibleProvider
 	openAIResourceProvider *openai.CompatibleProvider
 	apiVersion             string
+	apiKey                 string // retained to inject the api-key header on the realtime target
 }
 
 func New(providerCfg providers.ProviderConfig, opts providers.ProviderOptions) core.Provider {
 	baseURL := providers.ResolveBaseURL(providerCfg.BaseURL, "https://example.invalid")
 	apiVersion := providers.ResolveAPIVersion(providerCfg.APIVersion, defaultAPIVersion)
-	p := &Provider{apiVersion: apiVersion}
+	p := &Provider{apiVersion: apiVersion, apiKey: providerCfg.APIKey}
 	clientCfg := openai.CompatibleProviderConfig{
 		ProviderName: "azure",
 		BaseURL:      baseURL,
@@ -51,7 +52,7 @@ func New(providerCfg providers.ProviderConfig, opts providers.ProviderOptions) c
 }
 
 func NewWithHTTPClient(apiKey string, httpClient *http.Client, hooks llmclient.Hooks) *Provider {
-	p := &Provider{apiVersion: defaultAPIVersion}
+	p := &Provider{apiVersion: defaultAPIVersion, apiKey: apiKey}
 	cfg := openai.CompatibleProviderConfig{
 		ProviderName: "azure",
 		BaseURL:      "https://example.invalid",

diff --git a/internal/providers/azure/realtime.go b/internal/providers/azure/realtime.go
@@ -0,0 +1,66 @@
+package azure
+
+import (
+	"context"
+	"net/http"
+	"net/url"
+	"strings"
+
+	"gomodel/internal/core"
+)
+
+// RealtimeTarget satisfies core.RealtimeProvider for Azure OpenAI realtime sessions.
+// It rejects a nil request or blank model, then hands the trimmed model to
+// realtimeURL as the deployment name. A configured API key travels in the api-key
+// header (no Authorization header is set), so the returned target must not be logged.
+func (p *Provider) RealtimeTarget(_ context.Context, req *core.RealtimeRequest) (*core.RealtimeTarget, error) {
+	deployment := ""
+	if req != nil {
+		deployment = strings.TrimSpace(req.Model)
+	}
+	if deployment == "" {
+		return nil, core.NewInvalidRequestError("model is required for realtime sessions", nil)
+	}
+	wsURL, err := p.realtimeURL(deployment)
+	if err != nil {
+		return nil, err
+	}
+	hdr := http.Header{}
+	if p.apiKey != "" {
+		hdr.Set("api-key", p.apiKey)
+	}
+	return &core.RealtimeTarget{URL: wsURL, Headers: hdr}, nil
+}
+
+// realtimeURL derives the websocket dial URL from resourceRootBaseURL(p.GetBaseURL()):
+// <ws|wss>://<host><prefix>/openai/realtime?api-version=…&deployment=…
+// The deployment argument is written into the deployment query parameter.
+func (p *Provider) realtimeURL(deployment string) (string, error) {
+	root := resourceRootBaseURL(p.GetBaseURL())
+	target, parseErr := url.Parse(root)
+	if parseErr != nil || target.Host == "" {
+		return "", core.NewInvalidRequestError("invalid azure realtime base url: "+root, parseErr)
+	}
+	// Swap the HTTP(S) scheme for its websocket counterpart. A missing scheme is
+	// treated as TLS; anything unrecognized is refused.
+	switch scheme := strings.ToLower(target.Scheme); scheme {
+	case "http", "ws":
+		target.Scheme = "ws"
+	case "", "https", "wss":
+		target.Scheme = "wss"
+	default:
+		return "", core.NewInvalidRequestError("unsupported azure realtime base url scheme: "+target.Scheme, nil)
+	}
+	// Remove a trailing /openai or /openai/v1 first, so a base URL that already
+	// targets the OpenAI sub-path cannot yield /openai/openai/realtime.
+	prefix := strings.TrimSuffix(strings.TrimSuffix(strings.TrimRight(target.Path, "/"), "/openai/v1"), "/openai")
+	target.Path = prefix + "/openai/realtime"
+	target.RawQuery = url.Values{
+		"api-version": {p.apiVersion},
+		"deployment":  {deployment},
+	}.Encode()
+	return target.String(), nil
+}
+
+// Compile-time check: the build fails if *Provider stops satisfying core.RealtimeProvider.
+var _ core.RealtimeProvider = (*Provider)(nil)
diff --git a/internal/providers/azure/realtime_test.go b/internal/providers/azure/realtime_test.go
@@ -0,0 +1,87 @@
+package azure
+
+import (
+	"context"
+	"net/url"
+	"testing"
+
+	"gomodel/internal/core"
+	"gomodel/internal/providers"
+)
+
+// TestRealtimeTarget checks the wss URL, query parameters and api-key header (never Bearer) for a deployment-scoped base URL.
+func TestRealtimeTarget(t *testing.T) {
+	const secret = "azure-secret-key"
+	cfg := providers.ProviderConfig{
+		APIKey:     secret,
+		BaseURL:    "https://myres.openai.azure.com/openai/deployments/gpt-realtime",
+		APIVersion: "2025-04-01-preview",
+	}
+	prov := New(cfg, providers.ProviderOptions{}).(*Provider)
+	target, err := prov.RealtimeTarget(context.Background(), &core.RealtimeRequest{Model: "gpt-realtime"})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	parsed, err := url.Parse(target.URL)
+	if err != nil {
+		t.Fatalf("parse target url: %v", err)
+	}
+	if parsed.Scheme != "wss" || parsed.Host != "myres.openai.azure.com" || parsed.Path != "/openai/realtime" {
+		t.Errorf("endpoint = %q, want wss://myres.openai.azure.com/openai/realtime", target.URL)
+	}
+	query := parsed.Query()
+	if got := query.Get("deployment"); got != "gpt-realtime" {
+		t.Errorf("deployment = %q, want gpt-realtime", got)
+	}
+	if got := query.Get("api-version"); got != "2025-04-01-preview" {
+		t.Errorf("api-version = %q, want 2025-04-01-preview", got)
+	}
+	if got := target.Headers.Get("api-key"); got != secret {
+		t.Errorf("api-key = %q, want %q", got, secret)
+	}
+	if auth := target.Headers.Get("Authorization"); auth != "" {
+		t.Error("Authorization header must not be set for Azure (uses api-key)")
+	}
+}
+
+// TestRealtimeTargetStripsExistingOpenAIPath guards against a doubled /openai segment:
+// a base already rooted at /openai or /openai/v1 must not yield /openai/openai/realtime.
+func TestRealtimeTargetStripsExistingOpenAIPath(t *testing.T) {
+	bases := []string{"https://myres.openai.azure.com/openai", "https://myres.openai.azure.com/openai/v1"}
+	for _, base := range bases {
+		// Each base gets its own provider; the first hard failure stops the test.
+		prov := New(providers.ProviderConfig{APIKey: "k", BaseURL: base}, providers.ProviderOptions{}).(*Provider)
+		target, err := prov.RealtimeTarget(context.Background(), &core.RealtimeRequest{Model: "m"})
+		if err != nil {
+			t.Fatalf("base %q: unexpected error: %v", base, err)
+		}
+		parsed, err := url.Parse(target.URL)
+		if err != nil {
+			t.Fatalf("base %q: parse target url: %v", base, err)
+		}
+		if got := parsed.Path; got != "/openai/realtime" {
+			t.Errorf("base %q: path = %q, want /openai/realtime", base, got)
+		}
+	}
+}
+
+// TestRealtimeTargetOmitsAuthWhenNoKey verifies that an empty API key adds no api-key header.
+func TestRealtimeTargetOmitsAuthWhenNoKey(t *testing.T) {
+	cfg := providers.ProviderConfig{APIKey: "", BaseURL: "https://myres.openai.azure.com"}
+	prov := New(cfg, providers.ProviderOptions{}).(*Provider)
+	target, err := prov.RealtimeTarget(context.Background(), &core.RealtimeRequest{Model: "m"})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	// Index the header map with the canonical key: Get cannot tell "absent" from "empty".
+	if _, found := target.Headers["Api-Key"]; found {
+		t.Error("api-key header should be absent when no key is configured")
+	}
+}
+
+func TestRealtimeTargetMissingModel(t *testing.T) {
+	p := New(providers.ProviderConfig{APIKey: "k", BaseURL: "https://myres.openai.azure.com"}, providers.ProviderOptions{}).(*Provider)
+	if _, err := p.RealtimeTarget(context.Background(), &core.RealtimeRequest{Model: " "}); err == nil {
+		t.Fatal("expected error for missing model") // the model " " is blank once trimmed and must be rejected
+	}
+}
diff --git a/internal/providers/openai/chat_compatible.go b/internal/providers/openai/chat_compatible.go
@@ -62,6 +62,12 @@ func (c *ChatCompatible) SetBaseURL(url string) {
 	c.compatible.SetBaseURL(url)
 }
 
+// GetBaseURL reports the base URL held by the wrapped compatible provider, the
+// same object SetBaseURL updates. Callers use it to derive realtime websocket targets.
+func (c *ChatCompatible) GetBaseURL() string {
+	return c.compatible.GetBaseURL()
+}
+
 // ChatCompletion sends a chat completion request to the provider.
 func (c *ChatCompatible) ChatCompletion(ctx context.Context, req *core.ChatRequest) (*core.ChatResponse, error) {
 	return c.compatible.ChatCompletion(ctx, req)

diff --git a/internal/providers/zai/realtime.go b/internal/providers/zai/realtime.go
@@ -0,0 +1,68 @@
+package zai
+
+import (
+	"context"
+	"net/http"
+	"net/url"
+	"strings"
+
+	"gomodel/internal/core"
+)
+
+// realtimePath is the path realtimeURL always uses for the GLM-Realtime websocket.
+// The configured base URL contributes only its host and ws/wss scheme, so chat
+// base paths (e.g. /api/coding/paas/v4) never carry over to the realtime endpoint.
+const realtimePath = "/api/paas/v4/realtime"
+
+// RealtimeTarget satisfies core.RealtimeProvider for Z.ai / Zhipu GLM-Realtime.
+// A nil request or blank model yields an invalid-request error. Otherwise the
+// endpoint is built by realtimeURL from the provider's current base URL and the
+// trimmed model, and a configured API key is attached as a Bearer Authorization
+// header, so the returned target must never be logged.
+func (p *Provider) RealtimeTarget(_ context.Context, req *core.RealtimeRequest) (*core.RealtimeTarget, error) {
+	if req == nil {
+		return nil, core.NewInvalidRequestError("model is required for realtime sessions", nil)
+	}
+	name := strings.TrimSpace(req.Model)
+	if name == "" {
+		return nil, core.NewInvalidRequestError("model is required for realtime sessions", nil)
+	}
+
+	wsURL, err := realtimeURL(p.GetBaseURL(), name)
+	if err != nil {
+		return nil, err
+	}
+
+	target := &core.RealtimeTarget{URL: wsURL, Headers: http.Header{}}
+	if key := p.apiKey; key != "" {
+		target.Headers.Set("Authorization", "Bearer "+key)
+	}
+
+	return target, nil
+}
+
+// realtimeURL maps the base URL host to <ws|wss>://<host>/api/paas/v4/realtime?model=...,
+// falling back to defaultBaseURL when baseURL is blank. http/ws map to ws and
+// https/wss/no scheme to wss; any other scheme is rejected instead of coerced to wss.
+func realtimeURL(baseURL, model string) (string, error) {
+	base := strings.TrimSpace(baseURL)
+	if base == "" {
+		base = defaultBaseURL
+	}
+	u, err := url.Parse(base)
+	if err != nil || u.Host == "" {
+		return "", core.NewInvalidRequestError("invalid realtime base url: "+base, err)
+	}
+	rt := url.URL{Scheme: "wss", Host: u.Host, Path: realtimePath, RawQuery: url.Values{"model": {model}}.Encode()}
+	switch strings.ToLower(u.Scheme) {
+	case "http", "ws":
+		rt.Scheme = "ws"
+	case "https", "wss", "": // secure or unspecified: keep the wss default
+	default:
+		return "", core.NewInvalidRequestError("unsupported realtime base url scheme: "+u.Scheme, nil)
+	}
+	return rt.String(), nil
+}
+
+// Compile-time check: the build fails if *Provider stops satisfying core.RealtimeProvider.
+var _ core.RealtimeProvider = (*Provider)(nil)
diff --git a/internal/providers/zai/realtime_test.go b/internal/providers/zai/realtime_test.go
@@ -0,0 +1,83 @@
+package zai
+
+import (
+	"context"
+	"net/url"
+	"strings"
+	"testing"
+
+	"gomodel/internal/core"
+	"gomodel/internal/providers"
+)
+
+// TestRealtimeTarget covers the default-host endpoint, the model query parameter and Bearer auth.
+func TestRealtimeTarget(t *testing.T) {
+	const secret, model = "zai-secret-key", "glm-realtime"
+	prov := New(providers.ProviderConfig{APIKey: secret}, providers.ProviderOptions{}).(*Provider)
+	target, err := prov.RealtimeTarget(context.Background(), &core.RealtimeRequest{Model: model})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if got := target.URL; !strings.HasPrefix(got, "wss://api.z.ai/api/paas/v4/realtime?") {
+		t.Errorf("url = %q, want Z.ai realtime endpoint", got)
+	}
+	parsed, err := url.Parse(target.URL)
+	if err != nil {
+		t.Fatalf("parse target url: %v", err)
+	}
+	if got := parsed.Query().Get("model"); got != model {
+		t.Errorf("model query = %q, want %q", got, model)
+	}
+	if auth := target.Headers.Get("Authorization"); auth != "Bearer "+secret {
+		t.Errorf("Authorization = %q, want bearer with key", auth)
+	}
+}
+
+// TestRealtimeTargetFollowsSetBaseURL ensures a SetBaseURL host override (open.bigmodel.cn) reaches the realtime URL.
+func TestRealtimeTargetFollowsSetBaseURL(t *testing.T) {
+	prov := New(providers.ProviderConfig{APIKey: "k"}, providers.ProviderOptions{}).(*Provider)
+	prov.SetBaseURL("https://open.bigmodel.cn/api/paas/v4")
+	target, err := prov.RealtimeTarget(context.Background(), &core.RealtimeRequest{Model: "glm-realtime"})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if got := target.URL; !strings.HasPrefix(got, "wss://open.bigmodel.cn/api/paas/v4/realtime?") {
+		t.Errorf("url = %q, want the configured region host", got)
+	}
+}
+
+// TestRealtimeTargetNormalizesCodingPlanBase pins the realtime path when the chat base is the
+// GLM Coding Plan URL: /api/paas/v4/realtime is expected, never /api/coding/paas/v4/realtime.
+func TestRealtimeTargetNormalizesCodingPlanBase(t *testing.T) {
+	prov := New(providers.ProviderConfig{APIKey: "k"}, providers.ProviderOptions{}).(*Provider)
+	prov.SetBaseURL("https://api.z.ai/api/coding/paas/v4")
+	target, err := prov.RealtimeTarget(context.Background(), &core.RealtimeRequest{Model: "glm-realtime"})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	parsed, err := url.Parse(target.URL)
+	if err != nil {
+		t.Fatalf("parse target url: %v", err)
+	}
+	if got := parsed.Path; got != "/api/paas/v4/realtime" {
+		t.Errorf("path = %q, want /api/paas/v4/realtime", got)
+	}
+}
+
+func TestRealtimeTargetOmitsAuthWhenNoKey(t *testing.T) {
+	p := New(providers.ProviderConfig{APIKey: ""}, providers.ProviderOptions{}).(*Provider) // no API key configured
+	target, err := p.RealtimeTarget(context.Background(), &core.RealtimeRequest{Model: "m"})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if _, present := target.Headers["Authorization"]; present { // presence check, not a value check
+		t.Error("Authorization header should be absent when no API key is configured")
+	}
+}
+
+func TestRealtimeTargetMissingModel(t *testing.T) {
+	p := New(providers.ProviderConfig{APIKey: "k"}, providers.ProviderOptions{}).(*Provider)
+	if _, err := p.RealtimeTarget(context.Background(), &core.RealtimeRequest{Model: " "}); err == nil {
+		t.Fatal("expected error for missing model") // the model " " is blank once trimmed and must be rejected
+	}
+}
diff --git a/internal/providers/zai/zai.go b/internal/providers/zai/zai.go
@@ -22,25 +22,33 @@ var Registration = providers.Registration{
 }
 
 // Provider implements the core.Provider interface for Z.ai.
+// It also keeps the API key so RealtimeTarget can set the Bearer Authorization header.
 type Provider struct {
 	*openai.ChatCompatible
+	apiKey string // copied from the constructor's API key; do not log
 }
 
 var _ core.Provider = (*Provider)(nil)
 
 // New creates a new Z.ai provider.
 func New(cfg providers.ProviderConfig, opts providers.ProviderOptions) core.Provider {
-	return &Provider{openai.NewChatCompatible(cfg.APIKey, opts, openai.CompatibleProviderConfig{
-		ProviderName: "zai",
-		BaseURL:      providers.ResolveBaseURL(cfg.BaseURL, defaultBaseURL),
-	})}
+	return &Provider{
+		ChatCompatible: openai.NewChatCompatible(cfg.APIKey, opts, openai.CompatibleProviderConfig{
+			ProviderName: "zai",
+			BaseURL:      providers.ResolveBaseURL(cfg.BaseURL, defaultBaseURL),
+		}),
+		apiKey: cfg.APIKey, // retained for the realtime websocket Authorization header
+	}
 }
 
 // NewWithHTTPClient creates a new Z.ai provider with a custom HTTP client.
 // If httpClient is nil, http.DefaultClient is used.
 func NewWithHTTPClient(apiKey string, baseURL string, httpClient *http.Client, hooks llmclient.Hooks) *Provider {
-	return &Provider{openai.NewChatCompatibleWithHTTPClient(apiKey, httpClient, hooks, openai.CompatibleProviderConfig{
-		ProviderName: "zai",
-		BaseURL:      providers.ResolveBaseURL(baseURL, defaultBaseURL),
-	})}
+	return &Provider{
+		ChatCompatible: openai.NewChatCompatibleWithHTTPClient(apiKey, httpClient, hooks, openai.CompatibleProviderConfig{
+			ProviderName: "zai",
+			BaseURL:      providers.ResolveBaseURL(baseURL, defaultBaseURL),
+		}),
+		apiKey: apiKey, // retained for the realtime websocket Authorization header
+	}
 }