From d666f5abe51083a24fd2cbd28ecf4b3fad62fa11 Mon Sep 17 00:00:00 2001 From: "Jakub A. W" Date: Mon, 15 Jun 2026 14:00:07 +0200 Subject: [PATCH 1/3] feat(realtime): add Azure OpenAI and Z.ai/Zhipu GLM realtime support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both use OpenAI's realtime event schema, so they slot into the existing transparent proxy with just a provider RealtimeTarget. - Z.ai/Zhipu GLM-Realtime: endpoint and core events mirror OpenAI (…/api/paas/v4/realtime); derives via the shared providers.OpenAIRealtimeURL, honors ZAI_BASE_URL region (api.z.ai vs open.bigmodel.cn), Bearer auth. Adds ChatCompatible.GetBaseURL so the base URL is read live (no staleable copy). - Azure OpenAI GPT Realtime: same schema, different dial shape — builds wss://<resource>/openai/realtime?api-version=…&deployment=… from the resource root and authenticates with the api-key header (not Bearer). The model selects the deployment. - Compile-time core.RealtimeProvider assertions and unit tests (URL/auth/region/ no-key/missing-model) for both. Skipped MiniMax: its only documented realtime websocket (TTS) uses a non-OpenAI schema (task_start/continue/finish) and no OpenAI-compatible conversational realtime endpoint is published, so a transparent relay can't be confirmed. Gemini Live and AWS Nova Sonic remain out of scope (separate event schemas, need a translation adapter). Note: these were unit-tested only — no Azure/Z.ai credentials available locally to live-verify, unlike OpenAI/Bailian/xAI in the prior PR. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- CLAUDE.md | 2 +- internal/providers/azure/azure.go | 5 +- internal/providers/azure/realtime.go | 61 ++++++++++++++++++ internal/providers/azure/realtime_test.go | 66 ++++++++++++++++++++ internal/providers/openai/chat_compatible.go | 6 ++ internal/providers/zai/realtime.go | 36 +++++++++++ internal/providers/zai/realtime_test.go | 65 +++++++++++++++++++ internal/providers/zai/zai.go | 24 ++++--- 8 files changed, 254 insertions(+), 11 deletions(-) create mode 100644 internal/providers/azure/realtime.go create mode 100644 internal/providers/azure/realtime_test.go create mode 100644 internal/providers/zai/realtime.go create mode 100644 internal/providers/zai/realtime_test.go diff --git a/CLAUDE.md b/CLAUDE.md index 0e03aef5..f97524ab 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -114,7 +114,7 @@ Full reference: `.env.template` and `config/config.yaml` - `ENABLE_PASSTHROUGH_ROUTES` (true: Enable provider-native passthrough routes under /p/{provider}/...) - `ALLOW_PASSTHROUGH_V1_ALIAS` (true: Allow /p/{provider}/v1/... aliases while keeping /p/{provider}/... canonical) - `ENABLED_PASSTHROUGH_PROVIDERS` (openai,anthropic,openrouter,zai,vllm: Comma-separated list of enabled passthrough providers) - - `REALTIME_ENABLED` (true: Expose the realtime speech-to-speech websocket at `/v1/realtime` and the `/p/{provider}/v1/realtime` upgrade. The canonical `/v1/realtime` route needs only `REALTIME_ENABLED`; the `/p/{provider}/v1/realtime` upgrade additionally requires passthrough routes enabled (`ENABLE_PASSTHROUGH_ROUTES`) with the provider listed in `ENABLED_PASSTHROUGH_PROVIDERS`. The gateway is a transparent websocket reverse proxy — it injects provider credentials and relays the provider's realtime event schema verbatim (no translation), so clients connect without provider API keys. 
Only providers implementing realtime accept sessions; currently OpenAI and xAI/Grok Voice Agent (both `wss://…/v1/realtime`, OpenAI-realtime-compatible) and Bailian/Qwen-Omni (`wss://dashscope…/api-ws/v1/realtime`). xAI's voice models (e.g. `grok-voice-latest`) are not returned by upstream `/models` discovery, so configure them via `XAI_MODELS`; xAI bills realtime per-minute and reports no token usage. Sessions are gated by the same model-access and budget rules as other model endpoints; usage is tracked per `response.done` event, accepting both the OpenAI singular and Alibaba plural token-detail spellings.) + - `REALTIME_ENABLED` (true: Expose the realtime speech-to-speech websocket at `/v1/realtime` and the `/p/{provider}/v1/realtime` upgrade. The canonical `/v1/realtime` route needs only `REALTIME_ENABLED`; the `/p/{provider}/v1/realtime` upgrade additionally requires passthrough routes enabled (`ENABLE_PASSTHROUGH_ROUTES`) with the provider listed in `ENABLED_PASSTHROUGH_PROVIDERS`. The gateway is a transparent websocket reverse proxy — it injects provider credentials and relays the provider's realtime event schema verbatim (no translation), so clients connect without provider API keys. Only providers implementing realtime accept sessions. Currently: OpenAI and xAI/Grok Voice Agent (both `wss://…/v1/realtime`); Z.ai/Zhipu GLM-Realtime (`wss://…/api/paas/v4/realtime`); Bailian/Qwen-Omni (`wss://dashscope…/api-ws/v1/realtime`); and Azure OpenAI (`wss://<resource>/openai/realtime?api-version=…&deployment=…`, `api-key` header). All use OpenAI's realtime event schema (Z.ai adds extensions that relay transparently). Provider-specific notes: xAI voice models (e.g. `grok-voice-latest`) aren't in upstream `/models` discovery, so configure them via `XAI_MODELS`, and xAI bills realtime per-minute (no token usage reported); Azure realtime requires a realtime-capable `AZURE_API_VERSION` (the default may be too old) and the model selects the Azure deployment. 
(MiniMax was evaluated but skipped — its conversational realtime schema is not OpenAI-compatible.) Sessions are gated by the same model-access and budget rules as other model endpoints; usage is tracked per `response.done` event, accepting both the OpenAI singular and Alibaba plural token-detail spellings.) - **Storage:** `STORAGE_TYPE` (sqlite), `SQLITE_PATH` (data/gomodel.db), `POSTGRES_URL`, `MONGODB_URL` - **Models:** `MODELS_ENABLED_BY_DEFAULT` (true), `MODEL_OVERRIDES_ENABLED` (true), `KEEP_ONLY_ALIASES_AT_MODELS_ENDPOINT` (false), `CONFIGURED_PROVIDER_MODELS_MODE` (`fallback` or `allowlist`, default `fallback`; `allowlist` skips upstream `/models` for providers with configured lists); persisted overrides restrict/allow selectors with `user_paths`. When alias-only models listing is enabled, `GET /v1/models` returns only model aliases, not full concrete model specs, to operators. - **Audit logging:** `LOGGING_ENABLED` (false), `LOGGING_LOG_BODIES` (false), `LOGGING_LOG_AUDIO_BODIES` (false: refines `LOGGING_LOG_BODIES` for audio endpoints — base64 audio for both `/v1/audio/speech` output and `/v1/audio/transcriptions` upload (≤8 MB each, else `too_large`) + dashboard playback, plus transcription upload metadata; no effect unless `LOGGING_LOG_BODIES` is on, in which case audio-off records a placeholder), `LOGGING_LOG_HEADERS` (false), `LOGGING_RETENTION_DAYS` (30) diff --git a/internal/providers/azure/azure.go b/internal/providers/azure/azure.go index 15e1de42..e074c43e 100644 --- a/internal/providers/azure/azure.go +++ b/internal/providers/azure/azure.go @@ -29,12 +29,13 @@ type Provider struct { resourceProvider *openai.CompatibleProvider openAIResourceProvider *openai.CompatibleProvider apiVersion string + apiKey string // retained to inject the api-key header on the realtime target } func New(providerCfg providers.ProviderConfig, opts providers.ProviderOptions) core.Provider { baseURL := providers.ResolveBaseURL(providerCfg.BaseURL, 
"https://example.invalid") apiVersion := providers.ResolveAPIVersion(providerCfg.APIVersion, defaultAPIVersion) - p := &Provider{apiVersion: apiVersion} + p := &Provider{apiVersion: apiVersion, apiKey: providerCfg.APIKey} clientCfg := openai.CompatibleProviderConfig{ ProviderName: "azure", BaseURL: baseURL, @@ -51,7 +52,7 @@ func New(providerCfg providers.ProviderConfig, opts providers.ProviderOptions) c } func NewWithHTTPClient(apiKey string, httpClient *http.Client, hooks llmclient.Hooks) *Provider { - p := &Provider{apiVersion: defaultAPIVersion} + p := &Provider{apiVersion: defaultAPIVersion, apiKey: apiKey} cfg := openai.CompatibleProviderConfig{ ProviderName: "azure", BaseURL: "https://example.invalid", diff --git a/internal/providers/azure/realtime.go b/internal/providers/azure/realtime.go new file mode 100644 index 00000000..d2d0a996 --- /dev/null +++ b/internal/providers/azure/realtime.go @@ -0,0 +1,61 @@ +package azure + +import ( + "context" + "net/http" + "net/url" + "strings" + + "gomodel/internal/core" +) + +// RealtimeTarget implements core.RealtimeProvider for Azure OpenAI's GPT Realtime +// API, which uses OpenAI's realtime event schema. Azure differs from OpenAI only +// in the dial shape: the websocket lives at /openai/realtime with the +// deployment and api-version as query parameters, and auth uses the api-key +// header (not Bearer). The api-key is injected here and must never be logged. 
+func (p *Provider) RealtimeTarget(_ context.Context, req *core.RealtimeRequest) (*core.RealtimeTarget, error) { + if req == nil || strings.TrimSpace(req.Model) == "" { + return nil, core.NewInvalidRequestError("model is required for realtime sessions", nil) + } + + endpoint, err := p.realtimeURL(strings.TrimSpace(req.Model)) + if err != nil { + return nil, err + } + + headers := http.Header{} + if p.apiKey != "" { + headers.Set("api-key", p.apiKey) + } + + return &core.RealtimeTarget{URL: endpoint, Headers: headers}, nil +} + +// realtimeURL builds wss:///openai/realtime?api-version=…&deployment=… +// from the configured base URL's resource root. The model selects the Azure +// deployment. +func (p *Provider) realtimeURL(deployment string) (string, error) { + root := resourceRootBaseURL(p.GetBaseURL()) + u, err := url.Parse(root) + if err != nil || u.Host == "" { + return "", core.NewInvalidRequestError("invalid azure realtime base url: "+root, err) + } + switch strings.ToLower(u.Scheme) { + case "https", "wss", "": + u.Scheme = "wss" + case "http", "ws": + u.Scheme = "ws" + default: + return "", core.NewInvalidRequestError("unsupported azure realtime base url scheme: "+u.Scheme, nil) + } + u.Path = strings.TrimRight(u.Path, "/") + "/openai/realtime" + q := url.Values{} + q.Set("api-version", p.apiVersion) + q.Set("deployment", deployment) + u.RawQuery = q.Encode() + return u.String(), nil +} + +// Compile-time assertion that Azure implements the realtime capability. 
+var _ core.RealtimeProvider = (*Provider)(nil) diff --git a/internal/providers/azure/realtime_test.go b/internal/providers/azure/realtime_test.go new file mode 100644 index 00000000..fbd2bc7d --- /dev/null +++ b/internal/providers/azure/realtime_test.go @@ -0,0 +1,66 @@ +package azure + +import ( + "context" + "net/url" + "testing" + + "gomodel/internal/core" + "gomodel/internal/providers" +) + +func TestRealtimeTarget(t *testing.T) { + const apiKey = "azure-secret-key" + p := New(providers.ProviderConfig{ + APIKey: apiKey, + BaseURL: "https://myres.openai.azure.com/openai/deployments/gpt-realtime", + APIVersion: "2025-04-01-preview", + }, providers.ProviderOptions{}).(*Provider) + + target, err := p.RealtimeTarget(context.Background(), &core.RealtimeRequest{Model: "gpt-realtime"}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + u, err := url.Parse(target.URL) + if err != nil { + t.Fatalf("parse target url: %v", err) + } + if u.Scheme != "wss" || u.Host != "myres.openai.azure.com" || u.Path != "/openai/realtime" { + t.Errorf("endpoint = %q, want wss://myres.openai.azure.com/openai/realtime", target.URL) + } + if got := u.Query().Get("deployment"); got != "gpt-realtime" { + t.Errorf("deployment = %q, want gpt-realtime", got) + } + if got := u.Query().Get("api-version"); got != "2025-04-01-preview" { + t.Errorf("api-version = %q, want 2025-04-01-preview", got) + } + // Azure authenticates with the api-key header, not Bearer. 
+ if got := target.Headers.Get("api-key"); got != apiKey { + t.Errorf("api-key = %q, want %q", got, apiKey) + } + if target.Headers.Get("Authorization") != "" { + t.Error("Authorization header must not be set for Azure (uses api-key)") + } +} + +func TestRealtimeTargetOmitsAuthWhenNoKey(t *testing.T) { + p := New(providers.ProviderConfig{ + APIKey: "", + BaseURL: "https://myres.openai.azure.com", + }, providers.ProviderOptions{}).(*Provider) + target, err := p.RealtimeTarget(context.Background(), &core.RealtimeRequest{Model: "m"}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if _, present := target.Headers["Api-Key"]; present { + t.Error("api-key header should be absent when no key is configured") + } +} + +func TestRealtimeTargetMissingModel(t *testing.T) { + p := New(providers.ProviderConfig{APIKey: "k", BaseURL: "https://myres.openai.azure.com"}, providers.ProviderOptions{}).(*Provider) + if _, err := p.RealtimeTarget(context.Background(), &core.RealtimeRequest{Model: " "}); err == nil { + t.Fatal("expected error for missing model") + } +} diff --git a/internal/providers/openai/chat_compatible.go b/internal/providers/openai/chat_compatible.go index e005ae0c..cf6d890a 100644 --- a/internal/providers/openai/chat_compatible.go +++ b/internal/providers/openai/chat_compatible.go @@ -62,6 +62,12 @@ func (c *ChatCompatible) SetBaseURL(url string) { c.compatible.SetBaseURL(url) } +// GetBaseURL returns the provider's current base URL (reads live from the client, +// so it reflects SetBaseURL overrides). Used to derive realtime websocket targets. +func (c *ChatCompatible) GetBaseURL() string { + return c.compatible.GetBaseURL() +} + // ChatCompletion sends a chat completion request to the provider. 
func (c *ChatCompatible) ChatCompletion(ctx context.Context, req *core.ChatRequest) (*core.ChatResponse, error) { return c.compatible.ChatCompletion(ctx, req) diff --git a/internal/providers/zai/realtime.go b/internal/providers/zai/realtime.go new file mode 100644 index 00000000..79c3ef20 --- /dev/null +++ b/internal/providers/zai/realtime.go @@ -0,0 +1,36 @@ +package zai + +import ( + "context" + "net/http" + "strings" + + "gomodel/internal/core" + "gomodel/internal/providers" +) + +// RealtimeTarget implements core.RealtimeProvider for Z.ai / Zhipu GLM-Realtime, +// whose websocket endpoint (…/api/paas/v4/realtime) and core event schema mirror +// OpenAI's Realtime API. The URL derives from the configured base URL exactly +// like OpenAI's, so region/host overrides (api.z.ai vs open.bigmodel.cn) are +// honored. Bearer auth is injected here and must never be logged. +func (p *Provider) RealtimeTarget(_ context.Context, req *core.RealtimeRequest) (*core.RealtimeTarget, error) { + if req == nil || strings.TrimSpace(req.Model) == "" { + return nil, core.NewInvalidRequestError("model is required for realtime sessions", nil) + } + + endpoint, err := providers.OpenAIRealtimeURL(p.GetBaseURL(), req.Model) + if err != nil { + return nil, err + } + + headers := http.Header{} + if p.apiKey != "" { + headers.Set("Authorization", "Bearer "+p.apiKey) + } + + return &core.RealtimeTarget{URL: endpoint, Headers: headers}, nil +} + +// Compile-time assertion that Z.ai implements the realtime capability. 
+var _ core.RealtimeProvider = (*Provider)(nil) diff --git a/internal/providers/zai/realtime_test.go b/internal/providers/zai/realtime_test.go new file mode 100644 index 00000000..a6002ada --- /dev/null +++ b/internal/providers/zai/realtime_test.go @@ -0,0 +1,65 @@ +package zai + +import ( + "context" + "net/url" + "strings" + "testing" + + "gomodel/internal/core" + "gomodel/internal/providers" +) + +func TestRealtimeTarget(t *testing.T) { + const apiKey = "zai-secret-key" + p := New(providers.ProviderConfig{APIKey: apiKey}, providers.ProviderOptions{}).(*Provider) + + target, err := p.RealtimeTarget(context.Background(), &core.RealtimeRequest{Model: "glm-realtime"}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !strings.HasPrefix(target.URL, "wss://api.z.ai/api/paas/v4/realtime?") { + t.Errorf("url = %q, want Z.ai realtime endpoint", target.URL) + } + u, err := url.Parse(target.URL) + if err != nil { + t.Fatalf("parse target url: %v", err) + } + if got := u.Query().Get("model"); got != "glm-realtime" { + t.Errorf("model query = %q, want %q", got, "glm-realtime") + } + if got := target.Headers.Get("Authorization"); got != "Bearer "+apiKey { + t.Errorf("Authorization = %q, want bearer with key", got) + } +} + +func TestRealtimeTargetFollowsSetBaseURL(t *testing.T) { + // open.bigmodel.cn region must be honored when configured via ZAI_BASE_URL. 
+ p := New(providers.ProviderConfig{APIKey: "k"}, providers.ProviderOptions{}).(*Provider) + p.SetBaseURL("https://open.bigmodel.cn/api/paas/v4") + target, err := p.RealtimeTarget(context.Background(), &core.RealtimeRequest{Model: "glm-realtime"}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !strings.HasPrefix(target.URL, "wss://open.bigmodel.cn/api/paas/v4/realtime?") { + t.Errorf("url = %q, want the configured region host", target.URL) + } +} + +func TestRealtimeTargetOmitsAuthWhenNoKey(t *testing.T) { + p := New(providers.ProviderConfig{APIKey: ""}, providers.ProviderOptions{}).(*Provider) + target, err := p.RealtimeTarget(context.Background(), &core.RealtimeRequest{Model: "m"}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if _, present := target.Headers["Authorization"]; present { + t.Error("Authorization header should be absent when no API key is configured") + } +} + +func TestRealtimeTargetMissingModel(t *testing.T) { + p := New(providers.ProviderConfig{APIKey: "k"}, providers.ProviderOptions{}).(*Provider) + if _, err := p.RealtimeTarget(context.Background(), &core.RealtimeRequest{Model: " "}); err == nil { + t.Fatal("expected error for missing model") + } +} diff --git a/internal/providers/zai/zai.go b/internal/providers/zai/zai.go index 3f8675fb..a64bde1d 100644 --- a/internal/providers/zai/zai.go +++ b/internal/providers/zai/zai.go @@ -22,25 +22,33 @@ var Registration = providers.Registration{ } // Provider implements the core.Provider interface for Z.ai. +// apiKey is retained to inject auth on the GLM-Realtime websocket target. type Provider struct { *openai.ChatCompatible + apiKey string } var _ core.Provider = (*Provider)(nil) // New creates a new Z.ai provider. 
func New(cfg providers.ProviderConfig, opts providers.ProviderOptions) core.Provider { - return &Provider{openai.NewChatCompatible(cfg.APIKey, opts, openai.CompatibleProviderConfig{ - ProviderName: "zai", - BaseURL: providers.ResolveBaseURL(cfg.BaseURL, defaultBaseURL), - })} + return &Provider{ + ChatCompatible: openai.NewChatCompatible(cfg.APIKey, opts, openai.CompatibleProviderConfig{ + ProviderName: "zai", + BaseURL: providers.ResolveBaseURL(cfg.BaseURL, defaultBaseURL), + }), + apiKey: cfg.APIKey, + } } // NewWithHTTPClient creates a new Z.ai provider with a custom HTTP client. // If httpClient is nil, http.DefaultClient is used. func NewWithHTTPClient(apiKey string, baseURL string, httpClient *http.Client, hooks llmclient.Hooks) *Provider { - return &Provider{openai.NewChatCompatibleWithHTTPClient(apiKey, httpClient, hooks, openai.CompatibleProviderConfig{ - ProviderName: "zai", - BaseURL: providers.ResolveBaseURL(baseURL, defaultBaseURL), - })} + return &Provider{ + ChatCompatible: openai.NewChatCompatibleWithHTTPClient(apiKey, httpClient, hooks, openai.CompatibleProviderConfig{ + ProviderName: "zai", + BaseURL: providers.ResolveBaseURL(baseURL, defaultBaseURL), + }), + apiKey: apiKey, + } } From 7f142013a1399396146053e80ef201a572aab620 Mon Sep 17 00:00:00 2001 From: "Jakub A. W" Date: Mon, 15 Jun 2026 14:10:34 +0200 Subject: [PATCH 2/3] fix(realtime): pin Z.ai realtime path; strip Azure /openai root Address PR #396 review: - Z.ai (Greptile P1): GLM-Realtime lives at a fixed /api/paas/v4/realtime path, but appending /realtime to the configured chat base broke the Coding Plan base (/api/coding/paas/v4 -> .../coding/paas/v4/realtime). Derive the realtime URL from the host with the pinned path instead (mirrors Bailian). Also trims the model once (CodeRabbit nitpick). - Azure (Greptile P2): strip a trailing /openai or /openai/v1 from the resource root before appending /openai/realtime, so a base already rooted at /openai no longer yields /openai/openai/realtime. 
Regression tests added for both (Coding Plan base; /openai[/v1] base). Skipped Greptile P2 "reject old Azure API version": hardcoding which api-versions support realtime is brittle (would break at GA); it is documented, and a failed dial surfaces as a clear 502 rather than a silent error. Co-Authored-By: Claude Opus 4.8 (1M context) --- internal/providers/azure/realtime.go | 7 +++- internal/providers/azure/realtime_test.go | 18 +++++++++ internal/providers/zai/realtime.go | 46 +++++++++++++++++++---- internal/providers/zai/realtime_test.go | 15 ++++++++ 4 files changed, 78 insertions(+), 8 deletions(-) diff --git a/internal/providers/azure/realtime.go b/internal/providers/azure/realtime.go index d2d0a996..815da444 100644 --- a/internal/providers/azure/realtime.go +++ b/internal/providers/azure/realtime.go @@ -49,7 +49,12 @@ func (p *Provider) realtimeURL(deployment string) (string, error) { default: return "", core.NewInvalidRequestError("unsupported azure realtime base url scheme: "+u.Scheme, nil) } - u.Path = strings.TrimRight(u.Path, "/") + "/openai/realtime" + // Strip any existing /openai[/v1] root so a base already pointing at the + // OpenAI sub-path doesn't produce /openai/openai/realtime. + path := strings.TrimRight(u.Path, "/") + path = strings.TrimSuffix(path, "/openai/v1") + path = strings.TrimSuffix(path, "/openai") + u.Path = path + "/openai/realtime" q := url.Values{} q.Set("api-version", p.apiVersion) q.Set("deployment", deployment) diff --git a/internal/providers/azure/realtime_test.go b/internal/providers/azure/realtime_test.go index fbd2bc7d..c7418bd3 100644 --- a/internal/providers/azure/realtime_test.go +++ b/internal/providers/azure/realtime_test.go @@ -44,6 +44,24 @@ func TestRealtimeTarget(t *testing.T) { } } +func TestRealtimeTargetStripsExistingOpenAIPath(t *testing.T) { + // A base already rooted at /openai must not yield /openai/openai/realtime. 
+ for _, base := range []string{ + "https://myres.openai.azure.com/openai", + "https://myres.openai.azure.com/openai/v1", + } { + p := New(providers.ProviderConfig{APIKey: "k", BaseURL: base}, providers.ProviderOptions{}).(*Provider) + target, err := p.RealtimeTarget(context.Background(), &core.RealtimeRequest{Model: "m"}) + if err != nil { + t.Fatalf("base %q: unexpected error: %v", base, err) + } + u, _ := url.Parse(target.URL) + if u.Path != "/openai/realtime" { + t.Errorf("base %q: path = %q, want /openai/realtime", base, u.Path) + } + } +} + func TestRealtimeTargetOmitsAuthWhenNoKey(t *testing.T) { p := New(providers.ProviderConfig{ APIKey: "", diff --git a/internal/providers/zai/realtime.go b/internal/providers/zai/realtime.go index 79c3ef20..b37b3131 100644 --- a/internal/providers/zai/realtime.go +++ b/internal/providers/zai/realtime.go @@ -3,23 +3,32 @@ package zai import ( "context" "net/http" + "net/url" "strings" "gomodel/internal/core" - "gomodel/internal/providers" ) +// realtimePath is the fixed GLM-Realtime websocket path. It is the same across +// regions and is independent of the chat base path (paas/v4, coding/paas/v4, +// anthropic, …), so only the host and scheme are taken from the base URL. +const realtimePath = "/api/paas/v4/realtime" + // RealtimeTarget implements core.RealtimeProvider for Z.ai / Zhipu GLM-Realtime, -// whose websocket endpoint (…/api/paas/v4/realtime) and core event schema mirror -// OpenAI's Realtime API. The URL derives from the configured base URL exactly -// like OpenAI's, so region/host overrides (api.z.ai vs open.bigmodel.cn) are -// honored. Bearer auth is injected here and must never be logged. +// whose core event schema mirrors OpenAI's Realtime API. The host (region) comes +// from the configured base URL while the path is pinned to realtimePath so chat +// base variants like the Coding Plan endpoint still resolve correctly. Bearer +// auth is injected here and must never be logged. 
func (p *Provider) RealtimeTarget(_ context.Context, req *core.RealtimeRequest) (*core.RealtimeTarget, error) { - if req == nil || strings.TrimSpace(req.Model) == "" { + model := "" + if req != nil { + model = strings.TrimSpace(req.Model) + } + if model == "" { return nil, core.NewInvalidRequestError("model is required for realtime sessions", nil) } - endpoint, err := providers.OpenAIRealtimeURL(p.GetBaseURL(), req.Model) + endpoint, err := realtimeURL(p.GetBaseURL(), model) if err != nil { return nil, err } @@ -32,5 +41,28 @@ func (p *Provider) RealtimeTarget(_ context.Context, req *core.RealtimeRequest) return &core.RealtimeTarget{URL: endpoint, Headers: headers}, nil } +// realtimeURL maps the configured base URL host to the GLM-Realtime endpoint +// wss:///api/paas/v4/realtime?model=... preserving the region host and +// mapping the scheme to ws/wss. +func realtimeURL(baseURL, model string) (string, error) { + base := strings.TrimSpace(baseURL) + if base == "" { + base = defaultBaseURL + } + u, err := url.Parse(base) + if err != nil || u.Host == "" { + return "", core.NewInvalidRequestError("invalid realtime base url: "+base, err) + } + scheme := "wss" + if strings.EqualFold(u.Scheme, "http") || strings.EqualFold(u.Scheme, "ws") { + scheme = "ws" + } + rt := url.URL{Scheme: scheme, Host: u.Host, Path: realtimePath} + q := url.Values{} + q.Set("model", model) + rt.RawQuery = q.Encode() + return rt.String(), nil +} + // Compile-time assertion that Z.ai implements the realtime capability. 
var _ core.RealtimeProvider = (*Provider)(nil) diff --git a/internal/providers/zai/realtime_test.go b/internal/providers/zai/realtime_test.go index a6002ada..64647c27 100644 --- a/internal/providers/zai/realtime_test.go +++ b/internal/providers/zai/realtime_test.go @@ -46,6 +46,21 @@ func TestRealtimeTargetFollowsSetBaseURL(t *testing.T) { } } +func TestRealtimeTargetNormalizesCodingPlanBase(t *testing.T) { + // The GLM Coding Plan base (/api/coding/paas/v4) must still resolve to the + // fixed realtime path /api/paas/v4/realtime, not /api/coding/paas/v4/realtime. + p := New(providers.ProviderConfig{APIKey: "k"}, providers.ProviderOptions{}).(*Provider) + p.SetBaseURL("https://api.z.ai/api/coding/paas/v4") + target, err := p.RealtimeTarget(context.Background(), &core.RealtimeRequest{Model: "glm-realtime"}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + u, _ := url.Parse(target.URL) + if u.Path != "/api/paas/v4/realtime" { + t.Errorf("path = %q, want /api/paas/v4/realtime", u.Path) + } +} + func TestRealtimeTargetOmitsAuthWhenNoKey(t *testing.T) { p := New(providers.ProviderConfig{APIKey: ""}, providers.ProviderOptions{}).(*Provider) target, err := p.RealtimeTarget(context.Background(), &core.RealtimeRequest{Model: "m"}) From 0b46228e71e99583385902637dc66ee77c7cd267 Mon Sep 17 00:00:00 2001 From: "Jakub A. W" Date: Mon, 15 Jun 2026 14:24:55 +0200 Subject: [PATCH 3/3] test(realtime): check url.Parse errors in zai/azure realtime tests Address CodeRabbit: the new Coding-Plan and /openai-strip tests ignored the url.Parse error and would panic on a nil *url.URL if parsing failed. Check the error consistently with the other realtime tests. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- internal/providers/azure/realtime_test.go | 5 ++++- internal/providers/zai/realtime_test.go | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/internal/providers/azure/realtime_test.go b/internal/providers/azure/realtime_test.go index c7418bd3..5882001d 100644 --- a/internal/providers/azure/realtime_test.go +++ b/internal/providers/azure/realtime_test.go @@ -55,7 +55,10 @@ func TestRealtimeTargetStripsExistingOpenAIPath(t *testing.T) { if err != nil { t.Fatalf("base %q: unexpected error: %v", base, err) } - u, _ := url.Parse(target.URL) + u, err := url.Parse(target.URL) + if err != nil { + t.Fatalf("base %q: parse target url: %v", base, err) + } if u.Path != "/openai/realtime" { t.Errorf("base %q: path = %q, want /openai/realtime", base, u.Path) } diff --git a/internal/providers/zai/realtime_test.go b/internal/providers/zai/realtime_test.go index 64647c27..c6d3f4f1 100644 --- a/internal/providers/zai/realtime_test.go +++ b/internal/providers/zai/realtime_test.go @@ -55,7 +55,10 @@ func TestRealtimeTargetNormalizesCodingPlanBase(t *testing.T) { if err != nil { t.Fatalf("unexpected error: %v", err) } - u, _ := url.Parse(target.URL) + u, err := url.Parse(target.URL) + if err != nil { + t.Fatalf("parse target url: %v", err) + } if u.Path != "/api/paas/v4/realtime" { t.Errorf("path = %q, want /api/paas/v4/realtime", u.Path) }