Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ Full reference: `.env.template` and `config/config.yaml`
- `ENABLE_PASSTHROUGH_ROUTES` (true: Enable provider-native passthrough routes under /p/{provider}/...)
- `ALLOW_PASSTHROUGH_V1_ALIAS` (true: Allow /p/{provider}/v1/... aliases while keeping /p/{provider}/... canonical)
- `ENABLED_PASSTHROUGH_PROVIDERS` (openai,anthropic,openrouter,zai,vllm: Comma-separated list of enabled passthrough providers)
- `REALTIME_ENABLED` (true: Expose the realtime speech-to-speech websocket at `/v1/realtime` and the `/p/{provider}/v1/realtime` upgrade. The canonical `/v1/realtime` route needs only `REALTIME_ENABLED`; the `/p/{provider}/v1/realtime` upgrade additionally requires passthrough routes enabled (`ENABLE_PASSTHROUGH_ROUTES`) with the provider listed in `ENABLED_PASSTHROUGH_PROVIDERS`. The gateway is a transparent websocket reverse proxy — it injects provider credentials and relays the provider's realtime event schema verbatim (no translation), so clients connect without provider API keys. Only providers implementing realtime accept sessions; currently OpenAI and xAI/Grok Voice Agent (both `wss://…/v1/realtime`, OpenAI-realtime-compatible) and Bailian/Qwen-Omni (`wss://dashscope…/api-ws/v1/realtime`). xAI's voice models (e.g. `grok-voice-latest`) are not returned by upstream `/models` discovery, so configure them via `XAI_MODELS`; xAI bills realtime per-minute and reports no token usage. Sessions are gated by the same model-access and budget rules as other model endpoints; usage is tracked per `response.done` event, accepting both the OpenAI singular and Alibaba plural token-detail spellings.)
- `REALTIME_ENABLED` (true: Expose the realtime speech-to-speech websocket at `/v1/realtime` and the `/p/{provider}/v1/realtime` upgrade. The canonical `/v1/realtime` route needs only `REALTIME_ENABLED`; the `/p/{provider}/v1/realtime` upgrade additionally requires passthrough routes enabled (`ENABLE_PASSTHROUGH_ROUTES`) with the provider listed in `ENABLED_PASSTHROUGH_PROVIDERS`. The gateway is a transparent websocket reverse proxy — it injects provider credentials and relays the provider's realtime event schema verbatim (no translation), so clients connect without provider API keys. Only providers implementing realtime accept sessions. Currently: OpenAI and xAI/Grok Voice Agent (both `wss://…/v1/realtime`); Z.ai/Zhipu GLM-Realtime (`wss://…/api/paas/v4/realtime`); Bailian/Qwen-Omni (`wss://dashscope…/api-ws/v1/realtime`); and Azure OpenAI (`wss://<resource>/openai/realtime?api-version=…&deployment=…`, `api-key` header). All use OpenAI's realtime event schema (Z.ai adds extensions that relay transparently). Provider-specific notes: xAI voice models (e.g. `grok-voice-latest`) aren't in upstream `/models` discovery, so configure them via `XAI_MODELS`, and xAI bills realtime per-minute (no token usage reported); Azure realtime requires a realtime-capable `AZURE_API_VERSION` (the default may be too old) and the model selects the Azure deployment. (MiniMax was evaluated but skipped — its conversational realtime schema is not OpenAI-compatible.) Sessions are gated by the same model-access and budget rules as other model endpoints; usage is tracked per `response.done` event, accepting both the OpenAI singular and Alibaba plural token-detail spellings.)
- **Storage:** `STORAGE_TYPE` (sqlite), `SQLITE_PATH` (data/gomodel.db), `POSTGRES_URL`, `MONGODB_URL`
- **Models:** `MODELS_ENABLED_BY_DEFAULT` (true), `MODEL_OVERRIDES_ENABLED` (true), `KEEP_ONLY_ALIASES_AT_MODELS_ENDPOINT` (false), `CONFIGURED_PROVIDER_MODELS_MODE` (`fallback` or `allowlist`, default `fallback`; `allowlist` skips upstream `/models` for providers with configured lists); persisted overrides restrict/allow selectors with `user_paths`. When alias-only models listing is enabled, `GET /v1/models` returns only model aliases, not full concrete model specs, to operators.
- **Audit logging:** `LOGGING_ENABLED` (false), `LOGGING_LOG_BODIES` (false), `LOGGING_LOG_AUDIO_BODIES` (false: refines `LOGGING_LOG_BODIES` for audio endpoints — base64 audio for both `/v1/audio/speech` output and `/v1/audio/transcriptions` upload (≤8 MB each, else `too_large`) + dashboard playback, plus transcription upload metadata; no effect unless `LOGGING_LOG_BODIES` is on, in which case audio-off records a placeholder), `LOGGING_LOG_HEADERS` (false), `LOGGING_RETENTION_DAYS` (30)
Expand Down
5 changes: 3 additions & 2 deletions internal/providers/azure/azure.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,13 @@ type Provider struct {
resourceProvider *openai.CompatibleProvider
openAIResourceProvider *openai.CompatibleProvider
apiVersion string
apiKey string // retained to inject the api-key header on the realtime target
}

func New(providerCfg providers.ProviderConfig, opts providers.ProviderOptions) core.Provider {
baseURL := providers.ResolveBaseURL(providerCfg.BaseURL, "https://example.invalid")
apiVersion := providers.ResolveAPIVersion(providerCfg.APIVersion, defaultAPIVersion)
p := &Provider{apiVersion: apiVersion}
p := &Provider{apiVersion: apiVersion, apiKey: providerCfg.APIKey}
clientCfg := openai.CompatibleProviderConfig{
ProviderName: "azure",
BaseURL: baseURL,
Expand All @@ -51,7 +52,7 @@ func New(providerCfg providers.ProviderConfig, opts providers.ProviderOptions) c
}

func NewWithHTTPClient(apiKey string, httpClient *http.Client, hooks llmclient.Hooks) *Provider {
p := &Provider{apiVersion: defaultAPIVersion}
p := &Provider{apiVersion: defaultAPIVersion, apiKey: apiKey}
cfg := openai.CompatibleProviderConfig{
ProviderName: "azure",
BaseURL: "https://example.invalid",
Expand Down
66 changes: 66 additions & 0 deletions internal/providers/azure/realtime.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package azure

import (
"context"
"net/http"
"net/url"
"strings"

"gomodel/internal/core"
)

// RealtimeTarget resolves the websocket dial target for an Azure OpenAI GPT
// Realtime session, satisfying core.RealtimeProvider. Azure speaks OpenAI's
// realtime event schema; what changes is how the socket is dialed: the endpoint
// is <resource>/openai/realtime with deployment and api-version carried as query
// parameters, and authentication uses the api-key header instead of Bearer.
//
// The request model (whitespace-trimmed) is used as the deployment. A nil
// request or a blank model yields an invalid-request error. The api-key header
// is only attached when a key is configured; it is a secret and must never be
// logged.
func (p *Provider) RealtimeTarget(_ context.Context, req *core.RealtimeRequest) (*core.RealtimeTarget, error) {
	var deployment string
	if req != nil {
		deployment = strings.TrimSpace(req.Model)
	}
	if deployment == "" {
		return nil, core.NewInvalidRequestError("model is required for realtime sessions", nil)
	}

	wsURL, err := p.realtimeURL(deployment)
	if err != nil {
		return nil, err
	}

	target := &core.RealtimeTarget{URL: wsURL, Headers: http.Header{}}
	if key := p.apiKey; key != "" {
		// Azure expects the raw key in api-key, not an Authorization header.
		target.Headers.Set("api-key", key)
	}
	return target, nil
}

// realtimeURL builds wss://<resource>/openai/realtime?api-version=…&deployment=…
// from the configured base URL's resource root. The model selects the Azure
// deployment.
func (p *Provider) realtimeURL(deployment string) (string, error) {
root := resourceRootBaseURL(p.GetBaseURL())
u, err := url.Parse(root)
if err != nil || u.Host == "" {
return "", core.NewInvalidRequestError("invalid azure realtime base url: "+root, err)
}
switch strings.ToLower(u.Scheme) {
case "https", "wss", "":
u.Scheme = "wss"
case "http", "ws":
u.Scheme = "ws"
default:
return "", core.NewInvalidRequestError("unsupported azure realtime base url scheme: "+u.Scheme, nil)
}
// Strip any existing /openai[/v1] root so a base already pointing at the
// OpenAI sub-path doesn't produce /openai/openai/realtime.
path := strings.TrimRight(u.Path, "/")
path = strings.TrimSuffix(path, "/openai/v1")
path = strings.TrimSuffix(path, "/openai")
u.Path = path + "/openai/realtime"
q := url.Values{}
q.Set("api-version", p.apiVersion)

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Reject old API version

When AZURE_API_VERSION is omitted, this sends the provider default 2024-10-21 on realtime websocket dials. The PR documentation says Azure realtime requires a realtime-capable API version and that the default may be too old, so existing Azure configs can now fail with an opaque upstream websocket error instead of a clear local configuration error.

q.Set("deployment", deployment)
u.RawQuery = q.Encode()
return u.String(), nil
}

// Compile-time assertion that Azure implements the realtime capability.
var _ core.RealtimeProvider = (*Provider)(nil)
87 changes: 87 additions & 0 deletions internal/providers/azure/realtime_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
package azure

import (
"context"
"net/url"
"testing"

"gomodel/internal/core"
"gomodel/internal/providers"
)

func TestRealtimeTarget(t *testing.T) {
const apiKey = "azure-secret-key"
p := New(providers.ProviderConfig{
APIKey: apiKey,
BaseURL: "https://myres.openai.azure.com/openai/deployments/gpt-realtime",
APIVersion: "2025-04-01-preview",
}, providers.ProviderOptions{}).(*Provider)

target, err := p.RealtimeTarget(context.Background(), &core.RealtimeRequest{Model: "gpt-realtime"})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}

u, err := url.Parse(target.URL)
if err != nil {
t.Fatalf("parse target url: %v", err)
}
if u.Scheme != "wss" || u.Host != "myres.openai.azure.com" || u.Path != "/openai/realtime" {
t.Errorf("endpoint = %q, want wss://myres.openai.azure.com/openai/realtime", target.URL)
}
if got := u.Query().Get("deployment"); got != "gpt-realtime" {
t.Errorf("deployment = %q, want gpt-realtime", got)
}
if got := u.Query().Get("api-version"); got != "2025-04-01-preview" {
t.Errorf("api-version = %q, want 2025-04-01-preview", got)
}
// Azure authenticates with the api-key header, not Bearer.
if got := target.Headers.Get("api-key"); got != apiKey {
t.Errorf("api-key = %q, want %q", got, apiKey)
}
if target.Headers.Get("Authorization") != "" {
t.Error("Authorization header must not be set for Azure (uses api-key)")
}
}

func TestRealtimeTargetStripsExistingOpenAIPath(t *testing.T) {
// A base already rooted at /openai must not yield /openai/openai/realtime.
for _, base := range []string{
"https://myres.openai.azure.com/openai",
"https://myres.openai.azure.com/openai/v1",
} {
p := New(providers.ProviderConfig{APIKey: "k", BaseURL: base}, providers.ProviderOptions{}).(*Provider)
target, err := p.RealtimeTarget(context.Background(), &core.RealtimeRequest{Model: "m"})
if err != nil {
t.Fatalf("base %q: unexpected error: %v", base, err)
}
u, err := url.Parse(target.URL)
if err != nil {
t.Fatalf("base %q: parse target url: %v", base, err)
}
if u.Path != "/openai/realtime" {
t.Errorf("base %q: path = %q, want /openai/realtime", base, u.Path)
}
}
}

func TestRealtimeTargetOmitsAuthWhenNoKey(t *testing.T) {
p := New(providers.ProviderConfig{
APIKey: "",
BaseURL: "https://myres.openai.azure.com",
}, providers.ProviderOptions{}).(*Provider)
target, err := p.RealtimeTarget(context.Background(), &core.RealtimeRequest{Model: "m"})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if _, present := target.Headers["Api-Key"]; present {
t.Error("api-key header should be absent when no key is configured")
}
}

func TestRealtimeTargetMissingModel(t *testing.T) {
p := New(providers.ProviderConfig{APIKey: "k", BaseURL: "https://myres.openai.azure.com"}, providers.ProviderOptions{}).(*Provider)
if _, err := p.RealtimeTarget(context.Background(), &core.RealtimeRequest{Model: " "}); err == nil {
t.Fatal("expected error for missing model")
}
}
6 changes: 6 additions & 0 deletions internal/providers/openai/chat_compatible.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,12 @@ func (c *ChatCompatible) SetBaseURL(url string) {
c.compatible.SetBaseURL(url)
}

// GetBaseURL returns the provider's current base URL by delegating to the
// wrapped compatible provider. The value is read live from the client, so it
// reflects SetBaseURL overrides. It is used to derive realtime websocket
// targets.
func (c *ChatCompatible) GetBaseURL() string {
return c.compatible.GetBaseURL()
}

// ChatCompletion sends a chat completion request to the provider. It is a thin
// delegation to the wrapped compatible provider; the response and error are
// returned to the caller unchanged.
func (c *ChatCompatible) ChatCompletion(ctx context.Context, req *core.ChatRequest) (*core.ChatResponse, error) {
return c.compatible.ChatCompletion(ctx, req)
Expand Down
68 changes: 68 additions & 0 deletions internal/providers/zai/realtime.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
package zai

import (
"context"
"net/http"
"net/url"
"strings"

"gomodel/internal/core"
)

// realtimePath is the fixed GLM-Realtime websocket path. It is the same across
// regions and is independent of the chat base path (paas/v4, coding/paas/v4,
// anthropic, …), so only the host and scheme are taken from the base URL; any
// path on the configured base URL is discarded when the realtime target is
// built.
const realtimePath = "/api/paas/v4/realtime"

// RealtimeTarget resolves the websocket dial target for a Z.ai / Zhipu
// GLM-Realtime session, satisfying core.RealtimeProvider. The core event schema
// mirrors OpenAI's Realtime API. The region host is taken from the configured
// base URL, while the path is always realtimePath, so chat base variants such
// as the Coding Plan endpoint still resolve to the right place.
//
// A nil request or a blank (whitespace-only) model yields an invalid-request
// error. Bearer auth is attached only when an API key is configured; the key is
// a secret and must never be logged.
func (p *Provider) RealtimeTarget(_ context.Context, req *core.RealtimeRequest) (*core.RealtimeTarget, error) {
	if req == nil || strings.TrimSpace(req.Model) == "" {
		return nil, core.NewInvalidRequestError("model is required for realtime sessions", nil)
	}

	wsURL, err := realtimeURL(p.GetBaseURL(), strings.TrimSpace(req.Model))
	if err != nil {
		return nil, err
	}

	target := &core.RealtimeTarget{URL: wsURL, Headers: http.Header{}}
	if key := p.apiKey; key != "" {
		target.Headers.Set("Authorization", "Bearer "+key)
	}
	return target, nil
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

// realtimeURL builds the GLM-Realtime endpoint
// <ws|wss>://<host>/api/paas/v4/realtime?model=... from a chat base URL.
//
// Only the host (region) and scheme of baseURL are used; its path and query are
// dropped in favor of realtimePath. A blank baseURL falls back to
// defaultBaseURL. http and ws map to ws; every other scheme maps to wss. A base
// URL that fails to parse or has no host yields an invalid-request error.
func realtimeURL(baseURL, model string) (string, error) {
	raw := strings.TrimSpace(baseURL)
	if raw == "" {
		raw = defaultBaseURL
	}

	parsed, err := url.Parse(raw)
	if err != nil || parsed.Host == "" {
		return "", core.NewInvalidRequestError("invalid realtime base url: "+raw, err)
	}

	var wsScheme string
	switch {
	case strings.EqualFold(parsed.Scheme, "http"), strings.EqualFold(parsed.Scheme, "ws"):
		wsScheme = "ws"
	default:
		wsScheme = "wss"
	}

	endpoint := url.URL{
		Scheme:   wsScheme,
		Host:     parsed.Host,
		Path:     realtimePath,
		RawQuery: url.Values{"model": {model}}.Encode(),
	}
	return endpoint.String(), nil
}

// Compile-time assertion that Z.ai implements the realtime capability.
var _ core.RealtimeProvider = (*Provider)(nil)
83 changes: 83 additions & 0 deletions internal/providers/zai/realtime_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
package zai

import (
"context"
"net/url"
"strings"
"testing"

"gomodel/internal/core"
"gomodel/internal/providers"
)

func TestRealtimeTarget(t *testing.T) {
const apiKey = "zai-secret-key"
p := New(providers.ProviderConfig{APIKey: apiKey}, providers.ProviderOptions{}).(*Provider)

target, err := p.RealtimeTarget(context.Background(), &core.RealtimeRequest{Model: "glm-realtime"})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if !strings.HasPrefix(target.URL, "wss://api.z.ai/api/paas/v4/realtime?") {
t.Errorf("url = %q, want Z.ai realtime endpoint", target.URL)
}
u, err := url.Parse(target.URL)
if err != nil {
t.Fatalf("parse target url: %v", err)
}
if got := u.Query().Get("model"); got != "glm-realtime" {
t.Errorf("model query = %q, want %q", got, "glm-realtime")
}
if got := target.Headers.Get("Authorization"); got != "Bearer "+apiKey {
t.Errorf("Authorization = %q, want bearer with key", got)
}
}

// TestRealtimeTargetFollowsSetBaseURL verifies that a base URL applied after
// construction (as with ZAI_BASE_URL pointing at the open.bigmodel.cn region)
// determines the realtime host.
func TestRealtimeTargetFollowsSetBaseURL(t *testing.T) {
	provider := New(providers.ProviderConfig{APIKey: "k"}, providers.ProviderOptions{}).(*Provider)
	provider.SetBaseURL("https://open.bigmodel.cn/api/paas/v4")

	request := &core.RealtimeRequest{Model: "glm-realtime"}
	target, err := provider.RealtimeTarget(context.Background(), request)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	const wantPrefix = "wss://open.bigmodel.cn/api/paas/v4/realtime?"
	if !strings.HasPrefix(target.URL, wantPrefix) {
		t.Errorf("url = %q, want the configured region host", target.URL)
	}
}

// TestRealtimeTargetNormalizesCodingPlanBase verifies that the GLM Coding Plan
// base (/api/coding/paas/v4) still resolves to the fixed realtime path
// /api/paas/v4/realtime instead of /api/coding/paas/v4/realtime.
func TestRealtimeTargetNormalizesCodingPlanBase(t *testing.T) {
	provider := New(providers.ProviderConfig{APIKey: "k"}, providers.ProviderOptions{}).(*Provider)
	provider.SetBaseURL("https://api.z.ai/api/coding/paas/v4")

	request := &core.RealtimeRequest{Model: "glm-realtime"}
	target, err := provider.RealtimeTarget(context.Background(), request)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	parsed, err := url.Parse(target.URL)
	if err != nil {
		t.Fatalf("parse target url: %v", err)
	}
	if got := parsed.Path; got != "/api/paas/v4/realtime" {
		t.Errorf("path = %q, want /api/paas/v4/realtime", got)
	}
}

func TestRealtimeTargetOmitsAuthWhenNoKey(t *testing.T) {
p := New(providers.ProviderConfig{APIKey: ""}, providers.ProviderOptions{}).(*Provider)
target, err := p.RealtimeTarget(context.Background(), &core.RealtimeRequest{Model: "m"})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if _, present := target.Headers["Authorization"]; present {
t.Error("Authorization header should be absent when no API key is configured")
}
}

// TestRealtimeTargetMissingModel verifies that a whitespace-only model is
// treated as missing and rejected with an error.
func TestRealtimeTargetMissingModel(t *testing.T) {
	provider := New(providers.ProviderConfig{APIKey: "k"}, providers.ProviderOptions{}).(*Provider)

	_, err := provider.RealtimeTarget(context.Background(), &core.RealtimeRequest{Model: " "})
	if err == nil {
		t.Fatal("expected error for missing model")
	}
}
24 changes: 16 additions & 8 deletions internal/providers/zai/zai.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,25 +22,33 @@ var Registration = providers.Registration{
}

// Provider implements the core.Provider interface for Z.ai.
// It embeds the OpenAI-compatible chat client, whose methods are promoted onto
// Provider, and keeps its own copy of the API key: apiKey is retained to inject
// auth on the GLM-Realtime websocket target.
type Provider struct {
// ChatCompatible supplies the OpenAI-compatible provider behavior.
*openai.ChatCompatible
// apiKey is the configured API key; it is a secret and must not be logged.
apiKey string
}

var _ core.Provider = (*Provider)(nil)

// New creates a new Z.ai provider.
func New(cfg providers.ProviderConfig, opts providers.ProviderOptions) core.Provider {
return &Provider{openai.NewChatCompatible(cfg.APIKey, opts, openai.CompatibleProviderConfig{
ProviderName: "zai",
BaseURL: providers.ResolveBaseURL(cfg.BaseURL, defaultBaseURL),
})}
return &Provider{
ChatCompatible: openai.NewChatCompatible(cfg.APIKey, opts, openai.CompatibleProviderConfig{
ProviderName: "zai",
BaseURL: providers.ResolveBaseURL(cfg.BaseURL, defaultBaseURL),
}),
apiKey: cfg.APIKey,
}
}

// NewWithHTTPClient creates a new Z.ai provider with a custom HTTP client.
// If httpClient is nil, http.DefaultClient is used.
func NewWithHTTPClient(apiKey string, baseURL string, httpClient *http.Client, hooks llmclient.Hooks) *Provider {
return &Provider{openai.NewChatCompatibleWithHTTPClient(apiKey, httpClient, hooks, openai.CompatibleProviderConfig{
ProviderName: "zai",
BaseURL: providers.ResolveBaseURL(baseURL, defaultBaseURL),
})}
return &Provider{
ChatCompatible: openai.NewChatCompatibleWithHTTPClient(apiKey, httpClient, hooks, openai.CompatibleProviderConfig{
ProviderName: "zai",
BaseURL: providers.ResolveBaseURL(baseURL, defaultBaseURL),
}),
apiKey: apiKey,
}
}