diff --git a/cmd/root/sandbox.go b/cmd/root/sandbox.go index 9594afdac..408af0780 100644 --- a/cmd/root/sandbox.go +++ b/cmd/root/sandbox.go @@ -18,7 +18,6 @@ import ( "github.com/spf13/pflag" "github.com/docker/docker-agent/pkg/config" - "github.com/docker/docker-agent/pkg/desktop" "github.com/docker/docker-agent/pkg/environment" "github.com/docker/docker-agent/pkg/paths" "github.com/docker/docker-agent/pkg/sandbox" @@ -99,51 +98,38 @@ func runInSandbox(ctx context.Context, cmd *cobra.Command, args []string, runCon // ...) but blocks every *.docker.com host as well as every // package-registry / source-host the auto-installer reaches for. // Open the minimum: the configured Docker AI gateway when set, and - // the package-host set only when the kit-build determined the - // agent has at least one MCP / LSP toolset that may auto-install. - needsToolInstall := kitResult != nil && kitResult.NeedsToolInstall - allowSandboxHosts(ctx, backend, name, runConfig.ModelsGateway, needsToolInstall) + // the per-toolset package hosts the kit-build resolved against the + // aqua registry. The kit narrows by package type (Go module proxy + // for go_install, GitHub releases for github_release) so we don't + // open holes for hosts the agent doesn't actually need. + var toolHosts []string + if kitResult != nil { + toolHosts = kitResult.ToolInstallHosts + } + allowSandboxHosts(ctx, backend, name, runConfig.ModelsGateway, toolHosts) // Resolve env vars the agent needs and forward them into the sandbox. // Docker Desktop proxies well-known API keys automatically; this handles // any additional vars (e.g. MCP tool secrets). envFlags, envVars := sandbox.EnvForAgent(ctx, agentRef, envProvider) - // Forward the gateway via the docker-agent process env (not as an - // inline `-e KEY=VALUE` argument) so a gateway URL that happens to - // carry credentials never leaks into the slog'd `docker sandbox - // exec` argv. + // Forward the gateway by name so a URL with credentials never + // shows up in the slog'd `docker sandbox exec` argv. We do not + // forward DOCKER_TOKEN: inside the sandbox it must come only from + // sandbox-tokens.json (kept fresh by StartTokenWriterIfNeeded). if gateway := runConfig.ModelsGateway; gateway != "" { envFlags = append(envFlags, "-e", envModelsGateway) envVars = append(envVars, envModelsGateway+"="+gateway) - - // Forward a *fresh* Docker Desktop token. We deliberately bypass - // envProvider here: that chain consults the OS environment first, - // where any pre-existing DOCKER_TOKEN value is by definition stale - // (the gateway issues short-lived JWTs that expire roughly - // hourly). Going straight to the Docker Desktop backend gives us - // the same fresh token that [sandbox.StartTokenWriterIfNeeded] - // will keep refreshing in the background; seeding it as an env - // var lets the inner agent's startup check - // ([config.CheckRequiredEnvVars]) succeed even on existing sandbox - // images that read sandbox-tokens.json from the wrong path because - // of the persistent-pre-run bug fixed in pkg/cli/flags.go. - // - // Like the gateway above, the token is forwarded by name only — - // it would otherwise show up in the slog'd argv as plaintext. - if token := desktop.GetToken(ctx); token != "" { - envFlags = append(envFlags, "-e", environment.DockerDesktopTokenEnv) - envVars = append(envVars, environment.DockerDesktopTokenEnv+"="+token) - } } - // Point the in-sandbox resolvers at the staged kit. We use the - // `-e KEY=VALUE` form so the value is set directly inside the - // container; we deliberately do not append it to envVars (which - // would set it on the host docker CLI process too — a path that - // only makes sense inside the sandbox). + // Point the in-sandbox resolvers at the staged kit. The sandbox CLI + // exposes extra workspaces at the same path as on the host, so we + // forward HostDir verbatim. We use the `-e KEY=VALUE` form so the + // value is set directly inside the container; we deliberately do not + // append it to envVars (which would set it on the host docker CLI + // process too — a path that only makes sense inside the sandbox). if kitResult != nil { - envFlags = append(envFlags, "-e", skills.KitDirEnv+"="+kit.MountPath) + envFlags = append(envFlags, "-e", skills.KitDirEnv+"="+kitResult.HostDir) } dockerCmd := backend.BuildExecCmd(ctx, name, wd, dockerAgentArgs, envFlags, envVars) @@ -194,55 +180,31 @@ func dockerAgentArgs(cmd *cobra.Command, args []string, configDir string) []stri return dockerAgentArgs } -// autoInstallHosts is the set of hostnames the toolinstall package -// reaches for when fetching tools at runtime: the aqua registry data -// (raw.githubusercontent.com), the GitHub API (latest release -// resolution), GitHub releases themselves, the redirected release -// asset host (objects.githubusercontent.com), and the Go module -// proxy + checksum DB used by `go install`. We allowlist this whole -// set whenever a sandbox is launched with the kit pipeline so that -// auto-install — which is on by default for every lsp / mcp toolset -// — can actually fetch what it needs. Without this, missing tools -// (gopls, golangci-lint, ...) report a misleading "403 blocked by -// network policy" from go install / curl instead of installing. -var autoInstallHosts = []string{ - "github.com", - "api.github.com", - "raw.githubusercontent.com", - "objects.githubusercontent.com", - "codeload.github.com", - "proxy.golang.org", - "sum.golang.org", - // `go install` downloads the Go toolchain from Google's blob - // storage when a module's go.mod pins a newer Go than the one - // already in the sandbox image. - "storage.googleapis.com", -} - // allowSandboxHosts adds per-sandbox allow-network rules for every // host the in-sandbox runtime is known to need: the configured // models gateway (when set) and the package hosts the auto-installer -// reaches for (when needsToolInstall is true). The default sandbox -// proxy denies all of them; without this, the inner agent's first -// request returns a misleading "403 Blocked by network policy". +// reaches for (when the kit build identified at least one +// auto-installable toolset). The default sandbox proxy denies all of +// them; without this, the inner agent's first request returns a +// misleading "403 Blocked by network policy". // // Holes are punched only when the corresponding feature is in play: // - the gateway host is added only when gatewayURL is non-empty; -// - the autoInstallHosts set is added only when needsToolInstall -// is true (i.e. the kit build saw at least one MCP / LSP -// toolset that might auto-install). Sandboxes that don't run -// auto-install keep the strict default-deny. +// - the per-agent install hosts come from the kit build, which +// looks each toolset up against the aqua registry and contributes +// only the hosts that toolset's install path actually uses (Go +// module proxy + toolchain bootstrap for go_install packages, +// GitHub release hosts for github_release packages). When a +// lookup failed, the kit folds in [toolinstall.FallbackHosts] +// so the run can still succeed. // // Best-effort: a malformed gateway URL or a backend that doesn't // support per-sandbox policies is logged at debug level and the run // proceeds. The user will then see a network-policy 403 from the // inner and we surface that diagnostic verbatim. -func allowSandboxHosts(ctx context.Context, backend *sandbox.Backend, name, gatewayURL string, needsToolInstall bool) { +func allowSandboxHosts(ctx context.Context, backend *sandbox.Backend, name, gatewayURL string, toolInstallHosts []string) { var hosts []string - - if needsToolInstall { - hosts = append(hosts, autoInstallHosts...) - } + hosts = append(hosts, toolInstallHosts...) if gatewayURL != "" { if h := gatewayHostPort(gatewayURL); h != "" { @@ -420,15 +382,27 @@ func printModelsGateway(w io.Writer, gateway string) { fmt.Fprintf(w, "Models gateway: %s (allowlisting %s in the sandbox proxy)\n", display, host) } -// printToolInstallAllowance prints a single line announcing whether -// the package-host allowlist was opened for this sandbox, and why. -// We surface this so the user sees what holes were punched in the -// default-deny network policy. Silent when the kit isn't built or -// when no auto-installable toolset was detected. +// printToolInstallAllowance prints a multi-line description of the +// package-host allowlist opened for this sandbox: a one-liner +// summary followed by every host on its own indented line so the +// user can see exactly what holes the run punched in the default- +// deny network policy. Silent when the kit isn't built or when no +// auto-installable toolset was detected. +// +// When per-toolset registry resolution failed for at least one +// toolset, a best-effort fallback union was used instead and a +// warning line names each unresolved toolset so the user can spot +// why the allowlist is wider than expected. func printToolInstallAllowance(w io.Writer, kitResult *kit.Result) { if kitResult == nil || !kitResult.NeedsToolInstall { return } - fmt.Fprintf(w, "Tool install: agent has at least one MCP/LSP toolset, allowlisting %d package hosts in the sandbox proxy\n", - len(autoInstallHosts)) + fmt.Fprintf(w, "Tool install: agent has at least one MCP/LSP toolset, allowlisting %d package host(s) in the sandbox proxy:\n", + len(kitResult.ToolInstallHosts)) + for _, h := range kitResult.ToolInstallHosts { + fmt.Fprintf(w, " - %s\n", h) + } + for _, e := range kitResult.ToolInstallHostsResolutionErr { + fmt.Fprintf(w, " ! %s (using fallback host set)\n", e.Error()) + } } diff --git a/cmd/root/sandbox_test.go b/cmd/root/sandbox_test.go index 1aaadc9e0..6cd44aaaf 100644 --- a/cmd/root/sandbox_test.go +++ b/cmd/root/sandbox_test.go @@ -1,6 +1,7 @@ package root import ( + "errors" "slices" "strings" "testing" @@ -8,6 +9,8 @@ import ( "github.com/spf13/cobra" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + + "github.com/docker/docker-agent/pkg/sandbox/kit" ) // TestDockerAgentArgs_NoDuplicateArgs is a regression test for a bug where the @@ -189,33 +192,62 @@ func TestPrintModelsGateway(t *testing.T) { } } -func TestAutoInstallHosts(t *testing.T) { +func TestPrintToolInstallAllowance(t *testing.T) { t.Parallel() - // Spot-check the static set: the package hosts the auto-installer - // reaches at runtime (Go module proxy, GitHub releases, the toolchain - // blob storage backing `go install`) must all be in the allowlist - // or the inner agent will see "403 Blocked by network policy" with - // no other diagnostic. - required := []string{ - "github.com", - "api.github.com", - "raw.githubusercontent.com", - "objects.githubusercontent.com", - "proxy.golang.org", - "sum.golang.org", - "storage.googleapis.com", - } - for _, host := range required { - assert.Contains(t, autoInstallHosts, host, - "%s must be in autoInstallHosts so auto-install can reach it inside the sandbox", host) + tests := []struct { + name string + result *kit.Result + want string + wantNot []string + }{ + { + name: "nil result is silent", + result: nil, + want: "", + }, + { + name: "no auto-install is silent", + result: &kit.Result{NeedsToolInstall: false}, + want: "", + }, + { + name: "lists every host on its own line", + result: &kit.Result{ + NeedsToolInstall: true, + ToolInstallHosts: []string{ + "api.github.com", "proxy.golang.org", "sum.golang.org", + }, + }, + want: "Tool install: agent has at least one MCP/LSP toolset, allowlisting 3 package host(s) in the sandbox proxy:\n" + + " - api.github.com\n" + + " - proxy.golang.org\n" + + " - sum.golang.org\n", + }, + { + name: "resolution errors are surfaced after the host list", + result: &kit.Result{ + NeedsToolInstall: true, + ToolInstallHosts: []string{"api.github.com"}, + ToolInstallHostsResolutionErr: []kit.ToolHostError{ + {Command: "gopls", Version: "golang/tools@v0.21.0", Err: errors.New("boom")}, + }, + }, + want: "Tool install: agent has at least one MCP/LSP toolset, allowlisting 1 package host(s) in the sandbox proxy:\n" + + " - api.github.com\n" + + " ! resolving install hosts for \"gopls\"@\"golang/tools@v0.21.0\": boom (using fallback host set)\n", + wantNot: []string{}, + }, } - // And the list itself must be commaless / spaceless: AllowHosts - // rejects entries that look like they could smuggle several rules - // past the policy engine. - for _, host := range autoInstallHosts { - assert.NotContains(t, host, ",", "%q must not contain a comma", host) - assert.NotContains(t, host, " ", "%q must not contain whitespace", host) + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var buf strings.Builder + printToolInstallAllowance(&buf, tt.result) + assert.Equal(t, tt.want, buf.String()) + for _, ne := range tt.wantNot { + assert.NotContains(t, buf.String(), ne) + } + }) } } diff --git a/pkg/environment/sandbox_tokens.go b/pkg/environment/sandbox_tokens.go index af55cccb4..1f70f8fb7 100644 --- a/pkg/environment/sandbox_tokens.go +++ b/pkg/environment/sandbox_tokens.go @@ -152,13 +152,19 @@ func (w *SandboxTokenWriter) writeOnce(ctx context.Context) { return } - // Ensure the parent directory exists. + // Ensure the parent directory exists. Keep it 0o700: the directory's + // mode is the actual confidentiality boundary on the host (no other + // host user can traverse into it). if err := os.MkdirAll(filepath.Dir(w.path), 0o700); err != nil { slog.DebugContext(ctx, "Failed to create sandbox tokens directory", "path", w.path, "error", err) return } - if err := atomicfile.Write(w.path, bytes.NewReader(data), 0o600); err != nil { + // The file is bind-mounted into a sandbox whose user is not the host + // user that wrote it; 0o600 would make it unreadable inside the + // sandbox and break DOCKER_TOKEN forwarding. The 0o700 parent dir + // already prevents other host users from reaching this file. + if err := atomicfile.Write(w.path, bytes.NewReader(data), 0o644); err != nil { slog.DebugContext(ctx, "Failed to write sandbox tokens file", "path", w.path, "error", err) return } diff --git a/pkg/environment/sandbox_tokens_test.go b/pkg/environment/sandbox_tokens_test.go index 9405f5e41..0d864e1a5 100644 --- a/pkg/environment/sandbox_tokens_test.go +++ b/pkg/environment/sandbox_tokens_test.go @@ -5,6 +5,7 @@ import ( "encoding/json" "os" "path/filepath" + "runtime" "testing" "time" @@ -108,6 +109,30 @@ func TestSandboxTokenWriter_WritesFileOnStart(t *testing.T) { assert.Equal(t, "fresh-token", tokens.DockerToken) } +// The token file is bind-mounted into a sandbox whose UID does not +// match the host user that wrote it. The mode must therefore include +// other-read; the 0o700 parent dir is what keeps other host users out. +func TestSandboxTokenWriter_FileIsReadableByOther(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("POSIX file modes are not enforced on Windows") + } + t.Parallel() + + dir := t.TempDir() + path := filepath.Join(dir, SandboxTokensFileName) + + provider := NewEnvListProvider([]string{"DOCKER_TOKEN=fresh-token"}) + w := NewSandboxTokenWriter(path, provider, time.Hour) + w.Start(t.Context()) + defer w.Stop() + + info, err := os.Stat(path) + require.NoError(t, err) + assert.NotZero(t, info.Mode().Perm()&0o004, + "sandbox-tokens.json must be readable by other so the sandbox UID can read it; got mode %#o", + info.Mode().Perm()) +} + func TestSandboxTokenWriter_RefreshesToken(t *testing.T) { t.Parallel() diff --git a/pkg/sandbox/kit/kit.go b/pkg/sandbox/kit/kit.go index cb7d5e636..3babc0029 100644 --- a/pkg/sandbox/kit/kit.go +++ b/pkg/sandbox/kit/kit.go @@ -48,11 +48,9 @@ import ( "github.com/docker/docker-agent/pkg/paths" "github.com/docker/docker-agent/pkg/promptfiles" "github.com/docker/docker-agent/pkg/skills" + "github.com/docker/docker-agent/pkg/toolinstall" ) -// MountPath is the path at which the kit is bind-mounted inside the sandbox. -const MountPath = "/agent-kit" - // manifestFile is the on-disk name of the kit's table of contents. const manifestFile = "manifest.json" @@ -91,9 +89,10 @@ type Options struct { // Result is what [Build] returns. type Result struct { // HostDir is the absolute host path of the staged kit. Mount it - // read-only at [MountPath] inside the sandbox and forward - // `-e DOCKER_AGENT_KIT_DIR=` so the in-sandbox - // resolvers find it. + // read-only into the sandbox (the sandbox CLI exposes extras at + // the same path as on the host) and forward + // `-e DOCKER_AGENT_KIT_DIR=` so the in-sandbox resolvers + // find it. HostDir string // Manifest describes what was staged. It contains absolute host @@ -110,6 +109,30 @@ type Result struct { // don't open holes in the sandbox proxy when no agent could // possibly need them. NeedsToolInstall bool + + // ToolInstallHosts is the sorted, deduplicated set of hostnames + // the in-sandbox auto-installer needs to reach in order to install + // every auto-installable toolset declared by the agent. It is + // populated only when NeedsToolInstall is true. + // + // Each toolset's package is looked up against the aqua registry + // and contributes only the hosts its install path actually uses + // (Go module proxy + toolchain bootstrap for go_install packages, + // GitHub release hosts for github_release packages, plus the + // shared registry-lookup hosts in both cases). When a lookup + // fails, [toolinstall.FallbackHosts] is folded in instead so the + // install can still succeed at the cost of opening every install + // host — callers that want to fail closed should inspect + // ToolInstallHostsResolutionErr. + ToolInstallHosts []string + + // ToolInstallHostsResolutionErr lists the per-toolset registry + // lookup errors encountered while computing ToolInstallHosts. + // When non-empty, ToolInstallHosts conservatively contains the + // fallback union of every install host so the run can still + // proceed; callers can choose stricter behaviour (refuse the run, + // surface the error to the user) by checking this slice. + ToolInstallHostsResolutionErr []ToolHostError } // Manifest is the kit's table of contents. @@ -234,10 +257,14 @@ func Build(ctx context.Context, opts Options) (*Result, error) { "prompt_files", len(manifest.PromptFiles), "redactions", len(manifest.Redactions)) + hosts, hostErrs := resolveToolInstallHosts(ctx, cfg) + return &Result{ - HostDir: finalDir, - Manifest: manifest, - NeedsToolInstall: needsAutoInstall(cfg), + HostDir: finalDir, + Manifest: manifest, + NeedsToolInstall: len(hosts) > 0, + ToolInstallHosts: hosts, + ToolInstallHostsResolutionErr: hostErrs, }, nil } @@ -816,17 +843,7 @@ func needsAutoInstall(cfg *latestcfg.Config) bool { if cfg == nil { return false } - for _, m := range cfg.MCPs { - if isAutoInstallable(m.Toolset) { - return true - } - } - for _, agent := range cfg.Agents { - if slices.ContainsFunc(agent.Toolsets, isAutoInstallable) { - return true - } - } - return false + return len(collectAutoInstallable(cfg)) > 0 } // isAutoInstallable returns true if ts is the kind of toolset @@ -846,3 +863,115 @@ func isAutoInstallable(ts latestcfg.Toolset) bool { } return true } + +// ToolHostError records a single toolset whose package could not be +// resolved against the aqua registry while computing the sandbox +// allowlist. Callers can use it to surface a precise diagnostic to +// the user ("could not resolve gopls; falling back to the union of +// every install host") instead of silently degrading the network +// policy. +type ToolHostError struct { + // Command is the toolset's Command field — the same string the + // in-sandbox runtime would auto-install. + Command string + // Version is the toolset's Version field. Empty means "latest / + // resolve by command". + Version string + // Err is the underlying registry / lookup error. + Err error +} + +func (e ToolHostError) Error() string { + if e.Version == "" { + return fmt.Sprintf("resolving install hosts for %q: %v", e.Command, e.Err) + } + return fmt.Sprintf("resolving install hosts for %q@%q: %v", e.Command, e.Version, e.Err) +} + +func (e ToolHostError) Unwrap() error { return e.Err } + +// resolveToolInstallHosts walks every auto-installable toolset in cfg +// and returns the merged set of hosts the in-sandbox auto-installer +// must reach to install them, plus any per-toolset resolution errors. +// +// On any resolution error, the conservative fallback union (every +// install host known to the toolinstall package) is folded in so +// that the run still succeeds — trading minimisation for +// availability. Callers that want strict failure behaviour can +// inspect the returned error slice and refuse to launch. +// +// Returns (nil, nil) when cfg has no auto-installable toolset — +// callers use len(hosts)==0 to mean "don't open any holes". +func resolveToolInstallHosts(ctx context.Context, cfg *latestcfg.Config) ([]string, []ToolHostError) { + if cfg == nil { + return nil, nil + } + + toolsets := collectAutoInstallable(cfg) + if len(toolsets) == 0 { + return nil, nil + } + + seen := make(map[string]bool) + var hosts []string + var errs []ToolHostError + + for _, ts := range toolsets { + resolved, err := toolinstall.ResolveHosts(ctx, ts.Command, ts.Version) + if err != nil { + errs = append(errs, ToolHostError{Command: ts.Command, Version: ts.Version, Err: err}) + continue + } + for _, h := range resolved { + if !seen[h] { + seen[h] = true + hosts = append(hosts, h) + } + } + } + + if len(errs) > 0 { + // Fail-open: fold in the fallback union so the run can still + // install. Callers that prefer fail-closed inspect errs. + for _, h := range toolinstall.FallbackHosts() { + if !seen[h] { + seen[h] = true + hosts = append(hosts, h) + } + } + } + + sort.Strings(hosts) + return hosts, errs +} + +// collectAutoInstallable returns every toolset in cfg whose Command +// the in-sandbox runtime would push through +// [toolinstall.EnsureCommand]. Order is deterministic (top-level +// MCPs by map-key, then per-agent toolsets in declaration order) +// so the resolution loop's slog output is reproducible. +func collectAutoInstallable(cfg *latestcfg.Config) []latestcfg.Toolset { + var out []latestcfg.Toolset + + names := make([]string, 0, len(cfg.MCPs)) + for name := range cfg.MCPs { + names = append(names, name) + } + sort.Strings(names) + for _, name := range names { + ts := cfg.MCPs[name].Toolset + if isAutoInstallable(ts) { + out = append(out, ts) + } + } + + for _, agent := range cfg.Agents { + for _, ts := range agent.Toolsets { + if isAutoInstallable(ts) { + out = append(out, ts) + } + } + } + + return out +} diff --git a/pkg/sandbox/sandbox.go b/pkg/sandbox/sandbox.go index 45972c647..c92ef1280 100644 --- a/pkg/sandbox/sandbox.go +++ b/pkg/sandbox/sandbox.go @@ -291,8 +291,9 @@ func StartTokenWriterIfNeeded(ctx context.Context, dir, modelsGateway string) fu return w.Stop } -// proxyManagedEnvVars lists the environment variables that Docker Desktop -// automatically proxies into sandboxes. We don't need to forward these. +// proxyManagedEnvVars lists env vars we never forward to the sandbox. +// Docker Desktop proxies the API keys automatically; DOCKER_TOKEN must +// come from sandbox-tokens.json, not a one-shot env var. var proxyManagedEnvVars = []string{ "OPENAI_API_KEY", "ANTHROPIC_API_KEY", @@ -300,6 +301,7 @@ var proxyManagedEnvVars = []string{ "MISTRAL_API_KEY", "XAI_API_KEY", "NEBIUS_API_KEY", + environment.DockerDesktopTokenEnv, } // EnvForAgent loads the agent config and gathers the environment diff --git a/pkg/toolinstall/hosts.go b/pkg/toolinstall/hosts.go new file mode 100644 index 000000000..40f787dca --- /dev/null +++ b/pkg/toolinstall/hosts.go @@ -0,0 +1,104 @@ +package toolinstall + +import ( + "context" + "slices" + "sort" +) + +// Package-host sets used by the auto-installer at runtime. +// +// We split them by what the install code actually does so callers +// (notably the docker-agent sandbox) can open the smallest possible +// hole in a default-deny network policy: +// +// - lookupHosts: the registry index + per-package YAML +// (raw.githubusercontent.com), and the GitHub API used for +// latest-version resolution and as auth boost for raw fetches. +// Every install path consults at least these. +// +// - goInstallHosts: what `go install` reaches for. proxy.golang.org +// and sum.golang.org are the module proxy + checksum DB. The Go +// toolchain bootstrap (GOTOOLCHAIN=auto) consults go.dev/dl, +// downloads from dl.google.com, which redirects to +// storage.googleapis.com — all three need to be reachable when a +// module pins a newer Go than the sandbox image ships +// (e.g. gopls@v0.21.0 needs Go 1.24 on a 1.23 image). +// +// - githubReleaseHosts: github.com (release page), the asset host +// that releases redirect to (objects.githubusercontent.com), and +// codeload (used for source tarballs by some package types). +// +// Hosts must be bare (no scheme, no port, no path) so they pass +// through Backend.AllowHosts unchanged. +var ( + lookupHosts = []string{ + "raw.githubusercontent.com", + "api.github.com", + } + + goInstallHosts = []string{ + "proxy.golang.org", + "sum.golang.org", + "go.dev", + "dl.google.com", + "storage.googleapis.com", + } + + githubReleaseHosts = []string{ + "github.com", + "objects.githubusercontent.com", + "codeload.github.com", + } +) + +// FallbackHosts returns the union of every host any auto-install path +// might reach. Callers use it when they decide to allow a tool to +// install but cannot resolve the specific package on the host (e.g. +// the registry is unreachable and there's no disk cache yet). It +// preserves install behaviour at the cost of giving up the per-package +// narrowing — security-conscious callers should prefer reporting the +// resolution error and refusing the run instead. +func FallbackHosts() []string { + return mergeHosts(lookupHosts, goInstallHosts, githubReleaseHosts) +} + +// InstallHosts returns the hostnames the auto-installer will reach +// for when installing this package. Always includes the registry-side +// lookup hosts because the in-sandbox installer re-runs the same +// LookupByName/LookupByCommand path the host uses here. +func (p *Package) InstallHosts() []string { + if p.IsGoPackage() { + return mergeHosts(lookupHosts, goInstallHosts) + } + return mergeHosts(lookupHosts, githubReleaseHosts) +} + +// ResolveHosts returns the set of hostnames the auto-installer needs +// to reach inside a sandbox in order to install command at version. +// +// version follows the [EnsureCommand] convention: +// - "" → resolve by command name; latest version. +// - "owner/repo" → look up by aqua name; latest version. +// - "owner/repo@version" → look up by aqua name; explicit version. +// +// The returned list is sorted and deduplicated. Errors propagate the +// underlying registry failure verbatim so the caller can log it and +// decide whether to fail the run or fall back to [FallbackHosts]. +func ResolveHosts(ctx context.Context, command, version string) ([]string, error) { + pkg, _, err := lookupPackage(ctx, SharedRegistry(), command, version) + if err != nil { + return nil, err + } + return pkg.InstallHosts(), nil +} + +// mergeHosts returns the sorted, deduplicated union of its arguments. +func mergeHosts(sets ...[]string) []string { + var out []string + for _, s := range sets { + out = append(out, s...) + } + sort.Strings(out) + return slices.Compact(out) +} diff --git a/pkg/toolinstall/hosts_test.go b/pkg/toolinstall/hosts_test.go new file mode 100644 index 000000000..ecc2ccbcb --- /dev/null +++ b/pkg/toolinstall/hosts_test.go @@ -0,0 +1,100 @@ +package toolinstall + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestPackageInstallHosts_GoPackage(t *testing.T) { + t.Parallel() + + // Go packages go through `go install`, which pulls from the Go + // module proxy + checksum DB and may bootstrap a newer toolchain + // from go.dev / dl.google.com / storage.googleapis.com. The + // registry lookup itself still hits raw.githubusercontent.com + // and api.github.com, so those must be present too. + pkg := &Package{Type: "go_install", RepoOwner: "golang", RepoName: "tools"} + + got := pkg.InstallHosts() + + assert.ElementsMatch(t, []string{ + "raw.githubusercontent.com", + "api.github.com", + "proxy.golang.org", + "sum.golang.org", + "go.dev", + "dl.google.com", + "storage.googleapis.com", + }, got, "go_install package must allow Go module proxy + toolchain bootstrap, not GitHub releases") + + // Crucially, GitHub release hosts must not be opened for a Go + // package — `go install` never reaches them. + assert.NotContains(t, got, "github.com") + assert.NotContains(t, got, "objects.githubusercontent.com") + assert.NotContains(t, got, "codeload.github.com") +} + +func TestPackageInstallHosts_GitHubRelease(t *testing.T) { + t.Parallel() + + pkg := &Package{Type: "github_release", RepoOwner: "junegunn", RepoName: "fzf"} + + got := pkg.InstallHosts() + + assert.ElementsMatch(t, []string{ + "raw.githubusercontent.com", + "api.github.com", + "github.com", + "objects.githubusercontent.com", + "codeload.github.com", + }, got, "github_release package must allow GitHub release hosts, not the Go module proxy") + + // And a github_release tool must NOT punch holes for the Go + // toolchain bootstrap — those hosts have nothing to do with it. + assert.NotContains(t, got, "proxy.golang.org") + assert.NotContains(t, got, "go.dev") + assert.NotContains(t, got, "dl.google.com") +} + +func TestPackageInstallHosts_SortedAndDeduped(t *testing.T) { + t.Parallel() + + pkg := &Package{Type: "go_install"} + got := pkg.InstallHosts() + + for i := 1; i < len(got); i++ { + assert.Less(t, got[i-1], got[i], + "InstallHosts must return sorted, dedup'd entries; got %v", got) + } +} + +func TestFallbackHosts_CoversBothPaths(t *testing.T) { + t.Parallel() + + // FallbackHosts is what we open when we *cannot* narrow per + // package (registry unreachable, no cache). It must therefore + // cover every host any install path might reach — both Go + // module proxy + toolchain AND GitHub releases — otherwise the + // fallback is silently incomplete. + got := FallbackHosts() + + for _, want := range []string{ + "raw.githubusercontent.com", "api.github.com", + "proxy.golang.org", "sum.golang.org", + "go.dev", "dl.google.com", "storage.googleapis.com", + "github.com", "objects.githubusercontent.com", "codeload.github.com", + } { + assert.Contains(t, got, want, "fallback must cover %q", want) + } +} + +func TestMergeHosts_DedupAndSort(t *testing.T) { + t.Parallel() + + got := mergeHosts( + []string{"b", "a"}, + []string{"a", "c"}, + ) + assert.Equal(t, []string{"a", "b", "c"}, got) +}