Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions daemon/command/config_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ func installConfigFlags(conf *config.Config, flags *pflag.FlagSet) {
flags.StringVar(&conf.SeccompProfile, "seccomp-profile", conf.SeccompProfile, `Path to seccomp profile. Set to "unconfined" to disable the default seccomp profile`)
flags.Var(&conf.ShmSize, "default-shm-size", "Default shm size for containers")
flags.BoolVar(&conf.NoNewPrivileges, "no-new-privileges", false, "Set no-new-privileges by default for new containers")
flags.BoolVar(&conf.AdoptUserCgroups, "adopt-user-cgroups", false, "Automatically set container cgroup parent based on the API client's cgroup")
flags.StringVar(&conf.IpcMode, "default-ipc-mode", conf.IpcMode, `Default mode for containers ipc ("shareable" | "private")`)
flags.Var(&conf.NetworkConfig.DefaultAddressPools, "default-address-pool", "Default address pools for node specific local networks")
flags.StringVar(&conf.NetworkConfig.FirewallBackend, "firewall-backend", "", "Firewall backend to use, iptables or nftables")
Expand Down
12 changes: 12 additions & 0 deletions daemon/command/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,12 @@ func (cli *daemonCLI) start(ctx context.Context) (retErr error) {

httpServer := &http.Server{
ReadHeaderTimeout: 5 * time.Minute, // "G112: Potential Slowloris Attack (gosec)"; not a real concern for our use, so setting a long timeout.
ConnContext: func(ctx context.Context, c net.Conn) context.Context {
// Store the connection in context so middleware can access it for peer credentials
// Use a custom key instead of http.LocalAddrContextKey because the HTTP stack
// overwrites that with the address, losing the connection.
return context.WithValue(ctx, middleware.PeerConnKey, c)
},
}
apiShutdownCtx, apiShutdownCancel := context.WithCancel(context.WithoutCancel(ctx))
apiShutdownDone := make(chan struct{})
Expand Down Expand Up @@ -868,6 +874,12 @@ func initMiddlewares(_ context.Context, s *apiserver.Server, cfg *config.Config,
}
s.UseMiddleware(*vm)

// Register peer credential middleware for Unix socket connections.
// This extracts UID/GID/PID from the connection and adds them to request context.
// Required for features like cgroup adoption that need to know the API client's identity.
peerCredMiddleware := middleware.NewPeerCredMiddleware()
s.UseMiddleware(peerCredMiddleware)

authzMiddleware := authorization.NewMiddleware(cfg.AuthorizationPlugins, pluginStore)
s.UseMiddleware(authzMiddleware)
return authzMiddleware, nil
Expand Down
4 changes: 4 additions & 0 deletions daemon/config/config_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,10 @@ type Config struct {
// ResolvConf is the path to the configuration of the host resolver
ResolvConf string `json:"resolv-conf,omitempty"`
Rootless bool `json:"rootless,omitempty"`
// AdoptUserCgroups forces containers to inherit their creator's cgroup parent.
// When enabled, a container cannot request a different CgroupParent; it is placed under
// the cgroup of the process making the API request (requires a Unix socket connection).
AdoptUserCgroups bool `json:"adopt-user-cgroups,omitempty"`
}

// GetExecRoot returns the user configured Exec-root
Expand Down
2 changes: 1 addition & 1 deletion daemon/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ func (daemon *Daemon) containerCreate(ctx context.Context, daemonCfg *configStor
if opts.params.HostConfig == nil {
opts.params.HostConfig = &containertypes.HostConfig{}
}
err = daemon.adaptContainerSettings(&daemonCfg.Config, opts.params.HostConfig)
err = daemon.adaptContainerSettings(ctx, &daemonCfg.Config, opts.params.HostConfig)
if err != nil {
return containertypes.CreateResponse{Warnings: warnings}, errdefs.InvalidParameter(err)
}
Expand Down
40 changes: 39 additions & 1 deletion daemon/daemon_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"github.com/moby/moby/v2/daemon/internal/otelutil"
"github.com/moby/moby/v2/daemon/internal/usergroup"
"github.com/moby/moby/v2/daemon/libnetwork"
"github.com/moby/moby/v2/daemon/server/middleware"
nwconfig "github.com/moby/moby/v2/daemon/libnetwork/config"
"github.com/moby/moby/v2/daemon/libnetwork/drivers/bridge"
"github.com/moby/moby/v2/daemon/libnetwork/netlabel"
Expand All @@ -39,6 +40,7 @@ import (
"github.com/moby/moby/v2/daemon/pkg/opts"
volumemounts "github.com/moby/moby/v2/daemon/volume/mounts"
"github.com/moby/moby/v2/errdefs"
cgroupsadopt "github.com/moby/moby/v2/pkg/cgroups"
"github.com/moby/moby/v2/pkg/sysinfo"
"github.com/moby/sys/mount"
"github.com/moby/sys/user"
Expand Down Expand Up @@ -317,7 +319,7 @@ func adjustParallelLimit(n int, limit int) int {

// adaptContainerSettings is called during container creation to modify any
// settings necessary in the HostConfig structure.
func (daemon *Daemon) adaptContainerSettings(daemonCfg *config.Config, hostConfig *containertypes.HostConfig) error {
func (daemon *Daemon) adaptContainerSettings(ctx context.Context, daemonCfg *config.Config, hostConfig *containertypes.HostConfig) error {
if hostConfig.Memory > 0 && hostConfig.MemorySwap == 0 {
// By default, MemorySwap is set to twice the size of Memory.
hostConfig.MemorySwap = hostConfig.Memory * 2
Expand Down Expand Up @@ -368,6 +370,42 @@ func (daemon *Daemon) adaptContainerSettings(daemonCfg *config.Config, hostConfi
hostConfig.OomKillDisable = &defaultOomKillDisable
}

// Apply cgroup adoption if enabled
if daemonCfg.AdoptUserCgroups {
if err := daemon.applyCgroupAdoption(ctx, hostConfig); err != nil {
return fmt.Errorf("failed to apply cgroup adoption: %w", err)
}
}

return nil
}

// applyCgroupAdoption forces hostConfig.CgroupParent to the cgroup parent
// derived from the process of the API client that issued the request.
//
// The client is identified through the peer credentials stored in ctx under
// middleware.PeerCredKey. An error is returned when:
//   - no peer credentials are present in ctx,
//   - the recorded peer PID is not a positive number,
//   - the cgroup parent cannot be derived from that PID, or
//   - hostConfig already requests a cgroup parent that differs from the
//     derived one (reported as an InvalidParameter error).
//
// On success hostConfig.CgroupParent is overwritten with the derived parent.
func (daemon *Daemon) applyCgroupAdoption(ctx context.Context, hostConfig *containertypes.HostConfig) error {
	// Peer credentials are placed in the context by the peer credential middleware.
	creds, ok := ctx.Value(middleware.PeerCredKey).(*middleware.PeerCredentials)
	if !ok || creds == nil {
		return fmt.Errorf("peer credentials not available")
	}

	// A non-positive PID does not identify a client process, so there is no
	// cgroup to adopt. Fail here with a clear message instead of handing a
	// meaningless PID to the cgroup lookup.
	if creds.PID <= 0 {
		return fmt.Errorf("peer credentials do not identify a client process (pid %d)", creds.PID)
	}

	parent, err := cgroupsadopt.DeriveParentFromPid(creds.PID)
	if err != nil {
		return fmt.Errorf("failed to derive cgroup parent: %w", err)
	}

	// Enforce adoption: an explicit cgroup parent is only accepted when it
	// matches the one derived from the client.
	if requested := hostConfig.CgroupParent; requested != "" && requested != parent {
		return errdefs.InvalidParameter(fmt.Errorf(
			"cannot set cgroup parent when --adopt-user-cgroups is enabled: "+
				"containers must run under creator's cgroup (%s)", parent))
	}

	hostConfig.CgroupParent = parent
	return nil
}

Expand Down
1 change: 1 addition & 0 deletions daemon/daemon_unix_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -364,3 +364,4 @@ func TestGetBlkioThrottleDevices(t *testing.T) {
assert.Check(t, retDevs[0].Rate == WEIGHT, "get device rate")
})
}

2 changes: 1 addition & 1 deletion daemon/daemon_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ func setupInitLayer(uid int, gid int) func(string) error {

// adaptContainerSettings is called during container creation to modify any
// settings necessary in the HostConfig structure.
//
// On Windows nothing is adjusted: the arguments are not used and the function
// always returns nil.
func (daemon *Daemon) adaptContainerSettings(daemonCfg *config.Config, hostConfig *containertypes.HostConfig) error {
func (daemon *Daemon) adaptContainerSettings(ctx context.Context, daemonCfg *config.Config, hostConfig *containertypes.HostConfig) error {
	return nil
}

Expand Down
105 changes: 105 additions & 0 deletions daemon/server/middleware/peercred_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
package middleware

import (
"context"
"fmt"
"net"
"net/http"
"syscall"

"golang.org/x/sys/unix"
)

// PeerCredKey is the context key under which the peer credential middleware
// stores a *PeerCredentials value. It is a pointer to a unique value, so it
// cannot collide with context keys defined elsewhere.
var PeerCredKey = &struct{ name string }{"peercred"}

// PeerConnKey is the context key under which the raw net.Conn of a request is
// expected to be stored (by the HTTP server's ConnContext hook).
//
// A dedicated key is used instead of http.LocalAddrContextKey because the HTTP
// stack overwrites that key with the local address, losing the connection.
var PeerConnKey = &struct{ name string }{"peerconn"}

// PeerCredentials contains the credentials of the process on the other end of
// a connection, as reported by the SO_PEERCRED socket option.
type PeerCredentials struct {
	// PID is the peer's process ID.
	PID int
	// UID is the peer's user ID.
	UID int
	// GID is the peer's group ID.
	GID int
}

// PeerCredMiddleware extracts peer credentials from Unix socket connections
// and adds them to the request context under PeerCredKey.
// It carries no state of its own.
type PeerCredMiddleware struct{}

// NewPeerCredMiddleware returns a ready-to-use PeerCredMiddleware.
// The middleware has no fields, so its zero value is returned.
func NewPeerCredMiddleware() PeerCredMiddleware {
	var mw PeerCredMiddleware
	return mw
}

// WrapHandler returns a handler that looks up the peer credentials of the
// request's connection and, when they are available, passes them to handler
// through the context under PeerCredKey.
//
// Extraction is best-effort: if it fails or yields nothing, handler is invoked
// with the context it was given, unchanged.
func (m PeerCredMiddleware) WrapHandler(handler func(ctx context.Context, w http.ResponseWriter, r *http.Request, vars map[string]string) error) func(ctx context.Context, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
	return func(ctx context.Context, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
		peer, err := extractPeerCredentials(r)
		if err != nil || peer == nil {
			// No usable credentials; continue without them.
			return handler(ctx, w, r, vars)
		}
		return handler(context.WithValue(ctx, PeerCredKey, peer), w, r, vars)
	}
}

// extractPeerCredentials returns the SO_PEERCRED credentials of the client
// behind an HTTP request.
//
// The connection is read from the request context under PeerConnKey (set by
// the server's ConnContext hook); http.LocalAddrContextKey cannot be used
// because the HTTP stack overwrites it with the local address.
//
// It returns nil, nil (no credentials, no error) when:
//   - no connection is stored in the request context,
//   - the connection is not a Unix domain socket (e.g. TCP), or
//   - the connection does not expose its file descriptor via syscall.Conn.
//
// A non-nil error is returned only when the file descriptor cannot be accessed
// or the SO_PEERCRED lookup itself fails.
func extractPeerCredentials(r *http.Request) (*PeerCredentials, error) {
	conn, ok := r.Context().Value(PeerConnKey).(net.Conn)
	if !ok || conn == nil {
		// Not a direct connection or connection not available.
		return nil, nil
	}

	// SO_PEERCRED is only meaningful for Unix domain sockets. Other socket
	// types (such as TCP) also implement syscall.Conn, and on Linux the
	// getsockopt call may succeed on them with placeholder values (PID 0)
	// rather than failing, so they must be filtered out explicitly.
	addr := conn.LocalAddr()
	if addr == nil {
		return nil, nil
	}
	switch addr.Network() {
	case "unix", "unixpacket":
		// Unix domain socket: credentials can be queried.
	default:
		return nil, nil
	}

	// syscall.Conn gives access to raw file descriptor operations.
	sc, ok := conn.(syscall.Conn)
	if !ok {
		return nil, nil
	}

	rc, err := sc.SyscallConn()
	if err != nil {
		return nil, fmt.Errorf("failed to get syscall connection: %w", err)
	}

	var (
		creds   *PeerCredentials
		ctrlErr error
	)

	// Control runs the callback with the underlying file descriptor. The
	// callback cannot return a value, so its result is reported through the
	// captured creds and ctrlErr variables.
	err = rc.Control(func(fd uintptr) {
		ucred, sockErr := unix.GetsockoptUcred(int(fd), unix.SOL_SOCKET, unix.SO_PEERCRED)
		if sockErr != nil {
			ctrlErr = fmt.Errorf("SO_PEERCRED failed: %w", sockErr)
			return
		}

		creds = &PeerCredentials{
			PID: int(ucred.Pid),
			UID: int(ucred.Uid),
			GID: int(ucred.Gid),
		}
	})
	if err != nil {
		return nil, fmt.Errorf("failed to access file descriptor: %w", err)
	}
	if ctrlErr != nil {
		return nil, ctrlErr
	}

	return creds, nil
}
102 changes: 102 additions & 0 deletions daemon/server/middleware/peercred_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
package middleware

import (
"context"
"net/http"
"net/http/httptest"
"testing"

"gotest.tools/v3/assert"
)

// TestPeerCredentials_ContextValue checks that a *PeerCredentials stored in a
// context under PeerCredKey can be read back with its fields intact.
func TestPeerCredentials_ContextValue(t *testing.T) {
	stored := &PeerCredentials{PID: 1234, UID: 1000, GID: 1000}
	ctx := context.WithValue(context.Background(), PeerCredKey, stored)

	got, found := ctx.Value(PeerCredKey).(*PeerCredentials)
	assert.Assert(t, found, "should be able to retrieve peer credentials from context")
	assert.Equal(t, got.PID, 1234)
	assert.Equal(t, got.UID, 1000)
	assert.Equal(t, got.GID, 1000)
}

// TestPeerCredentials_NilContext checks that looking up PeerCredKey in a
// context that holds no credentials yields nothing rather than failing.
func TestPeerCredentials_NilContext(t *testing.T) {
	empty := context.Background()

	got, found := empty.Value(PeerCredKey).(*PeerCredentials)
	assert.Assert(t, !found || got == nil, "should return nil when no credentials in context")
}

// TestPeerCredMiddleware_Structure checks that a handler wrapped by the
// middleware is still invoked and that its nil error is passed through.
// This is a basic structure test only: real SO_PEERCRED extraction needs an
// actual Unix socket connection, which is not set up here.
func TestPeerCredMiddleware_Structure(t *testing.T) {
	invoked := false
	inner := func(ctx context.Context, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
		invoked = true
		return nil
	}

	wrapped := NewPeerCredMiddleware().WrapHandler(inner)

	// Drive the wrapped handler with a synthetic request and recorder.
	rec := httptest.NewRecorder()
	req := httptest.NewRequest(http.MethodGet, "http://example.com/test", nil)
	err := wrapped(context.Background(), rec, req, nil)

	assert.NilError(t, err)
	assert.Assert(t, invoked, "handler should have been called")
}

// TestPeerCredMiddleware_NoConnection checks that the middleware neither fails
// nor adds credentials when the request context carries no connection.
func TestPeerCredMiddleware_NoConnection(t *testing.T) {
	// seen records the context the inner handler was called with.
	var seen context.Context
	inner := func(ctx context.Context, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
		seen = ctx
		return nil
	}

	wrapped := NewPeerCredMiddleware().WrapHandler(inner)

	// The synthetic request has no connection stored in its context.
	rec := httptest.NewRecorder()
	req := httptest.NewRequest(http.MethodGet, "http://example.com/test", nil)
	err := wrapped(context.Background(), rec, req, nil)
	assert.NilError(t, err)

	// Without a connection, no credentials may appear in the handler's context.
	got, found := seen.Value(PeerCredKey).(*PeerCredentials)
	assert.Assert(t, !found || got == nil, "should not have credentials when no connection")
}

// TestPeerConnKey_Uniqueness checks that PeerConnKey and
// http.LocalAddrContextKey are independent context keys: a value stored under
// one is not affected by a value stored under the other. This matters because
// the HTTP stack writes to http.LocalAddrContextKey itself.
func TestPeerConnKey_Uniqueness(t *testing.T) {
	// Mimic ConnContext storing the connection, followed by the HTTP stack
	// storing the local address.
	withConn := context.WithValue(context.Background(), PeerConnKey, "connection")
	ctx := context.WithValue(withConn, http.LocalAddrContextKey, "address")

	gotConn := ctx.Value(PeerConnKey)
	gotAddr := ctx.Value(http.LocalAddrContextKey)

	assert.Equal(t, gotConn, "connection")
	assert.Equal(t, gotAddr, "address")
}
31 changes: 31 additions & 0 deletions daemon/server/middleware/peercred_unsupported.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
//go:build !linux

package middleware

import (
"context"
"net/http"
)

// Context keys shared with the Linux implementation. Both must exist on every
// platform: code that is not platform-specific (the HTTP server's ConnContext
// hook and the package tests) refers to them by name.
var (
	// PeerCredKey is the context key for storing peer credentials.
	// Nothing is stored under it on non-Linux platforms, where the
	// middleware is a no-op.
	PeerCredKey = &struct{ name string }{"peercred"}

	// PeerConnKey is the context key for storing the raw connection of a
	// request. It is declared here so that platform-independent callers
	// compile on non-Linux platforms too.
	PeerConnKey = &struct{ name string }{"peerconn"}
)

// PeerCredentials contains the credentials of a peer connection.
// The middleware in this file never produces a value of this type; the type
// exists so that code referring to it compiles on non-Linux platforms.
type PeerCredentials struct {
	// PID is the peer's process ID.
	PID int
	// UID is the peer's user ID.
	UID int
	// GID is the peer's group ID.
	GID int
}

// PeerCredMiddleware is a no-op on non-Linux platforms: it carries no state
// and its WrapHandler returns handlers unchanged.
type PeerCredMiddleware struct{}

// NewPeerCredMiddleware returns a ready-to-use PeerCredMiddleware.
// The middleware has no fields, so its zero value is returned.
func NewPeerCredMiddleware() PeerCredMiddleware {
	var mw PeerCredMiddleware
	return mw
}

// WrapHandler is the non-Linux stand-in for peer credential extraction: no
// credentials are looked up, and the handler passed in is handed back as-is.
func (PeerCredMiddleware) WrapHandler(handler func(ctx context.Context, w http.ResponseWriter, r *http.Request, vars map[string]string) error) func(ctx context.Context, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
	unchanged := handler
	return unchanged
}
1 change: 1 addition & 0 deletions daemon/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ func (s *Server) makeHTTPHandler(route router.Route) http.HandlerFunc {
// use intermediate variable to prevent "should not use basic type
// string as key in context.WithValue" golint errors
ua := r.Header.Get("User-Agent")

ctx := baggage.ContextWithBaggage(context.WithValue(r.Context(), dockerversion.UAStringKey{}, ua), otelutil.MustNewBaggage(
otelutil.MustNewMemberRaw(otelutil.TriggerKey, "api"),
))
Expand Down
Loading
Loading