Skip to content

Commit 5223160

Browse files
Add support for NVKIND
Signed-off-by: Carlos Eduardo Arango Gutierrez <[email protected]>
1 parent 815070d commit 5223160

File tree

9 files changed

+118
-55
lines changed

9 files changed

+118
-55
lines changed

api/holodeck/v1alpha1/types.go

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -190,16 +190,16 @@ const (
190190
type Kubernetes struct {
191191
Install bool `json:"install"`
192192
// KubeConfig is the path to the kubeconfig file on the local machine
193-
KubeConfig string `json:"kubeConfig"`
194-
KubernetesFeatures []string `json:"Features"`
195-
KubernetesVersion string `json:"Version"`
196-
KubernetesInstaller string `json:"Installer"`
197-
KubeletReleaseVersion string `json:"KubeletReleaseVersion"`
198-
Arch string `json:"Arch"`
199-
CniPluginsVersion string `json:"CniPluginsVersion"`
200-
CalicoVersion string `json:"CalicoVersion"`
201-
CrictlVersion string `json:"CrictlVersion"`
202-
K8sEndpointHost string `json:"K8sEndpointHost"`
193+
Config string `json:"kubeConfig"`
194+
Features []string `json:"Features"`
195+
Version string `json:"Version"`
196+
KindVersion string `json:"KindVersion"`
197+
Installer string `json:"Installer"`
198+
Arch string `json:"Arch"`
199+
CniPluginsVersion string `json:"CniPluginsVersion"`
200+
CalicoVersion string `json:"CalicoVersion"`
201+
CrictlVersion string `json:"CrictlVersion"`
202+
K8sEndpointHost string `json:"K8sEndpointHost"`
203203
// A set of key=value pairs that describe feature gates for
204204
// alpha/experimental features
205205
K8sFeatureGates []string `json:"K8sFeatureGates"`

api/holodeck/v1alpha1/zz_generated.deepcopy.go

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

cmd/cli/create/create.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -196,9 +196,14 @@ func runProvision(log *logger.FunLogger, opts *options) error {
196196
}
197197

198198
// Download kubeconfig
199-
if opts.cfg.Spec.Kubernetes.Install && (opts.cfg.Spec.Kubernetes.KubeConfig != "" || opts.kubeconfig != "") {
200-
if opts.cfg.Spec.Kubernetes.KubernetesInstaller == "microk8s" || opts.cfg.Spec.Kubernetes.KubernetesInstaller == "kind" {
201-
log.Warning("kubeconfig retrieval is not supported for %s, skipping kubeconfig download", opts.cfg.Spec.Kubernetes.KubernetesInstaller)
199+
if opts.cfg.Spec.Kubernetes.Install && (opts.cfg.Spec.Kubernetes.Config != "" || opts.kubeconfig != "") {
200+
doesNotSupportKubeconfigRetrieval := map[string]bool{
201+
"microk8s": true,
202+
"kind": true,
203+
"nvkind": true,
204+
}
205+
if doesNotSupportKubeconfigRetrieval[opts.cfg.Spec.Kubernetes.Installer] {
206+
log.Warning("kubeconfig retrieval is not supported for %s, skipping kubeconfig download", opts.cfg.Spec.Kubernetes.Installer)
202207
return nil
203208
}
204209

pkg/provisioner/dependency.go

Lines changed: 9 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626
const (
2727
kubeadmInstaller = "kubeadm"
2828
kindInstaller = "kind"
29+
nvkindInstaller = "nvkind"
2930
microk8sInstaller = "microk8s"
3031
containerdRuntime = "containerd"
3132
crioRuntime = "crio"
@@ -36,9 +37,10 @@ const (
3637

3738
var (
3839
functions = map[string]ProvisionFunc{
39-
kubeadmInstaller: kubeadm,
40-
kindInstaller: kind,
41-
microk8sInstaller: microk8s,
40+
kubeadmInstaller: k8s,
41+
kindInstaller: k8s,
42+
nvkindInstaller: k8s,
43+
microk8sInstaller: k8s,
4244
containerdRuntime: containerd,
4345
crioRuntime: criO,
4446
dockerRuntime: docker,
@@ -74,30 +76,14 @@ func containerToolkit(tpl *bytes.Buffer, env v1alpha1.Environment) error {
7476
return containerToolkit.Execute(tpl, env)
7577
}
7678

77-
func kubeadm(tpl *bytes.Buffer, env v1alpha1.Environment) error {
79+
func k8s(tpl *bytes.Buffer, env v1alpha1.Environment) error {
7880
kubernetes, err := templates.NewKubernetes(env)
7981
if err != nil {
8082
return err
8183
}
8284
return kubernetes.Execute(tpl, env)
8385
}
8486

85-
func microk8s(tpl *bytes.Buffer, env v1alpha1.Environment) error {
86-
microk8s, err := templates.NewKubernetes(env)
87-
if err != nil {
88-
return err
89-
}
90-
return microk8s.Execute(tpl, env)
91-
}
92-
93-
func kind(tpl *bytes.Buffer, env v1alpha1.Environment) error {
94-
kind, err := templates.NewKubernetes(env)
95-
if err != nil {
96-
return err
97-
}
98-
return kind.Execute(tpl, env)
99-
}
100-
10187
// DependencyResolver is a struct that holds the dependency list
10288
type DependencyResolver struct {
10389
Dependencies []ProvisionFunc
@@ -120,11 +106,13 @@ func NewDependencies(env v1alpha1.Environment) *DependencyResolver {
120106
}
121107

122108
func (d *DependencyResolver) withKubernetes() {
123-
switch d.env.Spec.Kubernetes.KubernetesInstaller {
109+
switch d.env.Spec.Kubernetes.Installer {
124110
case kubeadmInstaller:
125111
d.Dependencies = append(d.Dependencies, functions[kubeadmInstaller])
126112
case kindInstaller:
127113
d.Dependencies = append(d.Dependencies, functions[kindInstaller])
114+
case nvkindInstaller:
115+
d.Dependencies = append(d.Dependencies, functions[nvkindInstaller])
128116
case microk8sInstaller:
129117
// reset the list to only include microk8s
130118
d.Dependencies = nil

pkg/provisioner/dryrun.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,9 @@ func Dryrun(log *logger.FunLogger, env v1alpha1.Environment) error {
3232
// Kubernetes -> Container Toolkit -> Container Runtime -> NVDriver
3333
if env.Spec.Kubernetes.Install {
3434
// check if env.Spec.Kubernetes.Version is in the format of vX.Y.Z
35-
if env.Spec.Kubernetes.KubernetesInstaller == "kubeadm" && !strings.HasPrefix(env.Spec.Kubernetes.KubernetesVersion, "v") {
35+
if env.Spec.Kubernetes.Installer == "kubeadm" && !strings.HasPrefix(env.Spec.Kubernetes.Version, "v") {
3636
log.Fail <- struct{}{}
37-
return fmt.Errorf("kubernetes version %s is not in the format of vX.Y.Z", env.Spec.Kubernetes.KubernetesVersion)
37+
return fmt.Errorf("kubernetes version %s is not in the format of vX.Y.Z", env.Spec.Kubernetes.Version)
3838
}
3939
}
4040

pkg/provisioner/provisioner.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,13 +75,13 @@ func (p *Provisioner) Run(env v1alpha1.Environment) error {
7575

7676
// kind-config
7777
// Create kind config file if it is provided
78-
if env.Spec.Kubernetes.KubernetesInstaller == "kind" && env.Spec.Kubernetes.KindConfig != "" {
78+
if (env.Spec.Kubernetes.Installer == "kind" || env.Spec.Kubernetes.Installer == "nvkind") && env.Spec.Kubernetes.KindConfig != "" {
7979
if err := p.createKindConfig(env); err != nil {
8080
return fmt.Errorf("failed to create kind config file: %v", err)
8181
}
8282
}
8383

84-
if env.Spec.Kubernetes.KubernetesInstaller == "kubeadm" {
84+
if env.Spec.Kubernetes.Installer == "kubeadm" {
8585
env.Spec.Kubernetes.K8sEndpointHost = p.HostUrl
8686
}
8787

pkg/provisioner/templates/container-toolkit.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,9 @@ curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dear
3232
sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
3333
sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list \
3434
&& \
35-
sudo apt-get update
35+
with_retry 3 10s sudo apt-get update
3636
37-
sudo apt-get install -y nvidia-container-toolkit
37+
install_packages_with_retry nvidia-container-toolkit
3838
3939
# Configure container runtime
4040
sudo nvidia-ctk runtime configure --runtime={{.ContainerRuntime}} --set-as-default

pkg/provisioner/templates/docker.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ const dockerTemplate = `
3030
: ${DOCKER_VERSION:={{.Version}}}
3131
3232
# Add Docker's official GPG key:
33-
sudo apt-get update
33+
with_retry 3 10s sudo apt-get update
3434
install_packages_with_retry ca-certificates curl gnupg
3535
sudo install -m 0755 -d /etc/apt/keyrings
3636
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg
@@ -41,7 +41,7 @@ echo \
4141
"deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \
4242
$(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \
4343
sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
44-
sudo apt-get update
44+
with_retry 3 10s sudo apt-get update
4545
4646
# if DOCKER_VERSION is latest, then install latest version, else install specific version
4747
if [ "$DOCKER_VERSION" = "latest" ]; then

pkg/provisioner/templates/kubernetes.go

Lines changed: 81 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -105,18 +105,25 @@ kubectl label node --all node-role.kubernetes.io/worker=
105105
kubectl label node --all nvidia.com/holodeck.managed=true
106106
`
107107

108-
const KindTemplate = `
108+
const KindBaseTemplate = `
109109
110+
: ${KIND_VERSION:={{.KindVersion}}}
110111
: ${INSTANCE_ENDPOINT_HOST:={{.K8sEndpointHost}}}
111112
KIND_CONFIG=""
112113
if [ -n "{{.KindConfig}}" ]; then
113114
KIND_CONFIG="--config {{.KindConfig}}"
114115
fi
115116
117+
ARCH=$(uname -m)
118+
if [ "$ARCH" == "x86_64" ]; then
119+
ARCH="amd64"
120+
fi
121+
if [ "$ARCH" == "aarch64" ]; then
122+
ARCH="arm64"
123+
fi
116124
117125
# Download kind
118-
[ $(uname -m) = x86_64 ] && curl -Lo ./kind https://kind.sigs.k8s.io/dl/v0.20.0/kind-linux-amd64
119-
[ $(uname -m) = aarch64 ] && curl -Lo ./kind https://kind.sigs.k8s.io/dl/v0.20.0/kind-linux-arm64
126+
curl -Lo ./kind https://kind.sigs.k8s.io/dl/${KIND_VERSION}/kind-linux-${ARCH}
120127
chmod +x ./kind
121128
sudo install ./kind /usr/local/bin/kind
122129
@@ -135,6 +142,9 @@ cd $HOME
135142
sudo nvidia-ctk runtime configure --set-as-default
136143
sudo systemctl restart docker
137144
sudo nvidia-ctk config --set accept-nvidia-visible-devices-as-volume-mounts --in-place
145+
`
146+
147+
const KindTemplate = `
138148
139149
# Create a cluster with the config file
140150
export KUBECONFIG="${HOME}/.kube/config:/var/run/kubernetes/admin.kubeconfig"
@@ -147,12 +157,54 @@ echo "you can now access the cluster with:"
147157
echo "ssh -i <your-private-key> ubuntu@${INSTANCE_ENDPOINT_HOST}"
148158
`
149159

160+
const NVKindTemplate = `
161+
162+
# Go
163+
# Set GO version
164+
GO_VERSION="1.23.6"
165+
wget https://go.dev/dl/go${GO_VERSION}.linux-${ARCH}.tar.gz -O /tmp/go${GO_VERSION}.linux-${ARCH}.tar.gz
166+
sudo rm -rf /usr/local/go
167+
sudo tar -C /usr/local -xzf /tmp/go${GO_VERSION}.linux-${ARCH}.tar.gz
168+
169+
# Add Go to PATH
170+
if ! grep -qF 'export PATH="/usr/local/go/bin:$HOME/go/bin:$PATH"' ~/.bashrc; then
171+
echo 'export PATH="/usr/local/go/bin:$HOME/go/bin:$PATH"' >> ~/.bashrc
172+
fi
173+
export PATH="/usr/local/go/bin:$HOME/go/bin:$PATH"
174+
175+
# Make
176+
install_packages_with_retry make
177+
178+
# install nvkind
179+
go install github.com/NVIDIA/nvkind/cmd/nvkind@latest
180+
181+
# Load basic Kind config file
182+
cat <<EOF > kind-cluster-config.yml
183+
kind: Cluster
184+
apiVersion: kind.x-k8s.io/v1alpha4
185+
featureGates:
186+
DynamicResourceAllocation: true
187+
containerdConfigPatches:
188+
# Enable CDI as described in
189+
# https://tags.cncf.io/container-device-interface#containerd-configuration
190+
- |-
191+
[plugins."io.containerd.grpc.v1.cri"]
192+
enable_cdi = true
193+
EOF
194+
195+
nvkind cluster create --config-values kind-cluster-config.yml
196+
197+
echo "NVKIND installed successfully"
198+
echo "you can now access the cluster with:"
199+
echo "ssh -i <your-private-key> ubuntu@${INSTANCE_ENDPOINT_HOST}"
200+
`
201+
150202
const microk8sTemplate = `
151203
152204
: ${INSTANCE_ENDPOINT_HOST:={{.K8sEndpointHost}}}
153205
154206
# Install microk8s
155-
sudo apt-get update
207+
with_retry 3 10s sudo apt-get update
156208
157209
sudo snap install microk8s --classic --channel={{.Version}}
158210
sudo microk8s enable gpu dashboard dns registry
@@ -180,6 +232,7 @@ const (
180232

181233
type Kubernetes struct {
182234
Version string
235+
KindVersion string
183236
Installer string
184237
KubeletReleaseVersion string
185238
Arch string
@@ -194,16 +247,23 @@ type Kubernetes struct {
194247

195248
func NewKubernetes(env v1alpha1.Environment) (*Kubernetes, error) {
196249
kubernetes := &Kubernetes{
197-
Version: env.Spec.Kubernetes.KubernetesVersion,
250+
Version: env.Spec.Kubernetes.Version,
198251
}
199252
// check if env.Spec.Kubernetes.Version is in the format of vX.Y.Z
200253
// if not, set the default version
201-
if !strings.HasPrefix(env.Spec.Kubernetes.KubernetesVersion, "v") && env.Spec.Kubernetes.KubernetesInstaller != "microk8s" {
202-
fmt.Printf("Kubernetes version %s is not in the format of vX.Y.Z, setting default version v1.32.1\n", env.Spec.Kubernetes.KubernetesVersion)
254+
if !strings.HasPrefix(env.Spec.Kubernetes.Version, "v") && env.Spec.Kubernetes.Installer != "microk8s" {
255+
fmt.Printf("Kubernetes version %s is not in the format of vX.Y.Z, setting default version v1.32.1\n", env.Spec.Kubernetes.Version)
203256
kubernetes.Version = defaultKubernetesVersion
204257
}
205-
if env.Spec.Kubernetes.KubeletReleaseVersion != "" {
206-
kubernetes.KubeletReleaseVersion = env.Spec.Kubernetes.KubeletReleaseVersion
258+
if env.Spec.Kubernetes.Installer == "kind" || env.Spec.Kubernetes.Installer == "nvkind" {
259+
if env.Spec.Kubernetes.KindVersion != "" {
260+
kubernetes.KindVersion = env.Spec.Kubernetes.KindVersion
261+
} else {
262+
kubernetes.KindVersion = "v0.20.0" // fallback must be a kind release tag (the one previously pinned in the template), not a Kubernetes version
263+
}
264+
}
265+
if env.Spec.Kubernetes.Version != "" {
266+
kubernetes.KubeletReleaseVersion = env.Spec.Kubernetes.Version
207267
} else {
208268
kubernetes.KubeletReleaseVersion = defaultKubeletReleaseVersion
209269
}
@@ -243,15 +303,25 @@ func NewKubernetes(env v1alpha1.Environment) (*Kubernetes, error) {
243303
func (k *Kubernetes) Execute(tpl *bytes.Buffer, env v1alpha1.Environment) error {
244304
kubernetesTemplate := new(template.Template)
245305

246-
switch env.Spec.Kubernetes.KubernetesInstaller {
306+
switch env.Spec.Kubernetes.Installer {
247307
case "kubeadm":
248308
kubernetesTemplate = template.Must(template.New("kubeadm").Parse(KubeadmTemplate))
249309
case "kind":
310+
kindBase := template.Must(template.New("common-functions").Parse(KindBaseTemplate))
311+
if err := kindBase.Execute(tpl, k); err != nil {
312+
return fmt.Errorf("failed to execute kind base template: %v", err)
313+
}
250314
kubernetesTemplate = template.Must(template.New("kind").Parse(KindTemplate))
315+
case "nvkind":
316+
kindBase := template.Must(template.New("common-functions").Parse(KindBaseTemplate))
317+
if err := kindBase.Execute(tpl, k); err != nil {
318+
return fmt.Errorf("failed to execute kind base template: %v", err)
319+
}
320+
kubernetesTemplate = template.Must(template.New("nvkind").Parse(NVKindTemplate))
251321
case "microk8s":
252322
kubernetesTemplate = template.Must(template.New("microk8s").Parse(microk8sTemplate))
253323
default:
254-
return fmt.Errorf("unknown kubernetes installer %s", env.Spec.Kubernetes.KubernetesInstaller)
324+
return fmt.Errorf("unknown kubernetes installer %s", env.Spec.Kubernetes.Installer)
255325
}
256326

257327
err := kubernetesTemplate.Execute(tpl, k)

0 commit comments

Comments
 (0)