7 changes: 7 additions & 0 deletions docs/README.md
@@ -618,6 +618,13 @@ available models per region
##### Incus

- `target`: name of the [specific cluster member](https://linuxcontainers.org/incus/docs/main/howto/cluster_manage_instance/#launch-an-instance-on-a-specific-cluster-member) to deploy the instance. **Only use with Incus cluster.**
- `gpu_pci`: list of [PCI addresses of the GPU devices](https://linuxcontainers.org/incus/docs/main/reference/devices_gpu/#devices-gpu_physical:pci) to pass through to instances on the node. Use `incus info --resources` to list the available resources. The Incus host must have the NVIDIA GPU driver and the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#with-dnf-rhel-centos-fedora-amazon-linux) installed.

**Limitations:**
- The node `count` **must be 1**.
- The container **must be unprivileged**.
- **Do not use the `gpu` tag**, as it would install the NVIDIA driver. The host's driver is used instead.
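Putting the rules above together, a GPU-passthrough node definition in `main.tf` would look like the sketch below. The PCI address `0000:00:06.0` is illustrative; substitute one reported by `incus info --resources` on your Incus host.

```hcl
# Sketch of an unprivileged container node with one GPU passed through.
# The PCI address is an example; replace it with one from your host.
instances = {
  node_gpu = {
    type    = "container"
    cpus    = 2
    ram     = 3000
    gpus    = 0                 # keep at 0: the host's driver is used
    tags    = ["node"]          # no "gpu" tag
    count   = 1                 # must be 1
    gpu_pci = ["0000:00:06.0"]
  }
}
```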


#### 4.7.3 Post build modification effect

3 changes: 3 additions & 0 deletions examples/incus/main.tf
@@ -15,6 +15,9 @@ module "incus" {
mgmt = { type = "container", cpus = 4, ram = 6000, gpus = 0, tags = ["puppet", "mgmt", "nfs"], count = 1 }
login = { type = "container", cpus = 2, ram = 3000, gpus = 0, tags = ["login", "proxy"], count = 1 }
node = { type = "container", cpus = 2, ram = 3000, gpus = 0, tags = ["node"], count = 1 }
  # Uncomment the following line to mount a GPU. The PCI address must match the GPU, and the container must be unprivileged.
  # Do not use the gpu tag, as it would install the NVIDIA driver. The host's driver is used instead.
# node_gpu = { type = "container", cpus = 2, ram = 3000, gpus = 0, tags = ["node"], count = 1, gpu_pci = ["0000:00:06.0"] }
}

firewall_rules = {
3 changes: 3 additions & 0 deletions examples/incus/unprivileged.yaml
@@ -6,6 +6,9 @@ lookup_options:

jupyterhub::kernel::venv::python: "3.12"

profile::cvmfs::local_user::uid: 40001
profile::cvmfs::local_user::gid: 40001

magic_castle::site::all:
- profile::base
- profile::consul
27 changes: 24 additions & 3 deletions incus/infrastructure.tf
@@ -91,6 +91,8 @@ resource "incus_instance" "instances" {
config = {
"cloud-init.user-data" = module.configuration.user_data[each.key]
"security.privileged" = var.privileged
# nvidia.runtime is incompatible with privileged containers
"nvidia.runtime" = length(try(each.value.gpu_pci, [])) > 0 ? !var.privileged : false
}

device {
@@ -137,6 +139,20 @@ resource "incus_instance" "instances" {
}
}

dynamic "device" {
    for_each = length(try(each.value.gpu_pci, [])) > 0 ? { for idx, pci in each.value.gpu_pci : idx => pci } : {}

    content {
name = "gpu${device.key}"
type = "gpu"
properties = {
        gputype = "physical"
pci = device.value
}
}
}

wait_for {
type = "ipv4"
}
@@ -145,9 +161,14 @@ resource "incus_instance" "instances" {
locals {
inventory = { for host, values in module.design.instances :
host => {
prefix = values.prefix
tags = values.tags
specs = merge(
values.specs,
{
gpus = length(try(values.gpu_pci, []))
}
)
volumes = {}
}
}