From c4871d1ad97e4911cd099a2a3dfd8d52cf8d83a1 Mon Sep 17 00:00:00 2001 From: james Date: Wed, 3 Jun 2026 12:00:09 +0800 Subject: [PATCH] support biren Signed-off-by: james --- .../biren-device/enable-biren-sharing.md | 167 ++++++++++++++++++ .../biren-device/examples/default-use.md | 23 +++ docs/userguide/device-supported.md | 1 + .../biren-device/enable-biren-sharing.md | 167 ++++++++++++++++++ .../biren-device/examples/default-use.md | 23 +++ sidebars.js | 15 ++ 6 files changed, 396 insertions(+) create mode 100644 docs/userguide/biren-device/enable-biren-sharing.md create mode 100644 docs/userguide/biren-device/examples/default-use.md create mode 100644 i18n/zh/docusaurus-plugin-content-docs/current/userguide/biren-device/enable-biren-sharing.md create mode 100644 i18n/zh/docusaurus-plugin-content-docs/current/userguide/biren-device/examples/default-use.md diff --git a/docs/userguide/biren-device/enable-biren-sharing.md b/docs/userguide/biren-device/enable-biren-sharing.md new file mode 100644 index 00000000..b2a9a4d1 --- /dev/null +++ b/docs/userguide/biren-device/enable-biren-sharing.md @@ -0,0 +1,167 @@ +--- +title: Enable Biren Sharing +--- + +## Introduction + +HAMi now supports sharing `birentech.com/gpu` (Birentech) devices and provides the following capabilities: + +**Supports both full-card and SVI partitioning**: You can use either the full-card device or the SVI-based partitioning device. + +**Device UUID selection**: You can specify or exclude particular devices through annotations. 
+ +## Using Biren Devices + +### Enabling Biren Device Sharing + +#### Label the Node + +```bash +kubectl label node {biren-node} biren=on +``` + +#### Deploy the `biren-device-plugin` + +```yaml +apiVersion: v1 +kind: Namespace +metadata: + name: biren-gpu +--- + +apiVersion: v1 +kind: ServiceAccount +metadata: + name: device-plugin-sa + namespace: biren-gpu +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: birentech-device-plugin +rules: +- apiGroups: [""] + resources: + - nodes + - pods + verbs: ["get", "list", "watch", "update", "patch"] + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: birentech-device-plugin +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: birentech-device-plugin +subjects: +- kind: ServiceAccount + name: device-plugin-sa + namespace: biren-gpu + +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: biren-device-plugin-daemonset + namespace: biren-gpu +spec: + selector: + matchLabels: + name: biren-device-plugin + template: + metadata: + annotations: + scheduler.alpha.kubernetes.io/critical-pod: "" + labels: + name: biren-device-plugin + app.kubernetes.io/component: exporter + app.kubernetes.io/name: gpu-exporter + spec: + nodeSelector: + birentech.com: gpu + tolerations: + - key: CriticalAddonsOnly + operator: Exists + - key: birentech.com/gpu + operator: Exists + effect: NoSchedule + priorityClassName: "system-node-critical" + containers: + - name: k8s-device-plugin + image: projecthami/biren-device-plugin:latest + imagePullPolicy: Always + env: + - name: LD_LIBRARY_PATH + value: /usr/lib + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + command: ["/root/k8s-device-plugin"] + args: ["--pulse", "300", "--container-runtime", "runc"] + securityContext: + privileged: true + volumeMounts: + - name: dp + mountPath: /var/lib/kubelet/device-plugins + - name: sys + mountPath: /sys + - name: brml + mountPath: 
/usr/lib + - name: brml-lib + mountPath: /usr/local/birensupa/driver/biren-smi/lib + readOnly: true + - name: brsmi + mountPath: /opt/birentech/bin + - mountPath: /dev + name: device + - name: cdi-config + mountPath: /etc/cdi + serviceAccountName: device-plugin-sa + volumes: + - name: dp + hostPath: + path: /var/lib/kubelet/device-plugins + - name: sys + hostPath: + path: /sys + - name: brml + hostPath: + path: /usr/lib + - name: brsmi + hostPath: + path: /usr/bin + - name: device + hostPath: + path: /dev + - name: cdi-config + hostPath: + path: /etc/cdi + - name: brml-lib + hostPath: + path: /usr/local/birensupa/driver/biren-smi/lib +``` + +### Run Biren jobs + +```yaml +kind: Pod +metadata: + name: pod1 +spec: + containers: + - image: ubuntu + name: pod1-ctr + command: ["sleep"] + args: ["infinity"] + resources: + limits: + birentech.com/gpu: 1 +``` + +## Notes +1. When requesting Biren resources, you cannot specify the memory size. +2. SVI partitioning can only split a single card into either two or four partitions. diff --git a/docs/userguide/biren-device/examples/default-use.md b/docs/userguide/biren-device/examples/default-use.md new file mode 100644 index 00000000..002138fc --- /dev/null +++ b/docs/userguide/biren-device/examples/default-use.md @@ -0,0 +1,23 @@ +--- +title: Allocate Biren Device +--- + +This example shows how to request a single Biren device in a plain Kubernetes Pod. +The Pod runs a long-running `ubuntu` container (`sleep infinity`) and requests one `birentech.com/gpu` device through the `resources.limits` section. +You can use this as a starting point and adjust the image and resource limits to fit your own workloads. 
+ +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: pod1 +spec: + containers: + - image: ubuntu + name: pod1-ctr + command: ["sleep"] + args: ["infinity"] + resources: + limits: + birentech.com/gpu: 1 +``` \ No newline at end of file diff --git a/docs/userguide/device-supported.md b/docs/userguide/device-supported.md index c80c4fdb..4bf1a2a0 100644 --- a/docs/userguide/device-supported.md +++ b/docs/userguide/device-supported.md @@ -16,4 +16,5 @@ The table below lists the devices supported by HAMi: | GCU | Enflame | S60 | Yes | Yes | No | | XPU | Kunlunxin | P800 | Yes | Yes | No | | GPU | Vastai | VA16 | Yes | Yes | No | +| GPU | Biren | Biren166M | Yes | Yes | No | | DPU | Teco | Checking | In progress | In progress | No | diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/userguide/biren-device/enable-biren-sharing.md b/i18n/zh/docusaurus-plugin-content-docs/current/userguide/biren-device/enable-biren-sharing.md new file mode 100644 index 00000000..f3d37329 --- /dev/null +++ b/i18n/zh/docusaurus-plugin-content-docs/current/userguide/biren-device/enable-biren-sharing.md @@ -0,0 +1,167 @@ +--- +title: 启用壁仞设备共享 +--- + +## 简介 + +HAMi 现在支持共享 `birentech.com/gpu` (壁仞科技) 设备,并提供以下能力: + +**支持整卡和 SVI 切分**: 可以在 HAMi 中使用整卡和 SVI 切分出来的卡。 + +**设备 UUID 选择**: 可以通过注解指定或排除某些特定设备。 + +## 使用壁仞设备 + +### 启用壁仞设备共享 + +#### 给节点打标签 + +```bash +kubectl label node {biren-node} biren=on +``` + +#### 部署 `biren-device-plugin` + +```yaml +apiVersion: v1 +kind: Namespace +metadata: + name: biren-gpu +--- + +apiVersion: v1 +kind: ServiceAccount +metadata: + name: device-plugin-sa + namespace: biren-gpu +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: birentech-device-plugin +rules: +- apiGroups: [""] + resources: + - nodes + - pods + verbs: ["get", "list", "watch", "update", "patch"] + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: birentech-device-plugin +roleRef: + apiGroup: 
rbac.authorization.k8s.io + kind: ClusterRole + name: birentech-device-plugin +subjects: +- kind: ServiceAccount + name: device-plugin-sa + namespace: biren-gpu + +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: biren-device-plugin-daemonset + namespace: biren-gpu +spec: + selector: + matchLabels: + name: biren-device-plugin + template: + metadata: + annotations: + scheduler.alpha.kubernetes.io/critical-pod: "" + labels: + name: biren-device-plugin + app.kubernetes.io/component: exporter + app.kubernetes.io/name: gpu-exporter + spec: + nodeSelector: + birentech.com: gpu + tolerations: + - key: CriticalAddonsOnly + operator: Exists + - key: birentech.com/gpu + operator: Exists + effect: NoSchedule + priorityClassName: "system-node-critical" + containers: + - name: k8s-device-plugin + image: projecthami/biren-device-plugin:latest + imagePullPolicy: Always + env: + - name: LD_LIBRARY_PATH + value: /usr/lib + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + command: ["/root/k8s-device-plugin"] + args: ["--pulse", "300", "--container-runtime", "runc"] + securityContext: + privileged: true + volumeMounts: + - name: dp + mountPath: /var/lib/kubelet/device-plugins + - name: sys + mountPath: /sys + - name: brml + mountPath: /usr/lib + - name: brml-lib + mountPath: /usr/local/birensupa/driver/biren-smi/lib + readOnly: true + - name: brsmi + mountPath: /opt/birentech/bin + - mountPath: /dev + name: device + - name: cdi-config + mountPath: /etc/cdi + serviceAccountName: device-plugin-sa + volumes: + - name: dp + hostPath: + path: /var/lib/kubelet/device-plugins + - name: sys + hostPath: + path: /sys + - name: brml + hostPath: + path: /usr/lib + - name: brsmi + hostPath: + path: /usr/bin + - name: device + hostPath: + path: /dev + - name: cdi-config + hostPath: + path: /etc/cdi + - name: brml-lib + hostPath: + path: /usr/local/birensupa/driver/biren-smi/lib +``` + +### 运行壁仞任务 + +```yaml +kind: Pod +metadata: + name: pod1 +spec: + containers: 
+ - image: ubuntu + name: pod1-ctr + command: ["sleep"] + args: ["infinity"] + resources: + limits: + birentech.com/gpu: 1 +``` + +## 注意事项 +1. 在申请壁仞资源时,**不能**指定显存大小。 +2. 使用 SVI 切分时,一张卡只能切成两份或者四份。 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/userguide/biren-device/examples/default-use.md b/i18n/zh/docusaurus-plugin-content-docs/current/userguide/biren-device/examples/default-use.md new file mode 100644 index 00000000..79d56681 --- /dev/null +++ b/i18n/zh/docusaurus-plugin-content-docs/current/userguide/biren-device/examples/default-use.md @@ -0,0 +1,23 @@ +--- +title: 申请壁仞设备 +--- + +下面的示例展示了如何在一个普通的 Kubernetes Pod 中申请一个壁仞科技的设备。 +该 Pod 以长时间运行的方式启动容器,并在 `resources.limits` 中声明一个 `birentech.com/gpu` 设备。 +你可以在此基础上替换镜像、命令或资源配额,以适配自己的业务场景。 + +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: pod1 +spec: + containers: + - image: ubuntu + name: pod1-ctr + command: ["sleep"] + args: ["infinity"] + resources: + limits: + birentech.com/gpu: 1 +``` \ No newline at end of file diff --git a/sidebars.js b/sidebars.js index 32ad688e..ae5d6b23 100644 --- a/sidebars.js +++ b/sidebars.js @@ -226,6 +226,21 @@ module.exports = { } ] }, + { + "type": "category", + "label": "Managing Biren devices", + "items": [ + "userguide/biren-device/enable-biren-sharing", + { + "type": "category", + "label": "Examples", + "key": "biren-examples", + "items": [ + "userguide/biren-device/examples/default-use" + ] + } + ] + }, { "type": "category", "label": "Optimize Kunlunxin devices scheduling",