Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -133,4 +133,10 @@ public class VmGlobalConfig {
@GlobalConfigValidation(validValues = {"None", "AuthenticAMD"})
@BindResourceConfig(value = {VmInstanceVO.class})
public static GlobalConfig VM_CPUID_VENDOR = new GlobalConfig(CATEGORY, "vm.cpuid.vendor");

@GlobalConfigValidation(numberGreaterThan = 1)
public static GlobalConfig GC_INTERVAL = new GlobalConfig(CATEGORY, "deletion.gcInterval");

@GlobalConfigValidation(validValues = {"true", "false"})
public static GlobalConfig VM_METADATA = new GlobalConfig(CATEGORY, "vm.metadata");
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.zstack.header.message.APIMessage;
import org.zstack.header.network.l2.*;
import org.zstack.header.network.l3.*;
import org.zstack.header.storage.primary.APIRegisterVmInstanceMsg;
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Check if APIRegisterVmInstanceMsg is used in VmInstanceApiInterceptor.java
rg -n "APIRegisterVmInstanceMsg" compute/src/main/java/org/zstack/compute/vm/VmInstanceApiInterceptor.java

# Also check the intercept method to see all handled message types
rg -n "instanceof.*Msg" compute/src/main/java/org/zstack/compute/vm/VmInstanceApiInterceptor.java | head -30

Repository: MatheMatrix/zstack

Length of output: 2170


🏁 Script executed:

#!/bin/bash
# Check if there's a handler method for APIRegisterVmInstanceMsg anywhere in the intercept method
cat -n compute/src/main/java/org/zstack/compute/vm/VmInstanceApiInterceptor.java | sed -n '85,180p'

# Also check if the method signature and pattern continues beyond line 146
tail -n +140 compute/src/main/java/org/zstack/compute/vm/VmInstanceApiInterceptor.java | head -50

Repository: MatheMatrix/zstack

Length of output: 8697


🏁 Script executed:

#!/bin/bash
# Verify if APIRegisterVmInstanceMsg is used in other parts of the codebase
rg -r "APIRegisterVmInstanceMsg" compute/src/main/java/org/zstack/compute/vm/ --type java

# Check if there's any mention of RegisterVmInstance in other interceptor or handler files
rg -r "RegisterVmInstance" compute/src/main/java/org/zstack/compute/ --type java | head -20

Repository: MatheMatrix/zstack

Length of output: 44


🏁 Script executed:

#!/bin/bash
# Double-check: search entire repository for APIRegisterVmInstanceMsg to be thorough
rg "APIRegisterVmInstanceMsg" --type java

Repository: MatheMatrix/zstack

Length of output: 1264


移除未使用的 APIRegisterVmInstanceMsg 导入。

该导入在 intercept() 方法中不存在对应的处理逻辑。APIRegisterVmInstanceMsg 实际由 PrimaryStorageBase 处理,与本文件无关。

🤖 Prompt for AI Agents
In `@compute/src/main/java/org/zstack/compute/vm/VmInstanceApiInterceptor.java` at
line 24, Remove the unused import of APIRegisterVmInstanceMsg from
VmInstanceApiInterceptor: open the VmInstanceApiInterceptor class, locate the
import statement "import
org.zstack.header.storage.primary.APIRegisterVmInstanceMsg" and delete it (the
intercept() method in VmInstanceApiInterceptor does not handle this message and
APIRegisterVmInstanceMsg is handled by PrimaryStorageBase), leaving only the
imports actually used by the class.

import org.zstack.header.storage.primary.PrimaryStorageClusterRefVO;
import org.zstack.header.storage.primary.PrimaryStorageClusterRefVO_;
import org.zstack.header.storage.snapshot.VolumeSnapshotVO;
Expand Down
158 changes: 151 additions & 7 deletions compute/src/main/java/org/zstack/compute/vm/VmInstanceBase.java
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,15 @@
import org.zstack.header.message.*;
import org.zstack.header.network.l3.*;
import org.zstack.header.storage.primary.*;
import org.zstack.header.storage.snapshot.*;
import org.zstack.header.storage.snapshot.group.VolumeSnapshotGroupRefVO;
import org.zstack.header.storage.snapshot.group.VolumeSnapshotGroupRefVO_;
import org.zstack.header.storage.snapshot.group.VolumeSnapshotGroupVO;
import org.zstack.header.storage.snapshot.group.VolumeSnapshotGroupVO_;
import org.zstack.header.tag.SystemTagVO;
import org.zstack.header.tag.SystemTagVO_;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import org.zstack.header.vm.*;
import org.zstack.header.vm.ChangeVmMetaDataMsg.AtomicHostUuid;
import org.zstack.header.vm.ChangeVmMetaDataMsg.AtomicVmState;
Expand All @@ -66,30 +75,27 @@
import org.zstack.network.l3.L3NetworkManager;
import org.zstack.network.service.DnsUtils;
import org.zstack.network.service.NetworkServiceManager;
import org.zstack.resourceconfig.ResourceConfig;
import org.zstack.resourceconfig.ResourceConfigFacade;
import org.zstack.resourceconfig.*;
import org.zstack.tag.SystemTagCreator;
import org.zstack.tag.SystemTagUtils;
import org.zstack.tag.TagManager;
import org.zstack.utils.CollectionUtils;
import org.zstack.utils.ExceptionDSL;
import org.zstack.utils.ObjectUtils;
import org.zstack.utils.Utils;
import org.zstack.utils.*;
import org.zstack.utils.function.ForEachFunction;
import org.zstack.utils.function.Function;
import org.zstack.utils.gson.JSONObjectUtil;
import org.zstack.utils.logging.CLogger;
import org.zstack.utils.network.NicIpAddressInfo;
import org.zstack.utils.network.IPv6Constants;
import org.zstack.utils.network.IPv6NetworkUtils;
import org.zstack.utils.network.NetworkUtils;
import org.zstack.utils.network.NicIpAddressInfo;

import javax.persistence.PersistenceException;
import javax.persistence.Tuple;
import javax.persistence.TypedQuery;
import java.sql.Timestamp;
import java.time.LocalDateTime;
import java.util.*;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;

import static java.util.Arrays.asList;
Expand Down Expand Up @@ -140,6 +146,8 @@ public class VmInstanceBase extends AbstractVmInstance {
private VmInstanceResourceMetadataManager vidm;
@Autowired
private NetworkServiceManager nwServiceMgr;
@Autowired
private ResourceDestinationMaker destMaker;

protected VmInstanceVO self;
protected VmInstanceVO originalCopy;
Expand Down Expand Up @@ -533,6 +541,8 @@ protected void handleLocalMessage(Message msg) {
handle((CancelFlattenVmInstanceMsg) msg);
} else if (msg instanceof KvmReportVmShutdownEventMsg) {
handle((KvmReportVmShutdownEventMsg) msg);
} else if (msg instanceof UpdateVmInstanceMetadataMsg) {
handle((UpdateVmInstanceMetadataMsg) msg);
} else {
VmInstanceBaseExtensionFactory ext = vmMgr.getVmInstanceBaseExtensionFactory(msg);
if (ext != null) {
Expand Down Expand Up @@ -9369,5 +9379,139 @@ public void run(MessageReply reply) {
}
});
}

/**
* 处理元数据更新消息。
*
* <p>Layer 2 串行化保证:通过 ChainTask 确保同一 VM 的元数据更新串行执行。
* 该消息由 hash 环路由,同一 VM 必定到达同一 MN,因此此处的 ChainTask
* 是跨 MN 串行的全局唯一汇聚点。</p>
*
* <p>失败路径不创建新 GC,直接返回错误 reply,由 GC 端的 onUpdateFail() 统一处理重试。</p>
*/
private void handle(UpdateVmInstanceMetadataMsg msg) {
thdf.chainSubmit(new ChainTask(msg) {
@Override
public String getSyncSignature() {
return String.format("handle-update-vm-%s-metadata", msg.getUuid());
}

@Override
public void run(SyncTaskChain chain) {
doHandleUpdateVmInstanceMetadata(msg);
chain.next();
}
Comment on lines +9400 to +9403
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

链路串行化被提前 chain.next() 破坏。

Line 9402 在异步 bus.send(...) 发出后立即 chain.next(),会让同一 VM 的后续更新并发进入,和注释声明的串行保证不一致。

🛠️ 建议修复
-    private void handle(UpdateVmInstanceMetadataMsg msg) {
+    private void handle(UpdateVmInstanceMetadataMsg msg) {
         thdf.chainSubmit(new ChainTask(msg) {
@@
             `@Override`
             public void run(SyncTaskChain chain) {
-                doHandleUpdateVmInstanceMetadata(msg);
-                chain.next();
+                doHandleUpdateVmInstanceMetadata(msg, new NoErrorCompletion(chain) {
+                    `@Override`
+                    public void done() {
+                        chain.next();
+                    }
+                });
             }
@@
-    private void doHandleUpdateVmInstanceMetadata(UpdateVmInstanceMetadataMsg msg) {
+    private void doHandleUpdateVmInstanceMetadata(UpdateVmInstanceMetadataMsg msg, NoErrorCompletion completion) {
@@
         bus.send(umsg, new CloudBusCallBack(msg) {
             `@Override`
             public void run(MessageReply r) {
                 UpdateVmInstanceMetadataOnPrimaryStorageReply reply = new UpdateVmInstanceMetadataOnPrimaryStorageReply();
@@
                 bus.reply(msg, reply);
+                completion.done();
             }
         });
     }
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@compute/src/main/java/org/zstack/compute/vm/VmInstanceBase.java` around lines
9400 - 9403, The current run(SyncTaskChain chain) calls
doHandleUpdateVmInstanceMetadata(msg) then immediately invokes chain.next(),
which breaks the intended per-VM serialization because
doHandleUpdateVmInstanceMetadata performs an asynchronous bus.send(...); fix by
making run defer chain.next() until the async work completes — i.e., change
doHandleUpdateVmInstanceMetadata(msg) (or the block that calls bus.send) to
accept a completion callback/future and invoke chain.next() in the bus.send
reply handler (or when that future completes); ensure chain.next() is removed
from the synchronous path and only called from within the bus.send callback or
completion handler so subsequent tasks for the same VM run only after the async
update finishes.


@Override
public String getName() {
return String.format("handle-update-vm-%s-metadata-task", msg.getUuid());
}
});
}

private void doHandleUpdateVmInstanceMetadata(UpdateVmInstanceMetadataMsg msg) {
Tuple tuple = Q.New(VolumeVO.class).select(VolumeVO_.primaryStorageUuid, VolumeVO_.uuid)
.eq(VolumeVO_.vmInstanceUuid, msg.getUuid()).eq(VolumeVO_.type, VolumeType.Root).findTuple();
String primaryStorageUuid = tuple.get(0, String.class);
String rootVolumeUuid = tuple.get(1, String.class);
Comment on lines +9413 to +9416
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

补充根卷查询为空的兜底处理。

Line 9389-9392 若根卷不存在,findTuple() 可能返回 null,直接 tuple.get(...) 会 NPE。建议先返回错误并回复消息。

🛠️ 建议修复
-        Tuple tuple = Q.New(VolumeVO.class).select(VolumeVO_.primaryStorageUuid, VolumeVO_.uuid)
-                .eq(VolumeVO_.vmInstanceUuid, msg.getUuid()).eq(VolumeVO_.type, VolumeType.Root).findTuple();
-        String primaryStorageUuid = tuple.get(0, String.class);
-        String rootVolumeUuid = tuple.get(1, String.class);
+        Tuple tuple = Q.New(VolumeVO.class).select(VolumeVO_.primaryStorageUuid, VolumeVO_.uuid)
+                .eq(VolumeVO_.vmInstanceUuid, msg.getUuid()).eq(VolumeVO_.type, VolumeType.Root).findTuple();
+        if (tuple == null) {
+            UpdateVmInstanceMetadataOnPrimaryStorageReply reply = new UpdateVmInstanceMetadataOnPrimaryStorageReply();
+            reply.setError(operr("cannot find root volume for vm[uuid:%s]", msg.getUuid()));
+            bus.reply(msg, reply);
+            return;
+        }
+        String primaryStorageUuid = tuple.get(0, String.class);
+        String rootVolumeUuid = tuple.get(1, String.class);
🤖 Prompt for AI Agents
In `@compute/src/main/java/org/zstack/compute/vm/VmInstanceBase.java` around lines
9389 - 9392, The code assumes Tuple tuple = Q.New(VolumeVO.class)...findTuple()
always returns non-null; if tuple is null the subsequent tuple.get(...) causes
NPE. Fix: after obtaining tuple in VmInstanceBase (the block that queries
VolumeVO/VolumeVO_/VolumeType.Root with msg.getUuid()), check if tuple == null
and if so construct and send an error reply and return (do not proceed to read
primaryStorageUuid/rootVolumeUuid); ensure the error contains context (vm uuid)
and use the normal reply/error path used elsewhere in this class.


UpdateVmInstanceMetadataOnPrimaryStorageMsg umsg = new UpdateVmInstanceMetadataOnPrimaryStorageMsg();
umsg.setMetadata(buildVmInstanceMetadata(msg.getUuid()));
umsg.setPrimaryStorageUuid(primaryStorageUuid);
umsg.setRootVolumeUuid(rootVolumeUuid);
bus.makeLocalServiceId(umsg, PrimaryStorageConstant.SERVICE_ID);
bus.send(umsg, new CloudBusCallBack(msg) {
@Override
public void run(MessageReply r) {
UpdateVmInstanceMetadataOnPrimaryStorageReply reply = new UpdateVmInstanceMetadataOnPrimaryStorageReply();

if (!r.isSuccess()) {
// 失败:直接返回错误 reply,不创建新 GC(避免滚雪球膨胀)
// GC 端收到失败 reply 后由 onUpdateFail() 统一走指数退避重试
reply.setError(Platform.operr("failed to update vm[uuid=%s] metadata on primary storage",
msg.getUuid()).withCause(r.getError()));
}
bus.reply(msg, reply);
}
});
}

private String buildVmInstanceMetadata(String vmInstanceUuid) {
VmInstanceMetadataDTO dto = new VmInstanceMetadataDTO();

// ── VM 本体 ──
VmInstanceVO vm = Q.New(VmInstanceVO.class).eq(VmInstanceVO_.uuid, vmInstanceUuid).find();
dto.vm = buildResourceMetadata(vm.getUuid(), vm);

// ── 云盘(挂载的 + 已卸载但 lastVmInstanceUuid 指向本 VM 的) ──
List<VolumeVO> volumes = new ArrayList<>();
volumes.addAll(Q.New(VolumeVO.class).eq(VolumeVO_.vmInstanceUuid, vmInstanceUuid).list());
volumes.addAll(Q.New(VolumeVO.class).isNull(VolumeVO_.vmInstanceUuid)
.eq(VolumeVO_.lastVmInstanceUuid, vmInstanceUuid).list());
volumes.forEach(v -> dto.volumes.add(buildResourceMetadata(v.getUuid(), v)));

// ── 网卡 ──
List<VmNicVO> nics = Q.New(VmNicVO.class).eq(VmNicVO_.vmInstanceUuid, vmInstanceUuid).list();
nics.forEach(n -> dto.nics.add(buildResourceMetadata(n.getUuid(), n)));

// ── 快照 ──
List<String> volumeUuids = volumes.stream().map(VolumeVO::getUuid).collect(Collectors.toList());
if (!volumeUuids.isEmpty()) {
Q.New(VolumeSnapshotVO.class).in(VolumeSnapshotVO_.volumeUuid, volumeUuids).list()
.forEach(s -> dto.snapshots
.computeIfAbsent(s.getVolumeUuid(), k -> new ArrayList<>())
.add(JSONObjectUtil.toJsonString(s)));
}

// ── 快照组 ──
List<VolumeSnapshotGroupVO> groups = Q.New(VolumeSnapshotGroupVO.class)
.eq(VolumeSnapshotGroupVO_.vmInstanceUuid, vmInstanceUuid).list();
dto.snapshotGroups = groups.stream()
.map(JSONObjectUtil::toJsonString).collect(Collectors.toList());

List<String> groupUuids = groups.stream()
.map(VolumeSnapshotGroupVO::getUuid).collect(Collectors.toList());
if (!groupUuids.isEmpty()) {
dto.snapshotGroupRefs = Q.New(VolumeSnapshotGroupRefVO.class)
.in(VolumeSnapshotGroupRefVO_.volumeSnapshotGroupUuid, groupUuids).list()
.stream().map(JSONObjectUtil::toJsonString).collect(Collectors.toList());
}

return JSONObjectUtil.toJsonString(dto);
}

/**
* 构建单个资源的 {@link VmInstanceMetadataDTO.ResourceMetadata}。
*
* <p>VO 全量 JSON 明文存储;SystemTagVO 和 ResourceConfigVO 整体列表序列化为 JSON 数组后
* 一次性 Base64 编码,以保护可能包含的密码、密钥等敏感信息。</p>
*
* @param resourceUuid 资源 UUID
* @param vo 资源 VO 对象(VmInstanceVO / VolumeVO / VmNicVO)
* @return 填充完毕的 ResourceMetadata
*/
private VmInstanceMetadataDTO.ResourceMetadata buildResourceMetadata(String resourceUuid, Object vo) {
VmInstanceMetadataDTO.ResourceMetadata meta = new VmInstanceMetadataDTO.ResourceMetadata();
meta.resourceUuid = resourceUuid;
meta.vo = JSONObjectUtil.toJsonString(vo);

// SystemTagVO: 全部 → JSON 数组 → Base64
List<SystemTagVO> tagVOs = Q.New(SystemTagVO.class)
.eq(SystemTagVO_.resourceUuid, resourceUuid).list();
List<String> tagJsons = tagVOs.stream()
.map(JSONObjectUtil::toJsonString).collect(Collectors.toList());
meta.systemTags = Base64.getEncoder().encodeToString(
JSONObjectUtil.toJsonString(tagJsons).getBytes(StandardCharsets.UTF_8));

// ResourceConfigVO: 全部 → JSON 数组 → Base64
List<ResourceConfigVO> cfgVOs = Q.New(ResourceConfigVO.class)
.eq(ResourceConfigVO_.resourceUuid, resourceUuid).list();
List<String> cfgJsons = cfgVOs.stream()
.map(JSONObjectUtil::toJsonString).collect(Collectors.toList());
meta.resourceConfigs = Base64.getEncoder().encodeToString(
JSONObjectUtil.toJsonString(cfgJsons).getBytes(StandardCharsets.UTF_8));

return meta;
}
}

31 changes: 19 additions & 12 deletions compute/src/main/java/org/zstack/compute/vm/VmInstanceUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,29 @@

import org.apache.commons.collections.CollectionUtils;
import org.zstack.core.Platform;
import org.zstack.core.db.Q;
import org.zstack.header.configuration.InstanceOfferingInventory;
import org.zstack.header.errorcode.OperationFailureException;
import org.zstack.header.vm.APIChangeInstanceOfferingMsg;
import org.zstack.header.vm.APICreateVmInstanceMsg;
import org.zstack.header.vm.CreateVmInstanceMsg;
import org.zstack.header.vm.DiskAO;
import org.zstack.header.vm.UpdateVmInstanceMsg;
import org.zstack.header.vm.UpdateVmInstanceSpec;
import org.zstack.header.vm.VmInstanceVO;
import org.zstack.header.storage.snapshot.VolumeSnapshotInventory;
import org.zstack.header.storage.snapshot.VolumeSnapshotVO;
import org.zstack.header.storage.snapshot.VolumeSnapshotVO_;
import org.zstack.header.storage.snapshot.group.VolumeSnapshotGroupRefVO;
import org.zstack.header.storage.snapshot.group.VolumeSnapshotGroupRefVO_;
import org.zstack.header.storage.snapshot.group.VolumeSnapshotGroupVO;
import org.zstack.header.storage.snapshot.group.VolumeSnapshotGroupVO_;
import org.zstack.header.tag.SystemTagVO;
import org.zstack.header.tag.SystemTagVO_;
import org.zstack.header.vm.*;
import org.zstack.header.volume.VolumeVO;
import org.zstack.header.volume.VolumeVO_;
import org.zstack.resourceconfig.ResourceConfigVO;
import org.zstack.resourceconfig.ResourceConfigVO_;
import org.zstack.tag.SystemTagUtils;
import org.zstack.utils.function.ForEachFunction;
import org.zstack.utils.gson.JSONObjectUtil;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.*;
import java.util.stream.Collectors;

import static java.util.Objects.requireNonNull;
import static org.zstack.compute.vm.VmSystemTags.PRIMARY_STORAGE_UUID_FOR_DATA_VOLUME;
Expand Down
Loading