From 1472b949d15090794163dbaf1352b4495b4da9ab Mon Sep 17 00:00:00 2001 From: Woojoong Kim Date: Mon, 6 Apr 2026 02:49:00 +0000 Subject: [PATCH 1/2] fix live migration issues in qemu --- ...-Add-active-field-to-BlockDeviceInfo.patch | 317 ++++++++++ ...-Add-blockdev-set-active-QMP-command.patch | 187 ++++++ ...-Add-option-to-create-inactive-nodes.patch | 102 ++++ ...-inactivating-already-inactive-nodes.patch | 80 +++ ...attach-inactive-child-to-active-node.patch | 46 ++ ...Drain-nodes-before-inactivating-them.patch | 52 ++ ...ash-on-block_resize-on-inactive-node.patch | 68 +++ ...-external-snapshot-overlays-when-nec.patch | 68 +++ ...port-inactive-nodes-in-blk_insert_bs.patch | 66 ++ ...-option-to-allow-export-of-inactive-.patch | 135 +++++ ...-t-ignore-image-activation-error-in-.patch | 50 ++ ...on-Add-helper-to-get-target-runstate.patch | 84 +++ ...ration-Ensure-vmstate_save-sets-errp.patch | 92 +++ ...F-for-incoming-migration-on-Migratio.patch | 180 ++++++ ...Apply-late-block-active-behavior-to-.patch | 72 +++ ...Fix-possible-race-with-block_inactiv.patch | 78 +++ ...k-Make-late-block-active-the-default.patch | 94 +++ ...ration-block-Rewrite-disk-activation.patch | 565 ++++++++++++++++++ ...ock-active-Remove-global-active-flag.patch | 158 +++++ ...vm-nbd-server-Support-inactive-nodes.patch | 68 +++ SPECS/qemu/kvm-net-Fix-announce_self.patch | 82 +++ ...ctivate-disks-if-migration-completed.patch | 73 +++ ...o-net-Add-queues-before-loading-them.patch | 94 +++ SPECS/qemu/qemu.spec | 39 +- 24 files changed, 2849 insertions(+), 1 deletion(-) create mode 100644 SPECS/qemu/kvm-block-Add-active-field-to-BlockDeviceInfo.patch create mode 100644 SPECS/qemu/kvm-block-Add-blockdev-set-active-QMP-command.patch create mode 100644 SPECS/qemu/kvm-block-Add-option-to-create-inactive-nodes.patch create mode 100644 SPECS/qemu/kvm-block-Allow-inactivating-already-inactive-nodes.patch create mode 100644 SPECS/qemu/kvm-block-Don-t-attach-inactive-child-to-active-node.patch create mode 
100644 SPECS/qemu/kvm-block-Drain-nodes-before-inactivating-them.patch create mode 100644 SPECS/qemu/kvm-block-Fix-crash-on-block_resize-on-inactive-node.patch create mode 100644 SPECS/qemu/kvm-block-Inactivate-external-snapshot-overlays-when-nec.patch create mode 100644 SPECS/qemu/kvm-block-Support-inactive-nodes-in-blk_insert_bs.patch create mode 100644 SPECS/qemu/kvm-block-export-Add-option-to-allow-export-of-inactive-.patch create mode 100644 SPECS/qemu/kvm-block-export-Don-t-ignore-image-activation-error-in-.patch create mode 100644 SPECS/qemu/kvm-migration-Add-helper-to-get-target-runstate.patch create mode 100644 SPECS/qemu/kvm-migration-Ensure-vmstate_save-sets-errp.patch create mode 100644 SPECS/qemu/kvm-migration-Fix-UAF-for-incoming-migration-on-Migratio.patch create mode 100644 SPECS/qemu/kvm-migration-block-Apply-late-block-active-behavior-to-.patch create mode 100644 SPECS/qemu/kvm-migration-block-Fix-possible-race-with-block_inactiv.patch create mode 100644 SPECS/qemu/kvm-migration-block-Make-late-block-active-the-default.patch create mode 100644 SPECS/qemu/kvm-migration-block-Rewrite-disk-activation.patch create mode 100644 SPECS/qemu/kvm-migration-block-active-Remove-global-active-flag.patch create mode 100644 SPECS/qemu/kvm-nbd-server-Support-inactive-nodes.patch create mode 100644 SPECS/qemu/kvm-net-Fix-announce_self.patch create mode 100644 SPECS/qemu/kvm-qmp-cont-Only-activate-disks-if-migration-completed.patch create mode 100644 SPECS/qemu/kvm-virtio-net-Add-queues-before-loading-them.patch diff --git a/SPECS/qemu/kvm-block-Add-active-field-to-BlockDeviceInfo.patch b/SPECS/qemu/kvm-block-Add-active-field-to-BlockDeviceInfo.patch new file mode 100644 index 00000000000..42ce35bbce7 --- /dev/null +++ b/SPECS/qemu/kvm-block-Add-active-field-to-BlockDeviceInfo.patch @@ -0,0 +1,317 @@ +From 454a498569318365a16ed65bd2110daef5bb3fc3 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 4 Feb 2025 22:13:52 +0100 +Subject: [PATCH 08/23] block: Add 
'active' field to BlockDeviceInfo + +RH-Author: Kevin Wolf +RH-MergeRequest: 339: QMP command for block device reactivation after migration +RH-Jira: RHEL-54296 RHEL-78397 +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [7/22] 6309d1a9ac2a7a21013cfe71dc40474cbdc89464 (kmwolf/centos-qemu-kvm) + +This allows querying from QMP (and also HMP) whether an image is +currently active or inactive (in the sense of BDRV_O_INACTIVE). + +Signed-off-by: Kevin Wolf +Acked-by: Fabiano Rosas +Reviewed-by: Eric Blake +Reviewed-by: Stefan Hajnoczi +Message-ID: <20250204211407.381505-2-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit aec81049c2daa8a97b89e59f03733b21ae0f8c2d) +Signed-off-by: Kevin Wolf +--- + block.c | 4 ++++ + block/monitor/block-hmp-cmds.c | 5 +++-- + block/qapi.c | 1 + + include/block/block-global-state.h | 3 +++ + qapi/block-core.json | 6 +++++- + tests/qemu-iotests/184.out | 2 ++ + tests/qemu-iotests/191.out | 16 ++++++++++++++++ + tests/qemu-iotests/273.out | 5 +++++ + 8 files changed, 39 insertions(+), 3 deletions(-) + +diff --git a/block.c b/block.c +index c317de9eaa..c94d78eefd 100644 +--- a/block.c ++++ b/block.c +@@ -6824,6 +6824,10 @@ void bdrv_init_with_whitelist(void) + bdrv_init(); + } + ++bool bdrv_is_inactive(BlockDriverState *bs) { ++ return bs->open_flags & BDRV_O_INACTIVE; ++} ++ + int bdrv_activate(BlockDriverState *bs, Error **errp) + { + BdrvChild *child, *parent; +diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c +index bdf2eb50b6..cc832549e1 100644 +--- a/block/monitor/block-hmp-cmds.c ++++ b/block/monitor/block-hmp-cmds.c +@@ -630,11 +630,12 @@ static void print_block_info(Monitor *mon, BlockInfo *info, + } + + if (inserted) { +- monitor_printf(mon, ": %s (%s%s%s)\n", ++ monitor_printf(mon, ": %s (%s%s%s%s)\n", + inserted->file, + inserted->drv, + inserted->ro ? ", read-only" : "", +- inserted->encrypted ? ", encrypted" : ""); ++ inserted->encrypted ? 
", encrypted" : "", ++ inserted->active ? "" : ", inactive"); + } else { + monitor_printf(mon, ": [not inserted]\n"); + } +diff --git a/block/qapi.c b/block/qapi.c +index 2b5793f1d9..709170e63d 100644 +--- a/block/qapi.c ++++ b/block/qapi.c +@@ -63,6 +63,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk, + info->file = g_strdup(bs->filename); + info->ro = bdrv_is_read_only(bs); + info->drv = g_strdup(bs->drv->format_name); ++ info->active = !bdrv_is_inactive(bs); + info->encrypted = bs->encrypted; + + info->cache = g_new(BlockdevCacheInfo, 1); +diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h +index bd7cecd1cf..a826bf5f78 100644 +--- a/include/block/block-global-state.h ++++ b/include/block/block-global-state.h +@@ -175,6 +175,9 @@ BlockDriverState * GRAPH_RDLOCK + check_to_replace_node(BlockDriverState *parent_bs, const char *node_name, + Error **errp); + ++ ++bool GRAPH_RDLOCK bdrv_is_inactive(BlockDriverState *bs); ++ + int no_coroutine_fn GRAPH_RDLOCK + bdrv_activate(BlockDriverState *bs, Error **errp); + +diff --git a/qapi/block-core.json b/qapi/block-core.json +index aa40d44f1d..92af032744 100644 +--- a/qapi/block-core.json ++++ b/qapi/block-core.json +@@ -486,6 +486,10 @@ + # @backing_file_depth: number of files in the backing file chain + # (since: 1.2) + # ++# @active: true if the backend is active; typical cases for inactive backends ++# are on the migration source instance after migration completes and on the ++# destination before it completes. 
(since: 10.0) ++# + # @encrypted: true if the backing device is encrypted + # + # @detect_zeroes: detect and optimize zero writes (Since 2.1) +@@ -556,7 +560,7 @@ + { 'struct': 'BlockDeviceInfo', + 'data': { 'file': 'str', '*node-name': 'str', 'ro': 'bool', 'drv': 'str', + '*backing_file': 'str', 'backing_file_depth': 'int', +- 'encrypted': 'bool', ++ 'active': 'bool', 'encrypted': 'bool', + 'detect_zeroes': 'BlockdevDetectZeroesOptions', + 'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int', + 'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int', +diff --git a/tests/qemu-iotests/184.out b/tests/qemu-iotests/184.out +index e8f631f853..52692b6b3b 100644 +--- a/tests/qemu-iotests/184.out ++++ b/tests/qemu-iotests/184.out +@@ -26,6 +26,7 @@ Testing: + { + "iops_rd": 0, + "detect_zeroes": "off", ++ "active": true, + "image": { + "backing-image": { + "virtual-size": 1073741824, +@@ -59,6 +60,7 @@ Testing: + { + "iops_rd": 0, + "detect_zeroes": "off", ++ "active": true, + "image": { + "virtual-size": 1073741824, + "filename": "null-co://", +diff --git a/tests/qemu-iotests/191.out b/tests/qemu-iotests/191.out +index c3309e4bc6..2a72ca7106 100644 +--- a/tests/qemu-iotests/191.out ++++ b/tests/qemu-iotests/191.out +@@ -114,6 +114,7 @@ wrote 65536/65536 bytes at offset 1048576 + { + "iops_rd": 0, + "detect_zeroes": "off", ++ "active": true, + "image": { + "backing-image": { + "virtual-size": 67108864, +@@ -155,6 +156,7 @@ wrote 65536/65536 bytes at offset 1048576 + { + "iops_rd": 0, + "detect_zeroes": "off", ++ "active": true, + "image": { + "virtual-size": 197120, + "filename": "TEST_DIR/t.IMGFMT.ovl2", +@@ -183,6 +185,7 @@ wrote 65536/65536 bytes at offset 1048576 + { + "iops_rd": 0, + "detect_zeroes": "off", ++ "active": true, + "image": { + "backing-image": { + "virtual-size": 67108864, +@@ -224,6 +227,7 @@ wrote 65536/65536 bytes at offset 1048576 + { + "iops_rd": 0, + "detect_zeroes": "off", ++ "active": true, + "image": { + "virtual-size": 197120, + "filename": 
"TEST_DIR/t.IMGFMT", +@@ -252,6 +256,7 @@ wrote 65536/65536 bytes at offset 1048576 + { + "iops_rd": 0, + "detect_zeroes": "off", ++ "active": true, + "image": { + "backing-image": { + "virtual-size": 67108864, +@@ -293,6 +298,7 @@ wrote 65536/65536 bytes at offset 1048576 + { + "iops_rd": 0, + "detect_zeroes": "off", ++ "active": true, + "image": { + "virtual-size": 393216, + "filename": "TEST_DIR/t.IMGFMT.mid", +@@ -321,6 +327,7 @@ wrote 65536/65536 bytes at offset 1048576 + { + "iops_rd": 0, + "detect_zeroes": "off", ++ "active": true, + "image": { + "virtual-size": 67108864, + "filename": "TEST_DIR/t.IMGFMT.base", +@@ -350,6 +357,7 @@ wrote 65536/65536 bytes at offset 1048576 + { + "iops_rd": 0, + "detect_zeroes": "off", ++ "active": true, + "image": { + "virtual-size": 393216, + "filename": "TEST_DIR/t.IMGFMT.base", +@@ -521,6 +529,7 @@ wrote 65536/65536 bytes at offset 1048576 + { + "iops_rd": 0, + "detect_zeroes": "off", ++ "active": true, + "image": { + "backing-image": { + "virtual-size": 67108864, +@@ -562,6 +571,7 @@ wrote 65536/65536 bytes at offset 1048576 + { + "iops_rd": 0, + "detect_zeroes": "off", ++ "active": true, + "image": { + "virtual-size": 197120, + "filename": "TEST_DIR/t.IMGFMT.ovl2", +@@ -590,6 +600,7 @@ wrote 65536/65536 bytes at offset 1048576 + { + "iops_rd": 0, + "detect_zeroes": "off", ++ "active": true, + "image": { + "backing-image": { + "backing-image": { +@@ -642,6 +653,7 @@ wrote 65536/65536 bytes at offset 1048576 + { + "iops_rd": 0, + "detect_zeroes": "off", ++ "active": true, + "image": { + "virtual-size": 197120, + "filename": "TEST_DIR/t.IMGFMT.ovl3", +@@ -670,6 +682,7 @@ wrote 65536/65536 bytes at offset 1048576 + { + "iops_rd": 0, + "detect_zeroes": "off", ++ "active": true, + "image": { + "virtual-size": 67108864, + "filename": "TEST_DIR/t.IMGFMT.base", +@@ -699,6 +712,7 @@ wrote 65536/65536 bytes at offset 1048576 + { + "iops_rd": 0, + "detect_zeroes": "off", ++ "active": true, + "image": { + "virtual-size": 393216, + 
"filename": "TEST_DIR/t.IMGFMT.base", +@@ -727,6 +741,7 @@ wrote 65536/65536 bytes at offset 1048576 + { + "iops_rd": 0, + "detect_zeroes": "off", ++ "active": true, + "image": { + "backing-image": { + "virtual-size": 67108864, +@@ -768,6 +783,7 @@ wrote 65536/65536 bytes at offset 1048576 + { + "iops_rd": 0, + "detect_zeroes": "off", ++ "active": true, + "image": { + "virtual-size": 197120, + "filename": "TEST_DIR/t.IMGFMT", +diff --git a/tests/qemu-iotests/273.out b/tests/qemu-iotests/273.out +index 71843f02de..c19753c685 100644 +--- a/tests/qemu-iotests/273.out ++++ b/tests/qemu-iotests/273.out +@@ -23,6 +23,7 @@ Testing: -blockdev file,node-name=base,filename=TEST_DIR/t.IMGFMT.base -blockdev + { + "iops_rd": 0, + "detect_zeroes": "off", ++ "active": true, + "image": { + "backing-image": { + "backing-image": { +@@ -74,6 +75,7 @@ Testing: -blockdev file,node-name=base,filename=TEST_DIR/t.IMGFMT.base -blockdev + { + "iops_rd": 0, + "detect_zeroes": "off", ++ "active": true, + "image": { + "virtual-size": 197120, + "filename": "TEST_DIR/t.IMGFMT", +@@ -102,6 +104,7 @@ Testing: -blockdev file,node-name=base,filename=TEST_DIR/t.IMGFMT.base -blockdev + { + "iops_rd": 0, + "detect_zeroes": "off", ++ "active": true, + "image": { + "backing-image": { + "virtual-size": 197120, +@@ -142,6 +145,7 @@ Testing: -blockdev file,node-name=base,filename=TEST_DIR/t.IMGFMT.base -blockdev + { + "iops_rd": 0, + "detect_zeroes": "off", ++ "active": true, + "image": { + "virtual-size": 197120, + "filename": "TEST_DIR/t.IMGFMT.mid", +@@ -170,6 +174,7 @@ Testing: -blockdev file,node-name=base,filename=TEST_DIR/t.IMGFMT.base -blockdev + { + "iops_rd": 0, + "detect_zeroes": "off", ++ "active": true, + "image": { + "virtual-size": 197120, + "filename": "TEST_DIR/t.IMGFMT.base", +-- +2.48.1 + diff --git a/SPECS/qemu/kvm-block-Add-blockdev-set-active-QMP-command.patch b/SPECS/qemu/kvm-block-Add-blockdev-set-active-QMP-command.patch new file mode 100644 index 00000000000..1c0b59a00cb --- 
/dev/null +++ b/SPECS/qemu/kvm-block-Add-blockdev-set-active-QMP-command.patch @@ -0,0 +1,187 @@ +From c6c40cc92fbb91d704d3739bb78bfd936f111625 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 4 Feb 2025 22:13:59 +0100 +Subject: [PATCH 15/23] block: Add blockdev-set-active QMP command + +RH-Author: Kevin Wolf +RH-MergeRequest: 339: QMP command for block device reactivation after migration +RH-Jira: RHEL-54296 RHEL-78397 +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [14/22] e494fb6df6c363b6266e5fb7d09c31b3e7694f04 (kmwolf/centos-qemu-kvm) + +The system emulator tries to automatically activate and inactivate block +nodes at the right point during migration. However, there are still +cases where it's necessary that the user can do this manually. + +Images are only activated on the destination VM of a migration when the +VM is actually resumed. If the VM was paused, this doesn't happen +automatically. The user may want to perform some operation on a block +device (e.g. taking a snapshot or starting a block job) without also +resuming the VM yet. This is an example where a manual command is +necessary. + +Another example is VM migration when the image files are opened by an +external qemu-storage-daemon instance on each side. In this case, the +process that needs to hand over the images isn't even part of the +migration and can't know when the migration completes. Management tools +need a way to explicitly inactivate images on the source and activate +them on the destination. + +This adds a new blockdev-set-active QMP command that lets the user +change the status of individual nodes (this is necessary in +qemu-storage-daemon because it could be serving multiple VMs and only +one of them migrates at a time). For convenience, operating on all +devices (like QEMU does automatically during migration) is offered as an +option, too, and can be used in the context of single VM. 
+ +Signed-off-by: Kevin Wolf +Acked-by: Fabiano Rosas +Reviewed-by: Eric Blake +Reviewed-by: Stefan Hajnoczi +Message-ID: <20250204211407.381505-9-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 8cd37207f8a90c5f995283ecf95f1cb5f7518a77) +Signed-off-by: Kevin Wolf +--- + block.c | 21 ++++++++++++++++++++ + blockdev.c | 32 ++++++++++++++++++++++++++++++ + include/block/block-global-state.h | 3 +++ + qapi/block-core.json | 32 ++++++++++++++++++++++++++++++ + 4 files changed, 88 insertions(+) + +diff --git a/block.c b/block.c +index fd2ac177ef..2140a5d3b7 100644 +--- a/block.c ++++ b/block.c +@@ -7052,6 +7052,27 @@ bdrv_inactivate_recurse(BlockDriverState *bs, bool top_level) + return 0; + } + ++int bdrv_inactivate(BlockDriverState *bs, Error **errp) ++{ ++ int ret; ++ ++ GLOBAL_STATE_CODE(); ++ GRAPH_RDLOCK_GUARD_MAINLOOP(); ++ ++ if (bdrv_has_bds_parent(bs, true)) { ++ error_setg(errp, "Node has active parent node"); ++ return -EPERM; ++ } ++ ++ ret = bdrv_inactivate_recurse(bs, true); ++ if (ret < 0) { ++ error_setg_errno(errp, -ret, "Failed to inactivate node"); ++ return ret; ++ } ++ ++ return 0; ++} ++ + int bdrv_inactivate_all(void) + { + BlockDriverState *bs = NULL; +diff --git a/blockdev.c b/blockdev.c +index 81430122df..70046b6690 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -3468,6 +3468,38 @@ void qmp_blockdev_del(const char *node_name, Error **errp) + bdrv_unref(bs); + } + ++void qmp_blockdev_set_active(const char *node_name, bool active, Error **errp) ++{ ++ int ret; ++ ++ GLOBAL_STATE_CODE(); ++ GRAPH_RDLOCK_GUARD_MAINLOOP(); ++ ++ if (!node_name) { ++ if (active) { ++ bdrv_activate_all(errp); ++ } else { ++ ret = bdrv_inactivate_all(); ++ if (ret < 0) { ++ error_setg_errno(errp, -ret, "Failed to inactivate all nodes"); ++ } ++ } ++ } else { ++ BlockDriverState *bs = bdrv_find_node(node_name); ++ if (!bs) { ++ error_setg(errp, "Failed to find node with node-name='%s'", ++ node_name); ++ return; ++ } ++ ++ if (active) { ++ 
bdrv_activate(bs, errp); ++ } else { ++ bdrv_inactivate(bs, errp); ++ } ++ } ++} ++ + static BdrvChild * GRAPH_RDLOCK + bdrv_find_child(BlockDriverState *parent_bs, const char *child_name) + { +diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h +index a826bf5f78..9be34b3c99 100644 +--- a/include/block/block-global-state.h ++++ b/include/block/block-global-state.h +@@ -184,6 +184,9 @@ bdrv_activate(BlockDriverState *bs, Error **errp); + int coroutine_fn no_co_wrapper_bdrv_rdlock + bdrv_co_activate(BlockDriverState *bs, Error **errp); + ++int no_coroutine_fn ++bdrv_inactivate(BlockDriverState *bs, Error **errp); ++ + void bdrv_activate_all(Error **errp); + int bdrv_inactivate_all(void); + +diff --git a/qapi/block-core.json b/qapi/block-core.json +index 6ec603aa6f..c1af3d1f7d 100644 +--- a/qapi/block-core.json ++++ b/qapi/block-core.json +@@ -4930,6 +4930,38 @@ + { 'command': 'blockdev-del', 'data': { 'node-name': 'str' }, + 'allow-preconfig': true } + ++## ++# @blockdev-set-active: ++# ++# Activate or inactivate a block device. Use this to manage the handover of ++# block devices on migration with qemu-storage-daemon. ++# ++# Activating a node automatically activates all of its child nodes first. ++# Inactivating a node automatically inactivates any of its child nodes that are ++# not in use by a still active node. ++# ++# @node-name: Name of the graph node to activate or inactivate. By default, all ++# nodes are affected by the operation. ++# ++# @active: true if the nodes should be active when the command returns success, ++# false if they should be inactive. ++# ++# Since: 10.0 ++# ++# .. 
qmp-example:: ++# ++# -> { "execute": "blockdev-set-active", ++# "arguments": { ++# "node-name": "node0", ++# "active": false ++# } ++# } ++# <- { "return": {} } ++## ++{ 'command': 'blockdev-set-active', ++ 'data': { '*node-name': 'str', 'active': 'bool' }, ++ 'allow-preconfig': true } ++ + ## + # @BlockdevCreateOptionsFile: + # +-- +2.48.1 + diff --git a/SPECS/qemu/kvm-block-Add-option-to-create-inactive-nodes.patch b/SPECS/qemu/kvm-block-Add-option-to-create-inactive-nodes.patch new file mode 100644 index 00000000000..8502b88c418 --- /dev/null +++ b/SPECS/qemu/kvm-block-Add-option-to-create-inactive-nodes.patch @@ -0,0 +1,102 @@ +From d44250363b08e627e06a9afe288d02a3d995afc0 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 4 Feb 2025 22:13:58 +0100 +Subject: [PATCH 14/23] block: Add option to create inactive nodes + +RH-Author: Kevin Wolf +RH-MergeRequest: 339: QMP command for block device reactivation after migration +RH-Jira: RHEL-54296 RHEL-78397 +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [13/22] 45b01b9c09d5f12715e4977cab6140d2cac90714 (kmwolf/centos-qemu-kvm) + +In QEMU, nodes are automatically created inactive while expecting an +incoming migration (i.e. RUN_STATE_INMIGRATE). In qemu-storage-daemon, +the notion of runstates doesn't exist. It also wouldn't necessarily make +sense to introduce it because a single daemon can serve multiple VMs +that can be in different states. + +Therefore, allow the user to explicitly open images as inactive with a +new option. The default is as before: Nodes are usually active, except +when created during RUN_STATE_INMIGRATE. 
+ +Signed-off-by: Kevin Wolf +Acked-by: Fabiano Rosas +Reviewed-by: Eric Blake +Reviewed-by: Stefan Hajnoczi +Message-ID: <20250204211407.381505-8-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit faecd16fe5c65a25b5b55b5edbe4322cec5a9d96) +Signed-off-by: Kevin Wolf +--- + block.c | 9 +++++++++ + include/block/block-common.h | 1 + + qapi/block-core.json | 6 ++++++ + 3 files changed, 16 insertions(+) + +diff --git a/block.c b/block.c +index bedd54deaa..fd2ac177ef 100644 +--- a/block.c ++++ b/block.c +@@ -1573,6 +1573,10 @@ static void update_flags_from_options(int *flags, QemuOpts *opts) + if (qemu_opt_get_bool_del(opts, BDRV_OPT_AUTO_READ_ONLY, false)) { + *flags |= BDRV_O_AUTO_RDONLY; + } ++ ++ if (!qemu_opt_get_bool_del(opts, BDRV_OPT_ACTIVE, true)) { ++ *flags |= BDRV_O_INACTIVE; ++ } + } + + static void update_options_from_flags(QDict *options, int flags) +@@ -1799,6 +1803,11 @@ QemuOptsList bdrv_runtime_opts = { + .type = QEMU_OPT_BOOL, + .help = "Ignore flush requests", + }, ++ { ++ .name = BDRV_OPT_ACTIVE, ++ .type = QEMU_OPT_BOOL, ++ .help = "Node is activated", ++ }, + { + .name = BDRV_OPT_READ_ONLY, + .type = QEMU_OPT_BOOL, +diff --git a/include/block/block-common.h b/include/block/block-common.h +index 338fe5ff7a..7030669f04 100644 +--- a/include/block/block-common.h ++++ b/include/block/block-common.h +@@ -257,6 +257,7 @@ typedef enum { + #define BDRV_OPT_AUTO_READ_ONLY "auto-read-only" + #define BDRV_OPT_DISCARD "discard" + #define BDRV_OPT_FORCE_SHARE "force-share" ++#define BDRV_OPT_ACTIVE "active" + + + #define BDRV_SECTOR_BITS 9 +diff --git a/qapi/block-core.json b/qapi/block-core.json +index 92af032744..6ec603aa6f 100644 +--- a/qapi/block-core.json ++++ b/qapi/block-core.json +@@ -4668,6 +4668,11 @@ + # + # @cache: cache-related options + # ++# @active: whether the block node should be activated (default: true). 
++# Having inactive block nodes is useful primarily for migration because it ++# allows opening an image on the destination while the source is still ++# holding locks for it. (Since 10.0) ++# + # @read-only: whether the block device should be read-only (default: + # false). Note that some block drivers support only read-only + # access, either generally or in certain configurations. In this +@@ -4694,6 +4699,7 @@ + '*node-name': 'str', + '*discard': 'BlockdevDiscardOptions', + '*cache': 'BlockdevCacheOptions', ++ '*active': 'bool', + '*read-only': 'bool', + '*auto-read-only': 'bool', + '*force-share': 'bool', +-- +2.48.1 + diff --git a/SPECS/qemu/kvm-block-Allow-inactivating-already-inactive-nodes.patch b/SPECS/qemu/kvm-block-Allow-inactivating-already-inactive-nodes.patch new file mode 100644 index 00000000000..7001529a19e --- /dev/null +++ b/SPECS/qemu/kvm-block-Allow-inactivating-already-inactive-nodes.patch @@ -0,0 +1,80 @@ +From f7f73025679c5d001256d87bc47566d1db4e98c7 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 4 Feb 2025 22:13:53 +0100 +Subject: [PATCH 09/23] block: Allow inactivating already inactive nodes + +RH-Author: Kevin Wolf +RH-MergeRequest: 339: QMP command for block device reactivation after migration +RH-Jira: RHEL-54296 RHEL-78397 +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [8/22] 781a749ae61e038b5b562407b01d53a1bfba598c (kmwolf/centos-qemu-kvm) + +What we wanted to catch with the assertion is cases where the recursion +finds that a child was inactive before its parent. This should never +happen. But if the user tries to inactivate an image that is already +inactive, that's harmless and we don't want to fail the assertion. 
+ +Signed-off-by: Kevin Wolf +Acked-by: Fabiano Rosas +Reviewed-by: Eric Blake +Reviewed-by: Stefan Hajnoczi +Message-ID: <20250204211407.381505-3-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit a6490ec9d56b9e95a13918813585a3a9891710bc) +Signed-off-by: Kevin Wolf +--- + block.c | 16 ++++++++++++---- + 1 file changed, 12 insertions(+), 4 deletions(-) + +diff --git a/block.c b/block.c +index c94d78eefd..a2aa454312 100644 +--- a/block.c ++++ b/block.c +@@ -6959,7 +6959,8 @@ bdrv_has_bds_parent(BlockDriverState *bs, bool only_active) + return false; + } + +-static int GRAPH_RDLOCK bdrv_inactivate_recurse(BlockDriverState *bs) ++static int GRAPH_RDLOCK ++bdrv_inactivate_recurse(BlockDriverState *bs, bool top_level) + { + BdrvChild *child, *parent; + int ret; +@@ -6977,7 +6978,14 @@ static int GRAPH_RDLOCK bdrv_inactivate_recurse(BlockDriverState *bs) + return 0; + } + +- assert(!(bs->open_flags & BDRV_O_INACTIVE)); ++ /* ++ * Inactivating an already inactive node on user request is harmless, but if ++ * a child is already inactive before its parent, that's bad. 
++ */ ++ if (bs->open_flags & BDRV_O_INACTIVE) { ++ assert(top_level); ++ return 0; ++ } + + /* Inactivate this node */ + if (bs->drv->bdrv_inactivate) { +@@ -7014,7 +7022,7 @@ static int GRAPH_RDLOCK bdrv_inactivate_recurse(BlockDriverState *bs) + + /* Recursively inactivate children */ + QLIST_FOREACH(child, &bs->children, next) { +- ret = bdrv_inactivate_recurse(child->bs); ++ ret = bdrv_inactivate_recurse(child->bs, false); + if (ret < 0) { + return ret; + } +@@ -7039,7 +7047,7 @@ int bdrv_inactivate_all(void) + if (bdrv_has_bds_parent(bs, false)) { + continue; + } +- ret = bdrv_inactivate_recurse(bs); ++ ret = bdrv_inactivate_recurse(bs, true); + if (ret < 0) { + bdrv_next_cleanup(&it); + break; +-- +2.48.1 + diff --git a/SPECS/qemu/kvm-block-Don-t-attach-inactive-child-to-active-node.patch b/SPECS/qemu/kvm-block-Don-t-attach-inactive-child-to-active-node.patch new file mode 100644 index 00000000000..f2a9219a22d --- /dev/null +++ b/SPECS/qemu/kvm-block-Don-t-attach-inactive-child-to-active-node.patch @@ -0,0 +1,46 @@ +From 0f0968badaa11f4ac56f8ee93cbe11f9a6d5fc95 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 4 Feb 2025 22:13:56 +0100 +Subject: [PATCH 12/23] block: Don't attach inactive child to active node + +RH-Author: Kevin Wolf +RH-MergeRequest: 339: QMP command for block device reactivation after migration +RH-Jira: RHEL-54296 RHEL-78397 +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [11/22] 1ca560728b97bd1d5f7498a7e5d23e2d8bb0808d (kmwolf/centos-qemu-kvm) + +An active node makes unrestricted use of its children and would possibly +run into assertion failures when it operates on an inactive child node. 
+ +Signed-off-by: Kevin Wolf +Acked-by: Fabiano Rosas +Reviewed-by: Eric Blake +Reviewed-by: Stefan Hajnoczi +Message-ID: <20250204211407.381505-6-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 9b81361aedcc47905de5e91f68221de89c6f5467) +Signed-off-by: Kevin Wolf +--- + block.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/block.c b/block.c +index a2aa454312..41e72e6965 100644 +--- a/block.c ++++ b/block.c +@@ -3183,6 +3183,11 @@ bdrv_attach_child_noperm(BlockDriverState *parent_bs, + child_bs->node_name, child_name, parent_bs->node_name); + return NULL; + } ++ if (bdrv_is_inactive(child_bs) && !bdrv_is_inactive(parent_bs)) { ++ error_setg(errp, "Inactive '%s' can't be a %s child of active '%s'", ++ child_bs->node_name, child_name, parent_bs->node_name); ++ return NULL; ++ } + + bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm); + bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL, +-- +2.48.1 + diff --git a/SPECS/qemu/kvm-block-Drain-nodes-before-inactivating-them.patch b/SPECS/qemu/kvm-block-Drain-nodes-before-inactivating-them.patch new file mode 100644 index 00000000000..6514f25c45d --- /dev/null +++ b/SPECS/qemu/kvm-block-Drain-nodes-before-inactivating-them.patch @@ -0,0 +1,52 @@ +From bc9b651ce0f36da5a5b6eb8631e7040e59ea2493 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 4 Feb 2025 22:14:02 +0100 +Subject: [PATCH 18/23] block: Drain nodes before inactivating them + +RH-Author: Kevin Wolf +RH-MergeRequest: 339: QMP command for block device reactivation after migration +RH-Jira: RHEL-54296 RHEL-78397 +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [17/22] f0fec0f51ed033038b565297a7217cbf61ab70f7 (kmwolf/centos-qemu-kvm) + +So far the assumption has always been that if we try to inactivate a +node, it is already idle. This doesn't hold true any more if we allow +inactivating exported nodes because we can't know when new external +requests come in. 
+ +Drain the node around setting BDRV_O_INACTIVE so that requests can't +start operating on an active node and then in the middle it suddenly +becomes inactive. With this change, it's enough for exports to check +for new requests that they operate on an active node (or, like reads, +are allowed even on an inactive node). + +Signed-off-by: Kevin Wolf +Acked-by: Fabiano Rosas +Message-ID: <20250204211407.381505-12-kwolf@redhat.com> +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Eric Blake +Signed-off-by: Kevin Wolf +(cherry picked from commit 2849092a0024405e74c96f0a5ec41bb182ec8538) +Signed-off-by: Kevin Wolf +--- + block.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/block.c b/block.c +index 2140a5d3b7..38cb8481a8 100644 +--- a/block.c ++++ b/block.c +@@ -7032,7 +7032,9 @@ bdrv_inactivate_recurse(BlockDriverState *bs, bool top_level) + return -EPERM; + } + ++ bdrv_drained_begin(bs); + bs->open_flags |= BDRV_O_INACTIVE; ++ bdrv_drained_end(bs); + + /* + * Update permissions, they may differ for inactive nodes. 
+-- +2.48.1 + diff --git a/SPECS/qemu/kvm-block-Fix-crash-on-block_resize-on-inactive-node.patch b/SPECS/qemu/kvm-block-Fix-crash-on-block_resize-on-inactive-node.patch new file mode 100644 index 00000000000..63d37e6420c --- /dev/null +++ b/SPECS/qemu/kvm-block-Fix-crash-on-block_resize-on-inactive-node.patch @@ -0,0 +1,68 @@ +From eed2b4b40553cae162407b375f18c935f5025c53 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 4 Feb 2025 22:13:57 +0100 +Subject: [PATCH 13/23] block: Fix crash on block_resize on inactive node + +RH-Author: Kevin Wolf +RH-MergeRequest: 339: QMP command for block device reactivation after migration +RH-Jira: RHEL-54296 RHEL-78397 +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [12/22] e352c665f68a865859cc62fe1ed6e4044407b8e3 (kmwolf/centos-qemu-kvm) + +In order for block_resize to fail gracefully on an inactive node instead +of crashing with an assertion failure in bdrv_co_write_req_prepare() +(called from bdrv_co_truncate()), we need to check for inactive nodes +also when they are attached as a root node and make sure that +BLK_PERM_RESIZE isn't among the permissions allowed for inactive nodes. +To this effect, don't enumerate the permissions that are incompatible +with inactive nodes any more, but allow only BLK_PERM_CONSISTENT_READ +for them. 
+ +Signed-off-by: Kevin Wolf +Acked-by: Fabiano Rosas +Reviewed-by: Eric Blake +Reviewed-by: Stefan Hajnoczi +Message-ID: <20250204211407.381505-7-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 8c2c72a33581987af8d8c484d03af3cd69b9e10a) +Signed-off-by: Kevin Wolf +--- + block.c | 7 +++++++ + block/block-backend.c | 2 +- + 2 files changed, 8 insertions(+), 1 deletion(-) + +diff --git a/block.c b/block.c +index 41e72e6965..bedd54deaa 100644 +--- a/block.c ++++ b/block.c +@@ -3077,6 +3077,13 @@ bdrv_attach_child_common(BlockDriverState *child_bs, + assert(child_class->get_parent_desc); + GLOBAL_STATE_CODE(); + ++ if (bdrv_is_inactive(child_bs) && (perm & ~BLK_PERM_CONSISTENT_READ)) { ++ g_autofree char *perm_names = bdrv_perm_names(perm); ++ error_setg(errp, "Permission '%s' unavailable on inactive node", ++ perm_names); ++ return NULL; ++ } ++ + new_child = g_new(BdrvChild, 1); + *new_child = (BdrvChild) { + .bs = NULL, +diff --git a/block/block-backend.c b/block/block-backend.c +index db6f9b92a3..356db1b703 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -253,7 +253,7 @@ static bool blk_can_inactivate(BlockBackend *blk) + * guest. For block job BBs that satisfy this, we can just allow + * it. This is the case for mirror job source, which is required + * by libvirt non-shared block migration. 
*/ +- if (!(blk->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED))) { ++ if (!(blk->perm & ~BLK_PERM_CONSISTENT_READ)) { + return true; + } + +-- +2.48.1 + diff --git a/SPECS/qemu/kvm-block-Inactivate-external-snapshot-overlays-when-nec.patch b/SPECS/qemu/kvm-block-Inactivate-external-snapshot-overlays-when-nec.patch new file mode 100644 index 00000000000..3c38d0645ac --- /dev/null +++ b/SPECS/qemu/kvm-block-Inactivate-external-snapshot-overlays-when-nec.patch @@ -0,0 +1,68 @@ +From 6f4a7e3489a104137f60e034157cf687e4c3158f Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 4 Feb 2025 22:13:54 +0100 +Subject: [PATCH 10/23] block: Inactivate external snapshot overlays when + necessary + +RH-Author: Kevin Wolf +RH-MergeRequest: 339: QMP command for block device reactivation after migration +RH-Jira: RHEL-54296 RHEL-78397 +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [9/22] 59b39138f0da734d497f3a58d458992abc348978 (kmwolf/centos-qemu-kvm) + +Putting an active block node on top of an inactive one is strictly +speaking an invalid configuration and the next patch will turn it into a +hard error. + +However, taking a snapshot while disk images are inactive after +completing migration has an important use case: After migrating to a +file, taking an external snapshot is what is needed to take a full VM +snapshot. + +In order for this to keep working after the later patches, change +creating a snapshot such that it automatically inactivates an overlay +that is added on top of an already inactive node. 
+ +Signed-off-by: Kevin Wolf +Acked-by: Fabiano Rosas +Reviewed-by: Eric Blake +Reviewed-by: Stefan Hajnoczi +Message-ID: <20250204211407.381505-4-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit e80210ffb24c4e47650344ba77ce3ed354af596c) +Signed-off-by: Kevin Wolf +--- + blockdev.c | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/blockdev.c b/blockdev.c +index 835064ed03..81430122df 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -1497,6 +1497,22 @@ static void external_snapshot_action(TransactionAction *action, + return; + } + ++ /* ++ * Older QEMU versions have allowed adding an active parent node to an ++ * inactive child node. This is unsafe in the general case, but there is an ++ * important use case, which is taking a VM snapshot with migration to file ++ * and then adding an external snapshot while the VM is still stopped and ++ * images are inactive. Requiring the user to explicitly create the overlay ++ * as inactive would break compatibility, so just do it automatically here ++ * to keep this working. 
++ */ ++ if (bdrv_is_inactive(state->old_bs) && !bdrv_is_inactive(state->new_bs)) { ++ ret = bdrv_inactivate(state->new_bs, errp); ++ if (ret < 0) { ++ return; ++ } ++ } ++ + ret = bdrv_append(state->new_bs, state->old_bs, errp); + if (ret < 0) { + return; +-- +2.48.1 + diff --git a/SPECS/qemu/kvm-block-Support-inactive-nodes-in-blk_insert_bs.patch b/SPECS/qemu/kvm-block-Support-inactive-nodes-in-blk_insert_bs.patch new file mode 100644 index 00000000000..fbe6fdaa98c --- /dev/null +++ b/SPECS/qemu/kvm-block-Support-inactive-nodes-in-blk_insert_bs.patch @@ -0,0 +1,66 @@ +From 9ab8c39284c0fc2d9d685706b2d788ab02930a08 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 4 Feb 2025 22:14:00 +0100 +Subject: [PATCH 16/23] block: Support inactive nodes in blk_insert_bs() + +RH-Author: Kevin Wolf +RH-MergeRequest: 339: QMP command for block device reactivation after migration +RH-Jira: RHEL-54296 RHEL-78397 +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [15/22] 17405266afeddc47f73828816d0b8c1ab5ece462 (kmwolf/centos-qemu-kvm) + +Device models have a relatively complex way to set up their block +backends, in which blk_attach_dev() sets blk->disable_perm = true. +We want to support inactive images in exports, too, so that +qemu-storage-daemon can be used with migration. Because they don't use +blk_attach_dev(), they need another way to set this flag. The most +convenient is to do this automatically when an inactive node is attached +to a BlockBackend that can be inactivated. 
+ +Signed-off-by: Kevin Wolf +Acked-by: Fabiano Rosas +Reviewed-by: Eric Blake +Reviewed-by: Stefan Hajnoczi +Message-ID: <20250204211407.381505-10-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit c1c5c7cc4ef6c45ca769c640566fd40d2cb7d5c1) +Signed-off-by: Kevin Wolf +--- + block/block-backend.c | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +diff --git a/block/block-backend.c b/block/block-backend.c +index 356db1b703..4a5a1c1f6a 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -909,14 +909,24 @@ void blk_remove_bs(BlockBackend *blk) + int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp) + { + ThrottleGroupMember *tgm = &blk->public.throttle_group_member; ++ uint64_t perm, shared_perm; + + GLOBAL_STATE_CODE(); + bdrv_ref(bs); + bdrv_graph_wrlock(); ++ ++ if ((bs->open_flags & BDRV_O_INACTIVE) && blk_can_inactivate(blk)) { ++ blk->disable_perm = true; ++ perm = 0; ++ shared_perm = BLK_PERM_ALL; ++ } else { ++ perm = blk->perm; ++ shared_perm = blk->shared_perm; ++ } ++ + blk->root = bdrv_root_attach_child(bs, "root", &child_root, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, +- blk->perm, blk->shared_perm, +- blk, errp); ++ perm, shared_perm, blk, errp); + bdrv_graph_wrunlock(); + if (blk->root == NULL) { + return -EPERM; +-- +2.48.1 + diff --git a/SPECS/qemu/kvm-block-export-Add-option-to-allow-export-of-inactive-.patch b/SPECS/qemu/kvm-block-export-Add-option-to-allow-export-of-inactive-.patch new file mode 100644 index 00000000000..e3db67a80b7 --- /dev/null +++ b/SPECS/qemu/kvm-block-export-Add-option-to-allow-export-of-inactive-.patch @@ -0,0 +1,135 @@ +From 22de4ba6cec94a38cd56156d9114f06dc4d2a5a5 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 4 Feb 2025 22:14:03 +0100 +Subject: [PATCH 19/23] block/export: Add option to allow export of inactive + nodes + +RH-Author: Kevin Wolf +RH-MergeRequest: 339: QMP command for block device reactivation after migration +RH-Jira: 
RHEL-54296 RHEL-78397 +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [18/22] 985eadb03f27d046b89ffeef1fb36ef2e4579552 (kmwolf/centos-qemu-kvm) + +Add an option in BlockExportOptions to allow creating an export on an +inactive node without activating the node. This mode needs to be +explicitly supported by the export type (so that it doesn't perform any +operations that are forbidden for inactive nodes), so this patch alone +doesn't allow this option to be successfully used yet. + +Signed-off-by: Kevin Wolf +Acked-by: Fabiano Rosas +Reviewed-by: Eric Blake +Reviewed-by: Stefan Hajnoczi +Message-ID: <20250204211407.381505-13-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 1600ef01ab1296ca8230daa6bc41ba983751f646) +Signed-off-by: Kevin Wolf +--- + block/export/export.c | 31 +++++++++++++++++++++---------- + include/block/export.h | 3 +++ + qapi/block-export.json | 10 +++++++++- + 3 files changed, 33 insertions(+), 11 deletions(-) + +diff --git a/block/export/export.c b/block/export/export.c +index 23a86efcdb..71af65b3e5 100644 +--- a/block/export/export.c ++++ b/block/export/export.c +@@ -75,6 +75,7 @@ static const BlockExportDriver *blk_exp_find_driver(BlockExportType type) + BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp) + { + bool fixed_iothread = export->has_fixed_iothread && export->fixed_iothread; ++ bool allow_inactive = export->has_allow_inactive && export->allow_inactive; + const BlockExportDriver *drv; + BlockExport *exp = NULL; + BlockDriverState *bs; +@@ -138,17 +139,24 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp) + } + } + +- /* +- * Block exports are used for non-shared storage migration. Make sure +- * that BDRV_O_INACTIVE is cleared and the image is ready for write +- * access since the export could be available before migration handover. +- * ctx was acquired in the caller. 
+- */ + bdrv_graph_rdlock_main_loop(); +- ret = bdrv_activate(bs, errp); +- if (ret < 0) { +- bdrv_graph_rdunlock_main_loop(); +- goto fail; ++ if (allow_inactive) { ++ if (!drv->supports_inactive) { ++ error_setg(errp, "Export type does not support inactive exports"); ++ bdrv_graph_rdunlock_main_loop(); ++ goto fail; ++ } ++ } else { ++ /* ++ * Block exports are used for non-shared storage migration. Make sure ++ * that BDRV_O_INACTIVE is cleared and the image is ready for write ++ * access since the export could be available before migration handover. ++ */ ++ ret = bdrv_activate(bs, errp); ++ if (ret < 0) { ++ bdrv_graph_rdunlock_main_loop(); ++ goto fail; ++ } + } + bdrv_graph_rdunlock_main_loop(); + +@@ -162,6 +170,9 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp) + if (!fixed_iothread) { + blk_set_allow_aio_context_change(blk, true); + } ++ if (allow_inactive) { ++ blk_set_force_allow_inactivate(blk); ++ } + + ret = blk_insert_bs(blk, bs, errp); + if (ret < 0) { +diff --git a/include/block/export.h b/include/block/export.h +index f2fe0f8078..4bd9531d4d 100644 +--- a/include/block/export.h ++++ b/include/block/export.h +@@ -29,6 +29,9 @@ typedef struct BlockExportDriver { + */ + size_t instance_size; + ++ /* True if the export type supports running on an inactive node */ ++ bool supports_inactive; ++ + /* Creates and starts a new block export */ + int (*create)(BlockExport *, BlockExportOptions *, Error **); + +diff --git a/qapi/block-export.json b/qapi/block-export.json +index ce33fe378d..117b05d13c 100644 +--- a/qapi/block-export.json ++++ b/qapi/block-export.json +@@ -372,6 +372,13 @@ + # cannot be moved to the iothread. The default is false. + # (since: 5.2) + # ++# @allow-inactive: If true, the export allows the exported node to be inactive. ++# If it is created for an inactive block node, the node remains inactive. If ++# the export type doesn't support running on an inactive node, an error is ++# returned. 
If false, inactive block nodes are automatically activated before ++# creating the export and trying to inactivate them later fails. ++# (since: 10.0; default: false) ++# + # Since: 4.2 + ## + { 'union': 'BlockExportOptions', +@@ -381,7 +388,8 @@ + '*iothread': 'str', + 'node-name': 'str', + '*writable': 'bool', +- '*writethrough': 'bool' }, ++ '*writethrough': 'bool', ++ '*allow-inactive': 'bool' }, + 'discriminator': 'type', + 'data': { + 'nbd': 'BlockExportOptionsNbd', +-- +2.48.1 + diff --git a/SPECS/qemu/kvm-block-export-Don-t-ignore-image-activation-error-in-.patch b/SPECS/qemu/kvm-block-export-Don-t-ignore-image-activation-error-in-.patch new file mode 100644 index 00000000000..217851f869d --- /dev/null +++ b/SPECS/qemu/kvm-block-export-Don-t-ignore-image-activation-error-in-.patch @@ -0,0 +1,50 @@ +From 0cced76da63a886e6aaaa96a2c40620db27cb8cc Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 4 Feb 2025 22:14:01 +0100 +Subject: [PATCH 17/23] block/export: Don't ignore image activation error in + blk_exp_add() + +RH-Author: Kevin Wolf +RH-MergeRequest: 339: QMP command for block device reactivation after migration +RH-Jira: RHEL-54296 RHEL-78397 +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [16/22] 86e791a19948c88a3512bb7ba3bd3ed0f03a2a18 (kmwolf/centos-qemu-kvm) + +Currently, block exports can't handle inactive images correctly. +Incoming write requests would run into assertion failures. Make sure +that we return an error when creating an export can't activate the +image. 
+ +Signed-off-by: Kevin Wolf +Acked-by: Fabiano Rosas +Reviewed-by: Eric Blake +Reviewed-by: Stefan Hajnoczi +Message-ID: <20250204211407.381505-11-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 69f28176ca0af850db23a1c6364f0c8525b20801) +Signed-off-by: Kevin Wolf +--- + block/export/export.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/block/export/export.c b/block/export/export.c +index 6d51ae8ed7..23a86efcdb 100644 +--- a/block/export/export.c ++++ b/block/export/export.c +@@ -145,7 +145,11 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp) + * ctx was acquired in the caller. + */ + bdrv_graph_rdlock_main_loop(); +- bdrv_activate(bs, NULL); ++ ret = bdrv_activate(bs, errp); ++ if (ret < 0) { ++ bdrv_graph_rdunlock_main_loop(); ++ goto fail; ++ } + bdrv_graph_rdunlock_main_loop(); + + perm = BLK_PERM_CONSISTENT_READ; +-- +2.48.1 + diff --git a/SPECS/qemu/kvm-migration-Add-helper-to-get-target-runstate.patch b/SPECS/qemu/kvm-migration-Add-helper-to-get-target-runstate.patch new file mode 100644 index 00000000000..7c58ada8e17 --- /dev/null +++ b/SPECS/qemu/kvm-migration-Add-helper-to-get-target-runstate.patch @@ -0,0 +1,84 @@ +From 21e971c2d7a8aee5cb95b7714c15374331add796 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 6 Dec 2024 18:08:33 -0500 +Subject: [PATCH 02/23] migration: Add helper to get target runstate + +RH-Author: Kevin Wolf +RH-MergeRequest: 339: QMP command for block device reactivation after migration +RH-Jira: RHEL-54296 RHEL-78397 +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [1/22] d1b2ee0d6a878e71ad04ace9fd82d8aab2d5217b (kmwolf/centos-qemu-kvm) + +In 99% cases, after QEMU migrates to dest host, it tries to detect the +target VM runstate using global_state_get_runstate(). + +There's one outlier so far which is Xen that won't send global state. 
+That's the major reason why global_state_received() check was always there +together with global_state_get_runstate(). + +However it's utterly confusing why global_state_received() has anything to +do with "let's start VM or not". + +Provide a helper to explain it, then we have an unified entry for getting +the target dest QEMU runstate after migration. + +Suggested-by: Fabiano Rosas +Signed-off-by: Peter Xu +Message-Id: <20241206230838.1111496-2-peterx@redhat.com> +Signed-off-by: Fabiano Rosas +(cherry picked from commit 7815f69867da92335055d4b5248430b0f122ce4e) +Signed-off-by: Kevin Wolf +--- + migration/migration.c | 21 +++++++++++++++++---- + 1 file changed, 17 insertions(+), 4 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 3dea06d577..c7a9e2e026 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -135,6 +135,21 @@ static bool migration_needs_multiple_sockets(void) + return migrate_multifd() || migrate_postcopy_preempt(); + } + ++static RunState migration_get_target_runstate(void) ++{ ++ /* ++ * When the global state is not migrated, it means we don't know the ++ * runstate of the src QEMU. We don't have much choice but assuming ++ * the VM is running. NOTE: this is pretty rare case, so far only Xen ++ * uses it. ++ */ ++ if (!global_state_received()) { ++ return RUN_STATE_RUNNING; ++ } ++ ++ return global_state_get_runstate(); ++} ++ + static bool transport_supports_multi_channels(MigrationAddress *addr) + { + if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) { +@@ -727,8 +742,7 @@ static void process_incoming_migration_bh(void *opaque) + * unless we really are starting the VM. + */ + if (!migrate_late_block_activate() || +- (autostart && (!global_state_received() || +- runstate_is_live(global_state_get_runstate())))) { ++ (autostart && runstate_is_live(migration_get_target_runstate()))) { + /* Make sure all file formats throw away their mutable metadata. 
+ * If we get an error here, just don't restart the VM yet. */ + bdrv_activate_all(&local_err); +@@ -751,8 +765,7 @@ static void process_incoming_migration_bh(void *opaque) + + dirty_bitmap_mig_before_vm_start(); + +- if (!global_state_received() || +- runstate_is_live(global_state_get_runstate())) { ++ if (runstate_is_live(migration_get_target_runstate())) { + if (autostart) { + vm_start(); + } else { +-- +2.48.1 + diff --git a/SPECS/qemu/kvm-migration-Ensure-vmstate_save-sets-errp.patch b/SPECS/qemu/kvm-migration-Ensure-vmstate_save-sets-errp.patch new file mode 100644 index 00000000000..6493c3e98d6 --- /dev/null +++ b/SPECS/qemu/kvm-migration-Ensure-vmstate_save-sets-errp.patch @@ -0,0 +1,92 @@ +From 91f67a47a3fd31be578988d7ac11bb814314ec5a Mon Sep 17 00:00:00 2001 +From: Hanna Czenczek +Date: Tue, 15 Oct 2024 19:04:37 +0200 +Subject: [PATCH] migration: Ensure vmstate_save() sets errp + +RH-Author: Hanna Czenczek +RH-MergeRequest: 295: migration: Ensure vmstate_save() sets errp +RH-Jira: RHEL-67844 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Jon Maloy +RH-Commit: [1/1] df65f254fa6bf241b1fd4f4d2101f137d2a6e44b (hreitz/qemu-kvm-c-9-s) + +migration/savevm.c contains some calls to vmstate_save() that are +followed by migrate_set_error() if the integer return value indicates an +error. migrate_set_error() requires that the `Error *` object passed to +it is set. Therefore, vmstate_save() is assumed to always set *errp on +error. + +Right now, that assumption is not met: vmstate_save_state_v() (called +internally by vmstate_save()) will not set *errp if +vmstate_subsection_save() or vmsd->post_save() fail. Fix that by adding +an *errp parameter to vmstate_subsection_save(), and by generating a +generic error in case post_save() fails (as is already done for +pre_save()). 
+ +Without this patch, qemu will crash after vmstate_subsection_save() or +post_save() have failed inside of a vmstate_save() call (unless +migrate_set_error() then happen to discard the new error because +s->error is already set). This happens e.g. when receiving the state +from a virtio-fs back-end (virtiofsd) fails. + +Signed-off-by: Hanna Czenczek +Link: https://lore.kernel.org/r/20241015170437.310358-1-hreitz@redhat.com +Signed-off-by: Peter Xu +(cherry picked from commit 37dfcba1a04989830c706f9cbc00450e5d3a7447) +Signed-off-by: Hanna Czenczek +--- + migration/vmstate.c | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +diff --git a/migration/vmstate.c b/migration/vmstate.c +index ef26f26ccd..d19b42630a 100644 +--- a/migration/vmstate.c ++++ b/migration/vmstate.c +@@ -22,7 +22,8 @@ + #include "trace.h" + + static int vmstate_subsection_save(QEMUFile *f, const VMStateDescription *vmsd, +- void *opaque, JSONWriter *vmdesc); ++ void *opaque, JSONWriter *vmdesc, ++ Error **errp); + static int vmstate_subsection_load(QEMUFile *f, const VMStateDescription *vmsd, + void *opaque); + +@@ -440,12 +441,13 @@ int vmstate_save_state_v(QEMUFile *f, const VMStateDescription *vmsd, + json_writer_end_array(vmdesc); + } + +- ret = vmstate_subsection_save(f, vmsd, opaque, vmdesc); ++ ret = vmstate_subsection_save(f, vmsd, opaque, vmdesc, errp); + + if (vmsd->post_save) { + int ps_ret = vmsd->post_save(opaque); +- if (!ret) { ++ if (!ret && ps_ret) { + ret = ps_ret; ++ error_setg(errp, "post-save failed: %s", vmsd->name); + } + } + return ret; +@@ -517,7 +519,8 @@ static int vmstate_subsection_load(QEMUFile *f, const VMStateDescription *vmsd, + } + + static int vmstate_subsection_save(QEMUFile *f, const VMStateDescription *vmsd, +- void *opaque, JSONWriter *vmdesc) ++ void *opaque, JSONWriter *vmdesc, ++ Error **errp) + { + const VMStateDescription * const *sub = vmsd->subsections; + bool vmdesc_has_subsections = false; +@@ -545,7 +548,7 @@ static int 
vmstate_subsection_save(QEMUFile *f, const VMStateDescription *vmsd, + qemu_put_byte(f, len); + qemu_put_buffer(f, (uint8_t *)vmsdsub->name, len); + qemu_put_be32(f, vmsdsub->version_id); +- ret = vmstate_save_state(f, vmsdsub, opaque, vmdesc); ++ ret = vmstate_save_state_with_err(f, vmsdsub, opaque, vmdesc, errp); + if (ret) { + return ret; + } +-- +2.45.1 + diff --git a/SPECS/qemu/kvm-migration-Fix-UAF-for-incoming-migration-on-Migratio.patch b/SPECS/qemu/kvm-migration-Fix-UAF-for-incoming-migration-on-Migratio.patch new file mode 100644 index 00000000000..d9d12bf95a4 --- /dev/null +++ b/SPECS/qemu/kvm-migration-Fix-UAF-for-incoming-migration-on-Migratio.patch @@ -0,0 +1,180 @@ +From 5d7d7a2ec6301f4d0b0dbea4fbdcab4e41a9cf07 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Thu, 20 Feb 2025 08:24:59 -0500 +Subject: [PATCH 7/9] migration: Fix UAF for incoming migration on + MigrationState + +RH-Author: Peter Xu +RH-MergeRequest: 344: migration: Fix UAF for incoming migration on MigrationState +RH-Jira: RHEL-69775 +RH-Acked-by: Juraj Marcin +RH-Acked-by: Jon Maloy +RH-Commit: [1/1] 106e2b4c1c461202c912b5e3ea7e586c4ab05d8c (peterx/qemu-kvm) + +On the incoming migration side, QEMU uses a coroutine to load all the VM +states. Inside, it may reference MigrationState on global states like +migration capabilities, parameters, error state, shared mutexes and more. + +However there's nothing yet to make sure MigrationState won't get +destroyed (e.g. after migration_shutdown()). Meanwhile there's also no API +available to remove the incoming coroutine in migration_shutdown(), +avoiding it to access the freed elements. + +There's a bug report showing this can happen and crash dest QEMU when +migration is cancelled on source. 
+ +When it happens, the dest main thread is trying to cleanup everything: + + #0 qemu_aio_coroutine_enter + #1 aio_dispatch_handler + #2 aio_poll + #3 monitor_cleanup + #4 qemu_cleanup + #5 qemu_default_main + +Then it found the migration incoming coroutine, schedule it (even after +migration_shutdown()), causing crash: + + #0 __pthread_kill_implementation + #1 __pthread_kill_internal + #2 __GI_raise + #3 __GI_abort + #4 __assert_fail_base + #5 __assert_fail + #6 qemu_mutex_lock_impl + #7 qemu_lockable_mutex_lock + #8 qemu_lockable_lock + #9 qemu_lockable_auto_lock + #10 migrate_set_error + #11 process_incoming_migration_co + #12 coroutine_trampoline + +To fix it, take a refcount after an incoming setup is properly done when +qmp_migrate_incoming() succeeded the 1st time. As it's during a QMP +handler which needs BQL, it means the main loop is still alive (without +going into cleanups, which also needs BQL). + +Releasing the refcount now only until the incoming migration coroutine +finished or failed. Hence the refcount is valid for both (1) setup phase +of incoming ports, mostly IO watches (e.g. qio_channel_add_watch_full()), +and (2) the incoming coroutine itself (process_incoming_migration_co()). + +Note that we can't unref in migration_incoming_state_destroy(), because +both qmp_xen_load_devices_state() and load_snapshot() will use it without +an incoming migration. Those hold BQL so they're not prone to this issue. + +PS: I suspect nobody uses Xen's command at all, as it didn't register yank, +hence AFAIU the command should crash on master when trying to unregister +yank in migration_incoming_state_destroy().. but that's another story. + +Also note that in some incoming failure cases we may not always unref the +MigrationState refcount, which is a trade-off to keep things simple. We +could make it accurate, but it can be an overkill. 
Some examples: + + - Unlike most of the rest protocols, socket_start_incoming_migration() + may create net listener after incoming port setup sucessfully. + It means we can't unref in migration_channel_process_incoming() as a + generic path because socket protocol might keep using MigrationState. + + - For either socket or file, multiple IO watches might be created, it + means logically each IO watch needs to take one refcount for + MigrationState so as to be 100% accurate on ownership of refcount taken. + +In general, we at least need per-protocol handling to make it accurate, +which can be an overkill if we know incoming failed after all. Add a short +comment to explain that when taking the refcount in qmp_migrate_incoming(). + +Bugzilla: https://issues.redhat.com/browse/RHEL-69775 +Tested-by: Yan Fu +Signed-off-by: Peter Xu +Reviewed-by: Fabiano Rosas +Message-ID: <20250220132459.512610-1-peterx@redhat.com> +Signed-off-by: Fabiano Rosas +(cherry picked from commit d657a14de5d597bbfe7b54e4c4f0646f440e98ad) +Signed-off-by: Peter Xu +--- + migration/migration.c | 40 ++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 38 insertions(+), 2 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 999d4cac54..aabdc45c16 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -115,6 +115,27 @@ static void migration_downtime_start(MigrationState *s) + s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + } + ++/* ++ * This is unfortunate: incoming migration actually needs the outgoing ++ * migration state (MigrationState) to be there too, e.g. to query ++ * capabilities, parameters, using locks, setup errors, etc. ++ * ++ * NOTE: when calling this, making sure current_migration exists and not ++ * been freed yet! Otherwise trying to access the refcount is already ++ * an use-after-free itself.. ++ * ++ * TODO: Move shared part of incoming / outgoing out into separate object. ++ * Then this is not needed. 
++ */ ++static void migrate_incoming_ref_outgoing_state(void) ++{ ++ object_ref(migrate_get_current()); ++} ++static void migrate_incoming_unref_outgoing_state(void) ++{ ++ object_unref(migrate_get_current()); ++} ++ + static void migration_downtime_end(MigrationState *s) + { + int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); +@@ -821,7 +842,7 @@ process_incoming_migration_co(void *opaque) + * postcopy thread. + */ + trace_process_incoming_migration_co_postcopy_end_main(); +- return; ++ goto out; + } + /* Else if something went wrong then just fall out of the normal exit */ + } +@@ -837,7 +858,8 @@ process_incoming_migration_co(void *opaque) + } + + migration_bh_schedule(process_incoming_migration_bh, mis); +- return; ++ goto out; ++ + fail: + migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, + MIGRATION_STATUS_FAILED); +@@ -854,6 +876,9 @@ fail: + + exit(EXIT_FAILURE); + } ++out: ++ /* Pairs with the refcount taken in qmp_migrate_incoming() */ ++ migrate_incoming_unref_outgoing_state(); + } + + /** +@@ -1875,6 +1900,17 @@ void qmp_migrate_incoming(const char *uri, bool has_channels, + return; + } + ++ /* ++ * Making sure MigrationState is available until incoming migration ++ * completes. ++ * ++ * NOTE: QEMU _might_ leak this refcount in some failure paths, but ++ * that's OK. This is the minimum change we need to at least making ++ * sure success case is clean on the refcount. We can try harder to ++ * make it accurate for any kind of failures, but it might be an ++ * overkill and doesn't bring us much benefit. 
++ */ ++ migrate_incoming_ref_outgoing_state(); + once = false; + } + +-- +2.48.1 + diff --git a/SPECS/qemu/kvm-migration-block-Apply-late-block-active-behavior-to-.patch b/SPECS/qemu/kvm-migration-block-Apply-late-block-active-behavior-to-.patch new file mode 100644 index 00000000000..f052aa34807 --- /dev/null +++ b/SPECS/qemu/kvm-migration-block-Apply-late-block-active-behavior-to-.patch @@ -0,0 +1,72 @@ +From 149fdbbe765e0153a533d5bed653e7de16f8ad9b Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 6 Dec 2024 18:08:36 -0500 +Subject: [PATCH 05/23] migration/block: Apply late-block-active behavior to + postcopy + +RH-Author: Kevin Wolf +RH-MergeRequest: 339: QMP command for block device reactivation after migration +RH-Jira: RHEL-54296 RHEL-78397 +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [4/22] 1ba47fc6bb9e3b244f75b2f29a89b9fd4014deea (kmwolf/centos-qemu-kvm) + +Postcopy never cared about late-block-active. However there's no mention +in the capability that it doesn't apply to postcopy. + +Considering that we _assumed_ late activation is always good, do that too +for postcopy unconditionally, just like precopy. After this patch, we +should have unified the behavior across all. + +Signed-off-by: Peter Xu +Reviewed-by: Fabiano Rosas +Message-Id: <20241206230838.1111496-5-peterx@redhat.com> +Signed-off-by: Fabiano Rosas +(cherry picked from commit 61f2b489987c51159c53101a072c6aa901b50506) +Signed-off-by: Kevin Wolf +--- + migration/savevm.c | 25 ++++++++++++------------- + 1 file changed, 12 insertions(+), 13 deletions(-) + +diff --git a/migration/savevm.c b/migration/savevm.c +index 6bb404b9c8..a0c4befdc1 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -2156,22 +2156,21 @@ static void loadvm_postcopy_handle_run_bh(void *opaque) + + trace_vmstate_downtime_checkpoint("dst-postcopy-bh-announced"); + +- /* Make sure all file formats throw away their mutable metadata. +- * If we get an error here, just don't restart the VM yet. 
*/ +- bdrv_activate_all(&local_err); +- if (local_err) { +- error_report_err(local_err); +- local_err = NULL; +- autostart = false; +- } +- +- trace_vmstate_downtime_checkpoint("dst-postcopy-bh-cache-invalidated"); +- + dirty_bitmap_mig_before_vm_start(); + + if (autostart) { +- /* Hold onto your hats, starting the CPU */ +- vm_start(); ++ /* ++ * Make sure all file formats throw away their mutable metadata. ++ * If we get an error here, just don't restart the VM yet. ++ */ ++ bdrv_activate_all(&local_err); ++ trace_vmstate_downtime_checkpoint("dst-postcopy-bh-cache-invalidated"); ++ if (local_err) { ++ error_report_err(local_err); ++ local_err = NULL; ++ } else { ++ vm_start(); ++ } + } else { + /* leave it paused and let management decide when to start the CPU */ + runstate_set(RUN_STATE_PAUSED); +-- +2.48.1 + diff --git a/SPECS/qemu/kvm-migration-block-Fix-possible-race-with-block_inactiv.patch b/SPECS/qemu/kvm-migration-block-Fix-possible-race-with-block_inactiv.patch new file mode 100644 index 00000000000..f0a36d47485 --- /dev/null +++ b/SPECS/qemu/kvm-migration-block-Fix-possible-race-with-block_inactiv.patch @@ -0,0 +1,78 @@ +From 703a2b932f8b6e06f4a1c0cdfdf7f4cf030a6e38 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 6 Dec 2024 18:08:37 -0500 +Subject: [PATCH 06/23] migration/block: Fix possible race with block_inactive + +RH-Author: Kevin Wolf +RH-MergeRequest: 339: QMP command for block device reactivation after migration +RH-Jira: RHEL-54296 RHEL-78397 +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [5/22] 6300de6a4a19d4648e5a5503ef8cec557fe8e666 (kmwolf/centos-qemu-kvm) + +Src QEMU sets block_inactive=true very early before the invalidation takes +place. It means if something wrong happened during setting the flag but +before reaching qemu_savevm_state_complete_precopy_non_iterable() where it +did the invalidation work, it'll make block_inactive flag inconsistent. 
+ +For example, think about when qemu_savevm_state_complete_precopy_iterable() +can fail: it will have block_inactive set to true even if all block drives +are active. + +Fix that by only update the flag after the invalidation is done. + +No Fixes for any commit, because it's not an issue if bdrv_activate_all() +is re-entrant upon all-active disks - false positive block_inactive can +bring nothing more than "trying to active the blocks but they're already +active". However let's still do it right to avoid the inconsistent flag +v.s. reality. + +Signed-off-by: Peter Xu +Reviewed-by: Fabiano Rosas +Message-Id: <20241206230838.1111496-6-peterx@redhat.com> +Signed-off-by: Fabiano Rosas +(cherry picked from commit 8c97c5a476d146b35b2873ef73df601216a494d9) +Signed-off-by: Kevin Wolf +--- + migration/migration.c | 9 +++------ + migration/savevm.c | 2 ++ + 2 files changed, 5 insertions(+), 6 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 8a262e01ff..784b7e9b90 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2779,14 +2779,11 @@ static int migration_completion_precopy(MigrationState *s, + goto out_unlock; + } + +- /* +- * Inactivate disks except in COLO, and track that we have done so in order +- * to remember to reactivate them if migration fails or is cancelled. 
+- */ +- s->block_inactive = !migrate_colo(); + migration_rate_set(RATE_LIMIT_DISABLED); ++ ++ /* Inactivate disks except in COLO */ + ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, +- s->block_inactive); ++ !migrate_colo()); + out_unlock: + bql_unlock(); + return ret; +diff --git a/migration/savevm.c b/migration/savevm.c +index a0c4befdc1..b88dadd904 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -1577,6 +1577,8 @@ int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f, + qemu_file_set_error(f, ret); + return ret; + } ++ /* Remember that we did this */ ++ s->block_inactive = true; + } + if (!in_postcopy) { + /* Postcopy stream will still be going */ +-- +2.48.1 + diff --git a/SPECS/qemu/kvm-migration-block-Make-late-block-active-the-default.patch b/SPECS/qemu/kvm-migration-block-Make-late-block-active-the-default.patch new file mode 100644 index 00000000000..93134981ad8 --- /dev/null +++ b/SPECS/qemu/kvm-migration-block-Make-late-block-active-the-default.patch @@ -0,0 +1,94 @@ +From 5a3017fbfb1081fc0a074ee53e1ad7ba8489c8c1 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 6 Dec 2024 18:08:35 -0500 +Subject: [PATCH 04/23] migration/block: Make late-block-active the default + +RH-Author: Kevin Wolf +RH-MergeRequest: 339: QMP command for block device reactivation after migration +RH-Jira: RHEL-54296 RHEL-78397 +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [3/22] 74d95ded9153bf0969ede7d7d2708e9452d7cd11 (kmwolf/centos-qemu-kvm) + +Migration capability 'late-block-active' controls when the block drives +will be activated. If enabled, block drives will only be activated until +VM starts, either src runstate was "live" (RUNNING, or SUSPENDED), or it'll +be postponed until qmp_cont(). + +Let's do this unconditionally. There's no harm to delay activation of +block drives. Meanwhile there's no ABI breakage if dest does it, because +src QEMU has nothing to do with it, so it's no concern on ABI breakage. 
+ +IIUC we could avoid introducing this cap when introducing it before, but +now it's still not too late to just always do it. Cap now prone to +removal, but it'll be for later patches. + +Signed-off-by: Peter Xu +Reviewed-by: Fabiano Rosas +Message-Id: <20241206230838.1111496-4-peterx@redhat.com> +Signed-off-by: Fabiano Rosas +(cherry picked from commit fca9aef1c8d8fc4482cc541638dbfac76dc125d6) +Signed-off-by: Kevin Wolf +--- + migration/migration.c | 38 +++++++++++++++++++------------------- + 1 file changed, 19 insertions(+), 19 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index c7a9e2e026..8a262e01ff 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -735,24 +735,6 @@ static void process_incoming_migration_bh(void *opaque) + + trace_vmstate_downtime_checkpoint("dst-precopy-bh-enter"); + +- /* If capability late_block_activate is set: +- * Only fire up the block code now if we're going to restart the +- * VM, else 'cont' will do it. +- * This causes file locking to happen; so we don't want it to happen +- * unless we really are starting the VM. +- */ +- if (!migrate_late_block_activate() || +- (autostart && runstate_is_live(migration_get_target_runstate()))) { +- /* Make sure all file formats throw away their mutable metadata. +- * If we get an error here, just don't restart the VM yet. */ +- bdrv_activate_all(&local_err); +- if (local_err) { +- error_report_err(local_err); +- local_err = NULL; +- autostart = false; +- } +- } +- + /* + * This must happen after all error conditions are dealt with and + * we're sure the VM is going to be running on this host. +@@ -767,7 +749,25 @@ static void process_incoming_migration_bh(void *opaque) + + if (runstate_is_live(migration_get_target_runstate())) { + if (autostart) { +- vm_start(); ++ /* ++ * Block activation is always delayed until VM starts, either ++ * here (which means we need to start the dest VM right now..), ++ * or until qmp_cont() later. 
++ * ++ * We used to have cap 'late-block-activate' but now we do this ++ * unconditionally, as it has no harm but only benefit. E.g., ++ * it's not part of migration ABI on the time of disk activation. ++ * ++ * Make sure all file formats throw away their mutable ++ * metadata. If error, don't restart the VM yet. ++ */ ++ bdrv_activate_all(&local_err); ++ if (local_err) { ++ error_report_err(local_err); ++ local_err = NULL; ++ } else { ++ vm_start(); ++ } + } else { + runstate_set(RUN_STATE_PAUSED); + } +-- +2.48.1 + diff --git a/SPECS/qemu/kvm-migration-block-Rewrite-disk-activation.patch b/SPECS/qemu/kvm-migration-block-Rewrite-disk-activation.patch new file mode 100644 index 00000000000..dc8cb1d2f80 --- /dev/null +++ b/SPECS/qemu/kvm-migration-block-Rewrite-disk-activation.patch @@ -0,0 +1,565 @@ +From f478efadbb1629028af0e65e8408fe49256b2f17 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 6 Dec 2024 18:08:38 -0500 +Subject: [PATCH 07/23] migration/block: Rewrite disk activation + +RH-Author: Kevin Wolf +RH-MergeRequest: 339: QMP command for block device reactivation after migration +RH-Jira: RHEL-54296 RHEL-78397 +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [6/22] cd000f0b00e6f768288f340e1801dd5a236d430c (kmwolf/centos-qemu-kvm) + +This patch proposes a flag to maintain disk activation status globally. It +mostly rewrites disk activation mgmt for QEMU, including COLO and QMP +command xen_save_devices_state. + +Backgrounds +=========== + +We have two problems on disk activations, one resolved, one not. + +Problem 1: disk activation recover (for switchover interruptions) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When migration is either cancelled or failed during switchover, especially +when after the disks are inactivated, QEMU needs to remember re-activate +the disks again before vm starts. 
+ +It used to be done separately in two paths: one in qmp_migrate_cancel(), +the other one in the failure path of migration_completion(). + +It used to be fixed in different commits, all over the places in QEMU. So +these are the relevant changes I saw, I'm not sure if it's complete list: + + - In 2016, commit fe904ea824 ("migration: regain control of images when + migration fails to complete") + + - In 2017, commit 1d2acc3162 ("migration: re-active images while migration + been canceled after inactive them") + + - In 2023, commit 6dab4c93ec ("migration: Attempt disk reactivation in + more failure scenarios") + +Now since we have a slightly better picture maybe we can unify the +reactivation in a single path. + +One side benefit of doing so is, we can move the disk operation outside QMP +command "migrate_cancel". It's possible that in the future we may want to +make "migrate_cancel" be OOB-compatible, while that requires the command +doesn't need BQL in the first place. This will already do that and make +migrate_cancel command lightweight. + +Problem 2: disk invalidation on top of invalidated disks +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is an unresolved bug for current QEMU. Link in "Resolves:" at the +end. It turns out besides the src switchover phase (problem 1 above), QEMU +also needs to remember block activation on destination. + +Consider two continuous migration in a row, where the VM was always paused. +In that scenario, the disks are not activated even until migration +completed in the 1st round. When the 2nd round starts, if QEMU doesn't +know the status of the disks, it needs to try inactivate the disk again. + +Here the issue is the block layer API bdrv_inactivate_all() will crash a +QEMU if invoked on already inactive disks for the 2nd migration. For +detail, see the bug link at the end. 
+ +Implementation +============== + +This patch proposes to maintain disk activation with a global flag, so we +know: + + - If we used to inactivate disks for migration, but migration got + cancelled, or failed, QEMU will know it should reactivate the disks. + + - On incoming side, if the disks are never activated but then another + migration is triggered, QEMU should be able to tell that inactivate is + not needed for the 2nd migration. + +We used to have disk_inactive, but it only solves the 1st issue, not the +2nd. Also, it's done in completely separate paths so it's extremely hard +to follow either how the flag changes, or the duration that the flag is +valid, and when we will reactivate the disks. + +Convert the existing disk_inactive flag into that global flag (also invert +its naming), and maintain the disk activation status for the whole +lifecycle of qemu. That includes the incoming QEMU. + +Put both of the error cases of source migration (failure, cancelled) +together into migration_iteration_finish(), which will be invoked for +either of the scenario. So from that part QEMU should behave the same as +before. However with such global maintenance on disk activation status, we +not only cleanup quite a few temporary paths that we try to maintain the +disk activation status (e.g. in postcopy code), meanwhile it fixes the +crash for problem 2 in one shot. + +For freshly started QEMU, the flag is initialized to TRUE showing that the +QEMU owns the disks by default. + +For incoming migrated QEMU, the flag will be initialized to FALSE once and +for all showing that the dest QEMU doesn't own the disks until switchover. +That is guaranteed by the "once" variable. 
+ +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2395 +Signed-off-by: Peter Xu +Reviewed-by: Fabiano Rosas +Message-Id: <20241206230838.1111496-7-peterx@redhat.com> +Signed-off-by: Fabiano Rosas +(cherry picked from commit 8597af76153a87068b675d8099063c3ad8695773) +Signed-off-by: Kevin Wolf +--- + include/migration/misc.h | 4 ++ + migration/block-active.c | 94 ++++++++++++++++++++++++++++++++++++++++ + migration/colo.c | 2 +- + migration/meson.build | 1 + + migration/migration.c | 80 ++++++++-------------------------- + migration/migration.h | 5 +-- + migration/savevm.c | 33 ++++++-------- + migration/trace-events | 3 ++ + monitor/qmp-cmds.c | 8 +--- + 9 files changed, 139 insertions(+), 91 deletions(-) + create mode 100644 migration/block-active.c + +diff --git a/include/migration/misc.h b/include/migration/misc.h +index bfadc5613b..35ca8e1194 100644 +--- a/include/migration/misc.h ++++ b/include/migration/misc.h +@@ -111,4 +111,8 @@ bool migration_in_bg_snapshot(void); + /* migration/block-dirty-bitmap.c */ + void dirty_bitmap_mig_init(void); + ++/* Wrapper for block active/inactive operations */ ++bool migration_block_activate(Error **errp); ++bool migration_block_inactivate(void); ++ + #endif +diff --git a/migration/block-active.c b/migration/block-active.c +new file mode 100644 +index 0000000000..d477cf8182 +--- /dev/null ++++ b/migration/block-active.c +@@ -0,0 +1,94 @@ ++/* ++ * Block activation tracking for migration purpose ++ * ++ * SPDX-License-Identifier: GPL-2.0-or-later ++ * ++ * Copyright (C) 2024 Red Hat, Inc. ++ */ ++#include "qemu/osdep.h" ++#include "block/block.h" ++#include "qapi/error.h" ++#include "migration/migration.h" ++#include "qemu/error-report.h" ++#include "trace.h" ++ ++/* ++ * Migration-only cache to remember the block layer activation status. ++ * Protected by BQL. ++ * ++ * We need this because.. ++ * ++ * - Migration can fail after block devices are invalidated (during ++ * switchover phase). 
When that happens, we need to be able to recover ++ * the block drive status by re-activating them. ++ * ++ * - Currently bdrv_inactivate_all() is not safe to be invoked on top of ++ * invalidated drives (even if bdrv_activate_all() is actually safe to be ++ * called any time!). It means remembering this could help migration to ++ * make sure it won't invalidate twice in a row, crashing QEMU. It can ++ * happen when we migrate a PAUSED VM from host1 to host2, then migrate ++ * again to host3 without starting it. TODO: a cleaner solution is to ++ * allow safe invoke of bdrv_inactivate_all() at anytime, like ++ * bdrv_activate_all(). ++ * ++ * For freshly started QEMU, the flag is initialized to TRUE reflecting the ++ * scenario where QEMU owns block device ownerships. ++ * ++ * For incoming QEMU taking a migration stream, the flag is initialized to ++ * FALSE reflecting that the incoming side doesn't own the block devices, ++ * not until switchover happens. ++ */ ++static bool migration_block_active; ++ ++/* Setup the disk activation status */ ++void migration_block_active_setup(bool active) ++{ ++ migration_block_active = active; ++} ++ ++bool migration_block_activate(Error **errp) ++{ ++ ERRP_GUARD(); ++ ++ assert(bql_locked()); ++ ++ if (migration_block_active) { ++ trace_migration_block_activation("active-skipped"); ++ return true; ++ } ++ ++ trace_migration_block_activation("active"); ++ ++ bdrv_activate_all(errp); ++ if (*errp) { ++ error_report_err(error_copy(*errp)); ++ return false; ++ } ++ ++ migration_block_active = true; ++ return true; ++} ++ ++bool migration_block_inactivate(void) ++{ ++ int ret; ++ ++ assert(bql_locked()); ++ ++ if (!migration_block_active) { ++ trace_migration_block_activation("inactive-skipped"); ++ return true; ++ } ++ ++ trace_migration_block_activation("inactive"); ++ ++ ret = bdrv_inactivate_all(); ++ if (ret) { ++ error_report("%s: bdrv_inactivate_all() failed: %d", ++ __func__, ret); ++ return false; ++ } ++ ++ 
migration_block_active = false; ++ return true; ++} +diff --git a/migration/colo.c b/migration/colo.c +index 6449490221..ab903f34cb 100644 +--- a/migration/colo.c ++++ b/migration/colo.c +@@ -836,7 +836,7 @@ static void *colo_process_incoming_thread(void *opaque) + + /* Make sure all file formats throw away their mutable metadata */ + bql_lock(); +- bdrv_activate_all(&local_err); ++ migration_block_activate(&local_err); + bql_unlock(); + if (local_err) { + error_report_err(local_err); +diff --git a/migration/meson.build b/migration/meson.build +index 5ce2acb41e..6b79861d3c 100644 +--- a/migration/meson.build ++++ b/migration/meson.build +@@ -11,6 +11,7 @@ migration_files = files( + + system_ss.add(files( + 'block-dirty-bitmap.c', ++ 'block-active.c', + 'channel.c', + 'channel-block.c', + 'dirtyrate.c', +diff --git a/migration/migration.c b/migration/migration.c +index 784b7e9b90..38631d1206 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -730,7 +730,6 @@ static void qemu_start_incoming_migration(const char *uri, bool has_channels, + + static void process_incoming_migration_bh(void *opaque) + { +- Error *local_err = NULL; + MigrationIncomingState *mis = opaque; + + trace_vmstate_downtime_checkpoint("dst-precopy-bh-enter"); +@@ -761,11 +760,7 @@ static void process_incoming_migration_bh(void *opaque) + * Make sure all file formats throw away their mutable + * metadata. If error, don't restart the VM yet. 
+ */ +- bdrv_activate_all(&local_err); +- if (local_err) { +- error_report_err(local_err); +- local_err = NULL; +- } else { ++ if (migration_block_activate(NULL)) { + vm_start(); + } + } else { +@@ -1562,16 +1557,6 @@ static void migrate_fd_cancel(MigrationState *s) + } + } + } +- if (s->state == MIGRATION_STATUS_CANCELLING && s->block_inactive) { +- Error *local_err = NULL; +- +- bdrv_activate_all(&local_err); +- if (local_err) { +- error_report_err(local_err); +- } else { +- s->block_inactive = false; +- } +- } + } + + void migration_add_notifier_mode(NotifierWithReturn *notify, +@@ -1890,6 +1875,12 @@ void qmp_migrate_incoming(const char *uri, bool has_channels, + return; + } + ++ /* ++ * Newly setup incoming QEMU. Mark the block active state to reflect ++ * that the src currently owns the disks. ++ */ ++ migration_block_active_setup(false); ++ + once = false; + } + +@@ -2542,7 +2533,6 @@ static int postcopy_start(MigrationState *ms, Error **errp) + QIOChannelBuffer *bioc; + QEMUFile *fb; + uint64_t bandwidth = migrate_max_postcopy_bandwidth(); +- bool restart_block = false; + int cur_state = MIGRATION_STATUS_ACTIVE; + + if (migrate_postcopy_preempt()) { +@@ -2578,13 +2568,10 @@ static int postcopy_start(MigrationState *ms, Error **errp) + goto fail; + } + +- ret = bdrv_inactivate_all(); +- if (ret < 0) { +- error_setg_errno(errp, -ret, "%s: Failed in bdrv_inactivate_all()", +- __func__); ++ if (!migration_block_inactivate()) { ++ error_setg(errp, "%s: Failed in bdrv_inactivate_all()", __func__); + goto fail; + } +- restart_block = true; + + /* + * Cause any non-postcopiable, but iterative devices to +@@ -2654,8 +2641,6 @@ static int postcopy_start(MigrationState *ms, Error **errp) + goto fail_closefb; + } + +- restart_block = false; +- + /* Now send that blob */ + if (qemu_savevm_send_packaged(ms->to_dst_file, bioc->data, bioc->usage)) { + error_setg(errp, "%s: Failed to send packaged data", __func__); +@@ -2700,17 +2685,7 @@ fail_closefb: + fail: + 
migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE, + MIGRATION_STATUS_FAILED); +- if (restart_block) { +- /* A failure happened early enough that we know the destination hasn't +- * accessed block devices, so we're safe to recover. +- */ +- Error *local_err = NULL; +- +- bdrv_activate_all(&local_err); +- if (local_err) { +- error_report_err(local_err); +- } +- } ++ migration_block_activate(NULL); + migration_call_notifiers(ms, MIG_EVENT_PRECOPY_FAILED, NULL); + bql_unlock(); + return -1; +@@ -2808,31 +2783,6 @@ static void migration_completion_postcopy(MigrationState *s) + trace_migration_completion_postcopy_end_after_complete(); + } + +-static void migration_completion_failed(MigrationState *s, +- int current_active_state) +-{ +- if (s->block_inactive && (s->state == MIGRATION_STATUS_ACTIVE || +- s->state == MIGRATION_STATUS_DEVICE)) { +- /* +- * If not doing postcopy, vm_start() will be called: let's +- * regain control on images. +- */ +- Error *local_err = NULL; +- +- bql_lock(); +- bdrv_activate_all(&local_err); +- if (local_err) { +- error_report_err(local_err); +- } else { +- s->block_inactive = false; +- } +- bql_unlock(); +- } +- +- migrate_set_state(&s->state, current_active_state, +- MIGRATION_STATUS_FAILED); +-} +- + /** + * migration_completion: Used by migration_thread when there's not much left. + * The caller 'breaks' the loop when this returns. +@@ -2886,7 +2836,8 @@ fail: + error_free(local_err); + } + +- migration_completion_failed(s, current_active_state); ++ migrate_set_state(&s->state, current_active_state, ++ MIGRATION_STATUS_FAILED); + } + + /** +@@ -3309,6 +3260,11 @@ static void migration_iteration_finish(MigrationState *s) + case MIGRATION_STATUS_FAILED: + case MIGRATION_STATUS_CANCELLED: + case MIGRATION_STATUS_CANCELLING: ++ /* ++ * Re-activate the block drives if they're inactivated. Note, COLO ++ * shouldn't use block_active at all, so it should be no-op there. 
++ */ ++ migration_block_activate(NULL); + if (runstate_is_live(s->vm_old_state)) { + if (!runstate_check(RUN_STATE_SHUTDOWN)) { + vm_start(); +@@ -3869,6 +3825,8 @@ static void migration_instance_init(Object *obj) + ms->state = MIGRATION_STATUS_NONE; + ms->mbps = -1; + ms->pages_per_second = -1; ++ /* Freshly started QEMU owns all the block devices */ ++ migration_block_active_setup(true); + qemu_sem_init(&ms->pause_sem, 0); + qemu_mutex_init(&ms->error_mutex); + +diff --git a/migration/migration.h b/migration/migration.h +index 38aa1402d5..5b17c1344d 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -356,9 +356,6 @@ struct MigrationState { + /* Flag set once the migration thread is running (and needs joining) */ + bool migration_thread_running; + +- /* Flag set once the migration thread called bdrv_inactivate_all */ +- bool block_inactive; +- + /* Migration is waiting for guest to unplug device */ + QemuSemaphore wait_unplug_sem; + +@@ -537,4 +534,6 @@ int migration_rp_wait(MigrationState *s); + */ + void migration_rp_kick(MigrationState *s); + ++/* migration/block-active.c */ ++void migration_block_active_setup(bool active); + #endif +diff --git a/migration/savevm.c b/migration/savevm.c +index b88dadd904..7f8d177462 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -1566,19 +1566,18 @@ int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f, + } + + if (inactivate_disks) { +- /* Inactivate before sending QEMU_VM_EOF so that the +- * bdrv_activate_all() on the other end won't fail. */ +- ret = bdrv_inactivate_all(); +- if (ret) { +- error_setg(&local_err, "%s: bdrv_inactivate_all() failed (%d)", +- __func__, ret); ++ /* ++ * Inactivate before sending QEMU_VM_EOF so that the ++ * bdrv_activate_all() on the other end won't fail. 
++ */ ++ if (!migration_block_inactivate()) { ++ error_setg(&local_err, "%s: bdrv_inactivate_all() failed", ++ __func__); + migrate_set_error(ms, local_err); + error_report_err(local_err); +- qemu_file_set_error(f, ret); ++ qemu_file_set_error(f, -EFAULT); + return ret; + } +- /* Remember that we did this */ +- s->block_inactive = true; + } + if (!in_postcopy) { + /* Postcopy stream will still be going */ +@@ -2142,7 +2141,6 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis) + + static void loadvm_postcopy_handle_run_bh(void *opaque) + { +- Error *local_err = NULL; + MigrationIncomingState *mis = opaque; + + trace_vmstate_downtime_checkpoint("dst-postcopy-bh-enter"); +@@ -2165,12 +2163,11 @@ static void loadvm_postcopy_handle_run_bh(void *opaque) + * Make sure all file formats throw away their mutable metadata. + * If we get an error here, just don't restart the VM yet. + */ +- bdrv_activate_all(&local_err); ++ bool success = migration_block_activate(NULL); ++ + trace_vmstate_downtime_checkpoint("dst-postcopy-bh-cache-invalidated"); +- if (local_err) { +- error_report_err(local_err); +- local_err = NULL; +- } else { ++ ++ if (success) { + vm_start(); + } + } else { +@@ -3214,11 +3211,7 @@ void qmp_xen_save_devices_state(const char *filename, bool has_live, bool live, + * side of the migration take control of the images. 
+ */ + if (live && !saved_vm_running) { +- ret = bdrv_inactivate_all(); +- if (ret) { +- error_setg(errp, "%s: bdrv_inactivate_all() failed (%d)", +- __func__, ret); +- } ++ migration_block_inactivate(); + } + } + +diff --git a/migration/trace-events b/migration/trace-events +index 0b7c3324fb..62141dc2ff 100644 +--- a/migration/trace-events ++++ b/migration/trace-events +@@ -377,3 +377,6 @@ migration_block_progression(unsigned percent) "Completed %u%%" + # page_cache.c + migration_pagecache_init(int64_t max_num_items) "Setting cache buckets to %" PRId64 + migration_pagecache_insert(void) "Error allocating page" ++ ++# block-active.c ++migration_block_activation(const char *name) "%s" +diff --git a/monitor/qmp-cmds.c b/monitor/qmp-cmds.c +index 76f21e8af3..6f76d9beaf 100644 +--- a/monitor/qmp-cmds.c ++++ b/monitor/qmp-cmds.c +@@ -31,6 +31,7 @@ + #include "qapi/type-helpers.h" + #include "hw/mem/memory-device.h" + #include "hw/intc/intc.h" ++#include "migration/misc.h" + + NameInfo *qmp_query_name(Error **errp) + { +@@ -103,13 +104,8 @@ void qmp_cont(Error **errp) + * Continuing after completed migration. Images have been + * inactivated to allow the destination to take control. Need to + * get control back now. +- * +- * If there are no inactive block nodes (e.g. because the VM was +- * just paused rather than completing a migration), +- * bdrv_inactivate_all() simply doesn't do anything. 
+ */ +- bdrv_activate_all(&local_err); +- if (local_err) { ++ if (!migration_block_activate(&local_err)) { + error_propagate(errp, local_err); + return; + } +-- +2.48.1 + diff --git a/SPECS/qemu/kvm-migration-block-active-Remove-global-active-flag.patch b/SPECS/qemu/kvm-migration-block-active-Remove-global-active-flag.patch new file mode 100644 index 00000000000..7ae57128013 --- /dev/null +++ b/SPECS/qemu/kvm-migration-block-active-Remove-global-active-flag.patch @@ -0,0 +1,158 @@ +From a0ebf9fca3b4c6a11c4476c1d1a67fecce7c7e3e Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 4 Feb 2025 22:13:55 +0100 +Subject: [PATCH 11/23] migration/block-active: Remove global active flag + +RH-Author: Kevin Wolf +RH-MergeRequest: 339: QMP command for block device reactivation after migration +RH-Jira: RHEL-54296 RHEL-78397 +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [10/22] 4bdf1da765bde05e2d427fc3ccca10bcc9f1dfbe (kmwolf/centos-qemu-kvm) + +Block devices have an individual active state, a single global flag +can't cover this correctly. This becomes more important as we allow +users to manually manage which nodes are active or inactive. + +Now that it's allowed to call bdrv_inactivate_all() even when some +nodes are already inactive, we can remove the flag and just +unconditionally call bdrv_inactivate_all() and, more importantly, +bdrv_activate_all() before we make use of the nodes. 
+ +Signed-off-by: Kevin Wolf +Acked-by: Fabiano Rosas +Reviewed-by: Eric Blake +Reviewed-by: Stefan Hajnoczi +Message-ID: <20250204211407.381505-5-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit c2a189976e211c9ff782538d5a5ed5e5cffeccd6) +Signed-off-by: Kevin Wolf +--- + migration/block-active.c | 46 ---------------------------------------- + migration/migration.c | 8 ------- + migration/migration.h | 2 -- + 3 files changed, 56 deletions(-) + +diff --git a/migration/block-active.c b/migration/block-active.c +index d477cf8182..40e986aade 100644 +--- a/migration/block-active.c ++++ b/migration/block-active.c +@@ -12,51 +12,12 @@ + #include "qemu/error-report.h" + #include "trace.h" + +-/* +- * Migration-only cache to remember the block layer activation status. +- * Protected by BQL. +- * +- * We need this because.. +- * +- * - Migration can fail after block devices are invalidated (during +- * switchover phase). When that happens, we need to be able to recover +- * the block drive status by re-activating them. +- * +- * - Currently bdrv_inactivate_all() is not safe to be invoked on top of +- * invalidated drives (even if bdrv_activate_all() is actually safe to be +- * called any time!). It means remembering this could help migration to +- * make sure it won't invalidate twice in a row, crashing QEMU. It can +- * happen when we migrate a PAUSED VM from host1 to host2, then migrate +- * again to host3 without starting it. TODO: a cleaner solution is to +- * allow safe invoke of bdrv_inactivate_all() at anytime, like +- * bdrv_activate_all(). +- * +- * For freshly started QEMU, the flag is initialized to TRUE reflecting the +- * scenario where QEMU owns block device ownerships. +- * +- * For incoming QEMU taking a migration stream, the flag is initialized to +- * FALSE reflecting that the incoming side doesn't own the block devices, +- * not until switchover happens. 
+- */ +-static bool migration_block_active; +- +-/* Setup the disk activation status */ +-void migration_block_active_setup(bool active) +-{ +- migration_block_active = active; +-} +- + bool migration_block_activate(Error **errp) + { + ERRP_GUARD(); + + assert(bql_locked()); + +- if (migration_block_active) { +- trace_migration_block_activation("active-skipped"); +- return true; +- } +- + trace_migration_block_activation("active"); + + bdrv_activate_all(errp); +@@ -65,7 +26,6 @@ bool migration_block_activate(Error **errp) + return false; + } + +- migration_block_active = true; + return true; + } + +@@ -75,11 +35,6 @@ bool migration_block_inactivate(void) + + assert(bql_locked()); + +- if (!migration_block_active) { +- trace_migration_block_activation("inactive-skipped"); +- return true; +- } +- + trace_migration_block_activation("inactive"); + + ret = bdrv_inactivate_all(); +@@ -89,6 +44,5 @@ bool migration_block_inactivate(void) + return false; + } + +- migration_block_active = false; + return true; + } +diff --git a/migration/migration.c b/migration/migration.c +index 38631d1206..999d4cac54 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1875,12 +1875,6 @@ void qmp_migrate_incoming(const char *uri, bool has_channels, + return; + } + +- /* +- * Newly setup incoming QEMU. Mark the block active state to reflect +- * that the src currently owns the disks. 
+- */ +- migration_block_active_setup(false); +- + once = false; + } + +@@ -3825,8 +3819,6 @@ static void migration_instance_init(Object *obj) + ms->state = MIGRATION_STATUS_NONE; + ms->mbps = -1; + ms->pages_per_second = -1; +- /* Freshly started QEMU owns all the block devices */ +- migration_block_active_setup(true); + qemu_sem_init(&ms->pause_sem, 0); + qemu_mutex_init(&ms->error_mutex); + +diff --git a/migration/migration.h b/migration/migration.h +index 5b17c1344d..c38d2a37e4 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -534,6 +534,4 @@ int migration_rp_wait(MigrationState *s); + */ + void migration_rp_kick(MigrationState *s); + +-/* migration/block-active.c */ +-void migration_block_active_setup(bool active); + #endif +-- +2.48.1 + diff --git a/SPECS/qemu/kvm-nbd-server-Support-inactive-nodes.patch b/SPECS/qemu/kvm-nbd-server-Support-inactive-nodes.patch new file mode 100644 index 00000000000..c37a3411cc5 --- /dev/null +++ b/SPECS/qemu/kvm-nbd-server-Support-inactive-nodes.patch @@ -0,0 +1,68 @@ +From 77ec8365bf04afadee0a2dd7354deedd57606c54 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 4 Feb 2025 22:14:04 +0100 +Subject: [PATCH 20/23] nbd/server: Support inactive nodes + +RH-Author: Kevin Wolf +RH-MergeRequest: 339: QMP command for block device reactivation after migration +RH-Jira: RHEL-54296 RHEL-78397 +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [19/22] 8c113dd6a862b1f4ad4e5d498f4e70a5dbfdfb32 (kmwolf/centos-qemu-kvm) + +In order to support running an NBD export on inactive nodes, we must +make sure to return errors for any operations that aren't allowed on +inactive nodes. Reads are the only operation we know we need for +inactive images, so to err on the side of caution, return errors for +everything else, even if some operations could possibly be okay. 
+ +Signed-off-by: Kevin Wolf +Acked-by: Fabiano Rosas +Message-ID: <20250204211407.381505-14-kwolf@redhat.com> +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Eric Blake +Signed-off-by: Kevin Wolf +(cherry picked from commit 2e73a17c68f4d80023dc616e596e8c1f3ea8dd75) +Signed-off-by: Kevin Wolf +--- + nbd/server.c | 17 +++++++++++++++++ + 1 file changed, 17 insertions(+) + +diff --git a/nbd/server.c b/nbd/server.c +index f64e47270c..2076fb2666 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -2026,6 +2026,7 @@ static void nbd_export_delete(BlockExport *blk_exp) + const BlockExportDriver blk_exp_nbd = { + .type = BLOCK_EXPORT_TYPE_NBD, + .instance_size = sizeof(NBDExport), ++ .supports_inactive = true, + .create = nbd_export_create, + .delete = nbd_export_delete, + .request_shutdown = nbd_export_request_shutdown, +@@ -2920,6 +2921,22 @@ static coroutine_fn int nbd_handle_request(NBDClient *client, + NBDExport *exp = client->exp; + char *msg; + size_t i; ++ bool inactive; ++ ++ WITH_GRAPH_RDLOCK_GUARD() { ++ inactive = bdrv_is_inactive(blk_bs(exp->common.blk)); ++ if (inactive) { ++ switch (request->type) { ++ case NBD_CMD_READ: ++ /* These commands are allowed on inactive nodes */ ++ break; ++ default: ++ /* Return an error for the rest */ ++ return nbd_send_generic_reply(client, request, -EPERM, ++ "export is inactive", errp); ++ } ++ } ++ } + + switch (request->type) { + case NBD_CMD_CACHE: +-- +2.48.1 + diff --git a/SPECS/qemu/kvm-net-Fix-announce_self.patch b/SPECS/qemu/kvm-net-Fix-announce_self.patch new file mode 100644 index 00000000000..7c99e5c49a8 --- /dev/null +++ b/SPECS/qemu/kvm-net-Fix-announce_self.patch @@ -0,0 +1,82 @@ +From ba42b9bf3f193bbc7f47d494bdc888e881539f4b Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Fri, 17 Jan 2025 12:17:08 +0100 +Subject: [PATCH 01/23] net: Fix announce_self +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 338: net: Fix 
announce_self +RH-Jira: RHEL-73891 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Cindy Lu +RH-Commit: [1/1] dfee696c1c444af0ba2b2d3d8c7012385e84885c (lvivier/qemu-kvm-centos) + +JIRA: https://issues.redhat.com/browse/RHEL-73891 + +b9ad513e1876 ("net: Remove receive_raw()") adds an iovec entry +in qemu_deliver_packet_iov() to add the virtio-net header +in the data when QEMU_NET_PACKET_FLAG_RAW is set but forgets +to increase the number of iovec entries in the array, so +receive_iov() will only send the first entry (the virtio-net +entry, full of 0) and no data. The packet will be discarded. + +The only user of QEMU_NET_PACKET_FLAG_RAW is announce_self. + +We can see the problem with tcpdump: + +- QEMU parameters: + + .. -monitor stdio \ + -netdev bridge,id=netdev0,br=virbr0 \ + -device virtio-net,mac=9a:2b:2c:2d:2e:2f,netdev=netdev0 \ + +- HMP command: + + (qemu) announce_self + +- TCP dump: + + $ sudo tcpdump -nxi virbr0 + + without the fix: + + + + with the fix: + + ARP, Reverse Request who-is 9a:2b:2c:2d:2e:2f tell 9a:2b:2c:2d:2e:2f, length 46 + 0x0000: 0001 0800 0604 0003 9a2b 2c2d 2e2f 0000 + 0x0010: 0000 9a2b 2c2d 2e2f 0000 0000 0000 0000 + 0x0020: 0000 0000 0000 0000 0000 0000 0000 + +Reported-by: Xiaohui Li +Bug: https://issues.redhat.com/browse/RHEL-73891 +Fixes: b9ad513e1876 ("net: Remove receive_raw()") +Cc: akihiko.odaki@daynix.com +Signed-off-by: Laurent Vivier +Reviewed-by: Akihiko Odaki +Reviewed-by: Michael Tokarev +Signed-off-by: Michael Tokarev +(cherry picked from commit 84dfdcbff33fff185528501be408c25c44499f32) +Signed-off-by: Laurent Vivier +--- + net/net.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/net/net.c b/net/net.c +index fc1125111c..94f51b6e5f 100644 +--- a/net/net.c ++++ b/net/net.c +@@ -828,6 +828,7 @@ static ssize_t qemu_deliver_packet_iov(NetClientState *sender, + iov_copy[0].iov_len = nc->vnet_hdr_len; + memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov)); + iov = iov_copy; ++ iovcnt++; + } + + if (nc->info->receive_iov) { +-- 
+2.48.1 + diff --git a/SPECS/qemu/kvm-qmp-cont-Only-activate-disks-if-migration-completed.patch b/SPECS/qemu/kvm-qmp-cont-Only-activate-disks-if-migration-completed.patch new file mode 100644 index 00000000000..db8efd2fcb2 --- /dev/null +++ b/SPECS/qemu/kvm-qmp-cont-Only-activate-disks-if-migration-completed.patch @@ -0,0 +1,73 @@ +From a99282ca32a1fef256c1fc155ef4f43aed9e1e48 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 6 Dec 2024 18:08:34 -0500 +Subject: [PATCH 03/23] qmp/cont: Only activate disks if migration completed + +RH-Author: Kevin Wolf +RH-MergeRequest: 339: QMP command for block device reactivation after migration +RH-Jira: RHEL-54296 RHEL-78397 +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [2/22] 942fea0b972c992c52e6a1c85172afe899e4e312 (kmwolf/centos-qemu-kvm) + +As the comment says, the activation of disks is for the case where +migration has completed, rather than when QEMU is still during +migration (RUN_STATE_INMIGRATE). + +Move the code over to reflect what the comment is describing. + +Cc: Kevin Wolf +Cc: Markus Armbruster +Signed-off-by: Peter Xu +Reviewed-by: Fabiano Rosas +Message-Id: <20241206230838.1111496-3-peterx@redhat.com> +Signed-off-by: Fabiano Rosas +(cherry picked from commit e4e5e89bbd8e731e86735d9d25b7b5f49e8f08b6) +Signed-off-by: Kevin Wolf +--- + monitor/qmp-cmds.c | 26 ++++++++++++++------------ + 1 file changed, 14 insertions(+), 12 deletions(-) + +diff --git a/monitor/qmp-cmds.c b/monitor/qmp-cmds.c +index f84a0dc523..76f21e8af3 100644 +--- a/monitor/qmp-cmds.c ++++ b/monitor/qmp-cmds.c +@@ -96,21 +96,23 @@ void qmp_cont(Error **errp) + } + } + +- /* Continuing after completed migration. Images have been inactivated to +- * allow the destination to take control. Need to get control back now. +- * +- * If there are no inactive block nodes (e.g. because the VM was just +- * paused rather than completing a migration), bdrv_inactivate_all() simply +- * doesn't do anything. 
*/ +- bdrv_activate_all(&local_err); +- if (local_err) { +- error_propagate(errp, local_err); +- return; +- } +- + if (runstate_check(RUN_STATE_INMIGRATE)) { + autostart = 1; + } else { ++ /* ++ * Continuing after completed migration. Images have been ++ * inactivated to allow the destination to take control. Need to ++ * get control back now. ++ * ++ * If there are no inactive block nodes (e.g. because the VM was ++ * just paused rather than completing a migration), ++ * bdrv_inactivate_all() simply doesn't do anything. ++ */ ++ bdrv_activate_all(&local_err); ++ if (local_err) { ++ error_propagate(errp, local_err); ++ return; ++ } + vm_start(); + } + } +-- +2.48.1 + diff --git a/SPECS/qemu/kvm-virtio-net-Add-queues-before-loading-them.patch b/SPECS/qemu/kvm-virtio-net-Add-queues-before-loading-them.patch new file mode 100644 index 00000000000..0474c68a04a --- /dev/null +++ b/SPECS/qemu/kvm-virtio-net-Add-queues-before-loading-them.patch @@ -0,0 +1,94 @@ +From 873e57548d92eb916656b6304a780f63958aa9fe Mon Sep 17 00:00:00 2001 +From: Akihiko Odaki +Date: Tue, 22 Oct 2024 15:49:01 +0900 +Subject: [PATCH 01/10] virtio-net: Add queues before loading them +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: 小田喜陽彦 +RH-MergeRequest: 299: virtio-net: Add queues before loading them +RH-Jira: RHEL-69477 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Jason Wang +RH-Commit: [1/1] 7bd06d5f9c0f0ce3d211204c404451d7002bb7fb (akihiko.odaki/qemu-kvm) + +Call virtio_net_set_multiqueue() to add queues before loading their +states. Otherwise the loaded queues will not have handlers and elements +in them will not be processed. + +Cc: qemu-stable@nongnu.org +Fixes: 8c49756825da ("virtio-net: Add only one queue pair when realizing") +Reported-by: Laurent Vivier +Signed-off-by: Akihiko Odaki +Acked-by: Michael S. 
Tsirkin +Signed-off-by: Jason Wang +(cherry picked from commit 9379ea9db3c0064fa2787db0794a23a30f7b2d2d) +--- + hw/net/virtio-net.c | 10 ++++++++++ + hw/virtio/virtio.c | 7 +++++++ + include/hw/virtio/virtio.h | 2 ++ + 3 files changed, 19 insertions(+) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index ed33a32877..90d05f94d4 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -3032,6 +3032,15 @@ static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue) + virtio_net_set_queue_pairs(n); + } + ++static int virtio_net_pre_load_queues(VirtIODevice *vdev) ++{ ++ virtio_net_set_multiqueue(VIRTIO_NET(vdev), ++ virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_RSS) || ++ virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MQ)); ++ ++ return 0; ++} ++ + static int virtio_net_post_load_device(void *opaque, int version_id) + { + VirtIONet *n = opaque; +@@ -4010,6 +4019,7 @@ static void virtio_net_class_init(ObjectClass *klass, void *data) + vdc->guest_notifier_mask = virtio_net_guest_notifier_mask; + vdc->guest_notifier_pending = virtio_net_guest_notifier_pending; + vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO); ++ vdc->pre_load_queues = virtio_net_pre_load_queues; + vdc->post_load = virtio_net_post_load_virtio; + vdc->vmsd = &vmstate_virtio_net_device; + vdc->primary_unplug_pending = primary_unplug_pending; +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index 9e10cbc058..10f24a58dd 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -3251,6 +3251,13 @@ virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) + config_len--; + } + ++ if (vdc->pre_load_queues) { ++ ret = vdc->pre_load_queues(vdev); ++ if (ret) { ++ return ret; ++ } ++ } ++ + num = qemu_get_be32(f); + + if (num > VIRTIO_QUEUE_MAX) { +diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h +index 0fcbc5c0c6..953dfca27c 100644 +--- a/include/hw/virtio/virtio.h ++++ b/include/hw/virtio/virtio.h +@@ -210,6 +210,8 @@ struct 
VirtioDeviceClass { + void (*guest_notifier_mask)(VirtIODevice *vdev, int n, bool mask); + int (*start_ioeventfd)(VirtIODevice *vdev); + void (*stop_ioeventfd)(VirtIODevice *vdev); ++ /* Called before loading queues. Useful to add queues before loading. */ ++ int (*pre_load_queues)(VirtIODevice *vdev); + /* Saving and loading of a device; trying to deprecate save/load + * use vmsd for new devices. + */ +-- +2.39.3 + diff --git a/SPECS/qemu/qemu.spec b/SPECS/qemu/qemu.spec index c624a6877b5..b1df9c1c9e0 100644 --- a/SPECS/qemu/qemu.spec +++ b/SPECS/qemu/qemu.spec @@ -435,7 +435,7 @@ Obsoletes: sgabios-bin <= 1:0.20180715git-10.fc38 Summary: QEMU is a FAST! processor emulator Name: qemu Version: 9.1.0 -Release: 3%{?dist} +Release: 4%{?dist} License: Apache-2.0 AND BSD-2-Clause AND BSD-3-Clause AND FSFAP AND GPL-1.0-or-later AND GPL-2.0-only AND GPL-2.0-or-later AND GPL-2.0-or-later WITH GCC-exception-2.0 AND LGPL-2.0-only AND LGPL-2.0-or-later AND LGPL-2.1-only AND LGPL-2.1-or-later AND MIT AND LicenseRef-Fedora-Public-Domain AND CC-BY-3.0 URL: http://www.qemu.org/ @@ -450,6 +450,29 @@ Patch4: CVE-2025-11234.patch Patch5: CVE-2025-12464.patch Patch6: CVE-2024-8354.patch Patch7: CVE-2025-14876.patch +Patch8: kvm-migration-Ensure-vmstate_save-sets-errp.patch +Patch9: kvm-virtio-net-Add-queues-before-loading-them.patch +Patch10: kvm-net-Fix-announce_self.patch +Patch11: kvm-migration-Add-helper-to-get-target-runstate.patch +Patch12: kvm-qmp-cont-Only-activate-disks-if-migration-completed.patch +Patch13: kvm-migration-block-Make-late-block-active-the-default.patch +Patch14: kvm-migration-block-Apply-late-block-active-behavior-to-.patch +Patch15: kvm-migration-block-Fix-possible-race-with-block_inactiv.patch +Patch16: kvm-migration-block-Rewrite-disk-activation.patch +Patch17: kvm-block-Add-active-field-to-BlockDeviceInfo.patch +Patch18: kvm-block-Allow-inactivating-already-inactive-nodes.patch +Patch19: kvm-block-Inactivate-external-snapshot-overlays-when-nec.patch 
+Patch20: kvm-migration-block-active-Remove-global-active-flag.patch +Patch21: kvm-block-Don-t-attach-inactive-child-to-active-node.patch +Patch22: kvm-block-Fix-crash-on-block_resize-on-inactive-node.patch +Patch23: kvm-block-Add-option-to-create-inactive-nodes.patch +Patch24: kvm-block-Add-blockdev-set-active-QMP-command.patch +Patch25: kvm-block-Support-inactive-nodes-in-blk_insert_bs.patch +Patch26: kvm-block-export-Don-t-ignore-image-activation-error-in-.patch +Patch27: kvm-block-Drain-nodes-before-inactivating-them.patch +Patch28: kvm-block-export-Add-option-to-allow-export-of-inactive-.patch +Patch29: kvm-nbd-server-Support-inactive-nodes.patch +Patch30: kvm-migration-Fix-UAF-for-incoming-migration-on-Migratio.patch Source10: qemu-guest-agent.service Source11: 99-qemu-guest-agent.rules @@ -3409,6 +3432,20 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Sun Apr 05 2026 Woojoong Kim - 9.1.0-4 +- Add 23 QEMU patches from RHEL qemu-kvm 9.1.0-20 for KubeVirt live migration +- Post-migration networking: fix announce_self ARP/GARP (RHEL-73891), + add virtio-net queues before loading during incoming migration (RHEL-69477) +- Migration stability: fix UAF on cancelled incoming migration (RHEL-69775), + ensure vmstate_save sets error pointer (RHEL-67844) +- Disk activation: add migration target runstate helper, delay disk activation + until migration completes, extend late-block-active to postcopy, fix race + with block_inactive, rewrite disk activation logic, per-node activation state +- Block layer inactive node framework: expose active/inactive status via QMP, + add blockdev-set-active command, support creating/inactivating inactive nodes, + fix crash on block_resize of inactive node, drain I/O before inactivation, + support NBD export of inactive nodes for storage daemon + * Wed Mar 25 2026 Aditya Singh - 9.1.0-3 - Bump to rebuild with updated glibc From 9f24895cd59e4a99e4942e5cf245e92034a45d8c Mon Sep 17 00:00:00 2001 From: Woojoong Kim 
Date: Tue, 7 Apr 2026 16:42:25 +0000 Subject: [PATCH 2/2] trigger pipeline