Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ jobs:

- name: Run tests
env:
GO_TEST_TIMEOUT: 600s
# Docker auth for tests running as root (sudo)
DOCKER_CONFIG: /home/debianuser/.docker
# TLS/ACME testing (optional - tests will skip if not configured)
Expand Down Expand Up @@ -118,6 +119,7 @@ jobs:

- name: Run tests
env:
GO_TEST_TIMEOUT: 600s
DEFAULT_HYPERVISOR: vz
JWT_SECRET: ci-test-secret
run: make test
Expand Down
3 changes: 2 additions & 1 deletion DEVELOPMENT.md
Original file line number Diff line number Diff line change
Expand Up @@ -469,7 +469,7 @@ Note: Full integration tests require Linux. On macOS, focus on unit tests and ma

1. **Disk Format**: vz only supports raw disk images (not qcow2). The image pipeline handles conversion automatically.

2. **Snapshots**: Not currently supported on the vz hypervisor.
2. **Snapshot Compatibility**: vz save/restore requires macOS 14.0+ on Apple Silicon and a VM configuration that passes save/restore validation.

### Troubleshooting

Expand All @@ -496,6 +496,7 @@ brew install caddy
**"snapshot not supported"**
- Requires macOS 14.0+ on Apple Silicon
- Check: `sw_vers` and `uname -m` (should be arm64)
- Ensure the VM has been paused before standby and has a save/restore-compatible configuration

**VM fails to start**
- Check serial log: `<data_dir>/instances/<id>/serial.log`
Expand Down
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ SHELL := /bin/bash

# Directory where local binaries will be installed
BIN_DIR ?= $(CURDIR)/bin
GO_TEST_TIMEOUT ?= 300s

$(BIN_DIR):
mkdir -p $(BIN_DIR)
Expand All @@ -13,7 +14,7 @@ OAPI_CODEGEN_VERSION ?= v2.5.1
AIR ?= $(BIN_DIR)/air
WIRE ?= $(BIN_DIR)/wire
XCADDY ?= $(BIN_DIR)/xcaddy
TEST_TIMEOUT ?= 600s
TEST_TIMEOUT ?= $(GO_TEST_TIMEOUT)

# Install oapi-codegen (pinned to match committed generated code)
$(OAPI_CODEGEN): | $(BIN_DIR)
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -123,10 +123,10 @@ hypeman stop my-app
# Start a stopped VM
hypeman start my-app

# Put the VM to sleep (paused)
# Put the VM in standby (snapshot to disk, stop hypervisor)
hypeman standby my-app

# Wake the VM (resumed)
# Restore the VM from standby
hypeman restore my-app

# Delete all VMs
Expand Down
28 changes: 21 additions & 7 deletions cmd/vz-shim/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,22 +46,36 @@ func main() {
slog.Info("vz-shim starting", "control_socket", config.ControlSocket, "vsock_socket", config.VsockSocket)

// Create the VM
vm, vmConfig, err := createVM(config)
vm, vmConfig, err := createVM(&config)
if err != nil {
slog.Error("failed to create VM", "error", err)
fmt.Fprintf(os.Stderr, "failed to create VM: %v\n", err)
os.Exit(1)
}

if err := vm.Start(); err != nil {
slog.Error("failed to start VM", "error", err)
fmt.Fprintf(os.Stderr, "failed to start VM: %v\n", err)
os.Exit(1)
if config.RestoreMachineStatePath != "" {
if err := validateSaveRestoreSupport(vmConfig); err != nil {
slog.Error("save/restore not supported for VM config", "error", err)
fmt.Fprintf(os.Stderr, "save/restore not supported for VM config: %v\n", err)
os.Exit(1)
}
if err := restoreMachineState(vm, config.RestoreMachineStatePath); err != nil {
slog.Error("failed to restore VM machine state", "error", err, "path", config.RestoreMachineStatePath)
fmt.Fprintf(os.Stderr, "failed to restore VM machine state: %v\n", err)
os.Exit(1)
}
slog.Info("VM restored from machine state", "path", config.RestoreMachineStatePath, "state", vm.State())
} else {
if err := vm.Start(); err != nil {
slog.Error("failed to start VM", "error", err)
fmt.Fprintf(os.Stderr, "failed to start VM: %v\n", err)
os.Exit(1)
}
slog.Info("VM started", "vcpus", config.VCPUs, "memory_mb", config.MemoryBytes/1024/1024)
}
slog.Info("VM started", "vcpus", config.VCPUs, "memory_mb", config.MemoryBytes/1024/1024)

// Create the shim server
server := NewShimServer(vm, vmConfig)
server := NewShimServer(vm, vmConfig, config)

// Start control socket listener (remove stale socket from previous run)
os.Remove(config.ControlSocket)
Expand Down
29 changes: 29 additions & 0 deletions cmd/vz-shim/save_restore_arm64.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
//go:build darwin && arm64

package main

import (
"fmt"

"github.com/Code-Hex/vz/v3"
)

// validateSaveRestoreSupport asks the vz configuration whether it can be used
// with machine-state save/restore. It returns the validation error as-is when
// the check itself fails, a descriptive error when the configuration is
// reported as unsupported, and nil otherwise.
func validateSaveRestoreSupport(vmConfig *vz.VirtualMachineConfiguration) error {
	supported, err := vmConfig.ValidateSaveRestoreSupport()
	switch {
	case err != nil:
		return err
	case !supported:
		return fmt.Errorf("virtual machine configuration does not support save/restore")
	default:
		return nil
	}
}

// saveMachineState writes the VM's machine state to snapshotPath by delegating
// to the vz wrapper; any error from the wrapper is returned unchanged.
func saveMachineState(vm *vz.VirtualMachine, snapshotPath string) error {
	if err := vm.SaveMachineStateToPath(snapshotPath); err != nil {
		return err
	}
	return nil
}

// restoreMachineState loads previously saved machine state from snapshotPath
// into vm. Despite the "FromURL" name of the wrapper method, a plain
// filesystem path is passed; the vz wrapper builds the file URL itself.
func restoreMachineState(vm *vz.VirtualMachine, snapshotPath string) error {
	if err := vm.RestoreMachineStateFromURL(snapshotPath); err != nil {
		return err
	}
	return nil
}
22 changes: 22 additions & 0 deletions cmd/vz-shim/save_restore_unsupported.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
//go:build darwin && !arm64

package main

import (
"fmt"
"runtime"

"github.com/Code-Hex/vz/v3"
)

// validateSaveRestoreSupport always fails in this build (darwin, non-arm64):
// it ignores vmConfig and returns an error naming the current architecture.
func validateSaveRestoreSupport(vmConfig *vz.VirtualMachineConfiguration) error {
	arch := runtime.GOARCH
	return fmt.Errorf("save/restore is only supported on darwin/arm64 (current arch: %s)", arch)
}

// saveMachineState always fails in this build (darwin, non-arm64): nothing is
// written and the returned error names the current architecture.
func saveMachineState(vm *vz.VirtualMachine, snapshotPath string) error {
	arch := runtime.GOARCH
	return fmt.Errorf("save/restore is only supported on darwin/arm64 (current arch: %s)", arch)
}

// restoreMachineState always fails in this build (darwin, non-arm64): the VM
// is left untouched and the returned error names the current architecture.
func restoreMachineState(vm *vz.VirtualMachine, snapshotPath string) error {
	arch := runtime.GOARCH
	return fmt.Errorf("save/restore is only supported on darwin/arm64 (current arch: %s)", arch)
}
97 changes: 91 additions & 6 deletions cmd/vz-shim/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,23 +10,28 @@ import (
"log/slog"
"net"
"net/http"
"os"
"path/filepath"
"sync"

"github.com/Code-Hex/vz/v3"
"github.com/kernel/hypeman/lib/hypervisor/vz/shimconfig"
)

// ShimServer handles control API and vsock proxy for a vz VM.
type ShimServer struct {
vm *vz.VirtualMachine
vmConfig *vz.VirtualMachineConfiguration
mu sync.RWMutex
vm *vz.VirtualMachine
vmConfig *vz.VirtualMachineConfiguration
shimConfig shimconfig.ShimConfig
mu sync.RWMutex
}

// NewShimServer creates a new shim server.
func NewShimServer(vm *vz.VirtualMachine, vmConfig *vz.VirtualMachineConfiguration) *ShimServer {
func NewShimServer(vm *vz.VirtualMachine, vmConfig *vz.VirtualMachineConfiguration, config shimconfig.ShimConfig) *ShimServer {
return &ShimServer{
vm: vm,
vmConfig: vmConfig,
vm: vm,
vmConfig: vmConfig,
shimConfig: config,
}
}

Expand All @@ -35,6 +40,10 @@ type VMInfoResponse struct {
State string `json:"state"`
}

// snapshotRequest is the JSON request body decoded by handleSnapshot.
type snapshotRequest struct {
	// DestinationPath is the directory the snapshot is written into. It is
	// required (an empty value is rejected) and is created if missing.
	DestinationPath string `json:"destination_path"`
}

// Handler returns the HTTP handler for the control API.
func (s *ShimServer) Handler() http.Handler {
mux := http.NewServeMux()
Expand All @@ -44,6 +53,7 @@ func (s *ShimServer) Handler() http.Handler {
mux.HandleFunc("PUT /api/v1/vm.pause", s.handlePause)
mux.HandleFunc("PUT /api/v1/vm.resume", s.handleResume)
mux.HandleFunc("PUT /api/v1/vm.shutdown", s.handleShutdown)
mux.HandleFunc("PUT /api/v1/vm.snapshot", s.handleSnapshot)
mux.HandleFunc("PUT /api/v1/vm.power-button", s.handlePowerButton)
mux.HandleFunc("GET /api/v1/vmm.ping", s.handlePing)
mux.HandleFunc("PUT /api/v1/vmm.shutdown", s.handleVMMShutdown)
Expand Down Expand Up @@ -118,6 +128,77 @@ func (s *ShimServer) handleShutdown(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusNoContent)
}

// handleSnapshot serves PUT /api/v1/vm.snapshot.
//
// The JSON request body must carry a non-empty destination_path. The VM has to
// be paused and its configuration must pass save/restore validation. On
// success the destination directory contains the saved machine state plus a
// manifest describing it, and the handler replies 204 No Content.
//
// Responses:
//   - 400 for an undecodable body, a missing destination_path, a VM that is
//     not paused, or a configuration that does not support save/restore.
//   - 500 for filesystem failures or a failed machine-state save.
//
// On failure no partial snapshot is left behind. If this handler created the
// destination directory it is removed entirely; if the directory already
// existed, only the snapshot artifacts written here are removed so unrelated
// contents of a caller-supplied path are never deleted.
func (s *ShimServer) handleSnapshot(w http.ResponseWriter, r *http.Request) {
	// Decode and validate the request before taking the lock so that a slow or
	// malformed request body cannot stall the other control endpoints.
	var req snapshotRequest
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		http.Error(w, fmt.Sprintf("invalid snapshot request: %v", err), http.StatusBadRequest)
		return
	}
	if req.DestinationPath == "" {
		http.Error(w, "destination_path is required", http.StatusBadRequest)
		return
	}

	s.mu.Lock()
	defer s.mu.Unlock()

	if s.vm.State() != vz.VirtualMachineStatePaused {
		http.Error(w, "vm must be paused before snapshot", http.StatusBadRequest)
		return
	}
	if err := validateSaveRestoreSupport(s.vmConfig); err != nil {
		http.Error(w, fmt.Sprintf("save/restore not supported: %v", err), http.StatusBadRequest)
		return
	}

	// Remember whether the destination existed beforehand; this decides how
	// aggressively a failed snapshot may be cleaned up.
	_, statErr := os.Stat(req.DestinationPath)
	createdDir := os.IsNotExist(statErr)

	if err := os.MkdirAll(req.DestinationPath, 0755); err != nil {
		http.Error(w, fmt.Sprintf("create snapshot dir failed: %v", err), http.StatusInternalServerError)
		return
	}

	machineStatePath := filepath.Join(req.DestinationPath, shimconfig.SnapshotMachineStateFile)
	manifestPath := filepath.Join(req.DestinationPath, shimconfig.SnapshotManifestFile)
	tmpManifestPath := manifestPath + ".tmp"

	snapshotComplete := false
	defer func() {
		if snapshotComplete {
			return
		}
		// Cleanup is best-effort: the request has already failed and the
		// original error has been reported to the client.
		if createdDir {
			_ = os.RemoveAll(req.DestinationPath)
			return
		}
		// Pre-existing directory: remove only the snapshot artifacts. The
		// final manifest is included because the machine state it would refer
		// to has been removed as well.
		for _, p := range []string{machineStatePath, tmpManifestPath, manifestPath} {
			_ = os.RemoveAll(p)
		}
	}()

	// Clear any stale machine state from an earlier snapshot before saving.
	if err := os.RemoveAll(machineStatePath); err != nil {
		http.Error(w, fmt.Sprintf("prepare machine state path failed: %v", err), http.StatusInternalServerError)
		return
	}
	if err := saveMachineState(s.vm, machineStatePath); err != nil {
		http.Error(w, fmt.Sprintf("save machine state failed: %v", err), http.StatusInternalServerError)
		return
	}

	manifest := shimconfig.SnapshotManifest{
		Hypervisor:       "vz",
		MachineStateFile: shimconfig.SnapshotMachineStateFile,
		ShimConfig:       s.shimConfig,
	}
	// This field is runtime-only; restore path is populated by the caller on restore.
	manifest.ShimConfig.RestoreMachineStatePath = ""
	manifestBytes, err := json.Marshal(manifest)
	if err != nil {
		http.Error(w, fmt.Sprintf("marshal manifest failed: %v", err), http.StatusInternalServerError)
		return
	}
	// Write to a temporary name and rename so the final manifest path never
	// holds a partially written file.
	if err := os.WriteFile(tmpManifestPath, manifestBytes, 0644); err != nil {
		http.Error(w, fmt.Sprintf("write manifest failed: %v", err), http.StatusInternalServerError)
		return
	}
	if err := os.Rename(tmpManifestPath, manifestPath); err != nil {
		http.Error(w, fmt.Sprintf("finalize manifest failed: %v", err), http.StatusInternalServerError)
		return
	}

	snapshotComplete = true
	slog.Info("VM snapshot saved", "destination", req.DestinationPath, "machine_state", machineStatePath)
	w.WriteHeader(http.StatusNoContent)
}

func (s *ShimServer) handlePowerButton(w http.ResponseWriter, r *http.Request) {
s.mu.Lock()
defer s.mu.Unlock()
Expand Down Expand Up @@ -173,6 +254,10 @@ func vzStateToString(state vz.VirtualMachineState) string {
return "Resuming"
case vz.VirtualMachineStateStopping:
return "Stopping"
case vz.VirtualMachineStateSaving:
return "Saving"
case vz.VirtualMachineStateRestoring:
return "Restoring"
default:
return "Unknown"
}
Expand Down
44 changes: 40 additions & 4 deletions cmd/vz-shim/vm.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
package main

import (
"encoding/base64"
"fmt"
"log/slog"
"net"
Expand All @@ -15,7 +16,7 @@ import (
)

// createVM creates and configures a vz.VirtualMachine from ShimConfig.
func createVM(config shimconfig.ShimConfig) (*vz.VirtualMachine, *vz.VirtualMachineConfiguration, error) {
func createVM(config *shimconfig.ShimConfig) (*vz.VirtualMachine, *vz.VirtualMachineConfiguration, error) {
// Prepare kernel command line (vz uses hvc0 for serial console)
kernelArgs := config.KernelArgs
if kernelArgs == "" {
Expand Down Expand Up @@ -61,15 +62,19 @@ func createVM(config shimconfig.ShimConfig) (*vz.VirtualMachine, *vz.VirtualMach
return nil, nil, fmt.Errorf("configure storage: %w", err)
}

if err := configurePlatform(vmConfig, config); err != nil {
return nil, nil, fmt.Errorf("configure platform: %w", err)
}

vsockConfig, err := vz.NewVirtioSocketDeviceConfiguration()
if err != nil {
return nil, nil, fmt.Errorf("create vsock device: %w", err)
}
vmConfig.SetSocketDevicesVirtualMachineConfiguration([]vz.SocketDeviceConfiguration{vsockConfig})

if balloonConfig, err := vz.NewVirtioTraditionalMemoryBalloonDeviceConfiguration(); err == nil {
vmConfig.SetMemoryBalloonDevicesVirtualMachineConfiguration([]vz.MemoryBalloonDeviceConfiguration{balloonConfig})
}
// Do not attach memory balloon for now.
// Save/restore compatibility on VZ can fail with "invalid argument" for some
// Linux guest configurations when a balloon device is present.

if validated, err := vmConfig.Validate(); !validated || err != nil {
return nil, nil, fmt.Errorf("invalid vm configuration: %w", err)
Expand All @@ -83,6 +88,37 @@ func createVM(config shimconfig.ShimConfig) (*vz.VirtualMachine, *vz.VirtualMach
return vm, vmConfig, nil
}

// configurePlatform installs a generic platform configuration, including a
// machine identifier, on vmConfig.
//
// When config.MachineIdentifierData is set, it is base64-decoded and used to
// recreate the identifier. Otherwise a fresh identifier is generated and its
// base64-encoded data representation is stored back into
// config.MachineIdentifierData, which is why config is taken by pointer.
func configurePlatform(vmConfig *vz.VirtualMachineConfiguration, config *shimconfig.ShimConfig) error {
	var id *vz.GenericMachineIdentifier

	if encoded := config.MachineIdentifierData; encoded == "" {
		fresh, err := vz.NewGenericMachineIdentifier()
		if err != nil {
			return fmt.Errorf("create machine identifier: %w", err)
		}
		// Record the new identifier on the config for later reuse.
		config.MachineIdentifierData = base64.StdEncoding.EncodeToString(fresh.DataRepresentation())
		id = fresh
	} else {
		raw, err := base64.StdEncoding.DecodeString(encoded)
		if err != nil {
			return fmt.Errorf("decode machine identifier data: %w", err)
		}
		restored, err := vz.NewGenericMachineIdentifierWithData(raw)
		if err != nil {
			return fmt.Errorf("recreate machine identifier: %w", err)
		}
		id = restored
	}

	platform, err := vz.NewGenericPlatformConfiguration(vz.WithGenericMachineIdentifier(id))
	if err != nil {
		return fmt.Errorf("create generic platform config: %w", err)
	}
	vmConfig.SetPlatformVirtualMachineConfiguration(platform)
	return nil
}

func configureSerialConsole(vmConfig *vz.VirtualMachineConfiguration, logPath string) error {
var serialAttachment *vz.FileHandleSerialPortAttachment

Expand Down
Loading