diff --git a/pkg/api/v1/storage_pool.go b/pkg/api/v1/storage_pool.go index c686ec60..3a0eaf19 100644 --- a/pkg/api/v1/storage_pool.go +++ b/pkg/api/v1/storage_pool.go @@ -70,6 +70,34 @@ type APICallRc struct { ErrorRep string `json:"error_report_ids,omitempty"` } +// MASK_ERROR / MASK_STOR_POOL / FAIL_STOR_POOL_CONFIGURATION_ERROR mirror the +// upstream LINSTOR `ApiConsts` constants of the same name. Exposed on the +// shared API package (rather than pkg/rest, where the rest-internal copies +// live) so the store layer can stamp structured `Reports[]` entries without +// pulling a cyclic dependency on pkg/rest. The values are byte-identical to +// the upstream Java constants — golinstor and the Python CLI both +// pattern-match on them. +const ( + // APICallRcMaskError is upstream `ApiConsts.MASK_ERROR` + // (`0xC000_0000_0000_0000L`). Negative when stored in int64. The + // Python CLI's `get_replies_state` treats any entry with this bit + // as severity ERROR and renders the state column as "Error". + APICallRcMaskError int64 = -0x4000_0000_0000_0000 + + // APICallRcMaskStorPool is upstream `ApiConsts.MASK_STOR_POOL` + // (`0x0000_0000_0014_0000L`). Tags the entry as "subject = + // StoragePool" so audit-log greppers can route it. + APICallRcMaskStorPool int64 = 0x0000_0000_0014_0000 + + // APICallRcFailStorPoolConfigurationError is upstream + // `ApiConsts.FAIL_STOR_POOL_CONFIGURATION_ERROR` (`990 | + // MASK_ERROR`). Closest semantic match in upstream's catalogue + // for "the storage pool's backing configuration is broken on the + // satellite" — exactly the situation `PoolMissing=true` describes + // after `zpool destroy` / `vgremove` on a backing pool (Bug 83). + APICallRcFailStorPoolConfigurationError int64 = 990 +) + // Storage provider kind constants — the canonical strings LINSTOR uses. const ( StoragePoolKindLVM = "LVM" diff --git a/pkg/rest/storage_pools_test.go b/pkg/rest/storage_pools_test.go index 1753fbae..a3fba209 100644 --- a/pkg/rest/storage_pools_test.go +++ b/pkg/rest/storage_pools_test.go @@ -1515,3 +1515,80 @@ func TestStoragePoolListPropertiesEmptyDecodes(t *testing.T) { t.Errorf("Props: unexpected entry %q=%q on an empty seed", k, v) } } + +// TestViewStoragePoolsFaultyStampsReports pins the Bug 83 fix +// end-to-end through the REST view path: a pool whose store-side +// PoolMissing carrier is true MUST surface on /v1/view/storage- +// pools with both state=="Faulty" AND a non-empty reports[] array +// whose first entry has MASK_ERROR set + a message identifying the +// missing pool. +// +// Why both: the upstream Python linstor-client renders the State +// column from `get_replies_state(storpool.reports)` — the +// structured ApiCallRc array — NOT from the bare `state` string. +// An empty reports[] always returns ("Ok", green), so a satellite- +// reported "Faulty" pool kept rendering as Ok in `linstor sp l` +// before the fix. This test pins the wire contract that closes +// that gap: `linstor sp l` on a destroyed-zpool node must show +// Faulty (or the Python CLI's equivalent of "error band first +// reports entry") — not Ok. +func TestViewStoragePoolsFaultyStampsReports(t *testing.T) { + st := store.NewInMemory() + ctx := t.Context() + + // Seed a Faulty pool: PoolMissing=true is the only way to + // reach the synthesise-reports path. We DON'T pre-populate + // State/Reports — the store's derive path must do that. + sp := apiv1.StoragePool{ + StoragePoolName: "zfs-thin", + NodeName: "w3", + ProviderKind: apiv1.StoragePoolKindZFSThin, + PoolMissing: true, + } + if err := st.StoragePools().Create(ctx, &sp); err != nil { + t.Fatalf("seed pool: %v", err) + } + + base, stop := startServerWithStore(t, st) + defer stop() + + resp := httpGet(t, base+"/v1/view/storage-pools") + defer func() { _ = resp.Body.Close() }() + + if resp.StatusCode != http.StatusOK { + t.Fatalf("status: got %d, want 200", resp.StatusCode) + } + + var got []apiv1.StoragePool + if err := json.NewDecoder(resp.Body).Decode(&got); err != nil { + t.Fatalf("decode: %v", err) + } + + if len(got) != 1 { + t.Fatalf("entries: got %d, want 1", len(got)) + } + + if got[0].State != "Faulty" { + t.Errorf("state: got %q, want %q", got[0].State, "Faulty") + } + + if len(got[0].Reports) == 0 { + t.Fatalf("reports: empty; want >=1 ERROR entry so python-" + + "linstor's State column derives to Faulty (Bug 83)") + } + + rc := got[0].Reports[0] + if rc.RetCode&apiv1.APICallRcMaskError == 0 { + t.Errorf("ret_code missing MASK_ERROR bit: %#x", rc.RetCode) + } + + // Wording invariant: must contain "missing" so audit-log greppers + // can pattern-match on the failure mode without parsing ret_code. + if !strings.Contains(rc.Message, "missing") { + t.Errorf("message %q missing substring %q", rc.Message, "missing") + } + + if !strings.Contains(rc.Message, "zfs-thin") { + t.Errorf("message %q missing pool name", rc.Message) + } +} diff --git a/pkg/store/inmemory_storage_pool.go b/pkg/store/inmemory_storage_pool.go index e7ca3bcd..18dfa584 100644 --- a/pkg/store/inmemory_storage_pool.go +++ b/pkg/store/inmemory_storage_pool.go @@ -111,6 +111,12 @@ func (s *inMemoryStoragePools) Get(_ context.Context, node, pool string) (apiv1. // The Python CLI's `storpool_cmds.py` does `':' not in // free_space_mgr_name` and crashes with TypeError when the field is // null — so filling this defensively is also a CLI-stability fix. +// +// Bug 83: when PoolMissing=true we ALSO synthesise an ERROR-severity +// reports[] entry, because python-linstor renders the State column +// from `get_replies_state(storpool.reports)` rather than the bare +// `state` string. Without the reports entry, `linstor sp l` shows Ok +// against a Faulty pool. Mirrors the k8s store's poolMissingReport. func withDerivedState(sp *apiv1.StoragePool) { if sp.State == "" { if sp.PoolMissing { @@ -120,6 +126,10 @@ func withDerivedState(sp *apiv1.StoragePool) { } } + if sp.PoolMissing && len(sp.Reports) == 0 { + sp.Reports = []apiv1.APICallRc{poolMissingReport(sp.NodeName, sp.StoragePoolName)} + } + if sp.FreeSpaceMgrName == "" { if sp.SharedSpaceID != "" { sp.FreeSpaceMgrName = sp.SharedSpaceID @@ -129,6 +139,31 @@ func withDerivedState(sp *apiv1.StoragePool) { } } +// poolMissingReport builds the same ERROR-severity ApiCallRc entry the k8s +// store stamps onto a Faulty pool. Duplicated here (rather than importing +// from pkg/store/k8s) to avoid a cyclic import — k8s already imports +// pkg/store. Bug 83. +func poolMissingReport(node, pool string) apiv1.APICallRc { + return apiv1.APICallRc{ + RetCode: apiv1.APICallRcMaskError | + apiv1.APICallRcMaskStorPool | + apiv1.APICallRcFailStorPoolConfigurationError, + Message: "pool backing storage missing on node " + node + + ": storage pool " + pool + " is not present", + Cause: "The satellite probed the backing volume group / zpool / " + + "directory for this storage pool and found it absent — " + + "typically the result of an operator `zpool destroy`, " + + "`vgremove`, or an unmounted filesystem.", + Correc: "Re-discover the pool via `linstor ps cdp " + + " ` once the backing storage is restored, or " + + "recreate the pool with `linstor sp c`.", + ObjRefs: map[string]string{ + "Node": node, + "StorPool": pool, + }, + } +} + // Create inserts a new pool. Conflict on (node, name). func (s *inMemoryStoragePools) Create(_ context.Context, sp *apiv1.StoragePool) error { if sp == nil { diff --git a/pkg/store/k8s/storage_pools.go b/pkg/store/k8s/storage_pools.go index cc04d594..2d387b16 100644 --- a/pkg/store/k8s/storage_pools.go +++ b/pkg/store/k8s/storage_pools.go @@ -241,9 +241,23 @@ func crdToWireStoragePool(crd *crdv1alpha1.StoragePool) apiv1.StoragePool { // Surface the satellite's PoolMissing signal as the wire `state` // field. Upstream LINSTOR uses "Ok" / "Faulty" / "Error" — we map // PoolMissing=true → "Faulty", everything else → "Ok". + // + // The `state` string alone is not enough for the Python linstor- + // client to render the State column: `storpool_cmds.py` derives + // the column value from `get_replies_state(storpool.reports)`, the + // structured ApiCallRc array, NOT from the bare `state` field. + // With an empty reports[], `get_replies_state` returns ("Ok", + // green) regardless of `state` — so a satellite-reported "Faulty" + // pool still rendered as Ok in `linstor sp l`. Synthesise an + // ERROR-severity reports[] entry whenever PoolMissing=true so the + // CLI sees the real state. Bug 83. state := "Ok" + + var reports []apiv1.APICallRc + if crd.Status.PoolMissing { state = "Faulty" + reports = []apiv1.APICallRc{poolMissingReport(crd.Spec.NodeName, poolName)} } return apiv1.StoragePool{ @@ -259,10 +273,43 @@ func crdToWireStoragePool(crd *crdv1alpha1.StoragePool) apiv1.StoragePool { FreeSpaceMgrName: fsmName, UUID: string(crd.UID), State: state, + Reports: reports, PoolMissing: crd.Status.PoolMissing, } } +// poolMissingReport builds the structured ApiCallRc entry blockstor stamps +// onto a StoragePool's wire shape whenever the satellite has flagged its +// backing storage as missing. The Python linstor-client's State column is +// derived from `get_replies_state(storpool.reports)` rather than the bare +// `state` string field — an empty reports[] always renders "Ok" — so the +// entry MUST be present for `linstor sp l` to surface the real condition. +// +// The ret_code combines upstream LINSTOR's `MASK_ERROR` (severity bit) + +// `MASK_STOR_POOL` (subject bit) + `FAIL_STOR_POOL_CONFIGURATION_ERROR` +// (990) — byte-identical to the value upstream's `CtrlStorPoolApiCallHandler` +// emits when a storage-pool's underlying configuration is broken. Bug 83. +func poolMissingReport(node, pool string) apiv1.APICallRc { + return apiv1.APICallRc{ + RetCode: apiv1.APICallRcMaskError | + apiv1.APICallRcMaskStorPool | + apiv1.APICallRcFailStorPoolConfigurationError, + Message: "pool backing storage missing on node " + node + + ": storage pool " + pool + " is not present", + Cause: "The satellite probed the backing volume group / zpool / " + + "directory for this storage pool and found it absent — " + + "typically the result of an operator `zpool destroy`, " + + "`vgremove`, or an unmounted filesystem.", + Correc: "Re-discover the pool via `linstor ps cdp " + + " ` once the backing storage is restored, or " + + "recreate the pool with `linstor sp c`.", + ObjRefs: map[string]string{ + "Node": node, + "StorPool": pool, + }, + } +} + // SetCapacity updates the pool's Status subresource with live free / // total / supports-snapshots numbers. Avoids a Spec rewrite so a // Hello racing with a periodic capacity push doesn't lose either side. diff --git a/pkg/store/k8s/storage_pools_internal_test.go b/pkg/store/k8s/storage_pools_internal_test.go index 5e5489ce..08000246 100644 --- a/pkg/store/k8s/storage_pools_internal_test.go +++ b/pkg/store/k8s/storage_pools_internal_test.go @@ -17,6 +17,7 @@ limitations under the License. package k8s import ( + "strings" "testing" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -101,6 +102,111 @@ func TestCRDNameUsesPoolDotNodeOrder(t *testing.T) { } } +// TestCrdToWireStoragePoolFaultyStampsReports pins the Bug 83 fix: +// when the CRD's Status.PoolMissing is true, the wire-shape value +// MUST carry both `state == "Faulty"` AND a non-empty `Reports` +// slice with an ERROR-severity ApiCallRc whose message identifies +// the missing pool. Without the reports[] entry the Python linstor- +// client's State column derivation (`get_replies_state` over an +// empty reports[]) collapses to "Ok" — defeating the whole reason +// state="Faulty" was added in Bug 50. +// +// We assert structurally (not on the exact wording) so the message +// copy can evolve without breaking the test, but the wording MUST +// contain "missing" + the pool name so audit-log greppers and +// operator dashboards can pattern-match on the failure mode. +func TestCrdToWireStoragePoolFaultyStampsReports(t *testing.T) { + t.Parallel() + + crd := &crdv1alpha1.StoragePool{ + ObjectMeta: metav1.ObjectMeta{Name: "zfs-thin.w3"}, + Spec: crdv1alpha1.StoragePoolSpec{ + NodeName: "w3", + PoolName: "zfs-thin", + ProviderKind: apiv1.StoragePoolKindZFSThin, + }, + Status: crdv1alpha1.StoragePoolStatus{ + PoolMissing: true, + }, + } + + got := crdToWireStoragePool(crd) + + if got.State != "Faulty" { + t.Errorf("State: got %q, want %q", got.State, "Faulty") + } + + if len(got.Reports) == 0 { + t.Fatalf("Reports: got empty slice; want >=1 entry so " + + "python-linstor renders the State column as Faulty") + } + + rc := got.Reports[0] + + // MASK_ERROR bit must be set so python-linstor's + // `get_replies_state` classifies the entry as ERROR-severity. + if rc.RetCode&apiv1.APICallRcMaskError == 0 { + t.Errorf("RetCode missing MASK_ERROR bit: %#x", rc.RetCode) + } + + // Sub-code must be the storpool-configuration-error mirror. + if rc.RetCode&0xFFFF != apiv1.APICallRcFailStorPoolConfigurationError { + t.Errorf("RetCode sub-code: got %d, want %d (FAIL_STOR_POOL_CONFIGURATION_ERROR)", + rc.RetCode&0xFFFF, apiv1.APICallRcFailStorPoolConfigurationError) + } + + // Message must mention both "missing" and the pool name so the + // operator can correlate from `linstor sp l` output alone. + if !strings.Contains(rc.Message, "missing") { + t.Errorf("Message: %q, want substring %q", rc.Message, "missing") + } + + if !strings.Contains(rc.Message, "zfs-thin") { + t.Errorf("Message: %q, want substring %q (pool name)", + rc.Message, "zfs-thin") + } + + // ObjRefs must tag the entry with the (node, pool) pair so audit + // log queries can filter by either axis. + if rc.ObjRefs["Node"] != "w3" { + t.Errorf("ObjRefs[Node]: got %q, want %q", rc.ObjRefs["Node"], "w3") + } + + if rc.ObjRefs["StorPool"] != "zfs-thin" { + t.Errorf("ObjRefs[StorPool]: got %q, want %q", + rc.ObjRefs["StorPool"], "zfs-thin") + } +} + +// TestCrdToWireStoragePoolHealthyHasNoReports pins the inverse: +// a pool whose Status.PoolMissing is false MUST emit an empty +// Reports slice. Stamping a no-op entry on a healthy pool would +// trip python-linstor's `get_replies_state` into rendering the +// State column as the first-entry severity rather than Ok. +func TestCrdToWireStoragePoolHealthyHasNoReports(t *testing.T) { + t.Parallel() + + crd := &crdv1alpha1.StoragePool{ + ObjectMeta: metav1.ObjectMeta{Name: "zfs-thin.w3"}, + Spec: crdv1alpha1.StoragePoolSpec{ + NodeName: "w3", + PoolName: "zfs-thin", + ProviderKind: apiv1.StoragePoolKindZFSThin, + }, + } + + got := crdToWireStoragePool(crd) + + if got.State != "Ok" { + t.Errorf("State: got %q, want %q", got.State, "Ok") + } + + if len(got.Reports) != 0 { + t.Errorf("Reports: got %d entries, want 0 (healthy pool)", + len(got.Reports)) + } +} + // TestWireToCRDStoragePoolProducesCanonicalName pins the wire→CRD // converter to the canonical `.` shape — same rule the // apiserver CEL validation enforces. Without this test a regression