28 changes: 28 additions & 0 deletions pkg/api/v1/storage_pool.go
@@ -70,6 +70,34 @@ type APICallRc struct {
ErrorRep string `json:"error_report_ids,omitempty"`
}

// MASK_ERROR / MASK_STOR_POOL / FAIL_STOR_POOL_CONFIGURATION_ERROR mirror the
// upstream LINSTOR `ApiConsts` constants of the same name. Exposed on the
// shared API package (rather than pkg/rest, where the rest-internal copies
// live) so the store layer can stamp structured `Reports[]` entries without
// pulling a cyclic dependency on pkg/rest. The values are bit-identical to
// the upstream Java constants — golinstor and the Python CLI both
// pattern-match on them.
const (
// APICallRcMaskError is upstream `ApiConsts.MASK_ERROR`
// (`0xC000_0000_0000_0000L`). Negative when stored in int64. The
// Python CLI's `get_replies_state` treats any entry with this bit
// as severity ERROR and renders the state column as "Error".
APICallRcMaskError int64 = -0x4000_0000_0000_0000

// APICallRcMaskStorPool is upstream `ApiConsts.MASK_STOR_POOL`
// (`0x0000_0000_0014_0000L`). Tags the entry as "subject =
// StoragePool" so audit-log greppers can route it.
APICallRcMaskStorPool int64 = 0x0000_0000_0014_0000

// APICallRcFailStorPoolConfigurationError is upstream
// `ApiConsts.FAIL_STOR_POOL_CONFIGURATION_ERROR` (`990 |
// MASK_ERROR`). Stored here as the bare sub-code 990; callers OR
// the mask bits in themselves. Closest semantic match in
// upstream's catalogue for "the storage pool's backing
// configuration is broken on the satellite" — exactly the
// situation `PoolMissing=true` describes after `zpool destroy` /
// `vgremove` on a backing pool (Bug 83).
APICallRcFailStorPoolConfigurationError int64 = 990
)

// Storage provider kind constants — the canonical strings LINSTOR uses.
const (
StoragePoolKindLVM = "LVM"
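As a quick illustration of how these three constants compose, and of the exact bit tests the stores and tests below perform, here is a minimal, self-contained Go sketch. The lowercase constants restate the values above; nothing in this block is part of the diff.

```go
package main

import "fmt"

// Local restatements of the exported constants above, so the sketch
// compiles on its own.
const (
	maskError                      int64 = -0x4000_0000_0000_0000 // 0xC000_0000_0000_0000 as int64
	maskStorPool                   int64 = 0x0000_0000_0014_0000
	failStorPoolConfigurationError int64 = 990
)

func main() {
	// Compose the ret_code the same way poolMissingReport does below.
	ret := maskError | maskStorPool | failStorPoolConfigurationError

	fmt.Println(ret&maskError != 0)    // true: the severity test python-linstor applies
	fmt.Println(ret&maskStorPool != 0) // true: "subject = StoragePool"
	fmt.Println(ret & 0xFFFF)          // 990: the sub-code the k8s test extracts
	fmt.Printf("%#x\n", uint64(ret))   // 0xc0000000001403de
}
```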
77 changes: 77 additions & 0 deletions pkg/rest/storage_pools_test.go
@@ -1515,3 +1515,80 @@ func TestStoragePoolListPropertiesEmptyDecodes(t *testing.T) {
t.Errorf("Props: unexpected entry %q=%q on an empty seed", k, v)
}
}

// TestViewStoragePoolsFaultyStampsReports pins the Bug 83 fix
// end-to-end through the REST view path: a pool whose store-side
// PoolMissing carrier is true MUST surface on /v1/view/storage-pools
// with both state=="Faulty" AND a non-empty reports[] array whose
// first entry has MASK_ERROR set and a message identifying the
// missing pool.
//
// Why both: the upstream Python linstor-client renders the State
// column from `get_replies_state(storpool.reports)` — the
// structured ApiCallRc array — NOT from the bare `state` string.
// An empty reports[] always returns ("Ok", green), so a satellite-
// reported "Faulty" pool kept rendering as Ok in `linstor sp l`
// before the fix. This test pins the wire contract that closes
// that gap: `linstor sp l` on a destroyed-zpool node must show
// Faulty (i.e. the state the Python CLI derives from an error-band
// first reports entry) — not Ok.
func TestViewStoragePoolsFaultyStampsReports(t *testing.T) {
st := store.NewInMemory()
ctx := t.Context()

// Seed a Faulty pool: PoolMissing=true is the only way to
// reach the synthesise-reports path. We DON'T pre-populate
// State/Reports — the store's derive path must do that.
sp := apiv1.StoragePool{
StoragePoolName: "zfs-thin",
NodeName: "w3",
ProviderKind: apiv1.StoragePoolKindZFSThin,
PoolMissing: true,
}
if err := st.StoragePools().Create(ctx, &sp); err != nil {
t.Fatalf("seed pool: %v", err)
}

base, stop := startServerWithStore(t, st)
defer stop()

resp := httpGet(t, base+"/v1/view/storage-pools")
defer func() { _ = resp.Body.Close() }()

if resp.StatusCode != http.StatusOK {
t.Fatalf("status: got %d, want 200", resp.StatusCode)
}

var got []apiv1.StoragePool
if err := json.NewDecoder(resp.Body).Decode(&got); err != nil {
t.Fatalf("decode: %v", err)
}

if len(got) != 1 {
t.Fatalf("entries: got %d, want 1", len(got))
}

if got[0].State != "Faulty" {
t.Errorf("state: got %q, want %q", got[0].State, "Faulty")
}

if len(got[0].Reports) == 0 {
t.Fatalf("reports: empty; want >=1 ERROR entry so python-" +
"linstor's State column derives to Faulty (Bug 83)")
}

rc := got[0].Reports[0]
if rc.RetCode&apiv1.APICallRcMaskError == 0 {
t.Errorf("ret_code missing MASK_ERROR bit: %#x", rc.RetCode)
}

// Wording invariant: must contain "missing" so audit-log greppers
// can pattern-match on the failure mode without parsing ret_code.
if !strings.Contains(rc.Message, "missing") {
t.Errorf("message %q missing substring %q", rc.Message, "missing")
}

if !strings.Contains(rc.Message, "zfs-thin") {
t.Errorf("message %q missing pool name", rc.Message)
}
}
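For readers without the Python client at hand, the derivation the comments above keep referencing can be approximated in Go roughly as follows. This is a sketch only; the real logic is `get_replies_state` in python-linstor, which also handles warning and info severity bands not modelled here.

```go
// repliesState approximates how the Python CLI turns a pool's
// structured reports into the State column: any entry carrying the
// MASK_ERROR bit wins, and an empty slice renders as "Ok" regardless
// of the bare `state` string, which is exactly the Bug 83 gap.
func repliesState(reports []apiv1.APICallRc) string {
	for _, rc := range reports {
		if rc.RetCode&apiv1.APICallRcMaskError != 0 {
			return "Error"
		}
	}
	return "Ok"
}
```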
35 changes: 35 additions & 0 deletions pkg/store/inmemory_storage_pool.go
@@ -111,6 +111,12 @@ func (s *inMemoryStoragePools) Get(_ context.Context, node, pool string) (apiv1.
// The Python CLI's `storpool_cmds.py` does `':' not in
// free_space_mgr_name` and crashes with TypeError when the field is
// null — so filling this defensively is also a CLI-stability fix.
//
// Bug 83: when PoolMissing=true we ALSO synthesise an ERROR-severity
// reports[] entry, because python-linstor renders the State column
// from `get_replies_state(storpool.reports)` rather than the bare
// `state` string. Without the reports entry, `linstor sp l` shows Ok
// against a Faulty pool. Mirrors the k8s store's poolMissingReport.
func withDerivedState(sp *apiv1.StoragePool) {
if sp.State == "" {
if sp.PoolMissing {
@@ -120,6 +126,10 @@ func withDerivedState(sp *apiv1.StoragePool) {
}
}

if sp.PoolMissing && len(sp.Reports) == 0 {
sp.Reports = []apiv1.APICallRc{poolMissingReport(sp.NodeName, sp.StoragePoolName)}
}

if sp.FreeSpaceMgrName == "" {
if sp.SharedSpaceID != "" {
sp.FreeSpaceMgrName = sp.SharedSpaceID
@@ -129,6 +139,31 @@
}
}

// poolMissingReport builds the same ERROR-severity ApiCallRc entry the k8s
// store stamps onto a Faulty pool. Duplicated here (rather than importing
// from pkg/store/k8s) to avoid a cyclic import — k8s already imports
// pkg/store. Bug 83.
func poolMissingReport(node, pool string) apiv1.APICallRc {
return apiv1.APICallRc{
RetCode: apiv1.APICallRcMaskError |
apiv1.APICallRcMaskStorPool |
apiv1.APICallRcFailStorPoolConfigurationError,
Message: "pool backing storage missing on node " + node +
": storage pool " + pool + " is not present",
Cause: "The satellite probed the backing volume group / zpool / " +
"directory for this storage pool and found it absent — " +
"typically the result of an operator `zpool destroy`, " +
"`vgremove`, or an unmounted filesystem.",
Correc: "Re-discover the pool via `linstor ps cdp <provider> " +
"<node> <pool>` once the backing storage is restored, or " +
"recreate the pool with `linstor sp c`.",
ObjRefs: map[string]string{
"Node": node,
"StorPool": pool,
},
}
}
Comment on lines +146 to +165
Severity: medium

The poolMissingReport function is identical to the one added in pkg/store/k8s/storage_pools.go. Since both packages already depend on pkg/api/v1, moving this logic to a shared helper in that package (e.g., NewPoolMissingReport) would eliminate duplication and ensure consistency across store implementations. This is particularly important for maintaining the specific error messages and codes used for this state.
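For concreteness, the deduplication the comment asks for could look roughly like this. The `NewPoolMissingReport` name and its placement in pkg/api/v1 follow the reviewer's suggestion; neither appears in this diff.

```go
// In pkg/api/v1: hypothetical shared helper. Both store
// implementations would call apiv1.NewPoolMissingReport(node, pool)
// instead of keeping private copies.
func NewPoolMissingReport(node, pool string) APICallRc {
	return APICallRc{
		RetCode: APICallRcMaskError |
			APICallRcMaskStorPool |
			APICallRcFailStorPoolConfigurationError,
		Message: "pool backing storage missing on node " + node +
			": storage pool " + pool + " is not present",
		// Cause and Correc exactly as in the two copies in this diff.
		ObjRefs: map[string]string{"Node": node, "StorPool": pool},
	}
}
```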


// Create inserts a new pool. Conflict on (node, name).
func (s *inMemoryStoragePools) Create(_ context.Context, sp *apiv1.StoragePool) error {
if sp == nil {
47 changes: 47 additions & 0 deletions pkg/store/k8s/storage_pools.go
@@ -241,9 +241,23 @@ func crdToWireStoragePool(crd *crdv1alpha1.StoragePool) apiv1.StoragePool {
// Surface the satellite's PoolMissing signal as the wire `state`
// field. Upstream LINSTOR uses "Ok" / "Faulty" / "Error" — we map
// PoolMissing=true → "Faulty", everything else → "Ok".
//
// The `state` string alone is not enough for the Python linstor-
// client to render the State column: `storpool_cmds.py` derives
// the column value from `get_replies_state(storpool.reports)`, the
// structured ApiCallRc array, NOT from the bare `state` field.
// With an empty reports[], `get_replies_state` returns ("Ok",
// green) regardless of `state` — so a satellite-reported "Faulty"
// pool still rendered as Ok in `linstor sp l`. Synthesise an
// ERROR-severity reports[] entry whenever PoolMissing=true so the
// CLI sees the real state. Bug 83.
state := "Ok"

var reports []apiv1.APICallRc

if crd.Status.PoolMissing {
state = "Faulty"
reports = []apiv1.APICallRc{poolMissingReport(crd.Spec.NodeName, poolName)}
}

return apiv1.StoragePool{
@@ -259,10 +273,43 @@ func crdToWireStoragePool(crd *crdv1alpha1.StoragePool) apiv1.StoragePool {
FreeSpaceMgrName: fsmName,
UUID: string(crd.UID),
State: state,
Reports: reports,
PoolMissing: crd.Status.PoolMissing,
}
}

// poolMissingReport builds the structured ApiCallRc entry blockstor stamps
// onto a StoragePool's wire shape whenever the satellite has flagged its
// backing storage as missing. The Python linstor-client's State column is
// derived from `get_replies_state(storpool.reports)` rather than the bare
// `state` string field — an empty reports[] always renders "Ok" — so the
// entry MUST be present for `linstor sp l` to surface the real condition.
//
// The ret_code combines upstream LINSTOR's `MASK_ERROR` (severity bit) +
// `MASK_STOR_POOL` (subject bit) + `FAIL_STOR_POOL_CONFIGURATION_ERROR`
// (990) — bit-identical to the value upstream's `CtrlStorPoolApiCallHandler`
// emits when a storage-pool's underlying configuration is broken. Bug 83.
func poolMissingReport(node, pool string) apiv1.APICallRc {
return apiv1.APICallRc{
RetCode: apiv1.APICallRcMaskError |
apiv1.APICallRcMaskStorPool |
apiv1.APICallRcFailStorPoolConfigurationError,
Message: "pool backing storage missing on node " + node +
": storage pool " + pool + " is not present",
Cause: "The satellite probed the backing volume group / zpool / " +
"directory for this storage pool and found it absent — " +
"typically the result of an operator `zpool destroy`, " +
"`vgremove`, or an unmounted filesystem.",
Correc: "Re-discover the pool via `linstor ps cdp <provider> " +
"<node> <pool>` once the backing storage is restored, or " +
"recreate the pool with `linstor sp c`.",
ObjRefs: map[string]string{
"Node": node,
"StorPool": pool,
},
}
}

// SetCapacity updates the pool's Status subresource with live free /
// total / supports-snapshots numbers. Avoids a Spec rewrite so a
// Hello racing with a periodic capacity push doesn't lose either side.
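The SetCapacity body itself is collapsed in this view. Assuming the store uses a controller-runtime client and Status fields named roughly as below (all assumptions, none shown in the diff), the race-avoidance the comment describes reduces to updating only the Status subresource:

```go
// Sketch only: the client handle, key shape, and Status field names
// are assumptions; the diff collapses the real implementation.
func (s *k8sStoragePools) SetCapacity(ctx context.Context,
	node, pool string, freeKiB, totalKiB int64, snapshots bool) error {

	var crd crdv1alpha1.StoragePool
	key := client.ObjectKey{Name: pool + "." + node} // canonical <pool>.<node>
	if err := s.cl.Get(ctx, key, &crd); err != nil {
		return err
	}

	crd.Status.FreeCapacityKiB = freeKiB
	crd.Status.TotalCapacityKiB = totalKiB
	crd.Status.SupportsSnapshots = snapshots

	// Status().Update writes the Status subresource only, so a Hello
	// rewriting Spec at the same time cannot lose either side.
	return s.cl.Status().Update(ctx, &crd)
}
```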
106 changes: 106 additions & 0 deletions pkg/store/k8s/storage_pools_internal_test.go
@@ -17,6 +17,7 @@ limitations under the License.
package k8s

import (
"strings"
"testing"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -101,6 +102,111 @@ func TestCRDNameUsesPoolDotNodeOrder(t *testing.T) {
}
}

// TestCrdToWireStoragePoolFaultyStampsReports pins the Bug 83 fix:
// when the CRD's Status.PoolMissing is true, the wire-shape value
// MUST carry both `state == "Faulty"` AND a non-empty `Reports`
// slice with an ERROR-severity ApiCallRc whose message identifies
// the missing pool. Without the reports[] entry the Python linstor-
// client's State column derivation (`get_replies_state` over an
// empty reports[]) collapses to "Ok" — defeating the whole reason
// state="Faulty" was added in Bug 50.
//
// We assert structurally (not on the exact wording) so the message
// copy can evolve without breaking the test, but the wording MUST
// contain "missing" + the pool name so audit-log greppers and
// operator dashboards can pattern-match on the failure mode.
func TestCrdToWireStoragePoolFaultyStampsReports(t *testing.T) {
t.Parallel()

crd := &crdv1alpha1.StoragePool{
ObjectMeta: metav1.ObjectMeta{Name: "zfs-thin.w3"},
Spec: crdv1alpha1.StoragePoolSpec{
NodeName: "w3",
PoolName: "zfs-thin",
ProviderKind: apiv1.StoragePoolKindZFSThin,
},
Status: crdv1alpha1.StoragePoolStatus{
PoolMissing: true,
},
}

got := crdToWireStoragePool(crd)

if got.State != "Faulty" {
t.Errorf("State: got %q, want %q", got.State, "Faulty")
}

if len(got.Reports) == 0 {
t.Fatalf("Reports: got empty slice; want >=1 entry so " +
"python-linstor renders the State column as Faulty")
}

rc := got.Reports[0]

// MASK_ERROR bit must be set so python-linstor's
// `get_replies_state` classifies the entry as ERROR-severity.
if rc.RetCode&apiv1.APICallRcMaskError == 0 {
t.Errorf("RetCode missing MASK_ERROR bit: %#x", rc.RetCode)
}

// Sub-code must be the storpool-configuration-error mirror.
if rc.RetCode&0xFFFF != apiv1.APICallRcFailStorPoolConfigurationError {
t.Errorf("RetCode sub-code: got %d, want %d (FAIL_STOR_POOL_CONFIGURATION_ERROR)",
rc.RetCode&0xFFFF, apiv1.APICallRcFailStorPoolConfigurationError)
}

// Message must mention both "missing" and the pool name so the
// operator can correlate from `linstor sp l` output alone.
if !strings.Contains(rc.Message, "missing") {
t.Errorf("Message: %q, want substring %q", rc.Message, "missing")
}

if !strings.Contains(rc.Message, "zfs-thin") {
t.Errorf("Message: %q, want substring %q (pool name)",
rc.Message, "zfs-thin")
}

// ObjRefs must tag the entry with the (node, pool) pair so audit
// log queries can filter by either axis.
if rc.ObjRefs["Node"] != "w3" {
t.Errorf("ObjRefs[Node]: got %q, want %q", rc.ObjRefs["Node"], "w3")
}

if rc.ObjRefs["StorPool"] != "zfs-thin" {
t.Errorf("ObjRefs[StorPool]: got %q, want %q",
rc.ObjRefs["StorPool"], "zfs-thin")
}
}

// TestCrdToWireStoragePoolHealthyHasNoReports pins the inverse:
// a pool whose Status.PoolMissing is false MUST emit an empty
// Reports slice. Stamping a no-op entry on a healthy pool would
// trip python-linstor's `get_replies_state` into rendering the
// State column as the first-entry severity rather than Ok.
func TestCrdToWireStoragePoolHealthyHasNoReports(t *testing.T) {
t.Parallel()

crd := &crdv1alpha1.StoragePool{
ObjectMeta: metav1.ObjectMeta{Name: "zfs-thin.w3"},
Spec: crdv1alpha1.StoragePoolSpec{
NodeName: "w3",
PoolName: "zfs-thin",
ProviderKind: apiv1.StoragePoolKindZFSThin,
},
}

got := crdToWireStoragePool(crd)

if got.State != "Ok" {
t.Errorf("State: got %q, want %q", got.State, "Ok")
}

if len(got.Reports) != 0 {
t.Errorf("Reports: got %d entries, want 0 (healthy pool)",
len(got.Reports))
}
}

// TestWireToCRDStoragePoolProducesCanonicalName pins the wire→CRD
// converter to the canonical `<pool>.<node>` shape — same rule the
// apiserver CEL validation enforces. Without this test a regression