Skip to content

Commit 0b40448

Browse files
committed
feat: Add per-workspace backup status tracking with annotations
Enhance the backup cronjob controller to track backup status individually for each workspace using annotations. This allows more granular control over when backups are triggered based on each workspace's specific backup history rather than a global timestamp. Key changes: - Add handleBackupJobStatus to update DevWorkspace annotations when backup jobs complete or fail - Modify wasStoppedSinceLastBackup to check workspace-specific annotations first, falling back to global time - Add job event handler and predicate to watch for backup job status changes - Include DevWorkspace name label on backup jobs for easier association - Add comprehensive test coverage for new functionality - Fix typo in success log message Assisted-by: Claude
1 parent dad556b commit 0b40448

File tree

4 files changed

+757
-14
lines changed

4 files changed

+757
-14
lines changed

controllers/backupcronjob/backupcronjob_controller.go

Lines changed: 57 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package controllers
1818
import (
1919
"context"
2020
"reflect"
21+
"time"
2122

2223
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
2324
"sigs.k8s.io/controller-runtime/pkg/reconcile"
@@ -40,6 +41,7 @@ import (
4041
"k8s.io/apimachinery/pkg/runtime"
4142
"k8s.io/utils/ptr"
4243
ctrl "sigs.k8s.io/controller-runtime"
44+
"sigs.k8s.io/controller-runtime/pkg/builder"
4345
"sigs.k8s.io/controller-runtime/pkg/client"
4446
"sigs.k8s.io/controller-runtime/pkg/event"
4547
"sigs.k8s.io/controller-runtime/pkg/handler"
@@ -93,7 +95,8 @@ func (r *BackupCronJobReconciler) SetupWithManager(mgr ctrl.Manager) error {
9395

9496
return ctrl.NewControllerManagedBy(mgr).
9597
Named("BackupCronJob").
96-
Watches(&controllerv1alpha1.DevWorkspaceOperatorConfig{},
98+
Watches(
99+
&controllerv1alpha1.DevWorkspaceOperatorConfig{},
97100
handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, object client.Object) []reconcile.Request {
98101
operatorNamespace, err := infrastructure.GetNamespace()
99102
// Ignore events from other namespaces
@@ -111,17 +114,22 @@ func (r *BackupCronJobReconciler) SetupWithManager(mgr ctrl.Manager) error {
111114
},
112115
}
113116
}),
117+
builder.WithPredicates(configPredicate),
118+
).
119+
Watches(
120+
&batchv1.Job{},
121+
r.getBackupJobEventHandler(),
122+
builder.WithPredicates(r.getBackupJobPredicate()),
114123
).
115-
WithEventFilter(configPredicate).
116124
Complete(r)
117125
}
118126

119127
// +kubebuilder:rbac:groups="",resources=persistentvolumeclaims,verbs=get;list
120128
// +kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;create;update;patch;delete
121129
// +kubebuilder:rbac:groups="",resources=serviceaccounts;,verbs=get;list;create;update;patch;delete
122-
// +kubebuilder:rbac:groups=batch,resources=jobs,verbs=get;list;create;update;patch;delete
130+
// +kubebuilder:rbac:groups=batch,resources=jobs,verbs=get;list;create;update;patch;delete;watch
123131
// +kubebuilder:rbac:groups=controller.devfile.io,resources=devworkspaceoperatorconfigs,verbs=get;list;update;patch;watch
124-
// +kubebuilder:rbac:groups=workspace.devfile.io,resources=devworkspaces,verbs=get;list
132+
// +kubebuilder:rbac:groups=workspace.devfile.io,resources=devworkspaces,verbs=get;list;update;patch
125133
// +kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=clusterrolebindings,verbs=get;list;create;update;patch;delete
126134
// +kubebuilder:rbac:groups="",resources=builds,verbs=get
127135
// +kubebuilder:rbac:groups="",resources=builds/details,verbs=update
@@ -215,7 +223,7 @@ func (r *BackupCronJobReconciler) stopCron(log logr.Logger) {
215223
}
216224

217225
// executeBackupSync executes the backup job for all DevWorkspaces in the cluster that
218-
// have been stopped in the last N minutes.
226+
// have been stopped since their last backup.
219227
func (r *BackupCronJobReconciler) executeBackupSync(ctx context.Context, dwOperatorConfig *controllerv1alpha1.DevWorkspaceOperatorConfig, log logr.Logger) error {
220228
log.Info("Executing backup sync for all DevWorkspaces")
221229

@@ -264,31 +272,65 @@ func (r *BackupCronJobReconciler) executeBackupSync(ctx context.Context, dwOpera
264272
return nil
265273
}
266274

267-
// wasStoppedSinceLastBackup checks if the DevWorkspace was stopped since the last backup time.
268-
func (r *BackupCronJobReconciler) wasStoppedSinceLastBackup(workspace *dw.DevWorkspace, lastBackupTime *metav1.Time, log logr.Logger) bool {
275+
// wasStoppedSinceLastBackup checks if the DevWorkspace was stopped since its last backup.
276+
// It reads the last backup time from the DevWorkspace annotation, or falls back to the
277+
// provided globalLastBackupTime if the annotation doesn't exist.
278+
func (r *BackupCronJobReconciler) wasStoppedSinceLastBackup(
279+
workspace *dw.DevWorkspace,
280+
globalLastBackupTime *metav1.Time,
281+
log logr.Logger,
282+
) bool {
269283
if workspace.Status.Phase != dw.DevWorkspaceStatusStopped {
270284
return false
271285
}
272286
log.Info("DevWorkspace is currently stopped, checking if it was stopped since last backup", "namespace", workspace.Namespace, "name", workspace.Name)
273-
// Check if the workspace was stopped in the last N minutes
287+
288+
// Get the last backup time and success status from the workspace annotations
289+
var lastBackupTime *metav1.Time
290+
var lastBackupSuccessful bool
291+
if workspace.Annotations != nil {
292+
if lastBackupTimeStr, ok := workspace.Annotations[constants.DevWorkspaceLastBackupTimeAnnotation]; ok {
293+
parsedTime, err := time.Parse(time.RFC3339Nano, lastBackupTimeStr)
294+
if err != nil {
295+
log.Error(err, "Failed to parse last backup time annotation, treating as no previous backup", "value", lastBackupTimeStr)
296+
} else {
297+
lastBackupTime = &metav1.Time{Time: parsedTime}
298+
}
299+
}
300+
301+
lastBackupSuccessful = workspace.Annotations[constants.DevWorkspaceLastBackupSuccessfulAnnotation] == "true"
302+
}
303+
304+
// Fall back to globalLastBackupTime if annotation doesn't exist
305+
if lastBackupTime == nil && globalLastBackupTime != nil {
306+
lastBackupTime = globalLastBackupTime
307+
}
308+
309+
if lastBackupTime == nil {
310+
return true
311+
}
312+
313+
if !lastBackupSuccessful {
314+
return true
315+
}
316+
317+
// Check if the workspace was stopped since the last successful backup
274318
if workspace.Status.Conditions != nil {
275319
lastTimeStopped := metav1.Time{}
276320
for _, condition := range workspace.Status.Conditions {
277321
if condition.Type == conditions.Started && condition.Status == corev1.ConditionFalse {
278322
lastTimeStopped = condition.LastTransitionTime
279323
}
280324
}
325+
281326
if !lastTimeStopped.IsZero() {
282-
if lastBackupTime == nil {
283-
// No previous backup, so consider it stopped since last backup
284-
return true
285-
}
286327
if lastTimeStopped.Time.After(lastBackupTime.Time) {
287-
log.Info("DevWorkspace was stopped since last backup", "namespace", workspace.Namespace, "name", workspace.Name)
328+
log.Info("DevWorkspace was stopped since last successful backup", "namespace", workspace.Namespace, "name", workspace.Name)
288329
return true
289330
}
290331
}
291332
}
333+
292334
return false
293335
}
294336

@@ -336,6 +378,7 @@ func (r *BackupCronJobReconciler) createBackupJob(
336378
Namespace: workspace.Namespace,
337379
Labels: map[string]string{
338380
constants.DevWorkspaceIDLabel: dwID,
381+
constants.DevWorkspaceNameLabel: workspace.Name,
339382
constants.DevWorkspaceBackupJobLabel: "true",
340383
},
341384
},
@@ -532,7 +575,7 @@ func (r *BackupCronJobReconciler) copySecret(ctx context.Context, workspace *dw.
532575
}
533576
err = r.Create(ctx, namespaceSecret)
534577
if err == nil {
535-
log.Info("Sucesfully created secret", "name", namespaceSecret.Name, "namespace", workspace.Namespace)
578+
log.Info("Successfully created secret", "name", namespaceSecret.Name, "namespace", workspace.Namespace)
536579
}
537580
return namespaceSecret, err
538581
}

0 commit comments

Comments
 (0)