Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion internal/notifier/factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ func matrixNotifierFunc(opts notifierOptions) (Interface, error) {
}

func opsgenieNotifierFunc(opts notifierOptions) (Interface, error) {
return NewOpsgenie(opts.URL, opts.ProxyURL, opts.TLSConfig, opts.Token)
return NewOpsgenie(opts.URL, opts.ProxyURL, opts.TLSConfig, opts.Token, opts.ProviderUID)
}

func alertmanagerNotifierFunc(opts notifierOptions) (Interface, error) {
Expand Down
50 changes: 41 additions & 9 deletions internal/notifier/opsgenie.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package notifier

import (
"context"
"crypto/sha256"
"crypto/tls"
"errors"
"fmt"
Expand All @@ -28,19 +29,21 @@ import (
)

type Opsgenie struct {
URL string
ProxyURL string
TLSConfig *tls.Config
ApiKey string
URL string
ProxyURL string
TLSConfig *tls.Config
ApiKey string
ProviderUID string
}

// OpsgenieAlert is the JSON payload that Post sends to Opsgenie for an event.
type OpsgenieAlert struct {
// Message is the alert title; Post sets it to "<kind>/<name>" of the
// event's involved object.
Message string `json:"message"`
// Alias is the deduplication key produced by generateOpsgenieAlias. It is
// left out of the JSON document when empty.
Alias string `json:"alias,omitempty"`
// Description carries the event message.
Description string `json:"description"`
// Details holds extra key/value pairs for the alert; Post stores the event
// severity under the "severity" key.
Details map[string]string `json:"details"`
}

func NewOpsgenie(hookURL string, proxyURL string, tlsConfig *tls.Config, token string) (*Opsgenie, error) {
func NewOpsgenie(hookURL string, proxyURL string, tlsConfig *tls.Config, token string, providerUID string) (*Opsgenie, error) {
_, err := url.ParseRequestURI(hookURL)
if err != nil {
return nil, fmt.Errorf("invalid Opsgenie hook URL %s: '%w'", hookURL, err)
Expand All @@ -51,10 +54,11 @@ func NewOpsgenie(hookURL string, proxyURL string, tlsConfig *tls.Config, token s
}

return &Opsgenie{
URL: hookURL,
ProxyURL: proxyURL,
ApiKey: token,
TLSConfig: tlsConfig,
URL: hookURL,
ProxyURL: proxyURL,
ApiKey: token,
TLSConfig: tlsConfig,
ProviderUID: providerUID,
}, nil
}

Expand All @@ -67,8 +71,15 @@ func (s *Opsgenie) Post(ctx context.Context, event eventv1.Event) error {
}
details["severity"] = event.Severity

// Construct a stable alias for deduplication in Opsgenie.
// The alias is derived from the provider UID, the involved object's kind,
// namespace and name, and the event reason, so that repeated alerts for the
// same source are deduplicated while different reasons or providers create
// separate alerts.
alias := generateOpsgenieAlias(s.ProviderUID, event)

payload := OpsgenieAlert{
Message: event.InvolvedObject.Kind + "/" + event.InvolvedObject.Name,
Alias: alias,
Description: event.Message,
Details: details,
}
Expand All @@ -91,3 +102,24 @@ func (s *Opsgenie) Post(ctx context.Context, event eventv1.Event) error {

return nil
}

// generateOpsgenieAlias creates a stable, deterministic alias string from
// the provider UID and the event's involved object and reason. Opsgenie uses
// the alias field to deduplicate alerts — alerts with the same alias are
// treated as the same incident instead of creating new pages. The provider UID
// is included so that alerts from different clusters (each with their own
// Provider resource) produce distinct aliases even when the involved objects
// share the same kind/namespace/name. The alias is the full hex-encoded
// SHA-256 digest (64 characters), which stays well within Opsgenie's 512-char
// alias limit while remaining collision-resistant.
func generateOpsgenieAlias(providerUID string, event eventv1.Event) string {
key := fmt.Sprintf("%s/%s/%s/%s/%s",
providerUID,
event.InvolvedObject.Kind,
event.InvolvedObject.Namespace,
event.InvolvedObject.Name,
event.Reason,
Comment on lines +118 to +121
Copy link
Copy Markdown
Member

@stefanprodan stefanprodan Apr 10, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will cause alerts from multiple clusters to aggregate under the same incident since all clusters have Kustomization/flux-system/flux-system, which is a major breaking change. Adding the Alert Provider UID to the checksum would ensure each cluster gets a dedicated incident.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch — in a multi-cluster setup Kustomization/flux-system/flux-system would hash identically across clusters, collapsing separate incidents into one.

ProviderUID is already available in notifierOptions but opsgenieNotifierFunc doesn't pass it through to NewOpsgenie. I'll thread it into the Opsgenie struct and prepend it to the alias hash input so each Provider (and therefore each cluster) gets a unique alias. Will push the fix shortly.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this looks like AI talking

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair point — I did lean on AI for drafting that reply. The fix is in f322c46 though: threads ProviderUID into the Opsgenie struct and prepends it to the alias hash, so each cluster's Provider produces distinct aliases. Added tests for the multi-cluster case. Happy to adjust if the approach doesn't look right.

)
hash := fmt.Sprintf("%x", sha256.Sum256([]byte(key)))
return hash[:64]
}
109 changes: 108 additions & 1 deletion internal/notifier/opsgenie_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ package notifier

import (
"context"
"crypto/sha256"
"encoding/json"
"fmt"
"io"
"net/http"
"net/http/httptest"
Expand Down Expand Up @@ -61,11 +63,116 @@ func TestOpsgenie_Post(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
g := NewWithT(t)
opsgenie, err := NewOpsgenie(ts.URL, "", nil, "token")
opsgenie, err := NewOpsgenie(ts.URL, "", nil, "token", "")
g.Expect(err).ToNot(HaveOccurred())

err = opsgenie.Post(context.TODO(), tt.event())
g.Expect(err).ToNot(HaveOccurred())
})
}
}

// TestOpsgenie_PostAlias checks that Post sends the expected deduplication
// alias in the payload it delivers to the Opsgenie endpoint. The alias must
// depend on the provider UID, the involved object's kind, namespace and name,
// and the event reason, and must not depend on the message or metadata.
func TestOpsgenie_PostAlias(t *testing.T) {
	var receivedPayload OpsgenieAlert
	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		b, err := io.ReadAll(r.Body)
		if err != nil {
			w.WriteHeader(http.StatusInternalServerError)
			return
		}
		// Decode into a fresh value so that fields (and the Details map) from
		// a previous request cannot leak into this one, and reject bodies
		// that are not valid JSON instead of silently keeping stale data.
		var payload OpsgenieAlert
		if err := json.Unmarshal(b, &payload); err != nil {
			w.WriteHeader(http.StatusBadRequest)
			return
		}
		receivedPayload = payload
	}))
	defer ts.Close()

	const providerUID = "test-provider-uid-123"

	// aliasOf returns the hex-encoded SHA-256 digest of key, which is the
	// alias expected for an event whose hash input is key.
	aliasOf := func(key string) string {
		return fmt.Sprintf("%x", sha256.Sum256([]byte(key)))
	}

	tests := []struct {
		name          string
		event         func() v1beta1.Event
		expectedAlias string
	}{
		{
			name:          "alias includes provider UID for cluster uniqueness",
			event:         testEvent,
			expectedAlias: aliasOf("test-provider-uid-123/GitRepository/gitops-system/webapp/reason"),
		},
		{
			name: "alias is stable for same event",
			event: func() v1beta1.Event {
				e := testEvent()
				e.Message = "different message should not change alias"
				return e
			},
			expectedAlias: aliasOf("test-provider-uid-123/GitRepository/gitops-system/webapp/reason"),
		},
		{
			name: "alias differs for different reason",
			event: func() v1beta1.Event {
				e := testEvent()
				e.Reason = "HealthCheckFailed"
				return e
			},
			expectedAlias: aliasOf("test-provider-uid-123/GitRepository/gitops-system/webapp/HealthCheckFailed"),
		},
		{
			name: "alias differs for different namespace",
			event: func() v1beta1.Event {
				e := testEvent()
				e.InvolvedObject.Namespace = "production"
				return e
			},
			expectedAlias: aliasOf("test-provider-uid-123/GitRepository/production/webapp/reason"),
		},
		{
			name: "alias with empty metadata",
			event: func() v1beta1.Event {
				e := testEvent()
				e.Metadata = nil
				return e
			},
			expectedAlias: aliasOf("test-provider-uid-123/GitRepository/gitops-system/webapp/reason"),
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			g := NewWithT(t)

			// Several cases expect the same alias, so clear whatever the
			// previous subtest captured; otherwise a request that never
			// reached the handler could still satisfy the assertion.
			receivedPayload = OpsgenieAlert{}

			opsgenie, err := NewOpsgenie(ts.URL, "", nil, "token", providerUID)
			g.Expect(err).ToNot(HaveOccurred())

			err = opsgenie.Post(context.TODO(), tt.event())
			g.Expect(err).ToNot(HaveOccurred())
			g.Expect(receivedPayload.Alias).ToNot(BeEmpty())
			g.Expect(receivedPayload.Alias).To(Equal(tt.expectedAlias))
		})
	}
}

// TestGenerateOpsgenieAlias exercises generateOpsgenieAlias directly: the
// result must be repeatable, 64 characters long, and sensitive to both the
// event reason and the provider UID.
func TestGenerateOpsgenieAlias(t *testing.T) {
	g := NewWithT(t)

	const uid = "test-uid"
	base := testEvent()

	// Two calls with identical inputs must agree.
	first := generateOpsgenieAlias(uid, base)
	second := generateOpsgenieAlias(uid, base)
	g.Expect(first).To(Equal(second))

	// A hex-encoded SHA-256 digest is 64 characters long.
	g.Expect(first).To(HaveLen(64))

	// Changing only the reason must change the alias.
	otherReason := testEvent()
	otherReason.Reason = "DifferentReason"
	withOtherReason := generateOpsgenieAlias(uid, otherReason)
	g.Expect(first).ToNot(Equal(withOtherReason))

	// Changing only the provider UID must change the alias.
	withOtherUID := generateOpsgenieAlias("different-uid", base)
	g.Expect(first).ToNot(Equal(withOtherUID))
}