-
Notifications
You must be signed in to change notification settings - Fork 455
Handle cluster migrations gracefully in front-proxy #4219
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,12 +18,16 @@ package lookup | |
|
|
||
| import ( | ||
| "context" | ||
| "fmt" | ||
| "net/http" | ||
| "net/url" | ||
| "strings" | ||
|
|
||
| apierrors "k8s.io/apimachinery/pkg/api/errors" | ||
| "k8s.io/apimachinery/pkg/runtime/schema" | ||
| "k8s.io/apiserver/pkg/endpoints/filters" | ||
| "k8s.io/apiserver/pkg/endpoints/handlers/responsewriters" | ||
| "k8s.io/apiserver/pkg/endpoints/request" | ||
| kubernetesscheme "k8s.io/client-go/kubernetes/scheme" | ||
| "k8s.io/klog/v2" | ||
|
|
||
|
|
@@ -87,6 +91,18 @@ func newClusterResolveHandler(delegate http.Handler, index proxyindex.Index) htt | |
| return | ||
| } | ||
|
|
||
| // If the cluster was recently migrated and the request is not a fresh list or watch | ||
| // return a 410 to force the client to do a relist. | ||
| // | ||
| // This is only required for watches. | ||
| // Lists with a lower RV will get appropriate objects from the shard. | ||
| // Lists with a higher RV will get a 504 from the shard. | ||
| if index.RecentlyMigrated(result.Cluster) && isInProgressWatch(req) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, what about clients that HAVE done a list before, already know a good RV on the new shard, and want to watch from there on; all while this 90s period is ticking? Maybe the error message could be a bit more helpful, like: This could be called a pretty hard nit-picking, but a random user, looking at logs of their client might not know what is going on.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Lists shouldn't be affected, they are just passed through to the shard and as mentioned in the comment they'll either have a lower RV than the new shard, in which case they'll get a diff to the current RV of the new shard.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The original implementation also rejected lists but the more I read the code for reflectors the less it made sense to handle lists instead of letting them through^^ |
||
| err := apierrors.NewResourceExpired(fmt.Sprintf("logical cluster %q migrated to another shard; relist required", result.Cluster)) | ||
| responsewriters.ErrorNegotiated(err, kubernetesscheme.Codecs, schema.GroupVersion{}, w, req) | ||
| return | ||
| } | ||
|
|
||
| shardURL, err := url.Parse(result.URL) | ||
| if err != nil { | ||
| responsewriters.InternalError(w, req, err) | ||
|
|
@@ -105,12 +121,40 @@ func newClusterResolveHandler(delegate http.Handler, index proxyindex.Index) htt | |
| } | ||
|
|
||
| ctx = WithShardURL(ctx, shardURL) | ||
|
|
||
| // Bound watch requests by the per-cluster context to cancel them when the cluster is migrated or deleted. | ||
| if isWatchRequest(req) { | ||
| var cancel context.CancelFunc | ||
| ctx, cancel = index.ClusterContext(ctx, result.Cluster) | ||
| defer cancel() | ||
| } | ||
|
|
||
| req = req.WithContext(ctx) | ||
|
|
||
| delegate.ServeHTTP(w, req) | ||
| } | ||
| } | ||
|
|
||
| func isWatchRequest(req *http.Request) bool { | ||
| if info, ok := request.RequestInfoFrom(req.Context()); ok && info.IsResourceRequest { | ||
| return info.Verb == "watch" | ||
| } | ||
| switch req.URL.Query().Get("watch") { | ||
| case "true", "1": | ||
| return true | ||
| } | ||
| return false | ||
| } | ||
|
|
||
| func isInProgressWatch(req *http.Request) bool { | ||
| rv := req.URL.Query().Get("resourceVersion") | ||
| if rv == "" || rv == "0" { | ||
| return false | ||
| } | ||
|
|
||
| return isWatchRequest(req) | ||
| } | ||
|
|
||
| func newMappingHandler(delegate http.Handler, index proxyindex.Index) http.HandlerFunc { | ||
| return func(w http.ResponseWriter, req *http.Request) { | ||
| // Not every virtual workspace and/or every mapping has a {cluster} in its URL; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Huh? Sounds like the agent had a bit more context than what's in the comment here :P
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
:D Sortof yes, the comments in the original code (from where I copied the context handling) talked about the sticky cancellation and thats what it ran with when building the tests :D