From e23f7a04604c8129627f694fbae2e8c74ce3898f Mon Sep 17 00:00:00 2001 From: fabriziopandini Date: Fri, 15 Nov 2024 23:07:22 +0100 Subject: [PATCH] Improve CAPD load balancer --- .../controllers/dockermachine_controller.go | 59 ++++++++++++++----- .../docker/internal/docker/loadbalancer.go | 30 ++++++---- .../docker/internal/docker/machine.go | 4 +- .../docker/internal/docker/manager.go | 20 +++++-- .../docker/internal/docker/util.go | 3 +- .../docker/internal/loadbalancer/config.go | 21 +++++-- .../internal/loadbalancer/config_test.go | 37 ++++++++---- .../docker/internal/loadbalancer/const.go | 2 +- 8 files changed, 124 insertions(+), 52 deletions(-) diff --git a/test/infrastructure/docker/internal/controllers/dockermachine_controller.go b/test/infrastructure/docker/internal/controllers/dockermachine_controller.go index fd855062948f..fb1060a1883d 100644 --- a/test/infrastructure/docker/internal/controllers/dockermachine_controller.go +++ b/test/infrastructure/docker/internal/controllers/dockermachine_controller.go @@ -184,7 +184,7 @@ func (r *DockerMachineReconciler) Reconcile(ctx context.Context, req ctrl.Reques // Handle deleted machines if !dockerMachine.ObjectMeta.DeletionTimestamp.IsZero() { - return ctrl.Result{}, r.reconcileDelete(ctx, dockerCluster, machine, dockerMachine, externalMachine, externalLoadBalancer) + return ctrl.Result{}, r.reconcileDelete(ctx, cluster, dockerCluster, machine, dockerMachine, externalMachine, externalLoadBalancer) } // Handle non-deleted machines @@ -251,6 +251,16 @@ func (r *DockerMachineReconciler) reconcileNormal(ctx context.Context, cluster * version = machine.Spec.Version } + // if the corresponding machine is deleted but the docker machine not yet, update load balancer configuration to divert all traffic from this instance + if util.IsControlPlaneMachine(machine) && !machine.DeletionTimestamp.IsZero() && dockerMachine.DeletionTimestamp.IsZero() { + if _, ok := dockerMachine.Annotations["dockermachine.infrastructure.cluster.x-k8s.io/weight"]; !ok { + if err := r.reconcileLoadBalancerConfiguration(ctx, cluster, dockerCluster, externalLoadBalancer); err != nil { + return ctrl.Result{}, err + } + } + dockerMachine.Annotations["dockermachine.infrastructure.cluster.x-k8s.io/weight"] = "0" + } + // if the machine is already provisioned, return if dockerMachine.Spec.ProviderID != nil { // ensure ready state is set. @@ -308,12 +318,8 @@ func (r *DockerMachineReconciler) reconcileNormal(ctx context.Context, cluster * // we should only do this once, as reconfiguration more or less ensures // node ref setting fails if util.IsControlPlaneMachine(machine) && !dockerMachine.Status.LoadBalancerConfigured { - unsafeLoadBalancerConfigTemplate, err := r.getUnsafeLoadBalancerConfigTemplate(ctx, dockerCluster) - if err != nil { - return ctrl.Result{}, errors.Wrap(err, "failed to retrieve HAProxy configuration from CustomHAProxyConfigTemplateRef") - } - if err := externalLoadBalancer.UpdateConfiguration(ctx, unsafeLoadBalancerConfigTemplate); err != nil { - return ctrl.Result{}, errors.Wrap(err, "failed to update DockerCluster.loadbalancer configuration") + if err := r.reconcileLoadBalancerConfiguration(ctx, cluster, dockerCluster, externalLoadBalancer); err != nil { + return ctrl.Result{}, err } dockerMachine.Status.LoadBalancerConfigured = true } @@ -439,7 +445,7 @@ func (r *DockerMachineReconciler) reconcileNormal(ctx context.Context, cluster * return ctrl.Result{}, nil } -func (r *DockerMachineReconciler) reconcileDelete(ctx context.Context, dockerCluster *infrav1.DockerCluster, machine *clusterv1.Machine, dockerMachine *infrav1.DockerMachine, externalMachine *docker.Machine, externalLoadBalancer *docker.LoadBalancer) error { +func (r *DockerMachineReconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster, dockerCluster *infrav1.DockerCluster, machine *clusterv1.Machine, dockerMachine *infrav1.DockerMachine, externalMachine *docker.Machine, externalLoadBalancer *docker.LoadBalancer) error { // Set the ContainerProvisionedCondition reporting delete is started, and issue a patch in order to make // this visible to the users. // NB. The operation in docker is fast, so there is the chance the user will not notice the status change; @@ -460,12 +466,8 @@ func (r *DockerMachineReconciler) reconcileDelete(ctx context.Context, dockerClu // if the deleted machine is a control-plane node, remove it from the load balancer configuration; if util.IsControlPlaneMachine(machine) { - unsafeLoadBalancerConfigTemplate, err := r.getUnsafeLoadBalancerConfigTemplate(ctx, dockerCluster) - if err != nil { - return errors.Wrap(err, "failed to retrieve HAProxy configuration from CustomHAProxyConfigTemplateRef") - } - if err := externalLoadBalancer.UpdateConfiguration(ctx, unsafeLoadBalancerConfigTemplate); err != nil { - return errors.Wrap(err, "failed to update DockerCluster.loadbalancer configuration") + if err := r.reconcileLoadBalancerConfiguration(ctx, cluster, dockerCluster, externalLoadBalancer); err != nil { + return err } } @@ -474,6 +476,35 @@ func (r *DockerMachineReconciler) reconcileDelete(ctx context.Context, dockerClu return nil } +func (r *DockerMachineReconciler) reconcileLoadBalancerConfiguration(ctx context.Context, cluster *clusterv1.Cluster, dockerCluster *infrav1.DockerCluster, externalLoadBalancer *docker.LoadBalancer) error { + controlPlaneWeight := map[string]int{} + + controlPlaneMachineList := &clusterv1.MachineList{} + if err := r.Client.List(ctx, controlPlaneMachineList, client.InNamespace(cluster.Namespace), client.MatchingLabels{ + clusterv1.MachineControlPlaneLabel: "", + clusterv1.ClusterNameLabel: cluster.Name, + }); err != nil { + return errors.Wrap(err, "failed to list control plane machines") + } + + for _, m := range controlPlaneMachineList.Items { + containerName := docker.MachineContainerName(cluster.Name, m.Name) + controlPlaneWeight[containerName] = 100 + if !m.DeletionTimestamp.IsZero() && len(controlPlaneMachineList.Items) > 1 { + controlPlaneWeight[containerName] = 0 + } + } + + unsafeLoadBalancerConfigTemplate, err := r.getUnsafeLoadBalancerConfigTemplate(ctx, dockerCluster) + if err != nil { + return errors.Wrap(err, "failed to retrieve HAProxy configuration from CustomHAProxyConfigTemplateRef") + } + if err := externalLoadBalancer.UpdateConfiguration(ctx, controlPlaneWeight, unsafeLoadBalancerConfigTemplate); err != nil { + return errors.Wrap(err, "failed to update DockerCluster.loadbalancer configuration") + } + return nil +} + // SetupWithManager will add watches for this controller. func (r *DockerMachineReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error { if r.Client == nil || r.ContainerRuntime == nil || r.ClusterCache == nil { diff --git a/test/infrastructure/docker/internal/docker/loadbalancer.go b/test/infrastructure/docker/internal/docker/loadbalancer.go index c52b5002ed47..5f85c743b5d6 100644 --- a/test/infrastructure/docker/internal/docker/loadbalancer.go +++ b/test/infrastructure/docker/internal/docker/loadbalancer.go @@ -142,13 +142,20 @@ func (s *LoadBalancer) Create(ctx context.Context) error { } // UpdateConfiguration updates the external load balancer configuration with new control plane nodes. -func (s *LoadBalancer) UpdateConfiguration(ctx context.Context, unsafeLoadBalancerConfig string) error { +func (s *LoadBalancer) UpdateConfiguration(ctx context.Context, weights map[string]int, unsafeLoadBalancerConfig string) error { log := ctrl.LoggerFrom(ctx) if s.container == nil { return errors.New("unable to configure load balancer: load balancer container does not exists") } + configData := &loadbalancer.ConfigData{ + FrontendControlPlanePort: s.frontendControlPlanePort, + BackendControlPlanePort: s.backendControlPlanePort, + BackendServers: map[string]loadbalancer.BackendServer{}, + IPv6: s.ipFamily == clusterv1.IPv6IPFamily, + } + // collect info about the existing controlplane nodes filters := container.FilterBuilder{} filters.AddKeyNameValue(filterLabel, clusterLabelKey, s.name) @@ -159,17 +166,23 @@ func (s *LoadBalancer) UpdateConfiguration(ctx context.Context, unsafeLoadBalanc return errors.WithStack(err) } - var backendServers = map[string]string{} for _, n := range controlPlaneNodes { + backendServer := loadbalancer.BackendServer{} controlPlaneIPv4, controlPlaneIPv6, err := n.IP(ctx) if err != nil { return errors.Wrapf(err, "failed to get IP for container %s", n.String()) } if s.ipFamily == clusterv1.IPv6IPFamily { - backendServers[n.String()] = controlPlaneIPv6 + backendServer.Address = controlPlaneIPv6 } else { - backendServers[n.String()] = controlPlaneIPv4 + backendServer.Address = controlPlaneIPv4 } + + backendServer.Weight = 100 + if w, ok := weights[n.String()]; ok { + backendServer.Weight = w + } + configData.BackendServers[n.String()] = backendServer } loadBalancerConfigTemplate := loadbalancer.DefaultTemplate @@ -177,14 +190,7 @@ func (s *LoadBalancer) UpdateConfiguration(ctx context.Context, unsafeLoadBalanc loadBalancerConfigTemplate = unsafeLoadBalancerConfig } - loadBalancerConfig, err := loadbalancer.Config(&loadbalancer.ConfigData{ - FrontendControlPlanePort: s.frontendControlPlanePort, - BackendControlPlanePort: s.backendControlPlanePort, - BackendServers: backendServers, - IPv6: s.ipFamily == clusterv1.IPv6IPFamily, - }, - loadBalancerConfigTemplate, - ) + loadBalancerConfig, err := loadbalancer.Config(configData, loadBalancerConfigTemplate) if err != nil { return errors.WithStack(err) } diff --git a/test/infrastructure/docker/internal/docker/machine.go b/test/infrastructure/docker/internal/docker/machine.go index 332e9b15f7e9..80fe9fab2328 100644 --- a/test/infrastructure/docker/internal/docker/machine.go +++ b/test/infrastructure/docker/internal/docker/machine.go @@ -82,7 +82,7 @@ func NewMachine(ctx context.Context, cluster *clusterv1.Cluster, machine string, filters := container.FilterBuilder{} filters.AddKeyNameValue(filterLabel, clusterLabelKey, cluster.Name) - filters.AddKeyValue(filterName, fmt.Sprintf("^%s$", machineContainerName(cluster.Name, machine))) + filters.AddKeyValue(filterName, fmt.Sprintf("^%s$", MachineContainerName(cluster.Name, machine))) for key, val := range filterLabels { filters.AddKeyNameValue(filterLabel, key, val) } @@ -171,7 +171,7 @@ func (m *Machine) Name() string { // ContainerName return the name of the container for this machine. func (m *Machine) ContainerName() string { - return machineContainerName(m.cluster, m.machine) + return MachineContainerName(m.cluster, m.machine) } // ProviderID return the provider identifier for this machine. diff --git a/test/infrastructure/docker/internal/docker/manager.go b/test/infrastructure/docker/internal/docker/manager.go index 96d5b4144ea5..0f4979bb25a0 100644 --- a/test/infrastructure/docker/internal/docker/manager.go +++ b/test/infrastructure/docker/internal/docker/manager.go @@ -106,12 +106,20 @@ func (m *Manager) CreateWorkerNode(ctx context.Context, name, clusterName string // This can break the Kubeconfig in kind, i.e. the file resulting from `kind get kubeconfig -n $CLUSTER_NAME' if the load balancer container is restarted. func (m *Manager) CreateExternalLoadBalancerNode(ctx context.Context, name, image, clusterName, listenAddress string, port int32, _ clusterv1.ClusterIPFamily) (*types.Node, error) { // load balancer port mapping - portMappings := []v1alpha4.PortMapping{{ - ListenAddress: listenAddress, - HostPort: port, - ContainerPort: ControlPlanePort, - Protocol: v1alpha4.PortMappingProtocolTCP, - }} + portMappings := []v1alpha4.PortMapping{ + { + ListenAddress: listenAddress, + HostPort: port, + ContainerPort: ControlPlanePort, + Protocol: v1alpha4.PortMappingProtocolTCP, + }, + { + ListenAddress: listenAddress, + HostPort: port, + ContainerPort: 8404, + Protocol: v1alpha4.PortMappingProtocolTCP, + }, + } createOpts := &nodeCreateOpts{ Name: name, ClusterName: clusterName, diff --git a/test/infrastructure/docker/internal/docker/util.go b/test/infrastructure/docker/internal/docker/util.go index 9fe5121e48de..7955cd381d31 100644 --- a/test/infrastructure/docker/internal/docker/util.go +++ b/test/infrastructure/docker/internal/docker/util.go @@ -44,7 +44,8 @@ func FailureDomainLabel(failureDomain *string) map[string]string { return nil } -func machineContainerName(cluster, machine string) string { +// MachineContainerName computes the name of a container for a given machine. +func MachineContainerName(cluster, machine string) string { if strings.HasPrefix(machine, cluster) { return machine } diff --git a/test/infrastructure/docker/internal/loadbalancer/config.go b/test/infrastructure/docker/internal/loadbalancer/config.go index 0e31f2a0277e..ada49d6d184d 100644 --- a/test/infrastructure/docker/internal/loadbalancer/config.go +++ b/test/infrastructure/docker/internal/loadbalancer/config.go @@ -28,10 +28,16 @@ import ( type ConfigData struct { FrontendControlPlanePort string BackendControlPlanePort string - BackendServers map[string]string + BackendServers map[string]BackendServer IPv6 bool } +// BackendServer defines a loadbalancer backend. +type BackendServer struct { + Address string + Weight int +} + // DefaultTemplate is the loadbalancer config template. const DefaultTemplate = `# generated by kind global @@ -56,6 +62,14 @@ defaults # allow to boot despite dns don't resolve backends default-server init-addr none +frontend stats + mode http + bind *:8404 + stats enable + stats uri /stats + stats refresh 1s + stats admin if TRUE + frontend control-plane bind *:{{ .FrontendControlPlanePort }} {{ if .IPv6 -}} @@ -65,9 +79,8 @@ frontend control-plane backend kube-apiservers option httpchk GET /healthz - # TODO: we should be verifying (!) - {{range $server, $address := .BackendServers}} - server {{ $server }} {{ JoinHostPort $address $.BackendControlPlanePort }} check check-ssl verify none resolvers docker resolve-prefer {{ if $.IPv6 -}} ipv6 {{- else -}} ipv4 {{- end }} + {{range $server, $backend := .BackendServers}} + server {{ $server }} {{ JoinHostPort $backend.Address $.BackendControlPlanePort }} weight {{ $backend.Weight }} check check-ssl verify none resolvers docker resolve-prefer {{ if $.IPv6 -}} ipv6 {{- else -}} ipv4 {{- end }} {{- end}} ` diff --git a/test/infrastructure/docker/internal/loadbalancer/config_test.go b/test/infrastructure/docker/internal/loadbalancer/config_test.go index f398d3cc6b13..6585699a8433 100644 --- a/test/infrastructure/docker/internal/loadbalancer/config_test.go +++ b/test/infrastructure/docker/internal/loadbalancer/config_test.go @@ -36,8 +36,11 @@ func TestConfig(t *testing.T) { data: &ConfigData{ BackendControlPlanePort: "6443", FrontendControlPlanePort: "7777", - BackendServers: map[string]string{ - "control-plane-0": "1.1.1.1", + BackendServers: map[string]BackendServer{ + "control-plane-0": { + Address: "1.1.1.1", + Weight: 99, + }, }, }, configTemplate: DefaultTemplate, @@ -64,6 +67,14 @@ defaults # allow to boot despite dns don't resolve backends default-server init-addr none +frontend stats + mode http + bind *:8404 + stats enable + stats uri /stats + stats refresh 1s + stats admin if TRUE + frontend control-plane bind *:7777 @@ -71,9 +82,8 @@ frontend control-plane backend kube-apiservers option httpchk GET /healthz - # TODO: we should be verifying (!) - server control-plane-0 1.1.1.1:6443 check check-ssl verify none resolvers docker resolve-prefer ipv4 + server control-plane-0 1.1.1.1:6443 weight 99 check check-ssl verify none resolvers docker resolve-prefer ipv4 `, }, { @@ -81,8 +91,11 @@ backend kube-apiservers data: &ConfigData{ FrontendControlPlanePort: "7777", BackendControlPlanePort: "6443", - BackendServers: map[string]string{ - "control-plane-0": "1.1.1.1", + BackendServers: map[string]BackendServer{ + "control-plane-0": { + Address: "1.1.1.1", + Weight: 99, + }, }, }, configTemplate: `# generated by kind @@ -118,8 +131,8 @@ frontend control-plane backend kube-apiservers option httpchk GET /healthz # TODO: we should be verifying (!) - {{range $server, $address := .BackendServers}} - server {{ $server }} {{ JoinHostPort $address $.BackendControlPlanePort }} check check-ssl verify none resolvers docker resolve-prefer {{ if $.IPv6 -}} ipv6 {{- else -}} ipv4 {{- end }} + {{range $server, $backend := .BackendServers}} + server {{ $server }} {{ JoinHostPort $backend.Address $.BackendControlPlanePort }} check check-ssl verify none resolvers docker resolve-prefer {{ if $.IPv6 -}} ipv6 {{- else -}} ipv4 {{- end }} weight {{ $backend.Weight }} {{- end}} frontend rke2-join @@ -131,8 +144,8 @@ frontend rke2-join backend rke2-servers option httpchk GET /v1-rke2/readyz http-check expect status 403 - {{range $server, $address := .BackendServers}} - server {{ $server }} {{ $address }}:9345 check check-ssl verify none resolvers docker resolve-prefer {{ if $.IPv6 -}} ipv6 {{- else -}} ipv4 {{- end }} + {{range $server, $backend := .BackendServers}} + server {{ $server }} {{ $backend.Address }}:9345 check check-ssl verify none resolvers docker resolve-prefer {{ if $.IPv6 -}} ipv6 {{- else -}} ipv4 {{- end }} weight {{ $backend.Weight }} {{- end}} `, expectedConfig: `# generated by kind @@ -167,7 +180,7 @@ backend kube-apiservers option httpchk GET /healthz # TODO: we should be verifying (!) - server control-plane-0 1.1.1.1:6443 check check-ssl verify none resolvers docker resolve-prefer ipv4 + server control-plane-0 1.1.1.1:6443 check check-ssl verify none resolvers docker resolve-prefer ipv4 weight 99 frontend rke2-join bind *:9345 @@ -177,7 +190,7 @@ backend rke2-servers option httpchk GET /v1-rke2/readyz http-check expect status 403 - server control-plane-0 1.1.1.1:9345 check check-ssl verify none resolvers docker resolve-prefer ipv4 + server control-plane-0 1.1.1.1:9345 check check-ssl verify none resolvers docker resolve-prefer ipv4 weight 99 `, }, } diff --git a/test/infrastructure/docker/internal/loadbalancer/const.go b/test/infrastructure/docker/internal/loadbalancer/const.go index 18627380f503..971e7467b1bd 100644 --- a/test/infrastructure/docker/internal/loadbalancer/const.go +++ b/test/infrastructure/docker/internal/loadbalancer/const.go @@ -24,7 +24,7 @@ const ( DefaultImageRepository = "kindest" // DefaultImageTag is the loadbalancer image tag. - DefaultImageTag = "v20230510-486859a6" + DefaultImageTag = "v20230606-42a2262b" // ConfigPath is the path to the config file in the image. ConfigPath = "/usr/local/etc/haproxy/haproxy.cfg"