From b7b626bf015a90db4539eebe05ca2dadb97f21f8 Mon Sep 17 00:00:00 2001 From: Stefan Prodan Date: Wed, 25 Sep 2024 14:42:13 +0300 Subject: [PATCH] Implement dependency management for ResourceGroups Signed-off-by: Stefan Prodan --- api/v1/resourcegroup_types.go | 28 +++++ api/v1/zz_generated.deepcopy.go | 20 ++++ cmd/main.go | 1 + ...fluxcd.controlplane.io_resourcegroups.yaml | 30 ++++++ config/samples/fluxcd_v1_resourcegroup.yaml | 5 + docs/api/v1/resourcegroup.md | 53 ++++++++-- .../controller/resourcegroup_controller.go | 54 +++++++++- .../resourcegroup_controller_test.go | 100 ++++++++++++++++++ 8 files changed, 283 insertions(+), 8 deletions(-) diff --git a/api/v1/resourcegroup_types.go b/api/v1/resourcegroup_types.go index 40f8d8b..2665e80 100644 --- a/api/v1/resourcegroup_types.go +++ b/api/v1/resourcegroup_types.go @@ -33,6 +33,11 @@ type ResourceGroupSpec struct { // +optional Resources []*apiextensionsv1.JSON `json:"resources,omitempty"` + // DependsOn specifies the list of Kubernetes resources that must + // exist on the cluster before the reconciliation process starts. + // +optional + DependsOn []Dependency `json:"dependsOn,omitempty"` + // Wait instructs the controller to check the health of all the reconciled // resources. Defaults to true. // +kubebuilder:default:=true @@ -51,6 +56,29 @@ type CommonMetadata struct { Labels map[string]string `json:"labels,omitempty"` } +// Dependency defines a ResourceGroup dependency on a Kubernetes resource. +type Dependency struct { + // APIVersion of the resource to depend on. + // +required + APIVersion string `json:"apiVersion"` + + // Kind of the resource to depend on. + // +required + Kind string `json:"kind"` + + // Name of the resource to depend on. + // +required + Name string `json:"name"` + + // Namespace of the resource to depend on. + // +optional + Namespace string `json:"namespace,omitempty"` + + // Ready checks if the resource Ready status condition is true. 
+ // +optional + Ready bool `json:"ready,omitempty"` +} + // ResourceGroupInput defines the key-value pairs of the resource group input. type ResourceGroupInput map[string]string diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index 459bda7..4ea5192 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -73,6 +73,21 @@ func (in *ComponentImage) DeepCopy() *ComponentImage { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Dependency) DeepCopyInto(out *Dependency) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Dependency. +func (in *Dependency) DeepCopy() *Dependency { + if in == nil { + return nil + } + out := new(Dependency) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Distribution) DeepCopyInto(out *Distribution) { *out = *in @@ -559,6 +574,11 @@ func (in *ResourceGroupSpec) DeepCopyInto(out *ResourceGroupSpec) { } } } + if in.DependsOn != nil { + in, out := &in.DependsOn, &out.DependsOn + *out = make([]Dependency, len(*in)) + copy(*out, *in) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ResourceGroupSpec. 
diff --git a/cmd/main.go b/cmd/main.go index 8e191aa..bdc7355 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -178,6 +178,7 @@ func main() { if err = (&controller.ResourceGroupReconciler{ Client: mgr.GetClient(), + APIReader: mgr.GetAPIReader(), Scheme: mgr.GetScheme(), StatusPoller: polling.NewStatusPoller(mgr.GetClient(), mgr.GetRESTMapper(), polling.Options{}), StatusManager: controllerName, diff --git a/config/crd/bases/fluxcd.controlplane.io_resourcegroups.yaml b/config/crd/bases/fluxcd.controlplane.io_resourcegroups.yaml index 4878cc3..07c9a14 100644 --- a/config/crd/bases/fluxcd.controlplane.io_resourcegroups.yaml +++ b/config/crd/bases/fluxcd.controlplane.io_resourcegroups.yaml @@ -68,6 +68,36 @@ spec: description: Labels to be added to the object's metadata. type: object type: object + dependsOn: + description: |- + DependsOn specifies the list of Kubernetes resources that must + exist on the cluster before the reconciliation process starts. + items: + description: Dependency defines a ResourceGroup dependency on a + Kubernetes resource. + properties: + apiVersion: + description: APIVersion of the resource to depend on. + type: string + kind: + description: Kind of the resource to depend on. + type: string + name: + description: Name of the resource to depend on. + type: string + namespace: + description: Namespace of the resource to depend on. + type: string + ready: + description: Ready checks if the resource Ready status condition + is true. + type: boolean + required: + - apiVersion + - kind + - name + type: object + type: array inputs: description: Inputs contains the list of resource group inputs. 
items: diff --git a/config/samples/fluxcd_v1_resourcegroup.yaml b/config/samples/fluxcd_v1_resourcegroup.yaml index 16a34da..b0ebff7 100644 --- a/config/samples/fluxcd_v1_resourcegroup.yaml +++ b/config/samples/fluxcd_v1_resourcegroup.yaml @@ -8,6 +8,11 @@ metadata: fluxcd.controlplane.io/reconcileEvery: "30m" fluxcd.controlplane.io/reconcileTimeout: "5m" spec: + dependsOn: + - apiVersion: apiextensions.k8s.io/v1 + kind: CustomResourceDefinition + name: helmreleases.helm.toolkit.fluxcd.io + ready: true commonMetadata: labels: app.kubernetes.io/name: podinfo diff --git a/docs/api/v1/resourcegroup.md b/docs/api/v1/resourcegroup.md index 585e779..9b682be 100644 --- a/docs/api/v1/resourcegroup.md +++ b/docs/api/v1/resourcegroup.md @@ -11,6 +11,7 @@ of defining different configurations for a set of workloads per tenant and/or en Use cases: - Application definition: Bundle a set of Kubernetes resources (Flux HelmRelease, OCIRepository, Alert, Provider, Receiver, ImagePolicy) into a single deployable unit. +- Dependency management: Define dependencies between apps to ensure that the resources are applied in the correct order. The dependencies are more flexible than in Flux: they can target other ResourceGroups, CRDs, or any other Kubernetes object. - Multi-instance provisioning: Generate multiple instances of the same application with different configurations. - Multi-cluster provisioning: Generate multiple instances of the same application for each target cluster that are deployed by Flux from a management cluster. - Multi-tenancy provisioning: Generate a set of resources (Namespace, ServiceAccount, RoleBinding) for each tenant with specific roles and permissions. @@ -126,11 +127,9 @@ You can run this example by saving the manifest into `podinfo.yaml`. ## Writing a ResourceGroup spec As with all other Kubernetes config, a ResourceGroup needs `apiVersion`, -`kind`, and `metadata` fields. 
The name of a ResourceGroup object must be a -valid [DNS subdomain name](https://kubernetes.io/docs/concepts/overview/working-with-objects/names#dns-subdomain-names). - -A ResourceGroup also needs a -[`.spec` section](https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#spec-and-status). +`kind`, `metadata.name` and `metadata.namespace` fields. +The name of a ResourceGroup object must be a valid [DNS subdomain name](https://kubernetes.io/docs/concepts/overview/working-with-objects/names#dns-subdomain-names). +A ResourceGroup also needs a [`.spec` section](https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#spec-and-status). ### Inputs configuration @@ -325,6 +324,47 @@ In the above example, all resources generated by the ResourceGroup will not be pruned by the garbage collection process as the `fluxcd.controlplane.io/prune` annotation is set to `disabled`. +### Dependency management + +`.spec.dependsOn` is an optional list used to refer to Kubernetes +objects that the ResourceGroup depends on. If specified, then the ResourceGroup +is reconciled after the referred objects exist in the cluster. + +A dependency is a reference to a Kubernetes object with the following fields: + +- `apiVersion`: The API version of the referred object (required). +- `kind`: The kind of the referred object (required). +- `name`: The name of the referred object (required). +- `namespace`: The namespace of the referred object (optional). +- `ready`: A boolean indicating if the referred object must have the `Ready` status condition set to `True` (optional, default is `false`). 
+ +Example of conditional reconciliation based on the existence of CustomResourceDefinitions +and the readiness of a ResourceGroup: + +```yaml +spec: + dependsOn: + - apiVersion: apiextensions.k8s.io/v1 + kind: CustomResourceDefinition + name: helmreleases.helm.toolkit.fluxcd.io + - apiVersion: apiextensions.k8s.io/v1 + kind: CustomResourceDefinition + name: servicemonitors.monitoring.coreos.com + - apiVersion: fluxcd.controlplane.io/v1 + kind: ResourceGroup + name: cluster-addons + namespace: flux-system + ready: true +``` + +Note that it is recommended to define dependencies on CustomResourceDefinitions if the ResourceGroup +deploys Flux HelmReleases which contain custom resources. + +When the dependencies are not met, the flux-operator will reevaluate the requirements +every five seconds and reconcile the ResourceGroup when the dependencies are satisfied. +Failed dependencies are reported in the ResourceGroup `Ready` [status condition](#ResourceGroup-Status), +in log messages and Kubernetes events. + ### Reconciliation configuration The reconciliation of behaviour of a ResourceGroup can be configured using the following annotations: @@ -396,6 +436,7 @@ following attributes in the ResourceGroup’s `.status.conditions`: The flux-operator may get stuck trying to reconcile and apply a ResourceGroup without completing. This can occur due to some of the following factors: +- The dependencies are not ready. - The templating of the resources fails. - The resources are invalid and cannot be applied. - Garbage collection fails. @@ -407,7 +448,7 @@ and adds a Condition with the following attributes to the ResourceGroup’s - `type: Ready` - `status: "False"` -- `reason: BuildFailed | HealthCheckFailed | ReconciliationFailed` +- `reason: DependencyNotReady | BuildFailed | ReconciliationFailed | HealthCheckFailed` The `message` field of the Condition will contain more information about why the reconciliation failed. 
diff --git a/internal/controller/resourcegroup_controller.go b/internal/controller/resourcegroup_controller.go index aaf06f9..a8a56ee 100644 --- a/internal/controller/resourcegroup_controller.go +++ b/internal/controller/resourcegroup_controller.go @@ -11,6 +11,7 @@ import ( "time" "github.com/fluxcd/cli-utils/pkg/kstatus/polling" + "github.com/fluxcd/cli-utils/pkg/kstatus/status" "github.com/fluxcd/pkg/apis/meta" "github.com/fluxcd/pkg/runtime/conditions" "github.com/fluxcd/pkg/runtime/patch" @@ -38,6 +39,7 @@ type ResourceGroupReconciler struct { client.Client kuberecorder.EventRecorder + APIReader client.Reader Scheme *runtime.Scheme StatusPoller *polling.StatusPoller StatusManager string @@ -95,6 +97,20 @@ func (r *ResourceGroupReconciler) Reconcile(ctx context.Context, req ctrl.Reques return ctrl.Result{}, nil } + // Check dependencies and requeue the reconciliation if the check fails. + if err := r.checkDependencies(ctx, obj); err != nil { + msg := fmt.Sprintf("Retrying dependency check: %s", err.Error()) + if conditions.GetReason(obj, meta.ReadyCondition) != meta.DependencyNotReadyReason { + log.Error(err, "dependency check failed") + r.Event(obj, corev1.EventTypeNormal, meta.DependencyNotReadyReason, msg) + } + conditions.MarkFalse(obj, + meta.ReadyCondition, + meta.DependencyNotReadyReason, + "%s", msg) + return ctrl.Result{RequeueAfter: 5 * time.Second}, nil + } + // Reconcile the object. return r.reconcile(ctx, obj, patcher) } @@ -118,7 +134,7 @@ func (r *ResourceGroupReconciler) reconcile(ctx context.Context, return ctrl.Result{}, fmt.Errorf("failed to update status: %w", err) } - // Build the distribution manifests. + // Build the resources. buildResult, err := builder.BuildResourceGroup(obj.Spec.Resources, obj.GetInputs()) if err != nil { msg := fmt.Sprintf("build failed: %s", err.Error()) @@ -135,7 +151,7 @@ func (r *ResourceGroupReconciler) reconcile(ctx context.Context, return ctrl.Result{}, nil } - // Apply the distribution manifests. 
+ // Apply the resources to the cluster. if err := r.apply(ctx, obj, buildResult); err != nil { msg := fmt.Sprintf("reconciliation failed: %s", err.Error()) conditions.MarkFalse(obj, @@ -162,6 +178,40 @@ func (r *ResourceGroupReconciler) reconcile(ctx context.Context, return requeueAfterResourceGroup(obj), nil } +func (r *ResourceGroupReconciler) checkDependencies(ctx context.Context, + obj *fluxcdv1.ResourceGroup) error { + + for _, dep := range obj.Spec.DependsOn { + depObj := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "apiVersion": dep.APIVersion, + "kind": dep.Kind, + "metadata": map[string]interface{}{ + "name": dep.Name, + "namespace": dep.Namespace, + }, + }, + } + + if err := r.Client.Get(ctx, client.ObjectKeyFromObject(depObj), depObj); err != nil { + return fmt.Errorf("dependency %s/%s/%s not found: %w", dep.APIVersion, dep.Kind, dep.Name, err) + } + + if dep.Ready { + stat, err := status.Compute(depObj) + if err != nil { + return fmt.Errorf("dependency %s/%s/%s not ready: %w", dep.APIVersion, dep.Kind, dep.Name, err) + } + + if stat.Status != status.CurrentStatus { + return fmt.Errorf("dependency %s/%s/%s not ready: status %s", dep.APIVersion, dep.Kind, dep.Name, stat.Status) + } + } + } + + return nil +} + // apply reconciles the resources in the cluster by performing // a server-side apply, pruning of stale resources and waiting // for the resources to become ready. 
diff --git a/internal/controller/resourcegroup_controller_test.go b/internal/controller/resourcegroup_controller_test.go index a743713..333e464 100644 --- a/internal/controller/resourcegroup_controller_test.go +++ b/internal/controller/resourcegroup_controller_test.go @@ -196,9 +196,109 @@ spec: g.Expect(apierrors.IsNotFound(err)).To(BeTrue()) } +func TestResourceGroupReconciler_DependsOn(t *testing.T) { + g := NewWithT(t) + reconciler := getResourceGroupReconciler() + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + + ns, err := testEnv.CreateNamespace(ctx, "test") + g.Expect(err).ToNot(HaveOccurred()) + + objDef := fmt.Sprintf(` +apiVersion: fluxcd.controlplane.io/v1 +kind: ResourceGroup +metadata: + name: tenants + namespace: "%[1]s" +spec: + dependsOn: + - apiVersion: apiextensions.k8s.io/v1 + kind: CustomResourceDefinition + name: fluxinstances.fluxcd.controlplane.io + ready: true + - apiVersion: v1 + kind: ServiceAccount + name: test + namespace: "%[1]s" + resources: + - apiVersion: v1 + kind: ServiceAccount + metadata: + name: readonly + namespace: "%[1]s" +`, ns.Name) + + obj := &fluxcdv1.ResourceGroup{} + err = yaml.Unmarshal([]byte(objDef), obj) + g.Expect(err).ToNot(HaveOccurred()) + + // Initialize the instance. + err = testEnv.Create(ctx, obj) + g.Expect(err).ToNot(HaveOccurred()) + + r, err := reconciler.Reconcile(ctx, reconcile.Request{ + NamespacedName: client.ObjectKeyFromObject(obj), + }) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(r.Requeue).To(BeTrue()) + + // Reconcile with not found dependency. + r, err = reconciler.Reconcile(ctx, reconcile.Request{ + NamespacedName: client.ObjectKeyFromObject(obj), + }) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(r.RequeueAfter).To(Equal(5 * time.Second)) + + // Check if the instance was installed. 
+ result := &fluxcdv1.ResourceGroup{} + err = testClient.Get(ctx, client.ObjectKeyFromObject(obj), result) + g.Expect(err).ToNot(HaveOccurred()) + + logObjectStatus(t, result) + g.Expect(conditions.GetReason(result, meta.ReadyCondition)).To(BeIdenticalTo(meta.DependencyNotReadyReason)) + g.Expect(conditions.GetMessage(result, meta.ReadyCondition)).To(ContainSubstring("test not found")) + + // Create the dependency. + dep := &corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Namespace: ns.Name, + }, + } + + err = testClient.Create(ctx, dep) + g.Expect(err).ToNot(HaveOccurred()) + + // Reconcile with ready dependencies. + r, err = reconciler.Reconcile(ctx, reconcile.Request{ + NamespacedName: client.ObjectKeyFromObject(obj), + }) + g.Expect(err).ToNot(HaveOccurred()) + + // Check if the instance was installed. + resultFinal := &fluxcdv1.ResourceGroup{} + err = testClient.Get(ctx, client.ObjectKeyFromObject(obj), resultFinal) + g.Expect(err).ToNot(HaveOccurred()) + + logObjectStatus(t, resultFinal) + g.Expect(conditions.GetReason(resultFinal, meta.ReadyCondition)).To(BeIdenticalTo(meta.ReconciliationSucceededReason)) + + // Delete the resource group. + err = testClient.Delete(ctx, obj) + g.Expect(err).ToNot(HaveOccurred()) + + r, err = reconciler.Reconcile(ctx, reconcile.Request{ + NamespacedName: client.ObjectKeyFromObject(obj), + }) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(r.IsZero()).To(BeTrue()) +} + func getResourceGroupReconciler() *ResourceGroupReconciler { return &ResourceGroupReconciler{ Client: testClient, + APIReader: testClient, Scheme: NewTestScheme(), StatusPoller: polling.NewStatusPoller(testClient, testEnv.GetRESTMapper(), polling.Options{}), StatusManager: controllerName,