Skip to content

Commit f54a8de

Browse files
authored
(bug) handle helm chart errors in pullmode (#1725)
Previously, when Sveltos processed a Helm chart in pull mode, it would generate ConfigurationBundle instances as it prepared the resources. If an error was encountered halfway through this process, Sveltos would still commit the resources it had successfully prepared up to that point. This led to a critical synchronization failure: 1. Partial State: The ConfigurationBundle contained only a subset of the intended resources. 2. Applier Misinterpretation: The applier would treat the missing resources as "deleted" or "out of scope," causing it to prune existing resources on the managed cluster or deploy an incomplete, broken stack. This PR introduces atomic preparation logic. If an error occurs at any point during the preparation of resources for a Helm chart: 1. Discard: All partially prepared resources are discarded. 2. No Commit: Nothing is committed to the ConfigurationBundle, ensuring the applier does not act on incomplete data. 3. Report: The error is captured and reported specifically within the ClusterSummary status for visibility.
1 parent b173798 commit f54a8de

3 files changed

Lines changed: 218 additions & 7 deletions

File tree

controllers/handlers_helm.go

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -928,13 +928,21 @@ func handleCharts(ctx context.Context, clusterSummary *configv1beta1.ClusterSumm
928928
}
929929

930930
if isPullMode {
931-
err = commitStagedResourcesForDeployment(ctx, clusterSummary, configurationHash, mgmtResources, logger)
932-
if err != nil {
933-
return err
931+
// In DryRun mode, commit staged resources even on error (e.g. helm conflict) so the
932+
// sveltos-applier agent processes them and populates ClusterReport.HelmResourceReports.
933+
// No action will be performed by the applier in DryRun except generating the reports.
934+
if deployError == nil || clusterSummary.Spec.ClusterProfileSpec.SyncMode == configv1beta1.SyncModeDryRun {
935+
if err := commitStagedResourcesForDeployment(ctx, clusterSummary, configurationHash, mgmtResources, logger); err != nil {
936+
return err
937+
}
938+
} else {
939+
_ = pullmode.DiscardStagedResourcesForDeployment(ctx, c, clusterSummary.Spec.ClusterNamespace,
940+
clusterSummary.Spec.ClusterName, configv1beta1.ClusterSummaryKind, clusterSummary.Name,
941+
string(libsveltosv1beta1.FeatureHelm), logger)
934942
}
935943

936-
err = updateClusterReportWithHelmReports(ctx, c, clusterSummary, releaseReports)
937-
if err != nil {
944+
// deployError might contain conflicts so continue. So create clusterReports irrespective
945+
if err := updateClusterReportWithHelmReports(ctx, c, clusterSummary, releaseReports); err != nil {
938946
return err
939947
}
940948

test/fv/dryrun_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,7 @@ var _ = Describe("DryRun", Serial, func() {
294294
return nil
295295
}, timeout, pollingInterval).Should(BeNil())
296296

297-
By("Verifying ClusterReport for helm reports")
297+
Byf("Verifying ClusterReport for helm reports for clusterProfile %s", currentClusterProfile.Name)
298298
verifyClusterReportForHelm(clusterReportName, dryRunClusterSummary, currentClusterProfile)
299299

300300
verifyDeployedGroupVersionKind(clusterProfile.Name)
@@ -306,7 +306,7 @@ var _ = Describe("DryRun", Serial, func() {
306306
// be upgrade for mysql helm chart and the Kong ServiceAccount (which were previously reported
307307
// as conflict)
308308

309-
By("Verifying ClusterReport for helm reports")
309+
Byf("Verifying ClusterReport for helm reports for clusterProfile %s", currentClusterProfile.Name)
310310
verifyClusterReportForHelm(clusterReportName, dryRunClusterSummary, currentClusterProfile)
311311

312312
By("Verifying ClusterReport for policy reports")

test/fv/helm_error_test.go

Lines changed: 203 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
/*
2+
Copyright 2026. projectsveltos.io. All rights reserved.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package fv_test
18+
19+
import (
20+
"context"
21+
22+
. "github.com/onsi/ginkgo/v2"
23+
. "github.com/onsi/gomega"
24+
25+
appsv1 "k8s.io/api/apps/v1"
26+
corev1 "k8s.io/api/core/v1"
27+
apierrors "k8s.io/apimachinery/pkg/api/errors"
28+
"k8s.io/apimachinery/pkg/types"
29+
"k8s.io/client-go/util/retry"
30+
31+
configv1beta1 "github.com/projectsveltos/addon-controller/api/v1beta1"
32+
"github.com/projectsveltos/addon-controller/lib/clusterops"
33+
libsveltosv1beta1 "github.com/projectsveltos/libsveltos/api/v1beta1"
34+
"github.com/projectsveltos/libsveltos/lib/k8s_utils"
35+
)
36+
37+
var (
38+
certManagerValues = `apiVersion: v1
39+
kind: ConfigMap
40+
metadata:
41+
name: cert-manager-values
42+
namespace: default
43+
data:
44+
values: |
45+
crds:
46+
enabled: true`
47+
48+
certManagerIncorrectValues = `apiVersion: v1
49+
kind: ConfigMap
50+
metadata:
51+
name: cert-manager-values
52+
namespace: default
53+
data:
54+
values: |
55+
crds:
56+
enabled: true
57+
replicas: 2`
58+
)
59+
60+
var _ = Describe("Feature", Serial, func() {
61+
const (
62+
namePrefix = "helm-error-"
63+
certManager = "cert-manager"
64+
)
65+
66+
It("An error in helm values does not remove helm chart", Label("FV", "PULLMODE"), func() {
67+
Byf("Create a configMap with valid helm values")
68+
configMap, err := k8s_utils.GetUnstructured([]byte(certManagerValues))
69+
Expect(err).To(BeNil())
70+
Expect(k8sClient.Create(context.TODO(), configMap)).To(Succeed())
71+
72+
Byf("Create a ClusterProfile matching Cluster %s/%s", kindWorkloadCluster.GetNamespace(), kindWorkloadCluster.GetName())
73+
clusterProfile := getClusterProfile(namePrefix, map[string]string{key: value})
74+
clusterProfile.Spec.SyncMode = configv1beta1.SyncModeContinuous
75+
Expect(k8sClient.Create(context.TODO(), clusterProfile)).To(Succeed())
76+
77+
verifyClusterProfileMatches(clusterProfile)
78+
79+
verifyClusterSummary(clusterops.ClusterProfileLabelName,
80+
clusterProfile.Name, &clusterProfile.Spec, kindWorkloadCluster.GetNamespace(),
81+
kindWorkloadCluster.GetName(), getClusterType())
82+
83+
Byf("Update ClusterProfile %s to deploy helm charts and referencing ConfigMap in ValuesFrom %s/%s",
84+
clusterProfile.Name, configMap.GetNamespace(), configMap.GetName())
85+
86+
currentClusterProfile := &configv1beta1.ClusterProfile{}
87+
88+
err = retry.RetryOnConflict(retry.DefaultRetry, func() error {
89+
Expect(k8sClient.Get(context.TODO(), types.NamespacedName{Name: clusterProfile.Name},
90+
currentClusterProfile)).To(Succeed())
91+
92+
currentClusterProfile.Spec.HelmCharts = []configv1beta1.HelmChart{
93+
{
94+
RepositoryURL: "https://charts.jetstack.io",
95+
RepositoryName: "jetstack",
96+
ChartName: "jetstack/cert-manager",
97+
ChartVersion: "v1.19.4",
98+
ReleaseName: "cert-manager",
99+
ReleaseNamespace: "cert-manager",
100+
HelmChartAction: configv1beta1.HelmChartActionInstall,
101+
ValuesFrom: []configv1beta1.ValueFrom{
102+
{
103+
Namespace: configMap.GetNamespace(),
104+
Name: configMap.GetName(),
105+
Kind: string(libsveltosv1beta1.ConfigMapReferencedResourceKind),
106+
},
107+
},
108+
},
109+
}
110+
return k8sClient.Update(context.TODO(), currentClusterProfile)
111+
})
112+
Expect(err).To(BeNil())
113+
114+
Expect(k8sClient.Get(context.TODO(),
115+
types.NamespacedName{Name: clusterProfile.Name}, currentClusterProfile)).To(Succeed())
116+
117+
clusterSummary := verifyClusterSummary(clusterops.ClusterProfileLabelName,
118+
currentClusterProfile.Name, &currentClusterProfile.Spec,
119+
kindWorkloadCluster.GetNamespace(), kindWorkloadCluster.GetName(), getClusterType())
120+
121+
Byf("Getting client to access the workload cluster")
122+
workloadClient, err := getKindWorkloadClusterKubeconfig()
123+
Expect(err).To(BeNil())
124+
Expect(workloadClient).ToNot(BeNil())
125+
126+
Byf("Verifying cert-manager deployment is created in the workload cluster")
127+
Eventually(func() error {
128+
depl := &appsv1.Deployment{}
129+
return workloadClient.Get(context.TODO(),
130+
types.NamespacedName{Namespace: certManager, Name: certManager}, depl)
131+
}, timeout, pollingInterval).Should(BeNil())
132+
133+
Byf("Verifying ClusterSummary %s status is set to Deployed for Helm feature", clusterSummary.Name)
134+
verifyFeatureStatusIsProvisioned(kindWorkloadCluster.GetNamespace(), clusterSummary.Name, libsveltosv1beta1.FeatureHelm)
135+
136+
By("Change ConfigMap with HelmValues to contain incorrect values")
137+
incorrectValuesConfigMap, err := k8s_utils.GetUnstructured([]byte(certManagerIncorrectValues))
138+
Expect(err).To(BeNil())
139+
140+
currentConfigMap := &corev1.ConfigMap{}
141+
Expect(k8sClient.Get(context.TODO(),
142+
types.NamespacedName{Namespace: configMap.GetNamespace(), Name: configMap.GetName()},
143+
currentConfigMap)).To(Succeed())
144+
incorrectValuesConfigMap.SetResourceVersion(currentConfigMap.GetResourceVersion())
145+
Expect(k8sClient.Update(context.TODO(), incorrectValuesConfigMap))
146+
147+
Byf("Verifying ClusterSummary reports the error")
148+
Eventually(func() bool {
149+
currentClusterSummary := &configv1beta1.ClusterSummary{}
150+
err = k8sClient.Get(context.TODO(),
151+
types.NamespacedName{Namespace: clusterSummary.Namespace, Name: clusterSummary.Name},
152+
currentClusterSummary)
153+
if err != nil {
154+
return false
155+
}
156+
for i := range currentClusterSummary.Status.FeatureSummaries {
157+
if currentClusterSummary.Status.FeatureSummaries[i].FeatureID == libsveltosv1beta1.FeatureHelm {
158+
return currentClusterSummary.Status.FeatureSummaries[i].FailureMessage != nil
159+
}
160+
}
161+
return false
162+
}, timeout, pollingInterval).Should(BeTrue())
163+
164+
Byf("Verifying cert-manager deployment is still present in the workload cluster")
165+
Eventually(func() error {
166+
depl := &appsv1.Deployment{}
167+
return workloadClient.Get(context.TODO(),
168+
types.NamespacedName{Namespace: certManager, Name: certManager}, depl)
169+
}, timeout, pollingInterval).Should(BeNil())
170+
171+
By("Change ConfigMap with HelmValues to contain correct values")
172+
Expect(k8sClient.Get(context.TODO(),
173+
types.NamespacedName{Namespace: configMap.GetNamespace(), Name: configMap.GetName()},
174+
currentConfigMap)).To(Succeed())
175+
configMap.SetResourceVersion(currentConfigMap.GetResourceVersion())
176+
Expect(k8sClient.Update(context.TODO(), configMap))
177+
178+
Byf("Verifying ClusterSummary %s status is set to Deployed for Helm feature", clusterSummary.Name)
179+
verifyFeatureStatusIsProvisioned(kindWorkloadCluster.GetNamespace(), clusterSummary.Name, libsveltosv1beta1.FeatureHelm)
180+
181+
Byf("Verifying cert-manager deployment is still present in the workload cluster")
182+
Eventually(func() error {
183+
depl := &appsv1.Deployment{}
184+
return workloadClient.Get(context.TODO(),
185+
types.NamespacedName{Namespace: certManager, Name: certManager}, depl)
186+
}, timeout, pollingInterval).Should(BeNil())
187+
188+
deleteClusterProfile(clusterProfile)
189+
190+
Byf("Verifying cert-manager deployment is removed from workload cluster")
191+
Eventually(func() bool {
192+
depl := &appsv1.Deployment{}
193+
err = workloadClient.Get(context.TODO(),
194+
types.NamespacedName{Namespace: certManager, Name: certManager}, depl)
195+
return apierrors.IsNotFound(err)
196+
}, timeout, pollingInterval).Should(BeTrue())
197+
198+
Expect(k8sClient.Get(context.TODO(),
199+
types.NamespacedName{Namespace: configMap.GetNamespace(), Name: configMap.GetName()},
200+
currentConfigMap)).To(Succeed())
201+
Expect(k8sClient.Delete(context.TODO(), currentConfigMap))
202+
})
203+
})

0 commit comments

Comments
 (0)