Skip to content

Commit 180d469

Browse files
authored
Merge pull request #180 from arangodb/bugfix/min-2-dbservers
Min dbserver count is 2. Revert phase when cleanout has failed
2 parents b5a732a + 589fe67 commit 180d469

File tree

4 files changed

+36
-7
lines changed

4 files changed

+36
-7
lines changed

pkg/apis/deployment/v1alpha/server_group_spec.go

+11
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ func (s ServerGroupSpec) Validate(group ServerGroup, used bool, mode DeploymentM
7777
if used {
7878
minCount := 1
7979
if env == EnvironmentProduction {
80+
// Set validation boundaries for production mode
8081
switch group {
8182
case ServerGroupSingle:
8283
if mode == DeploymentModeActiveFailover {
@@ -87,6 +88,16 @@ func (s ServerGroupSpec) Validate(group ServerGroup, used bool, mode DeploymentM
8788
case ServerGroupDBServers, ServerGroupCoordinators, ServerGroupSyncMasters, ServerGroupSyncWorkers:
8889
minCount = 2
8990
}
91+
} else {
92+
// Set validation boundaries for development mode
93+
switch group {
94+
case ServerGroupSingle:
95+
if mode == DeploymentModeActiveFailover {
96+
minCount = 2
97+
}
98+
case ServerGroupDBServers:
99+
minCount = 2
100+
}
90101
}
91102
if s.GetCount() < minCount {
92103
return maskAny(errors.Wrapf(ValidationError, "Invalid count value %d. Expected >= %d", s.GetCount(), minCount))

pkg/deployment/cluster_scaling_integration.go

+16-6
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ import (
3333
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1alpha"
3434
"github.com/arangodb/kube-arangodb/pkg/util"
3535
"github.com/arangodb/kube-arangodb/pkg/util/arangod"
36+
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
3637
)
3738

3839
// clusterScalingIntegration is a helper to communicate with the clusters
@@ -150,15 +151,24 @@ func (ci *clusterScalingIntegration) inspectCluster(ctx context.Context, expectS
150151
log.Debug().Err(err).Msg("Failed to get current deployment")
151152
return maskAny(err)
152153
}
154+
newSpec := current.Spec.DeepCopy()
153155
if coordinatorsChanged {
154-
current.Spec.Coordinators.Count = util.NewInt(req.GetCoordinators())
156+
newSpec.Coordinators.Count = util.NewInt(req.GetCoordinators())
155157
}
156158
if dbserversChanged {
157-
current.Spec.DBServers.Count = util.NewInt(req.GetDBServers())
158-
}
159-
if err := ci.depl.updateCRSpec(current.Spec); err != nil {
160-
log.Warn().Err(err).Msg("Failed to update current deployment")
161-
return maskAny(err)
159+
newSpec.DBServers.Count = util.NewInt(req.GetDBServers())
160+
}
161+
if err := newSpec.Validate(); err != nil {
162+
// Log failure & create event
163+
log.Warn().Err(err).Msg("Validation of updated spec has failed")
164+
ci.depl.CreateEvent(k8sutil.NewErrorEvent("Validation failed", err, apiObject))
165+
// Restore original spec in cluster
166+
ci.SendUpdateToCluster(current.Spec)
167+
} else {
168+
if err := ci.depl.updateCRSpec(*newSpec); err != nil {
169+
log.Warn().Err(err).Msg("Failed to update current deployment")
170+
return maskAny(err)
171+
}
162172
}
163173
return nil
164174
}

pkg/deployment/reconcile/action_cleanout_member.go

+6
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,12 @@ func (a *actionCleanoutMember) CheckProgress(ctx context.Context) (bool, bool, e
131131
}
132132
if jobStatus.IsFailed() {
133133
log.Warn().Str("reason", jobStatus.Reason()).Msg("Cleanout Job failed. Aborting plan")
134+
// Revert cleanout state
135+
m.Phase = api.MemberPhaseCreated
136+
m.CleanoutJobID = ""
137+
if err := a.actionCtx.UpdateMember(m); err != nil {
138+
return false, false, maskAny(err)
139+
}
134140
return false, true, nil
135141
}
136142
return false, false, nil

pkg/deployment/reconcile/plan_builder.go

+3-1
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,9 @@ func createScalePlan(log zerolog.Logger, members api.MemberStatusList, group api
359359
Msg("Creating scale-up plan")
360360
} else if len(members) > count {
361361
// Note, we scale down 1 member at a time
362-
if m, err := members.SelectMemberToRemove(); err == nil {
362+
if m, err := members.SelectMemberToRemove(); err != nil {
363+
log.Warn().Err(err).Str("role", group.AsRole()).Msg("Failed to select member to remove")
364+
} else {
363365
if group == api.ServerGroupDBServers {
364366
plan = append(plan,
365367
api.NewAction(api.ActionTypeCleanOutMember, group, m.ID),

0 commit comments

Comments
 (0)