Skip to content

Commit a3dbb96

Browse files
authored
Merge pull request #288 from arangodb/bug-fix/scaling-removing-wrong-nodes
Revisited scale up and scale down.
2 parents 1a35a27 + 783faa3 commit a3dbb96

File tree

5 files changed

+29
-23
lines changed

5 files changed

+29
-23
lines changed

pkg/apis/deployment/v1alpha/member_status_list.go

+6-1
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,12 @@ func (l MemberStatusList) SelectMemberToRemove() (MemberStatus, error) {
142142
if len(l) > 0 {
143143
// Try to find a not ready member
144144
for _, m := range l {
145-
if m.Phase == MemberPhaseNone || !m.Conditions.IsTrue(ConditionTypeReady) {
145+
if m.Phase == MemberPhaseNone {
146+
return m, nil
147+
}
148+
}
149+
for _, m := range l {
150+
if !m.Conditions.IsTrue(ConditionTypeReady) {
146151
return m, nil
147152
}
148153
}

pkg/deployment/reconcile/action_cleanout_member.go

+4
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,10 @@ func (a *actionCleanoutMember) CheckProgress(ctx context.Context) (bool, bool, e
9595
// We wanted to remove and it is already gone. All ok
9696
return true, false, nil
9797
}
98+
// do not try to clean out a pod that was not initialized
99+
if !m.IsInitialized {
100+
return false, true, nil
101+
}
98102
c, err := a.actionCtx.GetDatabaseClient(ctx)
99103
if err != nil {
100104
log.Debug().Err(err).Msg("Failed to create database client")

pkg/deployment/reconcile/action_shutdown_member.go

+4
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,10 @@ func (a *actionShutdownMember) Start(ctx context.Context) (bool, error) {
6363
return true, nil
6464
}
6565
if group.IsArangod() {
66+
// do not try to shut down a pod that is not ready
67+
if !m.Conditions.IsTrue(api.ConditionTypeReady) {
68+
return true, nil
69+
}
6670
// Invoke shutdown endpoint
6771
c, err := a.actionCtx.GetServerClient(ctx, group, a.action.MemberID)
6872
if err != nil {

pkg/deployment/reconcile/plan_builder.go

+3-7
Original file line numberDiff line numberDiff line change
@@ -371,17 +371,13 @@ func createScalePlan(log zerolog.Logger, members api.MemberStatusList, group api
371371
Str("member-id", m.ID).
372372
Str("phase", string(m.Phase)).
373373
Msg("Found member to remove")
374-
if m.Conditions.IsTrue(api.ConditionTypeReady) {
375-
if group == api.ServerGroupDBServers {
376-
plan = append(plan,
377-
api.NewAction(api.ActionTypeCleanOutMember, group, m.ID),
378-
)
379-
}
374+
if group == api.ServerGroupDBServers {
380375
plan = append(plan,
381-
api.NewAction(api.ActionTypeShutdownMember, group, m.ID),
376+
api.NewAction(api.ActionTypeCleanOutMember, group, m.ID),
382377
)
383378
}
384379
plan = append(plan,
380+
api.NewAction(api.ActionTypeShutdownMember, group, m.ID),
385381
api.NewAction(api.ActionTypeRemoveMember, group, m.ID),
386382
)
387383
log.Debug().

pkg/deployment/reconcile/plan_builder_test.go

+12-15
Original file line numberDiff line numberDiff line change
@@ -176,9 +176,11 @@ func TestCreatePlanActiveFailoverScale(t *testing.T) {
176176
}
177177
newPlan, changed = createPlan(log, depl, nil, spec, status, nil, c)
178178
assert.True(t, changed)
179-
require.Len(t, newPlan, 1) // Note: Downscaling is only down 1 at a time
180-
assert.Equal(t, api.ActionTypeRemoveMember, newPlan[0].Type)
179+
require.Len(t, newPlan, 2) // Note: Downscaling is only down 1 at a time
180+
assert.Equal(t, api.ActionTypeShutdownMember, newPlan[0].Type)
181+
assert.Equal(t, api.ActionTypeRemoveMember, newPlan[1].Type)
181182
assert.Equal(t, api.ServerGroupSingle, newPlan[0].Group)
183+
assert.Equal(t, api.ServerGroupSingle, newPlan[1].Group)
182184
}
183185

184186
// TestCreatePlanClusterScale creates a `cluster` deployment to test the creating of scaling plan.
@@ -261,12 +263,6 @@ func TestCreatePlanClusterScale(t *testing.T) {
261263
api.MemberStatus{
262264
ID: "cr1",
263265
PodName: "coordinator1",
264-
Conditions: api.ConditionList{
265-
api.Condition{
266-
Type: api.ConditionTypeReady,
267-
Status: v1.ConditionTrue,
268-
},
269-
},
270266
},
271267
api.MemberStatus{
272268
ID: "cr2",
@@ -277,14 +273,15 @@ func TestCreatePlanClusterScale(t *testing.T) {
277273
spec.Coordinators.Count = util.NewInt(1)
278274
newPlan, changed = createPlan(log, depl, nil, spec, status, nil, c)
279275
assert.True(t, changed)
280-
281-
fmt.Printf("%v", newPlan)
282-
283-
require.Len(t, newPlan, 3) // Note: Downscaling is done 1 at a time
284-
assert.Equal(t, api.ActionTypeRemoveMember, newPlan[0].Type)
276+
require.Len(t, newPlan, 5) // Note: Downscaling is done 1 at a time
277+
assert.Equal(t, api.ActionTypeCleanOutMember, newPlan[0].Type)
285278
assert.Equal(t, api.ActionTypeShutdownMember, newPlan[1].Type)
286279
assert.Equal(t, api.ActionTypeRemoveMember, newPlan[2].Type)
280+
assert.Equal(t, api.ActionTypeShutdownMember, newPlan[3].Type)
281+
assert.Equal(t, api.ActionTypeRemoveMember, newPlan[4].Type)
287282
assert.Equal(t, api.ServerGroupDBServers, newPlan[0].Group)
288-
assert.Equal(t, api.ServerGroupCoordinators, newPlan[1].Group)
289-
assert.Equal(t, api.ServerGroupCoordinators, newPlan[2].Group)
283+
assert.Equal(t, api.ServerGroupDBServers, newPlan[1].Group)
284+
assert.Equal(t, api.ServerGroupDBServers, newPlan[2].Group)
285+
assert.Equal(t, api.ServerGroupCoordinators, newPlan[3].Group)
286+
assert.Equal(t, api.ServerGroupCoordinators, newPlan[4].Group)
290287
}

0 commit comments

Comments
 (0)