Skip to content

Commit c6db96d

Browse files
authored
[Feature] Allow to recreate Local volumes (#1319)
1 parent 64cfaaf commit c6db96d

13 files changed

+281
-2
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
- (Bugfix) Fix creating sync components with EA type set to Managed and headless svc
1212
- (Feature) Check if Volume with LocalStorage is missing
1313
- (Feature) Add disallowConcurrent option to ArangoBackupPolicy
14+
- (Feature) Allow to recreate Local volumes
1415

1516
## [1.2.27](https://github.com/arangodb/kube-arangodb/tree/1.2.27) (2023-04-27)
1617
- (Feature) Add InSync Cache

docs/generated/actions.md

+2
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
| RecreateMember | no | 15m0s | no | Community & Enterprise | Recreate member with same ID and Data |
5252
| RefreshTLSKeyfileCertificate | no | 30m0s | no | Enterprise Only | Recreate Server TLS Certificate secret |
5353
| RemoveMember | no | 15m0s | no | Community & Enterprise | Removes member from the Cluster and Status |
54+
| RemoveMemberPVC | no | 15m0s | no | Community & Enterprise | Removes member PVC and enforces recreate procedure |
5455
| RenewTLSCACertificate | no | 30m0s | no | Enterprise Only | Recreate Managed CA secret |
5556
| RenewTLSCertificate | no | 30m0s | no | Enterprise Only | Recreate Server TLS Certificate secret |
5657
| ResignLeadership | no | 30m0s | yes | Community & Enterprise | Run the ResignLeadership job on DBServer |
@@ -139,6 +140,7 @@ spec:
139140
RecreateMember: 15m0s
140141
RefreshTLSKeyfileCertificate: 30m0s
141142
RemoveMember: 15m0s
143+
RemoveMemberPVC: 15m0s
142144
RenewTLSCACertificate: 30m0s
143145
RenewTLSCertificate: 30m0s
144146
ResignLeadership: 30m0s

internal/actions.yaml

+3
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ actions:
1212
RemoveMember:
1313
description: Removes member from the Cluster and Status
1414
timeout: 15m
15+
RemoveMemberPVC:
16+
description: Removes member PVC and enforces recreate procedure
17+
timeout: 15m
1518
RecreateMember:
1619
description: Recreate member with same ID and Data
1720
timeout: 15m

pkg/apis/deployment/v1/actions.generated.go

+10
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,8 @@ const (
117117
ActionRefreshTLSKeyfileCertificateDefaultTimeout time.Duration = 1800 * time.Second // 30m0s
118118
// ActionRemoveMemberDefaultTimeout define default timeout for action ActionRemoveMember
119119
ActionRemoveMemberDefaultTimeout time.Duration = 900 * time.Second // 15m0s
120+
// ActionRemoveMemberPVCDefaultTimeout define default timeout for action ActionRemoveMemberPVC
121+
ActionRemoveMemberPVCDefaultTimeout time.Duration = 900 * time.Second // 15m0s
120122
// ActionRenewTLSCACertificateDefaultTimeout define default timeout for action ActionRenewTLSCACertificate
121123
ActionRenewTLSCACertificateDefaultTimeout time.Duration = 1800 * time.Second // 30m0s
122124
// ActionRenewTLSCertificateDefaultTimeout define default timeout for action ActionRenewTLSCertificate
@@ -276,6 +278,8 @@ const (
276278
ActionTypeRefreshTLSKeyfileCertificate ActionType = "RefreshTLSKeyfileCertificate"
277279
// ActionTypeRemoveMember in scopes Normal. Removes member from the Cluster and Status
278280
ActionTypeRemoveMember ActionType = "RemoveMember"
281+
// ActionTypeRemoveMemberPVC in scopes Normal. Removes member PVC and enforces recreate procedure
282+
ActionTypeRemoveMemberPVC ActionType = "RemoveMemberPVC"
279283
// ActionTypeRenewTLSCACertificate in scopes Normal. Recreate Managed CA secret
280284
ActionTypeRenewTLSCACertificate ActionType = "RenewTLSCACertificate"
281285
// ActionTypeRenewTLSCertificate in scopes Normal. Recreate Server TLS Certificate secret
@@ -436,6 +440,8 @@ func (a ActionType) DefaultTimeout() time.Duration {
436440
return ActionRefreshTLSKeyfileCertificateDefaultTimeout
437441
case ActionTypeRemoveMember:
438442
return ActionRemoveMemberDefaultTimeout
443+
case ActionTypeRemoveMemberPVC:
444+
return ActionRemoveMemberPVCDefaultTimeout
439445
case ActionTypeRenewTLSCACertificate:
440446
return ActionRenewTLSCACertificateDefaultTimeout
441447
case ActionTypeRenewTLSCertificate:
@@ -600,6 +606,8 @@ func (a ActionType) Priority() ActionPriority {
600606
return ActionPriorityNormal
601607
case ActionTypeRemoveMember:
602608
return ActionPriorityNormal
609+
case ActionTypeRemoveMemberPVC:
610+
return ActionPriorityNormal
603611
case ActionTypeRenewTLSCACertificate:
604612
return ActionPriorityNormal
605613
case ActionTypeRenewTLSCertificate:
@@ -774,6 +782,8 @@ func (a ActionType) Optional() bool {
774782
return false
775783
case ActionTypeRemoveMember:
776784
return false
785+
case ActionTypeRemoveMemberPVC:
786+
return false
777787
case ActionTypeRenewTLSCACertificate:
778788
return false
779789
case ActionTypeRenewTLSCertificate:

pkg/apis/deployment/v2alpha1/actions.generated.go

+10
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,8 @@ const (
117117
ActionRefreshTLSKeyfileCertificateDefaultTimeout time.Duration = 1800 * time.Second // 30m0s
118118
// ActionRemoveMemberDefaultTimeout define default timeout for action ActionRemoveMember
119119
ActionRemoveMemberDefaultTimeout time.Duration = 900 * time.Second // 15m0s
120+
// ActionRemoveMemberPVCDefaultTimeout define default timeout for action ActionRemoveMemberPVC
121+
ActionRemoveMemberPVCDefaultTimeout time.Duration = 900 * time.Second // 15m0s
120122
// ActionRenewTLSCACertificateDefaultTimeout define default timeout for action ActionRenewTLSCACertificate
121123
ActionRenewTLSCACertificateDefaultTimeout time.Duration = 1800 * time.Second // 30m0s
122124
// ActionRenewTLSCertificateDefaultTimeout define default timeout for action ActionRenewTLSCertificate
@@ -276,6 +278,8 @@ const (
276278
ActionTypeRefreshTLSKeyfileCertificate ActionType = "RefreshTLSKeyfileCertificate"
277279
// ActionTypeRemoveMember in scopes Normal. Removes member from the Cluster and Status
278280
ActionTypeRemoveMember ActionType = "RemoveMember"
281+
// ActionTypeRemoveMemberPVC in scopes Normal. Removes member PVC and enforces recreate procedure
282+
ActionTypeRemoveMemberPVC ActionType = "RemoveMemberPVC"
279283
// ActionTypeRenewTLSCACertificate in scopes Normal. Recreate Managed CA secret
280284
ActionTypeRenewTLSCACertificate ActionType = "RenewTLSCACertificate"
281285
// ActionTypeRenewTLSCertificate in scopes Normal. Recreate Server TLS Certificate secret
@@ -436,6 +440,8 @@ func (a ActionType) DefaultTimeout() time.Duration {
436440
return ActionRefreshTLSKeyfileCertificateDefaultTimeout
437441
case ActionTypeRemoveMember:
438442
return ActionRemoveMemberDefaultTimeout
443+
case ActionTypeRemoveMemberPVC:
444+
return ActionRemoveMemberPVCDefaultTimeout
439445
case ActionTypeRenewTLSCACertificate:
440446
return ActionRenewTLSCACertificateDefaultTimeout
441447
case ActionTypeRenewTLSCertificate:
@@ -600,6 +606,8 @@ func (a ActionType) Priority() ActionPriority {
600606
return ActionPriorityNormal
601607
case ActionTypeRemoveMember:
602608
return ActionPriorityNormal
609+
case ActionTypeRemoveMemberPVC:
610+
return ActionPriorityNormal
603611
case ActionTypeRenewTLSCACertificate:
604612
return ActionPriorityNormal
605613
case ActionTypeRenewTLSCertificate:
@@ -774,6 +782,8 @@ func (a ActionType) Optional() bool {
774782
return false
775783
case ActionTypeRemoveMember:
776784
return false
785+
case ActionTypeRemoveMemberPVC:
786+
return false
777787
case ActionTypeRenewTLSCACertificate:
778788
return false
779789
case ActionTypeRenewTLSCertificate:

pkg/deployment/agency/state.go

+24
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,30 @@ func (s State) PlanServers() Servers {
271271
return r
272272
}
273273

274+
// PlanLeaderServers returns all servers which are part of the plan as a leader
275+
func (s State) PlanLeaderServers() Servers {
276+
q := map[Server]bool{}
277+
278+
for _, db := range s.Plan.Collections {
279+
for _, col := range db {
280+
for _, shards := range col.Shards {
281+
if len(shards) == 0 {
282+
continue
283+
}
284+
q[shards[0]] = true
285+
}
286+
}
287+
}
288+
289+
r := make([]Server, 0, len(q))
290+
291+
for k := range q {
292+
r = append(r, k)
293+
}
294+
295+
return r
296+
}
297+
274298
type CollectionShardDetails []CollectionShardDetail
275299

276300
type CollectionShardDetail struct {

pkg/deployment/features/volumes.go

+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
//
2+
// DISCLAIMER
3+
//
4+
// Copyright 2023 ArangoDB GmbH, Cologne, Germany
5+
//
6+
// Licensed under the Apache License, Version 2.0 (the "License");
7+
// you may not use this file except in compliance with the License.
8+
// You may obtain a copy of the License at
9+
//
10+
// http://www.apache.org/licenses/LICENSE-2.0
11+
//
12+
// Unless required by applicable law or agreed to in writing, software
13+
// distributed under the License is distributed on an "AS IS" BASIS,
14+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
// See the License for the specific language governing permissions and
16+
// limitations under the License.
17+
//
18+
// Copyright holder is ArangoDB GmbH, Cologne, Germany
19+
//
20+
21+
package features
22+
23+
// init passes localVolumeReplacementCheck to registerFeature when the package is loaded.
func init() {
24+
registerFeature(localVolumeReplacementCheck)
25+
}
26+
27+
var localVolumeReplacementCheck Feature = &feature{
28+
name: "local-volume-replacement-check",
29+
description: "Replace volume for local-storage if volume is unschedulable (ex. node is gone)",
30+
version: "3.6.0",
31+
enterpriseRequired: false,
32+
enabledByDefault: false,
33+
}
34+
35+
// LocalVolumeReplacementCheck returns the package-level
// "local-volume-replacement-check" feature descriptor.
func LocalVolumeReplacementCheck() Feature {
36+
return localVolumeReplacementCheck
37+
}

pkg/deployment/reconcile/action.register.generated.go

+15
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,9 @@ var (
165165
_ Action = &actionRemoveMember{}
166166
_ actionFactory = newRemoveMemberAction
167167

168+
_ Action = &actionRemoveMemberPVC{}
169+
_ actionFactory = newRemoveMemberPVCAction
170+
168171
_ Action = &actionRenewTLSCACertificate{}
169172
_ actionFactory = newRenewTLSCACertificateAction
170173

@@ -817,6 +820,18 @@ func init() {
817820
registerAction(action, function)
818821
}
819822

823+
// RemoveMemberPVC
824+
{
825+
// Get Action definition
826+
function := newRemoveMemberPVCAction
827+
action := api.ActionTypeRemoveMemberPVC
828+
829+
// Wrap action main function
830+
831+
// Register action
832+
registerAction(action, function)
833+
}
834+
820835
// RenewTLSCACertificate
821836
{
822837
// Get Action definition

pkg/deployment/reconcile/action.register.generated_test.go

+10
Original file line numberDiff line numberDiff line change
@@ -490,6 +490,16 @@ func Test_Actions(t *testing.T) {
490490
})
491491
})
492492

493+
t.Run("RemoveMemberPVC", func(t *testing.T) {
494+
ActionsExistence(t, api.ActionTypeRemoveMemberPVC)
495+
t.Run("Internal", func(t *testing.T) {
496+
require.False(t, api.ActionTypeRemoveMemberPVC.Internal())
497+
})
498+
t.Run("Optional", func(t *testing.T) {
499+
require.False(t, api.ActionTypeRemoveMemberPVC.Optional())
500+
})
501+
})
502+
493503
t.Run("RenewTLSCACertificate", func(t *testing.T) {
494504
ActionsExistence(t, api.ActionTypeRenewTLSCACertificate)
495505
t.Run("Internal", func(t *testing.T) {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
//
2+
// DISCLAIMER
3+
//
4+
// Copyright 2023 ArangoDB GmbH, Cologne, Germany
5+
//
6+
// Licensed under the Apache License, Version 2.0 (the "License");
7+
// you may not use this file except in compliance with the License.
8+
// You may obtain a copy of the License at
9+
//
10+
// http://www.apache.org/licenses/LICENSE-2.0
11+
//
12+
// Unless required by applicable law or agreed to in writing, software
13+
// distributed under the License is distributed on an "AS IS" BASIS,
14+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
// See the License for the specific language governing permissions and
16+
// limitations under the License.
17+
//
18+
// Copyright holder is ArangoDB GmbH, Cologne, Germany
19+
//
20+
21+
package reconcile
22+
23+
import (
24+
"context"
25+
26+
apiErrors "k8s.io/apimachinery/pkg/api/errors"
27+
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
28+
29+
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
30+
"github.com/arangodb/kube-arangodb/pkg/deployment/agency"
31+
"github.com/arangodb/kube-arangodb/pkg/util/errors"
32+
"github.com/arangodb/kube-arangodb/pkg/util/globals"
33+
)
34+
35+
// newRemoveMemberPVCAction creates a new Action that implements the given
36+
// planned RemoveMemberPVC action.
37+
func newRemoveMemberPVCAction(action api.Action, actionCtx ActionContext) Action {
38+
a := &actionRemoveMemberPVC{}
39+
40+
a.actionImpl = newActionImplDefRef(action, actionCtx)
41+
42+
return a
43+
}
44+
45+
// actionRemoveMemberPVC implements the RemoveMemberPVC action.
46+
type actionRemoveMemberPVC struct {
47+
// actionImpl implements timeout and member id functions
48+
actionImpl
49+
50+
// actionEmptyCheckProgress implements check progress with an empty implementation
51+
actionEmptyCheckProgress
52+
}
53+
54+
// Start performs the start of the action.
55+
// Returns true if the action is completely finished, false in case
56+
// the start time needs to be recorded and a ready condition needs to be checked.
57+
func (a *actionRemoveMemberPVC) Start(ctx context.Context) (bool, error) {
58+
m, ok := a.actionCtx.GetMemberStatusByID(a.action.MemberID)
59+
if !ok {
60+
return true, nil
61+
}
62+
63+
pvcUID, ok := a.action.GetParam("pvc")
64+
if !ok {
65+
return true, errors.Newf("PVC UID Parameter is missing")
66+
}
67+
68+
cache, ok := a.actionCtx.ACS().ClusterCache(m.ClusterID)
69+
if !ok {
70+
return true, errors.Newf("Cluster is not ready")
71+
}
72+
73+
agencyCache, ok := a.actionCtx.GetAgencyCache()
74+
if !ok {
75+
return true, errors.Newf("Agency is not ready")
76+
}
77+
78+
if agencyCache.PlanLeaderServers().Contains(agency.Server(m.ID)) {
79+
return true, errors.Newf("Server is still used in cluster")
80+
}
81+
82+
// We are safe to remove PVC
83+
if pvcStatus := m.PersistentVolumeClaim; pvcStatus != nil {
84+
if n := pvcStatus.GetName(); n != "" {
85+
nctx, c := globals.GetGlobalTimeouts().Kubernetes().WithTimeout(ctx)
86+
defer c()
87+
err := cache.PersistentVolumeClaimsModInterface().V1().Delete(nctx, n, meta.DeleteOptions{
88+
Preconditions: meta.NewUIDPreconditions(pvcUID),
89+
})
90+
91+
if err != nil {
92+
if apiErrors.IsNotFound(err) {
93+
// PVC is already gone
94+
return true, nil
95+
}
96+
97+
if apiErrors.IsConflict(err) {
98+
// UID Changed, all fine
99+
return true, nil
100+
}
101+
102+
return true, err
103+
}
104+
}
105+
}
106+
107+
return true, nil
108+
}

pkg/deployment/reconcile/plan_builder_high.go

+1
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ func (r *Reconciler) createHighPlan(ctx context.Context, apiObject k8sutil.APIOb
6363
ApplyIfEmpty(r.createRebalancerCheckPlan).
6464
ApplyIfEmpty(r.createMemberFailedRestoreHighPlan).
6565
ApplyIfEmpty(r.scaleDownCandidate).
66+
ApplyIfEmpty(r.volumeMemberReplacement).
6667
ApplyWithBackOff(BackOffCheck, time.Minute, r.emptyPlanBuilder)).
6768
ApplyIfEmptyWithBackOff(TimezoneCheck, time.Minute, r.createTimezoneUpdatePlan).
6869
Apply(r.createBackupInProgressConditionPlan). // Discover backups always

0 commit comments

Comments
 (0)