Skip to content

Commit a99d1a0

Browse files
HCPF-1961: Implement retry mechanism for Get and Set Policy (#870)
Retry read-modify-write when conflict occurs during policy update
1 parent bbdc509 commit a99d1a0

8 files changed

+172
-42
lines changed

.changelog/870.txt

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
```release-note:bug
2+
Fix intermittent conflicts during IAM policy updates
3+
```

internal/clients/iampolicy/iam.go

+4-2
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,14 @@ import (
2020
// Implementations should be created per resource and should keep track of the
2121
// resource identifier.
2222
type ResourceIamUpdater interface {
23-
// Fetch the existing IAM policy attached to a resource.
23+
// GetResourceIamPolicy fetches the existing IAM policy attached to a resource.
2424
GetResourceIamPolicy(context.Context) (*models.HashicorpCloudResourcemanagerPolicy, diag.Diagnostics)
2525

26-
// Replaces the existing IAM Policy attached to a resource.
26+
// SetResourceIamPolicy replaces the existing IAM Policy attached to a resource.
27+
// If an error occurs, a new custom ErrorHTTPStatusCode should be appended to the diagnostics.
2728
SetResourceIamPolicy(ctx context.Context, policy *models.HashicorpCloudResourcemanagerPolicy) (*models.HashicorpCloudResourcemanagerPolicy, diag.Diagnostics)
2829

30+
// GetMutexKey gets the mutex key.
2931
// A mutex guards against concurrent to call to the SetResourceIamPolicy method.
3032
// The mutex key should be globally unique.
3133
GetMutexKey() string

internal/clients/iampolicy/iam_binding_batcher.go

+60-31
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,15 @@ package iampolicy
55

66
import (
77
"context"
8+
"log"
89
"sync"
910
"time"
1011

1112
iamModels "github.com/hashicorp/hcp-sdk-go/clients/cloud-iam/stable/2019-12-10/models"
1213
"github.com/hashicorp/hcp-sdk-go/clients/cloud-resource-manager/stable/2019-12-10/models"
1314
"github.com/hashicorp/terraform-plugin-framework/diag"
1415
"github.com/hashicorp/terraform-provider-hcp/internal/clients"
16+
"github.com/hashicorp/terraform-provider-hcp/internal/customdiags"
1517
"golang.org/x/exp/maps"
1618
)
1719

@@ -184,45 +186,72 @@ func (f *policyFuture) executeModifers(ctx context.Context, u ResourceIamUpdater
184186
return
185187
}
186188

187-
// Get the existing policy
188-
ep, diags := u.GetResourceIamPolicy(ctx)
189-
if diags.HasError() {
190-
f.set(nil, diags)
191-
return
192-
}
189+
backoff := time.Second
190+
maxBackoff := 30 * time.Second
193191

194-
// Determine the principal's we need to lookup their type.
195-
principalSet, diags := f.getPrincipals(ctx, client)
196-
if diags.HasError() {
197-
f.set(nil, diags)
198-
return
199-
}
192+
for {
193+
// Get the existing policy
194+
ep, diags := u.GetResourceIamPolicy(ctx)
195+
if diags.HasError() {
196+
f.set(nil, diags)
197+
return
198+
}
200199

201-
// Remove any bindings needed
202-
bindings := ToMap(ep)
203-
for _, rm := range f.removers {
204-
if members, ok := bindings[rm.RoleID]; ok {
205-
delete(members, rm.Members[0].MemberID)
206-
if len(members) == 0 {
207-
delete(bindings, rm.RoleID)
208-
}
200+
// Determine the principal's we need to lookup their type.
201+
principalSet, diags := f.getPrincipals(ctx, client)
202+
if diags.HasError() {
203+
f.set(nil, diags)
204+
return
209205
}
210-
}
211206

212-
// Go through the setters and apply them
213-
for _, s := range f.setters {
214-
members, ok := bindings[s.RoleID]
215-
if !ok {
216-
members = make(map[string]*models.HashicorpCloudResourcemanagerPolicyBindingMemberType, 4)
217-
bindings[s.RoleID] = members
207+
// Remove any bindings needed
208+
bindings := ToMap(ep)
209+
for _, rm := range f.removers {
210+
if members, ok := bindings[rm.RoleID]; ok {
211+
delete(members, rm.Members[0].MemberID)
212+
if len(members) == 0 {
213+
delete(bindings, rm.RoleID)
214+
}
215+
}
218216
}
219217

220-
members[s.Members[0].MemberID] = principalSet[s.Members[0].MemberID]
221-
}
218+
// Go through the setters and apply them
219+
for _, s := range f.setters {
220+
members, ok := bindings[s.RoleID]
221+
if !ok {
222+
members = make(map[string]*models.HashicorpCloudResourcemanagerPolicyBindingMemberType, 4)
223+
bindings[s.RoleID] = members
224+
}
222225

223-
// Apply the policy
224-
f.set(u.SetResourceIamPolicy(ctx, FromMap(ep.Etag, bindings)))
226+
members[s.Members[0].MemberID] = principalSet[s.Members[0].MemberID]
227+
}
225228

229+
// Apply the policy
230+
ep, diags = u.SetResourceIamPolicy(ctx, FromMap(ep.Etag, bindings))
231+
if diags.HasError() {
232+
if customdiags.HasConflictError(diags) {
233+
// Policy object has changed since it was last gotten and the etag is now different.
234+
// Continuously retry getting and setting the policy with an increasing backoff period until the maximum backoff period is reached.
235+
if backoff > maxBackoff {
236+
log.Printf("[DEBUG]: Maximum backoff time reached. Aborting operation.")
237+
f.set(nil, diags)
238+
return
239+
}
240+
log.Printf("[DEBUG]: Operation failed due to conflicts. Operation will be restarted after %s", backoff)
241+
// Pause the execution for the duration specified by the current backoff time.
242+
time.Sleep(backoff)
243+
// Double the backoff time to increase the delay for the next retry.
244+
backoff *= 2
245+
continue
246+
}
247+
f.set(nil, diags)
248+
return
249+
}
250+
251+
// Successfully applied policy
252+
f.set(ep, nil)
253+
return
254+
}
226255
}
227256

228257
// getPrincipals returns a map of principal_id to binding type. The binding type
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
package customdiags
2+
3+
import (
4+
"net/http"
5+
6+
"github.com/hashicorp/terraform-plugin-framework/diag"
7+
)
8+
9+
// ErrorHTTPStatusCode is an error diagnostic that stored the error code.
10+
type ErrorHTTPStatusCode struct {
11+
detail string
12+
summary string
13+
HTTPStatusCode int
14+
}
15+
16+
// Detail returns the diagnostic detail.
17+
func (d ErrorHTTPStatusCode) Detail() string {
18+
return d.detail
19+
}
20+
21+
// Equal returns true if the other diagnostic is equivalent.
22+
func (d ErrorHTTPStatusCode) Equal(o diag.Diagnostic) bool {
23+
ed, ok := o.(ErrorHTTPStatusCode)
24+
25+
if !ok {
26+
return false
27+
}
28+
29+
return ed.Summary() == d.Summary() && ed.Detail() == d.Detail() && ed.HTTPStatusCode == d.HTTPStatusCode
30+
}
31+
32+
// Summary returns the diagnostic summary.
33+
func (d ErrorHTTPStatusCode) Summary() string {
34+
return d.summary
35+
}
36+
37+
// Severity returns the diagnostic severity.
38+
func (d ErrorHTTPStatusCode) Severity() diag.Severity {
39+
return diag.SeverityError
40+
}
41+
42+
// NewErrorHTTPStatusCode returns a new error severity diagnostic with the given summary, detail and error code.
43+
func NewErrorHTTPStatusCode(summary string, detail string, statusCode int) ErrorHTTPStatusCode {
44+
return ErrorHTTPStatusCode{
45+
detail: detail,
46+
summary: summary,
47+
HTTPStatusCode: statusCode,
48+
}
49+
}
50+
51+
// HasConflictError checks if a diagnostic has a specific error code.
52+
func HasConflictError(diags diag.Diagnostics) bool {
53+
for _, d := range diags {
54+
diag, ok := d.(*ErrorHTTPStatusCode)
55+
if !ok {
56+
return false
57+
}
58+
if diag.HTTPStatusCode == http.StatusConflict {
59+
return true
60+
}
61+
}
62+
return false
63+
}

internal/provider/iam/resource_group_iam_policy.go

+8-3
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"github.com/hashicorp/terraform-plugin-framework/types"
2020
"github.com/hashicorp/terraform-provider-hcp/internal/clients"
2121
"github.com/hashicorp/terraform-provider-hcp/internal/clients/iampolicy"
22+
"github.com/hashicorp/terraform-provider-hcp/internal/customdiags"
2223
"github.com/hashicorp/terraform-provider-hcp/internal/provider/iam/helper"
2324
)
2425

@@ -100,8 +101,7 @@ func (u *groupIAMPolicyUpdater) GetResourceIamPolicy(ctx context.Context) (*mode
100101
// Groups do not have a policy by default
101102
return &models.HashicorpCloudResourcemanagerPolicy{}, diags
102103
}
103-
104-
diags.AddError("failed to retrieve group IAM policy", err.Error())
104+
diags.Append(customdiags.NewErrorHTTPStatusCode("failed to retrieve group IAM policy", err.Error(), serviceErr.Code()))
105105
return nil, diags
106106
}
107107

@@ -119,7 +119,12 @@ func (u *groupIAMPolicyUpdater) SetResourceIamPolicy(ctx context.Context, policy
119119

120120
res, err := u.client.ResourceService.ResourceServiceSetIamPolicy(params, nil)
121121
if err != nil {
122-
diags.AddError("failed to set group IAM policy", err.Error())
122+
serviceErr, ok := err.(*resource_service.ResourceServiceSetIamPolicyDefault)
123+
if !ok {
124+
diags.AddError("failed to cast resource IAM policy error", err.Error())
125+
return nil, diags
126+
}
127+
diags.Append(customdiags.NewErrorHTTPStatusCode("failed to set group IAM policy", err.Error(), serviceErr.Code()))
123128
return nil, diags
124129
}
125130

internal/provider/resourcemanager/resource_organization_iam_policy.go

+13-2
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
"github.com/hashicorp/terraform-plugin-framework/resource/schema"
1414
"github.com/hashicorp/terraform-provider-hcp/internal/clients"
1515
"github.com/hashicorp/terraform-provider-hcp/internal/clients/iampolicy"
16+
"github.com/hashicorp/terraform-provider-hcp/internal/customdiags"
1617
)
1718

1819
// orgIAMSchema is the schema for the organization IAM resources
@@ -64,7 +65,12 @@ func (u *orgIAMPolicyUpdater) GetResourceIamPolicy(ctx context.Context) (*models
6465
params.ID = u.client.Config.OrganizationID
6566
res, err := u.client.Organization.OrganizationServiceGetIamPolicy(params, nil)
6667
if err != nil {
67-
diags.AddError("failed to retrieve organization IAM policy", err.Error())
68+
serviceErr, ok := err.(*organization_service.OrganizationServiceGetIamPolicyDefault)
69+
if !ok {
70+
diags.AddError("failed to cast organization IAM policy error", err.Error())
71+
return nil, diags
72+
}
73+
diags.Append(customdiags.NewErrorHTTPStatusCode("failed to retrieve organization IAM policy", err.Error(), serviceErr.Code()))
6874
return nil, diags
6975
}
7076

@@ -82,7 +88,12 @@ func (u *orgIAMPolicyUpdater) SetResourceIamPolicy(ctx context.Context, policy *
8288

8389
res, err := u.client.Organization.OrganizationServiceSetIamPolicy(params, nil)
8490
if err != nil {
85-
diags.AddError("failed to retrieve organization IAM policy", err.Error())
91+
serviceErr, ok := err.(*organization_service.OrganizationServiceSetIamPolicyDefault)
92+
if !ok {
93+
diags.AddError("failed to cast organization IAM policy error", err.Error())
94+
return nil, diags
95+
}
96+
diags.Append(customdiags.NewErrorHTTPStatusCode("failed to update organization IAM policy", err.Error(), serviceErr.Code()))
8697
return nil, diags
8798
}
8899

internal/provider/resourcemanager/resource_project_iam_policy.go

+13-2
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717
"github.com/hashicorp/terraform-plugin-framework/types"
1818
"github.com/hashicorp/terraform-provider-hcp/internal/clients"
1919
"github.com/hashicorp/terraform-provider-hcp/internal/clients/iampolicy"
20+
"github.com/hashicorp/terraform-provider-hcp/internal/customdiags"
2021
)
2122

2223
// projectIAMSchema is the schema for the project IAM resources
@@ -86,7 +87,12 @@ func (u *projectIAMPolicyUpdater) GetResourceIamPolicy(ctx context.Context) (*mo
8687
params.ID = u.projectID
8788
res, err := u.client.Project.ProjectServiceGetIamPolicy(params, nil)
8889
if err != nil {
89-
diags.AddError("failed to retrieve project IAM policy", err.Error())
90+
serviceErr, ok := err.(*project_service.ProjectServiceGetIamPolicyDefault)
91+
if !ok {
92+
diags.AddError("failed to cast project IAM policy error", err.Error())
93+
return nil, diags
94+
}
95+
diags.Append(customdiags.NewErrorHTTPStatusCode("failed to retrieve project IAM policy", err.Error(), serviceErr.Code()))
9096
return nil, diags
9197
}
9298

@@ -104,7 +110,12 @@ func (u *projectIAMPolicyUpdater) SetResourceIamPolicy(ctx context.Context, poli
104110

105111
res, err := u.client.Project.ProjectServiceSetIamPolicy(params, nil)
106112
if err != nil {
107-
diags.AddError("failed to retrieve project IAM policy", err.Error())
113+
serviceErr, ok := err.(*project_service.ProjectServiceSetIamPolicyDefault)
114+
if !ok {
115+
diags.AddError("failed to cast project IAM policy error", err.Error())
116+
return nil, diags
117+
}
118+
diags.Append(customdiags.NewErrorHTTPStatusCode("failed to update project IAM policy", err.Error(), serviceErr.Code()))
108119
return nil, diags
109120
}
110121

internal/provider/vaultsecrets/resource_vault_secrets_app_iam_policy.go

+8-2
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
"github.com/hashicorp/terraform-plugin-framework/types"
1919
"github.com/hashicorp/terraform-provider-hcp/internal/clients"
2020
"github.com/hashicorp/terraform-provider-hcp/internal/clients/iampolicy"
21+
"github.com/hashicorp/terraform-provider-hcp/internal/customdiags"
2122
)
2223

2324
// vaultSecretsAppIAMSchema is the schema for the vault secret resource IAM resources
@@ -92,7 +93,7 @@ func (u *vaultSecretsAppResourceIAMPolicyUpdater) GetResourceIamPolicy(ctx conte
9293
if serviceErr.Code() == http.StatusNotFound {
9394
return &models.HashicorpCloudResourcemanagerPolicy{}, diags
9495
}
95-
diags.AddError("failed to retrieve resource IAM policy", err.Error())
96+
diags.Append(customdiags.NewErrorHTTPStatusCode("failed to retrieve resource IAM policy", err.Error(), serviceErr.Code()))
9697
return nil, diags
9798
}
9899

@@ -111,7 +112,12 @@ func (u *vaultSecretsAppResourceIAMPolicyUpdater) SetResourceIamPolicy(ctx conte
111112

112113
res, err := u.client.ResourceService.ResourceServiceSetIamPolicy(params, nil)
113114
if err != nil {
114-
diags.AddError("failed to retrieve resource IAM policy", err.Error())
115+
serviceErr, ok := err.(*resource_service.ResourceServiceSetIamPolicyDefault)
116+
if !ok {
117+
diags.AddError("failed to cast resource IAM policy error", err.Error())
118+
return nil, diags
119+
}
120+
diags.Append(customdiags.NewErrorHTTPStatusCode("failed to update resource IAM policy", err.Error(), serviceErr.Code()))
115121
return nil, diags
116122
}
117123

0 commit comments

Comments
 (0)