1
+ # Based on the official aws-node-termination-handler setup guide at https://github.com/aws/aws-node-termination-handler#infrastructure-setup
2
+
1
3
# AWS provider configuration; the region is taken from the `region` input variable.
provider "aws" {
  region = var.region
}
@@ -50,7 +52,7 @@ module "vpc" {
50
52
enable_dns_hostnames = true
51
53
}
52
54
53
- data "aws_iam_policy_document" "node_term " {
55
+ data "aws_iam_policy_document" "aws_node_termination_handler " {
54
56
statement {
55
57
effect = " Allow"
56
58
actions = [
@@ -76,22 +78,17 @@ data "aws_iam_policy_document" "node_term" {
76
78
" sqs:ReceiveMessage"
77
79
]
78
80
resources = [
79
- module . node_term_sqs . sqs_queue_arn
81
+ module . aws_node_termination_handler_sqs . sqs_queue_arn
80
82
]
81
83
}
82
84
}
83
85
84
- resource "aws_iam_policy" "node_term" {
85
- name = " node-term-${ local . cluster_name } "
86
- policy = data. aws_iam_policy_document . node_term . json
87
- }
88
-
89
- resource "aws_iam_role_policy_attachment" "node_term_policy" {
90
- policy_arn = aws_iam_policy. node_term . arn
91
- role = module. eks . worker_iam_role_name
86
# IAM policy rendered from the `aws_node_termination_handler` policy document.
# The name is "<cluster name>-aws-node-termination-handler"; it must not contain
# whitespace, so the interpolation is written without padding.
resource "aws_iam_policy" "aws_node_termination_handler" {
  name   = "${local.cluster_name}-aws-node-termination-handler"
  policy = data.aws_iam_policy_document.aws_node_termination_handler.json
}
93
90
94
- data "aws_iam_policy_document" "node_term_events " {
91
+ data "aws_iam_policy_document" "aws_node_termination_handler_events " {
95
92
statement {
96
93
effect = " Allow"
97
94
principals {
@@ -110,16 +107,16 @@ data "aws_iam_policy_document" "node_term_events" {
110
107
}
111
108
}
112
109
113
- module "node_term_sqs " {
110
# SQS queue for the node termination handler, named after the cluster.
# Its access policy comes from the `aws_node_termination_handler_events`
# policy document.
module "aws_node_termination_handler_sqs" {
  source  = "terraform-aws-modules/sqs/aws"
  version = "~> 3.0.0"

  name                      = local.cluster_name
  message_retention_seconds = 300
  policy                    = data.aws_iam_policy_document.aws_node_termination_handler_events.json
}
120
117
121
- resource "aws_cloudwatch_event_rule" "node_term_event_rule " {
122
- name = " ${ local . cluster_name } -nth-rule "
118
+ resource "aws_cloudwatch_event_rule" "aws_node_termination_handler_asg " {
119
+ name = " ${ local . cluster_name } -asg-termination "
123
120
description = " Node termination event rule"
124
121
event_pattern = jsonencode (
125
122
{
@@ -134,24 +131,46 @@ resource "aws_cloudwatch_event_rule" "node_term_event_rule" {
134
131
)
135
132
}
136
133
137
- resource "aws_cloudwatch_event_target" "node_term_event_target " {
138
- rule = aws_cloudwatch_event_rule . node_term_event_rule . name
139
- target_id = " ANTHandler "
140
- arn = module. node_term_sqs . sqs_queue_arn
134
# Sends events matched by the ASG termination rule to the node termination
# handler SQS queue. The target ID carries no whitespace.
resource "aws_cloudwatch_event_target" "aws_node_termination_handler_asg" {
  target_id = "${local.cluster_name}-asg-termination"
  rule      = aws_cloudwatch_event_rule.aws_node_termination_handler_asg.name
  arn       = module.aws_node_termination_handler_sqs.sqs_queue_arn
}
142
139
143
- module "node_term_role" {
140
# EventBridge rule matching EC2 Spot Instance interruption warnings.
resource "aws_cloudwatch_event_rule" "aws_node_termination_handler_spot" {
  name        = "${local.cluster_name}-spot-termination"
  description = "Node termination event rule"

  # Deliberately no "resources" filter: the `resources` field of a Spot
  # interruption warning holds the ARN of the interrupted EC2 instance, so a
  # filter on Auto Scaling group ARNs would never match and the rule would
  # never fire.
  event_pattern = jsonencode(
    {
      "source" : [
        "aws.ec2"
      ],
      "detail-type" : [
        "EC2 Spot Instance Interruption Warning"
      ]
    }
  )
}
155
+
156
# Sends events matched by the Spot interruption rule to the node termination
# handler SQS queue. The target ID carries no whitespace.
resource "aws_cloudwatch_event_target" "aws_node_termination_handler_spot" {
  target_id = "${local.cluster_name}-spot-termination"
  rule      = aws_cloudwatch_event_rule.aws_node_termination_handler_spot.name
  arn       = module.aws_node_termination_handler_sqs.sqs_queue_arn
}
161
+
162
# IAM role assumable through the cluster's OIDC provider (IRSA), with the
# node termination handler policy attached.
module "aws_node_termination_handler_role" {
  source  = "terraform-aws-modules/iam/aws//modules/iam-assumable-role-with-oidc"
  version = "4.1.0"

  create_role      = true
  role_description = "IRSA role for ANTH, cluster ${local.cluster_name}"
  role_name_prefix = local.cluster_name

  # Strip the "https://" scheme from the issuer URL; the search and replacement
  # strings must be exact ("https://" -> ""), with no padding.
  provider_url     = replace(module.eks.cluster_oidc_issuer_url, "https://", "")
  role_policy_arns = [aws_iam_policy.aws_node_termination_handler.arn]

  # Only the service account named by var.namespace / var.serviceaccount may
  # assume this role; the subject must be the exact
  # "system:serviceaccount:<namespace>:<name>" string.
  oidc_fully_qualified_subjects = ["system:serviceaccount:${var.namespace}:${var.serviceaccount}"]
}
153
172
154
- resource "helm_release" "anth " {
173
+ resource "helm_release" "aws_node_termination_handler " {
155
174
depends_on = [
156
175
module . eks
157
176
]
@@ -173,29 +192,35 @@ resource "helm_release" "anth" {
173
192
}
174
193
set {
175
194
name = " serviceAccount.annotations.eks\\ .amazonaws\\ .com/role-arn"
176
- value = module.node_term_role .iam_role_arn
195
+ value = module.aws_node_termination_handler_role .iam_role_arn
177
196
type = " string"
178
197
}
179
198
set {
180
199
name = " enableSqsTerminationDraining"
181
200
value = " true"
182
201
}
202
+ set {
203
+ name = " enableSpotInterruptionDraining"
204
+ value = " true"
205
+ }
183
206
set {
184
207
name = " queueURL"
185
- value = module.node_term_sqs .sqs_queue_id
208
+ value = module.aws_node_termination_handler_sqs .sqs_queue_id
186
209
}
187
210
set {
188
211
name = " logLevel"
189
- value = " DEBUG "
212
+ value = " debug "
190
213
}
191
214
}
192
215
193
216
# Creating the lifecycle-hook outside of the ASG resource's `initial_lifecycle_hook`
194
217
# ensures that node termination does not require the lifecycle action to be completed,
195
218
# and thus allows the ASG to be destroyed cleanly.
196
- resource "aws_autoscaling_lifecycle_hook" "node_term" {
197
- name = " node_term-${ local . cluster_name } "
198
- autoscaling_group_name = module. eks . workers_asg_names [0 ]
219
+ resource "aws_autoscaling_lifecycle_hook" "aws_node_termination_handler" {
220
+ for_each = toset (module. eks . workers_asg_names )
221
+
222
+ name = " aws-node-termination-handler"
223
+ autoscaling_group_name = each. value
199
224
lifecycle_transition = " autoscaling:EC2_INSTANCE_TERMINATING"
200
225
heartbeat_timeout = 300
201
226
default_result = " CONTINUE"
@@ -227,8 +252,8 @@ module "eks" {
227
252
key = " foo"
228
253
value = " buzz"
229
254
propagate_at_launch = true
230
- },
255
+ }
231
256
]
232
- },
257
+ }
233
258
]
234
259
}
0 commit comments