Skip to content

Commit 3c02b49

Browse files
Enable labels for ClusterUUID and CliqueId
Signed-off-by: Carlos Eduardo Arango Gutierrez <[email protected]>
1 parent a6a7ce1 commit 3c02b49

File tree

20 files changed

+1177
-11
lines changed

20 files changed

+1177
-11
lines changed

api/config/v1/flags.go

+8-5
Original file line numberDiff line numberDiff line change
@@ -103,11 +103,12 @@ func (f *deviceListStrategyFlag) UnmarshalJSON(b []byte) error {
103103

104104
// GFDCommandLineFlags holds the list of command line flags specific to GFD.
105105
type GFDCommandLineFlags struct {
106-
Oneshot *bool `json:"oneshot" yaml:"oneshot"`
107-
NoTimestamp *bool `json:"noTimestamp" yaml:"noTimestamp"`
108-
SleepInterval *Duration `json:"sleepInterval" yaml:"sleepInterval"`
109-
OutputFile *string `json:"outputFile" yaml:"outputFile"`
110-
MachineTypeFile *string `json:"machineTypeFile" yaml:"machineTypeFile"`
106+
Oneshot *bool `json:"oneshot" yaml:"oneshot"`
107+
NoTimestamp *bool `json:"noTimestamp" yaml:"noTimestamp"`
108+
SleepInterval *Duration `json:"sleepInterval" yaml:"sleepInterval"`
109+
OutputFile *string `json:"outputFile" yaml:"outputFile"`
110+
ImexNodesConfigFile *string `json:"imexNodesConfigFile" yaml:"imexNodesConfigFile"`
111+
MachineTypeFile *string `json:"machineTypeFile" yaml:"machineTypeFile"`
111112
}
112113

113114
// UpdateFromCLIFlags updates Flags from settings in the cli Flags if they are set.
@@ -162,6 +163,8 @@ func (f *Flags) UpdateFromCLIFlags(c *cli.Context, flags []cli.Flag) {
162163
updateFromCLIFlag(&f.GFD.Oneshot, c, n)
163164
case "output-file":
164165
updateFromCLIFlag(&f.GFD.OutputFile, c, n)
166+
case "imex-nodes-config-file":
167+
updateFromCLIFlag(&f.GFD.ImexNodesConfigFile, c, n)
165168
case "sleep-interval":
166169
updateFromCLIFlag(&f.GFD.SleepInterval, c, n)
167170
case "no-timestamp":

api/config/v1/flags_test.go

+4-2
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,8 @@ func TestMarshalFlags(t *testing.T) {
186186
"noTimestamp": null,
187187
"outputFile": null,
188188
"sleepInterval": "0s",
189-
"machineTypeFile": null
189+
"machineTypeFile": null,
190+
"imexNodesConfigFile": null
190191
}
191192
}`,
192193
},
@@ -210,7 +211,8 @@ func TestMarshalFlags(t *testing.T) {
210211
"noTimestamp": null,
211212
"outputFile": null,
212213
"sleepInterval": "5ns",
213-
"machineTypeFile": null
214+
"machineTypeFile": null,
215+
"imexNodesConfigFile": null
214216
}
215217
}`,
216218
},

cmd/gpu-feature-discovery/main.go

+6
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,12 @@ func main() {
8686
Value: "/etc/kubernetes/node-feature-discovery/features.d/gfd",
8787
EnvVars: []string{"GFD_OUTPUT_FILE"},
8888
},
89+
&cli.StringFlag{
90+
Name: "imex-nodes-config-file",
91+
Usage: "Path to the IMEX nodes config file",
92+
Value: "/etc/nvidia-imex/nodes_config.cfg",
93+
EnvVars: []string{"GFD_IMEX_NODES_CONFIG_FILE"},
94+
},
8995
&cli.StringFlag{
9096
Name: "machine-type-file",
9197
Value: "/sys/class/dmi/id/product_name",

deployments/helm/nvidia-device-plugin/templates/_helpers.tpl

+18
Original file line numberDiff line numberDiff line change
@@ -269,3 +269,21 @@ We convert this to JSON so that it can be included and converted to an object us
269269
{{- $_ := set $options "addMigMonitorDevices" ( ne ( (include "nvidia-device-plugin.allPossibleMigStrategiesAreNone" . ) | trim ) "true" ) -}}
270270
{{- mustToJson $options -}}
271271
{{- end -}}
272+
273+
{{- define "nvidia-device-plugin.filepathJoin" -}}
274+
{{- $separator := "/" -}}
275+
{{- $path := "" -}}
276+
{{- range $index, $element := . -}}
277+
{{- if and (ne $element "") (ne $element nil) -}}
278+
{{- if $index -}}
279+
{{- $cleanElement := trimPrefix "/" $element -}}
280+
{{- $path = printf "%s%s%s" $path $separator $cleanElement -}}
281+
{{- else -}}
282+
{{- $path = $element -}}
283+
{{- end -}}
284+
{{- end -}}
285+
{{- end -}}
286+
{{- $resultRaw := $path | trimSuffix "/" -}}
287+
{{- $result := $resultRaw | clean }}
288+
{{- $result | quote -}}
289+
{{- end -}}

deployments/helm/nvidia-device-plugin/templates/daemonset-gfd.yml

+6
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,8 @@ spec:
182182
mountPath: "/etc/kubernetes/node-feature-discovery/features.d"
183183
- name: host-sys
184184
mountPath: "/sys"
185+
- name: nvidia-imex-dir
186+
mountPath: "/etc/nvidia-imex"
185187
{{- if $options.hasConfigMap }}
186188
- name: available-configs
187189
mountPath: /available-configs
@@ -199,6 +201,10 @@ spec:
199201
- name: host-sys
200202
hostPath:
201203
path: "/sys"
204+
- name: nvidia-imex-dir
205+
type: DirectoryOrCreate
206+
hostPath:
207+
path: {{ include "nvidia-device-plugin.filepathJoin" (list .Values.nvidiaDriverRoot "etc/nvidia-imex") }}
202208
{{- if $options.hasConfigMap }}
203209
- name: available-configs
204210
configMap:

docs/gpu-feature-discovery/README.md

+2
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,8 @@ their meaning:
210210
| nvidia.com/gpu.memory | Integer | Memory of the GPU in Mb | 2048 |
211211
| nvidia.com/gpu.product | String | Model of the GPU | GeForce-GT-710 |
212212
| nvidia.com/gpu.mode | String | Display or Compute Mode of the GPU. Details of the GPU modes can be found [here](https://docs.nvidia.com/grid/13.0/grid-gpumodeswitch-user-guide/index.html#compute-and-graphics-mode) | compute |
213+
| nvidia.com/gpu.clique | String | GPUFabric ClusterUUID + CliqueID | 7b968a6d-c8aa-45e1-9e07-e1e51be99c31.1 |
214+
| nvidia.com/gpu.imex-domain | String | Hash of the IMEX domain IP list + CliqueID | 79b326e7-d566-3483-c2a3-9b38fa5cb1c8.1 |
213215

214216
Depending on the MIG strategy used, the following set of labels may also be
215217
available (or override the default values for some of the labels listed above):

go.mod

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ go 1.22.2
44

55
require (
66
github.com/NVIDIA/go-gpuallocator v0.5.0
7-
github.com/NVIDIA/go-nvlib v0.6.1
7+
github.com/NVIDIA/go-nvlib v0.6.2-0.20240928162840-41955a08425b
88
github.com/NVIDIA/go-nvml v0.12.4-0
99
github.com/NVIDIA/nvidia-container-toolkit v1.16.2
1010
github.com/fsnotify/fsnotify v1.7.0

go.sum

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
github.com/NVIDIA/go-gpuallocator v0.5.0 h1:166ICvPv2dU9oZ2J3kJ4y3XdbGCi6LhXgFZJtrqeu3A=
22
github.com/NVIDIA/go-gpuallocator v0.5.0/go.mod h1:zos5bTIN01hpQioOyu9oRKglrznImMQvm0bZllMmckw=
3-
github.com/NVIDIA/go-nvlib v0.6.1 h1:0/5FvaKvDJoJeJ+LFlh+NDQMxMlVw9wOXrOVrGXttfE=
4-
github.com/NVIDIA/go-nvlib v0.6.1/go.mod h1:9UrsLGx/q1OrENygXjOuM5Ey5KCtiZhbvBlbUIxtGWY=
3+
github.com/NVIDIA/go-nvlib v0.6.2-0.20240928162840-41955a08425b h1:k5ptZB9RGUaR5RcK0R8Cfa4mtTHrSZZ73BFyD3c6KvM=
4+
github.com/NVIDIA/go-nvlib v0.6.2-0.20240928162840-41955a08425b/go.mod h1:9UrsLGx/q1OrENygXjOuM5Ey5KCtiZhbvBlbUIxtGWY=
55
github.com/NVIDIA/go-nvml v0.12.4-0 h1:4tkbB3pT1O77JGr0gQ6uD8FrsUPqP1A/EOEm2wI1TUg=
66
github.com/NVIDIA/go-nvml v0.12.4-0/go.mod h1:8Llmj+1Rr+9VGGwZuRer5N/aCjxGuR5nPb/9ebBiIEQ=
77
github.com/NVIDIA/nvidia-container-toolkit v1.16.2 h1:udrrtB8JrAs2KkKQ4njgSb/anUOC1b9tP5LjUtbjE+k=

0 commit comments

Comments
 (0)