@@ -32,6 +32,7 @@ type Device interface {
32
32
GetMigDevices () ([]MigDevice , error )
33
33
GetMigProfiles () ([]MigProfile , error )
34
34
GetPCIBusID () (string , error )
35
+ IsFabricAttached () (bool , error )
35
36
IsMigCapable () (bool , error )
36
37
IsMigEnabled () (bool , error )
37
38
VisitMigDevices (func (j int , m MigDevice ) error ) error
@@ -208,6 +209,47 @@ func (d *device) IsMigEnabled() (bool, error) {
208
209
return (mode == nvml .DEVICE_MIG_ENABLE ), nil
209
210
}
210
211
212
+ // IsFabricAttached checks if a device is attached to a GPU fabric.
213
+ func (d * device ) IsFabricAttached () (bool , error ) {
214
+ if d .lib .hasSymbol ("nvmlDeviceGetGpuFabricInfo" ) {
215
+ info , ret := d .GetGpuFabricInfo ()
216
+ if ret == nvml .ERROR_NOT_SUPPORTED {
217
+ return false , nil
218
+ }
219
+ if ret != nvml .SUCCESS {
220
+ return false , fmt .Errorf ("error getting GPU Fabric Info: %v" , ret )
221
+ }
222
+ if info .State != nvml .GPU_FABRIC_STATE_COMPLETED {
223
+ return false , nil
224
+ }
225
+ if info .Status != uint32 (nvml .SUCCESS ) {
226
+ return false , nil
227
+ }
228
+
229
+ return true , nil
230
+ }
231
+
232
+ if d .lib .hasSymbol ("nvmlDeviceGetGpuFabricInfoV" ) {
233
+ info , ret := d .GetGpuFabricInfoV ().V2 ()
234
+ if ret == nvml .ERROR_NOT_SUPPORTED {
235
+ return false , nil
236
+ }
237
+ if ret != nvml .SUCCESS {
238
+ return false , fmt .Errorf ("error getting GPU Fabric Info: %v" , ret )
239
+ }
240
+ if info .State != nvml .GPU_FABRIC_STATE_COMPLETED {
241
+ return false , nil
242
+ }
243
+ if info .Status != uint32 (nvml .SUCCESS ) {
244
+ return false , nil
245
+ }
246
+
247
+ return true , nil
248
+ }
249
+
250
+ return false , nil
251
+ }
252
+
211
253
// VisitMigDevices walks a top-level device and invokes a callback function for each MIG device configured on it.
212
254
func (d * device ) VisitMigDevices (visit func (int , MigDevice ) error ) error {
213
255
capable , err := d .IsMigCapable ()
0 commit comments