# export_cuda_device_auto.py
# Forked from boringlee24/socc22-miso.
# Imports and host-specific settings for the MIG device-UUID export script.
import json
import mig_helper
import os
# Login name taken from the environment; it is interpolated into the
# /home/<user>/... paths used below. os.environ.get returns None when USER is
# unset, in which case those paths would read "/home/None/...".
user = os.environ.get('USER')
import sys
# The search path is extended *before* the star-import below so that `utils`
# can be found in the scheduler's main directory. Do not reorder these lines.
sys.path.append(f'/home/{user}/GIT/sc22-miso/scheduler/main')
# NOTE(review): `GPU_status`, used later in this file, is presumably provided
# by this star-import -- confirm against scheduler/main/utils.py.
from utils import *
import subprocess
import io
import numpy as np
import socket
# Hostname of the machine running the script; used later as the top-level key
# of the exported mapping.
node = socket.gethostname()
# Partition table from the scheduler checkout. The export loop later in this
# file iterates it as {partition code: list of slice sizes}.
with open(f'/home/{user}/GIT/sc22-miso/scheduler/partition_code.json') as f:
    partition_code = json.load(f)

# Start from an empty mapping and, when a previous run left an output file in
# the working directory, continue from its contents instead.
export = {}
if os.path.exists('mig_device_autogen.json'):
    with open('mig_device_autogen.json') as f:
        export = json.load(f)
def correct_order(device_ids, partition):
    """Return MIG device UUIDs reordered to follow the slices in ``partition``.

    For each slice size ``p`` in ``partition`` (in order), the first not yet
    used entry of ``device_ids`` whose slice name contains ``f'{p}g.'`` is
    selected and its UUID appended to the result.

    Args:
        device_ids: list of ``(slice_name, uuid)`` pairs, e.g.
            ``('2g.10gb', 'MIG-...')``. The list is NOT modified.
        partition: sequence of slice sizes, e.g. ``[2, 2, 2, 1]``.

    Returns:
        list of UUID strings, one per entry of ``partition``, in that order.

    Raises:
        RuntimeError: if a slice in ``partition`` has no matching device, or
            if devices are left over after every slice has been matched.
    """
    # Work on a copy so the caller's list is left untouched.
    remaining = list(device_ids)
    result = []
    for p in partition:
        xg = f'{p}g.'
        for idx, d in enumerate(remaining):
            if xg in d[0]:
                result.append(d[1])
                del remaining[idx]
                break
        else:
            # No device for this slice: returning a shorter list would
            # silently misalign UUIDs with the partition layout.
            raise RuntimeError('correct order fault')
    if remaining:
        raise RuntimeError('correct order fault')
    return result
def read_cuda_device(gpuid, partition):
    """Read the MIG device UUIDs of one GPU from ``nvidia-smi -L``.

    The output is scanned for the line containing ``GPU <gpuid>: NVIDIA``;
    the ``len(partition)`` lines that follow it are parsed as MIG device
    entries, and the collected ``(slice_name, uuid)`` pairs are passed to
    ``correct_order`` so the result follows the order of ``partition``.

    Args:
        gpuid: index of the physical GPU as printed by ``nvidia-smi -L``.
        partition: sequence of slice sizes configured on that GPU,
            e.g. ``[2, 2, 2, 1]``.

    Returns:
        list of MIG UUID strings ordered like ``partition``.

    Raises:
        RuntimeError: if a device line has no ``UUID: `` field, if fewer than
            ``len(partition)`` device lines follow the GPU header, or if
            ``correct_order`` cannot match the devices to the partition.
    """
    num_slices = len(partition)
    # subprocess.run drains stdout while waiting for the process, avoiding
    # the wait()-before-read pattern that can block on a full pipe.
    completed = subprocess.run(['nvidia-smi', '-L'], stdout=subprocess.PIPE)
    lines = completed.stdout.decode("utf-8").splitlines()

    device_ids = []
    start = False
    # Iterating a finite list guarantees termination even when the GPU header
    # never appears in the output.
    for line in lines:
        if len(device_ids) >= num_slices:
            break
        if start:
            if 'UUID: ' not in line:
                raise RuntimeError(
                    f'unexpected nvidia-smi line for GPU {gpuid}: {line!r}')
            mig_str = line.split('UUID: ')[1].strip(')\n')
            # Assumes device lines look like
            # "  MIG 3g.20gb     Device  0: (UUID: MIG-...)", so the slice name
            # is the 4th space-separated field -- confirm against the driver.
            slice_str = line.split(' ')[3]
            device_ids.append((slice_str, mig_str))
        if f'GPU {gpuid}: NVIDIA' in line:
            start = True

    if len(device_ids) < num_slices:
        raise RuntimeError(
            f'found {len(device_ids)} MIG devices for GPU {gpuid}, '
            f'expected {num_slices}')
    return correct_order(device_ids, partition)
# mig_helper.init_mig()  (call left disabled)

# Rebuild this host's entry from scratch; entries for other keys that were
# loaded into `export` earlier are kept as they are.
export[node] = {}
for gpuid in range(2):
    gpu_key = f'gpu{gpuid}'
    export[node][gpu_key] = {}
    for code, partition in partition_code.items():  # partition e.g. [2,2,2,1]
        # NOTE(review): reset_mig presumably removes the GPU's existing MIG
        # instances before the new layout is created -- confirm in mig_helper.
        mig_helper.reset_mig(gpuid)
        export[node][gpu_key][code] = []
        # Create one instance per slice, in the order given by the partition.
        for p in partition:
            mig_helper.create_ins(gpuid, GPU_status.num_to_str[p])
        device_ids = read_cuda_device(gpuid, partition)
        # Store a copy of the UUID list under this partition code.
        export[node][gpu_key][code] = list(device_ids)

# Write the complete mapping back to the working directory.
with open('mig_device_autogen.json', 'w') as f:
    json.dump(export, f, indent=4)