3
3
import json
4
4
import re
5
5
6
+ from collections import defaultdict
7
+
8
+ _PID_TO_DEVICE = None
9
+
6
10
# Code adapted from https://github.com/ezyang/nvprof2json
7
11
8
12
# Command-line interface: input sqlite file, output JSON path and the set of
# activity types to export.
parser = argparse.ArgumentParser(description='Convert nsight systems sqlite output to Google Event Trace compatible JSON.')
parser.add_argument('--filename')
parser.add_argument("-o", "--output", help="Output file name", required=True)
# NOTE: argparse does not check default values against `choices`, so every
# entry of the default list must match one of the choices exactly (a stray
# trailing space would only surface later as an "unknown activity" error).
parser.add_argument("-t", "--activity-type", help="Type of activities shown. Default to all.", default=["kernel", "nvtx-kernel"], choices=['kernel', 'nvtx', "nvtx-kernel"], nargs="+")
12
16
parser .add_argument ("--nvtx-event-prefix" , help = "Filter NVTX events by their names' prefix." , type = str , nargs = "*" )
13
17
parser .add_argument ("--nvtx-color-scheme" , help = """Color scheme for NVTX events.
14
18
Accepts a dict mapping a string to one of chrome tracing colors.
@@ -26,7 +30,7 @@ def munge_time(t):
26
30
# For reference of the schema, see
27
31
# https://docs.nvidia.com/nsight-systems/UserGuide/index.html#exporter-sqlite-schema
28
32
29
- def parse_cupti_kernel_events (conn : sqlite3 .Connection , strings : dict , traceEvents : list ):
33
+ def parse_cupti_kernel_events (conn : sqlite3 .Connection , strings : dict , traceEvents : list = None ):
30
34
"""
31
35
Copied from the docs:
32
36
CUPTI_ACTIVITY_KIND_KERNEL
@@ -58,22 +62,38 @@ def parse_cupti_kernel_events(conn: sqlite3.Connection, strings: dict, traceEven
58
62
graphNodeId INTEGER, -- REFERENCES CUDA_GRAPH_EVENTS(graphNodeId)
59
63
sharedMemoryLimitConfig INTEGER -- REFERENCES ENUM_CUDA_SHARED_MEM_LIMIT_CONFIG(id)
60
64
"""
65
+ per_device_kernel_events = defaultdict (list )
61
66
for row in conn .execute ("SELECT * FROM CUPTI_ACTIVITY_KIND_KERNEL" ):
62
- event = {
63
- "name" : strings [row ["shortName" ]],
64
- "ph" : "X" , # Complete Event (Begin + End event)
65
- "cat" : "cuda" ,
66
- "ts" : munge_time (row ["start" ]),
67
- "dur" : munge_time (row ["end" ] - row ["start" ]),
68
- "tid" : "Stream {}" .format (row ["streamId" ]),
69
- "pid" : "Device {}" .format (row ["deviceId" ]),
70
- "args" : {
71
- # TODO: More
72
- },
73
- }
74
- traceEvents .append (event )
67
+ per_device_kernel_events [row ["deviceId" ]].append (row )
68
+ if traceEvents is not None :
69
+ event = {
70
+ "name" : strings [row ["shortName" ]],
71
+ "ph" : "X" , # Complete Event (Begin + End event)
72
+ "cat" : "cuda" ,
73
+ "ts" : munge_time (row ["start" ]),
74
+ "dur" : munge_time (row ["end" ] - row ["start" ]),
75
+ "tid" : "Stream {}" .format (row ["streamId" ]),
76
+ "pid" : "Device {}" .format (row ["deviceId" ]),
77
+ "args" : {
78
+ # TODO: More
79
+ },
80
+ }
81
+ traceEvents .append (event )
82
+ return per_device_kernel_events
83
+
84
def link_pid_with_devices(conn: sqlite3.Connection):
    """
    Map each process id (PID) found in CUPTI_ACTIVITY_KIND_KERNEL to the
    device id its kernels ran on.

    The mapping is built on the first call and stored in the module-level
    ``_PID_TO_DEVICE``; subsequent calls return that cached dict without
    querying ``conn`` again, so the cache is only meaningful when a single
    database is processed per interpreter.

    Assumes each PID is associated with a single device.

    Returns:
        dict mapping PID -> deviceId.

    Raises:
        AssertionError: if the same PID appears with more than one device.
    """
    global _PID_TO_DEVICE
    if _PID_TO_DEVICE is None:
        pid_to_device = {}
        for row in conn.execute("SELECT DISTINCT deviceId, globalPid / 0x1000000 % 0x1000000 AS PID FROM CUPTI_ACTIVITY_KIND_KERNEL"):
            if row["PID"] in pid_to_device:
                # Raised explicitly instead of via `assert` so the check is
                # not stripped (and the mapping silently overwritten) when
                # running under `python -O`.
                raise AssertionError(
                    f"A single PID ({row['PID']}) is associated with multiple devices ({pid_to_device[row['PID']]} and {row['deviceId']})."
                )
            pid_to_device[row["PID"]] = row["deviceId"]
        _PID_TO_DEVICE = pid_to_device
    return _PID_TO_DEVICE
75
95
76
- def parse_nvtx_events (conn : sqlite3 .Connection , traceEvents : list , event_prefix = None , color_scheme = {}):
96
+ def parse_nvtx_events (conn : sqlite3 .Connection , traceEvents : list = None , event_prefix = None , color_scheme = {}):
77
97
"""
78
98
Copied from the docs:
79
99
NVTX_EVENTS
@@ -106,12 +126,6 @@ def parse_nvtx_events(conn: sqlite3.Connection, traceEvents: list, event_prefix=
106
126
75 - NvtxDomainCreate
107
127
76 - NvtxDomainDestroy
108
128
"""
109
- # map each pid to a device. assumes each pid is associated with a single device
110
- pid_to_device = {}
111
- for row in conn .execute ("SELECT DISTINCT deviceId, globalPid / 0x1000000 % 0x1000000 AS PID FROM CUPTI_ACTIVITY_KIND_KERNEL" ):
112
- assert row ["PID" ] not in pid_to_device , \
113
- f"A single PID ({ row ['PID' ]} ) is associated with multiple devices ({ pid_to_device [row ['PID' ]]} and { row ['deviceId' ]} )."
114
- pid_to_device [row ["PID" ]] = row ["deviceId" ]
115
129
116
130
if event_prefix is None :
117
131
match_text = ''
@@ -128,29 +142,156 @@ def parse_nvtx_events(conn: sqlite3.Connection, traceEvents: list, event_prefix=
128
142
else :
129
143
match_text += " OR "
130
144
145
+ nvtx_events_per_device = defaultdict (list )
146
+ pid_to_device = link_pid_with_devices (conn )
131
147
# eventType 59 is NvtxPushPopRange, which corresponds to torch.cuda.nvtx.range apis
132
148
for row in conn .execute (f"SELECT start, end, text, globalTid / 0x1000000 % 0x1000000 AS PID, globalTid % 0x1000000 AS TID FROM NVTX_EVENTS WHERE NVTX_EVENTS.eventType == 59{ match_text } ;" ):
133
149
text = row ['text' ]
134
150
pid = row ['PID' ]
135
151
tid = row ['TID' ]
136
- assert pid in pid_to_device , f"PID { pid } not found in the pid to device map."
152
+ nvtx_events_per_device [pid_to_device [pid ]].append (row )
153
+ if traceEvents is not None :
154
+ assert pid in pid_to_device , f"PID { pid } not found in the pid to device map."
155
+ event = {
156
+ "name" : text ,
157
+ "ph" : "X" , # Complete Event (Begin + End event)
158
+ "cat" : "nvtx" ,
159
+ "ts" : munge_time (row ["start" ]),
160
+ "dur" : munge_time (row ["end" ] - row ["start" ]),
161
+ "tid" : "NVTX Thread {}" .format (tid ),
162
+ "pid" : "Device {}" .format (pid_to_device [pid ]),
163
+ "args" : {
164
+ # TODO: More
165
+ },
166
+ }
167
+ if color_scheme :
168
+ for key , color in color_scheme .items ():
169
+ if re .search (key , text ):
170
+ event ["cname" ] = color
171
+ break
172
+ traceEvents .append (event )
173
+ return nvtx_events_per_device
174
+
175
def parse_cuda_api_events(conn: sqlite3.Connection, strings: dict, traceEvents: list = None):
    """
    Read CUDA runtime API calls and group the rows by device.

    Copied from the docs:
    CUPTI_ACTIVITY_KIND_RUNTIME
    start                       INTEGER   NOT NULL,                    -- Event start timestamp (ns).
    end                         INTEGER   NOT NULL,                    -- Event end timestamp (ns).
    eventClass                  INTEGER   NOT NULL,                    -- CUDA event class enum value. See docs for specifics.
    globalTid                   INTEGER,                               -- Serialized GlobalId.
    correlationId               INTEGER,                               -- ID used to identify events that this function call has triggered.
    nameId                      INTEGER   NOT NULL REFERENCES StringIds(id), -- StringId of the function name.
    returnValue                 INTEGER   NOT NULL,                    -- Return value of the function call.
    callchainId                 INTEGER   REFERENCES CUDA_CALLCHAINS(id) -- ID of the attached callchain.

    Args:
        conn: connection to the exported database.
        strings: lookup from ``nameId`` to the API function name.
        traceEvents: when not None, one Chrome-trace "X" event per API call
            is appended to this list.

    Returns:
        defaultdict mapping deviceId -> list of result rows.

    Raises:
        KeyError: if a row belongs to a PID that has no device mapping.
    """
    pid_to_devices = link_pid_with_devices(conn)
    per_device_api_events = defaultdict(list)
    # No eventClass filter is applied: every row of the table is read.
    query = "SELECT start, end, globalTid / 0x1000000 % 0x1000000 AS PID, globalTid % 0x1000000 AS TID, correlationId, nameId FROM CUPTI_ACTIVITY_KIND_RUNTIME;"
    for row in conn.execute(query):
        pid = row['PID']
        if pid not in pid_to_devices:
            # Same failure mode as a plain dict lookup, but with a message
            # that says which PID is missing.
            raise KeyError(f"PID {pid} not found in the pid to device map.")
        device = pid_to_devices[pid]
        per_device_api_events[device].append(row)
        if traceEvents is None:
            continue
        traceEvents.append({
            "name": strings[row['nameId']],
            "ph": "X",  # Complete Event (Begin + End event)
            "cat": "cuda_api",
            "ts": munge_time(row["start"]),
            "dur": munge_time(row["end"] - row["start"]),
            "tid": "CUDA API Thread {}".format(row['TID']),
            "pid": "Device {}".format(device),
            "args": {
                "correlationId": row['correlationId'],
            },
        })
    return per_device_api_events
212
+
213
+ def _find_overlapping_intervals (nvtx_events , cuda_api_events ):
214
+ mixed_events = []
215
+ for nvtx_event in nvtx_events :
216
+ start = nvtx_event ["start" ]
217
+ end = nvtx_event ["end" ]
218
+ mixed_events .append ((start , 1 , "nvtx" , nvtx_event ))
219
+ mixed_events .append ((end , - 1 , "nvtx" , nvtx_event ))
220
+ for cuda_api_events in cuda_api_events :
221
+ start = cuda_api_events ["start" ]
222
+ end = cuda_api_events ["end" ]
223
+ mixed_events .append ((start , 1 , "cuda_api" , cuda_api_events ))
224
+ mixed_events .append ((end , - 1 , "cuda_api" , cuda_api_events ))
225
+ mixed_events .sort (key = lambda x : (x [0 ], x [1 ], x [2 ]))
226
+ active_intervals = []
227
+ result = defaultdict (list )
228
+ for _ , event_type , event_origin , orig_event in mixed_events :
229
+ if event_type == 1 :
230
+ # start
231
+ if event_origin == "nvtx" :
232
+ active_intervals .append (orig_event )
233
+ else :
234
+ for event in active_intervals :
235
+ result [event ].append (orig_event )
236
+ else :
237
+ # end
238
+ if event_origin == "nvtx" :
239
+ active_intervals .remove (orig_event )
240
+ return result
241
+
242
def link_nvtx_events_to_kernel_events(strings: dict,
                                      pid_to_device: dict[int, int],
                                      nvtx_events_per_device: dict[int, list],
                                      cuda_api_events_per_device: dict[int, list],
                                      cuda_kernel_events_per_device: dict[int, list]):
    """
    Link NVTX events to cupti kernel events. This is done by first matching
    the nvtx ranges with CUDA API calls by timestamp. Then, retrieve the
    corresponding kernel events using the correlationId from CUDA API calls.

    Args:
        strings: unused here; kept so the signature stays stable for callers.
        pid_to_device: PID -> deviceId; only its values (the devices) are used.
        nvtx_events_per_device: deviceId -> NVTX rows.
        cuda_api_events_per_device: deviceId -> CUDA API rows.
        cuda_kernel_events_per_device: deviceId -> kernel rows.

    Returns:
        dict mapping each NVTX row that triggered at least one kernel to
        ``(earliest kernel start, latest kernel end)``.
    """
    result = {}
    # Several PIDs may map to the same device; visit each device once while
    # keeping first-seen order.
    for device in dict.fromkeys(pid_to_device.values()):
        # .get() keeps plain dicts (without an entry for this device) working
        # and avoids inserting empty lists into a caller's defaultdict.
        nvtx_events = nvtx_events_per_device.get(device, [])
        cuda_api_events = cuda_api_events_per_device.get(device, [])
        kernel_events = cuda_kernel_events_per_device.get(device, [])

        event_map = _find_overlapping_intervals(nvtx_events, cuda_api_events)

        # If several kernel rows share a correlationId, all of them contribute
        # to the range rather than only the last one seen.
        kernels_by_correlation = defaultdict(list)
        for kernel_event in kernel_events:
            kernels_by_correlation[kernel_event["correlationId"]].append(kernel_event)

        for nvtx_event, api_events in event_map.items():
            launched = [
                kernel_event
                for api_event in api_events
                # API calls without a matching kernel are simply ignored.
                for kernel_event in kernels_by_correlation.get(api_event["correlationId"], ())
            ]
            if not launched:
                continue
            result[nvtx_event] = (
                min(kernel_event["start"] for kernel_event in launched),
                max(kernel_event["end"] for kernel_event in launched),
            )
    return result
275
+
276
def parse_nvtx_kernel_events(conn: sqlite3.Connection, strings: dict, traceEvents: list, event_prefix=None, color_scheme={}):
    """
    Emit one trace event per NVTX range, spanning the GPU kernels that the
    range triggered (instead of the host-side time of the range itself).

    NVTX ranges, CUDA API calls and kernels are read without emitting their
    own trace events, linked via ``link_nvtx_events_to_kernel_events``, and
    each linked NVTX range is appended to ``traceEvents`` as a Chrome-trace
    "X" event with category "nvtx-kernel".

    Args:
        conn: connection to the exported database.
        strings: string-id lookup passed through to the row parsers.
        traceEvents: list the generated events are appended to.
        event_prefix: forwarded to ``parse_nvtx_events`` to filter NVTX names.
        color_scheme: mapping of regex pattern -> chrome tracing color name;
            the first pattern matching the NVTX text sets the event's
            "cname". The default dict is never mutated.
    """
    pid_to_device = link_pid_with_devices(conn)
    nvtx_events_per_device = parse_nvtx_events(conn, event_prefix=event_prefix, color_scheme=color_scheme)
    cuda_api_events_per_device = parse_cuda_api_events(conn, strings)
    cuda_kernel_events_per_device = parse_cupti_kernel_events(conn, strings)
    nvtx_kernel_event_map = link_nvtx_events_to_kernel_events(strings, pid_to_device, nvtx_events_per_device, cuda_api_events_per_device, cuda_kernel_events_per_device)
    for nvtx_event, (kernel_start_time, kernel_end_time) in nvtx_kernel_event_map.items():
        text = nvtx_event["text"]
        event = {
            "name": text,
            "ph": "X",  # Complete Event (Begin + End event)
            "cat": "nvtx-kernel",
            "ts": munge_time(kernel_start_time),
            "dur": munge_time(kernel_end_time - kernel_start_time),
            # Column aliases in the NVTX query are "PID"/"TID"; use them as
            # written rather than relying on case-insensitive row lookup.
            "tid": "NVTX Kernel Thread {}".format(nvtx_event["TID"]),
            "pid": "Device {}".format(pid_to_device[nvtx_event["PID"]]),
            "args": {
                # TODO: More
            },
        }
        # Apply the user's color scheme here: the NVTX parser above runs with
        # tracing disabled, so it never gets to colour anything itself.
        if color_scheme:
            for key, color in color_scheme.items():
                if re.search(key, text):
                    event["cname"] = color
                    break
        traceEvents.append(event)
155
296
156
297
def nsys2json ():
@@ -167,6 +308,8 @@ def nsys2json():
167
308
parse_cupti_kernel_events (conn , strings , traceEvents )
168
309
elif activity == "nvtx" :
169
310
parse_nvtx_events (conn , traceEvents , event_prefix = args .nvtx_event_prefix , color_scheme = args .nvtx_color_scheme )
311
+ elif activity == "nvtx-kernel" :
312
+ parse_nvtx_kernel_events (conn , strings , traceEvents , event_prefix = args .nvtx_event_prefix , color_scheme = args .nvtx_color_scheme )
170
313
else :
171
314
raise ValueError (f"Unknown activity type: { activity } " )
172
315
# make the timelines appear in pid and tid order
0 commit comments