-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathnsys2json.py
349 lines (327 loc) · 19.7 KB
/
nsys2json.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
import sqlite3
import argparse
import json
from pathlib import Path
import re
from collections import defaultdict
_PID_TO_DEVICE = None
# Code adapted from https://github.com/ezyang/nvprof2json
def parse_args():
parser = argparse.ArgumentParser(description='Convert nsight systems sqlite output to Google Event Trace compatible JSON.')
parser.add_argument("-f", '--filename', help="Path to the input sqlite file.", required=True)
parser.add_argument("-o", "--output", help="Output file name, default to same as input with .json extension.")
parser.add_argument("-t", "--activity-type", help="Type of activities shown. Default to all.", default=["kernel", "nvtx-kernel"], choices=['kernel', 'nvtx', "nvtx-kernel", "cuda-api"], nargs="+")
parser.add_argument("--nvtx-event-prefix", help="Filter NVTX events by their names' prefix.", type=str, nargs="*")
parser.add_argument("--nvtx-color-scheme", help="""Color scheme for NVTX events.
Accepts a dict mapping a string to one of chrome tracing colors.
Events with names containing the string will be colored.
E.g. {"send": "thread_state_iowait", "recv": "thread_state_iowait", "compute": "thread_state_running"}
For details of the color scheme, see links in https://github.com/google/perfetto/issues/208
""", type=json.loads, default={})
args = parser.parse_args()
if args.output is None:
args.output = Path(args.filename).with_suffix(".json")
return args
class ActivityType:
KERNEL = "kernel"
NVTX_CPU = "nvtx"
NVTX_KERNEL = "nvtx-kernel"
CUDA_API = "cuda-api"
def munge_time(t):
"""Take a timestamp from nsys (ns) and convert it into us (the default for chrome://tracing)."""
# For strict correctness, divide by 1000, but this reduces accuracy.
return t / 1000.
# For reference of the schema, see
# https://docs.nvidia.com/nsight-systems/UserGuide/index.html#exporter-sqlite-schema
def parse_cupti_kernel_events(conn: sqlite3.Connection, strings: dict):
"""
Copied from the docs:
CUPTI_ACTIVITY_KIND_KERNEL
start INTEGER NOT NULL, -- Event start timestamp (ns).
end INTEGER NOT NULL, -- Event end timestamp (ns).
deviceId INTEGER NOT NULL, -- Device ID.
contextId INTEGER NOT NULL, -- Context ID.
streamId INTEGER NOT NULL, -- Stream ID.
correlationId INTEGER, -- REFERENCES CUPTI_ACTIVITY_KIND_RUNTIME(correlationId)
globalPid INTEGER, -- Serialized GlobalId.
demangledName INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Kernel function name w/ templates
shortName INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Base kernel function name
mangledName INTEGER, -- REFERENCES StringIds(id) -- Raw C++ mangled kernel function name
launchType INTEGER, -- REFERENCES ENUM_CUDA_KRENEL_LAUNCH_TYPE(id)
cacheConfig INTEGER, -- REFERENCES ENUM_CUDA_FUNC_CACHE_CONFIG(id)
registersPerThread INTEGER NOT NULL, -- Number of registers required for each thread executing the kernel.
gridX INTEGER NOT NULL, -- X-dimension grid size.
gridY INTEGER NOT NULL, -- Y-dimension grid size.
gridZ INTEGER NOT NULL, -- Z-dimension grid size.
blockX INTEGER NOT NULL, -- X-dimension block size.
blockY INTEGER NOT NULL, -- Y-dimension block size.
blockZ INTEGER NOT NULL, -- Z-dimension block size.
staticSharedMemory INTEGER NOT NULL, -- Static shared memory allocated for the kernel (B).
dynamicSharedMemory INTEGER NOT NULL, -- Dynamic shared memory reserved for the kernel (B).
localMemoryPerThread INTEGER NOT NULL, -- Amount of local memory reserved for each thread (B).
localMemoryTotal INTEGER NOT NULL, -- Total amount of local memory reserved for the kernel (B).
gridId INTEGER NOT NULL, -- Unique grid ID of the kernel assigned at runtime.
sharedMemoryExecuted INTEGER, -- Shared memory size set by the driver.
graphNodeId INTEGER, -- REFERENCES CUDA_GRAPH_EVENTS(graphNodeId)
sharedMemoryLimitConfig INTEGER -- REFERENCES ENUM_CUDA_SHARED_MEM_LIMIT_CONFIG(id)
"""
per_device_kernel_rows = defaultdict(list)
per_device_kernel_events = defaultdict(list)
for row in conn.execute("SELECT * FROM CUPTI_ACTIVITY_KIND_KERNEL"):
per_device_kernel_rows[row["deviceId"]].append(row)
event = {
"name": strings[row["shortName"]],
"ph": "X", # Complete Event (Begin + End event)
"cat": "cuda",
"ts": munge_time(row["start"]),
"dur": munge_time(row["end"] - row["start"]),
"tid": "Stream {}".format(row["streamId"]),
"pid": "Device {}".format(row["deviceId"]),
"args": {
# TODO: More
},
}
per_device_kernel_events[row["deviceId"]].append(event)
return per_device_kernel_rows, per_device_kernel_events
def link_pid_with_devices(conn: sqlite3.Connection):
# map each pid to a device. assumes each pid is associated with a single device
global _PID_TO_DEVICE
if _PID_TO_DEVICE is None:
pid_to_device = {}
for row in conn.execute("SELECT DISTINCT deviceId, globalPid / 0x1000000 % 0x1000000 AS PID FROM CUPTI_ACTIVITY_KIND_KERNEL"):
assert row["PID"] not in pid_to_device, \
f"A single PID ({row['PID']}) is associated with multiple devices ({pid_to_device[row['PID']]} and {row['deviceId']})."
pid_to_device[row["PID"]] = row["deviceId"]
_PID_TO_DEVICE = pid_to_device
return _PID_TO_DEVICE
def parse_nvtx_events(conn: sqlite3.Connection, event_prefix=None, color_scheme={}):
"""
Copied from the docs:
NVTX_EVENTS
start INTEGER NOT NULL, -- Event start timestamp (ns).
end INTEGER, -- Event end timestamp (ns).
eventType INTEGER NOT NULL, -- NVTX event type enum value. See docs for specifics.
rangeId INTEGER, -- Correlation ID returned from a nvtxRangeStart call.
category INTEGER, -- User-controlled ID that can be used to group events.
color INTEGER, -- Encoded ARGB color value.
text TEXT, -- Optional text message for non registered strings.
globalTid INTEGER, -- Serialized GlobalId.
endGlobalTid INTEGER, -- Serialized GlobalId. See docs for specifics.
textId INTEGER REFERENCES StringIds(id), -- StringId of the NVTX domain registered string.
domainId INTEGER, -- User-controlled ID that can be used to group events.
uint64Value INTEGER, -- One of possible payload value union members.
int64Value INTEGER, -- One of possible payload value union members.
doubleValue REAL, -- One of possible payload value union members.
uint32Value INTEGER, -- One of possible payload value union members.
int32Value INTEGER, -- One of possible payload value union members.
floatValue REAL, -- One of possible payload value union members.
jsonTextId INTEGER, -- One of possible payload value union members.
jsonText TEXT -- One of possible payload value union members.
NVTX_EVENT_TYPES
33 - NvtxCategory
34 - NvtxMark
39 - NvtxThread
59 - NvtxPushPopRange
60 - NvtxStartEndRange
75 - NvtxDomainCreate
76 - NvtxDomainDestroy
"""
if event_prefix is None:
match_text = ''
else:
match_text = " AND "
if len(event_prefix) == 1:
match_text += f"NVTX_EVENTS.text LIKE '{event_prefix[0]}%'"
else:
match_text += "("
for idx, prefix in enumerate(event_prefix):
match_text += f"NVTX_EVENTS.text LIKE '{prefix}%'"
if idx == len(event_prefix) - 1:
match_text += ")"
else:
match_text += " OR "
per_device_nvtx_rows = defaultdict(list)
per_device_nvtx_events = defaultdict(list)
pid_to_device = link_pid_with_devices(conn)
# eventType 59 is NvtxPushPopRange, which corresponds to torch.cuda.nvtx.range apis
for row in conn.execute(f"SELECT start, end, text, globalTid / 0x1000000 % 0x1000000 AS PID, globalTid % 0x1000000 AS TID FROM NVTX_EVENTS WHERE NVTX_EVENTS.eventType == 59{match_text};"):
text = row['text']
pid = row['PID']
tid = row['TID']
per_device_nvtx_rows[pid_to_device[pid]].append(row)
assert pid in pid_to_device, f"PID {pid} not found in the pid to device map."
event = {
"name": text,
"ph": "X", # Complete Event (Begin + End event)
"cat": "nvtx",
"ts": munge_time(row["start"]),
"dur": munge_time(row["end"] - row["start"]),
"tid": "NVTX Thread {}".format(tid),
"pid": "Device {}".format(pid_to_device[pid]),
"args": {
# TODO: More
},
}
if color_scheme:
for key, color in color_scheme.items():
if re.search(key, text):
event["cname"] = color
break
per_device_nvtx_events[pid_to_device[pid]].append(event)
return per_device_nvtx_rows, per_device_nvtx_events
def parse_cuda_api_events(conn: sqlite3.Connection, strings: dict):
"""
Copied from the docs:
CUPTI_ACTIVITY_KIND_RUNTIME
start INTEGER NOT NULL, -- Event start timestamp (ns).
end INTEGER NOT NULL, -- Event end timestamp (ns).
eventClass INTEGER NOT NULL, -- CUDA event class enum value. See docs for specifics.
globalTid INTEGER, -- Serialized GlobalId.
correlationId INTEGER, -- ID used to identify events that this function call has triggered.
nameId INTEGER NOT NULL REFERENCES StringIds(id), -- StringId of the function name.
returnValue INTEGER NOT NULL, -- Return value of the function call.
callchainId INTEGER REFERENCES CUDA_CALLCHAINS(id) -- ID of the attached callchain.
"""
pid_to_devices = link_pid_with_devices(conn)
per_device_api_rows = defaultdict(list)
per_device_api_events = defaultdict(list)
# event type 0 is TRACE_PROCESS_EVENT_CUDA_RUNTIME
for row in conn.execute(f"SELECT start, end, globalTid / 0x1000000 % 0x1000000 AS PID, globalTid % 0x1000000 AS TID, correlationId, nameId FROM CUPTI_ACTIVITY_KIND_RUNTIME;"):
text = strings[row['nameId']]
pid = row['PID']
tid = row['TID']
correlationId = row['correlationId']
per_device_api_rows[pid_to_devices[pid]].append(row)
event = {
"name": text,
"ph": "X", # Complete Event (Begin + End event)
"cat": "cuda_api",
"ts": munge_time(row["start"]),
"dur": munge_time(row["end"] - row["start"]),
"tid": "CUDA API Thread {}".format(tid),
"pid": "Device {}".format(pid_to_devices[pid]),
"args": {
"correlationId": correlationId,
},
}
per_device_api_events[pid_to_devices[pid]].append(event)
return per_device_api_rows, per_device_api_events
def _find_overlapping_intervals(nvtx_rows, cuda_api_rows):
mixed_rows = []
for nvtx_row in nvtx_rows:
start = nvtx_row["start"]
end = nvtx_row["end"]
mixed_rows.append((start, 1, "nvtx", nvtx_row))
mixed_rows.append((end, -1, "nvtx", nvtx_row))
for cuda_api_row in cuda_api_rows:
start = cuda_api_row["start"]
end = cuda_api_row["end"]
mixed_rows.append((start, 1, "cuda_api", cuda_api_row))
mixed_rows.append((end, -1, "cuda_api", cuda_api_row))
mixed_rows.sort(key=lambda x: (x[0], x[1], x[2]))
active_intervals = []
result = defaultdict(list)
for _, event_type, event_origin, orig_event in mixed_rows:
if event_type == 1:
# start
if event_origin == "nvtx":
active_intervals.append(orig_event)
else:
for event in active_intervals:
result[event].append(orig_event)
else:
# end
if event_origin == "nvtx":
active_intervals.remove(orig_event)
return result
def link_nvtx_events_to_kernel_events(strings: dict,
pid_to_device: dict[int, int],
per_device_nvtx_rows: dict[int, list],
per_device_cuda_api_rows: dict[int, list],
per_device_cuda_kernel_rows: dict[int, list],
per_device_kernel_events: dict[int, list]):
"""
Link NVTX events to cupti kernel events. This is done by first matching
the nvtx ranges with CUDA API calls by timestamp. Then, retrieve the
corresponding kernel events using the correlationId from CUDA API calls.
"""
result = {}
for device in pid_to_device.values():
event_map = _find_overlapping_intervals(per_device_nvtx_rows[device], per_device_cuda_api_rows[device])
correlation_id_map = defaultdict(dict)
for cuda_api_row in per_device_cuda_api_rows[device]:
correlation_id_map[cuda_api_row["correlationId"]]["cuda_api"] = cuda_api_row
for kernel_row, kernel_trace_event in zip(per_device_cuda_kernel_rows[device], per_device_kernel_events[device]):
correlation_id_map[kernel_row["correlationId"]]["kernel"] = kernel_row
correlation_id_map[kernel_row["correlationId"]]["kernel_trace_event"] = kernel_trace_event
for nvtx_row, cuda_api_rows in event_map.items():
kernel_start_time = None
kernel_end_time = None
for cuda_api_row in cuda_api_rows:
if "kernel" not in correlation_id_map[cuda_api_row["correlationId"]]:
# other cuda api event, ignore
continue
kernel_row = correlation_id_map[cuda_api_row["correlationId"]]["kernel"]
kernel_trace_event = correlation_id_map[cuda_api_row["correlationId"]]["kernel_trace_event"]
if "NVTXRegions" not in kernel_trace_event["args"]:
kernel_trace_event["args"]["NVTXRegions"] = []
kernel_trace_event["args"]["NVTXRegions"].append(nvtx_row["text"])
if kernel_start_time is None or kernel_start_time > kernel_row["start"]:
kernel_start_time = kernel_row["start"]
if kernel_end_time is None or kernel_end_time < kernel_row["end"]:
kernel_end_time = kernel_row["end"]
if kernel_start_time is not None and kernel_end_time is not None:
result[nvtx_row] = (kernel_start_time, kernel_end_time)
return result
def parse_all_events(conn: sqlite3.Connection, strings: dict, activities=None, event_prefix=None, color_scheme={}):
if activities is None:
activities = [ActivityType.KERNEL, ActivityType.NVTX_CPU, ActivityType.NVTX_KERNEL]
if ActivityType.KERNEL in activities or ActivityType.NVTX_KERNEL in activities:
per_device_kernel_rows, per_device_kernel_events = parse_cupti_kernel_events(conn, strings)
if ActivityType.NVTX_CPU in activities or ActivityType.NVTX_KERNEL in activities:
per_device_nvtx_rows, per_device_nvtx_events = parse_nvtx_events(conn, event_prefix=event_prefix, color_scheme=color_scheme)
if ActivityType.CUDA_API in activities or ActivityType.NVTX_KERNEL in activities:
per_device_cuda_api_rows, per_device_cuda_api_events = parse_cuda_api_events(conn, strings)
if ActivityType.NVTX_KERNEL in activities:
pid_to_device = link_pid_with_devices(conn)
nvtx_kernel_event_map = link_nvtx_events_to_kernel_events(strings, pid_to_device, per_device_nvtx_rows, per_device_cuda_api_rows, per_device_kernel_rows, per_device_kernel_events)
traceEvents = []
if ActivityType.KERNEL in activities:
for k, v in per_device_kernel_events.items():
traceEvents.extend(v)
if ActivityType.NVTX_CPU in activities:
for k, v in per_device_nvtx_events.items():
traceEvents.extend(v)
if ActivityType.CUDA_API in activities:
for k, v in per_device_cuda_api_events.items():
traceEvents.extend(v)
if ActivityType.NVTX_KERNEL in activities:
for nvtx_event, (kernel_start_time, kernel_end_time) in nvtx_kernel_event_map.items():
event = {
"name": nvtx_event["text"] or "",
"ph": "X", # Complete Event (Begin + End event)
"cat": "nvtx-kernel",
"ts": munge_time(kernel_start_time),
"dur": munge_time(kernel_end_time - kernel_start_time),
"tid": "NVTX Kernel Thread {}".format(nvtx_event["tid"]),
"pid": "Device {}".format(pid_to_device[nvtx_event["pid"]]),
"args": {
# TODO: More
},
}
traceEvents.append(event)
return traceEvents
def nsys2json():
args = parse_args()
conn = sqlite3.connect(args.filename)
conn.row_factory = sqlite3.Row
strings = {}
for r in conn.execute("SELECT id, value FROM StringIds"):
strings[r["id"]] = r["value"]
traceEvents = parse_all_events(conn, strings, activities=args.activity_type, event_prefix=args.nvtx_event_prefix, color_scheme=args.nvtx_color_scheme)
# make the timelines appear in pid and tid order
traceEvents.sort(key=lambda x: (x["pid"], x["tid"]))
with open(args.output, 'w') as f:
json.dump(traceEvents, f)
if __name__ == "__main__":
nsys2json()