Skip to content

Commit b8a9d3c

Browse files
committed
link nvtx ranges with kernel timeline
1 parent 1904c09 commit b8a9d3c

File tree

1 file changed

+182
-39
lines changed

1 file changed

+182
-39
lines changed

nsys2json.py

+182-39
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,16 @@
33
import json
44
import re
55

6+
from collections import defaultdict
7+
8+
_PID_TO_DEVICE = None
9+
610
# Code adapted from https://github.com/ezyang/nvprof2json
711

812
parser = argparse.ArgumentParser(description='Convert nsight systems sqlite output to Google Event Trace compatible JSON.')
913
parser.add_argument('--filename')
1014
parser.add_argument("-o", "--output", help="Output file name", required=True)
11-
parser.add_argument("-t", "--activity-type", help="Type of activities shown. Default to all.", default=["kernel", "nvtx"], choices=['kernel', 'nvtx'], nargs="+")
15+
parser.add_argument("-t", "--activity-type", help="Type of activities shown. Default to all.", default=["kernel", "nvtx-kernel"], choices=['kernel', 'nvtx', "nvtx-kernel"], nargs="+")
1216
parser.add_argument("--nvtx-event-prefix", help="Filter NVTX events by their names' prefix.", type=str, nargs="*")
1317
parser.add_argument("--nvtx-color-scheme", help="""Color scheme for NVTX events.
1418
Accepts a dict mapping a string to one of chrome tracing colors.
@@ -26,7 +30,7 @@ def munge_time(t):
2630
# For reference of the schema, see
2731
# https://docs.nvidia.com/nsight-systems/UserGuide/index.html#exporter-sqlite-schema
2832

29-
def parse_cupti_kernel_events(conn: sqlite3.Connection, strings: dict, traceEvents: list):
33+
def parse_cupti_kernel_events(conn: sqlite3.Connection, strings: dict, traceEvents: list = None):
3034
"""
3135
Copied from the docs:
3236
CUPTI_ACTIVITY_KIND_KERNEL
@@ -58,22 +62,38 @@ def parse_cupti_kernel_events(conn: sqlite3.Connection, strings: dict, traceEven
5862
graphNodeId INTEGER, -- REFERENCES CUDA_GRAPH_EVENTS(graphNodeId)
5963
sharedMemoryLimitConfig INTEGER -- REFERENCES ENUM_CUDA_SHARED_MEM_LIMIT_CONFIG(id)
6064
"""
65+
per_device_kernel_events = defaultdict(list)
6166
for row in conn.execute("SELECT * FROM CUPTI_ACTIVITY_KIND_KERNEL"):
62-
event = {
63-
"name": strings[row["shortName"]],
64-
"ph": "X", # Complete Event (Begin + End event)
65-
"cat": "cuda",
66-
"ts": munge_time(row["start"]),
67-
"dur": munge_time(row["end"] - row["start"]),
68-
"tid": "Stream {}".format(row["streamId"]),
69-
"pid": "Device {}".format(row["deviceId"]),
70-
"args": {
71-
# TODO: More
72-
},
73-
}
74-
traceEvents.append(event)
67+
per_device_kernel_events[row["deviceId"]].append(row)
68+
if traceEvents is not None:
69+
event = {
70+
"name": strings[row["shortName"]],
71+
"ph": "X", # Complete Event (Begin + End event)
72+
"cat": "cuda",
73+
"ts": munge_time(row["start"]),
74+
"dur": munge_time(row["end"] - row["start"]),
75+
"tid": "Stream {}".format(row["streamId"]),
76+
"pid": "Device {}".format(row["deviceId"]),
77+
"args": {
78+
# TODO: More
79+
},
80+
}
81+
traceEvents.append(event)
82+
return per_device_kernel_events
83+
84+
def link_pid_with_devices(conn: sqlite3.Connection):
    """
    Build (once) and return the mapping from process ID to CUDA device ID.

    The mapping is derived from the distinct (deviceId, PID) pairs found in
    CUPTI_ACTIVITY_KIND_KERNEL, where the PID is computed in SQL as
    ``globalPid / 0x1000000 % 0x1000000``. Each PID is assumed to be
    associated with a single device.

    The result is cached in the module-level ``_PID_TO_DEVICE``: once that
    global is populated, later calls return the cached dict without running
    the query again (whatever ``conn`` is passed).

    Args:
        conn: Connection to the exported database. Rows are accessed by
            column name, so the connection must use a name-addressable row
            factory such as ``sqlite3.Row``.

    Returns:
        dict mapping PID -> deviceId.

    Raises:
        AssertionError: if a single PID is associated with multiple devices.
    """
    global _PID_TO_DEVICE
    if _PID_TO_DEVICE is None:
        pid_to_device = {}
        for row in conn.execute("SELECT DISTINCT deviceId, globalPid / 0x1000000 % 0x1000000 AS PID FROM CUPTI_ACTIVITY_KIND_KERNEL"):
            pid = row["PID"]
            device = row["deviceId"]
            if pid in pid_to_device:
                # Raised explicitly instead of through an `assert` statement
                # so the check is not stripped under `python -O`. The
                # exception type is kept for backward compatibility.
                raise AssertionError(
                    f"A single PID ({pid}) is associated with multiple devices ({pid_to_device[pid]} and {device})."
                )
            pid_to_device[pid] = device
        # Only publish the cache after the whole table validated cleanly.
        _PID_TO_DEVICE = pid_to_device
    return _PID_TO_DEVICE
7595

76-
def parse_nvtx_events(conn: sqlite3.Connection, traceEvents: list, event_prefix=None, color_scheme={}):
96+
def parse_nvtx_events(conn: sqlite3.Connection, traceEvents: list = None, event_prefix=None, color_scheme={}):
7797
"""
7898
Copied from the docs:
7999
NVTX_EVENTS
@@ -106,12 +126,6 @@ def parse_nvtx_events(conn: sqlite3.Connection, traceEvents: list, event_prefix=
106126
75 - NvtxDomainCreate
107127
76 - NvtxDomainDestroy
108128
"""
109-
# map each pid to a device. assumes each pid is associated with a single device
110-
pid_to_device = {}
111-
for row in conn.execute("SELECT DISTINCT deviceId, globalPid / 0x1000000 % 0x1000000 AS PID FROM CUPTI_ACTIVITY_KIND_KERNEL"):
112-
assert row["PID"] not in pid_to_device, \
113-
f"A single PID ({row['PID']}) is associated with multiple devices ({pid_to_device[row['PID']]} and {row['deviceId']})."
114-
pid_to_device[row["PID"]] = row["deviceId"]
115129

116130
if event_prefix is None:
117131
match_text = ''
@@ -128,29 +142,156 @@ def parse_nvtx_events(conn: sqlite3.Connection, traceEvents: list, event_prefix=
128142
else:
129143
match_text += " OR "
130144

145+
nvtx_events_per_device = defaultdict(list)
146+
pid_to_device = link_pid_with_devices(conn)
131147
# eventType 59 is NvtxPushPopRange, which corresponds to torch.cuda.nvtx.range apis
132148
for row in conn.execute(f"SELECT start, end, text, globalTid / 0x1000000 % 0x1000000 AS PID, globalTid % 0x1000000 AS TID FROM NVTX_EVENTS WHERE NVTX_EVENTS.eventType == 59{match_text};"):
133149
text = row['text']
134150
pid = row['PID']
135151
tid = row['TID']
136-
assert pid in pid_to_device, f"PID {pid} not found in the pid to device map."
152+
nvtx_events_per_device[pid_to_device[pid]].append(row)
153+
if traceEvents is not None:
154+
assert pid in pid_to_device, f"PID {pid} not found in the pid to device map."
155+
event = {
156+
"name": text,
157+
"ph": "X", # Complete Event (Begin + End event)
158+
"cat": "nvtx",
159+
"ts": munge_time(row["start"]),
160+
"dur": munge_time(row["end"] - row["start"]),
161+
"tid": "NVTX Thread {}".format(tid),
162+
"pid": "Device {}".format(pid_to_device[pid]),
163+
"args": {
164+
# TODO: More
165+
},
166+
}
167+
if color_scheme:
168+
for key, color in color_scheme.items():
169+
if re.search(key, text):
170+
event["cname"] = color
171+
break
172+
traceEvents.append(event)
173+
return nvtx_events_per_device
174+
175+
def parse_cuda_api_events(conn: sqlite3.Connection, strings: dict, traceEvents: list = None):
    """
    Read the CUDA runtime API calls and group them by device.

    Every row of CUPTI_ACTIVITY_KIND_RUNTIME is appended to the list of the
    device that its process maps to (via ``link_pid_with_devices``). When
    ``traceEvents`` is given, a Complete ("X") trace event with category
    "cuda_api" is appended to it for each row as well.

    Schema, copied from the docs:
        CUPTI_ACTIVITY_KIND_RUNTIME
        start           INTEGER NOT NULL,  -- Event start timestamp (ns).
        end             INTEGER NOT NULL,  -- Event end timestamp (ns).
        eventClass      INTEGER NOT NULL,  -- CUDA event class enum value. See docs for specifics.
        globalTid       INTEGER,           -- Serialized GlobalId.
        correlationId   INTEGER,           -- ID used to identify events that this function call has triggered.
        nameId          INTEGER NOT NULL REFERENCES StringIds(id),  -- StringId of the function name.
        returnValue     INTEGER NOT NULL,  -- Return value of the function call.
        callchainId     INTEGER REFERENCES CUDA_CALLCHAINS(id)      -- ID of the attached callchain.

    Args:
        conn: Connection to the exported database; rows are read by column name.
        strings: Lookup from string id to text, used to resolve ``nameId``.
        traceEvents: Optional output list; left untouched when None.

    Returns:
        defaultdict(list) mapping device id -> list of runtime API rows.
    """
    device_of_pid = link_pid_with_devices(conn)
    per_device_api_events = defaultdict(list)
    # NOTE(review): the original comment here said "event type 0 is
    # TRACE_PROCESS_EVENT_CUDA_RUNTIME", but the query applies no eventClass
    # filter -- every row of the table is read.
    query = "SELECT start, end, globalTid / 0x1000000 % 0x1000000 AS PID, globalTid % 0x1000000 AS TID, correlationId, nameId FROM CUPTI_ACTIVITY_KIND_RUNTIME;"
    for api_row in conn.execute(query):
        api_name = strings[api_row["nameId"]]
        # Fails with KeyError for a process that is absent from the map.
        device = device_of_pid[api_row["PID"]]
        per_device_api_events[device].append(api_row)
        if traceEvents is None:
            continue
        traceEvents.append({
            "name": api_name,
            "ph": "X",  # Complete Event (Begin + End event)
            "cat": "cuda_api",
            "ts": munge_time(api_row["start"]),
            "dur": munge_time(api_row["end"] - api_row["start"]),
            "tid": "CUDA API Thread {}".format(api_row["TID"]),
            "pid": "Device {}".format(device),
            "args": {
                "correlationId": api_row["correlationId"],
            },
        })
    return per_device_api_events
212+
213+
def _find_overlapping_intervals(nvtx_events, cuda_api_events):
    """
    Map each NVTX range to the CUDA API calls that start inside it.

    A sweep over the time-sorted range boundaries and API start times is
    used. An API call is attributed to every NVTX range that is open at the
    moment the call starts, so nested ranges all receive the call.

    Ties at one timestamp are resolved as: ranges close first, then API
    calls start, then ranges open. Consequently a call is matched only when
    ``range.start < call.start < range.end`` (both bounds exclusive).

    NVTX ranges whose ``end`` is None or not greater than ``start`` cannot
    contain any call under that rule and are skipped; without the skip
    their end boundary would be processed before their start boundary.

    Args:
        nvtx_events: Iterable of hashable records with "start" and "end".
        cuda_api_events: Iterable of records with "start".

    Returns:
        defaultdict(list) mapping NVTX record -> list of API records in
        order of start time. Ranges without any call are absent.
    """
    # Boundary kinds; their numeric order is the tie-break at equal times.
    close_range, api_start, open_range = 0, 1, 2

    boundaries = []
    for nvtx_event in nvtx_events:
        start = nvtx_event["start"]
        end = nvtx_event["end"]
        if end is None or end <= start:
            continue
        boundaries.append((start, open_range, nvtx_event))
        boundaries.append((end, close_range, nvtx_event))
    for api_event in cuda_api_events:
        # Only the start of an API call matters for the attribution.
        boundaries.append((api_event["start"], api_start, api_event))
    # Sort on (time, kind) only: the records themselves are not orderable.
    boundaries.sort(key=lambda boundary: (boundary[0], boundary[1]))

    active_ranges = []
    result = defaultdict(list)
    for _, kind, event in boundaries:
        if kind == open_range:
            active_ranges.append(event)
        elif kind == close_range:
            active_ranges.remove(event)
        else:
            for nvtx_event in active_ranges:
                result[nvtx_event].append(event)
    return result


def link_nvtx_events_to_kernel_events(strings: dict,
                                      pid_to_device: dict[int, int],
                                      nvtx_events_per_device: dict[int, list],
                                      cuda_api_events_per_device: dict[int, list],
                                      cuda_kernel_events_per_device: dict[int, list]):
    """
    Link NVTX events to cupti kernel events. This is done by first matching
    the nvtx ranges with CUDA API calls by timestamp. Then, retrieve the
    corresponding kernel events using the correlationId from CUDA API calls.

    Args:
        strings: Unused; kept so existing callers keep working.
        pid_to_device: PID -> device id. Only the set of device ids is used.
        nvtx_events_per_device: device id -> NVTX records ("start", "end").
        cuda_api_events_per_device: device id -> CUDA API records
            ("start", "correlationId").
        cuda_kernel_events_per_device: device id -> kernel records
            ("start", "end", "correlationId").

    Returns:
        dict mapping NVTX record -> (earliest kernel start, latest kernel
        end) over all kernels launched by API calls inside that range.
        NVTX ranges that launched no kernel are omitted.
    """
    result = {}
    # Several PIDs may share a device; visit each device once, in first-seen
    # order so the output order stays deterministic.
    for device in dict.fromkeys(pid_to_device.values()):
        # .get() avoids KeyError on plain dicts and avoids inserting empty
        # entries into defaultdict arguments as a side effect.
        nvtx_events = nvtx_events_per_device.get(device, [])
        api_events = cuda_api_events_per_device.get(device, [])
        kernel_events = cuda_kernel_events_per_device.get(device, [])

        # When several kernels share a correlationId the last one wins.
        kernel_by_correlation_id = {
            kernel_event["correlationId"]: kernel_event
            for kernel_event in kernel_events
        }
        event_map = _find_overlapping_intervals(nvtx_events, api_events)
        for nvtx_event, api_events_in_range in event_map.items():
            # API calls without a matching kernel (other CUDA API calls)
            # are ignored.
            kernels = [
                kernel_by_correlation_id[api_event["correlationId"]]
                for api_event in api_events_in_range
                if api_event["correlationId"] in kernel_by_correlation_id
            ]
            if kernels:
                result[nvtx_event] = (
                    min(kernel["start"] for kernel in kernels),
                    max(kernel["end"] for kernel in kernels),
                )
    return result
275+
276+
def parse_nvtx_kernel_events(conn: sqlite3.Connection, strings: dict, traceEvents: list, event_prefix=None, color_scheme={}):
    """
    Emit one trace event per NVTX range, placed on the kernel timeline.

    NVTX ranges, CUDA API calls and kernels are collected per device (none
    of the intermediate parsers is given a trace list, so they add nothing
    to ``traceEvents`` themselves). Each NVTX range that can be linked to
    at least one kernel is then emitted as a Complete ("X") event with
    category "nvtx-kernel", spanning from the earliest start to the latest
    end of the linked kernels.

    Args:
        conn: Connection to the exported database.
        strings: Lookup from string id to text.
        traceEvents: Output list the generated events are appended to.
        event_prefix: Forwarded to ``parse_nvtx_events`` to filter ranges.
        color_scheme: Mapping of regex pattern -> chrome tracing color name.
            The first pattern that matches the range text (``re.search``)
            sets the event's "cname". The default dict is never mutated.
    """
    pid_to_device = link_pid_with_devices(conn)
    nvtx_events_per_device = parse_nvtx_events(conn, event_prefix=event_prefix, color_scheme=color_scheme)
    cuda_api_events_per_device = parse_cuda_api_events(conn, strings)
    cuda_kernel_events_per_device = parse_cupti_kernel_events(conn, strings)
    nvtx_kernel_event_map = link_nvtx_events_to_kernel_events(strings, pid_to_device, nvtx_events_per_device, cuda_api_events_per_device, cuda_kernel_events_per_device)
    for nvtx_event, (kernel_start_time, kernel_end_time) in nvtx_kernel_event_map.items():
        text = nvtx_event["text"]
        event = {
            "name": text,
            "ph": "X",  # Complete Event (Begin + End event)
            "cat": "nvtx-kernel",
            "ts": munge_time(kernel_start_time),
            "dur": munge_time(kernel_end_time - kernel_start_time),
            # The NVTX query aliases these columns as TID / PID.
            "tid": "NVTX Kernel Thread {}".format(nvtx_event["TID"]),
            "pid": "Device {}".format(pid_to_device[nvtx_event["PID"]]),
            "args": {
                # TODO: More
            },
        }
        # parse_nvtx_events only applies colors to events it emits itself,
        # and it emits none here, so the scheme has to be applied to the
        # generated events; otherwise color_scheme would have no effect.
        if color_scheme:
            for key, color in color_scheme.items():
                if re.search(key, text):
                    event["cname"] = color
                    break
        traceEvents.append(event)
155296

156297
def nsys2json():
@@ -167,6 +308,8 @@ def nsys2json():
167308
parse_cupti_kernel_events(conn, strings, traceEvents)
168309
elif activity == "nvtx":
169310
parse_nvtx_events(conn, traceEvents, event_prefix=args.nvtx_event_prefix, color_scheme=args.nvtx_color_scheme)
311+
elif activity == "nvtx-kernel":
312+
parse_nvtx_kernel_events(conn, strings, traceEvents, event_prefix=args.nvtx_event_prefix, color_scheme=args.nvtx_color_scheme)
170313
else:
171314
raise ValueError(f"Unknown activity type: {activity}")
172315
# make the timelines appear in pid and tid order

0 commit comments

Comments
 (0)