Skip to content

Commit 30c7ea1

Browse files
committed
upload nsys2json
0 parents  commit 30c7ea1

File tree

3 files changed

+245
-0
lines changed

3 files changed

+245
-0
lines changed

LICENSE

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
Copyright (c) 2017- Facebook, Inc
2+
3+
All rights reserved.
4+
5+
Redistribution and use in source and binary forms, with or without
6+
modification, are permitted provided that the following conditions are met:
7+
8+
1. Redistributions of source code must retain the above copyright
9+
notice, this list of conditions and the following disclaimer.
10+
11+
2. Redistributions in binary form must reproduce the above copyright
12+
notice, this list of conditions and the following disclaimer in the
13+
documentation and/or other materials provided with the distribution.
14+
15+
3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America
16+
and IDIAP Research Institute nor the names of its contributors may be
17+
used to endorse or promote products derived from this software without
18+
specific prior written permission.
19+
20+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23+
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25+
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26+
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27+
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28+
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29+
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30+
POSSIBILITY OF SUCH DAMAGE.

README.md

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# nsys2json
2+
3+
A Python script to convert the output of NVIDIA Nsight Systems (in SQLite format) to JSON in [Google Chrome Trace Event Format](https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview#) for more customizable visualization and analysis. Inspired by and adapted from [nvprof2json](https://github.com/ezyang/nvprof2json).
4+
5+
The SQLite schema used by Nsight Systems is documented [here](https://docs.nvidia.com/nsight-systems/UserGuide/index.html#exporter-sqlite-schema).
6+
7+
## Usage
8+
*If you have a '.qdrep' file, you can convert it first to SQLite format through Nsight Systems [UI](https://developer.nvidia.com/nsight-systems/get-started) or [CLI](https://docs.nvidia.com/nsight-systems/UserGuide/index.html#cli-export-command-switch-options).*
9+
10+
To extract kernel activities and NVTX annotated regions (e.g. [torch.cuda.nvtx.range](https://pytorch.org/docs/stable/generated/torch.cuda.nvtx.range_push.html)):
11+
```bash
12+
python3 nsys2json.py --filename <nsys_sqlite_file> -o <output_json>
13+
```
14+
15+
To show only kernel activities or only NVTX annotated regions, use:
16+
```bash
17+
--activity-type {kernel,nvtx}
18+
```
19+
20+
To filter NVTX regions based on name, use:
21+
```bash
22+
--nvtx-event-prefix <prefix>
23+
```
24+
25+
To apply a custom coloring scheme to NVTX regions, use:
26+
```bash
27+
--nvtx-color-scheme <dict_mapping_regex_to_chrome_colors>
28+
```
29+
e.g.,
30+
```bash
31+
--nvtx-color-scheme '{"comm": "thread_state_iowait", "Layer .* compute": "thread_state_running"}'
32+
```
33+
For the list of available colors, see [here](https://chromium.googlesource.com/external/trace-viewer/+/bf55211014397cf0ebcd9e7090de1c4f84fc3ac0/tracing/tracing/ui/base/color_scheme.html).
34+
35+
## Known Issues
36+
* This script assumes each process in the profile only executes kernels on one GPU. The process ID is used to match NVTX regions to the corresponding device. Changes to the process and thread naming scheme in the JSON file are needed if this assumption is violated.

nsys2json.py

+179
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
import sqlite3
2+
import argparse
3+
import json
4+
import re
5+
6+
# Code adapted from https://github.com/ezyang/nvprof2json
7+
8+
# Command-line interface. Parsing happens at module level because nsys2json()
# reads its configuration from the module-level `args` namespace.
parser = argparse.ArgumentParser(description='Convert nsight systems sqlite output to Google Event Trace compatible JSON.')
# The input path is mandatory: without it sqlite3.connect() would be handed
# None and fail with an opaque TypeError instead of a usage message.
parser.add_argument('--filename', help="Nsight Systems SQLite file to convert.", required=True)
parser.add_argument("-o", "--output", help="Output file name", required=True)
parser.add_argument("-t", "--activity-type", help="Type of activities shown. Default to all.", default=["kernel", "nvtx"], choices=['kernel', 'nvtx'], nargs="+")
parser.add_argument("--nvtx-event-prefix", help="Filter NVTX events by their names' prefix.", type=str, nargs="*")
# The keys of the color scheme are applied with re.search(), so the help text
# describes them as regular expressions rather than plain substrings.
parser.add_argument("--nvtx-color-scheme", help="""Color scheme for NVTX events.
                    Accepts a JSON dict mapping a regular expression to one of chrome tracing colors.
                    Events whose names match the expression will be colored.
                    E.g. {"send": "thread_state_iowait", "recv": "thread_state_iowait", "compute": "thread_state_running"}
                    For details of the color scheme, see links in https://github.com/google/perfetto/issues/208
                    """, type=json.loads, default={})
args = parser.parse_args()
20+
21+
def munge_time(t):
    """Convert an nsys time value from nanoseconds to microseconds.

    Microseconds are the default unit for chrome://tracing. The division is a
    float division, so the result is always a float and any sub-microsecond
    part of `t` is kept as a fraction.
    """
    ns_per_us = 1000.
    return t / ns_per_us
25+
26+
# For reference of the schema, see
27+
# https://docs.nvidia.com/nsight-systems/UserGuide/index.html#exporter-sqlite-schema
28+
29+
def parse_cupti_kernel_events(conn: sqlite3.Connection, strings: dict, traceEvents: list):
    """
    Append one Chrome "complete" (ph "X") event per kernel row to traceEvents.

    Every row of CUPTI_ACTIVITY_KIND_KERNEL becomes an event named after the
    row's shortName (looked up in `strings`, a mapping from StringIds id to
    value), placed on the "Stream <streamId>" thread of the
    "Device <deviceId>" process. Rows are accessed by column name, so `conn`
    must use a name-addressable row factory such as sqlite3.Row.

    Table layout, copied from the docs:
    CUPTI_ACTIVITY_KIND_KERNEL
        start                       INTEGER   NOT NULL,  -- Event start timestamp (ns).
        end                         INTEGER   NOT NULL,  -- Event end timestamp (ns).
        deviceId                    INTEGER   NOT NULL,  -- Device ID.
        contextId                   INTEGER   NOT NULL,  -- Context ID.
        streamId                    INTEGER   NOT NULL,  -- Stream ID.
        correlationId               INTEGER,             -- REFERENCES CUPTI_ACTIVITY_KIND_RUNTIME(correlationId)
        globalPid                   INTEGER,             -- Serialized GlobalId.
        demangledName               INTEGER   NOT NULL,  -- REFERENCES StringIds(id) -- Kernel function name w/ templates
        shortName                   INTEGER   NOT NULL,  -- REFERENCES StringIds(id) -- Base kernel function name
        mangledName                 INTEGER,             -- REFERENCES StringIds(id) -- Raw C++ mangled kernel function name
        launchType                  INTEGER,             -- REFERENCES ENUM_CUDA_KRENEL_LAUNCH_TYPE(id)
        cacheConfig                 INTEGER,             -- REFERENCES ENUM_CUDA_FUNC_CACHE_CONFIG(id)
        registersPerThread          INTEGER   NOT NULL,  -- Number of registers required for each thread executing the kernel.
        gridX                       INTEGER   NOT NULL,  -- X-dimension grid size.
        gridY                       INTEGER   NOT NULL,  -- Y-dimension grid size.
        gridZ                       INTEGER   NOT NULL,  -- Z-dimension grid size.
        blockX                      INTEGER   NOT NULL,  -- X-dimension block size.
        blockY                      INTEGER   NOT NULL,  -- Y-dimension block size.
        blockZ                      INTEGER   NOT NULL,  -- Z-dimension block size.
        staticSharedMemory          INTEGER   NOT NULL,  -- Static shared memory allocated for the kernel (B).
        dynamicSharedMemory         INTEGER   NOT NULL,  -- Dynamic shared memory reserved for the kernel (B).
        localMemoryPerThread        INTEGER   NOT NULL,  -- Amount of local memory reserved for each thread (B).
        localMemoryTotal            INTEGER   NOT NULL,  -- Total amount of local memory reserved for the kernel (B).
        gridId                      INTEGER   NOT NULL,  -- Unique grid ID of the kernel assigned at runtime.
        sharedMemoryExecuted        INTEGER,             -- Shared memory size set by the driver.
        graphNodeId                 INTEGER,             -- REFERENCES CUDA_GRAPH_EVENTS(graphNodeId)
        sharedMemoryLimitConfig     INTEGER              -- REFERENCES ENUM_CUDA_SHARED_MEM_LIMIT_CONFIG(id)
    """
    kernel_rows = conn.execute("SELECT * FROM CUPTI_ACTIVITY_KIND_KERNEL")
    for kernel in kernel_rows:
        traceEvents.append(dict(
            name=strings[kernel["shortName"]],
            ph="X",  # Complete Event (Begin + End event)
            cat="cuda",
            ts=munge_time(kernel["start"]),
            dur=munge_time(kernel["end"] - kernel["start"]),
            tid=f"Stream {kernel['streamId']}",
            pid=f"Device {kernel['deviceId']}",
            args={},  # TODO: More
        ))
75+
76+
def parse_nvtx_events(conn: sqlite3.Connection, traceEvents: list, event_prefix=None, color_scheme=None):
    """
    Append one Chrome "complete" (ph "X") event per NVTX push/pop range to traceEvents.

    Args:
        conn: Connection to the nsys SQLite export. Rows are accessed by
            column name, so it must use a name-addressable row factory such
            as sqlite3.Row.
        traceEvents: List that the generated event dicts are appended to.
        event_prefix: Optional prefix (str) or iterable of prefixes. When
            non-empty, only events whose text matches `<prefix>%` under
            SQLite LIKE for at least one prefix are kept. LIKE wildcards
            inside a prefix are not escaped. None or an empty iterable
            disables the filter.
        color_scheme: Optional dict mapping a regular expression to a chrome
            tracing color name. The first pattern (in dict order) that
            re.search() finds in the event text sets the event's "cname".

    Raises:
        AssertionError: if one PID launched kernels on more than one device,
            or an NVTX event belongs to a PID that launched no kernels. Each
            NVTX event is placed on the device its process ran kernels on,
            so exactly one device per PID is required.

    Ranges whose `end` is NULL are skipped because no duration can be computed
    for them.

    Table layout, copied from the docs:
    NVTX_EVENTS
        start                       INTEGER   NOT NULL,  -- Event start timestamp (ns).
        end                         INTEGER,             -- Event end timestamp (ns).
        eventType                   INTEGER   NOT NULL,  -- NVTX event type enum value. See docs for specifics.
        rangeId                     INTEGER,             -- Correlation ID returned from a nvtxRangeStart call.
        category                    INTEGER,             -- User-controlled ID that can be used to group events.
        color                       INTEGER,             -- Encoded ARGB color value.
        text                        TEXT,                -- Optional text message for non registered strings.
        globalTid                   INTEGER,             -- Serialized GlobalId.
        endGlobalTid                INTEGER,             -- Serialized GlobalId. See docs for specifics.
        textId                      INTEGER   REFERENCES StringIds(id), -- StringId of the NVTX domain registered string.
        domainId                    INTEGER,             -- User-controlled ID that can be used to group events.
        uint64Value                 INTEGER,             -- One of possible payload value union members.
        int64Value                  INTEGER,             -- One of possible payload value union members.
        doubleValue                 REAL,                -- One of possible payload value union members.
        uint32Value                 INTEGER,             -- One of possible payload value union members.
        int32Value                  INTEGER,             -- One of possible payload value union members.
        floatValue                  REAL,                -- One of possible payload value union members.
        jsonTextId                  INTEGER,             -- One of possible payload value union members.
        jsonText                    TEXT                 -- One of possible payload value union members.

    NVTX_EVENT_TYPES
        33 - NvtxCategory
        34 - NvtxMark
        39 - NvtxThread
        59 - NvtxPushPopRange
        60 - NvtxStartEndRange
        75 - NvtxDomainCreate
        76 - NvtxDomainDestroy
    """
    # Map each pid to a device. Assumes each pid is associated with a single device.
    # Explicit raises (instead of assert statements) keep the checks active under
    # `python -O` while preserving the exception type.
    pid_to_device = {}
    for row in conn.execute("SELECT DISTINCT deviceId, globalPid / 0x1000000 % 0x1000000 AS PID FROM CUPTI_ACTIVITY_KIND_KERNEL"):
        kernel_pid = row["PID"]
        if kernel_pid in pid_to_device:
            raise AssertionError(
                f"A single PID ({kernel_pid}) is associated with multiple devices ({pid_to_device[kernel_pid]} and {row['deviceId']}).")
        pid_to_device[kernel_pid] = row["deviceId"]

    # Normalise the prefix filter: a bare string is treated as one prefix, and
    # None / empty means "no filter".
    if isinstance(event_prefix, str):
        event_prefix = [event_prefix]
    prefixes = list(event_prefix) if event_prefix else []

    # eventType 59 is NvtxPushPopRange, which corresponds to torch.cuda.nvtx.range apis
    query = ("SELECT start, end, text, globalTid / 0x1000000 % 0x1000000 AS PID, globalTid % 0x1000000 AS TID "
             "FROM NVTX_EVENTS WHERE NVTX_EVENTS.eventType == 59")
    if prefixes:
        # One bound placeholder per prefix: user text never becomes part of the
        # SQL statement, so quotes in a prefix cannot break or alter the query.
        like_clauses = " OR ".join("NVTX_EVENTS.text LIKE ?" for _ in prefixes)
        query += f" AND ({like_clauses})"
    like_params = [f"{prefix}%" for prefix in prefixes]

    # Compile the color patterns once instead of once per event.
    color_rules = [(re.compile(pattern), color) for pattern, color in (color_scheme or {}).items()]

    for row in conn.execute(query, like_params):
        if row["end"] is None:
            # `end` is nullable in the schema; a range without an end has no duration.
            continue
        text = row['text']
        pid = row['PID']
        tid = row['TID']
        if pid not in pid_to_device:
            raise AssertionError(f"PID {pid} not found in the pid to device map.")
        event = {
            "name": text,
            "ph": "X",  # Complete Event (Begin + End event)
            "cat": "nvtx",
            "ts": munge_time(row["start"]),
            "dur": munge_time(row["end"] - row["start"]),
            "tid": "NVTX Thread {}".format(tid),
            "pid": "Device {}".format(pid_to_device[pid]),
            "args": {
                # TODO: More
            },
        }
        # `text` is nullable in the schema; there is nothing to match against then.
        if text is not None:
            for pattern, color in color_rules:
                if pattern.search(text):
                    event["cname"] = color
                    break
        traceEvents.append(event)
155+
156+
def nsys2json():
    """Convert the SQLite file named by args.filename into a Chrome trace JSON file.

    Reads the module-level `args` namespace: `filename` (input database),
    `activity_type` (which event kinds to export), `nvtx_event_prefix` and
    `nvtx_color_scheme` (forwarded to parse_nvtx_events), and `output`
    (path of the JSON file to write). The output is a JSON array of trace
    events sorted by process and thread label.

    Raises:
        ValueError: if args.activity_type contains an unknown activity name.
    """
    conn = sqlite3.connect(args.filename)
    try:
        # sqlite3.Row lets the parsers address columns by name.
        conn.row_factory = sqlite3.Row

        strings = {r["id"]: r["value"] for r in conn.execute("SELECT id, value FROM StringIds")}

        traceEvents = []
        # dict.fromkeys() drops repeated activity names while keeping their order,
        # so e.g. "-t kernel kernel" does not emit every kernel event twice.
        for activity in dict.fromkeys(args.activity_type):
            if activity == "kernel":
                parse_cupti_kernel_events(conn, strings, traceEvents)
            elif activity == "nvtx":
                parse_nvtx_events(conn, traceEvents, event_prefix=args.nvtx_event_prefix, color_scheme=args.nvtx_color_scheme)
            else:
                raise ValueError(f"Unknown activity type: {activity}")
    finally:
        # Release the database handle even when a parser raises.
        conn.close()

    # make the timelines appear in pid and tid order
    traceEvents.sort(key=lambda x: (x["pid"], x["tid"]))

    with open(args.output, 'w') as f:
        json.dump(traceEvents, f)


if __name__ == "__main__":
    nsys2json()

0 commit comments

Comments
 (0)