Skip to content

Commit c9e651e

Browse files
eugenevinitsky, daphne-cornelisse, shacklettbp
authored
Bps/memory leak fix (#133)
* Add script to track gpu memory while running the sim. * Minor fixes * Small fixes * param change * Fix memory leak --------- Co-authored-by: Daphne Cornelisse <[email protected]> Co-authored-by: Brennan Shacklett <[email protected]>
1 parent 9b2304b commit c9e651e

File tree

8 files changed

+165
-24
lines changed

8 files changed

+165
-24
lines changed

Diff for: examples/benchmarks/gpudrive_0505.csv

-10
This file was deleted.

Diff for: examples/benchmarks/gpudrive_0505_after_fix.csv

-5
This file was deleted.

Diff for: examples/benchmarks/profile_gpu_memory.py

+156
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
import os
2+
import torch
3+
import matplotlib.pyplot as plt
4+
import nvidia_smi
5+
6+
import gpudrive
7+
8+
9+
def make_sim(
    data_dir,
    num_worlds,
    device,
    max_num_objects,
):
    """Build the `gpudrive.SimManager` used by the memory benchmark.

    Args:
        data_dir: Forwarded to the simulator as `json_path`.
        num_worlds: Number of worlds requested from the simulator.
        device: The string "cpu" selects the CPU execution mode; any other
            value selects the CUDA execution mode.
        max_num_objects: Stored on the parameters as
            `maxNumControlledVehicles`.

    Returns:
        The constructed `gpudrive.SimManager` (created with
        `auto_reset=False` and `gpu_id=0`).
    """
    # Reward configuration
    rewards = gpudrive.RewardParams()
    rewards.rewardType = gpudrive.RewardType.OnGoalAchieved
    rewards.distanceToGoalThreshold = 1.0
    rewards.distanceToExpertThreshold = 1.0

    # Simulator configuration
    config = gpudrive.Parameters()
    config.polylineReductionThreshold = 0.5
    config.observationRadius = 10.0
    config.collisionBehaviour = gpudrive.CollisionBehaviour.AgentRemoved
    config.datasetInitOptions = gpudrive.DatasetInitOptions.PadN
    config.rewardParams = rewards
    config.IgnoreNonVehicles = True
    config.maxNumControlledVehicles = max_num_objects

    # Pick the execution backend from the requested device string.
    if device == "cpu":
        mode = gpudrive.madrona.ExecMode.CPU
    else:
        mode = gpudrive.madrona.ExecMode.CUDA

    return gpudrive.SimManager(
        exec_mode=mode,
        gpu_id=0,
        num_worlds=num_worlds,
        auto_reset=False,
        json_path=data_dir,
        params=config,
    )
45+
46+
47+
def main(
    total_timesteps,
    num_worlds,
    episode_length,
    max_num_objects,
    data_dir,
    device="cuda",
):
    """Step the simulator with random actions and sample GPU memory usage.

    Every 200th global step (starting at step 0) the memory counters of GPU
    index 0 are read through NVML and recorded.

    Args:
        total_timesteps: Number of simulator steps to run.
        num_worlds: Number of worlds; every world is reset before the run
            and again each time an episode ends.
        episode_length: Number of steps after which all worlds are reset.
        max_num_objects: Size of the per-world agent dimension of the random
            action tensor; also forwarded to `make_sim`.
        data_dir: Forwarded to `make_sim`.
        device: Forwarded to `make_sim`. Defaults to "cuda".

    Returns:
        A tuple `(time_checkpoints, free_memory, used_memory, perc_used)` of
        equally long lists: the sampled global steps, the `free` and `used`
        values exactly as reported by NVML, and the used/total ratio in
        percent.
    """
    # Storage
    time_checkpoints = []
    free_memory = []
    used_memory = []
    perc_used = []

    # MAKE SIM
    sim = make_sim(
        data_dir=data_dir,
        num_worlds=num_worlds,
        device=device,
        max_num_objects=max_num_objects,
    )

    for sim_idx in range(num_worlds):
        sim.reset(sim_idx)

    pid = os.getpid()
    print(f"PID: {pid}")

    # Initialise NVML once for the whole run instead of once per sample:
    # NVML reference-counts initialisations, so repeated init calls without
    # a matching shutdown keep the library's count growing.
    nvidia_smi.nvmlInit()
    try:
        handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)

        # RUN SIMULATOR
        episode_step = 0
        for global_step in range(total_timesteps):

            rand_actions = torch.randint(
                0, 9, size=(num_worlds, max_num_objects, 3)
            )

            # Apply actions
            sim.action_tensor().to_torch().copy_(rand_actions)

            # Step dynamics
            sim.step()

            episode_step += 1

            # LOG GPU MEMORY
            if global_step % 200 == 0:
                memory_info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
                percentage = (memory_info.used / memory_info.total) * 100

                time_checkpoints.append(global_step)
                free_memory.append(memory_info.free)
                used_memory.append(memory_info.used)
                perc_used.append(percentage)

                print(
                    f"Global step: {global_step} | Perc. memory used: {percentage:.3f} % \n"
                )

            # RESET if episode is done
            if episode_step == episode_length:
                for sim_idx in range(num_worlds):
                    sim.reset(sim_idx)
                episode_step = 0
    finally:
        # Release the NVML reference even if the simulator raises mid-run.
        nvidia_smi.nvmlShutdown()

    return time_checkpoints, free_memory, used_memory, perc_used
114+
115+
116+
if __name__ == "__main__":
    # The left-hand axis is labelled in MB, while NVML's memory counters are
    # byte counts (see the NVML memory-info documentation), so the raw values
    # are scaled before plotting.
    BYTES_PER_MB = 1_000_000

    (time_checkpoints, free_gpu_mem, used_memory, perc_used,) = main(
        total_timesteps=10_000,
        num_worlds=50,
        episode_length=90,
        max_num_objects=128,
        data_dir="example_data",
    )

    free_gpu_mem_mb = [num_bytes / BYTES_PER_MB for num_bytes in free_gpu_mem]
    used_memory_mb = [num_bytes / BYTES_PER_MB for num_bytes in used_memory]

    # Plot stats: absolute memory on the left, percentage used on the right.
    fig, axs = plt.subplots(1, 2, figsize=(10, 4))
    fig.suptitle("GPU Memory Profiling")
    axs[0].plot(
        time_checkpoints,
        free_gpu_mem_mb,
        label="Free memory",
        linestyle="-",
        marker=".",
    )
    axs[0].plot(
        time_checkpoints,
        used_memory_mb,
        label="Used memory",
        linestyle="-",
        marker=".",
    )
    axs[1].plot(
        time_checkpoints,
        perc_used,
        label="Perc. GPU memory used",
        linestyle="-",
        marker=".",
        color="red",
    )
    axs[0].set_ylabel("Memory (MB)")
    axs[1].set_ylabel("Percentage %")
    axs[0].set_xlabel("Global steps")
    axs[1].set_xlabel("Global steps")
    axs[0].legend(), axs[1].legend()
    plt.tight_layout()
    plt.savefig("gpu_mem_prof.png", dpi=300)

Diff for: examples/benchmarks/waymax.csv

-3
This file was deleted.

Diff for: gpu_mem_prof.png

127 KB
Loading

Diff for: src/consts.hpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@ namespace gpudrive {
88

99
namespace consts {
1010

11-
inline constexpr madrona::CountT kMaxAgentCount = 40;
12-
inline constexpr madrona::CountT kMaxRoadEntityCount = 4500;
13-
inline constexpr madrona::CountT kMaxAgentMapObservationsCount = 4500;
11+
inline constexpr madrona::CountT kMaxAgentCount = 128;
12+
inline constexpr madrona::CountT kMaxRoadEntityCount = 2000;
13+
inline constexpr madrona::CountT kMaxAgentMapObservationsCount = 2000;
1414

1515
// Various world / entity size parameters
1616
inline constexpr float worldLength = 40.f;

Diff for: src/sim.cpp

+5-2
Original file line numberDiff line numberDiff line change
@@ -730,7 +730,7 @@ void Sim::setupTasks(TaskGraphManager &taskgraph_mgr, const Config &cfg)
730730
builder, {moveSystem});
731731

732732
auto findOverlappingEntities =
733-
phys::PhysicsSystem::setupBroadphaseOverlapTasks(
733+
phys::PhysicsSystem::setupStandaloneBroadphaseOverlapTasks(
734734
builder, {broadphase_setup_sys});
735735

736736
auto detectCollisions = builder.addToGraph<
@@ -744,7 +744,10 @@ void Sim::setupTasks(TaskGraphManager &taskgraph_mgr, const Config &cfg)
744744
{detectCollisions});
745745

746746
// Finalize physics subsystem work
747-
auto phys_done = phys::PhysicsSystem::setupCleanupTasks(
747+
auto phys_done = phys::PhysicsSystem::setupStandaloneBroadphaseCleanupTasks(
748+
builder, {agent_zero_vel});
749+
750+
phys_done = phys::PhysicsSystem::setupCleanupTasks(
748751
builder, {agent_zero_vel});
749752

750753
auto reward_sys = builder.addToGraph<ParallelForNode<Engine,

0 commit comments

Comments
 (0)