
Commit d49357c

Fixed failing python tests.
1 parent 565568c commit d49357c

18 files changed (+93, -55 lines)

docs/Python-LLAPI-Documentation.md

+11

@@ -21,6 +21,7 @@
 * [random\_action](#mlagents_envs.base_env.ActionSpec.random_action)
 * [create\_continuous](#mlagents_envs.base_env.ActionSpec.create_continuous)
 * [create\_discrete](#mlagents_envs.base_env.ActionSpec.create_discrete)
+* [create\_hybrid](#mlagents_envs.base_env.ActionSpec.create_hybrid)
 * [DimensionProperty](#mlagents_envs.base_env.DimensionProperty)
 * [UNSPECIFIED](#mlagents_envs.base_env.DimensionProperty.UNSPECIFIED)
 * [NONE](#mlagents_envs.base_env.DimensionProperty.NONE)

@@ -412,6 +413,16 @@ Creates an ActionSpec that is homogenously continuous

 Creates an ActionSpec that is homogenously discrete

+<a name="mlagents_envs.base_env.ActionSpec.create_hybrid"></a>
+#### create\_hybrid
+
+```python
+| @staticmethod
+| create_hybrid(continuous_size: int, discrete_branches: Tuple[int]) -> "ActionSpec"
+```
+
+Creates a hybrid ActionSpec
+
 <a name="mlagents_envs.base_env.DimensionProperty"></a>
 ## DimensionProperty Objects

ml-agents-envs/mlagents_envs/base_env.py

+9

@@ -446,6 +446,15 @@ def create_discrete(discrete_branches: Tuple[int]) -> "ActionSpec":
         """
         return ActionSpec(0, discrete_branches)

+    @staticmethod
+    def create_hybrid(
+        continuous_size: int, discrete_branches: Tuple[int]
+    ) -> "ActionSpec":
+        """
+        Creates a hybrid ActionSpec
+        """
+        return ActionSpec(continuous_size, discrete_branches)
+

 class DimensionProperty(IntFlag):
     """

ml-agents-envs/mlagents_envs/rpc_utils.py

+3 -1

@@ -122,7 +122,9 @@ def process_pixels(
         image = Image.open(image_fp)
         # Normally Image loads lazily, load() forces it to do loading in the timer scope.
         image.load()
-        image_arrays.append(np.moveaxis(np.array(image, dtype=np.float32) / 255.0, -1, 0))
+        image_arrays.append(
+            np.moveaxis(np.array(image, dtype=np.float32) / 255.0, -1, 0)
+        )

         # Look for the next header, starting from the current stream location
         try:
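
The rewrapped call is where `process_pixels` moves PIL's channels-last array to the channels-first (NCHW) layout this commit standardizes on; a standalone sketch of that conversion (shapes illustrative):

```python
import numpy as np

# PIL images convert to arrays in HWC (height, width, channels) order;
# the trainers now expect CHW, so the channel axis moves to the front.
hwc = np.random.rand(84, 84, 3).astype(np.float32) * 255.0
chw = np.moveaxis(hwc / 255.0, -1, 0)
assert chw.shape == (3, 84, 84)
```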

ml-agents/mlagents/trainers/tests/mock_brain.py

+24 -1

@@ -212,7 +212,24 @@ def setup_test_behavior_specs(
         action_spec = ActionSpec.create_discrete(tuple(vector_action_space))
     else:
         action_spec = ActionSpec.create_continuous(vector_action_space)
-    observation_shapes = [(84, 84, 3)] * int(use_visual) + [(vector_obs_space,)]
+    observation_shapes = [(3, 84, 84)] * int(use_visual) + [(vector_obs_space,)]
+    obs_spec = create_observation_specs_with_shapes(observation_shapes)
+    behavior_spec = BehaviorSpec(obs_spec, action_spec)
+    return behavior_spec
+
+
+def setup_test_hybrid_behavior_specs(
+    use_visual=False,
+    continuous_action_space=3,
+    discrete_action_space=None,
+    vector_obs_space=1,
+):
+    if discrete_action_space is None:
+        discrete_action_space = [2]
+    action_spec = ActionSpec.create_hybrid(
+        continuous_action_space, tuple(discrete_action_space)
+    )
+    observation_shapes = [(3, 84, 84)] * int(use_visual) + [(vector_obs_space,)]
     obs_spec = create_observation_specs_with_shapes(observation_shapes)
     behavior_spec = BehaviorSpec(obs_spec, action_spec)
     return behavior_spec

@@ -234,3 +251,9 @@ def create_mock_banana_behavior_specs():
     return setup_test_behavior_specs(
         True, True, vector_action_space=[3, 3, 3, 2], vector_obs_space=0
     )
+
+
+def create_visual_food_collector_specs():
+    return setup_test_hybrid_behavior_specs(
+        True, continuous_action_space=3, discrete_action_space=[2], vector_obs_space=1
+    )
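
A quick sketch of what the new helper yields (field names as in `mlagents_envs.base_env`; the assertions follow the defaults in the code above):

```python
from mlagents.trainers.tests import mock_brain as mb

specs = mb.create_visual_food_collector_specs()
# Hybrid action space: 3 continuous actions plus one discrete branch of size 2.
assert specs.action_spec.continuous_size == 3
assert specs.action_spec.discrete_branches == (2,)
# One channels-first visual observation plus a 1-dim vector observation.
assert [tuple(obs.shape) for obs in specs.observation_specs] == [(3, 84, 84), (1,)]
```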
@@ -1,16 +1,7 @@
 {
-    "param_1": {
-        "lesson_num": 2
-    },
-    "param_2": {
-        "lesson_num": 0
-    },
-    "param_3": {
-        "lesson_num": 0
-    },
     "metadata": {
         "stats_format_version": "0.3.0",
-        "mlagents_version": "0.29.0",
-        "torch_version": "1.8.1"
+        "mlagents_version": "0.31.0.dev0",
+        "torch_version": "1.11.0+cu102"
     }
-}
+}

ml-agents/mlagents/trainers/tests/simple_test_envs.py

+1 -1

@@ -20,7 +20,7 @@
 from mlagents.trainers.tests.dummy_config import create_observation_specs_with_shapes

 OBS_SIZE = 1
-VIS_OBS_SIZE = (20, 20, 3)
+VIS_OBS_SIZE = (3, 20, 20)
 VAR_LEN_SIZE = (10, 5)
 STEP_SIZE = 0.2

ml-agents/mlagents/trainers/tests/test_rpc_utils.py

+1 -1

@@ -69,7 +69,7 @@ def generate_compressed_data(in_array: np.ndarray) -> bytes:
     num_images = (num_channels + 2) // 3
     # Split the input image into batches of 3 channels.
     for i in range(num_images):
-        sub_image = image_arr[3 * i: 3 * i + 3, ...]
+        sub_image = image_arr[3 * i : 3 * i + 3, ...]
         if (i == num_images - 1) and (num_channels % 3) != 0:
             # Pad zeros
             zero_shape = list(in_array.shape)

ml-agents/mlagents/trainers/tests/torch_entities/test_bcmodule.py

+12 -10

@@ -123,26 +123,28 @@ def test_bcmodule_rnn_update(is_sac):
     assert_stats_are_float(stats)


-# Test with discrete control and visual observations
+# Test with hybrid control and visual observations
 @pytest.mark.parametrize("is_sac", [True, False], ids=["sac", "ppo"])
-def test_bcmodule_dc_visual_update(is_sac):
-    mock_specs = mb.create_mock_banana_behavior_specs()
+def test_bcmodule_hybrid_visual_updates(is_sac):
+    mock_specs = mb.create_visual_food_collector_specs()
     bc_settings = BehavioralCloningSettings(
-        demo_path=os.path.dirname(os.path.abspath(__file__)) + "/" + "testdcvis.demo"
+        demo_path=os.path.dirname(os.path.abspath(__file__))
+        + "/"
+        + "testhybridvis.demo"
     )
     bc_module = create_bc_module(mock_specs, bc_settings, False, is_sac)
     stats = bc_module.update()
     assert_stats_are_float(stats)


-# Test with discrete control, visual observations and RNN
-
+# Test with hybrid control, visual observations and rnn
 @pytest.mark.parametrize("is_sac", [True, False], ids=["sac", "ppo"])
-def test_bcmodule_rnn_dc_update(is_sac):
-    mock_specs = mb.create_mock_banana_behavior_specs()
+def test_bcmodule_rnn_hybrid_update(is_sac):
+    mock_specs = mb.create_visual_food_collector_specs()
     bc_settings = BehavioralCloningSettings(
-        demo_path=os.path.dirname(os.path.abspath(__file__)) + "/" + "testdcvis.demo"
+        demo_path=os.path.dirname(os.path.abspath(__file__))
+        + "/"
+        + "testhybridvis.demo"
     )
     bc_module = create_bc_module(mock_specs, bc_settings, True, is_sac)
     stats = bc_module.update()

ml-agents/mlagents/trainers/tests/torch_entities/test_encoders.py

+5 -5

@@ -73,7 +73,7 @@ def test_vector_encoder(mock_normalizer):
     mock_normalizer_inst.copy_from.assert_called_with(mock_normalizer_inst)


-@pytest.mark.parametrize("image_size", [(36, 36, 3), (84, 84, 4), (256, 256, 5)])
+@pytest.mark.parametrize("image_size", [(3, 36, 36), (4, 84, 84), (5, 256, 256)])
 @pytest.mark.parametrize(
     "vis_class",
     [

@@ -86,7 +86,7 @@ def test_vector_encoder(mock_normalizer):
 )
 def test_visual_encoder(vis_class, image_size):
     num_outputs = 128
-    enc = vis_class(image_size[0], image_size[1], image_size[2], num_outputs)
+    enc = vis_class(image_size[1], image_size[2], image_size[0], num_outputs)
     # Note: NCHW not NHWC
     sample_input = torch.ones((1, image_size[0], image_size[1], image_size[2]))
     encoding = enc(sample_input)

@@ -106,17 +106,17 @@
 @pytest.mark.slow
 def test_visual_encoder_trains(vis_class, size):
     torch.manual_seed(0)
-    image_size = (size, size, 1)
+    image_size = (1, size, size)
     batch = 100

     inputs = torch.cat(
         [torch.zeros((batch,) + image_size), torch.ones((batch,) + image_size)], dim=0
     )
     target = torch.cat([torch.zeros((batch,)), torch.ones((batch,))], dim=0)
-    enc = vis_class(image_size[0], image_size[1], image_size[2], 1)
+    enc = vis_class(image_size[1], image_size[2], image_size[0], 1)
     optimizer = torch.optim.Adam(enc.parameters(), lr=0.001)

-    for _ in range(15):
+    for _ in range(25):
         prediction = enc(inputs)[:, 0]
         loss = torch.mean((target - prediction) ** 2)
         optimizer.zero_grad()
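
The argument shuffle reflects that `image_size` is now stored channels-first while the encoder constructors keep their `(height, width, channels, output_size)` signature; a hedged sketch of the convention (encoder class and module path assumed from this repo's layout):

```python
import torch
from mlagents.trainers.torch_entities.encoders import SimpleVisualEncoder

c, h, w = 3, 36, 36                      # channels-first size tuple
enc = SimpleVisualEncoder(h, w, c, 128)  # constructor still takes (h, w, c, out)
out = enc(torch.ones((1, c, h, w)))      # but inputs are batched NCHW
assert out.shape == (1, 128)
```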

ml-agents/mlagents/trainers/tests/torch_entities/test_hybrid.py

+2 -2

@@ -33,7 +33,7 @@ def test_hybrid_ppo(action_size):
         PPO_TORCH_CONFIG,
         hyperparameters=new_hyperparams,
         network_settings=new_network_settings,
-        max_steps=10000,
+        max_steps=20000,
     )
     check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)

@@ -90,7 +90,7 @@ def test_hybrid_sac(action_size):
         buffer_init_steps=0,
     )
     config = attr.evolve(
-        SAC_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=4000
+        SAC_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=8000
     )
     check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)

ml-agents/mlagents/trainers/tests/torch_entities/test_networks.py

+7 -7

@@ -74,15 +74,15 @@ def test_networkbody_lstm():
 def test_networkbody_visual():
     torch.manual_seed(1)
     vec_obs_size = 4
-    obs_size = (84, 84, 3)
+    obs_size = (3, 84, 84)
     network_settings = NetworkSettings()
     obs_shapes = [(vec_obs_size,), obs_size]

     networkbody = NetworkBody(
         create_observation_specs_with_shapes(obs_shapes), network_settings
     )
     optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-3)
-    sample_obs = 0.1 * torch.ones((1, 84, 84, 3), dtype=torch.float32)
+    sample_obs = 0.1 * torch.ones((1, 3, 84, 84), dtype=torch.float32)
     sample_vec_obs = torch.ones((1, vec_obs_size), dtype=torch.float32)
     obs = [sample_vec_obs] + [sample_obs]
     loss = 1

@@ -200,7 +200,7 @@ def test_multinetworkbody_visual(with_actions):
     act_size = 2
     n_agents = 3
     obs_size = 4
-    vis_obs_size = (84, 84, 3)
+    vis_obs_size = (3, 84, 84)
     network_settings = NetworkSettings()
     obs_shapes = [(obs_size,), vis_obs_size]
     action_spec = ActionSpec(act_size, tuple(act_size for _ in range(act_size)))

@@ -209,7 +209,7 @@
     )
     optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-3)
     sample_obs = [
-        [0.1 * torch.ones((1, obs_size))] + [0.1 * torch.ones((1, 84, 84, 3))]
+        [0.1 * torch.ones((1, obs_size))] + [0.1 * torch.ones((1, 3, 84, 84))]
         for _ in range(n_agents)
     ]
     # simulate baseline in POCA

@@ -273,7 +273,7 @@ def test_valuenetwork():
 @pytest.mark.parametrize("lstm", [True, False])
 def test_actor_critic(lstm, shared):
     obs_size = 4
-    vis_obs_size = (84, 84, 3)
+    vis_obs_size = (3, 84, 84)
     network_settings = NetworkSettings(
         memory=NetworkSettings.MemorySettings() if lstm else None, normalize=True
     )

@@ -291,14 +291,14 @@
     critic = ValueNetwork(stream_names, obs_spec, network_settings)
     if lstm:
         sample_vis_obs = torch.ones(
-            (network_settings.memory.sequence_length, 84, 84, 3), dtype=torch.float32
+            (network_settings.memory.sequence_length, 3, 84, 84), dtype=torch.float32
         )
         sample_obs = torch.ones((network_settings.memory.sequence_length, obs_size))
         memories = torch.ones(
             (1, network_settings.memory.sequence_length, actor.memory_size)
         )
     else:
-        sample_vis_obs = 0.1 * torch.ones((1, 84, 84, 3), dtype=torch.float32)
+        sample_vis_obs = 0.1 * torch.ones((1, 3, 84, 84), dtype=torch.float32)
         sample_obs = torch.ones((1, obs_size))
         memories = torch.tensor([])
     # memories isn't always set to None, the network should be able to

ml-agents/mlagents/trainers/tests/torch_entities/test_reward_providers/test_curiosity.py

+4 -4

@@ -47,11 +47,11 @@ def test_construction(behavior_spec: BehaviorSpec) -> None:
             create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS
         ),
         BehaviorSpec(
-            create_observation_specs_with_shapes([(10,), (64, 66, 3), (84, 86, 1)]),
+            create_observation_specs_with_shapes([(10,), (3, 64, 66), (1, 84, 86)]),
             ACTIONSPEC_CONTINUOUS,
         ),
         BehaviorSpec(
-            create_observation_specs_with_shapes([(10,), (64, 66, 1)]),
+            create_observation_specs_with_shapes([(10,), (1, 64, 66)]),
            ACTIONSPEC_TWODISCRETE,
         ),
         BehaviorSpec(

@@ -72,7 +72,7 @@ def test_factory(behavior_spec: BehaviorSpec) -> None:
     "behavior_spec",
     [
         BehaviorSpec(
-            create_observation_specs_with_shapes([(10,), (64, 66, 3), (24, 26, 1)]),
+            create_observation_specs_with_shapes([(10,), (3, 64, 66), (1, 24, 26)]),
             ACTIONSPEC_CONTINUOUS,
         ),
         BehaviorSpec(

@@ -125,7 +125,7 @@ def test_continuous_action_prediction(behavior_spec: BehaviorSpec, seed: int) ->
     "behavior_spec",
     [
         BehaviorSpec(
-            create_observation_specs_with_shapes([(10,), (64, 66, 3), (24, 26, 1)]),
+            create_observation_specs_with_shapes([(10,), (3, 64, 66), (1, 24, 26)]),
             ACTIONSPEC_CONTINUOUS,
         ),
         BehaviorSpec(

ml-agents/mlagents/trainers/tests/torch_entities/test_reward_providers/test_gail.py

+2 -2

@@ -61,7 +61,7 @@ def test_factory(behavior_spec: BehaviorSpec) -> None:
     "behavior_spec",
     [
         BehaviorSpec(
-            create_observation_specs_with_shapes([(8,), (24, 26, 1)]),
+            create_observation_specs_with_shapes([(8,), (1, 24, 26)]),
             ACTIONSPEC_CONTINUOUS,
         ),
         BehaviorSpec(

@@ -116,7 +116,7 @@ def test_reward_decreases(
     "behavior_spec",
     [
         BehaviorSpec(
-            create_observation_specs_with_shapes([(8,), (24, 26, 1)]),
+            create_observation_specs_with_shapes([(8,), (1, 24, 26)]),
             ACTIONSPEC_CONTINUOUS,
         ),
         BehaviorSpec(
BehaviorSpec(

ml-agents/mlagents/trainers/tests/torch_entities/test_reward_providers/test_rnd.py

+3 -3

@@ -46,11 +46,11 @@ def test_construction(behavior_spec: BehaviorSpec) -> None:
             create_observation_specs_with_shapes([(10,)]), ACTIONSPEC_CONTINUOUS
         ),
         BehaviorSpec(
-            create_observation_specs_with_shapes([(10,), (64, 66, 3), (84, 86, 1)]),
+            create_observation_specs_with_shapes([(10,), (3, 64, 66), (1, 84, 86)]),
             ACTIONSPEC_CONTINUOUS,
         ),
         BehaviorSpec(
-            create_observation_specs_with_shapes([(10,), (64, 66, 1)]),
+            create_observation_specs_with_shapes([(10,), (1, 64, 66)]),
             ACTIONSPEC_TWODISCRETE,
         ),
         BehaviorSpec(

@@ -71,7 +71,7 @@ def test_factory(behavior_spec: BehaviorSpec) -> None:
     "behavior_spec",
     [
         BehaviorSpec(
-            create_observation_specs_with_shapes([(10,), (64, 66, 3), (24, 26, 1)]),
+            create_observation_specs_with_shapes([(10,), (3, 64, 66), (1, 24, 26)]),
             ACTIONSPEC_CONTINUOUS,
         ),
         BehaviorSpec(

ml-agents/mlagents/trainers/tests/torch_entities/test_simple_rl.py

+3 -3

@@ -93,7 +93,7 @@ def test_var_len_obs_and_goal_poca(num_vis, num_vector, num_var_len, conditionin
         POCA_TORCH_CONFIG,
         hyperparameters=new_hyperparams,
         network_settings=new_network,
-        max_steps=5000,
+        max_steps=20000,
     )
     check_environment_trains(env, {BRAIN_NAME: config})

@@ -207,7 +207,7 @@ def test_visual_advanced_ppo(vis_encode_type, num_visual):
         num_visual=num_visual,
         num_vector=0,
         step_size=0.5,
-        vis_obs_size=(5, 5, 5) if vis_encode_type == "match3" else (36, 36, 3),
+        vis_obs_size=(5, 5, 5) if vis_encode_type == "match3" else (3, 36, 36),
     )
     new_networksettings = attr.evolve(
         SAC_TORCH_CONFIG.network_settings, vis_encode_type=EncoderType(vis_encode_type)

@@ -311,7 +311,7 @@ def test_visual_advanced_sac(vis_encode_type, num_visual):
         num_visual=num_visual,
         num_vector=0,
         step_size=0.5,
-        vis_obs_size=(5, 5, 5) if vis_encode_type == "match3" else (36, 36, 3),
+        vis_obs_size=(5, 5, 5) if vis_encode_type == "match3" else (3, 36, 36),
     )
     new_networksettings = attr.evolve(
         SAC_TORCH_CONFIG.network_settings, vis_encode_type=EncoderType(vis_encode_type)
