From e9dd26b9eac4652650de14f901e08134f824bd9a Mon Sep 17 00:00:00 2001 From: Anthony Date: Mon, 2 Aug 2021 15:32:53 -0500 Subject: [PATCH 01/12] Copied over SB3 env checker --- gym/utils/env_checker.py | 283 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 283 insertions(+) create mode 100644 gym/utils/env_checker.py diff --git a/gym/utils/env_checker.py b/gym/utils/env_checker.py new file mode 100644 index 00000000000..3eb0c3dd261 --- /dev/null +++ b/gym/utils/env_checker.py @@ -0,0 +1,283 @@ +import warnings +from typing import Union + +import gym +import numpy as np +from gym import spaces + +from stable_baselines3.common.preprocessing import is_image_space_channels_first +from stable_baselines3.common.vec_env import DummyVecEnv, VecCheckNan + + +def _is_numpy_array_space(space: spaces.Space) -> bool: + """ + Returns False if provided space is not representable as a single numpy array + (e.g. Dict and Tuple spaces return False) + """ + return not isinstance(space, (spaces.Dict, spaces.Tuple)) + + +def _check_image_input(observation_space: spaces.Box, key: str = "") -> None: + """ + Check that the input will be compatible with Stable-Baselines + when the observation is apparently an image. + """ + if observation_space.dtype != np.uint8: + warnings.warn( + f"It seems that your observation {key} is an image but the `dtype` " + "of your observation_space is not `np.uint8`. " + "If your observation is not an image, we recommend you to flatten the observation " + "to have only a 1D vector" + ) + + if np.any(observation_space.low != 0) or np.any(observation_space.high != 255): + warnings.warn( + f"It seems that your observation space {key} is an image but the " + "upper and lower bounds are not in [0, 255]. " + "Because the CNN policy normalize automatically the observation " + "you may encounter issue if the values are not in that range." 
+ ) + + non_channel_idx = 0 + # Check only if width/height of the image is big enough + if is_image_space_channels_first(observation_space): + non_channel_idx = -1 + + if observation_space.shape[non_channel_idx] < 36 or observation_space.shape[1] < 36: + warnings.warn( + "The minimal resolution for an image is 36x36 for the default `CnnPolicy`. " + "You might need to use a custom feature extractor " + "cf. https://stable-baselines3.readthedocs.io/en/master/guide/custom_policy.html" + ) + + +def _check_unsupported_spaces(env: gym.Env, observation_space: spaces.Space, action_space: spaces.Space) -> None: + """Emit warnings when the observation space or action space used is not supported by Stable-Baselines.""" + + if isinstance(observation_space, spaces.Dict): + nested_dict = False + for space in observation_space.spaces.values(): + if isinstance(space, spaces.Dict): + nested_dict = True + if nested_dict: + warnings.warn( + "Nested observation spaces are not supported by Stable Baselines3 " + "(Dict spaces inside Dict space). " + "You should flatten it to have only one level of keys." + "For example, `dict(space1=dict(space2=Box(), space3=Box()), spaces4=Discrete())` " + "is not supported but `dict(space2=Box(), spaces3=Box(), spaces4=Discrete())` is." + ) + + if isinstance(observation_space, spaces.Tuple): + warnings.warn( + "The observation space is a Tuple," + "this is currently not supported by Stable Baselines3. " + "However, you can convert it to a Dict observation space " + "(cf. https://github.com/openai/gym/blob/master/gym/spaces/dict.py). " + "which is supported by SB3." + ) + + if not _is_numpy_array_space(action_space): + warnings.warn( + "The action space is not based off a numpy array. Typically this means it's either a Dict or Tuple space. " + "This type of action space is currently not supported by Stable Baselines 3. You should try to flatten the " + "action using a wrapper." 
+ ) + + +def _check_nan(env: gym.Env) -> None: + """Check for Inf and NaN using the VecWrapper.""" + vec_env = VecCheckNan(DummyVecEnv([lambda: env])) + for _ in range(10): + action = np.array([env.action_space.sample()]) + _, _, _, _ = vec_env.step(action) + + +def _check_obs(obs: Union[tuple, dict, np.ndarray, int], observation_space: spaces.Space, method_name: str) -> None: + """ + Check that the observation returned by the environment + correspond to the declared one. + """ + if not isinstance(observation_space, spaces.Tuple): + assert not isinstance( + obs, tuple + ), f"The observation returned by the `{method_name}()` method should be a single value, not a tuple" + + # The check for a GoalEnv is done by the base class + if isinstance(observation_space, spaces.Discrete): + assert isinstance(obs, int), f"The observation returned by `{method_name}()` method must be an int" + elif _is_numpy_array_space(observation_space): + assert isinstance(obs, np.ndarray), f"The observation returned by `{method_name}()` method must be a numpy array" + + assert observation_space.contains( + obs + ), f"The observation returned by the `{method_name}()` method does not match the given observation space" + + +def _check_box_obs(observation_space: spaces.Box, key: str = "") -> None: + """ + Check that the observation space is correctly formatted + when dealing with a ``Box()`` space. In particular, it checks: + - that the dimensions are big enough when it is an image, and that the type matches + - that the observation has an expected shape (warn the user if not) + """ + # If image, check the low and high values, the type and the number of channels + # and the shape (minimal value) + if len(observation_space.shape) == 3: + _check_image_input(observation_space) + + if len(observation_space.shape) not in [1, 3]: + warnings.warn( + f"Your observation {key} has an unconventional shape (neither an image, nor a 1D vector). 
" + "We recommend you to flatten the observation " + "to have only a 1D vector or use a custom policy to properly process the data." + ) + + +def _check_returned_values(env: gym.Env, observation_space: spaces.Space, action_space: spaces.Space) -> None: + """ + Check the returned values by the env when calling `.reset()` or `.step()` methods. + """ + # because env inherits from gym.Env, we assume that `reset()` and `step()` methods exists + obs = env.reset() + + if isinstance(observation_space, spaces.Dict): + assert isinstance(obs, dict), "The observation returned by `reset()` must be a dictionary" + for key in observation_space.spaces.keys(): + try: + _check_obs(obs[key], observation_space.spaces[key], "reset") + except AssertionError as e: + raise AssertionError(f"Error while checking key={key}: " + str(e)) + else: + _check_obs(obs, observation_space, "reset") + + # Sample a random action + action = action_space.sample() + data = env.step(action) + + assert len(data) == 4, "The `step()` method must return four values: obs, reward, done, info" + + # Unpack + obs, reward, done, info = data + + if isinstance(observation_space, spaces.Dict): + assert isinstance(obs, dict), "The observation returned by `step()` must be a dictionary" + for key in observation_space.spaces.keys(): + try: + _check_obs(obs[key], observation_space.spaces[key], "step") + except AssertionError as e: + raise AssertionError(f"Error while checking key={key}: " + str(e)) + + else: + _check_obs(obs, observation_space, "step") + + # We also allow int because the reward will be cast to float + assert isinstance(reward, (float, int)), "The reward returned by `step()` must be a float" + assert isinstance(done, bool), "The `done` signal must be a boolean" + assert isinstance(info, dict), "The `info` returned by `step()` must be a python dictionary" + + if isinstance(env, gym.GoalEnv): + # For a GoalEnv, the keys are checked at reset + assert reward == env.compute_reward(obs["achieved_goal"], 
obs["desired_goal"], info) + + +def _check_spaces(env: gym.Env) -> None: + """ + Check that the observation and action spaces are defined + and inherit from gym.spaces.Space. + """ + # Helper to link to the code, because gym has no proper documentation + gym_spaces = " cf https://github.com/openai/gym/blob/master/gym/spaces/" + + assert hasattr(env, "observation_space"), "You must specify an observation space (cf gym.spaces)" + gym_spaces + assert hasattr(env, "action_space"), "You must specify an action space (cf gym.spaces)" + gym_spaces + + assert isinstance(env.observation_space, spaces.Space), "The observation space must inherit from gym.spaces" + gym_spaces + assert isinstance(env.action_space, spaces.Space), "The action space must inherit from gym.spaces" + gym_spaces + + +# Check render cannot be covered by CI +def _check_render(env: gym.Env, warn: bool = True, headless: bool = False) -> None: # pragma: no cover + """ + Check the declared render modes and the `render()`/`close()` + method of the environment. + :param env: The environment to check + :param warn: Whether to output additional warnings + :param headless: Whether to disable render modes + that require a graphical interface. False by default. + """ + render_modes = env.metadata.get("render.modes") + if render_modes is None: + if warn: + warnings.warn( + "No render modes was declared in the environment " + " (env.metadata['render.modes'] is None or not defined), " + "you may have trouble when calling `.render()`" + ) + + else: + # Don't check render mode that require a + # graphical interface (useful for CI) + if headless and "human" in render_modes: + render_modes.remove("human") + # Check all declared render modes + for render_mode in render_modes: + env.render(mode=render_mode) + env.close() + + +def check_env(env: gym.Env, warn: bool = True, skip_render_check: bool = True) -> None: + """ + Check that an environment follows Gym API. + This is particularly useful when using a custom environment. 
+ Please take a look at https://github.com/openai/gym/blob/master/gym/core.py + for more information about the API. + It also optionally check that the environment is compatible with Stable-Baselines. + :param env: The Gym environment that will be checked + :param warn: Whether to output additional warnings + mainly related to the interaction with Stable Baselines + :param skip_render_check: Whether to skip the checks for the render method. + True by default (useful for the CI) + """ + assert isinstance( + env, gym.Env + ), "Your environment must inherit from the gym.Env class cf https://github.com/openai/gym/blob/master/gym/core.py" + + # ============= Check the spaces (observation and action) ================ + _check_spaces(env) + + # Define aliases for convenience + observation_space = env.observation_space + action_space = env.action_space + + # Warn the user if needed. + # A warning means that the environment may run but not work properly with Stable Baselines algorithms + if warn: + _check_unsupported_spaces(env, observation_space, action_space) + + obs_spaces = observation_space.spaces if isinstance(observation_space, spaces.Dict) else {"": observation_space} + for key, space in obs_spaces.items(): + if isinstance(space, spaces.Box): + _check_box_obs(space, key) + + # Check for the action space, it may lead to hard-to-debug issues + if isinstance(action_space, spaces.Box) and ( + np.any(np.abs(action_space.low) != np.abs(action_space.high)) + or np.any(np.abs(action_space.low) > 1) + or np.any(np.abs(action_space.high) > 1) + ): + warnings.warn( + "We recommend you to use a symmetric and normalized Box action space (range=[-1, 1]) " + "cf https://stable-baselines3.readthedocs.io/en/master/guide/rl_tips.html" + ) + + # ============ Check the returned values =============== + _check_returned_values(env, observation_space, action_space) + + # ==== Check the render method and the declared render modes ==== + if not skip_render_check: + _check_render(env, 
warn=warn) # pragma: no cover + + # The check only works with numpy arrays + if _is_numpy_array_space(observation_space) and _is_numpy_array_space(action_space): + _check_nan(env) From 2cfc417499d8ed8e42e5dc293b270c53e6128845 Mon Sep 17 00:00:00 2001 From: Anthony Date: Mon, 2 Aug 2021 15:45:27 -0500 Subject: [PATCH 02/12] Added test from SB3 --- gym/utils/tests/test_env_checker.py | 31 +++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 gym/utils/tests/test_env_checker.py diff --git a/gym/utils/tests/test_env_checker.py b/gym/utils/tests/test_env_checker.py new file mode 100644 index 00000000000..ec9c6c4aa41 --- /dev/null +++ b/gym/utils/tests/test_env_checker.py @@ -0,0 +1,31 @@ +import gym +import numpy as np +import pytest +from gym.spaces import Box, Dict, Discrete + +from gym.utils.env_checker import check_env + + +class ActionDictTestEnv(gym.Env): + action_space = Dict({"position": Discrete(1), "velocity": Discrete(1)}) + observation_space = Box(low=-1.0, high=2.0, shape=(3,), dtype=np.float32) + + def step(self, action): + observation = np.array([1.0, 1.5, 0.5]) + reward = 1 + done = True + info = {} + return observation, reward, done, info + + def reset(self): + return np.array([1.0, 1.5, 0.5]) + + def render(self, mode="human"): + pass + + +def test_check_env_dict_action(): + test_env = ActionDictTestEnv() + + with pytest.warns(Warning): + check_env(env=test_env, warn=True) \ No newline at end of file From 4ca505ef6705c5d79ef83c93ffaff2b7fe733aa9 Mon Sep 17 00:00:00 2001 From: Anthony Date: Mon, 2 Aug 2021 15:53:25 -0500 Subject: [PATCH 03/12] Addition of MIT license attribution and black formatting --- gym/utils/env_checker.py | 101 +++++++++++++++++++++++----- gym/utils/tests/test_env_checker.py | 2 +- 2 files changed, 85 insertions(+), 18 deletions(-) diff --git a/gym/utils/env_checker.py b/gym/utils/env_checker.py index 3eb0c3dd261..8cb149e4fa5 100644 --- a/gym/utils/env_checker.py +++ b/gym/utils/env_checker.py @@ -1,3 
+1,32 @@ +""" +This file is originally from the Stable Baselines3 repository hostedon GitHub +(https://github.com/DLR-RM/stable-baselines3/) +Original Author: Antonin Raffin + +This file is covered by the MIT License, as described here: +The MIT License + +Copyright (c) 2019 Antonin Raffin + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+""" + import warnings from typing import Union @@ -51,7 +80,9 @@ def _check_image_input(observation_space: spaces.Box, key: str = "") -> None: ) -def _check_unsupported_spaces(env: gym.Env, observation_space: spaces.Space, action_space: spaces.Space) -> None: +def _check_unsupported_spaces( + env: gym.Env, observation_space: spaces.Space, action_space: spaces.Space +) -> None: """Emit warnings when the observation space or action space used is not supported by Stable-Baselines.""" if isinstance(observation_space, spaces.Dict): @@ -93,7 +124,11 @@ def _check_nan(env: gym.Env) -> None: _, _, _, _ = vec_env.step(action) -def _check_obs(obs: Union[tuple, dict, np.ndarray, int], observation_space: spaces.Space, method_name: str) -> None: +def _check_obs( + obs: Union[tuple, dict, np.ndarray, int], + observation_space: spaces.Space, + method_name: str, +) -> None: """ Check that the observation returned by the environment correspond to the declared one. @@ -105,9 +140,13 @@ def _check_obs(obs: Union[tuple, dict, np.ndarray, int], observation_space: spac # The check for a GoalEnv is done by the base class if isinstance(observation_space, spaces.Discrete): - assert isinstance(obs, int), f"The observation returned by `{method_name}()` method must be an int" + assert isinstance( + obs, int + ), f"The observation returned by `{method_name}()` method must be an int" elif _is_numpy_array_space(observation_space): - assert isinstance(obs, np.ndarray), f"The observation returned by `{method_name}()` method must be a numpy array" + assert isinstance( + obs, np.ndarray + ), f"The observation returned by `{method_name}()` method must be a numpy array" assert observation_space.contains( obs @@ -134,7 +173,9 @@ def _check_box_obs(observation_space: spaces.Box, key: str = "") -> None: ) -def _check_returned_values(env: gym.Env, observation_space: spaces.Space, action_space: spaces.Space) -> None: +def _check_returned_values( + env: gym.Env, observation_space: spaces.Space, 
action_space: spaces.Space +) -> None: """ Check the returned values by the env when calling `.reset()` or `.step()` methods. """ @@ -142,7 +183,9 @@ def _check_returned_values(env: gym.Env, observation_space: spaces.Space, action obs = env.reset() if isinstance(observation_space, spaces.Dict): - assert isinstance(obs, dict), "The observation returned by `reset()` must be a dictionary" + assert isinstance( + obs, dict + ), "The observation returned by `reset()` must be a dictionary" for key in observation_space.spaces.keys(): try: _check_obs(obs[key], observation_space.spaces[key], "reset") @@ -155,13 +198,17 @@ def _check_returned_values(env: gym.Env, observation_space: spaces.Space, action action = action_space.sample() data = env.step(action) - assert len(data) == 4, "The `step()` method must return four values: obs, reward, done, info" + assert ( + len(data) == 4 + ), "The `step()` method must return four values: obs, reward, done, info" # Unpack obs, reward, done, info = data if isinstance(observation_space, spaces.Dict): - assert isinstance(obs, dict), "The observation returned by `step()` must be a dictionary" + assert isinstance( + obs, dict + ), "The observation returned by `step()` must be a dictionary" for key in observation_space.spaces.keys(): try: _check_obs(obs[key], observation_space.spaces[key], "step") @@ -172,13 +219,19 @@ def _check_returned_values(env: gym.Env, observation_space: spaces.Space, action _check_obs(obs, observation_space, "step") # We also allow int because the reward will be cast to float - assert isinstance(reward, (float, int)), "The reward returned by `step()` must be a float" + assert isinstance( + reward, (float, int) + ), "The reward returned by `step()` must be a float" assert isinstance(done, bool), "The `done` signal must be a boolean" - assert isinstance(info, dict), "The `info` returned by `step()` must be a python dictionary" + assert isinstance( + info, dict + ), "The `info` returned by `step()` must be a python 
dictionary" if isinstance(env, gym.GoalEnv): # For a GoalEnv, the keys are checked at reset - assert reward == env.compute_reward(obs["achieved_goal"], obs["desired_goal"], info) + assert reward == env.compute_reward( + obs["achieved_goal"], obs["desired_goal"], info + ) def _check_spaces(env: gym.Env) -> None: @@ -189,15 +242,25 @@ def _check_spaces(env: gym.Env) -> None: # Helper to link to the code, because gym has no proper documentation gym_spaces = " cf https://github.com/openai/gym/blob/master/gym/spaces/" - assert hasattr(env, "observation_space"), "You must specify an observation space (cf gym.spaces)" + gym_spaces - assert hasattr(env, "action_space"), "You must specify an action space (cf gym.spaces)" + gym_spaces + assert hasattr(env, "observation_space"), ( + "You must specify an observation space (cf gym.spaces)" + gym_spaces + ) + assert hasattr(env, "action_space"), ( + "You must specify an action space (cf gym.spaces)" + gym_spaces + ) - assert isinstance(env.observation_space, spaces.Space), "The observation space must inherit from gym.spaces" + gym_spaces - assert isinstance(env.action_space, spaces.Space), "The action space must inherit from gym.spaces" + gym_spaces + assert isinstance(env.observation_space, spaces.Space), ( + "The observation space must inherit from gym.spaces" + gym_spaces + ) + assert isinstance(env.action_space, spaces.Space), ( + "The action space must inherit from gym.spaces" + gym_spaces + ) # Check render cannot be covered by CI -def _check_render(env: gym.Env, warn: bool = True, headless: bool = False) -> None: # pragma: no cover +def _check_render( + env: gym.Env, warn: bool = True, headless: bool = False +) -> None: # pragma: no cover """ Check the declared render modes and the `render()`/`close()` method of the environment. 
@@ -255,7 +318,11 @@ def check_env(env: gym.Env, warn: bool = True, skip_render_check: bool = True) - if warn: _check_unsupported_spaces(env, observation_space, action_space) - obs_spaces = observation_space.spaces if isinstance(observation_space, spaces.Dict) else {"": observation_space} + obs_spaces = ( + observation_space.spaces + if isinstance(observation_space, spaces.Dict) + else {"": observation_space} + ) for key, space in obs_spaces.items(): if isinstance(space, spaces.Box): _check_box_obs(space, key) diff --git a/gym/utils/tests/test_env_checker.py b/gym/utils/tests/test_env_checker.py index ec9c6c4aa41..1f1f37a5d31 100644 --- a/gym/utils/tests/test_env_checker.py +++ b/gym/utils/tests/test_env_checker.py @@ -28,4 +28,4 @@ def test_check_env_dict_action(): test_env = ActionDictTestEnv() with pytest.warns(Warning): - check_env(env=test_env, warn=True) \ No newline at end of file + check_env(env=test_env, warn=True) From 1b5a8b53dd92c00e4feb16f8e2e927c0bcab5888 Mon Sep 17 00:00:00 2001 From: Anthony Date: Mon, 2 Aug 2021 16:37:21 -0500 Subject: [PATCH 04/12] Removed SB3 dependence --- gym/utils/env_checker.py | 28 +++++----------------------- 1 file changed, 5 insertions(+), 23 deletions(-) diff --git a/gym/utils/env_checker.py b/gym/utils/env_checker.py index 8cb149e4fa5..5cb44196d5b 100644 --- a/gym/utils/env_checker.py +++ b/gym/utils/env_checker.py @@ -34,10 +34,6 @@ import numpy as np from gym import spaces -from stable_baselines3.common.preprocessing import is_image_space_channels_first -from stable_baselines3.common.vec_env import DummyVecEnv, VecCheckNan - - def _is_numpy_array_space(space: spaces.Space) -> bool: """ Returns False if provided space is not representable as a single numpy array @@ -48,7 +44,7 @@ def _is_numpy_array_space(space: spaces.Space) -> bool: def _check_image_input(observation_space: spaces.Box, key: str = "") -> None: """ - Check that the input will be compatible with Stable-Baselines + Check that the input adheres to general 
standards when the observation is apparently an image. """ if observation_space.dtype != np.uint8: @@ -63,23 +59,10 @@ def _check_image_input(observation_space: spaces.Box, key: str = "") -> None: warnings.warn( f"It seems that your observation space {key} is an image but the " "upper and lower bounds are not in [0, 255]. " - "Because the CNN policy normalize automatically the observation " - "you may encounter issue if the values are not in that range." + "Generally, CNN policies assume observations are within that range, " + "so you may encounter an issue if the observation values are not." ) - non_channel_idx = 0 - # Check only if width/height of the image is big enough - if is_image_space_channels_first(observation_space): - non_channel_idx = -1 - - if observation_space.shape[non_channel_idx] < 36 or observation_space.shape[1] < 36: - warnings.warn( - "The minimal resolution for an image is 36x36 for the default `CnnPolicy`. " - "You might need to use a custom feature extractor " - "cf. 
https://stable-baselines3.readthedocs.io/en/master/guide/custom_policy.html" - ) - - def _check_unsupported_spaces( env: gym.Env, observation_space: spaces.Space, action_space: spaces.Space ) -> None: @@ -117,11 +100,10 @@ def _check_unsupported_spaces( def _check_nan(env: gym.Env) -> None: - """Check for Inf and NaN using the VecWrapper.""" - vec_env = VecCheckNan(DummyVecEnv([lambda: env])) + """Check for Inf and NaN.""" for _ in range(10): action = np.array([env.action_space.sample()]) - _, _, _, _ = vec_env.step(action) + _, _, _, _ = env.step(action) def _check_obs( From 777ab0c82ac1ad2cb2e7af80ba911102e59842c7 Mon Sep 17 00:00:00 2001 From: Anthony Date: Mon, 2 Aug 2021 16:49:37 -0500 Subject: [PATCH 05/12] Implemented rough, non-vectorized version of check_nan --- gym/utils/env_checker.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/gym/utils/env_checker.py b/gym/utils/env_checker.py index 5cb44196d5b..c6bb6c4ab17 100644 --- a/gym/utils/env_checker.py +++ b/gym/utils/env_checker.py @@ -34,6 +34,7 @@ import numpy as np from gym import spaces + def _is_numpy_array_space(space: spaces.Space) -> bool: """ Returns False if provided space is not representable as a single numpy array @@ -63,6 +64,7 @@ def _check_image_input(observation_space: spaces.Box, key: str = "") -> None: "so you may encounter an issue if the observation values are not." 
) + def _check_unsupported_spaces( env: gym.Env, observation_space: spaces.Space, action_space: spaces.Space ) -> None: @@ -99,11 +101,20 @@ def _check_unsupported_spaces( ) -def _check_nan(env: gym.Env) -> None: +def _check_nan(env: gym.Env, check_inf: bool = True) -> None: """Check for Inf and NaN.""" for _ in range(10): action = np.array([env.action_space.sample()]) - _, _, _, _ = env.step(action) + observation, reward, _, _ = env.step(action) + + if np.any(np.isnan(observation)): + warnings.warn("Encountered NaN value in observations.") + if np.any(np.isnan(reward)): + warnings.warn("Encountered NaN value in rewards.") + if np.any(np.isinf(observation)): + warnings.warn("Encountered inf value in observations.") + if np.any(np.isinf(reward)): + warnings.warn("Encountered inf value in rewards.") def _check_obs( From 752bc462897ca878a0338896dc9e3ce684282b55 Mon Sep 17 00:00:00 2001 From: Anthony Date: Mon, 2 Aug 2021 16:56:54 -0500 Subject: [PATCH 06/12] Made some SB3 warnings a bit more "general" --- gym/utils/env_checker.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/gym/utils/env_checker.py b/gym/utils/env_checker.py index c6bb6c4ab17..e0f1da7c968 100644 --- a/gym/utils/env_checker.py +++ b/gym/utils/env_checker.py @@ -68,7 +68,10 @@ def _check_image_input(observation_space: spaces.Box, key: str = "") -> None: def _check_unsupported_spaces( env: gym.Env, observation_space: spaces.Space, action_space: spaces.Space ) -> None: - """Emit warnings when the observation space or action space used is not supported by Stable-Baselines.""" + """ + Emit warnings when the observation space or action space may not traditionally + be supported by popular RL libraries. e.g. 
Stable-Baselines3 + """ if isinstance(observation_space, spaces.Dict): nested_dict = False @@ -77,9 +80,9 @@ def _check_unsupported_spaces( nested_dict = True if nested_dict: warnings.warn( - "Nested observation spaces are not supported by Stable Baselines3 " - "(Dict spaces inside Dict space). " - "You should flatten it to have only one level of keys." + "Nested observation spaces may not be supported if you use an RL library " + "for your learning agent (Dict spaces inside Dict space). " + "In this case, you should flatten it to have only one level of keys." "For example, `dict(space1=dict(space2=Box(), space3=Box()), spaces4=Discrete())` " "is not supported but `dict(space2=Box(), spaces3=Box(), spaces4=Discrete())` is." ) @@ -87,16 +90,15 @@ def _check_unsupported_spaces( if isinstance(observation_space, spaces.Tuple): warnings.warn( "The observation space is a Tuple," - "this is currently not supported by Stable Baselines3. " + "this may not be supported if you use an RL library for your learning agent. " "However, you can convert it to a Dict observation space " "(cf. https://github.com/openai/gym/blob/master/gym/spaces/dict.py). " - "which is supported by SB3." ) if not _is_numpy_array_space(action_space): warnings.warn( "The action space is not based off a numpy array. Typically this means it's either a Dict or Tuple space. " - "This type of action space is currently not supported by Stable Baselines 3. You should try to flatten the " + "This type of action space may not supported by your RL library. You should try to flatten the " "action using a wrapper." ) @@ -307,7 +309,7 @@ def check_env(env: gym.Env, warn: bool = True, skip_render_check: bool = True) - action_space = env.action_space # Warn the user if needed. - # A warning means that the environment may run but not work properly with Stable Baselines algorithms + # A warning means that the environment may run but not work properly with popular RL libraries. 
if warn: _check_unsupported_spaces(env, observation_space, action_space) From 352bf007a7e07f7553fe419c8375d40a772a6a92 Mon Sep 17 00:00:00 2001 From: Anthony Tamasi Date: Fri, 6 Aug 2021 11:39:49 -0500 Subject: [PATCH 07/12] Removed check for "unsupported" spaces, since Gym should support any space --- gym/utils/env_checker.py | 40 ---------------------------------------- 1 file changed, 40 deletions(-) diff --git a/gym/utils/env_checker.py b/gym/utils/env_checker.py index e0f1da7c968..cb034da5cfb 100644 --- a/gym/utils/env_checker.py +++ b/gym/utils/env_checker.py @@ -65,44 +65,6 @@ def _check_image_input(observation_space: spaces.Box, key: str = "") -> None: ) -def _check_unsupported_spaces( - env: gym.Env, observation_space: spaces.Space, action_space: spaces.Space -) -> None: - """ - Emit warnings when the observation space or action space may not traditionally - be supported by popular RL libraries. e.g. Stable-Baselines3 - """ - - if isinstance(observation_space, spaces.Dict): - nested_dict = False - for space in observation_space.spaces.values(): - if isinstance(space, spaces.Dict): - nested_dict = True - if nested_dict: - warnings.warn( - "Nested observation spaces may not be supported if you use an RL library " - "for your learning agent (Dict spaces inside Dict space). " - "In this case, you should flatten it to have only one level of keys." - "For example, `dict(space1=dict(space2=Box(), space3=Box()), spaces4=Discrete())` " - "is not supported but `dict(space2=Box(), spaces3=Box(), spaces4=Discrete())` is." - ) - - if isinstance(observation_space, spaces.Tuple): - warnings.warn( - "The observation space is a Tuple," - "this may not be supported if you use an RL library for your learning agent. " - "However, you can convert it to a Dict observation space " - "(cf. https://github.com/openai/gym/blob/master/gym/spaces/dict.py). " - ) - - if not _is_numpy_array_space(action_space): - warnings.warn( - "The action space is not based off a numpy array. 
Typically this means it's either a Dict or Tuple space. " - "This type of action space may not supported by your RL library. You should try to flatten the " - "action using a wrapper." - ) - - def _check_nan(env: gym.Env, check_inf: bool = True) -> None: """Check for Inf and NaN.""" for _ in range(10): @@ -311,8 +273,6 @@ def check_env(env: gym.Env, warn: bool = True, skip_render_check: bool = True) - # Warn the user if needed. # A warning means that the environment may run but not work properly with popular RL libraries. if warn: - _check_unsupported_spaces(env, observation_space, action_space) - obs_spaces = ( observation_space.spaces if isinstance(observation_space, spaces.Dict) From 724f2b32dfa8bd2684ad0d34feef16173b8757ff Mon Sep 17 00:00:00 2001 From: Anthony Tamasi Date: Fri, 6 Aug 2021 17:07:10 -0500 Subject: [PATCH 08/12] Added action/observation checks from pettingzoo, referenced in file docstring --- gym/utils/env_checker.py | 70 ++++++++++++++++++++++++++++++++++------ 1 file changed, 60 insertions(+), 10 deletions(-) diff --git a/gym/utils/env_checker.py b/gym/utils/env_checker.py index cb034da5cfb..ad765cbf238 100644 --- a/gym/utils/env_checker.py +++ b/gym/utils/env_checker.py @@ -1,8 +1,12 @@ """ -This file is originally from the Stable Baselines3 repository hostedon GitHub +This file is originally from the Stable Baselines3 repository hosted on GitHub (https://github.com/DLR-RM/stable-baselines3/) Original Author: Antonin Raffin +It also uses some warnings/assertions from the PettingZoo repository hosted on GitHub +(https://github.com/PettingZoo-Team/PettingZoo) +Original Author: Justin Terry + This file is covered by the MIT License, as described here: The MIT License @@ -129,6 +133,58 @@ def _check_box_obs(observation_space: spaces.Box, key: str = "") -> None: "to have only a 1D vector or use a custom policy to properly process the data." 
) + if np.any(np.equal(observation_space.low, -np.inf)): + warnings.warn( + "Agent's minimum observation space value is -infinity. This is probably too low." + ) + if np.any(np.equal(observation_space.high, np.inf)): + warnings.warn( + "Agent's maxmimum observation space value is infinity. This is probably too high" + ) + if np.any(np.equal(observation_space.low, observation_space.high)): + warnings.warn("Agent's maximum and minimum observation space values are equal") + if np.any(np.greater(observation_space.low, observation_space.high)): + assert False, "Agent's minimum observation value is greater than it's maximum" + if observation_space.low.shape != observation_space.shape: + assert ( + False + ), "Agent's observation_space.low and observation_space have different shapes" + if observation_space.high.shape != observation_space.shape: + assert ( + False + ), "Agent's observation_space.high and observation_space have different shapes" + + +def _check_box_action(action_space: spaces.Box): + if np.any(np.equal(action_space.low, -np.inf)): + warnings.warn( + "Agent's minimum action space value is -infinity. This is probably too low." + ) + if np.any(np.equal(action_space.high, np.inf)): + warnings.warn( + "Agent's maxmimum action space value is infinity. 
This is probably too high" + ) + if np.any(np.equal(action_space.low, action_space.high)): + warnings.warn("Agent's maximum and minimum action space values are equal") + if np.any(np.greater(action_space.low, action_space.high)): + assert False, "Agent's minimum action value is greater than it's maximum" + if action_space.low.shape != action_space.shape: + assert False, "Agent's action_space.low and action_space have different shapes" + if action_space.high.shape != action_space.shape: + assert False, "Agent's action_space.high and action_space have different shapes" + + +def _check_normalized_action(action_space: spaces.Box): + if ( + np.any(np.abs(action_space.low) != np.abs(action_space.high)) + or np.any(np.abs(action_space.low) > 1) + or np.any(np.abs(action_space.high) > 1) + ): + warnings.warn( + "We recommend you to use a symmetric and normalized Box action space (range=[-1, 1]) " + "cf https://stable-baselines3.readthedocs.io/en/master/guide/rl_tips.html" + ) + def _check_returned_values( env: gym.Env, observation_space: spaces.Space, action_space: spaces.Space @@ -283,15 +339,9 @@ def check_env(env: gym.Env, warn: bool = True, skip_render_check: bool = True) - _check_box_obs(space, key) # Check for the action space, it may lead to hard-to-debug issues - if isinstance(action_space, spaces.Box) and ( - np.any(np.abs(action_space.low) != np.abs(action_space.high)) - or np.any(np.abs(action_space.low) > 1) - or np.any(np.abs(action_space.high) > 1) - ): - warnings.warn( - "We recommend you to use a symmetric and normalized Box action space (range=[-1, 1]) " - "cf https://stable-baselines3.readthedocs.io/en/master/guide/rl_tips.html" - ) + if isinstance(action_space, spaces.Box): + _check_box_action(action_space) + _check_normalized_action(action_space) # ============ Check the returned values =============== _check_returned_values(env, observation_space, action_space) From 7855783dbe48fd1d9b6d7090b1b682113759c17d Mon Sep 17 00:00:00 2001 From: Anthony Tamasi 
Date: Sat, 7 Aug 2021 07:00:29 -0500 Subject: [PATCH 09/12] Removed copy of MIT license in file docstring --- gym/utils/env_checker.py | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/gym/utils/env_checker.py b/gym/utils/env_checker.py index ad765cbf238..6c6fe1a5799 100644 --- a/gym/utils/env_checker.py +++ b/gym/utils/env_checker.py @@ -7,28 +7,7 @@ (https://github.com/PettingZoo-Team/PettingZoo) Original Author: Justin Terry -This file is covered by the MIT License, as described here: -The MIT License - -Copyright (c) 2019 Antonin Raffin - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. +These projects are covered by the MIT License. 
""" import warnings From c4dae1aadf1ccd1d93f2bf5d95d5b1dc112554db Mon Sep 17 00:00:00 2001 From: Anthony Tamasi Date: Sat, 7 Aug 2021 07:03:04 -0500 Subject: [PATCH 10/12] Re-added usage of the check_inf flag for helper function --- gym/utils/env_checker.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gym/utils/env_checker.py b/gym/utils/env_checker.py index 6c6fe1a5799..889f81c41d8 100644 --- a/gym/utils/env_checker.py +++ b/gym/utils/env_checker.py @@ -58,9 +58,9 @@ def _check_nan(env: gym.Env, check_inf: bool = True) -> None: warnings.warn("Encountered NaN value in observations.") if np.any(np.isnan(reward)): warnings.warn("Encountered NaN value in rewards.") - if np.any(np.isinf(observation)): + if check_inf and np.any(np.isinf(observation)): warnings.warn("Encountered inf value in observations.") - if np.any(np.isinf(reward)): + if check_inf and np.any(np.isinf(reward)): warnings.warn("Encountered inf value in rewards.") From 86bcd86d05ebca82116262c40ee1caf783ef894c Mon Sep 17 00:00:00 2001 From: Anthony Tamasi Date: Sat, 7 Aug 2021 07:12:02 -0500 Subject: [PATCH 11/12] Changed test to be simple/classic example. Should add more tests --- gym/utils/tests/test_env_checker.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/gym/utils/tests/test_env_checker.py b/gym/utils/tests/test_env_checker.py index 1f1f37a5d31..d5082e2633d 100644 --- a/gym/utils/tests/test_env_checker.py +++ b/gym/utils/tests/test_env_checker.py @@ -14,8 +14,7 @@ def step(self, action): observation = np.array([1.0, 1.5, 0.5]) reward = 1 done = True - info = {} - return observation, reward, done, info + return observation, reward, done def reset(self): return np.array([1.0, 1.5, 0.5]) @@ -25,7 +24,12 @@ def render(self, mode="human"): def test_check_env_dict_action(): + # Environment.step() only returns 3 values: obs, reward, done. Not info! 
test_env = ActionDictTestEnv() - with pytest.warns(Warning): + with pytest.raises(AssertionError) as errorinfo: check_env(env=test_env, warn=True) + assert ( + str(errorinfo.value) + == "The `step()` method must return four values: obs, reward, done, info" + ) From dd9c47c856774e1b6ee2fd318d7b35ec46ca5414 Mon Sep 17 00:00:00 2001 From: Anthony Tamasi Date: Sun, 8 Aug 2021 08:22:40 -0500 Subject: [PATCH 12/12] Added API compliance check to registered envs --- gym/envs/tests/test_envs.py | 4 ++++ gym/utils/env_checker.py | 6 +++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/gym/envs/tests/test_envs.py b/gym/envs/tests/test_envs.py index 61dc0a5faa5..bb9021f631a 100644 --- a/gym/envs/tests/test_envs.py +++ b/gym/envs/tests/test_envs.py @@ -3,6 +3,7 @@ from gym import envs from gym.envs.tests.spec_list import spec_list +from gym.utils.env_checker import check_env # This runs a smoketest on each official registered env. We may want @@ -14,6 +15,9 @@ def test_env(spec): with pytest.warns(None) as warnings: env = spec.make() + # Test if env adheres to Gym API + check_env(env, warn=True, skip_render_check=True) + # Check that dtype is explicitly declared for gym.Box spaces for warning_msg in warnings: assert "autodetected dtype" not in str(warning_msg.message) diff --git a/gym/utils/env_checker.py b/gym/utils/env_checker.py index 889f81c41d8..87e959f1ca2 100644 --- a/gym/utils/env_checker.py +++ b/gym/utils/env_checker.py @@ -49,9 +49,9 @@ def _check_image_input(observation_space: spaces.Box, key: str = "") -> None: def _check_nan(env: gym.Env, check_inf: bool = True) -> None: - """Check for Inf and NaN.""" + """Check for NaN and Inf.""" for _ in range(10): - action = np.array([env.action_space.sample()]) + action = env.action_space.sample() observation, reward, _, _ = env.step(action) if np.any(np.isnan(observation)): @@ -212,7 +212,7 @@ def _check_returned_values( # We also allow int because the reward will be cast to float assert isinstance( - reward, 
(float, int) + reward, (float, int, np.float32) ), "The reward returned by `step()` must be a float" assert isinstance(done, bool), "The `done` signal must be a boolean" assert isinstance(