From 1cef4251db8c55ad8fe86c48e4d5b875ddb478d5 Mon Sep 17 00:00:00 2001 From: acxz <17132214+acxz@users.noreply.github.com> Date: Sat, 30 Nov 2024 18:40:47 -0500 Subject: [PATCH] remove openai branding, update gym terminology with gymnasium --- docs/source/examples/index.rst | 2 +- ...pper.rst => rl_with_gymnasium_wrapper.rst} | 20 +++++++++---------- docs/source/getting_started.rst | 2 +- docs/source/index.rst | 2 +- docs/source/modules/player.rst | 4 ++-- ...openai_example.py => gymnasium_example.py} | 20 +++++++++---------- ..._gym_wrapper.py => rl_with_gym_wrapper.py} | 0 ...rapper.py => rl_with_gymnasium_wrapper.py} | 2 +- integration_tests/test_env_player.py | 12 +++++------ src/poke_env/player/__init__.py | 8 ++++---- src/poke_env/player/env_player.py | 8 ++++---- .../{openai_api.py => gymnasium_api.py} | 14 ++++++------- unit_tests/player/test_env_player.py | 12 +++++------ .../{test_openai.py => test_gymnasium.py} | 6 +++--- 14 files changed, 56 insertions(+), 56 deletions(-) rename docs/source/examples/{rl_with_open_ai_gym_wrapper.rst => rl_with_gymnasium_wrapper.rst} (93%) rename examples/{openai_example.py => gymnasium_example.py} (90%) rename examples/{rl_with_open_ai_gym_wrapper.py => rl_with_gym_wrapper.py} (100%) rename examples/{rl_with_new_open_ai_gym_wrapper.py => rl_with_gymnasium_wrapper.py} (99%) rename src/poke_env/player/{openai_api.py => gymnasium_api.py} (98%) rename unit_tests/player/{test_openai.py => test_gymnasium.py} (96%) diff --git a/docs/source/examples/index.rst b/docs/source/examples/index.rst index a953cc4be..145014727 100644 --- a/docs/source/examples/index.rst +++ b/docs/source/examples/index.rst @@ -11,4 +11,4 @@ This page lists detailled examples demonstrating how to use this package. They a quickstart using_a_custom_teambuilder connecting_to_showdown_and_challenging_humans - rl_with_open_ai_gym_wrapper \ No newline at end of file + rl_with_gymnasium_wrapper diff --git a/docs/source/examples/rl_with_open_ai_gym_wrapper.rst b/docs/source/examples/rl_with_gymnasium_wrapper.rst similarity index 93% rename from docs/source/examples/rl_with_open_ai_gym_wrapper.rst rename to docs/source/examples/rl_with_gymnasium_wrapper.rst index feeef43bf..0f7a970ad 100644 --- a/docs/source/examples/rl_with_open_ai_gym_wrapper.rst +++ b/docs/source/examples/rl_with_gymnasium_wrapper.rst @@ -1,18 +1,18 @@ -.. _rl_with_open_ai_gym_wrapper: +.. _rl_with_gymnasium_wrapper: -Reinforcement learning with the OpenAI Gym wrapper +Reinforcement learning with the Gymnasium wrapper ================================================== -The corresponding complete source code can be found `here `__. +The corresponding complete source code can be found `here `__. -The goal of this example is to demonstrate how to use the `open ai gym `__ interface proposed by ``EnvPlayer``, and to train a simple deep reinforcement learning agent comparable in performance to the ``MaxDamagePlayer`` we created in :ref:`max_damage_player`. +The goal of this example is to demonstrate how to use the `farama gymnasium `__ interface proposed by ``EnvPlayer``, and to train a simple deep reinforcement learning agent comparable in performance to the ``MaxDamagePlayer`` we created in :ref:`max_damage_player`. -.. note:: This example necessitates `keras-rl `__ (compatible with Tensorflow 1.X) or `keras-rl2 `__ (Tensorflow 2.X), which implement numerous reinforcement learning algorithms and offer a simple API fully compatible with the Open AI Gym API. You can install them by running ``pip install keras-rl`` or ``pip install keras-rl2``. If you are unsure, ``pip install keras-rl2`` is recommended. +.. note:: This example necessitates `keras-rl `__ (compatible with Tensorflow 1.X) or `keras-rl2 `__ (Tensorflow 2.X), which implement numerous reinforcement learning algorithms and offer a simple API fully compatible with the Gymnasium API. You can install them by running ``pip install keras-rl`` or ``pip install keras-rl2``. If you are unsure, ``pip install keras-rl2`` is recommended. Implementing rewards and observations ************************************* -The open ai gym API provides *rewards* and *observations* for each step of each episode. In our case, each step corresponds to one decision in a battle and battles correspond to episodes. +The Gymnasium API provides *rewards* and *observations* for each step of each episode. In our case, each step corresponds to one decision in a battle and battles correspond to episodes. Defining observations ^^^^^^^^^^^^^^^^^^^^^ @@ -26,9 +26,9 @@ Observations are embeddings of the current state of the battle. They can be an a To define our observations, we will create a custom ``embed_battle`` method. It takes one argument, a ``Battle`` object, and returns our embedding. -In addition to this, we also need to describe the embedding to the gym interface. +In addition to this, we also need to describe the embedding to the gymnasium interface. To achieve this, we need to implement the ``describe_embedding`` method where we specify the low bound and the high bound -for each component of the embedding vector and return them as a ``gym.Space`` object. +for each component of the embedding vector and return them as a ``gymnasium.Space`` object. Defining rewards ^^^^^^^^^^^^^^^^ @@ -108,7 +108,7 @@ Our player will play the ``gen8randombattle`` format. We can therefore inherit f Instantiating and testing a player ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Now that our custom class is defined, we can instantiate our RL player and test if it's compliant with the OpenAI gym API. +Now that our custom class is defined, we can instantiate our RL player and test if it's compliant with the Gymnasium API. .. code-block:: python @@ -340,7 +340,7 @@ To use the ``cross_evaluate`` method, the strategy is the same to the one used f Final result ************ -Running the `whole file `__ should take a couple of minutes and print something similar to this: +Running the `whole file `__ should take a couple of minutes and print something similar to this: .. code-block:: console diff --git a/docs/source/getting_started.rst b/docs/source/getting_started.rst index c3dcd2be9..3042f9a42 100644 --- a/docs/source/getting_started.rst +++ b/docs/source/getting_started.rst @@ -41,7 +41,7 @@ Agents in ``poke-env`` are instances of the ``Player`` class. Explore the follow - Basic agent: :ref:`/examples/cross_evaluate_random_players.ipynb` - Advanced agent: :ref:`max_damage_player` -- RL agent: :ref:`rl_with_open_ai_gym_wrapper` +- RL agent: :ref:`rl_with_gymnasium_wrapper` - Using teams: :ref:`ou_max_player` - Custom team builder: :ref:`using_a_custom_teambuilder` diff --git a/docs/source/index.rst b/docs/source/index.rst index 2fe84f098..3711cade9 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -6,7 +6,7 @@ Poke-env: A Python Interface for Training Reinforcement Learning Pokémon Bots Poke-env provides an environment for engaging in `Pokémon Showdown `__ battles with a focus on reinforcement learning. -It boasts a straightforward API for handling Pokémon, Battles, Moves, and other battle-centric objects, alongside an `OpenAI Gym `__ interface for training agents. +It boasts a straightforward API for handling Pokémon, Battles, Moves, and other battle-centric objects, alongside a `Farama Gymnasium `__ interface for training agents. .. attention:: While poke-env aims to support all Pokémon generations, it was primarily developed with the latest generations in mind. If you discover any missing or incorrect functionalities for earlier generations, please `open an issue `__ to help improve the library. diff --git a/docs/source/modules/player.rst b/docs/source/modules/player.rst index eb3e0381e..75b0ee969 100644 --- a/docs/source/modules/player.rst +++ b/docs/source/modules/player.rst @@ -21,10 +21,10 @@ Player :undoc-members: :show-inheritance: -OpenAIGymEnv +GymnasiumEnv ************ -.. automodule:: poke_env.player.openai_api +.. automodule:: poke_env.player.gymnasium_api :members: :undoc-members: :show-inheritance: diff --git a/examples/openai_example.py b/examples/gymnasium_example.py similarity index 90% rename from examples/openai_example.py rename to examples/gymnasium_example.py index b14e634c8..276e325fd 100644 --- a/examples/openai_example.py +++ b/examples/gymnasium_example.py @@ -7,13 +7,13 @@ from poke_env.environment.abstract_battle import AbstractBattle from poke_env.player import ( Gen8EnvSinglePlayer, + GymnasiumEnv, ObservationType, - OpenAIGymEnv, RandomPlayer, ) -class TestEnv(OpenAIGymEnv): +class TestEnv(GymnasiumEnv): def __init__(self, **kwargs): self.opponent = RandomPlayer( battle_format="gen8randombattle", @@ -66,14 +66,14 @@ def describe_embedding(self) -> Space: return Box(np.array([0, 0]), np.array([6, 6]), dtype=int) -def openai_api(): - gym_env = TestEnv( +def gymnasium_api(): + gymnasium_env = TestEnv( battle_format="gen8randombattle", server_configuration=LocalhostServerConfiguration, start_challenging=True, ) - check_env(gym_env) - gym_env.close() + check_env(gymnasium_env) + gymnasium_env.close() def env_player(): @@ -81,16 +81,16 @@ def env_player(): battle_format="gen8randombattle", server_configuration=LocalhostServerConfiguration, ) - gym_env = Gen8( + gymnasium_env = Gen8( battle_format="gen8randombattle", server_configuration=LocalhostServerConfiguration, start_challenging=True, opponent=opponent, ) - check_env(gym_env) - gym_env.close() + check_env(gymnasium_env) + gymnasium_env.close() if __name__ == "__main__": - openai_api() + gymnasium_api() env_player() diff --git a/examples/rl_with_open_ai_gym_wrapper.py b/examples/rl_with_gym_wrapper.py similarity index 100% rename from examples/rl_with_open_ai_gym_wrapper.py rename to examples/rl_with_gym_wrapper.py diff --git a/examples/rl_with_new_open_ai_gym_wrapper.py b/examples/rl_with_gymnasium_wrapper.py similarity index 99% rename from examples/rl_with_new_open_ai_gym_wrapper.py rename to examples/rl_with_gymnasium_wrapper.py index 7bcf23d9e..9248b7edc 100644 --- a/examples/rl_with_new_open_ai_gym_wrapper.py +++ b/examples/rl_with_gymnasium_wrapper.py @@ -72,7 +72,7 @@ def describe_embedding(self) -> Space: async def main(): # First test the environment to ensure the class is consistent - # with the OpenAI API + # with the Gymnasium API opponent = RandomPlayer(battle_format="gen8randombattle") test_env = SimpleRLPlayer( battle_format="gen8randombattle", start_challenging=True, opponent=opponent diff --git a/integration_tests/test_env_player.py b/integration_tests/test_env_player.py index a7ef4ba68..cfc761926 100644 --- a/integration_tests/test_env_player.py +++ b/integration_tests/test_env_player.py @@ -90,7 +90,7 @@ def play_function(player, n_battles): @pytest.mark.timeout(30) -def test_random_gym_players_gen4(): +def test_random_gymnasium_players_gen4(): random_player = RandomPlayer(battle_format="gen4randombattle", log_level=25) env_player = RandomGen4EnvPlayer( log_level=25, opponent=random_player, start_challenging=False @@ -100,7 +100,7 @@ def test_random_gym_players_gen4(): @pytest.mark.timeout(30) -def test_random_gym_players_gen5(): +def test_random_gymnasium_players_gen5(): random_player = RandomPlayer(battle_format="gen5randombattle", log_level=25) env_player = RandomGen5EnvPlayer( log_level=25, opponent=random_player, start_challenging=False @@ -110,7 +110,7 @@ def test_random_gym_players_gen5(): @pytest.mark.timeout(30) -def test_random_gym_players_gen6(): +def test_random_gymnasium_players_gen6(): random_player = RandomPlayer(battle_format="gen6randombattle", log_level=25) env_player = RandomGen6EnvPlayer( log_level=25, opponent=random_player, start_challenging=False @@ -120,7 +120,7 @@ def test_random_gym_players_gen6(): @pytest.mark.timeout(30) -def test_random_gym_players_gen7(): +def test_random_gymnasium_players_gen7(): random_player = RandomPlayer(battle_format="gen7randombattle", log_level=25) env_player = RandomGen7EnvPlayer( log_level=25, opponent=random_player, start_challenging=False @@ -130,7 +130,7 @@ def test_random_gym_players_gen7(): @pytest.mark.timeout(30) -def test_random_gym_players_gen8(): +def test_random_gymnasium_players_gen8(): random_player = RandomPlayer(battle_format="gen8randombattle", log_level=25) env_player = RandomGen8EnvPlayer( log_level=25, opponent=random_player, start_challenging=False @@ -140,7 +140,7 @@ def test_random_gym_players_gen8(): @pytest.mark.timeout(30) -def test_random_gym_players_gen9(): +def test_random_gymnasium_players_gen9(): random_player = RandomPlayer(battle_format="gen9randombattle", log_level=25) env_player = RandomGen9EnvPlayer( log_level=25, opponent=random_player, start_challenging=False diff --git a/src/poke_env/player/__init__.py b/src/poke_env/player/__init__.py index 0f88467cc..b364bcec8 100644 --- a/src/poke_env/player/__init__.py +++ b/src/poke_env/player/__init__.py @@ -2,7 +2,7 @@ """ from poke_env.concurrency import POKE_LOOP -from poke_env.player import env_player, openai_api, player, random_player, utils +from poke_env.player import env_player, gymnasium_api, player, random_player, utils from poke_env.player.baselines import MaxBasePowerPlayer, SimpleHeuristicsPlayer from poke_env.player.battle_order import ( BattleOrder, @@ -19,7 +19,7 @@ Gen8EnvSinglePlayer, Gen9EnvSinglePlayer, ) -from poke_env.player.openai_api import ActType, ObsType, OpenAIGymEnv +from poke_env.player.gymnasium_api import ActType, GymnasiumEnv, ObsType from poke_env.player.player import Player from poke_env.player.random_player import RandomPlayer from poke_env.player.utils import ( @@ -32,7 +32,7 @@ __all__ = [ "env_player", - "openai_api", + "gymnasium_api", "player", "random_player", "utils", @@ -47,7 +47,7 @@ "Gen8EnvSinglePlayer", "Gen9EnvSinglePlayer", "POKE_LOOP", - "OpenAIGymEnv", + "GymnasiumEnv", "PSClient", "Player", "RandomPlayer", diff --git a/src/poke_env/player/env_player.py b/src/poke_env/player/env_player.py index 61c0f810f..fb4ce7803 100644 --- a/src/poke_env/player/env_player.py +++ b/src/poke_env/player/env_player.py @@ -1,4 +1,4 @@ -"""This module defines a player class exposing the Open AI Gym API with utility functions. +"""This module defines a player class exposing the Gymnasium API with utility functions. """ from abc import ABC @@ -8,15 +8,15 @@ from poke_env.environment.abstract_battle import AbstractBattle from poke_env.player.battle_order import BattleOrder, ForfeitBattleOrder -from poke_env.player.openai_api import ActType, ObsType, OpenAIGymEnv +from poke_env.player.gymnasium_api import ActType, GymnasiumEnv, ObsType from poke_env.player.player import Player from poke_env.ps_client.account_configuration import AccountConfiguration from poke_env.ps_client.server_configuration import ServerConfiguration from poke_env.teambuilder.teambuilder import Teambuilder -class EnvPlayer(OpenAIGymEnv[ObsType, ActType], ABC): - """Player exposing the Open AI Gym Env API.""" +class EnvPlayer(GymnasiumEnv[ObsType, ActType], ABC): + """Player exposing the Gymnasium Env API.""" _ACTION_SPACE: List[int] = [] _DEFAULT_BATTLE_FORMAT = "gen8randombattle" diff --git a/src/poke_env/player/openai_api.py b/src/poke_env/player/gymnasium_api.py similarity index 98% rename from src/poke_env/player/openai_api.py rename to src/poke_env/player/gymnasium_api.py index 19270c3d5..72a585d64 100644 --- a/src/poke_env/player/openai_api.py +++ b/src/poke_env/player/gymnasium_api.py @@ -1,4 +1,4 @@ -"""This module defines a player class with the OpenAI API on the main thread. +"""This module defines a player class with the Gymnasium API on the main thread. For a black-box implementation consider using the module env_player. """ @@ -62,7 +62,7 @@ class _AsyncPlayer(Generic[ObsType, ActType], Player): def __init__( self, - user_funcs: OpenAIGymEnv[ObsType, ActType], + user_funcs: GymnasiumEnv[ObsType, ActType], username: str, **kwargs: Any, ): @@ -94,12 +94,12 @@ def _battle_finished_callback(self, battle: AbstractBattle): asyncio.run_coroutine_threadsafe(self.observations.async_put(to_put), POKE_LOOP) -class OpenAIGymEnv( +class GymnasiumEnv( Env[ObsType, ActType], ABC, ): """ - Base class implementing the OpenAI Gym API on the main thread. + Base class implementing the Gymnasium API on the main thread. """ _INIT_RETRIES = 100 @@ -239,7 +239,7 @@ def action_to_move(self, action: int, battle: AbstractBattle) -> BattleOrder: def embed_battle(self, battle: AbstractBattle) -> ObsType: """ Returns the embedding of the current battle state in a format compatible with - the OpenAI gym API. + the Gymnasium API. :param battle: The current battle state. :type battle: AbstractBattle @@ -416,7 +416,7 @@ def close(self, purge: bool = True): def background_send_challenge(self, username: str): """ Sends a single challenge specified player. The function immediately returns - to allow use of the OpenAI gym API. + to allow use of the Gymnasium API. :param username: The username of the player to challenge. :type username: str @@ -434,7 +434,7 @@ def background_send_challenge(self, username: str): def background_accept_challenge(self, username: str): """ Accepts a single challenge specified player. The function immediately returns - to allow use of the OpenAI gym API. + to allow use of the Gymnasium API. :param username: The username of the player to challenge. :type username: str diff --git a/unit_tests/player/test_env_player.py b/unit_tests/player/test_env_player.py index b079de6b4..56b77b1f4 100644 --- a/unit_tests/player/test_env_player.py +++ b/unit_tests/player/test_env_player.py @@ -19,7 +19,7 @@ Gen9EnvSinglePlayer, RandomPlayer, ) -from poke_env.player.openai_api import _AsyncPlayer +from poke_env.player.gymnasium_api import _AsyncPlayer account_configuration = AccountConfiguration("username", "password") server_configuration = ServerConfiguration("server.url", "auth.url") @@ -42,15 +42,15 @@ def embed_battle(self, battle): def test_init(): - gym_env = CustomEnvPlayer( + gymnasium_env = CustomEnvPlayer( None, account_configuration=account_configuration, server_configuration=server_configuration, start_listening=False, battle_format="gen7randombattles", ) - player = gym_env.agent - assert isinstance(gym_env, CustomEnvPlayer) + player = gymnasium_env.agent + assert isinstance(gymnasium_env, CustomEnvPlayer) assert isinstance(player, _AsyncPlayer) @@ -60,11 +60,11 @@ async def __call__(self, *args, **kwargs): @patch( - "poke_env.player.openai_api._AsyncQueue.async_get", + "poke_env.player.gymnasium_api._AsyncQueue.async_get", return_value=2, new_callable=AsyncMock, ) -@patch("poke_env.player.openai_api._AsyncQueue.async_put", new_callable=AsyncMock) +@patch("poke_env.player.gymnasium_api._AsyncQueue.async_put", new_callable=AsyncMock) def test_choose_move(queue_put_mock, queue_get_mock): player = CustomEnvPlayer( None, diff --git a/unit_tests/player/test_openai.py b/unit_tests/player/test_gymnasium.py similarity index 96% rename from unit_tests/player/test_openai.py rename to unit_tests/player/test_gymnasium.py index 40e9ec82a..e40c0fd7d 100644 --- a/unit_tests/player/test_openai.py +++ b/unit_tests/player/test_gymnasium.py @@ -10,14 +10,14 @@ ActType, BattleOrder, ForfeitBattleOrder, + GymnasiumEnv, ObsType, - OpenAIGymEnv, Player, ) -from poke_env.player.openai_api import _AsyncPlayer, _AsyncQueue +from poke_env.player.gymnasium_api import _AsyncPlayer, _AsyncQueue -class DummyEnv(OpenAIGymEnv[ObsType, ActType]): +class DummyEnv(GymnasiumEnv[ObsType, ActType]): def __init__(self, *args, **kwargs): self.opponent = None super().__init__(*args, **kwargs)