allow ending the episode for MaxStepsReached (#4453)

Chris Elion · web-flow · commit fce35214bf00 · 2020-09-09T15:37:54.000-07:00
* allow ending the episode for MaxStepsReached

* changelog

* rename and update docs
diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md
@@ -19,6 +19,9 @@ and this project adheres to
 - Enabled C# formatting using `dotnet-format`. (#4362)
 - GridSensor was added to the com.unity.ml-agents.extensions package. Thank you
 to Jaden Travnik from Eidos Montreal for the contribution! (#4399)
+- Added `Agent.EpisodeInterrupted()`, which can be used to reset the agent when
+it has reached a user-determined maximum number of steps. This behaves similarly
+to `Agent.EndEpsiode()` but has a slightly different effect on training (#4453).
 #### ml-agents / ml-agents-envs / gym-unity (Python)
 - Experimental PyTorch support has been added. Use `--torch` when running `mlagents-learn`, or add
 `framework: pytorch` to your trainer configuration (under the behavior name) to enable it.
diff --git a/com.unity.ml-agents/Runtime/Agent.cs b/com.unity.ml-agents/Runtime/Agent.cs
@@ -445,7 +445,7 @@ public void LazyInitialize()
         enum DoneReason
         {
             /// <summary>
-            /// The <see cref="EndEpisode"/> method was called.
+            /// The episode was ended manually by calling <see cref="EndEpisode"/>.
             /// </summary>
             DoneCalled,
 
@@ -691,10 +691,41 @@ void UpdateRewardStats()
         /// <summary>
         /// Sets the done flag to true and resets the agent.
         /// </summary>
+        /// <remarks>
+        /// This should be used when the episode can no longer continue, such as when the Agent
+        /// reaches the goal or fails at the task.
+        /// </remarks>
         /// <seealso cref="OnEpisodeBegin"/>
+        /// <seealso cref="EpisodeInterrupted"/>
         public void EndEpisode()
         {
-            NotifyAgentDone(DoneReason.DoneCalled);
+            EndEpisodeAndReset(DoneReason.DoneCalled);
+        }
+
+        /// <summary>
+        /// Indicate that the episode is over but not due to the "fault" of the Agent.
+        /// This has the same end result as calling <see cref="EndEpisode"/>, but has a
+        /// slightly different effect on training.
+        /// </summary>
+        /// <remarks>
+        /// This should be used when the episode could continue, but has gone on for
+        /// a sufficient number of steps.
+        /// </remarks>
+        /// <seealso cref="OnEpisodeBegin"/>
+        /// <seealso cref="EndEpisode"/>
+        public void EpisodeInterrupted()
+        {
+            EndEpisodeAndReset(DoneReason.MaxStepReached);
+        }
+
+
+        /// <summary>
+        /// Internal method to end the episode and reset the Agent.
+        /// </summary>
+        /// <param name="reason"></param>
+        void EndEpisodeAndReset(DoneReason reason)
+        {
+            NotifyAgentDone(reason);
             _AgentReset();
         }