
Commit 4cec9db

- improve Javadoc comments for network related source code
1 parent 4587aeb commit 4cec9db

8 files changed: +278 −18 lines changed

rapaio-core/src/rapaio/nn/Autograd.java

+1 −3
@@ -31,9 +31,7 @@
 import rapaio.darray.DArray;

 /**
- * Central place of automatic differentiation in reverse mode.
- * <p>
- * Object which allows differentiation must implement {@link Tensor}.
+ * Implementation of automatic differentiation in reverse mode. Objects which allow differentiation must implement {@link Tensor}.
  * <p>
  * The forward operations are performed when the computation is called using various operations
  * on {@link Tensor} or when new node are created with {@link TensorManager#var(DArray)}.
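The reverse-mode flow described above can be sketched as follows. This is only a hedged illustration: apart from Tensor, TensorManager#var(DArray) and Autograd#backward(Tensor), which appear in the Javadoc, the factory method ofFloat(), the operations sqr()/sum() and the grad() accessor are assumed names for illustration, not confirmed rapaio API.

// hedged sketch; names marked "assumed" are illustrative only
TensorManager tm = TensorManager.ofFloat();   // assumed factory for a tensor manager
Tensor x = tm.var(values);                    // wrap a DArray as a differentiable graph node
Tensor y = x.sqr().sum();                     // assumed forward ops; each op records its backpropagation function
Autograd.backward(y);                         // backpropagate gradients from the scalar result (assumed static call)
DArray<?> dx = x.grad();                      // assumed accessor for the gradient accumulated in x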

rapaio-core/src/rapaio/nn/Loss.java

+3
@@ -26,6 +26,9 @@

 import rapaio.nn.data.Batch;

+/**
+ * Loss function used to optimize a network during backpropagation.
+ */
 public interface Loss {

     enum Reduce {

rapaio-core/src/rapaio/nn/Network.java

+99
@@ -104,32 +104,131 @@ public interface Network extends Serializable {
      */
     void eval();

+    /**
+     * Performs network inference by taking an array of input tensors and computing an array of output tensors.
+     * Usually a network takes a single input and produces a single output. For this purpose, the default
+     * implementation delegates to {@link #forward11(Tensor)}.
+     * <p>
+     * If this default scenario does not fit the purpose of the network, this method needs to be overridden.
+     * <p>
+     * The forward method contains operations on tensors. All operations on tensors are tracked by the
+     * computational graph, since each tensor operation leaves a trace which consists of backpropagation
+     * functions. When the {@link Autograd#backward(Tensor)} method is called on some tensor which has
+     * a scalar gradient, the computational graph starts to backpropagate gradients.
+     *
+     * @param xs input tensors
+     * @return computed output tensors
+     */
     default Tensor[] forward(Tensor... xs) {
         if (xs.length == 1) {
             return new Tensor[] {forward11(xs[0])};
         }
         throw new NotImplementedException();
     }

+    /**
+     * The default case of the {@link #forward(Tensor...)} method, which receives a single input tensor
+     * and outputs a single tensor.
+     *
+     * @param x input tensor
+     * @return computed output tensor
+     */
     default Tensor forward11(Tensor x) {
         throw new NotImplementedException();
     }

+    /**
+     * Improved forward method which trades memory for parallel batched execution of the forward pass.
+     * <p>
+     * The execution consists of splitting the input tensors into batches and executing those batches
+     * in parallel in the forward step. The tradeoff is that the whole computational graph will reside
+     * in memory, thus one can use this method only if the dataset is small enough, depending on the
+     * available memory.
+     * <p>
+     * The result consists of a list of batches. Each batch contains the input data and also the
+     * network output tensors computed for that specific batch.
+     * <p>
+     * The batch size is given as a parameter. Before splitting into batches the data from the dataset
+     * is shuffled, and all the batches are used for execution.
+     *
+     * @param batchSize the number of instances in each batch; the last batch might contain fewer instances
+     * @param inputs    input tensors
+     * @return list of computed batches
+     */
     default List<Batch> batchForward(int batchSize, Tensor... inputs) {
         return batchForward(batchSize, true, false, inputs);
     }

+    /**
+     * Fully customizable version of {@link #batchForward(int, Tensor...)}.
+     * <p>
+     * Improved forward method which trades memory for parallel batched execution of the forward pass.
+     * <p>
+     * The execution consists of splitting the input tensors into batches and executing those batches
+     * in parallel in the forward step. The tradeoff is that the whole computational graph will reside
+     * in memory, thus one can use this method only if the dataset is small enough, depending on the
+     * available memory.
+     * <p>
+     * The result consists of a list of batches. Each batch contains the input data and also the
+     * network output tensors computed for that specific batch.
+     * <p>
+     * The batch size is given as a parameter. Before splitting into batches the instances are shuffled
+     * if the {@code shuffle} parameter is true. In some cases the last batch might contain fewer
+     * instances; if this is not desirable, one can set {@code skipLast} to {@code true} to skip the last batch.
+     *
+     * @param batchSize the batch size
+     * @param shuffle   whether the data is shuffled before splitting into batches
+     * @param skipLast  whether the last batch, which might be smaller, is skipped from execution
+     * @param inputs    input tensors
+     * @return list of computed batches
+     */
     List<Batch> batchForward(int batchSize, boolean shuffle, boolean skipLast, Tensor... inputs);

+    /**
+     * Saves the state of the network to an atom output stream.
+     *
+     * @param out atom output stream
+     * @throws IOException thrown if something goes wrong
+     */
     void saveState(AtomOutputStream out) throws IOException;

+    /**
+     * Loads the state of the network from an atom input stream.
+     *
+     * @param in atom input stream
+     * @throws IOException thrown if something goes wrong
+     */
     void loadState(AtomInputStream in) throws IOException;

+    /**
+     * Saves the network state to a file using the atom binary serialization protocol.
+     *
+     * @param file file which will store the network state
+     * @throws IOException thrown if something goes wrong
+     */
     void saveState(File file) throws IOException;

+    /**
+     * Saves the network state to a generic output stream using the atom binary serialization protocol.
+     *
+     * @param out output stream
+     * @throws IOException thrown if something goes wrong
+     */
     void saveState(OutputStream out) throws IOException;

+    /**
+     * Loads the network state from a file using the atom binary serialization protocol.
+     *
+     * @param file file which contains the serialized network state
+     * @throws IOException thrown if something goes wrong
+     */
     void loadState(File file) throws IOException;

+    /**
+     * Loads the network state from a generic input stream using the atom binary serialization protocol.
+     *
+     * @param in input stream
+     * @throws IOException thrown if something goes wrong
+     */
     void loadState(InputStream in) throws IOException;
 }
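As a usage note for the forward methods documented above, the single-input case might look like the hedged sketch below; net stands for a hypothetical Network implementation that overrides forward11, and xTrain for an input tensor built elsewhere.

// hedged sketch; net and xTrain are hypothetical placeholders
Tensor yHat = net.forward11(xTrain);   // single input, single output
Tensor[] ys = net.forward(xTrain);     // the default forward(Tensor...) delegates to forward11 for one input; ys[0] matches yHat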

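The batched forward pass documented above might be driven as in the sketch below; only the batchForward signatures come from the diff, while the Batch accessor used on the result is an assumption.

// hedged sketch of a batched forward pass: batch size 32, shuffled, keeping the (possibly smaller) last batch
List<Batch> batches = net.batchForward(32, true, false, xTrain);
for (Batch batch : batches) {
    Tensor[] outputs = batch.outputs();   // assumed accessor for the network outputs computed for this batch
    // feed the outputs into a Loss implementation or collect predictions here
}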
rapaio-core/src/rapaio/nn/NetworkState.java

+17
@@ -24,6 +24,23 @@
 import java.util.ArrayList;
 import java.util.List;

+/**
+ * Represents the network state. The network state is a container which holds
+ * all the tensors which are used for inference and learning in a network.
+ * <p>
+ * For serialization purposes the network code is not saved. This is in order
+ * to give enough freedom to the user to customize the network behavior
+ * with custom code at training and inference time. Instead, if a network
+ * has to be serialized, the following scenario can be followed:
+ *
+ * <ol>
+ * <li>create a network instance and do whatever is needed to be used later (including initialization, training, other customizations)</li>
+ * <li>save the network state into persistent storage using one of the {@code Network#saveState} methods</li>
+ * <li>for later usage, create a new instance of the network</li>
+ * <li>load the network state from persistent storage using one of the {@code Network#loadState} methods</li>
+ * <li>the new network is ready to be used like the old network instance, for inference or for other scenarios like further training</li>
+ * </ol>
+ */
 public final class NetworkState {

     private final ArrayList<Tensor> tensors;
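The serialization scenario enumerated above maps to the saveState/loadState methods declared in Network. A hedged sketch, where MyNetwork and the training code are hypothetical while saveState(File) and loadState(File) come from the diff:

Network net = new MyNetwork(tm);          // 1. create and customize a network instance (hypothetical class)
// initialize or train the network here
net.saveState(new File("net.atom"));      // 2. persist the state with the atom binary serialization protocol

Network restored = new MyNetwork(tm);     // 3. later, create a fresh instance of the same network
restored.loadState(new File("net.atom")); // 4. load the persisted state into it
// 5. the restored instance can now be used for inference or trained further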

rapaio-core/src/rapaio/nn/Optimizer.java

+29
@@ -26,17 +26,46 @@
 import rapaio.nn.optimizer.Adam;
 import rapaio.nn.optimizer.SGD;

+/**
+ * Defines the contract for optimization algorithms.
+ * <p>
+ * An optimization algorithm uses the computed gradients and updates the values of the tracked tensors
+ * according to its own strategy. The tracked tensors are received at creation time.
+ * <p>
+ * Even if a tensor is tracked for optimization, it can be skipped if it has {@code requiresGrad} set to {@code false}.
+ * This is useful for scenarios when one wants to freeze some parts of the network and update only the other parts.
+ */
 public interface Optimizer {

+    /**
+     * Creates a new instance of the Stochastic Gradient Descent optimizer. Further customization can be done on the returned instance.
+     *
+     * @param tm     tensor manager used for computation
+     * @param params collection of tracked tensors for optimization
+     * @return new optimizer instance
+     */
     static SGD SGD(TensorManager tm, Collection<Tensor> params) {
         return new SGD(tm, params);
     }

+    /**
+     * Creates a new instance of the Adam optimizer. Further customization can be done on the returned instance.
+     *
+     * @param tm     tensor manager used for computation
+     * @param params collection of tracked tensors for optimization
+     * @return new optimizer instance
+     */
     static Adam Adam(TensorManager tm, Collection<Tensor> params) {
         return new Adam(tm, params);
     }

+    /**
+     * Deletes all the computed gradients for the tracked tensors.
+     */
     void zeroGrad();

+    /**
+     * Performs an optimization step, which consists of updating the tensor values according to the computed gradients
+     * and the algorithm's own strategy.
+     */
     void step();
 }
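Putting the Optimizer contract above together with Autograd and Network, one training iteration might look like the hedged sketch below. Optimizer.Adam(tm, params), zeroGrad(), step(), Network#forward11 and Autograd#backward appear in this commit; net.parameters() and the computeLoss helper are assumptions used only to complete the picture.

Optimizer optimizer = Optimizer.Adam(tm, net.parameters());   // track the trainable tensors (parameters() is assumed)
for (int epoch = 0; epoch < 10; epoch++) {
    optimizer.zeroGrad();                        // delete gradients accumulated in the previous iteration
    Tensor yHat = net.forward11(xTrain);         // forward pass builds the computational graph
    Tensor loss = computeLoss(yHat, yTrain);     // hypothetical helper producing a scalar loss tensor
    Autograd.backward(loss);                     // backpropagate gradients to the tracked tensors
    optimizer.step();                            // update tensor values according to the optimizer's strategy
}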
