Skip to content

Commit 79cadc0

Browse files
author
Alex
committed
Lambda to Gamma. Updated Readme.
1 parent 7a31a2b commit 79cadc0

File tree

6 files changed

+37
-81
lines changed

6 files changed

+37
-81
lines changed

MC/README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737

3838
### Exercises
3939

40-
- [Get familiar with the Blackjack environment (Blackjack-v0)](Blackjack%20Playground.ipynb)
40+
- Get familiar with the [Blackjack environment (Blackjack-v0)](Blackjack%20Playground.ipynb)
4141
- Implement the Monte Carlo Prediction to estimate state-action values
4242
- [Exercise](MC%20Prediction.ipynb)
4343
- [Solution](MC%20Prediction%20Solution.ipynb)

TD/Q-Learning Solution.ipynb

+8-18
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,7 @@
33
{
44
"cell_type": "code",
55
"execution_count": null,
6-
"metadata": {
7-
"collapsed": false
8-
},
6+
"metadata": {},
97
"outputs": [],
108
"source": [
119
"%matplotlib inline\n",
@@ -31,9 +29,7 @@
3129
{
3230
"cell_type": "code",
3331
"execution_count": 15,
34-
"metadata": {
35-
"collapsed": false
36-
},
32+
"metadata": {},
3733
"outputs": [],
3834
"source": [
3935
"env = CliffWalkingEnv()"
@@ -73,9 +69,7 @@
7369
{
7470
"cell_type": "code",
7571
"execution_count": 17,
76-
"metadata": {
77-
"collapsed": false
78-
},
72+
"metadata": {},
7973
"outputs": [],
8074
"source": [
8175
"def q_learning(env, num_episodes, discount_factor=1.0, alpha=0.5, epsilon=0.1):\n",
@@ -86,7 +80,7 @@
8680
" Args:\n",
8781
" env: OpenAI environment.\n",
8882
" num_episodes: Number of episodes to run for.\n",
89-
" discount_factor: Lambda time discount factor.\n",
83+
" discount_factor: Gamma discount factor.\n",
9084
" alpha: TD learning rate.\n",
9185
 " epsilon: Chance to sample a random action. Float between 0 and 1.\n",
9286
" \n",
@@ -147,9 +141,7 @@
147141
{
148142
"cell_type": "code",
149143
"execution_count": 18,
150-
"metadata": {
151-
"collapsed": false
152-
},
144+
"metadata": {},
153145
"outputs": [
154146
{
155147
"name": "stdout",
@@ -166,9 +158,7 @@
166158
{
167159
"cell_type": "code",
168160
"execution_count": 19,
169-
"metadata": {
170-
"collapsed": false
171-
},
161+
"metadata": {},
172162
"outputs": [
173163
{
174164
"data": {
@@ -231,9 +221,9 @@
231221
"name": "python",
232222
"nbconvert_exporter": "python",
233223
"pygments_lexer": "ipython3",
234-
"version": "3.5.1"
224+
"version": "3.5.2"
235225
}
236226
},
237227
"nbformat": 4,
238-
"nbformat_minor": 0
228+
"nbformat_minor": 1
239229
}

TD/Q-Learning.ipynb

+8-18
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,7 @@
33
{
44
"cell_type": "code",
55
"execution_count": 3,
6-
"metadata": {
7-
"collapsed": false
8-
},
6+
"metadata": {},
97
"outputs": [],
108
"source": [
119
"%matplotlib inline\n",
@@ -30,9 +28,7 @@
3028
{
3129
"cell_type": "code",
3230
"execution_count": 4,
33-
"metadata": {
34-
"collapsed": false
35-
},
31+
"metadata": {},
3632
"outputs": [],
3733
"source": [
3834
"env = CliffWalkingEnv()"
@@ -72,9 +68,7 @@
7268
{
7369
"cell_type": "code",
7470
"execution_count": 6,
75-
"metadata": {
76-
"collapsed": false
77-
},
71+
"metadata": {},
7872
"outputs": [],
7973
"source": [
8074
"def q_learning(env, num_episodes, discount_factor=1.0, alpha=0.5, epsilon=0.1):\n",
@@ -85,7 +79,7 @@
8579
" Args:\n",
8680
" env: OpenAI environment.\n",
8781
" num_episodes: Number of episodes to run for.\n",
88-
" discount_factor: Lambda time discount factor.\n",
82+
" discount_factor: Gamma discount factor.\n",
8983
" alpha: TD learning rate.\n",
9084
 " epsilon: Chance to sample a random action. Float between 0 and 1.\n",
9185
" \n",
@@ -121,9 +115,7 @@
121115
{
122116
"cell_type": "code",
123117
"execution_count": 7,
124-
"metadata": {
125-
"collapsed": false
126-
},
118+
"metadata": {},
127119
"outputs": [
128120
{
129121
"name": "stdout",
@@ -140,9 +132,7 @@
140132
{
141133
"cell_type": "code",
142134
"execution_count": 8,
143-
"metadata": {
144-
"collapsed": false
145-
},
135+
"metadata": {},
146136
"outputs": [
147137
{
148138
"data": {
@@ -205,9 +195,9 @@
205195
"name": "python",
206196
"nbconvert_exporter": "python",
207197
"pygments_lexer": "ipython3",
208-
"version": "3.5.1"
198+
"version": "3.5.2"
209199
}
210200
},
211201
"nbformat": 4,
212-
"nbformat_minor": 0
202+
"nbformat_minor": 1
213203
}

TD/README.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,11 @@
4040

4141
### Exercises
4242

43-
- [Windy Gridworld Playground](Windy%20Gridworld%20Playground.ipynb)
43+
- Get familiar with the [Windy Gridworld Playground](Windy%20Gridworld%20Playground.ipynb)
4444
- Implement SARSA
4545
- [Exercise](SARSA.ipynb)
4646
- [Solution](SARSA%20Solution.ipynb)
47-
- [Cliff Environment Playground](Cliff%20Environment%20Playground.ipynb)
47+
- Get familiar with the [Cliff Environment Playground](Cliff%20Environment%20Playground.ipynb)
4848
- Implement Q-Learning in Python
4949
- [Exercise](Q-Learning.ipynb)
5050
- [Solution](Q-Learning%20Solution.ipynb)

TD/SARSA Solution.ipynb

+9-21
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,7 @@
33
{
44
"cell_type": "code",
55
"execution_count": 19,
6-
"metadata": {
7-
"collapsed": false
8-
},
6+
"metadata": {},
97
"outputs": [],
108
"source": [
119
"%matplotlib inline\n",
@@ -39,9 +37,7 @@
3937
{
4038
"cell_type": "code",
4139
"execution_count": 20,
42-
"metadata": {
43-
"collapsed": false
44-
},
40+
"metadata": {},
4541
"outputs": [],
4642
"source": [
4743
"env = WindyGridworldEnv()"
@@ -81,9 +77,7 @@
8177
{
8278
"cell_type": "code",
8379
"execution_count": 22,
84-
"metadata": {
85-
"collapsed": false
86-
},
80+
"metadata": {},
8781
"outputs": [],
8882
"source": [
8983
"def sarsa(env, num_episodes, discount_factor=1.0, alpha=0.5, epsilon=0.1):\n",
@@ -93,7 +87,7 @@
9387
" Args:\n",
9488
" env: OpenAI environment.\n",
9589
" num_episodes: Number of episodes to run for.\n",
96-
" discount_factor: Lambda time discount factor.\n",
90+
" discount_factor: Gamma discount factor.\n",
9791
" alpha: TD learning rate.\n",
9892
 " epsilon: Chance to sample a random action. Float between 0 and 1.\n",
9993
" \n",
@@ -156,9 +150,7 @@
156150
{
157151
"cell_type": "code",
158152
"execution_count": 23,
159-
"metadata": {
160-
"collapsed": false
161-
},
153+
"metadata": {},
162154
"outputs": [
163155
{
164156
"name": "stdout",
@@ -175,9 +167,7 @@
175167
{
176168
"cell_type": "code",
177169
"execution_count": 24,
178-
"metadata": {
179-
"collapsed": false
180-
},
170+
"metadata": {},
181171
"outputs": [
182172
{
183173
"data": {
@@ -217,9 +207,7 @@
217207
{
218208
"cell_type": "code",
219209
"execution_count": null,
220-
"metadata": {
221-
"collapsed": false
222-
},
210+
"metadata": {},
223211
"outputs": [],
224212
"source": []
225213
}
@@ -240,9 +228,9 @@
240228
"name": "python",
241229
"nbconvert_exporter": "python",
242230
"pygments_lexer": "ipython3",
243-
"version": "3.5.1"
231+
"version": "3.5.2"
244232
}
245233
},
246234
"nbformat": 4,
247-
"nbformat_minor": 0
235+
"nbformat_minor": 1
248236
}

TD/SARSA.ipynb

+9-21
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,7 @@
33
{
44
"cell_type": "code",
55
"execution_count": 11,
6-
"metadata": {
7-
"collapsed": false
8-
},
6+
"metadata": {},
97
"outputs": [],
108
"source": [
119
"%matplotlib inline\n",
@@ -30,9 +28,7 @@
3028
{
3129
"cell_type": "code",
3230
"execution_count": 12,
33-
"metadata": {
34-
"collapsed": false
35-
},
31+
"metadata": {},
3632
"outputs": [],
3733
"source": [
3834
"env = WindyGridworldEnv()"
@@ -72,9 +68,7 @@
7268
{
7369
"cell_type": "code",
7470
"execution_count": 14,
75-
"metadata": {
76-
"collapsed": false
77-
},
71+
"metadata": {},
7872
"outputs": [],
7973
"source": [
8074
"def sarsa(env, num_episodes, discount_factor=1.0, alpha=0.5, epsilon=0.1):\n",
@@ -84,7 +78,7 @@
8478
" Args:\n",
8579
" env: OpenAI environment.\n",
8680
" num_episodes: Number of episodes to run for.\n",
87-
" discount_factor: Lambda time discount factor.\n",
81+
" discount_factor: Gamma discount factor.\n",
8882
" alpha: TD learning rate.\n",
8983
" epsilon: Chance the sample a random action. Float betwen 0 and 1.\n",
9084
" \n",
@@ -121,9 +115,7 @@
121115
{
122116
"cell_type": "code",
123117
"execution_count": 16,
124-
"metadata": {
125-
"collapsed": false
126-
},
118+
"metadata": {},
127119
"outputs": [
128120
{
129121
"name": "stdout",
@@ -140,9 +132,7 @@
140132
{
141133
"cell_type": "code",
142134
"execution_count": 17,
143-
"metadata": {
144-
"collapsed": false
145-
},
135+
"metadata": {},
146136
"outputs": [
147137
{
148138
"data": {
@@ -182,9 +172,7 @@
182172
{
183173
"cell_type": "code",
184174
"execution_count": null,
185-
"metadata": {
186-
"collapsed": false
187-
},
175+
"metadata": {},
188176
"outputs": [],
189177
"source": []
190178
}
@@ -205,9 +193,9 @@
205193
"name": "python",
206194
"nbconvert_exporter": "python",
207195
"pygments_lexer": "ipython3",
208-
"version": "3.5.1"
196+
"version": "3.5.2"
209197
}
210198
},
211199
"nbformat": 4,
212-
"nbformat_minor": 0
200+
"nbformat_minor": 1
213201
}

0 commit comments

Comments
 (0)