Skip to content

Commit c62b01b

Browse files
committed
default to disabling NTKv2 instead of enabling it
1 parent 6c416e3 commit c62b01b

File tree

3 files changed

+8
-8
lines changed

3 files changed

+8
-8
lines changed

examples/common.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ struct gpt_params {
     int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens.
     float rope_freq_base = 10000.0f; // RoPE base frequency
     float rope_freq_scale = 1.0f; // RoPE frequency scaling factor
-    float rope_ntk_factor = 1.0f; // RoPE NTK mix factor
-    float rope_extrapolation_factor = 1.0f; // RoPE extrapolation mix factor
+    float rope_ntk_factor = 0.0f; // RoPE NTK mix factor
+    float rope_extrapolation_factor = 0.0f; // RoPE extrapolation mix factor

     // sampling parameters
     std::unordered_map<llama_token, float> logit_bias; // logit bias for specific tokens

ggml.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -7005,7 +7005,7 @@ struct ggml_tensor * ggml_rope(
         int n_dims,
         int mode,
         int n_ctx) {
-    return ggml_rope_impl(ctx, a, n_past, n_dims, mode, 10000.0f, 1.0f, 1.0f, 1.0f, n_ctx, false);
+    return ggml_rope_impl(ctx, a, n_past, n_dims, mode, 10000.0f, 1.0f, 0.0f, 0.0f, n_ctx, false);
 }

 struct ggml_tensor * ggml_rope_inplace(
@@ -7015,7 +7015,7 @@ struct ggml_tensor * ggml_rope_inplace(
         int n_dims,
         int mode,
         int n_ctx) {
-    return ggml_rope_impl(ctx, a, n_past, n_dims, mode, 10000.0f, 1.0f, 1.0f, 1.0f, n_ctx, true);
+    return ggml_rope_impl(ctx, a, n_past, n_dims, mode, 10000.0f, 1.0f, 0.0f, 0.0f, n_ctx, true);
 }

 struct ggml_tensor * ggml_rope_custom_inplace(

llama.cpp

+4-4
Original file line numberDiff line numberDiff line change
@@ -193,8 +193,8 @@ struct llama_hparams {

     float rope_freq_base = 10000.0f;
     float rope_freq_scale = 1.0f;
-    float rope_ntk_factor = 1.0f;
-    float rope_extrapolation_factor = 1.0f;
+    float rope_ntk_factor = 0.0f;
+    float rope_extrapolation_factor = 0.0f;

     enum llama_ftype ftype = LLAMA_FTYPE_MOSTLY_F16;

@@ -852,8 +852,8 @@ struct llama_context_params llama_context_default_params() {
         /*.tensor_split                =*/ {0},
         /*.rope_freq_base              =*/ 10000.0f,
         /*.rope_freq_scale             =*/ 1.0f,
-        /*.rope_ntk_factor             =*/ 1.0f,
-        /*.rope_extrapolation_factor   =*/ 1.0f,
+        /*.rope_ntk_factor             =*/ 0.0f,
+        /*.rope_extrapolation_factor   =*/ 0.0f,
         /*.progress_callback           =*/ nullptr,
         /*.progress_callback_user_data =*/ nullptr,
         /*.low_vram                    =*/ false,

0 commit comments

Comments
 (0)