@@ -278,8 +278,6 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
278
278
break ;
279
279
}
280
280
params.yarn_beta_slow = std::stof (argv[i]);
281
- } else if (arg == " --memory-f32" ) {
282
- params.memory_f16 = false ;
283
281
} else if (arg == " --top-p" ) {
284
282
if (++i >= argc) {
285
283
invalid_param = true ;
@@ -804,8 +802,6 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
804
802
printf (" --yarn-beta-fast N YaRN: low correction dim or beta (default: %.1f)\n " , params.yarn_beta_fast );
805
803
printf (" --ignore-eos ignore end of stream token and continue generating (implies --logit-bias 2-inf)\n " );
806
804
printf (" --no-penalize-nl do not penalize newline token\n " );
807
- printf (" --memory-f32 use f32 instead of f16 for memory key+value (default: disabled)\n " );
808
- printf (" not recommended: doubles context memory required and no measurable increase in quality\n " );
809
805
printf (" --temp N temperature (default: %.1f)\n " , (double )sparams.temp );
810
806
printf (" --logits-all return logits for all tokens in the batch (default: disabled)\n " );
811
807
printf (" --hellaswag compute HellaSwag score over random tasks from datafile supplied with -f\n " );
@@ -948,7 +944,6 @@ struct llama_context_params llama_context_params_from_gpt_params(const gpt_param
948
944
cparams.n_threads_batch = params.n_threads_batch == -1 ? params.n_threads : params.n_threads_batch ;
949
945
cparams.mul_mat_q = params.mul_mat_q ;
950
946
cparams.seed = params.seed ;
951
- cparams.f16_kv = params.memory_f16 ;
952
947
cparams.logits_all = params.logits_all ;
953
948
cparams.embedding = params.embedding ;
954
949
cparams.rope_scaling_type = params.rope_scaling_type ;
@@ -1375,7 +1370,6 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l
1375
1370
}
1376
1371
fprintf (stream, " lora_base: %s\n " , params.lora_base .c_str ());
1377
1372
fprintf (stream, " main_gpu: %d # default: 0\n " , params.main_gpu );
1378
- fprintf (stream, " memory_f32: %s # default: false\n " , !params.memory_f16 ? " true" : " false" );
1379
1373
fprintf (stream, " mirostat: %d # default: 0 (disabled)\n " , sparams.mirostat );
1380
1374
fprintf (stream, " mirostat_ent: %f # default: 5.0\n " , sparams.mirostat_tau );
1381
1375
fprintf (stream, " mirostat_lr: %f # default: 0.1\n " , sparams.mirostat_eta );
0 commit comments