Commit d916c5b

sw authored and ggerganov committed
Remove unused n_parts parameter (ggml-org#1509)
1 parent d5207bf commit d916c5b

6 files changed  (+0, -13 lines)

examples/common.cpp  (+0, -8)

@@ -321,12 +321,6 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
                 invalid_param = true;
                 break;
             }
-        } else if (arg == "--n-parts") {
-            if (++i >= argc) {
-                invalid_param = true;
-                break;
-            }
-            params.n_parts = std::stoi(argv[i]);
         } else if (arg == "-h" || arg == "--help") {
             gpt_print_usage(argc, argv, default_params);
             exit(0);
@@ -418,7 +412,6 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     fprintf(stderr, "  --no-penalize-nl      do not penalize newline token\n");
     fprintf(stderr, "  --memory-f32          use f32 instead of f16 for memory key+value\n");
     fprintf(stderr, "  --temp N              temperature (default: %.1f)\n", (double)params.temp);
-    fprintf(stderr, "  --n-parts N           number of model parts (default: -1 = determine from dimensions)\n");
     fprintf(stderr, "  -b N, --batch-size N  batch size for prompt processing (default: %d)\n", params.n_batch);
     fprintf(stderr, "  --perplexity          compute perplexity over the prompt\n");
     fprintf(stderr, "  --keep                number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep);
@@ -473,7 +466,6 @@ struct llama_context * llama_init_from_gpt_params(const gpt_params & params) {
     auto lparams = llama_context_default_params();

     lparams.n_ctx        = params.n_ctx;
-    lparams.n_parts      = params.n_parts;
     lparams.n_gpu_layers = params.n_gpu_layers;
     lparams.seed         = params.seed;
     lparams.f16_kv       = params.memory_f16;

examples/common.h  (+0, -1)

@@ -24,7 +24,6 @@ struct gpt_params {
     int32_t seed      = -1;  // RNG seed
     int32_t n_threads = get_num_physical_cores();
     int32_t n_predict = -1;  // new tokens to predict
-    int32_t n_parts   = -1;  // amount of model parts (-1 = determine from model dimensions)
     int32_t n_ctx     = 512; // context size
     int32_t n_batch   = 512; // batch size for prompt processing (must be >=32 to use BLAS)
     int32_t n_keep    = 0;   // number of tokens to keep from initial prompt

examples/quantize-stats/quantize-stats.cpp  (+0, -1)

@@ -321,7 +321,6 @@ int main(int argc, char ** argv) {
     auto lparams = llama_context_default_params();

     lparams.n_ctx     = 256;
-    lparams.n_parts   = 1;
     lparams.seed      = 1;
     lparams.f16_kv    = false;
     lparams.use_mlock = false;

examples/save-load-state/save-load-state.cpp  (+0, -1)

@@ -26,7 +26,6 @@ int main(int argc, char ** argv) {
     auto lparams = llama_context_default_params();

     lparams.n_ctx    = params.n_ctx;
-    lparams.n_parts  = params.n_parts;
     lparams.seed     = params.seed;
     lparams.f16_kv   = params.memory_f16;
     lparams.use_mmap = params.use_mmap;

llama.cpp  (+0, -1)

@@ -820,7 +820,6 @@ static bool kv_cache_init(
 struct llama_context_params llama_context_default_params() {
     struct llama_context_params result = {
         /*.n_ctx      =*/ 512,
-        /*.n_parts    =*/ -1,
         /*.gpu_layers =*/ 0,
         /*.seed       =*/ -1,
         /*.f16_kv     =*/ false,

llama.h  (+0, -1)

@@ -55,7 +55,6 @@ extern "C" {

     struct llama_context_params {
         int n_ctx;        // text context
-        int n_parts;      // -1 for default
         int n_gpu_layers; // number of layers to store in VRAM
         int seed;         // RNG seed, -1 for random
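For callers, the net effect is that gpt_params no longer accepts --n-parts and llama_context_params no longer has an n_parts field; since the parameter was unused, existing code simply stops setting it. Below is a minimal sketch (not part of the commit) of context setup against the post-change struct, using llama_init_from_file and llama_free from the C API at this revision; the model path and parameter values are placeholders.

// Sketch only: context setup after removal of n_parts.
// The model path below is a placeholder; parameter values are illustrative.
#include "llama.h"

#include <cstdio>

int main() {
    // Start from the library defaults and override as needed; n_parts is gone.
    llama_context_params lparams = llama_context_default_params();
    lparams.n_ctx        = 512;   // text context size
    lparams.n_gpu_layers = 0;     // layers to keep in VRAM
    lparams.seed         = -1;    // -1 = random RNG seed
    lparams.f16_kv       = true;  // use f16 for the KV cache

    llama_context * ctx = llama_init_from_file("models/7B/ggml-model.bin", lparams);
    if (ctx == NULL) {
        fprintf(stderr, "failed to load model\n");
        return 1;
    }

    // ... tokenize, evaluate, sample ...

    llama_free(ctx);
    return 0;
}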
