Commit 8401e3e

llama : fix save/load state context size
ggml-ci
1 parent 83c96d5 commit 8401e3e

File tree

1 file changed, +2 -2 lines changed


llama.cpp (+2 -2)
@@ -8558,7 +8558,7 @@ static void llama_copy_state_data_internal(struct llama_context * ctx, llama_dat
     if (kv_buf_size) {
         const size_t elt_size = ggml_element_size(kv_self.k);
 
-        ggml_context * cpy_ctx = ggml_init({ 4096, NULL, /* no_alloc */ true });
+        ggml_context * cpy_ctx = ggml_init({ 6*ggml_tensor_overhead() + ggml_graph_overhead(), NULL, /* no_alloc */ true });
         ggml_cgraph * gf = ggml_new_graph(cpy_ctx);
 
         ggml_tensor * kout3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_head, n_layer);
@@ -8686,7 +8686,7 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) {
 
     const size_t elt_size = ggml_element_size(kv_self.k);
 
-    ggml_context * cpy_ctx = ggml_init({ 4096, NULL, /* no_alloc */ true });
+    ggml_context * cpy_ctx = ggml_init({ 6*ggml_tensor_overhead() + ggml_graph_overhead(), NULL, /* no_alloc */ true });
     ggml_cgraph * gf = ggml_new_graph(cpy_ctx);
 
     ggml_tensor * kin3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_head, n_layer);
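The change replaces the hard-coded 4096-byte context size with one derived from ggml_tensor_overhead() and ggml_graph_overhead(). Since the context is created with no_alloc, it only has to hold tensor and graph metadata, so the required size is the per-tensor overhead times the number of tensors created in it plus the graph overhead. Below is a minimal sketch of that sizing, assuming the copy graph creates six tensors in the context (the two destination tensors, two views of the KV cache, and the two ggml_cpy results); make_copy_ctx is a hypothetical helper for illustration, not part of the commit.

// Sketch: size a metadata-only (no_alloc) ggml context for the KV-cache copy.
// Assumption: the copy graph creates 6 tensors in this context
// (kout3d/vout3d or kin3d/vin3d, two views of the cache, and two ggml_cpy results).
static struct ggml_context * make_copy_ctx(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 6*ggml_tensor_overhead() + ggml_graph_overhead(),
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ true,   // metadata only; tensor data is not allocated here
    };
    return ggml_init(params);
}

Deriving the size from the overhead helpers keeps the context exactly large enough even if the ggml object bookkeeping changes; the fixed 4096 bytes is presumably no longer sufficient once the graph object is allocated inside the same context.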
