Commit 3d48f42

llama : mark LLM_ARCH_STARCODER as full offload supported (#3945)

as done in #3827

Parent: c41ea36

1 file changed: +6 −5 lines

Diff for: llama.cpp (+6 −5)
@@ -5164,11 +5164,12 @@ static int llama_decode_internal(
 
     // If all tensors can be run on the GPU then using more than 1 thread is detrimental.
     const bool full_offload_supported =
-        model.arch == LLM_ARCH_LLAMA    ||
-        model.arch == LLM_ARCH_BAICHUAN ||
-        model.arch == LLM_ARCH_FALCON   ||
-        model.arch == LLM_ARCH_REFACT   ||
-        model.arch == LLM_ARCH_MPT;
+        model.arch == LLM_ARCH_LLAMA    ||
+        model.arch == LLM_ARCH_BAICHUAN ||
+        model.arch == LLM_ARCH_FALCON   ||
+        model.arch == LLM_ARCH_REFACT   ||
+        model.arch == LLM_ARCH_MPT      ||
+        model.arch == LLM_ARCH_STARCODER;
 
     const bool fully_offloaded = model.n_gpu_layers >= (int) hparams.n_layer + 3;
     if (ggml_cpu_has_cublas() && full_offload_supported && fully_offloaded) {
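
For context, here is a minimal sketch of how this flag plays out, based on the in-code comment that more than one thread is detrimental once everything runs on the GPU. The helper name pick_decode_threads is hypothetical (the diff does not show the branch body); it only illustrates the thread-count decision, not the verbatim llama.cpp source.

    // Sketch (assumption, not the actual llama.cpp implementation): with cuBLAS
    // available and the model fully offloaded, decode falls back to a single CPU
    // thread, since extra CPU threads would only add synchronization overhead.
    static int pick_decode_threads(bool has_cublas,
                                   bool full_offload_supported,
                                   bool fully_offloaded,
                                   int  requested_threads) {
        if (has_cublas && full_offload_supported && fully_offloaded) {
            return 1; // all tensors run on the GPU; more CPU threads hurt throughput
        }
        return requested_threads;
    }

With this commit, a STARCODER model offloaded with enough GPU layers now takes the single-thread path like the other listed architectures, instead of spinning up CPU threads it cannot use.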
