Skip to content

Commit f7205bb

Browse files
committed
Allow quantizing k-quants to fall back when tensor size incompatible
1 parent 96981f3 commit f7205bb

File tree

1 file changed: +10 −10 lines changed

Diff for: llama.cpp

+10-10
Original file line number | Diff line number | Diff line change
@@ -8133,20 +8133,20 @@ static ggml_type get_k_quant_type(
         int nx = tensor->ne[0];
         int ny = tensor->ne[1];
         if (nx % QK_K != 0) {
-            LLAMA_LOG_WARN("\n\n%s : tensor cols %d x %d are not divisible by %d, required for k-quants\n", __func__, nx, ny, QK_K);
+            LLAMA_LOG_WARN("\n\n%s : tensor cols %d x %d are not divisible by %d, required for %s", __func__, nx, ny, QK_K, ggml_type_name(new_type));
             convert_incompatible_tensor = true;
         }
     }
     if (convert_incompatible_tensor) {
-        if (name == tn(LLM_TENSOR_OUTPUT, "weight")) {
-            new_type = GGML_TYPE_F16; //fall back to F16 instead of just failing.
-            LLAMA_LOG_WARN("F16 will be used for this tensor instead.\n");
-        } else if (name == tn(LLM_TENSOR_TOKEN_EMBD, "weight")) {
-            new_type = GGML_TYPE_Q4_0; //fall back to Q4_0 instead of just failing.
-            LLAMA_LOG_WARN("Q4_0 will be used for this tensor instead.\n");
-        } else {
-            throw std::runtime_error("Unsupported tensor size encountered\n");
-        }
+        switch (new_type) {
+            case GGML_TYPE_Q2_K: new_type = GGML_TYPE_Q4_0; break;
+            case GGML_TYPE_Q3_K: new_type = GGML_TYPE_Q4_1; break;
+            case GGML_TYPE_Q4_K: new_type = GGML_TYPE_Q5_0; break;
+            case GGML_TYPE_Q5_K: new_type = GGML_TYPE_Q5_1; break;
+            case GGML_TYPE_Q6_K: new_type = GGML_TYPE_Q8_0; break;
+            default: throw std::runtime_error("\nUnsupported tensor size encountered\n");
+        }
+        LLAMA_LOG_WARN(" - using fallback quantization %s\n", ggml_type_name(new_type));
     }

     return new_type;

0 commit comments

Comments (0)