We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 7082d24 commit e0a4002 (Copy full SHA for e0a4002)
ggml-cuda.cu
@@ -7937,12 +7937,16 @@ static void ggml_cuda_op_mul_mat(
7937
7938
if (id != 0) {
7939
row_low[id] = ne01*g_tensor_split[id];
7940
- row_low[id] -= row_low[id] % rounding;
+ if (row_low[id] < ne01) {
7941
+ row_low[id] -= row_low[id] % rounding;
7942
+ }
7943
}
7944
7945
if (id != g_device_count - 1) {
7946
row_high[id] = ne01*g_tensor_split[id + 1];
- row_high[id] -= row_high[id] % rounding;
7947
+ if (row_high[id] < ne01) {
7948
+ row_high[id] -= row_high[id] % rounding;
7949
7950
7951
7952
0 commit comments