Skip to content

Commit e0a4002

Browse files
authored Dec 23, 2023
CUDA: fixed row rounding for 0 tensor splits (#4594)
1 parent 7082d24 commit e0a4002

File tree

1 file changed

+6
-2
lines changed

1 file changed

+6
-2
lines changed
 

‎ggml-cuda.cu

+6 -2
Original file line number | Diff line number | Diff line change
@@ -7937,12 +7937,16 @@ static void ggml_cuda_op_mul_mat(
79377937

79387938
if (id != 0) {
79397939
row_low[id] = ne01*g_tensor_split[id];
7940-
row_low[id] -= row_low[id] % rounding;
7940+
if (row_low[id] < ne01) {
7941+
row_low[id] -= row_low[id] % rounding;
7942+
}
79417943
}
79427944

79437945
if (id != g_device_count - 1) {
79447946
row_high[id] = ne01*g_tensor_split[id + 1];
7945-
row_high[id] -= row_high[id] % rounding;
7947+
if (row_high[id] < ne01) {
7948+
row_high[id] -= row_high[id] % rounding;
7949+
}
79467950
}
79477951
}
79487952
}

0 commit comments

Comments
 (0)
Please sign in to comment.