Skip to content

Commit 1cecdec

Browse files
committed
Revert "Avoid the transposed X branch in the Z = X * Y matrix multiplication (ggml-org#439)"
This reverts commit 483bab2.
1 parent 53dbba7 commit 1cecdec

File tree

1 file changed

+5 -7 lines changed

llama.cpp

+5 -7
Original file line number | Diff line number | Diff line change
@@ -860,13 +860,11 @@ static bool llama_eval_internal(
860 860

861 861
// V_trans = Vmem.view(n_embd/n_head, n_head, n_past + N).permute(1, 2, 0, 3).contiguous()
862 862
struct ggml_tensor * V_trans =
863-
ggml_cpy(ctx0,
864-
ggml_permute(ctx0,
865-
ggml_reshape_3d(ctx0,
866-
ggml_view_1d(ctx0, kv_self.v, (n_past + N)*n_embd, il*n_ctx*ggml_element_size(kv_self.v)*n_embd),
867-
n_embd/n_head, n_head, n_past + N),
868-
1, 2, 0, 3),
869-
ggml_new_tensor_3d(ctx0, kv_self.v->type, n_past + N, n_embd/n_head, n_head));
863+
ggml_permute(ctx0,
864+
ggml_reshape_3d(ctx0,
865+
ggml_view_1d(ctx0, kv_self.v, (n_past + N)*n_embd, il*n_ctx*ggml_element_size(kv_self.v)*n_embd),
866+
n_embd/n_head, n_head, n_past + N),
867+
1, 2, 0, 3);
870 868

871 869
// KQV = transpose(V) * KQ_soft_max
872 870
struct ggml_tensor * KQV = ggml_mul_mat(ctx0, V_trans, KQ_soft_max);

0 commit comments

Comments
 (0)