Skip to content

Commit 2de16b0

Browse files
authored
fix: remove redundant load (#495)
1 parent f2ca781 commit 2de16b0

File tree

1 file changed

+0
-1
lines changed

1 file changed

+0
-1
lines changed

include/flashinfer/norm.cuh

-1
Original file line number | Diff line number | Diff line change
@@ -178,7 +178,6 @@ __global__ void FusedAddRMSNormKernel(T* __restrict__ input, T* __restrict__ res
178 | 178 |       weight_vec.fill(0.f);
179 | 179 |       residual_vec.fill(0.f);
180 | 180 |       if ((i * num_threads + thread_id) * VEC_SIZE < d) {
181 |     | -       input_vec.load(input + bx * d + i * num_threads * VEC_SIZE + thread_id * VEC_SIZE);
182 | 181 |         weight_vec.load(weight + i * num_threads * VEC_SIZE + thread_id * VEC_SIZE);
183 | 182 |         residual_vec.load(residual + bx * d + i * num_threads * VEC_SIZE + thread_id * VEC_SIZE);
184 | 183 |       }

0 commit comments

Comments
 (0)