Skip to content

Commit 553ace5

Browse files
authored
perf: reduce total_num_tiles_q by one (#644)
The bound can be reduced by one to slightly decrease workspace memory usage.
1 parent b577710 commit 553ace5

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

include/flashinfer/attention/scheduler.cuh

+1 -1
Original file line number | Diff line number | Diff line change
@@ -483,7 +483,7 @@ inline auto PrefillSplitQOKVIndptr(IdType* qo_indptr_h, IdType* kv_indptr_h,
483 483
// number of rows and the batch size. The sum of qo lengths rounded
484 484
// up to cta_tile_q will not exceed this number derived from the total
485 485
// number of rows.
486-
total_num_tiles_q = ceil_div(total_num_rows, cta_tile_q) + batch_size;
486+
total_num_tiles_q = ceil_div(total_num_rows, cta_tile_q) + batch_size - 1;
487 487
} else {
488 488
int64_t sum_packed_qo_len = 0;
489 489
for (uint32_t i = 0; i < batch_size; ++i) {

0 commit comments

Comments
 (0)