feat: add group size 3 to GQA decode dispatch (#558)

abcdabcd987 · web-flow · commit 62275625f933 · 2024-10-25T12:48:01.000-07:00
Llama 3.2 3B comes with 24 qo heads and 8 kv heads, i.e. a GQA group size of 24 / 8 = 3, which the group-size dispatch did not previously handle.
diff --git a/include/flashinfer/utils.cuh b/include/flashinfer/utils.cuh
@@ -126,6 +126,9 @@
   } else if (group_size == 2) {                              \
     constexpr size_t GROUP_SIZE = 2;                         \
     __VA_ARGS__                                              \
+  } else if (group_size == 3) {                              \
+    constexpr size_t GROUP_SIZE = 3;                         \
+    __VA_ARGS__                                              \
   } else if (group_size == 4) {                              \
     constexpr size_t GROUP_SIZE = 4;                         \
     __VA_ARGS__                                              \