File tree 1 file changed +21
-1
lines changed
1 file changed +21
-1
lines changed Original file line number Diff line number Diff line change @@ -477,7 +477,7 @@ typedef struct {
477
477
} block_q6_K;
478
478
static_assert(sizeof(block_q6_K) == sizeof(ggml_fp16_t) + 13*QK_K/16, "wrong q6_K block size/padding");
479
479
480
- #define QR2_XXS 4
480
+ #define QR2_XXS 8
481
481
#define QI2_XXS (QK_K / (4*QR2_XXS))
482
482
typedef struct {
483
483
half d;
@@ -3955,6 +3955,25 @@ static __device__ __forceinline__ float vec_dot_iq2_xxs_q8_1(
3955
3955
#if QK_K == 256
3956
3956
const block_iq2_xxs * bq2 = (const block_iq2_xxs *) vbq;
3957
3957
3958
+ #if QR2_XXS == 8
3959
+ const int ib32 = iqs;
3960
+ const uint16_t * q2 = bq2->qs + 4*ib32;
3961
+ const uint8_t * aux8 = (const uint8_t *)q2;
3962
+ const int8_t * q8 = bq8_1[ib32].qs;
3963
+ uint32_t aux32 = q2[2] | (q2[3] << 16);
3964
+ int sumi = 0;
3965
+ for (int l = 0; l < 4; ++l) {
3966
+ const uint8_t * grid = (const uint8_t *)(kgrid_iq2xxs + aux8[l]);
3967
+ const uint8_t signs = ksigns_iq2xs[aux32 & 127];
3968
+ for (int j = 0; j < 8; ++j) {
3969
+ sumi += q8[j] * grid[j] * (signs & kmask_iq2xs[j] ? -1 : 1);
3970
+ }
3971
+ q8 += 8;
3972
+ aux32 >>= 7;
3973
+ }
3974
+ const float d = (float)bq2->d * (0.5f + aux32) * (float)bq8_1[ib32].ds.x * 0.25f;
3975
+ return d * sumi;
3976
+ #else
3958
3977
// iqs is 0...15
3959
3978
const int ib32 = iqs/2;
3960
3979
const int il = iqs%2;
@@ -3973,6 +3992,7 @@ static __device__ __forceinline__ float vec_dot_iq2_xxs_q8_1(
3973
3992
sumi2 += q8[j+8] * grid2[j] * (signs2 & kmask_iq2xs[j] ? -1 : 1);
3974
3993
}
3975
3994
return d * (sumi1 + sumi2);
3995
+ #endif
3976
3996
#else
3977
3997
assert(false);
3978
3998
return 0.f;
You can’t perform that action at this time.
0 commit comments