@@ -5760,18 +5760,18 @@ typedef void (mat_mm_t)(
5760
5760
threadgroup uchar *,
5761
5761
uint3, uint , uint );
5762
5762
5763
- template [[host_name(" kernel_mul_mm_f32_f32" )]] kernel mat_mm_t kernel_mul_mm<float4x4, 1 , dequantize_f32>;
5764
- template [[host_name(" kernel_mul_mm_f16_f32" )]] kernel mat_mm_t kernel_mul_mm<half4x4, 1 , dequantize_f16>;
5765
- template [[host_name(" kernel_mul_mm_q4_0_f32" )]] kernel mat_mm_t kernel_mul_mm<block_q4_0, 2 , dequantize_q4_0>;
5766
- template [[host_name(" kernel_mul_mm_q4_1_f32" )]] kernel mat_mm_t kernel_mul_mm<block_q4_1, 2 , dequantize_q4_1>;
5767
- template [[host_name(" kernel_mul_mm_q5_0_f32" )]] kernel mat_mm_t kernel_mul_mm<block_q5_0, 2 , dequantize_q5_0>;
5768
- template [[host_name(" kernel_mul_mm_q5_1_f32" )]] kernel mat_mm_t kernel_mul_mm<block_q5_1, 2 , dequantize_q5_1>;
5769
- template [[host_name(" kernel_mul_mm_q8_0_f32" )]] kernel mat_mm_t kernel_mul_mm<block_q8_0, 2 , dequantize_q8_0>;
5770
- template [[host_name(" kernel_mul_mm_q2_K_f32" )]] kernel mat_mm_t kernel_mul_mm<block_q2_K, QK_NL, dequantize_q2_K>;
5771
- template [[host_name(" kernel_mul_mm_q3_K_f32" )]] kernel mat_mm_t kernel_mul_mm<block_q3_K, QK_NL, dequantize_q3_K>;
5772
- template [[host_name(" kernel_mul_mm_q4_K_f32" )]] kernel mat_mm_t kernel_mul_mm<block_q4_K, QK_NL, dequantize_q4_K>;
5773
- template [[host_name(" kernel_mul_mm_q5_K_f32" )]] kernel mat_mm_t kernel_mul_mm<block_q5_K, QK_NL, dequantize_q5_K>;
5774
- template [[host_name(" kernel_mul_mm_q6_K_f32" )]] kernel mat_mm_t kernel_mul_mm<block_q6_K, QK_NL, dequantize_q6_K>;
5763
+ template [[host_name(" kernel_mul_mm_f32_f32" )]] kernel mat_mm_t kernel_mul_mm<float4x4, 1 , dequantize_f32>;
5764
+ template [[host_name(" kernel_mul_mm_f16_f32" )]] kernel mat_mm_t kernel_mul_mm<half4x4, 1 , dequantize_f16>;
5765
+ template [[host_name(" kernel_mul_mm_q4_0_f32" )]] kernel mat_mm_t kernel_mul_mm<block_q4_0, 2 , dequantize_q4_0>;
5766
+ template [[host_name(" kernel_mul_mm_q4_1_f32" )]] kernel mat_mm_t kernel_mul_mm<block_q4_1, 2 , dequantize_q4_1>;
5767
+ template [[host_name(" kernel_mul_mm_q5_0_f32" )]] kernel mat_mm_t kernel_mul_mm<block_q5_0, 2 , dequantize_q5_0>;
5768
+ template [[host_name(" kernel_mul_mm_q5_1_f32" )]] kernel mat_mm_t kernel_mul_mm<block_q5_1, 2 , dequantize_q5_1>;
5769
+ template [[host_name(" kernel_mul_mm_q8_0_f32" )]] kernel mat_mm_t kernel_mul_mm<block_q8_0, 2 , dequantize_q8_0>;
5770
+ template [[host_name(" kernel_mul_mm_q2_K_f32" )]] kernel mat_mm_t kernel_mul_mm<block_q2_K, QK_NL, dequantize_q2_K>;
5771
+ template [[host_name(" kernel_mul_mm_q3_K_f32" )]] kernel mat_mm_t kernel_mul_mm<block_q3_K, QK_NL, dequantize_q3_K>;
5772
+ template [[host_name(" kernel_mul_mm_q4_K_f32" )]] kernel mat_mm_t kernel_mul_mm<block_q4_K, QK_NL, dequantize_q4_K>;
5773
+ template [[host_name(" kernel_mul_mm_q5_K_f32" )]] kernel mat_mm_t kernel_mul_mm<block_q5_K, QK_NL, dequantize_q5_K>;
5774
+ template [[host_name(" kernel_mul_mm_q6_K_f32" )]] kernel mat_mm_t kernel_mul_mm<block_q6_K, QK_NL, dequantize_q6_K>;
5775
5775
template [[host_name(" kernel_mul_mm_iq2_xxs_f32" )]] kernel mat_mm_t kernel_mul_mm<block_iq2_xxs, QK_NL, dequantize_iq2_xxs>;
5776
5776
template [[host_name(" kernel_mul_mm_iq2_xs_f32" )]] kernel mat_mm_t kernel_mul_mm<block_iq2_xs, QK_NL, dequantize_iq2_xs>;
5777
5777
template [[host_name(" kernel_mul_mm_iq3_xxs_f32" )]] kernel mat_mm_t kernel_mul_mm<block_iq3_xxs, QK_NL, dequantize_iq3_xxs>;
@@ -5820,18 +5820,18 @@ typedef void (mat_mm_id_t)(
5820
5820
threadgroup uchar *,
5821
5821
uint3, uint , uint );
5822
5822
5823
- template [[host_name(" kernel_mul_mm_id_f32_f32" )]] kernel mat_mm_id_t kernel_mul_mm_id<float4x4, 1 , dequantize_f32>;
5824
- template [[host_name(" kernel_mul_mm_id_f16_f32" )]] kernel mat_mm_id_t kernel_mul_mm_id<half4x4, 1 , dequantize_f16>;
5825
- template [[host_name(" kernel_mul_mm_id_q4_0_f32" )]] kernel mat_mm_id_t kernel_mul_mm_id<block_q4_0, 2 , dequantize_q4_0>;
5826
- template [[host_name(" kernel_mul_mm_id_q4_1_f32" )]] kernel mat_mm_id_t kernel_mul_mm_id<block_q4_1, 2 , dequantize_q4_1>;
5827
- template [[host_name(" kernel_mul_mm_id_q5_0_f32" )]] kernel mat_mm_id_t kernel_mul_mm_id<block_q5_0, 2 , dequantize_q5_0>;
5828
- template [[host_name(" kernel_mul_mm_id_q5_1_f32" )]] kernel mat_mm_id_t kernel_mul_mm_id<block_q5_1, 2 , dequantize_q5_1>;
5829
- template [[host_name(" kernel_mul_mm_id_q8_0_f32" )]] kernel mat_mm_id_t kernel_mul_mm_id<block_q8_0, 2 , dequantize_q8_0>;
5830
- template [[host_name(" kernel_mul_mm_id_q2_K_f32" )]] kernel mat_mm_id_t kernel_mul_mm_id<block_q2_K, QK_NL, dequantize_q2_K>;
5831
- template [[host_name(" kernel_mul_mm_id_q3_K_f32" )]] kernel mat_mm_id_t kernel_mul_mm_id<block_q3_K, QK_NL, dequantize_q3_K>;
5832
- template [[host_name(" kernel_mul_mm_id_q4_K_f32" )]] kernel mat_mm_id_t kernel_mul_mm_id<block_q4_K, QK_NL, dequantize_q4_K>;
5833
- template [[host_name(" kernel_mul_mm_id_q5_K_f32" )]] kernel mat_mm_id_t kernel_mul_mm_id<block_q5_K, QK_NL, dequantize_q5_K>;
5834
- template [[host_name(" kernel_mul_mm_id_q6_K_f32" )]] kernel mat_mm_id_t kernel_mul_mm_id<block_q6_K, QK_NL, dequantize_q6_K>;
5823
+ template [[host_name(" kernel_mul_mm_id_f32_f32" )]] kernel mat_mm_id_t kernel_mul_mm_id<float4x4, 1 , dequantize_f32>;
5824
+ template [[host_name(" kernel_mul_mm_id_f16_f32" )]] kernel mat_mm_id_t kernel_mul_mm_id<half4x4, 1 , dequantize_f16>;
5825
+ template [[host_name(" kernel_mul_mm_id_q4_0_f32" )]] kernel mat_mm_id_t kernel_mul_mm_id<block_q4_0, 2 , dequantize_q4_0>;
5826
+ template [[host_name(" kernel_mul_mm_id_q4_1_f32" )]] kernel mat_mm_id_t kernel_mul_mm_id<block_q4_1, 2 , dequantize_q4_1>;
5827
+ template [[host_name(" kernel_mul_mm_id_q5_0_f32" )]] kernel mat_mm_id_t kernel_mul_mm_id<block_q5_0, 2 , dequantize_q5_0>;
5828
+ template [[host_name(" kernel_mul_mm_id_q5_1_f32" )]] kernel mat_mm_id_t kernel_mul_mm_id<block_q5_1, 2 , dequantize_q5_1>;
5829
+ template [[host_name(" kernel_mul_mm_id_q8_0_f32" )]] kernel mat_mm_id_t kernel_mul_mm_id<block_q8_0, 2 , dequantize_q8_0>;
5830
+ template [[host_name(" kernel_mul_mm_id_q2_K_f32" )]] kernel mat_mm_id_t kernel_mul_mm_id<block_q2_K, QK_NL, dequantize_q2_K>;
5831
+ template [[host_name(" kernel_mul_mm_id_q3_K_f32" )]] kernel mat_mm_id_t kernel_mul_mm_id<block_q3_K, QK_NL, dequantize_q3_K>;
5832
+ template [[host_name(" kernel_mul_mm_id_q4_K_f32" )]] kernel mat_mm_id_t kernel_mul_mm_id<block_q4_K, QK_NL, dequantize_q4_K>;
5833
+ template [[host_name(" kernel_mul_mm_id_q5_K_f32" )]] kernel mat_mm_id_t kernel_mul_mm_id<block_q5_K, QK_NL, dequantize_q5_K>;
5834
+ template [[host_name(" kernel_mul_mm_id_q6_K_f32" )]] kernel mat_mm_id_t kernel_mul_mm_id<block_q6_K, QK_NL, dequantize_q6_K>;
5835
5835
template [[host_name(" kernel_mul_mm_id_iq2_xxs_f32" )]] kernel mat_mm_id_t kernel_mul_mm_id<block_iq2_xxs, QK_NL, dequantize_iq2_xxs>;
5836
5836
template [[host_name(" kernel_mul_mm_id_iq2_xs_f32" )]] kernel mat_mm_id_t kernel_mul_mm_id<block_iq2_xs, QK_NL, dequantize_iq2_xs>;
5837
5837
template [[host_name(" kernel_mul_mm_id_iq3_xxs_f32" )]] kernel mat_mm_id_t kernel_mul_mm_id<block_iq3_xxs, QK_NL, dequantize_iq3_xxs>;
0 commit comments