|
| 1 | +diff --git a/fflas-ffpack/fflas/fflas_simd/simd256_int16.inl b/fflas-ffpack/fflas/fflas_simd/simd256_int16.inl |
| 2 | +index e416545..ba407ec 100644 |
| 3 | +--- a/fflas-ffpack/fflas/fflas_simd/simd256_int16.inl |
| 4 | ++++ b/fflas-ffpack/fflas/fflas_simd/simd256_int16.inl |
| 5 | +@@ -255,7 +255,7 @@ template <> struct Simd256_impl<true, true, true, 2> : public Simd256i_base { |
| 6 | + * Return : [a0, b0, ..., a7, b7] int16_t |
| 7 | + * [a8, b8, ..., a15, b15] int16_t |
| 8 | + */ |
| 9 | +- static INLINE CONST void unpacklohi(vect_t& s1, vect_t& s2, const vect_t a, const vect_t b) { |
| 10 | ++ static INLINE void unpacklohi(vect_t& s1, vect_t& s2, const vect_t a, const vect_t b) { |
| 11 | + using Simd256_64 = Simd256<uint64_t>; |
| 12 | + vect_t a1 = Simd256_64::template shuffle<0xD8>(a); // 0xD8 = 3120 base_4 |
| 13 | + vect_t b1 = Simd256_64::template shuffle<0xD8>(b); // 0xD8 = 3120 base_4 |
| 14 | +diff --git a/fflas-ffpack/fflas/fflas_simd/simd256_int32.inl b/fflas-ffpack/fflas/fflas_simd/simd256_int32.inl |
| 15 | +index 7b870a1..9d27d6a 100644 |
| 16 | +--- a/fflas-ffpack/fflas/fflas_simd/simd256_int32.inl |
| 17 | ++++ b/fflas-ffpack/fflas/fflas_simd/simd256_int32.inl |
| 18 | +@@ -255,7 +255,7 @@ template <> struct Simd256_impl<true, true, true, 4> : public Simd256i_base { |
| 19 | + * Return : [a0, b0, ..., a3, b3] int32_t |
| 20 | + * [a4, b4, ..., a7, b7] int32_t |
| 21 | + */ |
| 22 | +- static INLINE CONST void unpacklohi(vect_t& s1, vect_t& s2, const vect_t a, const vect_t b) { |
| 23 | ++ static INLINE void unpacklohi(vect_t& s1, vect_t& s2, const vect_t a, const vect_t b) { |
| 24 | + using Simd256_64 = Simd256<uint64_t>; |
| 25 | + vect_t a1 = Simd256_64::template shuffle<0xD8>(a); // 0xD8 = 3120 base_4 |
| 26 | + vect_t b1 = Simd256_64::template shuffle<0xD8>(b); // 0xD8 = 3120 base_4 |
| 27 | +diff --git a/fflas-ffpack/fflas/fflas_simd/simd256_int64.inl b/fflas-ffpack/fflas/fflas_simd/simd256_int64.inl |
| 28 | +index 7f61345..3976c1e 100644 |
| 29 | +--- a/fflas-ffpack/fflas/fflas_simd/simd256_int64.inl |
| 30 | ++++ b/fflas-ffpack/fflas/fflas_simd/simd256_int64.inl |
| 31 | +@@ -244,7 +244,7 @@ template <> struct Simd256_impl<true, true, true, 8> : public Simd256i_base { |
| 32 | + * Return : [a0, b0, a1, b1] int64_t |
| 33 | + * [a2, b2, a3, b3] int64_t |
| 34 | + */ |
| 35 | +- static INLINE CONST void unpacklohi(vect_t& l, vect_t& h, const vect_t a, const vect_t b) { |
| 36 | ++ static INLINE void unpacklohi(vect_t& l, vect_t& h, const vect_t a, const vect_t b) { |
| 37 | + vect_t a1 = shuffle<0xD8>(a); // 0xD8 = 3120 base_4 so a -> [a0,a2,a1,a3] |
| 38 | + vect_t b1 = shuffle<0xD8>(b); // 0xD8 = 3120 base_4 |
| 39 | + l = unpacklo_twice(a1, b1); |
0 commit comments