@@ -165,6 +165,80 @@ pub unsafe fn _mm_max_ps(a: f32x4, b: f32x4) -> f32x4 {
165
165
maxps ( a, b)
166
166
}
167
167
168
+ /// Construct a `f32x4` with the lowest element set to `a` and the rest set to
169
+ /// zero.
170
+ #[ inline( always) ]
171
+ #[ target_feature = "+sse" ]
172
+ #[ cfg_attr( test, assert_instr( movss) ) ]
173
+ pub unsafe fn _mm_set_ss ( a : f32 ) -> f32x4 {
174
+ f32x4:: new ( a, 0.0 , 0.0 , 0.0 )
175
+ }
176
+
177
+ /// Construct a `f32x4` with all element set to `a`.
178
+ #[ inline( always) ]
179
+ #[ target_feature = "+sse" ]
180
+ #[ cfg_attr( test, assert_instr( shufps) ) ]
181
+ pub unsafe fn _mm_set1_ps ( a : f32 ) -> f32x4 {
182
+ f32x4:: new ( a, a, a, a)
183
+ }
184
+
185
+ /// Alias for [`_mm_set1_ps`](fn._mm_set1_ps.html)
186
+ #[ inline( always) ]
187
+ #[ target_feature = "+sse" ]
188
+ #[ cfg_attr( test, assert_instr( shufps) ) ]
189
+ pub unsafe fn _mm_set_ps1 ( a : f32 ) -> f32x4 {
190
+ _mm_set1_ps ( a)
191
+ }
192
+
193
+ /// Construct a `f32x4` from four floating point values highest to lowest.
194
+ ///
195
+ /// Note that `a` will be the highest 32 bits of the result, and `d` the lowest.
196
+ /// This matches the standard way of writing bit patterns on x86:
197
+ ///
198
+ /// ```text
199
+ /// bit 127 .. 96 95 .. 64 63 .. 32 31 .. 0
200
+ /// +---------+---------+---------+---------+
201
+ /// | a | b | c | d | result
202
+ /// +---------+---------+---------+---------+
203
+ /// ```
204
+ ///
205
+ /// Alternatively:
206
+ ///
207
+ /// ```text
208
+ /// assert_eq!(f32x4::new(a, b, c, d), _mm_set_ps(d, c, b, a));
209
+ /// ```
210
+ #[ inline( always) ]
211
+ #[ target_feature = "+sse" ]
212
+ #[ cfg_attr( test, assert_instr( unpcklps) ) ]
213
+ pub unsafe fn _mm_set_ps ( a : f32 , b : f32 , c : f32 , d : f32 ) -> f32x4 {
214
+ f32x4:: new ( d, c, b, a)
215
+ }
216
+
217
+ /// Construct a `f32x4` from four floating point values lowest to highest.
218
+ ///
219
+ /// This matches the memory order of `f32x4`, i.e., `a` will be the lowest 32
220
+ /// bits of the result, and `d` the highest.
221
+ ///
222
+ /// ```text
223
+ /// assert_eq!(f32x4::new(a, b, c, d), _mm_setr_ps(a, b, c, d));
224
+ /// ```
225
+ #[ inline( always) ]
226
+ #[ target_feature = "+sse" ]
227
+ #[ cfg_attr( all( test, target_arch = "x86_64" ) , assert_instr( unpcklps) ) ]
228
+ // On a 32-bit architecture it just copies the operands from the stack.
229
+ #[ cfg_attr( all( test, target_arch = "x86" ) , assert_instr( movaps) ) ]
230
+ pub unsafe fn _mm_setr_ps ( a : f32 , b : f32 , c : f32 , d : f32 ) -> f32x4 {
231
+ f32x4:: new ( a, b, c, d)
232
+ }
233
+
234
+ /// Construct a `f32x4` with all elements initialized to zero.
235
+ #[ inline( always) ]
236
+ #[ target_feature = "+sse" ]
237
+ #[ cfg_attr( test, assert_instr( xorps) ) ]
238
+ pub unsafe fn _mm_setzero_ps ( ) -> f32x4 {
239
+ f32x4:: new ( 0.0 , 0.0 , 0.0 , 0.0 )
240
+ }
241
+
168
242
/// Shuffle packed single-precision (32-bit) floating-point elements in `a` and
169
243
/// `b` using `mask`.
170
244
///
@@ -789,6 +863,40 @@ mod tests {
789
863
assert_eq ! ( r, f32x4:: new( -1.0 , 20.0 , 0.0 , -5.0 ) ) ;
790
864
}
791
865
866
+ #[ simd_test = "sse" ]
867
+ unsafe fn _mm_set_ss ( ) {
868
+ let r = sse:: _mm_set_ss ( black_box ( 4.25 ) ) ;
869
+ assert_eq ! ( r, f32x4:: new( 4.25 , 0.0 , 0.0 , 0.0 ) ) ;
870
+ }
871
+
872
+ #[ simd_test = "sse" ]
873
+ unsafe fn _mm_set1_ps ( ) {
874
+ let r1 = sse:: _mm_set1_ps ( black_box ( 4.25 ) ) ;
875
+ let r2 = sse:: _mm_set_ps1 ( black_box ( 4.25 ) ) ;
876
+ assert_eq ! ( r1, f32x4:: splat( 4.25 ) ) ;
877
+ assert_eq ! ( r2, f32x4:: splat( 4.25 ) ) ;
878
+ }
879
+
880
+ #[ simd_test = "sse" ]
881
+ unsafe fn _mm_set_ps ( ) {
882
+ let r = sse:: _mm_set_ps (
883
+ black_box ( 1.0 ) , black_box ( 2.0 ) , black_box ( 3.0 ) , black_box ( 4.0 ) ) ;
884
+ assert_eq ! ( r, f32x4:: new( 4.0 , 3.0 , 2.0 , 1.0 ) ) ;
885
+ }
886
+
887
+ #[ simd_test = "sse" ]
888
+ unsafe fn _mm_setr_ps ( ) {
889
+ let r = sse:: _mm_setr_ps (
890
+ black_box ( 1.0 ) , black_box ( 2.0 ) , black_box ( 3.0 ) , black_box ( 4.0 ) ) ;
891
+ assert_eq ! ( r, f32x4:: new( 1.0 , 2.0 , 3.0 , 4.0 ) ) ;
892
+ }
893
+
894
+ #[ simd_test = "sse" ]
895
+ unsafe fn _mm_setzero_ps ( ) {
896
+ let r = * black_box ( & sse:: _mm_setzero_ps ( ) ) ;
897
+ assert_eq ! ( r, f32x4:: splat( 0.0 ) ) ;
898
+ }
899
+
792
900
#[ simd_test = "sse" ]
793
901
unsafe fn _mm_shuffle_ps ( ) {
794
902
let a = f32x4:: new ( 1.0 , 2.0 , 3.0 , 4.0 ) ;
0 commit comments