Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit f1dd5a9

Browse files
nominolo authored and alexcrichton committed Oct 7, 2017
Implement SSE _mm_set* intrinsics (rust-lang#100)
* Add _mm_set_ss
* Add _mm_set1_ps and _mm_set_ps1
* Add _mm_set_ps
* Add _mm_setr_ps
* Add _mm_setzero_ps
* Fix _mm_setr_ps instr test on x86
* Sidestep black_box ABI issue on i586
1 parent b4098a7 commit f1dd5a9

File tree

1 file changed

+108
-0
lines changed

1 file changed

+108
-0
lines changed
 

‎src/x86/sse.rs

+108
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,80 @@ pub unsafe fn _mm_max_ps(a: f32x4, b: f32x4) -> f32x4 {
165165
maxps(a, b)
166166
}
167167

168+
/// Construct a `f32x4` with the lowest element set to `a` and the rest set to
169+
/// zero.
170+
#[inline(always)]
171+
#[target_feature = "+sse"]
172+
#[cfg_attr(test, assert_instr(movss))]
173+
pub unsafe fn _mm_set_ss(a: f32) -> f32x4 {
174+
f32x4::new(a, 0.0, 0.0, 0.0)
175+
}
176+
177+
/// Construct a `f32x4` with all elements set to `a`.
178+
#[inline(always)]
179+
#[target_feature = "+sse"]
180+
#[cfg_attr(test, assert_instr(shufps))]
181+
pub unsafe fn _mm_set1_ps(a: f32) -> f32x4 {
182+
f32x4::new(a, a, a, a)
183+
}
184+
185+
/// Alias for [`_mm_set1_ps`](fn._mm_set1_ps.html)
186+
#[inline(always)]
187+
#[target_feature = "+sse"]
188+
#[cfg_attr(test, assert_instr(shufps))]
189+
pub unsafe fn _mm_set_ps1(a: f32) -> f32x4 {
190+
_mm_set1_ps(a)
191+
}
192+
193+
/// Construct a `f32x4` from four floating point values highest to lowest.
194+
///
195+
/// Note that `a` will be the highest 32 bits of the result, and `d` the lowest.
196+
/// This matches the standard way of writing bit patterns on x86:
197+
///
198+
/// ```text
199+
/// bit    127 .. 96  95 .. 64  63 .. 32  31 .. 0
200+
///       +---------+---------+---------+---------+
201+
///       |    a    |    b    |    c    |    d    |   result
202+
///       +---------+---------+---------+---------+
203+
/// ```
204+
///
205+
/// Alternatively:
206+
///
207+
/// ```text
208+
/// assert_eq!(f32x4::new(a, b, c, d), _mm_set_ps(d, c, b, a));
209+
/// ```
210+
#[inline(always)]
211+
#[target_feature = "+sse"]
212+
#[cfg_attr(test, assert_instr(unpcklps))]
213+
pub unsafe fn _mm_set_ps(a: f32, b: f32, c: f32, d: f32) -> f32x4 {
214+
f32x4::new(d, c, b, a)
215+
}
216+
217+
/// Construct a `f32x4` from four floating point values lowest to highest.
218+
///
219+
/// This matches the memory order of `f32x4`, i.e., `a` will be the lowest 32
220+
/// bits of the result, and `d` the highest.
221+
///
222+
/// ```text
223+
/// assert_eq!(f32x4::new(a, b, c, d), _mm_setr_ps(a, b, c, d));
224+
/// ```
225+
#[inline(always)]
226+
#[target_feature = "+sse"]
227+
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(unpcklps))]
228+
// On a 32-bit architecture it just copies the operands from the stack.
229+
#[cfg_attr(all(test, target_arch = "x86"), assert_instr(movaps))]
230+
pub unsafe fn _mm_setr_ps(a: f32, b: f32, c: f32, d: f32) -> f32x4 {
231+
f32x4::new(a, b, c, d)
232+
}
233+
234+
/// Construct a `f32x4` with all elements initialized to zero.
235+
#[inline(always)]
236+
#[target_feature = "+sse"]
237+
#[cfg_attr(test, assert_instr(xorps))]
238+
pub unsafe fn _mm_setzero_ps() -> f32x4 {
239+
f32x4::new(0.0, 0.0, 0.0, 0.0)
240+
}
241+
168242
/// Shuffle packed single-precision (32-bit) floating-point elements in `a` and
169243
/// `b` using `mask`.
170244
///
@@ -789,6 +863,40 @@ mod tests {
789863
assert_eq!(r, f32x4::new(-1.0, 20.0, 0.0, -5.0));
790864
}
791865

866+
#[simd_test = "sse"]
867+
unsafe fn _mm_set_ss() {
868+
let r = sse::_mm_set_ss(black_box(4.25));
869+
assert_eq!(r, f32x4::new(4.25, 0.0, 0.0, 0.0));
870+
}
871+
872+
#[simd_test = "sse"]
873+
unsafe fn _mm_set1_ps() {
874+
let r1 = sse::_mm_set1_ps(black_box(4.25));
875+
let r2 = sse::_mm_set_ps1(black_box(4.25));
876+
assert_eq!(r1, f32x4::splat(4.25));
877+
assert_eq!(r2, f32x4::splat(4.25));
878+
}
879+
880+
#[simd_test = "sse"]
881+
unsafe fn _mm_set_ps() {
882+
let r = sse::_mm_set_ps(
883+
black_box(1.0), black_box(2.0), black_box(3.0), black_box(4.0));
884+
assert_eq!(r, f32x4::new(4.0, 3.0, 2.0, 1.0));
885+
}
886+
887+
#[simd_test = "sse"]
888+
unsafe fn _mm_setr_ps() {
889+
let r = sse::_mm_setr_ps(
890+
black_box(1.0), black_box(2.0), black_box(3.0), black_box(4.0));
891+
assert_eq!(r, f32x4::new(1.0, 2.0, 3.0, 4.0));
892+
}
893+
894+
#[simd_test = "sse"]
895+
unsafe fn _mm_setzero_ps() {
896+
let r = *black_box(&sse::_mm_setzero_ps());
897+
assert_eq!(r, f32x4::splat(0.0));
898+
}
899+
792900
#[simd_test = "sse"]
793901
unsafe fn _mm_shuffle_ps() {
794902
let a = f32x4::new(1.0, 2.0, 3.0, 4.0);

0 commit comments

Comments
 (0)
Please sign in to comment.