Skip to content

Commit 833ec1f

Browse files
authored
Reduce dot threshold at which lerp is used instead of slerp. (#550)
* Use precise lerp for linear interpolation. * Use common slerp code except for sse2 which has a simd sin implementation.
1 parent 913395d commit 833ec1f

File tree

8 files changed

+120
-224
lines changed

8 files changed

+120
-224
lines changed

codegen/templates/quat.rs.tera

+26-103
Original file line numberDiff line numberDiff line change
@@ -750,6 +750,12 @@ impl {{ self_t }} {
750750
{{ vec4_t }}::from(self).abs_diff_eq({{ vec4_t }}::from(rhs), max_abs_diff)
751751
}
752752

753+
#[inline(always)]
754+
#[must_use]
755+
fn lerp_impl(self, end: Self, s: {{ scalar_t }}) -> Self {
756+
(self * (1.0 - s) + end * s).normalize()
757+
}
758+
753759
/// Performs a linear interpolation between `self` and `rhs` based on
754760
/// the value `s`.
755761
///
@@ -767,69 +773,41 @@ impl {{ self_t }} {
767773
glam_assert!(end.is_normalized());
768774

769775
{% if is_scalar %}
770-
let start = self;
771-
let dot = start.dot(end);
776+
let dot = self.dot(end);
772777
let bias = if dot >= 0.0 { 1.0 } else { -1.0 };
773-
let interpolated = start.add(end.mul(bias).sub(start).mul(s));
774-
interpolated.normalize()
778+
self.lerp_impl(end * bias, s)
775779
{% elif is_sse2 %}
776780
const NEG_ZERO: __m128 = m128_from_f32x4([-0.0; 4]);
777-
let start = self.0;
778-
let end = end.0;
779781
unsafe {
780-
let dot = dot4_into_m128(start, end);
782+
let dot = dot4_into_m128(self.0, end.0);
781783
// Calculate the bias, if the dot product is positive or zero, there is no bias
782784
// but if it is negative, we want to flip the 'end' rotation XYZW components
783785
let bias = _mm_and_ps(dot, NEG_ZERO);
784-
let interpolated = _mm_add_ps(
785-
_mm_mul_ps(_mm_sub_ps(_mm_xor_ps(end, bias), start), _mm_set_ps1(s)),
786-
start,
787-
);
788-
{{ self_t }}(interpolated).normalize()
786+
self.lerp_impl(Self(_mm_xor_ps(end.0, bias)), s)
789787
}
790788
{% elif is_wasm32 %}
791789
const NEG_ZERO: v128 = v128_from_f32x4([-0.0; 4]);
792-
let start = self.0;
793-
let end = end.0;
794-
let dot = dot4_into_v128(start, end);
790+
let dot = dot4_into_v128(self.0, end.0);
795791
// Calculate the bias, if the dot product is positive or zero, there is no bias
796792
// but if it is negative, we want to flip the 'end' rotation XYZW components
797793
let bias = v128_and(dot, NEG_ZERO);
798-
let interpolated = f32x4_add(
799-
f32x4_mul(f32x4_sub(v128_xor(end, bias), start), f32x4_splat(s)),
800-
start,
801-
);
802-
{{ self_t }}(interpolated).normalize()
794+
self.lerp_impl(Self(v128_xor(end.0, bias)), s)
803795
{% elif is_coresimd %}
804796
const NEG_ZERO: f32x4 = f32x4::from_array([-0.0; 4]);
805-
let start = self.0;
806-
let end = end.0;
807-
let dot = dot4_into_f32x4(start, end);
797+
let dot = dot4_into_f32x4(self.0, end.0);
808798
// Calculate the bias, if the dot product is positive or zero, there is no bias
809799
// but if it is negative, we want to flip the 'end' rotation XYZW components
810800
let bias = f32x4_bitand(dot, NEG_ZERO);
811-
let interpolated = start + ((f32x4_bitxor(end, bias) - start) * f32x4::splat(s));
812-
{{ self_t }}(interpolated).normalize()
801+
self.lerp_impl(Self(f32x4_bitxor(end.0, bias)), s)
813802
{% elif is_neon %}
814803
const NEG_ZERO: float32x4_t = f32x4_from_array([-0.0; 4]);
815-
let start = self.0;
816-
let end = end.0;
817804
unsafe {
818-
let dot = dot4_into_f32x4(start, end);
805+
let dot = dot4_into_f32x4(self.0, end.0);
819806
// Calculate the bias, if the dot product is positive or zero, there is no bias
820807
// but if it is negative, we want to flip the 'end' rotation XYZW components
821808
let bias = vandq_u32(vreinterpretq_u32_f32(dot), vreinterpretq_u32_f32(NEG_ZERO));
822-
let interpolated = vaddq_f32(
823-
vmulq_f32(
824-
vsubq_f32(
825-
vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(end), bias)),
826-
start,
827-
),
828-
vld1q_dup_f32(&s),
829-
),
830-
start,
831-
);
832-
{{ self_t }}(interpolated).normalize()
809+
self.lerp_impl(
810+
Self(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(end.0), bias))), s)
833811
}
834812
{% else %}
835813
unimplemented!()
@@ -852,8 +830,6 @@ impl {{ self_t }} {
852830
glam_assert!(self.is_normalized());
853831
glam_assert!(end.is_normalized());
854832

855-
const DOT_THRESHOLD: {{ scalar_t }} = 0.9995;
856-
857833
// Note that a rotation can be represented by two quaternions: `q` and
858834
// `-q`. The slerp path between `q` and `end` will be different from the
859835
// path between `-q` and `end`. One path will take the long way around and
@@ -866,20 +842,13 @@ impl {{ self_t }} {
866842
dot = -dot;
867843
}
868844

845+
const DOT_THRESHOLD: {{ scalar_t }} = 1.0 - {{ scalar_t }}::EPSILON;
869846
if dot > DOT_THRESHOLD {
870-
// assumes lerp returns a normalized quaternion
871-
self.lerp(end, s)
847+
// if above threshold perform linear interpolation to avoid divide by zero
848+
self.lerp_impl(end, s)
872849
} else {
873850
let theta = math::acos_approx(dot);
874-
{% if is_scalar %}
875-
let scale1 = math::sin(theta * (1.0 - s));
876-
let scale2 = math::sin(theta * s);
877-
let theta_sin = math::sin(theta);
878-
879-
self.mul(scale1)
880-
.add(end.mul(scale2))
881-
.mul(1.0 / theta_sin)
882-
{% elif is_sse2 %}
851+
{% if is_sse2 %}
883852
let x = 1.0 - s;
884853
let y = s;
885854
let z = 1.0;
@@ -897,57 +866,11 @@ impl {{ self_t }} {
897866
theta_sin,
898867
))
899868
}
900-
{% elif is_wasm32 %}
901-
// TODO: v128_sin is broken
902-
// let x = 1.0 - s;
903-
// let y = s;
904-
// let z = 1.0;
905-
// let w = 0.0;
906-
// let tmp = f32x4_mul(f32x4_splat(theta), f32x4(x, y, z, w));
907-
// let tmp = v128_sin(tmp);
908-
let x = math::sin(theta * (1.0 - s));
909-
let y = math::sin(theta * s);
910-
let z = math::sin(theta);
911-
let w = 0.0;
912-
let tmp = f32x4(x, y, z, w);
913-
914-
let scale1 = i32x4_shuffle::<0, 0, 4, 4>(tmp, tmp);
915-
let scale2 = i32x4_shuffle::<1, 1, 5, 5>(tmp, tmp);
916-
let theta_sin = i32x4_shuffle::<2, 2, 6, 6>(tmp, tmp);
917-
918-
Self(f32x4_div(
919-
f32x4_add(f32x4_mul(self.0, scale1), f32x4_mul(end.0, scale2)),
920-
theta_sin,
921-
))
922-
{% elif is_coresimd %}
923-
let x = math::sin(theta * (1.0 - s));
924-
let y = math::sin(theta * s);
925-
let z = math::sin(theta);
926-
let w = 0.0;
927-
let tmp = f32x4::from_array([x, y, z, w]);
928-
929-
let scale1 = simd_swizzle!(tmp, [0, 0, 0, 0]);
930-
let scale2 = simd_swizzle!(tmp, [1, 1, 1, 1]);
931-
let theta_sin = simd_swizzle!(tmp, [2, 2, 2, 2]);
932-
933-
Self(self.0.mul(scale1).add(end.0.mul(scale2)).div(theta_sin))
934-
{% elif is_neon %}
935-
let x = math::sin(theta * (1.0 - s));
936-
let y = math::sin(theta * s);
937-
let z = math::sin(theta);
938-
let w = 0.0;
939-
unsafe {
940-
let tmp = vld1q_f32([x, y, z, w].as_ptr());
941-
942-
let scale1 = vdupq_laneq_f32(tmp, 0);
943-
let scale2 = vdupq_laneq_f32(tmp, 1);
944-
let theta_sin = vdupq_laneq_f32(tmp, 2);
945-
946-
Self(vdivq_f32(
947-
vaddq_f32(vmulq_f32(self.0, scale1), vmulq_f32(end.0, scale2)),
948-
theta_sin,
949-
))
950-
}
869+
{% else %}
870+
let scale1 = math::sin(theta * (1.0 - s));
871+
let scale2 = math::sin(theta * s);
872+
let theta_sin = math::sin(theta);
873+
((self * scale1) + (end * scale2)) * (1.0 / theta_sin)
951874
{% endif %}
952875
}
953876
}

src/f32/coresimd/quat.rs

+15-20
Original file line numberDiff line numberDiff line change
@@ -606,6 +606,12 @@ impl Quat {
606606
Vec4::from(self).abs_diff_eq(Vec4::from(rhs), max_abs_diff)
607607
}
608608

609+
#[inline(always)]
610+
#[must_use]
611+
fn lerp_impl(self, end: Self, s: f32) -> Self {
612+
(self * (1.0 - s) + end * s).normalize()
613+
}
614+
609615
/// Performs a linear interpolation between `self` and `rhs` based on
610616
/// the value `s`.
611617
///
@@ -623,14 +629,11 @@ impl Quat {
623629
glam_assert!(end.is_normalized());
624630

625631
const NEG_ZERO: f32x4 = f32x4::from_array([-0.0; 4]);
626-
let start = self.0;
627-
let end = end.0;
628-
let dot = dot4_into_f32x4(start, end);
632+
let dot = dot4_into_f32x4(self.0, end.0);
629633
// Calculate the bias, if the dot product is positive or zero, there is no bias
630634
// but if it is negative, we want to flip the 'end' rotation XYZW components
631635
let bias = f32x4_bitand(dot, NEG_ZERO);
632-
let interpolated = start + ((f32x4_bitxor(end, bias) - start) * f32x4::splat(s));
633-
Quat(interpolated).normalize()
636+
self.lerp_impl(Self(f32x4_bitxor(end.0, bias)), s)
634637
}
635638

636639
/// Performs a spherical linear interpolation between `self` and `end`
@@ -649,8 +652,6 @@ impl Quat {
649652
glam_assert!(self.is_normalized());
650653
glam_assert!(end.is_normalized());
651654

652-
const DOT_THRESHOLD: f32 = 0.9995;
653-
654655
// Note that a rotation can be represented by two quaternions: `q` and
655656
// `-q`. The slerp path between `q` and `end` will be different from the
656657
// path between `-q` and `end`. One path will take the long way around and
@@ -663,23 +664,17 @@ impl Quat {
663664
dot = -dot;
664665
}
665666

667+
const DOT_THRESHOLD: f32 = 1.0 - f32::EPSILON;
666668
if dot > DOT_THRESHOLD {
667-
// assumes lerp returns a normalized quaternion
668-
self.lerp(end, s)
669+
// if above threshold perform linear interpolation to avoid divide by zero
670+
self.lerp_impl(end, s)
669671
} else {
670672
let theta = math::acos_approx(dot);
671673

672-
let x = math::sin(theta * (1.0 - s));
673-
let y = math::sin(theta * s);
674-
let z = math::sin(theta);
675-
let w = 0.0;
676-
let tmp = f32x4::from_array([x, y, z, w]);
677-
678-
let scale1 = simd_swizzle!(tmp, [0, 0, 0, 0]);
679-
let scale2 = simd_swizzle!(tmp, [1, 1, 1, 1]);
680-
let theta_sin = simd_swizzle!(tmp, [2, 2, 2, 2]);
681-
682-
Self(self.0.mul(scale1).add(end.0.mul(scale2)).div(theta_sin))
674+
let scale1 = math::sin(theta * (1.0 - s));
675+
let scale2 = math::sin(theta * s);
676+
let theta_sin = math::sin(theta);
677+
((self * scale1) + (end * scale2)) * (1.0 / theta_sin)
683678
}
684679
}
685680

src/f32/neon/quat.rs

+21-34
Original file line numberDiff line numberDiff line change
@@ -611,6 +611,12 @@ impl Quat {
611611
Vec4::from(self).abs_diff_eq(Vec4::from(rhs), max_abs_diff)
612612
}
613613

614+
#[inline(always)]
615+
#[must_use]
616+
fn lerp_impl(self, end: Self, s: f32) -> Self {
617+
(self * (1.0 - s) + end * s).normalize()
618+
}
619+
614620
/// Performs a linear interpolation between `self` and `rhs` based on
615621
/// the value `s`.
616622
///
@@ -628,24 +634,18 @@ impl Quat {
628634
glam_assert!(end.is_normalized());
629635

630636
const NEG_ZERO: float32x4_t = f32x4_from_array([-0.0; 4]);
631-
let start = self.0;
632-
let end = end.0;
633637
unsafe {
634-
let dot = dot4_into_f32x4(start, end);
638+
let dot = dot4_into_f32x4(self.0, end.0);
635639
// Calculate the bias, if the dot product is positive or zero, there is no bias
636640
// but if it is negative, we want to flip the 'end' rotation XYZW components
637641
let bias = vandq_u32(vreinterpretq_u32_f32(dot), vreinterpretq_u32_f32(NEG_ZERO));
638-
let interpolated = vaddq_f32(
639-
vmulq_f32(
640-
vsubq_f32(
641-
vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(end), bias)),
642-
start,
643-
),
644-
vld1q_dup_f32(&s),
645-
),
646-
start,
647-
);
648-
Quat(interpolated).normalize()
642+
self.lerp_impl(
643+
Self(vreinterpretq_f32_u32(veorq_u32(
644+
vreinterpretq_u32_f32(end.0),
645+
bias,
646+
))),
647+
s,
648+
)
649649
}
650650
}
651651

@@ -665,8 +665,6 @@ impl Quat {
665665
glam_assert!(self.is_normalized());
666666
glam_assert!(end.is_normalized());
667667

668-
const DOT_THRESHOLD: f32 = 0.9995;
669-
670668
// Note that a rotation can be represented by two quaternions: `q` and
671669
// `-q`. The slerp path between `q` and `end` will be different from the
672670
// path between `-q` and `end`. One path will take the long way around and
@@ -679,28 +677,17 @@ impl Quat {
679677
dot = -dot;
680678
}
681679

680+
const DOT_THRESHOLD: f32 = 1.0 - f32::EPSILON;
682681
if dot > DOT_THRESHOLD {
683-
// assumes lerp returns a normalized quaternion
684-
self.lerp(end, s)
682+
// if above threshold perform linear interpolation to avoid divide by zero
683+
self.lerp_impl(end, s)
685684
} else {
686685
let theta = math::acos_approx(dot);
687686

688-
let x = math::sin(theta * (1.0 - s));
689-
let y = math::sin(theta * s);
690-
let z = math::sin(theta);
691-
let w = 0.0;
692-
unsafe {
693-
let tmp = vld1q_f32([x, y, z, w].as_ptr());
694-
695-
let scale1 = vdupq_laneq_f32(tmp, 0);
696-
let scale2 = vdupq_laneq_f32(tmp, 1);
697-
let theta_sin = vdupq_laneq_f32(tmp, 2);
698-
699-
Self(vdivq_f32(
700-
vaddq_f32(vmulq_f32(self.0, scale1), vmulq_f32(end.0, scale2)),
701-
theta_sin,
702-
))
703-
}
687+
let scale1 = math::sin(theta * (1.0 - s));
688+
let scale2 = math::sin(theta * s);
689+
let theta_sin = math::sin(theta);
690+
((self * scale1) + (end * scale2)) * (1.0 / theta_sin)
704691
}
705692
}
706693

0 commit comments

Comments
 (0)