@@ -750,6 +750,12 @@ impl {{ self_t }} {
         {{ vec4_t }}::from(self).abs_diff_eq({{ vec4_t }}::from(rhs), max_abs_diff)
     }

+    #[inline(always)]
+    #[must_use]
+    fn lerp_impl(self, end: Self, s: {{ scalar_t }}) -> Self {
+        (self * (1.0 - s) + end * s).normalize()
+    }
+
     /// Performs a linear interpolation between `self` and `rhs` based on
     /// the value `s`.
     ///
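The new `lerp_impl` helper is a normalized linear interpolation (nlerp): blend the two quaternions componentwise, then renormalize so the result is a unit quaternion again. A minimal scalar sketch of the same formula, using a hypothetical plain-struct `Quat` rather than the generated type:

```rust
// Sketch only: `Quat` is a stand-in for the generated quaternion type.
#[derive(Clone, Copy)]
struct Quat { x: f32, y: f32, z: f32, w: f32 }

impl Quat {
    fn lerp_impl(self, end: Quat, s: f32) -> Quat {
        // Componentwise blend: self * (1 - s) + end * s ...
        let (t, u) = (1.0 - s, s);
        let q = Quat {
            x: self.x * t + end.x * u,
            y: self.y * t + end.y * u,
            z: self.z * t + end.z * u,
            w: self.w * t + end.w * u,
        };
        // ... then renormalize (assumes the blend is non-zero, which holds
        // for unit quaternions on the same hemisphere).
        let len = (q.x * q.x + q.y * q.y + q.z * q.z + q.w * q.w).sqrt();
        Quat { x: q.x / len, y: q.y / len, z: q.z / len, w: q.w / len }
    }
}
```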
@@ -767,69 +773,41 @@ impl {{ self_t }} {
         glam_assert!(end.is_normalized());

         {% if is_scalar %}
-        let start = self;
-        let dot = start.dot(end);
+        let dot = self.dot(end);
         let bias = if dot >= 0.0 { 1.0 } else { -1.0 };
-        let interpolated = start.add(end.mul(bias).sub(start).mul(s));
-        interpolated.normalize()
+        self.lerp_impl(end * bias, s)
         {% elif is_sse2 %}
         const NEG_ZERO: __m128 = m128_from_f32x4([-0.0; 4]);
-        let start = self.0;
-        let end = end.0;
         unsafe {
-            let dot = dot4_into_m128(start, end);
+            let dot = dot4_into_m128(self.0, end.0);
             // Calculate the bias, if the dot product is positive or zero, there is no bias
             // but if it is negative, we want to flip the 'end' rotation XYZW components
             let bias = _mm_and_ps(dot, NEG_ZERO);
-            let interpolated = _mm_add_ps(
-                _mm_mul_ps(_mm_sub_ps(_mm_xor_ps(end, bias), start), _mm_set_ps1(s)),
-                start,
-            );
-            {{ self_t }}(interpolated).normalize()
+            self.lerp_impl(Self(_mm_xor_ps(end.0, bias)), s)
         }
         {% elif is_wasm32 %}
         const NEG_ZERO: v128 = v128_from_f32x4([-0.0; 4]);
-        let start = self.0;
-        let end = end.0;
-        let dot = dot4_into_v128(start, end);
+        let dot = dot4_into_v128(self.0, end.0);
         // Calculate the bias, if the dot product is positive or zero, there is no bias
         // but if it is negative, we want to flip the 'end' rotation XYZW components
         let bias = v128_and(dot, NEG_ZERO);
-        let interpolated = f32x4_add(
-            f32x4_mul(f32x4_sub(v128_xor(end, bias), start), f32x4_splat(s)),
-            start,
-        );
-        {{ self_t }}(interpolated).normalize()
+        self.lerp_impl(Self(v128_xor(end.0, bias)), s)
         {% elif is_coresimd %}
         const NEG_ZERO: f32x4 = f32x4::from_array([-0.0; 4]);
-        let start = self.0;
-        let end = end.0;
-        let dot = dot4_into_f32x4(start, end);
+        let dot = dot4_into_f32x4(self.0, end.0);
         // Calculate the bias, if the dot product is positive or zero, there is no bias
         // but if it is negative, we want to flip the 'end' rotation XYZW components
         let bias = f32x4_bitand(dot, NEG_ZERO);
-        let interpolated = start + ((f32x4_bitxor(end, bias) - start) * f32x4::splat(s));
-        {{ self_t }}(interpolated).normalize()
+        self.lerp_impl(Self(f32x4_bitxor(end.0, bias)), s)
         {% elif is_neon %}
         const NEG_ZERO: float32x4_t = f32x4_from_array([-0.0; 4]);
-        let start = self.0;
-        let end = end.0;
         unsafe {
-            let dot = dot4_into_f32x4(start, end);
+            let dot = dot4_into_f32x4(self.0, end.0);
             // Calculate the bias, if the dot product is positive or zero, there is no bias
             // but if it is negative, we want to flip the 'end' rotation XYZW components
             let bias = vandq_u32(vreinterpretq_u32_f32(dot), vreinterpretq_u32_f32(NEG_ZERO));
-            let interpolated = vaddq_f32(
-                vmulq_f32(
-                    vsubq_f32(
-                        vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(end), bias)),
-                        start,
-                    ),
-                    vld1q_dup_f32(&s),
-                ),
-                start,
-            );
-            {{ self_t }}(interpolated).normalize()
+            self.lerp_impl(
+                Self(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(end.0), bias))), s)
         }
         {% else %}
         unimplemented!()
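Every SIMD branch above relies on the same sign-bit trick: AND-ing the dot product with `-0.0` isolates its sign bit, and XOR-ing `end` with that bit negates `end` exactly when the dot product is negative, so interpolation always takes the shorter arc. A scalar sketch of the bit manipulation (`flip_toward` is illustrative, not part of glam; the SIMD branches do this lane-wise):

```rust
// -0.0 is all zeros except the sign bit, so `dot & -0.0` extracts the
// sign of `dot`, and `x ^ bias` conditionally negates `x`.
fn flip_toward(dot: f32, end_component: f32) -> f32 {
    let bias = dot.to_bits() & (-0.0f32).to_bits(); // sign bit of `dot`, or 0
    f32::from_bits(end_component.to_bits() ^ bias)  // negated iff dot < 0
}

fn main() {
    assert_eq!(flip_toward(0.5, 2.0), 2.0);   // dot >= 0: unchanged
    assert_eq!(flip_toward(-0.5, 2.0), -2.0); // dot < 0: sign flipped
}
```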
@@ -852,8 +830,6 @@ impl {{ self_t }} {
         glam_assert!(self.is_normalized());
         glam_assert!(end.is_normalized());

-        const DOT_THRESHOLD: {{ scalar_t }} = 0.9995;
-
         // Note that a rotation can be represented by two quaternions: `q` and
         // `-q`. The slerp path between `q` and `end` will be different from the
         // path between `-q` and `end`. One path will take the long way around and
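The `DOT_THRESHOLD` constant does not disappear: the next hunk reintroduces it at its only point of use, tightened from `0.9995` to `1.0 - EPSILON`, so the nlerp fallback now triggers only when the inputs are so nearly parallel that `sin(theta)` in the slerp denominator would be effectively zero. A scalar sketch of the guarded weight computation, under those assumptions (the result would be `self * scale1 + end * scale2`):

```rust
// Returns the two slerp blend weights, falling back to plain lerp
// weights when the quaternions are nearly identical, because dividing
// by sin(theta) ~ 0 would otherwise blow up.
fn slerp_scales(dot: f32, s: f32) -> (f32, f32) {
    const DOT_THRESHOLD: f32 = 1.0 - f32::EPSILON;
    if dot > DOT_THRESHOLD {
        // Nearly identical inputs: linear weights are safe (renormalized later).
        (1.0 - s, s)
    } else {
        let theta = dot.acos();
        let theta_sin = theta.sin();
        // Standard slerp weights: sin((1-s)θ)/sin θ and sin(sθ)/sin θ.
        ((((1.0 - s) * theta).sin()) / theta_sin, ((s * theta).sin()) / theta_sin)
    }
}
```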
@@ -866,20 +842,13 @@ impl {{ self_t }} {
             dot = -dot;
         }

+        const DOT_THRESHOLD: {{ scalar_t }} = 1.0 - {{ scalar_t }}::EPSILON;
         if dot > DOT_THRESHOLD {
-            // assumes lerp returns a normalized quaternion
-            self.lerp(end, s)
+            // if above threshold perform linear interpolation to avoid divide by zero
+            self.lerp_impl(end, s)
         } else {
             let theta = math::acos_approx(dot);
-            {% if is_scalar %}
-            let scale1 = math::sin(theta * (1.0 - s));
-            let scale2 = math::sin(theta * s);
-            let theta_sin = math::sin(theta);
-
-            self.mul(scale1)
-                .add(end.mul(scale2))
-                .mul(1.0 / theta_sin)
-            {% elif is_sse2 %}
+            {% if is_sse2 %}
             let x = 1.0 - s;
             let y = s;
             let z = 1.0;
@@ -897,57 +866,11 @@ impl {{ self_t }} {
                 theta_sin,
             ))
         }
-            {% elif is_wasm32 %}
-            // TODO: v128_sin is broken
-            // let x = 1.0 - s;
-            // let y = s;
-            // let z = 1.0;
-            // let w = 0.0;
-            // let tmp = f32x4_mul(f32x4_splat(theta), f32x4(x, y, z, w));
-            // let tmp = v128_sin(tmp);
-            let x = math::sin(theta * (1.0 - s));
-            let y = math::sin(theta * s);
-            let z = math::sin(theta);
-            let w = 0.0;
-            let tmp = f32x4(x, y, z, w);
-
-            let scale1 = i32x4_shuffle::<0, 0, 4, 4>(tmp, tmp);
-            let scale2 = i32x4_shuffle::<1, 1, 5, 5>(tmp, tmp);
-            let theta_sin = i32x4_shuffle::<2, 2, 6, 6>(tmp, tmp);
-
-            Self(f32x4_div(
-                f32x4_add(f32x4_mul(self.0, scale1), f32x4_mul(end.0, scale2)),
-                theta_sin,
-            ))
-            {% elif is_coresimd %}
-            let x = math::sin(theta * (1.0 - s));
-            let y = math::sin(theta * s);
-            let z = math::sin(theta);
-            let w = 0.0;
-            let tmp = f32x4::from_array([x, y, z, w]);
-
-            let scale1 = simd_swizzle!(tmp, [0, 0, 0, 0]);
-            let scale2 = simd_swizzle!(tmp, [1, 1, 1, 1]);
-            let theta_sin = simd_swizzle!(tmp, [2, 2, 2, 2]);
-
-            Self(self.0.mul(scale1).add(end.0.mul(scale2)).div(theta_sin))
-            {% elif is_neon %}
-            let x = math::sin(theta * (1.0 - s));
-            let y = math::sin(theta * s);
-            let z = math::sin(theta);
-            let w = 0.0;
-            unsafe {
-                let tmp = vld1q_f32([x, y, z, w].as_ptr());
-
-                let scale1 = vdupq_laneq_f32(tmp, 0);
-                let scale2 = vdupq_laneq_f32(tmp, 1);
-                let theta_sin = vdupq_laneq_f32(tmp, 2);
-
-                Self(vdivq_f32(
-                    vaddq_f32(vmulq_f32(self.0, scale1), vmulq_f32(end.0, scale2)),
-                    theta_sin,
-                ))
-            }
+            {% else %}
+            let scale1 = math::sin(theta * (1.0 - s));
+            let scale2 = math::sin(theta * s);
+            let theta_sin = math::sin(theta);
+            ((self * scale1) + (end * scale2)) * (1.0 / theta_sin)
             {% endif %}
         }
     }
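The public API is unchanged by this refactor; only the shared internals moved into `lerp_impl` and the generic `{% else %}` slerp branch. A quick usage sketch against the public glam `Quat` API (exact numeric results may vary slightly across backends):

```rust
use glam::Quat;

fn main() {
    let a = Quat::IDENTITY;
    let b = Quat::from_rotation_y(std::f32::consts::FRAC_PI_2);
    // Both interpolants stay unit-length; slerp keeps constant angular
    // velocity, while lerp (nlerp) is cheaper but slightly non-uniform.
    let l = a.lerp(b, 0.3);
    let s = a.slerp(b, 0.3);
    assert!(l.is_normalized() && s.is_normalized());
}
```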