@@ -2950,14 +2950,108 @@ pub trait Itertools: Iterator {
2950
2950
/// itertools::assert_equal(five_smallest, 0..5);
2951
2951
/// ```
2952
2952
#[cfg(feature = "use_alloc")]
fn k_smallest(self, k: usize) -> VecIntoIter<Self::Item>
where
    Self: Sized,
    Self::Item: Ord,
{
    // The stdlib heap has optimised handling of "holes", which is not included in our
    // heap implementation in `k_smallest_general`. While the difference is unlikely to
    // have practical impact unless `Self::Item` is very large, this method uses the
    // stdlib structure to maintain performance compared to previous versions of the crate.
    use alloc::collections::BinaryHeap;

    // Nothing can be selected; skip building a heap entirely.
    if k == 0 {
        return Vec::new().into_iter();
    }

    // The iterator is consumed in two phases (`take(k)`, then `for_each`). `take` stops at
    // the first `None`, and a non-fused iterator may yield further items afterwards. Without
    // `fuse`, the second phase could then run against a heap holding fewer than `k` elements
    // (possibly none), tripping the assertion below or panicking on `peek().unwrap()`.
    let mut iter = self.fuse();

    // Max-heap seeded with the first `k` items (fewer if the iterator is shorter).
    let mut heap: BinaryHeap<_> = iter.by_ref().take(k).collect();

    iter.for_each(|i| {
        // This closure only runs if `take(k)` was fully satisfied, so the heap is full.
        debug_assert_eq!(heap.len(), k);
        // Equivalent to heap.push(min(i, heap.pop())) but more efficient.
        // This should be done with a single `.peek_mut().unwrap()` but
        // `PeekMut` sifts-down unconditionally on Rust 1.46.0 and prior.
        if *heap.peek().unwrap() > i {
            // Replace the current maximum with the smaller incoming item.
            *heap.peek_mut().unwrap() = i;
        }
    });

    // Ascending order, i.e. smallest first.
    heap.into_sorted_vec().into_iter()
}
2981
+
2982
/// Sort the k smallest elements into a new iterator, ordered by the provided comparison.
///
/// This relates to `self.sorted_by(cmp).take(k)` in the same way that
/// [`Itertools::k_smallest`] relates to `self.sorted().take(k)`, in both semantics
/// and complexity.
/// Particularly, a custom heap implementation ensures the comparison is not cloned.
#[cfg(feature = "use_alloc")]
fn k_smallest_by<F>(self, k: usize, cmp: F) -> VecIntoIter<Self::Item>
where
    Self: Sized,
    F: Fn(&Self::Item, &Self::Item) -> Ordering,
{
    // Selection is delegated to the crate's general heap-based helper.
    let smallest = k_smallest::k_smallest_general(self, k, cmp);
    smallest.into_iter()
}
2995
+
2996
/// Return the elements producing the k smallest outputs of the provided function.
///
/// This relates to `self.sorted_by_key(key).take(k)` in the same way that
/// [`Itertools::k_smallest`] relates to `self.sorted().take(k)`, in both semantics
/// and time complexity.
#[cfg(feature = "use_alloc")]
fn k_smallest_by_key<F, K>(self, k: usize, key: F) -> VecIntoIter<Self::Item>
where
    Self: Sized,
    F: Fn(&Self::Item) -> K,
    K: Ord,
{
    // Turn the key extractor into a two-argument comparison, then reuse `k_smallest_by`.
    let cmp = k_smallest::key_to_cmp(key);
    self.k_smallest_by(k, cmp)
}
3009
+
3010
/// Sort the k largest elements into a new iterator, in descending order.
///
/// Semantically equivalent to `k_smallest` with a reversed `Ord`.
/// However, this is implemented by way of a custom binary heap,
/// which does not have the same performance characteristics for very large `Self::Item`.
///
/// ```
/// use itertools::Itertools;
///
/// // A random permutation of 0..15
/// let numbers = vec![6, 9, 1, 14, 0, 4, 8, 7, 11, 2, 10, 3, 13, 12, 5];
///
/// let five_largest = numbers
///     .into_iter()
///     .k_largest(5);
///
/// itertools::assert_equal(five_largest, vec![14,13,12,11,10]);
/// ```
#[cfg(feature = "use_alloc")]
fn k_largest(self, k: usize) -> VecIntoIter<Self::Item>
where
    Self: Sized,
    Self::Item: Ord,
{
    // Use the item type's natural ordering; `k_largest_by` takes care of reversing it.
    self.k_largest_by(k, <Self::Item as Ord>::cmp)
}
3034
+
3035
/// Sort the k largest elements into a new iterator, ordered by the provided comparison.
///
/// Functionally equivalent to `k_smallest_by` with a reversed `Ord`.
#[cfg(feature = "use_alloc")]
fn k_largest_by<F>(self, k: usize, cmp: F) -> VecIntoIter<Self::Item>
where
    Self: Sized,
    F: Fn(&Self::Item, &Self::Item) -> Ordering,
{
    // Swapping the arguments reverses the ordering, so "smallest" under the
    // reversed comparison is "largest" under `cmp`.
    let reversed = move |lhs: &Self::Item, rhs: &Self::Item| cmp(rhs, lhs);
    self.k_smallest_by(k, reversed)
}
3045
+
3046
/// Return the elements producing the k largest outputs of the provided function.
///
/// Delegates to `k_largest_by`, using a comparison derived from `key`.
#[cfg(feature = "use_alloc")]
fn k_largest_by_key<F, K>(self, k: usize, key: F) -> VecIntoIter<Self::Item>
where
    Self: Sized,
    F: Fn(&Self::Item) -> K,
    K: Ord,
{
    // Turn the key extractor into a two-argument comparison, then reuse `k_largest_by`.
    let cmp = k_smallest::key_to_cmp(key);
    self.k_largest_by(k, cmp)
}
2962
3056
2963
3057
/// Collect all iterator elements into one of two
0 commit comments