Skip to content

Commit fd868d0

Browse files
Rollup merge of #81484 - Kogia-sima:perf/optimize-udiv_1e19, r=nagisa
Optimize decimal formatting of 128-bit integers

## Description

This PR optimizes the `udiv_1e19` function, which is used for formatting 128-bit integers, based on the algorithm provided in [1]. This optimization improves the performance of formatting 128-bit integers, especially on 64-bit architectures. It also slightly reduces the output binary size.

## Assembler comparison

https://godbolt.org/z/YrG5zY

## Performance

#### previous results

```
test fmt::write_u128_max ... bench: 552 ns/iter (+/- 4)
test fmt::write_u128_min ... bench: 125 ns/iter (+/- 2)
```

#### new results

```
test fmt::write_u128_max ... bench: 205 ns/iter (+/- 13)
test fmt::write_u128_min ... bench: 129 ns/iter (+/- 5)
```

## Reference

[1] T. Granlund and P. Montgomery, “Division by Invariant Integers Using Multiplication” in Proc. of the SIGPLAN94 Conference on Programming Language Design and Implementation, 1994, pp. 61–72
2 parents 3e8ae5d + ada714d commit fd868d0

File tree

1 file changed

+36
-19
lines changed

1 file changed

+36
-19
lines changed

library/core/src/fmt/num.rs

+36-19
Original file line numberDiff line numberDiff line change
@@ -643,25 +643,42 @@ fn fmt_u128(n: u128, is_nonnegative: bool, f: &mut fmt::Formatter<'_>) -> fmt::R
643643
}
644644

645645
/// Divides `n` by 1e19, returning `(quot, rem)` with `quot = n / 1e19` and `rem = n % 1e19` (so `rem < 1e19`).
646+
///
647+
/// Integer division algorithm is based on the following paper:
648+
///
649+
/// T. Granlund and P. Montgomery, “Division by Invariant Integers Using Multiplication”
650+
/// in Proc. of the SIGPLAN94 Conference on Programming Language Design and
651+
/// Implementation, 1994, pp. 61–72
652+
/// Values below 2^83 use a single 64-bit division; larger values take the upper half of `n * FACTOR` shifted right by 62.
646653
fn udiv_1e19(n: u128) -> (u128, u64) {
647654
const DIV: u64 = 1e19 as u64;
648-
let high = (n >> 64) as u64;
649-
if high == 0 {
650-
let low = n as u64;
651-
return ((low / DIV) as u128, low % DIV);
652-
}
653-
let sr = 65 - high.leading_zeros();
654-
let mut q = n << (128 - sr);
655-
let mut r = n >> sr;
656-
let mut carry = 0;
657-
658-
for _ in 0..sr {
659-
r = (r << 1) | (q >> 127);
660-
q = (q << 1) | carry as u128;
661-
662-
let s = (DIV as u128).wrapping_sub(r).wrapping_sub(1) as i128 >> 127;
663-
carry = (s & 1) as u64;
664-
r -= (DIV as u128) & s as u128;
665-
}
666-
((q << 1) | carry as u128, r as u64)
655+
const FACTOR: u128 = 156927543384667019095894735580191660403;
656+
657+
let quot = if n < 1 << 83 {
658+
((n >> 19) as u64 / (DIV >> 19)) as u128
659+
} else {
660+
u128_mulhi(n, FACTOR) >> 62
661+
};
662+
663+
let rem = (n - quot * DIV as u128) as u64;
664+
(quot, rem)
665+
}
666+
667+
/// Multiplies two unsigned 128-bit integers and returns the upper 128 bits of the 256-bit product, built from 64x64-bit partial products.
668+
#[inline]
669+
fn u128_mulhi(x: u128, y: u128) -> u128 {
670+
let x_lo = x as u64;
671+
let x_hi = (x >> 64) as u64;
672+
let y_lo = y as u64;
673+
let y_hi = (y >> 64) as u64;
674+
675+
// Fold in the cross terms one at a time, carrying only 64-bit pieces forward, so that no intermediate `u128` sum can overflow.
676+
let carry = (x_lo as u128 * y_lo as u128) >> 64;
677+
let m = x_lo as u128 * y_hi as u128 + carry;
678+
let high1 = m >> 64;
679+
680+
let m_lo = m as u64;
681+
let high2 = (x_hi as u128 * y_lo as u128 + m_lo as u128) >> 64;
682+
683+
x_hi as u128 * y_hi as u128 + high1 + high2
667684
}

0 commit comments

Comments
 (0)