Skip to main content

core/num/imp/
traits.rs

1//! Numeric traits used for internal implementations.
2
3#![doc(hidden)]
4#![unstable(
5    feature = "num_internals",
6    reason = "internal routines only exposed for testing",
7    issue = "none"
8)]
9
10use crate::num::FpCategory;
11use crate::{f64, fmt, ops};
12
/// Lossy conversion with the semantics of an `as` cast from `Self` to `T`.
///
/// Both the source and the target must be `Copy`; `cast` consumes the value by copy.
pub trait CastInto<T>: Copy
where
    T: Copy,
{
    /// Converts `self` into a `T`, possibly losing information.
    fn cast(self) -> T;
}
17
18/// Collection of traits that allow us to be generic over integer size.
19pub trait Int:
20    Sized
21    + Clone
22    + Copy
23    + fmt::Debug
24    + ops::Shr<u32, Output = Self>
25    + ops::Shl<u32, Output = Self>
26    + ops::BitAnd<Output = Self>
27    + ops::BitOr<Output = Self>
28    + PartialEq
29    + CastInto<i16>
30{
31    const ZERO: Self;
32    const ONE: Self;
33}
34
35macro_rules! int {
36    ($($ty:ty),+) => {
37        $(
38            impl CastInto<i16> for $ty {
39                fn cast(self) -> i16 {
40                    self as i16
41                }
42            }
43
44            impl Int for $ty {
45                const ZERO: Self = 0;
46                const ONE: Self = 1;
47            }
48        )+
49    }
50}
51
52int!(u16, u32, u64);
53
54/// A helper trait to avoid duplicating basically all the conversion code for IEEE floats.
55#[doc(hidden)]
56pub trait Float:
57    Sized
58    + ops::Div<Output = Self>
59    + ops::Neg<Output = Self>
60    + ops::Mul<Output = Self>
61    + ops::Add<Output = Self>
62    + fmt::Debug
63    + PartialEq
64    + PartialOrd
65    + Default
66    + Clone
67    + Copy
68{
69    /// The unsigned integer with the same size as the float
70    type Int: Int + Into<u64>;
71
72    /* general constants */
73
74    const INFINITY: Self;
75    const NEG_INFINITY: Self;
76    const NAN: Self;
77    const NEG_NAN: Self;
78
79    /// Bit width of the float
80    const BITS: u32;
81
82    /// The number of bits in the significand, *including* the hidden bit.
83    const SIG_TOTAL_BITS: u32;
84
85    const EXP_MASK: Self::Int;
86    const SIG_MASK: Self::Int;
87
88    /// The number of bits in the significand, *excluding* the hidden bit.
89    const SIG_BITS: u32 = Self::SIG_TOTAL_BITS - 1;
90
91    /// Number of bits in the exponent.
92    const EXP_BITS: u32 = Self::BITS - Self::SIG_BITS - 1;
93
94    /// The saturated (maximum bitpattern) value of the exponent, i.e. the infinite
95    /// representation.
96    ///
97    /// This shifted fully right, use `EXP_MASK` for the shifted value.
98    const EXP_SAT: u32 = (1 << Self::EXP_BITS) - 1;
99
100    /// Signed version of `EXP_SAT` since we convert a lot.
101    const INFINITE_POWER: i32 = Self::EXP_SAT as i32;
102
103    /// The exponent bias value. This is also the maximum value of the exponent.
104    const EXP_BIAS: u32 = Self::EXP_SAT >> 1;
105
106    /// Minimum exponent value of normal values.
107    const EXP_MIN: i32 = -(Self::EXP_BIAS as i32 - 1);
108
109    /// Round-to-even only happens for negative values of q
110    /// when q ≥ −4 in the 64-bit case and when q ≥ −17 in
111    /// the 32-bit case.
112    ///
113    /// When q ≥ 0,we have that 5^q ≤ 2m+1. In the 64-bit case,we
114    /// have 5^q ≤ 2m+1 ≤ 2^54 or q ≤ 23. In the 32-bit case,we have
115    /// 5^q ≤ 2m+1 ≤ 2^25 or q ≤ 10.
116    ///
117    /// When q < 0, we have w ≥ (2m+1)×5^−q. We must have that w < 2^64
118    /// so (2m+1)×5^−q < 2^64. We have that 2m+1 > 2^53 (64-bit case)
119    /// or 2m+1 > 2^24 (32-bit case). Hence,we must have 2^53×5^−q < 2^64
120    /// (64-bit) and 2^24×5^−q < 2^64 (32-bit). Hence we have 5^−q < 2^11
121    /// or q ≥ −4 (64-bit case) and 5^−q < 2^40 or q ≥ −17 (32-bit case).
122    ///
123    /// Thus we have that we only need to round ties to even when
124    /// we have that q ∈ [−4,23](in the 64-bit case) or q∈[−17,10]
125    /// (in the 32-bit case). In both cases,the power of five(5^|q|)
126    /// fits in a 64-bit word.
127    const MIN_EXPONENT_ROUND_TO_EVEN: i32;
128    const MAX_EXPONENT_ROUND_TO_EVEN: i32;
129
130    /// Largest decimal exponent for a non-infinite value.
131    ///
132    /// This is the max exponent in binary converted to the max exponent in decimal. Allows fast
133    /// pathing anything larger than `10^LARGEST_POWER_OF_TEN`, which will round to infinity.
134    const LARGEST_POWER_OF_TEN: i32 = {
135        let largest_pow2 = Self::EXP_BIAS + 1;
136        pow2_to_pow10(largest_pow2 as i64) as i32
137    };
138
139    /// Smallest decimal exponent for a non-zero value. This allows for fast pathing anything
140    /// smaller than `10^SMALLEST_POWER_OF_TEN`, which will round to zero.
141    ///
142    /// The smallest power of ten is represented by `⌊log10(2^-n / (2^64 - 1))⌋`, where `n` is
143    /// the smallest power of two. The `2^64 - 1)` denominator comes from the number of values
144    /// that are representable by the intermediate storage format. I don't actually know _why_
145    /// the storage format is relevant here.
146    ///
147    /// The values may be calculated using the formula. Unfortunately we cannot calculate them at
148    /// compile time since intermediates exceed the range of an `f64`.
149    const SMALLEST_POWER_OF_TEN: i32;
150
151    /// Returns the category that this number falls into.
152    fn classify(self) -> FpCategory;
153
154    /// Transmute to the integer representation
155    fn to_bits(self) -> Self::Int;
156}
157
158/// Items that ideally would be on `Float`, but don't apply to all float types because they
159/// rely on the mantissa fitting into a `u64` (which isn't true for `f128`).
160#[doc(hidden)]
161pub trait FloatExt: Float {
162    /// Performs a raw transmutation from an integer.
163    fn from_u64_bits(v: u64) -> Self;
164
165    /// Returns the mantissa, exponent and sign as integers.
166    ///
167    /// This returns `(m, p, s)` such that `s * m * 2^p` represents the original float. For 0, the
168    /// exponent will be `-(EXP_BIAS + SIG_BITS)`, which is the minimum subnormal power. For
169    /// infinity or NaN, the exponent will be `EXP_SAT - EXP_BIAS - SIG_BITS`.
170    ///
171    /// If subnormal, the mantissa will be shifted one bit to the left. Otherwise, it is returned
172    /// with the explicit bit set but otherwise unshifted
173    ///
174    /// `s` is only ever +/-1.
175    fn integer_decode(self) -> (u64, i16, i8) {
176        let bits = self.to_bits();
177        let sign: i8 = if bits >> (Self::BITS - 1) == Self::Int::ZERO { 1 } else { -1 };
178        let mut exponent: i16 = ((bits & Self::EXP_MASK) >> Self::SIG_BITS).cast();
179        let mantissa = if exponent == 0 {
180            (bits & Self::SIG_MASK) << 1
181        } else {
182            (bits & Self::SIG_MASK) | (Self::Int::ONE << Self::SIG_BITS)
183        };
184        // Exponent bias + mantissa shift
185        exponent -= (Self::EXP_BIAS + Self::SIG_BITS) as i16;
186        (mantissa.into(), exponent, sign)
187    }
188}
189
/// Converts a power of two into the corresponding power of ten.
///
/// Solves `10^b = 2^a` for `b` (that is, `b = a / log2(10)`) and truncates the result
/// toward zero when converting it back to an integer.
const fn pow2_to_pow10(a: i64) -> i64 {
    ((a as f64) / f64::consts::LOG2_10) as i64
}
195
// `Float` parameters for `f16`; only compiled where `f16` support is reliable.
#[cfg(target_has_reliable_f16)]
impl Float for f16 {
    type Int = u16;

    // `Self::X` on the right-hand side resolves to the inherent `f16` constant of the same
    // name, not to the trait constant being defined.
    const INFINITY: Self = Self::INFINITY;
    const NEG_INFINITY: Self = Self::NEG_INFINITY;
    const NAN: Self = Self::NAN;
    const NEG_NAN: Self = -Self::NAN;

    const BITS: u32 = 16;
    const SIG_TOTAL_BITS: u32 = Self::MANTISSA_DIGITS;
    const EXP_MASK: Self::Int = Self::EXP_MASK;
    const SIG_MASK: Self::Int = Self::MAN_MASK;

    const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -22;
    const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 5;
    const SMALLEST_POWER_OF_TEN: i32 = -27;

    // Both methods forward to the inherent `f16` methods of the same name.
    fn classify(self) -> FpCategory {
        f16::classify(self)
    }

    fn to_bits(self) -> Self::Int {
        f16::to_bits(self)
    }
}
222
#[cfg(target_has_reliable_f16)]
impl FloatExt for f16 {
    /// Builds an `f16` from the low 16 bits of `v`; higher bits are discarded.
    #[inline]
    fn from_u64_bits(v: u64) -> Self {
        let low_half = (v & 0xFFFF) as u16;
        Self::from_bits(low_half)
    }
}
230
231impl Float for f32 {
232    type Int = u32;
233
234    const INFINITY: Self = f32::INFINITY;
235    const NEG_INFINITY: Self = f32::NEG_INFINITY;
236    const NAN: Self = f32::NAN;
237    const NEG_NAN: Self = -f32::NAN;
238
239    const BITS: u32 = 32;
240    const SIG_TOTAL_BITS: u32 = Self::MANTISSA_DIGITS;
241    const EXP_MASK: Self::Int = Self::EXP_MASK;
242    const SIG_MASK: Self::Int = Self::MAN_MASK;
243
244    const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -17;
245    const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 10;
246    const SMALLEST_POWER_OF_TEN: i32 = -65;
247
248    fn to_bits(self) -> Self::Int {
249        self.to_bits()
250    }
251
252    fn classify(self) -> FpCategory {
253        self.classify()
254    }
255}
256
257impl FloatExt for f32 {
258    #[inline]
259    fn from_u64_bits(v: u64) -> Self {
260        f32::from_bits((v & 0xFFFFFFFF) as u32)
261    }
262}
263
264impl Float for f64 {
265    type Int = u64;
266
267    const INFINITY: Self = Self::INFINITY;
268    const NEG_INFINITY: Self = Self::NEG_INFINITY;
269    const NAN: Self = Self::NAN;
270    const NEG_NAN: Self = -Self::NAN;
271
272    const BITS: u32 = 64;
273    const SIG_TOTAL_BITS: u32 = Self::MANTISSA_DIGITS;
274    const EXP_MASK: Self::Int = Self::EXP_MASK;
275    const SIG_MASK: Self::Int = Self::MAN_MASK;
276
277    const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -4;
278    const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 23;
279    const SMALLEST_POWER_OF_TEN: i32 = -342;
280
281    fn to_bits(self) -> Self::Int {
282        self.to_bits()
283    }
284
285    fn classify(self) -> FpCategory {
286        self.classify()
287    }
288}
289
290impl FloatExt for f64 {
291    #[inline]
292    fn from_u64_bits(v: u64) -> Self {
293        f64::from_bits(v)
294    }
295}