chore: checkpoint before Python removal

2026-03-26 22:33:59 +00:00
parent 683cec9307
commit e568ddf82a
29972 changed files with 11269302 additions and 2 deletions
--- a/vendor/libm/src/math/generic/ceil.rs
+++ b/vendor/libm/src/math/generic/ceil.rs
@@ -0,0 +1,174 @@
+/* SPDX-License-Identifier: MIT */
+/* origin: musl src/math/ceilf.c */
+
+//! Generic `ceil` algorithm.
+//!
+//! Note that this uses the algorithm from musl's `ceilf` rather than `ceil` or `ceill` because
+//! performance seems to be better (based on icount) and it does not seem to experience rounding
+//! errors on i386.
+
+use crate::support::{Float, FpResult, Int, IntTy, MinInt, Status};
+
+/// Round `x` toward positive infinity, discarding the floating point status.
+#[inline]
+pub fn ceil<F: Float>(x: F) -> F {
+    let FpResult { val, .. } = ceil_status(x);
+    val
+}
+
+/// Generic `ceil` that also reports whether the result is exact.
+///
+/// Returns `x` rounded toward positive infinity. The status is `Status::INEXACT` whenever the
+/// returned value differs from `x` and `Status::OK` when `x` was already integral.
+#[inline]
+pub fn ceil_status<F: Float>(x: F) -> FpResult<F> {
+    let zero = IntTy::<F>::ZERO;
+
+    let mut ix = x.to_bits();
+    let e = x.exp_unbiased();
+
+    // If the represented value has no fractional part, no truncation is needed.
+    if e >= F::SIG_BITS as i32 {
+        return FpResult::ok(x);
+    }
+
+    if e >= 0 {
+        // |x| >= 1.0; `m` selects the significand bits that hold the fractional part.
+        let m = F::SIG_MASK >> e.unsigned();
+        if (ix & m) == zero {
+            // Portion to be masked is already zero; no adjustment needed.
+            return FpResult::ok(x);
+        }
+
+        if x.is_sign_positive() {
+            // Carry into the integer part so that clearing the fraction rounds up.
+            ix += m;
+        }
+        ix &= !m;
+
+        // A nonzero fraction was discarded, so the result is inexact.
+        return FpResult::new(F::from_bits(ix), Status::INEXACT);
+    }
+
+    // |x| < 1.0. The only integral values in this range are +0.0 and -0.0, which are returned
+    // unchanged. Checking only the significand bits is not enough: powers of two such as 0.5
+    // have an all-zero significand but still round to a different value.
+    if (ix & !F::SIGN_MASK) == zero {
+        return FpResult::ok(x);
+    }
+
+    let res = if x.is_sign_negative() {
+        // -1.0 < x < -0.0; rounding up goes toward -0.0.
+        F::NEG_ZERO
+    } else {
+        // 0.0 < x < 1.0; rounding up goes toward +1.0.
+        F::ONE
+    };
+
+    FpResult::new(res, Status::INEXACT)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::support::Hexf;
+
+    /// Test against <https://en.cppreference.com/w/cpp/numeric/math/ceil>.
+    ///
+    /// Values that are already integral must round-trip bit-for-bit with `Status::OK`; every
+    /// entry of `cases` is `(input, expected value, expected status)`.
+    fn spec_test<F: Float>(cases: &[(F, F, Status)]) {
+        let roundtrip = [
+            F::ZERO,
+            F::ONE,
+            F::NEG_ONE,
+            F::NEG_ZERO,
+            F::INFINITY,
+            F::NEG_INFINITY,
+        ];
+
+        for x in roundtrip {
+            let FpResult { val, status } = ceil_status(x);
+            assert_biteq!(val, x, "{}", Hexf(x));
+            assert_eq!(status, Status::OK, "{}", Hexf(x));
+        }
+
+        for &(x, res, res_stat) in cases {
+            let FpResult { val, status } = ceil_status(x);
+            assert_biteq!(val, res, "{}", Hexf(x));
+            assert_eq!(status, res_stat, "{}", Hexf(x));
+        }
+    }
+
+    /* Skipping f16 / f128 "sanity_check"s due to rejected literal lexing at MSRV */
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn spec_tests_f16() {
+        let cases = [
+            (0.1, 1.0, Status::INEXACT),
+            (-0.1, -0.0, Status::INEXACT),
+            // Powers of two below 1.0 have an all-zero significand but are still inexact.
+            (0.5, 1.0, Status::INEXACT),
+            (-0.5, -0.0, Status::INEXACT),
+            (0.9, 1.0, Status::INEXACT),
+            (-0.9, -0.0, Status::INEXACT),
+            (1.1, 2.0, Status::INEXACT),
+            (-1.1, -1.0, Status::INEXACT),
+            (1.9, 2.0, Status::INEXACT),
+            (-1.9, -1.0, Status::INEXACT),
+        ];
+        spec_test::<f16>(&cases);
+    }
+
+    #[test]
+    fn sanity_check_f32() {
+        assert_eq!(ceil(1.1f32), 2.0);
+        assert_eq!(ceil(2.9f32), 3.0);
+    }
+
+    #[test]
+    fn spec_tests_f32() {
+        let cases = [
+            (0.1, 1.0, Status::INEXACT),
+            (-0.1, -0.0, Status::INEXACT),
+            // Powers of two below 1.0 have an all-zero significand but are still inexact.
+            (0.5, 1.0, Status::INEXACT),
+            (-0.5, -0.0, Status::INEXACT),
+            (0.9, 1.0, Status::INEXACT),
+            (-0.9, -0.0, Status::INEXACT),
+            (1.1, 2.0, Status::INEXACT),
+            (-1.1, -1.0, Status::INEXACT),
+            (1.9, 2.0, Status::INEXACT),
+            (-1.9, -1.0, Status::INEXACT),
+        ];
+        spec_test::<f32>(&cases);
+    }
+
+    #[test]
+    fn sanity_check_f64() {
+        assert_eq!(ceil(1.1f64), 2.0);
+        assert_eq!(ceil(2.9f64), 3.0);
+    }
+
+    #[test]
+    fn spec_tests_f64() {
+        let cases = [
+            (0.1, 1.0, Status::INEXACT),
+            (-0.1, -0.0, Status::INEXACT),
+            // Powers of two below 1.0 have an all-zero significand but are still inexact.
+            (0.5, 1.0, Status::INEXACT),
+            (-0.5, -0.0, Status::INEXACT),
+            (0.9, 1.0, Status::INEXACT),
+            (-0.9, -0.0, Status::INEXACT),
+            (1.1, 2.0, Status::INEXACT),
+            (-1.1, -1.0, Status::INEXACT),
+            (1.9, 2.0, Status::INEXACT),
+            (-1.9, -1.0, Status::INEXACT),
+        ];
+        spec_test::<f64>(&cases);
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn spec_tests_f128() {
+        let cases = [
+            (0.1, 1.0, Status::INEXACT),
+            (-0.1, -0.0, Status::INEXACT),
+            // Powers of two below 1.0 have an all-zero significand but are still inexact.
+            (0.5, 1.0, Status::INEXACT),
+            (-0.5, -0.0, Status::INEXACT),
+            (0.9, 1.0, Status::INEXACT),
+            (-0.9, -0.0, Status::INEXACT),
+            (1.1, 2.0, Status::INEXACT),
+            (-1.1, -1.0, Status::INEXACT),
+            (1.9, 2.0, Status::INEXACT),
+            (-1.9, -1.0, Status::INEXACT),
+        ];
+        spec_test::<f128>(&cases);
+    }
+}
--- a/vendor/libm/src/math/generic/copysign.rs
+++ b/vendor/libm/src/math/generic/copysign.rs
@@ -0,0 +1,11 @@
+use crate::support::Float;
+
+/// Return `x` with its sign bit replaced by the sign bit of `y`.
+#[inline]
+pub fn copysign<F: Float>(x: F, y: F) -> F {
+    // Everything except the sign comes from `x`; only the sign bit comes from `y`.
+    let magnitude = x.to_bits() & !F::SIGN_MASK;
+    let sign = y.to_bits() & F::SIGN_MASK;
+    F::from_bits(magnitude | sign)
+}
--- a/vendor/libm/src/math/generic/fabs.rs
+++ b/vendor/libm/src/math/generic/fabs.rs
@@ -0,0 +1,8 @@
+use crate::support::Float;
+
+/// Absolute value: `x` with its sign bit cleared.
+#[inline]
+pub fn fabs<F: Float>(x: F) -> F {
+    let magnitude = x.to_bits() & !F::SIGN_MASK;
+    F::from_bits(magnitude)
+}
--- a/vendor/libm/src/math/generic/fdim.rs
+++ b/vendor/libm/src/math/generic/fdim.rs
@@ -0,0 +1,6 @@
+use crate::support::Float;
+
+/// Positive difference: `F::ZERO` when `x <= y`, otherwise `x - y`.
+///
+/// A NaN operand makes the `x <= y` comparison false, so the result is then `x - y`.
+#[inline]
+pub fn fdim<F: Float>(x: F, y: F) -> F {
+    if x <= y {
+        return F::ZERO;
+    }
+
+    x - y
+}
--- a/vendor/libm/src/math/generic/floor.rs
+++ b/vendor/libm/src/math/generic/floor.rs
@@ -0,0 +1,157 @@
+/* SPDX-License-Identifier: MIT
+ * origin: musl src/math/floor.c */
+
+//! Generic `floor` algorithm.
+//!
+//! Note that this uses the algorithm from musl's `floorf` rather than `floor` or `floorl` because
+//! performance seems to be better (based on icount) and it does not seem to experience rounding
+//! errors on i386.
+
+use crate::support::{Float, FpResult, Int, IntTy, MinInt, Status};
+
+/// Round `x` toward negative infinity, discarding the floating point status.
+#[inline]
+pub fn floor<F: Float>(x: F) -> F {
+    let FpResult { val, .. } = floor_status(x);
+    val
+}
+
+/// Generic `floor` that also reports whether the result is exact.
+///
+/// Returns `x` rounded toward negative infinity. The status is `Status::INEXACT` whenever the
+/// returned value differs from `x` and `Status::OK` when `x` was already integral.
+#[inline]
+pub fn floor_status<F: Float>(x: F) -> FpResult<F> {
+    let zero = IntTy::<F>::ZERO;
+
+    let mut ix = x.to_bits();
+    let e = x.exp_unbiased();
+
+    // If the represented value has no fractional part, no truncation is needed.
+    if e >= F::SIG_BITS as i32 {
+        return FpResult::ok(x);
+    }
+
+    if e >= 0 {
+        // |x| >= 1.0; `m` selects the significand bits that hold the fractional part.
+        let m = F::SIG_MASK >> e.unsigned();
+        if (ix & m) == zero {
+            // Portion to be masked is already zero; no adjustment needed.
+            return FpResult::ok(x);
+        }
+
+        if x.is_sign_negative() {
+            // Carry into the integer part so that clearing the fraction rounds down.
+            ix += m;
+        }
+        ix &= !m;
+
+        // A nonzero fraction was discarded, so the result is inexact.
+        return FpResult::new(F::from_bits(ix), Status::INEXACT);
+    }
+
+    // |x| < 1.0. The only integral values in this range are +0.0 and -0.0, which are returned
+    // unchanged. Checking only the significand bits is not enough: powers of two such as 0.5
+    // have an all-zero significand but still round to a different value.
+    if (ix & !F::SIGN_MASK) == zero {
+        return FpResult::ok(x);
+    }
+
+    let res = if x.is_sign_positive() {
+        // 0.0 < x < 1.0; rounding down goes toward +0.0.
+        F::ZERO
+    } else {
+        // -1.0 < x < 0.0; rounding down goes toward -1.0.
+        F::NEG_ONE
+    };
+
+    FpResult::new(res, Status::INEXACT)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::support::Hexf;
+
+    /// Test against <https://en.cppreference.com/w/cpp/numeric/math/floor>.
+    ///
+    /// Values that are already integral must round-trip bit-for-bit with `Status::OK`; every
+    /// entry of `cases` is `(input, expected value, expected status)`.
+    fn spec_test<F: Float>(cases: &[(F, F, Status)]) {
+        let roundtrip = [
+            F::ZERO,
+            F::ONE,
+            F::NEG_ONE,
+            F::NEG_ZERO,
+            F::INFINITY,
+            F::NEG_INFINITY,
+        ];
+
+        for x in roundtrip {
+            let FpResult { val, status } = floor_status(x);
+            assert_biteq!(val, x, "{}", Hexf(x));
+            assert_eq!(status, Status::OK, "{}", Hexf(x));
+        }
+
+        for &(x, res, res_stat) in cases {
+            let FpResult { val, status } = floor_status(x);
+            assert_biteq!(val, res, "{}", Hexf(x));
+            assert_eq!(status, res_stat, "{}", Hexf(x));
+        }
+    }
+
+    /* Skipping f16 / f128 "sanity_check"s and spec cases due to rejected literal lexing at MSRV */
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn spec_tests_f16() {
+        let cases = [];
+        spec_test::<f16>(&cases);
+    }
+
+    #[test]
+    fn sanity_check_f32() {
+        assert_eq!(floor(0.5f32), 0.0);
+        assert_eq!(floor(1.1f32), 1.0);
+        assert_eq!(floor(2.9f32), 2.0);
+    }
+
+    #[test]
+    fn spec_tests_f32() {
+        let cases = [
+            (0.1, 0.0, Status::INEXACT),
+            (-0.1, -1.0, Status::INEXACT),
+            // Powers of two below 1.0 have an all-zero significand but are still inexact.
+            (0.5, 0.0, Status::INEXACT),
+            (-0.5, -1.0, Status::INEXACT),
+            (0.9, 0.0, Status::INEXACT),
+            (-0.9, -1.0, Status::INEXACT),
+            (1.1, 1.0, Status::INEXACT),
+            (-1.1, -2.0, Status::INEXACT),
+            (1.9, 1.0, Status::INEXACT),
+            (-1.9, -2.0, Status::INEXACT),
+        ];
+        spec_test::<f32>(&cases);
+    }
+
+    #[test]
+    fn sanity_check_f64() {
+        assert_eq!(floor(1.1f64), 1.0);
+        assert_eq!(floor(2.9f64), 2.0);
+    }
+
+    #[test]
+    fn spec_tests_f64() {
+        let cases = [
+            (0.1, 0.0, Status::INEXACT),
+            (-0.1, -1.0, Status::INEXACT),
+            // Powers of two below 1.0 have an all-zero significand but are still inexact.
+            (0.5, 0.0, Status::INEXACT),
+            (-0.5, -1.0, Status::INEXACT),
+            (0.9, 0.0, Status::INEXACT),
+            (-0.9, -1.0, Status::INEXACT),
+            (1.1, 1.0, Status::INEXACT),
+            (-1.1, -2.0, Status::INEXACT),
+            (1.9, 1.0, Status::INEXACT),
+            (-1.9, -2.0, Status::INEXACT),
+        ];
+        spec_test::<f64>(&cases);
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn spec_tests_f128() {
+        let cases = [];
+        spec_test::<f128>(&cases);
+    }
+}
--- a/vendor/libm/src/math/generic/fma.rs
+++ b/vendor/libm/src/math/generic/fma.rs
@@ -0,0 +1,278 @@
+/* SPDX-License-Identifier: MIT */
+/* origin: musl src/math/fma.c. Ported to generic Rust algorithm in 2025, TG. */
+
+use crate::support::{
+    CastFrom, CastInto, DInt, Float, FpResult, HInt, Int, IntTy, MinInt, Round, Status,
+};
+
+/// Fused multiply-add that works when there is not a larger float size available. Computes
+/// `(x * y) + z`. `_round` is currently unused, and the only status ever raised is underflow.
+#[inline]
+pub fn fma_round<F>(x: F, y: F, z: F, _round: Round) -> FpResult<F>
+where
+    F: Float,
+    F: CastFrom<F::SignedInt>,
+    F: CastFrom<i8>,
+    F::Int: HInt,
+    u32: CastInto<F::Int>,
+{
+    let one = IntTy::<F>::ONE;
+    let zero = IntTy::<F>::ZERO;
+
+    // Normalize such that the top of the mantissa is zero and we have a guard bit.
+    let nx = Norm::from_float(x);
+    let ny = Norm::from_float(y);
+    let nz = Norm::from_float(z);
+
+    if nx.is_zero_nan_inf() || ny.is_zero_nan_inf() {
+        // `x` or `y` is zero, NaN, or infinite; defer to non-fused operations.
+        return FpResult::ok(x * y + z);
+    }
+
+    if nz.is_zero_nan_inf() {
+        if nz.is_zero() {
+            // Empty add component means we only need to multiply.
+            return FpResult::ok(x * y);
+        }
+        // `z` is NaN or infinity, which sets the result.
+        return FpResult::ok(z);
+    }
+
+    // multiply: r = x * y
+    let zhi: F::Int;
+    let zlo: F::Int;
+    let (mut rlo, mut rhi) = nx.m.widen_mul(ny.m).lo_hi(); // double-width product as two halves
+
+    // Exponent result of multiplication
+    let mut e: i32 = nx.e + ny.e;
+    // Needed shift to align `z` to the multiplication result
+    let mut d: i32 = nz.e - e;
+    let sbits = F::BITS as i32;
+
+    // Scale `z`. Shift `z <<= kz`, `r >>= kr`, so `kz+kr == d`, set `e = e+kr` (== ez-kz)
+    if d > 0 {
+        // The magnitude of `z` is larger than `x * y`
+        if d < sbits {
+            // Maximum shift of one `F::BITS` means shifted `z` will fit into `2 * F::BITS`. Shift
+            // it into `(zhi, zlo)`. No exponent adjustment necessary.
+            zlo = nz.m << d;
+            zhi = nz.m >> (sbits - d);
+        } else {
+            // Shift larger than `sbits`, `z` only needs the top half `zhi`. Place it there (acts
+            // as a shift by `sbits`).
+            zlo = zero;
+            zhi = nz.m;
+            d -= sbits;
+
+            // `z`'s exponent is large enough that it now needs to be taken into account.
+            e = nz.e - sbits;
+
+            if d == 0 {
+                // Exactly `sbits`, nothing to do
+            } else if d < sbits {
+                // Remaining shift fits within `sbits`. Leave `z` in place, shift `x * y`
+                rlo = (rhi << (sbits - d)) | (rlo >> d);
+                // Set the sticky bit
+                rlo |= IntTy::<F>::from((rlo << (sbits - d)) != zero);
+                rhi = rhi >> d;
+            } else {
+                // `z`'s magnitude is enough that `x * y` is irrelevant. It was nonzero, so set
+                // the sticky bit.
+                rlo = one;
+                rhi = zero;
+            }
+        }
+    } else {
+        // `z`'s magnitude once shifted fits entirely within `zlo`
+        zhi = zero;
+        d = -d;
+        if d == 0 {
+            // No shift needed
+            zlo = nz.m;
+        } else if d < sbits {
+            // Shift s.t. `nz.m` fits into `zlo`
+            let sticky = IntTy::<F>::from((nz.m << (sbits - d)) != zero);
+            zlo = (nz.m >> d) | sticky;
+        } else {
+            // Would be entirely shifted out, only set the sticky bit
+            zlo = one;
+        }
+    }
+
+    /* addition */
+
+    let mut neg = nx.neg ^ ny.neg;
+    let samesign: bool = !neg ^ nz.neg; // true when `x * y` and `z` have equal signs
+    let mut rhi_nonzero = true;
+
+    if samesign {
+        // r += z
+        rlo = rlo.wrapping_add(zlo);
+        rhi += zhi + IntTy::<F>::from(rlo < zlo);
+    } else {
+        // r -= z
+        let (res, borrow) = rlo.overflowing_sub(zlo);
+        rlo = res;
+        rhi = rhi.wrapping_sub(zhi.wrapping_add(IntTy::<F>::from(borrow)));
+        if (rhi >> (F::BITS - 1)) != zero {
+            rlo = rlo.signed().wrapping_neg().unsigned();
+            rhi = rhi.signed().wrapping_neg().unsigned() - IntTy::<F>::from(rlo != zero);
+            neg = !neg;
+        }
+        rhi_nonzero = rhi != zero;
+    }
+
+    /* Construct result */
+
+    // Shift result into `rhi`, left-aligned. Last bit is sticky
+    if rhi_nonzero {
+        // `d` > 0, need to shift both `rhi` and `rlo` into result
+        e += sbits;
+        d = rhi.leading_zeros() as i32 - 1;
+        rhi = (rhi << d) | (rlo >> (sbits - d));
+        // Update sticky
+        rhi |= IntTy::<F>::from((rlo << d) != zero);
+    } else if rlo != zero {
+        // `rhi` is zero, `rlo` is the entire result and needs to be shifted
+        d = rlo.leading_zeros() as i32 - 1;
+        if d < 0 {
+            // Shift and set sticky
+            rhi = (rlo >> 1) | (rlo & one);
+        } else {
+            rhi = rlo << d;
+        }
+    } else {
+        // exact +/- 0.0
+        return FpResult::ok(x * y + z);
+    }
+
+    e -= d;
+
+    // Use int->float conversion to populate the significand.
+    // i is in [1 << (BITS - 2), (1 << (BITS - 1)) - 1]
+    let mut i: F::SignedInt = rhi.signed();
+
+    if neg {
+        i = -i;
+    }
+
+    // `|r|` is in `[0x1p62,0x1p63]` for `f64`
+    let mut r: F = F::cast_from_lossy(i);
+
+    /* Account for subnormal and rounding */
+
+    // Biased exponent for the maximum value of `r` (it is passed to `from_parts` below)
+    let max_pow = F::BITS - 1 + F::EXP_BIAS;
+
+    let mut status = Status::OK;
+
+    if e < -(max_pow as i32 - 2) {
+        // Result is subnormal before rounding
+        if e == -(max_pow as i32 - 1) {
+            let mut c = F::from_parts(false, max_pow, zero);
+            if neg {
+                c = -c;
+            }
+
+            if r == c {
+                // Min normal after rounding.
+                status.set_underflow(true);
+                r = F::MIN_POSITIVE_NORMAL.copysign(r);
+                return FpResult::new(r, status);
+            }
+
+            if (rhi << (F::SIG_BITS + 1)) != zero {
+                // Account for truncated bits. One bit will be lost in the `scalbn` call, add
+                // another top bit to avoid double rounding if inexact.
+                let iu: F::Int = (rhi >> 1) | (rhi & one) | (one << (F::BITS - 2));
+                i = iu.signed();
+
+                if neg {
+                    i = -i;
+                }
+
+                r = F::cast_from_lossy(i);
+
+                // Remove the top bit
+                r = F::cast_from(2i8) * r - c;
+                status.set_underflow(true);
+            }
+        } else {
+            // Only round once when scaled
+            d = F::EXP_BITS as i32 - 1;
+            let sticky = IntTy::<F>::from(rhi << (F::BITS as i32 - d) != zero);
+            i = (((rhi >> d) | sticky) << d).signed();
+
+            if neg {
+                i = -i;
+            }
+
+            r = F::cast_from_lossy(i);
+        }
+    }
+
+    // Use our exponent to scale the final value.
+    FpResult::new(super::scalbn(r, e), status)
+}
+
+/// Sign, significand, and exponent of an `F` after subnormals have been normalized.
+#[derive(Clone, Copy, Debug)]
+struct Norm<F: Float> {
+    /// Normalized significand with one guard bit, unsigned.
+    m: F::Int,
+    /// Exponent of the mantissa such that `m * 2^e = x`. Accounts for the shift in the mantissa
+    /// and the guard bit; that is, an `f64` 1.0 will normalize as `m = 1 << 53` and `e = -53`.
+    e: i32,
+    neg: bool, // sign of the original float, taken from `is_sign_negative`
+}
+
+impl<F: Float> Norm<F> {
+    /// Amount subtracted from a biased exponent: the bias, the significand width, and one more
+    /// for the guard bit.
+    const EXP_UNBIAS: u32 = F::EXP_BIAS + F::SIG_BITS + 1;
+
+    /// Exponents at or above this threshold came either from a saturated exponent field
+    /// (infinity or NaN) or from the artificial exponent that `from_float` assigns to zero.
+    const ZERO_INF_NAN: u32 = F::EXP_SAT - Self::EXP_UNBIAS;
+
+    /// Split `x` into sign, normalized significand, and exponent, scaling subnormals first.
+    fn from_float(x: F) -> Self {
+        let neg = x.is_sign_negative();
+        let raw_exp = x.ex() as i32;
+
+        let (bits, biased_exp) = if raw_exp != 0 {
+            (x.to_bits(), raw_exp)
+        } else {
+            // Zero exponent field: normalize subnormals by multiplying with a power of two.
+            let scale_i = F::BITS - 1;
+            let scale_f = F::from_parts(false, scale_i + F::EXP_BIAS, F::Int::ZERO);
+            let scaled = x * scale_f;
+
+            let adjusted_exp = match scaled.ex() as i32 {
+                // Still zero after scaling, so the input was zero. Artificially pick an
+                // exponent such that the final `e` exceeds `ZERO_INF_NAN`.
+                0 => 1 << F::EXP_BITS,
+                // Otherwise, undo the scaling that was just applied.
+                scaled_exp => scaled_exp - scale_i as i32,
+            };
+
+            (scaled.to_bits(), adjusted_exp)
+        };
+
+        let e = biased_exp - Self::EXP_UNBIAS as i32;
+
+        // Drop sign and exponent, set the implicit bit, and shift to create a guard bit.
+        let m = ((bits & F::SIG_MASK) | F::IMPLICIT_BIT) << 1;
+
+        Self { m, e, neg }
+    }
+
+    /// True if the value was zero, infinity, or NaN.
+    fn is_zero_nan_inf(self) -> bool {
+        let threshold = Self::ZERO_INF_NAN as i32;
+        self.e >= threshold
+    }
+
+    /// True only if the value was zero.
+    fn is_zero(self) -> bool {
+        // The sentinel exponent used for zero is the only one strictly above the threshold.
+        let threshold = Self::ZERO_INF_NAN as i32;
+        self.e > threshold
+    }
+}
--- a/vendor/libm/src/math/generic/fma_wide.rs
+++ b/vendor/libm/src/math/generic/fma_wide.rs
@@ -0,0 +1,73 @@
+use crate::support::{
+    CastFrom, CastInto, DFloat, Float, FpResult, HFloat, IntTy, MinInt, Round, Status,
+};
+
+/// Fma implementation when a hardware-backed larger float type is available. For `f32` and `f64`,
+/// `f64` has enough precision to represent the `f32` in its entirety, except for double rounding.
+#[inline]
+pub fn fma_wide_round<F, B>(x: F, y: F, z: F, round: Round) -> FpResult<F>
+where
+    F: Float + HFloat<D = B>,
+    B: Float + DFloat<H = F>,
+    B::Int: CastInto<i32>,
+    i32: CastFrom<i32>,
+{
+    let one = IntTy::<B>::ONE;
+
+    let xy: B = x.widen() * y.widen();
+    let mut result: B = xy + z.widen();
+    let mut ui: B::Int = result.to_bits();
+    let re = result.ex();
+    let zb: B = z.widen();
+
+    let prec_diff = B::SIG_BITS - F::SIG_BITS; // extra significand bits `B` has over `F`
+    let excess_prec = ui & ((one << prec_diff) - one); // low `prec_diff` bits of the wide result
+    let halfway = one << (prec_diff - 1); // only the top bit of those low bits set
+
+    // Common case: the larger precision is fine if...
+    // This is not a halfway case
+    if excess_prec != halfway
+        // Or the result is NaN
+        || re == B::EXP_SAT
+        // Or the result is exact
+        || (result - xy == zb && result - zb == xy)
+        // Or the mode is something other than round to nearest
+        || round != Round::Nearest
+    {
+        let min_inexact_exp = (B::EXP_BIAS as i32 + F::EXP_MIN_SUBNORM) as u32;
+        let max_inexact_exp = (B::EXP_BIAS as i32 + F::EXP_MIN) as u32;
+
+        let mut status = Status::OK;
+
+        if (min_inexact_exp..max_inexact_exp).contains(&re) && status.inexact() {
+            // Never hit: `status` is still `OK` here, so an earlier operation would have to set it
+            status.set_inexact(false);
+
+            result = xy + z.widen();
+            if status.inexact() {
+                status.set_underflow(true);
+            } else {
+                status.set_inexact(true);
+            }
+        }
+
+        return FpResult {
+            val: result.narrow(),
+            status,
+        };
+    }
+
+    let neg = ui >> (B::BITS - 1) != IntTy::<B>::ZERO; // sign bit of the wide result
+    let err = if neg == (zb > xy) {
+        xy - result + zb
+    } else {
+        zb - result + xy
+    };
+    if neg == (err < B::ZERO) {
+        ui += one;
+    } else {
+        ui -= one;
+    }
+
+    FpResult::ok(B::from_bits(ui).narrow())
+}
--- a/vendor/libm/src/math/generic/fmax.rs
+++ b/vendor/libm/src/math/generic/fmax.rs
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: MIT OR Apache-2.0 */
+//! IEEE 754-2008 `maxNum`. This has been superseded by IEEE 754-2019 `maximumNumber`.
+//!
+//! Per the spec, returns the canonicalized result of:
+//! - `x` if `x > y`
+//! - `y` if `y > x`
+//! - The other number if one is NaN
+//! - Otherwise, either `x` or `y`, canonicalized
+//! - -0.0 and +0.0 may be disregarded (unlike newer operations)
+//!
+//! Excluded from our implementation is sNaN handling.
+//!
+//! More on the differences: [link].
+//!
+//! [link]: https://grouper.ieee.org/groups/msc/ANSI_IEEE-Std-754-2019/background/minNum_maxNum_Removal_Demotion_v3.pdf
+
+use crate::support::Float;
+
+/// Return the larger of `x` and `y`, canonicalized; if exactly one is NaN, return the other.
+#[inline]
+pub fn fmax<F: Float>(x: F, y: F) -> F {
+    // `y` wins when `x` is NaN or strictly smaller; every other case (including a NaN `y`)
+    // keeps `x`.
+    let take_y = x.is_nan() || x < y;
+    let chosen = if take_y { y } else { x };
+    chosen.canonicalize()
+}
--- a/vendor/libm/src/math/generic/fmaximum.rs
+++ b/vendor/libm/src/math/generic/fmaximum.rs
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT OR Apache-2.0 */
+//! IEEE 754-2019 `maximum`.
+//!
+//! Per the spec, returns the canonicalized result of:
+//! - `x` if `x > y`
+//! - `y` if `y > x`
+//! - +0.0 if x and y are zero with opposite signs
+//! - qNaN if either operand is NaN
+//!
+//! Excluded from our implementation is sNaN handling.
+
+use crate::support::Float;
+
+/// Return the larger of `x` and `y`, canonicalized, propagating NaN and treating +0.0 as
+/// larger than -0.0.
+#[inline]
+pub fn fmaximum<F: Float>(x: F, y: F) -> F {
+    // A NaN operand is returned as-is, with `x` checked first.
+    if x.is_nan() {
+        return x.canonicalize();
+    }
+    if y.is_nan() {
+        return y.canonicalize();
+    }
+
+    // `x` wins when strictly greater, or when `y` is -0.0 and `x` carries a positive sign.
+    let x_wins = x > y || (y.biteq(F::NEG_ZERO) && x.is_sign_positive());
+    let chosen = if x_wins { x } else { y };
+    chosen.canonicalize()
+}
--- a/vendor/libm/src/math/generic/fmaximum_num.rs
+++ b/vendor/libm/src/math/generic/fmaximum_num.rs
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: MIT OR Apache-2.0 */
+//! IEEE 754-2019 `maximumNumber`.
+//!
+//! Per the spec, returns:
+//! - `x` if `x > y`
+//! - `y` if `y > x`
+//! - +0.0 if x and y are zero with opposite signs
+//! - Either `x` or `y` if `x == y` and the signs are the same
+//! - Non-NaN if one operand is NaN
+//! - qNaN if both operands are NaN
+//!
+//! Excluded from our implementation is sNaN handling.
+
+use crate::support::Float;
+
+/// Return the larger of `x` and `y`, canonicalized, preferring a number over NaN and +0.0 over
+/// -0.0.
+#[inline]
+pub fn fmaximum_num<F: Float>(x: F, y: F) -> F {
+    let prefer_x = if x > y || y.is_nan() {
+        true
+    } else if y > x || x.is_nan() {
+        false
+    } else {
+        // Neither compares greater: pick `x` only when it has a positive sign.
+        x.is_sign_positive()
+    };
+
+    let chosen = if prefer_x { x } else { y };
+    chosen.canonicalize()
+}
--- a/vendor/libm/src/math/generic/fmin.rs
+++ b/vendor/libm/src/math/generic/fmin.rs
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: MIT OR Apache-2.0 */
+//! IEEE 754-2008 `minNum`. This has been superseded by IEEE 754-2019 `minimumNumber`.
+//!
+//! Per the spec, returns the canonicalized result of:
+//! - `x` if `x < y`
+//! - `y` if `y < x`
+//! - The other number if one is NaN
+//! - Otherwise, either `x` or `y`, canonicalized
+//! - -0.0 and +0.0 may be disregarded (unlike newer operations)
+//!
+//! Excluded from our implementation is sNaN handling.
+//!
+//! More on the differences: [link].
+//!
+//! [link]: https://grouper.ieee.org/groups/msc/ANSI_IEEE-Std-754-2019/background/minNum_maxNum_Removal_Demotion_v3.pdf
+
+use crate::support::Float;
+
+/// Return the smaller of `x` and `y`, canonicalized; if exactly one is NaN, return the other.
+#[inline]
+pub fn fmin<F: Float>(x: F, y: F) -> F {
+    // `x` wins when `y` is NaN or `x` is strictly smaller; every other case (including a NaN
+    // `x`) yields `y`.
+    let keep_x = y.is_nan() || x < y;
+    let chosen = if keep_x { x } else { y };
+    chosen.canonicalize()
+}
--- a/vendor/libm/src/math/generic/fminimum.rs
+++ b/vendor/libm/src/math/generic/fminimum.rs
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT OR Apache-2.0 */
+//! IEEE 754-2019 `minimum`.
+//!
+//! Per the spec, returns the canonicalized result of:
+//! - `x` if `x < y`
+//! - `y` if `y < x`
+//! - -0.0 if x and y are zero with opposite signs
+//! - qNaN if either operand is NaN
+//!
+//! Excluded from our implementation is sNaN handling.
+
+use crate::support::Float;
+
+/// Return the smaller of `x` and `y`, canonicalized, propagating NaN and treating -0.0 as
+/// smaller than +0.0.
+#[inline]
+pub fn fminimum<F: Float>(x: F, y: F) -> F {
+    // A NaN operand is returned as-is, with `x` checked first.
+    if x.is_nan() {
+        return x.canonicalize();
+    }
+    if y.is_nan() {
+        return y.canonicalize();
+    }
+
+    // `x` wins when strictly smaller, or when `x` is -0.0 and `y` carries a positive sign.
+    let x_wins = x < y || (x.biteq(F::NEG_ZERO) && y.is_sign_positive());
+    let chosen = if x_wins { x } else { y };
+    chosen.canonicalize()
+}
--- a/vendor/libm/src/math/generic/fminimum_num.rs
+++ b/vendor/libm/src/math/generic/fminimum_num.rs
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: MIT OR Apache-2.0 */
+//! IEEE 754-2019 `minimumNumber`.
+//!
+//! Per the spec, returns:
+//! - `x` if `x < y`
+//! - `y` if `y < x`
+//! - -0.0 if x and y are zero with opposite signs
+//! - Either `x` or `y` if `x == y` and the signs are the same
+//! - Non-NaN if one operand is NaN
+//! - qNaN if both operands are NaN
+//!
+//! Excluded from our implementation is sNaN handling.
+
+use crate::support::Float;
+
+/// Return the smaller of `x` and `y`, canonicalized, preferring a number over NaN and -0.0 over
+/// +0.0.
+#[inline]
+pub fn fminimum_num<F: Float>(x: F, y: F) -> F {
+    let prefer_y = if x > y || x.is_nan() {
+        true
+    } else if y > x || y.is_nan() {
+        false
+    } else {
+        // Neither compares greater: pick `y` when `x` has a positive sign.
+        x.is_sign_positive()
+    };
+
+    let chosen = if prefer_y { y } else { x };
+    chosen.canonicalize()
+}
--- a/vendor/libm/src/math/generic/fmod.rs
+++ b/vendor/libm/src/math/generic/fmod.rs
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: MIT OR Apache-2.0 */
+use crate::support::{CastFrom, CastInto, Float, HInt, Int, MinInt, NarrowingDiv};
+
+/// Floating point remainder of `x` divided by `y`; a non-NaN result carries the sign of `x`.
+#[inline]
+pub fn fmod<F: Float>(x: F, y: F) -> F
+where
+    F::Int: HInt,
+    <F::Int as HInt>::D: NarrowingDiv,
+{
+    let one = F::Int::ONE;
+    let x_bits = x.to_bits();
+    let sign = x_bits & F::SIGN_MASK;
+    let abs_x = x_bits & !F::SIGN_MASK;
+    let abs_y = y.to_bits() & !F::SIGN_MASK;
+
+    // Cases that return NaN:
+    //   NaN % _
+    //   Inf % _
+    //     _ % NaN
+    //     _ % 0
+    let x_nan_or_inf = (abs_x & F::EXP_MASK) == F::EXP_MASK;
+    // Subtracting one wraps zero around to all ones, while infinity drops below the saturated
+    // exponent and NaN stays at or above it.
+    let y_nan_or_zero = (abs_y.wrapping_sub(one) & F::EXP_MASK) == F::EXP_MASK;
+    if x_nan_or_inf || y_nan_or_zero {
+        return (x * y) / (x * y);
+    }
+
+    // |x| < |y|: `x` is already the remainder.
+    if abs_x < abs_y {
+        return x;
+    }
+
+    let (num, ex) = into_sig_exp::<F>(abs_x);
+    let (div, ey) = into_sig_exp::<F>(abs_y);
+
+    // `(num << ex) % (div << ey)` equals `((num << (ex - ey)) % div) << ey`, so reduce with
+    // the exponent difference first and scale by `ey` afterwards.
+    let rem = reduction::<F>(num, ex - ey, div);
+
+    if rem.is_zero() {
+        // Return zero with the sign of `x`
+        return F::from_bits(sign);
+    }
+
+    // `rem` can only be shifted up until its top bit reaches position `F::SIG_BITS`...
+    let shift = ey.min(F::SIG_BITS - rem.ilog2());
+    // ...and whatever is left of `ey` goes into the exponent field.
+    let exp_field = F::Int::cast_from(ey - shift) << F::SIG_BITS;
+    F::from_bits(sign + ((rem << shift) + exp_field))
+}
+
+/// Split the bits of a finite float into
+///  - the significand including the implicit bit (set for normal values, clear for subnormals)
+///  - the exponent field minus one, clamped at zero (so 0 for subnormals)
+fn into_sig_exp<F: Float>(bits: F::Int) -> (F::Int, u32) {
+    let magnitude = bits & !F::SIGN_MASK;
+    // Subtract 1 from the exponent, clamping at 0
+    let reduced = magnitude
+        .checked_sub(F::IMPLICIT_BIT)
+        .unwrap_or(F::Int::ZERO);
+
+    let significand = magnitude - (reduced & F::EXP_MASK);
+    let extra_exp = u32::cast_from(reduced >> F::SIG_BITS);
+    (significand, extra_exp)
+}
+
+/// Compute the remainder `(x * 2.pow(e)) % y` without overflow.
+fn reduction<F>(mut x: F::Int, e: u32, y: F::Int) -> F::Int
+where
+    F: Float,
+    F::Int: HInt,
+    <<F as Float>::Int as HInt>::D: NarrowingDiv,
+{
+    // `f16` only has 5 exponent bits, so even `f16::MAX = 65504.0` is only
+    // a 40-bit integer multiple of the smallest subnormal. The whole shifted
+    // dividend therefore fits in a `u64`.
+    if F::BITS == 16 {
+        debug_assert!(F::EXP_MAX - F::EXP_MIN == 29);
+        debug_assert!(e <= 29);
+        let x16: u16 = x.cast();
+        let y16: u16 = y.cast();
+        let dividend = u64::from(x16) << e;
+        let divisor = u64::from(y16);
+        return F::Int::cast_from((dividend % divisor) as u16);
+    }
+
+    // Ensure `x < 2y` for later steps
+    if x >= (y << 1) {
+        // This case is only reached with subnormal divisors,
+        // but it might be better to just normalize all significands
+        // to make this unnecessary. The further calls could potentially
+        // benefit from assuming a specific fixed leading bit position.
+        x %= y;
+    }
+
+    // The simple implementation seems to be fastest for a short reduction
+    // at this size. The limit here was chosen empirically on an Intel Nehalem.
+    // Less old CPUs that have faster `u64 * u64 -> u128` might not benefit,
+    // and 32-bit systems or architectures without hardware multipliers might
+    // want to do this in more cases.
+    if F::BITS == 64 && e < 32 {
+        // Assumes `x < 2y`: conditionally subtract `y`, then double, once per bit of `e`.
+        let shifted = (0..e).fold(x, |acc, _| acc.checked_sub(y).unwrap_or(acc) << 1);
+        return shifted.checked_sub(y).unwrap_or(shifted);
+    }
+
+    // Fast path for short reductions
+    if e < F::BITS {
+        let wide = x.widen() << e;
+        if let Some((_, remainder)) = wide.checked_narrowing_div_rem(y) {
+            return remainder;
+        }
+    }
+
+    // Assumes `x < 2y`
+    crate::support::linear_mul_reduction(x, e, y)
+}
--- a/vendor/libm/src/math/generic/mod.rs
+++ b/vendor/libm/src/math/generic/mod.rs
@@ -0,0 +1,42 @@
+// Note: generic functions are marked `#[inline]` because, even though generic functions are
+// typically inlined, this does not seem to always be the case.
+
+mod ceil;
+mod copysign;
+mod fabs;
+mod fdim;
+mod floor;
+mod fma;
+mod fma_wide;
+mod fmax;
+mod fmaximum;
+mod fmaximum_num;
+mod fmin;
+mod fminimum;
+mod fminimum_num;
+mod fmod;
+mod rint;
+mod round;
+mod scalbn;
+mod sqrt;
+mod trunc;
+
+pub use ceil::ceil;
+pub use copysign::copysign;
+pub use fabs::fabs;
+pub use fdim::fdim;
+pub use floor::floor;
+pub use fma::fma_round;
+pub use fma_wide::fma_wide_round;
+pub use fmax::fmax;
+pub use fmaximum::fmaximum;
+pub use fmaximum_num::fmaximum_num;
+pub use fmin::fmin;
+pub use fminimum::fminimum;
+pub use fminimum_num::fminimum_num;
+pub use fmod::fmod;
+pub use rint::rint_round;
+pub use round::round;
+pub use scalbn::scalbn;
+pub use sqrt::sqrt;
+pub use trunc::trunc;
--- a/vendor/libm/src/math/generic/rint.rs
+++ b/vendor/libm/src/math/generic/rint.rs
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: MIT */
+/* origin: musl src/math/rint.c */
+
+use crate::support::{Float, FpResult, Round};
+
+/// IEEE 754-2019 `roundToIntegralExact`, which respects rounding mode and raises inexact if
+/// applicable.
+#[inline]
+pub fn rint_round<F: Float>(x: F, _round: Round) -> FpResult<F> {
+    let toint = F::ONE / F::EPSILON;
+    let e = x.ex();
+    let positive = x.is_sign_positive();
+
+    // On i386 `force_eval!` must be used to force rounding via storage to memory. Otherwise,
+    // the excess precision from x87 would cause an incorrect final result.
+    let force = |x| {
+        if cfg!(x86_no_sse) && (F::BITS == 32 || F::BITS == 64) {
+            force_eval!(x)
+        } else {
+            x
+        }
+    };
+
+    let res = if e >= F::EXP_BIAS + F::SIG_BITS {
+        // No fractional part; exact result can be returned.
+        x
+    } else {
+        // Apply a net-zero adjustment that nudges `y` in the direction of the rounding mode. For
+        // Rust this is always nearest, but ideally it would take `_round` (unused) into account.
+        let y = if positive {
+            force(force(x) + toint) - toint
+        } else {
+            force(force(x) - toint) + toint
+        };
+
+        if y == F::ZERO {
+            // A zero result takes the sign of the input.
+            if positive { F::ZERO } else { F::NEG_ZERO }
+        } else {
+            y
+        }
+    };
+    // The result is always wrapped with `FpResult::ok`; no inexact status is computed here.
+    FpResult::ok(res)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::support::{Hexf, Status};
+
+    fn spec_test<F: Float>(cases: &[(F, F, Status)]) {
+        let roundtrip = [
+            F::ZERO,
+            F::ONE,
+            F::NEG_ONE,
+            F::NEG_ZERO,
+            F::INFINITY,
+            F::NEG_INFINITY,
+        ];
+        // Integral, zero, and infinite inputs must come back unchanged with an OK status.
+        for x in roundtrip {
+            let FpResult { val, status } = rint_round(x, Round::Nearest);
+            assert_biteq!(val, x, "rint_round({})", Hexf(x));
+            assert_eq!(status, Status::OK, "{}", Hexf(x));
+        }
+        // Each case is `(input, expected value, expected status)`.
+        for &(x, res, res_stat) in cases {
+            let FpResult { val, status } = rint_round(x, Round::Nearest);
+            assert_biteq!(val, res, "rint_round({})", Hexf(x));
+            assert_eq!(status, res_stat, "{}", Hexf(x));
+        }
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn spec_tests_f16() {
+        let cases = []; // no type-specific cases; only the shared roundtrip values are checked
+        spec_test::<f16>(&cases);
+    }
+
+    #[test]
+    fn spec_tests_f32() {
+        let cases = [
+            (0.1, 0.0, Status::OK),
+            (-0.1, -0.0, Status::OK),
+            (0.5, 0.0, Status::OK), // tie: expected to land on the even neighbour
+            (-0.5, -0.0, Status::OK),
+            (0.9, 1.0, Status::OK),
+            (-0.9, -1.0, Status::OK),
+            (1.1, 1.0, Status::OK),
+            (-1.1, -1.0, Status::OK),
+            (1.5, 2.0, Status::OK), // tie: expected to land on the even neighbour
+            (-1.5, -2.0, Status::OK),
+            (1.9, 2.0, Status::OK),
+            (-1.9, -2.0, Status::OK),
+            (2.8, 3.0, Status::OK),
+            (-2.8, -3.0, Status::OK),
+        ];
+        spec_test::<f32>(&cases);
+    }
+
+    #[test]
+    fn spec_tests_f64() {
+        let cases = [
+            (0.1, 0.0, Status::OK),
+            (-0.1, -0.0, Status::OK),
+            (0.5, 0.0, Status::OK), // tie: expected to land on the even neighbour
+            (-0.5, -0.0, Status::OK),
+            (0.9, 1.0, Status::OK),
+            (-0.9, -1.0, Status::OK),
+            (1.1, 1.0, Status::OK),
+            (-1.1, -1.0, Status::OK),
+            (1.5, 2.0, Status::OK), // tie: expected to land on the even neighbour
+            (-1.5, -2.0, Status::OK),
+            (1.9, 2.0, Status::OK),
+            (-1.9, -2.0, Status::OK),
+            (2.8, 3.0, Status::OK),
+            (-2.8, -3.0, Status::OK),
+        ];
+        spec_test::<f64>(&cases);
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn spec_tests_f128() {
+        let cases = []; // no type-specific cases; only the shared roundtrip values are checked
+        spec_test::<f128>(&cases);
+    }
+}
--- a/vendor/libm/src/math/generic/round.rs
+++ b/vendor/libm/src/math/generic/round.rs
@@ -0,0 +1,83 @@
+use super::{copysign, trunc};
+use crate::support::{Float, MinInt};
+
+#[inline]
+pub fn round<F: Float>(x: F) -> F {
+    let f0p5 = F::from_parts(false, F::EXP_BIAS - 1, F::Int::ZERO); // 0.5
+    let f0p25 = F::from_parts(false, F::EXP_BIAS - 2, F::Int::ZERO); // 0.25
+    // Push `x` away from zero by `0.5 - 0.25 * EPSILON`, then truncate the fractional part.
+    trunc(x + copysign(f0p5 - f0p25 * F::EPSILON, x))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn zeroes_f16() {
+        assert_biteq!(round(0.0_f16), 0.0_f16);
+        assert_biteq!(round(-0.0_f16), -0.0_f16);
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn sanity_check_f16() {
+        assert_eq!(round(-1.0_f16), -1.0);
+        assert_eq!(round(2.8_f16), 3.0);
+        assert_eq!(round(-0.5_f16), -1.0); // ties are expected to round away from zero
+        assert_eq!(round(0.5_f16), 1.0);
+        assert_eq!(round(-1.5_f16), -2.0);
+        assert_eq!(round(1.5_f16), 2.0);
+    }
+
+    #[test]
+    fn zeroes_f32() {
+        assert_biteq!(round(0.0_f32), 0.0_f32);
+        assert_biteq!(round(-0.0_f32), -0.0_f32);
+    }
+
+    #[test]
+    fn sanity_check_f32() {
+        assert_eq!(round(-1.0_f32), -1.0);
+        assert_eq!(round(2.8_f32), 3.0);
+        assert_eq!(round(-0.5_f32), -1.0); // ties are expected to round away from zero
+        assert_eq!(round(0.5_f32), 1.0);
+        assert_eq!(round(-1.5_f32), -2.0);
+        assert_eq!(round(1.5_f32), 2.0);
+    }
+
+    #[test]
+    fn zeroes_f64() {
+        assert_biteq!(round(0.0_f64), 0.0_f64);
+        assert_biteq!(round(-0.0_f64), -0.0_f64);
+    }
+
+    #[test]
+    fn sanity_check_f64() {
+        assert_eq!(round(-1.0_f64), -1.0);
+        assert_eq!(round(2.8_f64), 3.0);
+        assert_eq!(round(-0.5_f64), -1.0); // ties are expected to round away from zero
+        assert_eq!(round(0.5_f64), 1.0);
+        assert_eq!(round(-1.5_f64), -2.0);
+        assert_eq!(round(1.5_f64), 2.0);
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn zeroes_f128() {
+        assert_biteq!(round(0.0_f128), 0.0_f128);
+        assert_biteq!(round(-0.0_f128), -0.0_f128);
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn sanity_check_f128() {
+        assert_eq!(round(-1.0_f128), -1.0);
+        assert_eq!(round(2.8_f128), 3.0);
+        assert_eq!(round(-0.5_f128), -1.0); // ties are expected to round away from zero
+        assert_eq!(round(0.5_f128), 1.0);
+        assert_eq!(round(-1.5_f128), -2.0);
+        assert_eq!(round(1.5_f128), 2.0);
+    }
+}
--- a/vendor/libm/src/math/generic/scalbn.rs
+++ b/vendor/libm/src/math/generic/scalbn.rs
@@ -0,0 +1,121 @@
+use crate::support::{CastFrom, CastInto, Float, IntTy, MinInt};
+
+/// Scale the exponent.
+///
+/// From N3220:
+///
+/// > The scalbn and scalbln functions compute `x * b^n`, where `b = FLT_RADIX` if the return type
+/// > of the function is a standard floating type, or `b = 10` if the return type of the function
+/// > is a decimal floating type. A range error occurs for some finite x, depending on n.
+/// >
+/// > [...]
+/// >
+/// > * `scalbn(±0, n)` returns `±0`.
+/// > * `scalbn(x, 0)` returns `x`.
+/// > * `scalbn(±∞, n)` returns `±∞`.
+/// >
+/// > If the calculation does not overflow or underflow, the returned value is exact and
+/// > independent of the current rounding direction mode.
+#[inline]
+pub fn scalbn<F: Float>(mut x: F, mut n: i32) -> F
+where
+    u32: CastInto<F::Int>,
+    F::Int: CastFrom<i32>,
+    F::Int: CastFrom<u32>,
+{
+    let zero = IntTy::<F>::ZERO;
+
+    // Bits including the implicit bit
+    let sig_total_bits = F::SIG_BITS + 1;
+
+    // Maximum and minimum exponents (unbiased; compared directly against `n` below)
+    let exp_max = F::EXP_MAX;
+    let exp_min = F::EXP_MIN;
+
+    // 2 ^ Emax, maximum positive with null significand (0x1p1023 for f64)
+    let f_exp_max = F::from_parts(false, F::EXP_BIAS << 1, zero);
+
+    // 2 ^ Emin, minimum positive normal with null significand (0x1p-1022 for f64)
+    let f_exp_min = F::from_parts(false, 1, zero);
+
+    // 2 ^ sig_total_bits, multiplier to normalize subnormals (0x1p53 for f64)
+    let f_pow_subnorm = F::from_parts(false, sig_total_bits + F::EXP_BIAS, zero);
+
+    /*
+     * The goal is to multiply `x` by a scale factor that applies `n`. However, there are cases
+     * where `2^n` is not representable by `F` but the result should be, e.g. `x = 2^Emin` with
+     * `n = -EMin + 2` (one out of range of 2^Emax). To get around this, reduce the magnitude of
+     * the final scale operation by prescaling by the max/min power representable by `F`.
+     */
+
+    if n > exp_max {
+        // Worst case positive `n`: `x` is the minimum subnormal value, the result is `F::MAX`.
+        // This can be reached by three scaling multiplications (two here and one final).
+        debug_assert!(-exp_min + F::SIG_BITS as i32 + exp_max <= exp_max * 3);
+
+        x *= f_exp_max;
+        n -= exp_max;
+        if n > exp_max {
+            x *= f_exp_max;
+            n -= exp_max;
+            if n > exp_max {
+                n = exp_max;
+            }
+        }
+    } else if n < exp_min {
+        // When scaling toward 0, the prescaling is limited to a value that does not allow `x` to
+        // go subnormal. This avoids double rounding.
+        if F::BITS > 16 {
+            // `mul` s.t. `!(x * mul).is_subnormal() ∀ x`
+            let mul = f_exp_min * f_pow_subnorm;
+            let add = -exp_min - sig_total_bits as i32;
+
+            // Worst case negative `n`: `x` is the maximum positive value, the result is subnormal.
+            // This must be reachable by three scaling multiplications (two here and one final).
+            debug_assert!(-exp_min + F::SIG_BITS as i32 + exp_max <= add * 2 + -exp_min);
+
+            x *= mul;
+            n += add;
+
+            if n < exp_min {
+                x *= mul;
+                n += add;
+
+                if n < exp_min {
+                    n = exp_min;
+                }
+            }
+        } else {
+            // `f16` is unique compared to other float types in that the difference between the
+            // minimum exponent and the significand bits (`add = -exp_min - sig_total_bits`) is
+            // small, only three. The above method depends on decrementing `n` by `add` two times;
+            // for other float types this works out because `add` is a substantial fraction of
+            // the exponent range. For `f16`, however, 3 is relatively small compared to the
+            // exponent range (which is 39), so that requires ~10 prescale rounds rather than two.
+            //
+            // Work around this by using a different algorithm that calculates the prescale
+            // dynamically based on the maximum possible value. This adds more operations per round
+            // since it needs to construct the scale, but works better in the general case.
+            let add = -(n + sig_total_bits as i32).max(exp_min);
+            let mul = F::from_parts(false, (F::EXP_BIAS as i32 - add) as u32, zero);
+
+            x *= mul;
+            n += add;
+
+            if n < exp_min {
+                let add = -(n + sig_total_bits as i32).max(exp_min);
+                let mul = F::from_parts(false, (F::EXP_BIAS as i32 - add) as u32, zero);
+
+                x *= mul;
+                n += add;
+
+                if n < exp_min {
+                    n = exp_min;
+                }
+            }
+        }
+    }
+
+    let scale = F::from_parts(false, (F::EXP_BIAS as i32 + n) as u32, zero);
+    x * scale
+}
--- a/vendor/libm/src/math/generic/sqrt.rs
+++ b/vendor/libm/src/math/generic/sqrt.rs
@@ -0,0 +1,541 @@
+/* SPDX-License-Identifier: MIT */
+/* origin: musl src/math/sqrt.c. Ported to generic Rust algorithm in 2025, TG. */
+
+//! Generic square root algorithm.
+//!
+//! This routine operates around `m_u2`, a U.2 (fixed point with two integral bits) mantissa
+//! within the range [1, 4). A table lookup provides an initial estimate, then Goldschmidt
+//! iterations at various widths are used to approach the real values.
+//!
+//! For the iterations, `r` is a U0 number that approaches `1/sqrt(m_u2)`, and `s` is a U2 number
+//! that approaches `sqrt(m_u2)`. Recall that m_u2 ∈ [1, 4).
+//!
+//! With Newton-Raphson iterations, this would be:
+//!
+//! - `w = r * r           w ~ 1 / m`
+//! - `u = 3 - m * w       u ~ 3 - m * w = 3 - m / m = 2`
+//! - `r = r * u / 2       r ~ r`
+//!
+//! (Note that the righthand column does not show anything analytically meaningful (i.e. r ~ r),
+//! since the value of performing one iteration is in reducing the error representable by `~`).
+//!
+//! Instead of Newton-Raphson iterations, Goldschmidt iterations are used to calculate
+//! `s = m * r`:
+//!
+//! - `s = m * r           s ~ m / sqrt(m)`
+//! - `u = 3 - s * r       u ~ 3 - (m / sqrt(m)) * (1 / sqrt(m)) = 3 - m / m = 2`
+//! - `r = r * u / 2       r ~ r`
+//! - `s = s * u / 2       s ~ s`
+//!
+//! The above is precise because it uses the original value `m`. There is also a faster version
+//! that performs fewer steps but does not use `m`:
+//!
+//! - `u = 3 - s * r       u ~ 3 - 1`
+//! - `r = r * u / 2       r ~ r`
+//! - `s = s * u / 2       s ~ s`
+//!
+//! Rounding errors accumulate faster with the second version, so it is only used for subsequent
+//! iterations within the same width integer. The first version is always used for the first
+//! iteration at a new width in order to avoid this accumulation.
+//!
+//! Goldschmidt has the advantage over Newton-Raphson that `sqrt(x)` and `1/sqrt(x)` are
+//! computed at the same time, i.e. there is no need to calculate `1/sqrt(x)` and invert it.
+
+use crate::support::{
+    CastFrom, CastInto, DInt, Float, FpResult, HInt, Int, IntTy, MinInt, Round, Status, cold_path,
+};
+
+#[inline]
+pub fn sqrt<F>(x: F) -> F
+where
+    F: Float + SqrtHelper,
+    F::Int: HInt,
+    F::Int: From<u8>,
+    F::Int: From<F::ISet2>,
+    F::Int: CastInto<F::ISet1>,
+    F::Int: CastInto<F::ISet2>,
+    u32: CastInto<F::Int>,
+{
+    sqrt_round(x, Round::Nearest).val // only the value is returned; the status is dropped
+}
+
+#[inline]
+pub fn sqrt_round<F>(x: F, _round: Round) -> FpResult<F>
+where
+    F: Float + SqrtHelper,
+    F::Int: HInt,
+    F::Int: From<u8>,
+    F::Int: From<F::ISet2>,
+    F::Int: CastInto<F::ISet1>,
+    F::Int: CastInto<F::ISet2>,
+    u32: CastInto<F::Int>,
+{
+    let zero = IntTy::<F>::ZERO;
+    let one = IntTy::<F>::ONE;
+
+    let mut ix = x.to_bits();
+
+    // Top is the exponent and sign, which may or may not be shifted. If the float fits into a
+    // `u32`, we can get by without paying shifting costs.
+    let noshift = F::BITS <= u32::BITS;
+    let (mut top, special_case) = if noshift {
+        let exp_lsb = one << F::SIG_BITS;
+        let special_case = ix.wrapping_sub(exp_lsb) >= F::EXP_MASK - exp_lsb;
+        (Exp::NoShift(()), special_case)
+    } else {
+        let top = u32::cast_from(ix >> F::SIG_BITS);
+        let special_case = top.wrapping_sub(1) >= F::EXP_SAT - 1;
+        (Exp::Shifted(top), special_case)
+    };
+
+    // Handle zero, infinity, NaN, negative (out of domain), and subnormal inputs
+    if special_case {
+        cold_path();
+
+        // +/-0
+        if ix << 1 == zero {
+            return FpResult::ok(x);
+        }
+
+        // Positive infinity
+        if ix == F::EXP_MASK {
+            return FpResult::ok(x);
+        }
+
+        // NaN or negative
+        if ix > F::EXP_MASK {
+            return FpResult::new(F::NAN, Status::INVALID);
+        }
+
+        // Normalize subnormals by multiplying by 1.0 << SIG_BITS (e.g. 0x1p52 for doubles).
+        let scaled = x * F::from_parts(false, F::SIG_BITS + F::EXP_BIAS, zero);
+        ix = scaled.to_bits();
+        match top {
+            Exp::Shifted(ref mut v) => {
+                *v = scaled.ex();
+                *v = (*v).wrapping_sub(F::SIG_BITS);
+            }
+            Exp::NoShift(()) => {
+                ix = ix.wrapping_sub((F::SIG_BITS << F::SIG_BITS).cast());
+            }
+        }
+    }
+
+    // Reduce arguments such that `x = 4^e * m`:
+    //
+    // - m_u2 ∈ [1, 4), a fixed point U2.BITS number
+    // - 2^e is the exponent part of the result
+    let (m_u2, exp) = match top {
+        Exp::Shifted(top) => {
+            // We now know `x` is positive, so `top` is just its (biased) exponent
+            let mut e = top;
+            // Construct a fixed point representation of the mantissa.
+            let mut m_u2 = (ix | F::IMPLICIT_BIT) << F::EXP_BITS;
+            let even = (e & 1) != 0;
+            if even {
+                m_u2 >>= 1;
+            }
+            e = (e.wrapping_add(F::EXP_SAT >> 1)) >> 1;
+            (m_u2, Exp::Shifted(e))
+        }
+        Exp::NoShift(()) => {
+            let even = ix & (one << F::SIG_BITS) != zero;
+
+            // Exponent part of the return value
+            let mut e_noshift = ix >> 1;
+            // ey &= (F::EXP_MASK << 2) >> 2; // clear the top exponent bit (result = 1.0)
+            e_noshift += (F::EXP_MASK ^ (F::SIGN_MASK >> 1)) >> 1;
+            e_noshift &= F::EXP_MASK;
+
+            let m1 = (ix << F::EXP_BITS) | F::SIGN_MASK;
+            let m0 = (ix << (F::EXP_BITS - 1)) & !F::SIGN_MASK;
+            let m_u2 = if even { m0 } else { m1 };
+
+            (m_u2, Exp::NoShift(e_noshift))
+        }
+    };
+
+    // Extract the top 6 bits of the significand with the lowest bit of the exponent.
+    let i = usize::cast_from(ix >> (F::SIG_BITS - 6)) & 0b1111111;
+
+    // Start with an initial guess for `r = 1 / sqrt(m)` from the table, and shift `m` as an
+    // initial value for `s = sqrt(m)`. See the module documentation for details.
+    let r1_u0: F::ISet1 = F::ISet1::cast_from(RSQRT_TAB[i]) << (F::ISet1::BITS - 16);
+    let s1_u2: F::ISet1 = ((m_u2) >> (F::BITS - F::ISet1::BITS)).cast();
+
+    // Perform iterations, if any, at quarter width (used for `f128`).
+    let (r1_u0, _s1_u2) = goldschmidt::<F, F::ISet1>(r1_u0, s1_u2, F::SET1_ROUNDS, false);
+
+    // Widen values and perform iterations at half width (used for `f64` and `f128`).
+    let r2_u0: F::ISet2 = F::ISet2::from(r1_u0) << (F::ISet2::BITS - F::ISet1::BITS);
+    let s2_u2: F::ISet2 = ((m_u2) >> (F::BITS - F::ISet2::BITS)).cast();
+    let (r2_u0, _s2_u2) = goldschmidt::<F, F::ISet2>(r2_u0, s2_u2, F::SET2_ROUNDS, false);
+
+    // Perform final iterations at full width (used for all float types).
+    let r_u0: F::Int = F::Int::from(r2_u0) << (F::BITS - F::ISet2::BITS);
+    let s_u2: F::Int = m_u2;
+    let (_r_u0, s_u2) = goldschmidt::<F, F::Int>(r_u0, s_u2, F::FINAL_ROUNDS, true);
+
+    // Shift back to mantissa position.
+    let mut m = s_u2 >> (F::EXP_BITS - 2);
+
+    // The musl source includes the following comment (with literals replaced):
+    //
+    // > s < sqrt(m) < s + 0x1.09p-SIG_BITS
+    // > compute nearest rounded result: the nearest result to SIG_BITS bits is either s or
+    // > s+0x1p-SIG_BITS, we can decide by comparing (2^SIG_BITS s + 0.5)^2 to 2^(2*SIG_BITS) m.
+    //
+    // Expanding this, with `SIG_BITS = p`, and adjusting based on the operations done to
+    // `d0` and `d1`:
+    //
+    // - `2^(2p)m ≟ ((2^p)m + 0.5)^2`
+    // - `2^(2p)m ≟ 2^(2p)m^2 + (2^p)m + 0.25`
+    // - `2^(2p)m - m^2 ≟ (2^(2p) - 1)m^2 + (2^p)m + 0.25`
+    // - `(1 - 2^(2p))m + m^2 ≟ (1 - 2^(2p))m^2 + (1 - 2^p)m + 0.25` (?)
+    //
+    // I do not follow how the rounding bit is extracted from this comparison with the below
+    // operations. In any case, the algorithm is well tested.
+
+    // The value needed to shift `m_u2` by to create `m*2^(2p)`. `2p = 2 * F::SIG_BITS`,
+    // `F::BITS - 2` accounts for the offset that `m_u2` already has.
+    let shift = 2 * F::SIG_BITS - (F::BITS - 2);
+
+    // `2^(2p)m - m^2`
+    let d0 = (m_u2 << shift).wrapping_sub(m.wrapping_mul(m));
+    // `m - 2^(2p)m + m^2`
+    let d1 = m.wrapping_sub(d0);
+    m += d1 >> (F::BITS - 1);
+    m &= F::SIG_MASK;
+
+    match exp {
+        Exp::Shifted(e) => m |= IntTy::<F>::cast_from(e) << F::SIG_BITS,
+        Exp::NoShift(e) => m |= e,
+    };
+
+    let mut y = F::from_bits(m);
+
+    // FIXME(f16): the fenv math does not work for `f16`
+    if F::BITS > 16 {
+        // Handle rounding and inexact. `(m + 1)^2 == 2^shift m` is exact; for all other cases, add
+        // a tiny value to cause fenv effects.
+        let d2 = d1.wrapping_add(m).wrapping_add(one);
+        let mut tiny = if d2 == zero {
+            cold_path();
+            zero
+        } else {
+            F::IMPLICIT_BIT
+        };
+
+        tiny |= (d1 ^ d2) & F::SIGN_MASK;
+        let t = F::from_bits(tiny);
+        y = y + t;
+    }
+    // The result is wrapped with `FpResult::ok`; inexactness only surfaces via `y + t` above.
+    FpResult::ok(y)
+}
+
+/// Multiply `a` by `b` at the wider integer size, returning the high half of the product.
+fn wmulh<I: HInt>(a: I, b: I) -> I {
+    a.widen_mul(b).hi()
+}
+
+/// Perform `count` Goldschmidt iterations, returning `(r_u0, s_u?)`.
+///
+/// - `r_u0` is the reciprocal `r ~ 1 / sqrt(m)`, as U0.
+/// - `s_u2` is the square root, `s ~ sqrt(m)`, as U2.
+/// - `count` is the number of iterations to perform.
+/// - `final_set` should be true if this is the last round (same-sized integer). If so, the
+///   returned `s` will be U3, for later shifting. Otherwise, the returned `s` is U2.
+///
+/// Note that performance relies on the optimizer being able to unroll these loops (reasonably
+/// trivial, `count` is a constant when called).
+#[inline]
+fn goldschmidt<F, I>(mut r_u0: I, mut s_u2: I, count: u32, final_set: bool) -> (I, I)
+where
+    F: SqrtHelper,
+    I: HInt + From<u8>,
+{
+    let three_u2 = I::from(0b11u8) << (I::BITS - 2);
+    let mut u_u0 = r_u0;
+
+    for i in 0..count {
+        // First iteration: `s = m*r` (`u_u0 = r_u0` set above)
+        // Subsequent iterations: `s = s*u/2`
+        s_u2 = wmulh(s_u2, u_u0);
+
+        // Perform the `s /= 2` rescale (implemented below as `s_u2 <<= 1`) if:
+        //
+        // 1. This is not the first iteration (the first iteration is `s = m*r`)...
+        // 2. ... and this is not the last set of iterations
+        // 3. ... or, if this is the last set, it is not the last iteration
+        //
+        // This step is not performed for the final iteration because the shift is combined with
+        // a later shift (moving `s` into the mantissa).
+        if i > 0 && (!final_set || i + 1 < count) {
+            s_u2 <<= 1;
+        }
+
+        // u = 3 - s*r
+        let d_u2 = wmulh(s_u2, r_u0);
+        u_u0 = three_u2.wrapping_sub(d_u2);
+
+        // r = r*u/2
+        r_u0 = wmulh(r_u0, u_u0) << 1;
+    }
+
+    (r_u0, s_u2)
+}
+
+/// Representation of whether we shift the exponent into a `u32`, or modify it in place to save
+/// the shift operations. `T` is `()` until the in-place exponent has actually been computed.
+enum Exp<T> {
+    /// The exponent has been shifted to a `u32` and is LSB-aligned.
+    Shifted(u32),
+    /// The exponent is in its natural position in integer repr.
+    NoShift(T),
+}
+
+/// Size-specific constants related to the square root routine.
+pub trait SqrtHelper: Float {
+    /// Integer for the first set of rounds. If unused, set to the same type as the next set.
+    type ISet1: HInt + Into<Self::ISet2> + CastFrom<Self::Int> + From<u8>;
+    /// Integer for the second set of rounds. If unused, set to the same type as the next set.
+    type ISet2: HInt + From<Self::ISet1> + From<u8>;
+
+    /// Number of Goldschmidt rounds at `ISet1` width; the default of zero skips this set.
+    const SET1_ROUNDS: u32 = 0;
+    /// Number of Goldschmidt rounds at `ISet2` width; the default of zero skips this set.
+    const SET2_ROUNDS: u32 = 0;
+    /// Number of Goldschmidt rounds at `Self::Int` width; every implementation must set this.
+    const FINAL_ROUNDS: u32;
+}
+
+#[cfg(f16_enabled)]
+impl SqrtHelper for f16 {
+    type ISet1 = u16; // unused: `SET1_ROUNDS` keeps its default of 0
+    type ISet2 = u16; // unused: `SET2_ROUNDS` keeps its default of 0
+
+    const FINAL_ROUNDS: u32 = 2;
+}
+
+impl SqrtHelper for f32 {
+    type ISet1 = u32; // unused: `SET1_ROUNDS` keeps its default of 0
+    type ISet2 = u32; // unused: `SET2_ROUNDS` keeps its default of 0
+
+    const FINAL_ROUNDS: u32 = 3;
+}
+
+impl SqrtHelper for f64 {
+    type ISet1 = u32; // unused: `SET1_ROUNDS` keeps its default of 0
+    type ISet2 = u32; // half-width integer used for the `SET2_ROUNDS` iterations
+
+    const SET2_ROUNDS: u32 = 2;
+    const FINAL_ROUNDS: u32 = 2;
+}
+
+#[cfg(f128_enabled)]
+impl SqrtHelper for f128 {
+    type ISet1 = u32; // quarter-width integer used for the `SET1_ROUNDS` iterations
+    type ISet2 = u64; // half-width integer used for the `SET2_ROUNDS` iterations
+
+    const SET1_ROUNDS: u32 = 1;
+    const SET2_ROUNDS: u32 = 2;
+    const FINAL_ROUNDS: u32 = 2;
+}
+
+/// A U0.16 representation of `1/sqrt(x)`, used as the initial estimate for the iterations.
+///
+/// The index is a 7-bit number consisting of a single exponent bit and 6 bits of significand.
+#[rustfmt::skip]
+static RSQRT_TAB: [u16; 128] = [
+    0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
+    0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
+    0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
+    0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
+    0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
+    0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
+    0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
+    0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
+    0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
+    0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
+    0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
+    0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
+    0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
+    0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
+    0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
+    0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
+];
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Test behavior specified in IEEE 754 `squareRoot`.
+    fn spec_test<F>()
+    where
+        F: Float + SqrtHelper,
+        F::Int: HInt,
+        F::Int: From<u8>,
+        F::Int: From<F::ISet2>,
+        F::Int: CastInto<F::ISet1>,
+        F::Int: CastInto<F::ISet2>,
+        u32: CastInto<F::Int>,
+    {
+        // Values that should return a NaN and raise invalid
+        let nan = [F::NEG_INFINITY, F::NEG_ONE, F::NAN, F::MIN];
+
+        // Values that return unaltered
+        let roundtrip = [F::ZERO, F::NEG_ZERO, F::INFINITY];
+
+        for x in nan {
+            let FpResult { val, status } = sqrt_round(x, Round::Nearest);
+            assert!(val.is_nan());
+            assert!(status == Status::INVALID);
+        }
+
+        for x in roundtrip {
+            let FpResult { val, status } = sqrt_round(x, Round::Nearest);
+            assert_biteq!(val, x);
+            assert!(status == Status::OK);
+        }
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn sanity_check_f16() {
+        assert_biteq!(sqrt(100.0f16), 10.0);
+        assert_biteq!(sqrt(4.0f16), 2.0);
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn spec_tests_f16() {
+        spec_test::<f16>();
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    #[allow(clippy::approx_constant)]
+    fn conformance_tests_f16() {
+        let cases = [
+            (f16::PI, 0x3f17_u16),
+            // 10_000.0, using a hex literal for MSRV hack (Rust < 1.67 checks literal widths as
+            // part of the AST, so the `cfg` is irrelevant here).
+            (f16::from_bits(0x70e2), 0x5640_u16),
+            (f16::from_bits(0x0000000f), 0x13bf_u16), // subnormal input (exponent field is zero)
+            (f16::INFINITY, f16::INFINITY.to_bits()),
+        ];
+
+        for (input, output) in cases {
+            assert_biteq!(
+                sqrt(input),
+                f16::from_bits(output),
+                "input: {input:?} ({:#018x})",
+                input.to_bits()
+            );
+        }
+    }
+
+    #[test]
+    fn sanity_check_f32() {
+        assert_biteq!(sqrt(100.0f32), 10.0);
+        assert_biteq!(sqrt(4.0f32), 2.0);
+    }
+
+    #[test]
+    fn spec_tests_f32() {
+        spec_test::<f32>();
+    }
+
+    #[test]
+    #[allow(clippy::approx_constant)]
+    fn conformance_tests_f32() {
+        let cases = [
+            (f32::PI, 0x3fe2dfc5_u32),
+            (10000.0f32, 0x42c80000_u32),
+            (f32::from_bits(0x0000000f), 0x1b2f456f_u32), // subnormal input (exponent field is zero)
+            (f32::INFINITY, f32::INFINITY.to_bits()),
+        ];
+
+        for (input, output) in cases {
+            assert_biteq!(
+                sqrt(input),
+                f32::from_bits(output),
+                "input: {input:?} ({:#018x})",
+                input.to_bits()
+            );
+        }
+    }
+
+    #[test]
+    fn sanity_check_f64() {
+        assert_biteq!(sqrt(100.0f64), 10.0);
+        assert_biteq!(sqrt(4.0f64), 2.0);
+    }
+
+    #[test]
+    fn spec_tests_f64() {
+        spec_test::<f64>();
+    }
+
+    #[test]
+    #[allow(clippy::approx_constant)]
+    fn conformance_tests_f64() {
+        let cases = [
+            (f64::PI, 0x3ffc5bf891b4ef6a_u64),
+            (10000.0, 0x4059000000000000_u64),
+            (f64::from_bits(0x0000000f), 0x1e7efbdeb14f4eda_u64), // subnormal input
+            (f64::INFINITY, f64::INFINITY.to_bits()),
+        ];
+
+        for (input, output) in cases {
+            assert_biteq!(
+                sqrt(input),
+                f64::from_bits(output),
+                "input: {input:?} ({:#018x})",
+                input.to_bits()
+            );
+        }
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn sanity_check_f128() {
+        assert_biteq!(sqrt(100.0f128), 10.0);
+        assert_biteq!(sqrt(4.0f128), 2.0);
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn spec_tests_f128() {
+        spec_test::<f128>();
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    #[allow(clippy::approx_constant)]
+    fn conformance_tests_f128() {
+        let cases = [
+            (f128::PI, 0x3fffc5bf891b4ef6aa79c3b0520d5db9_u128),
+            // 10_000.0, see `f16` for reasoning.
+            (
+                f128::from_bits(0x400c3880000000000000000000000000),
+                0x40059000000000000000000000000000_u128,
+            ),
+            (
+                f128::from_bits(0x0000000f), // subnormal input (exponent field is zero)
+                0x1fc9efbdeb14f4ed9b17ae807907e1e9_u128,
+            ),
+            (f128::INFINITY, f128::INFINITY.to_bits()),
+        ];
+
+        for (input, output) in cases {
+            assert_biteq!(
+                sqrt(input),
+                f128::from_bits(output),
+                "input: {input:?} ({:#018x})",
+                input.to_bits()
+            );
+        }
+    }
+}
--- a/vendor/libm/src/math/generic/trunc.rs
+++ b/vendor/libm/src/math/generic/trunc.rs
@@ -0,0 +1,148 @@
+/* SPDX-License-Identifier: MIT
+ * origin: musl src/math/trunc.c */
+
+use crate::support::{Float, FpResult, Int, IntTy, MinInt, Status};
+
+/// Truncate `x` toward zero, returning only the resulting value.
+///
+/// Thin wrapper around [`trunc_status`] that drops the status and keeps the `val` field.
+#[inline]
+pub fn trunc<F: Float>(x: F) -> F {
+    let FpResult { val, .. } = trunc_status(x);
+    val
+}
+
+/// Truncate `x` toward zero and report whether the result is exact.
+///
+/// The returned [`FpResult`] holds:
+///
+/// * `val`: `x` with its fractional bits cleared. The sign bit is always kept, so an input
+///   with a negative unbiased exponent becomes a zero of the same sign.
+/// * `status`: `Status::OK` when `x` is returned unchanged (no nonzero bit had to be
+///   cleared), `Status::INEXACT` whenever a nonzero bit was discarded.
+///
+/// The `C1`..`C5` labels name the individual cases and are referred to by the tests.
+#[inline]
+pub fn trunc_status<F: Float>(x: F) -> FpResult<F> {
+    let xi: F::Int = x.to_bits();
+    let e: i32 = x.exp_unbiased();
+
+    // C1: The represented value has no fractional part, so no truncation is needed
+    if e >= F::SIG_BITS as i32 {
+        return FpResult::ok(x);
+    }
+
+    let mask = if e < 0 {
+        // C2: If the exponent is negative, the result will be zero so we mask out everything
+        // except the sign.
+        F::SIGN_MASK
+    } else {
+        // C3: Otherwise, we clear the significand bits selected by `SIG_MASK >> e`, i.e. the
+        // ones that encode the fractional part.
+        !(F::SIG_MASK >> e.unsigned())
+    };
+
+    // C4: If the to-be-masked-out portion is already zero, we have an exact result
+    if (xi & !mask) == IntTy::<F>::ZERO {
+        return FpResult::ok(x);
+    }
+
+    // C5: Otherwise nonzero bits are discarded, so the result always differs from `x`: mask
+    // the result and report `INEXACT`. This must not depend on the significand field alone:
+    // inputs such as 0.5 have an all-zero significand yet still truncate to zero inexactly.
+    FpResult::new(F::from_bits(xi & mask), Status::INEXACT)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::support::Hexf;
+
+    /// Shared checks run for each float type.
+    ///
+    /// First verifies that a fixed set of values (both zeros, plus and minus one, both
+    /// infinities) comes back from `trunc_status` bit-for-bit unchanged with `Status::OK`.
+    /// Then verifies every `(input, expected value, expected status)` triple in `cases`.
+    fn spec_test<F: Float>(cases: &[(F, F, Status)]) {
+        let roundtrip = [
+            F::ZERO,
+            F::ONE,
+            F::NEG_ONE,
+            F::NEG_ZERO,
+            F::INFINITY,
+            F::NEG_INFINITY,
+        ];
+
+        for x in roundtrip {
+            let FpResult { val, status } = trunc_status(x);
+            assert_biteq!(val, x, "{}", Hexf(x));
+            assert_eq!(status, Status::OK, "{}", Hexf(x));
+        }
+
+        for &(x, res, res_stat) in cases {
+            let FpResult { val, status } = trunc_status(x);
+            assert_biteq!(val, res, "{}", Hexf(x));
+            assert_eq!(status, res_stat, "{}", Hexf(x));
+        }
+    }
+
+    /* Skipping f16 / f128 "sanity_check"s and spec cases due to rejected literal lexing at MSRV */
+
+    /// Runs only the type-independent round-trip checks for `f16` (no literal cases).
+    #[test]
+    #[cfg(f16_enabled)]
+    fn spec_tests_f16() {
+        let cases = [];
+        spec_test::<f16>(&cases);
+    }
+
+    /// Value-only spot checks for `f32`, including an input below one.
+    #[test]
+    fn sanity_check_f32() {
+        assert_eq!(trunc(0.5f32), 0.0);
+        assert_eq!(trunc(1.1f32), 1.0);
+        assert_eq!(trunc(2.9f32), 2.0);
+    }
+
+    /// Value and status checks for `f32`, plus the `C1`/`C2` boundary cases of `trunc_status`.
+    #[test]
+    fn spec_tests_f32() {
+        let cases = [
+            (0.1, 0.0, Status::INEXACT),
+            (-0.1, -0.0, Status::INEXACT),
+            (0.9, 0.0, Status::INEXACT),
+            (-0.9, -0.0, Status::INEXACT),
+            (1.1, 1.0, Status::INEXACT),
+            (-1.1, -1.0, Status::INEXACT),
+            (1.9, 1.0, Status::INEXACT),
+            (-1.9, -1.0, Status::INEXACT),
+        ];
+        spec_test::<f32>(&cases);
+
+        assert_biteq!(trunc(1.1f32), 1.0);
+
+        // C1
+        assert_biteq!(trunc(hf32!("0x1p23")), hf32!("0x1p23"));
+        assert_biteq!(trunc(hf32!("-0x1p23")), hf32!("-0x1p23"));
+
+        // C2
+        assert_biteq!(trunc(hf32!("0x1p-1")), 0.0);
+        assert_biteq!(trunc(hf32!("-0x1p-1")), -0.0);
+    }
+
+    /// Value-only spot checks for `f64`, including an input below one.
+    #[test]
+    fn sanity_check_f64() {
+        assert_eq!(trunc(0.5f64), 0.0);
+        assert_eq!(trunc(1.1f64), 1.0);
+        assert_eq!(trunc(2.9f64), 2.0);
+    }
+
+    /// Value and status checks for `f64`, plus the `C1`/`C2` boundary cases of `trunc_status`.
+    #[test]
+    fn spec_tests_f64() {
+        let cases = [
+            (0.1, 0.0, Status::INEXACT),
+            (-0.1, -0.0, Status::INEXACT),
+            (0.9, 0.0, Status::INEXACT),
+            (-0.9, -0.0, Status::INEXACT),
+            (1.1, 1.0, Status::INEXACT),
+            (-1.1, -1.0, Status::INEXACT),
+            (1.9, 1.0, Status::INEXACT),
+            (-1.9, -1.0, Status::INEXACT),
+        ];
+        spec_test::<f64>(&cases);
+
+        assert_biteq!(trunc(1.1f64), 1.0);
+
+        // C1
+        assert_biteq!(trunc(hf64!("0x1p52")), hf64!("0x1p52"));
+        assert_biteq!(trunc(hf64!("-0x1p52")), hf64!("-0x1p52"));
+
+        // C2
+        assert_biteq!(trunc(hf64!("0x1p-1")), 0.0);
+        assert_biteq!(trunc(hf64!("-0x1p-1")), -0.0);
+    }
+
+    /// Runs only the type-independent round-trip checks for `f128` (no literal cases).
+    #[test]
+    #[cfg(f128_enabled)]
+    fn spec_tests_f128() {
+        let cases = [];
+        spec_test::<f128>(&cases);
+    }
+}