roundeven: Use an assembly implementation on i586

Add an assembly implementation for roundeven which also works for
`rint`, similar to the existing `ceil` and `floor` implementations. This
resolves cases where values close to the *.5 boundary would round in
the incorrect direction, such as -519629176421.49976 (tested in
`case_list`).
diff --git a/library/compiler-builtins/etc/function-definitions.json b/library/compiler-builtins/etc/function-definitions.json
index 39f7897..38d609d 100644
--- a/library/compiler-builtins/etc/function-definitions.json
+++ b/library/compiler-builtins/etc/function-definitions.json
@@ -824,6 +824,7 @@
     "rint": {
         "sources": [
             "libm/src/math/arch/aarch64/rounding.rs",
+            "libm/src/math/arch/i586/rounding.rs",
             "libm/src/math/arch/wasm32/rounding.rs",
             "libm/src/math/generic/rint.rs",
             "libm/src/math/rint.rs"
diff --git a/library/compiler-builtins/libm-test/src/precision.rs b/library/compiler-builtins/libm-test/src/precision.rs
index bc28d97..2034e89 100644
--- a/library/compiler-builtins/libm-test/src/precision.rs
+++ b/library/compiler-builtins/libm-test/src/precision.rs
@@ -297,15 +297,6 @@ fn check_int<I: Int>(input: (f32,), actual: I, expected: I, ctx: &CheckCtx) -> C
 
 impl MaybeOverride<(f64,)> for SpecialCase {
     fn check_float<F: Float>(input: (f64,), actual: F, expected: F, ctx: &CheckCtx) -> CheckAction {
-        if cfg!(x86_no_sse2)
-            && (ctx.base_name == BaseName::Rint || ctx.base_name == BaseName::Roundeven)
-            && (expected - actual).abs() <= F::ONE
-            && (expected - actual).abs() > F::ZERO
-        {
-            // Our rounding mode is incorrect.
-            return XFAIL("i586 rint rounding mode");
-        }
-
         if ctx.base_name == BaseName::J0 && input.0 < -1e300 {
             // Errors get huge close to -inf
             return XFAIL_NOCHECK;
diff --git a/library/compiler-builtins/libm/src/math/arch/i586/mod.rs b/library/compiler-builtins/libm/src/math/arch/i586/mod.rs
index fa8b798..f80be49 100644
--- a/library/compiler-builtins/libm/src/math/arch/i586/mod.rs
+++ b/library/compiler-builtins/libm/src/math/arch/i586/mod.rs
@@ -11,4 +11,4 @@
 mod rounding;
 
 pub use exp_all::{x87_exp, x87_exp2, x87_exp2f, x87_exp10, x87_exp10f, x87_expf};
-pub use rounding::{ceil, floor};
+pub use rounding::{ceil, floor, rint};
diff --git a/library/compiler-builtins/libm/src/math/arch/i586/rounding.rs b/library/compiler-builtins/libm/src/math/arch/i586/rounding.rs
index 3981e3d..45bf0ad 100644
--- a/library/compiler-builtins/libm/src/math/arch/i586/rounding.rs
+++ b/library/compiler-builtins/libm/src/math/arch/i586/rounding.rs
@@ -51,3 +51,53 @@ pub fn floor(mut x: f64) -> f64 {
     }
     x
 }
+
+/// Rounds `x` to an integer via x87 `frndint`, which respects the FPU rounding mode. Because it
+/// is UB to have a non-default rounding mode in Rust, this acts as roundeven.
+pub fn rint(mut x: f64) -> f64 {
+    unsafe {
+        core::arch::asm!(
+            "fld qword ptr [{x}]", // push the f64 at `{x}` onto the x87 stack
+            "frndint", // round st(0) to an integer using the current rounding mode
+            "fstp qword ptr [{x}]", // store st(0) back through `{x}` and pop it
+            x = in(reg) &mut x,
+            // The x87 register stack is modified, so every `st(N)` must be marked as clobbered
+            out("st(0)") _, out("st(1)") _,
+            out("st(2)") _, out("st(3)") _,
+            out("st(4)") _, out("st(5)") _,
+            out("st(6)") _, out("st(7)") _,
+            options(nostack),
+        );
+    }
+    x
+}
+
+/* FIXME(msrv): after 1.82, the below can be used to compute control words using `asm_const`:
+
+#[derive(Clone, Copy, Debug, PartialEq)]
+enum Precision {
+    Single,
+    Double,
+    Extended,
+}
+
+/// See: Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 1:
+/// Basic Architecture, section 8.1.5 x87 FPU Control Word.
+const fn make_fpcw(round: Round, prec: Precision) -> u16 {
+    let exceptions = 0b111111; // Disable all 6 exceptions
+    let misc = 0b1000000; // reserved field usually set by default
+    let pc = match prec {
+        Precision::Single => 0b00,
+        Precision::Double => 0b10,
+        Precision::Extended => 0b11,
+    };
+    let rc = match round {
+        Round::Nearest => 0b00,
+        Round::Negative => 0b01,
+        Round::Positive => 0b10,
+        Round::Zero => 0b11,
+    };
+    (rc << 10) | (pc << 8) | misc | exceptions
+}
+
+*/
diff --git a/library/compiler-builtins/libm/src/math/arch/mod.rs b/library/compiler-builtins/libm/src/math/arch/mod.rs
index bcca0ff..1bed464 100644
--- a/library/compiler-builtins/libm/src/math/arch/mod.rs
+++ b/library/compiler-builtins/libm/src/math/arch/mod.rs
@@ -48,6 +48,7 @@
         pub use i586::{
             ceil,
             floor,
+            rint,
             x87_exp,
             x87_exp10,
             x87_exp10f,
diff --git a/library/compiler-builtins/libm/src/math/rint.rs b/library/compiler-builtins/libm/src/math/rint.rs
index 0cb0e2d..75c46ac 100644
--- a/library/compiler-builtins/libm/src/math/rint.rs
+++ b/library/compiler-builtins/libm/src/math/rint.rs
@@ -37,6 +37,7 @@ pub fn rint(x: f64) -> f64 {
             all(target_arch = "aarch64", target_feature = "neon"),
             all(target_arch = "wasm32", intrinsics_enabled),
         ),
+        use_arch_required: x86_no_sse2,
         args: x,
     }