| //@ compile-flags: -Copt-level=3 |
| //@ only-x86_64 |
| |
| #![crate_type = "lib"] |
| |
| use std::mem::swap; |
| use std::ptr::{copy_nonoverlapping, read, write}; |
| |
// A 200-byte value (25 `u64`s): big enough that swapping two of them cannot
// be done with a single register-sized move.
type KeccakBuffer = [[u64; 5]; 5];
| |
// A basic read+copy+write swap implementation ends up copying one of the values
// to the stack for large types, which is completely unnecessary as the lack of
// overlap means we can just do whatever fits in registers at a time.
| |
// The tests here (after the first one showing that the problem still exists)
// are less about testing *exactly* what the codegen is, and more about testing
// 1) That things are swapped directly from one argument to the other,
//    never going through the stack along the way, and
// 2) That we're doing the swapping for big things using large vector types,
//    rather than `i64` or `<8 x i8>` (or, even worse, `i8`) at a time.
//
// (There are separate tests for intrinsics::typed_swap_nonoverlapping that
// check that it, as an intrinsic, is emitting exactly what it should.)
| |
| // CHECK-LABEL: @swap_basic |
| #[no_mangle] |
| pub fn swap_basic(x: &mut KeccakBuffer, y: &mut KeccakBuffer) { |
| // CHECK: alloca [200 x i8] |
| |
| // SAFETY: exclusive references are always valid to read/write, |
| // are non-overlapping, and nothing here panics so it's drop-safe. |
| unsafe { |
| let z = read(x); |
| copy_nonoverlapping(y, x, 1); |
| write(y, z); |
| } |
| } |
| |
| // CHECK-LABEL: @swap_std |
| #[no_mangle] |
| pub fn swap_std(x: &mut KeccakBuffer, y: &mut KeccakBuffer) { |
| // CHECK-NOT: alloca |
| // CHECK: load <{{2|4}} x i64> |
| // CHECK: store <{{2|4}} x i64> |
| swap(x, y) |
| } |
| |
| // CHECK-LABEL: @swap_slice |
| #[no_mangle] |
| pub fn swap_slice(x: &mut [KeccakBuffer], y: &mut [KeccakBuffer]) { |
| // CHECK-NOT: alloca |
| // CHECK: load <{{2|4}} x i64> |
| // CHECK: store <{{2|4}} x i64> |
| if x.len() == y.len() { |
| x.swap_with_slice(y); |
| } |
| } |
| |
// 1024 bytes whose alignment is only 1: a large element type that offers the
// optimizer no alignment guarantee beyond a single byte.
type OneKilobyteBuffer = [u8; 1024];
| |
| // CHECK-LABEL: @swap_1kb_slices |
| #[no_mangle] |
| pub fn swap_1kb_slices(x: &mut [OneKilobyteBuffer], y: &mut [OneKilobyteBuffer]) { |
| // CHECK-NOT: alloca |
| |
| // CHECK-NOT: load i32 |
| // CHECK-NOT: store i32 |
| // CHECK-NOT: load i16 |
| // CHECK-NOT: store i16 |
| // CHECK-NOT: load i8 |
| // CHECK-NOT: store i8 |
| |
| // CHECK: load <{{2|4}} x i64>{{.+}}align 1, |
| // CHECK: store <{{2|4}} x i64>{{.+}}align 1, |
| |
| // CHECK-NOT: load i32 |
| // CHECK-NOT: store i32 |
| // CHECK-NOT: load i16 |
| // CHECK-NOT: store i16 |
| // CHECK-NOT: load i8 |
| // CHECK-NOT: store i8 |
| |
| if x.len() == y.len() { |
| x.swap_with_slice(y); |
| } |
| } |
| |
// 192 bytes (three 64-byte units) forced to 64-byte alignment, so the size is
// a multiple of the alignment without being a power of two.
#[repr(align(64))]
pub struct BigButHighlyAligned([u8; 64 * 3]);
| |
| // CHECK-LABEL: @swap_big_aligned |
| #[no_mangle] |
| pub fn swap_big_aligned(x: &mut BigButHighlyAligned, y: &mut BigButHighlyAligned) { |
| // CHECK-NOT: call void @llvm.memcpy |
| // CHECK-NOT: load i32 |
| // CHECK-NOT: store i32 |
| // CHECK-NOT: load i16 |
| // CHECK-NOT: store i16 |
| // CHECK-NOT: load i8 |
| // CHECK-NOT: store i8 |
| |
| // CHECK-COUNT-2: load <{{2|4}} x i64>{{.+}}align 64, |
| // CHECK-COUNT-2: store <{{2|4}} x i64>{{.+}}align 64, |
| |
| // CHECK-COUNT-2: load <{{2|4}} x i64>{{.+}}align 32, |
| // CHECK-COUNT-2: store <{{2|4}} x i64>{{.+}}align 32, |
| |
| // CHECK-NOT: load i32 |
| // CHECK-NOT: store i32 |
| // CHECK-NOT: load i16 |
| // CHECK-NOT: store i16 |
| // CHECK-NOT: load i8 |
| // CHECK-NOT: store i8 |
| // CHECK-NOT: call void @llvm.memcpy |
| swap(x, y) |
| } |