tests/codegen-llvm/swap-large-types.rs - rust-lang/rust - Git at Google

 //@ compile-flags: -Copt-level=3
 //@ only-x86_64

 #![crate_type = "lib"]

 use std::mem::swap;
 use std::ptr::{copy_nonoverlapping, read, write};

 // The "large type" used by the first three tests: 25 `u64` words laid out as
 // a 5x5 array, 200 bytes in total. The name suggests a Keccak permutation state.
 type KeccakBuffer = [[u64; 5]; 5];

 // A basic read+copy+write swap implementation ends up copying one of the values
 // to stack for large types, which is completely unnecessary as the lack of
 // overlap means we can just do whatever fits in registers at a time.

 // The tests here (after the first one showing that the problem still exists)
 // are less about testing *exactly* what the codegen is, and more about testing
 // 1) That things are swapped directly from one argument to the other,
 //    never going through stack along the way, and
 // 2) That we're doing the swapping for big things using large vector types,
 //    rather than `i64` or `<8 x i8>` (or, even worse, `i8`) at a time.
 //
 // (There are separate tests for intrinsics::typed_swap_nonoverlapping that
 //  check that it, as an intrinsic, is emitting exactly what it should.)

 // CHECK-LABEL: @swap_basic
 // Baseline case: a hand-written read + copy + write swap of two
 // `KeccakBuffer`s. The FileCheck directive in the body expects a 200-byte
 // stack slot in the optimized IR, i.e. this naive form still sends one of the
 // values through the stack.
 #[no_mangle]
 pub fn swap_basic(x: &mut KeccakBuffer, y: &mut KeccakBuffer) {
     // CHECK: alloca [200 x i8]

     // SAFETY: exclusive references are always valid to read/write,
     // are non-overlapping, and nothing here panics so it's drop-safe.
     unsafe {
         // Bitwise-read `*x` into a temporary.
         let z = read(x);
         // Copy one `KeccakBuffer` from `y` over `x`.
         copy_nonoverlapping(y, x, 1);
         // Write the saved value into `y`, completing the swap.
         write(y, z);
     }
 }

 // CHECK-LABEL: @swap_std
 // Swaps two `KeccakBuffer`s through `std::mem::swap`.
 //
 // The FileCheck directives in the body expect the optimized IR to contain no
 // stack slot and to move the data with `<2 x i64>` or `<4 x i64>` vector
 // loads and stores.
 #[no_mangle]
 pub fn swap_std(x: &mut KeccakBuffer, y: &mut KeccakBuffer) {
     // CHECK-NOT: alloca
     // CHECK: load <{{2|4}} x i64>
     // CHECK: store <{{2|4}} x i64>
     swap(x, y)
 }

 // CHECK-LABEL: @swap_slice
 // Swaps the contents of two `KeccakBuffer` slices when their lengths are
 // equal; does nothing otherwise.
 //
 // The FileCheck directives in the body expect no stack slot and
 // `<2 x i64>` or `<4 x i64>` vector loads and stores.
 #[no_mangle]
 pub fn swap_slice(x: &mut [KeccakBuffer], y: &mut [KeccakBuffer]) {
     // CHECK-NOT: alloca
     // CHECK: load <{{2|4}} x i64>
     // CHECK: store <{{2|4}} x i64>
     // `swap_with_slice` panics on a length mismatch, so it is only called
     // when the lengths agree.
     if x.len() == y.len() {
         x.swap_with_slice(y);
     }
 }

 // A 1024-byte array of `u8`: a large element type with byte alignment.
 type OneKilobyteBuffer = [u8; 1024];

 // CHECK-LABEL: @swap_1kb_slices
 // Swaps the contents of two slices of 1 KiB byte buffers when their lengths
 // are equal; does nothing otherwise.
 //
 // The FileCheck directives in the body expect no stack slot, no `i32`, `i16`
 // or `i8` loads/stores on either side of the vector operations, and
 // `<2 x i64>` or `<4 x i64>` vector loads and stores carrying `align 1`.
 #[no_mangle]
 pub fn swap_1kb_slices(x: &mut [OneKilobyteBuffer], y: &mut [OneKilobyteBuffer]) {
     // CHECK-NOT: alloca

     // CHECK-NOT: load i32
     // CHECK-NOT: store i32
     // CHECK-NOT: load i16
     // CHECK-NOT: store i16
     // CHECK-NOT: load i8
     // CHECK-NOT: store i8

     // CHECK: load <{{2|4}} x i64>{{.+}}align 1,
     // CHECK: store <{{2|4}} x i64>{{.+}}align 1,

     // CHECK-NOT: load i32
     // CHECK-NOT: store i32
     // CHECK-NOT: load i16
     // CHECK-NOT: store i16
     // CHECK-NOT: load i8
     // CHECK-NOT: store i8

     // `swap_with_slice` panics on a length mismatch, so it is only called
     // when the lengths agree.
     if x.len() == y.len() {
         x.swap_with_slice(y);
     }
 }

 // A 192-byte (3 * 64) byte array wrapped in a struct whose alignment is
 // raised to 64 bytes.
 #[repr(align(64))]
 pub struct BigButHighlyAligned([u8; 64 * 3]);

 // CHECK-LABEL: @swap_big_aligned
 // Swaps two `BigButHighlyAligned` values through `std::mem::swap`.
 //
 // The FileCheck directives in the body expect no call to `llvm.memcpy`, no
 // `i32`, `i16` or `i8` loads/stores, and `<2 x i64>` or `<4 x i64>` vector
 // operations: two loads and two stores at `align 64`, followed by two loads
 // and two stores at `align 32`.
 #[no_mangle]
 pub fn swap_big_aligned(x: &mut BigButHighlyAligned, y: &mut BigButHighlyAligned) {
     // CHECK-NOT: call void @llvm.memcpy
     // CHECK-NOT: load i32
     // CHECK-NOT: store i32
     // CHECK-NOT: load i16
     // CHECK-NOT: store i16
     // CHECK-NOT: load i8
     // CHECK-NOT: store i8

     // CHECK-COUNT-2: load <{{2|4}} x i64>{{.+}}align 64,
     // CHECK-COUNT-2: store <{{2|4}} x i64>{{.+}}align 64,

     // CHECK-COUNT-2: load <{{2|4}} x i64>{{.+}}align 32,
     // CHECK-COUNT-2: store <{{2|4}} x i64>{{.+}}align 32,

     // CHECK-NOT: load i32
     // CHECK-NOT: store i32
     // CHECK-NOT: load i16
     // CHECK-NOT: store i16
     // CHECK-NOT: load i8
     // CHECK-NOT: store i8
     // CHECK-NOT: call void @llvm.memcpy
     swap(x, y)
 }
	//@ compile-flags: -Copt-level=3
	//@ only-x86_64

	#![crate_type = "lib"]

	use std::mem::swap;
	use std::ptr::{copy_nonoverlapping, read, write};

	// The "large type" used by the first three tests: 25 `u64` words laid out as
	// a 5x5 array, 200 bytes in total. The name suggests a Keccak permutation state.
	type KeccakBuffer = [[u64; 5]; 5];

	// A basic read+copy+write swap implementation ends up copying one of the values
	// to stack for large types, which is completely unnecessary as the lack of
	// overlap means we can just do whatever fits in registers at a time.

	// The tests here (after the first one showing that the problem still exists)
	// are less about testing exactly what the codegen is, and more about testing
	// 1) That things are swapped directly from one argument to the other,
	// never going through stack along the way, and
	// 2) That we're doing the swapping for big things using large vector types,
	// rather than `i64` or `<8 x i8>` (or, even worse, `i8`) at a time.
	//
	// (There are separate tests for intrinsics::typed_swap_nonoverlapping that
	// check that it, as an intrinsic, is emitting exactly what it should.)

	// CHECK-LABEL: @swap_basic
	// Baseline case: a hand-written read + copy + write swap of two
	// `KeccakBuffer`s. The FileCheck directive in the body expects a 200-byte
	// stack slot in the optimized IR, i.e. this naive form still sends one of the
	// values through the stack.
	#[no_mangle]
	pub fn swap_basic(x: &mut KeccakBuffer, y: &mut KeccakBuffer) {
	// CHECK: alloca [200 x i8]

	// SAFETY: exclusive references are always valid to read/write,
	// are non-overlapping, and nothing here panics so it's drop-safe.
	unsafe {
	// Bitwise-read `*x` into a temporary.
	let z = read(x);
	// Copy one `KeccakBuffer` from `y` over `x`.
	copy_nonoverlapping(y, x, 1);
	// Write the saved value into `y`, completing the swap.
	write(y, z);
	}
	}

	// CHECK-LABEL: @swap_std
	// Swaps two `KeccakBuffer`s through `std::mem::swap`.
	//
	// The FileCheck directives in the body expect the optimized IR to contain no
	// stack slot and to move the data with `<2 x i64>` or `<4 x i64>` vector
	// loads and stores.
	#[no_mangle]
	pub fn swap_std(x: &mut KeccakBuffer, y: &mut KeccakBuffer) {
	    // CHECK-NOT: alloca
	    // CHECK: load <{{2|4}} x i64>
	    // CHECK: store <{{2|4}} x i64>
	    swap(x, y)
	}

	// CHECK-LABEL: @swap_slice
	// Swaps the contents of two `KeccakBuffer` slices when their lengths are
	// equal; does nothing otherwise.
	//
	// The FileCheck directives in the body expect no stack slot and
	// `<2 x i64>` or `<4 x i64>` vector loads and stores.
	#[no_mangle]
	pub fn swap_slice(x: &mut [KeccakBuffer], y: &mut [KeccakBuffer]) {
	    // CHECK-NOT: alloca
	    // CHECK: load <{{2|4}} x i64>
	    // CHECK: store <{{2|4}} x i64>
	    // `swap_with_slice` panics on a length mismatch, so it is only called
	    // when the lengths agree.
	    if x.len() == y.len() {
	        x.swap_with_slice(y);
	    }
	}

	// A 1024-byte array of `u8`: a large element type with byte alignment.
	type OneKilobyteBuffer = [u8; 1024];

	// CHECK-LABEL: @swap_1kb_slices
	// Swaps the contents of two slices of 1 KiB byte buffers when their lengths
	// are equal; does nothing otherwise.
	//
	// The FileCheck directives in the body expect no stack slot, no `i32`, `i16`
	// or `i8` loads/stores on either side of the vector operations, and
	// `<2 x i64>` or `<4 x i64>` vector loads and stores carrying `align 1`.
	#[no_mangle]
	pub fn swap_1kb_slices(x: &mut [OneKilobyteBuffer], y: &mut [OneKilobyteBuffer]) {
	    // CHECK-NOT: alloca

	    // CHECK-NOT: load i32
	    // CHECK-NOT: store i32
	    // CHECK-NOT: load i16
	    // CHECK-NOT: store i16
	    // CHECK-NOT: load i8
	    // CHECK-NOT: store i8

	    // CHECK: load <{{2|4}} x i64>{{.+}}align 1,
	    // CHECK: store <{{2|4}} x i64>{{.+}}align 1,

	    // CHECK-NOT: load i32
	    // CHECK-NOT: store i32
	    // CHECK-NOT: load i16
	    // CHECK-NOT: store i16
	    // CHECK-NOT: load i8
	    // CHECK-NOT: store i8

	    // `swap_with_slice` panics on a length mismatch, so it is only called
	    // when the lengths agree.
	    if x.len() == y.len() {
	        x.swap_with_slice(y);
	    }
	}

	// A 192-byte (3 * 64) byte array wrapped in a struct whose alignment is
	// raised to 64 bytes.
	#[repr(align(64))]
	pub struct BigButHighlyAligned([u8; 64 * 3]);

	// CHECK-LABEL: @swap_big_aligned
	// Swaps two `BigButHighlyAligned` values through `std::mem::swap`.
	//
	// The FileCheck directives in the body expect no call to `llvm.memcpy`, no
	// `i32`, `i16` or `i8` loads/stores, and `<2 x i64>` or `<4 x i64>` vector
	// operations: two loads and two stores at `align 64`, followed by two loads
	// and two stores at `align 32`.
	#[no_mangle]
	pub fn swap_big_aligned(x: &mut BigButHighlyAligned, y: &mut BigButHighlyAligned) {
	    // CHECK-NOT: call void @llvm.memcpy
	    // CHECK-NOT: load i32
	    // CHECK-NOT: store i32
	    // CHECK-NOT: load i16
	    // CHECK-NOT: store i16
	    // CHECK-NOT: load i8
	    // CHECK-NOT: store i8

	    // CHECK-COUNT-2: load <{{2|4}} x i64>{{.+}}align 64,
	    // CHECK-COUNT-2: store <{{2|4}} x i64>{{.+}}align 64,

	    // CHECK-COUNT-2: load <{{2|4}} x i64>{{.+}}align 32,
	    // CHECK-COUNT-2: store <{{2|4}} x i64>{{.+}}align 32,

	    // CHECK-NOT: load i32
	    // CHECK-NOT: store i32
	    // CHECK-NOT: load i16
	    // CHECK-NOT: store i16
	    // CHECK-NOT: load i8
	    // CHECK-NOT: store i8
	    // CHECK-NOT: call void @llvm.memcpy
	    swap(x, y)
	}