| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --default-march nvptx64 --version 5 |
| ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_32 | FileCheck %s --check-prefixes=SM30,CHECK |
| ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_32 | %ptxas-verify %} |
| ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | FileCheck %s --check-prefixes=SM70,CHECK |
| ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %} |
| |
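| ; TODO: these are system scope, but are compiled to gpu scope. |
| ; TODO: these are seq_cst, but are compiled to relaxed. |
| ; NOTE(review): every cmpxchg in the functions below is written with |
| ; monotonic success and failure orderings, not seq_cst - confirm which of |
| ; the two this test is meant to cover. |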
| |
| ; i8 cmpxchg with monotonic success and failure orderings and no explicit |
| ; syncscope. The expected PTX is the same for both run configurations: the |
| ; pointer is rounded down to a 4-byte boundary (and.b64 with -4), the byte |
| ; offset is turned into a bit shift, and the compare and new values are |
| ; shifted into place inside the containing 32-bit word. The exchange itself |
| ; is a loop around atom.cas.b32; on a failed CAS the loop is re-entered only |
| ; when the bits outside the target byte differ from the previous attempt. |
| ; The atom.cas in the expected output carries no ordering or scope qualifier. |
| ; CHECK-LABEL: relaxed_sys_i8 |
| define i8 @relaxed_sys_i8(ptr %addr, i8 %cmp, i8 %new) { |
| ; SM30-LABEL: relaxed_sys_i8( |
| ; SM30: { |
| ; SM30-NEXT: .reg .pred %p<3>; |
| ; SM30-NEXT: .reg .b16 %rs<2>; |
| ; SM30-NEXT: .reg .b32 %r<21>; |
| ; SM30-NEXT: .reg .b64 %rd<3>; |
| ; SM30-EMPTY: |
| ; SM30-NEXT: // %bb.0: |
| ; SM30-NEXT: ld.param.u8 %rs1, [relaxed_sys_i8_param_2]; |
| ; SM30-NEXT: ld.param.u64 %rd2, [relaxed_sys_i8_param_0]; |
| ; SM30-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM30-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM30-NEXT: and.b32 %r10, %r9, 3; |
| ; SM30-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM30-NEXT: mov.b32 %r11, 255; |
| ; SM30-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM30-NEXT: not.b32 %r2, %r12; |
| ; SM30-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM30-NEXT: and.b32 %r14, %r13, 255; |
| ; SM30-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM30-NEXT: ld.param.u8 %r15, [relaxed_sys_i8_param_1]; |
| ; SM30-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM30-NEXT: ld.u32 %r16, [%rd1]; |
| ; SM30-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM30-NEXT: $L__BB0_1: // %partword.cmpxchg.loop |
| ; SM30-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM30-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM30-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM30-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM30-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM30-NEXT: @%p1 bra $L__BB0_3; |
| ; SM30-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM30-NEXT: // in Loop: Header=BB0_1 Depth=1 |
| ; SM30-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM30-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM30-NEXT: mov.u32 %r20, %r8; |
| ; SM30-NEXT: @%p2 bra $L__BB0_1; |
| ; SM30-NEXT: $L__BB0_3: // %partword.cmpxchg.end |
| ; SM30-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM30-NEXT: ret; |
| ; |
| ; SM70-LABEL: relaxed_sys_i8( |
| ; SM70: { |
| ; SM70-NEXT: .reg .pred %p<3>; |
| ; SM70-NEXT: .reg .b16 %rs<2>; |
| ; SM70-NEXT: .reg .b32 %r<21>; |
| ; SM70-NEXT: .reg .b64 %rd<3>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u8 %rs1, [relaxed_sys_i8_param_2]; |
| ; SM70-NEXT: ld.param.u64 %rd2, [relaxed_sys_i8_param_0]; |
| ; SM70-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM70-NEXT: cvt.u32.u64 %r9, %rd2; |
| ; SM70-NEXT: and.b32 %r10, %r9, 3; |
| ; SM70-NEXT: shl.b32 %r1, %r10, 3; |
| ; SM70-NEXT: mov.b32 %r11, 255; |
| ; SM70-NEXT: shl.b32 %r12, %r11, %r1; |
| ; SM70-NEXT: not.b32 %r2, %r12; |
| ; SM70-NEXT: cvt.u32.u16 %r13, %rs1; |
| ; SM70-NEXT: and.b32 %r14, %r13, 255; |
| ; SM70-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM70-NEXT: ld.param.u8 %r15, [relaxed_sys_i8_param_1]; |
| ; SM70-NEXT: shl.b32 %r4, %r15, %r1; |
| ; SM70-NEXT: ld.u32 %r16, [%rd1]; |
| ; SM70-NEXT: and.b32 %r20, %r16, %r2; |
| ; SM70-NEXT: $L__BB0_1: // %partword.cmpxchg.loop |
| ; SM70-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM70-NEXT: or.b32 %r17, %r20, %r3; |
| ; SM70-NEXT: or.b32 %r18, %r20, %r4; |
| ; SM70-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17; |
| ; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18; |
| ; SM70-NEXT: @%p1 bra $L__BB0_3; |
| ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM70-NEXT: // in Loop: Header=BB0_1 Depth=1 |
| ; SM70-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; |
| ; SM70-NEXT: mov.u32 %r20, %r8; |
| ; SM70-NEXT: @%p2 bra $L__BB0_1; |
| ; SM70-NEXT: $L__BB0_3: // %partword.cmpxchg.end |
| ; SM70-NEXT: st.param.b32 [func_retval0], %r13; |
| ; SM70-NEXT: ret; |
| ; The cmpxchg result (%pairold) is never read; the function returns %new. |
| %pairold = cmpxchg ptr %addr, i8 %cmp, i8 %new monotonic monotonic |
| ret i8 %new |
| } |
| |
| ; i16 cmpxchg with monotonic success and failure orderings and no explicit |
| ; syncscope. The expected PTX is the same for both run configurations and |
| ; follows the part-word pattern: the pointer is rounded down to a 4-byte |
| ; boundary (and.b64 with -4), a 65535 mask is shifted to the halfword's bit |
| ; position, and the exchange is a loop around atom.cas.b32 on the containing |
| ; 32-bit word. On a failed CAS the loop is re-entered only when the bits |
| ; outside the target halfword differ from the previous attempt. |
| ; The atom.cas in the expected output carries no ordering or scope qualifier. |
| ; CHECK-LABEL: relaxed_sys_i16 |
| define i16 @relaxed_sys_i16(ptr %addr, i16 %cmp, i16 %new) { |
| ; SM30-LABEL: relaxed_sys_i16( |
| ; SM30: { |
| ; SM30-NEXT: .reg .pred %p<3>; |
| ; SM30-NEXT: .reg .b16 %rs<2>; |
| ; SM30-NEXT: .reg .b32 %r<20>; |
| ; SM30-NEXT: .reg .b64 %rd<3>; |
| ; SM30-EMPTY: |
| ; SM30-NEXT: // %bb.0: |
| ; SM30-NEXT: ld.param.u16 %rs1, [relaxed_sys_i16_param_2]; |
| ; SM30-NEXT: ld.param.u64 %rd2, [relaxed_sys_i16_param_0]; |
| ; SM30-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM30-NEXT: ld.param.u16 %r9, [relaxed_sys_i16_param_1]; |
| ; SM30-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM30-NEXT: and.b32 %r11, %r10, 3; |
| ; SM30-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM30-NEXT: mov.b32 %r12, 65535; |
| ; SM30-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM30-NEXT: not.b32 %r2, %r13; |
| ; SM30-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM30-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM30-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM30-NEXT: ld.u32 %r15, [%rd1]; |
| ; SM30-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM30-NEXT: $L__BB1_1: // %partword.cmpxchg.loop |
| ; SM30-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM30-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM30-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM30-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM30-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM30-NEXT: @%p1 bra $L__BB1_3; |
| ; SM30-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM30-NEXT: // in Loop: Header=BB1_1 Depth=1 |
| ; SM30-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM30-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM30-NEXT: mov.u32 %r19, %r8; |
| ; SM30-NEXT: @%p2 bra $L__BB1_1; |
| ; SM30-NEXT: $L__BB1_3: // %partword.cmpxchg.end |
| ; SM30-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM30-NEXT: ret; |
| ; |
| ; SM70-LABEL: relaxed_sys_i16( |
| ; SM70: { |
| ; SM70-NEXT: .reg .pred %p<3>; |
| ; SM70-NEXT: .reg .b16 %rs<2>; |
| ; SM70-NEXT: .reg .b32 %r<20>; |
| ; SM70-NEXT: .reg .b64 %rd<3>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u16 %rs1, [relaxed_sys_i16_param_2]; |
| ; SM70-NEXT: ld.param.u64 %rd2, [relaxed_sys_i16_param_0]; |
| ; SM70-NEXT: and.b64 %rd1, %rd2, -4; |
| ; SM70-NEXT: ld.param.u16 %r9, [relaxed_sys_i16_param_1]; |
| ; SM70-NEXT: cvt.u32.u64 %r10, %rd2; |
| ; SM70-NEXT: and.b32 %r11, %r10, 3; |
| ; SM70-NEXT: shl.b32 %r1, %r11, 3; |
| ; SM70-NEXT: mov.b32 %r12, 65535; |
| ; SM70-NEXT: shl.b32 %r13, %r12, %r1; |
| ; SM70-NEXT: not.b32 %r2, %r13; |
| ; SM70-NEXT: cvt.u32.u16 %r14, %rs1; |
| ; SM70-NEXT: shl.b32 %r3, %r14, %r1; |
| ; SM70-NEXT: shl.b32 %r4, %r9, %r1; |
| ; SM70-NEXT: ld.u32 %r15, [%rd1]; |
| ; SM70-NEXT: and.b32 %r19, %r15, %r2; |
| ; SM70-NEXT: $L__BB1_1: // %partword.cmpxchg.loop |
| ; SM70-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; SM70-NEXT: or.b32 %r16, %r19, %r3; |
| ; SM70-NEXT: or.b32 %r17, %r19, %r4; |
| ; SM70-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16; |
| ; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17; |
| ; SM70-NEXT: @%p1 bra $L__BB1_3; |
| ; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure |
| ; SM70-NEXT: // in Loop: Header=BB1_1 Depth=1 |
| ; SM70-NEXT: and.b32 %r8, %r7, %r2; |
| ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; |
| ; SM70-NEXT: mov.u32 %r19, %r8; |
| ; SM70-NEXT: @%p2 bra $L__BB1_1; |
| ; SM70-NEXT: $L__BB1_3: // %partword.cmpxchg.end |
| ; SM70-NEXT: st.param.b32 [func_retval0], %r14; |
| ; SM70-NEXT: ret; |
| ; The cmpxchg result (%pairold) is never read; the function returns %new. |
| %pairold = cmpxchg ptr %addr, i16 %cmp, i16 %new monotonic monotonic |
| ret i16 %new |
| } |
| |
| ; i32 cmpxchg with monotonic success and failure orderings and no explicit |
| ; syncscope. Both run configurations expect a single atom.cas.b32 on the |
| ; original pointer, with no retry loop and no ordering or scope qualifier on |
| ; the instruction. The value stored to func_retval0 is the register loaded |
| ; from param_2 (%new), not the atom.cas destination register. |
| ; CHECK-LABEL: relaxed_sys_i32 |
| define i32 @relaxed_sys_i32(ptr %addr, i32 %cmp, i32 %new) { |
| ; SM30-LABEL: relaxed_sys_i32( |
| ; SM30: { |
| ; SM30-NEXT: .reg .b32 %r<4>; |
| ; SM30-NEXT: .reg .b64 %rd<2>; |
| ; SM30-EMPTY: |
| ; SM30-NEXT: // %bb.0: |
| ; SM30-NEXT: ld.param.u64 %rd1, [relaxed_sys_i32_param_0]; |
| ; SM30-NEXT: ld.param.u32 %r1, [relaxed_sys_i32_param_1]; |
| ; SM30-NEXT: ld.param.u32 %r2, [relaxed_sys_i32_param_2]; |
| ; SM30-NEXT: atom.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM30-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM30-NEXT: ret; |
| ; |
| ; SM70-LABEL: relaxed_sys_i32( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b32 %r<4>; |
| ; SM70-NEXT: .reg .b64 %rd<2>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [relaxed_sys_i32_param_0]; |
| ; SM70-NEXT: ld.param.u32 %r1, [relaxed_sys_i32_param_1]; |
| ; SM70-NEXT: ld.param.u32 %r2, [relaxed_sys_i32_param_2]; |
| ; SM70-NEXT: atom.cas.b32 %r3, [%rd1], %r1, %r2; |
| ; SM70-NEXT: st.param.b32 [func_retval0], %r2; |
| ; SM70-NEXT: ret; |
| ; The cmpxchg result (%pairold) is never read; the function returns %new. |
| %pairold = cmpxchg ptr %addr, i32 %cmp, i32 %new monotonic monotonic |
| ret i32 %new |
| } |
| |
| ; i64 cmpxchg with monotonic success and failure orderings and no explicit |
| ; syncscope. Both run configurations expect a single atom.cas.b64 on the |
| ; original pointer, with no retry loop and no ordering or scope qualifier on |
| ; the instruction. The value stored to func_retval0 is the register loaded |
| ; from param_2 (%new), not the atom.cas destination register. |
| ; CHECK-LABEL: relaxed_sys_i64 |
| define i64 @relaxed_sys_i64(ptr %addr, i64 %cmp, i64 %new) { |
| ; SM30-LABEL: relaxed_sys_i64( |
| ; SM30: { |
| ; SM30-NEXT: .reg .b64 %rd<5>; |
| ; SM30-EMPTY: |
| ; SM30-NEXT: // %bb.0: |
| ; SM30-NEXT: ld.param.u64 %rd1, [relaxed_sys_i64_param_0]; |
| ; SM30-NEXT: ld.param.u64 %rd2, [relaxed_sys_i64_param_1]; |
| ; SM30-NEXT: ld.param.u64 %rd3, [relaxed_sys_i64_param_2]; |
| ; SM30-NEXT: atom.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM30-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM30-NEXT: ret; |
| ; |
| ; SM70-LABEL: relaxed_sys_i64( |
| ; SM70: { |
| ; SM70-NEXT: .reg .b64 %rd<5>; |
| ; SM70-EMPTY: |
| ; SM70-NEXT: // %bb.0: |
| ; SM70-NEXT: ld.param.u64 %rd1, [relaxed_sys_i64_param_0]; |
| ; SM70-NEXT: ld.param.u64 %rd2, [relaxed_sys_i64_param_1]; |
| ; SM70-NEXT: ld.param.u64 %rd3, [relaxed_sys_i64_param_2]; |
| ; SM70-NEXT: atom.cas.b64 %rd4, [%rd1], %rd2, %rd3; |
| ; SM70-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; SM70-NEXT: ret; |
| ; The cmpxchg result (%pairold) is never read; the function returns %new. |
| %pairold = cmpxchg ptr %addr, i64 %cmp, i64 %new monotonic monotonic |
| ret i64 %new |
| } |
| ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: |
| ; CHECK: {{.*}} |