| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc < %s -mcpu=sm_30 -mattr=+ptx60 | FileCheck %s --check-prefix=PTX60 |
| ; RUN: llc < %s -mcpu=sm_30 -mattr=+ptx50 | FileCheck %s --check-prefix=PTX50 |
| ; RUN: %if ptxas-isa-6.0 %{ llc < %s -mcpu=sm_30 -mattr=+ptx60 | %ptxas-verify %} |
| ; RUN: %if ptxas-isa-5.0 %{ llc < %s -mcpu=sm_30 -mattr=+ptx50 | %ptxas-verify %} |
| |
; Both RUN configurations above compile this module for the 64-bit NVPTX target.
| target triple = "nvptx64-nvidia-cuda" |
| |
; Sink written by every case block of @foo. It is declared in addrspace(1);
; the expected output of @foo accesses it with st.global stores.
| @out = addrspace(1) global i32 0, align 4 |
| |
; foo: dense four-way switch on %i. Cases 0..3 each store their own case value
; to @out and then branch to %end; any other value of %i takes the default edge
; straight to %end without storing anything.
;
; Lowering pinned by the autogenerated assertions that follow:
;   * -mattr=+ptx60 run: a single unsigned range check (%i > 3 jumps to the
;     exit block), then a four-entry .branchtargets table dispatched with
;     brx.idx on %i.
;   * -mattr=+ptx50 run: no brx.idx is expected; the switch is emitted as a
;     tree of setp / bra comparisons instead.
;     NOTE(review): presumably because brx.idx is not available before
;     PTX ISA 6.0 - confirm against the PTX ISA documentation.
;
; The assertion lines are generated from this exact IR (block names such as
; %case0 show up in the expected output), so regenerate them with
; utils/update_llc_test_checks.py after any change instead of editing by hand.
| define void @foo(i32 %i) { |
| ; PTX60-LABEL: foo( |
| ; PTX60: { |
| ; PTX60-NEXT: .reg .pred %p<2>; |
| ; PTX60-NEXT: .reg .b32 %r<2>; |
| ; PTX60-EMPTY: |
| ; PTX60-NEXT: // %bb.0: // %entry |
| ; PTX60-NEXT: ld.param.b32 %r1, [foo_param_0]; |
| ; PTX60-NEXT: setp.gt.u32 %p1, %r1, 3; |
| ; PTX60-NEXT: @%p1 bra $L__BB0_6; |
| ; PTX60-NEXT: // %bb.1: // %entry |
| ; PTX60-NEXT: $L_brx_0: .branchtargets |
| ; PTX60-NEXT: $L__BB0_2, |
| ; PTX60-NEXT: $L__BB0_3, |
| ; PTX60-NEXT: $L__BB0_4, |
| ; PTX60-NEXT: $L__BB0_5; |
| ; PTX60-NEXT: brx.idx %r1, $L_brx_0; |
| ; PTX60-NEXT: $L__BB0_2: // %case0 |
| ; PTX60-NEXT: st.global.b32 [out], 0; |
| ; PTX60-NEXT: bra.uni $L__BB0_6; |
| ; PTX60-NEXT: $L__BB0_4: // %case2 |
| ; PTX60-NEXT: st.global.b32 [out], 2; |
| ; PTX60-NEXT: bra.uni $L__BB0_6; |
| ; PTX60-NEXT: $L__BB0_5: // %case3 |
| ; PTX60-NEXT: st.global.b32 [out], 3; |
| ; PTX60-NEXT: bra.uni $L__BB0_6; |
| ; PTX60-NEXT: $L__BB0_3: // %case1 |
| ; PTX60-NEXT: st.global.b32 [out], 1; |
| ; PTX60-NEXT: $L__BB0_6: // %end |
| ; PTX60-NEXT: ret; |
| ; |
| ; PTX50-LABEL: foo( |
| ; PTX50: { |
| ; PTX50-NEXT: .reg .pred %p<6>; |
| ; PTX50-NEXT: .reg .b32 %r<2>; |
| ; PTX50-EMPTY: |
| ; PTX50-NEXT: // %bb.0: // %entry |
| ; PTX50-NEXT: ld.param.b32 %r1, [foo_param_0]; |
| ; PTX50-NEXT: setp.gt.s32 %p1, %r1, 1; |
| ; PTX50-NEXT: @%p1 bra $L__BB0_4; |
| ; PTX50-NEXT: // %bb.1: // %entry |
| ; PTX50-NEXT: setp.eq.b32 %p4, %r1, 0; |
| ; PTX50-NEXT: @%p4 bra $L__BB0_7; |
| ; PTX50-NEXT: // %bb.2: // %entry |
| ; PTX50-NEXT: setp.eq.b32 %p5, %r1, 1; |
| ; PTX50-NEXT: @%p5 bra $L__BB0_3; |
| ; PTX50-NEXT: bra.uni $L__BB0_9; |
| ; PTX50-NEXT: $L__BB0_3: // %case1 |
| ; PTX50-NEXT: st.global.b32 [out], 1; |
| ; PTX50-NEXT: bra.uni $L__BB0_9; |
| ; PTX50-NEXT: $L__BB0_4: // %entry |
| ; PTX50-NEXT: setp.eq.b32 %p2, %r1, 2; |
| ; PTX50-NEXT: @%p2 bra $L__BB0_8; |
| ; PTX50-NEXT: // %bb.5: // %entry |
| ; PTX50-NEXT: setp.eq.b32 %p3, %r1, 3; |
| ; PTX50-NEXT: @%p3 bra $L__BB0_6; |
| ; PTX50-NEXT: bra.uni $L__BB0_9; |
| ; PTX50-NEXT: $L__BB0_6: // %case3 |
| ; PTX50-NEXT: st.global.b32 [out], 3; |
| ; PTX50-NEXT: bra.uni $L__BB0_9; |
| ; PTX50-NEXT: $L__BB0_7: // %case0 |
| ; PTX50-NEXT: st.global.b32 [out], 0; |
| ; PTX50-NEXT: bra.uni $L__BB0_9; |
| ; PTX50-NEXT: $L__BB0_8: // %case2 |
| ; PTX50-NEXT: st.global.b32 [out], 2; |
| ; PTX50-NEXT: $L__BB0_9: // %end |
| ; PTX50-NEXT: ret; |
| entry: |
; Four consecutive case values (0..3); the default destination is %end.
| switch i32 %i, label %end [ |
| i32 0, label %case0 |
| i32 1, label %case1 |
| i32 2, label %case2 |
| i32 3, label %case3 |
| ] |
| |
; Each case block stores its own case value to @out, then joins %end.
| case0: |
| store i32 0, ptr addrspace(1) @out, align 4 |
| br label %end |
| |
| case1: |
| store i32 1, ptr addrspace(1) @out, align 4 |
| br label %end |
| |
| case2: |
| store i32 2, ptr addrspace(1) @out, align 4 |
| br label %end |
| |
| case3: |
| store i32 3, ptr addrspace(1) @out, align 4 |
| br label %end |
| |
; Common exit: reached from every case block and from the switch default.
| end: |
| ret void |
| } |
| |
| |
; test2: sparse switch on %tmp158 whose case values form several separate
; clusters. Each destination block just returns a constant:
;   -2147483648..-2147483646, 0..5, 120, 1024  -> %bb338 (returns 11)
;   121 / 122 / 123 / 124 / 125                -> %bb339..%bb343
;                                                 (return 12, 13, 14, 15, 18)
;   126 and every value not listed             -> %bb336 (returns 10)
;
; Lowering pinned by the autogenerated assertions that follow:
;   * -mattr=+ptx60 run: only the 120..125 range goes through a six-entry
;     .branchtargets table, indexed with brx.idx by (%tmp158 - 120) after an
;     unsigned "> 5" range check. The other clusters are matched with plain
;     comparisons (unsigned "< 6", signed "< -2147483645", "== 1024").
;   * -mattr=+ptx50 run: no brx.idx is expected; the whole switch is a tree of
;     setp / bra comparisons.
;
; The assertion lines are generated from this exact IR, so regenerate them with
; utils/update_llc_test_checks.py after any change instead of editing by hand.
| define i32 @test2(i32 %tmp158) { |
| ; PTX60-LABEL: test2( |
| ; PTX60: { |
| ; PTX60-NEXT: .reg .pred %p<6>; |
| ; PTX60-NEXT: .reg .b32 %r<3>; |
| ; PTX60-EMPTY: |
| ; PTX60-NEXT: // %bb.0: // %entry |
| ; PTX60-NEXT: ld.param.b32 %r1, [test2_param_0]; |
| ; PTX60-NEXT: setp.gt.s32 %p1, %r1, 119; |
| ; PTX60-NEXT: @%p1 bra $L__BB1_4; |
| ; PTX60-NEXT: // %bb.1: // %entry |
| ; PTX60-NEXT: setp.lt.u32 %p4, %r1, 6; |
| ; PTX60-NEXT: @%p4 bra $L__BB1_3; |
| ; PTX60-NEXT: // %bb.2: // %entry |
| ; PTX60-NEXT: setp.lt.s32 %p5, %r1, -2147483645; |
| ; PTX60-NEXT: @%p5 bra $L__BB1_3; |
| ; PTX60-NEXT: bra.uni $L__BB1_6; |
| ; PTX60-NEXT: $L__BB1_4: // %entry |
| ; PTX60-NEXT: add.s32 %r2, %r1, -120; |
| ; PTX60-NEXT: setp.gt.u32 %p2, %r2, 5; |
| ; PTX60-NEXT: @%p2 bra $L__BB1_5; |
| ; PTX60-NEXT: // %bb.12: // %entry |
| ; PTX60-NEXT: $L_brx_0: .branchtargets |
| ; PTX60-NEXT: $L__BB1_3, |
| ; PTX60-NEXT: $L__BB1_7, |
| ; PTX60-NEXT: $L__BB1_8, |
| ; PTX60-NEXT: $L__BB1_9, |
| ; PTX60-NEXT: $L__BB1_10, |
| ; PTX60-NEXT: $L__BB1_11; |
| ; PTX60-NEXT: brx.idx %r2, $L_brx_0; |
| ; PTX60-NEXT: $L__BB1_7: // %bb339 |
| ; PTX60-NEXT: st.param.b32 [func_retval0], 12; |
| ; PTX60-NEXT: ret; |
| ; PTX60-NEXT: $L__BB1_5: // %entry |
| ; PTX60-NEXT: setp.eq.b32 %p3, %r1, 1024; |
| ; PTX60-NEXT: @%p3 bra $L__BB1_3; |
| ; PTX60-NEXT: bra.uni $L__BB1_6; |
| ; PTX60-NEXT: $L__BB1_3: // %bb338 |
| ; PTX60-NEXT: st.param.b32 [func_retval0], 11; |
| ; PTX60-NEXT: ret; |
| ; PTX60-NEXT: $L__BB1_10: // %bb342 |
| ; PTX60-NEXT: st.param.b32 [func_retval0], 15; |
| ; PTX60-NEXT: ret; |
| ; PTX60-NEXT: $L__BB1_6: // %bb336 |
| ; PTX60-NEXT: st.param.b32 [func_retval0], 10; |
| ; PTX60-NEXT: ret; |
| ; PTX60-NEXT: $L__BB1_8: // %bb340 |
| ; PTX60-NEXT: st.param.b32 [func_retval0], 13; |
| ; PTX60-NEXT: ret; |
| ; PTX60-NEXT: $L__BB1_9: // %bb341 |
| ; PTX60-NEXT: st.param.b32 [func_retval0], 14; |
| ; PTX60-NEXT: ret; |
| ; PTX60-NEXT: $L__BB1_11: // %bb343 |
| ; PTX60-NEXT: st.param.b32 [func_retval0], 18; |
| ; PTX60-NEXT: ret; |
| ; |
| ; PTX50-LABEL: test2( |
| ; PTX50: { |
| ; PTX50-NEXT: .reg .pred %p<13>; |
| ; PTX50-NEXT: .reg .b32 %r<2>; |
| ; PTX50-EMPTY: |
| ; PTX50-NEXT: // %bb.0: // %entry |
| ; PTX50-NEXT: ld.param.b32 %r1, [test2_param_0]; |
| ; PTX50-NEXT: setp.gt.s32 %p1, %r1, 119; |
| ; PTX50-NEXT: @%p1 bra $L__BB1_4; |
| ; PTX50-NEXT: // %bb.1: // %entry |
| ; PTX50-NEXT: setp.lt.u32 %p11, %r1, 6; |
| ; PTX50-NEXT: @%p11 bra $L__BB1_3; |
| ; PTX50-NEXT: // %bb.2: // %entry |
| ; PTX50-NEXT: setp.lt.s32 %p12, %r1, -2147483645; |
| ; PTX50-NEXT: @%p12 bra $L__BB1_3; |
| ; PTX50-NEXT: bra.uni $L__BB1_15; |
| ; PTX50-NEXT: $L__BB1_4: // %entry |
| ; PTX50-NEXT: setp.gt.s32 %p2, %r1, 122; |
| ; PTX50-NEXT: @%p2 bra $L__BB1_9; |
| ; PTX50-NEXT: bra.uni $L__BB1_5; |
| ; PTX50-NEXT: $L__BB1_9: // %entry |
| ; PTX50-NEXT: setp.gt.s32 %p3, %r1, 124; |
| ; PTX50-NEXT: @%p3 bra $L__BB1_13; |
| ; PTX50-NEXT: // %bb.10: // %entry |
| ; PTX50-NEXT: setp.eq.b32 %p6, %r1, 123; |
| ; PTX50-NEXT: @%p6 bra $L__BB1_17; |
| ; PTX50-NEXT: // %bb.11: // %entry |
| ; PTX50-NEXT: setp.eq.b32 %p7, %r1, 124; |
| ; PTX50-NEXT: @%p7 bra $L__BB1_12; |
| ; PTX50-NEXT: bra.uni $L__BB1_15; |
| ; PTX50-NEXT: $L__BB1_12: // %bb342 |
| ; PTX50-NEXT: st.param.b32 [func_retval0], 15; |
| ; PTX50-NEXT: ret; |
| ; PTX50-NEXT: $L__BB1_5: // %entry |
| ; PTX50-NEXT: setp.eq.b32 %p8, %r1, 120; |
| ; PTX50-NEXT: @%p8 bra $L__BB1_3; |
| ; PTX50-NEXT: // %bb.6: // %entry |
| ; PTX50-NEXT: setp.eq.b32 %p9, %r1, 121; |
| ; PTX50-NEXT: @%p9 bra $L__BB1_16; |
| ; PTX50-NEXT: // %bb.7: // %entry |
| ; PTX50-NEXT: setp.eq.b32 %p10, %r1, 122; |
| ; PTX50-NEXT: @%p10 bra $L__BB1_8; |
| ; PTX50-NEXT: bra.uni $L__BB1_15; |
| ; PTX50-NEXT: $L__BB1_8: // %bb340 |
| ; PTX50-NEXT: st.param.b32 [func_retval0], 13; |
| ; PTX50-NEXT: ret; |
| ; PTX50-NEXT: $L__BB1_13: // %entry |
| ; PTX50-NEXT: setp.eq.b32 %p4, %r1, 125; |
| ; PTX50-NEXT: @%p4 bra $L__BB1_18; |
| ; PTX50-NEXT: // %bb.14: // %entry |
| ; PTX50-NEXT: setp.eq.b32 %p5, %r1, 1024; |
| ; PTX50-NEXT: @%p5 bra $L__BB1_3; |
| ; PTX50-NEXT: bra.uni $L__BB1_15; |
| ; PTX50-NEXT: $L__BB1_3: // %bb338 |
| ; PTX50-NEXT: st.param.b32 [func_retval0], 11; |
| ; PTX50-NEXT: ret; |
| ; PTX50-NEXT: $L__BB1_17: // %bb341 |
| ; PTX50-NEXT: st.param.b32 [func_retval0], 14; |
| ; PTX50-NEXT: ret; |
| ; PTX50-NEXT: $L__BB1_18: // %bb343 |
| ; PTX50-NEXT: st.param.b32 [func_retval0], 18; |
| ; PTX50-NEXT: ret; |
| ; PTX50-NEXT: $L__BB1_15: // %bb336 |
| ; PTX50-NEXT: st.param.b32 [func_retval0], 10; |
| ; PTX50-NEXT: ret; |
| ; PTX50-NEXT: $L__BB1_16: // %bb339 |
| ; PTX50-NEXT: st.param.b32 [func_retval0], 12; |
| ; PTX50-NEXT: ret; |
| entry: |
; Case values are deliberately listed out of numeric order and spread over
; distant ranges; the default destination is %bb336.
| switch i32 %tmp158, label %bb336 [ |
| i32 -2147483648, label %bb338 |
| i32 -2147483647, label %bb338 |
| i32 -2147483646, label %bb338 |
| i32 120, label %bb338 |
| i32 121, label %bb339 |
| i32 122, label %bb340 |
| i32 123, label %bb341 |
| i32 124, label %bb342 |
| i32 125, label %bb343 |
; 126 names the same block as the default edge, so it behaves exactly like an
; unlisted value; the expected +ptx60 table above stops at 125.
| i32 126, label %bb336 |
| i32 1024, label %bb338 |
| i32 0, label %bb338 |
| i32 1, label %bb338 |
| i32 2, label %bb338 |
| i32 3, label %bb338 |
| i32 4, label %bb338 |
| i32 5, label %bb338 |
| ] |
| |
; Destination blocks: each one returns a distinct constant so the expected
; output identifies which block a branch target refers to.
| bb336: |
| ret i32 10 |
| bb338: |
| ret i32 11 |
| bb339: |
| ret i32 12 |
| bb340: |
| ret i32 13 |
| bb341: |
| ret i32 14 |
| bb342: |
| ret i32 15 |
| bb343: |
| ret i32 18 |
| |
| } |