| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub --version 5 |
| ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx83 | FileCheck %s |
| ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx83 | %ptxas-verify -arch=sm_70 %} |
| |
| ; Target triple embedded in the module; the RUN lines above also pass -mtriple=nvptx64. |
| target triple = "nvptx64-nvidia-cuda" |
| |
| ; Loop trip count: @test_b128_in_loop loads this i32 once and sign-extends it to i64. |
| @size = internal addrspace(1) global i32 0, align 4 |
| ; 128-bit value that @test_b128_in_loop loads once before its loop and stores back on every iteration. |
| @x = internal addrspace(1) global i128 0, align 16 |
| |
| ; Exercises a 128-bit inline-asm operand that is carried around a loop. |
| ; The asm statement uses constraints "=q,l,0": a 128-bit output, a 64-bit input, and a |
| ; 128-bit input tied to the output, so the i128 is both read and written by the asm. |
| ; The expected PTX below keeps the i128 as two 64-bit registers across iterations, packs |
| ; them into a .b128 register right before the asm and unpacks them right after it. |
| ; The assertions are autogenerated; regenerate them with utils/update_llc_test_checks.py |
| ; rather than editing them by hand. |
| define void @test_b128_in_loop() { |
| ; CHECK-LABEL: test_b128_in_loop( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<15>; |
| ; CHECK-NEXT: .reg .b128 %rq<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.global.s32 %rd1, [size]; |
| ; CHECK-NEXT: setp.eq.s64 %p1, %rd1, 0; |
| ; CHECK-NEXT: @%p1 bra $L__BB0_3; |
| ; CHECK-NEXT: // %bb.1: // %BB1 |
| ; CHECK-NEXT: ld.global.u64 %rd13, [x+8]; |
| ; CHECK-NEXT: ld.global.u64 %rd12, [x]; |
| ; CHECK-NEXT: mov.b64 %rd14, 0; |
| ; CHECK-NEXT: $L__BB0_2: // %BB2 |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: mov.b128 %rq1, {%rd12, %rd13}; |
| ; CHECK-NEXT: // begin inline asm |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: .reg .b64 lo; |
| ; CHECK-NEXT: .reg .b64 hi; |
| ; CHECK-NEXT: mov.b128 {lo, hi}, %rq1; |
| ; CHECK-NEXT: add.cc.u64 lo, lo, %rd14; |
| ; CHECK-NEXT: mov.b128 %rq1, {lo, hi}; |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: // end inline asm |
| ; CHECK-NEXT: mov.b128 {%rd12, %rd13}, %rq1; |
| ; CHECK-NEXT: st.global.u64 [x+8], %rd13; |
| ; CHECK-NEXT: st.global.u64 [x], %rd12; |
| ; CHECK-NEXT: add.s64 %rd14, %rd14, 1; |
| ; CHECK-NEXT: setp.ne.s64 %p2, %rd1, %rd14; |
| ; CHECK-NEXT: @%p2 bra $L__BB0_2; |
| ; CHECK-NEXT: $L__BB0_3: // %BB3 |
| ; CHECK-NEXT: ret; |
| |
| ; Entry block: read the trip count from @size and skip the loop entirely when it is zero. |
| %1 = load i32, ptr addrspace(1) @size, align 4 |
| %2 = icmp eq i32 %1, 0 |
| br i1 %2, label %BB3, label %BB1 |
| |
| ; BB1 runs once before the loop: load the initial i128 from @x and widen the trip count to i64. |
| BB1: ; preds = %0 |
| %3 = load i128, ptr addrspace(1) @x, align 16 |
| %4 = sext i32 %1 to i64 |
| br label %BB2 |
| |
| ; BB2 is the loop body. %5 is the loop-carried i128 and %6 is the i64 counter starting at 0. |
| BB2: ; preds = %BB2, %BB1 |
| %5 = phi i128 [ %7, %BB2 ], [ %3, %BB1 ] |
| %6 = phi i64 [ %9, %BB2 ], [ 0, %BB1 ] |
| ; The asm splits $0 into lo/hi halves, adds the counter ($1) to lo only, and repacks into $0. |
| ; hi is written only by the unpacking mov, so it is passed through unchanged; nothing in the |
| ; asm body consumes the carry-out produced by add.cc. |
| %7 = tail call i128 asm "{\0A\09.reg .b64 lo;\0A\09.reg .b64 hi;\0A\09mov.b128 {lo, hi}, $0;\0A\09add.cc.u64 lo, lo, $1;\0A\09mov.b128 $0, {lo, hi};\0A\09}", "=q,l,0"(i64 %6, i128 %5) |
| ; The asm result is written back to @x every iteration, reinterpreted as two i64 lanes. |
| %8 = bitcast i128 %7 to <2 x i64> |
| store <2 x i64> %8, ptr addrspace(1) @x, align 16 |
| ; Advance the counter and leave the loop once it equals the sign-extended trip count. |
| %9 = add nuw i64 %6, 1 |
| %10 = icmp eq i64 %9, %4 |
| br i1 %10, label %BB3, label %BB2 |
| |
| ; BB3 is the common exit, reached from the entry block (zero trip count) or from the loop. |
| BB3: ; preds = %BB2, %0 |
| ret void |
| } |