| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 |
| |
| ; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ |
| ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32 |
| ; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ |
| ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64 |
| |
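; Note: the vaadd/vasub intrinsics below take (passthru, op1, op2, rounding
; mode, vl). The static rounding-mode operand (0 = rnu, 1 = rne, 2 = rdn,
; 3 = rod) is what the backend turns into the csrwi writes to vxrm checked
; below.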
| declare <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8( |
| <vscale x 1 x i8>, |
| <vscale x 1 x i8>, |
| <vscale x 1 x i8>, |
| iXLen, iXLen); |
| declare <vscale x 1 x i8> @llvm.riscv.vasub.nxv1i8.nxv1i8( |
| <vscale x 1 x i8>, |
| <vscale x 1 x i8>, |
| <vscale x 1 x i8>, |
| iXLen, iXLen); |
| |
| ; Test same rounding mode in one block. |
| define <vscale x 1 x i8> @test1(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind { |
| ; CHECK-LABEL: test1: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma |
| ; CHECK-NEXT: csrwi vxrm, 0 |
| ; CHECK-NEXT: vaadd.vv v8, v8, v9 |
| ; CHECK-NEXT: vaadd.vv v8, v8, v10 |
| ; CHECK-NEXT: ret |
| entry: |
| %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8( |
| <vscale x 1 x i8> undef, |
| <vscale x 1 x i8> %0, |
| <vscale x 1 x i8> %1, |
| iXLen 0, iXLen %3) |
| %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8( |
| <vscale x 1 x i8> undef, |
| <vscale x 1 x i8> %a, |
| <vscale x 1 x i8> %2, |
| iXLen 0, iXLen %3) |
| |
| ret <vscale x 1 x i8> %b |
| } |
| |
; Test two different rounding modes in one block.
| define <vscale x 1 x i8> @test2(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind { |
| ; CHECK-LABEL: test2: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma |
| ; CHECK-NEXT: csrwi vxrm, 2 |
| ; CHECK-NEXT: vaadd.vv v8, v8, v9 |
| ; CHECK-NEXT: csrwi vxrm, 0 |
| ; CHECK-NEXT: vaadd.vv v8, v8, v10 |
| ; CHECK-NEXT: ret |
| entry: |
| %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8( |
| <vscale x 1 x i8> undef, |
| <vscale x 1 x i8> %0, |
| <vscale x 1 x i8> %1, |
| iXLen 2, iXLen %3) |
| %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8( |
| <vscale x 1 x i8> undef, |
| <vscale x 1 x i8> %a, |
| <vscale x 1 x i8> %2, |
| iXLen 0, iXLen %3) |
| |
| ret <vscale x 1 x i8> %b |
| } |
| |
| declare <vscale x 1 x i8> @foo(<vscale x 1 x i8>) |
| |
; Test same vxrm with a call in between, which may invalidate vxrm.
| define <vscale x 1 x i8> @test3(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind { |
| ; RV32-LABEL: test3: |
| ; RV32: # %bb.0: # %entry |
| ; RV32-NEXT: addi sp, sp, -32 |
| ; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill |
| ; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill |
| ; RV32-NEXT: csrr a1, vlenb |
| ; RV32-NEXT: slli a1, a1, 1 |
| ; RV32-NEXT: sub sp, sp, a1 |
| ; RV32-NEXT: mv s0, a0 |
| ; RV32-NEXT: addi a1, sp, 16 |
| ; RV32-NEXT: vs1r.v v10, (a1) # Unknown-size Folded Spill |
| ; RV32-NEXT: vsetvli zero, a0, e8, mf8, ta, ma |
| ; RV32-NEXT: csrwi vxrm, 0 |
| ; RV32-NEXT: vaadd.vv v8, v8, v9 |
| ; RV32-NEXT: call foo |
| ; RV32-NEXT: vsetvli zero, s0, e8, mf8, ta, ma |
| ; RV32-NEXT: csrwi vxrm, 0 |
| ; RV32-NEXT: addi a0, sp, 16 |
| ; RV32-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload |
| ; RV32-NEXT: vaadd.vv v8, v8, v9 |
| ; RV32-NEXT: csrr a0, vlenb |
| ; RV32-NEXT: slli a0, a0, 1 |
| ; RV32-NEXT: add sp, sp, a0 |
| ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload |
| ; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload |
| ; RV32-NEXT: addi sp, sp, 32 |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: test3: |
| ; RV64: # %bb.0: # %entry |
| ; RV64-NEXT: addi sp, sp, -32 |
| ; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill |
| ; RV64-NEXT: sd s0, 16(sp) # 8-byte Folded Spill |
| ; RV64-NEXT: csrr a1, vlenb |
| ; RV64-NEXT: slli a1, a1, 1 |
| ; RV64-NEXT: sub sp, sp, a1 |
| ; RV64-NEXT: mv s0, a0 |
| ; RV64-NEXT: addi a1, sp, 16 |
| ; RV64-NEXT: vs1r.v v10, (a1) # Unknown-size Folded Spill |
| ; RV64-NEXT: vsetvli zero, a0, e8, mf8, ta, ma |
| ; RV64-NEXT: csrwi vxrm, 0 |
| ; RV64-NEXT: vaadd.vv v8, v8, v9 |
| ; RV64-NEXT: call foo |
| ; RV64-NEXT: vsetvli zero, s0, e8, mf8, ta, ma |
| ; RV64-NEXT: csrwi vxrm, 0 |
| ; RV64-NEXT: addi a0, sp, 16 |
| ; RV64-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload |
| ; RV64-NEXT: vaadd.vv v8, v8, v9 |
| ; RV64-NEXT: csrr a0, vlenb |
| ; RV64-NEXT: slli a0, a0, 1 |
| ; RV64-NEXT: add sp, sp, a0 |
| ; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload |
| ; RV64-NEXT: ld s0, 16(sp) # 8-byte Folded Reload |
| ; RV64-NEXT: addi sp, sp, 32 |
| ; RV64-NEXT: ret |
| entry: |
| %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8( |
| <vscale x 1 x i8> undef, |
| <vscale x 1 x i8> %0, |
| <vscale x 1 x i8> %1, |
| iXLen 0, iXLen %3) |
| %b = call <vscale x 1 x i8> @foo(<vscale x 1 x i8> %a) |
| %c = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8( |
| <vscale x 1 x i8> undef, |
| <vscale x 1 x i8> %b, |
| <vscale x 1 x i8> %2, |
| iXLen 0, iXLen %3) |
| |
| ret <vscale x 1 x i8> %c |
| } |
| |
; Test same vxrm with inline asm in between, which may invalidate vxrm.
| define <vscale x 1 x i8> @test4(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3) nounwind { |
| ; CHECK-LABEL: test4: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma |
| ; CHECK-NEXT: csrwi vxrm, 0 |
| ; CHECK-NEXT: vaadd.vv v8, v8, v9 |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma |
| ; CHECK-NEXT: csrwi vxrm, 0 |
| ; CHECK-NEXT: vaadd.vv v8, v8, v10 |
| ; CHECK-NEXT: ret |
| entry: |
| %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8( |
| <vscale x 1 x i8> undef, |
| <vscale x 1 x i8> %0, |
| <vscale x 1 x i8> %1, |
| iXLen 0, iXLen %3) |
| %b = call <vscale x 1 x i8> asm "", "=^vr,0"(<vscale x 1 x i8> %a) |
| %c = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8( |
| <vscale x 1 x i8> undef, |
| <vscale x 1 x i8> %b, |
| <vscale x 1 x i8> %2, |
| iXLen 0, iXLen %3) |
| |
| ret <vscale x 1 x i8> %c |
| } |
| |
; Test same rounding mode in a triangle.
| define <vscale x 1 x i8> @test5(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind { |
| ; CHECK-LABEL: test5: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: andi a1, a1, 1 |
| ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma |
| ; CHECK-NEXT: csrwi vxrm, 0 |
| ; CHECK-NEXT: vaadd.vv v8, v8, v9 |
| ; CHECK-NEXT: beqz a1, .LBB4_2 |
| ; CHECK-NEXT: # %bb.1: # %condblock |
| ; CHECK-NEXT: vaadd.vv v8, v8, v10 |
| ; CHECK-NEXT: .LBB4_2: # %mergeblock |
| ; CHECK-NEXT: ret |
| entry: |
| %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8( |
| <vscale x 1 x i8> undef, |
| <vscale x 1 x i8> %0, |
| <vscale x 1 x i8> %1, |
| iXLen 0, iXLen %3) |
| br i1 %cond, label %condblock, label %mergeblock |
| |
| condblock: |
| %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8( |
| <vscale x 1 x i8> undef, |
| <vscale x 1 x i8> %a, |
| <vscale x 1 x i8> %2, |
| iXLen 0, iXLen %3) |
| br label %mergeblock |
| |
| mergeblock: |
| %c = phi <vscale x 1 x i8> [%a, %entry], [%b, %condblock] |
| |
| ret <vscale x 1 x i8> %c |
| } |
| |
; Test same rounding mode in a diamond with no dominating vxrm write.
| define <vscale x 1 x i8> @test6(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind { |
| ; CHECK-LABEL: test6: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: andi a1, a1, 1 |
| ; CHECK-NEXT: csrwi vxrm, 0 |
| ; CHECK-NEXT: beqz a1, .LBB5_2 |
| ; CHECK-NEXT: # %bb.1: # %trueblock |
| ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma |
| ; CHECK-NEXT: vaadd.vv v8, v8, v9 |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .LBB5_2: # %falseblock |
| ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma |
| ; CHECK-NEXT: vaadd.vv v8, v8, v10 |
| ; CHECK-NEXT: ret |
| entry: |
| br i1 %cond, label %trueblock, label %falseblock |
| |
| trueblock: |
| %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8( |
| <vscale x 1 x i8> undef, |
| <vscale x 1 x i8> %0, |
| <vscale x 1 x i8> %1, |
| iXLen 0, iXLen %3) |
| br label %mergeblock |
| |
| falseblock: |
| %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8( |
| <vscale x 1 x i8> undef, |
| <vscale x 1 x i8> %0, |
| <vscale x 1 x i8> %2, |
| iXLen 0, iXLen %3) |
| br label %mergeblock |
| |
| mergeblock: |
| %c = phi <vscale x 1 x i8> [%a, %trueblock], [%b, %falseblock] |
| |
| ret <vscale x 1 x i8> %c |
| } |
| |
; Test same rounding mode in a diamond with the same dominating vxrm write.
| define <vscale x 1 x i8> @test7(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind { |
| ; CHECK-LABEL: test7: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: andi a1, a1, 1 |
| ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma |
| ; CHECK-NEXT: csrwi vxrm, 0 |
| ; CHECK-NEXT: vaadd.vv v8, v8, v9 |
| ; CHECK-NEXT: beqz a1, .LBB6_2 |
| ; CHECK-NEXT: # %bb.1: # %trueblock |
| ; CHECK-NEXT: vaadd.vv v8, v8, v10 |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .LBB6_2: # %falseblock |
| ; CHECK-NEXT: vasub.vv v8, v8, v10 |
| ; CHECK-NEXT: ret |
| entry: |
| %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8( |
| <vscale x 1 x i8> undef, |
| <vscale x 1 x i8> %0, |
| <vscale x 1 x i8> %1, |
| iXLen 0, iXLen %3) |
| br i1 %cond, label %trueblock, label %falseblock |
| |
| trueblock: |
| %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8( |
| <vscale x 1 x i8> undef, |
| <vscale x 1 x i8> %a, |
| <vscale x 1 x i8> %2, |
| iXLen 0, iXLen %3) |
| br label %mergeblock |
| |
| falseblock: |
| %c = call <vscale x 1 x i8> @llvm.riscv.vasub.nxv1i8.nxv1i8( |
| <vscale x 1 x i8> undef, |
| <vscale x 1 x i8> %a, |
| <vscale x 1 x i8> %2, |
| iXLen 0, iXLen %3) |
| br label %mergeblock |
| |
| mergeblock: |
| %d = phi <vscale x 1 x i8> [%b, %trueblock], [%c, %falseblock] |
| |
| ret <vscale x 1 x i8> %d |
| } |
| |
; Test same rounding mode in a diamond with the same vxrm at the merge point.
| define <vscale x 1 x i8> @test8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind { |
| ; CHECK-LABEL: test8: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: andi a1, a1, 1 |
| ; CHECK-NEXT: csrwi vxrm, 0 |
| ; CHECK-NEXT: beqz a1, .LBB7_2 |
| ; CHECK-NEXT: # %bb.1: # %trueblock |
| ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma |
| ; CHECK-NEXT: vaadd.vv v8, v8, v9 |
| ; CHECK-NEXT: vaadd.vv v8, v8, v10 |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .LBB7_2: # %falseblock |
| ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma |
| ; CHECK-NEXT: vasub.vv v8, v8, v9 |
| ; CHECK-NEXT: vaadd.vv v8, v8, v10 |
| ; CHECK-NEXT: ret |
| entry: |
| br i1 %cond, label %trueblock, label %falseblock |
| |
| trueblock: |
| %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8( |
| <vscale x 1 x i8> undef, |
| <vscale x 1 x i8> %0, |
| <vscale x 1 x i8> %1, |
| iXLen 0, iXLen %3) |
| br label %mergeblock |
| |
| falseblock: |
| %b = call <vscale x 1 x i8> @llvm.riscv.vasub.nxv1i8.nxv1i8( |
| <vscale x 1 x i8> undef, |
| <vscale x 1 x i8> %0, |
| <vscale x 1 x i8> %1, |
| iXLen 0, iXLen %3) |
| br label %mergeblock |
| |
| mergeblock: |
| %c = phi <vscale x 1 x i8> [%a, %trueblock], [%b, %falseblock] |
| %d = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8( |
| <vscale x 1 x i8> undef, |
| <vscale x 1 x i8> %c, |
| <vscale x 1 x i8> %2, |
| iXLen 0, iXLen %3) |
| |
| ret <vscale x 1 x i8> %d |
| } |
| |
; Test same rounding mode in a diamond with a different vxrm at the merge point.
| define <vscale x 1 x i8> @test9(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %3, i1 %cond) nounwind { |
| ; CHECK-LABEL: test9: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: andi a1, a1, 1 |
| ; CHECK-NEXT: csrwi vxrm, 0 |
| ; CHECK-NEXT: beqz a1, .LBB8_2 |
| ; CHECK-NEXT: # %bb.1: # %trueblock |
| ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma |
| ; CHECK-NEXT: vaadd.vv v8, v8, v9 |
| ; CHECK-NEXT: j .LBB8_3 |
| ; CHECK-NEXT: .LBB8_2: # %falseblock |
| ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma |
| ; CHECK-NEXT: vasub.vv v8, v8, v9 |
| ; CHECK-NEXT: .LBB8_3: # %mergeblock |
| ; CHECK-NEXT: csrwi vxrm, 2 |
| ; CHECK-NEXT: vaadd.vv v8, v8, v10 |
| ; CHECK-NEXT: ret |
| entry: |
| br i1 %cond, label %trueblock, label %falseblock |
| |
| trueblock: |
| %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8( |
| <vscale x 1 x i8> undef, |
| <vscale x 1 x i8> %0, |
| <vscale x 1 x i8> %1, |
| iXLen 0, iXLen %3) |
| br label %mergeblock |
| |
| falseblock: |
| %b = call <vscale x 1 x i8> @llvm.riscv.vasub.nxv1i8.nxv1i8( |
| <vscale x 1 x i8> undef, |
| <vscale x 1 x i8> %0, |
| <vscale x 1 x i8> %1, |
| iXLen 0, iXLen %3) |
| br label %mergeblock |
| |
| mergeblock: |
| %c = phi <vscale x 1 x i8> [%a, %trueblock], [%b, %falseblock] |
| %d = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8( |
| <vscale x 1 x i8> undef, |
| <vscale x 1 x i8> %c, |
| <vscale x 1 x i8> %2, |
| iXLen 2, iXLen %3) |
| |
| ret <vscale x 1 x i8> %d |
| } |
| |
| ; Test loop with no dominating vxrm write. |
define void @test10(ptr nocapture %ptr_dest, ptr nocapture readonly %ptr_op1, ptr nocapture readonly %ptr_op2, iXLen %n) {
| ; CHECK-LABEL: test10: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: beqz a3, .LBB9_3 |
| ; CHECK-NEXT: # %bb.1: # %for.body.preheader |
| ; CHECK-NEXT: csrwi vxrm, 2 |
| ; CHECK-NEXT: .LBB9_2: # %for.body |
| ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: vsetvli a4, a3, e8, mf8, ta, ma |
| ; CHECK-NEXT: vle8.v v8, (a1) |
| ; CHECK-NEXT: vle8.v v9, (a2) |
| ; CHECK-NEXT: vaadd.vv v8, v8, v9 |
| ; CHECK-NEXT: sub a3, a3, a4 |
| ; CHECK-NEXT: vse8.v v8, (a0) |
| ; CHECK-NEXT: bnez a3, .LBB9_2 |
| ; CHECK-NEXT: .LBB9_3: # %for.end |
| ; CHECK-NEXT: ret |
| entry: |
| %tobool.not9 = icmp eq iXLen %n, 0 |
| br i1 %tobool.not9, label %for.end, label %for.body |
| |
| for.body: |
| %n.addr.011 = phi iXLen [ %n, %entry ], [ %sub, %for.body ] |
| %vl = tail call iXLen @llvm.riscv.vsetvli.iXLen(iXLen %n.addr.011, iXLen 0, iXLen 5) |
| %load1 = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op1, iXLen %vl) |
| %load2 = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op2, iXLen %vl) |
| %vadd = tail call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %load1, <vscale x 1 x i8> %load2, iXLen 2, iXLen %vl) |
| tail call void @llvm.riscv.vse.nxv1i8.iXLen(<vscale x 1 x i8> %vadd, ptr %ptr_dest, iXLen %vl) |
| %sub = sub iXLen %n.addr.011, %vl |
| %tobool.not = icmp eq iXLen %sub, 0 |
| br i1 %tobool.not, label %for.end, label %for.body |
| |
| for.end: |
| ret void |
| } |
| |
| declare iXLen @llvm.riscv.vsetvli.iXLen(iXLen, iXLen immarg, iXLen immarg) |
declare <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8>, ptr nocapture, iXLen)
declare void @llvm.riscv.vse.nxv1i8.iXLen(<vscale x 1 x i8>, ptr nocapture, iXLen)
| |
| ; Test loop with dominating vxrm write. Make sure there is no write in the loop. |
define void @test11(ptr nocapture %ptr_dest, ptr nocapture readonly %ptr_op1, ptr nocapture readonly %ptr_op2, iXLen %n) {
| ; CHECK-LABEL: test11: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: vsetvli a4, a3, e8, mf8, ta, ma |
| ; CHECK-NEXT: vle8.v v8, (a1) |
| ; CHECK-NEXT: vle8.v v9, (a2) |
| ; CHECK-NEXT: csrwi vxrm, 2 |
| ; CHECK-NEXT: .LBB10_1: # %for.body |
| ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: vaadd.vv v8, v8, v9 |
| ; CHECK-NEXT: sub a3, a3, a4 |
| ; CHECK-NEXT: vse8.v v8, (a0) |
| ; CHECK-NEXT: beqz a3, .LBB10_3 |
| ; CHECK-NEXT: # %bb.2: # %for.body |
| ; CHECK-NEXT: # in Loop: Header=BB10_1 Depth=1 |
| ; CHECK-NEXT: vsetvli a4, a3, e8, mf8, ta, ma |
| ; CHECK-NEXT: vle8.v v8, (a1) |
| ; CHECK-NEXT: vle8.v v9, (a2) |
| ; CHECK-NEXT: j .LBB10_1 |
| ; CHECK-NEXT: .LBB10_3: # %for.end |
| ; CHECK-NEXT: ret |
| entry: |
| %vl = tail call iXLen @llvm.riscv.vsetvli.iXLen(iXLen %n, iXLen 0, iXLen 5) |
| %load1a = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op1, iXLen %vl) |
| %load2a = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op2, iXLen %vl) |
| %vadda = tail call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %load1a, <vscale x 1 x i8> %load2a, iXLen 2, iXLen %vl) |
| tail call void @llvm.riscv.vse.nxv1i8.iXLen(<vscale x 1 x i8> %vadda, ptr %ptr_dest, iXLen %vl) |
| %suba = sub iXLen %n, %vl |
| %tobool.not9 = icmp eq iXLen %suba, 0 |
| br i1 %tobool.not9, label %for.end, label %for.body |
| |
| for.body: |
| %n.addr.011 = phi iXLen [ %suba, %entry ], [ %sub, %for.body ] |
| %vl2 = tail call iXLen @llvm.riscv.vsetvli.iXLen(iXLen %n.addr.011, iXLen 0, iXLen 5) |
| %load1 = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op1, iXLen %vl2) |
| %load2 = tail call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.iXLen(<vscale x 1 x i8> undef, ptr %ptr_op2, iXLen %vl2) |
| %vadd = tail call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %load1, <vscale x 1 x i8> %load2, iXLen 2, iXLen %vl2) |
| tail call void @llvm.riscv.vse.nxv1i8.iXLen(<vscale x 1 x i8> %vadd, ptr %ptr_dest, iXLen %vl2) |
| %sub = sub iXLen %n.addr.011, %vl2 |
| %tobool.not = icmp eq iXLen %sub, 0 |
| br i1 %tobool.not, label %for.end, label %for.body |
| |
| for.end: |
| ret void |
| } |
| |
| ; The edge from entry to block2 is a critical edge. The vxrm write in block2 |
| ; is redundant when coming from block1, but is needed when coming from entry. |
| ; FIXME: We could remove the write from the end of block1 without splitting the |
| ; critical edge. |
| define <vscale x 1 x i8> @test12(i1 %c1, <vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen %vl) { |
| ; CHECK-LABEL: test12: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: andi a0, a0, 1 |
| ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma |
| ; CHECK-NEXT: csrwi vxrm, 0 |
| ; CHECK-NEXT: vaadd.vv v9, v8, v9 |
| ; CHECK-NEXT: beqz a0, .LBB11_2 |
| ; CHECK-NEXT: # %bb.1: # %block1 |
| ; CHECK-NEXT: csrwi vxrm, 1 |
| ; CHECK-NEXT: vaadd.vv v9, v8, v9 |
| ; CHECK-NEXT: csrwi vxrm, 2 |
| ; CHECK-NEXT: .LBB11_2: # %block2 |
| ; CHECK-NEXT: csrwi vxrm, 2 |
| ; CHECK-NEXT: vaadd.vv v8, v8, v9 |
| ; CHECK-NEXT: ret |
| entry: |
| %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen 0, iXLen %vl) |
| br i1 %c1, label %block1, label %block2 |
| |
| block1: |
| %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %a, iXLen 1, iXLen %vl) |
| br label %block2 |
| |
| block2: |
| %c = phi <vscale x 1 x i8> [ %a, %entry ], [ %b, %block1] |
| %d = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %c, iXLen 2, iXLen %vl) |
| ret <vscale x 1 x i8> %d |
| } |
| |
; Similar to test12, but the conditional branch in block1 makes the edge from
; block1 to block2 a second critical edge. Now the write to vxrm at the end of
; block1 can't be removed because it is needed by block3.
| define <vscale x 1 x i8> @test13(i1 %c1, i1 %c2, i1 %c3, <vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen %vl) { |
| ; CHECK-LABEL: test13: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: andi a0, a0, 1 |
| ; CHECK-NEXT: vsetvli zero, a3, e8, mf8, ta, ma |
| ; CHECK-NEXT: csrwi vxrm, 0 |
| ; CHECK-NEXT: vaadd.vv v10, v8, v9 |
| ; CHECK-NEXT: beqz a0, .LBB12_2 |
| ; CHECK-NEXT: # %bb.1: # %block1 |
| ; CHECK-NEXT: csrwi vxrm, 1 |
| ; CHECK-NEXT: vaadd.vv v10, v8, v10 |
| ; CHECK-NEXT: andi a1, a1, 1 |
| ; CHECK-NEXT: csrwi vxrm, 2 |
| ; CHECK-NEXT: beqz a1, .LBB12_3 |
| ; CHECK-NEXT: .LBB12_2: # %block2 |
| ; CHECK-NEXT: csrwi vxrm, 2 |
| ; CHECK-NEXT: vaadd.vv v8, v8, v10 |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .LBB12_3: # %block3 |
| ; CHECK-NEXT: vaadd.vv v8, v9, v10 |
| ; CHECK-NEXT: ret |
| entry: |
| %a = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen 0, iXLen %vl) |
| br i1 %c1, label %block1, label %block2 |
| |
| block1: |
| %b = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %a, iXLen 1, iXLen %vl) |
| br i1 %c2, label %block2, label %block3 |
| |
| block2: |
| %c = phi <vscale x 1 x i8> [ %a, %entry ], [ %b, %block1] |
| %d = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %0, <vscale x 1 x i8> %c, iXLen 2, iXLen %vl) |
| ret <vscale x 1 x i8> %d |
| |
| block3: |
| %e = call <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> %1, <vscale x 1 x i8> %b, iXLen 2, iXLen %vl) |
| ret <vscale x 1 x i8> %e |
| } |