; llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.ll - rust-lang/llvm-project - Git at Google

 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvl512b -verify-machineinstrs \
 ; RUN:   -riscv-enable-vl-optimizer=false | FileCheck %s -check-prefixes=CHECK,NOVLOPT
 ; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvl512b -verify-machineinstrs \
 ; RUN:   -riscv-enable-vl-optimizer=false | FileCheck %s -check-prefixes=CHECK,NOVLOPT
 ; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvl512b -riscv-enable-vl-optimizer \
 ; RUN:   -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,VLOPT
 ; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvl512b -riscv-enable-vl-optimizer \
 ; RUN:   -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,VLOPT

 ; Fixed-length vector case. The body de-interleaves the even and odd i16
 ; lanes of %x, sign-extends both halves to i32 and adds them, then
 ; de-interleaves the even and odd lanes of that sum and adds those.
 ; %var_1, %var_3 and %var_5 are not referenced by the body.
 ; The assertions use only the shared CHECK prefix, so the same output is
 ; expected whether the VL optimizer is enabled or disabled.
 define <2 x i32> @vdot_lane_s32(<2 x i32> noundef %var_1, <8 x i8> noundef %var_3, <8 x i8> noundef %var_5, <8 x i16> %x) {
 ; CHECK-LABEL: vdot_lane_s32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v11, 0
 ; CHECK-NEXT:    vnsrl.wi v9, v11, 16
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vwadd.vv v10, v8, v9
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v10, 0
 ; CHECK-NEXT:    vnsrl.wx v9, v10, a0
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    ret
 entry:
   ; %a takes the even i16 lanes of %x, %b the odd ones.
   %a = shufflevector <8 x i16> %x, <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
   %b = shufflevector <8 x i16> %x, <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
   ; Sign-extend both halves and add them as i32 (vwadd.vv in the assertions).
   %c = sext <4 x i16> %a to <4 x i32>
   %d = sext <4 x i16> %b to <4 x i32>
   %e = add nsw <4 x i32> %c, %d
   ; %z10 takes the even i32 lanes of the sum, %z11 the odd ones.
   %z10 = shufflevector <4 x i32> %e, <4 x i32> poison, <2 x i32> <i32 0, i32 2>
   %z11 = shufflevector <4 x i32> %e, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
   %y12 = add <2 x i32> %z10, %z11
   ret <2 x i32> %y12
 }

 ; Declaration of the vnsrl intrinsic called by the vnsrl test in this file.
 ; It returns <vscale x 2 x i16> and takes an i16 vector, an i32 vector,
 ; an i16 vector and an iXLen scalar.
 ; NOTE(review): the operands are presumably (passthru, wide source, shift
 ; amount, vl) - confirm against the RISC-V intrinsic definitions.
 declare <vscale x 2 x i16> @llvm.riscv.vnsrl.nxv2i16.nxv2i32.nxv2i16(
   <vscale x 2 x i16>,
   <vscale x 2 x i32>,
   <vscale x 2 x i16>,
   iXLen);

 ; A widening add (sext + sext + add, emitted as vwadd.vv per the assertions)
 ; whose only user is the vnsrl intrinsic call, which receives %2 as its last
 ; operand (presumably the vector length; it reaches vsetvli through a0).
 ; With the VL optimizer disabled, vwadd.vv runs under "vsetvli a1, zero",
 ; which requests the maximum vector length, and a second vsetvli switches to
 ; a0 before vnsrl.wv. With the optimizer enabled, a single "vsetvli zero, a0"
 ; covers both instructions.
 ; %3 and %4 are not referenced by the body.
 define <vscale x 2 x i16> @intrinsic_vnsrl_wv_nxv2i16_nxv2i32_nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, iXLen %2, <vscale x 2 x i32> %3, <vscale x 2 x i32> %4, <vscale x 2 x i16> %z) nounwind {
 ; NOVLOPT-LABEL: intrinsic_vnsrl_wv_nxv2i16_nxv2i32_nxv2i16:
 ; NOVLOPT:       # %bb.0: # %entry
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
 ; NOVLOPT-NEXT:    vwadd.vv v10, v8, v9
 ; NOVLOPT-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; NOVLOPT-NEXT:    vnsrl.wv v8, v10, v12
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: intrinsic_vnsrl_wv_nxv2i16_nxv2i32_nxv2i16:
 ; VLOPT:       # %bb.0: # %entry
 ; VLOPT-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; VLOPT-NEXT:    vwadd.vv v10, v8, v9
 ; VLOPT-NEXT:    vnsrl.wv v8, v10, v12
 ; VLOPT-NEXT:    ret
 entry:
   ; Producer: i16 -> i32 widening add with no explicit length of its own.
   %c = sext <vscale x 2 x i16> %a to <vscale x 2 x i32>
   %d = sext <vscale x 2 x i16> %b to <vscale x 2 x i32>
   %v1 = add <vscale x 2 x i32> %c, %d
   ; Consumer: narrows %v1 back to i16 using %z, with an undef first operand.
   %x = call <vscale x 2 x i16> @llvm.riscv.vnsrl.nxv2i16.nxv2i32.nxv2i16(
     <vscale x 2 x i16> undef,
     <vscale x 2 x i32> %v1,
     <vscale x 2 x i16> %z,
     iXLen %2)

   ret <vscale x 2 x i16> %x
 }

 ; Declaration of the vnclip intrinsic called by the vnclip test in this file.
 ; It returns <vscale x 2 x i16> and takes an i16 vector, an i32 vector,
 ; an i16 vector and two iXLen scalars.
 ; NOTE(review): the operands are presumably (passthru, wide source, shift
 ; amount, rounding mode, vl) - confirm against the RISC-V intrinsic
 ; definitions.
 declare <vscale x 2 x i16> @llvm.riscv.vnclip.nxv2i16.nxv2i32.nxv2i16(
   <vscale x 2 x i16>,
   <vscale x 2 x i32>,
   <vscale x 2 x i16>,
   iXLen, iXLen);

 ; Same producer/consumer shape as the vnsrl test, with vnclip as the user of
 ; the widening add. The call passes a constant iXLen 0 followed by %2; both
 ; configurations emit "csrwi vxrm, 0" before vnclip.wv (the constant is
 ; presumably the rounding mode, and %2 the vector length).
 ; With the VL optimizer disabled, vwadd.vv runs under "vsetvli a1, zero",
 ; which requests the maximum vector length, and a second vsetvli switches to
 ; a0. With the optimizer enabled, one "vsetvli zero, a0" covers both vector
 ; instructions.
 ; %3 and %4 are not referenced by the body.
 define <vscale x 2 x i16> @vnclip(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, iXLen %2, <vscale x 2 x i32> %3, <vscale x 2 x i32> %4, <vscale x 2 x i16> %z) nounwind {
 ; NOVLOPT-LABEL: vnclip:
 ; NOVLOPT:       # %bb.0: # %entry
 ; NOVLOPT-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
 ; NOVLOPT-NEXT:    vwadd.vv v10, v8, v9
 ; NOVLOPT-NEXT:    csrwi vxrm, 0
 ; NOVLOPT-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; NOVLOPT-NEXT:    vnclip.wv v8, v10, v12
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vnclip:
 ; VLOPT:       # %bb.0: # %entry
 ; VLOPT-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; VLOPT-NEXT:    vwadd.vv v10, v8, v9
 ; VLOPT-NEXT:    csrwi vxrm, 0
 ; VLOPT-NEXT:    vnclip.wv v8, v10, v12
 ; VLOPT-NEXT:    ret
 entry:
   ; Producer: i16 -> i32 widening add with no explicit length of its own.
   %c = sext <vscale x 2 x i16> %a to <vscale x 2 x i32>
   %d = sext <vscale x 2 x i16> %b to <vscale x 2 x i32>
   %v1 = add <vscale x 2 x i32> %c, %d
   ; Consumer: narrowing clip of %v1 using %z, with an undef first operand.
   %x = call <vscale x 2 x i16> @llvm.riscv.vnclip.nxv2i16.nxv2i32.nxv2i16(
     <vscale x 2 x i16> undef,
     <vscale x 2 x i32> %v1,
     <vscale x 2 x i16> %z,
     iXLen 0, iXLen %2)

   ret <vscale x 2 x i16> %x
 }
	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
	; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvl512b -verify-machineinstrs \
	; RUN: -riscv-enable-vl-optimizer=false | FileCheck %s -check-prefixes=CHECK,NOVLOPT
	; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvl512b -verify-machineinstrs \
	; RUN: -riscv-enable-vl-optimizer=false | FileCheck %s -check-prefixes=CHECK,NOVLOPT
	; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvl512b -riscv-enable-vl-optimizer \
	; RUN: -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,VLOPT
	; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvl512b -riscv-enable-vl-optimizer \
	; RUN: -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,VLOPT

	; Fixed-length vector case. The body de-interleaves the even and odd i16
	; lanes of %x, sign-extends both halves to i32 and adds them, then
	; de-interleaves the even and odd lanes of that sum and adds those.
	; %var_1, %var_3 and %var_5 are not referenced by the body.
	; The assertions use only the shared CHECK prefix.
	; NOTE(review): @vdot_lane_s32 is already defined earlier in this file, and
	; an LLVM module cannot define the same function twice - confirm whether
	; this repeated copy should exist.
	define <2 x i32> @vdot_lane_s32(<2 x i32> noundef %var_1, <8 x i8> noundef %var_3, <8 x i8> noundef %var_5, <8 x i16> %x) {
	; CHECK-LABEL: vdot_lane_s32:
	; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
	; CHECK-NEXT: vnsrl.wi v8, v11, 0
	; CHECK-NEXT: vnsrl.wi v9, v11, 16
	; CHECK-NEXT: li a0, 32
	; CHECK-NEXT: vwadd.vv v10, v8, v9
	; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
	; CHECK-NEXT: vnsrl.wi v8, v10, 0
	; CHECK-NEXT: vnsrl.wx v9, v10, a0
	; CHECK-NEXT: vadd.vv v8, v8, v9
	; CHECK-NEXT: ret
	entry:
	; %a takes the even i16 lanes of %x, %b the odd ones.
	%a = shufflevector <8 x i16> %x, <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
	%b = shufflevector <8 x i16> %x, <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
	; Sign-extend both halves and add them as i32 (vwadd.vv in the assertions).
	%c = sext <4 x i16> %a to <4 x i32>
	%d = sext <4 x i16> %b to <4 x i32>
	%e = add nsw <4 x i32> %c, %d
	; %z10 takes the even i32 lanes of the sum, %z11 the odd ones.
	%z10 = shufflevector <4 x i32> %e, <4 x i32> poison, <2 x i32> <i32 0, i32 2>
	%z11 = shufflevector <4 x i32> %e, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
	%y12 = add <2 x i32> %z10, %z11
	ret <2 x i32> %y12
	}

	; Declaration of a vnsrl intrinsic returning <vscale x 2 x i16>. It takes
	; an i16 vector, an i32 vector, an i16 vector and an iXLen scalar.
	; NOTE(review): the operands are presumably (passthru, wide source, shift
	; amount, vl) - confirm against the RISC-V intrinsic definitions.
	; NOTE(review): this intrinsic is already declared earlier in this file -
	; confirm the repeated declaration is intended.
	declare <vscale x 2 x i16> @llvm.riscv.vnsrl.nxv2i16.nxv2i32.nxv2i16(
	<vscale x 2 x i16>,
	<vscale x 2 x i32>,
	<vscale x 2 x i16>,
	iXLen);

	; A widening add (sext + sext + add, emitted as vwadd.vv per the assertions)
	; whose only user is the vnsrl intrinsic call, which receives %2 as its last
	; operand (presumably the vector length; it reaches vsetvli through a0).
	; With the VL optimizer disabled, vwadd.vv runs under "vsetvli a1, zero",
	; which requests the maximum vector length, and a second vsetvli switches to
	; a0 before vnsrl.wv. With the optimizer enabled, a single "vsetvli zero, a0"
	; covers both instructions.
	; %3 and %4 are not referenced by the body.
	; NOTE(review): a function with this name is already defined earlier in this
	; file, and an LLVM module cannot define the same function twice - confirm
	; whether this repeated copy should exist.
	define <vscale x 2 x i16> @intrinsic_vnsrl_wv_nxv2i16_nxv2i32_nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, iXLen %2, <vscale x 2 x i32> %3, <vscale x 2 x i32> %4, <vscale x 2 x i16> %z) nounwind {
	; NOVLOPT-LABEL: intrinsic_vnsrl_wv_nxv2i16_nxv2i32_nxv2i16:
	; NOVLOPT: # %bb.0: # %entry
	; NOVLOPT-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
	; NOVLOPT-NEXT: vwadd.vv v10, v8, v9
	; NOVLOPT-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
	; NOVLOPT-NEXT: vnsrl.wv v8, v10, v12
	; NOVLOPT-NEXT: ret
	;
	; VLOPT-LABEL: intrinsic_vnsrl_wv_nxv2i16_nxv2i32_nxv2i16:
	; VLOPT: # %bb.0: # %entry
	; VLOPT-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
	; VLOPT-NEXT: vwadd.vv v10, v8, v9
	; VLOPT-NEXT: vnsrl.wv v8, v10, v12
	; VLOPT-NEXT: ret
	entry:
	; Producer: i16 -> i32 widening add with no explicit length of its own.
	%c = sext <vscale x 2 x i16> %a to <vscale x 2 x i32>
	%d = sext <vscale x 2 x i16> %b to <vscale x 2 x i32>
	%v1 = add <vscale x 2 x i32> %c, %d
	; Consumer: narrows %v1 back to i16 using %z, with an undef first operand.
	%x = call <vscale x 2 x i16> @llvm.riscv.vnsrl.nxv2i16.nxv2i32.nxv2i16(
	<vscale x 2 x i16> undef,
	<vscale x 2 x i32> %v1,
	<vscale x 2 x i16> %z,
	iXLen %2)

	ret <vscale x 2 x i16> %x
	}

	; Declaration of a vnclip intrinsic returning <vscale x 2 x i16>. It takes
	; an i16 vector, an i32 vector, an i16 vector and two iXLen scalars.
	; NOTE(review): the operands are presumably (passthru, wide source, shift
	; amount, rounding mode, vl) - confirm against the RISC-V intrinsic
	; definitions.
	; NOTE(review): this intrinsic is already declared earlier in this file -
	; confirm the repeated declaration is intended.
	declare <vscale x 2 x i16> @llvm.riscv.vnclip.nxv2i16.nxv2i32.nxv2i16(
	<vscale x 2 x i16>,
	<vscale x 2 x i32>,
	<vscale x 2 x i16>,
	iXLen, iXLen);

	; A widening add (sext + sext + add, emitted as vwadd.vv per the assertions)
	; whose only user is the vnclip intrinsic call. The call passes a constant
	; iXLen 0 followed by %2; both configurations emit "csrwi vxrm, 0" before
	; vnclip.wv (the constant is presumably the rounding mode, and %2 the vector
	; length).
	; With the VL optimizer disabled, vwadd.vv runs under "vsetvli a1, zero",
	; which requests the maximum vector length, and a second vsetvli switches to
	; a0. With the optimizer enabled, one "vsetvli zero, a0" covers both vector
	; instructions.
	; %3 and %4 are not referenced by the body.
	; NOTE(review): @vnclip is already defined earlier in this file, and an LLVM
	; module cannot define the same function twice - confirm whether this
	; repeated copy should exist.
	define <vscale x 2 x i16> @vnclip(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, iXLen %2, <vscale x 2 x i32> %3, <vscale x 2 x i32> %4, <vscale x 2 x i16> %z) nounwind {
	; NOVLOPT-LABEL: vnclip:
	; NOVLOPT: # %bb.0: # %entry
	; NOVLOPT-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
	; NOVLOPT-NEXT: vwadd.vv v10, v8, v9
	; NOVLOPT-NEXT: csrwi vxrm, 0
	; NOVLOPT-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
	; NOVLOPT-NEXT: vnclip.wv v8, v10, v12
	; NOVLOPT-NEXT: ret
	;
	; VLOPT-LABEL: vnclip:
	; VLOPT: # %bb.0: # %entry
	; VLOPT-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
	; VLOPT-NEXT: vwadd.vv v10, v8, v9
	; VLOPT-NEXT: csrwi vxrm, 0
	; VLOPT-NEXT: vnclip.wv v8, v10, v12
	; VLOPT-NEXT: ret
	entry:
	; Producer: i16 -> i32 widening add with no explicit length of its own.
	%c = sext <vscale x 2 x i16> %a to <vscale x 2 x i32>
	%d = sext <vscale x 2 x i16> %b to <vscale x 2 x i32>
	%v1 = add <vscale x 2 x i32> %c, %d
	; Consumer: narrowing clip of %v1 using %z, with an undef first operand.
	%x = call <vscale x 2 x i16> @llvm.riscv.vnclip.nxv2i16.nxv2i32.nxv2i16(
	<vscale x 2 x i16> undef,
	<vscale x 2 x i32> %v1,
	<vscale x 2 x i16> %z,
	iXLen 0, iXLen %2)

	ret <vscale x 2 x i16> %x
	}