| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| ; RUN: opt -passes=slp-vectorizer -S -mtriple=riscv64-unknown-linux-gnu -mattr=+v < %s | FileCheck %s --check-prefixes=CHECK,NO-ZVFHMIN-ZVFBFMIN |
| ; RUN: opt -passes=slp-vectorizer -S -mtriple=riscv64-unknown-linux-gnu -mattr=+v,+zvfhmin,+zvfbfmin < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN-ZVFBFMIN |
| |
| |
| ; Both operands of each scalar fsub are loaded from %p with constant element |
| ; indices: the subtrahend uses 0, 4, ..., 28 and the minuend uses 30, 26, ..., 2. |
| ; The eight results are stored to %s[0..7]. The assertions expect the loads to |
| ; become two vp.strided.load calls with byte strides 16 and -16, feeding one |
| ; <8 x float> fsub and one vector store. |
| define void @test(ptr %p, ptr noalias %s) { |
| ; CHECK-LABEL: @test( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 0 |
| ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 30 |
| ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0 |
| ; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> splat (i1 true), i32 8) |
| ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -16, <8 x i1> splat (i1 true), i32 8) |
| ; CHECK-NEXT: [[TMP2:%.*]] = fsub fast <8 x float> [[TMP1]], [[TMP0]] |
| ; CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[ARRAYIDX2]], align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| %arrayidx = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 0 |
| %i = load float, ptr %arrayidx, align 4 |
| %arrayidx1 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 30 |
| %i1 = load float, ptr %arrayidx1, align 4 |
| %add = fsub fast float %i1, %i |
| %arrayidx2 = getelementptr inbounds float, ptr %s, i64 0 |
| store float %add, ptr %arrayidx2, align 4 |
| %arrayidx4 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 4 |
| %i2 = load float, ptr %arrayidx4, align 4 |
| %arrayidx6 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 26 |
| %i3 = load float, ptr %arrayidx6, align 4 |
| %add7 = fsub fast float %i3, %i2 |
| %arrayidx9 = getelementptr inbounds float, ptr %s, i64 1 |
| store float %add7, ptr %arrayidx9, align 4 |
| %arrayidx11 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 8 |
| %i4 = load float, ptr %arrayidx11, align 4 |
| %arrayidx13 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 22 |
| %i5 = load float, ptr %arrayidx13, align 4 |
| %add14 = fsub fast float %i5, %i4 |
| %arrayidx16 = getelementptr inbounds float, ptr %s, i64 2 |
| store float %add14, ptr %arrayidx16, align 4 |
| %arrayidx18 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 12 |
| %i6 = load float, ptr %arrayidx18, align 4 |
| %arrayidx20 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 18 |
| %i7 = load float, ptr %arrayidx20, align 4 |
| %add21 = fsub fast float %i7, %i6 |
| %arrayidx23 = getelementptr inbounds float, ptr %s, i64 3 |
| store float %add21, ptr %arrayidx23, align 4 |
| %arrayidx25 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 16 |
| %i8 = load float, ptr %arrayidx25, align 4 |
| %arrayidx27 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 14 |
| %i9 = load float, ptr %arrayidx27, align 4 |
| %add28 = fsub fast float %i9, %i8 |
| %arrayidx30 = getelementptr inbounds float, ptr %s, i64 4 |
| store float %add28, ptr %arrayidx30, align 4 |
| %arrayidx32 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 20 |
| %i10 = load float, ptr %arrayidx32, align 4 |
| %arrayidx34 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 10 |
| %i11 = load float, ptr %arrayidx34, align 4 |
| %add35 = fsub fast float %i11, %i10 |
| %arrayidx37 = getelementptr inbounds float, ptr %s, i64 5 |
| store float %add35, ptr %arrayidx37, align 4 |
| %arrayidx39 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 24 |
| %i12 = load float, ptr %arrayidx39, align 4 |
| %arrayidx41 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 6 |
| %i13 = load float, ptr %arrayidx41, align 4 |
| %add42 = fsub fast float %i13, %i12 |
| %arrayidx44 = getelementptr inbounds float, ptr %s, i64 6 |
| store float %add42, ptr %arrayidx44, align 4 |
| %arrayidx46 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 28 |
| %i14 = load float, ptr %arrayidx46, align 4 |
| %arrayidx48 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 2 |
| %i15 = load float, ptr %arrayidx48, align 4 |
| %add49 = fsub fast float %i15, %i14 |
| %arrayidx51 = getelementptr inbounds float, ptr %s, i64 7 |
| store float %add49, ptr %arrayidx51, align 4 |
| ret void |
| } |
| |
| ; Like @test, but the subtrahend index is k * %str for k = 0..7, where %str is |
| ; the zero-extended %stride argument. The minuend still uses the constant |
| ; indices 30, 26, ..., 2. The assertions expect a run-time byte stride of |
| ; %str * 4 for the first strided load and a constant stride of -16 for the second. |
| define void @test1(ptr %p, ptr noalias %s, i32 %stride) { |
| ; CHECK-LABEL: @test1( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[STR:%.*]] = zext i32 [[STRIDE:%.*]] to i64 |
| ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 0 |
| ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 30 |
| ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0 |
| ; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[STR]], 4 |
| ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 [[TMP0]], <8 x i1> splat (i1 true), i32 8) |
| ; CHECK-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -16, <8 x i1> splat (i1 true), i32 8) |
| ; CHECK-NEXT: [[TMP3:%.*]] = fsub fast <8 x float> [[TMP2]], [[TMP1]] |
| ; CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[ARRAYIDX2]], align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| %str = zext i32 %stride to i64 |
| %arrayidx = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 0 |
| %i = load float, ptr %arrayidx, align 4 |
| %arrayidx1 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 30 |
| %i1 = load float, ptr %arrayidx1, align 4 |
| %add = fsub fast float %i1, %i |
| %arrayidx2 = getelementptr inbounds float, ptr %s, i64 0 |
| store float %add, ptr %arrayidx2, align 4 |
| %arrayidx4 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %str |
| %i2 = load float, ptr %arrayidx4, align 4 |
| %arrayidx6 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 26 |
| %i3 = load float, ptr %arrayidx6, align 4 |
| %add7 = fsub fast float %i3, %i2 |
| %arrayidx9 = getelementptr inbounds float, ptr %s, i64 1 |
| store float %add7, ptr %arrayidx9, align 4 |
| %st1 = mul i64 %str, 2 |
| %arrayidx11 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st1 |
| %i4 = load float, ptr %arrayidx11, align 4 |
| %arrayidx13 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 22 |
| %i5 = load float, ptr %arrayidx13, align 4 |
| %add14 = fsub fast float %i5, %i4 |
| %arrayidx16 = getelementptr inbounds float, ptr %s, i64 2 |
| store float %add14, ptr %arrayidx16, align 4 |
| %st2 = mul i64 %str, 3 |
| %arrayidx18 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st2 |
| %i6 = load float, ptr %arrayidx18, align 4 |
| %arrayidx20 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 18 |
| %i7 = load float, ptr %arrayidx20, align 4 |
| %add21 = fsub fast float %i7, %i6 |
| %arrayidx23 = getelementptr inbounds float, ptr %s, i64 3 |
| store float %add21, ptr %arrayidx23, align 4 |
| %st3 = mul i64 %str, 4 |
| %arrayidx25 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st3 |
| %i8 = load float, ptr %arrayidx25, align 4 |
| %arrayidx27 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 14 |
| %i9 = load float, ptr %arrayidx27, align 4 |
| %add28 = fsub fast float %i9, %i8 |
| %arrayidx30 = getelementptr inbounds float, ptr %s, i64 4 |
| store float %add28, ptr %arrayidx30, align 4 |
| %st4 = mul i64 %str, 5 |
| %arrayidx32 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st4 |
| %i10 = load float, ptr %arrayidx32, align 4 |
| %arrayidx34 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 10 |
| %i11 = load float, ptr %arrayidx34, align 4 |
| %add35 = fsub fast float %i11, %i10 |
| %arrayidx37 = getelementptr inbounds float, ptr %s, i64 5 |
| store float %add35, ptr %arrayidx37, align 4 |
| %st5 = mul i64 %str, 6 |
| %arrayidx39 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st5 |
| %i12 = load float, ptr %arrayidx39, align 4 |
| %arrayidx41 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 6 |
| %i13 = load float, ptr %arrayidx41, align 4 |
| %add42 = fsub fast float %i13, %i12 |
| %arrayidx44 = getelementptr inbounds float, ptr %s, i64 6 |
| store float %add42, ptr %arrayidx44, align 4 |
| %st6 = mul i64 %str, 7 |
| %arrayidx46 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st6 |
| %i14 = load float, ptr %arrayidx46, align 4 |
| %arrayidx48 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 2 |
| %i15 = load float, ptr %arrayidx48, align 4 |
| %add49 = fsub fast float %i15, %i14 |
| %arrayidx51 = getelementptr inbounds float, ptr %s, i64 7 |
| store float %add49, ptr %arrayidx51, align 4 |
| ret void |
| } |
| |
| ; Mirror of @test1: the subtrahend uses constant indices 2, 6, ..., 30 and the |
| ; minuend uses k * %str for k = 7 down to 0 (the last index is the literal 0). |
| ; The assertions expect a constant byte stride of 16 for the first strided load |
| ; and a run-time stride of %str * -4, based at the 7 * %str element, for the second. |
| define void @test2(ptr %p, ptr noalias %s, i32 %stride) { |
| ; CHECK-LABEL: @test2( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[STR:%.*]] = zext i32 [[STRIDE:%.*]] to i64 |
| ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 2 |
| ; CHECK-NEXT: [[ST6:%.*]] = mul i64 [[STR]], 7 |
| ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST6]] |
| ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0 |
| ; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> splat (i1 true), i32 8) |
| ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[STR]], -4 |
| ; CHECK-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 [[TMP1]], <8 x i1> splat (i1 true), i32 8) |
| ; CHECK-NEXT: [[TMP3:%.*]] = fsub fast <8 x float> [[TMP2]], [[TMP0]] |
| ; CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[ARRAYIDX2]], align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| %str = zext i32 %stride to i64 |
| %arrayidx = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 2 |
| %i = load float, ptr %arrayidx, align 4 |
| %st6 = mul i64 %str, 7 |
| %arrayidx1 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st6 |
| %i1 = load float, ptr %arrayidx1, align 4 |
| %add = fsub fast float %i1, %i |
| %arrayidx2 = getelementptr inbounds float, ptr %s, i64 0 |
| store float %add, ptr %arrayidx2, align 4 |
| %arrayidx4 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 6 |
| %i2 = load float, ptr %arrayidx4, align 4 |
| %st5 = mul i64 %str, 6 |
| %arrayidx6 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st5 |
| %i3 = load float, ptr %arrayidx6, align 4 |
| %add7 = fsub fast float %i3, %i2 |
| %arrayidx9 = getelementptr inbounds float, ptr %s, i64 1 |
| store float %add7, ptr %arrayidx9, align 4 |
| %arrayidx11 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 10 |
| %i4 = load float, ptr %arrayidx11, align 4 |
| %st4 = mul i64 %str, 5 |
| %arrayidx13 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st4 |
| %i5 = load float, ptr %arrayidx13, align 4 |
| %add14 = fsub fast float %i5, %i4 |
| %arrayidx16 = getelementptr inbounds float, ptr %s, i64 2 |
| store float %add14, ptr %arrayidx16, align 4 |
| %arrayidx18 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 14 |
| %i6 = load float, ptr %arrayidx18, align 4 |
| %st3 = mul i64 %str, 4 |
| %arrayidx20 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st3 |
| %i7 = load float, ptr %arrayidx20, align 4 |
| %add21 = fsub fast float %i7, %i6 |
| %arrayidx23 = getelementptr inbounds float, ptr %s, i64 3 |
| store float %add21, ptr %arrayidx23, align 4 |
| %arrayidx25 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 18 |
| %st2 = mul i64 %str, 3 |
| %i8 = load float, ptr %arrayidx25, align 4 |
| %arrayidx27 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st2 |
| %i9 = load float, ptr %arrayidx27, align 4 |
| %add28 = fsub fast float %i9, %i8 |
| %arrayidx30 = getelementptr inbounds float, ptr %s, i64 4 |
| store float %add28, ptr %arrayidx30, align 4 |
| %arrayidx32 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 22 |
| %i10 = load float, ptr %arrayidx32, align 4 |
| %st1 = mul i64 %str, 2 |
| %arrayidx34 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st1 |
| %i11 = load float, ptr %arrayidx34, align 4 |
| %add35 = fsub fast float %i11, %i10 |
| %arrayidx37 = getelementptr inbounds float, ptr %s, i64 5 |
| store float %add35, ptr %arrayidx37, align 4 |
| %arrayidx39 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 26 |
| %i12 = load float, ptr %arrayidx39, align 4 |
| %arrayidx41 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %str |
| %i13 = load float, ptr %arrayidx41, align 4 |
| %add42 = fsub fast float %i13, %i12 |
| %arrayidx44 = getelementptr inbounds float, ptr %s, i64 6 |
| store float %add42, ptr %arrayidx44, align 4 |
| %arrayidx46 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 30 |
| %i14 = load float, ptr %arrayidx46, align 4 |
| %arrayidx48 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 0 |
| %i15 = load float, ptr %arrayidx48, align 4 |
| %add49 = fsub fast float %i15, %i14 |
| %arrayidx51 = getelementptr inbounds float, ptr %s, i64 7 |
| store float %add49, ptr %arrayidx51, align 4 |
| ret void |
| } |
| |
| ; The subtrahend uses indices 0, 4, ..., 28; the minuend uses the consecutive |
| ; descending indices 30, 29, ..., 23. The assertions expect the descending |
| ; accesses to be emitted as a vp.strided.load with byte stride -4 rather than |
| ; as scalar loads. |
| define void @test3(ptr %p, ptr noalias %s) { |
| ; CHECK-LABEL: @test3( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 0 |
| ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 30 |
| ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0 |
| ; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> splat (i1 true), i32 8) |
| ; CHECK-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -4, <8 x i1> splat (i1 true), i32 8) |
| ; CHECK-NEXT: [[TMP3:%.*]] = fsub fast <8 x float> [[TMP2]], [[TMP0]] |
| ; CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[ARRAYIDX2]], align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| %arrayidx = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 0 |
| %i = load float, ptr %arrayidx, align 4 |
| %arrayidx1 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 30 |
| %i1 = load float, ptr %arrayidx1, align 4 |
| %add = fsub fast float %i1, %i |
| %arrayidx2 = getelementptr inbounds float, ptr %s, i64 0 |
| store float %add, ptr %arrayidx2, align 4 |
| %arrayidx4 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 4 |
| %i2 = load float, ptr %arrayidx4, align 4 |
| %arrayidx6 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 29 |
| %i3 = load float, ptr %arrayidx6, align 4 |
| %add7 = fsub fast float %i3, %i2 |
| %arrayidx9 = getelementptr inbounds float, ptr %s, i64 1 |
| store float %add7, ptr %arrayidx9, align 4 |
| %arrayidx11 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 8 |
| %i4 = load float, ptr %arrayidx11, align 4 |
| %arrayidx13 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 28 |
| %i5 = load float, ptr %arrayidx13, align 4 |
| %add14 = fsub fast float %i5, %i4 |
| %arrayidx16 = getelementptr inbounds float, ptr %s, i64 2 |
| store float %add14, ptr %arrayidx16, align 4 |
| %arrayidx18 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 12 |
| %i6 = load float, ptr %arrayidx18, align 4 |
| %arrayidx20 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 27 |
| %i7 = load float, ptr %arrayidx20, align 4 |
| %add21 = fsub fast float %i7, %i6 |
| %arrayidx23 = getelementptr inbounds float, ptr %s, i64 3 |
| store float %add21, ptr %arrayidx23, align 4 |
| %arrayidx25 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 16 |
| %i8 = load float, ptr %arrayidx25, align 4 |
| %arrayidx27 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 26 |
| %i9 = load float, ptr %arrayidx27, align 4 |
| %add28 = fsub fast float %i9, %i8 |
| %arrayidx30 = getelementptr inbounds float, ptr %s, i64 4 |
| store float %add28, ptr %arrayidx30, align 4 |
| %arrayidx32 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 20 |
| %i10 = load float, ptr %arrayidx32, align 4 |
| %arrayidx34 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 25 |
| %i11 = load float, ptr %arrayidx34, align 4 |
| %add35 = fsub fast float %i11, %i10 |
| %arrayidx37 = getelementptr inbounds float, ptr %s, i64 5 |
| store float %add35, ptr %arrayidx37, align 4 |
| %arrayidx39 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 24 |
| %i12 = load float, ptr %arrayidx39, align 4 |
| %arrayidx41 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 24 |
| %i13 = load float, ptr %arrayidx41, align 4 |
| %add42 = fsub fast float %i13, %i12 |
| %arrayidx44 = getelementptr inbounds float, ptr %s, i64 6 |
| store float %add42, ptr %arrayidx44, align 4 |
| %arrayidx46 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 28 |
| %i14 = load float, ptr %arrayidx46, align 4 |
| %arrayidx48 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 23 |
| %i15 = load float, ptr %arrayidx48, align 4 |
| %add49 = fsub fast float %i15, %i14 |
| %arrayidx51 = getelementptr inbounds float, ptr %s, i64 7 |
| store float %add49, ptr %arrayidx51, align 4 |
| ret void |
| } |
| |
| |
| ; Same access pattern as @test with bfloat elements. The assertions for the run |
| ; with only +v (NO-ZVFHMIN-ZVFBFMIN prefix) expect the code to stay scalar; the |
| ; run that adds +zvfhmin,+zvfbfmin expects two vp.strided.load calls with byte |
| ; strides 8 and -8, one <8 x bfloat> fsub and one vector store. |
| define void @test_bf16(ptr %p, ptr noalias %s) { |
| ; NO-ZVFHMIN-ZVFBFMIN-LABEL: @test_bf16( |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: entry: |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P:%.*]], i64 0, i64 0 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I:%.*]] = load bfloat, ptr [[ARRAYIDX]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 30 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I1:%.*]] = load bfloat, ptr [[ARRAYIDX1]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD:%.*]] = fsub fast bfloat [[I1]], [[I]] |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds bfloat, ptr [[S:%.*]], i64 0 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: store bfloat [[ADD]], ptr [[ARRAYIDX2]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I2:%.*]] = load bfloat, ptr [[ARRAYIDX4]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 26 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I3:%.*]] = load bfloat, ptr [[ARRAYIDX6]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD7:%.*]] = fsub fast bfloat [[I3]], [[I2]] |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 1 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: store bfloat [[ADD7]], ptr [[ARRAYIDX9]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 8 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I4:%.*]] = load bfloat, ptr [[ARRAYIDX11]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 22 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I5:%.*]] = load bfloat, ptr [[ARRAYIDX13]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD14:%.*]] = fsub fast bfloat [[I5]], [[I4]] |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 2 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: store bfloat [[ADD14]], ptr [[ARRAYIDX16]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 12 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I6:%.*]] = load bfloat, ptr [[ARRAYIDX18]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 18 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I7:%.*]] = load bfloat, ptr [[ARRAYIDX20]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD21:%.*]] = fsub fast bfloat [[I7]], [[I6]] |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 3 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: store bfloat [[ADD21]], ptr [[ARRAYIDX23]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 16 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I8:%.*]] = load bfloat, ptr [[ARRAYIDX25]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 14 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I9:%.*]] = load bfloat, ptr [[ARRAYIDX27]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD28:%.*]] = fsub fast bfloat [[I9]], [[I8]] |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: store bfloat [[ADD28]], ptr [[ARRAYIDX30]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 20 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I10:%.*]] = load bfloat, ptr [[ARRAYIDX32]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 10 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I11:%.*]] = load bfloat, ptr [[ARRAYIDX34]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD35:%.*]] = fsub fast bfloat [[I11]], [[I10]] |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 5 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: store bfloat [[ADD35]], ptr [[ARRAYIDX37]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 24 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I12:%.*]] = load bfloat, ptr [[ARRAYIDX39]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 6 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I13:%.*]] = load bfloat, ptr [[ARRAYIDX41]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD42:%.*]] = fsub fast bfloat [[I13]], [[I12]] |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 6 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: store bfloat [[ADD42]], ptr [[ARRAYIDX44]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 28 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I14:%.*]] = load bfloat, ptr [[ARRAYIDX46]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 2 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I15:%.*]] = load bfloat, ptr [[ARRAYIDX48]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD49:%.*]] = fsub fast bfloat [[I15]], [[I14]] |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX51:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 7 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: store bfloat [[ADD49]], ptr [[ARRAYIDX51]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: ret void |
| ; |
| ; ZVFHMIN-ZVFBFMIN-LABEL: @test_bf16( |
| ; ZVFHMIN-ZVFBFMIN-NEXT: entry: |
| ; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P:%.*]], i64 0, i64 0 |
| ; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 30 |
| ; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds bfloat, ptr [[S:%.*]], i64 0 |
| ; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP15:%.*]] = call <8 x bfloat> @llvm.experimental.vp.strided.load.v8bf16.p0.i64(ptr align 4 [[ARRAYIDX]], i64 8, <8 x i1> splat (i1 true), i32 8) |
| ; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP7:%.*]] = call <8 x bfloat> @llvm.experimental.vp.strided.load.v8bf16.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -8, <8 x i1> splat (i1 true), i32 8) |
| ; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP16:%.*]] = fsub fast <8 x bfloat> [[TMP7]], [[TMP15]] |
| ; ZVFHMIN-ZVFBFMIN-NEXT: store <8 x bfloat> [[TMP16]], ptr [[ARRAYIDX2]], align 4 |
| ; ZVFHMIN-ZVFBFMIN-NEXT: ret void |
| ; |
| entry: |
| %arrayidx = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 0 |
| %i = load bfloat, ptr %arrayidx, align 4 |
| %arrayidx1 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 30 |
| %i1 = load bfloat, ptr %arrayidx1, align 4 |
| %add = fsub fast bfloat %i1, %i |
| %arrayidx2 = getelementptr inbounds bfloat, ptr %s, i64 0 |
| store bfloat %add, ptr %arrayidx2, align 4 |
| %arrayidx4 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 4 |
| %i2 = load bfloat, ptr %arrayidx4, align 4 |
| %arrayidx6 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 26 |
| %i3 = load bfloat, ptr %arrayidx6, align 4 |
| %add7 = fsub fast bfloat %i3, %i2 |
| %arrayidx9 = getelementptr inbounds bfloat, ptr %s, i64 1 |
| store bfloat %add7, ptr %arrayidx9, align 4 |
| %arrayidx11 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 8 |
| %i4 = load bfloat, ptr %arrayidx11, align 4 |
| %arrayidx13 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 22 |
| %i5 = load bfloat, ptr %arrayidx13, align 4 |
| %add14 = fsub fast bfloat %i5, %i4 |
| %arrayidx16 = getelementptr inbounds bfloat, ptr %s, i64 2 |
| store bfloat %add14, ptr %arrayidx16, align 4 |
| %arrayidx18 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 12 |
| %i6 = load bfloat, ptr %arrayidx18, align 4 |
| %arrayidx20 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 18 |
| %i7 = load bfloat, ptr %arrayidx20, align 4 |
| %add21 = fsub fast bfloat %i7, %i6 |
| %arrayidx23 = getelementptr inbounds bfloat, ptr %s, i64 3 |
| store bfloat %add21, ptr %arrayidx23, align 4 |
| %arrayidx25 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 16 |
| %i8 = load bfloat, ptr %arrayidx25, align 4 |
| %arrayidx27 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 14 |
| %i9 = load bfloat, ptr %arrayidx27, align 4 |
| %add28 = fsub fast bfloat %i9, %i8 |
| %arrayidx30 = getelementptr inbounds bfloat, ptr %s, i64 4 |
| store bfloat %add28, ptr %arrayidx30, align 4 |
| %arrayidx32 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 20 |
| %i10 = load bfloat, ptr %arrayidx32, align 4 |
| %arrayidx34 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 10 |
| %i11 = load bfloat, ptr %arrayidx34, align 4 |
| %add35 = fsub fast bfloat %i11, %i10 |
| %arrayidx37 = getelementptr inbounds bfloat, ptr %s, i64 5 |
| store bfloat %add35, ptr %arrayidx37, align 4 |
| %arrayidx39 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 24 |
| %i12 = load bfloat, ptr %arrayidx39, align 4 |
| %arrayidx41 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 6 |
| %i13 = load bfloat, ptr %arrayidx41, align 4 |
| %add42 = fsub fast bfloat %i13, %i12 |
| %arrayidx44 = getelementptr inbounds bfloat, ptr %s, i64 6 |
| store bfloat %add42, ptr %arrayidx44, align 4 |
| %arrayidx46 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 28 |
| %i14 = load bfloat, ptr %arrayidx46, align 4 |
| %arrayidx48 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 2 |
| %i15 = load bfloat, ptr %arrayidx48, align 4 |
| %add49 = fsub fast bfloat %i15, %i14 |
| %arrayidx51 = getelementptr inbounds bfloat, ptr %s, i64 7 |
| store bfloat %add49, ptr %arrayidx51, align 4 |
| ret void |
| } |
| |
| define void @test_f16(ptr %p, ptr noalias %s) { |
| ; NO-ZVFHMIN-ZVFBFMIN-LABEL: @test_f16( |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: entry: |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x half], ptr [[P:%.*]], i64 0, i64 0 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I:%.*]] = load half, ptr [[ARRAYIDX]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 30 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I1:%.*]] = load half, ptr [[ARRAYIDX1]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD:%.*]] = fsub fast half [[I1]], [[I]] |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds half, ptr [[S:%.*]], i64 0 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: store half [[ADD]], ptr [[ARRAYIDX2]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I2:%.*]] = load half, ptr [[ARRAYIDX4]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 26 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I3:%.*]] = load half, ptr [[ARRAYIDX6]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD7:%.*]] = fsub fast half [[I3]], [[I2]] |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds half, ptr [[S]], i64 1 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: store half [[ADD7]], ptr [[ARRAYIDX9]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 8 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I4:%.*]] = load half, ptr [[ARRAYIDX11]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 22 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I5:%.*]] = load half, ptr [[ARRAYIDX13]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD14:%.*]] = fsub fast half [[I5]], [[I4]] |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds half, ptr [[S]], i64 2 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: store half [[ADD14]], ptr [[ARRAYIDX16]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 12 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I6:%.*]] = load half, ptr [[ARRAYIDX18]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 18 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I7:%.*]] = load half, ptr [[ARRAYIDX20]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD21:%.*]] = fsub fast half [[I7]], [[I6]] |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds half, ptr [[S]], i64 3 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: store half [[ADD21]], ptr [[ARRAYIDX23]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 16 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I8:%.*]] = load half, ptr [[ARRAYIDX25]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 14 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I9:%.*]] = load half, ptr [[ARRAYIDX27]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD28:%.*]] = fsub fast half [[I9]], [[I8]] |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds half, ptr [[S]], i64 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: store half [[ADD28]], ptr [[ARRAYIDX30]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 20 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I10:%.*]] = load half, ptr [[ARRAYIDX32]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 10 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I11:%.*]] = load half, ptr [[ARRAYIDX34]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD35:%.*]] = fsub fast half [[I11]], [[I10]] |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds half, ptr [[S]], i64 5 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: store half [[ADD35]], ptr [[ARRAYIDX37]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 24 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I12:%.*]] = load half, ptr [[ARRAYIDX39]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 6 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I13:%.*]] = load half, ptr [[ARRAYIDX41]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD42:%.*]] = fsub fast half [[I13]], [[I12]] |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds half, ptr [[S]], i64 6 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: store half [[ADD42]], ptr [[ARRAYIDX44]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 28 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I14:%.*]] = load half, ptr [[ARRAYIDX46]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 2 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I15:%.*]] = load half, ptr [[ARRAYIDX48]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD49:%.*]] = fsub fast half [[I15]], [[I14]] |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX51:%.*]] = getelementptr inbounds half, ptr [[S]], i64 7 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: store half [[ADD49]], ptr [[ARRAYIDX51]], align 4 |
| ; NO-ZVFHMIN-ZVFBFMIN-NEXT: ret void |
| ; |
| ; ZVFHMIN-ZVFBFMIN-LABEL: @test_f16( |
| ; ZVFHMIN-ZVFBFMIN-NEXT: entry: |
| ; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x half], ptr [[P:%.*]], i64 0, i64 0 |
| ; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 30 |
| ; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds half, ptr [[S:%.*]], i64 0 |
| ; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP15:%.*]] = call <8 x half> @llvm.experimental.vp.strided.load.v8f16.p0.i64(ptr align 4 [[ARRAYIDX]], i64 8, <8 x i1> splat (i1 true), i32 8) |
| ; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP7:%.*]] = call <8 x half> @llvm.experimental.vp.strided.load.v8f16.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -8, <8 x i1> splat (i1 true), i32 8) |
| ; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP16:%.*]] = fsub fast <8 x half> [[TMP7]], [[TMP15]] |
| ; ZVFHMIN-ZVFBFMIN-NEXT: store <8 x half> [[TMP16]], ptr [[ARRAYIDX2]], align 4 |
| ; ZVFHMIN-ZVFBFMIN-NEXT: ret void |
| ; |
| entry: |
| %arrayidx = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 0 |
| %i = load half, ptr %arrayidx, align 4 |
| %arrayidx1 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 30 |
| %i1 = load half, ptr %arrayidx1, align 4 |
| %add = fsub fast half %i1, %i |
| %arrayidx2 = getelementptr inbounds half, ptr %s, i64 0 |
| store half %add, ptr %arrayidx2, align 4 |
| %arrayidx4 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 4 |
| %i2 = load half, ptr %arrayidx4, align 4 |
| %arrayidx6 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 26 |
| %i3 = load half, ptr %arrayidx6, align 4 |
| %add7 = fsub fast half %i3, %i2 |
| %arrayidx9 = getelementptr inbounds half, ptr %s, i64 1 |
| store half %add7, ptr %arrayidx9, align 4 |
| %arrayidx11 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 8 |
| %i4 = load half, ptr %arrayidx11, align 4 |
| %arrayidx13 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 22 |
| %i5 = load half, ptr %arrayidx13, align 4 |
| %add14 = fsub fast half %i5, %i4 |
| %arrayidx16 = getelementptr inbounds half, ptr %s, i64 2 |
| store half %add14, ptr %arrayidx16, align 4 |
| %arrayidx18 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 12 |
| %i6 = load half, ptr %arrayidx18, align 4 |
| %arrayidx20 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 18 |
| %i7 = load half, ptr %arrayidx20, align 4 |
| %add21 = fsub fast half %i7, %i6 |
| %arrayidx23 = getelementptr inbounds half, ptr %s, i64 3 |
| store half %add21, ptr %arrayidx23, align 4 |
| %arrayidx25 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 16 |
| %i8 = load half, ptr %arrayidx25, align 4 |
| %arrayidx27 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 14 |
| %i9 = load half, ptr %arrayidx27, align 4 |
| %add28 = fsub fast half %i9, %i8 |
| %arrayidx30 = getelementptr inbounds half, ptr %s, i64 4 |
| store half %add28, ptr %arrayidx30, align 4 |
| %arrayidx32 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 20 |
| %i10 = load half, ptr %arrayidx32, align 4 |
| %arrayidx34 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 10 |
| %i11 = load half, ptr %arrayidx34, align 4 |
| %add35 = fsub fast half %i11, %i10 |
| %arrayidx37 = getelementptr inbounds half, ptr %s, i64 5 |
| store half %add35, ptr %arrayidx37, align 4 |
| %arrayidx39 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 24 |
| %i12 = load half, ptr %arrayidx39, align 4 |
| %arrayidx41 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 6 |
| %i13 = load half, ptr %arrayidx41, align 4 |
| %add42 = fsub fast half %i13, %i12 |
| %arrayidx44 = getelementptr inbounds half, ptr %s, i64 6 |
| store half %add42, ptr %arrayidx44, align 4 |
| %arrayidx46 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 28 |
| %i14 = load half, ptr %arrayidx46, align 4 |
| %arrayidx48 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 2 |
| %i15 = load half, ptr %arrayidx48, align 4 |
| %add49 = fsub fast half %i15, %i14 |
| %arrayidx51 = getelementptr inbounds half, ptr %s, i64 7 |
| store half %add49, ptr %arrayidx51, align 4 |
| ret void |
| } |