| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| ; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -mcpu=x86-64 -S | FileCheck %s --check-prefixes=SSE |
| ; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -mcpu=x86-64-v2 -S | FileCheck %s --check-prefixes=AVX |
| ; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -mcpu=x86-64-v3 -S | FileCheck %s --check-prefixes=AVX2 |
| ; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -mcpu=x86-64-v4 -S | FileCheck %s --check-prefixes=AVX2 |
| |
| ; @compute_min computes the element-wise signed minimum of two 8 x i16 arrays and packs it into { i64, i64 }. |
| ; - %x and %y are indexed as [8 x i16] by the GEPs in the body; both are declared dereferenceable(16). |
| ; - For each index i in 0..7 the scalar body computes @llvm.smin.i16(y[i], x[i]). |
| ; - Results 0..3 are zero-extended and OR-ed into struct field 0 at bit offsets 0, 16, 32 and 48; |
| ;   results 4..7 fill struct field 1 the same way. |
| ; The assertions below expect one 4-wide group per struct field (vector loads, @llvm.smin.v4i16, zext, |
| ; shl by <0, 16, 32, 48>, then @llvm.vector.reduce.or.v4i64). The three prefix groups expect the same |
| ; instruction sequence and differ only in the FileCheck variable names they capture. |
| define { i64, i64 } @compute_min(ptr nocapture noundef nonnull readonly align 2 dereferenceable(16) %x, ptr nocapture noundef nonnull readonly align 2 dereferenceable(16) %y) { |
| ; SSE-LABEL: @compute_min( |
| ; SSE-NEXT: entry: |
| ; SSE-NEXT: [[ARRAYIDX_I_I_4:%.*]] = getelementptr inbounds [8 x i16], ptr [[X:%.*]], i64 0, i64 4 |
| ; SSE-NEXT: [[ARRAYIDX_I_I10_4:%.*]] = getelementptr inbounds [8 x i16], ptr [[Y:%.*]], i64 0, i64 4 |
| ; SSE-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[Y]], align 2 |
| ; SSE-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[X]], align 2 |
| ; SSE-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) |
| ; SSE-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64> |
| ; SSE-NEXT: [[TMP4:%.*]] = shl <4 x i64> [[TMP3]], <i64 0, i64 16, i64 32, i64 48> |
| ; SSE-NEXT: [[RETVAL_SROA_0_0_INSERT_INSERT:%.*]] = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> [[TMP4]]) |
| ; SSE-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue { i64, i64 } poison, i64 [[RETVAL_SROA_0_0_INSERT_INSERT]], 0 |
| ; SSE-NEXT: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX_I_I10_4]], align 2 |
| ; SSE-NEXT: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX_I_I_4]], align 2 |
| ; SSE-NEXT: [[TMP8:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP6]], <4 x i16> [[TMP7]]) |
| ; SSE-NEXT: [[TMP9:%.*]] = zext <4 x i16> [[TMP8]] to <4 x i64> |
| ; SSE-NEXT: [[TMP10:%.*]] = shl <4 x i64> [[TMP9]], <i64 0, i64 16, i64 32, i64 48> |
| ; SSE-NEXT: [[RETVAL_SROA_5_8_INSERT_INSERT:%.*]] = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> [[TMP10]]) |
| ; SSE-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue { i64, i64 } [[DOTFCA_0_INSERT]], i64 [[RETVAL_SROA_5_8_INSERT_INSERT]], 1 |
| ; SSE-NEXT: ret { i64, i64 } [[DOTFCA_1_INSERT]] |
| ; |
| ; AVX-LABEL: @compute_min( |
| ; AVX-NEXT: entry: |
| ; AVX-NEXT: [[ARRAYIDX_I_I_4:%.*]] = getelementptr inbounds [8 x i16], ptr [[X:%.*]], i64 0, i64 4 |
| ; AVX-NEXT: [[ARRAYIDX_I_I10_4:%.*]] = getelementptr inbounds [8 x i16], ptr [[Y:%.*]], i64 0, i64 4 |
| ; AVX-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[Y]], align 2 |
| ; AVX-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[X]], align 2 |
| ; AVX-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) |
| ; AVX-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64> |
| ; AVX-NEXT: [[TMP4:%.*]] = shl <4 x i64> [[TMP3]], <i64 0, i64 16, i64 32, i64 48> |
| ; AVX-NEXT: [[TMP24:%.*]] = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> [[TMP4]]) |
| ; AVX-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue { i64, i64 } poison, i64 [[TMP24]], 0 |
| ; AVX-NEXT: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX_I_I10_4]], align 2 |
| ; AVX-NEXT: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX_I_I_4]], align 2 |
| ; AVX-NEXT: [[TMP8:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP6]], <4 x i16> [[TMP7]]) |
| ; AVX-NEXT: [[TMP9:%.*]] = zext <4 x i16> [[TMP8]] to <4 x i64> |
| ; AVX-NEXT: [[TMP10:%.*]] = shl <4 x i64> [[TMP9]], <i64 0, i64 16, i64 32, i64 48> |
| ; AVX-NEXT: [[TMP25:%.*]] = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> [[TMP10]]) |
| ; AVX-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue { i64, i64 } [[DOTFCA_0_INSERT]], i64 [[TMP25]], 1 |
| ; AVX-NEXT: ret { i64, i64 } [[DOTFCA_1_INSERT]] |
| ; |
| ; AVX2-LABEL: @compute_min( |
| ; AVX2-NEXT: entry: |
| ; AVX2-NEXT: [[ARRAYIDX_I_I_4:%.*]] = getelementptr inbounds [8 x i16], ptr [[X:%.*]], i64 0, i64 4 |
| ; AVX2-NEXT: [[ARRAYIDX_I_I10_4:%.*]] = getelementptr inbounds [8 x i16], ptr [[Y:%.*]], i64 0, i64 4 |
| ; AVX2-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[Y]], align 2 |
| ; AVX2-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[X]], align 2 |
| ; AVX2-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) |
| ; AVX2-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64> |
| ; AVX2-NEXT: [[TMP4:%.*]] = shl <4 x i64> [[TMP3]], <i64 0, i64 16, i64 32, i64 48> |
| ; AVX2-NEXT: [[TMP24:%.*]] = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> [[TMP4]]) |
| ; AVX2-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue { i64, i64 } poison, i64 [[TMP24]], 0 |
| ; AVX2-NEXT: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX_I_I10_4]], align 2 |
| ; AVX2-NEXT: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX_I_I_4]], align 2 |
| ; AVX2-NEXT: [[TMP8:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP6]], <4 x i16> [[TMP7]]) |
| ; AVX2-NEXT: [[TMP9:%.*]] = zext <4 x i16> [[TMP8]] to <4 x i64> |
| ; AVX2-NEXT: [[TMP10:%.*]] = shl <4 x i64> [[TMP9]], <i64 0, i64 16, i64 32, i64 48> |
| ; AVX2-NEXT: [[TMP25:%.*]] = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> [[TMP10]]) |
| ; AVX2-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue { i64, i64 } [[DOTFCA_0_INSERT]], i64 [[TMP25]], 1 |
| ; AVX2-NEXT: ret { i64, i64 } [[DOTFCA_1_INSERT]] |
| ; |
| entry: |
| ; Scalar input, element 0: no GEP is needed, %y and %x are loaded directly. |
| ; Note the operand order used throughout, smin(y[i], x[i]); the expected vector calls keep it. |
| %0 = load i16, ptr %y, align 2 |
| %1 = load i16, ptr %x, align 2 |
| %2 = tail call i16 @llvm.smin.i16(i16 %0, i16 %1) |
| ; Elements 1..3 repeat the pattern (GEP into each array, load both, smin) and feed struct field 0. |
| %arrayidx.i.i.1 = getelementptr inbounds [8 x i16], ptr %x, i64 0, i64 1 |
| %arrayidx.i.i10.1 = getelementptr inbounds [8 x i16], ptr %y, i64 0, i64 1 |
| %3 = load i16, ptr %arrayidx.i.i10.1, align 2 |
| %4 = load i16, ptr %arrayidx.i.i.1, align 2 |
| %5 = tail call i16 @llvm.smin.i16(i16 %3, i16 %4) |
| %arrayidx.i.i.2 = getelementptr inbounds [8 x i16], ptr %x, i64 0, i64 2 |
| %arrayidx.i.i10.2 = getelementptr inbounds [8 x i16], ptr %y, i64 0, i64 2 |
| %6 = load i16, ptr %arrayidx.i.i10.2, align 2 |
| %7 = load i16, ptr %arrayidx.i.i.2, align 2 |
| %8 = tail call i16 @llvm.smin.i16(i16 %6, i16 %7) |
| %arrayidx.i.i.3 = getelementptr inbounds [8 x i16], ptr %x, i64 0, i64 3 |
| %arrayidx.i.i10.3 = getelementptr inbounds [8 x i16], ptr %y, i64 0, i64 3 |
| %9 = load i16, ptr %arrayidx.i.i10.3, align 2 |
| %10 = load i16, ptr %arrayidx.i.i.3, align 2 |
| %11 = tail call i16 @llvm.smin.i16(i16 %9, i16 %10) |
| ; Elements 4..7 feed struct field 1. The element-4 GEPs are the only address computations that |
| ; the assertions expect to remain, as base pointers of the second pair of vector loads. |
| %arrayidx.i.i.4 = getelementptr inbounds [8 x i16], ptr %x, i64 0, i64 4 |
| %arrayidx.i.i10.4 = getelementptr inbounds [8 x i16], ptr %y, i64 0, i64 4 |
| %12 = load i16, ptr %arrayidx.i.i10.4, align 2 |
| %13 = load i16, ptr %arrayidx.i.i.4, align 2 |
| %14 = tail call i16 @llvm.smin.i16(i16 %12, i16 %13) |
| %arrayidx.i.i.5 = getelementptr inbounds [8 x i16], ptr %x, i64 0, i64 5 |
| %arrayidx.i.i10.5 = getelementptr inbounds [8 x i16], ptr %y, i64 0, i64 5 |
| %15 = load i16, ptr %arrayidx.i.i10.5, align 2 |
| %16 = load i16, ptr %arrayidx.i.i.5, align 2 |
| %17 = tail call i16 @llvm.smin.i16(i16 %15, i16 %16) |
| %arrayidx.i.i.6 = getelementptr inbounds [8 x i16], ptr %x, i64 0, i64 6 |
| %arrayidx.i.i10.6 = getelementptr inbounds [8 x i16], ptr %y, i64 0, i64 6 |
| %18 = load i16, ptr %arrayidx.i.i10.6, align 2 |
| %19 = load i16, ptr %arrayidx.i.i.6, align 2 |
| %20 = tail call i16 @llvm.smin.i16(i16 %18, i16 %19) |
| %arrayidx.i.i.7 = getelementptr inbounds [8 x i16], ptr %x, i64 0, i64 7 |
| %arrayidx.i.i10.7 = getelementptr inbounds [8 x i16], ptr %y, i64 0, i64 7 |
| %21 = load i16, ptr %arrayidx.i.i10.7, align 2 |
| %22 = load i16, ptr %arrayidx.i.i.7, align 2 |
| %23 = tail call i16 @llvm.smin.i16(i16 %21, i16 %22) |
| ; Pack results 0..3 into the first i64 as an OR chain that starts from the highest element. |
| ; Element 3 is shifted by 48 (nuw only), elements 2 and 1 by 32 and 16 (nuw nsw), and element 0 |
| ; is OR-ed in unshifted, which the expected vector shl represents as a shift amount of 0. |
| %retval.sroa.4.0.insert.ext = zext i16 %11 to i64 |
| %retval.sroa.4.0.insert.shift = shl nuw i64 %retval.sroa.4.0.insert.ext, 48 |
| %retval.sroa.3.0.insert.ext = zext i16 %8 to i64 |
| %retval.sroa.3.0.insert.shift = shl nuw nsw i64 %retval.sroa.3.0.insert.ext, 32 |
| %retval.sroa.3.0.insert.insert = or i64 %retval.sroa.4.0.insert.shift, %retval.sroa.3.0.insert.shift |
| %retval.sroa.2.0.insert.ext = zext i16 %5 to i64 |
| %retval.sroa.2.0.insert.shift = shl nuw nsw i64 %retval.sroa.2.0.insert.ext, 16 |
| %retval.sroa.2.0.insert.insert = or i64 %retval.sroa.3.0.insert.insert, %retval.sroa.2.0.insert.shift |
| %retval.sroa.0.0.insert.ext = zext i16 %2 to i64 |
| %retval.sroa.0.0.insert.insert = or i64 %retval.sroa.2.0.insert.insert, %retval.sroa.0.0.insert.ext |
| %.fca.0.insert = insertvalue { i64, i64 } poison, i64 %retval.sroa.0.0.insert.insert, 0 |
| ; Pack results 4..7 into the second i64 with the same shift-and-OR layout (7 -> 48, 6 -> 32, 5 -> 16, 4 -> 0). |
| %retval.sroa.9.8.insert.ext = zext i16 %23 to i64 |
| %retval.sroa.9.8.insert.shift = shl nuw i64 %retval.sroa.9.8.insert.ext, 48 |
| %retval.sroa.8.8.insert.ext = zext i16 %20 to i64 |
| %retval.sroa.8.8.insert.shift = shl nuw nsw i64 %retval.sroa.8.8.insert.ext, 32 |
| %retval.sroa.8.8.insert.insert = or i64 %retval.sroa.9.8.insert.shift, %retval.sroa.8.8.insert.shift |
| %retval.sroa.7.8.insert.ext = zext i16 %17 to i64 |
| %retval.sroa.7.8.insert.shift = shl nuw nsw i64 %retval.sroa.7.8.insert.ext, 16 |
| %retval.sroa.7.8.insert.insert = or i64 %retval.sroa.8.8.insert.insert, %retval.sroa.7.8.insert.shift |
| %retval.sroa.5.8.insert.ext = zext i16 %14 to i64 |
| %retval.sroa.5.8.insert.insert = or i64 %retval.sroa.7.8.insert.insert, %retval.sroa.5.8.insert.ext |
| ; Combine both packed halves into the returned aggregate. |
| %.fca.1.insert = insertvalue { i64, i64 } %.fca.0.insert, i64 %retval.sroa.5.8.insert.insert, 1 |
| ret { i64, i64 } %.fca.1.insert |
| } |
| ; Scalar signed-minimum intrinsic on i16; the body of @compute_min calls it once per array element. |
| declare i16 @llvm.smin.i16(i16, i16) |