| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| ; RUN: opt < %s -passes=instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s |
| |
| ; |
| ; UNDEF Elts |
| ; |
| |
| ; SSE2 pmaddwd (<8 x i16> -> <4 x i32>) with undef as the second operand. |
| ; The assertions expect the call to be replaced by an all-zero result. |
| define <4 x i32> @undef_pmaddwd_128(<8 x i16> %a0) { |
| ; CHECK-LABEL: @undef_pmaddwd_128( |
| ; CHECK-NEXT: ret <4 x i32> zeroinitializer |
| ; |
| %1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> undef) |
| ret <4 x i32> %1 |
| } |
| |
| ; Commuted SSE2 case: undef is the first operand and %a0 the second. |
| ; The assertions expect the same all-zero result. |
| define <4 x i32> @undef_pmaddwd_128_commute(<8 x i16> %a0) { |
| ; CHECK-LABEL: @undef_pmaddwd_128_commute( |
| ; CHECK-NEXT: ret <4 x i32> zeroinitializer |
| ; |
| %1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> undef, <8 x i16> %a0) |
| ret <4 x i32> %1 |
| } |
| |
| ; AVX2 pmaddwd (<16 x i16> -> <8 x i32>) with undef as the second operand. |
| ; The assertions expect the call to be replaced by an all-zero result. |
| define <8 x i32> @undef_pmaddwd_256(<16 x i16> %a0) { |
| ; CHECK-LABEL: @undef_pmaddwd_256( |
| ; CHECK-NEXT: ret <8 x i32> zeroinitializer |
| ; |
| %1 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %a0, <16 x i16> undef) |
| ret <8 x i32> %1 |
| } |
| |
| ; Commuted AVX2 case: undef is the first operand and %a0 the second. |
| ; The assertions expect the same all-zero result. |
| define <8 x i32> @undef_pmaddwd_256_commute(<16 x i16> %a0) { |
| ; CHECK-LABEL: @undef_pmaddwd_256_commute( |
| ; CHECK-NEXT: ret <8 x i32> zeroinitializer |
| ; |
| %1 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> undef, <16 x i16> %a0) |
| ret <8 x i32> %1 |
| } |
| |
| ; AVX-512 pmaddwd (<32 x i16> -> <16 x i32>) with undef as the second operand. |
| ; The assertions expect the call to be replaced by an all-zero result. |
| define <16 x i32> @undef_pmaddwd_512(<32 x i16> %a0) { |
| ; CHECK-LABEL: @undef_pmaddwd_512( |
| ; CHECK-NEXT: ret <16 x i32> zeroinitializer |
| ; |
| %1 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %a0, <32 x i16> undef) |
| ret <16 x i32> %1 |
| } |
| |
| ; Commuted AVX-512 case: undef is the first operand and %a0 the second. |
| ; The assertions expect the same all-zero result. |
| define <16 x i32> @undef_pmaddwd_512_commute(<32 x i16> %a0) { |
| ; CHECK-LABEL: @undef_pmaddwd_512_commute( |
| ; CHECK-NEXT: ret <16 x i32> zeroinitializer |
| ; |
| %1 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> undef, <32 x i16> %a0) |
| ret <16 x i32> %1 |
| } |
| |
| ; |
| ; Zero Elts |
| ; |
| |
| ; SSE2 pmaddwd (<8 x i16> -> <4 x i32>) with an all-zero second operand. |
| ; The assertions expect the call to fold to an all-zero result. |
| define <4 x i32> @zero_pmaddwd_128(<8 x i16> %a0) { |
| ; CHECK-LABEL: @zero_pmaddwd_128( |
| ; CHECK-NEXT: ret <4 x i32> zeroinitializer |
| ; |
| %1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> zeroinitializer) |
| ret <4 x i32> %1 |
| } |
| |
| ; Commuted SSE2 case: the all-zero vector is the first operand. |
| ; The assertions expect the same all-zero result. |
| define <4 x i32> @zero_pmaddwd_128_commute(<8 x i16> %a0) { |
| ; CHECK-LABEL: @zero_pmaddwd_128_commute( |
| ; CHECK-NEXT: ret <4 x i32> zeroinitializer |
| ; |
| %1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> zeroinitializer, <8 x i16> %a0) |
| ret <4 x i32> %1 |
| } |
| |
| ; AVX2 pmaddwd (<16 x i16> -> <8 x i32>) with an all-zero second operand. |
| ; The assertions expect the call to fold to an all-zero result. |
| define <8 x i32> @zero_pmaddwd_256(<16 x i16> %a0) { |
| ; CHECK-LABEL: @zero_pmaddwd_256( |
| ; CHECK-NEXT: ret <8 x i32> zeroinitializer |
| ; |
| %1 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %a0, <16 x i16> zeroinitializer) |
| ret <8 x i32> %1 |
| } |
| |
| ; Commuted AVX2 case: the all-zero vector is the first operand. |
| ; The assertions expect the same all-zero result. |
| define <8 x i32> @zero_pmaddwd_256_commute(<16 x i16> %a0) { |
| ; CHECK-LABEL: @zero_pmaddwd_256_commute( |
| ; CHECK-NEXT: ret <8 x i32> zeroinitializer |
| ; |
| %1 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> zeroinitializer, <16 x i16> %a0) |
| ret <8 x i32> %1 |
| } |
| |
| ; AVX-512 pmaddwd (<32 x i16> -> <16 x i32>) with an all-zero second operand. |
| ; The assertions expect the call to fold to an all-zero result. |
| define <16 x i32> @zero_pmaddwd_512(<32 x i16> %a0) { |
| ; CHECK-LABEL: @zero_pmaddwd_512( |
| ; CHECK-NEXT: ret <16 x i32> zeroinitializer |
| ; |
| %1 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %a0, <32 x i16> zeroinitializer) |
| ret <16 x i32> %1 |
| } |
| |
| ; Commuted AVX-512 case: the all-zero vector is the first operand. |
| ; The assertions expect the same all-zero result. |
| define <16 x i32> @zero_pmaddwd_512_commute(<32 x i16> %a0) { |
| ; CHECK-LABEL: @zero_pmaddwd_512_commute( |
| ; CHECK-NEXT: ret <16 x i32> zeroinitializer |
| ; |
| %1 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> zeroinitializer, <32 x i16> %a0) |
| ret <16 x i32> %1 |
| } |
| |
| ; |
| ; Constant Folding |
| ; |
| |
| ; SSE2 pmaddwd with two constant operands: the call is expected to fold to a |
| ; constant vector. Each expected i32 lane equals the sum of the two adjacent |
| ; i16 products, e.g. lane 0 = (-1 * -5) + (2 * 7) = 19 and |
| ; lane 1 = (3 * -32768) + (-4 * 32765) = -229364. |
| define <4 x i32> @fold_pmaddwd_128() { |
| ; CHECK-LABEL: @fold_pmaddwd_128( |
| ; CHECK-NEXT: ret <4 x i32> <i32 19, i32 -229364, i32 -21, i32 -491429> |
| ; |
| %1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> <i16 -1, i16 2, i16 3, i16 -4, i16 -5, i16 6, i16 7, i16 -8>, <8 x i16> <i16 -5, i16 7, i16 -32768, i16 32765, i16 -9, i16 -11, i16 -32763, i16 32761>) |
| ret <4 x i32> %1 |
| } |
| |
| ; AVX2 pmaddwd with two constant operands: the call is expected to fold to a |
| ; constant vector. Each expected i32 lane equals the sum of the two adjacent |
| ; i16 products, e.g. lane 0 = (0 * -5) + (-1 * 7) = -7 and |
| ; lane 5 = (-10 * -32760) + (11 * 32758) = 687938. |
| define <8 x i32> @fold_pmaddwd_256() { |
| ; CHECK-LABEL: @fold_pmaddwd_256( |
| ; CHECK-NEXT: ret <8 x i32> <i32 -7, i32 32762, i32 91, i32 32750, i32 -239, i32 687938, i32 -451, i32 -32756> |
| ; |
| %1 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> <i16 0, i16 -1, i16 2, i16 3, i16 -4, i16 -5, i16 6, i16 7, i16 -8, i16 9, i16 -10, i16 11, i16 -12, i16 13, i16 -14, i16 -15>, <16 x i16> <i16 -5, i16 7, i16 -32768, i16 32766, i16 -9, i16 -11, i16 -32764, i16 32762, i16 13, i16 -15, i16 -32760, i16 32758, i16 17, i16 -19, i16 -32756, i16 32756>) |
| ret <8 x i32> %1 |
| } |
| |
| ; AVX-512 pmaddwd with two constant operands: the call is expected to fold to |
| ; a constant vector. The second operand is the first operand with its two |
| ; 16-element halves swapped, so the expected result repeats the same eight |
| ; values in both halves (lane 0 = (0 * -5) + (-1 * 7) = -7, and lane 8 = |
| ; (-5 * 0) + (7 * -1) = -7). |
| define <16 x i32> @fold_pmaddwd_512() { |
| ; CHECK-LABEL: @fold_pmaddwd_512( |
| ; CHECK-NEXT: ret <16 x i32> <i32 -7, i32 32762, i32 91, i32 32750, i32 -239, i32 687938, i32 -451, i32 -32756, i32 -7, i32 32762, i32 91, i32 32750, i32 -239, i32 687938, i32 -451, i32 -32756> |
| ; |
| %1 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> <i16 0, i16 -1, i16 2, i16 3, i16 -4, i16 -5, i16 6, i16 7, i16 -8, i16 9, i16 -10, i16 11, i16 -12, i16 13, i16 -14, i16 -15, i16 -5, i16 7, i16 -32768, i16 32766, i16 -9, i16 -11, i16 -32764, i16 32762, i16 13, i16 -15, i16 -32760, i16 32758, i16 17, i16 -19, i16 -32756, i16 32756>, <32 x i16> <i16 -5, i16 7, i16 -32768, i16 32766, i16 -9, i16 -11, i16 -32764, i16 32762, i16 13, i16 -15, i16 -32760, i16 32758, i16 17, i16 -19, i16 -32756, i16 32756, i16 0, i16 -1, i16 2, i16 3, i16 -4, i16 -5, i16 6, i16 7, i16 -8, i16 9, i16 -10, i16 11, i16 -12, i16 13, i16 -14, i16 -15>) |
| ret <16 x i32> %1 |
| } |
| |
| ; |
| ; Demanded Elts |
| ; |
| |
| ; Demanded-elements test for SSE2 pmaddwd. Both operands are first permuted by |
| ; shuffles that keep elements 0 and 1 in place and reorder only elements 2-7; |
| ; the result is then splatted from lane 0. The assertions expect the two input |
| ; shuffles to be removed so the intrinsic is called on %a0 and %a1 directly |
| ; (presumably because lane 0 depends only on input elements 0 and 1). |
| define <4 x i32> @elts_pmaddwd_128(<8 x i16> %a0, <8 x i16> %a1) { |
| ; CHECK-LABEL: @elts_pmaddwd_128( |
| ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) |
| ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer |
| ; CHECK-NEXT: ret <4 x i32> [[TMP2]] |
| ; |
| %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2> |
| %2 = shufflevector <8 x i16> %a1, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> |
| %3 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %1, <8 x i16> %2) |
| ; An all-zero shuffle mask broadcasts lane 0 of %3 to every result lane. |
| %4 = shufflevector <4 x i32> %3, <4 x i32> poison, <4 x i32> zeroinitializer |
| ret <4 x i32> %4 |
| } |
| |
| ; Demanded-elements test for AVX2 pmaddwd. Both operands are first permuted by |
| ; shuffles that keep elements 0, 1 and 8-15 in place and reorder only elements |
| ; 2-7; the result is then splatted from lane 0. The assertions expect the two |
| ; input shuffles to be removed so the intrinsic is called on %a0 and %a1 |
| ; directly (presumably because lane 0 depends only on input elements 0 and 1). |
| define <8 x i32> @elts_pmaddwd_256(<16 x i16> %a0, <16 x i16> %a1) { |
| ; CHECK-LABEL: @elts_pmaddwd_256( |
| ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) |
| ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> zeroinitializer |
| ; CHECK-NEXT: ret <8 x i32> [[TMP2]] |
| ; |
| %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> |
| %2 = shufflevector <16 x i16> %a1, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> |
| %3 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %1, <16 x i16> %2) |
| ; An all-zero shuffle mask broadcasts lane 0 of %3 to every result lane. |
| %4 = shufflevector <8 x i32> %3, <8 x i32> poison, <8 x i32> zeroinitializer |
| ret <8 x i32> %4 |
| } |
| |
| ; Demanded-elements test for AVX-512 pmaddwd. Both operands are first permuted |
| ; by shuffles that keep elements 0, 1 and 8-31 in place and reorder only |
| ; elements 2-7; the result is then splatted from lane 0. The assertions expect |
| ; the two input shuffles to be removed so the intrinsic is called on %a0 and |
| ; %a1 directly (presumably because lane 0 depends only on input elements 0 |
| ; and 1). |
| define <16 x i32> @elts_pmaddwd_512(<32 x i16> %a0, <32 x i16> %a1) { |
| ; CHECK-LABEL: @elts_pmaddwd_512( |
| ; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> [[A0:%.*]], <32 x i16> [[A1:%.*]]) |
| ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> poison, <16 x i32> zeroinitializer |
| ; CHECK-NEXT: ret <16 x i32> [[TMP2]] |
| ; |
| %1 = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> |
| %2 = shufflevector <32 x i16> %a1, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> |
| %3 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %1, <32 x i16> %2) |
| ; An all-zero shuffle mask broadcasts lane 0 of %3 to every result lane. |
| %4 = shufflevector <16 x i32> %3, <16 x i32> poison, <16 x i32> zeroinitializer |
| ret <16 x i32> %4 |
| } |