; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SLM
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -mattr=-prefer-128-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -mattr=+prefer-128-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -mattr=-prefer-128-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -mattr=+prefer-128-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX
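;
; Check SLP vectorization of integer division/remainder by a uniform constant.
; The expected vector width follows each target's preferred register size:
; <4 x i32> for the SSE/SLM prefixes (128-bit), <8 x i32> for AVX (256-bit),
; and <16 x i32> for AVX512 (512-bit).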
@a64 = common global [8 x i64] zeroinitializer, align 64
@b64 = common global [8 x i64] zeroinitializer, align 64
@c64 = common global [8 x i64] zeroinitializer, align 64
@a32 = common global [16 x i32] zeroinitializer, align 64
@b32 = common global [16 x i32] zeroinitializer, align 64
@c32 = common global [16 x i32] zeroinitializer, align 64
@a16 = common global [32 x i16] zeroinitializer, align 64
@b16 = common global [32 x i16] zeroinitializer, align 64
@c16 = common global [32 x i16] zeroinitializer, align 64
@a8 = common global [64 x i8] zeroinitializer, align 64
@b8 = common global [64 x i8] zeroinitializer, align 64
@c8 = common global [64 x i8] zeroinitializer, align 64
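
; 16 scalar sdivs by the uniform constant 5, loaded from @a32 and stored to @c32.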
define void @sdiv_v16i32_uniformconst() {
; SSE-LABEL: @sdiv_v16i32_uniformconst(
; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
; SSE-NEXT: [[TMP2:%.*]] = sdiv <4 x i32> [[TMP1]], splat (i32 5)
; SSE-NEXT: store <4 x i32> [[TMP2]], ptr @c32, align 4
; SSE-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
; SSE-NEXT: [[TMP4:%.*]] = sdiv <4 x i32> [[TMP3]], splat (i32 5)
; SSE-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
; SSE-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
; SSE-NEXT: [[TMP6:%.*]] = sdiv <4 x i32> [[TMP5]], splat (i32 5)
; SSE-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
; SSE-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
; SSE-NEXT: [[TMP8:%.*]] = sdiv <4 x i32> [[TMP7]], splat (i32 5)
; SSE-NEXT: store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
; SSE-NEXT: ret void
;
; SLM-LABEL: @sdiv_v16i32_uniformconst(
; SLM-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
; SLM-NEXT: [[TMP2:%.*]] = sdiv <4 x i32> [[TMP1]], splat (i32 5)
; SLM-NEXT: store <4 x i32> [[TMP2]], ptr @c32, align 4
; SLM-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
; SLM-NEXT: [[TMP4:%.*]] = sdiv <4 x i32> [[TMP3]], splat (i32 5)
; SLM-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
; SLM-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
; SLM-NEXT: [[TMP6:%.*]] = sdiv <4 x i32> [[TMP5]], splat (i32 5)
; SLM-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
; SLM-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
; SLM-NEXT: [[TMP8:%.*]] = sdiv <4 x i32> [[TMP7]], splat (i32 5)
; SLM-NEXT: store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
; SLM-NEXT: ret void
;
; AVX-LABEL: @sdiv_v16i32_uniformconst(
; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
; AVX-NEXT: [[TMP2:%.*]] = sdiv <8 x i32> [[TMP1]], splat (i32 5)
; AVX-NEXT: store <8 x i32> [[TMP2]], ptr @c32, align 4
; AVX-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
; AVX-NEXT: [[TMP4:%.*]] = sdiv <8 x i32> [[TMP3]], splat (i32 5)
; AVX-NEXT: store <8 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
; AVX-NEXT: ret void
;
; AVX512-LABEL: @sdiv_v16i32_uniformconst(
; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4
; AVX512-NEXT: [[TMP2:%.*]] = sdiv <16 x i32> [[TMP1]], splat (i32 5)
; AVX512-NEXT: store <16 x i32> [[TMP2]], ptr @c32, align 4
; AVX512-NEXT: ret void
;
%a0 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
%a1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
%a2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4
%a3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4
%a4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4
%a5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4
%a6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4
%a7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4
%a8 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4
%a9 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4
%a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
%a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
%a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
%a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
%a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
%a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
%r0 = sdiv i32 %a0 , 5
%r1 = sdiv i32 %a1 , 5
%r2 = sdiv i32 %a2 , 5
%r3 = sdiv i32 %a3 , 5
%r4 = sdiv i32 %a4 , 5
%r5 = sdiv i32 %a5 , 5
%r6 = sdiv i32 %a6 , 5
%r7 = sdiv i32 %a7 , 5
%r8 = sdiv i32 %a8 , 5
%r9 = sdiv i32 %a9 , 5
%r10 = sdiv i32 %a10, 5
%r11 = sdiv i32 %a11, 5
%r12 = sdiv i32 %a12, 5
%r13 = sdiv i32 %a13, 5
%r14 = sdiv i32 %a14, 5
%r15 = sdiv i32 %a15, 5
store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4
store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4
store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4
store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4
store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4
store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4
store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4
store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4
store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4
store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4
store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
ret void
}
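
; Same pattern with signed remainder.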
define void @srem_v16i32_uniformconst() {
; SSE-LABEL: @srem_v16i32_uniformconst(
; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
; SSE-NEXT: [[TMP2:%.*]] = srem <4 x i32> [[TMP1]], splat (i32 5)
; SSE-NEXT: store <4 x i32> [[TMP2]], ptr @c32, align 4
; SSE-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
; SSE-NEXT: [[TMP4:%.*]] = srem <4 x i32> [[TMP3]], splat (i32 5)
; SSE-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
; SSE-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
; SSE-NEXT: [[TMP6:%.*]] = srem <4 x i32> [[TMP5]], splat (i32 5)
; SSE-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
; SSE-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
; SSE-NEXT: [[TMP8:%.*]] = srem <4 x i32> [[TMP7]], splat (i32 5)
; SSE-NEXT: store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
; SSE-NEXT: ret void
;
; SLM-LABEL: @srem_v16i32_uniformconst(
; SLM-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
; SLM-NEXT: [[TMP2:%.*]] = srem <4 x i32> [[TMP1]], splat (i32 5)
; SLM-NEXT: store <4 x i32> [[TMP2]], ptr @c32, align 4
; SLM-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
; SLM-NEXT: [[TMP4:%.*]] = srem <4 x i32> [[TMP3]], splat (i32 5)
; SLM-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
; SLM-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
; SLM-NEXT: [[TMP6:%.*]] = srem <4 x i32> [[TMP5]], splat (i32 5)
; SLM-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
; SLM-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
; SLM-NEXT: [[TMP8:%.*]] = srem <4 x i32> [[TMP7]], splat (i32 5)
; SLM-NEXT: store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
; SLM-NEXT: ret void
;
; AVX-LABEL: @srem_v16i32_uniformconst(
; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
; AVX-NEXT: [[TMP2:%.*]] = srem <8 x i32> [[TMP1]], splat (i32 5)
; AVX-NEXT: store <8 x i32> [[TMP2]], ptr @c32, align 4
; AVX-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
; AVX-NEXT: [[TMP4:%.*]] = srem <8 x i32> [[TMP3]], splat (i32 5)
; AVX-NEXT: store <8 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
; AVX-NEXT: ret void
;
; AVX512-LABEL: @srem_v16i32_uniformconst(
; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4
; AVX512-NEXT: [[TMP2:%.*]] = srem <16 x i32> [[TMP1]], splat (i32 5)
; AVX512-NEXT: store <16 x i32> [[TMP2]], ptr @c32, align 4
; AVX512-NEXT: ret void
;
%a0 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
%a1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
%a2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4
%a3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4
%a4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4
%a5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4
%a6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4
%a7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4
%a8 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4
%a9 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4
%a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
%a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
%a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
%a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
%a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
%a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
%r0 = srem i32 %a0 , 5
%r1 = srem i32 %a1 , 5
%r2 = srem i32 %a2 , 5
%r3 = srem i32 %a3 , 5
%r4 = srem i32 %a4 , 5
%r5 = srem i32 %a5 , 5
%r6 = srem i32 %a6 , 5
%r7 = srem i32 %a7 , 5
%r8 = srem i32 %a8 , 5
%r9 = srem i32 %a9 , 5
%r10 = srem i32 %a10, 5
%r11 = srem i32 %a11, 5
%r12 = srem i32 %a12, 5
%r13 = srem i32 %a13, 5
%r14 = srem i32 %a14, 5
%r15 = srem i32 %a15, 5
store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4
store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4
store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4
store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4
store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4
store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4
store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4
store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4
store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4
store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4
store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
ret void
}
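
; Same pattern with unsigned division.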
define void @udiv_v16i32_uniformconst() {
; SSE-LABEL: @udiv_v16i32_uniformconst(
; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
; SSE-NEXT: [[TMP2:%.*]] = udiv <4 x i32> [[TMP1]], splat (i32 5)
; SSE-NEXT: store <4 x i32> [[TMP2]], ptr @c32, align 4
; SSE-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
; SSE-NEXT: [[TMP4:%.*]] = udiv <4 x i32> [[TMP3]], splat (i32 5)
; SSE-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
; SSE-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
; SSE-NEXT: [[TMP6:%.*]] = udiv <4 x i32> [[TMP5]], splat (i32 5)
; SSE-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
; SSE-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
; SSE-NEXT: [[TMP8:%.*]] = udiv <4 x i32> [[TMP7]], splat (i32 5)
; SSE-NEXT: store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
; SSE-NEXT: ret void
;
; SLM-LABEL: @udiv_v16i32_uniformconst(
; SLM-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
; SLM-NEXT: [[TMP2:%.*]] = udiv <4 x i32> [[TMP1]], splat (i32 5)
; SLM-NEXT: store <4 x i32> [[TMP2]], ptr @c32, align 4
; SLM-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
; SLM-NEXT: [[TMP4:%.*]] = udiv <4 x i32> [[TMP3]], splat (i32 5)
; SLM-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
; SLM-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
; SLM-NEXT: [[TMP6:%.*]] = udiv <4 x i32> [[TMP5]], splat (i32 5)
; SLM-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
; SLM-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
; SLM-NEXT: [[TMP8:%.*]] = udiv <4 x i32> [[TMP7]], splat (i32 5)
; SLM-NEXT: store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
; SLM-NEXT: ret void
;
; AVX-LABEL: @udiv_v16i32_uniformconst(
; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
; AVX-NEXT: [[TMP2:%.*]] = udiv <8 x i32> [[TMP1]], splat (i32 5)
; AVX-NEXT: store <8 x i32> [[TMP2]], ptr @c32, align 4
; AVX-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
; AVX-NEXT: [[TMP4:%.*]] = udiv <8 x i32> [[TMP3]], splat (i32 5)
; AVX-NEXT: store <8 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
; AVX-NEXT: ret void
;
; AVX512-LABEL: @udiv_v16i32_uniformconst(
; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4
; AVX512-NEXT: [[TMP2:%.*]] = udiv <16 x i32> [[TMP1]], splat (i32 5)
; AVX512-NEXT: store <16 x i32> [[TMP2]], ptr @c32, align 4
; AVX512-NEXT: ret void
;
%a0 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
%a1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
%a2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4
%a3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4
%a4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4
%a5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4
%a6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4
%a7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4
%a8 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4
%a9 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4
%a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
%a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
%a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
%a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
%a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
%a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
%r0 = udiv i32 %a0 , 5
%r1 = udiv i32 %a1 , 5
%r2 = udiv i32 %a2 , 5
%r3 = udiv i32 %a3 , 5
%r4 = udiv i32 %a4 , 5
%r5 = udiv i32 %a5 , 5
%r6 = udiv i32 %a6 , 5
%r7 = udiv i32 %a7 , 5
%r8 = udiv i32 %a8 , 5
%r9 = udiv i32 %a9 , 5
%r10 = udiv i32 %a10, 5
%r11 = udiv i32 %a11, 5
%r12 = udiv i32 %a12, 5
%r13 = udiv i32 %a13, 5
%r14 = udiv i32 %a14, 5
%r15 = udiv i32 %a15, 5
store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4
store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4
store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4
store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4
store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4
store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4
store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4
store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4
store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4
store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4
store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
ret void
}
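
; Same pattern with unsigned remainder.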
define void @urem_v16i32_uniformconst() {
; SSE-LABEL: @urem_v16i32_uniformconst(
; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
; SSE-NEXT: [[TMP2:%.*]] = urem <4 x i32> [[TMP1]], splat (i32 5)
; SSE-NEXT: store <4 x i32> [[TMP2]], ptr @c32, align 4
; SSE-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
; SSE-NEXT: [[TMP4:%.*]] = urem <4 x i32> [[TMP3]], splat (i32 5)
; SSE-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
; SSE-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
; SSE-NEXT: [[TMP6:%.*]] = urem <4 x i32> [[TMP5]], splat (i32 5)
; SSE-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
; SSE-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
; SSE-NEXT: [[TMP8:%.*]] = urem <4 x i32> [[TMP7]], splat (i32 5)
; SSE-NEXT: store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
; SSE-NEXT: ret void
;
; SLM-LABEL: @urem_v16i32_uniformconst(
; SLM-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
; SLM-NEXT: [[TMP2:%.*]] = urem <4 x i32> [[TMP1]], splat (i32 5)
; SLM-NEXT: store <4 x i32> [[TMP2]], ptr @c32, align 4
; SLM-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
; SLM-NEXT: [[TMP4:%.*]] = urem <4 x i32> [[TMP3]], splat (i32 5)
; SLM-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
; SLM-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
; SLM-NEXT: [[TMP6:%.*]] = urem <4 x i32> [[TMP5]], splat (i32 5)
; SLM-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
; SLM-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
; SLM-NEXT: [[TMP8:%.*]] = urem <4 x i32> [[TMP7]], splat (i32 5)
; SLM-NEXT: store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
; SLM-NEXT: ret void
;
; AVX-LABEL: @urem_v16i32_uniformconst(
; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
; AVX-NEXT: [[TMP2:%.*]] = urem <8 x i32> [[TMP1]], splat (i32 5)
; AVX-NEXT: store <8 x i32> [[TMP2]], ptr @c32, align 4
; AVX-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
; AVX-NEXT: [[TMP4:%.*]] = urem <8 x i32> [[TMP3]], splat (i32 5)
; AVX-NEXT: store <8 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
; AVX-NEXT: ret void
;
; AVX512-LABEL: @urem_v16i32_uniformconst(
; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4
; AVX512-NEXT: [[TMP2:%.*]] = urem <16 x i32> [[TMP1]], splat (i32 5)
; AVX512-NEXT: store <16 x i32> [[TMP2]], ptr @c32, align 4
; AVX512-NEXT: ret void
;
%a0 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
%a1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
%a2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4
%a3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4
%a4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4
%a5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4
%a6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4
%a7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4
%a8 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4
%a9 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4
%a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
%a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
%a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
%a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
%a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
%a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
%r0 = urem i32 %a0 , 5
%r1 = urem i32 %a1 , 5
%r2 = urem i32 %a2 , 5
%r3 = urem i32 %a3 , 5
%r4 = urem i32 %a4 , 5
%r5 = urem i32 %a5 , 5
%r6 = urem i32 %a6 , 5
%r7 = urem i32 %a7 , 5
%r8 = urem i32 %a8 , 5
%r9 = urem i32 %a9 , 5
%r10 = urem i32 %a10, 5
%r11 = urem i32 %a11, 5
%r12 = urem i32 %a12, 5
%r13 = urem i32 %a13, 5
%r14 = urem i32 %a14, 5
%r15 = urem i32 %a15, 5
store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4
store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4
store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4
store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4
store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4
store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4
store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4
store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4
store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4
store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4
store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
ret void
}