| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| ; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE |
| ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SLM |
| ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -mattr=-prefer-128-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX |
| ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -mattr=+prefer-128-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE |
| ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -mattr=-prefer-128-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX |
| ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -mattr=+prefer-128-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE |
| ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512 |
| ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512 |
| ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX |
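
; Integer division/remainder by the uniform constant 5 has no direct vector
; instruction on x86, but it can be lowered to a multiply-by-magic-constant
; sequence, so the SLP vectorizer should still be able to vectorize each of
; the sdiv/srem/udiv/urem functions below at the widest legal width for the
; target: <4 x i32> on SSE/SLM, <8 x i32> on AVX, <16 x i32> on AVX512.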
| |
| @a64 = common global [8 x i64] zeroinitializer, align 64 |
| @b64 = common global [8 x i64] zeroinitializer, align 64 |
| @c64 = common global [8 x i64] zeroinitializer, align 64 |
| @a32 = common global [16 x i32] zeroinitializer, align 64 |
| @b32 = common global [16 x i32] zeroinitializer, align 64 |
| @c32 = common global [16 x i32] zeroinitializer, align 64 |
| @a16 = common global [32 x i16] zeroinitializer, align 64 |
| @b16 = common global [32 x i16] zeroinitializer, align 64 |
| @c16 = common global [32 x i16] zeroinitializer, align 64 |
| @a8 = common global [64 x i8] zeroinitializer, align 64 |
| @b8 = common global [64 x i8] zeroinitializer, align 64 |
| @c8 = common global [64 x i8] zeroinitializer, align 64 |
| |
| define void @sdiv_v16i32_uniformconst() { |
| ; SSE-LABEL: @sdiv_v16i32_uniformconst( |
| ; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4 |
| ; SSE-NEXT: [[TMP2:%.*]] = sdiv <4 x i32> [[TMP1]], splat (i32 5) |
| ; SSE-NEXT: store <4 x i32> [[TMP2]], ptr @c32, align 4 |
| ; SSE-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4 |
| ; SSE-NEXT: [[TMP4:%.*]] = sdiv <4 x i32> [[TMP3]], splat (i32 5) |
| ; SSE-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4 |
| ; SSE-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 |
| ; SSE-NEXT: [[TMP6:%.*]] = sdiv <4 x i32> [[TMP5]], splat (i32 5) |
| ; SSE-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 |
| ; SSE-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 |
| ; SSE-NEXT: [[TMP8:%.*]] = sdiv <4 x i32> [[TMP7]], splat (i32 5) |
| ; SSE-NEXT: store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 |
| ; SSE-NEXT: ret void |
| ; |
| ; SLM-LABEL: @sdiv_v16i32_uniformconst( |
| ; SLM-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4 |
| ; SLM-NEXT: [[TMP2:%.*]] = sdiv <4 x i32> [[TMP1]], splat (i32 5) |
| ; SLM-NEXT: store <4 x i32> [[TMP2]], ptr @c32, align 4 |
| ; SLM-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4 |
| ; SLM-NEXT: [[TMP4:%.*]] = sdiv <4 x i32> [[TMP3]], splat (i32 5) |
| ; SLM-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4 |
| ; SLM-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 |
| ; SLM-NEXT: [[TMP6:%.*]] = sdiv <4 x i32> [[TMP5]], splat (i32 5) |
| ; SLM-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 |
| ; SLM-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 |
| ; SLM-NEXT: [[TMP8:%.*]] = sdiv <4 x i32> [[TMP7]], splat (i32 5) |
| ; SLM-NEXT: store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 |
| ; SLM-NEXT: ret void |
| ; |
| ; AVX-LABEL: @sdiv_v16i32_uniformconst( |
| ; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4 |
| ; AVX-NEXT: [[TMP2:%.*]] = sdiv <8 x i32> [[TMP1]], splat (i32 5) |
| ; AVX-NEXT: store <8 x i32> [[TMP2]], ptr @c32, align 4 |
| ; AVX-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 |
| ; AVX-NEXT: [[TMP4:%.*]] = sdiv <8 x i32> [[TMP3]], splat (i32 5) |
| ; AVX-NEXT: store <8 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 |
| ; AVX-NEXT: ret void |
| ; |
| ; AVX512-LABEL: @sdiv_v16i32_uniformconst( |
| ; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4 |
| ; AVX512-NEXT: [[TMP2:%.*]] = sdiv <16 x i32> [[TMP1]], splat (i32 5) |
| ; AVX512-NEXT: store <16 x i32> [[TMP2]], ptr @c32, align 4 |
| ; AVX512-NEXT: ret void |
| ; |
| %a0 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4 |
| %a1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4 |
| %a2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4 |
| %a3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4 |
| %a4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4 |
| %a5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4 |
| %a6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4 |
| %a7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4 |
| %a8 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4 |
| %a9 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4 |
| %a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4 |
| %a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4 |
| %a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 |
| %a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4 |
| %a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4 |
| %a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4 |
| %r0 = sdiv i32 %a0 , 5 |
| %r1 = sdiv i32 %a1 , 5 |
| %r2 = sdiv i32 %a2 , 5 |
| %r3 = sdiv i32 %a3 , 5 |
| %r4 = sdiv i32 %a4 , 5 |
| %r5 = sdiv i32 %a5 , 5 |
| %r6 = sdiv i32 %a6 , 5 |
| %r7 = sdiv i32 %a7 , 5 |
| %r8 = sdiv i32 %a8 , 5 |
| %r9 = sdiv i32 %a9 , 5 |
| %r10 = sdiv i32 %a10, 5 |
| %r11 = sdiv i32 %a11, 5 |
| %r12 = sdiv i32 %a12, 5 |
| %r13 = sdiv i32 %a13, 5 |
| %r14 = sdiv i32 %a14, 5 |
| %r15 = sdiv i32 %a15, 5 |
| store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4 |
| store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4 |
| store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4 |
| store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4 |
| store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4 |
| store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4 |
| store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4 |
| store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4 |
| store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4 |
| store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4 |
| store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4 |
| store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4 |
| store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 |
| store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4 |
| store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4 |
| store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4 |
| ret void |
| } |
| |
| define void @srem_v16i32_uniformconst() { |
| ; SSE-LABEL: @srem_v16i32_uniformconst( |
| ; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4 |
| ; SSE-NEXT: [[TMP2:%.*]] = srem <4 x i32> [[TMP1]], splat (i32 5) |
| ; SSE-NEXT: store <4 x i32> [[TMP2]], ptr @c32, align 4 |
| ; SSE-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4 |
| ; SSE-NEXT: [[TMP4:%.*]] = srem <4 x i32> [[TMP3]], splat (i32 5) |
| ; SSE-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4 |
| ; SSE-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 |
| ; SSE-NEXT: [[TMP6:%.*]] = srem <4 x i32> [[TMP5]], splat (i32 5) |
| ; SSE-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 |
| ; SSE-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 |
| ; SSE-NEXT: [[TMP8:%.*]] = srem <4 x i32> [[TMP7]], splat (i32 5) |
| ; SSE-NEXT: store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 |
| ; SSE-NEXT: ret void |
| ; |
| ; SLM-LABEL: @srem_v16i32_uniformconst( |
| ; SLM-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4 |
| ; SLM-NEXT: [[TMP2:%.*]] = srem <4 x i32> [[TMP1]], splat (i32 5) |
| ; SLM-NEXT: store <4 x i32> [[TMP2]], ptr @c32, align 4 |
| ; SLM-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4 |
| ; SLM-NEXT: [[TMP4:%.*]] = srem <4 x i32> [[TMP3]], splat (i32 5) |
| ; SLM-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4 |
| ; SLM-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 |
| ; SLM-NEXT: [[TMP6:%.*]] = srem <4 x i32> [[TMP5]], splat (i32 5) |
| ; SLM-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 |
| ; SLM-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 |
| ; SLM-NEXT: [[TMP8:%.*]] = srem <4 x i32> [[TMP7]], splat (i32 5) |
| ; SLM-NEXT: store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 |
| ; SLM-NEXT: ret void |
| ; |
| ; AVX-LABEL: @srem_v16i32_uniformconst( |
| ; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4 |
| ; AVX-NEXT: [[TMP2:%.*]] = srem <8 x i32> [[TMP1]], splat (i32 5) |
| ; AVX-NEXT: store <8 x i32> [[TMP2]], ptr @c32, align 4 |
| ; AVX-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 |
| ; AVX-NEXT: [[TMP4:%.*]] = srem <8 x i32> [[TMP3]], splat (i32 5) |
| ; AVX-NEXT: store <8 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 |
| ; AVX-NEXT: ret void |
| ; |
| ; AVX512-LABEL: @srem_v16i32_uniformconst( |
| ; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4 |
| ; AVX512-NEXT: [[TMP2:%.*]] = srem <16 x i32> [[TMP1]], splat (i32 5) |
| ; AVX512-NEXT: store <16 x i32> [[TMP2]], ptr @c32, align 4 |
| ; AVX512-NEXT: ret void |
| ; |
| %a0 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4 |
| %a1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4 |
| %a2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4 |
| %a3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4 |
| %a4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4 |
| %a5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4 |
| %a6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4 |
| %a7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4 |
| %a8 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4 |
| %a9 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4 |
| %a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4 |
| %a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4 |
| %a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 |
| %a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4 |
| %a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4 |
| %a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4 |
| %r0 = srem i32 %a0 , 5 |
| %r1 = srem i32 %a1 , 5 |
| %r2 = srem i32 %a2 , 5 |
| %r3 = srem i32 %a3 , 5 |
| %r4 = srem i32 %a4 , 5 |
| %r5 = srem i32 %a5 , 5 |
| %r6 = srem i32 %a6 , 5 |
| %r7 = srem i32 %a7 , 5 |
| %r8 = srem i32 %a8 , 5 |
| %r9 = srem i32 %a9 , 5 |
| %r10 = srem i32 %a10, 5 |
| %r11 = srem i32 %a11, 5 |
| %r12 = srem i32 %a12, 5 |
| %r13 = srem i32 %a13, 5 |
| %r14 = srem i32 %a14, 5 |
| %r15 = srem i32 %a15, 5 |
| store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4 |
| store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4 |
| store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4 |
| store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4 |
| store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4 |
| store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4 |
| store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4 |
| store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4 |
| store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4 |
| store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4 |
| store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4 |
| store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4 |
| store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 |
| store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4 |
| store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4 |
| store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4 |
| ret void |
| } |
| |
| define void @udiv_v16i32_uniformconst() { |
| ; SSE-LABEL: @udiv_v16i32_uniformconst( |
| ; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4 |
| ; SSE-NEXT: [[TMP2:%.*]] = udiv <4 x i32> [[TMP1]], splat (i32 5) |
| ; SSE-NEXT: store <4 x i32> [[TMP2]], ptr @c32, align 4 |
| ; SSE-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4 |
| ; SSE-NEXT: [[TMP4:%.*]] = udiv <4 x i32> [[TMP3]], splat (i32 5) |
| ; SSE-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4 |
| ; SSE-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 |
| ; SSE-NEXT: [[TMP6:%.*]] = udiv <4 x i32> [[TMP5]], splat (i32 5) |
| ; SSE-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 |
| ; SSE-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 |
| ; SSE-NEXT: [[TMP8:%.*]] = udiv <4 x i32> [[TMP7]], splat (i32 5) |
| ; SSE-NEXT: store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 |
| ; SSE-NEXT: ret void |
| ; |
| ; SLM-LABEL: @udiv_v16i32_uniformconst( |
| ; SLM-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4 |
| ; SLM-NEXT: [[TMP2:%.*]] = udiv <4 x i32> [[TMP1]], splat (i32 5) |
| ; SLM-NEXT: store <4 x i32> [[TMP2]], ptr @c32, align 4 |
| ; SLM-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4 |
| ; SLM-NEXT: [[TMP4:%.*]] = udiv <4 x i32> [[TMP3]], splat (i32 5) |
| ; SLM-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4 |
| ; SLM-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 |
| ; SLM-NEXT: [[TMP6:%.*]] = udiv <4 x i32> [[TMP5]], splat (i32 5) |
| ; SLM-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 |
| ; SLM-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 |
| ; SLM-NEXT: [[TMP8:%.*]] = udiv <4 x i32> [[TMP7]], splat (i32 5) |
| ; SLM-NEXT: store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 |
| ; SLM-NEXT: ret void |
| ; |
| ; AVX-LABEL: @udiv_v16i32_uniformconst( |
| ; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4 |
| ; AVX-NEXT: [[TMP2:%.*]] = udiv <8 x i32> [[TMP1]], splat (i32 5) |
| ; AVX-NEXT: store <8 x i32> [[TMP2]], ptr @c32, align 4 |
| ; AVX-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 |
| ; AVX-NEXT: [[TMP4:%.*]] = udiv <8 x i32> [[TMP3]], splat (i32 5) |
| ; AVX-NEXT: store <8 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 |
| ; AVX-NEXT: ret void |
| ; |
| ; AVX512-LABEL: @udiv_v16i32_uniformconst( |
| ; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4 |
| ; AVX512-NEXT: [[TMP2:%.*]] = udiv <16 x i32> [[TMP1]], splat (i32 5) |
| ; AVX512-NEXT: store <16 x i32> [[TMP2]], ptr @c32, align 4 |
| ; AVX512-NEXT: ret void |
| ; |
| %a0 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4 |
| %a1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4 |
| %a2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4 |
| %a3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4 |
| %a4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4 |
| %a5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4 |
| %a6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4 |
| %a7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4 |
| %a8 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4 |
| %a9 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4 |
| %a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4 |
| %a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4 |
| %a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 |
| %a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4 |
| %a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4 |
| %a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4 |
| %r0 = udiv i32 %a0 , 5 |
| %r1 = udiv i32 %a1 , 5 |
| %r2 = udiv i32 %a2 , 5 |
| %r3 = udiv i32 %a3 , 5 |
| %r4 = udiv i32 %a4 , 5 |
| %r5 = udiv i32 %a5 , 5 |
| %r6 = udiv i32 %a6 , 5 |
| %r7 = udiv i32 %a7 , 5 |
| %r8 = udiv i32 %a8 , 5 |
| %r9 = udiv i32 %a9 , 5 |
| %r10 = udiv i32 %a10, 5 |
| %r11 = udiv i32 %a11, 5 |
| %r12 = udiv i32 %a12, 5 |
| %r13 = udiv i32 %a13, 5 |
| %r14 = udiv i32 %a14, 5 |
| %r15 = udiv i32 %a15, 5 |
| store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4 |
| store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4 |
| store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4 |
| store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4 |
| store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4 |
| store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4 |
| store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4 |
| store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4 |
| store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4 |
| store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4 |
| store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4 |
| store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4 |
| store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 |
| store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4 |
| store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4 |
| store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4 |
| ret void |
| } |
| |
| define void @urem_v16i32_uniformconst() { |
| ; SSE-LABEL: @urem_v16i32_uniformconst( |
| ; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4 |
| ; SSE-NEXT: [[TMP2:%.*]] = urem <4 x i32> [[TMP1]], splat (i32 5) |
| ; SSE-NEXT: store <4 x i32> [[TMP2]], ptr @c32, align 4 |
| ; SSE-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4 |
| ; SSE-NEXT: [[TMP4:%.*]] = urem <4 x i32> [[TMP3]], splat (i32 5) |
| ; SSE-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4 |
| ; SSE-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 |
| ; SSE-NEXT: [[TMP6:%.*]] = urem <4 x i32> [[TMP5]], splat (i32 5) |
| ; SSE-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 |
| ; SSE-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 |
| ; SSE-NEXT: [[TMP8:%.*]] = urem <4 x i32> [[TMP7]], splat (i32 5) |
| ; SSE-NEXT: store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 |
| ; SSE-NEXT: ret void |
| ; |
| ; SLM-LABEL: @urem_v16i32_uniformconst( |
| ; SLM-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4 |
| ; SLM-NEXT: [[TMP2:%.*]] = urem <4 x i32> [[TMP1]], splat (i32 5) |
| ; SLM-NEXT: store <4 x i32> [[TMP2]], ptr @c32, align 4 |
| ; SLM-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4 |
| ; SLM-NEXT: [[TMP4:%.*]] = urem <4 x i32> [[TMP3]], splat (i32 5) |
| ; SLM-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4 |
| ; SLM-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 |
| ; SLM-NEXT: [[TMP6:%.*]] = urem <4 x i32> [[TMP5]], splat (i32 5) |
| ; SLM-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 |
| ; SLM-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 |
| ; SLM-NEXT: [[TMP8:%.*]] = urem <4 x i32> [[TMP7]], splat (i32 5) |
| ; SLM-NEXT: store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 |
| ; SLM-NEXT: ret void |
| ; |
| ; AVX-LABEL: @urem_v16i32_uniformconst( |
| ; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4 |
| ; AVX-NEXT: [[TMP2:%.*]] = urem <8 x i32> [[TMP1]], splat (i32 5) |
| ; AVX-NEXT: store <8 x i32> [[TMP2]], ptr @c32, align 4 |
| ; AVX-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 |
| ; AVX-NEXT: [[TMP4:%.*]] = urem <8 x i32> [[TMP3]], splat (i32 5) |
| ; AVX-NEXT: store <8 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 |
| ; AVX-NEXT: ret void |
| ; |
| ; AVX512-LABEL: @urem_v16i32_uniformconst( |
| ; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4 |
| ; AVX512-NEXT: [[TMP2:%.*]] = urem <16 x i32> [[TMP1]], splat (i32 5) |
| ; AVX512-NEXT: store <16 x i32> [[TMP2]], ptr @c32, align 4 |
| ; AVX512-NEXT: ret void |
| ; |
| %a0 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4 |
| %a1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4 |
| %a2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4 |
| %a3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4 |
| %a4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4 |
| %a5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4 |
| %a6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4 |
| %a7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4 |
| %a8 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4 |
| %a9 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4 |
| %a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4 |
| %a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4 |
| %a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 |
| %a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4 |
| %a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4 |
| %a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4 |
| %r0 = urem i32 %a0 , 5 |
| %r1 = urem i32 %a1 , 5 |
| %r2 = urem i32 %a2 , 5 |
| %r3 = urem i32 %a3 , 5 |
| %r4 = urem i32 %a4 , 5 |
| %r5 = urem i32 %a5 , 5 |
| %r6 = urem i32 %a6 , 5 |
| %r7 = urem i32 %a7 , 5 |
| %r8 = urem i32 %a8 , 5 |
| %r9 = urem i32 %a9 , 5 |
| %r10 = urem i32 %a10, 5 |
| %r11 = urem i32 %a11, 5 |
| %r12 = urem i32 %a12, 5 |
| %r13 = urem i32 %a13, 5 |
| %r14 = urem i32 %a14, 5 |
| %r15 = urem i32 %a15, 5 |
| store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4 |
| store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4 |
| store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4 |
| store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4 |
| store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4 |
| store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4 |
| store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4 |
| store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4 |
| store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4 |
| store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4 |
| store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4 |
| store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4 |
| store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 |
| store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4 |
| store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4 |
| store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4 |
| ret void |
| } |