; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -S -passes=slp-vectorizer -mtriple=aarch64-unknown-linux -mattr=-fullfp16 | FileCheck %s --check-prefixes=CHECK,NOFP16
; RUN: opt < %s -S -passes=slp-vectorizer -mtriple=aarch64-unknown-linux -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,FULLFP16

; Fast fadd over both lanes of a <2 x half>. Per the CHECK lines this stays
; scalar (extract + fadd) with and without +fullfp16.
define half @reduce_fast_half2(<2 x half> %vec2) {
; CHECK-LABEL: define half @reduce_fast_half2(
; CHECK-SAME: <2 x half> [[VEC2:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[ELT0:%.*]] = extractelement <2 x half> [[VEC2]], i64 0
; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x half> [[VEC2]], i64 1
; CHECK-NEXT: [[ADD1:%.*]] = fadd fast half [[ELT1]], [[ELT0]]
; CHECK-NEXT: ret half [[ADD1]]
;
entry:
  %elt0 = extractelement <2 x half> %vec2, i64 0
  %elt1 = extractelement <2 x half> %vec2, i64 1
  %add1 = fadd fast half %elt1, %elt0
  ret half %add1
}

; Strict (non-fast) 2-element fadd chain. Per the CHECK lines this stays scalar.
define half @reduce_half2(<2 x half> %vec2) {
; CHECK-LABEL: define half @reduce_half2(
; CHECK-SAME: <2 x half> [[VEC2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[ELT0:%.*]] = extractelement <2 x half> [[VEC2]], i64 0
; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x half> [[VEC2]], i64 1
; CHECK-NEXT: [[ADD1:%.*]] = fadd half [[ELT1]], [[ELT0]]
; CHECK-NEXT: ret half [[ADD1]]
;
entry:
  %elt0 = extractelement <2 x half> %vec2, i64 0
  %elt1 = extractelement <2 x half> %vec2, i64 1
  %add1 = fadd half %elt1, %elt0
  ret half %add1
}

; Fast 4-element fadd chain. Per the CHECK lines this is turned into a single
; llvm.vector.reduce.fadd.v4f16 call under both -fullfp16 and +fullfp16.
define half @reduce_fast_half4(<4 x half> %vec4) {
; CHECK-LABEL: define half @reduce_fast_half4(
; CHECK-SAME: <4 x half> [[VEC4:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> [[VEC4]])
; CHECK-NEXT: ret half [[TMP0]]
;
entry:
  %elt0 = extractelement <4 x half> %vec4, i64 0
  %elt1 = extractelement <4 x half> %vec4, i64 1
  %elt2 = extractelement <4 x half> %vec4, i64 2
  %elt3 = extractelement <4 x half> %vec4, i64 3
  %add1 = fadd fast half %elt1, %elt0
  %add2 = fadd fast half %elt2, %add1
  %add3 = fadd fast half %elt3, %add2
  ret half %add3
}

; Strict 4-element fadd chain. Per the CHECK lines this stays scalar.
define half @reduce_half4(<4 x half> %vec4) {
; CHECK-LABEL: define half @reduce_half4(
; CHECK-SAME: <4 x half> [[VEC4:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x half> [[VEC4]], i64 0
; CHECK-NEXT: [[ELT1:%.*]] = extractelement <4 x half> [[VEC4]], i64 1
; CHECK-NEXT: [[ELT2:%.*]] = extractelement <4 x half> [[VEC4]], i64 2
; CHECK-NEXT: [[ELT3:%.*]] = extractelement <4 x half> [[VEC4]], i64 3
; CHECK-NEXT: [[ADD1:%.*]] = fadd half [[ELT1]], [[ELT0]]
; CHECK-NEXT: [[ADD2:%.*]] = fadd half [[ELT2]], [[ADD1]]
; CHECK-NEXT: [[ADD3:%.*]] = fadd half [[ELT3]], [[ADD2]]
; CHECK-NEXT: ret half [[ADD3]]
;
entry:
  %elt0 = extractelement <4 x half> %vec4, i64 0
  %elt1 = extractelement <4 x half> %vec4, i64 1
  %elt2 = extractelement <4 x half> %vec4, i64 2
  %elt3 = extractelement <4 x half> %vec4, i64 3
  %add1 = fadd half %elt1, %elt0
  %add2 = fadd half %elt2, %add1
  %add3 = fadd half %elt3, %add2
  ret half %add3
}

; Fast 8-element fadd chain. Per the CHECK lines: without +fullfp16 it is split
; into two v4f16 reductions combined with a final fadd; with +fullfp16 it
; becomes a single v8f16 reduction.
define half @reduce_fast_half8(<8 x half> %vec8) {
; NOFP16-LABEL: define half @reduce_fast_half8(
; NOFP16-SAME: <8 x half> [[VEC8:%.*]]) #[[ATTR0]] {
; NOFP16-NEXT: [[ENTRY:.*:]]
; NOFP16-NEXT: [[TMP0:%.*]] = shufflevector <8 x half> [[VEC8]], <8 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; NOFP16-NEXT: [[TMP1:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> [[TMP0]])
; NOFP16-NEXT: [[TMP2:%.*]] = shufflevector <8 x half> [[VEC8]], <8 x half> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; NOFP16-NEXT: [[TMP3:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> [[TMP2]])
; NOFP16-NEXT: [[OP_RDX3:%.*]] = fadd fast half [[TMP1]], [[TMP3]]
; NOFP16-NEXT: ret half [[OP_RDX3]]
;
; FULLFP16-LABEL: define half @reduce_fast_half8(
; FULLFP16-SAME: <8 x half> [[VEC8:%.*]]) #[[ATTR0]] {
; FULLFP16-NEXT: [[ENTRY:.*:]]
; FULLFP16-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> [[VEC8]])
; FULLFP16-NEXT: ret half [[TMP0]]
;
entry:
  %elt0 = extractelement <8 x half> %vec8, i64 0
  %elt1 = extractelement <8 x half> %vec8, i64 1
  %elt2 = extractelement <8 x half> %vec8, i64 2
  %elt3 = extractelement <8 x half> %vec8, i64 3
  %elt4 = extractelement <8 x half> %vec8, i64 4
  %elt5 = extractelement <8 x half> %vec8, i64 5
  %elt6 = extractelement <8 x half> %vec8, i64 6
  %elt7 = extractelement <8 x half> %vec8, i64 7
  %add1 = fadd fast half %elt1, %elt0
  %add2 = fadd fast half %elt2, %add1
  %add3 = fadd fast half %elt3, %add2
  %add4 = fadd fast half %elt4, %add3
  %add5 = fadd fast half %elt5, %add4
  %add6 = fadd fast half %elt6, %add5
  %add7 = fadd fast half %elt7, %add6
  ret half %add7
}

; Strict 8-element fadd chain. Per the CHECK lines this stays scalar.
define half @reduce_half8(<8 x half> %vec8) {
; CHECK-LABEL: define half @reduce_half8(
; CHECK-SAME: <8 x half> [[VEC8:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[ELT0:%.*]] = extractelement <8 x half> [[VEC8]], i64 0
; CHECK-NEXT: [[ELT1:%.*]] = extractelement <8 x half> [[VEC8]], i64 1
; CHECK-NEXT: [[ELT2:%.*]] = extractelement <8 x half> [[VEC8]], i64 2
; CHECK-NEXT: [[ELT3:%.*]] = extractelement <8 x half> [[VEC8]], i64 3
; CHECK-NEXT: [[ELT4:%.*]] = extractelement <8 x half> [[VEC8]], i64 4
; CHECK-NEXT: [[ELT5:%.*]] = extractelement <8 x half> [[VEC8]], i64 5
; CHECK-NEXT: [[ELT6:%.*]] = extractelement <8 x half> [[VEC8]], i64 6
; CHECK-NEXT: [[ELT7:%.*]] = extractelement <8 x half> [[VEC8]], i64 7
; CHECK-NEXT: [[ADD1:%.*]] = fadd half [[ELT1]], [[ELT0]]
; CHECK-NEXT: [[ADD2:%.*]] = fadd half [[ELT2]], [[ADD1]]
; CHECK-NEXT: [[ADD3:%.*]] = fadd half [[ELT3]], [[ADD2]]
; CHECK-NEXT: [[ADD4:%.*]] = fadd half [[ELT4]], [[ADD3]]
; CHECK-NEXT: [[ADD5:%.*]] = fadd half [[ELT5]], [[ADD4]]
; CHECK-NEXT: [[ADD6:%.*]] = fadd half [[ELT6]], [[ADD5]]
; CHECK-NEXT: [[ADD7:%.*]] = fadd half [[ELT7]], [[ADD6]]
; CHECK-NEXT: ret half [[ADD7]]
;
entry:
  %elt0 = extractelement <8 x half> %vec8, i64 0
  %elt1 = extractelement <8 x half> %vec8, i64 1
  %elt2 = extractelement <8 x half> %vec8, i64 2
  %elt3 = extractelement <8 x half> %vec8, i64 3
  %elt4 = extractelement <8 x half> %vec8, i64 4
  %elt5 = extractelement <8 x half> %vec8, i64 5
  %elt6 = extractelement <8 x half> %vec8, i64 6
  %elt7 = extractelement <8 x half> %vec8, i64 7
  %add1 = fadd half %elt1, %elt0
  %add2 = fadd half %elt2, %add1
  %add3 = fadd half %elt3, %add2
  %add4 = fadd half %elt4, %add3
  %add5 = fadd half %elt5, %add4
  %add6 = fadd half %elt6, %add5
  %add7 = fadd half %elt7, %add6
  ret half %add7
}

; Fast 16-element fadd chain. Per the CHECK lines this becomes a single
; llvm.vector.reduce.fadd.v16f16 call under both attribute settings.
define half @reduce_fast_half16(<16 x half> %vec16) {
; CHECK-LABEL: define half @reduce_fast_half16(
; CHECK-SAME: <16 x half> [[VEC16:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> [[VEC16]])
; CHECK-NEXT: ret half [[TMP0]]
;
entry:
  %elt0 = extractelement <16 x half> %vec16, i64 0
  %elt1 = extractelement <16 x half> %vec16, i64 1
  %elt2 = extractelement <16 x half> %vec16, i64 2
  %elt3 = extractelement <16 x half> %vec16, i64 3
  %elt4 = extractelement <16 x half> %vec16, i64 4
  %elt5 = extractelement <16 x half> %vec16, i64 5
  %elt6 = extractelement <16 x half> %vec16, i64 6
  %elt7 = extractelement <16 x half> %vec16, i64 7
  %elt8 = extractelement <16 x half> %vec16, i64 8
  %elt9 = extractelement <16 x half> %vec16, i64 9
  %elt10 = extractelement <16 x half> %vec16, i64 10
  %elt11 = extractelement <16 x half> %vec16, i64 11
  %elt12 = extractelement <16 x half> %vec16, i64 12
  %elt13 = extractelement <16 x half> %vec16, i64 13
  %elt14 = extractelement <16 x half> %vec16, i64 14
  %elt15 = extractelement <16 x half> %vec16, i64 15
  %add1 = fadd fast half %elt1, %elt0
  %add2 = fadd fast half %elt2, %add1
  %add3 = fadd fast half %elt3, %add2
  %add4 = fadd fast half %elt4, %add3
  %add5 = fadd fast half %elt5, %add4
  %add6 = fadd fast half %elt6, %add5
  %add7 = fadd fast half %elt7, %add6
  %add8 = fadd fast half %elt8, %add7
  %add9 = fadd fast half %elt9, %add8
  %add10 = fadd fast half %elt10, %add9
  %add11 = fadd fast half %elt11, %add10
  %add12 = fadd fast half %elt12, %add11
  %add13 = fadd fast half %elt13, %add12
  %add14 = fadd fast half %elt14, %add13
  %add15 = fadd fast half %elt15, %add14
  ret half %add15
}

; Strict 16-element fadd chain. Per the CHECK lines this stays scalar.
define half @reduce_half16(<16 x half> %vec16) {
; CHECK-LABEL: define half @reduce_half16(
; CHECK-SAME: <16 x half> [[VEC16:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[ELT0:%.*]] = extractelement <16 x half> [[VEC16]], i64 0
; CHECK-NEXT: [[ELT1:%.*]] = extractelement <16 x half> [[VEC16]], i64 1
; CHECK-NEXT: [[ELT2:%.*]] = extractelement <16 x half> [[VEC16]], i64 2
; CHECK-NEXT: [[ELT3:%.*]] = extractelement <16 x half> [[VEC16]], i64 3
; CHECK-NEXT: [[ELT4:%.*]] = extractelement <16 x half> [[VEC16]], i64 4
; CHECK-NEXT: [[ELT5:%.*]] = extractelement <16 x half> [[VEC16]], i64 5
; CHECK-NEXT: [[ELT6:%.*]] = extractelement <16 x half> [[VEC16]], i64 6
; CHECK-NEXT: [[ELT7:%.*]] = extractelement <16 x half> [[VEC16]], i64 7
; CHECK-NEXT: [[ELT8:%.*]] = extractelement <16 x half> [[VEC16]], i64 8
; CHECK-NEXT: [[ELT9:%.*]] = extractelement <16 x half> [[VEC16]], i64 9
; CHECK-NEXT: [[ELT10:%.*]] = extractelement <16 x half> [[VEC16]], i64 10
; CHECK-NEXT: [[ELT11:%.*]] = extractelement <16 x half> [[VEC16]], i64 11
; CHECK-NEXT: [[ELT12:%.*]] = extractelement <16 x half> [[VEC16]], i64 12
; CHECK-NEXT: [[ELT13:%.*]] = extractelement <16 x half> [[VEC16]], i64 13
; CHECK-NEXT: [[ELT14:%.*]] = extractelement <16 x half> [[VEC16]], i64 14
; CHECK-NEXT: [[ELT15:%.*]] = extractelement <16 x half> [[VEC16]], i64 15
; CHECK-NEXT: [[ADD1:%.*]] = fadd half [[ELT1]], [[ELT0]]
; CHECK-NEXT: [[ADD2:%.*]] = fadd half [[ELT2]], [[ADD1]]
; CHECK-NEXT: [[ADD3:%.*]] = fadd half [[ELT3]], [[ADD2]]
; CHECK-NEXT: [[ADD4:%.*]] = fadd half [[ELT4]], [[ADD3]]
; CHECK-NEXT: [[ADD5:%.*]] = fadd half [[ELT5]], [[ADD4]]
; CHECK-NEXT: [[ADD6:%.*]] = fadd half [[ELT6]], [[ADD5]]
; CHECK-NEXT: [[ADD7:%.*]] = fadd half [[ELT7]], [[ADD6]]
; CHECK-NEXT: [[ADD8:%.*]] = fadd half [[ELT8]], [[ADD7]]
; CHECK-NEXT: [[ADD9:%.*]] = fadd half [[ELT9]], [[ADD8]]
; CHECK-NEXT: [[ADD10:%.*]] = fadd half [[ELT10]], [[ADD9]]
; CHECK-NEXT: [[ADD11:%.*]] = fadd half [[ELT11]], [[ADD10]]
; CHECK-NEXT: [[ADD12:%.*]] = fadd half [[ELT12]], [[ADD11]]
; CHECK-NEXT: [[ADD13:%.*]] = fadd half [[ELT13]], [[ADD12]]
; CHECK-NEXT: [[ADD14:%.*]] = fadd half [[ELT14]], [[ADD13]]
; CHECK-NEXT: [[ADD15:%.*]] = fadd half [[ELT15]], [[ADD14]]
; CHECK-NEXT: ret half [[ADD15]]
;
entry:
  %elt0 = extractelement <16 x half> %vec16, i64 0
  %elt1 = extractelement <16 x half> %vec16, i64 1
  %elt2 = extractelement <16 x half> %vec16, i64 2
  %elt3 = extractelement <16 x half> %vec16, i64 3
  %elt4 = extractelement <16 x half> %vec16, i64 4
  %elt5 = extractelement <16 x half> %vec16, i64 5
  %elt6 = extractelement <16 x half> %vec16, i64 6
  %elt7 = extractelement <16 x half> %vec16, i64 7
  %elt8 = extractelement <16 x half> %vec16, i64 8
  %elt9 = extractelement <16 x half> %vec16, i64 9
  %elt10 = extractelement <16 x half> %vec16, i64 10
  %elt11 = extractelement <16 x half> %vec16, i64 11
  %elt12 = extractelement <16 x half> %vec16, i64 12
  %elt13 = extractelement <16 x half> %vec16, i64 13
  %elt14 = extractelement <16 x half> %vec16, i64 14
  %elt15 = extractelement <16 x half> %vec16, i64 15
  %add1 = fadd half %elt1, %elt0
  %add2 = fadd half %elt2, %add1
  %add3 = fadd half %elt3, %add2
  %add4 = fadd half %elt4, %add3
  %add5 = fadd half %elt5, %add4
  %add6 = fadd half %elt6, %add5
  %add7 = fadd half %elt7, %add6
  %add8 = fadd half %elt8, %add7
  %add9 = fadd half %elt9, %add8
  %add10 = fadd half %elt10, %add9
  %add11 = fadd half %elt11, %add10
  %add12 = fadd half %elt12, %add11
  %add13 = fadd half %elt13, %add12
  %add14 = fadd half %elt14, %add13
  %add15 = fadd half %elt15, %add14
  ret half %add15
}

; Fast fadd over both lanes of a <2 x float>. Per the CHECK lines this stays scalar.
define float @reduce_fast_float2(<2 x float> %vec2) {
; CHECK-LABEL: define float @reduce_fast_float2(
; CHECK-SAME: <2 x float> [[VEC2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[ELT0:%.*]] = extractelement <2 x float> [[VEC2]], i64 0
; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[VEC2]], i64 1
; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float [[ELT1]], [[ELT0]]
; CHECK-NEXT: ret float [[ADD1]]
;
entry:
  %elt0 = extractelement <2 x float> %vec2, i64 0
  %elt1 = extractelement <2 x float> %vec2, i64 1
  %add1 = fadd fast float %elt1, %elt0
  ret float %add1
}

; Strict 2-element float fadd. Per the CHECK lines this stays scalar.
define float @reduce_float2(<2 x float> %vec2) {
; CHECK-LABEL: define float @reduce_float2(
; CHECK-SAME: <2 x float> [[VEC2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[ELT0:%.*]] = extractelement <2 x float> [[VEC2]], i64 0
; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[VEC2]], i64 1
; CHECK-NEXT: [[ADD1:%.*]] = fadd float [[ELT1]], [[ELT0]]
; CHECK-NEXT: ret float [[ADD1]]
;
entry:
  %elt0 = extractelement <2 x float> %vec2, i64 0
  %elt1 = extractelement <2 x float> %vec2, i64 1
  %add1 = fadd float %elt1, %elt0
  ret float %add1
}

; Fast 4-element float fadd chain. Per the CHECK lines this becomes a single
; llvm.vector.reduce.fadd.v4f32 call.
define float @reduce_fast_float4(<4 x float> %vec4) {
; CHECK-LABEL: define float @reduce_fast_float4(
; CHECK-SAME: <4 x float> [[VEC4:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[VEC4]])
; CHECK-NEXT: ret float [[TMP0]]
;
entry:
  %elt0 = extractelement <4 x float> %vec4, i64 0
  %elt1 = extractelement <4 x float> %vec4, i64 1
  %elt2 = extractelement <4 x float> %vec4, i64 2
  %elt3 = extractelement <4 x float> %vec4, i64 3
  %add1 = fadd fast float %elt1, %elt0
  %add2 = fadd fast float %elt2, %add1
  %add3 = fadd fast float %elt3, %add2
  ret float %add3
}

; Strict 4-element float fadd chain. Per the CHECK lines this stays scalar.
define float @reduce_float4(<4 x float> %vec4) {
; CHECK-LABEL: define float @reduce_float4(
; CHECK-SAME: <4 x float> [[VEC4:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[VEC4]], i64 0
; CHECK-NEXT: [[ELT1:%.*]] = extractelement <4 x float> [[VEC4]], i64 1
; CHECK-NEXT: [[ELT2:%.*]] = extractelement <4 x float> [[VEC4]], i64 2
; CHECK-NEXT: [[ELT3:%.*]] = extractelement <4 x float> [[VEC4]], i64 3
; CHECK-NEXT: [[ADD1:%.*]] = fadd float [[ELT1]], [[ELT0]]
; CHECK-NEXT: [[ADD2:%.*]] = fadd float [[ELT2]], [[ADD1]]
; CHECK-NEXT: [[ADD3:%.*]] = fadd float [[ELT3]], [[ADD2]]
; CHECK-NEXT: ret float [[ADD3]]
;
entry:
  %elt0 = extractelement <4 x float> %vec4, i64 0
  %elt1 = extractelement <4 x float> %vec4, i64 1
  %elt2 = extractelement <4 x float> %vec4, i64 2
  %elt3 = extractelement <4 x float> %vec4, i64 3
  %add1 = fadd float %elt1, %elt0
  %add2 = fadd float %elt2, %add1
  %add3 = fadd float %elt3, %add2
  ret float %add3
}

; Fast 8-element float fadd chain. Per the CHECK lines this becomes a single
; llvm.vector.reduce.fadd.v8f32 call.
define float @reduce_fast_float8(<8 x float> %vec8) {
; CHECK-LABEL: define float @reduce_fast_float8(
; CHECK-SAME: <8 x float> [[VEC8:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[VEC8]])
; CHECK-NEXT: ret float [[TMP0]]
;
entry:
  %elt0 = extractelement <8 x float> %vec8, i64 0
  %elt1 = extractelement <8 x float> %vec8, i64 1
  %elt2 = extractelement <8 x float> %vec8, i64 2
  %elt3 = extractelement <8 x float> %vec8, i64 3
  %elt4 = extractelement <8 x float> %vec8, i64 4
  %elt5 = extractelement <8 x float> %vec8, i64 5
  %elt6 = extractelement <8 x float> %vec8, i64 6
  %elt7 = extractelement <8 x float> %vec8, i64 7
  %add1 = fadd fast float %elt1, %elt0
  %add2 = fadd fast float %elt2, %add1
  %add3 = fadd fast float %elt3, %add2
  %add4 = fadd fast float %elt4, %add3
  %add5 = fadd fast float %elt5, %add4
  %add6 = fadd fast float %elt6, %add5
  %add7 = fadd fast float %elt7, %add6
  ret float %add7
}

; Strict 8-element float fadd chain. Per the CHECK lines this stays scalar.
define float @reduce_float8(<8 x float> %vec8) {
; CHECK-LABEL: define float @reduce_float8(
; CHECK-SAME: <8 x float> [[VEC8:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[ELT0:%.*]] = extractelement <8 x float> [[VEC8]], i64 0
; CHECK-NEXT: [[ELT1:%.*]] = extractelement <8 x float> [[VEC8]], i64 1
; CHECK-NEXT: [[ELT2:%.*]] = extractelement <8 x float> [[VEC8]], i64 2
; CHECK-NEXT: [[ELT3:%.*]] = extractelement <8 x float> [[VEC8]], i64 3
; CHECK-NEXT: [[ELT4:%.*]] = extractelement <8 x float> [[VEC8]], i64 4
; CHECK-NEXT: [[ELT5:%.*]] = extractelement <8 x float> [[VEC8]], i64 5
; CHECK-NEXT: [[ELT6:%.*]] = extractelement <8 x float> [[VEC8]], i64 6
; CHECK-NEXT: [[ELT7:%.*]] = extractelement <8 x float> [[VEC8]], i64 7
; CHECK-NEXT: [[ADD1:%.*]] = fadd float [[ELT1]], [[ELT0]]
; CHECK-NEXT: [[ADD2:%.*]] = fadd float [[ELT2]], [[ADD1]]
; CHECK-NEXT: [[ADD3:%.*]] = fadd float [[ELT3]], [[ADD2]]
; CHECK-NEXT: [[ADD4:%.*]] = fadd float [[ELT4]], [[ADD3]]
; CHECK-NEXT: [[ADD5:%.*]] = fadd float [[ELT5]], [[ADD4]]
; CHECK-NEXT: [[ADD6:%.*]] = fadd float [[ELT6]], [[ADD5]]
; CHECK-NEXT: [[ADD7:%.*]] = fadd float [[ELT7]], [[ADD6]]
; CHECK-NEXT: ret float [[ADD7]]
;
entry:
  %elt0 = extractelement <8 x float> %vec8, i64 0
  %elt1 = extractelement <8 x float> %vec8, i64 1
  %elt2 = extractelement <8 x float> %vec8, i64 2
  %elt3 = extractelement <8 x float> %vec8, i64 3
  %elt4 = extractelement <8 x float> %vec8, i64 4
  %elt5 = extractelement <8 x float> %vec8, i64 5
  %elt6 = extractelement <8 x float> %vec8, i64 6
  %elt7 = extractelement <8 x float> %vec8, i64 7
  %add1 = fadd float %elt1, %elt0
  %add2 = fadd float %elt2, %add1
  %add3 = fadd float %elt3, %add2
  %add4 = fadd float %elt4, %add3
  %add5 = fadd float %elt5, %add4
  %add6 = fadd float %elt6, %add5
  %add7 = fadd float %elt7, %add6
  ret float %add7
}

; Fast fadd over both lanes of a <2 x double>. Per the CHECK lines this stays scalar.
define double @reduce_fast_double2(<2 x double> %vec2) {
; CHECK-LABEL: define double @reduce_fast_double2(
; CHECK-SAME: <2 x double> [[VEC2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[ELT0:%.*]] = extractelement <2 x double> [[VEC2]], i64 0
; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x double> [[VEC2]], i64 1
; CHECK-NEXT: [[ADD1:%.*]] = fadd fast double [[ELT1]], [[ELT0]]
; CHECK-NEXT: ret double [[ADD1]]
;
entry:
  %elt0 = extractelement <2 x double> %vec2, i64 0
  %elt1 = extractelement <2 x double> %vec2, i64 1
  %add1 = fadd fast double %elt1, %elt0
  ret double %add1
}

; Strict 2-element double fadd. Per the CHECK lines this stays scalar.
define double @reduce_double2(<2 x double> %vec2) {
; CHECK-LABEL: define double @reduce_double2(
; CHECK-SAME: <2 x double> [[VEC2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[ELT0:%.*]] = extractelement <2 x double> [[VEC2]], i64 0
; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x double> [[VEC2]], i64 1
; CHECK-NEXT: [[ADD1:%.*]] = fadd double [[ELT1]], [[ELT0]]
; CHECK-NEXT: ret double [[ADD1]]
;
entry:
  %elt0 = extractelement <2 x double> %vec2, i64 0
  %elt1 = extractelement <2 x double> %vec2, i64 1
  %add1 = fadd double %elt1, %elt0
  ret double %add1
}

; Fast 4-element double fadd chain. Per the CHECK lines this becomes a single
; llvm.vector.reduce.fadd.v4f64 call.
define double @reduce_fast_double4(<4 x double> %vec4) {
; CHECK-LABEL: define double @reduce_fast_double4(
; CHECK-SAME: <4 x double> [[VEC4:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[VEC4]])
; CHECK-NEXT: ret double [[TMP0]]
;
entry:
  %elt0 = extractelement <4 x double> %vec4, i64 0
  %elt1 = extractelement <4 x double> %vec4, i64 1
  %elt2 = extractelement <4 x double> %vec4, i64 2
  %elt3 = extractelement <4 x double> %vec4, i64 3
  %add1 = fadd fast double %elt1, %elt0
  %add2 = fadd fast double %elt2, %add1
  %add3 = fadd fast double %elt3, %add2
  ret double %add3
}

; Strict 4-element double fadd chain. Per the CHECK lines this stays scalar.
define double @reduce_double4(<4 x double> %vec4) {
; CHECK-LABEL: define double @reduce_double4(
; CHECK-SAME: <4 x double> [[VEC4:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x double> [[VEC4]], i64 0
; CHECK-NEXT: [[ELT1:%.*]] = extractelement <4 x double> [[VEC4]], i64 1
; CHECK-NEXT: [[ELT2:%.*]] = extractelement <4 x double> [[VEC4]], i64 2
; CHECK-NEXT: [[ELT3:%.*]] = extractelement <4 x double> [[VEC4]], i64 3
; CHECK-NEXT: [[ADD1:%.*]] = fadd double [[ELT1]], [[ELT0]]
; CHECK-NEXT: [[ADD2:%.*]] = fadd double [[ELT2]], [[ADD1]]
; CHECK-NEXT: [[ADD3:%.*]] = fadd double [[ELT3]], [[ADD2]]
; CHECK-NEXT: ret double [[ADD3]]
;
entry:
  %elt0 = extractelement <4 x double> %vec4, i64 0
  %elt1 = extractelement <4 x double> %vec4, i64 1
  %elt2 = extractelement <4 x double> %vec4, i64 2
  %elt3 = extractelement <4 x double> %vec4, i64 3
  %add1 = fadd double %elt1, %elt0
  %add2 = fadd double %elt2, %add1
  %add3 = fadd double %elt3, %add2
  ret double %add3
}

; Fixed iteration count. sum += a[i]
; Per the CHECK lines the first 4 loads become one <4 x float> load feeding a
; v4f32 reduction; the 5th element is added separately.
define float @reduce_fast_float_case1(ptr %a) {
; CHECK-LABEL: define float @reduce_fast_float_case1(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 4
; CHECK-NEXT: [[GEP4:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 16
; CHECK-NEXT: [[LOAD4:%.*]] = load float, ptr [[GEP4]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP0]])
; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float [[TMP1]], [[LOAD4]]
; CHECK-NEXT: ret float [[ADD4]]
;
entry:
  %load = load float, ptr %a
  %gep = getelementptr inbounds i8, ptr %a, i64 4
  %load1 = load float, ptr %gep
  %add1 = fadd fast float %load1, %load
  %gep2 = getelementptr inbounds i8, ptr %a, i64 8
  %load2 = load float, ptr %gep2
  %add2 = fadd fast float %load2, %add1
  %gep3 = getelementptr inbounds i8, ptr %a, i64 12
  %load3 = load float, ptr %gep3
  %add3 = fadd fast float %load3, %add2
  %gep4 = getelementptr inbounds i8, ptr %a, i64 16
  %load4 = load float, ptr %gep4
  %add4 = fadd fast float %load4, %add3
  ret float %add4
}

; Fixed iteration count. sum += a[i]
; Strict-FP variant of case1. Per the CHECK lines nothing is vectorized.
define float @reduce_float_case1(ptr %a) {
; CHECK-LABEL: define float @reduce_float_case1(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr [[A]], align 4
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 4
; CHECK-NEXT: [[LOAD1:%.*]] = load float, ptr [[GEP]], align 4
; CHECK-NEXT: [[ADD1:%.*]] = fadd float [[LOAD1]], [[LOAD]]
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 8
; CHECK-NEXT: [[LOAD2:%.*]] = load float, ptr [[GEP2]], align 4
; CHECK-NEXT: [[ADD2:%.*]] = fadd float [[LOAD2]], [[ADD1]]
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 12
; CHECK-NEXT: [[LOAD3:%.*]] = load float, ptr [[GEP3]], align 4
; CHECK-NEXT: [[ADD3:%.*]] = fadd float [[LOAD3]], [[ADD2]]
; CHECK-NEXT: [[GEP4:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 16
; CHECK-NEXT: [[LOAD4:%.*]] = load float, ptr [[GEP4]], align 4
; CHECK-NEXT: [[ADD4:%.*]] = fadd float [[LOAD4]], [[ADD3]]
; CHECK-NEXT: ret float [[ADD4]]
;
entry:
  %load = load float, ptr %a
  %gep = getelementptr inbounds i8, ptr %a, i64 4
  %load1 = load float, ptr %gep
  %add1 = fadd float %load1, %load
  %gep2 = getelementptr inbounds i8, ptr %a, i64 8
  %load2 = load float, ptr %gep2
  %add2 = fadd float %load2, %add1
  %gep3 = getelementptr inbounds i8, ptr %a, i64 12
  %load3 = load float, ptr %gep3
  %add3 = fadd float %load3, %add2
  %gep4 = getelementptr inbounds i8, ptr %a, i64 16
  %load4 = load float, ptr %gep4
  %add4 = fadd float %load4, %add3
  ret float %add4
}

; Reduction needs a shuffle. See add2 and add3.
; Per the CHECK lines the whole thing is folded into a single v8f32 reduction
; built from the two <4 x float> loads (fast-math allows the reassociation).
define float @reduce_fast_float_case2(ptr %a, ptr %b) {
; CHECK-LABEL: define float @reduce_fast_float_case2(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> [[TMP1]], i64 0)
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP2]], <4 x float> [[TMP0]], i64 4)
; CHECK-NEXT: [[RED3:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP3]])
; CHECK-NEXT: ret float [[RED3]]
;
entry:
  %gepa1 = getelementptr inbounds float, ptr %a, i32 1
  %gepa2 = getelementptr inbounds float, ptr %a, i32 2
  %gepa3 = getelementptr inbounds float, ptr %a, i32 3
  %gepb1 = getelementptr inbounds float, ptr %b, i32 1
  %gepb2 = getelementptr inbounds float, ptr %b, i32 2
  %gepb3 = getelementptr inbounds float, ptr %b, i32 3
  %loada = load float, ptr %a
  %loada1 = load float, ptr %gepa1
  %loada2 = load float, ptr %gepa2
  %loada3 = load float, ptr %gepa3
  %loadb = load float, ptr %b
  %loadb1 = load float, ptr %gepb1
  %loadb2 = load float, ptr %gepb2
  %loadb3 = load float, ptr %gepb3
  %add = fadd fast float %loada, %loadb
  %add1 = fadd fast float %loada1, %loadb1
  %add2 = fadd fast float %loada3, %loadb2
  %add3 = fadd fast float %loada2, %loadb3
  %red1 = fadd fast float %add, %add1
  %red2 = fadd fast float %add2, %red1
  %red3 = fadd fast float %add3, %red2
  ret float %red3
}

; Reduction needs a shuffle. See add2 and add3.
; Strict-FP variant of case2. Per the CHECK lines only the first two lanes are
; vectorized (one <2 x float> fadd); the shuffled adds and the reduction chain
; stay scalar.
define float @reduce_float_case2(ptr %a, ptr %b) {
; CHECK-LABEL: define float @reduce_float_case2(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[GEPA2:%.*]] = getelementptr inbounds float, ptr [[A]], i32 2
; CHECK-NEXT: [[GEPA3:%.*]] = getelementptr inbounds float, ptr [[A]], i32 3
; CHECK-NEXT: [[GEPB2:%.*]] = getelementptr inbounds float, ptr [[B]], i32 2
; CHECK-NEXT: [[GEPB3:%.*]] = getelementptr inbounds float, ptr [[B]], i32 3
; CHECK-NEXT: [[LOADA2:%.*]] = load float, ptr [[GEPA2]], align 4
; CHECK-NEXT: [[LOADA3:%.*]] = load float, ptr [[GEPA3]], align 4
; CHECK-NEXT: [[LOADB2:%.*]] = load float, ptr [[GEPB2]], align 4
; CHECK-NEXT: [[LOADB3:%.*]] = load float, ptr [[GEPB3]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[A]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[B]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x float> [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[ADD2:%.*]] = fadd float [[LOADA3]], [[LOADB2]]
; CHECK-NEXT: [[ADD3:%.*]] = fadd float [[LOADA2]], [[LOADB3]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
; CHECK-NEXT: [[RED1:%.*]] = fadd float [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[RED2:%.*]] = fadd float [[ADD2]], [[RED1]]
; CHECK-NEXT: [[RED3:%.*]] = fadd float [[ADD3]], [[RED2]]
; CHECK-NEXT: ret float [[RED3]]
;
entry:
  %gepa1 = getelementptr inbounds float, ptr %a, i32 1
  %gepa2 = getelementptr inbounds float, ptr %a, i32 2
  %gepa3 = getelementptr inbounds float, ptr %a, i32 3
  %gepb1 = getelementptr inbounds float, ptr %b, i32 1
  %gepb2 = getelementptr inbounds float, ptr %b, i32 2
  %gepb3 = getelementptr inbounds float, ptr %b, i32 3
  %loada = load float, ptr %a
  %loada1 = load float, ptr %gepa1
  %loada2 = load float, ptr %gepa2
  %loada3 = load float, ptr %gepa3
  %loadb = load float, ptr %b
  %loadb1 = load float, ptr %gepb1
  %loadb2 = load float, ptr %gepb2
  %loadb3 = load float, ptr %gepb3
  %add = fadd float %loada, %loadb
  %add1 = fadd float %loada1, %loadb1
  %add2 = fadd float %loada3, %loadb2
  %add3 = fadd float %loada2, %loadb3
  %red1 = fadd float %add, %add1
  %red2 = fadd float %add2, %red1
  %red3 = fadd float %add3, %red2
  ret float %red3
}

; Addition of log.
; Per the CHECK lines nothing is vectorized here: the llvm.log.f32 calls and the
; fast fadd chain are emitted unchanged.
define float @reduce_fast_float_case3(ptr %a) {
; CHECK-LABEL: define float @reduce_fast_float_case3(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, ptr [[A]], i32 1
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds float, ptr [[A]], i32 2
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds float, ptr [[A]], i32 3
; CHECK-NEXT: [[GEP4:%.*]] = getelementptr inbounds float, ptr [[A]], i32 4
; CHECK-NEXT: [[GEP5:%.*]] = getelementptr inbounds float, ptr [[A]], i32 5
; CHECK-NEXT: [[GEP6:%.*]] = getelementptr inbounds float, ptr [[A]], i32 6
; CHECK-NEXT: [[GEP7:%.*]] = getelementptr inbounds float, ptr [[A]], i32 7
; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr [[A]], align 4
; CHECK-NEXT: [[LOAD1:%.*]] = load float, ptr [[GEP1]], align 4
; CHECK-NEXT: [[LOAD2:%.*]] = load float, ptr [[GEP2]], align 4
; CHECK-NEXT: [[LOAD3:%.*]] = load float, ptr [[GEP3]], align 4
; CHECK-NEXT: [[LOAD4:%.*]] = load float, ptr [[GEP4]], align 4
; CHECK-NEXT: [[LOAD5:%.*]] = load float, ptr [[GEP5]], align 4
; CHECK-NEXT: [[LOAD6:%.*]] = load float, ptr [[GEP6]], align 4
; CHECK-NEXT: [[LOAD7:%.*]] = load float, ptr [[GEP7]], align 4
; CHECK-NEXT: [[LOG:%.*]] = call fast float @llvm.log.f32(float [[LOAD]])
; CHECK-NEXT: [[LOG1:%.*]] = call fast float @llvm.log.f32(float [[LOAD1]])
; CHECK-NEXT: [[LOG2:%.*]] = call fast float @llvm.log.f32(float [[LOAD2]])
; CHECK-NEXT: [[LOG3:%.*]] = call fast float @llvm.log.f32(float [[LOAD3]])
; CHECK-NEXT: [[LOG4:%.*]] = call fast float @llvm.log.f32(float [[LOAD4]])
; CHECK-NEXT: [[LOG5:%.*]] = call fast float @llvm.log.f32(float [[LOAD5]])
; CHECK-NEXT: [[LOG6:%.*]] = call fast float @llvm.log.f32(float [[LOAD6]])
; CHECK-NEXT: [[LOG7:%.*]] = call fast float @llvm.log.f32(float [[LOAD7]])
; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float [[LOG]], [[LOG1]]
; CHECK-NEXT: [[ADD2:%.*]] = fadd fast float [[ADD1]], [[LOG2]]
; CHECK-NEXT: [[ADD3:%.*]] = fadd fast float [[ADD2]], [[LOG3]]
; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float [[ADD3]], [[LOG4]]
; CHECK-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4]], [[LOG5]]
; CHECK-NEXT: [[ADD6:%.*]] = fadd fast float [[ADD5]], [[LOG6]]
; CHECK-NEXT: [[ADD7:%.*]] = fadd fast float [[ADD6]], [[LOG7]]
; CHECK-NEXT: ret float [[ADD7]]
;
entry:
  %gep1 = getelementptr inbounds float, ptr %a, i32 1
  %gep2 = getelementptr inbounds float, ptr %a, i32 2
  %gep3 = getelementptr inbounds float, ptr %a, i32 3
  %gep4 = getelementptr inbounds float, ptr %a, i32 4
  %gep5 = getelementptr inbounds float, ptr %a, i32 5
  %gep6 = getelementptr inbounds float, ptr %a, i32 6
  %gep7 = getelementptr inbounds float, ptr %a, i32 7
  %load = load float, ptr %a
  %load1 = load float, ptr %gep1
  %load2 = load float, ptr %gep2
  %load3 = load float, ptr %gep3
  %load4 = load float, ptr %gep4
  %load5 = load float, ptr %gep5
  %load6 = load float, ptr %gep6
  %load7 = load float, ptr %gep7
  %log = call fast float @llvm.log.f32(float %load)
  %log1 = call fast float @llvm.log.f32(float %load1)
  %log2 = call fast float @llvm.log.f32(float %load2)
  %log3 = call fast float @llvm.log.f32(float %load3)
  %log4 = call fast float @llvm.log.f32(float %load4)
  %log5 = call fast float @llvm.log.f32(float %load5)
  %log6 = call fast float @llvm.log.f32(float %load6)
  %log7 = call fast float @llvm.log.f32(float %load7)
  %add1 = fadd fast float %log, %log1
  %add2 = fadd fast float %add1, %log2
  %add3 = fadd fast float %add2, %log3
  %add4 = fadd fast float %add3, %log4
  %add5 = fadd fast float %add4, %log5
  %add6 = fadd fast float %add5, %log6
  %add7 = fadd fast float %add6, %log7
  ret float %add7
}

| |
; Addition of log results, but without fast-math flags: the ordered fadd
; chain may not be reassociated, so no vectorization is expected here.
define float @reduce_float_case3(ptr %a) {
; CHECK-LABEL: define float @reduce_float_case3(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, ptr [[A]], i32 1
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds float, ptr [[A]], i32 2
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds float, ptr [[A]], i32 3
; CHECK-NEXT: [[GEP4:%.*]] = getelementptr inbounds float, ptr [[A]], i32 4
; CHECK-NEXT: [[GEP5:%.*]] = getelementptr inbounds float, ptr [[A]], i32 5
; CHECK-NEXT: [[GEP6:%.*]] = getelementptr inbounds float, ptr [[A]], i32 6
; CHECK-NEXT: [[GEP7:%.*]] = getelementptr inbounds float, ptr [[A]], i32 7
; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr [[A]], align 4
; CHECK-NEXT: [[LOAD1:%.*]] = load float, ptr [[GEP1]], align 4
; CHECK-NEXT: [[LOAD2:%.*]] = load float, ptr [[GEP2]], align 4
; CHECK-NEXT: [[LOAD3:%.*]] = load float, ptr [[GEP3]], align 4
; CHECK-NEXT: [[LOAD4:%.*]] = load float, ptr [[GEP4]], align 4
; CHECK-NEXT: [[LOAD5:%.*]] = load float, ptr [[GEP5]], align 4
; CHECK-NEXT: [[LOAD6:%.*]] = load float, ptr [[GEP6]], align 4
; CHECK-NEXT: [[LOAD7:%.*]] = load float, ptr [[GEP7]], align 4
; CHECK-NEXT: [[LOG:%.*]] = call float @llvm.log.f32(float [[LOAD]])
; CHECK-NEXT: [[LOG1:%.*]] = call float @llvm.log.f32(float [[LOAD1]])
; CHECK-NEXT: [[LOG2:%.*]] = call float @llvm.log.f32(float [[LOAD2]])
; CHECK-NEXT: [[LOG3:%.*]] = call float @llvm.log.f32(float [[LOAD3]])
; CHECK-NEXT: [[LOG4:%.*]] = call float @llvm.log.f32(float [[LOAD4]])
; CHECK-NEXT: [[LOG5:%.*]] = call float @llvm.log.f32(float [[LOAD5]])
; CHECK-NEXT: [[LOG6:%.*]] = call float @llvm.log.f32(float [[LOAD6]])
; CHECK-NEXT: [[LOG7:%.*]] = call float @llvm.log.f32(float [[LOAD7]])
; CHECK-NEXT: [[ADD1:%.*]] = fadd float [[LOG]], [[LOG1]]
; CHECK-NEXT: [[ADD2:%.*]] = fadd float [[ADD1]], [[LOG2]]
; CHECK-NEXT: [[ADD3:%.*]] = fadd float [[ADD2]], [[LOG3]]
; CHECK-NEXT: [[ADD4:%.*]] = fadd float [[ADD3]], [[LOG4]]
; CHECK-NEXT: [[ADD5:%.*]] = fadd float [[ADD4]], [[LOG5]]
; CHECK-NEXT: [[ADD6:%.*]] = fadd float [[ADD5]], [[LOG6]]
; CHECK-NEXT: [[ADD7:%.*]] = fadd float [[ADD6]], [[LOG7]]
; CHECK-NEXT: ret float [[ADD7]]
;
; The fadd chain below carries no fast-math flags, so it is an ordered
; (strict) reduction that SLP may not reassociate: the CHECK lines above
; expect the loads, log calls and adds to remain fully scalar. Only the
; common CHECK prefix is used, so the output is identical under both
; RUN lines (-mattr=-fullfp16 and -mattr=+fullfp16).
entry:
%gep1 = getelementptr inbounds float, ptr %a, i32 1
%gep2 = getelementptr inbounds float, ptr %a, i32 2
%gep3 = getelementptr inbounds float, ptr %a, i32 3
%gep4 = getelementptr inbounds float, ptr %a, i32 4
%gep5 = getelementptr inbounds float, ptr %a, i32 5
%gep6 = getelementptr inbounds float, ptr %a, i32 6
%gep7 = getelementptr inbounds float, ptr %a, i32 7
%load = load float, ptr %a
%load1 = load float, ptr %gep1
%load2 = load float, ptr %gep2
%load3 = load float, ptr %gep3
%load4 = load float, ptr %gep4
%load5 = load float, ptr %gep5
%load6 = load float, ptr %gep6
%load7 = load float, ptr %gep7
%log = call float @llvm.log.f32(float %load)
%log1 = call float @llvm.log.f32(float %load1)
%log2 = call float @llvm.log.f32(float %load2)
%log3 = call float @llvm.log.f32(float %load3)
%log4 = call float @llvm.log.f32(float %load4)
%log5 = call float @llvm.log.f32(float %load5)
%log6 = call float @llvm.log.f32(float %load6)
%log7 = call float @llvm.log.f32(float %load7)
%add1 = fadd float %log, %log1
%add2 = fadd float %add1, %log2
%add3 = fadd float %add2, %log3
%add4 = fadd float %add3, %log4
%add5 = fadd float %add4, %log5
%add6 = fadd float %add5, %log6
%add7 = fadd float %add6, %log7
ret float %add7
}
| |
; Pairwise (tree-shaped) fadd of all four lanes with `fast` flags: the
; flags permit reassociation, so SLP is expected to fold the whole tree
; into a single llvm.vector.reduce.fadd call, regardless of lane order.
define half @reduce_unordered_fast_half4(<4 x half> %vec4) {
; CHECK-LABEL: define half @reduce_unordered_fast_half4(
; CHECK-SAME: <4 x half> [[VEC4:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> [[VEC4]])
; CHECK-NEXT: ret half [[TMP0]]
;
entry:
%elt0 = extractelement <4 x half> %vec4, i64 0
%elt1 = extractelement <4 x half> %vec4, i64 1
%elt2 = extractelement <4 x half> %vec4, i64 2
%elt3 = extractelement <4 x half> %vec4, i64 3
%add1 = fadd fast half %elt1, %elt0
%add2 = fadd fast half %elt2, %elt3
%add3 = fadd fast half %add1, %add2
ret half %add3
}
| |
; Same pairwise add tree but without fast-math flags: the adds may not be
; reassociated into a reduction intrinsic. The CHECK lines expect SLP to
; vectorize the two independent first-level adds as one <2 x half> fadd
; (fed by shuffles) while the final combining add stays scalar.
define half @reduce_unordered_half4(<4 x half> %vec4) {
; CHECK-LABEL: define half @reduce_unordered_half4(
; CHECK-SAME: <4 x half> [[VEC4:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[VEC4]], <4 x half> poison, <2 x i32> <i32 1, i32 2>
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[VEC4]], <4 x half> poison, <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x half> [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x half> [[TMP2]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x half> [[TMP2]], i32 1
; CHECK-NEXT: [[ADD3:%.*]] = fadd half [[TMP3]], [[TMP4]]
; CHECK-NEXT: ret half [[ADD3]]
;
entry:
%elt0 = extractelement <4 x half> %vec4, i64 0
%elt1 = extractelement <4 x half> %vec4, i64 1
%elt2 = extractelement <4 x half> %vec4, i64 2
%elt3 = extractelement <4 x half> %vec4, i64 3
%add1 = fadd half %elt1, %elt0
%add2 = fadd half %elt2, %elt3
%add3 = fadd half %add1, %add2
ret half %add3
}