| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: opt < %s -mtriple=x86_64-- -passes=slp-vectorizer -S -mattr=+avx2 | FileCheck %s --check-prefix=CHECK |
| ; RUN: opt < %s -mtriple=x86_64-- -passes=slp-vectorizer -S -mattr=+avx2 -mattr=+f16c | FileCheck %s --check-prefix=CHECK-F16C |
| ; RUN: opt < %s -mtriple=x86_64-- -passes=slp-vectorizer -S -mattr=+avx512f | FileCheck %s --check-prefix=CHECK-AVX512 |
| |
; Loads four consecutive half values starting at %s0, extends each one to float
; with fpext, and stores the results to four consecutive float slots at %d0.
; Expected SLP result per configuration in this file: with only +avx2 the
; scalar IR is left untouched; with +f16c added, or with +avx512f, it becomes
; one <4 x half> load, one vector fpext and one <4 x float> store.
; NOTE(review): every scalar store claims align 8, including the ones at odd
; float indices (4-byte offsets from %d0) -- presumably deliberate test input,
; since the expected vector store also carries align 8; confirm before changing.
| define void @fpext_v4xf16_v4xf32(ptr %s0, ptr %d0) { |
| ; CHECK-LABEL: define void @fpext_v4xf16_v4xf32( |
| ; CHECK-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0:[0-9]+]] { |
| ; CHECK-NEXT: [[S1:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 1 |
| ; CHECK-NEXT: [[S2:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 2 |
| ; CHECK-NEXT: [[S3:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 3 |
| ; CHECK-NEXT: [[L0:%.*]] = load half, ptr [[S0]], align 2 |
| ; CHECK-NEXT: [[L1:%.*]] = load half, ptr [[S1]], align 2 |
| ; CHECK-NEXT: [[L2:%.*]] = load half, ptr [[S2]], align 2 |
| ; CHECK-NEXT: [[L3:%.*]] = load half, ptr [[S3]], align 2 |
| ; CHECK-NEXT: [[E0:%.*]] = fpext half [[L0]] to float |
| ; CHECK-NEXT: [[E1:%.*]] = fpext half [[L1]] to float |
| ; CHECK-NEXT: [[E2:%.*]] = fpext half [[L2]] to float |
| ; CHECK-NEXT: [[E3:%.*]] = fpext half [[L3]] to float |
| ; CHECK-NEXT: [[D1:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 1 |
| ; CHECK-NEXT: [[D2:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 2 |
| ; CHECK-NEXT: [[D3:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 3 |
| ; CHECK-NEXT: store float [[E0]], ptr [[D0]], align 8 |
| ; CHECK-NEXT: store float [[E1]], ptr [[D1]], align 8 |
| ; CHECK-NEXT: store float [[E2]], ptr [[D2]], align 8 |
| ; CHECK-NEXT: store float [[E3]], ptr [[D3]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| ; CHECK-F16C-LABEL: define void @fpext_v4xf16_v4xf32( |
| ; CHECK-F16C-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0:[0-9]+]] { |
| ; CHECK-F16C-NEXT: [[TMP1:%.*]] = load <4 x half>, ptr [[S0]], align 2 |
| ; CHECK-F16C-NEXT: [[TMP2:%.*]] = fpext <4 x half> [[TMP1]] to <4 x float> |
| ; CHECK-F16C-NEXT: store <4 x float> [[TMP2]], ptr [[D0]], align 8 |
| ; CHECK-F16C-NEXT: ret void |
| ; |
| ; CHECK-AVX512-LABEL: define void @fpext_v4xf16_v4xf32( |
| ; CHECK-AVX512-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0:[0-9]+]] { |
| ; CHECK-AVX512-NEXT: [[TMP1:%.*]] = load <4 x half>, ptr [[S0]], align 2 |
| ; CHECK-AVX512-NEXT: [[TMP2:%.*]] = fpext <4 x half> [[TMP1]] to <4 x float> |
| ; CHECK-AVX512-NEXT: store <4 x float> [[TMP2]], ptr [[D0]], align 8 |
| ; CHECK-AVX512-NEXT: ret void |
| ; |
| %s1 = getelementptr inbounds half, ptr %s0, i64 1 |
| %s2 = getelementptr inbounds half, ptr %s0, i64 2 |
| %s3 = getelementptr inbounds half, ptr %s0, i64 3 |
| %l0 = load half, ptr %s0, align 2 |
| %l1 = load half, ptr %s1, align 2 |
| %l2 = load half, ptr %s2, align 2 |
| %l3 = load half, ptr %s3, align 2 |
| |
| %e0 = fpext half %l0 to float |
| %e1 = fpext half %l1 to float |
| %e2 = fpext half %l2 to float |
| %e3 = fpext half %l3 to float |
| |
| %d1 = getelementptr inbounds float, ptr %d0, i64 1 |
| %d2 = getelementptr inbounds float, ptr %d0, i64 2 |
| %d3 = getelementptr inbounds float, ptr %d0, i64 3 |
| store float %e0, ptr %d0, align 8 |
| store float %e1, ptr %d1, align 8 |
| store float %e2, ptr %d2, align 8 |
| store float %e3, ptr %d3, align 8 |
| ret void |
| } |
| |
; Loads four consecutive half values starting at %s0, extends each one to
; double with fpext, and stores the results to four consecutive double slots
; at %d0.
; Expected SLP result per configuration in this file: with only +avx2 the
; scalar IR is left untouched; with +f16c added, or with +avx512f, it becomes
; one <4 x half> load, one vector fpext and one <4 x double> store.
| define void @fpext_v4xf16_v4xf64(ptr %s0, ptr %d0) { |
| ; CHECK-LABEL: define void @fpext_v4xf16_v4xf64( |
| ; CHECK-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[S1:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 1 |
| ; CHECK-NEXT: [[S2:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 2 |
| ; CHECK-NEXT: [[S3:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 3 |
| ; CHECK-NEXT: [[L0:%.*]] = load half, ptr [[S0]], align 2 |
| ; CHECK-NEXT: [[L1:%.*]] = load half, ptr [[S1]], align 2 |
| ; CHECK-NEXT: [[L2:%.*]] = load half, ptr [[S2]], align 2 |
| ; CHECK-NEXT: [[L3:%.*]] = load half, ptr [[S3]], align 2 |
| ; CHECK-NEXT: [[E0:%.*]] = fpext half [[L0]] to double |
| ; CHECK-NEXT: [[E1:%.*]] = fpext half [[L1]] to double |
| ; CHECK-NEXT: [[E2:%.*]] = fpext half [[L2]] to double |
| ; CHECK-NEXT: [[E3:%.*]] = fpext half [[L3]] to double |
| ; CHECK-NEXT: [[D1:%.*]] = getelementptr inbounds double, ptr [[D0]], i64 1 |
| ; CHECK-NEXT: [[D2:%.*]] = getelementptr inbounds double, ptr [[D0]], i64 2 |
| ; CHECK-NEXT: [[D3:%.*]] = getelementptr inbounds double, ptr [[D0]], i64 3 |
| ; CHECK-NEXT: store double [[E0]], ptr [[D0]], align 8 |
| ; CHECK-NEXT: store double [[E1]], ptr [[D1]], align 8 |
| ; CHECK-NEXT: store double [[E2]], ptr [[D2]], align 8 |
| ; CHECK-NEXT: store double [[E3]], ptr [[D3]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| ; CHECK-F16C-LABEL: define void @fpext_v4xf16_v4xf64( |
| ; CHECK-F16C-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] { |
| ; CHECK-F16C-NEXT: [[TMP1:%.*]] = load <4 x half>, ptr [[S0]], align 2 |
| ; CHECK-F16C-NEXT: [[TMP2:%.*]] = fpext <4 x half> [[TMP1]] to <4 x double> |
| ; CHECK-F16C-NEXT: store <4 x double> [[TMP2]], ptr [[D0]], align 8 |
| ; CHECK-F16C-NEXT: ret void |
| ; |
| ; CHECK-AVX512-LABEL: define void @fpext_v4xf16_v4xf64( |
| ; CHECK-AVX512-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] { |
| ; CHECK-AVX512-NEXT: [[TMP1:%.*]] = load <4 x half>, ptr [[S0]], align 2 |
| ; CHECK-AVX512-NEXT: [[TMP2:%.*]] = fpext <4 x half> [[TMP1]] to <4 x double> |
| ; CHECK-AVX512-NEXT: store <4 x double> [[TMP2]], ptr [[D0]], align 8 |
| ; CHECK-AVX512-NEXT: ret void |
| ; |
| %s1 = getelementptr inbounds half, ptr %s0, i64 1 |
| %s2 = getelementptr inbounds half, ptr %s0, i64 2 |
| %s3 = getelementptr inbounds half, ptr %s0, i64 3 |
| %l0 = load half, ptr %s0, align 2 |
| %l1 = load half, ptr %s1, align 2 |
| %l2 = load half, ptr %s2, align 2 |
| %l3 = load half, ptr %s3, align 2 |
| |
| %e0 = fpext half %l0 to double |
| %e1 = fpext half %l1 to double |
| %e2 = fpext half %l2 to double |
| %e3 = fpext half %l3 to double |
| |
| %d1 = getelementptr inbounds double, ptr %d0, i64 1 |
| %d2 = getelementptr inbounds double, ptr %d0, i64 2 |
| %d3 = getelementptr inbounds double, ptr %d0, i64 3 |
| store double %e0, ptr %d0, align 8 |
| store double %e1, ptr %d1, align 8 |
| store double %e2, ptr %d2, align 8 |
| store double %e3, ptr %d3, align 8 |
| ret void |
| } |
| |
; Loads sixteen consecutive half values starting at %s0, extends each one to
; float with fpext, and stores the results to sixteen consecutive float slots
; at %d0.
; Expected SLP result per configuration in this file: with only +avx2 the
; scalar IR is left untouched; with +f16c added it is split into two 8-wide
; load/fpext/store groups (elements 0-7 and 8-15); with +avx512f it becomes a
; single 16-wide load/fpext/store.
; The pattern-variable names in the scalar expectations follow the IR value
; names (%d3 is captured as D3, %d15 as D15).
| define void @fpext_v16xf16_v16xf32(ptr %s0, ptr %d0) { |
| ; CHECK-LABEL: define void @fpext_v16xf16_v16xf32( |
| ; CHECK-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[S1:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 1 |
| ; CHECK-NEXT: [[S2:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 2 |
| ; CHECK-NEXT: [[S3:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 3 |
| ; CHECK-NEXT: [[S4:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 4 |
| ; CHECK-NEXT: [[S5:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 5 |
| ; CHECK-NEXT: [[S6:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 6 |
| ; CHECK-NEXT: [[S7:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 7 |
| ; CHECK-NEXT: [[S8:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 8 |
| ; CHECK-NEXT: [[S9:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 9 |
| ; CHECK-NEXT: [[S10:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 10 |
| ; CHECK-NEXT: [[S11:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 11 |
| ; CHECK-NEXT: [[S12:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 12 |
| ; CHECK-NEXT: [[S13:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 13 |
| ; CHECK-NEXT: [[S14:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 14 |
| ; CHECK-NEXT: [[S15:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 15 |
| ; CHECK-NEXT: [[L0:%.*]] = load half, ptr [[S0]], align 2 |
| ; CHECK-NEXT: [[L1:%.*]] = load half, ptr [[S1]], align 2 |
| ; CHECK-NEXT: [[L2:%.*]] = load half, ptr [[S2]], align 2 |
| ; CHECK-NEXT: [[L3:%.*]] = load half, ptr [[S3]], align 2 |
| ; CHECK-NEXT: [[L4:%.*]] = load half, ptr [[S4]], align 2 |
| ; CHECK-NEXT: [[L5:%.*]] = load half, ptr [[S5]], align 2 |
| ; CHECK-NEXT: [[L6:%.*]] = load half, ptr [[S6]], align 2 |
| ; CHECK-NEXT: [[L7:%.*]] = load half, ptr [[S7]], align 2 |
| ; CHECK-NEXT: [[L8:%.*]] = load half, ptr [[S8]], align 2 |
| ; CHECK-NEXT: [[L9:%.*]] = load half, ptr [[S9]], align 2 |
| ; CHECK-NEXT: [[L10:%.*]] = load half, ptr [[S10]], align 2 |
| ; CHECK-NEXT: [[L11:%.*]] = load half, ptr [[S11]], align 2 |
| ; CHECK-NEXT: [[L12:%.*]] = load half, ptr [[S12]], align 2 |
| ; CHECK-NEXT: [[L13:%.*]] = load half, ptr [[S13]], align 2 |
| ; CHECK-NEXT: [[L14:%.*]] = load half, ptr [[S14]], align 2 |
| ; CHECK-NEXT: [[L15:%.*]] = load half, ptr [[S15]], align 2 |
| ; CHECK-NEXT: [[E0:%.*]] = fpext half [[L0]] to float |
| ; CHECK-NEXT: [[E1:%.*]] = fpext half [[L1]] to float |
| ; CHECK-NEXT: [[E2:%.*]] = fpext half [[L2]] to float |
| ; CHECK-NEXT: [[E3:%.*]] = fpext half [[L3]] to float |
| ; CHECK-NEXT: [[E4:%.*]] = fpext half [[L4]] to float |
| ; CHECK-NEXT: [[E5:%.*]] = fpext half [[L5]] to float |
| ; CHECK-NEXT: [[E6:%.*]] = fpext half [[L6]] to float |
| ; CHECK-NEXT: [[E7:%.*]] = fpext half [[L7]] to float |
| ; CHECK-NEXT: [[E8:%.*]] = fpext half [[L8]] to float |
| ; CHECK-NEXT: [[E9:%.*]] = fpext half [[L9]] to float |
| ; CHECK-NEXT: [[E10:%.*]] = fpext half [[L10]] to float |
| ; CHECK-NEXT: [[E11:%.*]] = fpext half [[L11]] to float |
| ; CHECK-NEXT: [[E12:%.*]] = fpext half [[L12]] to float |
| ; CHECK-NEXT: [[E13:%.*]] = fpext half [[L13]] to float |
| ; CHECK-NEXT: [[E14:%.*]] = fpext half [[L14]] to float |
| ; CHECK-NEXT: [[E15:%.*]] = fpext half [[L15]] to float |
| ; CHECK-NEXT: [[D1:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 1 |
| ; CHECK-NEXT: [[D2:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 2 |
| ; CHECK-NEXT: [[D3:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 3 |
| ; CHECK-NEXT: [[D4:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 4 |
| ; CHECK-NEXT: [[D5:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 5 |
| ; CHECK-NEXT: [[D6:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 6 |
| ; CHECK-NEXT: [[D7:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 7 |
| ; CHECK-NEXT: [[D8:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 8 |
| ; CHECK-NEXT: [[D9:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 9 |
| ; CHECK-NEXT: [[D10:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 10 |
| ; CHECK-NEXT: [[D11:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 11 |
| ; CHECK-NEXT: [[D12:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 12 |
| ; CHECK-NEXT: [[D13:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 13 |
| ; CHECK-NEXT: [[D14:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 14 |
| ; CHECK-NEXT: [[D15:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 15 |
| ; CHECK-NEXT: store float [[E0]], ptr [[D0]], align 8 |
| ; CHECK-NEXT: store float [[E1]], ptr [[D1]], align 8 |
| ; CHECK-NEXT: store float [[E2]], ptr [[D2]], align 8 |
| ; CHECK-NEXT: store float [[E3]], ptr [[D3]], align 8 |
| ; CHECK-NEXT: store float [[E4]], ptr [[D4]], align 8 |
| ; CHECK-NEXT: store float [[E5]], ptr [[D5]], align 8 |
| ; CHECK-NEXT: store float [[E6]], ptr [[D6]], align 8 |
| ; CHECK-NEXT: store float [[E7]], ptr [[D7]], align 8 |
| ; CHECK-NEXT: store float [[E8]], ptr [[D8]], align 8 |
| ; CHECK-NEXT: store float [[E9]], ptr [[D9]], align 8 |
| ; CHECK-NEXT: store float [[E10]], ptr [[D10]], align 8 |
| ; CHECK-NEXT: store float [[E11]], ptr [[D11]], align 8 |
| ; CHECK-NEXT: store float [[E12]], ptr [[D12]], align 8 |
| ; CHECK-NEXT: store float [[E13]], ptr [[D13]], align 8 |
| ; CHECK-NEXT: store float [[E14]], ptr [[D14]], align 8 |
| ; CHECK-NEXT: store float [[E15]], ptr [[D15]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| ; CHECK-F16C-LABEL: define void @fpext_v16xf16_v16xf32( |
| ; CHECK-F16C-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] { |
| ; CHECK-F16C-NEXT: [[S8:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 8 |
| ; CHECK-F16C-NEXT: [[D8:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 8 |
| ; CHECK-F16C-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[S0]], align 2 |
| ; CHECK-F16C-NEXT: [[TMP2:%.*]] = fpext <8 x half> [[TMP1]] to <8 x float> |
| ; CHECK-F16C-NEXT: [[TMP3:%.*]] = load <8 x half>, ptr [[S8]], align 2 |
| ; CHECK-F16C-NEXT: [[TMP4:%.*]] = fpext <8 x half> [[TMP3]] to <8 x float> |
| ; CHECK-F16C-NEXT: store <8 x float> [[TMP2]], ptr [[D0]], align 8 |
| ; CHECK-F16C-NEXT: store <8 x float> [[TMP4]], ptr [[D8]], align 8 |
| ; CHECK-F16C-NEXT: ret void |
| ; |
| ; CHECK-AVX512-LABEL: define void @fpext_v16xf16_v16xf32( |
| ; CHECK-AVX512-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] { |
| ; CHECK-AVX512-NEXT: [[TMP1:%.*]] = load <16 x half>, ptr [[S0]], align 2 |
| ; CHECK-AVX512-NEXT: [[TMP2:%.*]] = fpext <16 x half> [[TMP1]] to <16 x float> |
| ; CHECK-AVX512-NEXT: store <16 x float> [[TMP2]], ptr [[D0]], align 8 |
| ; CHECK-AVX512-NEXT: ret void |
| ; |
| %s1 = getelementptr inbounds half, ptr %s0, i64 1 |
| %s2 = getelementptr inbounds half, ptr %s0, i64 2 |
| %s3 = getelementptr inbounds half, ptr %s0, i64 3 |
| %s4 = getelementptr inbounds half, ptr %s0, i64 4 |
| %s5 = getelementptr inbounds half, ptr %s0, i64 5 |
| %s6 = getelementptr inbounds half, ptr %s0, i64 6 |
| %s7 = getelementptr inbounds half, ptr %s0, i64 7 |
| %s8 = getelementptr inbounds half, ptr %s0, i64 8 |
| %s9 = getelementptr inbounds half, ptr %s0, i64 9 |
| %s10 = getelementptr inbounds half, ptr %s0, i64 10 |
| %s11 = getelementptr inbounds half, ptr %s0, i64 11 |
| %s12 = getelementptr inbounds half, ptr %s0, i64 12 |
| %s13 = getelementptr inbounds half, ptr %s0, i64 13 |
| %s14 = getelementptr inbounds half, ptr %s0, i64 14 |
| %s15 = getelementptr inbounds half, ptr %s0, i64 15 |
| %l0 = load half, ptr %s0, align 2 |
| %l1 = load half, ptr %s1, align 2 |
| %l2 = load half, ptr %s2, align 2 |
| %l3 = load half, ptr %s3, align 2 |
| %l4 = load half, ptr %s4, align 2 |
| %l5 = load half, ptr %s5, align 2 |
| %l6 = load half, ptr %s6, align 2 |
| %l7 = load half, ptr %s7, align 2 |
| %l8 = load half, ptr %s8, align 2 |
| %l9 = load half, ptr %s9, align 2 |
| %l10 = load half, ptr %s10, align 2 |
| %l11 = load half, ptr %s11, align 2 |
| %l12 = load half, ptr %s12, align 2 |
| %l13 = load half, ptr %s13, align 2 |
| %l14 = load half, ptr %s14, align 2 |
| %l15 = load half, ptr %s15, align 2 |
| |
| %e0 = fpext half %l0 to float |
| %e1 = fpext half %l1 to float |
| %e2 = fpext half %l2 to float |
| %e3 = fpext half %l3 to float |
| %e4 = fpext half %l4 to float |
| %e5 = fpext half %l5 to float |
| %e6 = fpext half %l6 to float |
| %e7 = fpext half %l7 to float |
| %e8 = fpext half %l8 to float |
| %e9 = fpext half %l9 to float |
| %e10 = fpext half %l10 to float |
| %e11 = fpext half %l11 to float |
| %e12 = fpext half %l12 to float |
| %e13 = fpext half %l13 to float |
| %e14 = fpext half %l14 to float |
| %e15 = fpext half %l15 to float |
| |
| %d1 = getelementptr inbounds float, ptr %d0, i64 1 |
| %d2 = getelementptr inbounds float, ptr %d0, i64 2 |
| %d3 = getelementptr inbounds float, ptr %d0, i64 3 |
| %d4 = getelementptr inbounds float, ptr %d0, i64 4 |
| %d5 = getelementptr inbounds float, ptr %d0, i64 5 |
| %d6 = getelementptr inbounds float, ptr %d0, i64 6 |
| %d7 = getelementptr inbounds float, ptr %d0, i64 7 |
| %d8 = getelementptr inbounds float, ptr %d0, i64 8 |
| %d9 = getelementptr inbounds float, ptr %d0, i64 9 |
| %d10 = getelementptr inbounds float, ptr %d0, i64 10 |
| %d11 = getelementptr inbounds float, ptr %d0, i64 11 |
| %d12 = getelementptr inbounds float, ptr %d0, i64 12 |
| %d13 = getelementptr inbounds float, ptr %d0, i64 13 |
| %d14 = getelementptr inbounds float, ptr %d0, i64 14 |
| %d15 = getelementptr inbounds float, ptr %d0, i64 15 |
| store float %e0, ptr %d0, align 8 |
| store float %e1, ptr %d1, align 8 |
| store float %e2, ptr %d2, align 8 |
| store float %e3, ptr %d3, align 8 |
| store float %e4, ptr %d4, align 8 |
| store float %e5, ptr %d5, align 8 |
| store float %e6, ptr %d6, align 8 |
| store float %e7, ptr %d7, align 8 |
| store float %e8, ptr %d8, align 8 |
| store float %e9, ptr %d9, align 8 |
| store float %e10, ptr %d10, align 8 |
| store float %e11, ptr %d11, align 8 |
| store float %e12, ptr %d12, align 8 |
| store float %e13, ptr %d13, align 8 |
| store float %e14, ptr %d14, align 8 |
| store float %e15, ptr %d15, align 8 |
| ret void |
| } |
| |
; Loads four consecutive float values starting at %s0, rounds each one to half
; with fptrunc, and stores the results to four consecutive half slots at %d0.
; Expected SLP result per configuration in this file: with only +avx2 the
; scalar IR is left untouched; with +f16c added, or with +avx512f, it becomes
; one <4 x float> load, one vector fptrunc and one <4 x half> store.
| define void @fpround_v4xf32_v4xf16(ptr %s0, ptr %d0) { |
| ; CHECK-LABEL: define void @fpround_v4xf32_v4xf16( |
| ; CHECK-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[S1:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 1 |
| ; CHECK-NEXT: [[S2:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 2 |
| ; CHECK-NEXT: [[S3:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 3 |
| ; CHECK-NEXT: [[L0:%.*]] = load float, ptr [[S0]], align 4 |
| ; CHECK-NEXT: [[L1:%.*]] = load float, ptr [[S1]], align 4 |
| ; CHECK-NEXT: [[L2:%.*]] = load float, ptr [[S2]], align 4 |
| ; CHECK-NEXT: [[L3:%.*]] = load float, ptr [[S3]], align 4 |
| ; CHECK-NEXT: [[T0:%.*]] = fptrunc float [[L0]] to half |
| ; CHECK-NEXT: [[T1:%.*]] = fptrunc float [[L1]] to half |
| ; CHECK-NEXT: [[T2:%.*]] = fptrunc float [[L2]] to half |
| ; CHECK-NEXT: [[T3:%.*]] = fptrunc float [[L3]] to half |
| ; CHECK-NEXT: [[D1:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 1 |
| ; CHECK-NEXT: [[D2:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 2 |
| ; CHECK-NEXT: [[D3:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 3 |
| ; CHECK-NEXT: store half [[T0]], ptr [[D0]], align 2 |
| ; CHECK-NEXT: store half [[T1]], ptr [[D1]], align 2 |
| ; CHECK-NEXT: store half [[T2]], ptr [[D2]], align 2 |
| ; CHECK-NEXT: store half [[T3]], ptr [[D3]], align 2 |
| ; CHECK-NEXT: ret void |
| ; |
| ; CHECK-F16C-LABEL: define void @fpround_v4xf32_v4xf16( |
| ; CHECK-F16C-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] { |
| ; CHECK-F16C-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[S0]], align 4 |
| ; CHECK-F16C-NEXT: [[TMP2:%.*]] = fptrunc <4 x float> [[TMP1]] to <4 x half> |
| ; CHECK-F16C-NEXT: store <4 x half> [[TMP2]], ptr [[D0]], align 2 |
| ; CHECK-F16C-NEXT: ret void |
| ; |
| ; CHECK-AVX512-LABEL: define void @fpround_v4xf32_v4xf16( |
| ; CHECK-AVX512-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] { |
| ; CHECK-AVX512-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[S0]], align 4 |
| ; CHECK-AVX512-NEXT: [[TMP2:%.*]] = fptrunc <4 x float> [[TMP1]] to <4 x half> |
| ; CHECK-AVX512-NEXT: store <4 x half> [[TMP2]], ptr [[D0]], align 2 |
| ; CHECK-AVX512-NEXT: ret void |
| ; |
| %s1 = getelementptr inbounds float, ptr %s0, i64 1 |
| %s2 = getelementptr inbounds float, ptr %s0, i64 2 |
| %s3 = getelementptr inbounds float, ptr %s0, i64 3 |
| %l0 = load float, ptr %s0, align 4 |
| %l1 = load float, ptr %s1, align 4 |
| %l2 = load float, ptr %s2, align 4 |
| %l3 = load float, ptr %s3, align 4 |
| |
| %t0 = fptrunc float %l0 to half |
| %t1 = fptrunc float %l1 to half |
| %t2 = fptrunc float %l2 to half |
| %t3 = fptrunc float %l3 to half |
| |
| %d1 = getelementptr inbounds half, ptr %d0, i64 1 |
| %d2 = getelementptr inbounds half, ptr %d0, i64 2 |
| %d3 = getelementptr inbounds half, ptr %d0, i64 3 |
| store half %t0, ptr %d0, align 2 |
| store half %t1, ptr %d1, align 2 |
| store half %t2, ptr %d2, align 2 |
| store half %t3, ptr %d3, align 2 |
| ret void |
| } |
| |
; Loads sixteen consecutive float values starting at %s0, rounds each one to
; half with fptrunc, and stores the results to sixteen consecutive half slots
; at %d0.
; Expected SLP result per configuration in this file: with only +avx2 the
; scalar IR is left untouched; with +f16c added, or with +avx512f, it becomes
; a single <16 x float> load, one vector fptrunc and one <16 x half> store.
| define void @fpround_v16xf32_v16xf16(ptr %s0, ptr %d0) { |
| ; CHECK-LABEL: define void @fpround_v16xf32_v16xf16( |
| ; CHECK-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[S1:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 1 |
| ; CHECK-NEXT: [[S2:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 2 |
| ; CHECK-NEXT: [[S3:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 3 |
| ; CHECK-NEXT: [[S4:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 4 |
| ; CHECK-NEXT: [[S5:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 5 |
| ; CHECK-NEXT: [[S6:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 6 |
| ; CHECK-NEXT: [[S7:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 7 |
| ; CHECK-NEXT: [[S8:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 8 |
| ; CHECK-NEXT: [[S9:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 9 |
| ; CHECK-NEXT: [[S10:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 10 |
| ; CHECK-NEXT: [[S11:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 11 |
| ; CHECK-NEXT: [[S12:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 12 |
| ; CHECK-NEXT: [[S13:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 13 |
| ; CHECK-NEXT: [[S14:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 14 |
| ; CHECK-NEXT: [[S15:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 15 |
| ; CHECK-NEXT: [[L0:%.*]] = load float, ptr [[S0]], align 4 |
| ; CHECK-NEXT: [[L1:%.*]] = load float, ptr [[S1]], align 4 |
| ; CHECK-NEXT: [[L2:%.*]] = load float, ptr [[S2]], align 4 |
| ; CHECK-NEXT: [[L3:%.*]] = load float, ptr [[S3]], align 4 |
| ; CHECK-NEXT: [[L4:%.*]] = load float, ptr [[S4]], align 4 |
| ; CHECK-NEXT: [[L5:%.*]] = load float, ptr [[S5]], align 4 |
| ; CHECK-NEXT: [[L6:%.*]] = load float, ptr [[S6]], align 4 |
| ; CHECK-NEXT: [[L7:%.*]] = load float, ptr [[S7]], align 4 |
| ; CHECK-NEXT: [[L8:%.*]] = load float, ptr [[S8]], align 4 |
| ; CHECK-NEXT: [[L9:%.*]] = load float, ptr [[S9]], align 4 |
| ; CHECK-NEXT: [[L10:%.*]] = load float, ptr [[S10]], align 4 |
| ; CHECK-NEXT: [[L11:%.*]] = load float, ptr [[S11]], align 4 |
| ; CHECK-NEXT: [[L12:%.*]] = load float, ptr [[S12]], align 4 |
| ; CHECK-NEXT: [[L13:%.*]] = load float, ptr [[S13]], align 4 |
| ; CHECK-NEXT: [[L14:%.*]] = load float, ptr [[S14]], align 4 |
| ; CHECK-NEXT: [[L15:%.*]] = load float, ptr [[S15]], align 4 |
| ; CHECK-NEXT: [[T0:%.*]] = fptrunc float [[L0]] to half |
| ; CHECK-NEXT: [[T1:%.*]] = fptrunc float [[L1]] to half |
| ; CHECK-NEXT: [[T2:%.*]] = fptrunc float [[L2]] to half |
| ; CHECK-NEXT: [[T3:%.*]] = fptrunc float [[L3]] to half |
| ; CHECK-NEXT: [[T4:%.*]] = fptrunc float [[L4]] to half |
| ; CHECK-NEXT: [[T5:%.*]] = fptrunc float [[L5]] to half |
| ; CHECK-NEXT: [[T6:%.*]] = fptrunc float [[L6]] to half |
| ; CHECK-NEXT: [[T7:%.*]] = fptrunc float [[L7]] to half |
| ; CHECK-NEXT: [[T8:%.*]] = fptrunc float [[L8]] to half |
| ; CHECK-NEXT: [[T9:%.*]] = fptrunc float [[L9]] to half |
| ; CHECK-NEXT: [[T10:%.*]] = fptrunc float [[L10]] to half |
| ; CHECK-NEXT: [[T11:%.*]] = fptrunc float [[L11]] to half |
| ; CHECK-NEXT: [[T12:%.*]] = fptrunc float [[L12]] to half |
| ; CHECK-NEXT: [[T13:%.*]] = fptrunc float [[L13]] to half |
| ; CHECK-NEXT: [[T14:%.*]] = fptrunc float [[L14]] to half |
| ; CHECK-NEXT: [[T15:%.*]] = fptrunc float [[L15]] to half |
| ; CHECK-NEXT: [[D1:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 1 |
| ; CHECK-NEXT: [[D2:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 2 |
| ; CHECK-NEXT: [[D3:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 3 |
| ; CHECK-NEXT: [[D4:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 4 |
| ; CHECK-NEXT: [[D5:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 5 |
| ; CHECK-NEXT: [[D6:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 6 |
| ; CHECK-NEXT: [[D7:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 7 |
| ; CHECK-NEXT: [[D8:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 8 |
| ; CHECK-NEXT: [[D9:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 9 |
| ; CHECK-NEXT: [[D10:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 10 |
| ; CHECK-NEXT: [[D11:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 11 |
| ; CHECK-NEXT: [[D12:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 12 |
| ; CHECK-NEXT: [[D13:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 13 |
| ; CHECK-NEXT: [[D14:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 14 |
| ; CHECK-NEXT: [[D15:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 15 |
| ; CHECK-NEXT: store half [[T0]], ptr [[D0]], align 2 |
| ; CHECK-NEXT: store half [[T1]], ptr [[D1]], align 2 |
| ; CHECK-NEXT: store half [[T2]], ptr [[D2]], align 2 |
| ; CHECK-NEXT: store half [[T3]], ptr [[D3]], align 2 |
| ; CHECK-NEXT: store half [[T4]], ptr [[D4]], align 2 |
| ; CHECK-NEXT: store half [[T5]], ptr [[D5]], align 2 |
| ; CHECK-NEXT: store half [[T6]], ptr [[D6]], align 2 |
| ; CHECK-NEXT: store half [[T7]], ptr [[D7]], align 2 |
| ; CHECK-NEXT: store half [[T8]], ptr [[D8]], align 2 |
| ; CHECK-NEXT: store half [[T9]], ptr [[D9]], align 2 |
| ; CHECK-NEXT: store half [[T10]], ptr [[D10]], align 2 |
| ; CHECK-NEXT: store half [[T11]], ptr [[D11]], align 2 |
| ; CHECK-NEXT: store half [[T12]], ptr [[D12]], align 2 |
| ; CHECK-NEXT: store half [[T13]], ptr [[D13]], align 2 |
| ; CHECK-NEXT: store half [[T14]], ptr [[D14]], align 2 |
| ; CHECK-NEXT: store half [[T15]], ptr [[D15]], align 2 |
| ; CHECK-NEXT: ret void |
| ; |
| ; CHECK-F16C-LABEL: define void @fpround_v16xf32_v16xf16( |
| ; CHECK-F16C-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] { |
| ; CHECK-F16C-NEXT: [[TMP1:%.*]] = load <16 x float>, ptr [[S0]], align 4 |
| ; CHECK-F16C-NEXT: [[TMP2:%.*]] = fptrunc <16 x float> [[TMP1]] to <16 x half> |
| ; CHECK-F16C-NEXT: store <16 x half> [[TMP2]], ptr [[D0]], align 2 |
| ; CHECK-F16C-NEXT: ret void |
| ; |
| ; CHECK-AVX512-LABEL: define void @fpround_v16xf32_v16xf16( |
| ; CHECK-AVX512-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] { |
| ; CHECK-AVX512-NEXT: [[TMP1:%.*]] = load <16 x float>, ptr [[S0]], align 4 |
| ; CHECK-AVX512-NEXT: [[TMP2:%.*]] = fptrunc <16 x float> [[TMP1]] to <16 x half> |
| ; CHECK-AVX512-NEXT: store <16 x half> [[TMP2]], ptr [[D0]], align 2 |
| ; CHECK-AVX512-NEXT: ret void |
| ; |
| %s1 = getelementptr inbounds float, ptr %s0, i64 1 |
| %s2 = getelementptr inbounds float, ptr %s0, i64 2 |
| %s3 = getelementptr inbounds float, ptr %s0, i64 3 |
| %s4 = getelementptr inbounds float, ptr %s0, i64 4 |
| %s5 = getelementptr inbounds float, ptr %s0, i64 5 |
| %s6 = getelementptr inbounds float, ptr %s0, i64 6 |
| %s7 = getelementptr inbounds float, ptr %s0, i64 7 |
| %s8 = getelementptr inbounds float, ptr %s0, i64 8 |
| %s9 = getelementptr inbounds float, ptr %s0, i64 9 |
| %s10 = getelementptr inbounds float, ptr %s0, i64 10 |
| %s11 = getelementptr inbounds float, ptr %s0, i64 11 |
| %s12 = getelementptr inbounds float, ptr %s0, i64 12 |
| %s13 = getelementptr inbounds float, ptr %s0, i64 13 |
| %s14 = getelementptr inbounds float, ptr %s0, i64 14 |
| %s15 = getelementptr inbounds float, ptr %s0, i64 15 |
| %l0 = load float, ptr %s0, align 4 |
| %l1 = load float, ptr %s1, align 4 |
| %l2 = load float, ptr %s2, align 4 |
| %l3 = load float, ptr %s3, align 4 |
| %l4 = load float, ptr %s4, align 4 |
| %l5 = load float, ptr %s5, align 4 |
| %l6 = load float, ptr %s6, align 4 |
| %l7 = load float, ptr %s7, align 4 |
| %l8 = load float, ptr %s8, align 4 |
| %l9 = load float, ptr %s9, align 4 |
| %l10 = load float, ptr %s10, align 4 |
| %l11 = load float, ptr %s11, align 4 |
| %l12 = load float, ptr %s12, align 4 |
| %l13 = load float, ptr %s13, align 4 |
| %l14 = load float, ptr %s14, align 4 |
| %l15 = load float, ptr %s15, align 4 |
| |
| %t0 = fptrunc float %l0 to half |
| %t1 = fptrunc float %l1 to half |
| %t2 = fptrunc float %l2 to half |
| %t3 = fptrunc float %l3 to half |
| %t4 = fptrunc float %l4 to half |
| %t5 = fptrunc float %l5 to half |
| %t6 = fptrunc float %l6 to half |
| %t7 = fptrunc float %l7 to half |
| %t8 = fptrunc float %l8 to half |
| %t9 = fptrunc float %l9 to half |
| %t10 = fptrunc float %l10 to half |
| %t11 = fptrunc float %l11 to half |
| %t12 = fptrunc float %l12 to half |
| %t13 = fptrunc float %l13 to half |
| %t14 = fptrunc float %l14 to half |
| %t15 = fptrunc float %l15 to half |
| |
| %d1 = getelementptr inbounds half, ptr %d0, i64 1 |
| %d2 = getelementptr inbounds half, ptr %d0, i64 2 |
| %d3 = getelementptr inbounds half, ptr %d0, i64 3 |
| %d4 = getelementptr inbounds half, ptr %d0, i64 4 |
| %d5 = getelementptr inbounds half, ptr %d0, i64 5 |
| %d6 = getelementptr inbounds half, ptr %d0, i64 6 |
| %d7 = getelementptr inbounds half, ptr %d0, i64 7 |
| %d8 = getelementptr inbounds half, ptr %d0, i64 8 |
| %d9 = getelementptr inbounds half, ptr %d0, i64 9 |
| %d10 = getelementptr inbounds half, ptr %d0, i64 10 |
| %d11 = getelementptr inbounds half, ptr %d0, i64 11 |
| %d12 = getelementptr inbounds half, ptr %d0, i64 12 |
| %d13 = getelementptr inbounds half, ptr %d0, i64 13 |
| %d14 = getelementptr inbounds half, ptr %d0, i64 14 |
| %d15 = getelementptr inbounds half, ptr %d0, i64 15 |
| store half %t0, ptr %d0, align 2 |
| store half %t1, ptr %d1, align 2 |
| store half %t2, ptr %d2, align 2 |
| store half %t3, ptr %d3, align 2 |
| store half %t4, ptr %d4, align 2 |
| store half %t5, ptr %d5, align 2 |
| store half %t6, ptr %d6, align 2 |
| store half %t7, ptr %d7, align 2 |
| store half %t8, ptr %d8, align 2 |
| store half %t9, ptr %d9, align 2 |
| store half %t10, ptr %d10, align 2 |
| store half %t11, ptr %d11, align 2 |
| store half %t12, ptr %d12, align 2 |
| store half %t13, ptr %d13, align 2 |
| store half %t14, ptr %d14, align 2 |
| store half %t15, ptr %d15, align 2 |
| ret void |
| |
| } |
| |
| ; There is no instruction to round f64 to f16; this should not get vectorized! |
; Loads two consecutive double values starting at %s0, rounds each one to half
; with fptrunc, and stores the results to two consecutive half slots at %d0.
; All three configurations in this file expect the scalar IR to come through
; unchanged (no vector load, fptrunc or store is formed).
; NOTE(review): the double loads are written with align 4 rather than 8 --
; presumably carried over from the float variants; confirm it is intentional.
| define void @fpround_v2xf64_v2xf16(ptr %s0, ptr %d0) { |
| ; CHECK-LABEL: define void @fpround_v2xf64_v2xf16( |
| ; CHECK-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[S1:%.*]] = getelementptr inbounds double, ptr [[S0]], i64 1 |
| ; CHECK-NEXT: [[L0:%.*]] = load double, ptr [[S0]], align 4 |
| ; CHECK-NEXT: [[L1:%.*]] = load double, ptr [[S1]], align 4 |
| ; CHECK-NEXT: [[T0:%.*]] = fptrunc double [[L0]] to half |
| ; CHECK-NEXT: [[T1:%.*]] = fptrunc double [[L1]] to half |
| ; CHECK-NEXT: [[D1:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 1 |
| ; CHECK-NEXT: store half [[T0]], ptr [[D0]], align 2 |
| ; CHECK-NEXT: store half [[T1]], ptr [[D1]], align 2 |
| ; CHECK-NEXT: ret void |
| ; |
| ; CHECK-F16C-LABEL: define void @fpround_v2xf64_v2xf16( |
| ; CHECK-F16C-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] { |
| ; CHECK-F16C-NEXT: [[S1:%.*]] = getelementptr inbounds double, ptr [[S0]], i64 1 |
| ; CHECK-F16C-NEXT: [[L0:%.*]] = load double, ptr [[S0]], align 4 |
| ; CHECK-F16C-NEXT: [[L1:%.*]] = load double, ptr [[S1]], align 4 |
| ; CHECK-F16C-NEXT: [[T0:%.*]] = fptrunc double [[L0]] to half |
| ; CHECK-F16C-NEXT: [[T1:%.*]] = fptrunc double [[L1]] to half |
| ; CHECK-F16C-NEXT: [[D1:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 1 |
| ; CHECK-F16C-NEXT: store half [[T0]], ptr [[D0]], align 2 |
| ; CHECK-F16C-NEXT: store half [[T1]], ptr [[D1]], align 2 |
| ; CHECK-F16C-NEXT: ret void |
| ; |
| ; CHECK-AVX512-LABEL: define void @fpround_v2xf64_v2xf16( |
| ; CHECK-AVX512-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] { |
| ; CHECK-AVX512-NEXT: [[S1:%.*]] = getelementptr inbounds double, ptr [[S0]], i64 1 |
| ; CHECK-AVX512-NEXT: [[L0:%.*]] = load double, ptr [[S0]], align 4 |
| ; CHECK-AVX512-NEXT: [[L1:%.*]] = load double, ptr [[S1]], align 4 |
| ; CHECK-AVX512-NEXT: [[T0:%.*]] = fptrunc double [[L0]] to half |
| ; CHECK-AVX512-NEXT: [[T1:%.*]] = fptrunc double [[L1]] to half |
| ; CHECK-AVX512-NEXT: [[D1:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 1 |
| ; CHECK-AVX512-NEXT: store half [[T0]], ptr [[D0]], align 2 |
| ; CHECK-AVX512-NEXT: store half [[T1]], ptr [[D1]], align 2 |
| ; CHECK-AVX512-NEXT: ret void |
| ; |
| %s1 = getelementptr inbounds double, ptr %s0, i64 1 |
| %l0 = load double, ptr %s0, align 4 |
| %l1 = load double, ptr %s1, align 4 |
| |
| %t0 = fptrunc double %l0 to half |
| %t1 = fptrunc double %l1 to half |
| |
| %d1 = getelementptr inbounds half, ptr %d0, i64 1 |
| store half %t0, ptr %d0, align 2 |
| store half %t1, ptr %d1, align 2 |
| ret void |
| } |