| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 |
| ; FIXME: Missing operand promote for f16; presumably this is why the tahiti (SI) run line below is disabled (XUN). |
| ; XUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SI %s |
| ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX89,GFX8 %s |
| ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX89,GFX9 %s |
| ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX10 %s |
| ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11 %s |
| |
| ; Scalar half -> float strict constrained extend. Every enabled target shares |
| ; one expectation body, a single v_cvt_f32_f16 on v0. |
| define float @v_constrained_fpext_f16_to_f32_fpexcept_strict(half %arg) #0 { |
| ; GCN-LABEL: v_constrained_fpext_f16_to_f32_fpexcept_strict: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| %ext = call float @llvm.experimental.constrained.fpext.f32.f16(half %arg, metadata !"fpexcept.strict") |
| ret float %ext |
| } |
| |
| ; <2 x half> -> <2 x float> strict constrained extend. Both source halves are |
| ; read from v0. The gfx803, gfx900 and gfx1010 expectations pick the high half |
| ; with an SDWA WORD_1 source select; the gfx1100 expectations shift it down by |
| ; 16 bits before converting. |
| define <2 x float> @v_constrained_fpext_v2f16_to_v2f32_fpexcept_strict(<2 x half> %arg) #0 { |
| ; GFX89-LABEL: v_constrained_fpext_v2f16_to_v2f32_fpexcept_strict: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: v_cvt_f32_f16_e32 v2, v0 |
| ; GFX89-NEXT: v_cvt_f32_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 |
| ; GFX89-NEXT: v_mov_b32_e32 v0, v2 |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: v_constrained_fpext_v2f16_to_v2f32_fpexcept_strict: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: v_cvt_f32_f16_e32 v2, v0 |
| ; GFX10-NEXT: v_cvt_f32_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 |
| ; GFX10-NEXT: v_mov_b32_e32 v0, v2 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_constrained_fpext_v2f16_to_v2f32_fpexcept_strict: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0 |
| ; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX11-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %ext = call <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half> %arg, metadata !"fpexcept.strict") |
| ret <2 x float> %ext |
| } |
| |
| ; <3 x half> -> <3 x float> strict constrained extend. The expectations read |
| ; elements 0 and 1 from the low and high halves of v0 and element 2 from v1, |
| ; then move the results into v0..v2. |
| define <3 x float> @v_constrained_fpext_v3f16_to_v3f32_fpexcept_strict(<3 x half> %arg) #0 { |
| ; GFX89-LABEL: v_constrained_fpext_v3f16_to_v3f32_fpexcept_strict: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: v_cvt_f32_f16_e32 v4, v0 |
| ; GFX89-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 |
| ; GFX89-NEXT: v_cvt_f32_f16_e32 v2, v1 |
| ; GFX89-NEXT: v_mov_b32_e32 v0, v4 |
| ; GFX89-NEXT: v_mov_b32_e32 v1, v3 |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: v_constrained_fpext_v3f16_to_v3f32_fpexcept_strict: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: v_cvt_f32_f16_e32 v4, v0 |
| ; GFX10-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 |
| ; GFX10-NEXT: v_cvt_f32_f16_e32 v2, v1 |
| ; GFX10-NEXT: v_mov_b32_e32 v0, v4 |
| ; GFX10-NEXT: v_mov_b32_e32 v1, v3 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_constrained_fpext_v3f16_to_v3f32_fpexcept_strict: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0 |
| ; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v2 |
| ; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v1 |
| ; GFX11-NEXT: v_mov_b32_e32 v1, v3 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %ext = call <3 x float> @llvm.experimental.constrained.fpext.v3f32.v3f16(<3 x half> %arg, metadata !"fpexcept.strict") |
| ret <3 x float> %ext |
| } |
| |
| ; Scalar float -> double strict constrained extend. Expected to be a single |
| ; v_cvt_f64_f32 from v0 into the v[0:1] pair on every enabled target. |
| define double @v_constrained_fpext_f32_to_f64_fpexcept_strict(float %arg) #0 { |
| ; GCN-LABEL: v_constrained_fpext_f32_to_f64_fpexcept_strict: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_cvt_f64_f32_e32 v[0:1], v0 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| %ext = call double @llvm.experimental.constrained.fpext.f64.f32(float %arg, metadata !"fpexcept.strict") |
| ret double %ext |
| } |
| |
| ; <2 x float> -> <2 x double> strict constrained extend. The expected code |
| ; copies v1 to v2 before converting, since the first result pair v[0:1] |
| ; overwrites v1. |
| define <2 x double> @v_constrained_fpext_v2f32_to_v2f64_fpexcept_strict(<2 x float> %arg) #0 { |
| ; GCN-LABEL: v_constrained_fpext_v2f32_to_v2f64_fpexcept_strict: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_mov_b32_e32 v2, v1 |
| ; GCN-NEXT: v_cvt_f64_f32_e32 v[0:1], v0 |
| ; GCN-NEXT: v_cvt_f64_f32_e32 v[2:3], v2 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| %ext = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float> %arg, metadata !"fpexcept.strict") |
| ret <2 x double> %ext |
| } |
| |
| ; <3 x float> -> <3 x double> strict constrained extend. There are two |
| ; expectation bodies; they contain the same instructions and differ only in |
| ; the order of the last two conversions. |
| define <3 x double> @v_constrained_fpext_v3f32_to_v3f64_fpexcept_strict(<3 x float> %arg) #0 { |
| ; GFX89-LABEL: v_constrained_fpext_v3f32_to_v3f64_fpexcept_strict: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: v_mov_b32_e32 v4, v2 |
| ; GFX89-NEXT: v_mov_b32_e32 v2, v1 |
| ; GFX89-NEXT: v_cvt_f64_f32_e32 v[0:1], v0 |
| ; GFX89-NEXT: v_cvt_f64_f32_e32 v[2:3], v2 |
| ; GFX89-NEXT: v_cvt_f64_f32_e32 v[4:5], v4 |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1011-LABEL: v_constrained_fpext_v3f32_to_v3f64_fpexcept_strict: |
| ; GFX1011: ; %bb.0: |
| ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1011-NEXT: v_mov_b32_e32 v4, v2 |
| ; GFX1011-NEXT: v_mov_b32_e32 v2, v1 |
| ; GFX1011-NEXT: v_cvt_f64_f32_e32 v[0:1], v0 |
| ; GFX1011-NEXT: v_cvt_f64_f32_e32 v[4:5], v4 |
| ; GFX1011-NEXT: v_cvt_f64_f32_e32 v[2:3], v2 |
| ; GFX1011-NEXT: s_setpc_b64 s[30:31] |
| %ext = call <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(<3 x float> %arg, metadata !"fpexcept.strict") |
| ret <3 x double> %ext |
| } |
| |
| ; Scalar half -> double strict constrained extend. The expected code goes |
| ; through float, with a v_cvt_f32_f16 followed by a v_cvt_f64_f32. |
| define double @v_constrained_fpext_f16_to_f64_fpexcept_strict(half %arg) #0 { |
| ; GCN-LABEL: v_constrained_fpext_f16_to_f64_fpexcept_strict: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GCN-NEXT: v_cvt_f64_f32_e32 v[0:1], v0 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| %ext = call double @llvm.experimental.constrained.fpext.f64.f16(half %arg, metadata !"fpexcept.strict") |
| ret double %ext |
| } |
| |
| ; <2 x half> -> <2 x double> strict constrained extend. Each element is |
| ; expected to be converted to float first and then to double. The high half |
| ; of v0 is reached through SDWA on gfx803, gfx900 and gfx1010 and through a |
| ; 16-bit right shift on gfx1100. |
| define <2 x double> @v_constrained_fpext_v2f16_to_v2f64_fpexcept_strict(<2 x half> %arg) #0 { |
| ; GFX89-LABEL: v_constrained_fpext_v2f16_to_v2f64_fpexcept_strict: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: v_cvt_f32_f16_e32 v1, v0 |
| ; GFX89-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 |
| ; GFX89-NEXT: v_cvt_f64_f32_e32 v[0:1], v1 |
| ; GFX89-NEXT: v_cvt_f64_f32_e32 v[2:3], v2 |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: v_constrained_fpext_v2f16_to_v2f64_fpexcept_strict: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: v_cvt_f32_f16_e32 v1, v0 |
| ; GFX10-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 |
| ; GFX10-NEXT: v_cvt_f64_f32_e32 v[0:1], v1 |
| ; GFX10-NEXT: v_cvt_f64_f32_e32 v[2:3], v2 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_constrained_fpext_v2f16_to_v2f64_fpexcept_strict: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0 |
| ; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v1 |
| ; GFX11-NEXT: v_cvt_f64_f32_e32 v[0:1], v0 |
| ; GFX11-NEXT: v_cvt_f64_f32_e32 v[2:3], v2 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %ext = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f16(<2 x half> %arg, metadata !"fpexcept.strict") |
| ret <2 x double> %ext |
| } |
| |
| ; <3 x half> -> <3 x double> strict constrained extend. Each element is |
| ; expected to be converted to float first and then to double. |
| ; NOTE(review): the function name says v2f64, but the result type and the |
| ; intrinsic used are v3f64. Renaming it would also require regenerating the |
| ; label lines below. |
| define <3 x double> @v_constrained_fpext_v3f16_to_v2f64_fpexcept_strict(<3 x half> %arg) #0 { |
| ; GFX89-LABEL: v_constrained_fpext_v3f16_to_v2f64_fpexcept_strict: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: v_cvt_f32_f16_e32 v2, v0 |
| ; GFX89-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 |
| ; GFX89-NEXT: v_cvt_f32_f16_e32 v4, v1 |
| ; GFX89-NEXT: v_cvt_f64_f32_e32 v[0:1], v2 |
| ; GFX89-NEXT: v_cvt_f64_f32_e32 v[2:3], v3 |
| ; GFX89-NEXT: v_cvt_f64_f32_e32 v[4:5], v4 |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: v_constrained_fpext_v3f16_to_v2f64_fpexcept_strict: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: v_cvt_f32_f16_e32 v2, v0 |
| ; GFX10-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 |
| ; GFX10-NEXT: v_cvt_f32_f16_e32 v4, v1 |
| ; GFX10-NEXT: v_cvt_f64_f32_e32 v[0:1], v2 |
| ; GFX10-NEXT: v_cvt_f64_f32_e32 v[2:3], v3 |
| ; GFX10-NEXT: v_cvt_f64_f32_e32 v[4:5], v4 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_constrained_fpext_v3f16_to_v2f64_fpexcept_strict: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0 |
| ; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v1 |
| ; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; GFX11-NEXT: v_cvt_f64_f32_e32 v[0:1], v0 |
| ; GFX11-NEXT: v_cvt_f64_f32_e32 v[4:5], v3 |
| ; GFX11-NEXT: v_cvt_f64_f32_e32 v[2:3], v2 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %ext = call <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f16(<3 x half> %arg, metadata !"fpexcept.strict") |
| ret <3 x double> %ext |
| } |
| |
| ; fneg applied to the RESULT of a strict half -> float extend. The expected |
| ; code keeps the negation separate, as an xor of the sign bit after the |
| ; conversion. |
| define float @v_constrained_fneg_fpext_f16_to_f32_fpexcept_strict(half %arg) #0 { |
| ; GCN-LABEL: v_constrained_fneg_fpext_f16_to_f32_fpexcept_strict: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| %ext = call float @llvm.experimental.constrained.fpext.f32.f16(half %arg, metadata !"fpexcept.strict") |
| %neg.ext = fneg float %ext |
| ret float %neg.ext |
| } |
| |
| ; fneg applied to the INPUT of a strict half -> float extend. The expected |
| ; code folds the negation into the conversion as a source modifier (-v0, e64 |
| ; encoding). |
| define float @v_constrained_fpext_fneg_f16_to_f32_fpexcept_strict(half %arg) #0 { |
| ; GCN-LABEL: v_constrained_fpext_fneg_f16_to_f32_fpexcept_strict: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_cvt_f32_f16_e64 v0, -v0 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| %negated = fneg half %arg |
| %ext = call float @llvm.experimental.constrained.fpext.f32.f16(half %negated, metadata !"fpexcept.strict") |
| ret float %ext |
| } |
| |
| ; fneg applied to the INPUT of a strict float -> double extend. The negated |
| ; value must be the operand of the intrinsic; passing the raw argument would |
| ; leave the fneg dead and reduce this to the plain f32 -> f64 test. The |
| ; negation is expected to fold into the conversion as a source modifier, as |
| ; in the half -> float variant of this test. |
| ; NOTE(review): the expectation below was updated by hand to match the fixed |
| ; IR; re-run utils/update_llc_test_checks.py to confirm it. |
| define double @v_constrained_fpext_fneg_f32_to_f64_fpexcept_strict(float %arg) #0 { |
| ; GCN-LABEL: v_constrained_fpext_fneg_f32_to_f64_fpexcept_strict: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_cvt_f64_f32_e64 v[0:1], -v0 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| %neg.arg = fneg float %arg |
| %result = call double @llvm.experimental.constrained.fpext.f64.f32(float %neg.arg, metadata !"fpexcept.strict") |
| ret double %result |
| } |
| |
| ; fneg applied to the RESULT of a strict float -> double extend. The expected |
| ; code flips the sign bit with an xor on v1, the upper register of the |
| ; v[0:1] result pair. |
| define double @v_constrained_fneg_fpext_f32_to_f64_fpexcept_strict(float %arg) #0 { |
| ; GCN-LABEL: v_constrained_fneg_fpext_f32_to_f64_fpexcept_strict: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_cvt_f64_f32_e32 v[0:1], v0 |
| ; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| %ext = call double @llvm.experimental.constrained.fpext.f64.f32(float %arg, metadata !"fpexcept.strict") |
| %neg.ext = fneg double %ext |
| ret double %neg.ext |
| } |
| |
| ; Constrained fpext intrinsic declarations, grouped by source/result element type. |
| ; float -> double |
| declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata) #1 |
| declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata) #1 |
| declare <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(<3 x float>, metadata) #1 |
| |
| ; half -> double |
| declare double @llvm.experimental.constrained.fpext.f64.f16(half, metadata) #1 |
| declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f16(<2 x half>, metadata) #1 |
| declare <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f16(<3 x half>, metadata) #1 |
| |
| ; half -> float |
| declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata) #1 |
| declare <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half>, metadata) #1 |
| declare <3 x float> @llvm.experimental.constrained.fpext.v3f32.v3f16(<3 x half>, metadata) #1 |
| |
| ; #0 is attached to every test function above; #1 to every intrinsic declaration. |
| attributes #0 = { strictfp } |
| attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } |
| ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: |
| ; GFX8: {{.*}} |
| ; GFX9: {{.*}} |