| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s |
| |
| ; This file tests the following combinations related to streaming-enabled functions: |
| ; [ ] N -> S (Normal -> Streaming) |
| ; [ ] S -> N (Streaming -> Normal) |
| ; [ ] S -> S (Streaming -> Streaming) |
| ; [ ] S -> SC (Streaming -> Streaming-compatible) |
| ; |
| ; The following combination is tested in sme-streaming-compatible-interface.ll |
| ; [ ] SC -> S (Streaming-compatible -> Streaming) |
| |
| ; External callees, one per streaming interface exercised by the callers below: |
| ; no attribute (normal), "aarch64_pstate_sm_enabled" (streaming) and |
| ; "aarch64_pstate_sm_compatible" (streaming-compatible). |
| declare void @normal_callee() |
| declare void @streaming_callee() "aarch64_pstate_sm_enabled" |
| declare void @streaming_compatible_callee() "aarch64_pstate_sm_compatible" |
| |
| ; [x] N -> S |
| ; [ ] S -> N |
| ; [ ] S -> S |
| ; [ ] S -> SC |
| ; The caller carries no streaming attribute and calls @streaming_callee, which |
| ; is declared "aarch64_pstate_sm_enabled". The expected output brackets the |
| ; call with 'smstart sm' / 'smstop sm' and saves/restores d8-d15 and x30 |
| ; around the whole sequence. |
| define void @normal_caller_streaming_callee() nounwind { |
| ; CHECK-LABEL: normal_caller_streaming_callee: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill |
| ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill |
| ; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill |
| ; CHECK-NEXT: smstart sm |
| ; CHECK-NEXT: bl streaming_callee |
| ; CHECK-NEXT: smstop sm |
| ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload |
| ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload |
| ; CHECK-NEXT: ret |
| call void @streaming_callee() |
| ret void; |
| } |
| |
| ; [ ] N -> S |
| ; [x] S -> N |
| ; [ ] S -> S |
| ; [ ] S -> SC |
| ; The caller is "aarch64_pstate_sm_enabled" and calls the unattributed |
| ; @normal_callee. The expected output is the mirror image of the N -> S case: |
| ; 'smstop sm' before the call and 'smstart sm' after it, with d8-d15 and x30 |
| ; saved/restored around the sequence. |
| define void @streaming_caller_normal_callee() nounwind "aarch64_pstate_sm_enabled" { |
| ; CHECK-LABEL: streaming_caller_normal_callee: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill |
| ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill |
| ; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill |
| ; CHECK-NEXT: smstop sm |
| ; CHECK-NEXT: bl normal_callee |
| ; CHECK-NEXT: smstart sm |
| ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload |
| ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload |
| ; CHECK-NEXT: ret |
| call void @normal_callee() |
| ret void; |
| } |
| |
| ; [ ] N -> S |
| ; [ ] S -> N |
| ; [x] S -> S |
| ; [ ] S -> SC |
| ; Caller and callee are both "aarch64_pstate_sm_enabled". The expected output |
| ; contains no smstart/smstop at all: only x30 is saved and restored around a |
| ; plain 'bl'. |
| define void @streaming_caller_streaming_callee() nounwind "aarch64_pstate_sm_enabled" { |
| ; CHECK-LABEL: streaming_caller_streaming_callee: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-NEXT: bl streaming_callee |
| ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: ret |
| call void @streaming_callee() |
| ret void; |
| } |
| |
| ; [ ] N -> S |
| ; [ ] S -> N |
| ; [ ] S -> S |
| ; [x] S -> SC |
| ; The caller is "aarch64_pstate_sm_enabled" and the callee is declared |
| ; "aarch64_pstate_sm_compatible". As in the S -> S case, the expected output |
| ; has no smstart/smstop; only x30 is saved and restored around the call. |
| define void @streaming_caller_streaming_compatible_callee() nounwind "aarch64_pstate_sm_enabled" { |
| ; CHECK-LABEL: streaming_caller_streaming_compatible_callee: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-NEXT: bl streaming_compatible_callee |
| ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: ret |
| call void @streaming_compatible_callee() |
| ret void; |
| } |
| |
| ; |
| ; Handle special cases here. |
| ; |
| |
| ; Call to function-pointer (with attribute) |
| ; Here "aarch64_pstate_sm_enabled" is attached to the call site of the |
| ; indirect call through %p rather than to a declaration. The expected output |
| ; treats it like the N -> S case: 'blr x0' is bracketed by 'smstart sm' / |
| ; 'smstop sm', with d8-d15 and x30 saved/restored. |
| define void @call_to_function_pointer_streaming_enabled(ptr %p) nounwind { |
| ; CHECK-LABEL: call_to_function_pointer_streaming_enabled: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill |
| ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill |
| ; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill |
| ; CHECK-NEXT: smstart sm |
| ; CHECK-NEXT: blr x0 |
| ; CHECK-NEXT: smstop sm |
| ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload |
| ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload |
| ; CHECK-NEXT: ret |
| call void %p() "aarch64_pstate_sm_enabled" |
| ret void |
| } |
| |
| ; Ensure NEON registers are preserved correctly. |
| ; The <4 x i32> argument %x is returned unchanged after a call that needs a |
| ; streaming-mode switch. The expected output stores q0 to the stack before |
| ; 'smstart sm' and reloads it only after 'smstop sm', i.e. the value is kept |
| ; in memory rather than in a vector register across the mode changes. |
| define <4 x i32> @smstart_clobber_simdfp(<4 x i32> %x) nounwind { |
| ; CHECK-LABEL: smstart_clobber_simdfp: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: sub sp, sp, #96 |
| ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill |
| ; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill |
| ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill |
| ; CHECK-NEXT: smstart sm |
| ; CHECK-NEXT: bl streaming_callee |
| ; CHECK-NEXT: smstop sm |
| ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload |
| ; CHECK-NEXT: add sp, sp, #96 |
| ; CHECK-NEXT: ret |
| call void @streaming_callee() |
| ret <4 x i32> %x; |
| } |
| |
| ; Ensure SVE registers are preserved correctly. |
| ; Scalable-vector counterpart of smstart_clobber_simdfp; attribute group #0 |
| ; adds the "+sve" target feature. The expected prologue saves p4-p15 and |
| ; z8-z23 in an 18-vector-length frame, then allocates one more vector-length |
| ; slot in which z0 (%x) is stored before 'smstart sm' and from which it is |
| ; reloaded after 'smstop sm'. |
| define <vscale x 4 x i32> @smstart_clobber_sve(<vscale x 4 x i32> %x) #0 { |
| ; CHECK-LABEL: smstart_clobber_sve: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill |
| ; CHECK-NEXT: addvl sp, sp, #-18 |
| ; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: addvl sp, sp, #-1 |
| ; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill |
| ; CHECK-NEXT: smstart sm |
| ; CHECK-NEXT: bl streaming_callee |
| ; CHECK-NEXT: smstop sm |
| ; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload |
| ; CHECK-NEXT: addvl sp, sp, #1 |
| ; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: addvl sp, sp, #18 |
| ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload |
| ; CHECK-NEXT: ret |
| call void @streaming_callee() |
| ret <vscale x 4 x i32> %x; |
| } |
| |
| ; Call streaming callee twice; there should be no spills/fills between the two |
| ; calls since the registers should have already been clobbered. |
| ; Concretely, the expected output stores z0 (%x) once before the first |
| ; 'smstart sm' and reloads it once after the second 'smstop sm'; the two |
| ; smstart/bl/smstop sequences are emitted back to back with no loads or stores |
| ; between them. |
| define <vscale x 4 x i32> @smstart_clobber_sve_duplicate(<vscale x 4 x i32> %x) #0 { |
| ; CHECK-LABEL: smstart_clobber_sve_duplicate: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill |
| ; CHECK-NEXT: addvl sp, sp, #-18 |
| ; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: addvl sp, sp, #-1 |
| ; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill |
| ; CHECK-NEXT: smstart sm |
| ; CHECK-NEXT: bl streaming_callee |
| ; CHECK-NEXT: smstop sm |
| ; CHECK-NEXT: smstart sm |
| ; CHECK-NEXT: bl streaming_callee |
| ; CHECK-NEXT: smstop sm |
| ; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload |
| ; CHECK-NEXT: addvl sp, sp, #1 |
| ; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: addvl sp, sp, #18 |
| ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload |
| ; CHECK-NEXT: ret |
| call void @streaming_callee() |
| call void @streaming_callee() |
| ret <vscale x 4 x i32> %x; |
| } |
| |
| ; Ensure smstart is not removed, because the call to llvm.cos is not part of a |
| ; chain. |
| ; The caller is "aarch64_pstate_sm_enabled" and the intrinsic is emitted as a |
| ; call to 'cos', so the expected output wraps 'bl cos' in 'smstop sm' / |
| ; 'smstart sm'. %x (d0) is stored before smstop and reloaded after it to form |
| ; the argument; the result is stored before smstart and reloaded into d1 |
| ; afterwards for the final fadd with the reloaded %x. |
| define double @call_to_intrinsic_without_chain(double %x) nounwind "aarch64_pstate_sm_enabled" { |
| ; CHECK-LABEL: call_to_intrinsic_without_chain: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: sub sp, sp, #96 |
| ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill |
| ; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill |
| ; CHECK-NEXT: str d0, [sp, #88] // 8-byte Folded Spill |
| ; CHECK-NEXT: smstop sm |
| ; CHECK-NEXT: ldr d0, [sp, #88] // 8-byte Folded Reload |
| ; CHECK-NEXT: bl cos |
| ; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill |
| ; CHECK-NEXT: smstart sm |
| ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr d0, [sp, #88] // 8-byte Folded Reload |
| ; CHECK-NEXT: ldr d1, [sp, #8] // 8-byte Folded Reload |
| ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload |
| ; CHECK-NEXT: fadd d0, d1, d0 |
| ; CHECK-NEXT: add sp, sp, #96 |
| ; CHECK-NEXT: ret |
| entry: |
| %res = call fast double @llvm.cos.f64(double %x) |
| %res.fadd = fadd fast double %res, %x |
| ret double %res.fadd |
| } |
| |
| ; Intrinsic called by @call_to_intrinsic_without_chain. |
| declare double @llvm.cos.f64(double) |
| |
| ; Ensure that tail call optimization is disabled when the streaming mode |
| ; doesn't match. |
| ; The IR marks the call to @streaming_callee as 'tail call' from a caller with |
| ; no streaming attribute. The expected output still uses 'bl' followed by |
| ; 'smstop sm' and a full epilogue, identical to the plain N -> S sequence. |
| define void @disable_tailcallopt() nounwind { |
| ; CHECK-LABEL: disable_tailcallopt: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill |
| ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill |
| ; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill |
| ; CHECK-NEXT: smstart sm |
| ; CHECK-NEXT: bl streaming_callee |
| ; CHECK-NEXT: smstop sm |
| ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload |
| ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload |
| ; CHECK-NEXT: ret |
| tail call void @streaming_callee() |
| ret void; |
| } |
| |
| ; A streaming caller (attribute group #1) passes the addresses of three |
| ; scalable-vector stack objects, plus the result of llvm.aarch64.sme.cntsb, to |
| ; the unattributed @foo. In the expected output the three addresses (addvl / |
| ; mov from sp) and the count (rdsvl) are computed into x8-x11 before |
| ; 'smstop sm' and only moved into the argument registers x0-x3 afterwards; |
| ; %Data1 is loaded again after 'smstart sm'. |
| ; NOTE(review): presumably the addresses are formed before the mode switch |
| ; because the vector length seen by addvl can differ between modes - confirm. |
| define i8 @call_to_non_streaming_pass_sve_objects(ptr nocapture noundef readnone %ptr) #1 { |
| ; CHECK-LABEL: call_to_non_streaming_pass_sve_objects: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill |
| ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill |
| ; CHECK-NEXT: addvl sp, sp, #-3 |
| ; CHECK-NEXT: rdsvl x8, #1 |
| ; CHECK-NEXT: addvl x9, sp, #2 |
| ; CHECK-NEXT: addvl x10, sp, #1 |
| ; CHECK-NEXT: mov x11, sp |
| ; CHECK-NEXT: smstop sm |
| ; CHECK-NEXT: mov x0, x9 |
| ; CHECK-NEXT: mov x1, x10 |
| ; CHECK-NEXT: mov x2, x11 |
| ; CHECK-NEXT: mov x3, x8 |
| ; CHECK-NEXT: bl foo |
| ; CHECK-NEXT: smstart sm |
| ; CHECK-NEXT: ptrue p0.b |
| ; CHECK-NEXT: ld1b { z0.b }, p0/z, [sp, #2, mul vl] |
| ; CHECK-NEXT: fmov w0, s0 |
| ; CHECK-NEXT: addvl sp, sp, #3 |
| ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload |
| ; CHECK-NEXT: ret |
| entry: |
| %Data1 = alloca <vscale x 16 x i8>, align 16 |
| %Data2 = alloca <vscale x 16 x i8>, align 16 |
| %Data3 = alloca <vscale x 16 x i8>, align 16 |
| %0 = tail call i64 @llvm.aarch64.sme.cntsb() |
| call void @foo(ptr noundef nonnull %Data1, ptr noundef nonnull %Data2, ptr noundef nonnull %Data3, i64 noundef %0) |
| %1 = load <vscale x 16 x i8>, ptr %Data1, align 16 |
| %vecext = extractelement <vscale x 16 x i8> %1, i64 0 |
| ret i8 %vecext |
| } |
| |
| ; Intrinsic and non-streaming callee used by |
| ; @call_to_non_streaming_pass_sve_objects. |
| declare i64 @llvm.aarch64.sme.cntsb() |
| |
| declare void @foo(ptr noundef, ptr noundef, ptr noundef, i64 noundef) |
| |
| ; #0: non-streaming, nounwind, with the "+sve" target feature; used by the two |
| ; smstart_clobber_sve* tests. |
| ; #1: streaming ("aarch64_pstate_sm_enabled"), nounwind, vscale_range(1,16); |
| ; used by @call_to_non_streaming_pass_sve_objects. |
| attributes #0 = { nounwind "target-features"="+sve" } |
| attributes #1 = { nounwind vscale_range(1,16) "aarch64_pstate_sm_enabled" } |