| ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 |
| ; RUN: llc -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 -verify-machineinstrs -stop-after=aarch64-postlegalizer-combiner -force-legal-indexing %s -o - | FileCheck %s |
| ; RUN: llc -debugify-and-strip-all-safe -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 -verify-machineinstrs -stop-after=aarch64-postlegalizer-combiner -force-legal-indexing %s -o - | FileCheck %s |
| |
| ; The GEP result feeds both the volatile load and the return, so the |
| ; combiner folds the pair into a single G_INDEXED_LOAD with the |
| ; pre-increment flag (trailing immediate operand 1). |
| define ptr @test_simple_load_pre(ptr %ptr) { |
| |
| ; CHECK-LABEL: name: test_simple_load_pre |
| ; CHECK: bb.1 (%ir-block.0): |
| ; CHECK-NEXT: liveins: $x0 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 |
| ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 42 |
| ; CHECK-NEXT: [[INDEXED_LOAD:%[0-9]+]]:_(s8), [[INDEXED_LOAD1:%[0-9]+]]:_(p0) = G_INDEXED_LOAD [[COPY]], [[C]](s64), 1 :: (volatile load (s8) from %ir.next) |
| ; CHECK-NEXT: $x0 = COPY [[INDEXED_LOAD1]](p0) |
| ; CHECK-NEXT: RET_ReallyLR implicit $x0 |
| %next = getelementptr i8, ptr %ptr, i32 42 |
| load volatile i8, ptr %next |
| ret ptr %next |
| } |
| |
| ; The incremented pointer is dead (the function returns null), so there is |
| ; no profit in pre-indexing: the plain G_PTR_ADD + G_LOAD pair is kept and |
| ; no G_INDEXED_LOAD is formed. |
| define ptr @test_unused_load_pre(ptr %ptr) { |
| |
| ; CHECK-LABEL: name: test_unused_load_pre |
| ; CHECK: bb.1 (%ir-block.0): |
| ; CHECK-NEXT: liveins: $x0 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 |
| ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 42 |
| ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) |
| ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p0) :: (volatile load (s8) from %ir.next) |
| ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 |
| ; CHECK-NEXT: $x0 = COPY [[C1]](p0) |
| ; CHECK-NEXT: RET_ReallyLR implicit $x0 |
| %next = getelementptr i8, ptr %ptr, i32 42 |
| load volatile i8, ptr %next |
| ret ptr null |
| } |
| |
| ; Store counterpart of the simple pre-indexed case: the stored-to address |
| ; is also returned, so the GEP and the store combine into G_INDEXED_STORE |
| ; with the pre-increment flag (trailing immediate operand 1). |
| define ptr @test_simple_store_pre(ptr %ptr) { |
| |
| ; CHECK-LABEL: name: test_simple_store_pre |
| ; CHECK: bb.1 (%ir-block.0): |
| ; CHECK-NEXT: liveins: $x0 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 |
| ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 42 |
| ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 |
| ; CHECK-NEXT: [[INDEXED_STORE:%[0-9]+]]:_(p0) = G_INDEXED_STORE [[C1]](s8), [[COPY]], [[C]](s64), 1 :: (volatile store (s8) into %ir.next) |
| ; CHECK-NEXT: $x0 = COPY [[INDEXED_STORE]](p0) |
| ; CHECK-NEXT: RET_ReallyLR implicit $x0 |
| %next = getelementptr i8, ptr %ptr, i32 42 |
| store volatile i8 0, ptr %next |
| ret ptr %next |
| } |
| |
| ; The potentially pre-indexed address is used as the value stored. Converting |
| ; would produce the value too late but only by one instruction. |
| ; Expected outcome: the combiner leaves the plain G_PTR_ADD + G_STORE |
| ; sequence untouched (no G_INDEXED_STORE is formed). |
| define ptr @test_store_pre_val_loop(ptr %ptr) { |
| |
| ; CHECK-LABEL: name: test_store_pre_val_loop |
| ; CHECK: bb.1 (%ir-block.0): |
| ; CHECK-NEXT: liveins: $x0 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 |
| ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 336 |
| ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) |
| ; CHECK-NEXT: G_STORE [[PTR_ADD]](p0), [[PTR_ADD]](p0) :: (volatile store (p0) into %ir.next) |
| ; CHECK-NEXT: $x0 = COPY [[PTR_ADD]](p0) |
| ; CHECK-NEXT: RET_ReallyLR implicit $x0 |
| %next = getelementptr ptr, ptr %ptr, i32 42 |
| store volatile ptr %next, ptr %next |
| ret ptr %next |
| } |
| |
| ; Potentially pre-indexed address is used between GEP computing it and load. |
| ; Expected outcome: no indexed load is formed; the address stays a plain |
| ; G_PTR_ADD computed before the call and the load remains an ordinary |
| ; G_LOAD afterwards. |
| define ptr @test_load_pre_before(ptr %ptr) { |
| |
| ; CHECK-LABEL: name: test_load_pre_before |
| ; CHECK: bb.1 (%ir-block.0): |
| ; CHECK-NEXT: liveins: $x0 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 |
| ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp |
| ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 42 |
| ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) |
| ; CHECK-NEXT: $x0 = COPY [[PTR_ADD]](p0) |
| ; CHECK-NEXT: BL @bar, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0 |
| ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp |
| ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p0) :: (volatile load (s8) from %ir.next) |
| ; CHECK-NEXT: $x0 = COPY [[PTR_ADD]](p0) |
| ; CHECK-NEXT: RET_ReallyLR implicit $x0 |
| %next = getelementptr i8, ptr %ptr, i32 42 |
| call void @bar(ptr %next) |
| load volatile i8, ptr %next |
| ret ptr %next |
| } |
| |
| ; Materializing the base into a writable register (from sp/fp) would be just as |
| ; bad as the original GEP. |
| ; Expected outcome: the access stays a separate G_PTR_ADD off the |
| ; G_FRAME_INDEX plus an ordinary G_LOAD — no G_INDEXED_LOAD. |
| define ptr @test_alloca_load_pre() { |
| |
| ; CHECK-LABEL: name: test_alloca_load_pre |
| ; CHECK: bb.1 (%ir-block.0): |
| ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 42 |
| ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.ptr |
| ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C]](s64) |
| ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p0) :: (volatile load (s8) from %ir.next) |
| ; CHECK-NEXT: $x0 = COPY [[PTR_ADD]](p0) |
| ; CHECK-NEXT: RET_ReallyLR implicit $x0 |
| %ptr = alloca i8, i32 128 |
| %next = getelementptr i8, ptr %ptr, i32 42 |
| load volatile i8, ptr %next |
| ret ptr %next |
| } |
| |
| ; Post-indexed load: the load reads the original base pointer while the |
| ; GEP result is returned, so the pair combines into G_INDEXED_LOAD with |
| ; the post-increment flag (trailing immediate operand 0). |
| define ptr @test_simple_load_post(ptr %ptr) { |
| |
| ; CHECK-LABEL: name: test_simple_load_post |
| ; CHECK: bb.1 (%ir-block.0): |
| ; CHECK-NEXT: liveins: $x0 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 |
| ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 42 |
| ; CHECK-NEXT: [[INDEXED_LOAD:%[0-9]+]]:_(s8), [[INDEXED_LOAD1:%[0-9]+]]:_(p0) = G_INDEXED_LOAD [[COPY]], [[C]](s64), 0 :: (volatile load (s8) from %ir.ptr) |
| ; CHECK-NEXT: $x0 = COPY [[INDEXED_LOAD1]](p0) |
| ; CHECK-NEXT: RET_ReallyLR implicit $x0 |
| %next = getelementptr i8, ptr %ptr, i32 42 |
| load volatile i8, ptr %ptr |
| ret ptr %next |
| } |
| |
| ; Post-indexed combine still fires when the GEP appears after the load in |
| ; the IR. The offset comes from a call, so the G_INDEXED_LOAD takes a |
| ; register offset rather than a constant. |
| define ptr @test_simple_load_post_gep_after(ptr %ptr) { |
| |
| ; CHECK-LABEL: name: test_simple_load_post_gep_after |
| ; CHECK: bb.1 (%ir-block.0): |
| ; CHECK-NEXT: liveins: $x0 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 |
| ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp |
| ; CHECK-NEXT: BL @get_offset, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $x0 |
| ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp |
| ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0 |
| ; CHECK-NEXT: [[INDEXED_LOAD:%[0-9]+]]:_(s8), [[INDEXED_LOAD1:%[0-9]+]]:_(p0) = G_INDEXED_LOAD [[COPY]], [[COPY1]](s64), 0 :: (volatile load (s8) from %ir.ptr) |
| ; CHECK-NEXT: $x0 = COPY [[INDEXED_LOAD1]](p0) |
| ; CHECK-NEXT: RET_ReallyLR implicit $x0 |
| %offset = call i64 @get_offset() |
| load volatile i8, ptr %ptr |
| %next = getelementptr i8, ptr %ptr, i64 %offset |
| ret ptr %next |
| } |
| |
| ; Extra uses of the base pointer (the ptrtoint feeding the store to @var) |
| ; sitting between the load and the GEP do not block the post-indexed |
| ; combine: the G_INDEXED_LOAD is still formed with flag 0. |
| define ptr @test_load_post_keep_looking(ptr %ptr) { |
| |
| ; CHECK-LABEL: name: test_load_post_keep_looking |
| ; CHECK: bb.1 (%ir-block.0): |
| ; CHECK-NEXT: liveins: $x0 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 |
| ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp |
| ; CHECK-NEXT: BL @get_offset, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $x0 |
| ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp |
| ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0 |
| ; CHECK-NEXT: [[INDEXED_LOAD:%[0-9]+]]:_(s8), [[INDEXED_LOAD1:%[0-9]+]]:_(p0) = G_INDEXED_LOAD [[COPY]], [[COPY1]](s64), 0 :: (volatile load (s8) from %ir.ptr) |
| ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY]](p0) |
| ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[PTRTOINT]](s64) |
| ; CHECK-NEXT: [[ADRP:%[0-9]+]]:gpr64(p0) = ADRP target-flags(aarch64-page) @var |
| ; CHECK-NEXT: [[ADD_LOW:%[0-9]+]]:_(p0) = G_ADD_LOW [[ADRP]](p0), target-flags(aarch64-pageoff, aarch64-nc) @var |
| ; CHECK-NEXT: G_STORE [[TRUNC]](s8), [[ADD_LOW]](p0) :: (store (s8) into @var) |
| ; CHECK-NEXT: $x0 = COPY [[INDEXED_LOAD1]](p0) |
| ; CHECK-NEXT: RET_ReallyLR implicit $x0 |
| %offset = call i64 @get_offset() |
| load volatile i8, ptr %ptr |
| %intval = ptrtoint ptr %ptr to i8 |
| store i8 %intval, ptr @var |
| |
| %next = getelementptr i8, ptr %ptr, i64 %offset |
| ret ptr %next |
| } |
| |
| ; Base is frame index. Using indexing would need copy anyway. |
| ; Expected outcome: the load stays an ordinary G_LOAD from the |
| ; G_FRAME_INDEX, with the G_PTR_ADD for the returned pointer kept separate. |
| define ptr @test_load_post_alloca() { |
| |
| ; CHECK-LABEL: name: test_load_post_alloca |
| ; CHECK: bb.1 (%ir-block.0): |
| ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.ptr |
| ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX]](p0) :: (volatile load (s8) from %ir.ptr) |
| ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 42 |
| ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C]](s64) |
| ; CHECK-NEXT: $x0 = COPY [[PTR_ADD]](p0) |
| ; CHECK-NEXT: RET_ReallyLR implicit $x0 |
| %ptr = alloca i8, i32 128 |
| %next = getelementptr i8, ptr %ptr, i32 42 |
| load volatile i8, ptr %ptr |
| ret ptr %next |
| } |
| |
| ; Offset computation does not dominate the load we might be indexing. |
| ; Expected outcome: no combine — the load stays an ordinary G_LOAD and the |
| ; G_PTR_ADD is only emitted after the call that produces the offset. |
| define ptr @test_load_post_gep_offset_after(ptr %ptr) { |
| |
| ; CHECK-LABEL: name: test_load_post_gep_offset_after |
| ; CHECK: bb.1 (%ir-block.0): |
| ; CHECK-NEXT: liveins: $x0 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 |
| ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (volatile load (s8) from %ir.ptr) |
| ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp |
| ; CHECK-NEXT: BL @get_offset, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $x0 |
| ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp |
| ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0 |
| ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[COPY1]](s64) |
| ; CHECK-NEXT: $x0 = COPY [[PTR_ADD]](p0) |
| ; CHECK-NEXT: RET_ReallyLR implicit $x0 |
| load volatile i8, ptr %ptr |
| %offset = call i64 @get_offset() |
| %next = getelementptr i8, ptr %ptr, i64 %offset |
| ret ptr %next |
| } |
| |
| declare void @bar(ptr) |
| declare i64 @get_offset() |
| @var = global i8 0 |
| @varp8 = global ptr null |