| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| ; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -passes=amdgpu-promote-alloca < %s | FileCheck %s |
| |
| ; Checks that supported memsets don't block PromoteAlloca, and that unsupported ones leave the alloca in place. |
| |
| ; A non-volatile zero memset covering all 48 bytes of the [6 x i64] alloca, followed |
| ; by stores to elements 0 and 1. The expected output contains no alloca and no memset: |
| ; the two stores become insertelements into a <6 x i64> zeroinitializer. |
| define amdgpu_kernel void @memset_all_zero(i64 %val) { |
| ; CHECK-LABEL: @memset_all_zero( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <6 x i64> zeroinitializer, i64 [[VAL:%.*]], i32 0 |
| ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <6 x i64> [[TMP0]], i64 [[VAL]], i32 1 |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| %stack = alloca [6 x i64], align 4, addrspace(5) |
| call void @llvm.memset.p5.i64(ptr addrspace(5) %stack, i8 0, i64 48, i1 false) |
| store i64 %val, ptr addrspace(5) %stack |
| ; %reload has no users in this function; nothing corresponding to it appears in the expected output. |
| %reload = load i64, ptr addrspace(5) %stack |
| %stack.1 = getelementptr [6 x i64], ptr addrspace(5) %stack, i64 0, i64 1 |
| store i64 %val, ptr addrspace(5) %stack.1 |
| ret void |
| } |
| |
| ; A non-volatile memset of the byte value 5 covering all 32 bytes of the [4 x i64] |
| ; alloca, followed by stores to elements 0 and 1. The expected output contains no |
| ; alloca and no memset: the initial vector is a splat of 361700864190383365 |
| ; (0x0505050505050505, i.e. the byte 5 repeated across an i64). |
| define amdgpu_kernel void @memset_all_5(i64 %val) { |
| ; CHECK-LABEL: @memset_all_5( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i64> splat (i64 361700864190383365), i64 [[VAL:%.*]], i32 0 |
| ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> [[TMP0]], i64 [[VAL]], i32 1 |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| %stack = alloca [4 x i64], align 4, addrspace(5) |
| call void @llvm.memset.p5.i64(ptr addrspace(5) %stack, i8 5, i64 32, i1 false) |
| store i64 %val, ptr addrspace(5) %stack |
| ; %reload has no users in this function; nothing corresponding to it appears in the expected output. |
| %reload = load i64, ptr addrspace(5) %stack |
| ; The GEP source type matches the alloca type ([4 x i64]); element 1 is at byte offset 8. |
| %stack.1 = getelementptr [4 x i64], ptr addrspace(5) %stack, i64 0, i64 1 |
| store i64 %val, ptr addrspace(5) %stack.1 |
| ret void |
| } |
| |
| ; Same shape as the promotable cases (memset over the whole 32-byte alloca), but the |
| ; memset is volatile (last argument is i1 true). The expected output keeps the alloca, |
| ; the memset and the store unchanged. |
| define amdgpu_kernel void @memset_volatile_nopromote(i64 %val) { |
| ; CHECK-LABEL: @memset_volatile_nopromote( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[STACK:%.*]] = alloca [4 x i64], align 4, addrspace(5) |
| ; CHECK-NEXT: call void @llvm.memset.p5.i64(ptr addrspace(5) [[STACK]], i8 0, i64 32, i1 true) |
| ; CHECK-NEXT: store i64 [[VAL:%.*]], ptr addrspace(5) [[STACK]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| %stack = alloca [4 x i64], align 4, addrspace(5) |
| call void @llvm.memset.p5.i64(ptr addrspace(5) %stack, i8 0, i64 32, i1 true) |
| store i64 %val, ptr addrspace(5) %stack |
| ret void |
| } |
| |
| ; The memset covers only 31 of the alloca's 32 bytes. It is deliberately non-volatile |
| ; so that the size mismatch is the only property distinguishing this case from the |
| ; promotable ones (volatility is covered by @memset_volatile_nopromote). The expected |
| ; output keeps the alloca, the memset and the store unchanged. |
| define amdgpu_kernel void @memset_badsize_nopromote(i64 %val) { |
| ; CHECK-LABEL: @memset_badsize_nopromote( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[STACK:%.*]] = alloca [4 x i64], align 4, addrspace(5) |
| ; CHECK-NEXT: call void @llvm.memset.p5.i64(ptr addrspace(5) [[STACK]], i8 0, i64 31, i1 false) |
| ; CHECK-NEXT: store i64 [[VAL:%.*]], ptr addrspace(5) [[STACK]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| %stack = alloca [4 x i64], align 4, addrspace(5) |
| call void @llvm.memset.p5.i64(ptr addrspace(5) %stack, i8 0, i64 31, i1 false) |
| store i64 %val, ptr addrspace(5) %stack |
| ret void |
| } |
| |
| ; The memset destination is a GEP to element 1 of the alloca rather than the alloca |
| ; itself, and it covers the remaining 24 bytes. It is deliberately non-volatile so that |
| ; the offset destination is the only property distinguishing this case from the |
| ; promotable ones (volatility is covered by @memset_volatile_nopromote). The expected |
| ; output keeps the alloca, the GEP, the memset and the store unchanged. |
| define amdgpu_kernel void @memset_offset_ptr_nopromote(i64 %val) { |
| ; CHECK-LABEL: @memset_offset_ptr_nopromote( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[STACK:%.*]] = alloca [4 x i64], align 4, addrspace(5) |
| ; CHECK-NEXT: [[GEP:%.*]] = getelementptr [4 x i64], ptr addrspace(5) [[STACK]], i64 0, i64 1 |
| ; CHECK-NEXT: call void @llvm.memset.p5.i64(ptr addrspace(5) [[GEP]], i8 0, i64 24, i1 false) |
| ; CHECK-NEXT: store i64 [[VAL:%.*]], ptr addrspace(5) [[STACK]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| %stack = alloca [4 x i64], align 4, addrspace(5) |
| %gep = getelementptr [4 x i64], ptr addrspace(5) %stack, i64 0, i64 1 |
| call void @llvm.memset.p5.i64(ptr addrspace(5) %gep, i8 0, i64 24, i1 false) |
| store i64 %val, ptr addrspace(5) %stack |
| ret void |
| } |
| |
| ; The alloca is an array of pointers ([6 x ptr]) rather than integers. It is zeroed by |
| ; a non-volatile 48-byte memset and then read back as an i64. The expected output |
| ; contains no alloca, memset or load: the constant 0 is stored directly to %out. |
| define amdgpu_kernel void @memset_array_ptr_alloca(ptr %out) { |
| ; CHECK-LABEL: @memset_array_ptr_alloca( |
| ; CHECK-NEXT: store i64 0, ptr [[OUT:%.*]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| %alloca = alloca [6 x ptr], align 16, addrspace(5) |
| call void @llvm.memset.p5.i64(ptr addrspace(5) %alloca, i8 0, i64 48, i1 false) |
| %load = load i64, ptr addrspace(5) %alloca |
| store i64 %load, ptr %out |
| ret void |
| } |
| |
| ; Same as @memset_array_ptr_alloca, but the alloca is already a vector of pointers |
| ; (<6 x ptr>). The expected output is again a direct store of the constant 0 to %out, |
| ; with no alloca, memset or load left. |
| define amdgpu_kernel void @memset_vector_ptr_alloca(ptr %out) { |
| ; CHECK-LABEL: @memset_vector_ptr_alloca( |
| ; CHECK-NEXT: store i64 0, ptr [[OUT:%.*]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| %alloca = alloca <6 x ptr>, align 16, addrspace(5) |
| call void @llvm.memset.p5.i64(ptr addrspace(5) %alloca, i8 0, i64 48, i1 false) |
| %load = load i64, ptr addrspace(5) %alloca |
| store i64 %load, ptr %out |
| ret void |
| } |
| |
| ; The alloca is a nested array of pointers ([2 x [3 x ptr]]). The memset and load are |
| ; the same as in @memset_array_ptr_alloca, but here the expected output keeps the |
| ; alloca, memset, load and store unchanged. |
| define amdgpu_kernel void @memset_array_of_array_ptr_alloca(ptr %out) { |
| ; CHECK-LABEL: @memset_array_of_array_ptr_alloca( |
| ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [2 x [3 x ptr]], align 16, addrspace(5) |
| ; CHECK-NEXT: call void @llvm.memset.p5.i64(ptr addrspace(5) [[ALLOCA]], i8 0, i64 48, i1 false) |
| ; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr addrspace(5) [[ALLOCA]], align 8 |
| ; CHECK-NEXT: store i64 [[LOAD]], ptr [[OUT:%.*]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| %alloca = alloca [2 x [3 x ptr]], align 16, addrspace(5) |
| call void @llvm.memset.p5.i64(ptr addrspace(5) %alloca, i8 0, i64 48, i1 false) |
| %load = load i64, ptr addrspace(5) %alloca |
| store i64 %load, ptr %out |
| ret void |
| } |
| |
| ; The alloca is an array of pointer vectors ([2 x <3 x ptr>]). The memset and load are |
| ; the same as in @memset_array_ptr_alloca, but here the expected output keeps the |
| ; alloca, memset, load and store unchanged. |
| define amdgpu_kernel void @memset_array_of_vec_ptr_alloca(ptr %out) { |
| ; CHECK-LABEL: @memset_array_of_vec_ptr_alloca( |
| ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [2 x <3 x ptr>], align 16, addrspace(5) |
| ; CHECK-NEXT: call void @llvm.memset.p5.i64(ptr addrspace(5) [[ALLOCA]], i8 0, i64 48, i1 false) |
| ; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr addrspace(5) [[ALLOCA]], align 8 |
| ; CHECK-NEXT: store i64 [[LOAD]], ptr [[OUT:%.*]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| %alloca = alloca [2 x <3 x ptr>], align 16, addrspace(5) |
| call void @llvm.memset.p5.i64(ptr addrspace(5) %alloca, i8 0, i64 48, i1 false) |
| %load = load i64, ptr addrspace(5) %alloca |
| store i64 %load, ptr %out |
| ret void |
| } |
| |
| ; memset intrinsic with an addrspace(5) destination and an i64 length; the final i1 argument is the volatile flag. |
| declare void @llvm.memset.p5.i64(ptr addrspace(5) nocapture writeonly, i8, i64, i1 immarg) |