| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=infer-address-spaces %s | FileCheck %s |
| ; Ports of most of test/CodeGen/NVPTX/access-non-generic.ll |
| |
| ; Globals placed in addrspace(3), the space the test names below call "lds". |
| ; Most functions in this file reach them through an addrspacecast to plain |
| ; `ptr` and expect the pass to address them as ptr addrspace(3) again. |
| @scalar = internal addrspace(3) global float 0.0, align 4 |
| @array = internal addrspace(3) global [10 x float] zeroinitializer, align 4 |
| |
| ; Exercises five ways of reaching an addrspace(3) global through a cast to |
| ; plain `ptr`. Each variant is a load / @use / store sequence followed by a |
| ; barrier call: |
| ;   1. constant-expression addrspacecast of @scalar used directly as operand |
| ;   2. addrspacecast instruction (%tmp1) of @scalar |
| ;   3. constant-expression GEP over a constant-expression cast of @array |
| ;   4. GEP instruction (%tmp4) over a constant-expression cast of @array |
| ;   5. addrspacecast instruction (%tmp6) plus a GEP indexed by argument %i |
| ; The assertions expect every load and store to address ptr addrspace(3) |
| ; directly, with no addrspacecast left in the output. |
| define amdgpu_kernel void @load_store_lds_f32(i32 %i, float %v) #0 { |
| ; CHECK-LABEL: define amdgpu_kernel void @load_store_lds_f32( |
| ; CHECK-SAME: i32 [[I:%.*]], float [[V:%.*]]) #[[ATTR0:[0-9]+]] { |
| ; CHECK-NEXT: [[BB:.*:]] |
| ; CHECK-NEXT: [[TMP:%.*]] = load float, ptr addrspace(3) @scalar, align 4 |
| ; CHECK-NEXT: call void @use(float [[TMP]]) |
| ; CHECK-NEXT: store float [[V]], ptr addrspace(3) @scalar, align 4 |
| ; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() |
| ; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(3) @scalar, align 4 |
| ; CHECK-NEXT: call void @use(float [[TMP2]]) |
| ; CHECK-NEXT: store float [[V]], ptr addrspace(3) @scalar, align 4 |
| ; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() |
| ; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(3) getelementptr inbounds ([10 x float], ptr addrspace(3) @array, i32 0, i32 5), align 4 |
| ; CHECK-NEXT: call void @use(float [[TMP3]]) |
| ; CHECK-NEXT: store float [[V]], ptr addrspace(3) getelementptr inbounds ([10 x float], ptr addrspace(3) @array, i32 0, i32 5), align 4 |
| ; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() |
| ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [10 x float], ptr addrspace(3) @array, i32 0, i32 5 |
| ; CHECK-NEXT: [[TMP5:%.*]] = load float, ptr addrspace(3) [[TMP4]], align 4 |
| ; CHECK-NEXT: call void @use(float [[TMP5]]) |
| ; CHECK-NEXT: store float [[V]], ptr addrspace(3) [[TMP4]], align 4 |
| ; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() |
| ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [10 x float], ptr addrspace(3) @array, i32 0, i32 [[I]] |
| ; CHECK-NEXT: [[TMP8:%.*]] = load float, ptr addrspace(3) [[TMP7]], align 4 |
| ; CHECK-NEXT: call void @use(float [[TMP8]]) |
| ; CHECK-NEXT: store float [[V]], ptr addrspace(3) [[TMP7]], align 4 |
| ; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() |
| ; CHECK-NEXT: ret void |
| ; |
| bb: |
| ; Variant 1: the cast is a constant expression written inline in the operand. |
| %tmp = load float, ptr addrspacecast (ptr addrspace(3) @scalar to ptr), align 4 |
| call void @use(float %tmp) |
| store float %v, ptr addrspacecast (ptr addrspace(3) @scalar to ptr), align 4 |
| call void @llvm.amdgcn.s.barrier() |
| ; Variant 2: the same cast as a separate instruction shared by load and store. |
| %tmp1 = addrspacecast ptr addrspace(3) @scalar to ptr |
| %tmp2 = load float, ptr %tmp1, align 4 |
| call void @use(float %tmp2) |
| store float %v, ptr %tmp1, align 4 |
| call void @llvm.amdgcn.s.barrier() |
| ; Variant 3: constant GEP to element 5 of @array wrapped around a constant cast. |
| %tmp3 = load float, ptr getelementptr inbounds ([10 x float], ptr addrspacecast (ptr addrspace(3) @array to ptr), i32 0, i32 5), align 4 |
| call void @use(float %tmp3) |
| store float %v, ptr getelementptr inbounds ([10 x float], ptr addrspacecast (ptr addrspace(3) @array to ptr), i32 0, i32 5), align 4 |
| call void @llvm.amdgcn.s.barrier() |
| ; Variant 4: same address as variant 3, but the GEP is an instruction. |
| %tmp4 = getelementptr inbounds [10 x float], ptr addrspacecast (ptr addrspace(3) @array to ptr), i32 0, i32 5 |
| %tmp5 = load float, ptr %tmp4, align 4 |
| call void @use(float %tmp5) |
| store float %v, ptr %tmp4, align 4 |
| call void @llvm.amdgcn.s.barrier() |
| ; Variant 5: cast and GEP are both instructions and the index is the runtime %i. |
| %tmp6 = addrspacecast ptr addrspace(3) @array to ptr |
| %tmp7 = getelementptr inbounds [10 x float], ptr %tmp6, i32 0, i32 %i |
| %tmp8 = load float, ptr %tmp7, align 4 |
| call void @use(float %tmp8) |
| store float %v, ptr %tmp7, align 4 |
| call void @llvm.amdgcn.s.barrier() |
| ret void |
| } |
| |
| ; Loads an i32 through a constant-expression cast of @scalar, which is |
| ; declared as a float global, so the loaded type differs from the global's |
| ; value type. The assertions expect the i32 load to read from |
| ; ptr addrspace(3) @scalar directly. |
| define i32 @constexpr_load_int_from_float_lds() #0 { |
| ; CHECK-LABEL: define i32 @constexpr_load_int_from_float_lds( |
| ; CHECK-SAME: ) #[[ATTR0]] { |
| ; CHECK-NEXT: [[BB:.*:]] |
| ; CHECK-NEXT: [[TMP:%.*]] = load i32, ptr addrspace(3) @scalar, align 4 |
| ; CHECK-NEXT: ret i32 [[TMP]] |
| ; |
| bb: |
| %tmp = load i32, ptr addrspacecast (ptr addrspace(3) @scalar to ptr), align 4 |
| ret i32 %tmp |
| } |
| |
| ; Casts the addrspace(1) argument %input to plain `ptr`, applies two chained |
| ; float-typed GEPs (indices %i, then %j) and loads an i32 from the result. |
| ; The assertions expect both GEPs and the load to operate on |
| ; ptr addrspace(1), with the cast gone. |
| define i32 @load_int_from_global_float(ptr addrspace(1) %input, i32 %i, i32 %j) #0 { |
| ; CHECK-LABEL: define i32 @load_int_from_global_float( |
| ; CHECK-SAME: ptr addrspace(1) [[INPUT:%.*]], i32 [[I:%.*]], i32 [[J:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[BB:.*:]] |
| ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr addrspace(1) [[INPUT]], i32 [[I]] |
| ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr addrspace(1) [[TMP1]], i32 [[J]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[TMP2]], align 4 |
| ; CHECK-NEXT: ret i32 [[TMP4]] |
| ; |
| bb: |
| %tmp = addrspacecast ptr addrspace(1) %input to ptr |
| %tmp1 = getelementptr float, ptr %tmp, i32 %i |
| %tmp2 = getelementptr float, ptr %tmp1, i32 %j |
| ; No explicit alignment is written here; the expected output prints align 4. |
| %tmp4 = load i32, ptr %tmp2 |
| ret i32 %tmp4 |
| } |
| |
| ; Stores through a constant expression nested three deep: a ptr-to-ptr |
| ; bitcast around a GEP around an addrspacecast of @array. The assertions |
| ; expect a single store whose address is a constant GEP on |
| ; ptr addrspace(3) @array, with neither the bitcast nor the cast present. |
| define amdgpu_kernel void @nested_const_expr() #0 { |
| ; CHECK-LABEL: define amdgpu_kernel void @nested_const_expr( |
| ; CHECK-SAME: ) #[[ATTR0]] { |
| ; CHECK-NEXT: store i32 1, ptr addrspace(3) getelementptr ([10 x float], ptr addrspace(3) @array, i64 0, i64 1), align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| ; The stored value is i32 although @array holds floats. |
| store i32 1, ptr bitcast (ptr getelementptr ([10 x float], ptr addrspacecast (ptr addrspace(3) @array to ptr), i64 0, i64 1) to ptr), align 4 |
| |
| ret void |
| } |
| |
| ; Casts the addrspace(1) argument to plain `ptr`, forms one GEP (%addr) from |
| ; it, then loads and stores through that same %addr. The assertions expect |
| ; the GEP, the load and the store to all use ptr addrspace(1). |
| ; NOTE(review): the name presumably refers to replace-all-uses-with, i.e. one |
| ; rewritten address feeding more than one user -- confirm against the NVPTX |
| ; test this was ported from. |
| define amdgpu_kernel void @rauw(ptr addrspace(1) %input) #0 { |
| ; CHECK-LABEL: define amdgpu_kernel void @rauw( |
| ; CHECK-SAME: ptr addrspace(1) [[INPUT:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[BB:.*:]] |
| ; CHECK-NEXT: [[ADDR:%.*]] = getelementptr float, ptr addrspace(1) [[INPUT]], i64 10 |
| ; CHECK-NEXT: [[V:%.*]] = load float, ptr addrspace(1) [[ADDR]], align 4 |
| ; CHECK-NEXT: store float [[V]], ptr addrspace(1) [[ADDR]], align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| bb: |
| %generic_input = addrspacecast ptr addrspace(1) %input to ptr |
| %addr = getelementptr float, ptr %generic_input, i64 10 |
| %v = load float, ptr %addr |
| store float %v, ptr %addr |
| ret void |
| } |
| |
| ; FIXME: Should be able to eliminate the cast inside the loop |
| ; NOTE(review): the assertions below for @loop contain no addrspacecast at |
| ; all, so this FIXME looks stale or misplaced here; @loop_with_generic_bound |
| ; is the function in this file whose expected output still has a cast inside |
| ; the loop. Confirm before moving or removing it. |
| ; |
| ; Walks @array with a pointer induction variable: %i is a phi that starts at |
| ; the cast of @array and advances by one float per iteration until it equals |
| ; %end, a GEP 10 floats past the same cast. The assertions expect the bound, |
| ; the phi, the load, the increment and the icmp to all use ptr addrspace(3). |
| define amdgpu_kernel void @loop() #0 { |
| ; CHECK-LABEL: define amdgpu_kernel void @loop( |
| ; CHECK-SAME: ) #[[ATTR0]] { |
| ; CHECK-NEXT: [[ENTRY:.*]]: |
| ; CHECK-NEXT: [[END:%.*]] = getelementptr float, ptr addrspace(3) @array, i64 10 |
| ; CHECK-NEXT: br label %[[LOOP:.*]] |
| ; CHECK: [[LOOP]]: |
| ; CHECK-NEXT: [[I:%.*]] = phi ptr addrspace(3) [ @array, %[[ENTRY]] ], [ [[I2:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[V:%.*]] = load float, ptr addrspace(3) [[I]], align 4 |
| ; CHECK-NEXT: call void @use(float [[V]]) |
| ; CHECK-NEXT: [[I2]] = getelementptr float, ptr addrspace(3) [[I]], i64 1 |
| ; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq ptr addrspace(3) [[I2]], [[END]] |
| ; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]] |
| ; CHECK: [[EXIT]]: |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| %p = addrspacecast ptr addrspace(3) @array to ptr |
| ; Both the start pointer and the bound derive from the same cast %p. |
| %end = getelementptr float, ptr %p, i64 10 |
| br label %loop |
| |
| loop: ; preds = %loop, %entry |
| %i = phi ptr [ %p, %entry ], [ %i2, %loop ] |
| %v = load float, ptr %i |
| call void @use(float %v) |
| %i2 = getelementptr float, ptr %i, i64 1 |
| %exit_cond = icmp eq ptr %i2, %end |
| br i1 %exit_cond, label %exit, label %loop |
| |
| exit: ; preds = %loop |
| ret void |
| } |
| |
| ; External addrspace(1) global that holds a plain `ptr`; the function below |
| ; loads its loop bound from it. |
| @generic_end = external addrspace(1) global ptr |
| |
| ; Same pointer-walking loop shape as @loop, except that the bound %end is a |
| ; plain `ptr` loaded from @generic_end rather than derived from @array. The |
| ; assertions expect the phi, the load and the increment to use |
| ; ptr addrspace(3), while the incremented pointer is cast back to plain `ptr` |
| ; inside the loop so it can be compared with %end. |
| define amdgpu_kernel void @loop_with_generic_bound() #0 { |
| ; CHECK-LABEL: define amdgpu_kernel void @loop_with_generic_bound( |
| ; CHECK-SAME: ) #[[ATTR0]] { |
| ; CHECK-NEXT: [[ENTRY:.*]]: |
| ; CHECK-NEXT: [[END:%.*]] = load ptr, ptr addrspace(1) @generic_end, align 8 |
| ; CHECK-NEXT: br label %[[LOOP:.*]] |
| ; CHECK: [[LOOP]]: |
| ; CHECK-NEXT: [[I:%.*]] = phi ptr addrspace(3) [ @array, %[[ENTRY]] ], [ [[I2:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[V:%.*]] = load float, ptr addrspace(3) [[I]], align 4 |
| ; CHECK-NEXT: call void @use(float [[V]]) |
| ; CHECK-NEXT: [[I2]] = getelementptr float, ptr addrspace(3) [[I]], i64 1 |
| ; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(3) [[I2]] to ptr |
| ; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq ptr [[TMP0]], [[END]] |
| ; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]] |
| ; CHECK: [[EXIT]]: |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| %p = addrspacecast ptr addrspace(3) @array to ptr |
| ; The bound comes from memory, so nothing in this function ties it to @array. |
| %end = load ptr, ptr addrspace(1) @generic_end |
| br label %loop |
| |
| loop: ; preds = %loop, %entry |
| %i = phi ptr [ %p, %entry ], [ %i2, %loop ] |
| %v = load float, ptr %i |
| call void @use(float %v) |
| %i2 = getelementptr float, ptr %i, i64 1 |
| ; This comparison is the user that keeps a plain-`ptr` value in the output. |
| %exit_cond = icmp eq ptr %i2, %end |
| br i1 %exit_cond, label %exit, label %loop |
| |
| exit: ; preds = %loop |
| ret void |
| } |
| |
| ; Contains no addrspacecast and no non-default address space: an icmp on an |
| ; inttoptr constant feeds a select of two i64 constants, which indexes a GEP |
| ; off `ptr undef` that is then compared against `ptr undef`. The assertions |
| ; expect the same four instructions and the `unreachable` unchanged. |
| ; NOTE(review): the name suggests a regression test for a past problem |
| ; involving select -- confirm against the commit that added it. |
| define void @select_bug() #0 { |
| ; CHECK-LABEL: define void @select_bug( |
| ; CHECK-SAME: ) #[[ATTR0]] { |
| ; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr inttoptr (i64 4873 to ptr), null |
| ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i64 73, i64 93 |
| ; CHECK-NEXT: [[ADD_PTR157:%.*]] = getelementptr inbounds i64, ptr undef, i64 [[SEL]] |
| ; CHECK-NEXT: [[CMP169:%.*]] = icmp uge ptr undef, [[ADD_PTR157]] |
| ; CHECK-NEXT: unreachable |
| ; |
| %cmp = icmp ne ptr inttoptr (i64 4873 to ptr), null |
| %sel = select i1 %cmp, i64 73, i64 93 |
| %add.ptr157 = getelementptr inbounds i64, ptr undef, i64 %sel |
| %cmp169 = icmp uge ptr undef, %add.ptr157 |
| unreachable |
| } |
| |
| ; External declarations used by the tests above: the barrier intrinsic that |
| ; separates the variants in @load_store_lds_f32, and @use, which receives |
| ; loaded float values. |
| declare void @llvm.amdgcn.s.barrier() #1 |
| declare void @use(float) #0 |
| |
| ; #0 is applied to every test function and to @use; #1 adds `convergent` and |
| ; is applied only to the barrier declaration. |
| attributes #0 = { nounwind } |
| attributes #1 = { convergent nounwind } |