| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -passes=amdgpu-codegenprepare < %s | FileCheck %s |
| |
| ; Test the full mbcnt pattern optimization on a wave64 target (gfx906). |
| ; Covers: mbcnt.hi(~0, mbcnt.lo(~0, 0)) under several reqd_work_group_size values. |
| |
| ; ============================================================================= |
| ; FULL PATTERN OPTIMIZATION - mbcnt.hi(~0, mbcnt.lo(~0, 0)) -> workitem.id.x |
| ; ============================================================================= |
| |
| ; Test with work group size = wave size (64): X=64, Y=1, Z=1 (metadata !0). |
| ; Input is mbcnt.hi(-1, mbcnt.lo(-1, 0)). The checks expect the returned value |
| ; to become a direct workitem.id.x call annotated with range(i32 0, 64), with |
| ; no masking. The mbcnt.lo call is still present in the expected output, but |
| ; no checked instruction uses it. |
| define i32 @test_mbcnt_full_pattern_wave64_basic() !reqd_work_group_size !0 { |
| ; CHECK-LABEL: define i32 @test_mbcnt_full_pattern_wave64_basic( |
| ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] !reqd_work_group_size [[META0:![0-9]+]] { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: [[A:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) |
| ; CHECK-NEXT: [[TMP0:%.*]] = call range(i32 0, 64) i32 @llvm.amdgcn.workitem.id.x() |
| ; CHECK-NEXT: ret i32 [[TMP0]] |
| ; |
| entry: |
| %a = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) |
| %b = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %a) |
| ret i32 %b |
| } |
| |
| ; Test with work group size = 2 * wave size (128): X=128, Y=1, Z=1 (metadata !1). |
| ; Same mbcnt.hi(-1, mbcnt.lo(-1, 0)) input as the basic test. Here the checks |
| ; expect workitem.id.x (annotated with range(i32 0, 128)) to be masked with 63 |
| ; before being returned. |
| define i32 @test_mbcnt_full_pattern_wave64_128() !reqd_work_group_size !1 { |
| ; CHECK-LABEL: define i32 @test_mbcnt_full_pattern_wave64_128( |
| ; CHECK-SAME: ) #[[ATTR0]] !reqd_work_group_size [[META1:![0-9]+]] { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: [[A:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) |
| ; CHECK-NEXT: [[TMP0:%.*]] = call range(i32 0, 128) i32 @llvm.amdgcn.workitem.id.x() |
| ; CHECK-NEXT: [[B:%.*]] = and i32 [[TMP0]], 63 |
| ; CHECK-NEXT: ret i32 [[B]] |
| ; |
| entry: |
| %a = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) |
| %b = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %a) |
| ret i32 %b |
| } |
| |
| ; Test with a multidimensional work group where the X dimension matches the |
| ; pattern: X=64, Y=2, Z=1 (metadata !2). The expected output is the same as |
| ; in the basic test: an unmasked workitem.id.x call annotated with |
| ; range(i32 0, 64), i.e. the checks show Y=2 not changing the rewrite. |
| define i32 @test_mbcnt_full_pattern_wave64_multidim() !reqd_work_group_size !2 { |
| ; CHECK-LABEL: define i32 @test_mbcnt_full_pattern_wave64_multidim( |
| ; CHECK-SAME: ) #[[ATTR0]] !reqd_work_group_size [[META2:![0-9]+]] { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: [[A:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) |
| ; CHECK-NEXT: [[TMP0:%.*]] = call range(i32 0, 64) i32 @llvm.amdgcn.workitem.id.x() |
| ; CHECK-NEXT: ret i32 [[TMP0]] |
| ; |
| entry: |
| %a = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) |
| %b = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %a) |
| ret i32 %b |
| } |
| |
| ; Test with work group size = 0.75 * wave size (48): X=48, Y=1, Z=1 (metadata !3). |
| ; The checks expect workitem.id.x (annotated with range(i32 0, 48)) masked |
| ; with 63 before being returned. |
| ; NOTE(review): with range(i32 0, 48) the 'and ..., 63' cannot change the |
| ; value; confirm this is the intended pass output for a work group smaller |
| ; than the wave size rather than a missed simplification. |
| define i32 @test_mbcnt_full_pattern_wave64_partial() !reqd_work_group_size !3 { |
| ; CHECK-LABEL: define i32 @test_mbcnt_full_pattern_wave64_partial( |
| ; CHECK-SAME: ) #[[ATTR0]] !reqd_work_group_size [[META3:![0-9]+]] { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: [[A:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) |
| ; CHECK-NEXT: [[TMP0:%.*]] = call range(i32 0, 48) i32 @llvm.amdgcn.workitem.id.x() |
| ; CHECK-NEXT: [[B:%.*]] = and i32 [[TMP0]], 63 |
| ; CHECK-NEXT: ret i32 [[B]] |
| ; |
| entry: |
| %a = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) |
| %b = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %a) |
| ret i32 %b |
| } |
| |
| ; ============================================================================= |
| ; METADATA |
| ; ============================================================================= |
| |
| ; reqd_work_group_size nodes, one per test function above: |
| ;   !0 -> test_mbcnt_full_pattern_wave64_basic |
| ;   !1 -> test_mbcnt_full_pattern_wave64_128 |
| ;   !2 -> test_mbcnt_full_pattern_wave64_multidim |
| ;   !3 -> test_mbcnt_full_pattern_wave64_partial |
| ; Operands are listed in X, Y, Z order, as annotated on each line. |
| !0 = !{i32 64, i32 1, i32 1} ; X=64 (1*wave), Y=1, Z=1 |
| !1 = !{i32 128, i32 1, i32 1} ; X=128 (2*wave), Y=1, Z=1 |
| !2 = !{i32 64, i32 2, i32 1} ; X=64 (1*wave), Y=2, Z=1 |
| !3 = !{i32 48, i32 1, i32 1} ; X=48 (0.75*wave), Y=1, Z=1 |
| |
| ; ============================================================================= |
| ; FUNCTION DECLARATIONS |
| ; ============================================================================= |
| |
| ; The two intrinsics called by the test inputs; both use attribute group #0. |
| ; llvm.amdgcn.workitem.id.x appears only in the expected output, so it is not |
| ; declared here. |
| declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0 |
| declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #0 |
| |
| ; Attribute group shared by both mbcnt declarations above. |
| attributes #0 = { nounwind readnone speculatable willreturn } |
| ;. |
| ; CHECK: [[META0]] = !{i32 64, i32 1, i32 1} |
| ; CHECK: [[META1]] = !{i32 128, i32 1, i32 1} |
| ; CHECK: [[META2]] = !{i32 64, i32 2, i32 1} |
| ; CHECK: [[META3]] = !{i32 48, i32 1, i32 1} |
| ;. |