blob: 3da06b6692ecc79bc788749a3f65fc6824be0992 [file] [edit]
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -passes=amdgpu-codegenprepare < %s | FileCheck %s
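; Test the full mbcnt pattern optimization on a wave64 target (-mcpu=gfx906).
; Covers work group X sizes equal to, twice, and smaller than the wave size,
; plus a multidimensional work group whose X size equals the wave size.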
; =============================================================================
; FULL PATTERN OPTIMIZATION - mbcnt.hi(~0, mbcnt.lo(~0, 0)) -> workitem.id.x
; (followed by `and ..., 63` in the cases where the X size is not exactly 64)
; =============================================================================
; Test with work group size = wave size (64)
; Uses reqd_work_group_size !0 = {64, 1, 1}.
; Expected output: the mbcnt.hi call is replaced by a workitem.id.x call that
; carries range(i32 0, 64), and that value is returned directly with no mask.
; The mbcnt.lo call is still expected in the output, ahead of the new call.
define i32 @test_mbcnt_full_pattern_wave64_basic() !reqd_work_group_size !0 {
; CHECK-LABEL: define i32 @test_mbcnt_full_pattern_wave64_basic(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] !reqd_work_group_size [[META0:![0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[A:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
; CHECK-NEXT: [[TMP0:%.*]] = call range(i32 0, 64) i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: ret i32 [[TMP0]]
;
; Input pattern: mbcnt.hi(-1, mbcnt.lo(-1, 0)), with the result returned.
entry:
%a = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
%b = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %a)
ret i32 %b
}
; Test with work group size = 2 * wave size (128)
; Uses reqd_work_group_size !1 = {128, 1, 1}.
; Expected output: the mbcnt.hi call is replaced by a workitem.id.x call that
; carries range(i32 0, 128), followed by `and ..., 63`; the masked value is
; returned. The mbcnt.lo call is still expected in the output.
define i32 @test_mbcnt_full_pattern_wave64_128() !reqd_work_group_size !1 {
; CHECK-LABEL: define i32 @test_mbcnt_full_pattern_wave64_128(
; CHECK-SAME: ) #[[ATTR0]] !reqd_work_group_size [[META1:![0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[A:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
; CHECK-NEXT: [[TMP0:%.*]] = call range(i32 0, 128) i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: [[B:%.*]] = and i32 [[TMP0]], 63
; CHECK-NEXT: ret i32 [[B]]
;
; Input pattern: mbcnt.hi(-1, mbcnt.lo(-1, 0)), with the result returned.
entry:
%a = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
%b = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %a)
ret i32 %b
}
; Test with multidimensional work group where X dimension matches pattern
; Uses reqd_work_group_size !2 = {64, 2, 1}: X equals the wave size, Y is 2.
; Expected output is the same as for the {64, 1, 1} case: a workitem.id.x call
; carrying range(i32 0, 64), returned directly with no mask. The mbcnt.lo call
; is still expected in the output.
define i32 @test_mbcnt_full_pattern_wave64_multidim() !reqd_work_group_size !2 {
; CHECK-LABEL: define i32 @test_mbcnt_full_pattern_wave64_multidim(
; CHECK-SAME: ) #[[ATTR0]] !reqd_work_group_size [[META2:![0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[A:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
; CHECK-NEXT: [[TMP0:%.*]] = call range(i32 0, 64) i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: ret i32 [[TMP0]]
;
; Input pattern: mbcnt.hi(-1, mbcnt.lo(-1, 0)), with the result returned.
entry:
%a = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
%b = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %a)
ret i32 %b
}
; Test with work group size = 0.75 * wave size (48)
; Uses reqd_work_group_size !3 = {48, 1, 1}.
; Expected output: the mbcnt.hi call is replaced by a workitem.id.x call that
; carries range(i32 0, 48), followed by `and ..., 63`; the masked value is
; returned. The mbcnt.lo call is still expected in the output.
; NOTE(review): with range(i32 0, 48) the `and ..., 63` cannot change the
; value; presumably the pass emits the mask whenever X is not exactly the wave
; size -- confirm against the pass implementation.
define i32 @test_mbcnt_full_pattern_wave64_partial() !reqd_work_group_size !3 {
; CHECK-LABEL: define i32 @test_mbcnt_full_pattern_wave64_partial(
; CHECK-SAME: ) #[[ATTR0]] !reqd_work_group_size [[META3:![0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[A:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
; CHECK-NEXT: [[TMP0:%.*]] = call range(i32 0, 48) i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: [[B:%.*]] = and i32 [[TMP0]], 63
; CHECK-NEXT: ret i32 [[B]]
;
; Input pattern: mbcnt.hi(-1, mbcnt.lo(-1, 0)), with the result returned.
entry:
%a = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
%b = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %a)
ret i32 %b
}
; =============================================================================
; METADATA
; =============================================================================
; reqd_work_group_size operands, in {X, Y, Z} order. Each node is attached to
; exactly one test function in this file:
;   !0 -> test_mbcnt_full_pattern_wave64_basic
;   !1 -> test_mbcnt_full_pattern_wave64_128
;   !2 -> test_mbcnt_full_pattern_wave64_multidim
;   !3 -> test_mbcnt_full_pattern_wave64_partial
!0 = !{i32 64, i32 1, i32 1} ; X=64 (1*wave), Y=1, Z=1
!1 = !{i32 128, i32 1, i32 1} ; X=128 (2*wave), Y=1, Z=1
!2 = !{i32 64, i32 2, i32 1} ; X=64 (1*wave), Y=2, Z=1
!3 = !{i32 48, i32 1, i32 1} ; X=48 (0.75*wave), Y=1, Z=1
; =============================================================================
; FUNCTION DECLARATIONS
; =============================================================================
; Declarations for the two intrinsics called by the test inputs; both use
; attribute group #0. llvm.amdgcn.workitem.id.x appears only in the expected
; output and is not declared in this file.
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #0
attributes #0 = { nounwind readnone speculatable willreturn }
;.
; CHECK: [[META0]] = !{i32 64, i32 1, i32 1}
; CHECK: [[META1]] = !{i32 128, i32 1, i32 1}
; CHECK: [[META2]] = !{i32 64, i32 2, i32 1}
; CHECK: [[META3]] = !{i32 48, i32 1, i32 1}
;.