blob: e0afa3e876ec27810ce164631148eeddf354f2b2 [file] [edit]
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -passes=amdgpu-codegenprepare < %s | FileCheck %s
; Test mbcnt optimizations for wave32 architecture
; Covers: simple replacement, bitmask optimization, copy optimization,
; and the full mbcnt.hi(mbcnt.lo) pattern optimization
; =============================================================================
; SIMPLE REPLACEMENT OPTIMIZATIONS (mbcnt.lo -> workitem.id.x)
; =============================================================================
; Test with work group size = wave size (32)
; The required work group size is 32x1x1 (!0). The expected output replaces
; mbcnt.lo(-1, 0) with a direct read of workitem.id.x carrying range [0, 32),
; and returns it without any masking.
define i32 @test_mbcnt_lo_simple_wave32() !reqd_work_group_size !0 {
; CHECK-LABEL: define i32 @test_mbcnt_lo_simple_wave32(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] !reqd_work_group_size [[META0:![0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = call range(i32 0, 32) i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: ret i32 [[TMP0]]
;
entry:
%a = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
ret i32 %a
}
; =============================================================================
; BITMASK OPTIMIZATIONS (mbcnt.lo -> workitem.id.x & 0x1f)
; =============================================================================
; Test with work group size = 2 * wave size (64)
; The required work group size is 64x1x1 (!1). The expected output reads
; workitem.id.x with range [0, 64) and keeps only its low five bits (and 31)
; in place of mbcnt.lo(-1, 0).
define i32 @test_mbcnt_lo_bitmask_64() !reqd_work_group_size !1 {
; CHECK-LABEL: define i32 @test_mbcnt_lo_bitmask_64(
; CHECK-SAME: ) #[[ATTR0]] !reqd_work_group_size [[META1:![0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = call range(i32 0, 64) i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 31
; CHECK-NEXT: ret i32 [[TMP1]]
;
entry:
%a = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
ret i32 %a
}
; Test with work group size = 3 * wave size (96)
; The required work group size is 96x1x1 (!2), a multiple of 32 that is not a
; power of two. The expected output is the same masked form as the 64 case:
; workitem.id.x with range [0, 96), then and 31.
define i32 @test_mbcnt_lo_bitmask_96() !reqd_work_group_size !2 {
; CHECK-LABEL: define i32 @test_mbcnt_lo_bitmask_96(
; CHECK-SAME: ) #[[ATTR0]] !reqd_work_group_size [[META2:![0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = call range(i32 0, 96) i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 31
; CHECK-NEXT: ret i32 [[TMP1]]
;
entry:
%a = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
ret i32 %a
}
; Test with work group size = 1.5 * wave size (48)
; The required work group size is 48x1x1 (!3), which is not a multiple of 32.
; The expected output is still the masked form: workitem.id.x with range
; [0, 48), then and 31.
define i32 @test_mbcnt_lo_bitmask_48() !reqd_work_group_size !3 {
; CHECK-LABEL: define i32 @test_mbcnt_lo_bitmask_48(
; CHECK-SAME: ) #[[ATTR0]] !reqd_work_group_size [[META3:![0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = call range(i32 0, 48) i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: [[A:%.*]] = and i32 [[TMP0]], 31
; CHECK-NEXT: ret i32 [[A]]
;
entry:
%a = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
ret i32 %a
}
; =============================================================================
; COPY OPTIMIZATION (mbcnt.hi(mask, val) -> val)
; =============================================================================
; Test with mask = wave32 range
; mbcnt.hi(-1, %val) is expected to fold away entirely: the checked output
; returns the incoming %val argument with no intrinsic call left.
; NOTE(review): presumably valid because a wave32 has no lanes 32-63 for
; mbcnt.hi to count -- confirm against the pass implementation.
define i32 @test_mbcnt_hi_copy_basic(i32 %val) !reqd_work_group_size !0 {
; CHECK-LABEL: define i32 @test_mbcnt_hi_copy_basic(
; CHECK-SAME: i32 [[VAL:%.*]]) #[[ATTR0]] !reqd_work_group_size [[META0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: ret i32 [[VAL]]
;
entry:
%result = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %val)
ret i32 %result
}
; Test with partial mask
; Same as the basic copy test but with mask 65535 (0xFFFF) instead of -1.
; The expected output is identical (ret of %val), i.e. the fold is expected
; regardless of the mask value used here.
define i32 @test_mbcnt_hi_copy_partial_mask(i32 %val) !reqd_work_group_size !0 {
; CHECK-LABEL: define i32 @test_mbcnt_hi_copy_partial_mask(
; CHECK-SAME: i32 [[VAL:%.*]]) #[[ATTR0]] !reqd_work_group_size [[META0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: ret i32 [[VAL]]
;
entry:
%result = call i32 @llvm.amdgcn.mbcnt.hi(i32 65535, i32 %val)
ret i32 %result
}
; =============================================================================
; FULL PATTERN OPTIMIZATION (mbcnt.hi(~0, mbcnt.lo(~0, 0)) -> workitem.id.x)
; =============================================================================
; Test with work group size = wave size (32)
; Input is the chained pattern mbcnt.hi(-1, mbcnt.lo(-1, 0)) with a required
; work group size of 32x1x1 (!0). The expected output collapses both calls
; into a single workitem.id.x read with range [0, 32).
define i32 @test_mbcnt_full_pattern_wave32() !reqd_work_group_size !0 {
; CHECK-LABEL: define i32 @test_mbcnt_full_pattern_wave32(
; CHECK-SAME: ) #[[ATTR0]] !reqd_work_group_size [[META0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = call range(i32 0, 32) i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: ret i32 [[TMP0]]
;
entry:
%a = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
%b = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %a)
ret i32 %b
}
; Test with work group size = 1.5 * wave size (48)
; Same chained mbcnt.hi(-1, mbcnt.lo(-1, 0)) input, but with a required work
; group size of 48x1x1 (!3). The expected result is workitem.id.x with range
; [0, 48) masked with 31. Note that the checked output still contains the
; original mbcnt.lo call; its result is no longer referenced by the return.
define i32 @test_mbcnt_full_pattern_wave32_partial() !reqd_work_group_size !3 {
; CHECK-LABEL: define i32 @test_mbcnt_full_pattern_wave32_partial(
; CHECK-SAME: ) #[[ATTR0]] !reqd_work_group_size [[META3]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[A:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
; CHECK-NEXT: [[TMP0:%.*]] = call range(i32 0, 48) i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: [[B:%.*]] = and i32 [[TMP0]], 31
; CHECK-NEXT: ret i32 [[B]]
;
entry:
%a = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
%b = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %a)
ret i32 %b
}
; =============================================================================
; METADATA
; =============================================================================
; Operands of the !reqd_work_group_size attachments used by the tests in this
; file. Only the X dimension varies; Y and Z are always 1.
;   !0 - simple lo test, both mbcnt.hi copy tests, full-pattern wave32 test
;   !1 - bitmask_64 test
;   !2 - bitmask_96 test
;   !3 - bitmask_48 test and full-pattern partial test
!0 = !{i32 32, i32 1, i32 1} ; X=32 (1*wave32), Y=1, Z=1
!1 = !{i32 64, i32 1, i32 1} ; X=64 (2*wave32), Y=1, Z=1
!2 = !{i32 96, i32 1, i32 1} ; X=96 (3*wave32), Y=1, Z=1
!3 = !{i32 48, i32 1, i32 1} ; X=48 (1.5*wave32), Y=1, Z=1
; =============================================================================
; FUNCTION DECLARATIONS
; =============================================================================
; Intrinsics called by the test inputs; both share attribute group #0.
; @llvm.amdgcn.workitem.id.x appears only in the expected (CHECK) output and
; is not declared in this file's input.
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #0
attributes #0 = { nounwind readnone speculatable willreturn }
;.
; CHECK: [[META0]] = !{i32 32, i32 1, i32 1}
; CHECK: [[META1]] = !{i32 64, i32 1, i32 1}
; CHECK: [[META2]] = !{i32 96, i32 1, i32 1}
; CHECK: [[META3]] = !{i32 48, i32 1, i32 1}
;.