llvm/test/CodeGen/AMDGPU/lro-phi-samebb-nonlookthrough-store.ll - llvm-project - Git at Google

 ; RUN: opt -S -passes=amdgpu-late-codegenprepare \
 ; RUN:   -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a %s | FileCheck %s

 ; Goal: With a loop-header PHI in illegal vector type and a same-BB
 ; non-lookthrough user (vector add) in the header, LRO should still coerce
 ; the PHI to i32 because a profitable sink (store) exists across BB.

 define amdgpu_kernel void @phi_samebb_nonlookthrough_store(
     ptr addrspace(1) %out, <4 x i8> %v, i1 %exit) {
 ; CHECK-LABEL: @phi_samebb_nonlookthrough_store(
 entry:
   br label %loop

 loop:                                             ; preds = %entry, %loop
   ; Loop-carried PHI in illegal vector type.
   %acc = phi <4 x i8> [ zeroinitializer, %entry ], [ %acc.next, %loop ]

   ; Same-BB non-lookthrough use in header.
   %acc.next = add <4 x i8> %acc, %v

   ; Make it a real loop: either iterate or exit to the sink block.
   br i1 %exit, label %store, label %loop

 store:                                            ; preds = %loop
   ; The across-BB sink: storing the PHI coerced to i32.
   %acc.bc = bitcast <4 x i8> %acc to i32
   store i32 %acc.bc, ptr addrspace(1) %out, align 4
   ret void
 }

 ; After AMDGPULateCodeGenPrepare we expect:
 ;  - PHI is coerced to i32
 ;  - A header bitcast materializes for the add
 ; This proves the same-BB non-lookthrough user (add) did not get pruned
 ; when the def is a PHI.

 ; CHECK: loop:
 ; CHECK:   %[[ACC_TC:[^ ]+]] = phi i32
 ; CHECK:   %[[ACC_TC_BC:[^ ]+]] = bitcast i32 %[[ACC_TC]] to <4 x i8>
 ; CHECK:   %[[ACC_NEXT:[^ ]+]] = add <4 x i8> %[[ACC_TC_BC]], %v
 ; CHECK:   br i1 %exit, label %store, label %loop
 ; CHECK: store:
 ; CHECK:   %[[ACC_TC_BC2:[^ ]+]] = bitcast i32 %[[ACC_TC]] to <4 x i8>
 ; CHECK:   %[[ST_I32:[^ ]+]] = bitcast <4 x i8> %[[ACC_TC_BC2]] to i32
 ; CHECK:   store i32 %[[ST_I32]],
	; RUN: opt -S -passes=amdgpu-late-codegenprepare \
	; RUN: -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a %s \| FileCheck %s

	; Goal: With a loop-header PHI in illegal vector type and a same-BB
	; non-lookthrough user (vector add) in the header, LRO should still coerce
	; the PHI to i32 because a profitable sink (store) exists across BB.

	define amdgpu_kernel void @phi_samebb_nonlookthrough_store(
	ptr addrspace(1) %out, <4 x i8> %v, i1 %exit) {
	; CHECK-LABEL: @phi_samebb_nonlookthrough_store(
	entry:
	br label %loop

	loop: ; preds = %entry, %loop
	; Loop-carried PHI in illegal vector type.
	%acc = phi <4 x i8> [ zeroinitializer, %entry ], [ %acc.next, %loop ]

	; Same-BB non-lookthrough use in header.
	%acc.next = add <4 x i8> %acc, %v

	; Make it a real loop: either iterate or exit to the sink block.
	br i1 %exit, label %store, label %loop

	store: ; preds = %loop
	; The across-BB sink: storing the PHI coerced to i32.
	%acc.bc = bitcast <4 x i8> %acc to i32
	store i32 %acc.bc, ptr addrspace(1) %out, align 4
	ret void
	}

	; After AMDGPULateCodeGenPrepare we expect:
	; - PHI is coerced to i32
	; - A header bitcast materializes for the add
	; This proves the same-BB non-lookthrough user (add) did not get pruned
	; when the def is a PHI.

	; CHECK: loop:
	; CHECK: %[[ACC_TC:[^ ]+]] = phi i32
	; CHECK: %[[ACC_TC_BC:[^ ]+]] = bitcast i32 %[[ACC_TC]] to <4 x i8>
	; CHECK: %[[ACC_NEXT:[^ ]+]] = add <4 x i8> %[[ACC_TC_BC]], %v
	; CHECK: br i1 %exit, label %store, label %loop
	; CHECK: store:
	; CHECK: %[[ACC_TC_BC2:[^ ]+]] = bitcast i32 %[[ACC_TC]] to <4 x i8>
	; CHECK: %[[ST_I32:[^ ]+]] = bitcast <4 x i8> %[[ACC_TC_BC2]] to i32
	; CHECK: store i32 %[[ST_I32]],