| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 |
| ; RUN: opt -mtriple=amdgcn -mcpu=gfx906 -amdgpu-late-codegenprepare -S -o - %s | FileCheck --check-prefix=GFX906 %s |
| |
| ; Two <3 x i8> values loaded in the entry block are merged by a phi in bb.2 |
| ; and stored. The assertions expect each load to be widened to <4 x i8> with |
| ; a shufflevector and bitcast to i32, the phi to be rewritten on i32, and the |
| ; result to be truncated to i24 and bitcast back to <3 x i8> for the store. |
| define amdgpu_kernel void @v3i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1) %src2, ptr addrspace(1) nocapture %dst) { |
| ; GFX906-LABEL: define amdgpu_kernel void @v3i8_liveout( |
| ; GFX906-SAME: ptr addrspace(1) [[SRC1:%.*]], ptr addrspace(1) [[SRC2:%.*]], ptr addrspace(1) nocapture [[DST:%.*]]) #[[ATTR0:[0-9]+]] { |
| ; GFX906-NEXT: entry: |
| ; GFX906-NEXT: [[IDX:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() |
| ; GFX906-NEXT: [[GEP1:%.*]] = getelementptr <3 x i8>, ptr addrspace(1) [[SRC1]], i32 [[IDX]] |
| ; GFX906-NEXT: [[VEC1:%.*]] = load <3 x i8>, ptr addrspace(1) [[GEP1]], align 4 |
| ; GFX906-NEXT: [[TMP0:%.*]] = shufflevector <3 x i8> [[VEC1]], <3 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| ; GFX906-NEXT: [[VEC1_BC:%.*]] = bitcast <4 x i8> [[TMP0]] to i32 |
| ; GFX906-NEXT: [[GEP2:%.*]] = getelementptr <3 x i8>, ptr addrspace(1) [[SRC2]], i32 [[IDX]] |
| ; GFX906-NEXT: [[VEC2:%.*]] = load <3 x i8>, ptr addrspace(1) [[GEP2]], align 4 |
| ; GFX906-NEXT: [[TMP1:%.*]] = shufflevector <3 x i8> [[VEC2]], <3 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| ; GFX906-NEXT: [[VEC2_BC:%.*]] = bitcast <4 x i8> [[TMP1]] to i32 |
| ; GFX906-NEXT: [[CMP:%.*]] = icmp ult i32 [[IDX]], 15 |
| ; GFX906-NEXT: br i1 [[CMP]], label [[BB_1:%.*]], label [[BB_2:%.*]] |
| ; GFX906: bb.1: |
| ; GFX906-NEXT: br label [[BB_2]] |
| ; GFX906: bb.2: |
| ; GFX906-NEXT: [[TMP5_TC:%.*]] = phi i32 [ [[VEC1_BC]], [[ENTRY:%.*]] ], [ [[VEC2_BC]], [[BB_1]] ] |
| ; GFX906-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP5_TC]] to i24 |
| ; GFX906-NEXT: [[TMP3:%.*]] = bitcast i24 [[TMP2]] to <3 x i8> |
| ; GFX906-NEXT: store <3 x i8> [[TMP3]], ptr addrspace(1) [[DST]], align 4 |
| ; GFX906-NEXT: ret void |
| ; |
| entry: |
| %idx = call i32 @llvm.amdgcn.workitem.id.x() |
| %gep1 = getelementptr <3 x i8>, ptr addrspace(1) %src1, i32 %idx |
| %vec1 = load <3 x i8>, ptr addrspace(1) %gep1 |
| %gep2 = getelementptr <3 x i8>, ptr addrspace(1) %src2, i32 %idx |
| %vec2 = load <3 x i8>, ptr addrspace(1) %gep2 |
| %cmp = icmp ult i32 %idx, 15 |
| br i1 %cmp, label %bb.1, label %bb.2 |
| bb.1: |
| br label %bb.2 |
| |
| bb.2: |
| %tmp5 = phi <3 x i8> [ %vec1, %entry ], [ %vec2, %bb.1 ] |
| store <3 x i8> %tmp5, ptr addrspace(1) %dst, align 4 |
| ret void |
| } |
| |
| ; Same shape as the <3 x i8> case, but with <4 x i8>: the assertions expect |
| ; each load to be bitcast straight to i32 (no widening shuffle), the phi in |
| ; bb.2 to be rewritten on i32, and a single bitcast back to <4 x i8> to feed |
| ; the store. |
| define amdgpu_kernel void @v4i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1) %src2, ptr addrspace(1) nocapture %dst) { |
| ; GFX906-LABEL: define amdgpu_kernel void @v4i8_liveout( |
| ; GFX906-SAME: ptr addrspace(1) [[SRC1:%.*]], ptr addrspace(1) [[SRC2:%.*]], ptr addrspace(1) nocapture [[DST:%.*]]) #[[ATTR0]] { |
| ; GFX906-NEXT: entry: |
| ; GFX906-NEXT: [[IDX:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() |
| ; GFX906-NEXT: [[GEP1:%.*]] = getelementptr <4 x i8>, ptr addrspace(1) [[SRC1]], i32 [[IDX]] |
| ; GFX906-NEXT: [[VEC1:%.*]] = load <4 x i8>, ptr addrspace(1) [[GEP1]], align 4 |
| ; GFX906-NEXT: [[VEC1_BC:%.*]] = bitcast <4 x i8> [[VEC1]] to i32 |
| ; GFX906-NEXT: [[GEP2:%.*]] = getelementptr <4 x i8>, ptr addrspace(1) [[SRC2]], i32 [[IDX]] |
| ; GFX906-NEXT: [[VEC2:%.*]] = load <4 x i8>, ptr addrspace(1) [[GEP2]], align 4 |
| ; GFX906-NEXT: [[VEC2_BC:%.*]] = bitcast <4 x i8> [[VEC2]] to i32 |
| ; GFX906-NEXT: [[CMP:%.*]] = icmp ult i32 [[IDX]], 15 |
| ; GFX906-NEXT: br i1 [[CMP]], label [[BB_1:%.*]], label [[BB_2:%.*]] |
| ; GFX906: bb.1: |
| ; GFX906-NEXT: br label [[BB_2]] |
| ; GFX906: bb.2: |
| ; GFX906-NEXT: [[TMP5_TC:%.*]] = phi i32 [ [[VEC1_BC]], [[ENTRY:%.*]] ], [ [[VEC2_BC]], [[BB_1]] ] |
| ; GFX906-NEXT: [[TMP5_TC_BC:%.*]] = bitcast i32 [[TMP5_TC]] to <4 x i8> |
| ; GFX906-NEXT: store <4 x i8> [[TMP5_TC_BC]], ptr addrspace(1) [[DST]], align 4 |
| ; GFX906-NEXT: ret void |
| ; |
| entry: |
| %idx = call i32 @llvm.amdgcn.workitem.id.x() |
| %gep1 = getelementptr <4 x i8>, ptr addrspace(1) %src1, i32 %idx |
| %vec1 = load <4 x i8>, ptr addrspace(1) %gep1 |
| %gep2 = getelementptr <4 x i8>, ptr addrspace(1) %src2, i32 %idx |
| %vec2 = load <4 x i8>, ptr addrspace(1) %gep2 |
| %cmp = icmp ult i32 %idx, 15 |
| br i1 %cmp, label %bb.1, label %bb.2 |
| bb.1: |
| br label %bb.2 |
| |
| bb.2: |
| %tmp5 = phi <4 x i8> [ %vec1, %entry ], [ %vec2, %bb.1 ] |
| store <4 x i8> %tmp5, ptr addrspace(1) %dst, align 4 |
| ret void |
| } |
| |
| ; <5 x i8> variant of the live-out phi test. The assertions expect each load |
| ; to be widened to <8 x i8> with a shufflevector and bitcast to <2 x i32>, |
| ; the phi in bb.2 to be rewritten on <2 x i32>, and the result to be bitcast |
| ; to <8 x i8> and shuffled back down to <5 x i8> before the store. |
| define amdgpu_kernel void @v5i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1) %src2, ptr addrspace(1) nocapture %dst) { |
| ; GFX906-LABEL: define amdgpu_kernel void @v5i8_liveout( |
| ; GFX906-SAME: ptr addrspace(1) [[SRC1:%.*]], ptr addrspace(1) [[SRC2:%.*]], ptr addrspace(1) nocapture [[DST:%.*]]) #[[ATTR0]] { |
| ; GFX906-NEXT: entry: |
| ; GFX906-NEXT: [[IDX:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() |
| ; GFX906-NEXT: [[GEP1:%.*]] = getelementptr <5 x i8>, ptr addrspace(1) [[SRC1]], i32 [[IDX]] |
| ; GFX906-NEXT: [[VEC1:%.*]] = load <5 x i8>, ptr addrspace(1) [[GEP1]], align 8 |
| ; GFX906-NEXT: [[TMP0:%.*]] = shufflevector <5 x i8> [[VEC1]], <5 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 5, i32 5> |
| ; GFX906-NEXT: [[VEC1_BC:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> |
| ; GFX906-NEXT: [[GEP2:%.*]] = getelementptr <5 x i8>, ptr addrspace(1) [[SRC2]], i32 [[IDX]] |
| ; GFX906-NEXT: [[VEC2:%.*]] = load <5 x i8>, ptr addrspace(1) [[GEP2]], align 8 |
| ; GFX906-NEXT: [[TMP1:%.*]] = shufflevector <5 x i8> [[VEC2]], <5 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 5, i32 5> |
| ; GFX906-NEXT: [[VEC2_BC:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> |
| ; GFX906-NEXT: [[CMP:%.*]] = icmp ult i32 [[IDX]], 15 |
| ; GFX906-NEXT: br i1 [[CMP]], label [[BB_1:%.*]], label [[BB_2:%.*]] |
| ; GFX906: bb.1: |
| ; GFX906-NEXT: br label [[BB_2]] |
| ; GFX906: bb.2: |
| ; GFX906-NEXT: [[TMP5_TC:%.*]] = phi <2 x i32> [ [[VEC1_BC]], [[ENTRY:%.*]] ], [ [[VEC2_BC]], [[BB_1]] ] |
| ; GFX906-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP5_TC]] to <8 x i8> |
| ; GFX906-NEXT: [[TMP3:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <5 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4> |
| ; GFX906-NEXT: store <5 x i8> [[TMP3]], ptr addrspace(1) [[DST]], align 4 |
| ; GFX906-NEXT: ret void |
| ; |
| entry: |
| %idx = call i32 @llvm.amdgcn.workitem.id.x() |
| %gep1 = getelementptr <5 x i8>, ptr addrspace(1) %src1, i32 %idx |
| %vec1 = load <5 x i8>, ptr addrspace(1) %gep1 |
| %gep2 = getelementptr <5 x i8>, ptr addrspace(1) %src2, i32 %idx |
| %vec2 = load <5 x i8>, ptr addrspace(1) %gep2 |
| %cmp = icmp ult i32 %idx, 15 |
| br i1 %cmp, label %bb.1, label %bb.2 |
| bb.1: |
| br label %bb.2 |
| |
| bb.2: |
| %tmp5 = phi <5 x i8> [ %vec1, %entry ], [ %vec2, %bb.1 ] |
| store <5 x i8> %tmp5, ptr addrspace(1) %dst, align 4 |
| ret void |
| } |
| |
| ; <8 x i8> variant of the live-out phi test. The assertions expect each load |
| ; to be bitcast directly to <2 x i32>, the phi in bb.2 to be rewritten on |
| ; <2 x i32>, and one bitcast back to <8 x i8> to feed the store. |
| define amdgpu_kernel void @v8i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1) %src2, ptr addrspace(1) nocapture %dst) { |
| ; GFX906-LABEL: define amdgpu_kernel void @v8i8_liveout( |
| ; GFX906-SAME: ptr addrspace(1) [[SRC1:%.*]], ptr addrspace(1) [[SRC2:%.*]], ptr addrspace(1) nocapture [[DST:%.*]]) #[[ATTR0]] { |
| ; GFX906-NEXT: entry: |
| ; GFX906-NEXT: [[IDX:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() |
| ; GFX906-NEXT: [[GEP1:%.*]] = getelementptr <8 x i8>, ptr addrspace(1) [[SRC1]], i32 [[IDX]] |
| ; GFX906-NEXT: [[VEC1:%.*]] = load <8 x i8>, ptr addrspace(1) [[GEP1]], align 8 |
| ; GFX906-NEXT: [[VEC1_BC:%.*]] = bitcast <8 x i8> [[VEC1]] to <2 x i32> |
| ; GFX906-NEXT: [[GEP2:%.*]] = getelementptr <8 x i8>, ptr addrspace(1) [[SRC2]], i32 [[IDX]] |
| ; GFX906-NEXT: [[VEC2:%.*]] = load <8 x i8>, ptr addrspace(1) [[GEP2]], align 8 |
| ; GFX906-NEXT: [[VEC2_BC:%.*]] = bitcast <8 x i8> [[VEC2]] to <2 x i32> |
| ; GFX906-NEXT: [[CMP:%.*]] = icmp ult i32 [[IDX]], 15 |
| ; GFX906-NEXT: br i1 [[CMP]], label [[BB_1:%.*]], label [[BB_2:%.*]] |
| ; GFX906: bb.1: |
| ; GFX906-NEXT: br label [[BB_2]] |
| ; GFX906: bb.2: |
| ; GFX906-NEXT: [[TMP5_TC:%.*]] = phi <2 x i32> [ [[VEC1_BC]], [[ENTRY:%.*]] ], [ [[VEC2_BC]], [[BB_1]] ] |
| ; GFX906-NEXT: [[TMP5_TC_BC:%.*]] = bitcast <2 x i32> [[TMP5_TC]] to <8 x i8> |
| ; GFX906-NEXT: store <8 x i8> [[TMP5_TC_BC]], ptr addrspace(1) [[DST]], align 4 |
| ; GFX906-NEXT: ret void |
| ; |
| entry: |
| %idx = call i32 @llvm.amdgcn.workitem.id.x() |
| %gep1 = getelementptr <8 x i8>, ptr addrspace(1) %src1, i32 %idx |
| %vec1 = load <8 x i8>, ptr addrspace(1) %gep1 |
| %gep2 = getelementptr <8 x i8>, ptr addrspace(1) %src2, i32 %idx |
| %vec2 = load <8 x i8>, ptr addrspace(1) %gep2 |
| %cmp = icmp ult i32 %idx, 15 |
| br i1 %cmp, label %bb.1, label %bb.2 |
| bb.1: |
| br label %bb.2 |
| |
| bb.2: |
| %tmp5 = phi <8 x i8> [ %vec1, %entry ], [ %vec2, %bb.1 ] |
| store <8 x i8> %tmp5, ptr addrspace(1) %dst, align 4 |
| ret void |
| } |
| |
| ; The switch in the entry block reaches return.sink.split through two |
| ; different cases (1 and 2), so the phi there lists %entry twice with the |
| ; same incoming value. The assertions expect the rewritten i32 phi to keep |
| ; both %entry entries, each using the same converted value. |
| define amdgpu_kernel void @repeat_successor(i32 %in, ptr addrspace(1) %src1, ptr addrspace(1) %src2, ptr addrspace(1) nocapture %dst) { |
| ; GFX906-LABEL: define amdgpu_kernel void @repeat_successor( |
| ; GFX906-SAME: i32 [[IN:%.*]], ptr addrspace(1) [[SRC1:%.*]], ptr addrspace(1) [[SRC2:%.*]], ptr addrspace(1) nocapture [[DST:%.*]]) #[[ATTR0]] { |
| ; GFX906-NEXT: entry: |
| ; GFX906-NEXT: [[IDX:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() |
| ; GFX906-NEXT: [[GEP1:%.*]] = getelementptr <4 x i8>, ptr addrspace(1) [[SRC1]], i32 [[IDX]] |
| ; GFX906-NEXT: [[VEC1:%.*]] = load <4 x i8>, ptr addrspace(1) [[GEP1]], align 4 |
| ; GFX906-NEXT: [[VEC1_BC:%.*]] = bitcast <4 x i8> [[VEC1]] to i32 |
| ; GFX906-NEXT: [[GEP2:%.*]] = getelementptr <4 x i8>, ptr addrspace(1) [[SRC2]], i32 [[IDX]] |
| ; GFX906-NEXT: [[VEC2:%.*]] = load <4 x i8>, ptr addrspace(1) [[GEP2]], align 4 |
| ; GFX906-NEXT: [[VEC2_BC:%.*]] = bitcast <4 x i8> [[VEC2]] to i32 |
| ; GFX906-NEXT: switch i32 [[IN]], label [[RETURN:%.*]] [ |
| ; GFX906-NEXT: i32 1, label [[RETURN_SINK_SPLIT:%.*]] |
| ; GFX906-NEXT: i32 2, label [[RETURN_SINK_SPLIT]] |
| ; GFX906-NEXT: i32 3, label [[SW_BB5:%.*]] |
| ; GFX906-NEXT: ] |
| ; GFX906: sw.bb5: |
| ; GFX906-NEXT: br label [[RETURN_SINK_SPLIT]] |
| ; GFX906: return.sink.split: |
| ; GFX906-NEXT: [[TMP5_TC:%.*]] = phi i32 [ [[VEC2_BC]], [[SW_BB5]] ], [ [[VEC1_BC]], [[ENTRY:%.*]] ], [ [[VEC1_BC]], [[ENTRY]] ] |
| ; GFX906-NEXT: [[TMP5_TC_BC:%.*]] = bitcast i32 [[TMP5_TC]] to <4 x i8> |
| ; GFX906-NEXT: store <4 x i8> [[TMP5_TC_BC]], ptr addrspace(1) [[DST]], align 4 |
| ; GFX906-NEXT: ret void |
| ; GFX906: return: |
| ; GFX906-NEXT: ret void |
| ; |
| entry: |
| %idx = call i32 @llvm.amdgcn.workitem.id.x() |
| %gep1 = getelementptr <4 x i8>, ptr addrspace(1) %src1, i32 %idx |
| %vec1 = load <4 x i8>, ptr addrspace(1) %gep1 |
| %gep2 = getelementptr <4 x i8>, ptr addrspace(1) %src2, i32 %idx |
| %vec2 = load <4 x i8>, ptr addrspace(1) %gep2 |
| switch i32 %in, label %return [ |
| i32 1, label %return.sink.split |
| i32 2, label %return.sink.split |
| i32 3, label %sw.bb5 |
| ] |
| |
| sw.bb5: |
| br label %return.sink.split |
| |
| return.sink.split: |
| %tmp5 = phi <4 x i8> [ %vec2, %sw.bb5 ], [ %vec1, %entry ], [ %vec1, %entry ] |
| store <4 x i8> %tmp5, ptr addrspace(1) %dst, align 4 |
| ret void |
| |
| return: |
| ret void |
| } |
| |
| ; Chained phis: %tmp7 in bb.3 takes the phi %tmp5 from bb.2 as one of its |
| ; incoming values. The assertions expect both phis to be rewritten on |
| ; <2 x i32>, with the second phi consuming the first converted phi directly, |
| ; and a bitcast back to <8 x i8> in front of each store. |
| define amdgpu_kernel void @v8i8_phi_chain(ptr addrspace(1) %src1, ptr addrspace(1) %src2, ptr addrspace(1) nocapture %dst0, ptr addrspace(1) nocapture %dst1) { |
| ; GFX906-LABEL: define amdgpu_kernel void @v8i8_phi_chain( |
| ; GFX906-SAME: ptr addrspace(1) [[SRC1:%.*]], ptr addrspace(1) [[SRC2:%.*]], ptr addrspace(1) nocapture [[DST0:%.*]], ptr addrspace(1) nocapture [[DST1:%.*]]) #[[ATTR0]] { |
| ; GFX906-NEXT: entry: |
| ; GFX906-NEXT: [[IDX:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() |
| ; GFX906-NEXT: [[GEP1:%.*]] = getelementptr <8 x i8>, ptr addrspace(1) [[SRC1]], i32 [[IDX]] |
| ; GFX906-NEXT: [[VEC1:%.*]] = load <8 x i8>, ptr addrspace(1) [[GEP1]], align 8 |
| ; GFX906-NEXT: [[VEC1_BC:%.*]] = bitcast <8 x i8> [[VEC1]] to <2 x i32> |
| ; GFX906-NEXT: [[GEP2:%.*]] = getelementptr <8 x i8>, ptr addrspace(1) [[SRC2]], i32 [[IDX]] |
| ; GFX906-NEXT: [[VEC2:%.*]] = load <8 x i8>, ptr addrspace(1) [[GEP2]], align 8 |
| ; GFX906-NEXT: [[VEC2_BC:%.*]] = bitcast <8 x i8> [[VEC2]] to <2 x i32> |
| ; GFX906-NEXT: [[CMP:%.*]] = icmp ult i32 [[IDX]], 15 |
| ; GFX906-NEXT: br i1 [[CMP]], label [[BB_1:%.*]], label [[BB_2:%.*]] |
| ; GFX906: bb.1: |
| ; GFX906-NEXT: [[CMP2:%.*]] = icmp ult i32 [[IDX]], 7 |
| ; GFX906-NEXT: br i1 [[CMP2]], label [[BB_2]], label [[BB_3:%.*]] |
| ; GFX906: bb.2: |
| ; GFX906-NEXT: [[TMP5_TC:%.*]] = phi <2 x i32> [ [[VEC1_BC]], [[ENTRY:%.*]] ], [ [[VEC2_BC]], [[BB_1]] ] |
| ; GFX906-NEXT: [[TMP5_TC_BC:%.*]] = bitcast <2 x i32> [[TMP5_TC]] to <8 x i8> |
| ; GFX906-NEXT: store <8 x i8> [[TMP5_TC_BC]], ptr addrspace(1) [[DST0]], align 4 |
| ; GFX906-NEXT: br label [[BB_3]] |
| ; GFX906: bb.3: |
| ; GFX906-NEXT: [[TMP7_TC:%.*]] = phi <2 x i32> [ [[VEC2_BC]], [[BB_1]] ], [ [[TMP5_TC]], [[BB_2]] ] |
| ; GFX906-NEXT: [[TMP7_TC_BC:%.*]] = bitcast <2 x i32> [[TMP7_TC]] to <8 x i8> |
| ; GFX906-NEXT: store <8 x i8> [[TMP7_TC_BC]], ptr addrspace(1) [[DST1]], align 4 |
| ; GFX906-NEXT: ret void |
| ; |
| entry: |
| %idx = call i32 @llvm.amdgcn.workitem.id.x() |
| %gep1 = getelementptr <8 x i8>, ptr addrspace(1) %src1, i32 %idx |
| %vec1 = load <8 x i8>, ptr addrspace(1) %gep1 |
| %gep2 = getelementptr <8 x i8>, ptr addrspace(1) %src2, i32 %idx |
| %vec2 = load <8 x i8>, ptr addrspace(1) %gep2 |
| %cmp = icmp ult i32 %idx, 15 |
| br i1 %cmp, label %bb.1, label %bb.2 |
| bb.1: |
| %cmp2 = icmp ult i32 %idx, 7 |
| br i1 %cmp2, label %bb.2, label %bb.3 |
| |
| bb.2: |
| %tmp5 = phi <8 x i8> [ %vec1, %entry ], [ %vec2, %bb.1 ] |
| store <8 x i8> %tmp5, ptr addrspace(1) %dst0, align 4 |
| br label %bb.3 |
| |
| bb.3: |
| %tmp7 = phi <8 x i8> [ %vec2, %bb.1], [%tmp5, %bb.2] |
| store <8 x i8> %tmp7, ptr addrspace(1) %dst1, align 4 |
| ret void |
| } |
| |
| ; %vec1 is used in two other blocks: directly by the store in bb.2 and as an |
| ; incoming value of the three-way phi in bb.3. The assertions expect the |
| ; converted <2 x i32> value to be bitcast back to <8 x i8> in bb.2 for the |
| ; store, and the phi in bb.3 to be rewritten on <2 x i32>. |
| define amdgpu_kernel void @v8i8_multi_block(ptr addrspace(1) %src1, ptr addrspace(1) %src2, ptr addrspace(1) nocapture %dst0, ptr addrspace(1) nocapture %dst1) { |
| ; GFX906-LABEL: define amdgpu_kernel void @v8i8_multi_block( |
| ; GFX906-SAME: ptr addrspace(1) [[SRC1:%.*]], ptr addrspace(1) [[SRC2:%.*]], ptr addrspace(1) nocapture [[DST0:%.*]], ptr addrspace(1) nocapture [[DST1:%.*]]) #[[ATTR0]] { |
| ; GFX906-NEXT: entry: |
| ; GFX906-NEXT: [[IDX:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() |
| ; GFX906-NEXT: [[GEP1:%.*]] = getelementptr <8 x i8>, ptr addrspace(1) [[SRC1]], i32 [[IDX]] |
| ; GFX906-NEXT: [[VEC1:%.*]] = load <8 x i8>, ptr addrspace(1) [[GEP1]], align 8 |
| ; GFX906-NEXT: [[VEC1_BC:%.*]] = bitcast <8 x i8> [[VEC1]] to <2 x i32> |
| ; GFX906-NEXT: [[GEP2:%.*]] = getelementptr <8 x i8>, ptr addrspace(1) [[SRC2]], i32 [[IDX]] |
| ; GFX906-NEXT: [[VEC2:%.*]] = load <8 x i8>, ptr addrspace(1) [[GEP2]], align 8 |
| ; GFX906-NEXT: [[VEC2_BC:%.*]] = bitcast <8 x i8> [[VEC2]] to <2 x i32> |
| ; GFX906-NEXT: [[CMP:%.*]] = icmp ult i32 [[IDX]], 15 |
| ; GFX906-NEXT: br i1 [[CMP]], label [[BB_1:%.*]], label [[BB_3:%.*]] |
| ; GFX906: bb.1: |
| ; GFX906-NEXT: [[CMP2:%.*]] = icmp ult i32 [[IDX]], 7 |
| ; GFX906-NEXT: br i1 [[CMP2]], label [[BB_2:%.*]], label [[BB_3]] |
| ; GFX906: bb.2: |
| ; GFX906-NEXT: [[VEC1_BC_BC:%.*]] = bitcast <2 x i32> [[VEC1_BC]] to <8 x i8> |
| ; GFX906-NEXT: store <8 x i8> [[VEC1_BC_BC]], ptr addrspace(1) [[DST0]], align 4 |
| ; GFX906-NEXT: br label [[BB_3]] |
| ; GFX906: bb.3: |
| ; GFX906-NEXT: [[TMP5_TC:%.*]] = phi <2 x i32> [ [[VEC1_BC]], [[ENTRY:%.*]] ], [ [[VEC2_BC]], [[BB_1]] ], [ [[VEC2_BC]], [[BB_2]] ] |
| ; GFX906-NEXT: [[TMP5_TC_BC:%.*]] = bitcast <2 x i32> [[TMP5_TC]] to <8 x i8> |
| ; GFX906-NEXT: store <8 x i8> [[TMP5_TC_BC]], ptr addrspace(1) [[DST1]], align 4 |
| ; GFX906-NEXT: ret void |
| ; |
| entry: |
| %idx = call i32 @llvm.amdgcn.workitem.id.x() |
| %gep1 = getelementptr <8 x i8>, ptr addrspace(1) %src1, i32 %idx |
| %vec1 = load <8 x i8>, ptr addrspace(1) %gep1 |
| %gep2 = getelementptr <8 x i8>, ptr addrspace(1) %src2, i32 %idx |
| %vec2 = load <8 x i8>, ptr addrspace(1) %gep2 |
| %cmp = icmp ult i32 %idx, 15 |
| br i1 %cmp, label %bb.1, label %bb.3 |
| bb.1: |
| %cmp2 = icmp ult i32 %idx, 7 |
| br i1 %cmp2, label %bb.2, label %bb.3 |
| |
| bb.2: |
| store <8 x i8> %vec1, ptr addrspace(1) %dst0, align 4 |
| br label %bb.3 |
| |
| bb.3: |
| %tmp5 = phi <8 x i8> [ %vec1, %entry ], [ %vec2, %bb.1 ], [ %vec2, %bb.2] |
| store <8 x i8> %tmp5, ptr addrspace(1) %dst1, align 4 |
| ret void |
| } |
| |
| ; Loop-carried phi: %temp in bb.1 receives %vec1 from the entry block and |
| ; %vec2 from bb.1 itself, where %vec2 is a shufflevector of %vec1 and %temp. |
| ; The assertions expect the phi to be rewritten on i32, both shuffle operands |
| ; to be bitcast back to <4 x i8> inside the loop, the shuffle result to be |
| ; bitcast to i32 for the back edge, and a bitcast back to <4 x i8> in bb.2 |
| ; for the store. |
| ; Note: despite the function name, every vector value here is <4 x i8>; only |
| ; the getelementptr element type is <32 x i8>. |
| ; bb.1 ends with a single conditional branch. A second, stray terminator |
| ; that used to follow it opened an unnamed unreachable block; it and the |
| ; assertions that pinned that block have been removed. |
| define amdgpu_kernel void @v32i8_loop_carried(ptr addrspace(1) %src1, ptr addrspace(1) %src2, ptr addrspace(1) nocapture %dst) { |
| ; GFX906-LABEL: define amdgpu_kernel void @v32i8_loop_carried( |
| ; GFX906-SAME: ptr addrspace(1) [[SRC1:%.*]], ptr addrspace(1) [[SRC2:%.*]], ptr addrspace(1) nocapture [[DST:%.*]]) #[[ATTR0]] { |
| ; GFX906-NEXT: entry: |
| ; GFX906-NEXT: [[IDX:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() |
| ; GFX906-NEXT: [[GEP1:%.*]] = getelementptr <32 x i8>, ptr addrspace(1) [[SRC1]], i32 [[IDX]] |
| ; GFX906-NEXT: [[VEC1:%.*]] = load <4 x i8>, ptr addrspace(1) [[GEP1]], align 4 |
| ; GFX906-NEXT: [[VEC1_BC:%.*]] = bitcast <4 x i8> [[VEC1]] to i32 |
| ; GFX906-NEXT: br label [[BB_1:%.*]] |
| ; GFX906: bb.1: |
| ; GFX906-NEXT: [[TEMP_TC:%.*]] = phi i32 [ [[VEC1_BC]], [[ENTRY:%.*]] ], [ [[VEC2_BC:%.*]], [[BB_1]] ] |
| ; GFX906-NEXT: [[TEMP_TC_BC:%.*]] = bitcast i32 [[TEMP_TC]] to <4 x i8> |
| ; GFX906-NEXT: [[VEC1_BC_BC:%.*]] = bitcast i32 [[VEC1_BC]] to <4 x i8> |
| ; GFX906-NEXT: [[VEC2:%.*]] = shufflevector <4 x i8> [[VEC1_BC_BC]], <4 x i8> [[TEMP_TC_BC]], <4 x i32> <i32 0, i32 2, i32 4, i32 6> |
| ; GFX906-NEXT: [[VEC2_BC]] = bitcast <4 x i8> [[VEC2]] to i32 |
| ; GFX906-NEXT: [[CMP:%.*]] = icmp ult i32 [[IDX]], 15 |
| ; GFX906-NEXT: br i1 [[CMP]], label [[BB_1]], label [[BB_2:%.*]] |
| ; GFX906: bb.2: |
| ; GFX906-NEXT: [[VEC2_BC_BC:%.*]] = bitcast i32 [[VEC2_BC]] to <4 x i8> |
| ; GFX906-NEXT: store <4 x i8> [[VEC2_BC_BC]], ptr addrspace(1) [[DST]], align 4 |
| ; GFX906-NEXT: ret void |
| ; |
| entry: |
| %idx = call i32 @llvm.amdgcn.workitem.id.x() |
| %gep1 = getelementptr <32 x i8>, ptr addrspace(1) %src1, i32 %idx |
| %vec1 = load <4 x i8>, ptr addrspace(1) %gep1 |
| br label %bb.1 |
| |
| bb.1: |
| %temp = phi <4 x i8> [ %vec1, %entry ], [ %vec2, %bb.1 ] |
| %vec2 = shufflevector <4 x i8> %vec1, <4 x i8> %temp, <4 x i32> <i32 0, i32 2, i32 4, i32 6> |
| %cmp = icmp ult i32 %idx, 15 |
| br i1 %cmp, label %bb.1, label %bb.2 |
| |
| bb.2: |
| store <4 x i8> %vec2, ptr addrspace(1) %dst, align 4 |
| ret void |
| } |
| |
| ; The pass should not produce a broken phi for this input. |
| ; The <4 x i8> phis form a cycle (bb1 -> bb3 -> bb7 -> bb1) and take only |
| ; constants or other phis as incoming values. The assertions expect every phi |
| ; to stay on <4 x i8>, with no conversion inserted. |
| |
| define void @broken_phi() { |
| ; GFX906-LABEL: define void @broken_phi( |
| ; GFX906-SAME: ) #[[ATTR0]] { |
| ; GFX906-NEXT: bb: |
| ; GFX906-NEXT: br label [[BB1:%.*]] |
| ; GFX906: bb1: |
| ; GFX906-NEXT: [[I:%.*]] = phi <4 x i8> [ splat (i8 1), [[BB:%.*]] ], [ [[I8:%.*]], [[BB7:%.*]] ] |
| ; GFX906-NEXT: br i1 false, label [[BB3:%.*]], label [[BB2:%.*]] |
| ; GFX906: bb2: |
| ; GFX906-NEXT: br label [[BB3]] |
| ; GFX906: bb3: |
| ; GFX906-NEXT: [[I4:%.*]] = phi <4 x i8> [ zeroinitializer, [[BB2]] ], [ [[I]], [[BB1]] ] |
| ; GFX906-NEXT: br i1 false, label [[BB7]], label [[BB5:%.*]] |
| ; GFX906: bb5: |
| ; GFX906-NEXT: [[I6:%.*]] = call <4 x i8> @llvm.smax.v4i8(<4 x i8> [[I4]], <4 x i8> zeroinitializer) |
| ; GFX906-NEXT: br label [[BB7]] |
| ; GFX906: bb7: |
| ; GFX906-NEXT: [[I8]] = phi <4 x i8> [ zeroinitializer, [[BB5]] ], [ zeroinitializer, [[BB3]] ] |
| ; GFX906-NEXT: br label [[BB1]] |
| ; |
| bb: |
| br label %bb1 |
| bb1: |
| %i = phi <4 x i8> [ <i8 1, i8 1, i8 1, i8 1>, %bb ], [ %i8, %bb7 ] |
| br i1 false, label %bb3, label %bb2 |
| bb2: |
| br label %bb3 |
| bb3: |
| %i4 = phi <4 x i8> [ zeroinitializer, %bb2 ], [ %i, %bb1 ] |
| br i1 false, label %bb7, label %bb5 |
| bb5: |
| %i6 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> %i4, <4 x i8> zeroinitializer) |
| br label %bb7 |
| bb7: |
| %i8 = phi <4 x i8> [ zeroinitializer, %bb5 ], [ zeroinitializer, %bb3 ] |
| br label %bb1 |
| } |
| |
| ; %sel1 should use %sel0 directly instead of converting the already |
| ; converted version of %sel0 back to <16 x i8>. |
| ; The assertions expect %vec1 and %sel0 to each get a <4 x i32> bitcast, |
| ; %vec1 to be bitcast back in bb.1 for %sel1, %sel1 to take the original |
| ; %sel0 as its other operand, and %sel0 to be bitcast back in bb.2 for the |
| ; extractelement. |
| |
| define amdgpu_kernel void @reuseOp() { |
| ; GFX906-LABEL: define amdgpu_kernel void @reuseOp( |
| ; GFX906-SAME: ) #[[ATTR0]] { |
| ; GFX906-NEXT: entry: |
| ; GFX906-NEXT: [[VEC1:%.*]] = insertelement <16 x i8> zeroinitializer, i8 0, i64 0 |
| ; GFX906-NEXT: [[VEC1_BC:%.*]] = bitcast <16 x i8> [[VEC1]] to <4 x i32> |
| ; GFX906-NEXT: br label [[BB_1:%.*]] |
| ; GFX906: bb.1: |
| ; GFX906-NEXT: [[VEC1_BC_BC:%.*]] = bitcast <4 x i32> [[VEC1_BC]] to <16 x i8> |
| ; GFX906-NEXT: [[SEL0:%.*]] = select i1 false, <16 x i8> zeroinitializer, <16 x i8> zeroinitializer |
| ; GFX906-NEXT: [[SEL0_BC:%.*]] = bitcast <16 x i8> [[SEL0]] to <4 x i32> |
| ; GFX906-NEXT: [[SEL1:%.*]] = select i1 false, <16 x i8> [[VEC1_BC_BC]], <16 x i8> [[SEL0]] |
| ; GFX906-NEXT: br label [[BB_2:%.*]] |
| ; GFX906: bb.2: |
| ; GFX906-NEXT: [[SEL0_BC_BC:%.*]] = bitcast <4 x i32> [[SEL0_BC]] to <16 x i8> |
| ; GFX906-NEXT: [[VAL:%.*]] = extractelement <16 x i8> [[SEL0_BC_BC]], i64 0 |
| ; GFX906-NEXT: ret void |
| ; |
| entry: |
| %vec1 = insertelement <16 x i8> zeroinitializer, i8 0, i64 0 |
| br label %bb.1 |
| |
| bb.1: |
| %sel0 = select i1 false, <16 x i8> zeroinitializer, <16 x i8> zeroinitializer |
| %sel1 = select i1 false, <16 x i8> %vec1, <16 x i8> %sel0 |
| br label %bb.2 |
| |
| bb.2: |
| %val = extractelement <16 x i8> %sel0, i64 0 |
| ret void |
| } |
| |
| |
| ; An endless loop (bb.11 always branches back to bb.1) carrying a chain of |
| ; <10 x i8> phis (%phi1 .. %phi6), fed by constants, the %invec0 argument, |
| ; an insertelement and a shufflevector. The assertions expect the output to |
| ; keep every phi on <10 x i8>, with no conversion inserted. |
| ; NOTE(review): the name suggests this guards against a phi being deleted |
| ; while the pass still refers to it - confirm against the pass history. |
| define amdgpu_kernel void @deletedPHI(i32 %in0, i1 %cmp, <10 x i8> %invec0) { |
| ; GFX906-LABEL: define amdgpu_kernel void @deletedPHI( |
| ; GFX906-SAME: i32 [[IN0:%.*]], i1 [[CMP:%.*]], <10 x i8> [[INVEC0:%.*]]) #[[ATTR0]] { |
| ; GFX906-NEXT: entry: |
| ; GFX906-NEXT: br label [[BB_1:%.*]] |
| ; GFX906: bb.1: |
| ; GFX906-NEXT: [[PHI0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 1, [[BB_11:%.*]] ] |
| ; GFX906-NEXT: [[PHI1:%.*]] = phi <10 x i8> [ splat (i8 1), [[ENTRY]] ], [ [[VEC1:%.*]], [[BB_11]] ] |
| ; GFX906-NEXT: br i1 [[CMP]], label [[BB_3:%.*]], label [[BB_2:%.*]] |
| ; GFX906: bb.2: |
| ; GFX906-NEXT: br label [[BB_3]] |
| ; GFX906: bb.3: |
| ; GFX906-NEXT: [[PHI2:%.*]] = phi <10 x i8> [ zeroinitializer, [[BB_2]] ], [ [[PHI1]], [[BB_1]] ] |
| ; GFX906-NEXT: br i1 [[CMP]], label [[BB_5:%.*]], label [[BB_4:%.*]] |
| ; GFX906: bb.4: |
| ; GFX906-NEXT: [[VEC0:%.*]] = insertelement <10 x i8> [[PHI2]], i8 0, i64 0 |
| ; GFX906-NEXT: br label [[BB_5]] |
| ; GFX906: bb.5: |
| ; GFX906-NEXT: [[PHI3:%.*]] = phi <10 x i8> [ [[VEC0]], [[BB_4]] ], [ [[PHI2]], [[BB_3]] ] |
| ; GFX906-NEXT: br i1 [[CMP]], label [[BB_7:%.*]], label [[BB_6:%.*]] |
| ; GFX906: bb.6: |
| ; GFX906-NEXT: br label [[BB_7]] |
| ; GFX906: bb.7: |
| ; GFX906-NEXT: [[PHI4:%.*]] = phi <10 x i8> [ [[INVEC0]], [[BB_6]] ], [ [[PHI3]], [[BB_5]] ] |
| ; GFX906-NEXT: br i1 [[CMP]], label [[BB_9:%.*]], label [[BB_8:%.*]] |
| ; GFX906: bb.8: |
| ; GFX906-NEXT: br label [[BB_9]] |
| ; GFX906: bb.9: |
| ; GFX906-NEXT: [[PHI5:%.*]] = phi <10 x i8> [ [[INVEC0]], [[BB_8]] ], [ [[PHI4]], [[BB_7]] ] |
| ; GFX906-NEXT: br i1 [[CMP]], label [[BB_11]], label [[BB_10:%.*]] |
| ; GFX906: bb.10: |
| ; GFX906-NEXT: br label [[BB_11]] |
| ; GFX906: bb.11: |
| ; GFX906-NEXT: [[PHI6:%.*]] = phi <10 x i8> [ zeroinitializer, [[BB_10]] ], [ [[PHI5]], [[BB_9]] ] |
| ; GFX906-NEXT: [[VEC1]] = shufflevector <10 x i8> [[PHI6]], <10 x i8> zeroinitializer, <10 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 15, i32 16, i32 17, i32 18, i32 19> |
| ; GFX906-NEXT: br label [[BB_1]] |
| ; |
| entry: |
| br label %bb.1 |
| |
| bb.1: |
| %phi0 = phi i32 [ 0, %entry ], [ 1, %bb.11 ] |
| %phi1 = phi <10 x i8> [ <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %entry ], [ %vec1, %bb.11 ] |
| br i1 %cmp, label %bb.3, label %bb.2 |
| |
| bb.2: |
| br label %bb.3 |
| |
| bb.3: |
| %phi2 = phi <10 x i8> [ zeroinitializer, %bb.2 ], [ %phi1, %bb.1 ] |
| br i1 %cmp, label %bb.5, label %bb.4 |
| |
| bb.4: |
| %vec0 = insertelement <10 x i8> %phi2, i8 0, i64 0 |
| br label %bb.5 |
| |
| bb.5: ; preds = %bb.4, %bb.3 |
| %phi3 = phi <10 x i8> [ %vec0, %bb.4 ], [ %phi2, %bb.3 ] |
| br i1 %cmp, label %bb.7, label %bb.6 |
| |
| bb.6: |
| br label %bb.7 |
| |
| bb.7: ; preds = %bb.6, %bb.5 |
| %phi4 = phi <10 x i8> [ %invec0, %bb.6 ], [ %phi3, %bb.5 ] |
| br i1 %cmp, label %bb.9, label %bb.8 |
| |
| bb.8: |
| br label %bb.9 |
| |
| bb.9: |
| %phi5 = phi <10 x i8> [ %invec0, %bb.8 ], [ %phi4, %bb.7 ] |
| br i1 %cmp, label %bb.11, label %bb.10 |
| |
| bb.10: |
| br label %bb.11 |
| |
| bb.11: |
| %phi6 = phi <10 x i8> [ zeroinitializer, %bb.10 ], [ %phi5, %bb.9 ] |
| %vec1 = shufflevector <10 x i8> %phi6, <10 x i8> zeroinitializer, <10 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 15, i32 16, i32 17, i32 18, i32 19> |
| br label %bb.1 |
| } |
| |
| ; An endless loop (bb.8 always branches back to bb.1) whose <10 x i8> phis |
| ; feed one another: %phi0 is used by both %phi1 and %phi2, and %phi3 flows |
| ; back into %phi0. The only non-phi inputs are constants and the %invec |
| ; argument. The assertions expect every phi to stay on <10 x i8>, with no |
| ; conversion inserted. |
| ; NOTE(review): the name suggests the pass has to undo a tentative conversion |
| ; more than once for this input - confirm against the pass implementation. |
| define amdgpu_kernel void @multiple_unwind(i1 %cmp, <10 x i8> %invec) { |
| ; GFX906-LABEL: define amdgpu_kernel void @multiple_unwind( |
| ; GFX906-SAME: i1 [[CMP:%.*]], <10 x i8> [[INVEC:%.*]]) #[[ATTR0]] { |
| ; GFX906-NEXT: entry: |
| ; GFX906-NEXT: br label [[BB_1:%.*]] |
| ; GFX906: bb.1: |
| ; GFX906-NEXT: [[PHI0:%.*]] = phi <10 x i8> [ splat (i8 1), [[ENTRY:%.*]] ], [ [[PHI3:%.*]], [[BB_8:%.*]] ] |
| ; GFX906-NEXT: br i1 [[CMP]], label [[BB_3:%.*]], label [[BB_2:%.*]] |
| ; GFX906: bb.2: |
| ; GFX906-NEXT: br label [[BB_3]] |
| ; GFX906: bb.3: |
| ; GFX906-NEXT: [[PHI1:%.*]] = phi <10 x i8> [ zeroinitializer, [[BB_2]] ], [ [[PHI0]], [[BB_1]] ] |
| ; GFX906-NEXT: br i1 [[CMP]], label [[BB_5:%.*]], label [[BB_4:%.*]] |
| ; GFX906: bb.4: |
| ; GFX906-NEXT: br label [[BB_5]] |
| ; GFX906: bb.5: |
| ; GFX906-NEXT: [[PHI2:%.*]] = phi <10 x i8> [ [[PHI0]], [[BB_4]] ], [ [[PHI1]], [[BB_3]] ] |
| ; GFX906-NEXT: br i1 [[CMP]], label [[BB_7:%.*]], label [[BB_6:%.*]] |
| ; GFX906: bb.6: |
| ; GFX906-NEXT: br label [[BB_7]] |
| ; GFX906: bb.7: |
| ; GFX906-NEXT: [[PHI3]] = phi <10 x i8> [ [[INVEC]], [[BB_6]] ], [ [[PHI2]], [[BB_5]] ] |
| ; GFX906-NEXT: br label [[BB_8]] |
| ; GFX906: bb.8: |
| ; GFX906-NEXT: br label [[BB_1]] |
| ; |
| entry: |
| br label %bb.1 |
| |
| bb.1: |
| %phi0 = phi <10 x i8> [ <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %entry ], [ %phi3, %bb.8 ] |
| br i1 %cmp, label %bb.3, label %bb.2 |
| |
| bb.2: |
| br label %bb.3 |
| |
| bb.3: |
| %phi1 = phi <10 x i8> [ zeroinitializer, %bb.2 ], [ %phi0, %bb.1 ] |
| br i1 %cmp, label %bb.5, label %bb.4 |
| |
| bb.4: |
| br label %bb.5 |
| |
| bb.5: |
| %phi2 = phi <10 x i8> [ %phi0, %bb.4 ], [ %phi1, %bb.3 ] |
| br i1 %cmp, label %bb.7, label %bb.6 |
| |
| bb.6: ; preds = %bb.5 |
| br label %bb.7 |
| |
| bb.7: |
| %phi3 = phi <10 x i8> [ %invec, %bb.6 ], [ %phi2, %bb.5 ] |
| br label %bb.8 |
| |
| bb.8: |
| br label %bb.1 |
| } |
| |
| |
| |
| ; Declaration of the intrinsic called by the load-based kernels in this |
| ; file; its i32 result is used there as a getelementptr index and in the |
| ; icmp branch conditions. |
| declare i32 @llvm.amdgcn.workitem.id.x() |