blob: 5590208865bd23eafdf0441484e716ef02036dd3 [file] [edit]
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -loop-reduce %s -S -o - | FileCheck %s
target triple = "aarch64-unknown-linux-gnu"
; Tests where the loop termination condition is not generated by a compare.
; The call to get.active.lane.mask in the loop should use the postincrement
; value of %iv.
define void @lane_mask(ptr %dst, i64 %n) #0 {
; CHECK-LABEL: define void @lane_mask(
; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[VSCALEX4:%.*]] = shl i64 [[VSCALE]], 2
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]])
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[TMP1:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[IV]], 2
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP0]]
; CHECK-NEXT: tail call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> splat (i32 1), ptr align 4 [[SCEVGEP]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[TMP1]] = add i64 [[IV]], [[VSCALEX4]]
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP1]], i64 [[N]])
; CHECK-NEXT: [[COND:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0
; CHECK-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
; Input IR: a predicated loop that stores a splat of 1 through %dst under an
; active-lane mask. The loop repeats while lane 0 of the next mask is set.
; In the expected output above, the lane-mask call takes the same incremented
; value that feeds the back edge of the induction-variable phi.
entry:
; %vscalex4 = vscale << 2; this is the amount %iv advances per iteration.
%vscale = tail call i64 @llvm.vscale.i64()
%vscalex4 = shl i64 %vscale, 2
; Mask for the first iteration, computed from index 0 and the bound %n.
%active.lane.mask.entry = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 %n)
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%active.lane.mask = phi <vscale x 4 x i1> [ %active.lane.mask.entry, %entry ], [ %active.lane.mask.next, %loop ]
; Address of the i32 element at index %iv; the store is predicated on the
; mask carried in from the previous iteration (or from entry).
%gep = getelementptr inbounds nuw i32, ptr %dst, i64 %iv
tail call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> splat (i32 1), ptr %gep, i32 4, <vscale x 4 x i1> %active.lane.mask)
%iv.next = add i64 %iv, %vscalex4
; The next mask is computed from the incremented index; its lane 0 is the
; branch condition, so no icmp produces the loop exit test.
%active.lane.mask.next = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 %iv.next, i64 %n)
%cond = extractelement <vscale x 4 x i1> %active.lane.mask.next, i64 0
br i1 %cond, label %loop, label %exit
exit:
ret void
}
; The store between the call and the branch shouldn't prevent the
; postincrement value from being used.
define void @lane_mask_not_last(ptr %dst, i64 %n) #0 {
; CHECK-LABEL: define void @lane_mask_not_last(
; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[VSCALEX4:%.*]] = shl i64 [[VSCALE]], 2
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]])
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[TMP0:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IV]], 2
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP0]] = add i64 [[IV]], [[VSCALEX4]]
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP0]], i64 [[N]])
; CHECK-NEXT: tail call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> splat (i32 1), ptr align 4 [[SCEVGEP]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[COND:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0
; CHECK-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
; Input IR: same loop as @lane_mask, except that the masked store is placed
; after the lane-mask call instead of before the increment. The expected
; output above still passes the incremented value to the lane-mask call.
entry:
; %vscalex4 = vscale << 2; this is the amount %iv advances per iteration.
%vscale = tail call i64 @llvm.vscale.i64()
%vscalex4 = shl i64 %vscale, 2
; Mask for the first iteration, computed from index 0 and the bound %n.
%active.lane.mask.entry = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 %n)
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%active.lane.mask = phi <vscale x 4 x i1> [ %active.lane.mask.entry, %entry ], [ %active.lane.mask.next, %loop ]
; The address is formed from the pre-increment %iv before the increment.
%gep = getelementptr inbounds nuw i32, ptr %dst, i64 %iv
%iv.next = add i64 %iv, %vscalex4
%active.lane.mask.next = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 %iv.next, i64 %n)
; This store sits between the mask computation and the branch; it uses the
; mask carried in by the phi, not the one just computed.
tail call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> splat (i32 1), ptr %gep, i32 4, <vscale x 4 x i1> %active.lane.mask)
%cond = extractelement <vscale x 4 x i1> %active.lane.mask.next, i64 0
br i1 %cond, label %loop, label %exit
exit:
ret void
}
; The call to cmp_fn in the loop should use the postincrement value of %iv.
define void @uses_cmp_fn(ptr %dst, i64 %n) {
; CHECK-LABEL: define void @uses_cmp_fn(
; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[LSR_IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[LSR_IV]], 2
; CHECK-NEXT: [[LSR_IV1:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP0]]
; CHECK-NEXT: store i32 0, ptr [[LSR_IV1]], align 4
; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], 1
; CHECK-NEXT: [[COND:%.*]] = tail call i1 @cmp_fn(i64 [[LSR_IV_NEXT]])
; CHECK-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
; Input IR: a scalar loop that stores 0 to consecutive i32 elements of %dst.
; The branch condition is the i1 returned by @cmp_fn rather than an icmp.
; %n is not used in the body.
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%gep = getelementptr inbounds nuw i32, ptr %dst, i64 %iv
store i32 0, ptr %gep, align 4
%iv.next = add i64 %iv, 1
; @cmp_fn is only declared in this file; it receives the incremented index
; and its result decides whether the loop repeats.
%cond = tail call i1 @cmp_fn(i64 %iv.next)
br i1 %cond, label %loop, label %exit
exit:
ret void
}
; The store between the call and the branch shouldn't prevent the
; postincrement value from being used.
define void @uses_cmp_fn_not_last(ptr %dst, i64 %n) {
; CHECK-LABEL: define void @uses_cmp_fn_not_last(
; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[LSR_IV:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[IV]], 2
; CHECK-NEXT: [[LSR_IV1:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP0]]
; CHECK-NEXT: [[LSR_IV]] = add i64 [[IV]], 1
; CHECK-NEXT: [[COND:%.*]] = tail call i1 @cmp_fn(i64 [[LSR_IV]])
; CHECK-NEXT: store i32 0, ptr [[LSR_IV1]], align 4
; CHECK-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
; Input IR: same loop as @uses_cmp_fn, except that the store is placed after
; the call to @cmp_fn. The expected output above still passes the incremented
; value to @cmp_fn. %n is not used in the body.
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; The address is formed from the pre-increment %iv before the increment.
%gep = getelementptr inbounds nuw i32, ptr %dst, i64 %iv
%iv.next = add i64 %iv, 1
%cond = tail call i1 @cmp_fn(i64 %iv.next)
; This store sits between the condition-producing call and the branch.
store i32 0, ptr %gep, align 4
br i1 %cond, label %loop, label %exit
exit:
ret void
}
; cmp2 will use a preincrement induction variable as it isn't directly the loop
; termination condition.
; FIXME: We could potentially handle this by examining the operands of the 'and'
; instruction.
define void @cmp_and(ptr %dst, i64 %n) {
; CHECK-LABEL: define void @cmp_and(
; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP:%.*]], %[[LOOP]] ], [ [[DST]], %[[ENTRY]] ]
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[LOOP]] ], [ [[TMP0]], %[[ENTRY]] ]
; CHECK-NEXT: [[VAL:%.*]] = load i64, ptr [[LSR_IV1]], align 8
; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i64 [[VAL]], [[N]]
; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i64 [[LSR_IV]], 0
; CHECK-NEXT: [[COND:%.*]] = and i1 [[CMP1]], [[CMP2]]
; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1
; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
; CHECK-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
; Input IR: a loop whose branch condition is the 'and' of two compares, one
; on a loaded value and one on the incremented index. In the expected output
; above, the index compare is rewritten against a counter that starts at
; %n - 1 and is compared with 0 before it is decremented.
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; NOTE(review): the GEP indexes by i32 (4-byte stride) while the load below
; reads an i64 with align 8; confirm whether this mismatch is intentional.
%gep = getelementptr inbounds nuw i32, ptr %dst, i64 %iv
%val = load i64, ptr %gep, align 8
%iv.next = add i64 %iv, 1
; %cmp1 depends on the loaded data; %cmp2 is the index bound test.
%cmp1 = icmp ne i64 %val, %n
%cmp2 = icmp ne i64 %iv.next, %n
; The branch consumes the 'and', so neither icmp is itself the branch
; condition.
%cond = and i1 %cmp1, %cmp2
br i1 %cond, label %loop, label %exit
exit:
ret void
}
; Intrinsics used by @lane_mask and @lane_mask_not_last.
declare i64 @llvm.vscale.i64()
declare <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64, i64)
declare void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32>, ptr captures(none), i32 immarg, <vscale x 4 x i1>)
; External function with no body in this file; @uses_cmp_fn and
; @uses_cmp_fn_not_last branch on its i1 result.
declare i1 @cmp_fn(i64)
; Attribute group #0 is attached to the two lane-mask functions and enables
; the sve2 target feature for them.
attributes #0 = { "target-features"="+sve2" }