| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| ; RUN: opt -passes=slp-vectorizer -slp-threshold=-999 -S -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake < %s | FileCheck %s |
| |
| ; External callees used by the tests below. What matters is which attributes |
| ; each one carries (or lacks). |
| ; |
| ; nounwind + readonly, but no willreturn: a call never unwinds and does not |
| ; write memory, but it is not guaranteed to return. |
| declare i64 @may_inf_loop_ro() nounwind readonly |
| ; nounwind only: a call never unwinds, but it is not guaranteed to return and |
| ; may read or write memory. |
| declare i64 @may_inf_loop_rw() nounwind |
| ; willreturn, but no nounwind: a call does not loop forever, but it may unwind. |
| declare i64 @may_throw() willreturn |
| |
| ; Base case with no interesting control dependencies. |
| ; Two adjacent i64 values are loaded from %a and from %c, added lane by lane, |
| ; and stored to two adjacent slots of %b. The expected output turns the whole |
| ; chain into <2 x i64> loads, one vector add, and one vector store. |
| define void @test_no_control(ptr %a, ptr %b, ptr %c) { |
| ; CHECK-LABEL: @test_no_control( |
| ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 8 |
| ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[C:%.*]], align 8 |
| ; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]] |
| ; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr [[B:%.*]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| %v1 = load i64, ptr %a |
| %a2 = getelementptr i64, ptr %a, i32 1 |
| %v2 = load i64, ptr %a2 |
| |
| %c1 = load i64, ptr %c |
| %ca2 = getelementptr i64, ptr %c, i32 1 |
| %c2 = load i64, ptr %ca2 |
| %add1 = add i64 %v1, %c1 |
| %add2 = add i64 %v2, %c2 |
| |
| store i64 %add1, ptr %b |
| %b2 = getelementptr i64, ptr %b, i32 1 |
| store i64 %add2, ptr %b2 |
| ret void |
| } |
| |
| ; The second lane's addend comes from a readonly call that is not guaranteed |
| ; to return, instead of from a load. Both loads from %a precede the call. |
| ; Expected: the %a loads, the add, and the store are vectorized, with the |
| ; vector load emitted after the call; %c1 and %c2 stay scalar and are combined |
| ; with insertelement. |
| define void @test1(ptr %a, ptr %b, ptr %c) { |
| ; CHECK-LABEL: @test1( |
| ; CHECK-NEXT: [[C1:%.*]] = load i64, ptr [[C:%.*]], align 8 |
| ; CHECK-NEXT: [[C2:%.*]] = call i64 @may_inf_loop_ro() |
| ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 8 |
| ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[C1]], i32 0 |
| ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[C2]], i32 1 |
| ; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]] |
| ; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr [[B:%.*]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| %v1 = load i64, ptr %a |
| %a2 = getelementptr i64, ptr %a, i32 1 |
| %v2 = load i64, ptr %a2 |
| |
| %c1 = load i64, ptr %c |
| %c2 = call i64 @may_inf_loop_ro() |
| %add1 = add i64 %v1, %c1 |
| %add2 = add i64 %v2, %c2 |
| |
| store i64 %add1, ptr %b |
| %b2 = getelementptr i64, ptr %b, i32 1 |
| store i64 %add2, ptr %b2 |
| ret void |
| } |
| |
| ; Same computation as test1, but the load from %c and the call come first, so |
| ; both loads from %a already follow the call in the input. The expected output |
| ; has the same shape as in test1. |
| define void @test2(ptr %a, ptr %b, ptr %c) { |
| ; CHECK-LABEL: @test2( |
| ; CHECK-NEXT: [[C1:%.*]] = load i64, ptr [[C:%.*]], align 8 |
| ; CHECK-NEXT: [[C2:%.*]] = call i64 @may_inf_loop_ro() |
| ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 8 |
| ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[C1]], i32 0 |
| ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[C2]], i32 1 |
| ; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]] |
| ; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr [[B:%.*]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| %c1 = load i64, ptr %c |
| %c2 = call i64 @may_inf_loop_ro() |
| |
| %v1 = load i64, ptr %a |
| %a2 = getelementptr i64, ptr %a, i32 1 |
| %v2 = load i64, ptr %a2 |
| |
| %add1 = add i64 %v1, %c1 |
| %add2 = add i64 %v2, %c2 |
| |
| store i64 %add1, ptr %b |
| %b2 = getelementptr i64, ptr %b, i32 1 |
| store i64 %add2, ptr %b2 |
| ret void |
| } |
| |
| ; Same computation as test1, with the instructions grouped per lane: lane 0 |
| ; (load, load, add) comes first, then lane 1, whose load from %a2 precedes the |
| ; call. The expected output has the same shape as in test1. |
| define void @test3(ptr %a, ptr %b, ptr %c) { |
| ; CHECK-LABEL: @test3( |
| ; CHECK-NEXT: [[C1:%.*]] = load i64, ptr [[C:%.*]], align 8 |
| ; CHECK-NEXT: [[C2:%.*]] = call i64 @may_inf_loop_ro() |
| ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 8 |
| ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[C1]], i32 0 |
| ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[C2]], i32 1 |
| ; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]] |
| ; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr [[B:%.*]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| %v1 = load i64, ptr %a |
| %c1 = load i64, ptr %c |
| %add1 = add i64 %v1, %c1 |
| |
| %a2 = getelementptr i64, ptr %a, i32 1 |
| %v2 = load i64, ptr %a2 |
| %c2 = call i64 @may_inf_loop_ro() |
| %add2 = add i64 %v2, %c2 |
| |
| store i64 %add1, ptr %b |
| %b2 = getelementptr i64, ptr %b, i32 1 |
| store i64 %add2, ptr %b2 |
| ret void |
| } |
| |
| ; Like test3, but in lane 1 the call precedes the load from %a2, so the two |
| ; loads from %a sit on opposite sides of the call in the input. The expected |
| ; output has the same shape as in test1, with the vector load after the call. |
| define void @test4(ptr %a, ptr %b, ptr %c) { |
| ; CHECK-LABEL: @test4( |
| ; CHECK-NEXT: [[C1:%.*]] = load i64, ptr [[C:%.*]], align 8 |
| ; CHECK-NEXT: [[C2:%.*]] = call i64 @may_inf_loop_ro() |
| ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 8 |
| ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[C1]], i32 0 |
| ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[C2]], i32 1 |
| ; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]] |
| ; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr [[B:%.*]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| %v1 = load i64, ptr %a |
| %c1 = load i64, ptr %c |
| %add1 = add i64 %v1, %c1 |
| |
| %c2 = call i64 @may_inf_loop_ro() |
| %a2 = getelementptr i64, ptr %a, i32 1 |
| %v2 = load i64, ptr %a2 |
| %add2 = add i64 %v2, %c2 |
| |
| store i64 %add1, ptr %b |
| %b2 = getelementptr i64, ptr %b, i32 1 |
| store i64 %add2, ptr %b2 |
| ret void |
| } |
| |
| ; Like test3, but lane 1 (including the call) is computed before lane 0. |
| ; Expected: the same vectorized shape as in test1, except that the call stays |
| ; ahead of the scalar load from %c, matching the input order. |
| define void @test5(ptr %a, ptr %b, ptr %c) { |
| ; CHECK-LABEL: @test5( |
| ; CHECK-NEXT: [[C2:%.*]] = call i64 @may_inf_loop_ro() |
| ; CHECK-NEXT: [[C1:%.*]] = load i64, ptr [[C:%.*]], align 8 |
| ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 8 |
| ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[C1]], i32 0 |
| ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[C2]], i32 1 |
| ; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]] |
| ; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr [[B:%.*]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| %a2 = getelementptr i64, ptr %a, i32 1 |
| %v2 = load i64, ptr %a2 |
| %c2 = call i64 @may_inf_loop_ro() |
| %add2 = add i64 %v2, %c2 |
| |
| %v1 = load i64, ptr %a |
| %c1 = load i64, ptr %c |
| %add1 = add i64 %v1, %c1 |
| |
| store i64 %add1, ptr %b |
| %b2 = getelementptr i64, ptr %b, i32 1 |
| store i64 %add2, ptr %b2 |
| ret void |
| } |
| |
| ; Same load/add/store chain as test_no_control, with a call whose result is |
| ; unused placed between the two loads from %a. Expected: the whole chain is |
| ; still vectorized, and the call is emitted ahead of both vector loads. |
| define void @test6(ptr %a, ptr %b, ptr %c) { |
| ; CHECK-LABEL: @test6( |
| ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @may_inf_loop_ro() |
| ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 8 |
| ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr [[C:%.*]], align 8 |
| ; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]] |
| ; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr [[B:%.*]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| %v1 = load i64, ptr %a |
| call i64 @may_inf_loop_ro() |
| %a2 = getelementptr i64, ptr %a, i32 1 |
| %v2 = load i64, ptr %a2 |
| |
| %c1 = load i64, ptr %c |
| %ca2 = getelementptr i64, ptr %c, i32 1 |
| %c2 = load i64, ptr %ca2 |
| %add1 = add i64 %v1, %c1 |
| %add2 = add i64 %v2, %c2 |
| |
| store i64 %add1, ptr %b |
| %b2 = getelementptr i64, ptr %b, i32 1 |
| store i64 %add2, ptr %b2 |
| ret void |
| } |
| |
| ; In this case, we can't vectorize the load pair because there's no valid |
| ; scheduling point which respects both memory and control dependence. If |
| ; we scheduled the second load before the store holding the first one in place, |
| ; we'd have hoisted a potentially faulting load above a potentially infinite |
| ; call, and thus introduced a possible fault which didn't previously exist in |
| ; the program. |
| ; Expected: the two loads from %a stay scalar and are combined with |
| ; insertelement; the loads from %c, the add, and the final store are vectorized. |
| define void @test7(ptr %a, ptr %b, ptr %c) { |
| ; CHECK-LABEL: @test7( |
| ; CHECK-NEXT: [[A2:%.*]] = getelementptr i64, ptr [[A:%.*]], i32 1 |
| ; CHECK-NEXT: [[V1:%.*]] = load i64, ptr [[A]], align 8 |
| ; CHECK-NEXT: store i64 0, ptr [[A]], align 8 |
| ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @may_inf_loop_ro() |
| ; CHECK-NEXT: [[V2:%.*]] = load i64, ptr [[A2]], align 8 |
| ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[C:%.*]], align 8 |
| ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[V1]], i32 0 |
| ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[V2]], i32 1 |
| ; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP4]], [[TMP2]] |
| ; CHECK-NEXT: store <2 x i64> [[TMP5]], ptr [[B:%.*]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| %v1 = load i64, ptr %a |
| ; This store overwrites the location just loaded, so the first load cannot be |
| ; moved below it. |
| store i64 0, ptr %a |
| ; This call is not guaranteed to return, so the second load cannot be moved |
| ; above it. |
| call i64 @may_inf_loop_ro() |
| %a2 = getelementptr i64, ptr %a, i32 1 |
| %v2 = load i64, ptr %a2 |
| |
| %c1 = load i64, ptr %c |
| %ca2 = getelementptr i64, ptr %c, i32 1 |
| %c2 = load i64, ptr %ca2 |
| %add1 = add i64 %v1, %c1 |
| %add2 = add i64 %v2, %c2 |
| |
| store i64 %add1, ptr %b |
| %b2 = getelementptr i64, ptr %b, i32 1 |
| store i64 %add2, ptr %b2 |
| ret void |
| } |
| |
| ; Same as test7, but with a throwing call: the callee is @may_throw and the |
| ; call site is additionally marked readonly. |
| ; Expected: as in test7, the loads from %a stay scalar on either side of the |
| ; store and the call, while the loads from %c, the add, and the final store |
| ; are vectorized. |
| define void @test8(ptr %a, ptr %b, ptr %c) { |
| ; CHECK-LABEL: @test8( |
| ; CHECK-NEXT: [[A2:%.*]] = getelementptr i64, ptr [[A:%.*]], i32 1 |
| ; CHECK-NEXT: [[V1:%.*]] = load i64, ptr [[A]], align 8 |
| ; CHECK-NEXT: store i64 0, ptr [[A]], align 8 |
| ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @may_throw() #[[ATTR4:[0-9]+]] |
| ; CHECK-NEXT: [[V2:%.*]] = load i64, ptr [[A2]], align 8 |
| ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[C:%.*]], align 8 |
| ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[V1]], i32 0 |
| ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[V2]], i32 1 |
| ; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP4]], [[TMP2]] |
| ; CHECK-NEXT: store <2 x i64> [[TMP5]], ptr [[B:%.*]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| %v1 = load i64, ptr %a |
| store i64 0, ptr %a |
| call i64 @may_throw() readonly |
| %a2 = getelementptr i64, ptr %a, i32 1 |
| %v2 = load i64, ptr %a2 |
| |
| %c1 = load i64, ptr %c |
| %ca2 = getelementptr i64, ptr %c, i32 1 |
| %c2 = load i64, ptr %ca2 |
| %add1 = add i64 %v1, %c1 |
| %add2 = add i64 %v2, %c2 |
| |
| store i64 %add1, ptr %b |
| %b2 = getelementptr i64, ptr %b, i32 1 |
| store i64 %add2, ptr %b2 |
| ret void |
| } |
| |
| ; Same as test8, but with a readwrite maythrow call: the call site carries no |
| ; readonly attribute, so the call may also write memory. |
| ; Expected: the same shape as in test8. |
| define void @test9(ptr %a, ptr %b, ptr %c) { |
| ; CHECK-LABEL: @test9( |
| ; CHECK-NEXT: [[A2:%.*]] = getelementptr i64, ptr [[A:%.*]], i32 1 |
| ; CHECK-NEXT: [[V1:%.*]] = load i64, ptr [[A]], align 8 |
| ; CHECK-NEXT: store i64 0, ptr [[A]], align 8 |
| ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @may_throw() |
| ; CHECK-NEXT: [[V2:%.*]] = load i64, ptr [[A2]], align 8 |
| ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[C:%.*]], align 8 |
| ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[V1]], i32 0 |
| ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[V2]], i32 1 |
| ; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP4]], [[TMP2]] |
| ; CHECK-NEXT: store <2 x i64> [[TMP5]], ptr [[B:%.*]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| %v1 = load i64, ptr %a |
| store i64 0, ptr %a |
| call i64 @may_throw() |
| %a2 = getelementptr i64, ptr %a, i32 1 |
| %v2 = load i64, ptr %a2 |
| |
| %c1 = load i64, ptr %c |
| %ca2 = getelementptr i64, ptr %c, i32 1 |
| %c2 = load i64, ptr %ca2 |
| %add1 = add i64 %v1, %c1 |
| %add2 = add i64 %v2, %c2 |
| |
| store i64 %add1, ptr %b |
| %b2 = getelementptr i64, ptr %b, i32 1 |
| store i64 %add2, ptr %b2 |
| ret void |
| } |
| |
| ; A variant of test7 which shows the same problem with a non-load instruction. |
| ; Here the pair that cannot be scheduled together is two udivs: the first |
| ; feeds a store that precedes the call, and the second follows the call. |
| ; Both loads from %a precede the store and the call. |
| ; Expected: the udivs and the loads feeding them stay scalar; the loads from |
| ; %c, the add, and the final store are vectorized. |
| define void @test10(ptr %a, ptr %b, ptr %c) { |
| ; CHECK-LABEL: @test10( |
| ; CHECK-NEXT: [[V1:%.*]] = load i64, ptr [[A:%.*]], align 8 |
| ; CHECK-NEXT: [[A2:%.*]] = getelementptr i64, ptr [[A]], i32 1 |
| ; CHECK-NEXT: [[V2:%.*]] = load i64, ptr [[A2]], align 8 |
| ; CHECK-NEXT: [[U1:%.*]] = udiv i64 200, [[V1]] |
| ; CHECK-NEXT: store i64 [[U1]], ptr [[A]], align 8 |
| ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @may_inf_loop_ro() |
| ; CHECK-NEXT: [[U2:%.*]] = udiv i64 200, [[V2]] |
| ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[C:%.*]], align 8 |
| ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[U1]], i32 0 |
| ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[U2]], i32 1 |
| ; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP4]], [[TMP2]] |
| ; CHECK-NEXT: store <2 x i64> [[TMP5]], ptr [[B:%.*]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| %v1 = load i64, ptr %a |
| %a2 = getelementptr i64, ptr %a, i32 1 |
| %v2 = load i64, ptr %a2 |
| |
| %u1 = udiv i64 200, %v1 |
| store i64 %u1, ptr %a |
| call i64 @may_inf_loop_ro() |
| %u2 = udiv i64 200, %v2 |
| |
| %c1 = load i64, ptr %c |
| %ca2 = getelementptr i64, ptr %c, i32 1 |
| %c2 = load i64, ptr %ca2 |
| %add1 = add i64 %u1, %c1 |
| %add2 = add i64 %u2, %c2 |
| |
| store i64 %add1, ptr %b |
| %b2 = getelementptr i64, ptr %b, i32 1 |
| store i64 %add2, ptr %b2 |
| ret void |
| } |
| |
| ; Variant of test10 with block-invariant operands to the udivs: the divisors |
| ; are the function arguments %x and %y rather than values loaded in the block. |
| ; FIXME: This is wrong, we're hoisting a faulting udiv above an infinite loop. |
| ; NOTE(review): the expected output below keeps the second udiv after the |
| ; call, which does not match the hoisting described by the FIXME above; |
| ; confirm whether that FIXME is stale. |
| define void @test11(i64 %x, i64 %y, ptr %b, ptr %c) { |
| ; CHECK-LABEL: @test11( |
| ; CHECK-NEXT: [[U1:%.*]] = udiv i64 200, [[X:%.*]] |
| ; CHECK-NEXT: store i64 [[U1]], ptr [[B:%.*]], align 8 |
| ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @may_inf_loop_ro() |
| ; CHECK-NEXT: [[U2:%.*]] = udiv i64 200, [[Y:%.*]] |
| ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[C:%.*]], align 8 |
| ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[U1]], i32 0 |
| ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[U2]], i32 1 |
| ; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP4]], [[TMP2]] |
| ; CHECK-NEXT: store <2 x i64> [[TMP5]], ptr [[B]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| %u1 = udiv i64 200, %x |
| store i64 %u1, ptr %b |
| call i64 @may_inf_loop_ro() |
| %u2 = udiv i64 200, %y |
| |
| %c1 = load i64, ptr %c |
| %ca2 = getelementptr i64, ptr %c, i32 1 |
| %c2 = load i64, ptr %ca2 |
| %add1 = add i64 %u1, %c1 |
| %add2 = add i64 %u2, %c2 |
| |
| store i64 %add1, ptr %b |
| %b2 = getelementptr i64, ptr %b, i32 1 |
| store i64 %add2, ptr %b2 |
| ret void |
| } |