; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; Test moves between FPRs and GPRs. The 32-bit cases test the z10
; implementation, which has no high-word support.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s

declare i64 @foo()
declare double @bar()
@dptr = external global double
@iptr = external global i64

; Test 32-bit moves from GPRs to FPRs. The GPR must be moved into the high
; 32 bits of the FPR.
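; As an illustration (commentary only, not checked): short (32-bit) FP
; operands occupy the leftmost word of a 64-bit FPR, so for an assumed
; input %a = 0x3f800000 (the bits of 1.0f) the expected flow is:
;   sllg %r0, %r2, 32  ->  %r0 = 0x3f800000_00000000
;   ldgr %f0, %r0      ->  %f0 = 0x3f800000_00000000, i.e. 1.0f in $f0s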
define float @f1(i32 %a) {
; CHECK-LABEL: f1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $r2l killed $r2l def $r2d
; CHECK-NEXT:    sllg %r0, %r2, 32
; CHECK-NEXT:    ldgr %f0, %r0
; CHECK-NEXT:    # kill: def $f0s killed $f0s killed $f0d
; CHECK-NEXT:    br %r14
  %res = bitcast i32 %a to float
  ret float %res
}

; Like f1, but create a situation where the shift can be folded with
; surrounding code.
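; Commentary (illustrative, not checked): the RISBG below encodes
; "rotate %r2 left by 31 and insert bits 0-31, zeroing the rest"
; (I4 = 159 = 128 + 31, where the 128 bit requests zeroing of the
; unselected bits). E.g. for an assumed %big = 0x1_00000000, the
; lshr+trunc value is 0x80000000 and risbg produces
; %r0 = 0x80000000_00000000 in a single instruction.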
define float @f2(i64 %big) {
; CHECK-LABEL: f2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    risbg %r0, %r2, 0, 159, 31
; CHECK-NEXT:    ldgr %f0, %r0
; CHECK-NEXT:    # kill: def $f0s killed $f0s killed $f0d
; CHECK-NEXT:    br %r14
  %shift = lshr i64 %big, 1
  %a = trunc i64 %shift to i32
  %res = bitcast i32 %a to float
  ret float %res
}

; Another example of the same thing.
define float @f3(i64 %big) {
; CHECK-LABEL: f3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    risbg %r0, %r2, 0, 159, 2
; CHECK-NEXT:    ldgr %f0, %r0
; CHECK-NEXT:    # kill: def $f0s killed $f0s killed $f0d
; CHECK-NEXT:    br %r14
  %shift = ashr i64 %big, 30
  %a = trunc i64 %shift to i32
  %res = bitcast i32 %a to float
  ret float %res
}

; Like f1, but the value to transfer is already in the high 32 bits.
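; Commentary (illustrative, not checked): the ashr+trunc selects the
; original high word, which is already where LDGR expects it, so only
; the low word needs clearing. E.g. an assumed %big = 0x40590000_12345678
; becomes %r2 = 0x40590000_00000000 after the nilf below.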
define float @f4(i64 %big) {
; CHECK-LABEL: f4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    nilf %r2, 0
; CHECK-NEXT:    ldgr %f0, %r2
; CHECK-NEXT:    # kill: def $f0s killed $f0s killed $f0d
; CHECK-NEXT:    br %r14
  %shift = ashr i64 %big, 32
  %a = trunc i64 %shift to i32
  %res = bitcast i32 %a to float
  ret float %res
}

; Test 64-bit moves from GPRs to FPRs.
define double @f5(i64 %a) {
; CHECK-LABEL: f5:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ldgr %f0, %r2
; CHECK-NEXT:    br %r14
  %res = bitcast i64 %a to double
  ret double %res
}

; Test 128-bit moves from GPRs to FPRs. i128 isn't a legal type,
; so this goes through memory.
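; Commentary (illustrative, not checked): because the fp128 value is
; only copied and never used in arithmetic, the block copy below is
; expected to stay in GPRs (lg/stg) rather than using an FPR pair as
; f9 does.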
define void @f6(ptr %a, ptr %b) {
; CHECK-LABEL: f6:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lg %r0, 8(%r3)
; CHECK-NEXT:    lg %r1, 0(%r3)
; CHECK-NEXT:    stg %r0, 8(%r2)
; CHECK-NEXT:    stg %r1, 0(%r2)
; CHECK-NEXT:    br %r14
  %val = load i128, ptr %b
  %res = bitcast i128 %val to fp128
  store fp128 %res, ptr %a
  ret void
}

; Test 32-bit moves from FPRs to GPRs. The high 32 bits of the FPR should
; be moved into the low 32 bits of the GPR.
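; Commentary (illustrative, not checked): for an assumed %a = 1.0f,
; $f0d holds 0x3f800000_xxxxxxxx (low word undefined), so lgdr followed
; by srlg leaves %r2 = 0x3f800000.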
define i32 @f7(float %a) {
; CHECK-LABEL: f7:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $f0s killed $f0s def $f0d
; CHECK-NEXT:    lgdr %r0, %f0
; CHECK-NEXT:    srlg %r2, %r0, 32
; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
; CHECK-NEXT:    br %r14
  %res = bitcast float %a to i32
  ret i32 %res
}

; Test 64-bit moves from FPRs to GPRs.
define i64 @f8(double %a) {
; CHECK-LABEL: f8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lgdr %r2, %f0
; CHECK-NEXT:    br %r14
  %res = bitcast double %a to i64
  ret i64 %res
}

; Test 128-bit moves from FPRs to GPRs, with the same restriction as f6.
define void @f9(ptr %a, ptr %b) {
; CHECK-LABEL: f9:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ld %f0, 0(%r2)
; CHECK-NEXT:    ld %f2, 8(%r2)
; CHECK-NEXT:    std %f0, 0(%r3)
; CHECK-NEXT:    std %f2, 8(%r3)
; CHECK-NEXT:    br %r14
  %val = load fp128, ptr %a
  %res = bitcast fp128 %val to i128
  store i128 %res, ptr %b
  ret void
}

; Test cases where the destination of an LGDR needs to be spilled.
; We shouldn't have any integer stack stores or floating-point loads.
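; Commentary on the expected code below: seven of the ten values survive
; the call in callee-saved GPRs via LGDR; the other three are spilled
; directly from FPRs with STD and reloaded by folding the use into OG,
; so the transfers never bounce through the wrong register class.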
define void @f10(double %extra) {
; CHECK-LABEL: f10:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    stmg %r6, %r15, 48(%r15)
; CHECK-NEXT:    .cfi_offset %r6, -112
; CHECK-NEXT:    .cfi_offset %r7, -104
; CHECK-NEXT:    .cfi_offset %r8, -96
; CHECK-NEXT:    .cfi_offset %r9, -88
; CHECK-NEXT:    .cfi_offset %r10, -80
; CHECK-NEXT:    .cfi_offset %r11, -72
; CHECK-NEXT:    .cfi_offset %r12, -64
; CHECK-NEXT:    .cfi_offset %r13, -56
; CHECK-NEXT:    .cfi_offset %r14, -48
; CHECK-NEXT:    .cfi_offset %r15, -40
; CHECK-NEXT:    aghi %r15, -184
; CHECK-NEXT:    .cfi_def_cfa_offset 344
; CHECK-NEXT:    lgrl %r1, dptr@GOT
; CHECK-NEXT:    ldr %f1, %f0
; CHECK-NEXT:    adb %f1, 0(%r1)
; CHECK-NEXT:    ldr %f2, %f0
; CHECK-NEXT:    adb %f2, 0(%r1)
; CHECK-NEXT:    ldr %f3, %f0
; CHECK-NEXT:    adb %f3, 0(%r1)
; CHECK-NEXT:    std %f1, 176(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f2, 168(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f3, 160(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    ldr %f1, %f0
; CHECK-NEXT:    adb %f1, 0(%r1)
; CHECK-NEXT:    ldr %f2, %f0
; CHECK-NEXT:    adb %f2, 0(%r1)
; CHECK-NEXT:    ldr %f3, %f0
; CHECK-NEXT:    adb %f3, 0(%r1)
; CHECK-NEXT:    ldr %f4, %f0
; CHECK-NEXT:    adb %f4, 0(%r1)
; CHECK-NEXT:    lgdr %r10, %f1
; CHECK-NEXT:    lgdr %r9, %f2
; CHECK-NEXT:    lgdr %r8, %f3
; CHECK-NEXT:    lgdr %r7, %f4
; CHECK-NEXT:    ldr %f1, %f0
; CHECK-NEXT:    adb %f1, 0(%r1)
; CHECK-NEXT:    ldr %f2, %f0
; CHECK-NEXT:    adb %f2, 0(%r1)
; CHECK-NEXT:    adb %f0, 0(%r1)
; CHECK-NEXT:    lgrl %r6, iptr@GOT
; CHECK-NEXT:    lgdr %r13, %f1
; CHECK-NEXT:    lgdr %r12, %f2
; CHECK-NEXT:    lgdr %r11, %f0
; CHECK-NEXT:  .LBB9_1: # %loop
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    brasl %r14, foo@PLT
; CHECK-NEXT:    lgr %r0, %r2
; CHECK-NEXT:    og %r0, 176(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    og %r0, 168(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    og %r0, 160(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ogr %r0, %r10
; CHECK-NEXT:    ogr %r0, %r9
; CHECK-NEXT:    ogr %r0, %r8
; CHECK-NEXT:    ogr %r0, %r7
; CHECK-NEXT:    ogr %r0, %r13
; CHECK-NEXT:    ogr %r0, %r12
; CHECK-NEXT:    ogr %r0, %r11
; CHECK-NEXT:    stg %r0, 0(%r6)
; CHECK-NEXT:    cgijlh %r2, 1, .LBB9_1
; CHECK-NEXT:  # %bb.2: # %exit
; CHECK-NEXT:    lmg %r6, %r15, 232(%r15)
; CHECK-NEXT:    br %r14
entry:
  %double0 = load volatile double, ptr @dptr
  %biased0 = fadd double %double0, %extra
  %int0 = bitcast double %biased0 to i64
  %double1 = load volatile double, ptr @dptr
  %biased1 = fadd double %double1, %extra
  %int1 = bitcast double %biased1 to i64
  %double2 = load volatile double, ptr @dptr
  %biased2 = fadd double %double2, %extra
  %int2 = bitcast double %biased2 to i64
  %double3 = load volatile double, ptr @dptr
  %biased3 = fadd double %double3, %extra
  %int3 = bitcast double %biased3 to i64
  %double4 = load volatile double, ptr @dptr
  %biased4 = fadd double %double4, %extra
  %int4 = bitcast double %biased4 to i64
  %double5 = load volatile double, ptr @dptr
  %biased5 = fadd double %double5, %extra
  %int5 = bitcast double %biased5 to i64
  %double6 = load volatile double, ptr @dptr
  %biased6 = fadd double %double6, %extra
  %int6 = bitcast double %biased6 to i64
  %double7 = load volatile double, ptr @dptr
  %biased7 = fadd double %double7, %extra
  %int7 = bitcast double %biased7 to i64
  %double8 = load volatile double, ptr @dptr
  %biased8 = fadd double %double8, %extra
  %int8 = bitcast double %biased8 to i64
  %double9 = load volatile double, ptr @dptr
  %biased9 = fadd double %double9, %extra
  %int9 = bitcast double %biased9 to i64
  br label %loop

loop:
  %start = call i64 @foo()
  %or0 = or i64 %start, %int0
  %or1 = or i64 %or0, %int1
  %or2 = or i64 %or1, %int2
  %or3 = or i64 %or2, %int3
  %or4 = or i64 %or3, %int4
  %or5 = or i64 %or4, %int5
  %or6 = or i64 %or5, %int6
  %or7 = or i64 %or6, %int7
  %or8 = or i64 %or7, %int8
  %or9 = or i64 %or8, %int9
  store i64 %or9, ptr @iptr
  %cont = icmp ne i64 %start, 1
  br i1 %cont, label %loop, label %exit

exit:
  ret void
}

; ...likewise LDGR, with the requirements the other way around.
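; Commentary on the expected code below: eight of the masked values
; reach callee-saved FPRs via LDGR, and the remaining two are spilled
; with STG (integer stores) and reloaded by folding the use into ADB
; (a floating-point operation).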
define void @f11(i64 %mask) {
; CHECK-LABEL: f11:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    stmg %r12, %r15, 96(%r15)
; CHECK-NEXT:    .cfi_offset %r12, -64
; CHECK-NEXT:    .cfi_offset %r13, -56
; CHECK-NEXT:    .cfi_offset %r14, -48
; CHECK-NEXT:    .cfi_offset %r15, -40
; CHECK-NEXT:    aghi %r15, -240
; CHECK-NEXT:    .cfi_def_cfa_offset 400
; CHECK-NEXT:    std %f8, 232(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f9, 224(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f10, 216(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f11, 208(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f12, 200(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f13, 192(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f14, 184(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f15, 176(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    .cfi_offset %f8, -168
; CHECK-NEXT:    .cfi_offset %f9, -176
; CHECK-NEXT:    .cfi_offset %f10, -184
; CHECK-NEXT:    .cfi_offset %f11, -192
; CHECK-NEXT:    .cfi_offset %f12, -200
; CHECK-NEXT:    .cfi_offset %f13, -208
; CHECK-NEXT:    .cfi_offset %f14, -216
; CHECK-NEXT:    .cfi_offset %f15, -224
; CHECK-NEXT:    lgrl %r1, iptr@GOT
; CHECK-NEXT:    lgr %r0, %r2
; CHECK-NEXT:    ng %r0, 0(%r1)
; CHECK-NEXT:    lgr %r3, %r2
; CHECK-NEXT:    ng %r3, 0(%r1)
; CHECK-NEXT:    lgr %r4, %r2
; CHECK-NEXT:    ng %r4, 0(%r1)
; CHECK-NEXT:    stg %r0, 168(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    stg %r3, 160(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    lgr %r0, %r2
; CHECK-NEXT:    ng %r0, 0(%r1)
; CHECK-NEXT:    ldgr %f10, %r4
; CHECK-NEXT:    lgr %r3, %r2
; CHECK-NEXT:    ng %r3, 0(%r1)
; CHECK-NEXT:    lgr %r4, %r2
; CHECK-NEXT:    ng %r4, 0(%r1)
; CHECK-NEXT:    ldgr %f11, %r0
; CHECK-NEXT:    lgr %r0, %r2
; CHECK-NEXT:    ng %r0, 0(%r1)
; CHECK-NEXT:    ldgr %f12, %r3
; CHECK-NEXT:    ldgr %f13, %r4
; CHECK-NEXT:    lgr %r3, %r2
; CHECK-NEXT:    ng %r3, 0(%r1)
; CHECK-NEXT:    ldgr %f14, %r0
; CHECK-NEXT:    lgr %r0, %r2
; CHECK-NEXT:    ng %r0, 0(%r1)
; CHECK-NEXT:    ng %r2, 0(%r1)
; CHECK-NEXT:    ldgr %f15, %r3
; CHECK-NEXT:    lgrl %r13, dptr@GOT
; CHECK-NEXT:    ldgr %f8, %r0
; CHECK-NEXT:    ldgr %f9, %r2
; CHECK-NEXT:    larl %r12, .LCPI10_0
; CHECK-NEXT:  .LBB10_1: # %loop
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    brasl %r14, bar@PLT
; CHECK-NEXT:    ldr %f1, %f0
; CHECK-NEXT:    adb %f1, 168(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    adb %f1, 160(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    adbr %f1, %f10
; CHECK-NEXT:    adbr %f1, %f11
; CHECK-NEXT:    adbr %f1, %f12
; CHECK-NEXT:    adbr %f1, %f13
; CHECK-NEXT:    adbr %f1, %f14
; CHECK-NEXT:    adbr %f1, %f15
; CHECK-NEXT:    adbr %f1, %f8
; CHECK-NEXT:    adbr %f1, %f9
; CHECK-NEXT:    cdb %f0, 0(%r12)
; CHECK-NEXT:    std %f1, 0(%r13)
; CHECK-NEXT:    jlh .LBB10_1
; CHECK-NEXT:  # %bb.2: # %exit
; CHECK-NEXT:    ld %f8, 232(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f9, 224(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f10, 216(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f11, 208(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f12, 200(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f13, 192(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f14, 184(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f15, 176(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    lmg %r12, %r15, 336(%r15)
; CHECK-NEXT:    br %r14
entry:
  %int0 = load volatile i64, ptr @iptr
  %masked0 = and i64 %int0, %mask
  %double0 = bitcast i64 %masked0 to double
  %int1 = load volatile i64, ptr @iptr
  %masked1 = and i64 %int1, %mask
  %double1 = bitcast i64 %masked1 to double
  %int2 = load volatile i64, ptr @iptr
  %masked2 = and i64 %int2, %mask
  %double2 = bitcast i64 %masked2 to double
  %int3 = load volatile i64, ptr @iptr
  %masked3 = and i64 %int3, %mask
  %double3 = bitcast i64 %masked3 to double
  %int4 = load volatile i64, ptr @iptr
  %masked4 = and i64 %int4, %mask
  %double4 = bitcast i64 %masked4 to double
  %int5 = load volatile i64, ptr @iptr
  %masked5 = and i64 %int5, %mask
  %double5 = bitcast i64 %masked5 to double
  %int6 = load volatile i64, ptr @iptr
  %masked6 = and i64 %int6, %mask
  %double6 = bitcast i64 %masked6 to double
  %int7 = load volatile i64, ptr @iptr
  %masked7 = and i64 %int7, %mask
  %double7 = bitcast i64 %masked7 to double
  %int8 = load volatile i64, ptr @iptr
  %masked8 = and i64 %int8, %mask
  %double8 = bitcast i64 %masked8 to double
  %int9 = load volatile i64, ptr @iptr
  %masked9 = and i64 %int9, %mask
  %double9 = bitcast i64 %masked9 to double
  br label %loop

loop:
  %start = call double @bar()
  %add0 = fadd double %start, %double0
  %add1 = fadd double %add0, %double1
  %add2 = fadd double %add1, %double2
  %add3 = fadd double %add2, %double3
  %add4 = fadd double %add3, %double4
  %add5 = fadd double %add4, %double5
  %add6 = fadd double %add5, %double6
  %add7 = fadd double %add6, %double7
  %add8 = fadd double %add7, %double8
  %add9 = fadd double %add8, %double9
  store double %add9, ptr @dptr
  %cont = fcmp one double %start, 1.0
  br i1 %cont, label %loop, label %exit

exit:
  ret void
}

; Test cases where the source of an LDGR needs to be spilled.
; We shouldn't have any floating-point stack stores, and the reloads at
; the bitcast points should fold into floating-point loads.
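; Commentary on the expected code below: the i64 values live in GPRs
; across the loop, so they are spilled with integer stores (mvghi/stg);
; at the bitcast in the exit block, the LDGR should fold with the
; reload into a plain LD.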
define void @f12() {
; CHECK-LABEL: f12:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    stmg %r6, %r15, 48(%r15)
; CHECK-NEXT:    .cfi_offset %r6, -112
; CHECK-NEXT:    .cfi_offset %r7, -104
; CHECK-NEXT:    .cfi_offset %r8, -96
; CHECK-NEXT:    .cfi_offset %r9, -88
; CHECK-NEXT:    .cfi_offset %r10, -80
; CHECK-NEXT:    .cfi_offset %r11, -72
; CHECK-NEXT:    .cfi_offset %r12, -64
; CHECK-NEXT:    .cfi_offset %r13, -56
; CHECK-NEXT:    .cfi_offset %r14, -48
; CHECK-NEXT:    .cfi_offset %r15, -40
; CHECK-NEXT:    aghi %r15, -176
; CHECK-NEXT:    .cfi_def_cfa_offset 336
; CHECK-NEXT:    lghi %r12, 0
; CHECK-NEXT:    lghi %r13, 0
; CHECK-NEXT:    lghi %r6, 0
; CHECK-NEXT:    lghi %r7, 0
; CHECK-NEXT:    lghi %r8, 0
; CHECK-NEXT:    lghi %r9, 0
; CHECK-NEXT:    lghi %r10, 0
; CHECK-NEXT:    lghi %r11, 0
; CHECK-NEXT:    mvghi 160(%r15), 0 # 8-byte Folded Spill
; CHECK-NEXT:    mvghi 168(%r15), 0 # 8-byte Folded Spill
; CHECK-NEXT:  .LBB11_1: # %loop
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    brasl %r14, foo@PLT
; CHECK-NEXT:    agr %r12, %r2
; CHECK-NEXT:    agr %r13, %r2
; CHECK-NEXT:    agr %r6, %r2
; CHECK-NEXT:    agr %r7, %r2
; CHECK-NEXT:    agr %r8, %r2
; CHECK-NEXT:    agr %r9, %r2
; CHECK-NEXT:    agr %r10, %r2
; CHECK-NEXT:    agr %r11, %r2
; CHECK-NEXT:    lg %r0, 160(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    agr %r0, %r2
; CHECK-NEXT:    stg %r0, 160(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    lg %r0, 168(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    agr %r0, %r2
; CHECK-NEXT:    stg %r0, 168(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    cgijlh %r2, 1, .LBB11_1
; CHECK-NEXT:  # %bb.2: # %exit
; CHECK-NEXT:    brasl %r14, foo@PLT
; CHECK-NEXT:    lgrl %r1, dptr@GOT
; CHECK-NEXT:    ld %f0, 0(%r1)
; CHECK-NEXT:    ldgr %f1, %r12
; CHECK-NEXT:    mdbr %f1, %f0
; CHECK-NEXT:    std %f1, 0(%r1)
; CHECK-NEXT:    ldgr %f1, %r13
; CHECK-NEXT:    mdbr %f1, %f0
; CHECK-NEXT:    std %f1, 0(%r1)
; CHECK-NEXT:    ldgr %f1, %r6
; CHECK-NEXT:    mdbr %f1, %f0
; CHECK-NEXT:    std %f1, 0(%r1)
; CHECK-NEXT:    ldgr %f1, %r7
; CHECK-NEXT:    mdbr %f1, %f0
; CHECK-NEXT:    std %f1, 0(%r1)
; CHECK-NEXT:    ldgr %f1, %r8
; CHECK-NEXT:    mdbr %f1, %f0
; CHECK-NEXT:    std %f1, 0(%r1)
; CHECK-NEXT:    ldgr %f1, %r9
; CHECK-NEXT:    mdbr %f1, %f0
; CHECK-NEXT:    std %f1, 0(%r1)
; CHECK-NEXT:    ldgr %f1, %r10
; CHECK-NEXT:    mdbr %f1, %f0
; CHECK-NEXT:    std %f1, 0(%r1)
; CHECK-NEXT:    ldgr %f1, %r11
; CHECK-NEXT:    mdbr %f1, %f0
; CHECK-NEXT:    std %f1, 0(%r1)
; CHECK-NEXT:    ld %f1, 160(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    mdbr %f1, %f0
; CHECK-NEXT:    std %f1, 0(%r1)
; CHECK-NEXT:    ld %f1, 168(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    mdbr %f1, %f0
; CHECK-NEXT:    std %f1, 0(%r1)
; CHECK-NEXT:    brasl %r14, foo@PLT
; CHECK-NEXT:    lmg %r6, %r15, 224(%r15)
; CHECK-NEXT:    br %r14
entry:
  br label %loop

loop:
  %int0 = phi i64 [ 0, %entry ], [ %add0, %loop ]
  %int1 = phi i64 [ 0, %entry ], [ %add1, %loop ]
  %int2 = phi i64 [ 0, %entry ], [ %add2, %loop ]
  %int3 = phi i64 [ 0, %entry ], [ %add3, %loop ]
  %int4 = phi i64 [ 0, %entry ], [ %add4, %loop ]
  %int5 = phi i64 [ 0, %entry ], [ %add5, %loop ]
  %int6 = phi i64 [ 0, %entry ], [ %add6, %loop ]
  %int7 = phi i64 [ 0, %entry ], [ %add7, %loop ]
  %int8 = phi i64 [ 0, %entry ], [ %add8, %loop ]
  %int9 = phi i64 [ 0, %entry ], [ %add9, %loop ]

  %bias = call i64 @foo()
  %add0 = add i64 %int0, %bias
  %add1 = add i64 %int1, %bias
  %add2 = add i64 %int2, %bias
  %add3 = add i64 %int3, %bias
  %add4 = add i64 %int4, %bias
  %add5 = add i64 %int5, %bias
  %add6 = add i64 %int6, %bias
  %add7 = add i64 %int7, %bias
  %add8 = add i64 %int8, %bias
  %add9 = add i64 %int9, %bias
  %cont = icmp ne i64 %bias, 1
  br i1 %cont, label %loop, label %exit

exit:
  %unused1 = call i64 @foo()
  %factor = load volatile double, ptr @dptr

  %conv0 = bitcast i64 %add0 to double
  %mul0 = fmul double %conv0, %factor
  store volatile double %mul0, ptr @dptr
  %conv1 = bitcast i64 %add1 to double
  %mul1 = fmul double %conv1, %factor
  store volatile double %mul1, ptr @dptr
  %conv2 = bitcast i64 %add2 to double
  %mul2 = fmul double %conv2, %factor
  store volatile double %mul2, ptr @dptr
  %conv3 = bitcast i64 %add3 to double
  %mul3 = fmul double %conv3, %factor
  store volatile double %mul3, ptr @dptr
  %conv4 = bitcast i64 %add4 to double
  %mul4 = fmul double %conv4, %factor
  store volatile double %mul4, ptr @dptr
  %conv5 = bitcast i64 %add5 to double
  %mul5 = fmul double %conv5, %factor
  store volatile double %mul5, ptr @dptr
  %conv6 = bitcast i64 %add6 to double
  %mul6 = fmul double %conv6, %factor
  store volatile double %mul6, ptr @dptr
  %conv7 = bitcast i64 %add7 to double
  %mul7 = fmul double %conv7, %factor
  store volatile double %mul7, ptr @dptr
  %conv8 = bitcast i64 %add8 to double
  %mul8 = fmul double %conv8, %factor
  store volatile double %mul8, ptr @dptr
  %conv9 = bitcast i64 %add9 to double
  %mul9 = fmul double %conv9, %factor
  store volatile double %mul9, ptr @dptr

  %unused2 = call i64 @foo()

  ret void
}

; ...likewise LGDR, with the requirements the other way around.
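; Commentary on the expected code below: conversely, the doubles are
; spilled with STD, and at the bitcast in the exit block the LGDR
; should fold with the reload into a plain LG.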
define void @f13() {
; CHECK-LABEL: f13:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    stmg %r13, %r15, 104(%r15)
; CHECK-NEXT:    .cfi_offset %r13, -56
; CHECK-NEXT:    .cfi_offset %r14, -48
; CHECK-NEXT:    .cfi_offset %r15, -40
; CHECK-NEXT:    aghi %r15, -240
; CHECK-NEXT:    .cfi_def_cfa_offset 400
; CHECK-NEXT:    std %f8, 232(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f9, 224(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f10, 216(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f11, 208(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f12, 200(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f13, 192(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f14, 184(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f15, 176(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    .cfi_offset %f8, -168
; CHECK-NEXT:    .cfi_offset %f9, -176
; CHECK-NEXT:    .cfi_offset %f10, -184
; CHECK-NEXT:    .cfi_offset %f11, -192
; CHECK-NEXT:    .cfi_offset %f12, -200
; CHECK-NEXT:    .cfi_offset %f13, -208
; CHECK-NEXT:    .cfi_offset %f14, -216
; CHECK-NEXT:    .cfi_offset %f15, -224
; CHECK-NEXT:    larl %r13, .LCPI12_0
; CHECK-NEXT:    ld %f8, 0(%r13)
; CHECK-NEXT:    ldr %f9, %f8
; CHECK-NEXT:    ldr %f15, %f8
; CHECK-NEXT:    ldr %f14, %f8
; CHECK-NEXT:    ldr %f13, %f8
; CHECK-NEXT:    ldr %f12, %f8
; CHECK-NEXT:    ldr %f11, %f8
; CHECK-NEXT:    ldr %f10, %f8
; CHECK-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
; CHECK-NEXT:  .LBB12_1: # %loop
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    brasl %r14, bar@PLT
; CHECK-NEXT:    mdbr %f8, %f0
; CHECK-NEXT:    mdbr %f9, %f0
; CHECK-NEXT:    mdbr %f15, %f0
; CHECK-NEXT:    mdbr %f14, %f0
; CHECK-NEXT:    mdbr %f13, %f0
; CHECK-NEXT:    mdbr %f12, %f0
; CHECK-NEXT:    cdb %f0, 0(%r13)
; CHECK-NEXT:    mdbr %f11, %f0
; CHECK-NEXT:    mdbr %f10, %f0
; CHECK-NEXT:    ld %f1, 160(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    mdbr %f1, %f0
; CHECK-NEXT:    std %f1, 160(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    ld %f1, 168(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    mdbr %f1, %f0
; CHECK-NEXT:    std %f1, 168(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    jlh .LBB12_1
; CHECK-NEXT:  # %bb.2: # %exit
; CHECK-NEXT:    brasl %r14, foo@PLT
; CHECK-NEXT:    lgrl %r1, iptr@GOT
; CHECK-NEXT:    lg %r0, 0(%r1)
; CHECK-NEXT:    lgdr %r2, %f8
; CHECK-NEXT:    agr %r2, %r0
; CHECK-NEXT:    stg %r2, 0(%r1)
; CHECK-NEXT:    lgdr %r2, %f9
; CHECK-NEXT:    agr %r2, %r0
; CHECK-NEXT:    stg %r2, 0(%r1)
; CHECK-NEXT:    lgdr %r2, %f15
; CHECK-NEXT:    agr %r2, %r0
; CHECK-NEXT:    stg %r2, 0(%r1)
; CHECK-NEXT:    lgdr %r2, %f14
; CHECK-NEXT:    agr %r2, %r0
; CHECK-NEXT:    stg %r2, 0(%r1)
; CHECK-NEXT:    lgdr %r2, %f13
; CHECK-NEXT:    agr %r2, %r0
; CHECK-NEXT:    stg %r2, 0(%r1)
; CHECK-NEXT:    lgdr %r2, %f12
; CHECK-NEXT:    agr %r2, %r0
; CHECK-NEXT:    stg %r2, 0(%r1)
; CHECK-NEXT:    lgdr %r2, %f11
; CHECK-NEXT:    agr %r2, %r0
; CHECK-NEXT:    stg %r2, 0(%r1)
; CHECK-NEXT:    lgdr %r2, %f10
; CHECK-NEXT:    agr %r2, %r0
; CHECK-NEXT:    stg %r2, 0(%r1)
; CHECK-NEXT:    lg %r2, 160(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    agr %r2, %r0
; CHECK-NEXT:    stg %r2, 0(%r1)
; CHECK-NEXT:    lg %r2, 168(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    agr %r2, %r0
; CHECK-NEXT:    stg %r2, 0(%r1)
; CHECK-NEXT:    brasl %r14, foo@PLT
; CHECK-NEXT:    ld %f8, 232(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f9, 224(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f10, 216(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f11, 208(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f12, 200(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f13, 192(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f14, 184(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f15, 176(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    lmg %r13, %r15, 344(%r15)
; CHECK-NEXT:    br %r14
entry:
  br label %loop

loop:
  %double0 = phi double [ 1.0, %entry ], [ %mul0, %loop ]
  %double1 = phi double [ 1.0, %entry ], [ %mul1, %loop ]
  %double2 = phi double [ 1.0, %entry ], [ %mul2, %loop ]
  %double3 = phi double [ 1.0, %entry ], [ %mul3, %loop ]
  %double4 = phi double [ 1.0, %entry ], [ %mul4, %loop ]
  %double5 = phi double [ 1.0, %entry ], [ %mul5, %loop ]
  %double6 = phi double [ 1.0, %entry ], [ %mul6, %loop ]
  %double7 = phi double [ 1.0, %entry ], [ %mul7, %loop ]
  %double8 = phi double [ 1.0, %entry ], [ %mul8, %loop ]
  %double9 = phi double [ 1.0, %entry ], [ %mul9, %loop ]

  %factor = call double @bar()
  %mul0 = fmul double %double0, %factor
  %mul1 = fmul double %double1, %factor
  %mul2 = fmul double %double2, %factor
  %mul3 = fmul double %double3, %factor
  %mul4 = fmul double %double4, %factor
  %mul5 = fmul double %double5, %factor
  %mul6 = fmul double %double6, %factor
  %mul7 = fmul double %double7, %factor
  %mul8 = fmul double %double8, %factor
  %mul9 = fmul double %double9, %factor
  %cont = fcmp one double %factor, 1.0
  br i1 %cont, label %loop, label %exit

exit:
  %unused1 = call i64 @foo()
  %bias = load volatile i64, ptr @iptr

  %conv0 = bitcast double %mul0 to i64
  %add0 = add i64 %conv0, %bias
  store volatile i64 %add0, ptr @iptr
  %conv1 = bitcast double %mul1 to i64
  %add1 = add i64 %conv1, %bias
  store volatile i64 %add1, ptr @iptr
  %conv2 = bitcast double %mul2 to i64
  %add2 = add i64 %conv2, %bias
  store volatile i64 %add2, ptr @iptr
  %conv3 = bitcast double %mul3 to i64
  %add3 = add i64 %conv3, %bias
  store volatile i64 %add3, ptr @iptr
  %conv4 = bitcast double %mul4 to i64
  %add4 = add i64 %conv4, %bias
  store volatile i64 %add4, ptr @iptr
  %conv5 = bitcast double %mul5 to i64
  %add5 = add i64 %conv5, %bias
  store volatile i64 %add5, ptr @iptr
  %conv6 = bitcast double %mul6 to i64
  %add6 = add i64 %conv6, %bias
  store volatile i64 %add6, ptr @iptr
  %conv7 = bitcast double %mul7 to i64
  %add7 = add i64 %conv7, %bias
  store volatile i64 %add7, ptr @iptr
  %conv8 = bitcast double %mul8 to i64
  %add8 = add i64 %conv8, %bias
  store volatile i64 %add8, ptr @iptr
  %conv9 = bitcast double %mul9 to i64
  %add9 = add i64 %conv9, %bias
  store volatile i64 %add9, ptr @iptr

  %unused2 = call i64 @foo()

  ret void
}