; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512f | FileCheck %s

 | define void @eggs(ptr %arg, ptr %arg1, ptr %arg2, ptr %arg3, ptr %arg4, ptr %arg5, i64 %arg6, i64 %arg7, i64 %arg8, i64 %arg9, i64 %arg10, i64 %arg11, i64 %arg12, ptr %arg13, ptr %arg14) nounwind { | 
 | ; CHECK-LABEL: eggs: | 
 | ; CHECK:       ## %bb.0: ## %bb | 
 | ; CHECK-NEXT:    pushq %r15 | 
 | ; CHECK-NEXT:    pushq %r14 | 
 | ; CHECK-NEXT:    pushq %r12 | 
 | ; CHECK-NEXT:    pushq %rbx | 
 | ; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %r14 | 
 | ; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rax | 
 | ; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %r15 | 
 | ; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %r12 | 
 | ; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %r11 | 
 | ; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %r10 | 
 | ; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rbx | 
 | ; CHECK-NEXT:    leaq (%rbx,%r10,8), %r10 | 
 | ; CHECK-NEXT:    leaq (%rbx,%r11,8), %r11 | 
 | ; CHECK-NEXT:    vxorpd %xmm0, %xmm0, %xmm0 | 
 | ; CHECK-NEXT:    xorl %ebx, %ebx | 
 | ; CHECK-NEXT:    vmovupd (%r14,%r15,8), %zmm1 | 
 | ; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %r15 | 
 | ; CHECK-NEXT:    addq %r12, %r15 | 
 | ; CHECK-NEXT:    vmovupd (%r14,%r15,8), %zmm2 | 
 | ; CHECK-NEXT:    addq {{[0-9]+}}(%rsp), %r12 | 
 | ; CHECK-NEXT:    vmovupd (%r14,%r12,8), %zmm8 | 
 | ; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3 | 
 | ; CHECK-NEXT:    vxorpd %xmm4, %xmm4, %xmm4 | 
 | ; CHECK-NEXT:    vxorpd %xmm5, %xmm5, %xmm5 | 
 | ; CHECK-NEXT:    vxorpd %xmm6, %xmm6, %xmm6 | 
 | ; CHECK-NEXT:    vxorpd %xmm7, %xmm7, %xmm7 | 
 | ; CHECK-NEXT:    .p2align 4, 0x90 | 
 | ; CHECK-NEXT:  LBB0_1: ## %bb15 | 
 | ; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1 | 
 | ; CHECK-NEXT:    vbroadcastsd (%r11,%rbx,8), %zmm9 | 
 | ; CHECK-NEXT:    vfmadd231pd {{.*#+}} zmm0 = (zmm1 * zmm9) + zmm0 | 
 | ; CHECK-NEXT:    vfmadd231pd {{.*#+}} zmm3 = (zmm2 * zmm9) + zmm3 | 
 | ; CHECK-NEXT:    vfmadd231pd {{.*#+}} zmm4 = (zmm8 * zmm9) + zmm4 | 
 | ; CHECK-NEXT:    vbroadcastsd (%r10,%rbx,8), %zmm9 | 
 | ; CHECK-NEXT:    vfmadd231pd {{.*#+}} zmm5 = (zmm1 * zmm9) + zmm5 | 
 | ; CHECK-NEXT:    vfmadd231pd {{.*#+}} zmm6 = (zmm2 * zmm9) + zmm6 | 
 | ; CHECK-NEXT:    vfmadd231pd {{.*#+}} zmm7 = (zmm8 * zmm9) + zmm7 | 
 | ; CHECK-NEXT:    incq %rbx | 
 | ; CHECK-NEXT:    cmpq %rbx, %rax | 
 | ; CHECK-NEXT:    jne LBB0_1 | 
 | ; CHECK-NEXT:  ## %bb.2: ## %bb51 | 
 | ; CHECK-NEXT:    vmovapd %zmm0, (%rdi) | 
 | ; CHECK-NEXT:    vmovapd %zmm3, (%rsi) | 
 | ; CHECK-NEXT:    vmovapd %zmm4, (%rdx) | 
 | ; CHECK-NEXT:    vmovapd %zmm5, (%rcx) | 
 | ; CHECK-NEXT:    vmovapd %zmm6, (%r8) | 
 | ; CHECK-NEXT:    vmovapd %zmm7, (%r9) | 
 | ; CHECK-NEXT:    popq %rbx | 
 | ; CHECK-NEXT:    popq %r12 | 
 | ; CHECK-NEXT:    popq %r14 | 
 | ; CHECK-NEXT:    popq %r15 | 
 | ; CHECK-NEXT:    vzeroupper | 
 | ; CHECK-NEXT:    retq | 
 | bb: | 
 |   br label %bb15 | 
 |  | 
 | bb15:                                             ; preds = %bb15, %bb | 
 |   %tmp = phi <8 x double> [ zeroinitializer, %bb ], [ %tmp38, %bb15 ] | 
 |   %tmp16 = phi <8 x double> [ zeroinitializer, %bb ], [ %tmp39, %bb15 ] | 
 |   %tmp17 = phi <8 x double> [ zeroinitializer, %bb ], [ %tmp40, %bb15 ] | 
 |   %tmp18 = phi <8 x double> [ zeroinitializer, %bb ], [ %tmp46, %bb15 ] | 
 |   %tmp19 = phi <8 x double> [ zeroinitializer, %bb ], [ %tmp47, %bb15 ] | 
 |   %tmp20 = phi <8 x double> [ zeroinitializer, %bb ], [ %tmp48, %bb15 ] | 
 |   %tmp21 = phi i64 [ 0, %bb ], [ %tmp49, %bb15 ] | 
 |   %tmp22 = getelementptr inbounds double, ptr %arg14, i64 %arg11 | 
 |   %tmp24 = load <8 x double>, ptr %tmp22, align 8 | 
 |   %tmp25 = add i64 %arg10, %arg6 | 
 |   %tmp26 = getelementptr inbounds double, ptr %arg14, i64 %tmp25 | 
 |   %tmp28 = load <8 x double>, ptr %tmp26, align 8 | 
 |   %tmp29 = add i64 %arg10, %arg7 | 
 |   %tmp30 = getelementptr inbounds double, ptr %arg14, i64 %tmp29 | 
 |   %tmp32 = load <8 x double>, ptr %tmp30, align 8 | 
 |   %tmp33 = add i64 %tmp21, %arg8 | 
 |   %tmp34 = getelementptr inbounds double, ptr %arg13, i64 %tmp33 | 
 |   %tmp35 = load double, ptr %tmp34, align 8 | 
 |   %tmp36 = insertelement <8 x double> undef, double %tmp35, i32 0 | 
 |   %tmp37 = shufflevector <8 x double> %tmp36, <8 x double> undef, <8 x i32> zeroinitializer | 
 |   %tmp38 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %tmp24, <8 x double> %tmp37, <8 x double> %tmp) | 
 |   %tmp39 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %tmp28, <8 x double> %tmp37, <8 x double> %tmp16) | 
 |   %tmp40 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %tmp32, <8 x double> %tmp37, <8 x double> %tmp17) | 
 |   %tmp41 = add i64 %tmp21, %arg9 | 
 |   %tmp42 = getelementptr inbounds double, ptr %arg13, i64 %tmp41 | 
 |   %tmp43 = load double, ptr %tmp42, align 8 | 
 |   %tmp44 = insertelement <8 x double> undef, double %tmp43, i32 0 | 
 |   %tmp45 = shufflevector <8 x double> %tmp44, <8 x double> undef, <8 x i32> zeroinitializer | 
 |   %tmp46 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %tmp24, <8 x double> %tmp45, <8 x double> %tmp18) | 
 |   %tmp47 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %tmp28, <8 x double> %tmp45, <8 x double> %tmp19) | 
 |   %tmp48 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %tmp32, <8 x double> %tmp45, <8 x double> %tmp20) | 
 |   %tmp49 = add nuw nsw i64 %tmp21, 1 | 
 |   %tmp50 = icmp eq i64 %tmp49, %arg12 | 
 |   br i1 %tmp50, label %bb51, label %bb15 | 
 |  | 
 | bb51:                                             ; preds = %bb15 | 
 |   store <8 x double> %tmp38, ptr %arg | 
 |   store <8 x double> %tmp39, ptr %arg1 | 
 |   store <8 x double> %tmp40, ptr %arg2 | 
 |   store <8 x double> %tmp46, ptr %arg3 | 
 |   store <8 x double> %tmp47, ptr %arg4 | 
 |   store <8 x double> %tmp48, ptr %arg5 | 
 |   ret void | 
 | } | 
 |  | 
 | declare <8 x double> @llvm.fmuladd.v8f64(<8 x double>, <8 x double>, <8 x double>) |