| # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py |
| # RUN: llvm-mca -mtriple=aarch64 -mcpu=neoverse-v1 --instruction-info=0 --resource-pressure=0 --timeline --timeline-max-iterations=1 < %s | FileCheck %s |
| |
| # G01: LD1 to one register (.1d/.2d/.2s/.4h/.4s), immediate post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G01 |
| ld1 { v1.1d }, [x27], #8 |
| add x0, x27, 1 |
| ld1 { v1.2d }, [x27], #16 |
| add x0, x27, 1 |
| ld1 { v1.2s }, [x27], #8 |
| add x0, x27, 1 |
| ld1 { v1.4h }, [x27], #8 |
| add x0, x27, 1 |
| ld1 { v1.4s }, [x27], #16 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G02: LD1 to one register, .8b/.8h/.16b immediate post-index then .1d/.2d register (x28) post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G02 |
| ld1 { v1.8b }, [x27], #8 |
| add x0, x27, 1 |
| ld1 { v1.8h }, [x27], #16 |
| add x0, x27, 1 |
| ld1 { v1.16b }, [x27], #16 |
| add x0, x27, 1 |
| ld1 { v1.1d }, [x27], x28 |
| add x0, x27, 1 |
| ld1 { v1.2d }, [x27], x28 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G03: LD1 to one register (.2s/.4h/.4s/.8b/.8h), register (x28) post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G03 |
| ld1 { v1.2s }, [x27], x28 |
| add x0, x27, 1 |
| ld1 { v1.4h }, [x27], x28 |
| add x0, x27, 1 |
| ld1 { v1.4s }, [x27], x28 |
| add x0, x27, 1 |
| ld1 { v1.8b }, [x27], x28 |
| add x0, x27, 1 |
| ld1 { v1.8h }, [x27], x28 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G04: LD1 one-register .16b with register post-index, then LD1 two-register (.1d/.2d/.2s/.4h) with immediate post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G04 |
| ld1 { v1.16b }, [x27], x28 |
| add x0, x27, 1 |
| ld1 { v1.1d, v2.1d }, [x27], #16 |
| add x0, x27, 1 |
| ld1 { v1.2d, v2.2d }, [x27], #32 |
| add x0, x27, 1 |
| ld1 { v1.2s, v2.2s }, [x27], #16 |
| add x0, x27, 1 |
| ld1 { v1.4h, v2.4h }, [x27], #16 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G05: LD1 two-register, .4s/.8b/.8h/.16b immediate post-index then .1d register (x28) post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G05 |
| ld1 { v1.4s, v2.4s }, [x27], #32 |
| add x0, x27, 1 |
| ld1 { v1.8b, v2.8b }, [x27], #16 |
| add x0, x27, 1 |
| ld1 { v1.8h, v2.8h }, [x27], #32 |
| add x0, x27, 1 |
| ld1 { v1.16b, v2.16b }, [x27], #32 |
| add x0, x27, 1 |
| ld1 { v1.1d, v2.1d }, [x27], x28 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G06: LD1 two-register (.2d/.2s/.4h/.4s/.8b), register (x28) post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G06 |
| ld1 { v1.2d, v2.2d }, [x27], x28 |
| add x0, x27, 1 |
| ld1 { v1.2s, v2.2s }, [x27], x28 |
| add x0, x27, 1 |
| ld1 { v1.4h, v2.4h }, [x27], x28 |
| add x0, x27, 1 |
| ld1 { v1.4s, v2.4s }, [x27], x28 |
| add x0, x27, 1 |
| ld1 { v1.8b, v2.8b }, [x27], x28 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G07: LD1 two-register .8h/.16b with register post-index, then LD1 three-register (.1d/.2d/.2s) with immediate post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G07 |
| ld1 { v1.8h, v2.8h }, [x27], x28 |
| add x0, x27, 1 |
| ld1 { v1.16b, v2.16b }, [x27], x28 |
| add x0, x27, 1 |
| ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24 |
| add x0, x27, 1 |
| ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48 |
| add x0, x27, 1 |
| ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G08: LD1 three-register (.4h/.4s/.8b/.8h/.16b), immediate post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G08 |
| ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24 |
| add x0, x27, 1 |
| ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48 |
| add x0, x27, 1 |
| ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24 |
| add x0, x27, 1 |
| ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48 |
| add x0, x27, 1 |
| ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G09: LD1 three-register (.1d/.2d/.2s/.4h/.4s), register (x28) post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G09 |
| ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28 |
| add x0, x27, 1 |
| ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28 |
| add x0, x27, 1 |
| ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28 |
| add x0, x27, 1 |
| ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28 |
| add x0, x27, 1 |
| ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G10: LD1 three-register .8b/.8h/.16b with register post-index, then LD1 four-register (.1d/.2d) with immediate post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G10 |
| ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28 |
| add x0, x27, 1 |
| ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28 |
| add x0, x27, 1 |
| ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28 |
| add x0, x27, 1 |
| ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 |
| add x0, x27, 1 |
| ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G11: LD1 four-register (.2s/.4h/.4s/.8b/.8h), immediate post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G11 |
| ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 |
| add x0, x27, 1 |
| ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 |
| add x0, x27, 1 |
| ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 |
| add x0, x27, 1 |
| ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 |
| add x0, x27, 1 |
| ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G12: LD1 four-register, .16b immediate post-index then .1d/.2d/.2s/.4h register (x28) post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G12 |
| ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 |
| add x0, x27, 1 |
| ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 |
| add x0, x27, 1 |
| ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 |
| add x0, x27, 1 |
| ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 |
| add x0, x27, 1 |
| ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G13: LD1 four-register .4s/.8b/.8h/.16b with register post-index, then LD1 single-lane .b[0] with immediate post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G13 |
| ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 |
| add x0, x27, 1 |
| ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 |
| add x0, x27, 1 |
| ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 |
| add x0, x27, 1 |
| ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 |
| add x0, x27, 1 |
| ld1 { v1.b }[0], [x27], #1 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G14: LD1 single-lane, byte lanes [8]/[0] and halfword lanes [0]/[4], immediate and register post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G14 |
| ld1 { v1.b }[8], [x27], #1 |
| add x0, x27, 1 |
| ld1 { v1.b }[0], [x27], x28 |
| add x0, x27, 1 |
| ld1 { v1.b }[8], [x27], x28 |
| add x0, x27, 1 |
| ld1 { v1.h }[0], [x27], #2 |
| add x0, x27, 1 |
| ld1 { v1.h }[4], [x27], #2 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G15: LD1 single-lane, halfword lanes with register post-index, then .s[0] and .d[0] lanes; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G15 |
| ld1 { v1.h }[0], [x27], x28 |
| add x0, x27, 1 |
| ld1 { v1.h }[4], [x27], x28 |
| add x0, x27, 1 |
| ld1 { v1.s }[0], [x27], #4 |
| add x0, x27, 1 |
| ld1 { v1.s }[0], [x27], x28 |
| add x0, x27, 1 |
| ld1 { v1.d }[0], [x27], #8 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G16: LD1 single-lane .d[0] with register post-index, then LD1R (.1d/.2d/.2s/.4h) with immediate post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G16 |
| ld1 { v1.d }[0], [x27], x28 |
| add x0, x27, 1 |
| ld1r { v1.1d }, [x27], #8 |
| add x0, x27, 1 |
| ld1r { v1.2d }, [x27], #8 |
| add x0, x27, 1 |
| ld1r { v1.2s }, [x27], #4 |
| add x0, x27, 1 |
| ld1r { v1.4h }, [x27], #2 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G17: LD1R, .4s/.8b/.8h/.16b immediate post-index then .1d register (x28) post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G17 |
| ld1r { v1.4s }, [x27], #4 |
| add x0, x27, 1 |
| ld1r { v1.8b }, [x27], #1 |
| add x0, x27, 1 |
| ld1r { v1.8h }, [x27], #2 |
| add x0, x27, 1 |
| ld1r { v1.16b }, [x27], #1 |
| add x0, x27, 1 |
| ld1r { v1.1d }, [x27], x28 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G18: LD1R (.2d/.2s/.4h/.4s/.8b), register (x28) post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G18 |
| ld1r { v1.2d }, [x27], x28 |
| add x0, x27, 1 |
| ld1r { v1.2s }, [x27], x28 |
| add x0, x27, 1 |
| ld1r { v1.4h }, [x27], x28 |
| add x0, x27, 1 |
| ld1r { v1.4s }, [x27], x28 |
| add x0, x27, 1 |
| ld1r { v1.8b }, [x27], x28 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G19: LD1R .8h/.16b with register post-index, then LD2 (.2d/.2s/.4h) with immediate post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G19 |
| ld1r { v1.8h }, [x27], x28 |
| add x0, x27, 1 |
| ld1r { v1.16b }, [x27], x28 |
| add x0, x27, 1 |
| ld2 { v1.2d, v2.2d }, [x27], #32 |
| add x0, x27, 1 |
| ld2 { v1.2s, v2.2s }, [x27], #16 |
| add x0, x27, 1 |
| ld2 { v1.4h, v2.4h }, [x27], #16 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G20: LD2, .4s/.8b/.8h/.16b immediate post-index then .2d register (x28) post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G20 |
| ld2 { v1.4s, v2.4s }, [x27], #32 |
| add x0, x27, 1 |
| ld2 { v1.8b, v2.8b }, [x27], #16 |
| add x0, x27, 1 |
| ld2 { v1.8h, v2.8h }, [x27], #32 |
| add x0, x27, 1 |
| ld2 { v1.16b, v2.16b }, [x27], #32 |
| add x0, x27, 1 |
| ld2 { v1.2d, v2.2d }, [x27], x28 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G21: LD2 (.2s/.4h/.4s/.8b/.8h), register (x28) post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G21 |
| ld2 { v1.2s, v2.2s }, [x27], x28 |
| add x0, x27, 1 |
| ld2 { v1.4h, v2.4h }, [x27], x28 |
| add x0, x27, 1 |
| ld2 { v1.4s, v2.4s }, [x27], x28 |
| add x0, x27, 1 |
| ld2 { v1.8b, v2.8b }, [x27], x28 |
| add x0, x27, 1 |
| ld2 { v1.8h, v2.8h }, [x27], x28 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G22: LD2 .16b with register post-index, then LD2 single-lane byte lanes [0]/[8] with immediate and register post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G22 |
| ld2 { v1.16b, v2.16b }, [x27], x28 |
| add x0, x27, 1 |
| ld2 { v1.b, v2.b }[0], [x27], #2 |
| add x0, x27, 1 |
| ld2 { v1.b, v2.b }[8], [x27], #2 |
| add x0, x27, 1 |
| ld2 { v1.b, v2.b }[0], [x27], x28 |
| add x0, x27, 1 |
| ld2 { v1.b, v2.b }[8], [x27], x28 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G23: LD2 single-lane, halfword lanes [0]/[4] with immediate and register post-index, then .s[0] with immediate post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G23 |
| ld2 { v1.h, v2.h }[0], [x27], #4 |
| add x0, x27, 1 |
| ld2 { v1.h, v2.h }[4], [x27], #4 |
| add x0, x27, 1 |
| ld2 { v1.h, v2.h }[0], [x27], x28 |
| add x0, x27, 1 |
| ld2 { v1.h, v2.h }[4], [x27], x28 |
| add x0, x27, 1 |
| ld2 { v1.s, v2.s }[0], [x27], #8 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G24: LD2 single-lane .s[0] (register post-index) and .d[0] (both forms), then LD2R (.1d/.2d) with immediate post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G24 |
| ld2 { v1.s, v2.s }[0], [x27], x28 |
| add x0, x27, 1 |
| ld2 { v1.d, v2.d }[0], [x27], #16 |
| add x0, x27, 1 |
| ld2 { v1.d, v2.d }[0], [x27], x28 |
| add x0, x27, 1 |
| ld2r { v1.1d, v2.1d }, [x27], #16 |
| add x0, x27, 1 |
| ld2r { v1.2d, v2.2d }, [x27], #16 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G25: LD2R (.2s/.4h/.4s/.8b/.8h), immediate post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G25 |
| ld2r { v1.2s, v2.2s }, [x27], #8 |
| add x0, x27, 1 |
| ld2r { v1.4h, v2.4h }, [x27], #4 |
| add x0, x27, 1 |
| ld2r { v1.4s, v2.4s }, [x27], #8 |
| add x0, x27, 1 |
| ld2r { v1.8b, v2.8b }, [x27], #2 |
| add x0, x27, 1 |
| ld2r { v1.8h, v2.8h }, [x27], #4 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G26: LD2R, .16b immediate post-index then .1d/.2d/.2s/.4h register (x28) post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G26 |
| ld2r { v1.16b, v2.16b }, [x27], #2 |
| add x0, x27, 1 |
| ld2r { v1.1d, v2.1d }, [x27], x28 |
| add x0, x27, 1 |
| ld2r { v1.2d, v2.2d }, [x27], x28 |
| add x0, x27, 1 |
| ld2r { v1.2s, v2.2s }, [x27], x28 |
| add x0, x27, 1 |
| ld2r { v1.4h, v2.4h }, [x27], x28 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G27: LD2R .4s/.8b/.8h/.16b with register post-index, then LD3 .2d with immediate post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G27 |
| ld2r { v1.4s, v2.4s }, [x27], x28 |
| add x0, x27, 1 |
| ld2r { v1.8b, v2.8b }, [x27], x28 |
| add x0, x27, 1 |
| ld2r { v1.8h, v2.8h }, [x27], x28 |
| add x0, x27, 1 |
| ld2r { v1.16b, v2.16b }, [x27], x28 |
| add x0, x27, 1 |
| ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G28: LD3 (.2s/.4h/.4s/.8b/.8h), immediate post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G28 |
| ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24 |
| add x0, x27, 1 |
| ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24 |
| add x0, x27, 1 |
| ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48 |
| add x0, x27, 1 |
| ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24 |
| add x0, x27, 1 |
| ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G29: LD3, .16b immediate post-index then .2d/.2s/.4h/.4s register (x28) post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G29 |
| ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48 |
| add x0, x27, 1 |
| ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28 |
| add x0, x27, 1 |
| ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28 |
| add x0, x27, 1 |
| ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28 |
| add x0, x27, 1 |
| ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G30: LD3 .8b/.8h/.16b with register post-index, then LD3 single-lane byte lanes [0]/[8] with immediate post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G30 |
| ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28 |
| add x0, x27, 1 |
| ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28 |
| add x0, x27, 1 |
| ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28 |
| add x0, x27, 1 |
| ld3 { v1.b, v2.b, v3.b }[0], [x27], #3 |
| add x0, x27, 1 |
| ld3 { v1.b, v2.b, v3.b }[8], [x27], #3 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G31: LD3 single-lane, byte lanes with register post-index, halfword lanes [0]/[4] with immediate post-index, then .h[0] with register post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G31 |
| ld3 { v1.b, v2.b, v3.b }[0], [x27], x28 |
| add x0, x27, 1 |
| ld3 { v1.b, v2.b, v3.b }[8], [x27], x28 |
| add x0, x27, 1 |
| ld3 { v1.h, v2.h, v3.h }[0], [x27], #6 |
| add x0, x27, 1 |
| ld3 { v1.h, v2.h, v3.h }[4], [x27], #6 |
| add x0, x27, 1 |
| ld3 { v1.h, v2.h, v3.h }[0], [x27], x28 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G32: LD3 single-lane, .h[4] with register post-index, then .s[0] and .d[0] with immediate and register post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G32 |
| ld3 { v1.h, v2.h, v3.h }[4], [x27], x28 |
| add x0, x27, 1 |
| ld3 { v1.s, v2.s, v3.s }[0], [x27], #12 |
| add x0, x27, 1 |
| ld3 { v1.s, v2.s, v3.s }[0], [x27], x28 |
| add x0, x27, 1 |
| ld3 { v1.d, v2.d, v3.d }[0], [x27], #24 |
| add x0, x27, 1 |
| ld3 { v1.d, v2.d, v3.d }[0], [x27], x28 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G33: LD3R (.1d/.2d/.2s/.4h/.4s), immediate post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G33 |
| ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24 |
| add x0, x27, 1 |
| ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24 |
| add x0, x27, 1 |
| ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12 |
| add x0, x27, 1 |
| ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6 |
| add x0, x27, 1 |
| ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G34: LD3R, .8b/.8h/.16b immediate post-index then .1d/.2d register (x28) post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G34 |
| ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3 |
| add x0, x27, 1 |
| ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6 |
| add x0, x27, 1 |
| ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3 |
| add x0, x27, 1 |
| ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28 |
| add x0, x27, 1 |
| ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G35: LD3R (.2s/.4h/.4s/.8b/.8h), register (x28) post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G35 |
| ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28 |
| add x0, x27, 1 |
| ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28 |
| add x0, x27, 1 |
| ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28 |
| add x0, x27, 1 |
| ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28 |
| add x0, x27, 1 |
| ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G36: LD3R .16b with register post-index, then LD4 (.2d/.2s/.4h/.4s) with immediate post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G36 |
| ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28 |
| add x0, x27, 1 |
| ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 |
| add x0, x27, 1 |
| ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 |
| add x0, x27, 1 |
| ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 |
| add x0, x27, 1 |
| ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G37: LD4, .8b/.8h/.16b immediate post-index then .2d/.2s register (x28) post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G37 |
| ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 |
| add x0, x27, 1 |
| ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 |
| add x0, x27, 1 |
| ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 |
| add x0, x27, 1 |
| ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 |
| add x0, x27, 1 |
| ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G38: LD4 (.4h/.4s/.8b/.8h/.16b), register (x28) post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G38 |
| ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 |
| add x0, x27, 1 |
| ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 |
| add x0, x27, 1 |
| ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 |
| add x0, x27, 1 |
| ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 |
| add x0, x27, 1 |
| ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G39: LD4 single-lane, byte lanes [0]/[8] with immediate and register post-index, then .h[0] with immediate post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G39 |
| ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 |
| add x0, x27, 1 |
| ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 |
| add x0, x27, 1 |
| ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 |
| add x0, x27, 1 |
| ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 |
| add x0, x27, 1 |
| ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G40: LD4 single-lane, .h[4] immediate post-index, halfword lanes with register post-index, then .s[0] with both forms; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G40 |
| ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 |
| add x0, x27, 1 |
| ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 |
| add x0, x27, 1 |
| ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 |
| add x0, x27, 1 |
| ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 |
| add x0, x27, 1 |
| ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G41: LD4 single-lane .d[0] with immediate and register post-index, then LD4R (.1d/.2d/.2s) with immediate post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G41 |
| ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 |
| add x0, x27, 1 |
| ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 |
| add x0, x27, 1 |
| ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 |
| add x0, x27, 1 |
| ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32 |
| add x0, x27, 1 |
| ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G42: LD4R (.4h/.4s/.8b/.8h/.16b), immediate post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G42 |
| ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8 |
| add x0, x27, 1 |
| ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16 |
| add x0, x27, 1 |
| ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4 |
| add x0, x27, 1 |
| ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8 |
| add x0, x27, 1 |
| ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G43: LD4R (.1d/.2d/.2s/.4h/.4s), register (x28) post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G43 |
| ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 |
| add x0, x27, 1 |
| ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 |
| add x0, x27, 1 |
| ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 |
| add x0, x27, 1 |
| ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 |
| add x0, x27, 1 |
| ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G44: LD4R .8b/.8h/.16b with register post-index, then LDP of S and D register pairs with immediate post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G44 |
| ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 |
| add x0, x27, 1 |
| ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 |
| add x0, x27, 1 |
| ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 |
| add x0, x27, 1 |
| ldp s1, s2, [x27], #248 |
| add x0, x27, 1 |
| ldp d1, d2, [x27], #496 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G45: LDP Q post-index, LDP S/D/Q pre-index, then LDP W post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G45 |
| ldp q1, q2, [x27], #992 |
| add x0, x27, 1 |
| ldp s1, s2, [x27, #248]! |
| add x0, x27, 1 |
| ldp d1, d2, [x27, #496]! |
| add x0, x27, 1 |
| ldp q1, q2, [x27, #992]! |
| add x0, x27, 1 |
| ldp w1, w2, [x27], #248 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G46: LDP X post-index, LDP W/X pre-index, then LDPSW post- and pre-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G46 |
| ldp x1, x2, [x27], #496 |
| add x0, x27, 1 |
| ldp w1, w2, [x27, #248]! |
| add x0, x27, 1 |
| ldp x1, x2, [x27, #496]! |
| add x0, x27, 1 |
| ldpsw x1, x2, [x27], #248 |
| add x0, x27, 1 |
| ldpsw x1, x2, [x27, #248]! |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G47: LDR of B/H/S/D/Q registers, immediate post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G47 |
| ldr b1, [x27], #254 |
| add x0, x27, 1 |
| ldr h1, [x27], #254 |
| add x0, x27, 1 |
| ldr s1, [x27], #254 |
| add x0, x27, 1 |
| ldr d1, [x27], #254 |
| add x0, x27, 1 |
| ldr q1, [x27], #254 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G48: LDR of B/H/S/D/Q registers, pre-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G48 |
| ldr b1, [x27, #254]! |
| add x0, x27, 1 |
| ldr h1, [x27, #254]! |
| add x0, x27, 1 |
| ldr s1, [x27, #254]! |
| add x0, x27, 1 |
| ldr d1, [x27, #254]! |
| add x0, x27, 1 |
| ldr q1, [x27, #254]! |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G49: LDR W/X post-index, LDR W/X pre-index, then LDRB post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G49 |
| ldr w1, [x27], #254 |
| add x0, x27, 1 |
| ldr x1, [x27], #254 |
| add x0, x27, 1 |
| ldr w1, [x27, #254]! |
| add x0, x27, 1 |
| ldr x1, [x27, #254]! |
| add x0, x27, 1 |
| ldrb w1, [x27], #254 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G50: LDRB pre-index, LDRH post- and pre-index, then LDRSB (W and X destinations) post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G50 |
| ldrb w1, [x27, #254]! |
| add x0, x27, 1 |
| ldrh w1, [x27], #254 |
| add x0, x27, 1 |
| ldrh w1, [x27, #254]! |
| add x0, x27, 1 |
| ldrsb w1, [x27], #254 |
| add x0, x27, 1 |
| ldrsb x1, [x27], #254 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G51: LDRSB (W and X) pre-index, LDRSH (W and X) post-index, then LDRSH W pre-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G51 |
| ldrsb w1, [x27, #254]! |
| add x0, x27, 1 |
| ldrsb x1, [x27, #254]! |
| add x0, x27, 1 |
| ldrsh w1, [x27], #254 |
| add x0, x27, 1 |
| ldrsh x1, [x27], #254 |
| add x0, x27, 1 |
| ldrsh w1, [x27, #254]! |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G52: LDRSH X pre-index, LDRSW post- and pre-index, then ST1 one-register (.1d/.2d) with immediate post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G52 |
| ldrsh x1, [x27, #254]! |
| add x0, x27, 1 |
| ldrsw x1, [x27], #254 |
| add x0, x27, 1 |
| ldrsw x1, [x27, #254]! |
| add x0, x27, 1 |
| st1 { v1.1d }, [x27], #8 |
| add x0, x27, 1 |
| st1 { v1.2d }, [x27], #16 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G53: ST1 one-register (.2s/.4h/.4s/.8b/.8h), immediate post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G53 |
| st1 { v1.2s }, [x27], #8 |
| add x0, x27, 1 |
| st1 { v1.4h }, [x27], #8 |
| add x0, x27, 1 |
| st1 { v1.4s }, [x27], #16 |
| add x0, x27, 1 |
| st1 { v1.8b }, [x27], #8 |
| add x0, x27, 1 |
| st1 { v1.8h }, [x27], #16 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G54: ST1 one-register, .16b immediate post-index then .1d/.2d/.2s/.4h register (x28) post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G54 |
| st1 { v1.16b }, [x27], #16 |
| add x0, x27, 1 |
| st1 { v1.1d }, [x27], x28 |
| add x0, x27, 1 |
| st1 { v1.2d }, [x27], x28 |
| add x0, x27, 1 |
| st1 { v1.2s }, [x27], x28 |
| add x0, x27, 1 |
| st1 { v1.4h }, [x27], x28 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G55: ST1 one-register .4s/.8b/.8h/.16b with register post-index, then ST1 two-register .1d with immediate post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G55 |
| st1 { v1.4s }, [x27], x28 |
| add x0, x27, 1 |
| st1 { v1.8b }, [x27], x28 |
| add x0, x27, 1 |
| st1 { v1.8h }, [x27], x28 |
| add x0, x27, 1 |
| st1 { v1.16b }, [x27], x28 |
| add x0, x27, 1 |
| st1 { v1.1d, v2.1d }, [x27], #16 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G56: ST1 two-register (.2d/.2s/.4h/.4s/.8b), immediate post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G56 |
| st1 { v1.2d, v2.2d }, [x27], #32 |
| add x0, x27, 1 |
| st1 { v1.2s, v2.2s }, [x27], #16 |
| add x0, x27, 1 |
| st1 { v1.4h, v2.4h }, [x27], #16 |
| add x0, x27, 1 |
| st1 { v1.4s, v2.4s }, [x27], #32 |
| add x0, x27, 1 |
| st1 { v1.8b, v2.8b }, [x27], #16 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G57: ST1 two-register, .8h/.16b immediate post-index then .1d/.2d/.2s register (x28) post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G57 |
| st1 { v1.8h, v2.8h }, [x27], #32 |
| add x0, x27, 1 |
| st1 { v1.16b, v2.16b }, [x27], #32 |
| add x0, x27, 1 |
| st1 { v1.1d, v2.1d }, [x27], x28 |
| add x0, x27, 1 |
| st1 { v1.2d, v2.2d }, [x27], x28 |
| add x0, x27, 1 |
| st1 { v1.2s, v2.2s }, [x27], x28 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G58: ST1 two-register (.4h/.4s/.8b/.8h/.16b), register (x28) post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G58 |
| st1 { v1.4h, v2.4h }, [x27], x28 |
| add x0, x27, 1 |
| st1 { v1.4s, v2.4s }, [x27], x28 |
| add x0, x27, 1 |
| st1 { v1.8b, v2.8b }, [x27], x28 |
| add x0, x27, 1 |
| st1 { v1.8h, v2.8h }, [x27], x28 |
| add x0, x27, 1 |
| st1 { v1.16b, v2.16b }, [x27], x28 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G59: ST1 three-register (.1d/.2d/.2s/.4h/.4s), immediate post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G59 |
| st1 { v1.1d, v2.1d, v3.1d }, [x27], #24 |
| add x0, x27, 1 |
| st1 { v1.2d, v2.2d, v3.2d }, [x27], #48 |
| add x0, x27, 1 |
| st1 { v1.2s, v2.2s, v3.2s }, [x27], #24 |
| add x0, x27, 1 |
| st1 { v1.4h, v2.4h, v3.4h }, [x27], #24 |
| add x0, x27, 1 |
| st1 { v1.4s, v2.4s, v3.4s }, [x27], #48 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G60: ST1 three-register, .8b/.8h/.16b immediate post-index then .1d/.2d register (x28) post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G60 |
| st1 { v1.8b, v2.8b, v3.8b }, [x27], #24 |
| add x0, x27, 1 |
| st1 { v1.8h, v2.8h, v3.8h }, [x27], #48 |
| add x0, x27, 1 |
| st1 { v1.16b, v2.16b, v3.16b }, [x27], #48 |
| add x0, x27, 1 |
| st1 { v1.1d, v2.1d, v3.1d }, [x27], x28 |
| add x0, x27, 1 |
| st1 { v1.2d, v2.2d, v3.2d }, [x27], x28 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G61: ST1 three-register (.2s/.4h/.4s/.8b/.8h), register (x28) post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G61 |
| st1 { v1.2s, v2.2s, v3.2s }, [x27], x28 |
| add x0, x27, 1 |
| st1 { v1.4h, v2.4h, v3.4h }, [x27], x28 |
| add x0, x27, 1 |
| st1 { v1.4s, v2.4s, v3.4s }, [x27], x28 |
| add x0, x27, 1 |
| st1 { v1.8b, v2.8b, v3.8b }, [x27], x28 |
| add x0, x27, 1 |
| st1 { v1.8h, v2.8h, v3.8h }, [x27], x28 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G62: ST1 three-register .16b with register post-index, then ST1 four-register (.1d/.2d/.2s/.4h) with immediate post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G62 |
| st1 { v1.16b, v2.16b, v3.16b }, [x27], x28 |
| add x0, x27, 1 |
| st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 |
| add x0, x27, 1 |
| st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 |
| add x0, x27, 1 |
| st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 |
| add x0, x27, 1 |
| st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G63: ST1 four-register, .4s/.8b/.8h/.16b immediate post-index then .1d register (x28) post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G63 |
| st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 |
| add x0, x27, 1 |
| st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 |
| add x0, x27, 1 |
| st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 |
| add x0, x27, 1 |
| st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 |
| add x0, x27, 1 |
| st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G64: ST1 four-register (.2d/.2s/.4h/.4s/.8b), register (x28) post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G64 |
| st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 |
| add x0, x27, 1 |
| st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 |
| add x0, x27, 1 |
| st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 |
| add x0, x27, 1 |
| st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 |
| add x0, x27, 1 |
| st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G65: ST1 four-register .8h/.16b with register post-index, then ST1 single-lane byte lanes ([0]/[8] immediate, [0] register post-index); each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G65 |
| st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 |
| add x0, x27, 1 |
| st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 |
| add x0, x27, 1 |
| st1 { v1.b }[0], [x27], #1 |
| add x0, x27, 1 |
| st1 { v1.b }[8], [x27], #1 |
| add x0, x27, 1 |
| st1 { v1.b }[0], [x27], x28 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G66: ST1 single-lane, .b[8] register post-index, then halfword lanes [0]/[4] with immediate and register post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G66 |
| st1 { v1.b }[8], [x27], x28 |
| add x0, x27, 1 |
| st1 { v1.h }[0], [x27], #2 |
| add x0, x27, 1 |
| st1 { v1.h }[4], [x27], #2 |
| add x0, x27, 1 |
| st1 { v1.h }[0], [x27], x28 |
| add x0, x27, 1 |
| st1 { v1.h }[4], [x27], x28 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G67: ST1 single-lane .s[0] and .d[0] with immediate and register post-index, then ST2 .2d with immediate post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G67 |
| st1 { v1.s }[0], [x27], #4 |
| add x0, x27, 1 |
| st1 { v1.s }[0], [x27], x28 |
| add x0, x27, 1 |
| st1 { v1.d }[0], [x27], #8 |
| add x0, x27, 1 |
| st1 { v1.d }[0], [x27], x28 |
| add x0, x27, 1 |
| st2 { v1.2d, v2.2d }, [x27], #32 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G68: ST2 (.2s/.4h/.4s/.8b/.8h), immediate post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G68 |
| st2 { v1.2s, v2.2s }, [x27], #16 |
| add x0, x27, 1 |
| st2 { v1.4h, v2.4h }, [x27], #16 |
| add x0, x27, 1 |
| st2 { v1.4s, v2.4s }, [x27], #32 |
| add x0, x27, 1 |
| st2 { v1.8b, v2.8b }, [x27], #16 |
| add x0, x27, 1 |
| st2 { v1.8h, v2.8h }, [x27], #32 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G69: ST2, .16b immediate post-index then .2d/.2s/.4h/.4s register (x28) post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G69 |
| st2 { v1.16b, v2.16b }, [x27], #32 |
| add x0, x27, 1 |
| st2 { v1.2d, v2.2d }, [x27], x28 |
| add x0, x27, 1 |
| st2 { v1.2s, v2.2s }, [x27], x28 |
| add x0, x27, 1 |
| st2 { v1.4h, v2.4h }, [x27], x28 |
| add x0, x27, 1 |
| st2 { v1.4s, v2.4s }, [x27], x28 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G70: ST2 .8b/.8h/.16b with register post-index, then ST2 single-lane byte lanes [0]/[8] with immediate post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G70 |
| st2 { v1.8b, v2.8b }, [x27], x28 |
| add x0, x27, 1 |
| st2 { v1.8h, v2.8h }, [x27], x28 |
| add x0, x27, 1 |
| st2 { v1.16b, v2.16b }, [x27], x28 |
| add x0, x27, 1 |
| st2 { v1.b, v2.b }[0], [x27], #2 |
| add x0, x27, 1 |
| st2 { v1.b, v2.b }[8], [x27], #2 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G71: ST2 single-lane, byte lanes with register post-index, halfword lanes [0]/[4] with immediate post-index, then .h[0] with register post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G71 |
| st2 { v1.b, v2.b }[0], [x27], x28 |
| add x0, x27, 1 |
| st2 { v1.b, v2.b }[8], [x27], x28 |
| add x0, x27, 1 |
| st2 { v1.h, v2.h }[0], [x27], #4 |
| add x0, x27, 1 |
| st2 { v1.h, v2.h }[4], [x27], #4 |
| add x0, x27, 1 |
| st2 { v1.h, v2.h }[0], [x27], x28 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G72: ST2 single-lane, .h[4] with register post-index, then .s[0] and .d[0] with immediate and register post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G72 |
| st2 { v1.h, v2.h }[4], [x27], x28 |
| add x0, x27, 1 |
| st2 { v1.s, v2.s }[0], [x27], #8 |
| add x0, x27, 1 |
| st2 { v1.s, v2.s }[0], [x27], x28 |
| add x0, x27, 1 |
| st2 { v1.d, v2.d }[0], [x27], #16 |
| add x0, x27, 1 |
| st2 { v1.d, v2.d }[0], [x27], x28 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G73: ST3 (.2d/.2s/.4h), immediate post-index; each add reads the written-back base x27. |
| # NOTE(review): this region holds three store/add pairs where most regions hold five - confirm this is intentional. |
| # LLVM-MCA-BEGIN G73 |
| st3 { v1.2d, v2.2d, v3.2d }, [x27], #48 |
| add x0, x27, 1 |
| st3 { v1.2s, v2.2s, v3.2s }, [x27], #24 |
| add x0, x27, 1 |
| st3 { v1.4h, v2.4h, v3.4h }, [x27], #24 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G74: ST3, .4s/.8b/.8h/.16b immediate post-index then .2d register (x28) post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G74 |
| st3 { v1.4s, v2.4s, v3.4s }, [x27], #48 |
| add x0, x27, 1 |
| st3 { v1.8b, v2.8b, v3.8b }, [x27], #24 |
| add x0, x27, 1 |
| st3 { v1.8h, v2.8h, v3.8h }, [x27], #48 |
| add x0, x27, 1 |
| st3 { v1.16b, v2.16b, v3.16b }, [x27], #48 |
| add x0, x27, 1 |
| st3 { v1.2d, v2.2d, v3.2d }, [x27], x28 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G75: ST3 (.2s/.4h/.4s/.8b/.8h), register (x28) post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G75 |
| st3 { v1.2s, v2.2s, v3.2s }, [x27], x28 |
| add x0, x27, 1 |
| st3 { v1.4h, v2.4h, v3.4h }, [x27], x28 |
| add x0, x27, 1 |
| st3 { v1.4s, v2.4s, v3.4s }, [x27], x28 |
| add x0, x27, 1 |
| st3 { v1.8b, v2.8b, v3.8b }, [x27], x28 |
| add x0, x27, 1 |
| st3 { v1.8h, v2.8h, v3.8h }, [x27], x28 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G76: ST3 .16b with register post-index, then ST3 single-lane byte lanes [0]/[8] with immediate and register post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G76 |
| st3 { v1.16b, v2.16b, v3.16b }, [x27], x28 |
| add x0, x27, 1 |
| st3 { v1.b, v2.b, v3.b }[0], [x27], #3 |
| add x0, x27, 1 |
| st3 { v1.b, v2.b, v3.b }[8], [x27], #3 |
| add x0, x27, 1 |
| st3 { v1.b, v2.b, v3.b }[0], [x27], x28 |
| add x0, x27, 1 |
| st3 { v1.b, v2.b, v3.b }[8], [x27], x28 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G77: ST3 single-lane, halfword lanes [0]/[4] with immediate and register post-index, then .s[0] with immediate post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G77 |
| st3 { v1.h, v2.h, v3.h }[0], [x27], #6 |
| add x0, x27, 1 |
| st3 { v1.h, v2.h, v3.h }[4], [x27], #6 |
| add x0, x27, 1 |
| st3 { v1.h, v2.h, v3.h }[0], [x27], x28 |
| add x0, x27, 1 |
| st3 { v1.h, v2.h, v3.h }[4], [x27], x28 |
| add x0, x27, 1 |
| st3 { v1.s, v2.s, v3.s }[0], [x27], #12 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G78: ST3 single-lane .s[0] (register post-index) and .d[0] (both forms), then ST4 (.2d/.2s) with immediate post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G78 |
| st3 { v1.s, v2.s, v3.s }[0], [x27], x28 |
| add x0, x27, 1 |
| st3 { v1.d, v2.d, v3.d }[0], [x27], #24 |
| add x0, x27, 1 |
| st3 { v1.d, v2.d, v3.d }[0], [x27], x28 |
| add x0, x27, 1 |
| st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 |
| add x0, x27, 1 |
| st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G79: ST4 (.4h/.4s/.8b/.8h/.16b), immediate post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G79 |
| st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 |
| add x0, x27, 1 |
| st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 |
| add x0, x27, 1 |
| st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 |
| add x0, x27, 1 |
| st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 |
| add x0, x27, 1 |
| st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G80: ST4 (.2d/.2s/.4h/.4s/.8b), register (x28) post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G80 |
| st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 |
| add x0, x27, 1 |
| st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 |
| add x0, x27, 1 |
| st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 |
| add x0, x27, 1 |
| st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 |
| add x0, x27, 1 |
| st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G81: ST4 .8h/.16b with register post-index, then ST4 single-lane byte lanes ([0]/[8] immediate, [0] register post-index); each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G81 |
| st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 |
| add x0, x27, 1 |
| st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 |
| add x0, x27, 1 |
| st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 |
| add x0, x27, 1 |
| st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 |
| add x0, x27, 1 |
| st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G82: ST4 single-lane, .b[8] register post-index, then halfword lanes [0]/[4] with immediate and register post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G82 |
| st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 |
| add x0, x27, 1 |
| st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 |
| add x0, x27, 1 |
| st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 |
| add x0, x27, 1 |
| st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 |
| add x0, x27, 1 |
| st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G83: ST4 single-lane .s[0] and .d[0] with immediate and register post-index; each add reads the written-back base x27. |
| # NOTE(review): this region holds four store/add pairs where most regions hold five - confirm this is intentional. |
| # LLVM-MCA-BEGIN G83 |
| st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 |
| add x0, x27, 1 |
| st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 |
| add x0, x27, 1 |
| st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 |
| add x0, x27, 1 |
| st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G84: STP of S and D register pairs, immediate post-index; each add reads the written-back base x27. |
| # NOTE(review): this region holds two store/add pairs where most regions hold five - confirm this is intentional. |
| # LLVM-MCA-BEGIN G84 |
| stp s1, s2, [x27], #248 |
| add x0, x27, 1 |
| stp d1, d2, [x27], #496 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G85: STP Q post-index, STP S/D/Q pre-index, then STP W post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G85 |
| stp q1, q2, [x27], #992 |
| add x0, x27, 1 |
| stp s1, s2, [x27, #248]! |
| add x0, x27, 1 |
| stp d1, d2, [x27, #496]! |
| add x0, x27, 1 |
| stp q1, q2, [x27, #992]! |
| add x0, x27, 1 |
| stp w1, w2, [x27], #248 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G86: STP X post-index, STP W/X pre-index, then STR of B and H registers post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G86 |
| stp x1, x2, [x27], #496 |
| add x0, x27, 1 |
| stp w1, w2, [x27, #248]! |
| add x0, x27, 1 |
| stp x1, x2, [x27, #496]! |
| add x0, x27, 1 |
| str b1, [x27], #254 |
| add x0, x27, 1 |
| str h1, [x27], #254 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G87: STR of S/D/Q registers post-index, then STR of B/H registers pre-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G87 |
| str s1, [x27], #254 |
| add x0, x27, 1 |
| str d1, [x27], #254 |
| add x0, x27, 1 |
| str q1, [x27], #254 |
| add x0, x27, 1 |
| str b1, [x27, #254]! |
| add x0, x27, 1 |
| str h1, [x27, #254]! |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G88: STR of S/D/Q registers pre-index, then STR W/X post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G88 |
| str s1, [x27, #254]! |
| add x0, x27, 1 |
| str d1, [x27, #254]! |
| add x0, x27, 1 |
| str q1, [x27, #254]! |
| add x0, x27, 1 |
| str w1, [x27], #254 |
| add x0, x27, 1 |
| str x1, [x27], #254 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G89: STR W/X pre-index, STRB post- and pre-index, then STRH post-index; each add reads the written-back base x27. |
| # LLVM-MCA-BEGIN G89 |
| str w1, [x27, #254]! |
| add x0, x27, 1 |
| str x1, [x27, #254]! |
| add x0, x27, 1 |
| strb w1, [x27], #254 |
| add x0, x27, 1 |
| strb w1, [x27, #254]! |
| add x0, x27, 1 |
| strh w1, [x27], #254 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G90: STRH pre-index; the add reads the written-back base x27. |
| # NOTE(review): this region holds a single store/add pair where most regions hold five - confirm this is intentional. |
| # LLVM-MCA-BEGIN G90 |
| strh w1, [x27, #254]! |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # G91: post-index LDR into x1 (base x27), then a post-index LDR whose base is that just-loaded x1; both adds read x27. |
| # NOTE(review): presumably checks that the second load waits for the loaded x1 value - confirm against this region's expected timeline. |
| # LLVM-MCA-BEGIN G91 |
| ldr x1, [x27], #254 |
| add x0, x27, 1 |
| ldr x2, [x1], #254 |
| add x0, x27, 1 |
| # LLVM-MCA-END |
| |
| # CHECK: [0] Code Region - G01 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 508 |
| # CHECK-NEXT: Total uOps: 1500 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 2.95 |
| # CHECK-NEXT: IPC: 1.97 |
| # CHECK-NEXT: Block RThroughput: 2.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 012 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeeER . . ld1 { v1.1d }, [x27], #8 |
| # CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.2d }, [x27], #16 |
| # CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.2s }, [x27], #8 |
| # CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] D===eeeeeeER. ld1 { v1.4h }, [x27], #8 |
| # CHECK-NEXT: [0,7] D====eE----R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] D====eeeeeeER ld1 { v1.4s }, [x27], #16 |
| # CHECK-NEXT: [0,9] D=====eE----R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.1d }, [x27], #8 |
| # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.2d }, [x27], #16 |
| # CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.2s }, [x27], #8 |
| # CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld1 { v1.4h }, [x27], #8 |
| # CHECK-NEXT: 7. 1 5.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 5.0 0.0 0.0 ld1 { v1.4s }, [x27], #16 |
| # CHECK-NEXT: 9. 1 6.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.5 0.1 2.0 <total> |
| |
| # CHECK: [1] Code Region - G02 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 508 |
| # CHECK-NEXT: Total uOps: 1500 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 2.95 |
| # CHECK-NEXT: IPC: 1.97 |
| # CHECK-NEXT: Block RThroughput: 2.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 012 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeeER . . ld1 { v1.8b }, [x27], #8 |
| # CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.8h }, [x27], #16 |
| # CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.16b }, [x27], #16 |
| # CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] D===eeeeeeER. ld1 { v1.1d }, [x27], x28 |
| # CHECK-NEXT: [0,7] D====eE----R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] D====eeeeeeER ld1 { v1.2d }, [x27], x28 |
| # CHECK-NEXT: [0,9] D=====eE----R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8b }, [x27], #8 |
| # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.8h }, [x27], #16 |
| # CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.16b }, [x27], #16 |
| # CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld1 { v1.1d }, [x27], x28 |
| # CHECK-NEXT: 7. 1 5.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 5.0 0.0 0.0 ld1 { v1.2d }, [x27], x28 |
| # CHECK-NEXT: 9. 1 6.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.5 0.1 2.0 <total> |
| |
| # CHECK: [2] Code Region - G03 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 508 |
| # CHECK-NEXT: Total uOps: 1500 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 2.95 |
| # CHECK-NEXT: IPC: 1.97 |
| # CHECK-NEXT: Block RThroughput: 2.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 012 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeeER . . ld1 { v1.2s }, [x27], x28 |
| # CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.4h }, [x27], x28 |
| # CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.4s }, [x27], x28 |
| # CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] D===eeeeeeER. ld1 { v1.8b }, [x27], x28 |
| # CHECK-NEXT: [0,7] D====eE----R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] D====eeeeeeER ld1 { v1.8h }, [x27], x28 |
| # CHECK-NEXT: [0,9] D=====eE----R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2s }, [x27], x28 |
| # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.4h }, [x27], x28 |
| # CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.4s }, [x27], x28 |
| # CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld1 { v1.8b }, [x27], x28 |
| # CHECK-NEXT: 7. 1 5.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 5.0 0.0 0.0 ld1 { v1.8h }, [x27], x28 |
| # CHECK-NEXT: 9. 1 6.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.5 0.1 2.0 <total> |
| |
| # CHECK: [3] Code Region - G04 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 508 |
| # CHECK-NEXT: Total uOps: 1900 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 3.74 |
| # CHECK-NEXT: IPC: 1.97 |
| # CHECK-NEXT: Block RThroughput: 3.0 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 012 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeeER . . ld1 { v1.16b }, [x27], x28 |
| # CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.1d, v2.1d }, [x27], #16 |
| # CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.2d, v2.2d }, [x27], #32 |
| # CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] D===eeeeeeER. ld1 { v1.2s, v2.2s }, [x27], #16 |
| # CHECK-NEXT: [0,7] D====eE----R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===eeeeeeER ld1 { v1.4h, v2.4h }, [x27], #16 |
| # CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.16b }, [x27], x28 |
| # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], #16 |
| # CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.2d, v2.2d }, [x27], #32 |
| # CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], #16 |
| # CHECK-NEXT: 7. 1 5.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], #16 |
| # CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.3 0.1 2.0 <total> |
| |
| # CHECK: [4] Code Region - G05 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 508 |
| # CHECK-NEXT: Total uOps: 2000 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 3.94 |
| # CHECK-NEXT: IPC: 1.97 |
| # CHECK-NEXT: Block RThroughput: 3.3 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 012 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeeER . . ld1 { v1.4s, v2.4s }, [x27], #32 |
| # CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.8b, v2.8b }, [x27], #16 |
| # CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.8h, v2.8h }, [x27], #32 |
| # CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] D===eeeeeeER. ld1 { v1.16b, v2.16b }, [x27], #32 |
| # CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===eeeeeeER ld1 { v1.1d, v2.1d }, [x27], x28 |
| # CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4s, v2.4s }, [x27], #32 |
| # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], #16 |
| # CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.8h, v2.8h }, [x27], #32 |
| # CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], #32 |
| # CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], x28 |
| # CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.2 0.1 2.0 <total> |
| |
| # CHECK: [5] Code Region - G06 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 508 |
| # CHECK-NEXT: Total uOps: 2000 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 3.94 |
| # CHECK-NEXT: IPC: 1.97 |
| # CHECK-NEXT: Block RThroughput: 3.3 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 012 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeeER . . ld1 { v1.2d, v2.2d }, [x27], x28 |
| # CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.2s, v2.2s }, [x27], x28 |
| # CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.4h, v2.4h }, [x27], x28 |
| # CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] D===eeeeeeER. ld1 { v1.4s, v2.4s }, [x27], x28 |
| # CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===eeeeeeER ld1 { v1.8b, v2.8b }, [x27], x28 |
| # CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2d, v2.2d }, [x27], x28 |
| # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], x28 |
| # CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], x28 |
| # CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld1 { v1.4s, v2.4s }, [x27], x28 |
| # CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], x28 |
| # CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.2 0.1 2.0 <total> |
| |
| # CHECK: [6] Code Region - G07 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 508 |
| # CHECK-NEXT: Total uOps: 2300 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 4.53 |
| # CHECK-NEXT: IPC: 1.97 |
| # CHECK-NEXT: Block RThroughput: 4.3 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 012 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeeER . . ld1 { v1.8h, v2.8h }, [x27], x28 |
| # CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.16b, v2.16b }, [x27], x28 |
| # CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24 |
| # CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D==eeeeeeER. ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48 |
| # CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===eeeeeeER ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24 |
| # CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8h, v2.8h }, [x27], x28 |
| # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], x28 |
| # CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24 |
| # CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48 |
| # CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24 |
| # CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.1 0.1 2.0 <total> |
| |
| # CHECK: [7] Code Region - G08 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 508 |
| # CHECK-NEXT: Total uOps: 2500 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 4.92 |
| # CHECK-NEXT: IPC: 1.97 |
| # CHECK-NEXT: Block RThroughput: 5.0 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 012 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeeER . . ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24 |
| # CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48 |
| # CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24 |
| # CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D==eeeeeeER. ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48 |
| # CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===eeeeeeER ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48 |
| # CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24 |
| # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48 |
| # CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24 |
| # CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48 |
| # CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48 |
| # CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.1 0.1 2.0 <total> |
| |
| # CHECK: [8] Code Region - G09 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 508 |
| # CHECK-NEXT: Total uOps: 2500 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 4.92 |
| # CHECK-NEXT: IPC: 1.97 |
| # CHECK-NEXT: Block RThroughput: 5.0 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 012 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeeER . . ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28 |
| # CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28 |
| # CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28 |
| # CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D==eeeeeeER. ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28 |
| # CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===eeeeeeER ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28 |
| # CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28 |
| # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28 |
| # CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28 |
| # CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28 |
| # CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28 |
| # CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.1 0.1 2.0 <total> |
| |
| # CHECK: [9] Code Region - G10 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 608 |
| # CHECK-NEXT: Total uOps: 2500 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 4.11 |
| # CHECK-NEXT: IPC: 1.64 |
| # CHECK-NEXT: Block RThroughput: 5.0 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 0123 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeeER . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28 |
| # CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28 |
| # CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28 |
| # CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D==eeeeeeER . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 |
| # CHECK-NEXT: [0,7] .D===eE----R . add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===eeeeeeeER ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 |
| # CHECK-NEXT: [0,9] .D====eE-----R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28 |
| # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28 |
| # CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28 |
| # CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 |
| # CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 |
| # CHECK-NEXT: 9. 1 5.0 0.0 5.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.1 0.1 2.1 <total> |
| |
| # CHECK: [10] Code Region - G11 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 509 |
| # CHECK-NEXT: Total uOps: 2400 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 4.72 |
| # CHECK-NEXT: IPC: 1.96 |
| # CHECK-NEXT: Block RThroughput: 4.7 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 0123 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeeER . . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 |
| # CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 |
| # CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeeeeeeER . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 |
| # CHECK-NEXT: [0,5] D===eE-----R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D==eeeeeeER . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 |
| # CHECK-NEXT: [0,7] .D===eE----R . add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===eeeeeeeER ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 |
| # CHECK-NEXT: [0,9] .D====eE-----R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 |
| # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 |
| # CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 |
| # CHECK-NEXT: 5. 1 4.0 0.0 5.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 |
| # CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 |
| # CHECK-NEXT: 9. 1 5.0 0.0 5.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.1 0.1 2.2 <total> |
| |
| # CHECK: [11] Code Region - G12 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 508 |
| # CHECK-NEXT: Total uOps: 2400 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 4.72 |
| # CHECK-NEXT: IPC: 1.97 |
| # CHECK-NEXT: Block RThroughput: 4.7 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 012 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeeeER. . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 |
| # CHECK-NEXT: [0,1] D=eE-----R. . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 |
| # CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeeeeeeER. ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 |
| # CHECK-NEXT: [0,5] .D==eE-----R. add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D==eeeeeeER. ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 |
| # CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===eeeeeeER ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 |
| # CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 |
| # CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 |
| # CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 |
| # CHECK-NEXT: 5. 1 3.0 0.0 5.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 |
| # CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 |
| # CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.0 0.1 2.2 <total> |
| |
| # CHECK: [12] Code Region - G13 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 1110 |
| # CHECK-NEXT: Total uOps: 2600 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 2.34 |
| # CHECK-NEXT: IPC: 0.90 |
| # CHECK-NEXT: Block RThroughput: 5.0 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 0123456789 |
| # CHECK-NEXT: Index 0123456789 0 |
| |
| # CHECK: [0,0] DeeeeeeeER. . . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 |
| # CHECK-NEXT: [0,1] D=eE-----R. . . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeeeER. . . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 |
| # CHECK-NEXT: [0,3] D==eE----R. . . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeeeeeeER . . ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 |
| # CHECK-NEXT: [0,5] .D==eE-----R . . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D==eeeeeeeER . . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 |
| # CHECK-NEXT: [0,7] .D===eE-----R . . add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D=========eeeeeeeeER ld1 { v1.b }[0], [x27], #1 |
| # CHECK-NEXT: [0,9] .D==========eE------R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 |
| # CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 |
| # CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 |
| # CHECK-NEXT: 5. 1 3.0 0.0 5.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 |
| # CHECK-NEXT: 7. 1 4.0 0.0 5.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 10.0 0.0 0.0 ld1 { v1.b }[0], [x27], #1 |
| # CHECK-NEXT: 9. 1 11.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 4.2 0.1 2.5 <total> |
| |
| # CHECK: [13] Code Region - G14 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 4003 |
| # CHECK-NEXT: Total uOps: 2000 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 0.50 |
| # CHECK-NEXT: IPC: 0.25 |
| # CHECK-NEXT: Block RThroughput: 2.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 0123456789 0123456789 |
| # CHECK-NEXT: Index 0123456789 0123456789 012 |
| |
| # CHECK: [0,0] DeeeeeeeeER . . . . . . . ld1 { v1.b }[8], [x27], #1 |
| # CHECK-NEXT: [0,1] D=eE------R . . . . . . . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld1 { v1.b }[0], [x27], x28 |
| # CHECK-NEXT: [0,3] D=========eE------R . . . . . . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld1 { v1.b }[8], [x27], x28 |
| # CHECK-NEXT: [0,5] D=================eE------R . . . . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] D========================eeeeeeeeER. . . ld1 { v1.h }[0], [x27], #2 |
| # CHECK-NEXT: [0,7] .D========================eE------R. . . add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===============================eeeeeeeeER ld1 { v1.h }[4], [x27], #2 |
| # CHECK-NEXT: [0,9] .D================================eE------R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.b }[8], [x27], #1 |
| # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld1 { v1.b }[0], [x27], x28 |
| # CHECK-NEXT: 3. 1 10.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld1 { v1.b }[8], [x27], x28 |
| # CHECK-NEXT: 5. 1 18.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 25.0 0.0 0.0 ld1 { v1.h }[0], [x27], #2 |
| # CHECK-NEXT: 7. 1 25.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld1 { v1.h }[4], [x27], #2 |
| # CHECK-NEXT: 9. 1 33.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 17.2 0.1 3.0 <total> |
| |
| # CHECK: [14] Code Region - G15 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 4003 |
| # CHECK-NEXT: Total uOps: 2000 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 0.50 |
| # CHECK-NEXT: IPC: 0.25 |
| # CHECK-NEXT: Block RThroughput: 2.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 0123456789 0123456789 |
| # CHECK-NEXT: Index 0123456789 0123456789 012 |
| |
| # CHECK: [0,0] DeeeeeeeeER . . . . . . . ld1 { v1.h }[0], [x27], x28 |
| # CHECK-NEXT: [0,1] D=eE------R . . . . . . . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld1 { v1.h }[4], [x27], x28 |
| # CHECK-NEXT: [0,3] D=========eE------R . . . . . . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld1 { v1.s }[0], [x27], #4 |
| # CHECK-NEXT: [0,5] D=================eE------R . . . . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] D========================eeeeeeeeER. . . ld1 { v1.s }[0], [x27], x28 |
| # CHECK-NEXT: [0,7] .D========================eE------R. . . add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===============================eeeeeeeeER ld1 { v1.d }[0], [x27], #8 |
| # CHECK-NEXT: [0,9] .D================================eE------R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.h }[0], [x27], x28 |
| # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld1 { v1.h }[4], [x27], x28 |
| # CHECK-NEXT: 3. 1 10.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld1 { v1.s }[0], [x27], #4 |
| # CHECK-NEXT: 5. 1 18.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 25.0 0.0 0.0 ld1 { v1.s }[0], [x27], x28 |
| # CHECK-NEXT: 7. 1 25.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld1 { v1.d }[0], [x27], #8 |
| # CHECK-NEXT: 9. 1 33.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 17.2 0.1 3.0 <total> |
| |
| # CHECK: [15] Code Region - G16 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 1203 |
| # CHECK-NEXT: Total uOps: 2000 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 1.66 |
| # CHECK-NEXT: IPC: 0.83 |
| # CHECK-NEXT: Block RThroughput: 2.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 01234 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeeeeER . ld1 { v1.d }[0], [x27], x28 |
| # CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeeeeeER . ld1r { v1.1d }, [x27], #8 |
| # CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeeeeeeeER . ld1r { v1.2d }, [x27], #8 |
| # CHECK-NEXT: [0,5] D===eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] D===eeeeeeeeER. ld1r { v1.2s }, [x27], #4 |
| # CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===eeeeeeeeER ld1r { v1.4h }, [x27], #2 |
| # CHECK-NEXT: [0,9] .D====eE------R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.d }[0], [x27], x28 |
| # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1r { v1.1d }, [x27], #8 |
| # CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1r { v1.2d }, [x27], #8 |
| # CHECK-NEXT: 5. 1 4.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld1r { v1.2s }, [x27], #4 |
| # CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1r { v1.4h }, [x27], #2 |
| # CHECK-NEXT: 9. 1 5.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.2 0.1 3.0 <total> |
| |
| # CHECK: [16] Code Region - G17 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 510 |
| # CHECK-NEXT: Total uOps: 2000 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 3.92 |
| # CHECK-NEXT: IPC: 1.96 |
| # CHECK-NEXT: Block RThroughput: 2.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 01234 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeeeeER . ld1r { v1.4s }, [x27], #4 |
| # CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeeeeeER . ld1r { v1.8b }, [x27], #1 |
| # CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeeeeeeeER . ld1r { v1.8h }, [x27], #2 |
| # CHECK-NEXT: [0,5] D===eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] D===eeeeeeeeER. ld1r { v1.16b }, [x27], #1 |
| # CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===eeeeeeeeER ld1r { v1.1d }, [x27], x28 |
| # CHECK-NEXT: [0,9] .D====eE------R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.4s }, [x27], #4 |
| # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1r { v1.8b }, [x27], #1 |
| # CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1r { v1.8h }, [x27], #2 |
| # CHECK-NEXT: 5. 1 4.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld1r { v1.16b }, [x27], #1 |
| # CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1r { v1.1d }, [x27], x28 |
| # CHECK-NEXT: 9. 1 5.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.2 0.1 3.0 <total> |
| |
| # CHECK: [17] Code Region - G18 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 510 |
| # CHECK-NEXT: Total uOps: 2000 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 3.92 |
| # CHECK-NEXT: IPC: 1.96 |
| # CHECK-NEXT: Block RThroughput: 2.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 01234 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeeeeER . ld1r { v1.2d }, [x27], x28 |
| # CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeeeeeER . ld1r { v1.2s }, [x27], x28 |
| # CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeeeeeeeER . ld1r { v1.4h }, [x27], x28 |
| # CHECK-NEXT: [0,5] D===eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] D===eeeeeeeeER. ld1r { v1.4s }, [x27], x28 |
| # CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===eeeeeeeeER ld1r { v1.8b }, [x27], x28 |
| # CHECK-NEXT: [0,9] .D====eE------R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.2d }, [x27], x28 |
| # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1r { v1.2s }, [x27], x28 |
| # CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1r { v1.4h }, [x27], x28 |
| # CHECK-NEXT: 5. 1 4.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld1r { v1.4s }, [x27], x28 |
| # CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1r { v1.8b }, [x27], x28 |
| # CHECK-NEXT: 9. 1 5.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.2 0.1 3.0 <total> |
| |
| # CHECK: [18] Code Region - G19 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 510 |
| # CHECK-NEXT: Total uOps: 2400 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 4.71 |
| # CHECK-NEXT: IPC: 1.96 |
| # CHECK-NEXT: Block RThroughput: 2.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 01234 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeeeeER . ld1r { v1.8h }, [x27], x28 |
| # CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeeeeeER . ld1r { v1.16b }, [x27], x28 |
| # CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeeeeeeeER . ld2 { v1.2d, v2.2d }, [x27], #32 |
| # CHECK-NEXT: [0,5] D===eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld2 { v1.2s, v2.2s }, [x27], #16 |
| # CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===eeeeeeeeER ld2 { v1.4h, v2.4h }, [x27], #16 |
| # CHECK-NEXT: [0,9] .D====eE------R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.8h }, [x27], x28 |
| # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1r { v1.16b }, [x27], x28 |
| # CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], #32 |
| # CHECK-NEXT: 5. 1 4.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld2 { v1.2s, v2.2s }, [x27], #16 |
| # CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], #16 |
| # CHECK-NEXT: 9. 1 5.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.1 0.1 3.0 <total> |
| |
| # CHECK: [19] Code Region - G20 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 510 |
| # CHECK-NEXT: Total uOps: 2900 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 5.69 |
| # CHECK-NEXT: IPC: 1.96 |
| # CHECK-NEXT: Block RThroughput: 3.0 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 01234 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeeeeER . ld2 { v1.4s, v2.4s }, [x27], #32 |
| # CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeeeeeER . ld2 { v1.8b, v2.8b }, [x27], #16 |
| # CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] .D=eeeeeeeeER . ld2 { v1.8h, v2.8h }, [x27], #32 |
| # CHECK-NEXT: [0,5] .D==eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld2 { v1.16b, v2.16b }, [x27], #32 |
| # CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] . D==eeeeeeeeER ld2 { v1.2d, v2.2d }, [x27], x28 |
| # CHECK-NEXT: [0,9] . D===eE------R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.4s, v2.4s }, [x27], #32 |
| # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], #16 |
| # CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], #32 |
| # CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld2 { v1.16b, v2.16b }, [x27], #32 |
| # CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], x28 |
| # CHECK-NEXT: 9. 1 4.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 2.7 0.1 3.0 <total> |
| |
| # CHECK: [20] Code Region - G21 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 510 |
| # CHECK-NEXT: Total uOps: 2700 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 5.29 |
| # CHECK-NEXT: IPC: 1.96 |
| # CHECK-NEXT: Block RThroughput: 2.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 01234 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeeeeER . ld2 { v1.2s, v2.2s }, [x27], x28 |
| # CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeeeeeER . ld2 { v1.4h, v2.4h }, [x27], x28 |
| # CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeeeeeeeER . ld2 { v1.4s, v2.4s }, [x27], x28 |
| # CHECK-NEXT: [0,5] .D==eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld2 { v1.8b, v2.8b }, [x27], x28 |
| # CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===eeeeeeeeER ld2 { v1.8h, v2.8h }, [x27], x28 |
| # CHECK-NEXT: [0,9] .D====eE------R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.2s, v2.2s }, [x27], x28 |
| # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], x28 |
| # CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld2 { v1.4s, v2.4s }, [x27], x28 |
| # CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], x28 |
| # CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], x28 |
| # CHECK-NEXT: 9. 1 5.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.0 0.1 3.0 <total> |
| |
| # CHECK: [21] Code Region - G22 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 3310 |
| # CHECK-NEXT: Total uOps: 2600 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 0.79 |
| # CHECK-NEXT: IPC: 0.30 |
| # CHECK-NEXT: Block RThroughput: 2.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 0123456789 0123456789 |
| # CHECK-NEXT: Index 0123456789 0123456789 012 |
| |
| # CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2 { v1.16b, v2.16b }, [x27], x28 |
| # CHECK-NEXT: [0,1] D=eE------R . . . . . . . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld2 { v1.b, v2.b }[0], [x27], #2 |
| # CHECK-NEXT: [0,3] D=========eE------R . . . . . . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld2 { v1.b, v2.b }[8], [x27], #2 |
| # CHECK-NEXT: [0,5] .D================eE------R . . . . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld2 { v1.b, v2.b }[0], [x27], x28 |
| # CHECK-NEXT: [0,7] .D========================eE------R. . . add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===============================eeeeeeeeER ld2 { v1.b, v2.b }[8], [x27], x28 |
| # CHECK-NEXT: [0,9] .D================================eE------R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.16b, v2.16b }, [x27], x28 |
| # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], #2 |
| # CHECK-NEXT: 3. 1 10.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], #2 |
| # CHECK-NEXT: 5. 1 17.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], x28 |
| # CHECK-NEXT: 7. 1 25.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], x28 |
| # CHECK-NEXT: 9. 1 33.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 17.0 0.1 3.0 <total> |
| |
| # CHECK: [22] Code Region - G23 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 4003 |
| # CHECK-NEXT: Total uOps: 2500 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 0.62 |
| # CHECK-NEXT: IPC: 0.25 |
| # CHECK-NEXT: Block RThroughput: 2.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 0123456789 0123456789 |
| # CHECK-NEXT: Index 0123456789 0123456789 012 |
| |
| # CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2 { v1.h, v2.h }[0], [x27], #4 |
| # CHECK-NEXT: [0,1] D=eE------R . . . . . . . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld2 { v1.h, v2.h }[4], [x27], #4 |
| # CHECK-NEXT: [0,3] D=========eE------R . . . . . . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld2 { v1.h, v2.h }[0], [x27], x28 |
| # CHECK-NEXT: [0,5] D=================eE------R . . . . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld2 { v1.h, v2.h }[4], [x27], x28 |
| # CHECK-NEXT: [0,7] .D========================eE------R. . . add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===============================eeeeeeeeER ld2 { v1.s, v2.s }[0], [x27], #8 |
| # CHECK-NEXT: [0,9] .D================================eE------R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.h, v2.h }[0], [x27], #4 |
| # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], #4 |
| # CHECK-NEXT: 3. 1 10.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld2 { v1.h, v2.h }[0], [x27], x28 |
| # CHECK-NEXT: 5. 1 18.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], x28 |
| # CHECK-NEXT: 7. 1 25.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld2 { v1.s, v2.s }[0], [x27], #8 |
| # CHECK-NEXT: 9. 1 33.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 17.1 0.1 3.0 <total> |
| |
| # CHECK: [23] Code Region - G24 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 2603 |
| # CHECK-NEXT: Total uOps: 2500 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 0.96 |
| # CHECK-NEXT: IPC: 0.38 |
| # CHECK-NEXT: Block RThroughput: 2.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 0123456789 |
| # CHECK-NEXT: Index 0123456789 012345678 |
| |
| # CHECK: [0,0] DeeeeeeeeER . . . . ld2 { v1.s, v2.s }[0], [x27], x28 |
| # CHECK-NEXT: [0,1] D=eE------R . . . . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D========eeeeeeeeER . . . ld2 { v1.d, v2.d }[0], [x27], #16 |
| # CHECK-NEXT: [0,3] D=========eE------R . . . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D================eeeeeeeeER . ld2 { v1.d, v2.d }[0], [x27], x28 |
| # CHECK-NEXT: [0,5] D=================eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D================eeeeeeeeER. ld2r { v1.1d, v2.1d }, [x27], #16 |
| # CHECK-NEXT: [0,7] .D=================eE------R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D=================eeeeeeeeER ld2r { v1.2d, v2.2d }, [x27], #16 |
| # CHECK-NEXT: [0,9] .D==================eE------R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.s, v2.s }[0], [x27], x28 |
| # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], #16 |
| # CHECK-NEXT: 3. 1 10.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], x28 |
| # CHECK-NEXT: 5. 1 18.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 17.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], #16 |
| # CHECK-NEXT: 7. 1 18.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 18.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], #16 |
| # CHECK-NEXT: 9. 1 19.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 12.9 0.1 3.0 <total> |
| |
| # CHECK: [24] Code Region - G25 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 510 |
| # CHECK-NEXT: Total uOps: 2500 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 4.90 |
| # CHECK-NEXT: IPC: 1.96 |
| # CHECK-NEXT: Block RThroughput: 2.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 01234 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeeeeER . ld2r { v1.2s, v2.2s }, [x27], #8 |
| # CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeeeeeER . ld2r { v1.4h, v2.4h }, [x27], #4 |
| # CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeeeeeeeER . ld2r { v1.4s, v2.4s }, [x27], #8 |
| # CHECK-NEXT: [0,5] D===eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld2r { v1.8b, v2.8b }, [x27], #2 |
| # CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===eeeeeeeeER ld2r { v1.8h, v2.8h }, [x27], #4 |
| # CHECK-NEXT: [0,9] .D====eE------R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.2s, v2.2s }, [x27], #8 |
| # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], #4 |
| # CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld2r { v1.4s, v2.4s }, [x27], #8 |
| # CHECK-NEXT: 5. 1 4.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], #2 |
| # CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], #4 |
| # CHECK-NEXT: 9. 1 5.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.1 0.1 3.0 <total> |
| |
| # CHECK: [25] Code Region - G26 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 510 |
| # CHECK-NEXT: Total uOps: 2500 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 4.90 |
| # CHECK-NEXT: IPC: 1.96 |
| # CHECK-NEXT: Block RThroughput: 2.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 01234 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeeeeER . ld2r { v1.16b, v2.16b }, [x27], #2 |
| # CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeeeeeER . ld2r { v1.1d, v2.1d }, [x27], x28 |
| # CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeeeeeeeER . ld2r { v1.2d, v2.2d }, [x27], x28 |
| # CHECK-NEXT: [0,5] D===eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld2r { v1.2s, v2.2s }, [x27], x28 |
| # CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===eeeeeeeeER ld2r { v1.4h, v2.4h }, [x27], x28 |
| # CHECK-NEXT: [0,9] .D====eE------R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.16b, v2.16b }, [x27], #2 |
| # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], x28 |
| # CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], x28 |
| # CHECK-NEXT: 5. 1 4.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld2r { v1.2s, v2.2s }, [x27], x28 |
| # CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], x28 |
| # CHECK-NEXT: 9. 1 5.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.1 0.1 3.0 <total> |
| |
| # CHECK: [26] Code Region - G27 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 510 |
| # CHECK-NEXT: Total uOps: 2800 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 5.49 |
| # CHECK-NEXT: IPC: 1.96 |
| # CHECK-NEXT: Block RThroughput: 2.8 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 01234 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeeeeER . ld2r { v1.4s, v2.4s }, [x27], x28 |
| # CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeeeeeER . ld2r { v1.8b, v2.8b }, [x27], x28 |
| # CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeeeeeeeER . ld2r { v1.8h, v2.8h }, [x27], x28 |
| # CHECK-NEXT: [0,5] D===eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld2r { v1.16b, v2.16b }, [x27], x28 |
| # CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===eeeeeeeeER ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48 |
| # CHECK-NEXT: [0,9] .D====eE------R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.4s, v2.4s }, [x27], x28 |
| # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], x28 |
| # CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], x28 |
| # CHECK-NEXT: 5. 1 4.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld2r { v1.16b, v2.16b }, [x27], x28 |
| # CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48 |
| # CHECK-NEXT: 9. 1 5.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.1 0.1 3.0 <total> |
| |
| # CHECK: [27] Code Region - G28 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 510 |
| # CHECK-NEXT: Total uOps: 3700 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 7.25 |
| # CHECK-NEXT: IPC: 1.96 |
| # CHECK-NEXT: Block RThroughput: 4.0 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 01234 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeeeeER . ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24 |
| # CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeeeeeER . ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24 |
| # CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] .D=eeeeeeeeER . ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48 |
| # CHECK-NEXT: [0,5] .D==eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24 |
| # CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] . D==eeeeeeeeER ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48 |
| # CHECK-NEXT: [0,9] . D===eE------R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24 |
| # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24 |
| # CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48 |
| # CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24 |
| # CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48 |
| # CHECK-NEXT: 9. 1 4.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 2.7 0.1 3.0 <total> |
| |
| # CHECK: [28] Code Region - G29 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 510 |
| # CHECK-NEXT: Total uOps: 3800 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 7.45 |
| # CHECK-NEXT: IPC: 1.96 |
| # CHECK-NEXT: Block RThroughput: 4.3 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 01234 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeeeeER . ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48 |
| # CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeeeeeER . ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28 |
| # CHECK-NEXT: [0,3] .D=eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] .D=eeeeeeeeER . ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28 |
| # CHECK-NEXT: [0,5] .D==eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28 |
| # CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] . D==eeeeeeeeER ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28 |
| # CHECK-NEXT: [0,9] . D===eE------R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48 |
| # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28 |
| # CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28 |
| # CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28 |
| # CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28 |
| # CHECK-NEXT: 9. 1 4.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 2.6 0.1 3.0 <total> |
| |
| # CHECK: [29] Code Region - G30 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 1910 |
| # CHECK-NEXT: Total uOps: 3700 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 1.94 |
| # CHECK-NEXT: IPC: 0.52 |
| # CHECK-NEXT: Block RThroughput: 4.0 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 0123456789 |
| # CHECK-NEXT: Index 0123456789 012345678 |
| |
| # CHECK: [0,0] DeeeeeeeeER . . . . ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28 |
| # CHECK-NEXT: [0,1] D=eE------R . . . . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeeeeeER . . . . ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28 |
| # CHECK-NEXT: [0,3] D==eE------R . . . . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] .D=eeeeeeeeER . . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28 |
| # CHECK-NEXT: [0,5] .D==eE------R . . . . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D=========eeeeeeeeER . . ld3 { v1.b, v2.b, v3.b }[0], [x27], #3 |
| # CHECK-NEXT: [0,7] .D==========eE------R . . add x0, x27, #1 |
| # CHECK-NEXT: [0,8] . D================eeeeeeeeER ld3 { v1.b, v2.b, v3.b }[8], [x27], #3 |
| # CHECK-NEXT: [0,9] . D=================eE------R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28 |
| # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28 |
| # CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28 |
| # CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 10.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], #3 |
| # CHECK-NEXT: 7. 1 11.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 17.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], #3 |
| # CHECK-NEXT: 9. 1 18.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 6.9 0.1 3.0 <total> |
| |
| # CHECK: [30] Code Region - G31 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 4003 |
| # CHECK-NEXT: Total uOps: 3500 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 0.87 |
| # CHECK-NEXT: IPC: 0.25 |
| # CHECK-NEXT: Block RThroughput: 3.8 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 0123456789 0123456789 |
| # CHECK-NEXT: Index 0123456789 0123456789 012 |
| |
| # CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3 { v1.b, v2.b, v3.b }[0], [x27], x28 |
| # CHECK-NEXT: [0,1] D=eE------R . . . . . . . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld3 { v1.b, v2.b, v3.b }[8], [x27], x28 |
| # CHECK-NEXT: [0,3] D=========eE------R . . . . . . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld3 { v1.h, v2.h, v3.h }[0], [x27], #6 |
| # CHECK-NEXT: [0,5] .D================eE------R . . . . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld3 { v1.h, v2.h, v3.h }[4], [x27], #6 |
| # CHECK-NEXT: [0,7] .D========================eE------R. . . add x0, x27, #1 |
| # CHECK-NEXT: [0,8] . D==============================eeeeeeeeER ld3 { v1.h, v2.h, v3.h }[0], [x27], x28 |
| # CHECK-NEXT: [0,9] . D===============================eE------R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], x28 |
| # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], x28 |
| # CHECK-NEXT: 3. 1 10.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], #6 |
| # CHECK-NEXT: 5. 1 17.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[4], [x27], #6 |
| # CHECK-NEXT: 7. 1 25.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], x28 |
| # CHECK-NEXT: 9. 1 32.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 16.7 0.1 3.0 <total> |
| |
| # CHECK: [31] Code Region - G32 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 4003 |
| # CHECK-NEXT: Total uOps: 3500 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 0.87 |
| # CHECK-NEXT: IPC: 0.25 |
| # CHECK-NEXT: Block RThroughput: 3.8 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 0123456789 0123456789 |
| # CHECK-NEXT: Index 0123456789 0123456789 012 |
| |
| # CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3 { v1.h, v2.h, v3.h }[4], [x27], x28 |
| # CHECK-NEXT: [0,1] D=eE------R . . . . . . . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], #12 |
| # CHECK-NEXT: [0,3] D=========eE------R . . . . . . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], x28 |
| # CHECK-NEXT: [0,5] .D================eE------R . . . . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld3 { v1.d, v2.d, v3.d }[0], [x27], #24 |
| # CHECK-NEXT: [0,7] .D========================eE------R. . . add x0, x27, #1 |
| # CHECK-NEXT: [0,8] . D==============================eeeeeeeeER ld3 { v1.d, v2.d, v3.d }[0], [x27], x28 |
| # CHECK-NEXT: [0,9] . D===============================eE------R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.h, v2.h, v3.h }[4], [x27], x28 |
| # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], #12 |
| # CHECK-NEXT: 3. 1 10.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], x28 |
| # CHECK-NEXT: 5. 1 17.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], #24 |
| # CHECK-NEXT: 7. 1 25.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], x28 |
| # CHECK-NEXT: 9. 1 32.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 16.7 0.1 3.0 <total> |
| |
| # CHECK: [32] Code Region - G33 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 510 |
| # CHECK-NEXT: Total uOps: 3500 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 6.86 |
| # CHECK-NEXT: IPC: 1.96 |
| # CHECK-NEXT: Block RThroughput: 3.8 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 01234 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeeeeER . ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24 |
| # CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeeeeeER . ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24 |
| # CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] .D=eeeeeeeeER . ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12 |
| # CHECK-NEXT: [0,5] .D==eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6 |
| # CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] . D==eeeeeeeeER ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12 |
| # CHECK-NEXT: [0,9] . D===eE------R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24 |
| # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24 |
| # CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12 |
| # CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6 |
| # CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12 |
| # CHECK-NEXT: 9. 1 4.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 2.7 0.1 3.0 <total> |
| |
| # CHECK: [33] Code Region - G34 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 510 |
| # CHECK-NEXT: Total uOps: 3500 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 6.86 |
| # CHECK-NEXT: IPC: 1.96 |
| # CHECK-NEXT: Block RThroughput: 3.8 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 01234 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeeeeER . ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3 |
| # CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeeeeeER . ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6 |
| # CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] .D=eeeeeeeeER . ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3 |
| # CHECK-NEXT: [0,5] .D==eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28 |
| # CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] . D==eeeeeeeeER ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28 |
| # CHECK-NEXT: [0,9] . D===eE------R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3 |
| # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6 |
| # CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3 |
| # CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28 |
| # CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28 |
| # CHECK-NEXT: 9. 1 4.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 2.7 0.1 3.0 <total> |
| |
| # CHECK: [34] Code Region - G35 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 510 |
| # CHECK-NEXT: Total uOps: 3500 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 6.86 |
| # CHECK-NEXT: IPC: 1.96 |
| # CHECK-NEXT: Block RThroughput: 3.8 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 01234 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeeeeER . ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28 |
| # CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeeeeeER . ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28 |
| # CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] .D=eeeeeeeeER . ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28 |
| # CHECK-NEXT: [0,5] .D==eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28 |
| # CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] . D==eeeeeeeeER ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28 |
| # CHECK-NEXT: [0,9] . D===eE------R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28 |
| # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28 |
| # CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28 |
| # CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28 |
| # CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28 |
| # CHECK-NEXT: 9. 1 4.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 2.7 0.1 3.0 <total> |
| |
| # CHECK: [35] Code Region - G36 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 611 |
| # CHECK-NEXT: Total uOps: 4500 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 7.36 |
| # CHECK-NEXT: IPC: 1.64 |
| # CHECK-NEXT: Block RThroughput: 5.3 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 0123456 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeeeeER .. ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28 |
| # CHECK-NEXT: [0,1] D=eE------R .. add x0, x27, #1 |
| # CHECK-NEXT: [0,2] .DeeeeeeeeeER .. ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 |
| # CHECK-NEXT: [0,3] .D=eE-------R .. add x0, x27, #1 |
| # CHECK-NEXT: [0,4] . DeeeeeeeeER .. ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 |
| # CHECK-NEXT: [0,5] . D=eE------R .. add x0, x27, #1 |
| # CHECK-NEXT: [0,6] . DeeeeeeeeER .. ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 |
| # CHECK-NEXT: [0,7] . D=eE------R .. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] . D=eeeeeeeeeER ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 |
| # CHECK-NEXT: [0,9] . D==eE-------R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28 |
| # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 |
| # CHECK-NEXT: 3. 1 2.0 0.0 7.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 |
| # CHECK-NEXT: 5. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 |
| # CHECK-NEXT: 7. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 2.0 1.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 |
| # CHECK-NEXT: 9. 1 3.0 0.0 7.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 1.7 0.2 3.2 <total> |
| |
| # CHECK: [36] Code Region - G37 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 610 |
| # CHECK-NEXT: Total uOps: 4800 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 7.87 |
| # CHECK-NEXT: IPC: 1.64 |
| # CHECK-NEXT: Block RThroughput: 6.0 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 012345 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeeeeER . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 |
| # CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] .DeeeeeeeeeER . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 |
| # CHECK-NEXT: [0,3] .D=eE-------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] . DeeeeeeeeeER . ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 |
| # CHECK-NEXT: [0,5] . D=eE-------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] . DeeeeeeeeeER. ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 |
| # CHECK-NEXT: [0,7] . D=eE-------R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] . D=eeeeeeeeER ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 |
| # CHECK-NEXT: [0,9] . D==eE------R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 |
| # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 |
| # CHECK-NEXT: 3. 1 2.0 0.0 7.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 |
| # CHECK-NEXT: 5. 1 2.0 0.0 7.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 |
| # CHECK-NEXT: 7. 1 2.0 0.0 7.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 2.0 1.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 |
| # CHECK-NEXT: 9. 1 3.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 1.7 0.2 3.3 <total> |
| |
| # CHECK: [37] Code Region - G38 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 660 |
| # CHECK-NEXT: Total uOps: 4800 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 7.27 |
| # CHECK-NEXT: IPC: 1.52 |
| # CHECK-NEXT: Block RThroughput: 6.0 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 0123456 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeeeeER .. ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 |
| # CHECK-NEXT: [0,1] D=eE------R .. add x0, x27, #1 |
| # CHECK-NEXT: [0,2] .DeeeeeeeeeER .. ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 |
| # CHECK-NEXT: [0,3] .D=eE-------R .. add x0, x27, #1 |
| # CHECK-NEXT: [0,4] . DeeeeeeeeER .. ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 |
| # CHECK-NEXT: [0,5] . D=eE------R .. add x0, x27, #1 |
| # CHECK-NEXT: [0,6] . DeeeeeeeeeER.. ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 |
| # CHECK-NEXT: [0,7] . D=eE-------R.. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] . D=eeeeeeeeeER ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 |
| # CHECK-NEXT: [0,9] . D==eE-------R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 |
| # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 |
| # CHECK-NEXT: 3. 1 2.0 0.0 7.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 |
| # CHECK-NEXT: 5. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 |
| # CHECK-NEXT: 7. 1 2.0 0.0 7.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 2.0 1.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 |
| # CHECK-NEXT: 9. 1 3.0 0.0 7.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 1.7 0.2 3.3 <total> |
| |
| # CHECK: [38] Code Region - G39 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 4003 |
| # CHECK-NEXT: Total uOps: 4500 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 1.12 |
| # CHECK-NEXT: IPC: 0.25 |
| # CHECK-NEXT: Block RThroughput: 5.0 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 0123456789 0123456789 |
| # CHECK-NEXT: Index 0123456789 0123456789 012 |
| |
| # CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 |
| # CHECK-NEXT: [0,1] D=eE------R . . . . . . . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 |
| # CHECK-NEXT: [0,3] .D========eE------R . . . . . . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 |
| # CHECK-NEXT: [0,5] . D===============eE------R . . . . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 |
| # CHECK-NEXT: [0,7] . D======================eE------R. . . add x0, x27, #1 |
| # CHECK-NEXT: [0,8] . D============================eeeeeeeeER ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 |
| # CHECK-NEXT: [0,9] . D=============================eE------R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 |
| # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 |
| # CHECK-NEXT: 3. 1 9.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 |
| # CHECK-NEXT: 5. 1 16.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 |
| # CHECK-NEXT: 7. 1 23.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 |
| # CHECK-NEXT: 9. 1 30.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 15.5 0.1 3.0 <total> |
| |
| # CHECK: [39] Code Region - G40 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 4003 |
| # CHECK-NEXT: Total uOps: 4500 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 1.12 |
| # CHECK-NEXT: IPC: 0.25 |
| # CHECK-NEXT: Block RThroughput: 5.0 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 0123456789 0123456789 |
| # CHECK-NEXT: Index 0123456789 0123456789 012 |
| |
| # CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 |
| # CHECK-NEXT: [0,1] D=eE------R . . . . . . . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 |
| # CHECK-NEXT: [0,3] .D========eE------R . . . . . . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 |
| # CHECK-NEXT: [0,5] . D===============eE------R . . . . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 |
| # CHECK-NEXT: [0,7] . D======================eE------R. . . add x0, x27, #1 |
| # CHECK-NEXT: [0,8] . D============================eeeeeeeeER ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 |
| # CHECK-NEXT: [0,9] . D=============================eE------R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 |
| # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 |
| # CHECK-NEXT: 3. 1 9.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 |
| # CHECK-NEXT: 5. 1 16.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 |
| # CHECK-NEXT: 7. 1 23.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 |
| # CHECK-NEXT: 9. 1 30.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 15.5 0.1 3.0 <total> |
| |
| # CHECK: [40] Code Region - G41 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 1903 |
| # CHECK-NEXT: Total uOps: 4500 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 2.36 |
| # CHECK-NEXT: IPC: 0.53 |
| # CHECK-NEXT: Block RThroughput: 5.0 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 0123456789 |
| # CHECK-NEXT: Index 0123456789 01 |
| |
| # CHECK: [0,0] DeeeeeeeeER . .. ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 |
| # CHECK-NEXT: [0,1] D=eE------R . .. add x0, x27, #1 |
| # CHECK-NEXT: [0,2] .D=======eeeeeeeeER .. ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 |
| # CHECK-NEXT: [0,3] .D========eE------R .. add x0, x27, #1 |
| # CHECK-NEXT: [0,4] . D=======eeeeeeeeER.. ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 |
| # CHECK-NEXT: [0,5] . D========eE------R.. add x0, x27, #1 |
| # CHECK-NEXT: [0,6] . D=======eeeeeeeeER. ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32 |
| # CHECK-NEXT: [0,7] . D========eE------R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] . D=======eeeeeeeeER ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16 |
| # CHECK-NEXT: [0,9] . D========eE------R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 |
| # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 |
| # CHECK-NEXT: 3. 1 9.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 8.0 0.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 |
| # CHECK-NEXT: 5. 1 9.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 8.0 0.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32 |
| # CHECK-NEXT: 7. 1 9.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 8.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16 |
| # CHECK-NEXT: 9. 1 9.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 7.1 0.1 3.0 <total> |
| |
| # CHECK: [41] Code Region - G42 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 510 |
| # CHECK-NEXT: Total uOps: 4500 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 8.82 |
| # CHECK-NEXT: IPC: 1.96 |
| # CHECK-NEXT: Block RThroughput: 5.0 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 01234 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeeeeER . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8 |
| # CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] .DeeeeeeeeER . ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16 |
| # CHECK-NEXT: [0,3] .D=eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] . DeeeeeeeeER . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4 |
| # CHECK-NEXT: [0,5] . D=eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] . DeeeeeeeeER. ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8 |
| # CHECK-NEXT: [0,7] . D=eE------R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] . DeeeeeeeeER ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4 |
| # CHECK-NEXT: [0,9] . D=eE------R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8 |
| # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16 |
| # CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4 |
| # CHECK-NEXT: 5. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8 |
| # CHECK-NEXT: 7. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4 |
| # CHECK-NEXT: 9. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 1.5 0.1 3.0 <total> |
| |
| # CHECK: [42] Code Region - G43 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 510 |
| # CHECK-NEXT: Total uOps: 4500 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 8.82 |
| # CHECK-NEXT: IPC: 1.96 |
| # CHECK-NEXT: Block RThroughput: 5.0 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 01234 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeeeeER . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 |
| # CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] .DeeeeeeeeER . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 |
| # CHECK-NEXT: [0,3] .D=eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] . DeeeeeeeeER . ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 |
| # CHECK-NEXT: [0,5] . D=eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] . DeeeeeeeeER. ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 |
| # CHECK-NEXT: [0,7] . D=eE------R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] . DeeeeeeeeER ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 |
| # CHECK-NEXT: [0,9] . D=eE------R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 |
| # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 |
| # CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 |
| # CHECK-NEXT: 5. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 |
| # CHECK-NEXT: 7. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 |
| # CHECK-NEXT: 9. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 1.5 0.1 3.0 <total> |
| |
| # CHECK: [43] Code Region - G44 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 508 |
| # CHECK-NEXT: Total uOps: 3300 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 6.50 |
| # CHECK-NEXT: IPC: 1.97 |
| # CHECK-NEXT: Block RThroughput: 3.7 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 012 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeeeeER . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 |
| # CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] .DeeeeeeeeER. ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 |
| # CHECK-NEXT: [0,3] .D=eE------R. add x0, x27, #1 |
| # CHECK-NEXT: [0,4] . DeeeeeeeeER ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 |
| # CHECK-NEXT: [0,5] . D=eE------R add x0, x27, #1 |
| # CHECK-NEXT: [0,6] . D=eeeeeeE-R ldp s1, s2, [x27], #248 |
| # CHECK-NEXT: [0,7] . D==eE-----R add x0, x27, #1 |
| # CHECK-NEXT: [0,8] . D==eeeeeeER ldp d1, d2, [x27], #496 |
| # CHECK-NEXT: [0,9] . D===eE----R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 |
| # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 |
| # CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 |
| # CHECK-NEXT: 5. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 2.0 0.0 1.0 ldp s1, s2, [x27], #248 |
| # CHECK-NEXT: 7. 1 3.0 0.0 5.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 3.0 0.0 0.0 ldp d1, d2, [x27], #496 |
| # CHECK-NEXT: 9. 1 4.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 2.1 0.1 2.8 <total> |
| |
| # CHECK: [44] Code Region - G45 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 507 |
| # CHECK-NEXT: Total uOps: 1700 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 3.35 |
| # CHECK-NEXT: IPC: 1.97 |
| # CHECK-NEXT: Block RThroughput: 2.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 01 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeeER .. ldp q1, q2, [x27], #992 |
| # CHECK-NEXT: [0,1] D=eE----R .. add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeeeER.. ldp s1, s2, [x27, #248]! |
| # CHECK-NEXT: [0,3] D==eE----R.. add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeeeeeER. ldp d1, d2, [x27, #496]! |
| # CHECK-NEXT: [0,5] D===eE----R. add x0, x27, #1 |
| # CHECK-NEXT: [0,6] D===eeeeeeER ldp q1, q2, [x27, #992]! |
| # CHECK-NEXT: [0,7] D====eE----R add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===eeeeE-R ldp w1, w2, [x27], #248 |
| # CHECK-NEXT: [0,9] .D====eE---R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldp q1, q2, [x27], #992 |
| # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldp s1, s2, [x27, #248]! |
| # CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldp d1, d2, [x27, #496]! |
| # CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 4.0 0.0 0.0 ldp q1, q2, [x27, #992]! |
| # CHECK-NEXT: 7. 1 5.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 1.0 ldp w1, w2, [x27], #248 |
| # CHECK-NEXT: 9. 1 5.0 0.0 3.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.3 0.1 2.0 <total> |
| |
| # CHECK: [45] Code Region - G46 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 507 |
| # CHECK-NEXT: Total uOps: 1900 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 3.75 |
| # CHECK-NEXT: IPC: 1.97 |
| # CHECK-NEXT: Block RThroughput: 3.0 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 01 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeER .. ldp x1, x2, [x27], #496 |
| # CHECK-NEXT: [0,1] D=eE--R .. add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeER .. ldp w1, w2, [x27, #248]! |
| # CHECK-NEXT: [0,3] D==eE--R .. add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeeeER .. ldp x1, x2, [x27, #496]! |
| # CHECK-NEXT: [0,5] D===eE--R .. add x0, x27, #1 |
| # CHECK-NEXT: [0,6] D===eeeeeER. ldpsw x1, x2, [x27], #248 |
| # CHECK-NEXT: [0,7] D====eE---R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===eeeeeER ldpsw x1, x2, [x27, #248]! |
| # CHECK-NEXT: [0,9] .D====eE---R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldp x1, x2, [x27], #496 |
| # CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldp w1, w2, [x27, #248]! |
| # CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldp x1, x2, [x27, #496]! |
| # CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 4.0 0.0 0.0 ldpsw x1, x2, [x27], #248 |
| # CHECK-NEXT: 7. 1 5.0 0.0 3.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 ldpsw x1, x2, [x27, #248]! |
| # CHECK-NEXT: 9. 1 5.0 0.0 3.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.3 0.1 1.2 <total> |
| |
| # CHECK: [46] Code Region - G47 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 508 |
| # CHECK-NEXT: Total uOps: 1500 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 2.95 |
| # CHECK-NEXT: IPC: 1.97 |
| # CHECK-NEXT: Block RThroughput: 2.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 012 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeeER . . ldr b1, [x27], #254 |
| # CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeeeER. . ldr h1, [x27], #254 |
| # CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeeeeeER . ldr s1, [x27], #254 |
| # CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] D===eeeeeeER. ldr d1, [x27], #254 |
| # CHECK-NEXT: [0,7] D====eE----R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] D====eeeeeeER ldr q1, [x27], #254 |
| # CHECK-NEXT: [0,9] D=====eE----R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr b1, [x27], #254 |
| # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldr h1, [x27], #254 |
| # CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldr s1, [x27], #254 |
| # CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 4.0 0.0 0.0 ldr d1, [x27], #254 |
| # CHECK-NEXT: 7. 1 5.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 5.0 0.0 0.0 ldr q1, [x27], #254 |
| # CHECK-NEXT: 9. 1 6.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.5 0.1 2.0 <total> |
| |
| # CHECK: [47] Code Region - G48 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 508 |
| # CHECK-NEXT: Total uOps: 1500 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 2.95 |
| # CHECK-NEXT: IPC: 1.97 |
| # CHECK-NEXT: Block RThroughput: 2.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 012 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeeER . . ldr b1, [x27, #254]! |
| # CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeeeER. . ldr h1, [x27, #254]! |
| # CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeeeeeER . ldr s1, [x27, #254]! |
| # CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] D===eeeeeeER. ldr d1, [x27, #254]! |
| # CHECK-NEXT: [0,7] D====eE----R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] D====eeeeeeER ldr q1, [x27, #254]! |
| # CHECK-NEXT: [0,9] D=====eE----R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr b1, [x27, #254]! |
| # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldr h1, [x27, #254]! |
| # CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldr s1, [x27, #254]! |
| # CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 4.0 0.0 0.0 ldr d1, [x27, #254]! |
| # CHECK-NEXT: 7. 1 5.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 5.0 0.0 0.0 ldr q1, [x27, #254]! |
| # CHECK-NEXT: 9. 1 6.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.5 0.1 2.0 <total> |
| |
| # CHECK: [48] Code Region - G49 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 506 |
| # CHECK-NEXT: Total uOps: 1500 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 2.96 |
| # CHECK-NEXT: IPC: 1.98 |
| # CHECK-NEXT: Block RThroughput: 2.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 0 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeER . ldr w1, [x27], #254 |
| # CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeER . ldr x1, [x27], #254 |
| # CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeeeER . ldr w1, [x27, #254]! |
| # CHECK-NEXT: [0,5] D===eE--R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] D===eeeeER. ldr x1, [x27, #254]! |
| # CHECK-NEXT: [0,7] D====eE--R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] D====eeeeER ldrb w1, [x27], #254 |
| # CHECK-NEXT: [0,9] D=====eE--R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr w1, [x27], #254 |
| # CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldr x1, [x27], #254 |
| # CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldr w1, [x27, #254]! |
| # CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 4.0 0.0 0.0 ldr x1, [x27, #254]! |
| # CHECK-NEXT: 7. 1 5.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 5.0 0.0 0.0 ldrb w1, [x27], #254 |
| # CHECK-NEXT: 9. 1 6.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.5 0.1 1.0 <total> |
| |
| # CHECK: [49] Code Region - G50 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 506 |
| # CHECK-NEXT: Total uOps: 1500 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 2.96 |
| # CHECK-NEXT: IPC: 1.98 |
| # CHECK-NEXT: Block RThroughput: 2.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 0 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeER . ldrb w1, [x27, #254]! |
| # CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeER . ldrh w1, [x27], #254 |
| # CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeeeER . ldrh w1, [x27, #254]! |
| # CHECK-NEXT: [0,5] D===eE--R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] D===eeeeER. ldrsb w1, [x27], #254 |
| # CHECK-NEXT: [0,7] D====eE--R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] D====eeeeER ldrsb x1, [x27], #254 |
| # CHECK-NEXT: [0,9] D=====eE--R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldrb w1, [x27, #254]! |
| # CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldrh w1, [x27], #254 |
| # CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldrh w1, [x27, #254]! |
| # CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 4.0 0.0 0.0 ldrsb w1, [x27], #254 |
| # CHECK-NEXT: 7. 1 5.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 5.0 0.0 0.0 ldrsb x1, [x27], #254 |
| # CHECK-NEXT: 9. 1 6.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.5 0.1 1.0 <total> |
| |
| # CHECK: [50] Code Region - G51 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 506 |
| # CHECK-NEXT: Total uOps: 1500 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 2.96 |
| # CHECK-NEXT: IPC: 1.98 |
| # CHECK-NEXT: Block RThroughput: 2.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 0 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeER . ldrsb w1, [x27, #254]! |
| # CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeER . ldrsb x1, [x27, #254]! |
| # CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeeeER . ldrsh w1, [x27], #254 |
| # CHECK-NEXT: [0,5] D===eE--R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] D===eeeeER. ldrsh x1, [x27], #254 |
| # CHECK-NEXT: [0,7] D====eE--R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] D====eeeeER ldrsh w1, [x27, #254]! |
| # CHECK-NEXT: [0,9] D=====eE--R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldrsb w1, [x27, #254]! |
| # CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldrsb x1, [x27, #254]! |
| # CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldrsh w1, [x27], #254 |
| # CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 4.0 0.0 0.0 ldrsh x1, [x27], #254 |
| # CHECK-NEXT: 7. 1 5.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 5.0 0.0 0.0 ldrsh w1, [x27, #254]! |
| # CHECK-NEXT: 9. 1 6.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.5 0.1 1.0 <total> |
| |
| # CHECK: [51] Code Region - G52 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 504 |
| # CHECK-NEXT: Total uOps: 1700 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 3.37 |
| # CHECK-NEXT: IPC: 1.98 |
| # CHECK-NEXT: Block RThroughput: 2.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: Index 012345678 |
| |
| # CHECK: [0,0] DeeeeER . ldrsh x1, [x27, #254]! |
| # CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeER. ldrsw x1, [x27], #254 |
| # CHECK-NEXT: [0,3] D==eE--R. add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeeeER ldrsw x1, [x27, #254]! |
| # CHECK-NEXT: [0,5] D===eE--R add x0, x27, #1 |
| # CHECK-NEXT: [0,6] D===eeE-R st1 { v1.1d }, [x27], #8 |
| # CHECK-NEXT: [0,7] D====eE-R add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===eeER st1 { v1.2d }, [x27], #16 |
| # CHECK-NEXT: [0,9] .D====eER add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldrsh x1, [x27, #254]! |
| # CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldrsw x1, [x27], #254 |
| # CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldrsw x1, [x27, #254]! |
| # CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 4.0 0.0 1.0 st1 { v1.1d }, [x27], #8 |
| # CHECK-NEXT: 7. 1 5.0 0.0 1.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.2d }, [x27], #16 |
| # CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.3 0.1 0.8 <total> |
| |
| # CHECK: [52] Code Region - G53 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 504 |
| # CHECK-NEXT: Total uOps: 2000 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 3.97 |
| # CHECK-NEXT: IPC: 1.98 |
| # CHECK-NEXT: Block RThroughput: 2.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: Index 012345678 |
| |
| # CHECK: [0,0] DeeER. . st1 { v1.2s }, [x27], #8 |
| # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeER . st1 { v1.4h }, [x27], #8 |
| # CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeER . st1 { v1.4s }, [x27], #16 |
| # CHECK-NEXT: [0,5] D===eER . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] D===eeER. st1 { v1.8b }, [x27], #8 |
| # CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===eeER st1 { v1.8h }, [x27], #16 |
| # CHECK-NEXT: [0,9] .D====eER add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2s }, [x27], #8 |
| # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.4h }, [x27], #8 |
| # CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.4s }, [x27], #16 |
| # CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 4.0 0.0 0.0 st1 { v1.8b }, [x27], #8 |
| # CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.8h }, [x27], #16 |
| # CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.2 0.1 0.0 <total> |
| |
| # CHECK: [53] Code Region - G54 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 504 |
| # CHECK-NEXT: Total uOps: 2000 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 3.97 |
| # CHECK-NEXT: IPC: 1.98 |
| # CHECK-NEXT: Block RThroughput: 2.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: Index 012345678 |
| |
| # CHECK: [0,0] DeeER. . st1 { v1.16b }, [x27], #16 |
| # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeER . st1 { v1.1d }, [x27], x28 |
| # CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeER . st1 { v1.2d }, [x27], x28 |
| # CHECK-NEXT: [0,5] D===eER . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] D===eeER. st1 { v1.2s }, [x27], x28 |
| # CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===eeER st1 { v1.4h }, [x27], x28 |
| # CHECK-NEXT: [0,9] .D====eER add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.16b }, [x27], #16 |
| # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.1d }, [x27], x28 |
| # CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.2d }, [x27], x28 |
| # CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 4.0 0.0 0.0 st1 { v1.2s }, [x27], x28 |
| # CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.4h }, [x27], x28 |
| # CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.2 0.1 0.0 <total> |
| |
| # CHECK: [54] Code Region - G55 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 504 |
| # CHECK-NEXT: Total uOps: 2000 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 3.97 |
| # CHECK-NEXT: IPC: 1.98 |
| # CHECK-NEXT: Block RThroughput: 2.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: Index 012345678 |
| |
| # CHECK: [0,0] DeeER. . st1 { v1.4s }, [x27], x28 |
| # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeER . st1 { v1.8b }, [x27], x28 |
| # CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeER . st1 { v1.8h }, [x27], x28 |
| # CHECK-NEXT: [0,5] D===eER . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] D===eeER. st1 { v1.16b }, [x27], x28 |
| # CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===eeER st1 { v1.1d, v2.1d }, [x27], #16 |
| # CHECK-NEXT: [0,9] .D====eER add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4s }, [x27], x28 |
| # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.8b }, [x27], x28 |
| # CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.8h }, [x27], x28 |
| # CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 4.0 0.0 0.0 st1 { v1.16b }, [x27], x28 |
| # CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], #16 |
| # CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.2 0.1 0.0 <total> |
| |
| # CHECK: [55] Code Region - G56 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 504 |
| # CHECK-NEXT: Total uOps: 2400 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 4.76 |
| # CHECK-NEXT: IPC: 1.98 |
| # CHECK-NEXT: Block RThroughput: 3.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: Index 012345678 |
| |
| # CHECK: [0,0] DeeER. . st1 { v1.2d, v2.2d }, [x27], #32 |
| # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeER . st1 { v1.2s, v2.2s }, [x27], #16 |
| # CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeER . st1 { v1.4h, v2.4h }, [x27], #16 |
| # CHECK-NEXT: [0,5] D===eER . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D==eeER. st1 { v1.4s, v2.4s }, [x27], #32 |
| # CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===eeER st1 { v1.8b, v2.8b }, [x27], #16 |
| # CHECK-NEXT: [0,9] .D====eER add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2d, v2.2d }, [x27], #32 |
| # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], #16 |
| # CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.4h, v2.4h }, [x27], #16 |
| # CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], #32 |
| # CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], #16 |
| # CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.1 0.1 0.0 <total> |
| |
| # CHECK: [56] Code Region - G57 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 504 |
| # CHECK-NEXT: Total uOps: 2600 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 5.16 |
| # CHECK-NEXT: IPC: 1.98 |
| # CHECK-NEXT: Block RThroughput: 4.0 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: Index 012345678 |
| |
| # CHECK: [0,0] DeeER. . st1 { v1.8h, v2.8h }, [x27], #32 |
| # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeER . st1 { v1.16b, v2.16b }, [x27], #32 |
| # CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeER . st1 { v1.1d, v2.1d }, [x27], x28 |
| # CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D==eeER. st1 { v1.2d, v2.2d }, [x27], x28 |
| # CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===eeER st1 { v1.2s, v2.2s }, [x27], x28 |
| # CHECK-NEXT: [0,9] .D====eER add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8h, v2.8h }, [x27], #32 |
| # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], #32 |
| # CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], x28 |
| # CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.2d, v2.2d }, [x27], x28 |
| # CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], x28 |
| # CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.0 0.1 0.0 <total> |
| |
| # CHECK: [57] Code Region - G58 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 504 |
| # CHECK-NEXT: Total uOps: 2600 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 5.16 |
| # CHECK-NEXT: IPC: 1.98 |
| # CHECK-NEXT: Block RThroughput: 4.0 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: Index 012345678 |
| |
| # CHECK: [0,0] DeeER. . st1 { v1.4h, v2.4h }, [x27], x28 |
| # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeER . st1 { v1.4s, v2.4s }, [x27], x28 |
| # CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeER . st1 { v1.8b, v2.8b }, [x27], x28 |
| # CHECK-NEXT: [0,5] D===eER . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D==eeER. st1 { v1.8h, v2.8h }, [x27], x28 |
| # CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===eeER st1 { v1.16b, v2.16b }, [x27], x28 |
| # CHECK-NEXT: [0,9] .D====eER add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4h, v2.4h }, [x27], x28 |
| # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], x28 |
| # CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], x28 |
| # CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.8h, v2.8h }, [x27], x28 |
| # CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], x28 |
| # CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.1 0.1 0.0 <total> |
| |
| # CHECK: [58] Code Region - G59 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 703 |
| # CHECK-NEXT: Total uOps: 3400 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 4.84 |
| # CHECK-NEXT: IPC: 1.42 |
| # CHECK-NEXT: Block RThroughput: 6.0 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeER. . st1 { v1.1d, v2.1d, v3.1d }, [x27], #24 |
| # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeER . st1 { v1.2d, v2.2d, v3.2d }, [x27], #48 |
| # CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] .D=eeER . st1 { v1.2s, v2.2s, v3.2s }, [x27], #24 |
| # CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D===eeER. st1 { v1.4h, v2.4h, v3.4h }, [x27], #24 |
| # CHECK-NEXT: [0,7] .D====eER. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] . D===eeER st1 { v1.4s, v2.4s, v3.4s }, [x27], #48 |
| # CHECK-NEXT: [0,9] . D====eER add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], #24 |
| # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], #48 |
| # CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 2.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], #24 |
| # CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 4.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], #24 |
| # CHECK-NEXT: 7. 1 5.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], #48 |
| # CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.1 0.2 0.0 <total> |
| |
| # CHECK: [59] Code Region - G60 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 703 |
| # CHECK-NEXT: Total uOps: 3600 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 5.12 |
| # CHECK-NEXT: IPC: 1.42 |
| # CHECK-NEXT: Block RThroughput: 6.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeER. . st1 { v1.8b, v2.8b, v3.8b }, [x27], #24 |
| # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeER . st1 { v1.8h, v2.8h, v3.8h }, [x27], #48 |
| # CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] .D=eeER . st1 { v1.16b, v2.16b, v3.16b }, [x27], #48 |
| # CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D===eeER. st1 { v1.1d, v2.1d, v3.1d }, [x27], x28 |
| # CHECK-NEXT: [0,7] .D====eER. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] . D===eeER st1 { v1.2d, v2.2d, v3.2d }, [x27], x28 |
| # CHECK-NEXT: [0,9] . D====eER add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], #24 |
| # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], #48 |
| # CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 2.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], #48 |
| # CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 4.0 1.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], x28 |
| # CHECK-NEXT: 7. 1 5.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], x28 |
| # CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.1 0.2 0.0 <total> |
| |
| # CHECK: [60] Code Region - G61 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 703 |
| # CHECK-NEXT: Total uOps: 3400 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 4.84 |
| # CHECK-NEXT: IPC: 1.42 |
| # CHECK-NEXT: Block RThroughput: 6.0 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeER. . st1 { v1.2s, v2.2s, v3.2s }, [x27], x28 |
| # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeER . st1 { v1.4h, v2.4h, v3.4h }, [x27], x28 |
| # CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] .D=eeER . st1 { v1.4s, v2.4s, v3.4s }, [x27], x28 |
| # CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D==eeER . st1 { v1.8b, v2.8b, v3.8b }, [x27], x28 |
| # CHECK-NEXT: [0,7] .D===eER . add x0, x27, #1 |
| # CHECK-NEXT: [0,8] . D===eeER st1 { v1.8h, v2.8h, v3.8h }, [x27], x28 |
| # CHECK-NEXT: [0,9] . D====eER add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], x28 |
| # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], x28 |
| # CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 2.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], x28 |
| # CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], x28 |
| # CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], x28 |
| # CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 2.9 0.2 0.0 <total> |
| |
| # CHECK: [61] Code Region - G62 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 704 |
| # CHECK-NEXT: Total uOps: 3600 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 5.11 |
| # CHECK-NEXT: IPC: 1.42 |
| # CHECK-NEXT: Block RThroughput: 6.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 0 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeER. . st1 { v1.16b, v2.16b, v3.16b }, [x27], x28 |
| # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeER . st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 |
| # CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] .D==eeER . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 |
| # CHECK-NEXT: [0,5] .D===eER . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D===eeER . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 |
| # CHECK-NEXT: [0,7] . D===eER . add x0, x27, #1 |
| # CHECK-NEXT: [0,8] . D====eeER st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 |
| # CHECK-NEXT: [0,9] . D=====eER add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], x28 |
| # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 |
| # CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 1.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 |
| # CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 4.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 |
| # CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 5.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 |
| # CHECK-NEXT: 9. 1 6.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.4 0.3 0.0 <total> |
| |
| # CHECK: [62] Code Region - G63 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 804 |
| # CHECK-NEXT: Total uOps: 4200 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 5.22 |
| # CHECK-NEXT: IPC: 1.24 |
| # CHECK-NEXT: Block RThroughput: 8.0 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 01 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeER. .. st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 |
| # CHECK-NEXT: [0,1] D=eER. .. add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeER .. st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 |
| # CHECK-NEXT: [0,3] .D=eER .. add x0, x27, #1 |
| # CHECK-NEXT: [0,4] .D==eeER .. st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 |
| # CHECK-NEXT: [0,5] .D===eER .. add x0, x27, #1 |
| # CHECK-NEXT: [0,6] . D==eeER .. st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 |
| # CHECK-NEXT: [0,7] . D===eER .. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] . D=====eeER st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 |
| # CHECK-NEXT: [0,9] . D=====eER add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 |
| # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 |
| # CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 |
| # CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 |
| # CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 6.0 2.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 |
| # CHECK-NEXT: 9. 1 6.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.3 0.4 0.0 <total> |
| |
| # CHECK: [63] Code Region - G64 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 703 |
| # CHECK-NEXT: Total uOps: 3800 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 5.41 |
| # CHECK-NEXT: IPC: 1.42 |
| # CHECK-NEXT: Block RThroughput: 7.0 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeER. . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 |
| # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeER . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 |
| # CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] .D==eeER . st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 |
| # CHECK-NEXT: [0,5] .D===eER . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] . D==eeER. st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 |
| # CHECK-NEXT: [0,7] . D===eER. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] . D===eeER st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 |
| # CHECK-NEXT: [0,9] . D===eER add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 |
| # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 |
| # CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 |
| # CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 |
| # CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 |
| # CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 2.9 0.2 0.0 <total> |
| |
| # CHECK: [64] Code Region - G65 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 706 |
| # CHECK-NEXT: Total uOps: 3200 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 4.53 |
| # CHECK-NEXT: IPC: 1.42 |
| # CHECK-NEXT: Block RThroughput: 5.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 012 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeER. . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 |
| # CHECK-NEXT: [0,1] D=eER. . . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] .DeeER . . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 |
| # CHECK-NEXT: [0,3] .D=eER . . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] .D===eeeeER . st1 { v1.b }[0], [x27], #1 |
| # CHECK-NEXT: [0,5] .D====eE--R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] . D===eeeeER. st1 { v1.b }[8], [x27], #1 |
| # CHECK-NEXT: [0,7] . D====eE--R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] . D====eeeeER st1 { v1.b }[0], [x27], x28 |
| # CHECK-NEXT: [0,9] . D=====eE--R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 |
| # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 |
| # CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 4.0 2.0 0.0 st1 { v1.b }[0], [x27], #1 |
| # CHECK-NEXT: 5. 1 5.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 4.0 0.0 0.0 st1 { v1.b }[8], [x27], #1 |
| # CHECK-NEXT: 7. 1 5.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 5.0 0.0 0.0 st1 { v1.b }[0], [x27], x28 |
| # CHECK-NEXT: 9. 1 6.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.5 0.3 0.6 <total> |
| |
| # CHECK: [65] Code Region - G66 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 506 |
| # CHECK-NEXT: Total uOps: 2000 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 3.95 |
| # CHECK-NEXT: IPC: 1.98 |
| # CHECK-NEXT: Block RThroughput: 2.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 0 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeER . st1 { v1.b }[8], [x27], x28 |
| # CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeER . st1 { v1.h }[0], [x27], #2 |
| # CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeeeER . st1 { v1.h }[4], [x27], #2 |
| # CHECK-NEXT: [0,5] D===eE--R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] D===eeeeER. st1 { v1.h }[0], [x27], x28 |
| # CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===eeeeER st1 { v1.h }[4], [x27], x28 |
| # CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.b }[8], [x27], x28 |
| # CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.h }[0], [x27], #2 |
| # CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.h }[4], [x27], #2 |
| # CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 4.0 0.0 0.0 st1 { v1.h }[0], [x27], x28 |
| # CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.h }[4], [x27], x28 |
| # CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.2 0.1 1.0 <total> |
| |
| # CHECK: [66] Code Region - G67 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 506 |
| # CHECK-NEXT: Total uOps: 2200 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 4.35 |
| # CHECK-NEXT: IPC: 1.98 |
| # CHECK-NEXT: Block RThroughput: 3.0 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 0 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeER . st1 { v1.s }[0], [x27], #4 |
| # CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeER . st1 { v1.s }[0], [x27], x28 |
| # CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeeeER . st1 { v1.d }[0], [x27], #8 |
| # CHECK-NEXT: [0,5] D===eE--R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] D===eeeeER. st1 { v1.d }[0], [x27], x28 |
| # CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===eeeeER st2 { v1.2d, v2.2d }, [x27], #32 |
| # CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.s }[0], [x27], #4 |
| # CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.s }[0], [x27], x28 |
| # CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.d }[0], [x27], #8 |
| # CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 4.0 0.0 0.0 st1 { v1.d }[0], [x27], x28 |
| # CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], #32 |
| # CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.2 0.1 1.0 <total> |
| |
| # CHECK: [67] Code Region - G68 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 506 |
| # CHECK-NEXT: Total uOps: 2400 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 4.74 |
| # CHECK-NEXT: IPC: 1.98 |
| # CHECK-NEXT: Block RThroughput: 3.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 0 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeER . st2 { v1.2s, v2.2s }, [x27], #16 |
| # CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeER . st2 { v1.4h, v2.4h }, [x27], #16 |
| # CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeeeER . st2 { v1.4s, v2.4s }, [x27], #32 |
| # CHECK-NEXT: [0,5] D===eE--R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D==eeeeER. st2 { v1.8b, v2.8b }, [x27], #16 |
| # CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===eeeeER st2 { v1.8h, v2.8h }, [x27], #32 |
| # CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.2s, v2.2s }, [x27], #16 |
| # CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], #16 |
| # CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], #32 |
| # CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 3.0 0.0 0.0 st2 { v1.8b, v2.8b }, [x27], #16 |
| # CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], #32 |
| # CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.1 0.1 1.0 <total> |
| |
| # CHECK: [68] Code Region - G69 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 506 |
| # CHECK-NEXT: Total uOps: 2600 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 5.14 |
| # CHECK-NEXT: IPC: 1.98 |
| # CHECK-NEXT: Block RThroughput: 4.0 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 0 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeER . st2 { v1.16b, v2.16b }, [x27], #32 |
| # CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeER . st2 { v1.2d, v2.2d }, [x27], x28 |
| # CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeeeER . st2 { v1.2s, v2.2s }, [x27], x28 |
| # CHECK-NEXT: [0,5] .D==eE--R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D==eeeeER. st2 { v1.4h, v2.4h }, [x27], x28 |
| # CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===eeeeER st2 { v1.4s, v2.4s }, [x27], x28 |
| # CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.16b, v2.16b }, [x27], #32 |
| # CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], x28 |
| # CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 st2 { v1.2s, v2.2s }, [x27], x28 |
| # CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 3.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], x28 |
| # CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], x28 |
| # CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.0 0.1 1.0 <total> |
| |
| # CHECK: [69] Code Region - G70 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 506 |
| # CHECK-NEXT: Total uOps: 2400 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 4.74 |
| # CHECK-NEXT: IPC: 1.98 |
| # CHECK-NEXT: Block RThroughput: 3.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 0 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeER . st2 { v1.8b, v2.8b }, [x27], x28 |
| # CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeER . st2 { v1.8h, v2.8h }, [x27], x28 |
| # CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeeeER . st2 { v1.16b, v2.16b }, [x27], x28 |
| # CHECK-NEXT: [0,5] .D==eE--R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D==eeeeER. st2 { v1.b, v2.b }[0], [x27], #2 |
| # CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===eeeeER st2 { v1.b, v2.b }[8], [x27], #2 |
| # CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.8b, v2.8b }, [x27], x28 |
| # CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], x28 |
| # CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 st2 { v1.16b, v2.16b }, [x27], x28 |
| # CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 3.0 0.0 0.0 st2 { v1.b, v2.b }[0], [x27], #2 |
| # CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], #2 |
| # CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.0 0.1 1.0 <total> |
| |
| # CHECK: [70] Code Region - G71 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 506 |
| # CHECK-NEXT: Total uOps: 2000 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 3.95 |
| # CHECK-NEXT: IPC: 1.98 |
| # CHECK-NEXT: Block RThroughput: 2.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 0 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeER . st2 { v1.b, v2.b }[0], [x27], x28 |
| # CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeER . st2 { v1.b, v2.b }[8], [x27], x28 |
| # CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeeeER . st2 { v1.h, v2.h }[0], [x27], #4 |
| # CHECK-NEXT: [0,5] D===eE--R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] D===eeeeER. st2 { v1.h, v2.h }[4], [x27], #4 |
| # CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===eeeeER st2 { v1.h, v2.h }[0], [x27], x28 |
| # CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.b, v2.b }[0], [x27], x28 |
| # CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], x28 |
| # CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], #4 |
| # CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 4.0 0.0 0.0 st2 { v1.h, v2.h }[4], [x27], #4 |
| # CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], x28 |
| # CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.2 0.1 1.0 <total> |
| |
| # CHECK: [71] Code Region - G72 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 506 |
| # CHECK-NEXT: Total uOps: 2000 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 3.95 |
| # CHECK-NEXT: IPC: 1.98 |
| # CHECK-NEXT: Block RThroughput: 2.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 0 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeER . st2 { v1.h, v2.h }[4], [x27], x28 |
| # CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeER . st2 { v1.s, v2.s }[0], [x27], #8 |
| # CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeeeER . st2 { v1.s, v2.s }[0], [x27], x28 |
| # CHECK-NEXT: [0,5] D===eE--R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] D===eeeeER. st2 { v1.d, v2.d }[0], [x27], #16 |
| # CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===eeeeER st2 { v1.d, v2.d }[0], [x27], x28 |
| # CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.h, v2.h }[4], [x27], x28 |
| # CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], #8 |
| # CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], x28 |
| # CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 4.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], #16 |
| # CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], x28 |
| # CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.2 0.1 1.0 <total> |
| |
| # CHECK: [72] Code Region - G73 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 600 |
| # CHECK-NEXT: Total Cycles: 406 |
| # CHECK-NEXT: Total uOps: 2000 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 4.93 |
| # CHECK-NEXT: IPC: 1.48 |
| # CHECK-NEXT: Block RThroughput: 3.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeER . st3 { v1.2d, v2.2d, v3.2d }, [x27], #48 |
| # CHECK-NEXT: [0,1] D=eE---R . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeER . st3 { v1.2s, v2.2s, v3.2s }, [x27], #24 |
| # CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] .D==eeeeER st3 { v1.4h, v2.4h, v3.4h }, [x27], #24 |
| # CHECK-NEXT: [0,5] .D===eE--R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], #48 |
| # CHECK-NEXT: 1. 1 2.0 0.0 3.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], #24 |
| # CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 1.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], #24 |
| # CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 2.5 0.3 1.2 <total> |
| |
| # CHECK: [73] Code Region - G74 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 707 |
| # CHECK-NEXT: Total uOps: 3800 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 5.37 |
| # CHECK-NEXT: IPC: 1.41 |
| # CHECK-NEXT: Block RThroughput: 7.0 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 0123 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeER . . st3 { v1.4s, v2.4s, v3.4s }, [x27], #48 |
| # CHECK-NEXT: [0,1] D=eE---R . . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeER . . st3 { v1.8b, v2.8b, v3.8b }, [x27], #24 |
| # CHECK-NEXT: [0,3] D==eE--R . . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] .D==eeeeeER . st3 { v1.8h, v2.8h, v3.8h }, [x27], #48 |
| # CHECK-NEXT: [0,5] .D===eE---R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D===eeeeeER . st3 { v1.16b, v2.16b, v3.16b }, [x27], #48 |
| # CHECK-NEXT: [0,7] . D===eE---R . add x0, x27, #1 |
| # CHECK-NEXT: [0,8] . D====eeeeeER st3 { v1.2d, v2.2d, v3.2d }, [x27], x28 |
| # CHECK-NEXT: [0,9] . D=====eE---R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], #48 |
| # CHECK-NEXT: 1. 1 2.0 0.0 3.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], #24 |
| # CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 1.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], #48 |
| # CHECK-NEXT: 5. 1 4.0 0.0 3.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 4.0 0.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], #48 |
| # CHECK-NEXT: 7. 1 4.0 0.0 3.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 5.0 1.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], x28 |
| # CHECK-NEXT: 9. 1 6.0 0.0 3.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.4 0.3 1.4 <total> |
| |
| # CHECK: [74] Code Region - G75 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 706 |
| # CHECK-NEXT: Total uOps: 3400 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 4.82 |
| # CHECK-NEXT: IPC: 1.42 |
| # CHECK-NEXT: Block RThroughput: 6.0 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 012 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeER . . st3 { v1.2s, v2.2s, v3.2s }, [x27], x28 |
| # CHECK-NEXT: [0,1] D=eE--R . . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeER . . st3 { v1.4h, v2.4h, v3.4h }, [x27], x28 |
| # CHECK-NEXT: [0,3] D==eE--R . . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] .D=eeeeeER. . st3 { v1.4s, v2.4s, v3.4s }, [x27], x28 |
| # CHECK-NEXT: [0,5] .D==eE---R. . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D==eeeeER. . st3 { v1.8b, v2.8b, v3.8b }, [x27], x28 |
| # CHECK-NEXT: [0,7] .D===eE--R. . add x0, x27, #1 |
| # CHECK-NEXT: [0,8] . D===eeeeeER st3 { v1.8h, v2.8h, v3.8h }, [x27], x28 |
| # CHECK-NEXT: [0,9] . D====eE---R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], x28 |
| # CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], x28 |
| # CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 2.0 0.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], x28 |
| # CHECK-NEXT: 5. 1 3.0 0.0 3.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 3.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], x28 |
| # CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 1.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], x28 |
| # CHECK-NEXT: 9. 1 5.0 0.0 3.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 2.9 0.2 1.2 <total> |
| |
| # CHECK: [75] Code Region - G76 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 606 |
| # CHECK-NEXT: Total uOps: 3200 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 5.28 |
| # CHECK-NEXT: IPC: 1.65 |
| # CHECK-NEXT: Block RThroughput: 5.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 01 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeER .. st3 { v1.16b, v2.16b, v3.16b }, [x27], x28 |
| # CHECK-NEXT: [0,1] D=eE---R .. add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeER .. st3 { v1.b, v2.b, v3.b }[0], [x27], #3 |
| # CHECK-NEXT: [0,3] D==eE--R .. add x0, x27, #1 |
| # CHECK-NEXT: [0,4] .D==eeeeER.. st3 { v1.b, v2.b, v3.b }[8], [x27], #3 |
| # CHECK-NEXT: [0,5] .D===eE--R.. add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D===eeeeER. st3 { v1.b, v2.b, v3.b }[0], [x27], x28 |
| # CHECK-NEXT: [0,7] .D====eE--R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] . D===eeeeER st3 { v1.b, v2.b, v3.b }[8], [x27], x28 |
| # CHECK-NEXT: [0,9] . D====eE--R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], x28 |
| # CHECK-NEXT: 1. 1 2.0 0.0 3.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], #3 |
| # CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 1.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], #3 |
| # CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 4.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], x28 |
| # CHECK-NEXT: 7. 1 5.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], x28 |
| # CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.3 0.2 1.1 <total> |
| |
| # CHECK: [76] Code Region - G77 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 506 |
| # CHECK-NEXT: Total uOps: 3000 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 5.93 |
| # CHECK-NEXT: IPC: 1.98 |
| # CHECK-NEXT: Block RThroughput: 5.0 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 0 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeER . st3 { v1.h, v2.h, v3.h }[0], [x27], #6 |
| # CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeER . st3 { v1.h, v2.h, v3.h }[4], [x27], #6 |
| # CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] .D=eeeeER . st3 { v1.h, v2.h, v3.h }[0], [x27], x28 |
| # CHECK-NEXT: [0,5] .D==eE--R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D==eeeeER. st3 { v1.h, v2.h, v3.h }[4], [x27], x28 |
| # CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] . D==eeeeER st3 { v1.s, v2.s, v3.s }[0], [x27], #12 |
| # CHECK-NEXT: [0,9] . D===eE--R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], #6 |
| # CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], #6 |
| # CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 2.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], x28 |
| # CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 3.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], x28 |
| # CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 3.0 0.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], #12 |
| # CHECK-NEXT: 9. 1 4.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 2.7 0.1 1.0 <total> |
| |
| # CHECK: [77] Code Region - G78 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 706 |
| # CHECK-NEXT: Total uOps: 3600 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 5.10 |
| # CHECK-NEXT: IPC: 1.42 |
| # CHECK-NEXT: Block RThroughput: 6.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 012 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeER . . st3 { v1.s, v2.s, v3.s }[0], [x27], x28 |
| # CHECK-NEXT: [0,1] D=eE--R . . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeER . . st3 { v1.d, v2.d, v3.d }[0], [x27], #24 |
| # CHECK-NEXT: [0,3] D==eE--R . . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] .D=eeeeER . . st3 { v1.d, v2.d, v3.d }[0], [x27], x28 |
| # CHECK-NEXT: [0,5] .D==eE--R . . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D==eeeeER. . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 |
| # CHECK-NEXT: [0,7] . D==eE--R. . add x0, x27, #1 |
| # CHECK-NEXT: [0,8] . D==eeeeeeER st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 |
| # CHECK-NEXT: [0,9] . D===eE----R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], x28 |
| # CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], #24 |
| # CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 2.0 0.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], x28 |
| # CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 3.0 0.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 |
| # CHECK-NEXT: 7. 1 3.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 3.0 0.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 |
| # CHECK-NEXT: 9. 1 4.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 2.6 0.1 1.2 <total> |
| |
| # CHECK: [78] Code Region - G79 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 1205 |
| # CHECK-NEXT: Total uOps: 5800 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 4.81 |
| # CHECK-NEXT: IPC: 0.83 |
| # CHECK-NEXT: Block RThroughput: 12.0 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 0123456 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeeER . .. st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 |
| # CHECK-NEXT: [0,1] D=eE----R . .. add x0, x27, #1 |
| # CHECK-NEXT: [0,2] .DeeeeeeeER .. st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 |
| # CHECK-NEXT: [0,3] .D=eE-----R .. add x0, x27, #1 |
| # CHECK-NEXT: [0,4] . D=eeeeeeER .. st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 |
| # CHECK-NEXT: [0,5] . D==eE----R .. add x0, x27, #1 |
| # CHECK-NEXT: [0,6] . D===eeeeeeeER. st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 |
| # CHECK-NEXT: [0,7] . D====eE-----R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] . D===eeeeeeeER st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 |
| # CHECK-NEXT: [0,9] . D====eE-----R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 |
| # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 1.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 |
| # CHECK-NEXT: 3. 1 2.0 0.0 5.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 2.0 1.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 |
| # CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 4.0 2.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 |
| # CHECK-NEXT: 7. 1 5.0 0.0 5.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 |
| # CHECK-NEXT: 9. 1 5.0 0.0 5.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 2.9 0.4 2.3 <total> |
| |
| # CHECK: [79] Code Region - G80 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 1006 |
| # CHECK-NEXT: Total uOps: 4800 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 4.77 |
| # CHECK-NEXT: IPC: 0.99 |
| # CHECK-NEXT: Block RThroughput: 9.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 012345 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeER . . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 |
| # CHECK-NEXT: [0,1] D=eE--R . . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] .DeeeeeeER. . st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 |
| # CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] .D===eeeeeeER . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 |
| # CHECK-NEXT: [0,5] . D===eE----R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] . D===eeeeeeeER. st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 |
| # CHECK-NEXT: [0,7] . D====eE-----R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] . D====eeeeeeER st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 |
| # CHECK-NEXT: [0,9] . D=====eE----R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 |
| # CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 1.0 0.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 |
| # CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 4.0 2.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 |
| # CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 4.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 |
| # CHECK-NEXT: 7. 1 5.0 0.0 5.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 5.0 1.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 |
| # CHECK-NEXT: 9. 1 6.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.4 0.4 1.9 <total> |
| |
| # CHECK: [80] Code Region - G81 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 807 |
| # CHECK-NEXT: Total uOps: 5200 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 6.44 |
| # CHECK-NEXT: IPC: 1.24 |
| # CHECK-NEXT: Block RThroughput: 6.0 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 01234 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeeeER. . st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 |
| # CHECK-NEXT: [0,1] D=eE-----R. . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] .DeeeeeeeER . st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 |
| # CHECK-NEXT: [0,3] .D=eE-----R . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] . DeeeeeeER . st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 |
| # CHECK-NEXT: [0,5] . D=eE----R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] . D===eeeeeeER. st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 |
| # CHECK-NEXT: [0,7] . D===eE----R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] . D===eeeeeeER st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 |
| # CHECK-NEXT: [0,9] . D====eE----R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 |
| # CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 1.0 0.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 |
| # CHECK-NEXT: 3. 1 2.0 0.0 5.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 1.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 |
| # CHECK-NEXT: 5. 1 2.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 4.0 2.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 |
| # CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 |
| # CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 2.6 0.3 2.2 <total> |
| |
| # CHECK: [81] Code Region - G82 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 508 |
| # CHECK-NEXT: Total uOps: 4000 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 7.87 |
| # CHECK-NEXT: IPC: 1.97 |
| # CHECK-NEXT: Block RThroughput: 5.0 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 012 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeeER . . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 |
| # CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeeeER. . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 |
| # CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] .D=eeeeeeER . st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 |
| # CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] . D=eeeeeeER. st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 |
| # CHECK-NEXT: [0,7] . D==eE----R. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] . D==eeeeeeER st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 |
| # CHECK-NEXT: [0,9] . D==eE----R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 |
| # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 |
| # CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 2.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 |
| # CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 2.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 |
| # CHECK-NEXT: 7. 1 3.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 3.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 |
| # CHECK-NEXT: 9. 1 3.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 2.3 0.1 2.0 <total> |
| |
| # CHECK: [82] Code Region - G83 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 800 |
| # CHECK-NEXT: Total Cycles: 506 |
| # CHECK-NEXT: Total uOps: 2800 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 5.53 |
| # CHECK-NEXT: IPC: 1.58 |
| # CHECK-NEXT: Block RThroughput: 2.0 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 0 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeeeER . st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 |
| # CHECK-NEXT: [0,1] D=eE----R . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeeeeeER. st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 |
| # CHECK-NEXT: [0,3] .D=eE----R. add x0, x27, #1 |
| # CHECK-NEXT: [0,4] .D==eeeeER. st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 |
| # CHECK-NEXT: [0,5] .D===eE--R. add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D===eeeeER st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 |
| # CHECK-NEXT: [0,7] .D====eE--R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 |
| # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 |
| # CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 1.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 |
| # CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 4.0 0.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 |
| # CHECK-NEXT: 7. 1 5.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 2.9 0.3 1.5 <total> |
| |
| # CHECK: [83] Code Region - G84 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 400 |
| # CHECK-NEXT: Total Cycles: 204 |
| # CHECK-NEXT: Total uOps: 800 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 3.92 |
| # CHECK-NEXT: IPC: 1.96 |
| # CHECK-NEXT: Block RThroughput: 1.0 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: Index 012345 |
| |
| # CHECK: [0,0] DeeER. stp s1, s2, [x27], #248 |
| # CHECK-NEXT: [0,1] D=eER. add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeER stp d1, d2, [x27], #496 |
| # CHECK-NEXT: [0,3] D==eER add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 stp s1, s2, [x27], #248 |
| # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 stp d1, d2, [x27], #496 |
| # CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 2.0 0.3 0.0 <total> |
| |
| # CHECK: [84] Code Region - G85 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 504 |
| # CHECK-NEXT: Total uOps: 2200 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 4.37 |
| # CHECK-NEXT: IPC: 1.98 |
| # CHECK-NEXT: Block RThroughput: 3.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: Index 012345678 |
| |
| # CHECK: [0,0] DeeER. . stp q1, q2, [x27], #992 |
| # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeER . stp s1, s2, [x27, #248]! |
| # CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeER . stp d1, d2, [x27, #496]! |
| # CHECK-NEXT: [0,5] D===eER . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] .D==eeER. stp q1, q2, [x27, #992]! |
| # CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===eER. stp w1, w2, [x27], #248 |
| # CHECK-NEXT: [0,9] .D====eER add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 stp q1, q2, [x27], #992 |
| # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 stp s1, s2, [x27, #248]! |
| # CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 stp d1, d2, [x27, #496]! |
| # CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 3.0 0.0 0.0 stp q1, q2, [x27, #992]! |
| # CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 stp w1, w2, [x27], #248 |
| # CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.1 0.1 0.0 <total> |
| |
| # CHECK: [85] Code Region - G86 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 504 |
| # CHECK-NEXT: Total uOps: 2000 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 3.97 |
| # CHECK-NEXT: IPC: 1.98 |
| # CHECK-NEXT: Block RThroughput: 2.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: Index 012345678 |
| |
| # CHECK: [0,0] DeER . . stp x1, x2, [x27], #496 |
| # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eER. . stp w1, w2, [x27, #248]! |
| # CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eER . stp x1, x2, [x27, #496]! |
| # CHECK-NEXT: [0,5] D===eER . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] D===eeER. str b1, [x27], #254 |
| # CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===eeER str h1, [x27], #254 |
| # CHECK-NEXT: [0,9] .D====eER add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 stp x1, x2, [x27], #496 |
| # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 stp w1, w2, [x27, #248]! |
| # CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 stp x1, x2, [x27, #496]! |
| # CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 4.0 0.0 0.0 str b1, [x27], #254 |
| # CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 str h1, [x27], #254 |
| # CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.2 0.1 0.0 <total> |
| |
| # CHECK: [86] Code Region - G87 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 504 |
| # CHECK-NEXT: Total uOps: 2000 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 3.97 |
| # CHECK-NEXT: IPC: 1.98 |
| # CHECK-NEXT: Block RThroughput: 2.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: Index 012345678 |
| |
| # CHECK: [0,0] DeeER. . str s1, [x27], #254 |
| # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeER . str d1, [x27], #254 |
| # CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeER . str q1, [x27], #254 |
| # CHECK-NEXT: [0,5] D===eER . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] D===eeER. str b1, [x27, #254]! |
| # CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===eeER str h1, [x27, #254]! |
| # CHECK-NEXT: [0,9] .D====eER add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 str s1, [x27], #254 |
| # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 str d1, [x27], #254 |
| # CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 str q1, [x27], #254 |
| # CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 4.0 0.0 0.0 str b1, [x27, #254]! |
| # CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 str h1, [x27, #254]! |
| # CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.2 0.1 0.0 <total> |
| |
| # CHECK: [87] Code Region - G88 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 504 |
| # CHECK-NEXT: Total uOps: 2000 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 3.97 |
| # CHECK-NEXT: IPC: 1.98 |
| # CHECK-NEXT: Block RThroughput: 2.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: Index 012345678 |
| |
| # CHECK: [0,0] DeeER. . str s1, [x27, #254]! |
| # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eeER . str d1, [x27, #254]! |
| # CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eeER . str q1, [x27, #254]! |
| # CHECK-NEXT: [0,5] D===eER . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] D===eER . str w1, [x27], #254 |
| # CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===eER. str x1, [x27], #254 |
| # CHECK-NEXT: [0,9] .D====eER add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 str s1, [x27, #254]! |
| # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 str d1, [x27, #254]! |
| # CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 str q1, [x27, #254]! |
| # CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 4.0 0.0 0.0 str w1, [x27], #254 |
| # CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 str x1, [x27], #254 |
| # CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.2 0.1 0.0 <total> |
| |
| # CHECK: [88] Code Region - G89 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 1000 |
| # CHECK-NEXT: Total Cycles: 504 |
| # CHECK-NEXT: Total uOps: 2000 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 3.97 |
| # CHECK-NEXT: IPC: 1.98 |
| # CHECK-NEXT: Block RThroughput: 2.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: Index 012345678 |
| |
| # CHECK: [0,0] DeER . . str w1, [x27, #254]! |
| # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D=eER. . str x1, [x27, #254]! |
| # CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 |
| # CHECK-NEXT: [0,4] D==eER . strb w1, [x27], #254 |
| # CHECK-NEXT: [0,5] D===eER . add x0, x27, #1 |
| # CHECK-NEXT: [0,6] D===eER . strb w1, [x27, #254]! |
| # CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 |
| # CHECK-NEXT: [0,8] .D===eER. strh w1, [x27], #254 |
| # CHECK-NEXT: [0,9] .D====eER add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 str w1, [x27, #254]! |
| # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 2.0 0.0 0.0 str x1, [x27, #254]! |
| # CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 4. 1 3.0 0.0 0.0 strb w1, [x27], #254 |
| # CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 6. 1 4.0 0.0 0.0 strb w1, [x27, #254]! |
| # CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 8. 1 4.0 0.0 0.0 strh w1, [x27], #254 |
| # CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 3.2 0.1 0.0 <total> |
| |
| # CHECK: [89] Code Region - G90 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 200 |
| # CHECK-NEXT: Total Cycles: 104 |
| # CHECK-NEXT: Total uOps: 400 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 3.85 |
| # CHECK-NEXT: IPC: 1.92 |
| # CHECK-NEXT: Block RThroughput: 0.5 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: Index 01234 |
| |
| # CHECK: [0,0] DeER. strh w1, [x27, #254]! |
| # CHECK-NEXT: [0,1] D=eER add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 strh w1, [x27, #254]! |
| # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 1.5 0.5 0.0 <total> |
| |
| # CHECK: [90] Code Region - G91 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 400 |
| # CHECK-NEXT: Total Cycles: 110 |
| # CHECK-NEXT: Total uOps: 600 |
| |
| # CHECK: Dispatch Width: 15 |
| # CHECK-NEXT: uOps Per Cycle: 5.45 |
| # CHECK-NEXT: IPC: 3.64 |
| # CHECK-NEXT: Block RThroughput: 1.0 |
| |
| # CHECK: Timeline view: |
| # CHECK-NEXT: 0 |
| # CHECK-NEXT: Index 0123456789 |
| |
| # CHECK: [0,0] DeeeeER . ldr x1, [x27], #254 |
| # CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 |
| # CHECK-NEXT: [0,2] D====eeeeER ldr x2, [x1], #254 |
| # CHECK-NEXT: [0,3] D=eE------R add x0, x27, #1 |
| |
| # CHECK: Average Wait times (based on the timeline view): |
| # CHECK-NEXT: [0]: Executions |
| # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue |
| # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready |
| # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage |
| |
| # CHECK: [0] [1] [2] [3] |
| # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr x1, [x27], #254 |
| # CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 |
| # CHECK-NEXT: 2. 1 5.0 0.0 0.0 ldr x2, [x1], #254 |
| # CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1 |
| # CHECK-NEXT: 1 2.5 0.3 2.0 <total> |