| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -march=hexagon < %s | FileCheck %s |
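; Lowering of the high half of a widening vector multiply (mulhs/mulhu
; patterns: ext, mul, lshr by element width, trunc). The "-packets" target
; feature disables packetization, so each instruction lands in its own bundle.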
| |
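; Signed multiply-high (mulhs) of <4 x i8>.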
| define <4 x i8> @f0(<4 x i8> %a0, <4 x i8> %a1) #0 { |
| ; CHECK-LABEL: f0: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r2 = r1 |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r7:6 = combine(#0,#0) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r5:4 = vmpybu(r0,r1) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: p1 = vcmpb.gt(r1:0,#-1) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: p0 = vcmpb.gt(r3:2,#-1) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r3:2 = vmux(p1,r7:6,r3:2) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r1:0 = vmux(p0,r7:6,r1:0) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r4 = vtrunohb(r5:4) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r7:6 = vmpybu(r0,r0) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r1:0 = vaddub(r3:2,r1:0) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r5 = vtrunohb(r7:6) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r1:0 = vsubub(r5:4,r1:0) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: jumpr r31 |
| ; CHECK-NEXT: } |
| %v0 = sext <4 x i8> %a0 to <4 x i16> |
| %v1 = sext <4 x i8> %a1 to <4 x i16> |
| %v2 = mul <4 x i16> %v0, %v1 |
| %v3 = lshr <4 x i16> %v2, <i16 8, i16 8, i16 8, i16 8> |
| %v4 = trunc <4 x i16> %v3 to <4 x i8> |
| ret <4 x i8> %v4 |
| } |
| |
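; Unsigned multiply-high (mulhu) of <4 x i8>.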
| define <4 x i8> @f1(<4 x i8> %a0, <4 x i8> %a1) #0 { |
| ; CHECK-LABEL: f1: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r1:0 = vmpybu(r0,r1) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r0 = vtrunohb(r1:0) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: jumpr r31 |
| ; CHECK-NEXT: } |
| %v0 = zext <4 x i8> %a0 to <4 x i16> |
| %v1 = zext <4 x i8> %a1 to <4 x i16> |
| %v2 = mul <4 x i16> %v0, %v1 |
| %v3 = lshr <4 x i16> %v2, <i16 8, i16 8, i16 8, i16 8> |
| %v4 = trunc <4 x i16> %v3 to <4 x i8> |
| ret <4 x i8> %v4 |
| } |
| |
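; Signed multiply-high (mulhs) of <8 x i8>.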
| define <8 x i8> @f2(<8 x i8> %a0, <8 x i8> %a1) #0 { |
| ; CHECK-LABEL: f2: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r7:6 = combine(#0,#0) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: p0 = vcmpb.gt(r3:2,#-1) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r5:4 = vmpybu(r0,r2) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r9:8 = vmux(p0,r7:6,r1:0) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: p0 = vcmpb.gt(r1:0,#-1) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r1:0 = vmpybu(r1,r3) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r7:6 = vmux(p0,r7:6,r3:2) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r4 = vtrunohb(r5:4) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r3:2 = vaddub(r7:6,r9:8) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r5 = vtrunohb(r1:0) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r1:0 = vsubub(r5:4,r3:2) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: jumpr r31 |
| ; CHECK-NEXT: } |
| %v0 = sext <8 x i8> %a0 to <8 x i16> |
| %v1 = sext <8 x i8> %a1 to <8 x i16> |
| %v2 = mul <8 x i16> %v0, %v1 |
| %v3 = lshr <8 x i16> %v2, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> |
| %v4 = trunc <8 x i16> %v3 to <8 x i8> |
| ret <8 x i8> %v4 |
| } |
| |
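; Unsigned multiply-high (mulhu) of <8 x i8>.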
| define <8 x i8> @f3(<8 x i8> %a0, <8 x i8> %a1) #0 { |
| ; CHECK-LABEL: f3: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r5:4 = vmpybu(r0,r2) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r7:6 = vmpybu(r1,r3) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r0 = vtrunohb(r5:4) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r1 = vtrunohb(r7:6) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: jumpr r31 |
| ; CHECK-NEXT: } |
| %v0 = zext <8 x i8> %a0 to <8 x i16> |
| %v1 = zext <8 x i8> %a1 to <8 x i16> |
| %v2 = mul <8 x i16> %v0, %v1 |
| %v3 = lshr <8 x i16> %v2, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> |
| %v4 = trunc <8 x i16> %v3 to <8 x i8> |
| ret <8 x i8> %v4 |
| } |
| |
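; Signed multiply-high (mulhs) of <2 x i16>.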
| define <2 x i16> @f4(<2 x i16> %a0, <2 x i16> %a1) #0 { |
| ; CHECK-LABEL: f4: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r1:0 = vmpyh(r0,r1):sat |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r0 = combine(r1.h,r0.h) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: jumpr r31 |
| ; CHECK-NEXT: } |
| %v0 = sext <2 x i16> %a0 to <2 x i32> |
| %v1 = sext <2 x i16> %a1 to <2 x i32> |
| %v2 = mul <2 x i32> %v0, %v1 |
| %v3 = lshr <2 x i32> %v2, <i32 16, i32 16> |
| %v4 = trunc <2 x i32> %v3 to <2 x i16> |
| ret <2 x i16> %v4 |
| } |
| |
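; Unsigned multiply-high (mulhu) of <2 x i16>.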
| define <2 x i16> @f5(<2 x i16> %a0, <2 x i16> %a1) #0 { |
| ; CHECK-LABEL: f5: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r3:2 = combine(r0,r1) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r1:0 = vasrh(r3:2,#15) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r5:4 = vmpyh(r3,r2):sat |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r0 = and(r3,r0) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r1 = and(r2,r1) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r4 = combine(r5.h,r4.h) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r0 = vaddh(r0,r1) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r0 = vaddh(r4,r0) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: jumpr r31 |
| ; CHECK-NEXT: } |
| %v0 = zext <2 x i16> %a0 to <2 x i32> |
| %v1 = zext <2 x i16> %a1 to <2 x i32> |
| %v2 = mul <2 x i32> %v0, %v1 |
| %v3 = lshr <2 x i32> %v2, <i32 16, i32 16> |
| %v4 = trunc <2 x i32> %v3 to <2 x i16> |
| ret <2 x i16> %v4 |
| } |
| |
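; Signed multiply-high (mulhs) of <4 x i16>.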
| define <4 x i16> @f6(<4 x i16> %a0, <4 x i16> %a1) #0 { |
| ; CHECK-LABEL: f6: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r5:4 = vmpyh(r0,r2):sat |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r7:6 = vmpyh(r1,r3):sat |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r0 = combine(r5.h,r4.h) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r1 = combine(r7.h,r6.h) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: jumpr r31 |
| ; CHECK-NEXT: } |
| %v0 = sext <4 x i16> %a0 to <4 x i32> |
| %v1 = sext <4 x i16> %a1 to <4 x i32> |
| %v2 = mul <4 x i32> %v0, %v1 |
| %v3 = lshr <4 x i32> %v2, <i32 16, i32 16, i32 16, i32 16> |
| %v4 = trunc <4 x i32> %v3 to <4 x i16> |
| ret <4 x i16> %v4 |
| } |
| |
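; Unsigned multiply-high (mulhu) of <4 x i16>.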
| define <4 x i16> @f7(<4 x i16> %a0, <4 x i16> %a1) #0 { |
| ; CHECK-LABEL: f7: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r7:6 = vasrh(r1:0,#15) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r9:8 = vasrh(r3:2,#15) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r5:4 = vmpyh(r0,r2):sat |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r7:6 = and(r3:2,r7:6) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r3:2 = vmpyh(r1,r3):sat |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r1:0 = and(r1:0,r9:8) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r4 = combine(r5.h,r4.h) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r5 = combine(r3.h,r2.h) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r1:0 = vaddh(r1:0,r7:6) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r1:0 = vaddh(r5:4,r1:0) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: jumpr r31 |
| ; CHECK-NEXT: } |
| %v0 = zext <4 x i16> %a0 to <4 x i32> |
| %v1 = zext <4 x i16> %a1 to <4 x i32> |
| %v2 = mul <4 x i32> %v0, %v1 |
| %v3 = lshr <4 x i32> %v2, <i32 16, i32 16, i32 16, i32 16> |
| %v4 = trunc <4 x i32> %v3 to <4 x i16> |
| ret <4 x i16> %v4 |
| } |
| |
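; Signed multiply-high (mulhs) of <2 x i32>.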
| define <2 x i32> @f8(<2 x i32> %a0, <2 x i32> %a1) #0 { |
| ; CHECK-LABEL: f8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r0 = mpy(r0,r2) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r1 = mpy(r1,r3) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: jumpr r31 |
| ; CHECK-NEXT: } |
| %v0 = sext <2 x i32> %a0 to <2 x i64> |
| %v1 = sext <2 x i32> %a1 to <2 x i64> |
| %v2 = mul <2 x i64> %v0, %v1 |
| %v3 = lshr <2 x i64> %v2, <i64 32, i64 32> |
| %v4 = trunc <2 x i64> %v3 to <2 x i32> |
| ret <2 x i32> %v4 |
| } |
| |
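; Unsigned multiply-high (mulhu) of <2 x i32>.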
| define <2 x i32> @f9(<2 x i32> %a0, <2 x i32> %a1) #0 { |
| ; CHECK-LABEL: f9: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r0 = mpyu(r0,r2) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r1 = mpyu(r1,r3) |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: jumpr r31 |
| ; CHECK-NEXT: } |
| %v0 = zext <2 x i32> %a0 to <2 x i64> |
| %v1 = zext <2 x i32> %a1 to <2 x i64> |
| %v2 = mul <2 x i64> %v0, %v1 |
| %v3 = lshr <2 x i64> %v2, <i64 32, i64 32> |
| %v4 = trunc <2 x i64> %v3 to <2 x i32> |
| ret <2 x i32> %v4 |
| } |
| |
| attributes #0 = { nounwind memory(none) "target-features"="-packets" } |