blob: f0c9dccb21d84aba32d0eb462bb7e8161ce2f732 [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; Supported combines
; Multiplies a splat of the sign-extended i8 %src by the sign-extended
; <8 x i8> %b, producing <8 x i16>.
; Default llc run (SD prefix): the extends are expected to fold into a narrow
; dup followed by smull.
; -global-isel run (GI prefix): the scalar and the vector are extended first
; and a plain 8h mul is expected.
define <8 x i16> @dupsext_v8i8_v8i16(i8 %src, <8 x i8> %b) {
; CHECK-SD-LABEL: dupsext_v8i8_v8i16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: dup v1.8b, w0
; CHECK-SD-NEXT: smull v0.8h, v1.8b, v0.8b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: dupsext_v8i8_v8i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: lsl w8, w0, #8
; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: sbfx w8, w8, #8, #8
; CHECK-GI-NEXT: dup v1.8h, w8
; CHECK-GI-NEXT: mul v0.8h, v1.8h, v0.8h
; CHECK-GI-NEXT: ret
entry:
%in = sext i8 %src to i16
%ext.b = sext <8 x i8> %b to <8 x i16>
%broadcast.splatinsert = insertelement <8 x i16> undef, i16 %in, i16 0
%broadcast.splat = shufflevector <8 x i16> %broadcast.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
%out = mul nsw <8 x i16> %broadcast.splat, %ext.b
ret <8 x i16> %out
}
; Zero-extended counterpart of the i8 -> i16 splat multiply: zext i8 %src is
; splatted and multiplied (nuw) by zext <8 x i8> %b.
; Default llc run (SD prefix): narrow dup followed by umull is expected.
; -global-isel run (GI prefix): mask with 0xff, ushll, dup and a plain 8h mul
; are expected.
define <8 x i16> @dupzext_v8i8_v8i16(i8 %src, <8 x i8> %b) {
; CHECK-SD-LABEL: dupzext_v8i8_v8i16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: dup v1.8b, w0
; CHECK-SD-NEXT: umull v0.8h, v1.8b, v0.8b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: dupzext_v8i8_v8i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: and w8, w0, #0xff
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: dup v1.8h, w8
; CHECK-GI-NEXT: mul v0.8h, v1.8h, v0.8h
; CHECK-GI-NEXT: ret
entry:
%in = zext i8 %src to i16
%ext.b = zext <8 x i8> %b to <8 x i16>
%broadcast.splatinsert = insertelement <8 x i16> undef, i16 %in, i16 0
%broadcast.splat = shufflevector <8 x i16> %broadcast.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
%out = mul nuw <8 x i16> %broadcast.splat, %ext.b
ret <8 x i16> %out
}
; Splat of sext i16 %src multiplied (nsw) by sext <4 x i16> %b, producing
; <4 x i32>.
; Default llc run (SD prefix): dup v1.4h followed by smull is expected.
; -global-isel run (GI prefix): sxth, sshll, dup v1.4s and a plain 4s mul are
; expected.
define <4 x i32> @dupsext_v4i16_v4i32(i16 %src, <4 x i16> %b) {
; CHECK-SD-LABEL: dupsext_v4i16_v4i32:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: dup v1.4h, w0
; CHECK-SD-NEXT: smull v0.4s, v1.4h, v0.4h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: dupsext_v4i16_v4i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sxth w8, w0
; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0
; CHECK-GI-NEXT: dup v1.4s, w8
; CHECK-GI-NEXT: mul v0.4s, v1.4s, v0.4s
; CHECK-GI-NEXT: ret
entry:
%in = sext i16 %src to i32
%ext.b = sext <4 x i16> %b to <4 x i32>
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %in, i32 0
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
%out = mul nsw <4 x i32> %broadcast.splat, %ext.b
ret <4 x i32> %out
}
; Splat of zext i16 %src multiplied (nuw) by zext <4 x i16> %b, producing
; <4 x i32>.
; Default llc run (SD prefix): dup v1.4h followed by umull is expected.
; -global-isel run (GI prefix): mask with 0xffff, ushll, dup v1.4s and a plain
; 4s mul are expected.
define <4 x i32> @dupzext_v4i16_v4i32(i16 %src, <4 x i16> %b) {
; CHECK-SD-LABEL: dupzext_v4i16_v4i32:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: dup v1.4h, w0
; CHECK-SD-NEXT: umull v0.4s, v1.4h, v0.4h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: dupzext_v4i16_v4i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: and w8, w0, #0xffff
; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-GI-NEXT: dup v1.4s, w8
; CHECK-GI-NEXT: mul v0.4s, v1.4s, v0.4s
; CHECK-GI-NEXT: ret
entry:
%in = zext i16 %src to i32
%ext.b = zext <4 x i16> %b to <4 x i32>
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %in, i32 0
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
%out = mul nuw <4 x i32> %broadcast.splat, %ext.b
ret <4 x i32> %out
}
; Splat of sext i32 %src multiplied (nsw) by sext <2 x i32> %b, producing
; <2 x i64>.
; Default llc run (SD prefix): dup v1.2s followed by smull is expected.
; -global-isel run (GI prefix): the expectations show the 64-bit multiply done
; lane by lane with two scalar mul instructions, then re-inserted into v0.
define <2 x i64> @dupsext_v2i32_v2i64(i32 %src, <2 x i32> %b) {
; CHECK-SD-LABEL: dupsext_v2i32_v2i64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: dup v1.2s, w0
; CHECK-SD-NEXT: smull v0.2d, v1.2s, v0.2s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: dupsext_v2i32_v2i64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-GI-NEXT: sxtw x8, w0
; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0
; CHECK-GI-NEXT: dup v1.2d, x8
; CHECK-GI-NEXT: fmov x9, d0
; CHECK-GI-NEXT: mov x11, v0.d[1]
; CHECK-GI-NEXT: fmov x8, d1
; CHECK-GI-NEXT: mov x10, v1.d[1]
; CHECK-GI-NEXT: mul x8, x8, x9
; CHECK-GI-NEXT: mul x9, x10, x11
; CHECK-GI-NEXT: mov v0.d[0], x8
; CHECK-GI-NEXT: mov v0.d[1], x9
; CHECK-GI-NEXT: ret
entry:
%in = sext i32 %src to i64
%ext.b = sext <2 x i32> %b to <2 x i64>
%broadcast.splatinsert = insertelement <2 x i64> undef, i64 %in, i64 0
%broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
%out = mul nsw <2 x i64> %broadcast.splat, %ext.b
ret <2 x i64> %out
}
; Splat of zext i32 %src multiplied (nuw) by zext <2 x i32> %b, producing
; <2 x i64>.
; Default llc run (SD prefix): dup v1.2s followed by umull is expected.
; -global-isel run (GI prefix): the expectations show the 64-bit multiply done
; lane by lane with two scalar mul instructions, then re-inserted into v0.
define <2 x i64> @dupzext_v2i32_v2i64(i32 %src, <2 x i32> %b) {
; CHECK-SD-LABEL: dupzext_v2i32_v2i64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: dup v1.2s, w0
; CHECK-SD-NEXT: umull v0.2d, v1.2s, v0.2s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: dupzext_v2i32_v2i64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov w8, w0
; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
; CHECK-GI-NEXT: dup v1.2d, x8
; CHECK-GI-NEXT: fmov x9, d0
; CHECK-GI-NEXT: mov x11, v0.d[1]
; CHECK-GI-NEXT: fmov x8, d1
; CHECK-GI-NEXT: mov x10, v1.d[1]
; CHECK-GI-NEXT: mul x8, x8, x9
; CHECK-GI-NEXT: mul x9, x10, x11
; CHECK-GI-NEXT: mov v0.d[0], x8
; CHECK-GI-NEXT: mov v0.d[1], x9
; CHECK-GI-NEXT: ret
entry:
%in = zext i32 %src to i64
%ext.b = zext <2 x i32> %b to <2 x i64>
%broadcast.splatinsert = insertelement <2 x i64> undef, i64 %in, i64 0
%broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
%out = mul nuw <2 x i64> %broadcast.splat, %ext.b
ret <2 x i64> %out
}
; Splat of zext i32 %src multiplied (nuw) by zext <2 x i32> %b in <2 x i64>,
; with the product truncated back to <2 x i32>. The splat here is built with
; poison placeholders.
; Default llc run (SD prefix): dup v1.2s, a widening multiply and xtn are
; expected.
; NOTE(review): the expected widening multiply is smull although the IR uses
; zext. Presumably this is acceptable because only the low 32 bits of each
; product survive the trunc -- confirm against llc output before editing.
; -global-isel run (GI prefix): ushll, two scalar mul instructions and xtn
; are expected.
define <2 x i32> @dupzext_v2i32_v2i64_trunc(i32 %src, <2 x i32> %b) {
; CHECK-SD-LABEL: dupzext_v2i32_v2i64_trunc:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: dup v1.2s, w0
; CHECK-SD-NEXT: smull v0.2d, v1.2s, v0.2s
; CHECK-SD-NEXT: xtn v0.2s, v0.2d
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: dupzext_v2i32_v2i64_trunc:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov w8, w0
; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
; CHECK-GI-NEXT: dup v1.2d, x8
; CHECK-GI-NEXT: fmov x9, d0
; CHECK-GI-NEXT: mov x11, v0.d[1]
; CHECK-GI-NEXT: fmov x8, d1
; CHECK-GI-NEXT: mov x10, v1.d[1]
; CHECK-GI-NEXT: mul x8, x8, x9
; CHECK-GI-NEXT: mul x9, x10, x11
; CHECK-GI-NEXT: mov v0.d[0], x8
; CHECK-GI-NEXT: mov v0.d[1], x9
; CHECK-GI-NEXT: xtn v0.2s, v0.2d
; CHECK-GI-NEXT: ret
entry:
%in = zext i32 %src to i64
%ext.b = zext <2 x i32> %b to <2 x i64>
%broadcast.splatinsert = insertelement <2 x i64> poison, i64 %in, i64 0
%broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> poison, <2 x i32> zeroinitializer
%prod = mul nuw <2 x i64> %broadcast.splat, %ext.b
%out = trunc <2 x i64> %prod to <2 x i32>
ret <2 x i32> %out
}
; Unsupported combines
; Splat of sext i8 %src multiplied (nsw) by sext <2 x i8> %b, producing
; <2 x i16>.
; Neither run is expected to emit a widening multiply: both sets of
; expectations sign-extend %b in 32-bit lanes (shl/sshr by 24) and finish
; with a plain 2s mul.
define <2 x i16> @dupsext_v2i8_v2i16(i8 %src, <2 x i8> %b) {
; CHECK-SD-LABEL: dupsext_v2i8_v2i16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: sxtb w8, w0
; CHECK-SD-NEXT: shl v0.2s, v0.2s, #24
; CHECK-SD-NEXT: dup v1.2s, w8
; CHECK-SD-NEXT: sshr v0.2s, v0.2s, #24
; CHECK-SD-NEXT: mul v0.2s, v1.2s, v0.2s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: dupsext_v2i8_v2i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: lsl w8, w0, #8
; CHECK-GI-NEXT: shl v0.2s, v0.2s, #24
; CHECK-GI-NEXT: sbfx w8, w8, #8, #8
; CHECK-GI-NEXT: sshr v0.2s, v0.2s, #24
; CHECK-GI-NEXT: dup v1.4h, w8
; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
; CHECK-GI-NEXT: mul v0.2s, v1.2s, v0.2s
; CHECK-GI-NEXT: ret
entry:
%in = sext i8 %src to i16
%ext.b = sext <2 x i8> %b to <2 x i16>
%broadcast.splatinsert = insertelement <2 x i16> undef, i16 %in, i16 0
%broadcast.splat = shufflevector <2 x i16> %broadcast.splatinsert, <2 x i16> undef, <2 x i32> zeroinitializer
%out = mul nsw <2 x i16> %broadcast.splat, %ext.b
ret <2 x i16> %out
}
; Splat of zext i16 %src multiplied (nuw) by zext <2 x i16> %b, producing
; <2 x i64> (a two-step widening from 16 to 64 bits).
; Default llc run (SD prefix): both operands are masked to their low 16 bits
; in 32-bit lanes and a 2s -> 2d umull is expected.
; -global-isel run (GI prefix): ushll plus masking, then two scalar mul
; instructions re-inserted into v0, are expected.
define <2 x i64> @dupzext_v2i16_v2i64(i16 %src, <2 x i16> %b) {
; CHECK-SD-LABEL: dupzext_v2i16_v2i64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: movi d1, #0x00ffff0000ffff
; CHECK-SD-NEXT: and w8, w0, #0xffff
; CHECK-SD-NEXT: dup v2.2s, w8
; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT: umull v0.2d, v2.2s, v0.2s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: dupzext_v2i16_v2i64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: movi v1.2d, #0x0000000000ffff
; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
; CHECK-GI-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-GI-NEXT: and x8, x0, #0xffff
; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: dup v1.2d, x8
; CHECK-GI-NEXT: fmov x8, d1
; CHECK-GI-NEXT: fmov x9, d0
; CHECK-GI-NEXT: mov x10, v1.d[1]
; CHECK-GI-NEXT: mov x11, v0.d[1]
; CHECK-GI-NEXT: mul x8, x8, x9
; CHECK-GI-NEXT: mul x9, x10, x11
; CHECK-GI-NEXT: mov v0.d[0], x8
; CHECK-GI-NEXT: mov v0.d[1], x9
; CHECK-GI-NEXT: ret
entry:
%in = zext i16 %src to i64
%ext.b = zext <2 x i16> %b to <2 x i64>
%broadcast.splatinsert = insertelement <2 x i64> undef, i64 %in, i64 0
%broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
%out = mul nuw <2 x i64> %broadcast.splat, %ext.b
ret <2 x i64> %out
}
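; Other unsupported type combinations, listed by name only. Of these, only
; dupzext_v2i16_v2i64 has a function defined above; the rest have no
; definition in this part of the file.
; dupsext_v4i8_v4i16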
; dupsext_v2i8_v2i32
; dupsext_v4i8_v4i32
; dupsext_v2i8_v2i64
; dupsext_v2i16_v2i32
; dupsext_v2i16_v2i64
; dupzext_v2i8_v2i16
; dupzext_v4i8_v4i16
; dupzext_v2i8_v2i32
; dupzext_v4i8_v4i32
; dupzext_v2i8_v2i64
; dupzext_v2i16_v2i32
; dupzext_v2i16_v2i64
; Unsupported states
; Variant where the scalar is inserted into lane 1 (not lane 0) and the
; shuffle mask is the rotation <2,3,4,5,6,7,0,1> rather than an all-zero
; splat mask; every other lane of the shuffled vector comes from undef.
; Default llc run (SD prefix): dup v1.8b followed by smull is still expected.
; -global-isel run (GI prefix): a lane insert into h[1], an ext by 4 bytes
; and a plain 8h mul are expected.
define <8 x i16> @nonsplat_shuffleinsert(i8 %src, <8 x i8> %b) {
; CHECK-SD-LABEL: nonsplat_shuffleinsert:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: dup v1.8b, w0
; CHECK-SD-NEXT: smull v0.8h, v1.8b, v0.8b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: nonsplat_shuffleinsert:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: lsl w8, w0, #8
; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: sbfx w8, w8, #8, #8
; CHECK-GI-NEXT: mov v1.h[1], w8
; CHECK-GI-NEXT: ext v1.16b, v1.16b, v1.16b, #4
; CHECK-GI-NEXT: mul v0.8h, v1.8h, v0.8h
; CHECK-GI-NEXT: ret
entry:
%in = sext i8 %src to i16
%ext.b = sext <8 x i8> %b to <8 x i16>
%broadcast.splatinsert = insertelement <8 x i16> undef, i16 %in, i16 1
%broadcast.splat = shufflevector <8 x i16> %broadcast.splatinsert, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1>
%out = mul nsw <8 x i16> %broadcast.splat, %ext.b
ret <8 x i16> %out
}
; Builds a <4 x i32> from four different sign-extended i16 scalars
; (%b0..%b3) via an insertelement chain -- no splat, no shuffle -- and
; multiplies it (nsw) by sext <4 x i16> %b.
; Default llc run (SD prefix): the scalars are inserted into 16-bit lanes and
; a 4h -> 4s smull is expected.
; -global-isel run (GI prefix): each scalar is sign-extended with sxth,
; inserted into a 32-bit lane, and a plain 4s mul is expected.
define <4 x i32> @nonsplat_shuffleinsert2(<4 x i16> %b, i16 %b0, i16 %b1, i16 %b2, i16 %b3) {
; CHECK-SD-LABEL: nonsplat_shuffleinsert2:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fmov s1, w0
; CHECK-SD-NEXT: mov v1.h[1], w1
; CHECK-SD-NEXT: mov v1.h[2], w2
; CHECK-SD-NEXT: mov v1.h[3], w3
; CHECK-SD-NEXT: smull v0.4s, v1.4h, v0.4h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: nonsplat_shuffleinsert2:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sxth w8, w0
; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0
; CHECK-GI-NEXT: mov v1.s[0], w8
; CHECK-GI-NEXT: sxth w8, w1
; CHECK-GI-NEXT: mov v1.s[1], w8
; CHECK-GI-NEXT: sxth w8, w2
; CHECK-GI-NEXT: mov v1.s[2], w8
; CHECK-GI-NEXT: sxth w8, w3
; CHECK-GI-NEXT: mov v1.s[3], w8
; CHECK-GI-NEXT: mul v0.4s, v1.4s, v0.4s
; CHECK-GI-NEXT: ret
entry:
%s0 = sext i16 %b0 to i32
%s1 = sext i16 %b1 to i32
%s2 = sext i16 %b2 to i32
%s3 = sext i16 %b3 to i32
%ext.b = sext <4 x i16> %b to <4 x i32>
%v0 = insertelement <4 x i32> undef, i32 %s0, i32 0
%v1 = insertelement <4 x i32> %v0, i32 %s1, i32 1
%v2 = insertelement <4 x i32> %v1, i32 %s2, i32 2
%v3 = insertelement <4 x i32> %v2, i32 %s3, i32 3
%out = mul nsw <4 x i32> %v3, %ext.b
ret <4 x i32> %out
}
; Case where the splatted scalar is a zero-extended i1 (the result of
; icmp slt 0, %a). The <8 x i16> loaded from %q is negated, widened to 16
; lanes by a shuffle whose upper eight indices select from the undef operand,
; multiplied (nuw) by the splat, compared against zero, sign-extended to
; <16 x i8> and stored to %p.
; %tmp is xor of all-false with all-true, i.e. an all-true mask, so the later
; 'and' with %tmp leaves %tmp14 unchanged.
; Neither run is expected to emit a widening multiply: the default run
; (SD prefix) expects compare/bic, the -global-isel run (GI prefix) expects a
; plain 8h mul.
define void @typei1_orig(i64 %a, ptr %p, ptr %q) {
; CHECK-SD-LABEL: typei1_orig:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: cmp x0, #0
; CHECK-SD-NEXT: ldr q0, [x2]
; CHECK-SD-NEXT: cset w8, gt
; CHECK-SD-NEXT: dup v1.8h, w8
; CHECK-SD-NEXT: cmtst v0.8h, v0.8h, v0.8h
; CHECK-SD-NEXT: cmeq v1.8h, v1.8h, #0
; CHECK-SD-NEXT: bic v0.16b, v0.16b, v1.16b
; CHECK-SD-NEXT: xtn v0.8b, v0.8h
; CHECK-SD-NEXT: str q0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: typei1_orig:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr q1, [x2]
; CHECK-GI-NEXT: cmp x0, #0
; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
; CHECK-GI-NEXT: cset w8, gt
; CHECK-GI-NEXT: neg v1.8h, v1.8h
; CHECK-GI-NEXT: dup v2.8h, w8
; CHECK-GI-NEXT: mvn v0.16b, v0.16b
; CHECK-GI-NEXT: mul v1.8h, v1.8h, v2.8h
; CHECK-GI-NEXT: cmeq v1.8h, v1.8h, #0
; CHECK-GI-NEXT: mvn v1.16b, v1.16b
; CHECK-GI-NEXT: uzp1 v0.16b, v1.16b, v0.16b
; CHECK-GI-NEXT: shl v0.16b, v0.16b, #7
; CHECK-GI-NEXT: sshr v0.16b, v0.16b, #7
; CHECK-GI-NEXT: str q0, [x1]
; CHECK-GI-NEXT: ret
%tmp = xor <16 x i1> zeroinitializer, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
%tmp6 = load <8 x i16>, ptr %q, align 2
%tmp7 = sub <8 x i16> zeroinitializer, %tmp6
%tmp8 = shufflevector <8 x i16> %tmp7, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%tmp9 = icmp slt i64 0, %a
%tmp10 = zext i1 %tmp9 to i16
%tmp11 = insertelement <16 x i16> undef, i16 %tmp10, i64 0
%tmp12 = shufflevector <16 x i16> %tmp11, <16 x i16> undef, <16 x i32> zeroinitializer
%tmp13 = mul nuw <16 x i16> %tmp8, %tmp12
%tmp14 = icmp ne <16 x i16> %tmp13, zeroinitializer
%tmp15 = and <16 x i1> %tmp14, %tmp
%tmp16 = sext <16 x i1> %tmp15 to <16 x i8>
store <16 x i8> %tmp16, ptr %p, align 1
ret void
}
; Splat of zext i1 %src multiplied (nsw) by zext <8 x i1> %b, producing
; <8 x i16>; both extends start from i1.
; Default llc run (SD prefix): both operands are masked to bit 0 in 8-bit
; lanes and an 8b -> 8h umull is expected.
; -global-isel run (GI prefix): ushll, masking with 1, dup and a plain 8h mul
; are expected.
define <8 x i16> @typei1_v8i1_v8i16(i1 %src, <8 x i1> %b) {
; CHECK-SD-LABEL: typei1_v8i1_v8i16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: movi v1.8b, #1
; CHECK-SD-NEXT: and w8, w0, #0x1
; CHECK-SD-NEXT: dup v2.8b, w8
; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT: umull v0.8h, v2.8b, v0.8b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: typei1_v8i1_v8i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: movi v1.8h, #1
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: and w8, w0, #0x1
; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: dup v1.8h, w8
; CHECK-GI-NEXT: mul v0.8h, v1.8h, v0.8h
; CHECK-GI-NEXT: ret
entry:
%in = zext i1 %src to i16
%ext.b = zext <8 x i1> %b to <8 x i16>
%broadcast.splatinsert = insertelement <8 x i16> undef, i16 %in, i16 0
%broadcast.splat = shufflevector <8 x i16> %broadcast.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
%out = mul nsw <8 x i16> %broadcast.splat, %ext.b
ret <8 x i16> %out
}
; Shuffle with no insertelement: the sign-extended %b is rotated by the mask
; <2,3,4,5,6,7,0,1> and multiplied (nsw) by the unrotated sign-extended %b.
; Default llc run (SD prefix): the rotation is expected on the narrow 8b
; vector (ext by 2 bytes) followed by smull.
; -global-isel run (GI prefix): sshll first, then ext by 4 bytes on the wide
; vector and a plain 8h mul are expected.
define <8 x i16> @missing_insert(<8 x i8> %b) {
; CHECK-SD-LABEL: missing_insert:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: ext v1.8b, v0.8b, v0.8b, #2
; CHECK-SD-NEXT: smull v0.8h, v1.8b, v0.8b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: missing_insert:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: ext v1.16b, v0.16b, v0.16b, #4
; CHECK-GI-NEXT: mul v0.8h, v1.8h, v0.8h
; CHECK-GI-NEXT: ret
entry:
%ext.b = sext <8 x i8> %b to <8 x i16>
%broadcast.splat = shufflevector <8 x i16> %ext.b, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1>
%out = mul nsw <8 x i16> %broadcast.splat, %ext.b
ret <8 x i16> %out
}
; Lane-reversing shuffle (mask 7..0) of sext <8 x i8> %src, multiplied (nsw)
; by sext <8 x i8> %b.
; Default llc run (SD prefix): the reversal is expected on the narrow vector
; (rev64 on 8b) followed by smull.
; -global-isel run (GI prefix): both inputs are widened with sshll, the wide
; vector is reversed with rev64 + ext, and a plain 8h mul is expected.
define <8 x i16> @shufsext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) {
; CHECK-SD-LABEL: shufsext_v8i8_v8i16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: rev64 v0.8b, v0.8b
; CHECK-SD-NEXT: smull v0.8h, v0.8b, v1.8b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shufsext_v8i8_v8i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
; CHECK-GI-NEXT: rev64 v0.8h, v0.8h
; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: ret
entry:
%in = sext <8 x i8> %src to <8 x i16>
%ext.b = sext <8 x i8> %b to <8 x i16>
%shuf = shufflevector <8 x i16> %in, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
%out = mul nsw <8 x i16> %shuf, %ext.b
ret <8 x i16> %out
}
; Lane-swapping shuffle (mask <1,0>) of sext <2 x i32> %src, multiplied (nsw)
; by sext <2 x i32> %b, producing <2 x i64>.
; Default llc run (SD prefix): rev64 on the narrow 2s vector followed by
; smull is expected.
; -global-isel run (GI prefix): both inputs are widened with sshll, the wide
; vector is swapped with ext, and the multiply is expected as two scalar mul
; instructions re-inserted into v0.
define <2 x i64> @shufsext_v2i32_v2i64(<2 x i32> %src, <2 x i32> %b) {
; CHECK-SD-LABEL: shufsext_v2i32_v2i64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: rev64 v0.2s, v0.2s
; CHECK-SD-NEXT: smull v0.2d, v0.2s, v1.2s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shufsext_v2i32_v2i64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0
; CHECK-GI-NEXT: sshll v1.2d, v1.2s, #0
; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-GI-NEXT: fmov x9, d1
; CHECK-GI-NEXT: mov x11, v1.d[1]
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: mov x10, v0.d[1]
; CHECK-GI-NEXT: mul x8, x8, x9
; CHECK-GI-NEXT: mul x9, x10, x11
; CHECK-GI-NEXT: mov v0.d[0], x8
; CHECK-GI-NEXT: mov v0.d[1], x9
; CHECK-GI-NEXT: ret
entry:
%in = sext <2 x i32> %src to <2 x i64>
%ext.b = sext <2 x i32> %b to <2 x i64>
%shuf = shufflevector <2 x i64> %in, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
%out = mul nsw <2 x i64> %shuf, %ext.b
ret <2 x i64> %out
}
; Lane-reversing shuffle (mask 7..0) of zext <8 x i8> %src, multiplied (nsw)
; by zext <8 x i8> %b.
; Default llc run (SD prefix): rev64 on the narrow 8b vector followed by
; umull is expected.
; -global-isel run (GI prefix): both inputs are widened with ushll, the wide
; vector is reversed with rev64 + ext, and a plain 8h mul is expected.
define <8 x i16> @shufzext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) {
; CHECK-SD-LABEL: shufzext_v8i8_v8i16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: rev64 v0.8b, v0.8b
; CHECK-SD-NEXT: umull v0.8h, v0.8b, v1.8b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shufzext_v8i8_v8i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-GI-NEXT: rev64 v0.8h, v0.8h
; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: ret
entry:
%in = zext <8 x i8> %src to <8 x i16>
%ext.b = zext <8 x i8> %b to <8 x i16>
%shuf = shufflevector <8 x i16> %in, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
%out = mul nsw <8 x i16> %shuf, %ext.b
ret <8 x i16> %out
}
; Lane-swapping shuffle (mask <1,0>) of zext <2 x i32> %src, multiplied (nsw)
; by zext <2 x i32> %b, producing <2 x i64>.
; The inputs were previously sign-extended, which made this function an exact
; duplicate of shufsext_v2i32_v2i64 and left the zero-extended case untested.
; Default llc run (SD prefix): rev64 on the narrow 2s vector followed by
; umull is expected.
; -global-isel run (GI prefix): both inputs are widened with ushll, the wide
; vector is swapped with ext, and the multiply is expected as two scalar mul
; instructions re-inserted into v0.
; NOTE(review): the expectations below were adjusted by hand to match the
; zext forms used by the other zero-extended cases in this file; regenerate
; them with utils/update_llc_test_checks.py to confirm.
define <2 x i64> @shufzext_v2i32_v2i64(<2 x i32> %src, <2 x i32> %b) {
; CHECK-SD-LABEL: shufzext_v2i32_v2i64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: rev64 v0.2s, v0.2s
; CHECK-SD-NEXT: umull v0.2d, v0.2s, v1.2s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shufzext_v2i32_v2i64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0
; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-GI-NEXT: fmov x9, d1
; CHECK-GI-NEXT: mov x11, v1.d[1]
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: mov x10, v0.d[1]
; CHECK-GI-NEXT: mul x8, x8, x9
; CHECK-GI-NEXT: mul x9, x10, x11
; CHECK-GI-NEXT: mov v0.d[0], x8
; CHECK-GI-NEXT: mov v0.d[1], x9
; CHECK-GI-NEXT: ret
entry:
%in = zext <2 x i32> %src to <2 x i64>
%ext.b = zext <2 x i32> %b to <2 x i64>
%shuf = shufflevector <2 x i64> %in, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
%out = mul nsw <2 x i64> %shuf, %ext.b
ret <2 x i64> %out
}
; Two-input shuffle: even lanes of zext %src1 interleaved with even lanes of
; zext %src2 (mask <0,8,2,10,4,12,6,14>), multiplied (nsw) by zext %b.
; Default llc run (SD prefix): trn1 on the narrow 8b inputs followed by umull
; is expected.
; -global-isel run (GI prefix): all three inputs are widened with ushll, then
; trn1 on 8h and a plain 8h mul are expected.
define <8 x i16> @shufzext_v8i8_v8i16_twoin(<8 x i8> %src1, <8 x i8> %src2, <8 x i8> %b) {
; CHECK-SD-LABEL: shufzext_v8i8_v8i16_twoin:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: trn1 v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT: umull v0.8h, v0.8b, v2.8b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shufzext_v8i8_v8i16_twoin:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-GI-NEXT: trn1 v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: ushll v1.8h, v2.8b, #0
; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: ret
entry:
%in1 = zext <8 x i8> %src1 to <8 x i16>
%in2 = zext <8 x i8> %src2 to <8 x i16>
%ext.b = zext <8 x i8> %b to <8 x i16>
%shuf = shufflevector <8 x i16> %in1, <8 x i16> %in2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
%out = mul nsw <8 x i16> %shuf, %ext.b
ret <8 x i16> %out
}
; Two-input shuffle with mixed extension kinds: %src1 is zero-extended while
; %src2 is sign-extended; their even lanes are interleaved (mask
; <0,8,2,10,4,12,6,14>) and multiplied (nsw) by zext %b.
; Both llc runs share one set of expectations (common prefix): each input is
; widened separately (ushll / sshll), combined with trn1 on 8h, and a plain
; 8h mul is expected -- no widening multiply.
define <8 x i16> @shufszext_v8i8_v8i16_twoin(<8 x i8> %src1, <8 x i8> %src2, <8 x i8> %b) {
; CHECK-LABEL: shufszext_v8i8_v8i16_twoin:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: sshll v1.8h, v1.8b, #0
; CHECK-NEXT: trn1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ushll v1.8h, v2.8b, #0
; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
entry:
%in1 = zext <8 x i8> %src1 to <8 x i16>
%in2 = sext <8 x i8> %src2 to <8 x i16>
%ext.b = zext <8 x i8> %b to <8 x i16>
%shuf = shufflevector <8 x i16> %in1, <8 x i16> %in2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
%out = mul nsw <8 x i16> %shuf, %ext.b
ret <8 x i16> %out
}