; blob: 2f543cc324bc22e8f7dfab2427d8486ffe0beb32 [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s -mtriple=arm64-eabi -global-isel=0 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple=arm64-eabi -global-isel=1 -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; CHECK-GI: warning: Instruction selection used fallback path for sqshl1d
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshl1d_constant
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshl_scalar
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshl_scalar_constant
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshl1d
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshl1d_constant
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshl_scalar
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshl_scalar_constant
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srshl1d
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srshl1d_constant
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srshl_scalar
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srshl_scalar_constant
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for urshl1d
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for urshl1d_constant
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for urshl_scalar
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for urshl_scalar_constant
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshl1d
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshl1d_constant
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshl_scalar
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshl_scalar_constant
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshl1d
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshl1d_constant
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshl_scalar
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshl_scalar_constant
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for urshr1d
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for urshr_scalar
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srshr1d
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srshr_scalar
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu8b
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu4h
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu2s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu16b
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu8h
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu4s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu2d
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu1d_constant
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu_i64_constant
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu_i32_constant
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrn1s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrn8b
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrn4h
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrn2s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrn16b
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrn8h
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrn4s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrun1s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrun8b
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrun4h
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrun2s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrun16b
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrun8h
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrun4s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrn1s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrn8b
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrn4h
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrn2s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrn16b
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrn8h
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrn4s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrun1s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrun8b
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrun4h
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrun2s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrun16b
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrun8h
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrun4s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshrn1s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshrn8b
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshrn4h
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshrn2s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshrn16b
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshrn8h
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshrn4s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshrn1s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshrn8b
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshrn4h
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshrn2s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshrn16b
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshrn8h
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshrn4s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for neon_ushl_vscalar_constant_shift
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for neon_ushl_scalar_constant_shift
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for neon_sshll_vscalar_constant_shift
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for neon_sshll_scalar_constant_shift
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for neon_sshll_scalar_constant_shift_m1
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ursra1d
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ursra_scalar
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srsra1d
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srsra_scalar
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli8b
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli4h
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli2s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli1d
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli16b
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli8h
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli4s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli2d
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu_zero_shift_amount
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lshr_trunc_v2i64_v2i8
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ashr_trunc_v2i64_v2i8
; sqshl intrinsic on <8 x i8>: both operands come from memory and the expected
; output is a single vector `sqshl v0.8b, v0.8b, v1.8b`.
define <8 x i8> @sqshl8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqshl8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: sqshl v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
  %val = load <8 x i8>, ptr %A
  %shift = load <8 x i8>, ptr %B
  %res = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %val, <8 x i8> %shift)
  ret <8 x i8> %res
}
; sqshl intrinsic on <4 x i16>: expects the `.4h` vector form of sqshl applied
; to the two loaded operands.
define <4 x i16> @sqshl4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqshl4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: sqshl v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
  %val = load <4 x i16>, ptr %A
  %shift = load <4 x i16>, ptr %B
  %res = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %val, <4 x i16> %shift)
  ret <4 x i16> %res
}
; sqshl intrinsic on <2 x i32>: expects the `.2s` vector form of sqshl applied
; to the two loaded operands.
define <2 x i32> @sqshl2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqshl2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: sqshl v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
  %val = load <2 x i32>, ptr %A
  %shift = load <2 x i32>, ptr %B
  %res = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %val, <2 x i32> %shift)
  ret <2 x i32> %res
}
; sqshl intrinsic on <1 x i64>: the single-element vector is expected to use
; the scalar d-register form `sqshl d0, d0, d1`.
define <1 x i64> @sqshl1d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqshl1d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: sqshl d0, d0, d1
; CHECK-NEXT: ret
  %val = load <1 x i64>, ptr %A
  %shift = load <1 x i64>, ptr %B
  %res = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> %val, <1 x i64> %shift)
  ret <1 x i64> %res
}
; sqshl intrinsic on <1 x i64> with a constant shift of 1: the assertions
; expect the immediate form `sqshl d0, d0, #1`.
define <1 x i64> @sqshl1d_constant(ptr %A) nounwind {
; CHECK-LABEL: sqshl1d_constant:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: sqshl d0, d0, #1
; CHECK-NEXT: ret
  %val = load <1 x i64>, ptr %A
  %res = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> %val, <1 x i64> <i64 1>)
  ret <1 x i64> %res
}
; Scalar i64 sqshl intrinsic: both operands are loaded into general-purpose
; registers, moved to d registers with fmov, shifted with `sqshl d0, d0, d1`,
; and the result is moved back to x0.
define i64 @sqshl_scalar(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqshl_scalar:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr x8, [x0]
; CHECK-NEXT: ldr x9, [x1]
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: fmov d1, x9
; CHECK-NEXT: sqshl d0, d0, d1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
  %val = load i64, ptr %A
  %shift = load i64, ptr %B
  %res = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %val, i64 %shift)
  ret i64 %res
}
; Scalar i64 sqshl intrinsic with a constant shift of 1: the value is loaded
; directly into d0 and shifted with the immediate form before moving to x0.
define i64 @sqshl_scalar_constant(ptr %A) nounwind {
; CHECK-LABEL: sqshl_scalar_constant:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: sqshl d0, d0, #1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
  %val = load i64, ptr %A
  %res = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %val, i64 1)
  ret i64 %res
}
; uqshl intrinsic on <8 x i8>: expects the `.8b` vector form of uqshl applied
; to the two loaded operands.
define <8 x i8> @uqshl8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqshl8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: uqshl v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
  %val = load <8 x i8>, ptr %A
  %shift = load <8 x i8>, ptr %B
  %res = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %val, <8 x i8> %shift)
  ret <8 x i8> %res
}
; uqshl intrinsic on <4 x i16>: expects the `.4h` vector form of uqshl applied
; to the two loaded operands.
define <4 x i16> @uqshl4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqshl4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: uqshl v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
  %val = load <4 x i16>, ptr %A
  %shift = load <4 x i16>, ptr %B
  %res = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %val, <4 x i16> %shift)
  ret <4 x i16> %res
}
; uqshl intrinsic on <2 x i32>: expects the `.2s` vector form of uqshl applied
; to the two loaded operands.
define <2 x i32> @uqshl2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqshl2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: uqshl v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
  %val = load <2 x i32>, ptr %A
  %shift = load <2 x i32>, ptr %B
  %res = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %val, <2 x i32> %shift)
  ret <2 x i32> %res
}
; sqshl intrinsic on <16 x i8>: operands are loaded into q registers and the
; expected instruction is `sqshl v0.16b, v0.16b, v1.16b`.
define <16 x i8> @sqshl16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqshl16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqshl v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
  %val = load <16 x i8>, ptr %A
  %shift = load <16 x i8>, ptr %B
  %res = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %val, <16 x i8> %shift)
  ret <16 x i8> %res
}
; sqshl intrinsic on <8 x i16>: expects the `.8h` vector form of sqshl on the
; two q-register operands.
define <8 x i16> @sqshl8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqshl8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqshl v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
  %val = load <8 x i16>, ptr %A
  %shift = load <8 x i16>, ptr %B
  %res = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %val, <8 x i16> %shift)
  ret <8 x i16> %res
}
; sqshl intrinsic on <4 x i32>: expects the `.4s` vector form of sqshl on the
; two q-register operands.
define <4 x i32> @sqshl4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqshl4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqshl v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %val = load <4 x i32>, ptr %A
  %shift = load <4 x i32>, ptr %B
  %res = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %val, <4 x i32> %shift)
  ret <4 x i32> %res
}
; sqshl intrinsic on <2 x i64>: expects the `.2d` vector form of sqshl on the
; two q-register operands.
define <2 x i64> @sqshl2d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqshl2d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqshl v0.2d, v0.2d, v1.2d
; CHECK-NEXT: ret
  %val = load <2 x i64>, ptr %A
  %shift = load <2 x i64>, ptr %B
  %res = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %val, <2 x i64> %shift)
  ret <2 x i64> %res
}
; uqshl intrinsic on <16 x i8>: expects the `.16b` vector form of uqshl on the
; two q-register operands.
define <16 x i8> @uqshl16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqshl16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: uqshl v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
  %val = load <16 x i8>, ptr %A
  %shift = load <16 x i8>, ptr %B
  %res = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %val, <16 x i8> %shift)
  ret <16 x i8> %res
}
; uqshl intrinsic on <8 x i16>: expects the `.8h` vector form of uqshl on the
; two q-register operands.
define <8 x i16> @uqshl8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqshl8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: uqshl v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
  %val = load <8 x i16>, ptr %A
  %shift = load <8 x i16>, ptr %B
  %res = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %val, <8 x i16> %shift)
  ret <8 x i16> %res
}
; uqshl intrinsic on <4 x i32>: expects the `.4s` vector form of uqshl on the
; two q-register operands.
define <4 x i32> @uqshl4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqshl4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: uqshl v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %val = load <4 x i32>, ptr %A
  %shift = load <4 x i32>, ptr %B
  %res = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %val, <4 x i32> %shift)
  ret <4 x i32> %res
}
; uqshl intrinsic on <2 x i64>: expects the `.2d` vector form of uqshl on the
; two q-register operands.
define <2 x i64> @uqshl2d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqshl2d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: uqshl v0.2d, v0.2d, v1.2d
; CHECK-NEXT: ret
  %val = load <2 x i64>, ptr %A
  %shift = load <2 x i64>, ptr %B
  %res = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %val, <2 x i64> %shift)
  ret <2 x i64> %res
}
; uqshl intrinsic on <1 x i64>: the single-element vector is expected to use
; the scalar d-register form `uqshl d0, d0, d1`.
define <1 x i64> @uqshl1d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqshl1d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: uqshl d0, d0, d1
; CHECK-NEXT: ret
  %val = load <1 x i64>, ptr %A
  %shift = load <1 x i64>, ptr %B
  %res = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> %val, <1 x i64> %shift)
  ret <1 x i64> %res
}
; uqshl intrinsic on <1 x i64> with a constant shift of 1: the assertions
; expect the immediate form `uqshl d0, d0, #1`.
define <1 x i64> @uqshl1d_constant(ptr %A) nounwind {
; CHECK-LABEL: uqshl1d_constant:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: uqshl d0, d0, #1
; CHECK-NEXT: ret
  %val = load <1 x i64>, ptr %A
  %res = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> %val, <1 x i64> <i64 1>)
  ret <1 x i64> %res
}
; Scalar i64 uqshl intrinsic: both operands are loaded into general-purpose
; registers, moved to d registers with fmov, shifted with `uqshl d0, d0, d1`,
; and the result is moved back to x0.
define i64 @uqshl_scalar(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqshl_scalar:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr x8, [x0]
; CHECK-NEXT: ldr x9, [x1]
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: fmov d1, x9
; CHECK-NEXT: uqshl d0, d0, d1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
  %val = load i64, ptr %A
  %shift = load i64, ptr %B
  %res = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %val, i64 %shift)
  ret i64 %res
}
; Scalar i64 uqshl intrinsic with a constant shift of 1: the value is loaded
; directly into d0 and shifted with the immediate form before moving to x0.
define i64 @uqshl_scalar_constant(ptr %A) nounwind {
; CHECK-LABEL: uqshl_scalar_constant:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: uqshl d0, d0, #1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
  %val = load i64, ptr %A
  %res = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %val, i64 1)
  ret i64 %res
}
; Declarations of the sqshl/uqshl intrinsic overloads called by the sqshl* and
; uqshl* test functions in this file: 64-bit-wide vector types and the scalar
; i64 form first, then the 128-bit-wide vector types.
declare <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare i64 @llvm.aarch64.neon.sqshl.i64(i64, i64) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare i64 @llvm.aarch64.neon.uqshl.i64(i64, i64) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
; srshl intrinsic on <8 x i8>: expects the `.8b` vector form of srshl applied
; to the two loaded operands.
define <8 x i8> @srshl8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: srshl8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: srshl v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
  %val = load <8 x i8>, ptr %A
  %shift = load <8 x i8>, ptr %B
  %res = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %val, <8 x i8> %shift)
  ret <8 x i8> %res
}
; srshl intrinsic on <4 x i16>: expects the `.4h` vector form of srshl applied
; to the two loaded operands.
define <4 x i16> @srshl4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: srshl4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: srshl v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
  %val = load <4 x i16>, ptr %A
  %shift = load <4 x i16>, ptr %B
  %res = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %val, <4 x i16> %shift)
  ret <4 x i16> %res
}
; srshl intrinsic on <2 x i32>: expects the `.2s` vector form of srshl applied
; to the two loaded operands.
define <2 x i32> @srshl2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: srshl2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: srshl v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
  %val = load <2 x i32>, ptr %A
  %shift = load <2 x i32>, ptr %B
  %res = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %val, <2 x i32> %shift)
  ret <2 x i32> %res
}
; srshl intrinsic on <1 x i64>: the single-element vector is expected to use
; the scalar d-register form `srshl d0, d0, d1`.
define <1 x i64> @srshl1d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: srshl1d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: srshl d0, d0, d1
; CHECK-NEXT: ret
  %val = load <1 x i64>, ptr %A
  %shift = load <1 x i64>, ptr %B
  %res = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %val, <1 x i64> %shift)
  ret <1 x i64> %res
}
; srshl intrinsic on <1 x i64> with a constant shift of 1: the expected code
; materializes the constant (mov w8 + fmov d1) and uses the register form
; `srshl d0, d0, d1`; no immediate form appears in the assertions.
define <1 x i64> @srshl1d_constant(ptr %A) nounwind {
; CHECK-LABEL: srshl1d_constant:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: srshl d0, d0, d1
; CHECK-NEXT: ret
  %val = load <1 x i64>, ptr %A
  %res = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %val, <1 x i64> <i64 1>)
  ret <1 x i64> %res
}
; Scalar i64 srshl intrinsic: both operands are loaded into general-purpose
; registers, moved to d registers with fmov, shifted with `srshl d0, d0, d1`,
; and the result is moved back to x0.
define i64 @srshl_scalar(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: srshl_scalar:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr x8, [x0]
; CHECK-NEXT: ldr x9, [x1]
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: fmov d1, x9
; CHECK-NEXT: srshl d0, d0, d1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
  %val = load i64, ptr %A
  %shift = load i64, ptr %B
  %res = call i64 @llvm.aarch64.neon.srshl.i64(i64 %val, i64 %shift)
  ret i64 %res
}
; Scalar i64 srshl intrinsic with a constant shift of 1: the constant is
; materialized in w8 and moved to d1, the loaded value is moved to d0, and the
; register form `srshl d0, d0, d1` is expected.
define i64 @srshl_scalar_constant(ptr %A) nounwind {
; CHECK-LABEL: srshl_scalar_constant:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr x9, [x0]
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: fmov d0, x9
; CHECK-NEXT: srshl d0, d0, d1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
  %val = load i64, ptr %A
  %res = call i64 @llvm.aarch64.neon.srshl.i64(i64 %val, i64 1)
  ret i64 %res
}
; urshl intrinsic on <8 x i8>: expects the `.8b` vector form of urshl applied
; to the two loaded operands.
define <8 x i8> @urshl8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: urshl8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: urshl v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
  %val = load <8 x i8>, ptr %A
  %shift = load <8 x i8>, ptr %B
  %res = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %val, <8 x i8> %shift)
  ret <8 x i8> %res
}
; urshl intrinsic on <4 x i16>: expects the `.4h` vector form of urshl applied
; to the two loaded operands.
define <4 x i16> @urshl4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: urshl4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: urshl v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
  %val = load <4 x i16>, ptr %A
  %shift = load <4 x i16>, ptr %B
  %res = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %val, <4 x i16> %shift)
  ret <4 x i16> %res
}
; urshl intrinsic on <2 x i32>: expects the `.2s` vector form of urshl applied
; to the two loaded operands.
define <2 x i32> @urshl2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: urshl2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: urshl v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
  %val = load <2 x i32>, ptr %A
  %shift = load <2 x i32>, ptr %B
  %res = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %val, <2 x i32> %shift)
  ret <2 x i32> %res
}
; urshl intrinsic on <1 x i64>: the single-element vector is expected to use
; the scalar d-register form `urshl d0, d0, d1`.
define <1 x i64> @urshl1d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: urshl1d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: urshl d0, d0, d1
; CHECK-NEXT: ret
  %val = load <1 x i64>, ptr %A
  %shift = load <1 x i64>, ptr %B
  %res = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %val, <1 x i64> %shift)
  ret <1 x i64> %res
}
; urshl intrinsic on <1 x i64> with a constant shift of 1: the expected code
; materializes the constant (mov w8 + fmov d1) and uses the register form
; `urshl d0, d0, d1`; no immediate form appears in the assertions.
define <1 x i64> @urshl1d_constant(ptr %A) nounwind {
; CHECK-LABEL: urshl1d_constant:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: urshl d0, d0, d1
; CHECK-NEXT: ret
  %val = load <1 x i64>, ptr %A
  %res = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %val, <1 x i64> <i64 1>)
  ret <1 x i64> %res
}
; Scalar i64 urshl intrinsic: both operands are loaded into general-purpose
; registers, moved to d registers with fmov, shifted with `urshl d0, d0, d1`,
; and the result is moved back to x0.
define i64 @urshl_scalar(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: urshl_scalar:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr x8, [x0]
; CHECK-NEXT: ldr x9, [x1]
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: fmov d1, x9
; CHECK-NEXT: urshl d0, d0, d1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
  %val = load i64, ptr %A
  %shift = load i64, ptr %B
  %res = call i64 @llvm.aarch64.neon.urshl.i64(i64 %val, i64 %shift)
  ret i64 %res
}
; Scalar i64 urshl intrinsic with a constant shift of 1: the constant is
; materialized in w8 and moved to d1, the loaded value is moved to d0, and the
; register form `urshl d0, d0, d1` is expected.
define i64 @urshl_scalar_constant(ptr %A) nounwind {
; CHECK-LABEL: urshl_scalar_constant:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr x9, [x0]
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: fmov d0, x9
; CHECK-NEXT: urshl d0, d0, d1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
  %val = load i64, ptr %A
  %res = call i64 @llvm.aarch64.neon.urshl.i64(i64 %val, i64 1)
  ret i64 %res
}
; srshl intrinsic on <16 x i8>: expects the `.16b` vector form of srshl on the
; two q-register operands.
define <16 x i8> @srshl16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: srshl16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: srshl v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
  %val = load <16 x i8>, ptr %A
  %shift = load <16 x i8>, ptr %B
  %res = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %val, <16 x i8> %shift)
  ret <16 x i8> %res
}
; srshl intrinsic on <8 x i16>: expects the `.8h` vector form of srshl on the
; two q-register operands.
define <8 x i16> @srshl8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: srshl8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: srshl v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
  %val = load <8 x i16>, ptr %A
  %shift = load <8 x i16>, ptr %B
  %res = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %val, <8 x i16> %shift)
  ret <8 x i16> %res
}
; srshl intrinsic on <4 x i32>: expects the `.4s` vector form of srshl on the
; two q-register operands.
define <4 x i32> @srshl4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: srshl4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: srshl v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %val = load <4 x i32>, ptr %A
  %shift = load <4 x i32>, ptr %B
  %res = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %val, <4 x i32> %shift)
  ret <4 x i32> %res
}
; srshl intrinsic on <2 x i64>: expects the `.2d` vector form of srshl on the
; two q-register operands.
define <2 x i64> @srshl2d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: srshl2d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: srshl v0.2d, v0.2d, v1.2d
; CHECK-NEXT: ret
  %val = load <2 x i64>, ptr %A
  %shift = load <2 x i64>, ptr %B
  %res = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %val, <2 x i64> %shift)
  ret <2 x i64> %res
}
; urshl intrinsic on <16 x i8>: expects the `.16b` vector form of urshl on the
; two q-register operands.
define <16 x i8> @urshl16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: urshl16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: urshl v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
  %val = load <16 x i8>, ptr %A
  %shift = load <16 x i8>, ptr %B
  %res = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %val, <16 x i8> %shift)
  ret <16 x i8> %res
}
; urshl intrinsic on <8 x i16>: expects the `.8h` vector form of urshl on the
; two q-register operands.
define <8 x i16> @urshl8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: urshl8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: urshl v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
  %val = load <8 x i16>, ptr %A
  %shift = load <8 x i16>, ptr %B
  %res = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %val, <8 x i16> %shift)
  ret <8 x i16> %res
}
; urshl intrinsic on <4 x i32>: expects the `.4s` vector form of urshl on the
; two q-register operands.
define <4 x i32> @urshl4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: urshl4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: urshl v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %val = load <4 x i32>, ptr %A
  %shift = load <4 x i32>, ptr %B
  %res = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %val, <4 x i32> %shift)
  ret <4 x i32> %res
}
; urshl intrinsic on <2 x i64>: expects the `.2d` vector form of urshl on the
; two q-register operands.
define <2 x i64> @urshl2d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: urshl2d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: urshl v0.2d, v0.2d, v1.2d
; CHECK-NEXT: ret
  %val = load <2 x i64>, ptr %A
  %shift = load <2 x i64>, ptr %B
  %res = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %val, <2 x i64> %shift)
  ret <2 x i64> %res
}
; Declarations of the srshl/urshl intrinsic overloads called by the srshl* and
; urshl* test functions in this file: 64-bit-wide vector types and the scalar
; i64 form first, then the 128-bit-wide vector types.
declare <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare i64 @llvm.aarch64.neon.srshl.i64(i64, i64) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare i64 @llvm.aarch64.neon.urshl.i64(i64, i64) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
; sqrshl intrinsic on <8 x i8>: expects the `.8b` vector form of sqrshl
; applied to the two loaded operands.
define <8 x i8> @sqrshl8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqrshl8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: sqrshl v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
  %val = load <8 x i8>, ptr %A
  %shift = load <8 x i8>, ptr %B
  %res = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %val, <8 x i8> %shift)
  ret <8 x i8> %res
}
; sqrshl intrinsic on <4 x i16>: expects the `.4h` vector form of sqrshl
; applied to the two loaded operands.
define <4 x i16> @sqrshl4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqrshl4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: sqrshl v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
  %val = load <4 x i16>, ptr %A
  %shift = load <4 x i16>, ptr %B
  %res = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> %val, <4 x i16> %shift)
  ret <4 x i16> %res
}
; sqrshl intrinsic on <2 x i32>: expects the `.2s` vector form of sqrshl
; applied to the two loaded operands.
define <2 x i32> @sqrshl2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqrshl2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: sqrshl v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
  %val = load <2 x i32>, ptr %A
  %shift = load <2 x i32>, ptr %B
  %res = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> %val, <2 x i32> %shift)
  ret <2 x i32> %res
}
; uqrshl intrinsic on <8 x i8>: expects the `.8b` vector form of uqrshl
; applied to the two loaded operands.
define <8 x i8> @uqrshl8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqrshl8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: uqrshl v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
  %val = load <8 x i8>, ptr %A
  %shift = load <8 x i8>, ptr %B
  %res = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %val, <8 x i8> %shift)
  ret <8 x i8> %res
}
; uqrshl intrinsic on <4 x i16>: expects the `.4h` vector form of uqrshl
; applied to the two loaded operands.
define <4 x i16> @uqrshl4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqrshl4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: uqrshl v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
  %val = load <4 x i16>, ptr %A
  %shift = load <4 x i16>, ptr %B
  %res = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> %val, <4 x i16> %shift)
  ret <4 x i16> %res
}
; uqrshl intrinsic on <2 x i32>: expects the `.2s` vector form of uqrshl
; applied to the two loaded operands.
define <2 x i32> @uqrshl2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqrshl2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: uqrshl v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
  %val = load <2 x i32>, ptr %A
  %shift = load <2 x i32>, ptr %B
  %res = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> %val, <2 x i32> %shift)
  ret <2 x i32> %res
}
; sqrshl intrinsic on <16 x i8>: expects the `.16b` vector form of sqrshl on
; the two q-register operands.
define <16 x i8> @sqrshl16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqrshl16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqrshl v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
  %val = load <16 x i8>, ptr %A
  %shift = load <16 x i8>, ptr %B
  %res = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %val, <16 x i8> %shift)
  ret <16 x i8> %res
}
; sqrshl intrinsic on <8 x i16>: expects the `.8h` vector form of sqrshl on
; the two q-register operands.
define <8 x i16> @sqrshl8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqrshl8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqrshl v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
  %val = load <8 x i16>, ptr %A
  %shift = load <8 x i16>, ptr %B
  %res = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> %val, <8 x i16> %shift)
  ret <8 x i16> %res
}
; sqrshl intrinsic on <4 x i32>: expects the `.4s` vector form of sqrshl on
; the two q-register operands.
define <4 x i32> @sqrshl4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqrshl4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqrshl v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %val = load <4 x i32>, ptr %A
  %shift = load <4 x i32>, ptr %B
  %res = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> %val, <4 x i32> %shift)
  ret <4 x i32> %res
}
; sqrshl intrinsic on <2 x i64>: expects the `.2d` vector form of sqrshl on
; the two q-register operands.
define <2 x i64> @sqrshl2d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqrshl2d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqrshl v0.2d, v0.2d, v1.2d
; CHECK-NEXT: ret
  %val = load <2 x i64>, ptr %A
  %shift = load <2 x i64>, ptr %B
  %res = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> %val, <2 x i64> %shift)
  ret <2 x i64> %res
}
; sqrshl1d: <1 x i64> operands loaded from %A and %B feed
; llvm.aarch64.neon.sqrshl.v1i64; the assertions expect the d-register form `sqrshl d0, d0, d1`.
define <1 x i64> @sqrshl1d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqrshl1d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: sqrshl d0, d0, d1
; CHECK-NEXT: ret
%tmp1 = load <1 x i64>, ptr %A
%tmp2 = load <1 x i64>, ptr %B
%tmp3 = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
ret <1 x i64> %tmp3
}
; sqrshl1d_constant: like sqrshl1d but the shift operand is the constant <i64 1>.
; The assertions expect the constant to be built with mov + fmov and the
; register form of `sqrshl` to be used.
define <1 x i64> @sqrshl1d_constant(ptr %A) nounwind {
; CHECK-LABEL: sqrshl1d_constant:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: sqrshl d0, d0, d1
; CHECK-NEXT: ret
%tmp1 = load <1 x i64>, ptr %A
%tmp3 = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
ret <1 x i64> %tmp3
}
; sqrshl_scalar: plain i64 overload of llvm.aarch64.neon.sqrshl. The assertions
; expect both operands to be loaded into x8/x9, copied to d0/d1 with fmov,
; shifted with `sqrshl d0, d0, d1`, and the result copied back to x0.
define i64 @sqrshl_scalar(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqrshl_scalar:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr x8, [x0]
; CHECK-NEXT: ldr x9, [x1]
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: fmov d1, x9
; CHECK-NEXT: sqrshl d0, d0, d1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%tmp1 = load i64, ptr %A
%tmp2 = load i64, ptr %B
%tmp3 = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %tmp1, i64 %tmp2)
ret i64 %tmp3
}
; sqrshl_scalar_constant: i64 overload of llvm.aarch64.neon.sqrshl with the shift
; fixed to 1. The assertions expect the loaded value and the constant to be
; moved into d0/d1 with fmov before a register-form `sqrshl`.
define i64 @sqrshl_scalar_constant(ptr %A) nounwind {
; CHECK-LABEL: sqrshl_scalar_constant:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr x9, [x0]
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: fmov d0, x9
; CHECK-NEXT: sqrshl d0, d0, d1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%tmp1 = load i64, ptr %A
%tmp3 = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %tmp1, i64 1)
ret i64 %tmp3
}
; uqrshl16b: loads two <16 x i8> vectors from %A and %B and calls
; llvm.aarch64.neon.uqrshl.v16i8; the assertions expect one `uqrshl` on .16b lanes.
define <16 x i8> @uqrshl16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqrshl16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: uqrshl v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%tmp1 = load <16 x i8>, ptr %A
%tmp2 = load <16 x i8>, ptr %B
%tmp3 = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
; uqrshl8h: loads two <8 x i16> vectors from %A and %B and calls
; llvm.aarch64.neon.uqrshl.v8i16; the assertions expect one `uqrshl` on .8h lanes.
define <8 x i16> @uqrshl8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqrshl8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: uqrshl v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A
%tmp2 = load <8 x i16>, ptr %B
%tmp3 = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
; uqrshl4s: loads two <4 x i32> vectors from %A and %B and calls
; llvm.aarch64.neon.uqrshl.v4i32; the assertions expect one `uqrshl` on .4s lanes.
define <4 x i32> @uqrshl4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqrshl4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: uqrshl v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A
%tmp2 = load <4 x i32>, ptr %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
; uqrshl2d: loads two <2 x i64> vectors from %A and %B and calls
; llvm.aarch64.neon.uqrshl.v2i64; the assertions expect one `uqrshl` on .2d lanes.
define <2 x i64> @uqrshl2d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqrshl2d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: uqrshl v0.2d, v0.2d, v1.2d
; CHECK-NEXT: ret
%tmp1 = load <2 x i64>, ptr %A
%tmp2 = load <2 x i64>, ptr %B
%tmp3 = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
ret <2 x i64> %tmp3
}
; uqrshl1d: <1 x i64> operands loaded from %A and %B feed
; llvm.aarch64.neon.uqrshl.v1i64; the assertions expect the d-register form `uqrshl d0, d0, d1`.
define <1 x i64> @uqrshl1d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqrshl1d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: uqrshl d0, d0, d1
; CHECK-NEXT: ret
%tmp1 = load <1 x i64>, ptr %A
%tmp2 = load <1 x i64>, ptr %B
%tmp3 = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
ret <1 x i64> %tmp3
}
; uqrshl1d_constant: like uqrshl1d but the shift operand is the constant <i64 1>.
; The assertions expect the constant to be built with mov + fmov and the
; register form of `uqrshl` to be used.
define <1 x i64> @uqrshl1d_constant(ptr %A) nounwind {
; CHECK-LABEL: uqrshl1d_constant:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: uqrshl d0, d0, d1
; CHECK-NEXT: ret
%tmp1 = load <1 x i64>, ptr %A
%tmp3 = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
ret <1 x i64> %tmp3
}
; uqrshl_scalar: plain i64 overload of llvm.aarch64.neon.uqrshl. The assertions
; expect both operands to be loaded into x8/x9, copied to d0/d1 with fmov,
; shifted with `uqrshl d0, d0, d1`, and the result copied back to x0.
define i64 @uqrshl_scalar(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqrshl_scalar:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr x8, [x0]
; CHECK-NEXT: ldr x9, [x1]
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: fmov d1, x9
; CHECK-NEXT: uqrshl d0, d0, d1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%tmp1 = load i64, ptr %A
%tmp2 = load i64, ptr %B
%tmp3 = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %tmp1, i64 %tmp2)
ret i64 %tmp3
}
; uqrshl_scalar_constant: i64 overload of llvm.aarch64.neon.uqrshl with the shift
; fixed to 1. The assertions expect the loaded value and the constant to be
; moved into d0/d1 with fmov before a register-form `uqrshl`.
define i64 @uqrshl_scalar_constant(ptr %A) nounwind {
; CHECK-LABEL: uqrshl_scalar_constant:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr x9, [x0]
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: fmov d0, x9
; CHECK-NEXT: uqrshl d0, d0, d1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%tmp1 = load i64, ptr %A
%tmp3 = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %tmp1, i64 1)
ret i64 %tmp3
}
; Declarations of the llvm.aarch64.neon.sqrshl and llvm.aarch64.neon.uqrshl
; overloads: 64-bit vectors and scalar i64 first, then the 128-bit vectors.
declare <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare i64 @llvm.aarch64.neon.sqrshl.i64(i64, i64) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare i64 @llvm.aarch64.neon.uqrshl.i64(i64, i64) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
; urshr8b: llvm.aarch64.neon.urshl.v8i8 with a splat shift of -1.
; With -global-isel=0 the expected code is the immediate form `urshr #1`;
; with -global-isel=1 it is an all-ones `movi` followed by a register `urshl`.
define <8 x i8> @urshr8b(ptr %A) nounwind {
; CHECK-SD-LABEL: urshr8b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: urshr v0.8b, v0.8b, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: urshr8b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr d1, [x0]
; CHECK-GI-NEXT: urshl v0.8b, v1.8b, v0.8b
; CHECK-GI-NEXT: ret
%tmp1 = load <8 x i8>, ptr %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
ret <8 x i8> %tmp3
}
; urshr4h: llvm.aarch64.neon.urshl.v4i16 with a splat shift of -1.
; With -global-isel=0 the expected code is the immediate form `urshr #1`;
; with -global-isel=1 it is an all-ones `movi` followed by a register `urshl`.
define <4 x i16> @urshr4h(ptr %A) nounwind {
; CHECK-SD-LABEL: urshr4h:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: urshr v0.4h, v0.4h, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: urshr4h:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr d1, [x0]
; CHECK-GI-NEXT: urshl v0.4h, v1.4h, v0.4h
; CHECK-GI-NEXT: ret
%tmp1 = load <4 x i16>, ptr %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
ret <4 x i16> %tmp3
}
; urshr2s: llvm.aarch64.neon.urshl.v2i32 with a splat shift of -1.
; With -global-isel=0 the expected code is the immediate form `urshr #1`;
; with -global-isel=1 it is an all-ones `movi` followed by a register `urshl`.
define <2 x i32> @urshr2s(ptr %A) nounwind {
; CHECK-SD-LABEL: urshr2s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: urshr v0.2s, v0.2s, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: urshr2s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr d1, [x0]
; CHECK-GI-NEXT: urshl v0.2s, v1.2s, v0.2s
; CHECK-GI-NEXT: ret
%tmp1 = load <2 x i32>, ptr %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
ret <2 x i32> %tmp3
}
; urshr16b: llvm.aarch64.neon.urshl.v16i8 with a splat shift of -1.
; With -global-isel=0 the expected code is the immediate form `urshr #1`;
; with -global-isel=1 it is an all-ones `movi` followed by a register `urshl`.
define <16 x i8> @urshr16b(ptr %A) nounwind {
; CHECK-SD-LABEL: urshr16b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: urshr v0.16b, v0.16b, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: urshr16b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: urshl v0.16b, v1.16b, v0.16b
; CHECK-GI-NEXT: ret
%tmp1 = load <16 x i8>, ptr %A
%tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
ret <16 x i8> %tmp3
}
; urshr8h: llvm.aarch64.neon.urshl.v8i16 with a splat shift of -1.
; With -global-isel=0 the expected code is the immediate form `urshr #1`;
; with -global-isel=1 it is an all-ones `movi` followed by a register `urshl`.
define <8 x i16> @urshr8h(ptr %A) nounwind {
; CHECK-SD-LABEL: urshr8h:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: urshr v0.8h, v0.8h, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: urshr8h:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: urshl v0.8h, v1.8h, v0.8h
; CHECK-GI-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A
%tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
ret <8 x i16> %tmp3
}
; urshr4s: llvm.aarch64.neon.urshl.v4i32 with a splat shift of -1.
; With -global-isel=0 the expected code is the immediate form `urshr #1`;
; with -global-isel=1 it is an all-ones `movi` followed by a register `urshl`.
define <4 x i32> @urshr4s(ptr %A) nounwind {
; CHECK-SD-LABEL: urshr4s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: urshr v0.4s, v0.4s, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: urshr4s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: urshl v0.4s, v1.4s, v0.4s
; CHECK-GI-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A
%tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
ret <4 x i32> %tmp3
}
; urshr2d: llvm.aarch64.neon.urshl.v2i64 with a splat shift of -1.
; With -global-isel=0 the expected code is the immediate form `urshr #1`;
; with -global-isel=1 it is an all-ones `movi` followed by a register `urshl`.
define <2 x i64> @urshr2d(ptr %A) nounwind {
; CHECK-SD-LABEL: urshr2d:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: urshr v0.2d, v0.2d, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: urshr2d:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: urshl v0.2d, v1.2d, v0.2d
; CHECK-GI-NEXT: ret
%tmp1 = load <2 x i64>, ptr %A
%tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
ret <2 x i64> %tmp3
}
; urshr1d: llvm.aarch64.neon.urshl.v1i64 with a shift of <i64 -1>; both run
; configurations share the expected output `urshr d0, d0, #1`.
define <1 x i64> @urshr1d(ptr %A) nounwind {
; CHECK-LABEL: urshr1d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: urshr d0, d0, #1
; CHECK-NEXT: ret
%tmp1 = load <1 x i64>, ptr %A
%tmp3 = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 -1>)
ret <1 x i64> %tmp3
}
; urshr_scalar: i64 overload of llvm.aarch64.neon.urshl with a shift of -1.
; The assertions expect the value to be loaded straight into d0, shifted with
; `urshr d0, d0, #1`, and returned through x0.
define i64 @urshr_scalar(ptr %A) nounwind {
; CHECK-LABEL: urshr_scalar:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: urshr d0, d0, #1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%tmp1 = load i64, ptr %A
%tmp3 = call i64 @llvm.aarch64.neon.urshl.i64(i64 %tmp1, i64 -1)
ret i64 %tmp3
}
; srshr8b: llvm.aarch64.neon.srshl.v8i8 with a splat shift of -1.
; With -global-isel=0 the expected code is the immediate form `srshr #1`;
; with -global-isel=1 it is an all-ones `movi` followed by a register `srshl`.
define <8 x i8> @srshr8b(ptr %A) nounwind {
; CHECK-SD-LABEL: srshr8b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: srshr v0.8b, v0.8b, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: srshr8b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr d1, [x0]
; CHECK-GI-NEXT: srshl v0.8b, v1.8b, v0.8b
; CHECK-GI-NEXT: ret
%tmp1 = load <8 x i8>, ptr %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
ret <8 x i8> %tmp3
}
; srshr4h: llvm.aarch64.neon.srshl.v4i16 with a splat shift of -1.
; With -global-isel=0 the expected code is the immediate form `srshr #1`;
; with -global-isel=1 it is an all-ones `movi` followed by a register `srshl`.
define <4 x i16> @srshr4h(ptr %A) nounwind {
; CHECK-SD-LABEL: srshr4h:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: srshr v0.4h, v0.4h, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: srshr4h:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr d1, [x0]
; CHECK-GI-NEXT: srshl v0.4h, v1.4h, v0.4h
; CHECK-GI-NEXT: ret
%tmp1 = load <4 x i16>, ptr %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
ret <4 x i16> %tmp3
}
; srshr2s: llvm.aarch64.neon.srshl.v2i32 with a splat shift of -1.
; With -global-isel=0 the expected code is the immediate form `srshr #1`;
; with -global-isel=1 it is an all-ones `movi` followed by a register `srshl`.
define <2 x i32> @srshr2s(ptr %A) nounwind {
; CHECK-SD-LABEL: srshr2s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: srshr v0.2s, v0.2s, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: srshr2s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr d1, [x0]
; CHECK-GI-NEXT: srshl v0.2s, v1.2s, v0.2s
; CHECK-GI-NEXT: ret
%tmp1 = load <2 x i32>, ptr %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
ret <2 x i32> %tmp3
}
; srshr16b: llvm.aarch64.neon.srshl.v16i8 with a splat shift of -1.
; With -global-isel=0 the expected code is the immediate form `srshr #1`;
; with -global-isel=1 it is an all-ones `movi` followed by a register `srshl`.
define <16 x i8> @srshr16b(ptr %A) nounwind {
; CHECK-SD-LABEL: srshr16b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: srshr v0.16b, v0.16b, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: srshr16b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: srshl v0.16b, v1.16b, v0.16b
; CHECK-GI-NEXT: ret
%tmp1 = load <16 x i8>, ptr %A
%tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
ret <16 x i8> %tmp3
}
; srshr8h: llvm.aarch64.neon.srshl.v8i16 with a splat shift of -1.
; With -global-isel=0 the expected code is the immediate form `srshr #1`;
; with -global-isel=1 it is an all-ones `movi` followed by a register `srshl`.
define <8 x i16> @srshr8h(ptr %A) nounwind {
; CHECK-SD-LABEL: srshr8h:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: srshr v0.8h, v0.8h, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: srshr8h:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: srshl v0.8h, v1.8h, v0.8h
; CHECK-GI-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A
%tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
ret <8 x i16> %tmp3
}
; srshr4s: llvm.aarch64.neon.srshl.v4i32 with a splat shift of -1.
; With -global-isel=0 the expected code is the immediate form `srshr #1`;
; with -global-isel=1 it is an all-ones `movi` followed by a register `srshl`.
define <4 x i32> @srshr4s(ptr %A) nounwind {
; CHECK-SD-LABEL: srshr4s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: srshr v0.4s, v0.4s, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: srshr4s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: srshl v0.4s, v1.4s, v0.4s
; CHECK-GI-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A
%tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
ret <4 x i32> %tmp3
}
; srshr2d: llvm.aarch64.neon.srshl.v2i64 with a splat shift of -1.
; With -global-isel=0 the expected code is the immediate form `srshr #1`;
; with -global-isel=1 it is an all-ones `movi` followed by a register `srshl`.
define <2 x i64> @srshr2d(ptr %A) nounwind {
; CHECK-SD-LABEL: srshr2d:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: srshr v0.2d, v0.2d, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: srshr2d:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: srshl v0.2d, v1.2d, v0.2d
; CHECK-GI-NEXT: ret
%tmp1 = load <2 x i64>, ptr %A
%tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
ret <2 x i64> %tmp3
}
; srshr1d: llvm.aarch64.neon.srshl.v1i64 with a shift of <i64 -1>; both run
; configurations share the expected output `srshr d0, d0, #1`.
define <1 x i64> @srshr1d(ptr %A) nounwind {
; CHECK-LABEL: srshr1d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: srshr d0, d0, #1
; CHECK-NEXT: ret
%tmp1 = load <1 x i64>, ptr %A
%tmp3 = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 -1>)
ret <1 x i64> %tmp3
}
; srshr_scalar: i64 overload of llvm.aarch64.neon.srshl with a shift of -1.
; The assertions expect the value to be loaded straight into d0, shifted with
; `srshr d0, d0, #1`, and returned through x0.
define i64 @srshr_scalar(ptr %A) nounwind {
; CHECK-LABEL: srshr_scalar:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: srshr d0, d0, #1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%tmp1 = load i64, ptr %A
%tmp3 = call i64 @llvm.aarch64.neon.srshl.i64(i64 %tmp1, i64 -1)
ret i64 %tmp3
}
; sqshlu8b: llvm.aarch64.neon.sqshlu.v8i8 with a splat shift of 1; the
; assertions expect the immediate form `sqshlu v0.8b, v0.8b, #1`.
define <8 x i8> @sqshlu8b(ptr %A) nounwind {
; CHECK-LABEL: sqshlu8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: sqshlu v0.8b, v0.8b, #1
; CHECK-NEXT: ret
%tmp1 = load <8 x i8>, ptr %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
ret <8 x i8> %tmp3
}
; sqshlu4h: llvm.aarch64.neon.sqshlu.v4i16 with a splat shift of 1; the
; assertions expect the immediate form `sqshlu v0.4h, v0.4h, #1`.
define <4 x i16> @sqshlu4h(ptr %A) nounwind {
; CHECK-LABEL: sqshlu4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: sqshlu v0.4h, v0.4h, #1
; CHECK-NEXT: ret
%tmp1 = load <4 x i16>, ptr %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
ret <4 x i16> %tmp3
}
; sqshlu2s: llvm.aarch64.neon.sqshlu.v2i32 with a splat shift of 1; the
; assertions expect the immediate form `sqshlu v0.2s, v0.2s, #1`.
define <2 x i32> @sqshlu2s(ptr %A) nounwind {
; CHECK-LABEL: sqshlu2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: sqshlu v0.2s, v0.2s, #1
; CHECK-NEXT: ret
%tmp1 = load <2 x i32>, ptr %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
ret <2 x i32> %tmp3
}
; sqshlu16b: llvm.aarch64.neon.sqshlu.v16i8 with a splat shift of 1; the
; assertions expect the immediate form `sqshlu v0.16b, v0.16b, #1`.
define <16 x i8> @sqshlu16b(ptr %A) nounwind {
; CHECK-LABEL: sqshlu16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: sqshlu v0.16b, v0.16b, #1
; CHECK-NEXT: ret
%tmp1 = load <16 x i8>, ptr %A
%tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
ret <16 x i8> %tmp3
}
; sqshlu8h: llvm.aarch64.neon.sqshlu.v8i16 with a splat shift of 1; the
; assertions expect the immediate form `sqshlu v0.8h, v0.8h, #1`.
define <8 x i16> @sqshlu8h(ptr %A) nounwind {
; CHECK-LABEL: sqshlu8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: sqshlu v0.8h, v0.8h, #1
; CHECK-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A
%tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
ret <8 x i16> %tmp3
}
; sqshlu4s: llvm.aarch64.neon.sqshlu.v4i32 with a splat shift of 1; the
; assertions expect the immediate form `sqshlu v0.4s, v0.4s, #1`.
define <4 x i32> @sqshlu4s(ptr %A) nounwind {
; CHECK-LABEL: sqshlu4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: sqshlu v0.4s, v0.4s, #1
; CHECK-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A
%tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
ret <4 x i32> %tmp3
}
; sqshlu2d: llvm.aarch64.neon.sqshlu.v2i64 with a splat shift of 1; the
; assertions expect the immediate form `sqshlu v0.2d, v0.2d, #1`.
define <2 x i64> @sqshlu2d(ptr %A) nounwind {
; CHECK-LABEL: sqshlu2d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: sqshlu v0.2d, v0.2d, #1
; CHECK-NEXT: ret
%tmp1 = load <2 x i64>, ptr %A
%tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
ret <2 x i64> %tmp3
}
; sqshlu1d_constant: llvm.aarch64.neon.sqshlu.v1i64 with a shift of <i64 1>; the
; assertions expect the immediate d-register form `sqshlu d0, d0, #1`.
define <1 x i64> @sqshlu1d_constant(ptr %A) nounwind {
; CHECK-LABEL: sqshlu1d_constant:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: sqshlu d0, d0, #1
; CHECK-NEXT: ret
%tmp1 = load <1 x i64>, ptr %A
%tmp3 = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
ret <1 x i64> %tmp3
}
; sqshlu_i64_constant: i64 overload of llvm.aarch64.neon.sqshlu with shift 1.
; The assertions expect the value to be loaded straight into d0, shifted with
; `sqshlu d0, d0, #1`, and returned through x0.
define i64 @sqshlu_i64_constant(ptr %A) nounwind {
; CHECK-LABEL: sqshlu_i64_constant:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: sqshlu d0, d0, #1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%tmp1 = load i64, ptr %A
%tmp3 = call i64 @llvm.aarch64.neon.sqshlu.i64(i64 %tmp1, i64 1)
ret i64 %tmp3
}
; sqshlu_i32_constant: i32 overload of llvm.aarch64.neon.sqshlu with shift 1.
; The assertions expect the value to be loaded into w8, moved to s0 with fmov,
; shifted with `sqshlu s0, s0, #1`, and returned through w0.
define i32 @sqshlu_i32_constant(ptr %A) nounwind {
; CHECK-LABEL: sqshlu_i32_constant:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr w8, [x0]
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: sqshlu s0, s0, #1
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%tmp1 = load i32, ptr %A
%tmp3 = call i32 @llvm.aarch64.neon.sqshlu.i32(i32 %tmp1, i32 1)
ret i32 %tmp3
}
; Declarations of the llvm.aarch64.neon.sqshlu overloads: 64-bit vectors,
; scalar i64 and i32, then the 128-bit vectors.
declare <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare i64 @llvm.aarch64.neon.sqshlu.i64(i64, i64) nounwind readnone
declare i32 @llvm.aarch64.neon.sqshlu.i32(i32, i32) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
; rshrn8b: llvm.aarch64.neon.rshrn.v8i8 narrows <8 x i16> to <8 x i8> with a
; shift amount of 1; the assertions expect `rshrn v0.8b, v0.8h, #1`.
define <8 x i8> @rshrn8b(ptr %A) nounwind {
; CHECK-LABEL: rshrn8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: rshrn v0.8b, v0.8h, #1
; CHECK-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1)
ret <8 x i8> %tmp3
}
; rshrn4h: llvm.aarch64.neon.rshrn.v4i16 narrows <4 x i32> to <4 x i16> with a
; shift amount of 1; the assertions expect `rshrn v0.4h, v0.4s, #1`.
define <4 x i16> @rshrn4h(ptr %A) nounwind {
; CHECK-LABEL: rshrn4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: rshrn v0.4h, v0.4s, #1
; CHECK-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1)
ret <4 x i16> %tmp3
}
; rshrn2s: llvm.aarch64.neon.rshrn.v2i32 narrows <2 x i64> to <2 x i32> with a
; shift amount of 1; the assertions expect `rshrn v0.2s, v0.2d, #1`.
define <2 x i32> @rshrn2s(ptr %A) nounwind {
; CHECK-LABEL: rshrn2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: rshrn v0.2s, v0.2d, #1
; CHECK-NEXT: ret
%tmp1 = load <2 x i64>, ptr %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1)
ret <2 x i32> %tmp3
}
; rshrn16b: the shufflevector places the <8 x i8> loaded from %ret in elements
; 0-7 and the rshrn result in elements 8-15; the assertions expect this
; concatenation to become a single `rshrn2 v0.16b, v1.8h, #1`.
define <16 x i8> @rshrn16b(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: rshrn16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: rshrn2 v0.16b, v1.8h, #1
; CHECK-NEXT: ret
%out = load <8 x i8>, ptr %ret
%tmp1 = load <8 x i16>, ptr %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1)
%tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x i8> %tmp4
}
; rshrn8h: the shufflevector places the <4 x i16> loaded from %ret in elements
; 0-3 and the rshrn result in elements 4-7; the assertions expect this
; concatenation to become a single `rshrn2 v0.8h, v1.4s, #1`.
define <8 x i16> @rshrn8h(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: rshrn8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: rshrn2 v0.8h, v1.4s, #1
; CHECK-NEXT: ret
%out = load <4 x i16>, ptr %ret
%tmp1 = load <4 x i32>, ptr %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1)
%tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i16> %tmp4
}
; rshrn4s: the shufflevector places the <2 x i32> loaded from %ret in elements
; 0-1 and the rshrn result in elements 2-3; the assertions expect this
; concatenation to become a single `rshrn2 v0.4s, v1.2d, #1`.
define <4 x i32> @rshrn4s(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: rshrn4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: rshrn2 v0.4s, v1.2d, #1
; CHECK-NEXT: ret
%out = load <2 x i32>, ptr %ret
%tmp1 = load <2 x i64>, ptr %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1)
%tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i32> %tmp4
}
; Declarations of the llvm.aarch64.neon.rshrn overloads; each takes the wide
; source vector and an i32 shift amount.
declare <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64>, i32) nounwind readnone
; shrn8b: generic IR (lshr by 1, then trunc <8 x i16> to <8 x i8>) rather than
; an intrinsic; the assertions expect it to select `shrn v0.8b, v0.8h, #1`.
define <8 x i8> @shrn8b(ptr %A) nounwind {
; CHECK-LABEL: shrn8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: shrn v0.8b, v0.8h, #1
; CHECK-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A
%tmp2 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%tmp3 = trunc <8 x i16> %tmp2 to <8 x i8>
ret <8 x i8> %tmp3
}
; shrn4h: generic IR (lshr by 1, then trunc <4 x i32> to <4 x i16>) rather than
; an intrinsic; the assertions expect it to select `shrn v0.4h, v0.4s, #1`.
define <4 x i16> @shrn4h(ptr %A) nounwind {
; CHECK-LABEL: shrn4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: shrn v0.4h, v0.4s, #1
; CHECK-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A
%tmp2 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
%tmp3 = trunc <4 x i32> %tmp2 to <4 x i16>
ret <4 x i16> %tmp3
}
; shrn2s: generic IR (lshr by 1, then trunc <2 x i64> to <2 x i32>) rather than
; an intrinsic; the assertions expect it to select `shrn v0.2s, v0.2d, #1`.
define <2 x i32> @shrn2s(ptr %A) nounwind {
; CHECK-LABEL: shrn2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: shrn v0.2s, v0.2d, #1
; CHECK-NEXT: ret
%tmp1 = load <2 x i64>, ptr %A
%tmp2 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
%tmp3 = trunc <2 x i64> %tmp2 to <2 x i32>
ret <2 x i32> %tmp3
}
; shrn16b: lshr by 1 + trunc to <8 x i8>, then a shufflevector that places the
; <8 x i8> loaded from %ret in elements 0-7 and the narrowed value in elements
; 8-15; the assertions expect a single `shrn2 v0.16b, v1.8h, #1`.
define <16 x i8> @shrn16b(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: shrn16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: shrn2 v0.16b, v1.8h, #1
; CHECK-NEXT: ret
%out = load <8 x i8>, ptr %ret
%tmp1 = load <8 x i16>, ptr %A
%tmp2 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%tmp3 = trunc <8 x i16> %tmp2 to <8 x i8>
%tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x i8> %tmp4
}
; shrn8h: lshr by 1 + trunc to <4 x i16>, then a shufflevector that places the
; <4 x i16> loaded from %ret in elements 0-3 and the narrowed value in elements
; 4-7; the assertions expect a single `shrn2 v0.8h, v1.4s, #1`.
define <8 x i16> @shrn8h(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: shrn8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: shrn2 v0.8h, v1.4s, #1
; CHECK-NEXT: ret
%out = load <4 x i16>, ptr %ret
%tmp1 = load <4 x i32>, ptr %A
%tmp2 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
%tmp3 = trunc <4 x i32> %tmp2 to <4 x i16>
%tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i16> %tmp4
}
; shrn4s: lshr by 1 + trunc to <2 x i32>, then a shufflevector that places the
; <2 x i32> loaded from %ret in elements 0-1 and the narrowed value in elements
; 2-3; the assertions expect a single `shrn2 v0.4s, v1.2d, #1`.
define <4 x i32> @shrn4s(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: shrn4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: shrn2 v0.4s, v1.2d, #1
; CHECK-NEXT: ret
%out = load <2 x i32>, ptr %ret
%tmp1 = load <2 x i64>, ptr %A
%tmp2 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
%tmp3 = trunc <2 x i64> %tmp2 to <2 x i32>
%tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i32> %tmp4
}
; Declarations named llvm.aarch64.neon.shrn.*.
; NOTE(review): the shrn* test functions that precede these lines use
; lshr + trunc, and no call to these declarations is visible in this part of
; the file -- confirm whether they are still needed.
declare <8 x i8> @llvm.aarch64.neon.shrn.v8i8(<8 x i16>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.shrn.v4i16(<4 x i32>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.shrn.v2i32(<2 x i64>, i32) nounwind readnone
; sqshrn1s: scalar i64 -> i32 overload of llvm.aarch64.neon.sqshrn with shift 1.
; The assertions expect the argument to be moved into d0, narrowed with
; `sqshrn s0, d0, #1`, and returned through w0.
define i32 @sqshrn1s(i64 %A) nounwind {
; CHECK-LABEL: sqshrn1s:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d0, x0
; CHECK-NEXT: sqshrn s0, d0, #1
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%tmp = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %A, i32 1)
ret i32 %tmp
}
; sqshrn8b: llvm.aarch64.neon.sqshrn.v8i8 narrows <8 x i16> to <8 x i8> with a
; shift amount of 1; the assertions expect `sqshrn v0.8b, v0.8h, #1`.
define <8 x i8> @sqshrn8b(ptr %A) nounwind {
; CHECK-LABEL: sqshrn8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: sqshrn v0.8b, v0.8h, #1
; CHECK-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1)
ret <8 x i8> %tmp3
}
; sqshrn4h: llvm.aarch64.neon.sqshrn.v4i16 narrows <4 x i32> to <4 x i16> with a
; shift amount of 1; the assertions expect `sqshrn v0.4h, v0.4s, #1`.
define <4 x i16> @sqshrn4h(ptr %A) nounwind {
; CHECK-LABEL: sqshrn4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: sqshrn v0.4h, v0.4s, #1
; CHECK-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1)
ret <4 x i16> %tmp3
}
; sqshrn2s: llvm.aarch64.neon.sqshrn.v2i32 narrows <2 x i64> to <2 x i32> with a
; shift amount of 1; the assertions expect `sqshrn v0.2s, v0.2d, #1`.
define <2 x i32> @sqshrn2s(ptr %A) nounwind {
; CHECK-LABEL: sqshrn2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: sqshrn v0.2s, v0.2d, #1
; CHECK-NEXT: ret
%tmp1 = load <2 x i64>, ptr %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1)
ret <2 x i32> %tmp3
}
; sqshrn16b: the shufflevector places the <8 x i8> loaded from %ret in elements
; 0-7 and the sqshrn result in elements 8-15; the assertions expect this
; concatenation to become a single `sqshrn2 v0.16b, v1.8h, #1`.
define <16 x i8> @sqshrn16b(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: sqshrn16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqshrn2 v0.16b, v1.8h, #1
; CHECK-NEXT: ret
%out = load <8 x i8>, ptr %ret
%tmp1 = load <8 x i16>, ptr %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1)
%tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x i8> %tmp4
}
; sqshrn8h: the shufflevector places the <4 x i16> loaded from %ret in elements
; 0-3 and the sqshrn result in elements 4-7; the assertions expect this
; concatenation to become a single `sqshrn2 v0.8h, v1.4s, #1`.
define <8 x i16> @sqshrn8h(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: sqshrn8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqshrn2 v0.8h, v1.4s, #1
; CHECK-NEXT: ret
%out = load <4 x i16>, ptr %ret
%tmp1 = load <4 x i32>, ptr %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1)
%tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i16> %tmp4
}
; sqshrn4s: the shufflevector places the <2 x i32> loaded from %ret in elements
; 0-1 and the sqshrn result in elements 2-3; the assertions expect this
; concatenation to become a single `sqshrn2 v0.4s, v1.2d, #1`.
define <4 x i32> @sqshrn4s(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: sqshrn4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqshrn2 v0.4s, v1.2d, #1
; CHECK-NEXT: ret
%out = load <2 x i32>, ptr %ret
%tmp1 = load <2 x i64>, ptr %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1)
%tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i32> %tmp4
}
; Declarations of the llvm.aarch64.neon.sqshrn overloads (scalar i64 -> i32 and
; the three narrowing vector forms); each takes an i32 shift amount.
declare i32 @llvm.aarch64.neon.sqshrn.i32(i64, i32) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64>, i32) nounwind readnone
; sqshrun1s: scalar i64 -> i32 overload of llvm.aarch64.neon.sqshrun with shift 1.
; The assertions expect the argument to be moved into d0, narrowed with
; `sqshrun s0, d0, #1`, and returned through w0.
define i32 @sqshrun1s(i64 %A) nounwind {
; CHECK-LABEL: sqshrun1s:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d0, x0
; CHECK-NEXT: sqshrun s0, d0, #1
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%tmp = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %A, i32 1)
ret i32 %tmp
}
; sqshrun8b: llvm.aarch64.neon.sqshrun.v8i8 narrows <8 x i16> to <8 x i8> with a
; shift amount of 1; the assertions expect `sqshrun v0.8b, v0.8h, #1`.
define <8 x i8> @sqshrun8b(ptr %A) nounwind {
; CHECK-LABEL: sqshrun8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: sqshrun v0.8b, v0.8h, #1
; CHECK-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1)
ret <8 x i8> %tmp3
}
; sqshrun4h: llvm.aarch64.neon.sqshrun.v4i16 narrows <4 x i32> to <4 x i16> with
; a shift amount of 1; the assertions expect `sqshrun v0.4h, v0.4s, #1`.
define <4 x i16> @sqshrun4h(ptr %A) nounwind {
; CHECK-LABEL: sqshrun4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: sqshrun v0.4h, v0.4s, #1
; CHECK-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1)
ret <4 x i16> %tmp3
}
; sqshrun2s: llvm.aarch64.neon.sqshrun.v2i32 narrows <2 x i64> to <2 x i32> with
; a shift amount of 1; the assertions expect `sqshrun v0.2s, v0.2d, #1`.
define <2 x i32> @sqshrun2s(ptr %A) nounwind {
; CHECK-LABEL: sqshrun2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: sqshrun v0.2s, v0.2d, #1
; CHECK-NEXT: ret
%tmp1 = load <2 x i64>, ptr %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1)
ret <2 x i32> %tmp3
}
; sqshrun16b: the shufflevector places the <8 x i8> loaded from %ret in elements
; 0-7 and the sqshrun result in elements 8-15; the assertions expect this
; concatenation to become a single `sqshrun2 v0.16b, v1.8h, #1`.
define <16 x i8> @sqshrun16b(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: sqshrun16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqshrun2 v0.16b, v1.8h, #1
; CHECK-NEXT: ret
%out = load <8 x i8>, ptr %ret
%tmp1 = load <8 x i16>, ptr %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1)
%tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x i8> %tmp4
}
; sqshrun8h: the shufflevector places the <4 x i16> loaded from %ret in elements
; 0-3 and the sqshrun result in elements 4-7; the assertions expect this
; concatenation to become a single `sqshrun2 v0.8h, v1.4s, #1`.
define <8 x i16> @sqshrun8h(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: sqshrun8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqshrun2 v0.8h, v1.4s, #1
; CHECK-NEXT: ret
%out = load <4 x i16>, ptr %ret
%tmp1 = load <4 x i32>, ptr %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1)
%tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i16> %tmp4
}
; sqshrun4s: the shufflevector places the <2 x i32> loaded from %ret in elements
; 0-1 and the sqshrun result in elements 2-3; the assertions expect this
; concatenation to become a single `sqshrun2 v0.4s, v1.2d, #1`.
define <4 x i32> @sqshrun4s(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: sqshrun4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqshrun2 v0.4s, v1.2d, #1
; CHECK-NEXT: ret
%out = load <2 x i32>, ptr %ret
%tmp1 = load <2 x i64>, ptr %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1)
%tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i32> %tmp4
}
; Declarations of the llvm.aarch64.neon.sqshrun overloads (scalar i64 -> i32 and
; the three narrowing vector forms); each takes an i32 shift amount.
declare i32 @llvm.aarch64.neon.sqshrun.i32(i64, i32) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64>, i32) nounwind readnone
; sqrshrn1s: scalar i64 -> i32 overload of llvm.aarch64.neon.sqrshrn with shift 1.
; The assertions expect the argument to be moved into d0, narrowed with
; `sqrshrn s0, d0, #1`, and returned through w0.
define i32 @sqrshrn1s(i64 %A) nounwind {
; CHECK-LABEL: sqrshrn1s:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d0, x0
; CHECK-NEXT: sqrshrn s0, d0, #1
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%tmp = call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 %A, i32 1)
ret i32 %tmp
}
; sqrshrn8b: llvm.aarch64.neon.sqrshrn.v8i8 narrows <8 x i16> to <8 x i8> with a
; shift amount of 1; the assertions expect `sqrshrn v0.8b, v0.8h, #1`.
define <8 x i8> @sqrshrn8b(ptr %A) nounwind {
; CHECK-LABEL: sqrshrn8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: sqrshrn v0.8b, v0.8h, #1
; CHECK-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
ret <8 x i8> %tmp3
}
; sqrshrn by #1 narrowing a loaded <4 x i32> to <4 x i16>.
define <4 x i16> @sqrshrn4h(ptr %A) nounwind {
; CHECK-LABEL: sqrshrn4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: sqrshrn v0.4h, v0.4s, #1
; CHECK-NEXT: ret
  %wide = load <4 x i32>, ptr %A
  %narrow = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %wide, i32 1)
  ret <4 x i16> %narrow
}
; sqrshrn by #1 narrowing a loaded <2 x i64> to <2 x i32>.
define <2 x i32> @sqrshrn2s(ptr %A) nounwind {
; CHECK-LABEL: sqrshrn2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: sqrshrn v0.2s, v0.2d, #1
; CHECK-NEXT: ret
  %wide = load <2 x i64>, ptr %A
  %narrow = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %wide, i32 1)
  ret <2 x i32> %narrow
}
; sqrshrn by #1 on <8 x i16>, concatenated after the <8 x i8> loaded from
; %ret; expected to select the high-half form sqrshrn2.
define <16 x i8> @sqrshrn16b(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: sqrshrn16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqrshrn2 v0.16b, v1.8h, #1
; CHECK-NEXT: ret
  %low = load <8 x i8>, ptr %ret
  %wide = load <8 x i16>, ptr %A
  %narrow = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %wide, i32 1)
  %joined = shufflevector <8 x i8> %low, <8 x i8> %narrow, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %joined
}
; sqrshrn by #1 on <4 x i32>, concatenated after the <4 x i16> loaded from
; %ret; expected to select the high-half form sqrshrn2.
define <8 x i16> @sqrshrn8h(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: sqrshrn8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqrshrn2 v0.8h, v1.4s, #1
; CHECK-NEXT: ret
  %low = load <4 x i16>, ptr %ret
  %wide = load <4 x i32>, ptr %A
  %narrow = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %wide, i32 1)
  %joined = shufflevector <4 x i16> %low, <4 x i16> %narrow, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %joined
}
; sqrshrn by #1 on <2 x i64>, concatenated after the <2 x i32> loaded from
; %ret; expected to select the high-half form sqrshrn2.
define <4 x i32> @sqrshrn4s(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: sqrshrn4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqrshrn2 v0.4s, v1.2d, #1
; CHECK-NEXT: ret
  %low = load <2 x i32>, ptr %ret
  %wide = load <2 x i64>, ptr %A
  %narrow = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %wide, i32 1)
  %joined = shufflevector <2 x i32> %low, <2 x i32> %narrow, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %joined
}
declare i32 @llvm.aarch64.neon.sqrshrn.i32(i64, i32) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64>, i32) nounwind readnone
; Scalar sqrshrun intrinsic (i64 -> i32, shift amount 1); the expected assembly
; moves the argument into a SIMD register, narrows it, and moves it back.
define i32 @sqrshrun1s(i64 %A) nounwind {
; CHECK-LABEL: sqrshrun1s:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d0, x0
; CHECK-NEXT: sqrshrun s0, d0, #1
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %narrow = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %A, i32 1)
  ret i32 %narrow
}
; sqrshrun by #1 narrowing a loaded <8 x i16> to <8 x i8>.
define <8 x i8> @sqrshrun8b(ptr %A) nounwind {
; CHECK-LABEL: sqrshrun8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: sqrshrun v0.8b, v0.8h, #1
; CHECK-NEXT: ret
  %wide = load <8 x i16>, ptr %A
  %narrow = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %wide, i32 1)
  ret <8 x i8> %narrow
}
; sqrshrun by #1 narrowing a loaded <4 x i32> to <4 x i16>.
define <4 x i16> @sqrshrun4h(ptr %A) nounwind {
; CHECK-LABEL: sqrshrun4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: sqrshrun v0.4h, v0.4s, #1
; CHECK-NEXT: ret
  %wide = load <4 x i32>, ptr %A
  %narrow = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %wide, i32 1)
  ret <4 x i16> %narrow
}
; sqrshrun by #1 narrowing a loaded <2 x i64> to <2 x i32>.
define <2 x i32> @sqrshrun2s(ptr %A) nounwind {
; CHECK-LABEL: sqrshrun2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: sqrshrun v0.2s, v0.2d, #1
; CHECK-NEXT: ret
  %wide = load <2 x i64>, ptr %A
  %narrow = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %wide, i32 1)
  ret <2 x i32> %narrow
}
; sqrshrun by #1 on <8 x i16>, concatenated after the <8 x i8> loaded from
; %ret; expected to select the high-half form sqrshrun2.
define <16 x i8> @sqrshrun16b(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: sqrshrun16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqrshrun2 v0.16b, v1.8h, #1
; CHECK-NEXT: ret
  %low = load <8 x i8>, ptr %ret
  %wide = load <8 x i16>, ptr %A
  %narrow = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %wide, i32 1)
  %joined = shufflevector <8 x i8> %low, <8 x i8> %narrow, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %joined
}
; sqrshrun by #1 on <4 x i32>, concatenated after the <4 x i16> loaded from
; %ret; expected to select the high-half form sqrshrun2.
define <8 x i16> @sqrshrun8h(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: sqrshrun8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqrshrun2 v0.8h, v1.4s, #1
; CHECK-NEXT: ret
  %low = load <4 x i16>, ptr %ret
  %wide = load <4 x i32>, ptr %A
  %narrow = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %wide, i32 1)
  %joined = shufflevector <4 x i16> %low, <4 x i16> %narrow, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %joined
}
; sqrshrun by #1 on <2 x i64>, concatenated after the <2 x i32> loaded from
; %ret; expected to select the high-half form sqrshrun2.
define <4 x i32> @sqrshrun4s(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: sqrshrun4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqrshrun2 v0.4s, v1.2d, #1
; CHECK-NEXT: ret
  %low = load <2 x i32>, ptr %ret
  %wide = load <2 x i64>, ptr %A
  %narrow = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %wide, i32 1)
  %joined = shufflevector <2 x i32> %low, <2 x i32> %narrow, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %joined
}
declare i32 @llvm.aarch64.neon.sqrshrun.i32(i64, i32) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64>, i32) nounwind readnone
; Scalar uqrshrn intrinsic (i64 -> i32, shift amount 1); the expected assembly
; moves the argument into a SIMD register, narrows it, and moves it back.
define i32 @uqrshrn1s(i64 %A) nounwind {
; CHECK-LABEL: uqrshrn1s:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d0, x0
; CHECK-NEXT: uqrshrn s0, d0, #1
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %narrow = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %A, i32 1)
  ret i32 %narrow
}
; uqrshrn by #1 narrowing a loaded <8 x i16> to <8 x i8>.
define <8 x i8> @uqrshrn8b(ptr %A) nounwind {
; CHECK-LABEL: uqrshrn8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: uqrshrn v0.8b, v0.8h, #1
; CHECK-NEXT: ret
  %wide = load <8 x i16>, ptr %A
  %narrow = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %wide, i32 1)
  ret <8 x i8> %narrow
}
; uqrshrn by #1 narrowing a loaded <4 x i32> to <4 x i16>.
define <4 x i16> @uqrshrn4h(ptr %A) nounwind {
; CHECK-LABEL: uqrshrn4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: uqrshrn v0.4h, v0.4s, #1
; CHECK-NEXT: ret
  %wide = load <4 x i32>, ptr %A
  %narrow = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %wide, i32 1)
  ret <4 x i16> %narrow
}
; uqrshrn by #1 narrowing a loaded <2 x i64> to <2 x i32>.
define <2 x i32> @uqrshrn2s(ptr %A) nounwind {
; CHECK-LABEL: uqrshrn2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: uqrshrn v0.2s, v0.2d, #1
; CHECK-NEXT: ret
  %wide = load <2 x i64>, ptr %A
  %narrow = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %wide, i32 1)
  ret <2 x i32> %narrow
}
; uqrshrn by #1 on <8 x i16>, concatenated after the <8 x i8> loaded from
; %ret; expected to select the high-half form uqrshrn2.
define <16 x i8> @uqrshrn16b(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: uqrshrn16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: uqrshrn2 v0.16b, v1.8h, #1
; CHECK-NEXT: ret
  %low = load <8 x i8>, ptr %ret
  %wide = load <8 x i16>, ptr %A
  %narrow = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %wide, i32 1)
  %joined = shufflevector <8 x i8> %low, <8 x i8> %narrow, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %joined
}
; uqrshrn by #1 on <4 x i32>, concatenated after the <4 x i16> loaded from
; %ret; expected to select the high-half form uqrshrn2.
define <8 x i16> @uqrshrn8h(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: uqrshrn8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: uqrshrn2 v0.8h, v1.4s, #1
; CHECK-NEXT: ret
  %low = load <4 x i16>, ptr %ret
  %wide = load <4 x i32>, ptr %A
  %narrow = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %wide, i32 1)
  %joined = shufflevector <4 x i16> %low, <4 x i16> %narrow, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %joined
}
; uqrshrn by #1 on <2 x i64>, concatenated after the <2 x i32> loaded from
; %ret; expected to select the high-half form uqrshrn2.
define <4 x i32> @uqrshrn4s(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: uqrshrn4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: uqrshrn2 v0.4s, v1.2d, #1
; CHECK-NEXT: ret
  %low = load <2 x i32>, ptr %ret
  %wide = load <2 x i64>, ptr %A
  %narrow = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %wide, i32 1)
  %joined = shufflevector <2 x i32> %low, <2 x i32> %narrow, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %joined
}
declare i32 @llvm.aarch64.neon.uqrshrn.i32(i64, i32) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64>, i32) nounwind readnone
; Scalar uqshrn intrinsic (i64 -> i32, shift amount 1); the expected assembly
; moves the argument into a SIMD register, narrows it, and moves it back.
define i32 @uqshrn1s(i64 %A) nounwind {
; CHECK-LABEL: uqshrn1s:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d0, x0
; CHECK-NEXT: uqshrn s0, d0, #1
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %narrow = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %A, i32 1)
  ret i32 %narrow
}
; uqshrn by #1 narrowing a loaded <8 x i16> to <8 x i8>.
define <8 x i8> @uqshrn8b(ptr %A) nounwind {
; CHECK-LABEL: uqshrn8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: uqshrn v0.8b, v0.8h, #1
; CHECK-NEXT: ret
  %wide = load <8 x i16>, ptr %A
  %narrow = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %wide, i32 1)
  ret <8 x i8> %narrow
}
; uqshrn by #1 narrowing a loaded <4 x i32> to <4 x i16>.
define <4 x i16> @uqshrn4h(ptr %A) nounwind {
; CHECK-LABEL: uqshrn4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: uqshrn v0.4h, v0.4s, #1
; CHECK-NEXT: ret
  %wide = load <4 x i32>, ptr %A
  %narrow = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %wide, i32 1)
  ret <4 x i16> %narrow
}
; uqshrn by #1 narrowing a loaded <2 x i64> to <2 x i32>.
define <2 x i32> @uqshrn2s(ptr %A) nounwind {
; CHECK-LABEL: uqshrn2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: uqshrn v0.2s, v0.2d, #1
; CHECK-NEXT: ret
  %wide = load <2 x i64>, ptr %A
  %narrow = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %wide, i32 1)
  ret <2 x i32> %narrow
}
; uqshrn by #1 on <8 x i16>, concatenated after the <8 x i8> loaded from
; %ret; expected to select the high-half form uqshrn2.
define <16 x i8> @uqshrn16b(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: uqshrn16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: uqshrn2 v0.16b, v1.8h, #1
; CHECK-NEXT: ret
  %low = load <8 x i8>, ptr %ret
  %wide = load <8 x i16>, ptr %A
  %narrow = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %wide, i32 1)
  %joined = shufflevector <8 x i8> %low, <8 x i8> %narrow, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %joined
}
; uqshrn by #1 on <4 x i32>, concatenated after the <4 x i16> loaded from
; %ret; expected to select the high-half form uqshrn2.
define <8 x i16> @uqshrn8h(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: uqshrn8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: uqshrn2 v0.8h, v1.4s, #1
; CHECK-NEXT: ret
  %low = load <4 x i16>, ptr %ret
  %wide = load <4 x i32>, ptr %A
  %narrow = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %wide, i32 1)
  %joined = shufflevector <4 x i16> %low, <4 x i16> %narrow, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %joined
}
; uqshrn by #1 on <2 x i64>, concatenated after the <2 x i32> loaded from
; %ret; expected to select the high-half form uqshrn2.
define <4 x i32> @uqshrn4s(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: uqshrn4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: uqshrn2 v0.4s, v1.2d, #1
; CHECK-NEXT: ret
  %low = load <2 x i32>, ptr %ret
  %wide = load <2 x i64>, ptr %A
  %narrow = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %wide, i32 1)
  %joined = shufflevector <2 x i32> %low, <2 x i32> %narrow, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %joined
}
declare i32 @llvm.aarch64.neon.uqshrn.i32(i64, i32) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64>, i32) nounwind readnone
; zext <8 x i8> -> <8 x i16> followed by a splat shl by 1; expected to fold
; into a single ushll #1.
define <8 x i16> @ushll8h(ptr %A) nounwind {
; CHECK-LABEL: ushll8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ushll v0.8h, v0.8b, #1
; CHECK-NEXT: ret
  %src = load <8 x i8>, ptr %A
  %ext = zext <8 x i8> %src to <8 x i16>
  %shifted = shl <8 x i16> %ext, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %shifted
}
; zext <4 x i16> -> <4 x i32> followed by a splat shl by 1; expected to fold
; into a single ushll #1.
define <4 x i32> @ushll4s(ptr %A) nounwind {
; CHECK-LABEL: ushll4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ushll v0.4s, v0.4h, #1
; CHECK-NEXT: ret
  %src = load <4 x i16>, ptr %A
  %ext = zext <4 x i16> %src to <4 x i32>
  %shifted = shl <4 x i32> %ext, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %shifted
}
; zext <2 x i32> -> <2 x i64> followed by a splat shl by 1; expected to fold
; into a single ushll #1.
define <2 x i64> @ushll2d(ptr %A) nounwind {
; CHECK-LABEL: ushll2d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ushll v0.2d, v0.2s, #1
; CHECK-NEXT: ret
  %src = load <2 x i32>, ptr %A
  %ext = zext <2 x i32> %src to <2 x i64>
  %shifted = shl <2 x i64> %ext, <i64 1, i64 1>
  ret <2 x i64> %shifted
}
; Extracts the high 8 lanes of a loaded <16 x i8>, zero-extends them and
; shifts left by 1. SelectionDAG is expected to load the high half directly
; (offset #8); GlobalISel loads the full vector and moves the high half down.
; The unused second shuffle operand is poison rather than the deprecated
; undef; the mask never selects from it, so the result is unchanged.
define <8 x i16> @ushll2_8h(ptr %A) nounwind {
; CHECK-SD-LABEL: ushll2_8h:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0, #8]
; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ushll2_8h:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr q0, [x0]
; CHECK-GI-NEXT: mov d0, v0.d[1]
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #1
; CHECK-GI-NEXT: ret
  %load1 = load <16 x i8>, ptr %A
  %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
  %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %tmp3
}
; Extracts the high 4 lanes of a loaded <8 x i16>, zero-extends them and
; shifts left by 1. SelectionDAG is expected to load the high half directly
; (offset #8); GlobalISel loads the full vector and moves the high half down.
; The unused second shuffle operand is poison rather than the deprecated
; undef; the mask never selects from it, so the result is unchanged.
define <4 x i32> @ushll2_4s(ptr %A) nounwind {
; CHECK-SD-LABEL: ushll2_4s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0, #8]
; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ushll2_4s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr q0, [x0]
; CHECK-GI-NEXT: mov d0, v0.d[1]
; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #1
; CHECK-GI-NEXT: ret
  %load1 = load <8 x i16>, ptr %A
  %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
  %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %tmp3
}
; Extracts the high 2 lanes of a loaded <4 x i32>, zero-extends them and
; shifts left by 1. SelectionDAG is expected to load the high half directly
; (offset #8); GlobalISel loads the full vector and moves the high half down.
; The unused second shuffle operand is poison rather than the deprecated
; undef; the mask never selects from it, so the result is unchanged.
define <2 x i64> @ushll2_2d(ptr %A) nounwind {
; CHECK-SD-LABEL: ushll2_2d:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0, #8]
; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ushll2_2d:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr q0, [x0]
; CHECK-GI-NEXT: mov d0, v0.d[1]
; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #1
; CHECK-GI-NEXT: ret
  %load1 = load <4 x i32>, ptr %A
  %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> poison, <2 x i32> <i32 2, i32 3>
  %tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
  %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
  ret <2 x i64> %tmp3
}
declare <16 x i8> @llvm.aarch64.neon.ushl.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64>, <2 x i64>)
declare <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64>, <1 x i64>)
declare i64 @llvm.aarch64.neon.ushl.i64(i64, i64)
; ushl intrinsic with a splat-1 shift vector applied to a zext from
; <8 x i8>. SelectionDAG is expected to fold this into ushll #1; GlobalISel
; keeps a separate extend (ushll #0) and a register-operand ushl.
define <8 x i16> @neon_ushll8h_constant_shift(ptr %A) nounwind {
; CHECK-SD-LABEL: neon_ushll8h_constant_shift:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_ushll8h_constant_shift:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: movi v1.8h, #1
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: ushl v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: ret
  %src = load <8 x i8>, ptr %A
  %ext = zext <8 x i8> %src to <8 x i16>
  %shifted = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %ext, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %shifted
}
; ushl intrinsic whose shift operand is the zero-extended value itself (not a
; constant); the extend and the shift are expected to stay separate.
define <8 x i16> @neon_ushl8h_no_constant_shift(ptr %A) nounwind {
; CHECK-LABEL: neon_ushl8h_no_constant_shift:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: ushl v0.8h, v0.8h, v0.8h
; CHECK-NEXT: ret
  %src = load <8 x i8>, ptr %A
  %ext = zext <8 x i8> %src to <8 x i16>
  %shifted = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %ext, <8 x i16> %ext)
  ret <8 x i16> %shifted
}
; ushl by splat 1 on a zext from <4 x i8> to <4 x i32>, i.e. an extension
; that widens by 4x rather than 2x. SelectionDAG is expected to extend in
; two steps and fold the shift into the second ushll; GlobalISel currently
; produces the longer lane-by-lane sequence recorded below.
define <4 x i32> @neon_ushl8h_constant_shift_extend_not_2x(ptr %A) nounwind {
; CHECK-SD-LABEL: neon_ushl8h_constant_shift_extend_not_2x:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr s0, [x0]
; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_ushl8h_constant_shift_extend_not_2x:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr w8, [x0]
; CHECK-GI-NEXT: movi v0.4s, #1
; CHECK-GI-NEXT: fmov s1, w8
; CHECK-GI-NEXT: uxtb w8, w8
; CHECK-GI-NEXT: mov b2, v1.b[2]
; CHECK-GI-NEXT: mov b3, v1.b[1]
; CHECK-GI-NEXT: mov b4, v1.b[3]
; CHECK-GI-NEXT: fmov s1, w8
; CHECK-GI-NEXT: fmov w9, s2
; CHECK-GI-NEXT: fmov w10, s3
; CHECK-GI-NEXT: fmov w11, s4
; CHECK-GI-NEXT: uxtb w9, w9
; CHECK-GI-NEXT: uxtb w10, w10
; CHECK-GI-NEXT: uxtb w11, w11
; CHECK-GI-NEXT: fmov s2, w9
; CHECK-GI-NEXT: mov v1.h[1], w10
; CHECK-GI-NEXT: mov v2.h[1], w11
; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
; CHECK-GI-NEXT: ushll v2.4s, v2.4h, #0
; CHECK-GI-NEXT: mov v1.d[1], v2.d[0]
; CHECK-GI-NEXT: ushl v0.4s, v1.4s, v0.4s
; CHECK-GI-NEXT: ret
  %src = load <4 x i8>, ptr %A
  %ext = zext <4 x i8> %src to <4 x i32>
  %shifted = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %ext, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
  ret <4 x i32> %shifted
}
; ushl by splat 1 on a loaded <8 x i16> with no extension involved.
; SelectionDAG is expected to emit the shift-by-one as a self-add;
; GlobalISel materializes the splat and uses a register-operand ushl.
define <8 x i16> @neon_ushl8_noext_constant_shift(ptr %A) nounwind {
; CHECK-SD-LABEL: neon_ushl8_noext_constant_shift:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: add v0.8h, v0.8h, v0.8h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_ushl8_noext_constant_shift:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.8h, #1
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: ushl v0.8h, v1.8h, v0.8h
; CHECK-GI-NEXT: ret
  %src = load <8 x i16>, ptr %A
  %shifted = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %src, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %shifted
}
; ushl intrinsic with a splat-1 shift vector applied to a zext from
; <4 x i16>. SelectionDAG is expected to fold this into ushll #1; GlobalISel
; keeps a separate extend (ushll #0) and a register-operand ushl.
define <4 x i32> @neon_ushll4s_constant_shift(ptr %A) nounwind {
; CHECK-SD-LABEL: neon_ushll4s_constant_shift:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_ushll4s_constant_shift:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: movi v1.4s, #1
; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-GI-NEXT: ushl v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: ret
  %src = load <4 x i16>, ptr %A
  %ext = zext <4 x i16> %src to <4 x i32>
  %shifted = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %ext, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
  ret <4 x i32> %shifted
}
; ushl by splat -1 (a right shift by one) on a zext from <4 x i16>.
; SelectionDAG is expected to emit an extend followed by ushr #1; GlobalISel
; keeps a register-operand ushl with an all-ones shift vector.
; FIXME: unnecessary ushll.4s v0, v0, #0?
define <4 x i32> @neon_ushll4s_neg_constant_shift(ptr %A) nounwind {
; CHECK-SD-LABEL: neon_ushll4s_neg_constant_shift:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-SD-NEXT: ushr v0.4s, v0.4s, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_ushll4s_neg_constant_shift:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: movi v1.2d, #0xffffffffffffffff
; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-GI-NEXT: ushl v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: ret
  %src = load <4 x i16>, ptr %A
  %ext = zext <4 x i16> %src to <4 x i32>
  %shifted = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %ext, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
  ret <4 x i32> %shifted
}
; ushl with both operands constant (<0, 1, 2, 3> shifted by splat 1). The
; expected assembly still loads the constant and shifts it at run time.
; FIXME: should be constant folded.
define <4 x i32> @neon_ushll4s_constant_fold() nounwind {
; CHECK-SD-LABEL: neon_ushll4s_constant_fold:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: adrp x8, .LCPI160_0
; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI160_0]
; CHECK-SD-NEXT: add v0.4s, v0.4s, v0.4s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_ushll4s_constant_fold:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.4s, #1
; CHECK-GI-NEXT: adrp x8, .LCPI160_0
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI160_0]
; CHECK-GI-NEXT: ushl v0.4s, v1.4s, v0.4s
; CHECK-GI-NEXT: ret
  %shifted = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
  ret <4 x i32> %shifted
}
; ushl intrinsic with a splat-1 shift vector applied to a zext from
; <2 x i32>. SelectionDAG is expected to fold this into ushll #1; GlobalISel
; loads the shift vector from the constant pool and uses a register ushl.
define <2 x i64> @neon_ushll2d_constant_shift(ptr %A) nounwind {
; CHECK-SD-LABEL: neon_ushll2d_constant_shift:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_ushll2d_constant_shift:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: adrp x8, .LCPI161_0
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI161_0]
; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
; CHECK-GI-NEXT: ushl v0.2d, v0.2d, v1.2d
; CHECK-GI-NEXT: ret
  %src = load <2 x i32>, ptr %A
  %ext = zext <2 x i32> %src to <2 x i64>
  %shifted = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> %ext, <2 x i64> <i64 1, i64 1>)
  ret <2 x i64> %shifted
}
; ushl on <1 x i64> with a constant shift of 1, applied to a zext from
; <1 x i32>; the shift is expected to become an immediate shl on a d register.
define <1 x i64> @neon_ushl_vscalar_constant_shift(ptr %A) nounwind {
; CHECK-LABEL: neon_ushl_vscalar_constant_shift:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: ldr s1, [x0]
; CHECK-NEXT: zip1 v0.2s, v1.2s, v0.2s
; CHECK-NEXT: shl d0, d0, #1
; CHECK-NEXT: ret
  %src = load <1 x i32>, ptr %A
  %ext = zext <1 x i32> %src to <1 x i64>
  %shifted = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> %ext, <1 x i64> <i64 1>)
  ret <1 x i64> %shifted
}
; Scalar i64 ushl with a constant shift of 1, applied to a zero-extended i32
; load; the shift is expected to become an immediate shl on a d register.
define i64 @neon_ushl_scalar_constant_shift(ptr %A) nounwind {
; CHECK-LABEL: neon_ushl_scalar_constant_shift:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr w8, [x0]
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: shl d0, d0, #1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
  %src = load i32, ptr %A
  %ext = zext i32 %src to i64
  %shifted = call i64 @llvm.aarch64.neon.ushl.i64(i64 %ext, i64 1)
  ret i64 %shifted
}
; sext <8 x i8> -> <8 x i16> followed by a splat shl by 1; expected to fold
; into a single sshll #1.
define <8 x i16> @sshll8h(ptr %A) nounwind {
; CHECK-LABEL: sshll8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: sshll v0.8h, v0.8b, #1
; CHECK-NEXT: ret
  %src = load <8 x i8>, ptr %A
  %ext = sext <8 x i8> %src to <8 x i16>
  %shifted = shl <8 x i16> %ext, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %shifted
}
; sext <2 x i32> -> <2 x i64> followed by a splat shl by 1; expected to fold
; into a single sshll #1.
define <2 x i64> @sshll2d(ptr %A) nounwind {
; CHECK-LABEL: sshll2d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: sshll v0.2d, v0.2s, #1
; CHECK-NEXT: ret
  %src = load <2 x i32>, ptr %A
  %ext = sext <2 x i32> %src to <2 x i64>
  %shifted = shl <2 x i64> %ext, <i64 1, i64 1>
  ret <2 x i64> %shifted
}
declare <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64>, <2 x i64>)
declare <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64>, <1 x i64>)
declare i64 @llvm.aarch64.neon.sshl.i64(i64, i64)
; sshl by splat 1 on a loaded <16 x i8>. SelectionDAG is expected to emit the
; shift-by-one as a self-add; GlobalISel materializes the splat and uses a
; register-operand sshl.
define <16 x i8> @neon_sshl16b_constant_shift(ptr %A) nounwind {
; CHECK-SD-LABEL: neon_sshl16b_constant_shift:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: add v0.16b, v0.16b, v0.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_sshl16b_constant_shift:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.16b, #1
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: sshl v0.16b, v1.16b, v0.16b
; CHECK-GI-NEXT: ret
  %src = load <16 x i8>, ptr %A
  %shifted = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %src, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
  ret <16 x i8> %shifted
}
; sshl on a loaded <16 x i8> with a constant shift vector that is not a splat
; (lane 0 is 6, the rest are 1); both selectors are expected to load the
; shift vector from the constant pool and keep the register-operand sshl.
define <16 x i8> @neon_sshl16b_non_splat_constant_shift(ptr %A) nounwind {
; CHECK-SD-LABEL: neon_sshl16b_non_splat_constant_shift:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: adrp x8, .LCPI167_0
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI167_0]
; CHECK-SD-NEXT: sshl v0.16b, v0.16b, v1.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_sshl16b_non_splat_constant_shift:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI167_0
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI167_0]
; CHECK-GI-NEXT: sshl v0.16b, v1.16b, v0.16b
; CHECK-GI-NEXT: ret
  %src = load <16 x i8>, ptr %A
  %shifted = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %src, <16 x i8> <i8 6, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
  ret <16 x i8> %shifted
}
; sshl by splat -2 on a loaded <16 x i8>. SelectionDAG is expected to turn
; the negative left shift into sshr #2; GlobalISel keeps a register-operand
; sshl with a splat of 254 (the byte encoding of -2).
define <16 x i8> @neon_sshl16b_neg_constant_shift(ptr %A) nounwind {
; CHECK-SD-LABEL: neon_sshl16b_neg_constant_shift:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: sshr v0.16b, v0.16b, #2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_sshl16b_neg_constant_shift:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.16b, #254
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: sshl v0.16b, v1.16b, v0.16b
; CHECK-GI-NEXT: ret
  %src = load <16 x i8>, ptr %A
  %shifted = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %src, <16 x i8> <i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2>)
  ret <16 x i8> %shifted
}
; sshl intrinsic with a splat-1 shift vector applied to a sext from
; <8 x i8>. SelectionDAG is expected to fold this into sshll #1; GlobalISel
; keeps a separate extend (sshll #0) and a register-operand sshl.
define <8 x i16> @neon_sshll8h_constant_shift(ptr %A) nounwind {
; CHECK-SD-LABEL: neon_sshll8h_constant_shift:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_sshll8h_constant_shift:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: movi v1.8h, #1
; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: sshl v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: ret
  %src = load <8 x i8>, ptr %A
  %ext = sext <8 x i8> %src to <8 x i16>
  %shifted = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %ext, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %shifted
}
; sshl by splat 1 on a sext from <4 x i8> to <4 x i32>, i.e. an extension
; that widens by 4x rather than 2x. SelectionDAG is expected to extend in
; two steps and fold the shift into the second sshll; GlobalISel currently
; produces the longer lane-by-lane sequence recorded below.
define <4 x i32> @neon_sshl4s_wrong_ext_constant_shift(ptr %A) nounwind {
; CHECK-SD-LABEL: neon_sshl4s_wrong_ext_constant_shift:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr s0, [x0]
; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_sshl4s_wrong_ext_constant_shift:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr w8, [x0]
; CHECK-GI-NEXT: movi v0.4s, #1
; CHECK-GI-NEXT: fmov s1, w8
; CHECK-GI-NEXT: sxtb w8, w8
; CHECK-GI-NEXT: mov b2, v1.b[2]
; CHECK-GI-NEXT: mov b3, v1.b[1]
; CHECK-GI-NEXT: mov b4, v1.b[3]
; CHECK-GI-NEXT: fmov s1, w8
; CHECK-GI-NEXT: fmov w9, s2
; CHECK-GI-NEXT: fmov w10, s3
; CHECK-GI-NEXT: fmov w11, s4
; CHECK-GI-NEXT: sxtb w9, w9
; CHECK-GI-NEXT: sxtb w10, w10
; CHECK-GI-NEXT: sxtb w11, w11
; CHECK-GI-NEXT: fmov s2, w9
; CHECK-GI-NEXT: mov v1.h[1], w10
; CHECK-GI-NEXT: mov v2.h[1], w11
; CHECK-GI-NEXT: sshll v1.4s, v1.4h, #0
; CHECK-GI-NEXT: sshll v2.4s, v2.4h, #0
; CHECK-GI-NEXT: mov v1.d[1], v2.d[0]
; CHECK-GI-NEXT: sshl v0.4s, v1.4s, v0.4s
; CHECK-GI-NEXT: ret
  %src = load <4 x i8>, ptr %A
  %ext = sext <4 x i8> %src to <4 x i32>
  %shifted = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %ext, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
  ret <4 x i32> %shifted
}
; sshl intrinsic with a splat-1 shift vector applied to a sext from
; <4 x i16>. SelectionDAG is expected to fold this into sshll #1; GlobalISel
; keeps a separate extend (sshll #0) and a register-operand sshl.
define <4 x i32> @neon_sshll4s_constant_shift(ptr %A) nounwind {
; CHECK-SD-LABEL: neon_sshll4s_constant_shift:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_sshll4s_constant_shift:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: movi v1.4s, #1
; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0
; CHECK-GI-NEXT: sshl v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: ret
  %src = load <4 x i16>, ptr %A
  %ext = sext <4 x i16> %src to <4 x i32>
  %shifted = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %ext, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
  ret <4 x i32> %shifted
}
; sshl by splat -1 (a right shift by one) on a sext from <4 x i16>.
; SelectionDAG is expected to emit an extend followed by sshr #1; GlobalISel
; keeps a register-operand sshl with an all-ones shift vector.
define <4 x i32> @neon_sshll4s_neg_constant_shift(ptr %A) nounwind {
; CHECK-SD-LABEL: neon_sshll4s_neg_constant_shift:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #0
; CHECK-SD-NEXT: sshr v0.4s, v0.4s, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_sshll4s_neg_constant_shift:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: movi v1.2d, #0xffffffffffffffff
; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0
; CHECK-GI-NEXT: sshl v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: ret
  %src = load <4 x i16>, ptr %A
  %ext = sext <4 x i16> %src to <4 x i32>
  %shifted = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %ext, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
  ret <4 x i32> %shifted
}
; sshl with both operands constant (<0, 1, 2, 3> shifted by splat 2). The
; expected assembly still loads the constant and shifts it at run time.
; FIXME: should be constant folded.
define <4 x i32> @neon_sshl4s_constant_fold() nounwind {
; CHECK-SD-LABEL: neon_sshl4s_constant_fold:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: adrp x8, .LCPI173_0
; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI173_0]
; CHECK-SD-NEXT: shl v0.4s, v0.4s, #2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_sshl4s_constant_fold:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.4s, #2
; CHECK-GI-NEXT: adrp x8, .LCPI173_0
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI173_0]
; CHECK-GI-NEXT: sshl v0.4s, v1.4s, v0.4s
; CHECK-GI-NEXT: ret
  %shifted = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 2, i32 2, i32 2, i32 2>)
  ret <4 x i32> %shifted
}
; sshl by splat 1 on a loaded (non-constant) <4 x i32>. SelectionDAG is
; expected to emit the shift-by-one as a self-add; GlobalISel materializes
; the splat and uses a register-operand sshl.
define <4 x i32> @neon_sshl4s_no_fold(ptr %A) nounwind {
; CHECK-SD-LABEL: neon_sshl4s_no_fold:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: add v0.4s, v0.4s, v0.4s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_sshl4s_no_fold:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.4s, #1
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: sshl v0.4s, v1.4s, v0.4s
; CHECK-GI-NEXT: ret
  %src = load <4 x i32>, ptr %A
  %shifted = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %src, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
  ret <4 x i32> %shifted
}
; sshl intrinsic with a splat-1 shift vector applied to a sext from
; <2 x i32>. SelectionDAG is expected to fold this into sshll #1; GlobalISel
; loads the shift vector from the constant pool and uses a register sshl.
define <2 x i64> @neon_sshll2d_constant_shift(ptr %A) nounwind {
; CHECK-SD-LABEL: neon_sshll2d_constant_shift:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: sshll v0.2d, v0.2s, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_sshll2d_constant_shift:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: adrp x8, .LCPI175_0
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI175_0]
; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0
; CHECK-GI-NEXT: sshl v0.2d, v0.2d, v1.2d
; CHECK-GI-NEXT: ret
  %src = load <2 x i32>, ptr %A
  %ext = sext <2 x i32> %src to <2 x i64>
  %shifted = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %ext, <2 x i64> <i64 1, i64 1>)
  ret <2 x i64> %shifted
}
; sshl on <1 x i64> with a constant shift of 1; the shift is expected to
; become an immediate shl on a d register.
; NOTE(review): the input is zero-extended (zext) although the test name says
; sshll - confirm whether sext was intended before changing anything here.
define <1 x i64> @neon_sshll_vscalar_constant_shift(ptr %A) nounwind {
; CHECK-LABEL: neon_sshll_vscalar_constant_shift:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: ldr s1, [x0]
; CHECK-NEXT: zip1 v0.2s, v1.2s, v0.2s
; CHECK-NEXT: shl d0, d0, #1
; CHECK-NEXT: ret
  %src = load <1 x i32>, ptr %A
  %ext = zext <1 x i32> %src to <1 x i64>
  %shifted = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> %ext, <1 x i64> <i64 1>)
  ret <1 x i64> %shifted
}
; Scalar i64 sshl with a constant shift of 1; the shift is expected to become
; an immediate shl on a d register.
; NOTE(review): the input is zero-extended (zext) although the test name says
; sshll - confirm whether sext was intended before changing anything here.
define i64 @neon_sshll_scalar_constant_shift(ptr %A) nounwind {
; CHECK-LABEL: neon_sshll_scalar_constant_shift:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr w8, [x0]
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: shl d0, d0, #1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
  %src = load i32, ptr %A
  %ext = zext i32 %src to i64
  %shifted = call i64 @llvm.aarch64.neon.sshl.i64(i64 %ext, i64 1)
  ret i64 %shifted
}
; Scalar i64 sshl with a constant shift of -1; the negative left shift is
; expected to become an immediate sshr #1 on a d register.
; NOTE(review): the input is zero-extended (zext) although the test name says
; sshll - confirm whether sext was intended before changing anything here.
define i64 @neon_sshll_scalar_constant_shift_m1(ptr %A) nounwind {
; CHECK-LABEL: neon_sshll_scalar_constant_shift_m1:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr w8, [x0]
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: sshr d0, d0, #1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
  %src = load i32, ptr %A
  %ext = zext i32 %src to i64
  %shifted = call i64 @llvm.aarch64.neon.sshl.i64(i64 %ext, i64 -1)
  ret i64 %shifted
}
; sshl with both operands constant (<99, 1000> shifted by splat 1). The
; expected assembly still loads the constant(s) and shifts at run time.
; FIXME: should be constant folded.
define <2 x i64> @neon_sshl2d_constant_fold() nounwind {
; CHECK-SD-LABEL: neon_sshl2d_constant_fold:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: adrp x8, .LCPI179_0
; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI179_0]
; CHECK-SD-NEXT: add v0.2d, v0.2d, v0.2d
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_sshl2d_constant_fold:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI179_1
; CHECK-GI-NEXT: adrp x9, .LCPI179_0
; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI179_1]
; CHECK-GI-NEXT: ldr q1, [x9, :lo12:.LCPI179_0]
; CHECK-GI-NEXT: sshl v0.2d, v0.2d, v1.2d
; CHECK-GI-NEXT: ret
  %shifted = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> <i64 99, i64 1000>, <2 x i64> <i64 1, i64 1>)
  ret <2 x i64> %shifted
}
; Applies the v2i64 sshl intrinsic to a loaded vector with a splat shift amount
; of 2. SelectionDAG is expected to select the immediate form
; `shl v0.2d, v0.2d, #2`; GlobalISel loads the shift vector from the constant
; pool and uses the register-form `sshl`.
define <2 x i64> @neon_sshl2d_no_fold(ptr %A) nounwind {
; CHECK-SD-LABEL: neon_sshl2d_no_fold:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: shl v0.2d, v0.2d, #2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_sshl2d_no_fold:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI180_0
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI180_0]
; CHECK-GI-NEXT: sshl v0.2d, v1.2d, v0.2d
; CHECK-GI-NEXT: ret
%tmp2 = load <2 x i64>, ptr %A
%tmp3 = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %tmp2, <2 x i64> <i64 2, i64 2>)
ret <2 x i64> %tmp3
}
; Extracts the high half (elements 8..15) of a loaded <16 x i8>, sign-extends
; it to <8 x i16> and shifts left by 1. SelectionDAG is expected to fold the
; extract into the load (`ldr d0, [x0, #8]`); GlobalISel loads the whole vector
; and moves out lane d[1]. Both finish with `sshll v0.8h, v0.8b, #1`.
; The shuffle mask only selects from the first operand, so the unused second
; operand is spelled `poison` rather than the deprecated `undef` placeholder.
define <8 x i16> @sshll2_8h(ptr %A) nounwind {
; CHECK-SD-LABEL: sshll2_8h:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0, #8]
; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sshll2_8h:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr q0, [x0]
; CHECK-GI-NEXT: mov d0, v0.d[1]
; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #1
; CHECK-GI-NEXT: ret
%load1 = load <16 x i8>, ptr %A
%tmp1 = shufflevector <16 x i8> %load1, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
%tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
ret <8 x i16> %tmp3
}
; Extracts the high half (elements 4..7) of a loaded <8 x i16>, sign-extends
; it to <4 x i32> and shifts left by 1. SelectionDAG is expected to fold the
; extract into the load (`ldr d0, [x0, #8]`); GlobalISel loads the whole vector
; and moves out lane d[1]. Both finish with `sshll v0.4s, v0.4h, #1`.
; The shuffle mask only selects from the first operand, so the unused second
; operand is spelled `poison` rather than the deprecated `undef` placeholder.
define <4 x i32> @sshll2_4s(ptr %A) nounwind {
; CHECK-SD-LABEL: sshll2_4s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0, #8]
; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sshll2_4s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr q0, [x0]
; CHECK-GI-NEXT: mov d0, v0.d[1]
; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #1
; CHECK-GI-NEXT: ret
%load1 = load <8 x i16>, ptr %A
%tmp1 = shufflevector <8 x i16> %load1, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
%tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %tmp3
}
; Extracts the high half (elements 2..3) of a loaded <4 x i32>, sign-extends
; it to <2 x i64> and shifts left by 1. SelectionDAG is expected to fold the
; extract into the load (`ldr d0, [x0, #8]`); GlobalISel loads the whole vector
; and moves out lane d[1]. Both finish with `sshll v0.2d, v0.2s, #1`.
; The shuffle mask only selects from the first operand, so the unused second
; operand is spelled `poison` rather than the deprecated `undef` placeholder.
define <2 x i64> @sshll2_2d(ptr %A) nounwind {
; CHECK-SD-LABEL: sshll2_2d:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0, #8]
; CHECK-SD-NEXT: sshll v0.2d, v0.2s, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sshll2_2d:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr q0, [x0]
; CHECK-GI-NEXT: mov d0, v0.d[1]
; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #1
; CHECK-GI-NEXT: ret
%load1 = load <4 x i32>, ptr %A
%tmp1 = shufflevector <4 x i32> %load1, <4 x i32> poison, <2 x i32> <i32 2, i32 3>
%tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
%tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
ret <2 x i64> %tmp3
}
; sqshl intrinsic on a loaded <8 x i8> with a splat shift amount of 1.
; SelectionDAG is expected to select the immediate form `sqshl ..., #1`;
; GlobalISel materializes the splat with `movi` and uses the register form.
define <8 x i8> @sqshli8b(ptr %A) nounwind {
; CHECK-SD-LABEL: sqshli8b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: sqshl v0.8b, v0.8b, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sqshli8b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.8b, #1
; CHECK-GI-NEXT: ldr d1, [x0]
; CHECK-GI-NEXT: sqshl v0.8b, v1.8b, v0.8b
; CHECK-GI-NEXT: ret
%tmp1 = load <8 x i8>, ptr %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
ret <8 x i8> %tmp3
}
; sqshl intrinsic on a loaded <4 x i16> with a splat shift amount of 1.
; SelectionDAG is expected to select the immediate form `sqshl ..., #1`;
; GlobalISel materializes the splat with `movi` and uses the register form.
define <4 x i16> @sqshli4h(ptr %A) nounwind {
; CHECK-SD-LABEL: sqshli4h:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: sqshl v0.4h, v0.4h, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sqshli4h:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.4h, #1
; CHECK-GI-NEXT: ldr d1, [x0]
; CHECK-GI-NEXT: sqshl v0.4h, v1.4h, v0.4h
; CHECK-GI-NEXT: ret
%tmp1 = load <4 x i16>, ptr %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
ret <4 x i16> %tmp3
}
; sqshl intrinsic on a loaded <2 x i32> with a splat shift amount of 1.
; SelectionDAG is expected to select the immediate form `sqshl ..., #1`;
; GlobalISel materializes the splat with `movi` and uses the register form.
define <2 x i32> @sqshli2s(ptr %A) nounwind {
; CHECK-SD-LABEL: sqshli2s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: sqshl v0.2s, v0.2s, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sqshli2s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.2s, #1
; CHECK-GI-NEXT: ldr d1, [x0]
; CHECK-GI-NEXT: sqshl v0.2s, v1.2s, v0.2s
; CHECK-GI-NEXT: ret
%tmp1 = load <2 x i32>, ptr %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
ret <2 x i32> %tmp3
}
; sqshl intrinsic on a loaded <16 x i8> with a splat shift amount of 1.
; SelectionDAG is expected to select the immediate form `sqshl ..., #1`;
; GlobalISel materializes the splat with `movi` and uses the register form.
define <16 x i8> @sqshli16b(ptr %A) nounwind {
; CHECK-SD-LABEL: sqshli16b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: sqshl v0.16b, v0.16b, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sqshli16b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.16b, #1
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: sqshl v0.16b, v1.16b, v0.16b
; CHECK-GI-NEXT: ret
%tmp1 = load <16 x i8>, ptr %A
%tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
ret <16 x i8> %tmp3
}
; sqshl intrinsic on a loaded <8 x i16> with a splat shift amount of 1.
; SelectionDAG is expected to select the immediate form `sqshl ..., #1`;
; GlobalISel materializes the splat with `movi` and uses the register form.
define <8 x i16> @sqshli8h(ptr %A) nounwind {
; CHECK-SD-LABEL: sqshli8h:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: sqshl v0.8h, v0.8h, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sqshli8h:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.8h, #1
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: sqshl v0.8h, v1.8h, v0.8h
; CHECK-GI-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A
%tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
ret <8 x i16> %tmp3
}
; sqshl intrinsic on a loaded <4 x i32> with a splat shift amount of 1.
; SelectionDAG is expected to select the immediate form `sqshl ..., #1`;
; GlobalISel materializes the splat with `movi` and uses the register form.
define <4 x i32> @sqshli4s(ptr %A) nounwind {
; CHECK-SD-LABEL: sqshli4s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: sqshl v0.4s, v0.4s, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sqshli4s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.4s, #1
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: sqshl v0.4s, v1.4s, v0.4s
; CHECK-GI-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A
%tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
ret <4 x i32> %tmp3
}
; sqshl intrinsic on a loaded <2 x i64> with a splat shift amount of 1.
; SelectionDAG is expected to select the immediate form `sqshl ..., #1`;
; GlobalISel loads the shift vector from the constant pool and uses the
; register form.
define <2 x i64> @sqshli2d(ptr %A) nounwind {
; CHECK-SD-LABEL: sqshli2d:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: sqshl v0.2d, v0.2d, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sqshli2d:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI190_0
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI190_0]
; CHECK-GI-NEXT: sqshl v0.2d, v1.2d, v0.2d
; CHECK-GI-NEXT: ret
%tmp1 = load <2 x i64>, ptr %A
%tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
ret <2 x i64> %tmp3
}
; uqshl intrinsic on a loaded <8 x i8> with a splat shift amount of 1.
; SelectionDAG is expected to select the immediate form `uqshl ..., #1`;
; GlobalISel materializes the splat with `movi` and uses the register form.
define <8 x i8> @uqshli8b(ptr %A) nounwind {
; CHECK-SD-LABEL: uqshli8b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: uqshl v0.8b, v0.8b, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uqshli8b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.8b, #1
; CHECK-GI-NEXT: ldr d1, [x0]
; CHECK-GI-NEXT: uqshl v0.8b, v1.8b, v0.8b
; CHECK-GI-NEXT: ret
%tmp1 = load <8 x i8>, ptr %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
ret <8 x i8> %tmp3
}
; uqshl intrinsic on a loaded <8 x i8> with a splat shift amount of 8, i.e.
; equal to the element width. Both RUN configurations share one set of
; assertions and expect the register form with the splat built by `movi`.
; NOTE(review): presumably 8 is outside the immediate range for 8-bit lanes,
; which is why no immediate form is expected; confirm against the ISA.
define <8 x i8> @uqshli8b_1(ptr %A) nounwind {
; CHECK-LABEL: uqshli8b_1:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v0.8b, #8
; CHECK-NEXT: ldr d1, [x0]
; CHECK-NEXT: uqshl v0.8b, v1.8b, v0.8b
; CHECK-NEXT: ret
%tmp1 = load <8 x i8>, ptr %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>)
ret <8 x i8> %tmp3
}
; uqshl intrinsic on a loaded <4 x i16> with a splat shift amount of 1.
; SelectionDAG is expected to select the immediate form `uqshl ..., #1`;
; GlobalISel materializes the splat with `movi` and uses the register form.
define <4 x i16> @uqshli4h(ptr %A) nounwind {
; CHECK-SD-LABEL: uqshli4h:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: uqshl v0.4h, v0.4h, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uqshli4h:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.4h, #1
; CHECK-GI-NEXT: ldr d1, [x0]
; CHECK-GI-NEXT: uqshl v0.4h, v1.4h, v0.4h
; CHECK-GI-NEXT: ret
%tmp1 = load <4 x i16>, ptr %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
ret <4 x i16> %tmp3
}
; uqshl intrinsic on a loaded <2 x i32> with a splat shift amount of 1.
; SelectionDAG is expected to select the immediate form `uqshl ..., #1`;
; GlobalISel materializes the splat with `movi` and uses the register form.
define <2 x i32> @uqshli2s(ptr %A) nounwind {
; CHECK-SD-LABEL: uqshli2s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: uqshl v0.2s, v0.2s, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uqshli2s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.2s, #1
; CHECK-GI-NEXT: ldr d1, [x0]
; CHECK-GI-NEXT: uqshl v0.2s, v1.2s, v0.2s
; CHECK-GI-NEXT: ret
%tmp1 = load <2 x i32>, ptr %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
ret <2 x i32> %tmp3
}
; uqshl intrinsic on a loaded <16 x i8> with a splat shift amount of 1.
; SelectionDAG is expected to select the immediate form `uqshl ..., #1`;
; GlobalISel materializes the splat with `movi` and uses the register form.
define <16 x i8> @uqshli16b(ptr %A) nounwind {
; CHECK-SD-LABEL: uqshli16b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: uqshl v0.16b, v0.16b, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uqshli16b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.16b, #1
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: uqshl v0.16b, v1.16b, v0.16b
; CHECK-GI-NEXT: ret
%tmp1 = load <16 x i8>, ptr %A
%tmp3 = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
ret <16 x i8> %tmp3
}
; uqshl intrinsic on a loaded <8 x i16> with a splat shift amount of 1.
; SelectionDAG is expected to select the immediate form `uqshl ..., #1`;
; GlobalISel materializes the splat with `movi` and uses the register form.
define <8 x i16> @uqshli8h(ptr %A) nounwind {
; CHECK-SD-LABEL: uqshli8h:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: uqshl v0.8h, v0.8h, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uqshli8h:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.8h, #1
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: uqshl v0.8h, v1.8h, v0.8h
; CHECK-GI-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A
%tmp3 = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
ret <8 x i16> %tmp3
}
; uqshl intrinsic on a loaded <4 x i32> with a splat shift amount of 1.
; SelectionDAG is expected to select the immediate form `uqshl ..., #1`;
; GlobalISel materializes the splat with `movi` and uses the register form.
define <4 x i32> @uqshli4s(ptr %A) nounwind {
; CHECK-SD-LABEL: uqshli4s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: uqshl v0.4s, v0.4s, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uqshli4s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.4s, #1
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: uqshl v0.4s, v1.4s, v0.4s
; CHECK-GI-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A
%tmp3 = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
ret <4 x i32> %tmp3
}
; uqshl intrinsic on a loaded <2 x i64> with a splat shift amount of 1.
; SelectionDAG is expected to select the immediate form `uqshl ..., #1`;
; GlobalISel loads the shift vector from the constant pool and uses the
; register form.
define <2 x i64> @uqshli2d(ptr %A) nounwind {
; CHECK-SD-LABEL: uqshli2d:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: uqshl v0.2d, v0.2d, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uqshli2d:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI198_0
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI198_0]
; CHECK-GI-NEXT: uqshl v0.2d, v1.2d, v0.2d
; CHECK-GI-NEXT: ret
%tmp1 = load <2 x i64>, ptr %A
%tmp3 = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
ret <2 x i64> %tmp3
}
; urshl intrinsic on <8 x i8> with a splat shift amount of -1, followed by an
; add of a second loaded vector. SelectionDAG is expected to fuse the pair into
; `ursra ..., #1`; GlobalISel keeps a register-form `urshl` (all-ones shift
; vector built by `movi`) and a separate `add`.
define <8 x i8> @ursra8b(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: ursra8b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d1, [x0]
; CHECK-SD-NEXT: ldr d0, [x1]
; CHECK-SD-NEXT: ursra v0.8b, v1.8b, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ursra8b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr d1, [x0]
; CHECK-GI-NEXT: urshl v0.8b, v1.8b, v0.8b
; CHECK-GI-NEXT: ldr d1, [x1]
; CHECK-GI-NEXT: add v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: ret
%tmp1 = load <8 x i8>, ptr %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
%tmp4 = load <8 x i8>, ptr %B
%tmp5 = add <8 x i8> %tmp3, %tmp4
ret <8 x i8> %tmp5
}
; urshl intrinsic on <4 x i16> with a splat shift amount of -1, followed by an
; add of a second loaded vector. SelectionDAG is expected to fuse the pair into
; `ursra ..., #1`; GlobalISel keeps a register-form `urshl` (all-ones shift
; vector built by `movi`) and a separate `add`.
define <4 x i16> @ursra4h(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: ursra4h:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d1, [x0]
; CHECK-SD-NEXT: ldr d0, [x1]
; CHECK-SD-NEXT: ursra v0.4h, v1.4h, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ursra4h:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr d1, [x0]
; CHECK-GI-NEXT: urshl v0.4h, v1.4h, v0.4h
; CHECK-GI-NEXT: ldr d1, [x1]
; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h
; CHECK-GI-NEXT: ret
%tmp1 = load <4 x i16>, ptr %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
%tmp4 = load <4 x i16>, ptr %B
%tmp5 = add <4 x i16> %tmp3, %tmp4
ret <4 x i16> %tmp5
}
; urshl intrinsic on <2 x i32> with a splat shift amount of -1, followed by an
; add of a second loaded vector. SelectionDAG is expected to fuse the pair into
; `ursra ..., #1`; GlobalISel keeps a register-form `urshl` (all-ones shift
; vector built by `movi`) and a separate `add`.
define <2 x i32> @ursra2s(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: ursra2s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d1, [x0]
; CHECK-SD-NEXT: ldr d0, [x1]
; CHECK-SD-NEXT: ursra v0.2s, v1.2s, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ursra2s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr d1, [x0]
; CHECK-GI-NEXT: urshl v0.2s, v1.2s, v0.2s
; CHECK-GI-NEXT: ldr d1, [x1]
; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s
; CHECK-GI-NEXT: ret
%tmp1 = load <2 x i32>, ptr %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
%tmp4 = load <2 x i32>, ptr %B
%tmp5 = add <2 x i32> %tmp3, %tmp4
ret <2 x i32> %tmp5
}
; urshl intrinsic on <16 x i8> with a splat shift amount of -1, followed by an
; add of a second loaded vector. SelectionDAG is expected to fuse the pair into
; `ursra ..., #1`; GlobalISel keeps a register-form `urshl` (all-ones shift
; vector built by `movi`) and a separate `add`.
define <16 x i8> @ursra16b(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: ursra16b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q1, [x0]
; CHECK-SD-NEXT: ldr q0, [x1]
; CHECK-SD-NEXT: ursra v0.16b, v1.16b, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ursra16b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: urshl v0.16b, v1.16b, v0.16b
; CHECK-GI-NEXT: ldr q1, [x1]
; CHECK-GI-NEXT: add v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
%tmp1 = load <16 x i8>, ptr %A
%tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
%tmp4 = load <16 x i8>, ptr %B
%tmp5 = add <16 x i8> %tmp3, %tmp4
ret <16 x i8> %tmp5
}
; urshl intrinsic on <8 x i16> with a splat shift amount of -1, followed by an
; add of a second loaded vector. SelectionDAG is expected to fuse the pair into
; `ursra ..., #1`; GlobalISel keeps a register-form `urshl` (all-ones shift
; vector built by `movi`) and a separate `add`.
define <8 x i16> @ursra8h(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: ursra8h:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q1, [x0]
; CHECK-SD-NEXT: ldr q0, [x1]
; CHECK-SD-NEXT: ursra v0.8h, v1.8h, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ursra8h:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: urshl v0.8h, v1.8h, v0.8h
; CHECK-GI-NEXT: ldr q1, [x1]
; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A
%tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
%tmp4 = load <8 x i16>, ptr %B
%tmp5 = add <8 x i16> %tmp3, %tmp4
ret <8 x i16> %tmp5
}
; urshl intrinsic on <4 x i32> with a splat shift amount of -1, followed by an
; add of a second loaded vector. SelectionDAG is expected to fuse the pair into
; `ursra ..., #1`; GlobalISel keeps a register-form `urshl` (all-ones shift
; vector built by `movi`) and a separate `add`.
define <4 x i32> @ursra4s(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: ursra4s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q1, [x0]
; CHECK-SD-NEXT: ldr q0, [x1]
; CHECK-SD-NEXT: ursra v0.4s, v1.4s, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ursra4s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: urshl v0.4s, v1.4s, v0.4s
; CHECK-GI-NEXT: ldr q1, [x1]
; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A
%tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
%tmp4 = load <4 x i32>, ptr %B
%tmp5 = add <4 x i32> %tmp3, %tmp4
ret <4 x i32> %tmp5
}
; urshl intrinsic on <2 x i64> with a splat shift amount of -1, followed by an
; add of a second loaded vector. SelectionDAG is expected to fuse the pair into
; `ursra ..., #1`; GlobalISel keeps a register-form `urshl` (all-ones shift
; vector built by `movi`) and a separate `add`.
define <2 x i64> @ursra2d(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: ursra2d:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q1, [x0]
; CHECK-SD-NEXT: ldr q0, [x1]
; CHECK-SD-NEXT: ursra v0.2d, v1.2d, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ursra2d:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: urshl v0.2d, v1.2d, v0.2d
; CHECK-GI-NEXT: ldr q1, [x1]
; CHECK-GI-NEXT: add v0.2d, v0.2d, v1.2d
; CHECK-GI-NEXT: ret
%tmp1 = load <2 x i64>, ptr %A
%tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
%tmp4 = load <2 x i64>, ptr %B
%tmp5 = add <2 x i64> %tmp3, %tmp4
ret <2 x i64> %tmp5
}
; urshl intrinsic on <1 x i64> with a shift amount of -1, followed by an add of
; a second loaded value. Both RUN configurations share one set of assertions
; and expect the pair to be fused into the scalar `ursra d0, d1, #1`.
define <1 x i64> @ursra1d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: ursra1d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d1, [x0]
; CHECK-NEXT: ldr d0, [x1]
; CHECK-NEXT: ursra d0, d1, #1
; CHECK-NEXT: ret
%tmp1 = load <1 x i64>, ptr %A
%tmp3 = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 -1>)
%tmp4 = load <1 x i64>, ptr %B
%tmp5 = add <1 x i64> %tmp3, %tmp4
ret <1 x i64> %tmp5
}
; Scalar i64 variant: urshl.i64 with a shift amount of -1 followed by an add of
; a second loaded i64. The shared assertions expect both operands to be loaded
; straight into d registers, fused into `ursra d1, d0, #1`, and the result
; moved to x0 with `fmov`.
define i64 @ursra_scalar(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: ursra_scalar:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: ursra d1, d0, #1
; CHECK-NEXT: fmov x0, d1
; CHECK-NEXT: ret
%tmp1 = load i64, ptr %A
%tmp3 = call i64 @llvm.aarch64.neon.urshl.i64(i64 %tmp1, i64 -1)
%tmp4 = load i64, ptr %B
%tmp5 = add i64 %tmp3, %tmp4
ret i64 %tmp5
}
; srshl intrinsic on <8 x i8> with a splat shift amount of -1, followed by an
; add of a second loaded vector. SelectionDAG is expected to fuse the pair into
; `srsra ..., #1`; GlobalISel keeps a register-form `srshl` (all-ones shift
; vector built by `movi`) and a separate `add`.
define <8 x i8> @srsra8b(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: srsra8b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d1, [x0]
; CHECK-SD-NEXT: ldr d0, [x1]
; CHECK-SD-NEXT: srsra v0.8b, v1.8b, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: srsra8b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr d1, [x0]
; CHECK-GI-NEXT: srshl v0.8b, v1.8b, v0.8b
; CHECK-GI-NEXT: ldr d1, [x1]
; CHECK-GI-NEXT: add v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: ret
%tmp1 = load <8 x i8>, ptr %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
%tmp4 = load <8 x i8>, ptr %B
%tmp5 = add <8 x i8> %tmp3, %tmp4
ret <8 x i8> %tmp5
}
; srshl intrinsic on <4 x i16> with a splat shift amount of -1, followed by an
; add of a second loaded vector. SelectionDAG is expected to fuse the pair into
; `srsra ..., #1`; GlobalISel keeps a register-form `srshl` (all-ones shift
; vector built by `movi`) and a separate `add`.
define <4 x i16> @srsra4h(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: srsra4h:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d1, [x0]
; CHECK-SD-NEXT: ldr d0, [x1]
; CHECK-SD-NEXT: srsra v0.4h, v1.4h, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: srsra4h:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr d1, [x0]
; CHECK-GI-NEXT: srshl v0.4h, v1.4h, v0.4h
; CHECK-GI-NEXT: ldr d1, [x1]
; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h
; CHECK-GI-NEXT: ret
%tmp1 = load <4 x i16>, ptr %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
%tmp4 = load <4 x i16>, ptr %B
%tmp5 = add <4 x i16> %tmp3, %tmp4
ret <4 x i16> %tmp5
}
; srshl intrinsic on <2 x i32> with a splat shift amount of -1, followed by an
; add of a second loaded vector. SelectionDAG is expected to fuse the pair into
; `srsra ..., #1`; GlobalISel keeps a register-form `srshl` (all-ones shift
; vector built by `movi`) and a separate `add`.
define <2 x i32> @srsra2s(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: srsra2s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d1, [x0]
; CHECK-SD-NEXT: ldr d0, [x1]
; CHECK-SD-NEXT: srsra v0.2s, v1.2s, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: srsra2s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr d1, [x0]
; CHECK-GI-NEXT: srshl v0.2s, v1.2s, v0.2s
; CHECK-GI-NEXT: ldr d1, [x1]
; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s
; CHECK-GI-NEXT: ret
%tmp1 = load <2 x i32>, ptr %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
%tmp4 = load <2 x i32>, ptr %B
%tmp5 = add <2 x i32> %tmp3, %tmp4
ret <2 x i32> %tmp5
}
; srshl intrinsic on <16 x i8> with a splat shift amount of -1, followed by an
; add of a second loaded vector. SelectionDAG is expected to fuse the pair into
; `srsra ..., #1`; GlobalISel keeps a register-form `srshl` (all-ones shift
; vector built by `movi`) and a separate `add`.
define <16 x i8> @srsra16b(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: srsra16b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q1, [x0]
; CHECK-SD-NEXT: ldr q0, [x1]
; CHECK-SD-NEXT: srsra v0.16b, v1.16b, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: srsra16b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: srshl v0.16b, v1.16b, v0.16b
; CHECK-GI-NEXT: ldr q1, [x1]
; CHECK-GI-NEXT: add v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
%tmp1 = load <16 x i8>, ptr %A
%tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
%tmp4 = load <16 x i8>, ptr %B
%tmp5 = add <16 x i8> %tmp3, %tmp4
ret <16 x i8> %tmp5
}
; srshl intrinsic on <8 x i16> with a splat shift amount of -1, followed by an
; add of a second loaded vector. SelectionDAG is expected to fuse the pair into
; `srsra ..., #1`; GlobalISel keeps a register-form `srshl` (all-ones shift
; vector built by `movi`) and a separate `add`.
define <8 x i16> @srsra8h(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: srsra8h:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q1, [x0]
; CHECK-SD-NEXT: ldr q0, [x1]
; CHECK-SD-NEXT: srsra v0.8h, v1.8h, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: srsra8h:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: srshl v0.8h, v1.8h, v0.8h
; CHECK-GI-NEXT: ldr q1, [x1]
; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A
%tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
%tmp4 = load <8 x i16>, ptr %B
%tmp5 = add <8 x i16> %tmp3, %tmp4
ret <8 x i16> %tmp5
}
; srshl intrinsic on <4 x i32> with a splat shift amount of -1, followed by an
; add of a second loaded vector. SelectionDAG is expected to fuse the pair into
; `srsra ..., #1`; GlobalISel keeps a register-form `srshl` (all-ones shift
; vector built by `movi`) and a separate `add`.
define <4 x i32> @srsra4s(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: srsra4s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q1, [x0]
; CHECK-SD-NEXT: ldr q0, [x1]
; CHECK-SD-NEXT: srsra v0.4s, v1.4s, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: srsra4s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: srshl v0.4s, v1.4s, v0.4s
; CHECK-GI-NEXT: ldr q1, [x1]
; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A
%tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
%tmp4 = load <4 x i32>, ptr %B
%tmp5 = add <4 x i32> %tmp3, %tmp4
ret <4 x i32> %tmp5
}
; srshl intrinsic on <2 x i64> with a splat shift amount of -1, followed by an
; add of a second loaded vector. SelectionDAG is expected to fuse the pair into
; `srsra ..., #1`; GlobalISel keeps a register-form `srshl` (all-ones shift
; vector built by `movi`) and a separate `add`.
define <2 x i64> @srsra2d(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: srsra2d:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q1, [x0]
; CHECK-SD-NEXT: ldr q0, [x1]
; CHECK-SD-NEXT: srsra v0.2d, v1.2d, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: srsra2d:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: srshl v0.2d, v1.2d, v0.2d
; CHECK-GI-NEXT: ldr q1, [x1]
; CHECK-GI-NEXT: add v0.2d, v0.2d, v1.2d
; CHECK-GI-NEXT: ret
%tmp1 = load <2 x i64>, ptr %A
%tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
%tmp4 = load <2 x i64>, ptr %B
%tmp5 = add <2 x i64> %tmp3, %tmp4
ret <2 x i64> %tmp5
}
; srshl intrinsic on <1 x i64> with a shift amount of -1, followed by an add of
; a second loaded value. Both RUN configurations share one set of assertions
; and expect the pair to be fused into the scalar `srsra d0, d1, #1`.
define <1 x i64> @srsra1d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: srsra1d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d1, [x0]
; CHECK-NEXT: ldr d0, [x1]
; CHECK-NEXT: srsra d0, d1, #1
; CHECK-NEXT: ret
%tmp1 = load <1 x i64>, ptr %A
%tmp3 = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 -1>)
%tmp4 = load <1 x i64>, ptr %B
%tmp5 = add <1 x i64> %tmp3, %tmp4
ret <1 x i64> %tmp5
}
; Scalar i64 variant: srshl.i64 with a shift amount of -1 followed by an add of
; a second loaded i64. The shared assertions expect both operands to be loaded
; straight into d registers, fused into `srsra d1, d0, #1`, and the result
; moved to x0 with `fmov`.
define i64 @srsra_scalar(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: srsra_scalar:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: srsra d1, d0, #1
; CHECK-NEXT: fmov x0, d1
; CHECK-NEXT: ret
%tmp1 = load i64, ptr %A
%tmp3 = call i64 @llvm.aarch64.neon.srshl.i64(i64 %tmp1, i64 -1)
%tmp4 = load i64, ptr %B
%tmp5 = add i64 %tmp3, %tmp4
ret i64 %tmp5
}
; Plain IR `lshr` of <8 x i8> by a splat of 1 followed by an add of a second
; loaded vector. Both RUN configurations share one set of assertions and
; expect the pair to be fused into `usra v0.8b, v1.8b, #1`.
define <8 x i8> @usra8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: usra8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d1, [x0]
; CHECK-NEXT: ldr d0, [x1]
; CHECK-NEXT: usra v0.8b, v1.8b, #1
; CHECK-NEXT: ret
%tmp1 = load <8 x i8>, ptr %A
%tmp3 = lshr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%tmp4 = load <8 x i8>, ptr %B
%tmp5 = add <8 x i8> %tmp3, %tmp4
ret <8 x i8> %tmp5
}
; Plain IR `lshr` of <4 x i16> by a splat of 1 followed by an add of a second
; loaded vector. Both RUN configurations share one set of assertions and
; expect the pair to be fused into `usra v0.4h, v1.4h, #1`.
define <4 x i16> @usra4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: usra4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d1, [x0]
; CHECK-NEXT: ldr d0, [x1]
; CHECK-NEXT: usra v0.4h, v1.4h, #1
; CHECK-NEXT: ret
%tmp1 = load <4 x i16>, ptr %A
%tmp3 = lshr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
%tmp4 = load <4 x i16>, ptr %B
%tmp5 = add <4 x i16> %tmp3, %tmp4
ret <4 x i16> %tmp5
}
; Plain IR `lshr` of <2 x i32> by a splat of 1 followed by an add of a second
; loaded vector. Both RUN configurations share one set of assertions and
; expect the pair to be fused into `usra v0.2s, v1.2s, #1`.
define <2 x i32> @usra2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: usra2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d1, [x0]
; CHECK-NEXT: ldr d0, [x1]
; CHECK-NEXT: usra v0.2s, v1.2s, #1
; CHECK-NEXT: ret
%tmp1 = load <2 x i32>, ptr %A
%tmp3 = lshr <2 x i32> %tmp1, <i32 1, i32 1>
%tmp4 = load <2 x i32>, ptr %B
%tmp5 = add <2 x i32> %tmp3, %tmp4
ret <2 x i32> %tmp5
}
; Plain IR `lshr` of <16 x i8> by a splat of 1 followed by an add of a second
; loaded vector. Both RUN configurations share one set of assertions and
; expect the pair to be fused into `usra v0.16b, v1.16b, #1`.
define <16 x i8> @usra16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: usra16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q1, [x0]
; CHECK-NEXT: ldr q0, [x1]
; CHECK-NEXT: usra v0.16b, v1.16b, #1
; CHECK-NEXT: ret
%tmp1 = load <16 x i8>, ptr %A
%tmp3 = lshr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%tmp4 = load <16 x i8>, ptr %B
%tmp5 = add <16 x i8> %tmp3, %tmp4
ret <16 x i8> %tmp5
}
; Plain IR `lshr` of <8 x i16> by a splat of 1 followed by an add of a second
; loaded vector. Both RUN configurations share one set of assertions and
; expect the pair to be fused into `usra v0.8h, v1.8h, #1`.
define <8 x i16> @usra8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: usra8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q1, [x0]
; CHECK-NEXT: ldr q0, [x1]
; CHECK-NEXT: usra v0.8h, v1.8h, #1
; CHECK-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A
%tmp3 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%tmp4 = load <8 x i16>, ptr %B
%tmp5 = add <8 x i16> %tmp3, %tmp4
ret <8 x i16> %tmp5
}
; Plain IR `lshr` of <4 x i32> by a splat of 1 followed by an add of a second
; loaded vector. Both RUN configurations share one set of assertions and
; expect the pair to be fused into `usra v0.4s, v1.4s, #1`.
define <4 x i32> @usra4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: usra4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q1, [x0]
; CHECK-NEXT: ldr q0, [x1]
; CHECK-NEXT: usra v0.4s, v1.4s, #1
; CHECK-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A
%tmp3 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
%tmp4 = load <4 x i32>, ptr %B
%tmp5 = add <4 x i32> %tmp3, %tmp4
ret <4 x i32> %tmp5
}
; Plain IR `lshr` of <2 x i64> by a splat of 1 followed by an add of a second
; loaded vector. Both RUN configurations share one set of assertions and
; expect the pair to be fused into `usra v0.2d, v1.2d, #1`.
define <2 x i64> @usra2d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: usra2d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q1, [x0]
; CHECK-NEXT: ldr q0, [x1]
; CHECK-NEXT: usra v0.2d, v1.2d, #1
; CHECK-NEXT: ret
%tmp1 = load <2 x i64>, ptr %A
%tmp3 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
%tmp4 = load <2 x i64>, ptr %B
%tmp5 = add <2 x i64> %tmp3, %tmp4
ret <2 x i64> %tmp5
}
; Plain IR `lshr` of <1 x i64> by 1 followed by an add of a second loaded
; value. SelectionDAG is expected to fuse the pair into the scalar
; `usra d0, d1, #1`; GlobalISel does the work in general-purpose registers
; (`add x8, x9, x8, lsr #1`) and moves the result into d0 with `fmov`.
define <1 x i64> @usra1d(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: usra1d:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d1, [x0]
; CHECK-SD-NEXT: ldr d0, [x1]
; CHECK-SD-NEXT: usra d0, d1, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: usra1d:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr x8, [x0]
; CHECK-GI-NEXT: ldr x9, [x1]
; CHECK-GI-NEXT: add x8, x9, x8, lsr #1
; CHECK-GI-NEXT: fmov d0, x8
; CHECK-GI-NEXT: ret
%tmp1 = load <1 x i64>, ptr %A
%tmp3 = lshr <1 x i64> %tmp1, <i64 1>
%tmp4 = load <1 x i64>, ptr %B
%tmp5 = add <1 x i64> %tmp3, %tmp4
ret <1 x i64> %tmp5
}
; Plain IR `ashr` of <8 x i8> by a splat of 1 followed by an add of a second
; loaded vector. Both RUN configurations share one set of assertions and
; expect the pair to be fused into `ssra v0.8b, v1.8b, #1`.
define <8 x i8> @ssra8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: ssra8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d1, [x0]
; CHECK-NEXT: ldr d0, [x1]
; CHECK-NEXT: ssra v0.8b, v1.8b, #1
; CHECK-NEXT: ret
%tmp1 = load <8 x i8>, ptr %A
%tmp3 = ashr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%tmp4 = load <8 x i8>, ptr %B
%tmp5 = add <8 x i8> %tmp3, %tmp4
ret <8 x i8> %tmp5
}
; Plain IR `ashr` of <4 x i16> by a splat of 1 followed by an add of a second
; loaded vector. Both RUN configurations share one set of assertions and
; expect the pair to be fused into `ssra v0.4h, v1.4h, #1`.
define <4 x i16> @ssra4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: ssra4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d1, [x0]
; CHECK-NEXT: ldr d0, [x1]
; CHECK-NEXT: ssra v0.4h, v1.4h, #1
; CHECK-NEXT: ret
%tmp1 = load <4 x i16>, ptr %A
%tmp3 = ashr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
%tmp4 = load <4 x i16>, ptr %B
%tmp5 = add <4 x i16> %tmp3, %tmp4
ret <4 x i16> %tmp5
}
; Plain IR `ashr` of <2 x i32> by a splat of 1 followed by an add of a second
; loaded vector. Both RUN configurations share one set of assertions and
; expect the pair to be fused into `ssra v0.2s, v1.2s, #1`.
define <2 x i32> @ssra2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: ssra2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d1, [x0]
; CHECK-NEXT: ldr d0, [x1]
; CHECK-NEXT: ssra v0.2s, v1.2s, #1
; CHECK-NEXT: ret
%tmp1 = load <2 x i32>, ptr %A
%tmp3 = ashr <2 x i32> %tmp1, <i32 1, i32 1>
%tmp4 = load <2 x i32>, ptr %B
%tmp5 = add <2 x i32> %tmp3, %tmp4
ret <2 x i32> %tmp5
}
; Plain IR `ashr` of <16 x i8> by a splat of 1 followed by an add of a second
; loaded vector. Both RUN configurations share one set of assertions and
; expect the pair to be fused into `ssra v0.16b, v1.16b, #1`.
define <16 x i8> @ssra16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: ssra16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q1, [x0]
; CHECK-NEXT: ldr q0, [x1]
; CHECK-NEXT: ssra v0.16b, v1.16b, #1
; CHECK-NEXT: ret
%tmp1 = load <16 x i8>, ptr %A
%tmp3 = ashr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%tmp4 = load <16 x i8>, ptr %B
%tmp5 = add <16 x i8> %tmp3, %tmp4
ret <16 x i8> %tmp5
}
; Plain IR `ashr` of <8 x i16> by a splat of 1 followed by an add of a second
; loaded vector. Both RUN configurations share one set of assertions and
; expect the pair to be fused into `ssra v0.8h, v1.8h, #1`.
define <8 x i16> @ssra8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: ssra8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q1, [x0]
; CHECK-NEXT: ldr q0, [x1]
; CHECK-NEXT: ssra v0.8h, v1.8h, #1
; CHECK-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A
%tmp3 = ashr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%tmp4 = load <8 x i16>, ptr %B
%tmp5 = add <8 x i16> %tmp3, %tmp4
ret <8 x i16> %tmp5
}
; Plain IR `ashr` of <4 x i32> by a splat of 1 followed by an add of a second
; loaded vector. Both RUN configurations share one set of assertions and
; expect the pair to be fused into `ssra v0.4s, v1.4s, #1`.
define <4 x i32> @ssra4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: ssra4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q1, [x0]
; CHECK-NEXT: ldr q0, [x1]
; CHECK-NEXT: ssra v0.4s, v1.4s, #1
; CHECK-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A
%tmp3 = ashr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
%tmp4 = load <4 x i32>, ptr %B
%tmp5 = add <4 x i32> %tmp3, %tmp4
ret <4 x i32> %tmp5
}
; Plain IR `ashr` of <2 x i64> by a splat of 1 followed by an add of a second
; loaded vector. Both RUN configurations share one set of assertions and
; expect the pair to be fused into `ssra v0.2d, v1.2d, #1`.
define <2 x i64> @ssra2d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: ssra2d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q1, [x0]
; CHECK-NEXT: ldr q0, [x1]
; CHECK-NEXT: ssra v0.2d, v1.2d, #1
; CHECK-NEXT: ret
%tmp1 = load <2 x i64>, ptr %A
%tmp3 = ashr <2 x i64> %tmp1, <i64 1, i64 1>
%tmp4 = load <2 x i64>, ptr %B
%tmp5 = add <2 x i64> %tmp3, %tmp4
ret <2 x i64> %tmp5
}
; `lshr` of <8 x i8> by a splat of 1 combined with a second loaded vector using
; `or` instead of `add`. Both RUN configurations share one set of assertions
; and expect two separate instructions, `ushr ..., #1` followed by `orr`.
define <8 x i8> @shr_orr8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: shr_orr8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: ushr v0.8b, v0.8b, #1
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
%tmp1 = load <8 x i8>, ptr %A
%tmp4 = load <8 x i8>, ptr %B
%tmp3 = lshr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%tmp5 = or <8 x i8> %tmp3, %tmp4
ret <8 x i8> %tmp5
}
; `lshr` of <4 x i16> by a splat of 1 combined with a second loaded vector
; using `or` instead of `add`. Both RUN configurations share one set of
; assertions and expect two separate instructions, `ushr ..., #1` followed by
; a byte-wise `orr`.
define <4 x i16> @shr_orr4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: shr_orr4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: ushr v0.4h, v0.4h, #1
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
%tmp1 = load <4 x i16>, ptr %A
%tmp4 = load <4 x i16>, ptr %B
%tmp3 = lshr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
%tmp5 = or <4 x i16> %tmp3, %tmp4
ret <4 x i16> %tmp5
}
; `lshr` of <2 x i32> by a splat of 1 combined with a second loaded vector
; using `or` instead of `add`. Both RUN configurations share one set of
; assertions and expect two separate instructions, `ushr ..., #1` followed by
; a byte-wise `orr`.
define <2 x i32> @shr_orr2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: shr_orr2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: ushr v0.2s, v0.2s, #1
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
%tmp1 = load <2 x i32>, ptr %A
%tmp4 = load <2 x i32>, ptr %B
%tmp3 = lshr <2 x i32> %tmp1, <i32 1, i32 1>
%tmp5 = or <2 x i32> %tmp3, %tmp4
ret <2 x i32> %tmp5
}
; `lshr` of <16 x i8> by a splat of 1 combined with a second loaded vector
; using `or` instead of `add`. Both RUN configurations share one set of
; assertions and expect two separate instructions, `ushr ..., #1` followed by
; `orr`.
define <16 x i8> @shr_orr16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: shr_orr16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: ushr v0.16b, v0.16b, #1
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%tmp1 = load <16 x i8>, ptr %A
%tmp4 = load <16 x i8>, ptr %B
%tmp3 = lshr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%tmp5 = or <16 x i8> %tmp3, %tmp4
ret <16 x i8> %tmp5
}
; `lshr` of <8 x i16> by a splat of 1 combined with a second loaded vector
; using `or` instead of `add`. Both RUN configurations share one set of
; assertions and expect two separate instructions, `ushr ..., #1` followed by
; a byte-wise `orr`.
define <8 x i16> @shr_orr8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: shr_orr8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: ushr v0.8h, v0.8h, #1
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A
%tmp4 = load <8 x i16>, ptr %B
%tmp3 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%tmp5 = or <8 x i16> %tmp3, %tmp4
ret <8 x i16> %tmp5
}
; `lshr` of <4 x i32> by a splat of 1 combined with a second loaded vector
; using `or` instead of `add`. Both RUN configurations share one set of
; assertions and expect two separate instructions, `ushr ..., #1` followed by
; a byte-wise `orr`.
define <4 x i32> @shr_orr4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: shr_orr4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: ushr v0.4s, v0.4s, #1
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A
%tmp4 = load <4 x i32>, ptr %B
%tmp3 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
%tmp5 = or <4 x i32> %tmp3, %tmp4
ret <4 x i32> %tmp5
}
; `lshr` of <2 x i64> by a splat of 1 combined with a second loaded vector
; using `or` instead of `add`. Both RUN configurations share one set of
; assertions and expect two separate instructions, `ushr ..., #1` followed by
; a byte-wise `orr`.
define <2 x i64> @shr_orr2d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: shr_orr2d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: ushr v0.2d, v0.2d, #1
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%tmp1 = load <2 x i64>, ptr %A
%tmp4 = load <2 x i64>, ptr %B
%tmp3 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
%tmp5 = or <2 x i64> %tmp3, %tmp4
ret <2 x i64> %tmp5
}
; `shl` of <8 x i8> by a splat of 1 combined with a second loaded vector using
; `or`. SelectionDAG is expected to emit the shift-by-one as a self-add
; (`add v0.8b, v0.8b, v0.8b`); GlobalISel emits `shl ..., #1`. Both follow it
; with a separate `orr`.
define <8 x i8> @shl_orr8b(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: shl_orr8b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: ldr d1, [x1]
; CHECK-SD-NEXT: add v0.8b, v0.8b, v0.8b
; CHECK-SD-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shl_orr8b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: ldr d1, [x1]
; CHECK-GI-NEXT: shl v0.8b, v0.8b, #1
; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: ret
%tmp1 = load <8 x i8>, ptr %A
%tmp4 = load <8 x i8>, ptr %B
%tmp3 = shl <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%tmp5 = or <8 x i8> %tmp3, %tmp4
ret <8 x i8> %tmp5
}
; Shift left by 1 of every i16 lane loaded from %A, OR'd with the vector loaded
; from %B. SelectionDAG is expected to express the shift as a self-add, while
; GlobalISel is expected to emit shl #1.
define <4 x i16> @shl_orr4h(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: shl_orr4h:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: ldr d1, [x1]
; CHECK-SD-NEXT: add v0.4h, v0.4h, v0.4h
; CHECK-SD-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shl_orr4h:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: ldr d1, [x1]
; CHECK-GI-NEXT: shl v0.4h, v0.4h, #1
; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: ret
  %src = load <4 x i16>, ptr %A
  %bits = load <4 x i16>, ptr %B
  %shifted = shl <4 x i16> %src, <i16 1, i16 1, i16 1, i16 1>
  %merged = or <4 x i16> %shifted, %bits
  ret <4 x i16> %merged
}
; Shift left by 1 of both i32 lanes loaded from %A, OR'd with the vector loaded
; from %B. SelectionDAG is expected to express the shift as a self-add, while
; GlobalISel is expected to emit shl #1.
define <2 x i32> @shl_orr2s(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: shl_orr2s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: ldr d1, [x1]
; CHECK-SD-NEXT: add v0.2s, v0.2s, v0.2s
; CHECK-SD-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shl_orr2s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: ldr d1, [x1]
; CHECK-GI-NEXT: shl v0.2s, v0.2s, #1
; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: ret
  %src = load <2 x i32>, ptr %A
  %bits = load <2 x i32>, ptr %B
  %shifted = shl <2 x i32> %src, <i32 1, i32 1>
  %merged = or <2 x i32> %shifted, %bits
  ret <2 x i32> %merged
}
; 128-bit variant: shift left by 1 of every i8 lane loaded from %A, OR'd with
; the vector loaded from %B. SelectionDAG is expected to express the shift as a
; self-add, while GlobalISel is expected to emit shl #1.
define <16 x i8> @shl_orr16b(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: shl_orr16b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: ldr q1, [x1]
; CHECK-SD-NEXT: add v0.16b, v0.16b, v0.16b
; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shl_orr16b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr q0, [x0]
; CHECK-GI-NEXT: ldr q1, [x1]
; CHECK-GI-NEXT: shl v0.16b, v0.16b, #1
; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
  %src = load <16 x i8>, ptr %A
  %bits = load <16 x i8>, ptr %B
  %shifted = shl <16 x i8> %src, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %merged = or <16 x i8> %shifted, %bits
  ret <16 x i8> %merged
}
; 128-bit variant: shift left by 1 of every i16 lane loaded from %A, OR'd with
; the vector loaded from %B. SelectionDAG is expected to express the shift as a
; self-add, while GlobalISel is expected to emit shl #1.
define <8 x i16> @shl_orr8h(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: shl_orr8h:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: ldr q1, [x1]
; CHECK-SD-NEXT: add v0.8h, v0.8h, v0.8h
; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shl_orr8h:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr q0, [x0]
; CHECK-GI-NEXT: ldr q1, [x1]
; CHECK-GI-NEXT: shl v0.8h, v0.8h, #1
; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
  %src = load <8 x i16>, ptr %A
  %bits = load <8 x i16>, ptr %B
  %shifted = shl <8 x i16> %src, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %merged = or <8 x i16> %shifted, %bits
  ret <8 x i16> %merged
}
; 128-bit variant: shift left by 1 of every i32 lane loaded from %A, OR'd with
; the vector loaded from %B. SelectionDAG is expected to express the shift as a
; self-add, while GlobalISel is expected to emit shl #1.
define <4 x i32> @shl_orr4s(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: shl_orr4s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: ldr q1, [x1]
; CHECK-SD-NEXT: add v0.4s, v0.4s, v0.4s
; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shl_orr4s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr q0, [x0]
; CHECK-GI-NEXT: ldr q1, [x1]
; CHECK-GI-NEXT: shl v0.4s, v0.4s, #1
; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
  %src = load <4 x i32>, ptr %A
  %bits = load <4 x i32>, ptr %B
  %shifted = shl <4 x i32> %src, <i32 1, i32 1, i32 1, i32 1>
  %merged = or <4 x i32> %shifted, %bits
  ret <4 x i32> %merged
}
; 128-bit variant: shift left by 1 of both i64 lanes loaded from %A, OR'd with
; the vector loaded from %B. SelectionDAG is expected to express the shift as a
; self-add, while GlobalISel is expected to emit shl #1.
define <2 x i64> @shl_orr2d(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: shl_orr2d:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: ldr q1, [x1]
; CHECK-SD-NEXT: add v0.2d, v0.2d, v0.2d
; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shl_orr2d:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr q0, [x0]
; CHECK-GI-NEXT: ldr q1, [x1]
; CHECK-GI-NEXT: shl v0.2d, v0.2d, #1
; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
  %src = load <2 x i64>, ptr %A
  %bits = load <2 x i64>, ptr %B
  %shifted = shl <2 x i64> %src, <i64 1, i64 1>
  %merged = or <2 x i64> %shifted, %bits
  ret <2 x i64> %merged
}
; Zero-extends <8 x i8> to <8 x i16> and shifts each lane left by 8 (the source
; element width). SelectionDAG is expected to fold this into a single shll;
; GlobalISel is expected to emit the widening (ushll #0) and the shl separately.
define <8 x i16> @shll(<8 x i8> %in) {
; CHECK-SD-LABEL: shll:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: shll v0.8h, v0.8b, #8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shll:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: shl v0.8h, v0.8h, #8
; CHECK-GI-NEXT: ret
  %wide = zext <8 x i8> %in to <8 x i16>
  %shifted = shl <8 x i16> %wide, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  ret <8 x i16> %shifted
}
; Extracts the high four i16 lanes of %in, zero-extends them to i32 and shifts
; each lane left by 16 (the source element width). SelectionDAG is expected to
; fold this into a single shll2; GlobalISel is expected to emit ushll2 #0
; followed by a separate shl.
define <4 x i32> @shll_high(<8 x i16> %in) {
; CHECK-SD-LABEL: shll_high:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: shll2 v0.4s, v0.8h, #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shll_high:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0
; CHECK-GI-NEXT: shl v0.4s, v0.4s, #16
; CHECK-GI-NEXT: ret
  ; The mask only selects lanes 4-7 of %in, so the second shuffle operand never
  ; contributes to the result; poison is used for it instead of the deprecated
  ; undef placeholder.
  %extract = shufflevector <8 x i16> %in, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %ext = zext <4 x i16> %extract to <4 x i32>
  %res = shl <4 x i32> %ext, <i32 16, i32 16, i32 16, i32 16>
  ret <4 x i32> %res
}
; Calls the vsli intrinsic on the <8 x i8> vectors loaded from %A and %B with
; an immediate of 1; both selectors are expected to emit sli #1.
define <8 x i8> @sli8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sli8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: sli v0.8b, v1.8b, #1
; CHECK-NEXT: ret
  %dst = load <8 x i8>, ptr %A
  %ins = load <8 x i8>, ptr %B
  %res = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %dst, <8 x i8> %ins, i32 1)
  ret <8 x i8> %res
}
; Calls the vsli intrinsic on the <4 x i16> vectors loaded from %A and %B with
; an immediate of 1; both selectors are expected to emit sli #1.
define <4 x i16> @sli4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sli4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: sli v0.4h, v1.4h, #1
; CHECK-NEXT: ret
  %dst = load <4 x i16>, ptr %A
  %ins = load <4 x i16>, ptr %B
  %res = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> %dst, <4 x i16> %ins, i32 1)
  ret <4 x i16> %res
}
; Calls the vsli intrinsic on the <2 x i32> vectors loaded from %A and %B with
; an immediate of 1; both selectors are expected to emit sli #1.
define <2 x i32> @sli2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sli2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: sli v0.2s, v1.2s, #1
; CHECK-NEXT: ret
  %dst = load <2 x i32>, ptr %A
  %ins = load <2 x i32>, ptr %B
  %res = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> %dst, <2 x i32> %ins, i32 1)
  ret <2 x i32> %res
}
; Calls the vsli intrinsic on the <1 x i64> values loaded from %A and %B with
; an immediate of 1; both selectors are expected to emit the d-register form
; of sli #1.
define <1 x i64> @sli1d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sli1d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: sli d0, d1, #1
; CHECK-NEXT: ret
  %dst = load <1 x i64>, ptr %A
  %ins = load <1 x i64>, ptr %B
  %res = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> %dst, <1 x i64> %ins, i32 1)
  ret <1 x i64> %res
}
; Calls the vsli intrinsic on the <16 x i8> vectors loaded from %A and %B with
; an immediate of 1; both selectors are expected to emit sli #1.
define <16 x i8> @sli16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sli16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sli v0.16b, v1.16b, #1
; CHECK-NEXT: ret
  %dst = load <16 x i8>, ptr %A
  %ins = load <16 x i8>, ptr %B
  %res = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %dst, <16 x i8> %ins, i32 1)
  ret <16 x i8> %res
}
; Calls the vsli intrinsic on the <8 x i16> vectors loaded from %A and %B with
; an immediate of 1; both selectors are expected to emit sli #1.
define <8 x i16> @sli8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sli8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sli v0.8h, v1.8h, #1
; CHECK-NEXT: ret
  %dst = load <8 x i16>, ptr %A
  %ins = load <8 x i16>, ptr %B
  %res = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> %dst, <8 x i16> %ins, i32 1)
  ret <8 x i16> %res
}
; Calls the vsli intrinsic on the <4 x i32> vectors loaded from %A and %B with
; an immediate of 1; both selectors are expected to emit sli #1.
define <4 x i32> @sli4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sli4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sli v0.4s, v1.4s, #1
; CHECK-NEXT: ret
  %dst = load <4 x i32>, ptr %A
  %ins = load <4 x i32>, ptr %B
  %res = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> %dst, <4 x i32> %ins, i32 1)
  ret <4 x i32> %res
}
; Calls the vsli intrinsic on the <2 x i64> vectors loaded from %A and %B with
; an immediate of 1; both selectors are expected to emit sli #1.
define <2 x i64> @sli2d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sli2d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sli v0.2d, v1.2d, #1
; CHECK-NEXT: ret
  %dst = load <2 x i64>, ptr %A
  %ins = load <2 x i64>, ptr %B
  %res = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> %dst, <2 x i64> %ins, i32 1)
  ret <2 x i64> %res
}
; Declarations of the vsli intrinsic overloads called by the sli* tests above:
; the four 64-bit vector types first, then the four 128-bit vector types.
; Every caller in this file passes 1 as the trailing i32 operand, which shows
; up as the #1 immediate of the expected sli instruction.
declare <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8>, <8 x i8>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16>, <4 x i16>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32>, <2 x i32>, i32) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64>, <1 x i64>, i32) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8>, <16 x i8>, i32) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16>, <8 x i16>, i32) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32>, <4 x i32>, i32) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32) nounwind readnone
; Arithmetic shift right of a <1 x i64> by a variable <1 x i64> amount.
; SelectionDAG is expected to negate the amount and use sshl on d registers;
; GlobalISel is expected to move both operands to general-purpose registers,
; use asr, and move the result back.
define <1 x i64> @ashr_v1i64(<1 x i64> %a, <1 x i64> %b) {
; CHECK-SD-LABEL: ashr_v1i64:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: neg d1, d1
; CHECK-SD-NEXT: sshl d0, d0, d1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ashr_v1i64:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov x9, d1
; CHECK-GI-NEXT: asr x8, x8, x9
; CHECK-GI-NEXT: fmov d0, x8
; CHECK-GI-NEXT: ret
  %shifted = ashr <1 x i64> %a, %b
  ret <1 x i64> %shifted
}
; Applies the sqshl intrinsic with an all-zero shift vector to the result of a
; pairwise add and stores it to %dst. SelectionDAG is expected to drop the
; shift entirely (addp + str); GlobalISel is expected to keep an sqshl by a
; zeroed register.
define void @sqshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
; CHECK-SD-LABEL: sqshl_zero_shift_amount:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: addp v0.2d, v0.2d, v1.2d
; CHECK-SD-NEXT: str q0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sqshl_zero_shift_amount:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
; CHECK-GI-NEXT: addp v0.2d, v0.2d, v1.2d
; CHECK-GI-NEXT: sqshl v0.2d, v0.2d, v2.2d
; CHECK-GI-NEXT: str q0, [x0]
; CHECK-GI-NEXT: ret
entry:
  %pairsum = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
  %shifted = tail call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %pairsum, <2 x i64> zeroinitializer)
  store <2 x i64> %shifted, ptr %dst, align 8
  ret void
}
; Applies the uqshl intrinsic with an all-zero shift vector to the result of a
; pairwise add and stores it to %dst. SelectionDAG is expected to drop the
; shift entirely (addp + str); GlobalISel is expected to keep a uqshl by a
; zeroed register.
define void @uqshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
; CHECK-SD-LABEL: uqshl_zero_shift_amount:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: addp v0.2d, v0.2d, v1.2d
; CHECK-SD-NEXT: str q0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uqshl_zero_shift_amount:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
; CHECK-GI-NEXT: addp v0.2d, v0.2d, v1.2d
; CHECK-GI-NEXT: uqshl v0.2d, v0.2d, v2.2d
; CHECK-GI-NEXT: str q0, [x0]
; CHECK-GI-NEXT: ret
entry:
  %pairsum = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
  %shifted = tail call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %pairsum, <2 x i64> zeroinitializer)
  store <2 x i64> %shifted, ptr %dst, align 8
  ret void
}
; Applies the srshl intrinsic with an all-zero shift vector to the result of a
; pairwise add and stores it to %dst. SelectionDAG is expected to drop the
; shift entirely (addp + str); GlobalISel is expected to keep an srshl by a
; zeroed register.
define void @srshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
; CHECK-SD-LABEL: srshl_zero_shift_amount:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: addp v0.2d, v0.2d, v1.2d
; CHECK-SD-NEXT: str q0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: srshl_zero_shift_amount:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
; CHECK-GI-NEXT: addp v0.2d, v0.2d, v1.2d
; CHECK-GI-NEXT: srshl v0.2d, v0.2d, v2.2d
; CHECK-GI-NEXT: str q0, [x0]
; CHECK-GI-NEXT: ret
entry:
  %pairsum = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
  %shifted = tail call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %pairsum, <2 x i64> zeroinitializer)
  store <2 x i64> %shifted, ptr %dst, align 8
  ret void
}
; Applies the urshl intrinsic with an all-zero shift vector to the result of a
; pairwise add and stores it to %dst. SelectionDAG is expected to drop the
; shift entirely (addp + str); GlobalISel is expected to keep a urshl by a
; zeroed register.
define void @urshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
; CHECK-SD-LABEL: urshl_zero_shift_amount:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: addp v0.2d, v0.2d, v1.2d
; CHECK-SD-NEXT: str q0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: urshl_zero_shift_amount:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
; CHECK-GI-NEXT: addp v0.2d, v0.2d, v1.2d
; CHECK-GI-NEXT: urshl v0.2d, v0.2d, v2.2d
; CHECK-GI-NEXT: str q0, [x0]
; CHECK-GI-NEXT: ret
entry:
  %pairsum = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
  %shifted = tail call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %pairsum, <2 x i64> zeroinitializer)
  store <2 x i64> %shifted, ptr %dst, align 8
  ret void
}
; Applies the sqshlu intrinsic with an all-zero shift vector to the result of a
; pairwise add and stores it to %dst. Unlike the neighbouring zero-shift tests,
; both selectors are expected to keep the instruction (sqshlu #0) rather than
; remove it.
define void @sqshlu_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
; CHECK-LABEL: sqshlu_zero_shift_amount:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: addp v0.2d, v0.2d, v1.2d
; CHECK-NEXT: sqshlu v0.2d, v0.2d, #0
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
entry:
  %pairsum = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
  %shifted = tail call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> %pairsum, <2 x i64> zeroinitializer)
  store <2 x i64> %shifted, ptr %dst, align 8
  ret void
}
; Applies the sshl intrinsic with an all-zero shift vector to the result of a
; pairwise add and stores it to %dst. SelectionDAG is expected to drop the
; shift entirely (addp + str); GlobalISel is expected to keep an sshl by a
; zeroed register.
define void @sshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
; CHECK-SD-LABEL: sshl_zero_shift_amount:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: addp v0.2d, v0.2d, v1.2d
; CHECK-SD-NEXT: str q0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sshl_zero_shift_amount:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
; CHECK-GI-NEXT: addp v0.2d, v0.2d, v1.2d
; CHECK-GI-NEXT: sshl v0.2d, v0.2d, v2.2d
; CHECK-GI-NEXT: str q0, [x0]
; CHECK-GI-NEXT: ret
entry:
  %pairsum = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
  %shifted = tail call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %pairsum, <2 x i64> zeroinitializer)
  store <2 x i64> %shifted, ptr %dst, align 8
  ret void
}
; Applies the ushl intrinsic with an all-zero shift vector to the result of a
; pairwise add and stores it to %dst. SelectionDAG is expected to drop the
; shift entirely (addp + str); GlobalISel is expected to keep a ushl by a
; zeroed register.
define void @ushl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
; CHECK-SD-LABEL: ushl_zero_shift_amount:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: addp v0.2d, v0.2d, v1.2d
; CHECK-SD-NEXT: str q0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ushl_zero_shift_amount:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
; CHECK-GI-NEXT: addp v0.2d, v0.2d, v1.2d
; CHECK-GI-NEXT: ushl v0.2d, v0.2d, v2.2d
; CHECK-GI-NEXT: str q0, [x0]
; CHECK-GI-NEXT: ret
entry:
  %pairsum = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
  %shifted = tail call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> %pairsum, <2 x i64> zeroinitializer)
  store <2 x i64> %shifted, ptr %dst, align 8
  ret void
}
; Narrows %a with the rshrn intrinsic (immediate 13) and sign-extends the
; <4 x i16> result back to <4 x i32>. Both selectors are expected to emit
; rshrn #13 followed by sshll #0.
define <4 x i32> @sext_rshrn(<4 x i32> noundef %a) {
; CHECK-LABEL: sext_rshrn:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: rshrn v0.4h, v0.4s, #13
; CHECK-NEXT: sshll v0.4s, v0.4h, #0
; CHECK-NEXT: ret
entry:
  %narrowed = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %a, i32 13)
  %widened = sext <4 x i16> %narrowed to <4 x i32>
  ret <4 x i32> %widened
}
; Narrows %a with the rshrn intrinsic (immediate 13) and zero-extends the
; <4 x i16> result back to <4 x i32>. Both selectors are expected to emit
; rshrn #13 followed by ushll #0.
define <4 x i32> @zext_rshrn(<4 x i32> noundef %a) {
; CHECK-LABEL: zext_rshrn:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: rshrn v0.4h, v0.4s, #13
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: ret
entry:
  %narrowed = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %a, i32 13)
  %widened = zext <4 x i16> %narrowed to <4 x i32>
  ret <4 x i32> %widened
}
; Adds a splat of 3 to %a and narrows the sum with the rshrn intrinsic
; (immediate 13). Note that, despite the function name, the IR performs an
; add rather than a multiply. Both selectors are expected to keep the add and
; the rshrn as separate instructions.
define <4 x i16> @mul_rshrn(<4 x i32> noundef %a) {
; CHECK-LABEL: mul_rshrn:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v1.4s, #3
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: rshrn v0.4h, v0.4s, #13
; CHECK-NEXT: ret
entry:
  %biased = add <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
  %narrowed = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %biased, i32 13)
  ret <4 x i16> %narrowed
}
; Chains two sshl intrinsic calls with negative splat amounts (-8, then -9)
; and a final ashr by 7. SelectionDAG is expected to emit just two immediate
; sshr instructions (#8 and #9) with nothing for the trailing ashr; GlobalISel
; is expected to keep both register-form sshl instructions plus the sshr #7.
define <8 x i16> @signbits_vashr(<8 x i16> %a) {
; CHECK-SD-LABEL: signbits_vashr:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: sshr v0.8h, v0.8h, #8
; CHECK-SD-NEXT: sshr v0.8h, v0.8h, #9
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: signbits_vashr:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mvni v1.8h, #7
; CHECK-GI-NEXT: mvni v2.8h, #8
; CHECK-GI-NEXT: sshl v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: sshl v0.8h, v0.8h, v2.8h
; CHECK-GI-NEXT: sshr v0.8h, v0.8h, #7
; CHECK-GI-NEXT: ret
  %first = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %a, <8 x i16> <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>)
  %second = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %first, <8 x i16> <i16 -9, i16 -9, i16 -9, i16 -9, i16 -9, i16 -9, i16 -9, i16 -9>)
  %final = ashr <8 x i16> %second, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
  ret <8 x i16> %final
}
; Logical shift right of <2 x i64> by 16 followed by truncation to <2 x i8>.
; Both selectors are expected to emit a single shrn #16.
define <2 x i8> @lshr_trunc_v2i64_v2i8(<2 x i64> %a) {
; CHECK-LABEL: lshr_trunc_v2i64_v2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: shrn v0.2s, v0.2d, #16
; CHECK-NEXT: ret
  %shifted = lshr <2 x i64> %a, <i64 16, i64 16>
  %narrowed = trunc <2 x i64> %shifted to <2 x i8>
  ret <2 x i8> %narrowed
}
; Arithmetic shift right of <2 x i64> by 16 followed by truncation to
; <2 x i8>. Both selectors are expected to emit a single shrn #16, the same
; instruction as the logical-shift variant of this test.
define <2 x i8> @ashr_trunc_v2i64_v2i8(<2 x i64> %a) {
; CHECK-LABEL: ashr_trunc_v2i64_v2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: shrn v0.2s, v0.2d, #16
; CHECK-NEXT: ret
  %shifted = ashr <2 x i64> %a, <i64 16, i64 16>
  %narrowed = trunc <2 x i64> %shifted to <2 x i8>
  ret <2 x i8> %narrowed
}
; Shift left of <2 x i64> by 16 followed by truncation to <2 x i8>.
; SelectionDAG is expected to narrow first (xtn) and shift the 32-bit lanes;
; GlobalISel is expected to shift the 64-bit lanes and then narrow.
define <2 x i8> @shl_trunc_v2i64_v2i8(<2 x i64> %a) {
; CHECK-SD-LABEL: shl_trunc_v2i64_v2i8:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: xtn v0.2s, v0.2d
; CHECK-SD-NEXT: shl v0.2s, v0.2s, #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shl_trunc_v2i64_v2i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: shl v0.2d, v0.2d, #16
; CHECK-GI-NEXT: xtn v0.2s, v0.2d
; CHECK-GI-NEXT: ret
  %shifted = shl <2 x i64> %a, <i64 16, i64 16>
  %narrowed = trunc <2 x i64> %shifted to <2 x i8>
  ret <2 x i8> %narrowed
}
; Pairwise-add intrinsic whose result feeds the shift intrinsics in the
; *_zero_shift_amount tests above.
declare <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64>, <2 x i64>)