| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: opt < %s -interleaved-access -S | FileCheck %s |
| ; RUN: opt < %s -passes=interleaved-access -S | FileCheck %s |
| |
| target triple = "aarch64-linux-gnu" |
| |
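; With vscale_range(2,2) (attributes #0 below) every SVE register is exactly
; 256 bits wide, so a fixed <16 x i16> fills a whole <vscale x 8 x i16>
; register. A fixed-width de-interleaving load can therefore be lowered to
; @llvm.aarch64.sve.ld2.sret (or ld3/ld4) under an all-true predicate
; (ptrue pattern 31), with each factor recovered from the returned struct
; via @llvm.vector.extract at index 0, as the checks below expect.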
| define void @load_factor2(ptr %ptr) #0 { |
| ; CHECK-LABEL: define void @load_factor2( |
| ; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { |
| ; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) |
| ; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2.sret.nxv8i16(<vscale x 8 x i1> [[TMP1]], ptr [[PTR]]) |
| ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[LDN]], 1 |
| ; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv8i16(<vscale x 8 x i16> [[TMP2]], i64 0) |
| ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[LDN]], 0 |
| ; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv8i16(<vscale x 8 x i16> [[TMP4]], i64 0) |
| ; CHECK-NEXT: ret void |
| ; |
| %interleaved.vec = load <32 x i16>, ptr %ptr, align 4 |
| %v0 = shufflevector <32 x i16> %interleaved.vec, <32 x i16> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, |
| i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> |
| %v1 = shufflevector <32 x i16> %interleaved.vec, <32 x i16> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, |
| i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> |
| ret void |
| } |
| |
| define void @load_factor3(ptr %ptr) #0 { |
| ; CHECK-LABEL: define void @load_factor3( |
| ; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) |
| ; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3.sret.nxv4i32(<vscale x 4 x i1> [[TMP1]], ptr [[PTR]]) |
| ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 2 |
| ; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv4i32(<vscale x 4 x i32> [[TMP2]], i64 0) |
| ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 1 |
| ; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv4i32(<vscale x 4 x i32> [[TMP4]], i64 0) |
| ; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 0 |
| ; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv4i32(<vscale x 4 x i32> [[TMP6]], i64 0) |
| ; CHECK-NEXT: ret void |
| ; |
| %interleaved.vec = load <24 x i32>, ptr %ptr, align 4 |
| %v0 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> poison, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21> |
| %v1 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> poison, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22> |
| %v2 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> poison, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23> |
| ret void |
| } |
| |
| define void @load_factor4(ptr %ptr) #0 { |
| ; CHECK-LABEL: define void @load_factor4( |
| ; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) |
| ; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4.sret.nxv2i64(<vscale x 2 x i1> [[TMP1]], ptr [[PTR]]) |
| ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 3 |
| ; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 0) |
| ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 2 |
| ; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP4]], i64 0) |
| ; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 1 |
| ; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP6]], i64 0) |
| ; CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 0 |
| ; CHECK-NEXT: [[TMP9:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP8]], i64 0) |
| ; CHECK-NEXT: ret void |
| ; |
| %interleaved.vec = load <16 x i64>, ptr %ptr, align 4 |
| %v0 = shufflevector <16 x i64> %interleaved.vec, <16 x i64> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12> |
| %v1 = shufflevector <16 x i64> %interleaved.vec, <16 x i64> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13> |
| %v2 = shufflevector <16 x i64> %interleaved.vec, <16 x i64> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14> |
| %v3 = shufflevector <16 x i64> %interleaved.vec, <16 x i64> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15> |
| ret void |
| } |
| |
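; Stores take the reverse path: the interleaved shuffle is split back into
; per-factor subvectors, each is widened into an undef scalable register
; with @llvm.vector.insert, and a single @llvm.aarch64.sve.st2 (or st3/st4)
; writes all factors at once.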
| define void @store_factor2(ptr %ptr, <16 x i16> %v0, <16 x i16> %v1) #0 { |
| ; CHECK-LABEL: define void @store_factor2( |
| ; CHECK-SAME: ptr [[PTR:%.*]], <16 x i16> [[V0:%.*]], <16 x i16> [[V1:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) |
| ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[V0]], <16 x i16> [[V1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> |
| ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v16i16(<vscale x 8 x i16> undef, <16 x i16> [[TMP2]], i64 0) |
| ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[V0]], <16 x i16> [[V1]], <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> |
| ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v16i16(<vscale x 8 x i16> undef, <16 x i16> [[TMP4]], i64 0) |
| ; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP5]], <vscale x 8 x i1> [[TMP1]], ptr [[PTR]]) |
| ; CHECK-NEXT: ret void |
| ; |
| %interleaved.vec = shufflevector <16 x i16> %v0, <16 x i16> %v1, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, |
| i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> |
| store <32 x i16> %interleaved.vec, ptr %ptr, align 4 |
| ret void |
| } |
| |
| define void @store_factor3(ptr %ptr, <8 x i32> %v0, <8 x i32> %v1, <8 x i32> %v2) #0 { |
| ; CHECK-LABEL: define void @store_factor3( |
| ; CHECK-SAME: ptr [[PTR:%.*]], <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <8 x i32> [[V2:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[S0:%.*]] = shufflevector <8 x i32> [[V0]], <8 x i32> [[V1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> |
| ; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[V2]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> |
| ; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) |
| ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v8i32(<vscale x 4 x i32> undef, <8 x i32> [[TMP2]], i64 0) |
| ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> |
| ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v8i32(<vscale x 4 x i32> undef, <8 x i32> [[TMP4]], i64 0) |
| ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> |
| ; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v8i32(<vscale x 4 x i32> undef, <8 x i32> [[TMP6]], i64 0) |
| ; CHECK-NEXT: call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP7]], <vscale x 4 x i1> [[TMP1]], ptr [[PTR]]) |
| ; CHECK-NEXT: ret void |
| ; |
| %s0 = shufflevector <8 x i32> %v0, <8 x i32> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, |
| i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> |
| %s1 = shufflevector <8 x i32> %v2, <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, |
| i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> |
| %interleaved.vec = shufflevector <16 x i32> %s0, <16 x i32> %s1, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, |
| i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23> |
| store <24 x i32> %interleaved.vec, ptr %ptr, align 4 |
| ret void |
| } |
| |
| define void @store_factor4(ptr %ptr, <4 x i64> %v0, <4 x i64> %v1, <4 x i64> %v2, <4 x i64> %v3) #0 { |
| ; CHECK-LABEL: define void @store_factor4( |
| ; CHECK-SAME: ptr [[PTR:%.*]], <4 x i64> [[V0:%.*]], <4 x i64> [[V1:%.*]], <4 x i64> [[V2:%.*]], <4 x i64> [[V3:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[S0:%.*]] = shufflevector <4 x i64> [[V0]], <4 x i64> [[V1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| ; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x i64> [[V2]], <4 x i64> [[V3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| ; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) |
| ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i64> [[S0]], <8 x i64> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP2]], i64 0) |
| ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i64> [[S0]], <8 x i64> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
| ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP4]], i64 0) |
| ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i64> [[S0]], <8 x i64> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11> |
| ; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP6]], i64 0) |
| ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i64> [[S0]], <8 x i64> [[S1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15> |
| ; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP8]], i64 0) |
| ; CHECK-NEXT: call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP9]], <vscale x 2 x i1> [[TMP1]], ptr [[PTR]]) |
| ; CHECK-NEXT: ret void |
| ; |
| %s0 = shufflevector <4 x i64> %v0, <4 x i64> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| %s1 = shufflevector <4 x i64> %v2, <4 x i64> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| %interleaved.vec = shufflevector <8 x i64> %s0, <8 x i64> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15> |
| store <16 x i64> %interleaved.vec, ptr %ptr, align 4 |
| ret void |
| } |
| |
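; The SVE ldN/stN intrinsics have no pointer-element variants, so <N x ptr>
; factors are loaded as <vscale x 2 x i64> and converted back with inttoptr;
; the store tests below likewise convert their inputs with ptrtoint first.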
| define void @load_ptrvec_factor2(ptr %ptr) #0 { |
| ; CHECK-LABEL: define void @load_ptrvec_factor2( |
| ; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) |
| ; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64(<vscale x 2 x i1> [[TMP1]], ptr [[PTR]]) |
| ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 1 |
| ; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 0) |
| ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr <4 x i64> [[TMP3]] to <4 x ptr> |
| ; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 0 |
| ; CHECK-NEXT: [[TMP6:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP5]], i64 0) |
| ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr <4 x i64> [[TMP6]] to <4 x ptr> |
| ; CHECK-NEXT: ret void |
| ; |
| %interleaved.vec = load <8 x ptr>, ptr %ptr, align 4 |
| %v0 = shufflevector <8 x ptr> %interleaved.vec, <8 x ptr> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> |
| %v1 = shufflevector <8 x ptr> %interleaved.vec, <8 x ptr> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> |
| ret void |
| } |
| |
| define void @load_ptrvec_factor3(ptr %ptr) #0 { |
| ; CHECK-LABEL: define void @load_ptrvec_factor3( |
| ; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) |
| ; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3.sret.nxv2i64(<vscale x 2 x i1> [[TMP1]], ptr [[PTR]]) |
| ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 2 |
| ; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 0) |
| ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr <4 x i64> [[TMP3]] to <4 x ptr> |
| ; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 1 |
| ; CHECK-NEXT: [[TMP6:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP5]], i64 0) |
| ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr <4 x i64> [[TMP6]] to <4 x ptr> |
| ; CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 0 |
| ; CHECK-NEXT: [[TMP9:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP8]], i64 0) |
| ; CHECK-NEXT: [[TMP10:%.*]] = inttoptr <4 x i64> [[TMP9]] to <4 x ptr> |
| ; CHECK-NEXT: ret void |
| ; |
| %interleaved.vec = load <12 x ptr>, ptr %ptr, align 4 |
| %v0 = shufflevector <12 x ptr> %interleaved.vec, <12 x ptr> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9> |
| %v1 = shufflevector <12 x ptr> %interleaved.vec, <12 x ptr> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10> |
| %v2 = shufflevector <12 x ptr> %interleaved.vec, <12 x ptr> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11> |
| ret void |
| } |
| |
| define void @load_ptrvec_factor4(ptr %ptr) #0 { |
| ; CHECK-LABEL: define void @load_ptrvec_factor4( |
| ; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) |
| ; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4.sret.nxv2i64(<vscale x 2 x i1> [[TMP1]], ptr [[PTR]]) |
| ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 3 |
| ; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 0) |
| ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr <4 x i64> [[TMP3]] to <4 x ptr> |
| ; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 2 |
| ; CHECK-NEXT: [[TMP6:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP5]], i64 0) |
| ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr <4 x i64> [[TMP6]] to <4 x ptr> |
| ; CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 1 |
| ; CHECK-NEXT: [[TMP9:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP8]], i64 0) |
| ; CHECK-NEXT: [[TMP10:%.*]] = inttoptr <4 x i64> [[TMP9]] to <4 x ptr> |
| ; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 0 |
| ; CHECK-NEXT: [[TMP12:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP11]], i64 0) |
| ; CHECK-NEXT: [[TMP13:%.*]] = inttoptr <4 x i64> [[TMP12]] to <4 x ptr> |
| ; CHECK-NEXT: ret void |
| ; |
| %interleaved.vec = load <16 x ptr>, ptr %ptr, align 4 |
| %v0 = shufflevector <16 x ptr> %interleaved.vec, <16 x ptr> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12> |
| %v1 = shufflevector <16 x ptr> %interleaved.vec, <16 x ptr> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13> |
| %v2 = shufflevector <16 x ptr> %interleaved.vec, <16 x ptr> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14> |
| %v3 = shufflevector <16 x ptr> %interleaved.vec, <16 x ptr> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15> |
| ret void |
| } |
| |
| define void @store_ptrvec_factor2(ptr %ptr, <4 x ptr> %v0, <4 x ptr> %v1) #0 { |
| ; CHECK-LABEL: define void @store_ptrvec_factor2( |
| ; CHECK-SAME: ptr [[PTR:%.*]], <4 x ptr> [[V0:%.*]], <4 x ptr> [[V1:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint <4 x ptr> [[V0]] to <4 x i64> |
| ; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint <4 x ptr> [[V1]] to <4 x i64> |
| ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) |
| ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP4]], i64 0) |
| ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
| ; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP6]], i64 0) |
| ; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP7]], <vscale x 2 x i1> [[TMP3]], ptr [[PTR]]) |
| ; CHECK-NEXT: ret void |
| ; |
| %interleaved.vec = shufflevector <4 x ptr> %v0, <4 x ptr> %v1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> |
| store <8 x ptr> %interleaved.vec, ptr %ptr, align 4 |
| ret void |
| } |
| |
| define void @store_ptrvec_factor3(ptr %ptr, <4 x ptr> %v0, <4 x ptr> %v1, <4 x ptr> %v2) #0 { |
| ; CHECK-LABEL: define void @store_ptrvec_factor3( |
| ; CHECK-SAME: ptr [[PTR:%.*]], <4 x ptr> [[V0:%.*]], <4 x ptr> [[V1:%.*]], <4 x ptr> [[V2:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[S0:%.*]] = shufflevector <4 x ptr> [[V0]], <4 x ptr> [[V1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| ; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x ptr> [[V2]], <4 x ptr> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison> |
| ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint <8 x ptr> [[S0]] to <8 x i64> |
| ; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint <8 x ptr> [[S1]] to <8 x i64> |
| ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) |
| ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP4]], i64 0) |
| ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
| ; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP6]], i64 0) |
| ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <4 x i32> <i32 8, i32 9, i32 10, i32 11> |
| ; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP8]], i64 0) |
| ; CHECK-NEXT: call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP9]], <vscale x 2 x i1> [[TMP3]], ptr [[PTR]]) |
| ; CHECK-NEXT: ret void |
| ; |
| %s0 = shufflevector <4 x ptr> %v0, <4 x ptr> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| %s1 = shufflevector <4 x ptr> %v2, <4 x ptr> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> |
| %interleaved.vec = shufflevector <8 x ptr> %s0, <8 x ptr> %s1, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11> |
| store <12 x ptr> %interleaved.vec, ptr %ptr, align 4 |
| ret void |
| } |
| |
| define void @store_ptrvec_factor4(ptr %ptr, <4 x ptr> %v0, <4 x ptr> %v1, <4 x ptr> %v2, <4 x ptr> %v3) #0 { |
| ; CHECK-LABEL: define void @store_ptrvec_factor4( |
| ; CHECK-SAME: ptr [[PTR:%.*]], <4 x ptr> [[V0:%.*]], <4 x ptr> [[V1:%.*]], <4 x ptr> [[V2:%.*]], <4 x ptr> [[V3:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[S0:%.*]] = shufflevector <4 x ptr> [[V0]], <4 x ptr> [[V1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| ; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x ptr> [[V2]], <4 x ptr> [[V3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint <8 x ptr> [[S0]] to <8 x i64> |
| ; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint <8 x ptr> [[S1]] to <8 x i64> |
| ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) |
| ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP4]], i64 0) |
| ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
| ; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP6]], i64 0) |
| ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <4 x i32> <i32 8, i32 9, i32 10, i32 11> |
| ; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP8]], i64 0) |
| ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <4 x i32> <i32 12, i32 13, i32 14, i32 15> |
| ; CHECK-NEXT: [[TMP11:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP10]], i64 0) |
| ; CHECK-NEXT: call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP9]], <vscale x 2 x i64> [[TMP11]], <vscale x 2 x i1> [[TMP3]], ptr [[PTR]]) |
| ; CHECK-NEXT: ret void |
| ; |
| %s0 = shufflevector <4 x ptr> %v0, <4 x ptr> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| %s1 = shufflevector <4 x ptr> %v2, <4 x ptr> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| %interleaved.vec = shufflevector <8 x ptr> %s0, <8 x ptr> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, |
| i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15> |
| store <16 x ptr> %interleaved.vec, ptr %ptr, align 4 |
| ret void |
| } |
| |
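; A <16 x i64> interleaved load carries 1024 bits, twice what one ld2 call
; can deliver at a 256-bit register size (two registers, 512 bits), so the
; access is split into two ld2 calls, the second at an offset of 8 i64
; elements, and the halves are concatenated with shuffles.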
| define void @load_factor2_wide(ptr %ptr) #0 { |
| ; CHECK-LABEL: define void @load_factor2_wide( |
| ; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) |
| ; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64(<vscale x 2 x i1> [[TMP1]], ptr [[PTR]]) |
| ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 1 |
| ; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 0) |
| ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 0 |
| ; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP4]], i64 0) |
| ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[PTR]], i32 8 |
| ; CHECK-NEXT: [[LDN1:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64(<vscale x 2 x i1> [[TMP1]], ptr [[TMP6]]) |
| ; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN1]], 1 |
| ; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP7]], i64 0) |
| ; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN1]], 0 |
| ; CHECK-NEXT: [[TMP10:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP9]], i64 0) |
| ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> [[TMP10]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| ; CHECK-NEXT: ret void |
| ; |
| %interleaved.vec = load <16 x i64>, ptr %ptr, align 4 |
| %v0 = shufflevector <16 x i64> %interleaved.vec, <16 x i64> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> |
| %v1 = shufflevector <16 x i64> %interleaved.vec, <16 x i64> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> |
| ret void |
| } |
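; The wide store is split the same way: two st2 calls, the second writing at
; an offset of 8 i64 elements from the base pointer.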
| |
| define void @store_factor2_wide(ptr %ptr, <8 x i64> %v0, <8 x i64> %v1) #0 { |
| ; CHECK-LABEL: define void @store_factor2_wide( |
| ; CHECK-SAME: ptr [[PTR:%.*]], <8 x i64> [[V0:%.*]], <8 x i64> [[V1:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) |
| ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i64> [[V0]], <8 x i64> [[V1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP2]], i64 0) |
| ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i64> [[V0]], <8 x i64> [[V1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11> |
| ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP4]], i64 0) |
| ; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i1> [[TMP1]], ptr [[PTR]]) |
| ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i64> [[V0]], <8 x i64> [[V1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
| ; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP6]], i64 0) |
| ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i64> [[V0]], <8 x i64> [[V1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15> |
| ; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP8]], i64 0) |
| ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[PTR]], i32 8 |
| ; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP9]], <vscale x 2 x i1> [[TMP1]], ptr [[TMP10]]) |
| ; CHECK-NEXT: ret void |
| ; |
| %interleaved.vec = shufflevector <8 x i64> %v0, <8 x i64> %v1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> |
| store <16 x i64> %interleaved.vec, ptr %ptr, align 4 |
| ret void |
| } |
| |
; Check that NEON is used for fixed vectors that are illegal multiples of the
; 128-bit register size (the 384-bit vectors here need three <2 x i64> ld2 calls)
| define void @load_384bit(ptr %ptr) #0 { |
| ; CHECK-LABEL: define void @load_384bit( |
| ; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[LDN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0(ptr [[PTR]]) |
| ; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[LDN]], 1 |
| ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[LDN]], 0 |
| ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr [[PTR]], i32 4 |
| ; CHECK-NEXT: [[LDN1:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0(ptr [[TMP3]]) |
| ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[LDN1]], 1 |
| ; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[LDN1]], 0 |
| ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[TMP3]], i32 4 |
| ; CHECK-NEXT: [[LDN2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0(ptr [[TMP6]]) |
| ; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[LDN2]], 1 |
| ; CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[LDN2]], 0 |
| ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP7]], <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> |
| ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> [[TMP10]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5> |
| ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> |
| ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i64> [[TMP12]], <4 x i64> [[TMP13]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5> |
| ; CHECK-NEXT: ret void |
| ; |
| %interleaved.vec = load <12 x i64>, ptr %ptr, align 4 |
| %v0 = shufflevector <12 x i64> %interleaved.vec, <12 x i64> poison, <6 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10> |
| %v1 = shufflevector <12 x i64> %interleaved.vec, <12 x i64> poison, <6 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11> |
| ret void |
| } |
| |
; Check that NEON is used for plain 128-bit vectors, where a single ld2 suffices
| define void @load_128bit(ptr %ptr) #0 { |
| ; CHECK-LABEL: define void @load_128bit( |
| ; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[LDN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0(ptr [[PTR]]) |
| ; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[LDN]], 1 |
| ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[LDN]], 0 |
| ; CHECK-NEXT: ret void |
| ; |
| %interleaved.vec = load <4 x i64>, ptr %ptr, align 4 |
| %v0 = shufflevector <4 x i64> %interleaved.vec, <4 x i64> poison, <2 x i32> <i32 0, i32 2> |
| %v1 = shufflevector <4 x i64> %interleaved.vec, <4 x i64> poison, <2 x i32> <i32 1, i32 3> |
| ret void |
| } |
| |
; Check that correct ptrues are generated for the min != max case: with
; vscale_range(2,4) the register may be larger than the 256-bit fixed type,
; so the predicate must be ptrue VL4 (i32 4) rather than all-true (i32 31)
| define void @load_min_not_max(ptr %ptr) #1 { |
| ; CHECK-LABEL: define void @load_min_not_max( |
| ; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR1:[0-9]+]] { |
| ; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 4) |
| ; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64(<vscale x 2 x i1> [[TMP1]], ptr [[PTR]]) |
| ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 1 |
| ; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 0) |
| ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 0 |
| ; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP4]], i64 0) |
| ; CHECK-NEXT: ret void |
| ; |
| %interleaved.vec = load <8 x i64>, ptr %ptr, align 4 |
| %v0 = shufflevector <8 x i64> %interleaved.vec, <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> |
| %v1 = shufflevector <8 x i64> %interleaved.vec, <8 x i64> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> |
| ret void |
| } |
| |
| define void @store_min_not_max(ptr %ptr, <4 x i64> %v0, <4 x i64> %v1) #1 { |
| ; CHECK-LABEL: define void @store_min_not_max( |
| ; CHECK-SAME: ptr [[PTR:%.*]], <4 x i64> [[V0:%.*]], <4 x i64> [[V1:%.*]]) #[[ATTR1]] { |
| ; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 4) |
| ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[V0]], <4 x i64> [[V1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP2]], i64 0) |
| ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[V0]], <4 x i64> [[V1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
| ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP4]], i64 0) |
| ; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i1> [[TMP1]], ptr [[PTR]]) |
| ; CHECK-NEXT: ret void |
| ; |
| %interleaved.vec = shufflevector <4 x i64> %v0, <4 x i64> %v1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> |
| store <8 x i64> %interleaved.vec, ptr %ptr, align 4 |
| ret void |
| } |
| |
; Check that correct ptrues are generated for the min > type case: with
; vscale_range(4,4) a <vscale x 2 x i64> register holds 8 elements, more than
; the <4 x i64> fixed type, so a VL4 ptrue limits the operation to 4 lanes
| define void @load_min_ge_type(ptr %ptr) #2 { |
| ; CHECK-LABEL: define void @load_min_ge_type( |
| ; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR2:[0-9]+]] { |
| ; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 4) |
| ; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64(<vscale x 2 x i1> [[TMP1]], ptr [[PTR]]) |
| ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 1 |
| ; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 0) |
| ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 0 |
| ; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP4]], i64 0) |
| ; CHECK-NEXT: ret void |
| ; |
| %interleaved.vec = load <8 x i64>, ptr %ptr, align 4 |
| %v0 = shufflevector <8 x i64> %interleaved.vec, <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> |
| %v1 = shufflevector <8 x i64> %interleaved.vec, <8 x i64> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> |
| ret void |
| } |
| |
| define void @store_min_ge_type(ptr %ptr, <4 x i64> %v0, <4 x i64> %v1) #2 { |
| ; CHECK-LABEL: define void @store_min_ge_type( |
| ; CHECK-SAME: ptr [[PTR:%.*]], <4 x i64> [[V0:%.*]], <4 x i64> [[V1:%.*]]) #[[ATTR2]] { |
| ; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 4) |
| ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[V0]], <4 x i64> [[V1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP2]], i64 0) |
| ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[V0]], <4 x i64> [[V1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
| ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP4]], i64 0) |
| ; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i1> [[TMP1]], ptr [[PTR]]) |
| ; CHECK-NEXT: ret void |
| ; |
| %interleaved.vec = shufflevector <4 x i64> %v0, <4 x i64> %v1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> |
| store <8 x i64> %interleaved.vec, ptr %ptr, align 4 |
| ret void |
| } |
| |
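; The remaining tests cover floating-point element types; the lowering is
; identical and only the intrinsic suffix changes (nxv2f64, nxv4f32, nxv8f16,
; nxv8bf16).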
| define void @load_double_factor4(ptr %ptr) #0 { |
| ; CHECK-LABEL: define void @load_double_factor4( |
| ; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) |
| ; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld4.sret.nxv2f64(<vscale x 2 x i1> [[TMP1]], ptr [[PTR]]) |
| ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[LDN]], 3 |
| ; CHECK-NEXT: [[TMP3:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv2f64(<vscale x 2 x double> [[TMP2]], i64 0) |
| ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[LDN]], 2 |
| ; CHECK-NEXT: [[TMP5:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv2f64(<vscale x 2 x double> [[TMP4]], i64 0) |
| ; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[LDN]], 1 |
| ; CHECK-NEXT: [[TMP7:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv2f64(<vscale x 2 x double> [[TMP6]], i64 0) |
| ; CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[LDN]], 0 |
| ; CHECK-NEXT: [[TMP9:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv2f64(<vscale x 2 x double> [[TMP8]], i64 0) |
| ; CHECK-NEXT: ret void |
| ; |
| %interleaved.vec = load <16 x double>, ptr %ptr, align 4 |
| %v0 = shufflevector <16 x double> %interleaved.vec, <16 x double> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12> |
| %v1 = shufflevector <16 x double> %interleaved.vec, <16 x double> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13> |
| %v2 = shufflevector <16 x double> %interleaved.vec, <16 x double> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14> |
| %v3 = shufflevector <16 x double> %interleaved.vec, <16 x double> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15> |
| ret void |
| } |
| |
| define void @load_float_factor3(ptr %ptr) #0 { |
| ; CHECK-LABEL: define void @load_float_factor3( |
| ; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) |
| ; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3.sret.nxv4f32(<vscale x 4 x i1> [[TMP1]], ptr [[PTR]]) |
| ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[LDN]], 2 |
| ; CHECK-NEXT: [[TMP3:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv4f32(<vscale x 4 x float> [[TMP2]], i64 0) |
| ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[LDN]], 1 |
| ; CHECK-NEXT: [[TMP5:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv4f32(<vscale x 4 x float> [[TMP4]], i64 0) |
| ; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[LDN]], 0 |
| ; CHECK-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv4f32(<vscale x 4 x float> [[TMP6]], i64 0) |
| ; CHECK-NEXT: ret void |
| ; |
| %interleaved.vec = load <24 x float>, ptr %ptr, align 4 |
| %v0 = shufflevector <24 x float> %interleaved.vec, <24 x float> poison, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21> |
| %v1 = shufflevector <24 x float> %interleaved.vec, <24 x float> poison, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22> |
| %v2 = shufflevector <24 x float> %interleaved.vec, <24 x float> poison, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23> |
| ret void |
| } |
| |
| define void @load_half_factor2(ptr %ptr) #0 { |
| ; CHECK-LABEL: define void @load_half_factor2( |
| ; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) |
| ; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld2.sret.nxv8f16(<vscale x 8 x i1> [[TMP1]], ptr [[PTR]]) |
| ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[LDN]], 1 |
| ; CHECK-NEXT: [[TMP3:%.*]] = call <16 x half> @llvm.vector.extract.v16f16.nxv8f16(<vscale x 8 x half> [[TMP2]], i64 0) |
| ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[LDN]], 0 |
| ; CHECK-NEXT: [[TMP5:%.*]] = call <16 x half> @llvm.vector.extract.v16f16.nxv8f16(<vscale x 8 x half> [[TMP4]], i64 0) |
| ; CHECK-NEXT: ret void |
| ; |
| %interleaved.vec = load <32 x half>, ptr %ptr, align 4 |
| %v0 = shufflevector <32 x half> %interleaved.vec, <32 x half> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> |
| %v1 = shufflevector <32 x half> %interleaved.vec, <32 x half> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> |
| ret void |
| } |
| |
| define void @load_bfloat_factor2(ptr %ptr) #0 { |
| ; CHECK-LABEL: define void @load_bfloat_factor2( |
| ; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) |
| ; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2.sret.nxv8bf16(<vscale x 8 x i1> [[TMP1]], ptr [[PTR]]) |
| ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[LDN]], 1 |
| ; CHECK-NEXT: [[TMP3:%.*]] = call <16 x bfloat> @llvm.vector.extract.v16bf16.nxv8bf16(<vscale x 8 x bfloat> [[TMP2]], i64 0) |
| ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[LDN]], 0 |
| ; CHECK-NEXT: [[TMP5:%.*]] = call <16 x bfloat> @llvm.vector.extract.v16bf16.nxv8bf16(<vscale x 8 x bfloat> [[TMP4]], i64 0) |
| ; CHECK-NEXT: ret void |
| ; |
| %interleaved.vec = load <32 x bfloat>, ptr %ptr, align 4 |
| %v0 = shufflevector <32 x bfloat> %interleaved.vec, <32 x bfloat> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> |
| %v1 = shufflevector <32 x bfloat> %interleaved.vec, <32 x bfloat> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> |
| ret void |
| } |
| |
| define void @store_double_factor4(ptr %ptr, <4 x double> %v0, <4 x double> %v1, <4 x double> %v2, <4 x double> %v3) #0 { |
| ; CHECK-LABEL: define void @store_double_factor4( |
| ; CHECK-SAME: ptr [[PTR:%.*]], <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <4 x double> [[V2:%.*]], <4 x double> [[V3:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[S0:%.*]] = shufflevector <4 x double> [[V0]], <4 x double> [[V1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| ; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x double> [[V2]], <4 x double> [[V3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| ; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) |
| ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v4f64(<vscale x 2 x double> undef, <4 x double> [[TMP2]], i64 0) |
| ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
| ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v4f64(<vscale x 2 x double> undef, <4 x double> [[TMP4]], i64 0) |
| ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11> |
| ; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v4f64(<vscale x 2 x double> undef, <4 x double> [[TMP6]], i64 0) |
| ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15> |
| ; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v4f64(<vscale x 2 x double> undef, <4 x double> [[TMP8]], i64 0) |
| ; CHECK-NEXT: call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> [[TMP3]], <vscale x 2 x double> [[TMP5]], <vscale x 2 x double> [[TMP7]], <vscale x 2 x double> [[TMP9]], <vscale x 2 x i1> [[TMP1]], ptr [[PTR]]) |
| ; CHECK-NEXT: ret void |
| ; |
| %s0 = shufflevector <4 x double> %v0, <4 x double> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| %s1 = shufflevector <4 x double> %v2, <4 x double> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| %interleaved.vec = shufflevector <8 x double> %s0, <8 x double> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15> |
| store <16 x double> %interleaved.vec, ptr %ptr, align 4 |
| ret void |
| } |
| |
| define void @store_float_factor3(ptr %ptr, <8 x float> %v0, <8 x float> %v1, <8 x float> %v2) #0 { |
| ; CHECK-LABEL: define void @store_float_factor3( |
| ; CHECK-SAME: ptr [[PTR:%.*]], <8 x float> [[V0:%.*]], <8 x float> [[V1:%.*]], <8 x float> [[V2:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[S0:%.*]] = shufflevector <8 x float> [[V0]], <8 x float> [[V1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> |
| ; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x float> [[V2]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> |
| ; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) |
| ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x float> [[S0]], <16 x float> [[S1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v8f32(<vscale x 4 x float> undef, <8 x float> [[TMP2]], i64 0) |
| ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x float> [[S0]], <16 x float> [[S1]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> |
| ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v8f32(<vscale x 4 x float> undef, <8 x float> [[TMP4]], i64 0) |
| ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x float> [[S0]], <16 x float> [[S1]], <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> |
| ; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v8f32(<vscale x 4 x float> undef, <8 x float> [[TMP6]], i64 0) |
| ; CHECK-NEXT: call void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float> [[TMP3]], <vscale x 4 x float> [[TMP5]], <vscale x 4 x float> [[TMP7]], <vscale x 4 x i1> [[TMP1]], ptr [[PTR]]) |
| ; CHECK-NEXT: ret void |
| ; |
| %s0 = shufflevector <8 x float> %v0, <8 x float> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, |
| i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> |
| %s1 = shufflevector <8 x float> %v2, <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, |
| i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> |
| %interleaved.vec = shufflevector <16 x float> %s0, <16 x float> %s1, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, |
| i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23> |
| store <24 x float> %interleaved.vec, ptr %ptr, align 4 |
| ret void |
| } |
| |
| define void @store_half_factor2(ptr %ptr, <16 x half> %v0, <16 x half> %v1) #0 { |
| ; CHECK-LABEL: define void @store_half_factor2( |
| ; CHECK-SAME: ptr [[PTR:%.*]], <16 x half> [[V0:%.*]], <16 x half> [[V1:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) |
| ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x half> [[V0]], <16 x half> [[V1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> |
| ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v16f16(<vscale x 8 x half> undef, <16 x half> [[TMP2]], i64 0) |
| ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x half> [[V0]], <16 x half> [[V1]], <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> |
| ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v16f16(<vscale x 8 x half> undef, <16 x half> [[TMP4]], i64 0) |
| ; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[TMP5]], <vscale x 8 x i1> [[TMP1]], ptr [[PTR]]) |
| ; CHECK-NEXT: ret void |
| ; |
| %interleaved.vec = shufflevector <16 x half> %v0, <16 x half> %v1, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, |
| i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> |
| store <32 x half> %interleaved.vec, ptr %ptr, align 4 |
| ret void |
| } |
| define void @store_bfloat_factor2(ptr %ptr, <16 x bfloat> %v0, <16 x bfloat> %v1) #0 { |
| ; CHECK-LABEL: define void @store_bfloat_factor2( |
| ; CHECK-SAME: ptr [[PTR:%.*]], <16 x bfloat> [[V0:%.*]], <16 x bfloat> [[V1:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) |
| ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x bfloat> [[V0]], <16 x bfloat> [[V1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> |
| ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v16bf16(<vscale x 8 x bfloat> undef, <16 x bfloat> [[TMP2]], i64 0) |
| ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x bfloat> [[V0]], <16 x bfloat> [[V1]], <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> |
| ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v16bf16(<vscale x 8 x bfloat> undef, <16 x bfloat> [[TMP4]], i64 0) |
| ; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[TMP5]], <vscale x 8 x i1> [[TMP1]], ptr [[PTR]]) |
| ; CHECK-NEXT: ret void |
| ; |
| %interleaved.vec = shufflevector <16 x bfloat> %v0, <16 x bfloat> %v1, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, |
| i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> |
| store <32 x bfloat> %interleaved.vec, ptr %ptr, align 4 |
| ret void |
| } |
| |
; Ensure the vscale_range property does not affect scalable vector types: the
; scalable llvm.vector.deinterleave2 below is still lowered to ld2 calls on
; legal <vscale x 2 x double> parts, not to a fixed-width form.
| define void @deinterleave_nxptr_factor2(ptr %ptr) #2 { |
| ; CHECK-LABEL: define void @deinterleave_nxptr_factor2( |
| ; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR2]] { |
| ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 2 x double>, ptr [[PTR]], i64 0 |
| ; CHECK-NEXT: [[LDN1:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2.sret.nxv2f64(<vscale x 2 x i1> splat (i1 true), ptr [[TMP1]]) |
| ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[LDN1]], 0 |
| ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP2]], i64 0) |
| ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[LDN1]], 1 |
| ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP4]], i64 0) |
| ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr <vscale x 2 x double>, ptr [[PTR]], i64 2 |
| ; CHECK-NEXT: [[LDN2:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2.sret.nxv2f64(<vscale x 2 x i1> splat (i1 true), ptr [[TMP6]]) |
| ; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[LDN2]], 0 |
| ; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP3]], <vscale x 2 x double> [[TMP7]], i64 2) |
| ; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[LDN2]], 1 |
| ; CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP5]], <vscale x 2 x double> [[TMP9]], i64 2) |
| ; CHECK-NEXT: ret void |
| ; |
| %wide.vec = load <vscale x 8 x double>, ptr %ptr, align 8 |
| %ldN = tail call { <vscale x 4 x double>, <vscale x 4 x double> } @llvm.vector.deinterleave2.nxv8f64(<vscale x 8 x double> %wide.vec) |
| %extract1 = extractvalue { <vscale x 4 x double>, <vscale x 4 x double> } %ldN, 0 |
| %extract2 = extractvalue { <vscale x 4 x double>, <vscale x 4 x double> } %ldN, 1 |
| ret void |
| } |
| |
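; #0 pins vscale to 2 (256-bit registers), #1 allows vscale between 2 and 4,
; and #2 pins vscale to 4 (512-bit registers).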
| attributes #0 = { vscale_range(2,2) "target-features"="+sve" } |
| attributes #1 = { vscale_range(2,4) "target-features"="+sve" } |
| attributes #2 = { vscale_range(4,4) "target-features"="+sve" } |