| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| ; RUN: opt -passes=loop-vectorize,interleaved-access -mattr=+sve -S -o - %s | FileCheck %s |
| |
| target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" |
| target triple = "aarch64" |
| |
| %struct.xyzt = type { i32, i32, i32, i32 } |
| ; for (int i = 0; i < 1024; ++i) { |
| ; dst[i].x = a[i].x + b[i].x; |
| ; dst[i].y = a[i].y - b[i].y; |
| ; dst[i].z = a[i].z << b[i].z; |
| ; dst[i].t = a[i].t >> b[i].t; |
| ; } |
| |
| define void @interleave_deinterleave(ptr noalias %dst, ptr %a, ptr %b) { |
| ; CHECK-LABEL: @interleave_deinterleave( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() |
| ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 |
| ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] |
| ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_PH:%.*]] |
| ; CHECK: vector.ph: |
| ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() |
| ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 |
| ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] |
| ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] |
| ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] |
| ; CHECK: vector.body: |
| ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_XYZT:%.*]], ptr [[A:%.*]], i64 [[INDEX]] |
| ; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1> splat (i1 true), ptr [[TMP7]]) |
| ; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 0 |
| ; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 1 |
| ; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 2 |
| ; CHECK-NEXT: [[TMP12:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 3 |
| ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_XYZT]], ptr [[B:%.*]], i64 [[INDEX]] |
| ; CHECK-NEXT: [[LDN9:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1> splat (i1 true), ptr [[TMP13]]) |
| ; CHECK-NEXT: [[TMP16:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN9]], 0 |
| ; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN9]], 1 |
| ; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN9]], 2 |
| ; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN9]], 3 |
| ; CHECK-NEXT: [[TMP20:%.*]] = add nsw <vscale x 4 x i32> [[TMP16]], [[TMP9]] |
| ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_XYZT]], ptr [[DST:%.*]], i64 [[INDEX]] |
| ; CHECK-NEXT: [[TMP22:%.*]] = sub nsw <vscale x 4 x i32> [[TMP10]], [[TMP17]] |
| ; CHECK-NEXT: [[TMP23:%.*]] = shl <vscale x 4 x i32> [[TMP11]], [[TMP18]] |
| ; CHECK-NEXT: [[TMP24:%.*]] = ashr <vscale x 4 x i32> [[TMP12]], [[TMP19]] |
| ; CHECK-NEXT: call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> [[TMP20]], <vscale x 4 x i32> [[TMP22]], <vscale x 4 x i32> [[TMP23]], <vscale x 4 x i32> [[TMP24]], <vscale x 4 x i1> splat (i1 true), ptr [[TMP21]]) |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] |
| ; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] |
| ; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] |
| ; CHECK: middle.block: |
| ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] |
| ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER]] |
| ; CHECK: scalar.ph: |
| ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] |
| ; CHECK-NEXT: br label [[FOR_BODY:%.*]] |
| ; CHECK: for.body: |
| ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] |
| ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_XYZT]], ptr [[A]], i64 [[INDVARS_IV]] |
| ; CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 |
| ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [[STRUCT_XYZT]], ptr [[B]], i64 [[INDVARS_IV]] |
| ; CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 |
| ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP31]] |
| ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [[STRUCT_XYZT]], ptr [[DST]], i64 [[INDVARS_IV]] |
| ; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX5]], align 4 |
| ; CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 4 |
| ; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[Y]], align 4 |
| ; CHECK-NEXT: [[Y11:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX2]], i64 4 |
| ; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[Y11]], align 4 |
| ; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP33]], [[TMP26]] |
| ; CHECK-NEXT: [[Y14:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX5]], i64 4 |
| ; CHECK-NEXT: store i32 [[SUB]], ptr [[Y14]], align 4 |
| ; CHECK-NEXT: [[Z:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 8 |
| ; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[Z]], align 4 |
| ; CHECK-NEXT: [[Z19:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX2]], i64 8 |
| ; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[Z19]], align 4 |
| ; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[TMP27]], [[TMP28]] |
| ; CHECK-NEXT: [[Z22:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX5]], i64 8 |
| ; CHECK-NEXT: store i32 [[SHL]], ptr [[Z22]], align 4 |
| ; CHECK-NEXT: [[T:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 12 |
| ; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[T]], align 4 |
| ; CHECK-NEXT: [[T27:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX2]], i64 12 |
| ; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[T27]], align 4 |
| ; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[TMP29]], [[TMP30]] |
| ; CHECK-NEXT: [[T30:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX5]], i64 12 |
| ; CHECK-NEXT: store i32 [[SHR]], ptr [[T30]], align 4 |
| ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 |
| ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 |
| ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] |
| ; CHECK: for.end: |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| br label %for.body |
| |
| for.body: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] |
| %gep.a = getelementptr inbounds %struct.xyzt, ptr %a, i64 %iv |
| %a.0 = load i32, ptr %gep.a, align 4 |
| %gep.b = getelementptr inbounds %struct.xyzt, ptr %b, i64 %iv |
| %b.0 = load i32, ptr %gep.b, align 4 |
| %add = add nsw i32 %b.0, %a.0 |
| %gep.dst = getelementptr inbounds %struct.xyzt, ptr %dst, i64 %iv |
| store i32 %add, ptr %gep.dst, align 4 |
| %gep.a.1 = getelementptr inbounds nuw i8, ptr %gep.a, i64 4 |
| %a.1 = load i32, ptr %gep.a.1, align 4 |
| %gep.b.1 = getelementptr inbounds nuw i8, ptr %gep.b, i64 4 |
| %b.1 = load i32, ptr %gep.b.1, align 4 |
| %sub = sub nsw i32 %a.1, %b.1 |
| %gep.dst.1 = getelementptr inbounds nuw i8, ptr %gep.dst, i64 4 |
| store i32 %sub, ptr %gep.dst.1, align 4 |
| %gep.a.2 = getelementptr inbounds nuw i8, ptr %gep.a, i64 8 |
| %a.2 = load i32, ptr %gep.a.2, align 4 |
| %gep.b.2 = getelementptr inbounds nuw i8, ptr %gep.b, i64 8 |
| %b.2 = load i32, ptr %gep.b.2, align 4 |
| %shl = shl i32 %a.2, %b.2 |
| %gep.dst.2 = getelementptr inbounds nuw i8, ptr %gep.dst, i64 8 |
| store i32 %shl, ptr %gep.dst.2, align 4 |
| %gep.a.3 = getelementptr inbounds nuw i8, ptr %gep.a, i64 12 |
| %a.3 = load i32, ptr %gep.a.3, align 4 |
| %gep.b.3 = getelementptr inbounds nuw i8, ptr %gep.b, i64 12 |
| %b.3 = load i32, ptr %gep.b.3, align 4 |
| %shr = ashr i32 %a.3, %b.3 |
| %gep.dst.3 = getelementptr inbounds nuw i8, ptr %gep.dst, i64 12 |
| store i32 %shr, ptr %gep.dst.3, align 4 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %exitcond.not = icmp eq i64 %iv.next, 1024 |
| br i1 %exitcond.not, label %for.end, label %for.body |
| |
| for.end: |
| ret void |
| } |