| ; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa |
| ; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s |
| ; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s |
| ; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 %s |
| |
| ; Test load balancing logic with 6 kernels. |
| ; |
| ; Kernels go from most expensive (A == 6) to least expensive (F == 1) |
| ; |
| ; Load balancing should work like this (current partition cost is in parens) |
| ; |
| ; Initial -> [P0(0), P1(0), P2(0)] |
| ; |
| ; A(6) goes in 2 -> [P2(6), P0(0), P1(0)] |
| ; B(5) goes in 1 -> [P2(6), P1(5), P0(4)] |
| ; C(4) goes in 0 -> [P2(6), P1(5), P0(4)] |
| |
| ; D(3) goes in 0 -> [P0(7), P2(6), P1(5)] |
| ; E(2) goes in 1 -> [P0(7), P1(7), P2(6)] |
| ; F(1) goes in 2 -> [P0(7), P1(7), P2(7)] |
| |
| ; CHECK0-NOT: define |
| ; CHECK0: define amdgpu_kernel void @C |
| ; CHECK0: define amdgpu_kernel void @D |
| ; CHECK0-NOT: define |
| |
| ; CHECK1-NOT: define |
| ; CHECK1: define amdgpu_kernel void @B |
| ; CHECK1: define amdgpu_kernel void @E |
| ; CHECK1-NOT: define |
| |
| ; CHECK2-NOT: define |
| ; CHECK2: define amdgpu_kernel void @A |
| ; CHECK2: define amdgpu_kernel void @F |
| ; CHECK2-NOT: define |
| |
| |
| define amdgpu_kernel void @A(ptr %x) { |
| store i64 42, ptr %x |
| store i64 43, ptr %x |
| store i64 44, ptr %x |
| store i64 45, ptr %x |
| store i64 46, ptr %x |
| ret void |
| } |
| |
| define amdgpu_kernel void @B(ptr %x) { |
| store i64 42, ptr %x |
| store i64 43, ptr %x |
| store i64 44, ptr %x |
| store i64 45, ptr %x |
| ret void |
| } |
| |
| define amdgpu_kernel void @C(ptr %x) { |
| store i64 42, ptr %x |
| store i64 43, ptr %x |
| store i64 44, ptr %x |
| ret void |
| } |
| |
| define amdgpu_kernel void @D(ptr %x) { |
| store i64 42, ptr %x |
| store i64 43, ptr %x |
| ret void |
| } |
| |
| define amdgpu_kernel void @E(ptr %x) { |
| store i64 42, ptr %x |
| ret void |
| } |
| |
| define amdgpu_kernel void @F() { |
| ret void |
| } |