| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals |
| ; RUN: opt -aa-pipeline=basic-aa -passes=openmp-opt -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT |
| ; RUN: opt -aa-pipeline=basic-aa -passes=openmp-opt-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC |
| |
| target triple = "amdgcn-amd-amdhsa" |
| |
| ; Named struct type; nothing else in this file references it. |
| %struct.ident_t = type { i32, i32, i32, i32, ptr } |
| |
| ; Internal i32 globals in address space 3, all initialized to undef. |
| ; @G and @H are accessed only by @kernel; @X only by @kernel2 and @kernel3. |
| @G = internal addrspace(3) global i32 undef, align 4 |
| @H = internal addrspace(3) global i32 undef, align 4 |
| @X = internal addrspace(3) global i32 undef, align 4 |
| ; One-byte constant in address space 4; only @test_assume references it. |
| @str = private unnamed_addr addrspace(4) constant [1 x i8] c"\00", align 1 |
| |
| ; Make sure we do not delete the stores to @G without also replacing the load with `1`. |
| ;. |
| ; TUNIT: @[[G:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4 |
| ; TUNIT: @[[H:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4 |
| ; TUNIT: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4 |
| ; TUNIT: @[[STR:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr addrspace(4) constant [1 x i8] zeroinitializer, align 1 |
| ; TUNIT: @[[KERNEL_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 |
| ;. |
| ; CGSCC: @[[G:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4 |
| ; CGSCC: @[[H:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4 |
| ; CGSCC: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4 |
| ; CGSCC: @[[STR:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr addrspace(4) constant [1 x i8] zeroinitializer, align 1 |
| ;. |
| define void @kernel() "kernel" { |
| ; |
| ; CHECK: Function Attrs: norecurse |
| ; CHECK-LABEL: define {{[^@]+}}@kernel |
| ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { |
| ; CHECK-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(ptr undef, i8 1, i1 false) |
| ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1 |
| ; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] |
| ; CHECK: if.then: |
| ; CHECK-NEXT: br label [[IF_MERGE:%.*]] |
| ; CHECK: if.else: |
| ; CHECK-NEXT: call void @barrier() #[[ATTR6:[0-9]+]] |
| ; CHECK-NEXT: call void @use1(i32 undef) #[[ATTR6]] |
| ; CHECK-NEXT: call void @barrier() #[[ATTR6]] |
| ; CHECK-NEXT: br label [[IF_MERGE]] |
| ; CHECK: if.merge: |
| ; CHECK-NEXT: call void @use1(i32 2) #[[ATTR6]] |
| ; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]] |
| ; CHECK: if.then2: |
| ; CHECK-NEXT: call void @barrier() #[[ATTR6]] |
| ; CHECK-NEXT: br label [[IF_END]] |
| ; CHECK: if.end: |
| ; CHECK-NEXT: call void @__kmpc_target_deinit(ptr undef, i8 1) |
| ; CHECK-NEXT: ret void |
| ; |
| ; Input IR. %cmp is true only when @__kmpc_target_init returned -1. |
| %call = call i32 @__kmpc_target_init(ptr undef, i8 1, i1 false) |
| %cmp = icmp eq i32 %call, -1 |
| br i1 %cmp, label %if.then, label %if.else |
| if.then: |
| ; The assertions above expect both stores in this block to be removed. |
| store i32 1, ptr addrspace(3) @G |
| store i32 2, ptr addrspace(3) @H |
| br label %if.merge |
| if.else: |
| call void @barrier(); |
| ; The expected output passes `undef` to @use1 in place of %l. |
| ; NOTE(review): the comment introducing this test talks about replacing the load with `1`, |
| ; but the assertions expect `undef` -- confirm this is the intended result. |
| %l = load i32, ptr addrspace(3) @G |
| call void @use1(i32 %l) |
| ; The load of @H, the compare and the assume are all absent from the expected output. |
| %hv = load i32, ptr addrspace(3) @H |
| %hc = icmp eq i32 %hv, 2 |
| call void @llvm.assume(i1 %hc) |
| call void @barrier(); |
| br label %if.merge |
| if.merge: |
| ; The expected output replaces %hreload with the constant 2. |
| %hreload = load i32, ptr addrspace(3) @H |
| call void @use1(i32 %hreload) |
| br i1 %cmp, label %if.then2, label %if.end |
| if.then2: |
| ; The expected output drops this store but keeps the barrier. |
| store i32 2, ptr addrspace(3) @G |
| call void @barrier(); |
| br label %if.end |
| if.end: |
| call void @__kmpc_target_deinit(ptr undef, i8 1) |
| ret void |
| } |
| |
| define void @test_assume() { |
| ; CHECK-LABEL: define {{[^@]+}}@test_assume() { |
| ; CHECK-NEXT: call void @llvm.assume(i1 true) |
| ; CHECK-NEXT: ret void |
| ; |
| ; The assume operand is a constant expression: the address of @str, cast from |
| ; address space 4 to a plain `ptr`, compared against null with `icmp ne`. |
| ; The expected output has that operand folded to `true`. |
| call void @llvm.assume(i1 icmp ne (ptr addrspacecast (ptr addrspace(4) @str to ptr), ptr null)) |
| ret void |
| } |
| |
| ; We can't ignore the call to @sync, so the load of @X must be kept; hence this might store 2 into %p |
| define void @kernel2(ptr %p) "kernel" { |
| ; CHECK-LABEL: define {{[^@]+}}@kernel2 |
| ; CHECK-SAME: (ptr [[P:%.*]]) #[[ATTR1:[0-9]+]] { |
| ; CHECK-NEXT: store i32 1, ptr addrspace(3) @X, align 4 |
| ; CHECK-NEXT: call void @sync() |
| ; CHECK-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @X, align 4 |
| ; CHECK-NEXT: store i32 2, ptr addrspace(3) @X, align 4 |
| ; CHECK-NEXT: store i32 [[V]], ptr [[P]], align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| ; @sync is only declared in this file and carries no attributes. |
| ; The expected output keeps every instruction below, in the same order; the only |
| ; visible difference is the explicit `align 4` on the loads and stores. |
| store i32 1, ptr addrspace(3) @X |
| call void @sync() |
| %v = load i32, ptr addrspace(3) @X |
| store i32 2, ptr addrspace(3) @X |
| store i32 %v, ptr %p |
| ret void |
| } |
| |
| ; We can't ignore the sync reached through @sync_def, so the load of @X must be kept; hence this might store 2 into %p |
| define void @kernel3(ptr %p) "kernel" { |
| ; TUNIT-LABEL: define {{[^@]+}}@kernel3 |
| ; TUNIT-SAME: (ptr [[P:%.*]]) #[[ATTR1]] { |
| ; TUNIT-NEXT: store i32 1, ptr addrspace(3) @X, align 4 |
| ; TUNIT-NEXT: call void @sync_def.internalized() |
| ; TUNIT-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @X, align 4 |
| ; TUNIT-NEXT: store i32 2, ptr addrspace(3) @X, align 4 |
| ; TUNIT-NEXT: store i32 [[V]], ptr [[P]], align 4 |
| ; TUNIT-NEXT: ret void |
| ; |
| ; CGSCC-LABEL: define {{[^@]+}}@kernel3 |
| ; CGSCC-SAME: (ptr [[P:%.*]]) #[[ATTR1]] { |
| ; CGSCC-NEXT: store i32 1, ptr addrspace(3) @X, align 4 |
| ; CGSCC-NEXT: call void @sync_def() |
| ; CGSCC-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @X, align 4 |
| ; CGSCC-NEXT: store i32 2, ptr addrspace(3) @X, align 4 |
| ; CGSCC-NEXT: store i32 [[V]], ptr [[P]], align 4 |
| ; CGSCC-NEXT: ret void |
| ; |
| ; Same instruction sequence as @kernel2, except that the call goes to @sync_def, |
| ; which is defined in this file and itself calls @sync. |
| ; The two assertion sets differ only in the callee name: the TUNIT run expects |
| ; `@sync_def.internalized`, the CGSCC run expects `@sync_def`. |
| store i32 1, ptr addrspace(3) @X |
| call void @sync_def() |
| %v = load i32, ptr addrspace(3) @X |
| store i32 2, ptr addrspace(3) @X |
| store i32 %v, ptr %p |
| ret void |
| } |
| |
| define void @sync_def() { |
| ; CHECK-LABEL: define {{[^@]+}}@sync_def() { |
| ; CHECK-NEXT: call void @sync() |
| ; CHECK-NEXT: ret void |
| ; |
| ; Wrapper whose only action is to call @sync; @kernel3 is its only caller in this file. |
| ; The expected output leaves the body unchanged. |
| call void @sync() |
| ret void |
| } |
| |
| ; External declarations used by the functions above. |
| ; @sync is declared without any attributes. |
| declare void @sync() |
| ; @barrier carries the "llvm.assume"="ompx_aligned_barrier" string attribute and is not marked nosync. |
| declare void @barrier() norecurse nounwind nocallback "llvm.assume"="ompx_aligned_barrier" |
| ; @use1 is marked nosync; @kernel passes it the values loaded from @G and @H. |
| declare void @use1(i32) nosync norecurse nounwind nocallback |
| ; Called at the start and end of @kernel, respectively. |
| declare i32 @__kmpc_target_init(ptr, i8, i1) nocallback |
| declare void @__kmpc_target_deinit(ptr, i8) nocallback |
| declare void @llvm.assume(i1) |
| |
| ; Module flags: !0 and !1 set "openmp" and "openmp-device" to 50. |
| !llvm.module.flags = !{!0, !1} |
| ; One "kernel" annotation each for @kernel, @kernel2 and @kernel3. |
| !nvvm.annotations = !{!2, !3, !4} |
| |
| !0 = !{i32 7, !"openmp", i32 50} |
| !1 = !{i32 7, !"openmp-device", i32 50} |
| !2 = !{ptr @kernel, !"kernel", i32 1} |
| !3 = !{ptr @kernel2, !"kernel", i32 1} |
| !4 = !{ptr @kernel3, !"kernel", i32 1} |
| |
| ;. |
| ; CHECK: attributes #[[ATTR0]] = { norecurse "kernel" } |
| ; CHECK: attributes #[[ATTR1]] = { "kernel" } |
| ; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback norecurse nounwind "llvm.assume"="ompx_aligned_barrier" } |
| ; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback norecurse nosync nounwind } |
| ; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback } |
| ; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } |
| ; CHECK: attributes #[[ATTR6]] = { nounwind } |
| ;. |
| ; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50} |
| ; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} |
| ; CHECK: [[META2:![0-9]+]] = !{ptr @kernel, !"kernel", i32 1} |
| ; CHECK: [[META3:![0-9]+]] = !{ptr @kernel2, !"kernel", i32 1} |
| ; CHECK: [[META4:![0-9]+]] = !{ptr @kernel3, !"kernel", i32 1} |
| ;. |