| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc < %s -mtriple=nvptx64-- 2>&1 | FileCheck %s |
| ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64-- | %ptxas-verify %} |
| |
| ; Signed 128-bit remainder with a runtime divisor. |
| ; The expected PTX replaces each operand by its negation when its upper 64-bit |
| ; half is negative (sub.cc/subc.cc followed by selp), then runs the udiv-* blocks |
| ; (special cases, setup, a shift-by-one/subtract loop, loop exit) on those values. |
| ; In udiv-end the value produced by those blocks is multiplied by the adjusted |
| ; divisor, subtracted from the adjusted dividend, and the dividend's sign mask |
| ; (%rd2, the upper half shifted right arithmetically by 63) is applied via xor/sub. |
| define i128 @srem_i128(i128 %lhs, i128 %rhs) { |
| ; CHECK-LABEL: srem_i128( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<19>; |
| ; CHECK-NEXT: .reg .b32 %r<16>; |
| ; CHECK-NEXT: .reg .b64 %rd<127>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: // %_udiv-special-cases |
| ; CHECK-NEXT: ld.param.v2.u64 {%rd45, %rd46}, [srem_i128_param_0]; |
| ; CHECK-NEXT: ld.param.v2.u64 {%rd49, %rd50}, [srem_i128_param_1]; |
| ; CHECK-NEXT: shr.s64 %rd2, %rd46, 63; |
| ; CHECK-NEXT: mov.b64 %rd117, 0; |
| ; CHECK-NEXT: sub.cc.s64 %rd52, %rd117, %rd45; |
| ; CHECK-NEXT: subc.cc.s64 %rd53, %rd117, %rd46; |
| ; CHECK-NEXT: setp.lt.s64 %p1, %rd46, 0; |
| ; CHECK-NEXT: selp.b64 %rd4, %rd53, %rd46, %p1; |
| ; CHECK-NEXT: selp.b64 %rd3, %rd52, %rd45, %p1; |
| ; CHECK-NEXT: sub.cc.s64 %rd54, %rd117, %rd49; |
| ; CHECK-NEXT: subc.cc.s64 %rd55, %rd117, %rd50; |
| ; CHECK-NEXT: setp.lt.s64 %p2, %rd50, 0; |
| ; CHECK-NEXT: selp.b64 %rd6, %rd55, %rd50, %p2; |
| ; CHECK-NEXT: selp.b64 %rd5, %rd54, %rd49, %p2; |
| ; CHECK-NEXT: or.b64 %rd56, %rd5, %rd6; |
| ; CHECK-NEXT: setp.eq.s64 %p3, %rd56, 0; |
| ; CHECK-NEXT: or.b64 %rd57, %rd3, %rd4; |
| ; CHECK-NEXT: setp.eq.s64 %p4, %rd57, 0; |
| ; CHECK-NEXT: or.pred %p5, %p3, %p4; |
| ; CHECK-NEXT: setp.ne.s64 %p6, %rd6, 0; |
| ; CHECK-NEXT: clz.b64 %r1, %rd6; |
| ; CHECK-NEXT: cvt.u64.u32 %rd58, %r1; |
| ; CHECK-NEXT: clz.b64 %r2, %rd5; |
| ; CHECK-NEXT: cvt.u64.u32 %rd59, %r2; |
| ; CHECK-NEXT: add.s64 %rd60, %rd59, 64; |
| ; CHECK-NEXT: selp.b64 %rd61, %rd58, %rd60, %p6; |
| ; CHECK-NEXT: setp.ne.s64 %p7, %rd4, 0; |
| ; CHECK-NEXT: clz.b64 %r3, %rd4; |
| ; CHECK-NEXT: cvt.u64.u32 %rd62, %r3; |
| ; CHECK-NEXT: clz.b64 %r4, %rd3; |
| ; CHECK-NEXT: cvt.u64.u32 %rd63, %r4; |
| ; CHECK-NEXT: add.s64 %rd64, %rd63, 64; |
| ; CHECK-NEXT: selp.b64 %rd65, %rd62, %rd64, %p7; |
| ; CHECK-NEXT: sub.cc.s64 %rd66, %rd61, %rd65; |
| ; CHECK-NEXT: subc.cc.s64 %rd67, %rd117, 0; |
| ; CHECK-NEXT: setp.eq.s64 %p8, %rd67, 0; |
| ; CHECK-NEXT: setp.ne.s64 %p9, %rd67, 0; |
| ; CHECK-NEXT: selp.u32 %r5, -1, 0, %p9; |
| ; CHECK-NEXT: setp.gt.u64 %p10, %rd66, 127; |
| ; CHECK-NEXT: selp.u32 %r6, -1, 0, %p10; |
| ; CHECK-NEXT: selp.b32 %r7, %r6, %r5, %p8; |
| ; CHECK-NEXT: and.b32 %r8, %r7, 1; |
| ; CHECK-NEXT: setp.eq.b32 %p11, %r8, 1; |
| ; CHECK-NEXT: or.pred %p12, %p5, %p11; |
| ; CHECK-NEXT: xor.b64 %rd68, %rd66, 127; |
| ; CHECK-NEXT: or.b64 %rd69, %rd68, %rd67; |
| ; CHECK-NEXT: setp.eq.s64 %p13, %rd69, 0; |
| ; CHECK-NEXT: selp.b64 %rd126, 0, %rd4, %p12; |
| ; CHECK-NEXT: selp.b64 %rd125, 0, %rd3, %p12; |
| ; CHECK-NEXT: or.pred %p14, %p12, %p13; |
| ; CHECK-NEXT: @%p14 bra $L__BB0_5; |
| ; CHECK-NEXT: // %bb.3: // %udiv-bb1 |
| ; CHECK-NEXT: add.cc.s64 %rd119, %rd66, 1; |
| ; CHECK-NEXT: addc.cc.s64 %rd120, %rd67, 0; |
| ; CHECK-NEXT: or.b64 %rd72, %rd119, %rd120; |
| ; CHECK-NEXT: setp.eq.s64 %p15, %rd72, 0; |
| ; CHECK-NEXT: cvt.u32.u64 %r9, %rd66; |
| ; CHECK-NEXT: sub.s32 %r10, 127, %r9; |
| ; CHECK-NEXT: shl.b64 %rd73, %rd4, %r10; |
| ; CHECK-NEXT: sub.s32 %r11, 64, %r10; |
| ; CHECK-NEXT: shr.u64 %rd74, %rd3, %r11; |
| ; CHECK-NEXT: or.b64 %rd75, %rd73, %rd74; |
| ; CHECK-NEXT: sub.s32 %r12, 63, %r9; |
| ; CHECK-NEXT: shl.b64 %rd76, %rd3, %r12; |
| ; CHECK-NEXT: setp.gt.s32 %p16, %r10, 63; |
| ; CHECK-NEXT: selp.b64 %rd124, %rd76, %rd75, %p16; |
| ; CHECK-NEXT: shl.b64 %rd123, %rd3, %r10; |
| ; CHECK-NEXT: mov.u64 %rd114, %rd117; |
| ; CHECK-NEXT: @%p15 bra $L__BB0_4; |
| ; CHECK-NEXT: // %bb.1: // %udiv-preheader |
| ; CHECK-NEXT: cvt.u32.u64 %r13, %rd119; |
| ; CHECK-NEXT: shr.u64 %rd79, %rd3, %r13; |
| ; CHECK-NEXT: sub.s32 %r14, 64, %r13; |
| ; CHECK-NEXT: shl.b64 %rd80, %rd4, %r14; |
| ; CHECK-NEXT: or.b64 %rd81, %rd79, %rd80; |
| ; CHECK-NEXT: add.s32 %r15, %r13, -64; |
| ; CHECK-NEXT: shr.u64 %rd82, %rd4, %r15; |
| ; CHECK-NEXT: setp.gt.s32 %p17, %r13, 63; |
| ; CHECK-NEXT: selp.b64 %rd121, %rd82, %rd81, %p17; |
| ; CHECK-NEXT: shr.u64 %rd122, %rd4, %r13; |
| ; CHECK-NEXT: add.cc.s64 %rd35, %rd5, -1; |
| ; CHECK-NEXT: addc.cc.s64 %rd36, %rd6, -1; |
| ; CHECK-NEXT: mov.b64 %rd114, 0; |
| ; CHECK-NEXT: mov.u64 %rd117, %rd114; |
| ; CHECK-NEXT: $L__BB0_2: // %udiv-do-while |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: shr.u64 %rd83, %rd121, 63; |
| ; CHECK-NEXT: shl.b64 %rd84, %rd122, 1; |
| ; CHECK-NEXT: or.b64 %rd85, %rd84, %rd83; |
| ; CHECK-NEXT: shl.b64 %rd86, %rd121, 1; |
| ; CHECK-NEXT: shr.u64 %rd87, %rd124, 63; |
| ; CHECK-NEXT: or.b64 %rd88, %rd86, %rd87; |
| ; CHECK-NEXT: shr.u64 %rd89, %rd123, 63; |
| ; CHECK-NEXT: shl.b64 %rd90, %rd124, 1; |
| ; CHECK-NEXT: or.b64 %rd91, %rd90, %rd89; |
| ; CHECK-NEXT: shl.b64 %rd92, %rd123, 1; |
| ; CHECK-NEXT: or.b64 %rd123, %rd117, %rd92; |
| ; CHECK-NEXT: or.b64 %rd124, %rd114, %rd91; |
| ; CHECK-NEXT: sub.cc.s64 %rd93, %rd35, %rd88; |
| ; CHECK-NEXT: subc.cc.s64 %rd94, %rd36, %rd85; |
| ; CHECK-NEXT: shr.s64 %rd95, %rd94, 63; |
| ; CHECK-NEXT: and.b64 %rd117, %rd95, 1; |
| ; CHECK-NEXT: and.b64 %rd96, %rd95, %rd5; |
| ; CHECK-NEXT: and.b64 %rd97, %rd95, %rd6; |
| ; CHECK-NEXT: sub.cc.s64 %rd121, %rd88, %rd96; |
| ; CHECK-NEXT: subc.cc.s64 %rd122, %rd85, %rd97; |
| ; CHECK-NEXT: add.cc.s64 %rd119, %rd119, -1; |
| ; CHECK-NEXT: addc.cc.s64 %rd120, %rd120, -1; |
| ; CHECK-NEXT: or.b64 %rd98, %rd119, %rd120; |
| ; CHECK-NEXT: setp.eq.s64 %p18, %rd98, 0; |
| ; CHECK-NEXT: @%p18 bra $L__BB0_4; |
| ; CHECK-NEXT: bra.uni $L__BB0_2; |
| ; CHECK-NEXT: $L__BB0_4: // %udiv-loop-exit |
| ; CHECK-NEXT: shr.u64 %rd99, %rd123, 63; |
| ; CHECK-NEXT: shl.b64 %rd100, %rd124, 1; |
| ; CHECK-NEXT: or.b64 %rd101, %rd100, %rd99; |
| ; CHECK-NEXT: shl.b64 %rd102, %rd123, 1; |
| ; CHECK-NEXT: or.b64 %rd125, %rd117, %rd102; |
| ; CHECK-NEXT: or.b64 %rd126, %rd114, %rd101; |
| ; CHECK-NEXT: $L__BB0_5: // %udiv-end |
| ; CHECK-NEXT: mul.hi.u64 %rd103, %rd5, %rd125; |
| ; CHECK-NEXT: mad.lo.s64 %rd104, %rd5, %rd126, %rd103; |
| ; CHECK-NEXT: mad.lo.s64 %rd105, %rd6, %rd125, %rd104; |
| ; CHECK-NEXT: mul.lo.s64 %rd106, %rd5, %rd125; |
| ; CHECK-NEXT: sub.cc.s64 %rd107, %rd3, %rd106; |
| ; CHECK-NEXT: subc.cc.s64 %rd108, %rd4, %rd105; |
| ; CHECK-NEXT: xor.b64 %rd109, %rd107, %rd2; |
| ; CHECK-NEXT: xor.b64 %rd110, %rd108, %rd2; |
| ; CHECK-NEXT: sub.cc.s64 %rd111, %rd109, %rd2; |
| ; CHECK-NEXT: subc.cc.s64 %rd112, %rd110, %rd2; |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd111, %rd112}; |
| ; CHECK-NEXT: ret; |
| ; Note: the value is named %div although srem yields the remainder. |
| %div = srem i128 %lhs, %rhs |
| ret i128 %div |
| } |
| |
| ; Unsigned 128-bit remainder with a runtime divisor. |
| ; The expected PTX runs the udiv-* blocks (special cases, setup, a |
| ; shift-by-one/subtract loop, loop exit) directly on the loaded operands; there |
| ; is no sign handling. In udiv-end the value produced by those blocks is |
| ; multiplied by the divisor (mul.hi/mad.lo/mul.lo) and subtracted from the |
| ; dividend with sub.cc/subc.cc before being stored as the return value. |
| define i128 @urem_i128(i128 %lhs, i128 %rhs) { |
| ; CHECK-LABEL: urem_i128( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<17>; |
| ; CHECK-NEXT: .reg .b32 %r<16>; |
| ; CHECK-NEXT: .reg .b64 %rd<113>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: // %_udiv-special-cases |
| ; CHECK-NEXT: ld.param.v2.u64 {%rd41, %rd42}, [urem_i128_param_0]; |
| ; CHECK-NEXT: ld.param.v2.u64 {%rd3, %rd4}, [urem_i128_param_1]; |
| ; CHECK-NEXT: or.b64 %rd45, %rd3, %rd4; |
| ; CHECK-NEXT: setp.eq.s64 %p1, %rd45, 0; |
| ; CHECK-NEXT: or.b64 %rd46, %rd41, %rd42; |
| ; CHECK-NEXT: setp.eq.s64 %p2, %rd46, 0; |
| ; CHECK-NEXT: or.pred %p3, %p1, %p2; |
| ; CHECK-NEXT: setp.ne.s64 %p4, %rd4, 0; |
| ; CHECK-NEXT: clz.b64 %r1, %rd4; |
| ; CHECK-NEXT: cvt.u64.u32 %rd47, %r1; |
| ; CHECK-NEXT: clz.b64 %r2, %rd3; |
| ; CHECK-NEXT: cvt.u64.u32 %rd48, %r2; |
| ; CHECK-NEXT: add.s64 %rd49, %rd48, 64; |
| ; CHECK-NEXT: selp.b64 %rd50, %rd47, %rd49, %p4; |
| ; CHECK-NEXT: setp.ne.s64 %p5, %rd42, 0; |
| ; CHECK-NEXT: clz.b64 %r3, %rd42; |
| ; CHECK-NEXT: cvt.u64.u32 %rd51, %r3; |
| ; CHECK-NEXT: clz.b64 %r4, %rd41; |
| ; CHECK-NEXT: cvt.u64.u32 %rd52, %r4; |
| ; CHECK-NEXT: add.s64 %rd53, %rd52, 64; |
| ; CHECK-NEXT: selp.b64 %rd54, %rd51, %rd53, %p5; |
| ; CHECK-NEXT: mov.b64 %rd103, 0; |
| ; CHECK-NEXT: sub.cc.s64 %rd56, %rd50, %rd54; |
| ; CHECK-NEXT: subc.cc.s64 %rd57, %rd103, 0; |
| ; CHECK-NEXT: setp.eq.s64 %p6, %rd57, 0; |
| ; CHECK-NEXT: setp.ne.s64 %p7, %rd57, 0; |
| ; CHECK-NEXT: selp.u32 %r5, -1, 0, %p7; |
| ; CHECK-NEXT: setp.gt.u64 %p8, %rd56, 127; |
| ; CHECK-NEXT: selp.u32 %r6, -1, 0, %p8; |
| ; CHECK-NEXT: selp.b32 %r7, %r6, %r5, %p6; |
| ; CHECK-NEXT: and.b32 %r8, %r7, 1; |
| ; CHECK-NEXT: setp.eq.b32 %p9, %r8, 1; |
| ; CHECK-NEXT: or.pred %p10, %p3, %p9; |
| ; CHECK-NEXT: xor.b64 %rd58, %rd56, 127; |
| ; CHECK-NEXT: or.b64 %rd59, %rd58, %rd57; |
| ; CHECK-NEXT: setp.eq.s64 %p11, %rd59, 0; |
| ; CHECK-NEXT: selp.b64 %rd112, 0, %rd42, %p10; |
| ; CHECK-NEXT: selp.b64 %rd111, 0, %rd41, %p10; |
| ; CHECK-NEXT: or.pred %p12, %p10, %p11; |
| ; CHECK-NEXT: @%p12 bra $L__BB1_5; |
| ; CHECK-NEXT: // %bb.3: // %udiv-bb1 |
| ; CHECK-NEXT: add.cc.s64 %rd105, %rd56, 1; |
| ; CHECK-NEXT: addc.cc.s64 %rd106, %rd57, 0; |
| ; CHECK-NEXT: or.b64 %rd62, %rd105, %rd106; |
| ; CHECK-NEXT: setp.eq.s64 %p13, %rd62, 0; |
| ; CHECK-NEXT: cvt.u32.u64 %r9, %rd56; |
| ; CHECK-NEXT: sub.s32 %r10, 127, %r9; |
| ; CHECK-NEXT: shl.b64 %rd63, %rd42, %r10; |
| ; CHECK-NEXT: sub.s32 %r11, 64, %r10; |
| ; CHECK-NEXT: shr.u64 %rd64, %rd41, %r11; |
| ; CHECK-NEXT: or.b64 %rd65, %rd63, %rd64; |
| ; CHECK-NEXT: sub.s32 %r12, 63, %r9; |
| ; CHECK-NEXT: shl.b64 %rd66, %rd41, %r12; |
| ; CHECK-NEXT: setp.gt.s32 %p14, %r10, 63; |
| ; CHECK-NEXT: selp.b64 %rd110, %rd66, %rd65, %p14; |
| ; CHECK-NEXT: shl.b64 %rd109, %rd41, %r10; |
| ; CHECK-NEXT: mov.u64 %rd100, %rd103; |
| ; CHECK-NEXT: @%p13 bra $L__BB1_4; |
| ; CHECK-NEXT: // %bb.1: // %udiv-preheader |
| ; CHECK-NEXT: cvt.u32.u64 %r13, %rd105; |
| ; CHECK-NEXT: shr.u64 %rd69, %rd41, %r13; |
| ; CHECK-NEXT: sub.s32 %r14, 64, %r13; |
| ; CHECK-NEXT: shl.b64 %rd70, %rd42, %r14; |
| ; CHECK-NEXT: or.b64 %rd71, %rd69, %rd70; |
| ; CHECK-NEXT: add.s32 %r15, %r13, -64; |
| ; CHECK-NEXT: shr.u64 %rd72, %rd42, %r15; |
| ; CHECK-NEXT: setp.gt.s32 %p15, %r13, 63; |
| ; CHECK-NEXT: selp.b64 %rd107, %rd72, %rd71, %p15; |
| ; CHECK-NEXT: shr.u64 %rd108, %rd42, %r13; |
| ; CHECK-NEXT: add.cc.s64 %rd33, %rd3, -1; |
| ; CHECK-NEXT: addc.cc.s64 %rd34, %rd4, -1; |
| ; CHECK-NEXT: mov.b64 %rd100, 0; |
| ; CHECK-NEXT: mov.u64 %rd103, %rd100; |
| ; CHECK-NEXT: $L__BB1_2: // %udiv-do-while |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: shr.u64 %rd73, %rd107, 63; |
| ; CHECK-NEXT: shl.b64 %rd74, %rd108, 1; |
| ; CHECK-NEXT: or.b64 %rd75, %rd74, %rd73; |
| ; CHECK-NEXT: shl.b64 %rd76, %rd107, 1; |
| ; CHECK-NEXT: shr.u64 %rd77, %rd110, 63; |
| ; CHECK-NEXT: or.b64 %rd78, %rd76, %rd77; |
| ; CHECK-NEXT: shr.u64 %rd79, %rd109, 63; |
| ; CHECK-NEXT: shl.b64 %rd80, %rd110, 1; |
| ; CHECK-NEXT: or.b64 %rd81, %rd80, %rd79; |
| ; CHECK-NEXT: shl.b64 %rd82, %rd109, 1; |
| ; CHECK-NEXT: or.b64 %rd109, %rd103, %rd82; |
| ; CHECK-NEXT: or.b64 %rd110, %rd100, %rd81; |
| ; CHECK-NEXT: sub.cc.s64 %rd83, %rd33, %rd78; |
| ; CHECK-NEXT: subc.cc.s64 %rd84, %rd34, %rd75; |
| ; CHECK-NEXT: shr.s64 %rd85, %rd84, 63; |
| ; CHECK-NEXT: and.b64 %rd103, %rd85, 1; |
| ; CHECK-NEXT: and.b64 %rd86, %rd85, %rd3; |
| ; CHECK-NEXT: and.b64 %rd87, %rd85, %rd4; |
| ; CHECK-NEXT: sub.cc.s64 %rd107, %rd78, %rd86; |
| ; CHECK-NEXT: subc.cc.s64 %rd108, %rd75, %rd87; |
| ; CHECK-NEXT: add.cc.s64 %rd105, %rd105, -1; |
| ; CHECK-NEXT: addc.cc.s64 %rd106, %rd106, -1; |
| ; CHECK-NEXT: or.b64 %rd88, %rd105, %rd106; |
| ; CHECK-NEXT: setp.eq.s64 %p16, %rd88, 0; |
| ; CHECK-NEXT: @%p16 bra $L__BB1_4; |
| ; CHECK-NEXT: bra.uni $L__BB1_2; |
| ; CHECK-NEXT: $L__BB1_4: // %udiv-loop-exit |
| ; CHECK-NEXT: shr.u64 %rd89, %rd109, 63; |
| ; CHECK-NEXT: shl.b64 %rd90, %rd110, 1; |
| ; CHECK-NEXT: or.b64 %rd91, %rd90, %rd89; |
| ; CHECK-NEXT: shl.b64 %rd92, %rd109, 1; |
| ; CHECK-NEXT: or.b64 %rd111, %rd103, %rd92; |
| ; CHECK-NEXT: or.b64 %rd112, %rd100, %rd91; |
| ; CHECK-NEXT: $L__BB1_5: // %udiv-end |
| ; CHECK-NEXT: mul.hi.u64 %rd93, %rd3, %rd111; |
| ; CHECK-NEXT: mad.lo.s64 %rd94, %rd3, %rd112, %rd93; |
| ; CHECK-NEXT: mad.lo.s64 %rd95, %rd4, %rd111, %rd94; |
| ; CHECK-NEXT: mul.lo.s64 %rd96, %rd3, %rd111; |
| ; CHECK-NEXT: sub.cc.s64 %rd97, %rd41, %rd96; |
| ; CHECK-NEXT: subc.cc.s64 %rd98, %rd42, %rd95; |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd97, %rd98}; |
| ; CHECK-NEXT: ret; |
| ; Note: the value is named %div although urem yields the remainder. |
| %div = urem i128 %lhs, %rhs |
| ret i128 %div |
| } |
| |
| ; Signed 128-bit remainder by the constant 8589934592 (2^33). |
| ; The expected PTX is branch-free: it derives a bias from the sign of the upper |
| ; half (shr.s64 by 63, then shr.u64 by 31), adds it to the input with carry, |
| ; masks the low word with -8589934592, and subtracts that from the input. |
| define i128 @srem_i128_pow2k(i128 %lhs) { |
| ; CHECK-LABEL: srem_i128_pow2k( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<10>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.v2.u64 {%rd1, %rd2}, [srem_i128_pow2k_param_0]; |
| ; CHECK-NEXT: shr.s64 %rd3, %rd2, 63; |
| ; CHECK-NEXT: shr.u64 %rd4, %rd3, 31; |
| ; CHECK-NEXT: add.cc.s64 %rd5, %rd1, %rd4; |
| ; CHECK-NEXT: addc.cc.s64 %rd6, %rd2, 0; |
| ; CHECK-NEXT: and.b64 %rd7, %rd5, -8589934592; |
| ; CHECK-NEXT: sub.cc.s64 %rd8, %rd1, %rd7; |
| ; CHECK-NEXT: subc.cc.s64 %rd9, %rd2, %rd6; |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd8, %rd9}; |
| ; CHECK-NEXT: ret; |
| ; Note: the value is named %div although srem yields the remainder. |
| %div = srem i128 %lhs, 8589934592 |
| ret i128 %div |
| } |
| |
| ; Unsigned 128-bit remainder by the constant 8589934592 (2^33). |
| ; The expected PTX masks the low word with 8589934591 (2^33 - 1) and stores a |
| ; zero upper word. |
| define i128 @urem_i128_pow2k(i128 %lhs) { |
| ; CHECK-LABEL: urem_i128_pow2k( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<5>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.v2.u64 {%rd1, %rd2}, [urem_i128_pow2k_param_0]; |
| ; CHECK-NEXT: and.b64 %rd3, %rd1, 8589934591; |
| ; CHECK-NEXT: mov.b64 %rd4, 0; |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd3, %rd4}; |
| ; CHECK-NEXT: ret; |
| ; Note: the value is named %div although urem yields the remainder. |
| %div = urem i128 %lhs, 8589934592 |
| ret i128 %div |
| } |
| |
| ; Signed 128-bit division with a runtime divisor. |
| ; The expected PTX replaces each operand by its negation when its upper 64-bit |
| ; half is negative (sub.cc/subc.cc followed by selp) and derives a result sign |
| ; mask (%rd5) from the xor of the two upper halves shifted right arithmetically |
| ; by 63. It then runs the udiv-* blocks (special cases, setup, a |
| ; shift-by-one/subtract loop, loop exit) and, in udiv-end, applies the sign mask |
| ; to the result via xor followed by sub.cc/subc.cc. |
| define i128 @sdiv_i128(i128 %lhs, i128 %rhs) { |
| ; CHECK-LABEL: sdiv_i128( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<19>; |
| ; CHECK-NEXT: .reg .b32 %r<16>; |
| ; CHECK-NEXT: .reg .b64 %rd<122>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: // %_udiv-special-cases |
| ; CHECK-NEXT: ld.param.v2.u64 {%rd45, %rd46}, [sdiv_i128_param_0]; |
| ; CHECK-NEXT: ld.param.v2.u64 {%rd49, %rd50}, [sdiv_i128_param_1]; |
| ; CHECK-NEXT: mov.b64 %rd112, 0; |
| ; CHECK-NEXT: sub.cc.s64 %rd52, %rd112, %rd45; |
| ; CHECK-NEXT: subc.cc.s64 %rd53, %rd112, %rd46; |
| ; CHECK-NEXT: setp.lt.s64 %p1, %rd46, 0; |
| ; CHECK-NEXT: selp.b64 %rd2, %rd53, %rd46, %p1; |
| ; CHECK-NEXT: selp.b64 %rd1, %rd52, %rd45, %p1; |
| ; CHECK-NEXT: sub.cc.s64 %rd54, %rd112, %rd49; |
| ; CHECK-NEXT: subc.cc.s64 %rd55, %rd112, %rd50; |
| ; CHECK-NEXT: setp.lt.s64 %p2, %rd50, 0; |
| ; CHECK-NEXT: selp.b64 %rd4, %rd55, %rd50, %p2; |
| ; CHECK-NEXT: selp.b64 %rd3, %rd54, %rd49, %p2; |
| ; CHECK-NEXT: xor.b64 %rd56, %rd50, %rd46; |
| ; CHECK-NEXT: shr.s64 %rd5, %rd56, 63; |
| ; CHECK-NEXT: or.b64 %rd57, %rd3, %rd4; |
| ; CHECK-NEXT: setp.eq.s64 %p3, %rd57, 0; |
| ; CHECK-NEXT: or.b64 %rd58, %rd1, %rd2; |
| ; CHECK-NEXT: setp.eq.s64 %p4, %rd58, 0; |
| ; CHECK-NEXT: or.pred %p5, %p3, %p4; |
| ; CHECK-NEXT: setp.ne.s64 %p6, %rd4, 0; |
| ; CHECK-NEXT: clz.b64 %r1, %rd4; |
| ; CHECK-NEXT: cvt.u64.u32 %rd59, %r1; |
| ; CHECK-NEXT: clz.b64 %r2, %rd3; |
| ; CHECK-NEXT: cvt.u64.u32 %rd60, %r2; |
| ; CHECK-NEXT: add.s64 %rd61, %rd60, 64; |
| ; CHECK-NEXT: selp.b64 %rd62, %rd59, %rd61, %p6; |
| ; CHECK-NEXT: setp.ne.s64 %p7, %rd2, 0; |
| ; CHECK-NEXT: clz.b64 %r3, %rd2; |
| ; CHECK-NEXT: cvt.u64.u32 %rd63, %r3; |
| ; CHECK-NEXT: clz.b64 %r4, %rd1; |
| ; CHECK-NEXT: cvt.u64.u32 %rd64, %r4; |
| ; CHECK-NEXT: add.s64 %rd65, %rd64, 64; |
| ; CHECK-NEXT: selp.b64 %rd66, %rd63, %rd65, %p7; |
| ; CHECK-NEXT: sub.cc.s64 %rd67, %rd62, %rd66; |
| ; CHECK-NEXT: subc.cc.s64 %rd68, %rd112, 0; |
| ; CHECK-NEXT: setp.eq.s64 %p8, %rd68, 0; |
| ; CHECK-NEXT: setp.ne.s64 %p9, %rd68, 0; |
| ; CHECK-NEXT: selp.u32 %r5, -1, 0, %p9; |
| ; CHECK-NEXT: setp.gt.u64 %p10, %rd67, 127; |
| ; CHECK-NEXT: selp.u32 %r6, -1, 0, %p10; |
| ; CHECK-NEXT: selp.b32 %r7, %r6, %r5, %p8; |
| ; CHECK-NEXT: and.b32 %r8, %r7, 1; |
| ; CHECK-NEXT: setp.eq.b32 %p11, %r8, 1; |
| ; CHECK-NEXT: or.pred %p12, %p5, %p11; |
| ; CHECK-NEXT: xor.b64 %rd69, %rd67, 127; |
| ; CHECK-NEXT: or.b64 %rd70, %rd69, %rd68; |
| ; CHECK-NEXT: setp.eq.s64 %p13, %rd70, 0; |
| ; CHECK-NEXT: selp.b64 %rd121, 0, %rd2, %p12; |
| ; CHECK-NEXT: selp.b64 %rd120, 0, %rd1, %p12; |
| ; CHECK-NEXT: or.pred %p14, %p12, %p13; |
| ; CHECK-NEXT: @%p14 bra $L__BB4_5; |
| ; CHECK-NEXT: // %bb.3: // %udiv-bb1 |
| ; CHECK-NEXT: add.cc.s64 %rd114, %rd67, 1; |
| ; CHECK-NEXT: addc.cc.s64 %rd115, %rd68, 0; |
| ; CHECK-NEXT: or.b64 %rd73, %rd114, %rd115; |
| ; CHECK-NEXT: setp.eq.s64 %p15, %rd73, 0; |
| ; CHECK-NEXT: cvt.u32.u64 %r9, %rd67; |
| ; CHECK-NEXT: sub.s32 %r10, 127, %r9; |
| ; CHECK-NEXT: shl.b64 %rd74, %rd2, %r10; |
| ; CHECK-NEXT: sub.s32 %r11, 64, %r10; |
| ; CHECK-NEXT: shr.u64 %rd75, %rd1, %r11; |
| ; CHECK-NEXT: or.b64 %rd76, %rd74, %rd75; |
| ; CHECK-NEXT: sub.s32 %r12, 63, %r9; |
| ; CHECK-NEXT: shl.b64 %rd77, %rd1, %r12; |
| ; CHECK-NEXT: setp.gt.s32 %p16, %r10, 63; |
| ; CHECK-NEXT: selp.b64 %rd119, %rd77, %rd76, %p16; |
| ; CHECK-NEXT: shl.b64 %rd118, %rd1, %r10; |
| ; CHECK-NEXT: mov.u64 %rd109, %rd112; |
| ; CHECK-NEXT: @%p15 bra $L__BB4_4; |
| ; CHECK-NEXT: // %bb.1: // %udiv-preheader |
| ; CHECK-NEXT: cvt.u32.u64 %r13, %rd114; |
| ; CHECK-NEXT: shr.u64 %rd80, %rd1, %r13; |
| ; CHECK-NEXT: sub.s32 %r14, 64, %r13; |
| ; CHECK-NEXT: shl.b64 %rd81, %rd2, %r14; |
| ; CHECK-NEXT: or.b64 %rd82, %rd80, %rd81; |
| ; CHECK-NEXT: add.s32 %r15, %r13, -64; |
| ; CHECK-NEXT: shr.u64 %rd83, %rd2, %r15; |
| ; CHECK-NEXT: setp.gt.s32 %p17, %r13, 63; |
| ; CHECK-NEXT: selp.b64 %rd116, %rd83, %rd82, %p17; |
| ; CHECK-NEXT: shr.u64 %rd117, %rd2, %r13; |
| ; CHECK-NEXT: add.cc.s64 %rd35, %rd3, -1; |
| ; CHECK-NEXT: addc.cc.s64 %rd36, %rd4, -1; |
| ; CHECK-NEXT: mov.b64 %rd109, 0; |
| ; CHECK-NEXT: mov.u64 %rd112, %rd109; |
| ; CHECK-NEXT: $L__BB4_2: // %udiv-do-while |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: shr.u64 %rd84, %rd116, 63; |
| ; CHECK-NEXT: shl.b64 %rd85, %rd117, 1; |
| ; CHECK-NEXT: or.b64 %rd86, %rd85, %rd84; |
| ; CHECK-NEXT: shl.b64 %rd87, %rd116, 1; |
| ; CHECK-NEXT: shr.u64 %rd88, %rd119, 63; |
| ; CHECK-NEXT: or.b64 %rd89, %rd87, %rd88; |
| ; CHECK-NEXT: shr.u64 %rd90, %rd118, 63; |
| ; CHECK-NEXT: shl.b64 %rd91, %rd119, 1; |
| ; CHECK-NEXT: or.b64 %rd92, %rd91, %rd90; |
| ; CHECK-NEXT: shl.b64 %rd93, %rd118, 1; |
| ; CHECK-NEXT: or.b64 %rd118, %rd112, %rd93; |
| ; CHECK-NEXT: or.b64 %rd119, %rd109, %rd92; |
| ; CHECK-NEXT: sub.cc.s64 %rd94, %rd35, %rd89; |
| ; CHECK-NEXT: subc.cc.s64 %rd95, %rd36, %rd86; |
| ; CHECK-NEXT: shr.s64 %rd96, %rd95, 63; |
| ; CHECK-NEXT: and.b64 %rd112, %rd96, 1; |
| ; CHECK-NEXT: and.b64 %rd97, %rd96, %rd3; |
| ; CHECK-NEXT: and.b64 %rd98, %rd96, %rd4; |
| ; CHECK-NEXT: sub.cc.s64 %rd116, %rd89, %rd97; |
| ; CHECK-NEXT: subc.cc.s64 %rd117, %rd86, %rd98; |
| ; CHECK-NEXT: add.cc.s64 %rd114, %rd114, -1; |
| ; CHECK-NEXT: addc.cc.s64 %rd115, %rd115, -1; |
| ; CHECK-NEXT: or.b64 %rd99, %rd114, %rd115; |
| ; CHECK-NEXT: setp.eq.s64 %p18, %rd99, 0; |
| ; CHECK-NEXT: @%p18 bra $L__BB4_4; |
| ; CHECK-NEXT: bra.uni $L__BB4_2; |
| ; CHECK-NEXT: $L__BB4_4: // %udiv-loop-exit |
| ; CHECK-NEXT: shr.u64 %rd100, %rd118, 63; |
| ; CHECK-NEXT: shl.b64 %rd101, %rd119, 1; |
| ; CHECK-NEXT: or.b64 %rd102, %rd101, %rd100; |
| ; CHECK-NEXT: shl.b64 %rd103, %rd118, 1; |
| ; CHECK-NEXT: or.b64 %rd120, %rd112, %rd103; |
| ; CHECK-NEXT: or.b64 %rd121, %rd109, %rd102; |
| ; CHECK-NEXT: $L__BB4_5: // %udiv-end |
| ; CHECK-NEXT: xor.b64 %rd104, %rd120, %rd5; |
| ; CHECK-NEXT: xor.b64 %rd105, %rd121, %rd5; |
| ; CHECK-NEXT: sub.cc.s64 %rd106, %rd104, %rd5; |
| ; CHECK-NEXT: subc.cc.s64 %rd107, %rd105, %rd5; |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd106, %rd107}; |
| ; CHECK-NEXT: ret; |
| %div = sdiv i128 %lhs, %rhs |
| ret i128 %div |
| } |
| |
| ; Unsigned 128-bit division with a runtime divisor. |
| ; The expected PTX consists of the udiv-* blocks only: special-case tests on |
| ; zero operands and on the difference of leading-zero counts, shift setup, a |
| ; shift-by-one/subtract loop that counts a 128-bit counter down to zero, and a |
| ; loop exit whose result is stored directly as the return value. |
| define i128 @udiv_i128(i128 %lhs, i128 %rhs) { |
| ; CHECK-LABEL: udiv_i128( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<17>; |
| ; CHECK-NEXT: .reg .b32 %r<16>; |
| ; CHECK-NEXT: .reg .b64 %rd<107>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: // %_udiv-special-cases |
| ; CHECK-NEXT: ld.param.v2.u64 {%rd41, %rd42}, [udiv_i128_param_0]; |
| ; CHECK-NEXT: ld.param.v2.u64 {%rd43, %rd44}, [udiv_i128_param_1]; |
| ; CHECK-NEXT: or.b64 %rd45, %rd43, %rd44; |
| ; CHECK-NEXT: setp.eq.s64 %p1, %rd45, 0; |
| ; CHECK-NEXT: or.b64 %rd46, %rd41, %rd42; |
| ; CHECK-NEXT: setp.eq.s64 %p2, %rd46, 0; |
| ; CHECK-NEXT: or.pred %p3, %p1, %p2; |
| ; CHECK-NEXT: setp.ne.s64 %p4, %rd44, 0; |
| ; CHECK-NEXT: clz.b64 %r1, %rd44; |
| ; CHECK-NEXT: cvt.u64.u32 %rd47, %r1; |
| ; CHECK-NEXT: clz.b64 %r2, %rd43; |
| ; CHECK-NEXT: cvt.u64.u32 %rd48, %r2; |
| ; CHECK-NEXT: add.s64 %rd49, %rd48, 64; |
| ; CHECK-NEXT: selp.b64 %rd50, %rd47, %rd49, %p4; |
| ; CHECK-NEXT: setp.ne.s64 %p5, %rd42, 0; |
| ; CHECK-NEXT: clz.b64 %r3, %rd42; |
| ; CHECK-NEXT: cvt.u64.u32 %rd51, %r3; |
| ; CHECK-NEXT: clz.b64 %r4, %rd41; |
| ; CHECK-NEXT: cvt.u64.u32 %rd52, %r4; |
| ; CHECK-NEXT: add.s64 %rd53, %rd52, 64; |
| ; CHECK-NEXT: selp.b64 %rd54, %rd51, %rd53, %p5; |
| ; CHECK-NEXT: mov.b64 %rd97, 0; |
| ; CHECK-NEXT: sub.cc.s64 %rd56, %rd50, %rd54; |
| ; CHECK-NEXT: subc.cc.s64 %rd57, %rd97, 0; |
| ; CHECK-NEXT: setp.eq.s64 %p6, %rd57, 0; |
| ; CHECK-NEXT: setp.ne.s64 %p7, %rd57, 0; |
| ; CHECK-NEXT: selp.u32 %r5, -1, 0, %p7; |
| ; CHECK-NEXT: setp.gt.u64 %p8, %rd56, 127; |
| ; CHECK-NEXT: selp.u32 %r6, -1, 0, %p8; |
| ; CHECK-NEXT: selp.b32 %r7, %r6, %r5, %p6; |
| ; CHECK-NEXT: and.b32 %r8, %r7, 1; |
| ; CHECK-NEXT: setp.eq.b32 %p9, %r8, 1; |
| ; CHECK-NEXT: or.pred %p10, %p3, %p9; |
| ; CHECK-NEXT: xor.b64 %rd58, %rd56, 127; |
| ; CHECK-NEXT: or.b64 %rd59, %rd58, %rd57; |
| ; CHECK-NEXT: setp.eq.s64 %p11, %rd59, 0; |
| ; CHECK-NEXT: selp.b64 %rd106, 0, %rd42, %p10; |
| ; CHECK-NEXT: selp.b64 %rd105, 0, %rd41, %p10; |
| ; CHECK-NEXT: or.pred %p12, %p10, %p11; |
| ; CHECK-NEXT: @%p12 bra $L__BB5_5; |
| ; CHECK-NEXT: // %bb.3: // %udiv-bb1 |
| ; CHECK-NEXT: add.cc.s64 %rd99, %rd56, 1; |
| ; CHECK-NEXT: addc.cc.s64 %rd100, %rd57, 0; |
| ; CHECK-NEXT: or.b64 %rd62, %rd99, %rd100; |
| ; CHECK-NEXT: setp.eq.s64 %p13, %rd62, 0; |
| ; CHECK-NEXT: cvt.u32.u64 %r9, %rd56; |
| ; CHECK-NEXT: sub.s32 %r10, 127, %r9; |
| ; CHECK-NEXT: shl.b64 %rd63, %rd42, %r10; |
| ; CHECK-NEXT: sub.s32 %r11, 64, %r10; |
| ; CHECK-NEXT: shr.u64 %rd64, %rd41, %r11; |
| ; CHECK-NEXT: or.b64 %rd65, %rd63, %rd64; |
| ; CHECK-NEXT: sub.s32 %r12, 63, %r9; |
| ; CHECK-NEXT: shl.b64 %rd66, %rd41, %r12; |
| ; CHECK-NEXT: setp.gt.s32 %p14, %r10, 63; |
| ; CHECK-NEXT: selp.b64 %rd104, %rd66, %rd65, %p14; |
| ; CHECK-NEXT: shl.b64 %rd103, %rd41, %r10; |
| ; CHECK-NEXT: mov.u64 %rd94, %rd97; |
| ; CHECK-NEXT: @%p13 bra $L__BB5_4; |
| ; CHECK-NEXT: // %bb.1: // %udiv-preheader |
| ; CHECK-NEXT: cvt.u32.u64 %r13, %rd99; |
| ; CHECK-NEXT: shr.u64 %rd69, %rd41, %r13; |
| ; CHECK-NEXT: sub.s32 %r14, 64, %r13; |
| ; CHECK-NEXT: shl.b64 %rd70, %rd42, %r14; |
| ; CHECK-NEXT: or.b64 %rd71, %rd69, %rd70; |
| ; CHECK-NEXT: add.s32 %r15, %r13, -64; |
| ; CHECK-NEXT: shr.u64 %rd72, %rd42, %r15; |
| ; CHECK-NEXT: setp.gt.s32 %p15, %r13, 63; |
| ; CHECK-NEXT: selp.b64 %rd101, %rd72, %rd71, %p15; |
| ; CHECK-NEXT: shr.u64 %rd102, %rd42, %r13; |
| ; CHECK-NEXT: add.cc.s64 %rd33, %rd43, -1; |
| ; CHECK-NEXT: addc.cc.s64 %rd34, %rd44, -1; |
| ; CHECK-NEXT: mov.b64 %rd94, 0; |
| ; CHECK-NEXT: mov.u64 %rd97, %rd94; |
| ; CHECK-NEXT: $L__BB5_2: // %udiv-do-while |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: shr.u64 %rd73, %rd101, 63; |
| ; CHECK-NEXT: shl.b64 %rd74, %rd102, 1; |
| ; CHECK-NEXT: or.b64 %rd75, %rd74, %rd73; |
| ; CHECK-NEXT: shl.b64 %rd76, %rd101, 1; |
| ; CHECK-NEXT: shr.u64 %rd77, %rd104, 63; |
| ; CHECK-NEXT: or.b64 %rd78, %rd76, %rd77; |
| ; CHECK-NEXT: shr.u64 %rd79, %rd103, 63; |
| ; CHECK-NEXT: shl.b64 %rd80, %rd104, 1; |
| ; CHECK-NEXT: or.b64 %rd81, %rd80, %rd79; |
| ; CHECK-NEXT: shl.b64 %rd82, %rd103, 1; |
| ; CHECK-NEXT: or.b64 %rd103, %rd97, %rd82; |
| ; CHECK-NEXT: or.b64 %rd104, %rd94, %rd81; |
| ; CHECK-NEXT: sub.cc.s64 %rd83, %rd33, %rd78; |
| ; CHECK-NEXT: subc.cc.s64 %rd84, %rd34, %rd75; |
| ; CHECK-NEXT: shr.s64 %rd85, %rd84, 63; |
| ; CHECK-NEXT: and.b64 %rd97, %rd85, 1; |
| ; CHECK-NEXT: and.b64 %rd86, %rd85, %rd43; |
| ; CHECK-NEXT: and.b64 %rd87, %rd85, %rd44; |
| ; CHECK-NEXT: sub.cc.s64 %rd101, %rd78, %rd86; |
| ; CHECK-NEXT: subc.cc.s64 %rd102, %rd75, %rd87; |
| ; CHECK-NEXT: add.cc.s64 %rd99, %rd99, -1; |
| ; CHECK-NEXT: addc.cc.s64 %rd100, %rd100, -1; |
| ; CHECK-NEXT: or.b64 %rd88, %rd99, %rd100; |
| ; CHECK-NEXT: setp.eq.s64 %p16, %rd88, 0; |
| ; CHECK-NEXT: @%p16 bra $L__BB5_4; |
| ; CHECK-NEXT: bra.uni $L__BB5_2; |
| ; CHECK-NEXT: $L__BB5_4: // %udiv-loop-exit |
| ; CHECK-NEXT: shr.u64 %rd89, %rd103, 63; |
| ; CHECK-NEXT: shl.b64 %rd90, %rd104, 1; |
| ; CHECK-NEXT: or.b64 %rd91, %rd90, %rd89; |
| ; CHECK-NEXT: shl.b64 %rd92, %rd103, 1; |
| ; CHECK-NEXT: or.b64 %rd105, %rd97, %rd92; |
| ; CHECK-NEXT: or.b64 %rd106, %rd94, %rd91; |
| ; CHECK-NEXT: $L__BB5_5: // %udiv-end |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd105, %rd106}; |
| ; CHECK-NEXT: ret; |
| %div = udiv i128 %lhs, %rhs |
| ret i128 %div |
| } |
| |
| ; Signed 128-bit division by the constant 8589934592 (2^33). |
| ; The expected PTX is branch-free: it derives a bias from the sign of the upper |
| ; half (shr.s64 by 63, then shr.u64 by 31), adds it to the input with carry, and |
| ; shifts the 128-bit sum right by 33 using shl 31 / shr.u64 33 / or for the low |
| ; word and shr.s64 33 for the upper word. |
| define i128 @sdiv_i128_pow2k(i128 %lhs) { |
| ; CHECK-LABEL: sdiv_i128_pow2k( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<11>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.v2.u64 {%rd1, %rd2}, [sdiv_i128_pow2k_param_0]; |
| ; CHECK-NEXT: shr.s64 %rd3, %rd2, 63; |
| ; CHECK-NEXT: shr.u64 %rd4, %rd3, 31; |
| ; CHECK-NEXT: add.cc.s64 %rd5, %rd1, %rd4; |
| ; CHECK-NEXT: addc.cc.s64 %rd6, %rd2, 0; |
| ; CHECK-NEXT: shl.b64 %rd7, %rd6, 31; |
| ; CHECK-NEXT: shr.u64 %rd8, %rd5, 33; |
| ; CHECK-NEXT: or.b64 %rd9, %rd8, %rd7; |
| ; CHECK-NEXT: shr.s64 %rd10, %rd6, 33; |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd9, %rd10}; |
| ; CHECK-NEXT: ret; |
| %div = sdiv i128 %lhs, 8589934592 |
| ret i128 %div |
| } |
| |
| ; Unsigned 128-bit division by the constant 8589934592 (2^33). |
| ; The expected PTX is a 128-bit logical right shift by 33: the low result word |
| ; combines the upper word shifted left by 31 with the low word shifted right by |
| ; 33, and the upper result word is the upper word shifted right by 33. |
| define i128 @udiv_i128_pow2k(i128 %lhs) { |
| ; CHECK-LABEL: udiv_i128_pow2k( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<7>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.v2.u64 {%rd1, %rd2}, [udiv_i128_pow2k_param_0]; |
| ; CHECK-NEXT: shl.b64 %rd3, %rd2, 31; |
| ; CHECK-NEXT: shr.u64 %rd4, %rd1, 33; |
| ; CHECK-NEXT: or.b64 %rd5, %rd4, %rd3; |
| ; CHECK-NEXT: shr.u64 %rd6, %rd2, 33; |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd5, %rd6}; |
| ; CHECK-NEXT: ret; |
| %div = udiv i128 %lhs, 8589934592 |
| ret i128 %div |
| } |
| |
| ; 128-bit addition. |
| ; The expected PTX loads each operand as a pair of 64-bit values and adds them |
| ; with an add.cc / addc.cc pair, so the carry out of the first pair of words |
| ; feeds the addition of the second pair. |
| define i128 @add_i128(i128 %lhs, i128 %rhs) { |
| ; CHECK-LABEL: add_i128( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<7>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.v2.u64 {%rd1, %rd2}, [add_i128_param_0]; |
| ; CHECK-NEXT: ld.param.v2.u64 {%rd3, %rd4}, [add_i128_param_1]; |
| ; CHECK-NEXT: add.cc.s64 %rd5, %rd1, %rd3; |
| ; CHECK-NEXT: addc.cc.s64 %rd6, %rd2, %rd4; |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd5, %rd6}; |
| ; CHECK-NEXT: ret; |
| %result = add i128 %lhs, %rhs |
| ret i128 %result |
| } |