|  | /* Copyright (C) 2008-2020 Free Software Foundation, Inc. | 
|  | Contributor: Joern Rennecke <joern.rennecke@embecosm.com> | 
|  | on behalf of Synopsys Inc. | 
|  |  | 
|  | This file is part of GCC. | 
|  |  | 
|  | GCC is free software; you can redistribute it and/or modify it under | 
|  | the terms of the GNU General Public License as published by the Free | 
|  | Software Foundation; either version 3, or (at your option) any later | 
|  | version. | 
|  |  | 
|  | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | 
|  | WARRANTY; without even the implied warranty of MERCHANTABILITY or | 
|  | FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License | 
|  | for more details. | 
|  |  | 
|  | Under Section 7 of GPL version 3, you are granted additional | 
|  | permissions described in the GCC Runtime Library Exception, version | 
|  | 3.1, as published by the Free Software Foundation. | 
|  |  | 
|  | You should have received a copy of the GNU General Public License and | 
|  | a copy of the GCC Runtime Library Exception along with this program; | 
|  | see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see | 
|  | <http://www.gnu.org/licenses/>.  */ | 
|  |  | 
|  | /* | 
|  | - calculate 15..18 bit inverse using a table of approximating polynomials. | 
|  | precision is higher for polynomials used to evaluate input with larger | 
|  | value. | 
|  | - do one Newton-Raphson iteration step to double the precision, | 
|  | then multiply this with the divisor | 
|  | -> more time to decide if dividend is subnormal | 
|  | - the worst error propagation is on the side of the value range | 
|  | with the least initial defect, thus giving us about 30 bits precision. | 
|  | */ | 
|  | #include "arc-ieee-754.h" | 
|  |  | 
/* Debug harness, normally compiled out by the "if 0" guard below.
   When enabled, __divsf3 becomes a wrapper that runs both __divsf3_c and
   __divsf3_asm on the same r0/r1 arguments and branches to abort when
   the two r0 results differ.  The define at the end of the guarded
   region renames the real implementation that follows in this file to
   __divsf3_asm.
   NOTE(review): __divsf3_c and abort are not defined in this file,
   presumably a C reference implementation and the C library abort,
   confirm against the test setup.  */
|  | #if 0 /* DEBUG */ | 
|  | .global __divsf3 | 
|  | FUNC(__divsf3) | 
|  | .balign 4 | 
|  | __divsf3: | 
|  | push_s blink /* save the return address across the two calls */ | 
|  | push_s r1 /* save the second argument */ | 
|  | bl.d __divsf3_c | 
|  | push_s r0 /* delay slot: save the first argument, stack is now r0, r1, blink */ | 
|  | ld_s r1,[sp,4] /* reload the saved second argument */ | 
|  | st_s r0,[sp,4] /* park the __divsf3_c result in that stack slot */ | 
|  | bl.d __divsf3_asm | 
|  | pop_s r0 /* delay slot: restore the first argument */ | 
|  | pop_s r1 /* r1 = result of __divsf3_c */ | 
|  | pop_s blink | 
|  | cmp r0,r1 /* result of __divsf3_asm against result of __divsf3_c */ | 
/* Both alternatives below abort on a mismatch and return on a match.
   The active one uses a conditional return followed by an unconditional
   branch to abort.  */
|  | #if 1 | 
|  | bne abort | 
|  | jeq_s [blink] | 
|  | b abort | 
|  | #else | 
|  | bne abort | 
|  | j_s [blink] | 
|  | #endif | 
|  | ENDFUNC(__divsf3) | 
|  | #define __divsf3 __divsf3_asm | 
|  | #endif /* DEBUG */ | 
|  |  | 
/* Constant pool for __divsf3.  It sits in front of the code and is
   addressed from the __divsf3 entry sequence with hard-coded
   pcl-relative offsets ("sub3 r3,pcl,55" for .Ldivtab and
   "ld.as r9,[pcl,-114]" for .L7f800000).  Adding, removing or resizing
   anything between here and the __divsf3 label requires recomputing
   both offsets.  */
|  | FUNC(__divsf3) | 
|  | .balign 4 | 
/* Mask of the exponent field of a single-precision value.  Loaded into
   r9 at entry and also ORed into the result to build Inf / NaN.  */
|  | .L7f800000: | 
|  | .long 0x7f800000 | 
/* 64-entry table of 32-bit words, indexed with bits 17..22 of the
   divisor (its six most significant fraction bits).  Each entry is used
   twice: once as a multiplier for the divisor fraction and once, shifted
   left by 13, as the value that product is subtracted from.
   NOTE(review): presumably each word packs the two coefficients of a
   linear approximation of the reciprocal for that interval, as hinted by
   the file header comment.  The exact bit split is not visible here,
   confirm before regenerating the table.  */
|  | .Ldivtab: | 
|  | .long 0xfc0ffff0 | 
|  | .long 0xf46ffefd | 
|  | .long 0xed1ffd2a | 
|  | .long 0xe627fa8e | 
|  | .long 0xdf7ff73b | 
|  | .long 0xd917f33b | 
|  | .long 0xd2f7eea3 | 
|  | .long 0xcd1fe986 | 
|  | .long 0xc77fe3e7 | 
|  | .long 0xc21fdddb | 
|  | .long 0xbcefd760 | 
|  | .long 0xb7f7d08c | 
|  | .long 0xb32fc960 | 
|  | .long 0xae97c1ea | 
|  | .long 0xaa27ba26 | 
|  | .long 0xa5e7b22e | 
|  | .long 0xa1cfa9fe | 
|  | .long 0x9ddfa1a0 | 
|  | .long 0x9a0f990c | 
|  | .long 0x9667905d | 
|  | .long 0x92df878a | 
|  | .long 0x8f6f7e84 | 
|  | .long 0x8c27757e | 
|  | .long 0x88f76c54 | 
|  | .long 0x85df630c | 
|  | .long 0x82e759c5 | 
|  | .long 0x8007506d | 
|  | .long 0x7d3f470a | 
|  | .long 0x7a8f3da2 | 
|  | .long 0x77ef341e | 
|  | .long 0x756f2abe | 
|  | .long 0x72f7212d | 
|  | .long 0x709717ad | 
|  | .long 0x6e4f0e44 | 
|  | .long 0x6c1704d6 | 
|  | .long 0x69e6fb44 | 
|  | .long 0x67cef1d7 | 
|  | .long 0x65c6e872 | 
|  | .long 0x63cedf18 | 
|  | .long 0x61e6d5cd | 
|  | .long 0x6006cc6d | 
|  | .long 0x5e36c323 | 
|  | .long 0x5c76b9f3 | 
|  | .long 0x5abeb0b7 | 
|  | .long 0x5916a79b | 
|  | .long 0x57769e77 | 
|  | .long 0x55de954d | 
|  | .long 0x54568c4e | 
|  | .long 0x52d6834d | 
|  | .long 0x51667a7f | 
|  | .long 0x4ffe71b5 | 
|  | .long 0x4e9e68f1 | 
|  | .long 0x4d466035 | 
|  | .long 0x4bf65784 | 
|  | .long 0x4aae4ede | 
|  | .long 0x496e4646 | 
|  | .long 0x48363dbd | 
|  | .long 0x47063547 | 
|  | .long 0x45de2ce5 | 
|  | .long 0x44be2498 | 
|  | .long 0x43a61c64 | 
|  | .long 0x4296144a | 
|  | .long 0x41860c0e | 
|  | .long 0x407e03ee | 
/* Out-of-line handling of operands whose exponent field is zero
   (zero or denormal).  Both parts rejoin the main path of __divsf3.

   .Ldenorm_fp1 is entered from the __divsf3 entry sequence when the
   exponent field of the divisor (r1) is zero.  On entry r0 and r1 are
   the untouched arguments, r3 is the address of .Ldivtab, r6 is r1
   shifted left by 8 with bit 31 forced to one, r9 is 0x7f800000 and
   r11 is 0.  The code normalises the divisor mantissa, redoes the table
   lookup and first reciprocal estimate for the normalised value, and
   compensates the exponent in r2.

   .Ldenorm_fp0 handles a dividend (r0) whose exponent field is zero in
   the same way, compensating through r11 instead.  */
|  | __divsf3_support: /* This label makes debugger output saner.  */ | 
|  | .Ldenorm_fp1: | 
|  | bclr r6,r6,31 /* drop the leading one that the entry code assumed */ | 
|  | norm.f r12,r6 ; flag for x/0 -> Inf check | 
|  | add r6,r6,r6 | 
|  | rsub r5,r12,16 /* r5 = 16 - r12, rotate count for the table index */ | 
|  | ror r5,r1,r5 | 
|  | asl r6,r6,r12 /* r6 = divisor mantissa shifted left by r12 + 1 */ | 
|  | bmsk r5,r5,5 /* keep six bits as the table index */ | 
|  | ld.as r5,[r3,r5] /* r5 = .Ldivtab entry for the normalised divisor */ | 
|  | add r4,r6,r6 /* multiplier operand, r6 with its top bit shifted out */ | 
|  | ; load latency | 
|  | MPYHU r7,r5,r4 | 
/* The conditional test below only runs when norm.f left Z clear
   (divisor not zero).  It then sets Z when bits 29 and 30 of the
   dividend are both set.  Either way, Z set at the beq_s means the
   result is Inf or NaN.  */
|  | bic.ne.f 0, \ | 
|  | 0x60000000,r0 ; large number / denorm -> Inf | 
|  | beq_s .Linf_NaN | 
|  | asl r5,r5,13 | 
|  | ; wb stall | 
|  | ; slow track | 
|  | sub r7,r5,r7 /* first reciprocal estimate, as on the main path */ | 
|  | MPYHU r8,r7,r6 | 
|  | asl_s r12,r12,23 /* move the shift count into exponent-field position */ | 
|  | and.f r2,r0,r9 /* r2 = exponent field of the dividend, Z if it is zero */ | 
|  | add r2,r2,r12 /* compensate for the divisor normalisation, flags untouched */ | 
|  | asl r12,r0,8 | 
|  | ; wb stall | 
/* The branch tests the Z flag from the and.f above.  Its delay slot is
   the MPYHU directly after the .Ldenorm_fp0 label, so that instruction
   runs exactly once on both the taken and the fall-through path.  */
|  | bne.d .Lpast_denorm_fp1 | 
|  | .Ldenorm_fp0: | 
|  | MPYHU r8,r8,r7 | 
|  | bclr r12,r12,31 | 
|  | norm.f r3,r12 ; flag for 0/x -> 0 check | 
|  | bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0 | 
|  | beq_s .Lret0 | 
|  | asl_s r12,r12,r3 | 
|  | asl_s r3,r3,23 /* move the shift count into exponent-field position */ | 
|  | add_s r12,r12,r12 /* total left shift of the mantissa is r3 + 1 */ | 
|  | add r11,r11,r3 /* r11 is subtracted later, so this lowers the result exponent */ | 
|  | b.d .Lpast_denorm_fp0 | 
|  | mov_s r3,r12 /* delay slot: r3 = normalised dividend mantissa */ | 
/* Exit paths for special results.  Each one builds the result bit
   pattern in r0 and returns through blink.  r9 holds 0x7f800000 on all
   of these paths.  */
|  | .balign 4 | 
/* Reached from .Ldenorm_fp1 when the divisor is zero, or when it is a
   non-zero denormal and bits 29 and 30 of the dividend are both set.
   Returns NaN when the dividend is zero, otherwise Inf carrying the
   sign of r0 xor r1.  */
|  | .Linf_NaN: | 
|  | bclr.f 0,r0,31 ; 0/0 -> NaN | 
|  | xor_s r0,r0,r1 | 
|  | bmsk r1,r0,30 | 
|  | bic_s r0,r0,r1 /* r0 = sign bit of the result only */ | 
|  | sub.eq r0,r0,1 /* dividend was zero: make every bit below the sign a one */ | 
|  | j_s.d [blink] | 
|  | or r0,r0,r9 /* delay slot: set the exponent field to all ones */ | 
/* Reached from .Ldenorm_fp0.  Returns zero carrying the sign of
   r0 xor r1.  */
|  | .Lret0: | 
|  | xor_s r0,r0,r1 | 
|  | bmsk r1,r0,30 | 
|  | j_s.d [blink] | 
|  | bic_s r0,r0,r1 /* delay slot: keep only the sign bit */ | 
/* Reached from the main path when the exponent field of the divisor is
   all ones (divisor is Inf or NaN).  r2 holds the exponent field of the
   dividend, set in the delay slot of the branch that comes here.  */
|  | .Linf_nan_fp1: | 
|  | lsr_s r0,r0,31 | 
|  | bmsk.f 0,r1,22 /* Z when the low 23 bits of the divisor are zero, i.e. divisor is Inf */ | 
|  | asl_s r0,r0,31 /* r0 = sign bit of the dividend only */ | 
|  | bne_s 0f ; inf/inf -> nan | 
|  | brne r2,r9,.Lsigned0 ; x/inf -> 0, but x/nan -> nan | 
|  | 0:	j_s.d [blink] | 
|  | mov r0,-1 /* delay slot: all-ones bit pattern, a NaN */ | 
/* Shared tail: return r0 with its sign bit flipped when the divisor is
   negative.  Via .Lsigned0, r0 holds only the dividend sign, giving a
   signed zero.  Via .Linf_nan_fp0 (dividend exponent field all ones,
   reached from the main path), r0 is still the original dividend.  */
|  | .Lsigned0: | 
|  | .Linf_nan_fp0: | 
|  | tst_s r1,r1 | 
|  | j_s.d [blink] | 
|  | bxor.mi r0,r0,31 | 
/* __divsf3: single-precision floating-point division.

   In:     r0 = dividend, r1 = divisor, as raw single-precision bit
           patterns (sign in bit 31, exponent field under 0x7f800000).
   Out:    r0 = quotient bit pattern.
   Uses:   r2-r9, r11, r12 and the condition flags, returns via blink.
           Nothing is saved on the stack in this code.

   Register roles on the main path:
     r9   0x7f800000, the exponent-field mask
     r11  exponent field of the divisor
     r2   exponent field of the dividend, later the result exponent
     r6   divisor mantissa, left aligned with an explicit leading one
     r12  dividend shifted left by 8
     r3   address of .Ldivtab, later dividend mantissa, later the
          quotient mantissa
     r5   table entry for the divisor
     r7   reciprocal estimate of the divisor mantissa
     r8   correction term of the refinement step

   MPYHU is not defined in this file.
   NOTE(review): presumably it comes from arc-ieee-754.h and yields the
   upper 32 bits of the unsigned 32x32 product, confirm there.

   The two pcl-relative constants below (55 and -114) are hand-computed
   from the code layout, and the trailing comments on those lines give
   the formulas.  They must be recomputed whenever anything between
   .L7f800000 and those instructions changes size.  */
|  | .balign 4 | 
|  | .global __divsf3 | 
|  | /* N.B. the spacing between divtab and the sub3 to get its address must | 
|  | be a multiple of 8.  */ | 
|  | __divsf3: | 
|  | lsr r2,r1,17 | 
|  | sub3 r3,pcl,55;(.-.Ldivtab) >> 3 | 
|  | bmsk_s r2,r2,5 /* r2 = bits 17..22 of the divisor, the table index */ | 
|  | ld.as r5,[r3,r2] /* r5 = .Ldivtab entry number r2 */ | 
|  | asl r4,r1,9 /* divisor fraction bits, sign and exponent shifted out */ | 
|  | ld.as r9,[pcl,-114]; [pcl,(-((.-.L7f800000) >> 2))] ; 0x7f800000 | 
|  | MPYHU r7,r5,r4 | 
|  | asl r6,r1,8 | 
|  | and.f r11,r1,r9 /* r11 = divisor exponent field, Z when it is zero */ | 
|  | bset r6,r6,31 /* r6 = divisor mantissa with explicit leading one */ | 
|  | asl r5,r5,13 | 
|  | ; wb stall | 
|  | beq .Ldenorm_fp1 /* uses Z from the and.f above: divisor is zero or denormal */ | 
|  | sub r7,r5,r7 /* first reciprocal estimate from the table entry */ | 
|  | MPYHU r8,r7,r6 /* estimate times divisor */ | 
|  | breq.d r11,r9,.Linf_nan_fp1 /* divisor exponent all ones: Inf or NaN */ | 
|  | and.f r2,r0,r9 /* delay slot: r2 = dividend exponent field, Z when zero */ | 
|  | beq.d .Ldenorm_fp0 /* dividend is zero or denormal */ | 
|  | asl r12,r0,8 /* delay slot */ | 
|  | ; wb stall | 
|  | breq r2,r9,.Linf_nan_fp0 /* dividend exponent all ones: Inf or NaN */ | 
|  | MPYHU r8,r8,r7 /* estimate times divisor times estimate */ | 
|  | .Lpast_denorm_fp1: | 
|  | bset r3,r12,31 /* r3 = dividend mantissa with explicit leading one */ | 
|  | .Lpast_denorm_fp0: | 
/* When the dividend mantissa is not below the divisor mantissa it is
   halved.  The carry from this compare survives until the sbc below
   because none of the instructions in between sets flags, so the
   result exponent is one lower when the dividend mantissa is below the
   divisor mantissa.  */
|  | cmp_s r3,r6 | 
|  | lsr.cc r3,r3,1 | 
|  | add_s r2,r2, /* wait for immediate */ \ | 
|  | /* wb stall */ \ | 
|  | 0x3f000000 | 
|  | sub r7,r7,r8 ; u1.31 inverse, about 30 bit | 
|  | MPYHU r3,r3,r7 /* quotient mantissa = dividend mantissa times inverse */ | 
|  | sbc r2,r2,r11 /* result exponent = r2 - divisor exponent - carry */ | 
|  | xor.f 0,r0,r1 /* N flag = sign of the result */ | 
|  | and r0,r2,r9 /* exponent field of the result */ | 
|  | bxor.mi r0,r0,31 /* set the sign bit for a negative result */ | 
|  | brhs r2, /* wb stall / wait for immediate */ \ | 
|  | 0x7f000000,.Linf_denorm | 
/* Rounding.  r0 holds sign and exponent field, r3 the quotient mantissa
   with six extra low bits.  Also re-entered from .Ldenorm with r3
   already shifted right and r9 holding the shift count.  */
|  | .Lpast_denorm: | 
|  | add_s r3,r3,0x22 ; round to nearest or higher | 
|  | tst r3,0x3c ; check if rounding was unsafe | 
|  | lsr r3,r3,6 | 
|  | jne.d [blink] ; return if rounding was safe. | 
|  | add_s r0,r0,r3 /* delay slot: add the rounded mantissa to sign and exponent */ | 
|  | /* work out exact rounding if we fall through here.  */ | 
|  | /* We know that the exact result cannot be represented in single | 
|  | precision.  Find the mid-point between the two nearest | 
|  | representable values, multiply with the divisor, and check if | 
|  | the result is larger than the dividend.  */ | 
|  | add_s r3,r3,r3 | 
|  | sub_s r3,r3,1 /* r3 = twice the rounded mantissa minus one, the mid-point */ | 
|  | mpyu r3,r3,r6 /* mid-point times divisor mantissa */ | 
/* NOTE(review): the flags written by the asr.f below appear to be
   overwritten by the later sub.f before anything reads them, since the
   rsub and asl_s in between carry no flag-setting suffix.  Check
   whether a subtract-with-carry was intended for the round-to-even
   case.
   NOTE(review): on the non-denormal path r9 still holds 0x7f800000 at
   the rsub, so the shift count presumably relies on only the low bits
   of r2 being used by the shift, confirm against the ISA manual.  */
|  | asr.f 0,r0,1 ; for round-to-even in case this is a denorm | 
|  | rsub r2,r9,25 /* r2 = 25 - r9 */ | 
|  | asl_s r12,r12,r2 | 
|  | ; wb stall | 
|  | ; slow track | 
|  | sub.f 0,r12,r3 /* negative when the product exceeds the shifted dividend */ | 
|  | j_s.d [blink] | 
|  | sub.mi r0,r0,1 /* delay slot: the round-up was too much, step back by one */ | 
|  | /* For denormal results, it is possible that an exact result needs | 
|  | rounding, and thus the round-to-even rule has to come into play.  */ | 
/* Result exponent out of the normal range.  An unsigned r2 below
   0xc0000000 is treated as overflow, anything above as underflow.  */
|  | .Linf_denorm: | 
|  | brlo r2,0xc0000000,.Linf | 
|  | .Ldenorm: | 
|  | asr_s r2,r2,23 /* exponent as a signed number */ | 
|  | bic r0,r0,r9 /* clear the exponent field, only the sign remains */ | 
|  | neg r9,r2 /* r9 = right-shift count, it no longer holds the mask */ | 
|  | brlo.d r9,25,.Lpast_denorm /* a shift count below 25 can still leave mantissa bits to round */ | 
|  | lsr r3,r3,r9 /* delay slot: denormalise the mantissa */ | 
|  | /* Fall through: return +- 0 */ | 
|  | j_s [blink] | 
|  | .Linf: | 
|  | j_s.d [blink] | 
|  | or r0,r0,r9 /* delay slot: exponent field all ones, sign kept, giving Inf */ | 
|  | ENDFUNC(__divsf3) | 