|  | /* Copyright (C) 2008-2021 Free Software Foundation, Inc. | 
|  | Contributor: Joern Rennecke <joern.rennecke@embecosm.com> | 
|  | on behalf of Synopsys Inc. | 
|  |  | 
|  | This file is part of GCC. | 
|  |  | 
|  | GCC is free software; you can redistribute it and/or modify it under | 
|  | the terms of the GNU General Public License as published by the Free | 
|  | Software Foundation; either version 3, or (at your option) any later | 
|  | version. | 
|  |  | 
|  | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | 
|  | WARRANTY; without even the implied warranty of MERCHANTABILITY or | 
|  | FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License | 
|  | for more details. | 
|  |  | 
|  | Under Section 7 of GPL version 3, you are granted additional | 
|  | permissions described in the GCC Runtime Library Exception, version | 
|  | 3.1, as published by the Free Software Foundation. | 
|  |  | 
|  | You should have received a copy of the GNU General Public License and | 
|  | a copy of the GCC Runtime Library Exception along with this program; | 
|  | see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see | 
|  | <http://www.gnu.org/licenses/>.  */ | 
|  |  | 
|  | #include "arc-ieee-754.h" | 
|  | #if 0 /* DEBUG: when enabled, __addsf3 / __subsf3 become checkers that call both the _c and the _asm variant on the same operands and abort if the results differ; the assembly further down is then renamed to *_asm by the #defines at the end of this section.  */ | 
|  | .global __addsf3 | 
|  | FUNC(__addsf3) | 
|  | .balign 4 | 
|  | __addsf3: | 
|  | push_s blink /* save the return address */ | 
|  | push_s r1 /* save the second operand */ | 
|  | bl.d __addsf3_c | 
|  | push_s r0 /* (delay slot) save the first operand */ | 
|  | ld_s r1,[sp,4] /* reload the saved second operand */ | 
|  | st_s r0,[sp,4] /* reuse that stack slot for the result of __addsf3_c */ | 
|  | bl.d __addsf3_asm | 
|  | pop_s r0 /* (delay slot) reload the saved first operand */ | 
|  | pop_s r1 /* r1 = result of __addsf3_c */ | 
|  | pop_s blink | 
|  | cmp r0,r1 /* compare the two results bit for bit */ | 
|  | jeq_s [blink] /* identical: return the result in r0 */ | 
|  | bl abort /* mismatch */ | 
|  | ENDFUNC(__addsf3) | 
|  | .global __subsf3 | 
|  | FUNC(__subsf3) | 
|  | .balign 4 | 
|  | __subsf3: | 
|  | push_s blink /* save the return address */ | 
|  | push_s r1 /* save the second operand */ | 
|  | bl.d __subsf3_c | 
|  | push_s r0 /* (delay slot) save the first operand */ | 
|  | ld_s r1,[sp,4] /* reload the saved second operand */ | 
|  | st_s r0,[sp,4] /* reuse that stack slot for the result of __subsf3_c */ | 
|  | bl.d __subsf3_asm | 
|  | pop_s r0 /* (delay slot) reload the saved first operand */ | 
|  | pop_s r1 /* r1 = result of __subsf3_c */ | 
|  | pop_s blink | 
|  | cmp r0,r1 /* compare the two results bit for bit */ | 
|  | jeq_s [blink] /* identical: return the result in r0 */ | 
|  | bl abort /* mismatch */ | 
|  | ENDFUNC(__subsf3) | 
|  | #define __addsf3 __addsf3_asm | 
|  | #define __subsf3 __subsf3_asm | 
|  | #endif /* DEBUG */ | 
|  | /* N.B. This is optimized for ARC700. | 
|  | ARC600 has very different scheduling / instruction selection criteria.  */ | 
|  |  | 
|  | /* __addsf3 / __subsf3: add or subtract two single-precision values given as | 
|  | raw bit patterns.  The comments in this file call the operand in r0 DBL0 | 
|  | and the operand in r1 DBL1. | 
|  | inputs: r0, r1 | 
|  | output: r0 | 
|  | clobber: r1-r10, r12, flags  */ | 
|  |  | 
|  | .balign 4 | 
|  | .global __addsf3 | 
|  | .global __subsf3 | 
|  | FUNC(__addsf3) | 
|  | FUNC(__subsf3) | 
|  | .long 0x7f800000 ; exponent mask | 
|  | __subsf3: /* a - b is computed as a + (-b) */ | 
|  | bxor_l r1,r1,31 /* toggle bit 31 (the sign) of DBL1, then fall into __addsf3 */ | 
|  | __addsf3: | 
|  | ld r9,[pcl,-8] /* r9 = 0x7f800000; presumably this picks up the .long above, which depends on the code size between that word and this load - confirm before inserting anything in between */ | 
|  | bmsk r4,r0,30 /* r4 = DBL0 & 0x7fffffff (sign removed) */ | 
|  | xor r10,r0,r1 /* bit 31 of r10 is set iff the operand signs differ */ | 
|  | and r6,r1,r9 /* r6 = exponent field of DBL1 */ | 
|  | sub.f r12,r4,r6 /* flags: lo iff r4 < r6 (unsigned), eq iff r4 == r6 */ | 
|  | asr_s r12,r12,23 /* r12 = exponent of DBL0 minus exponent of DBL1; the blo below still tests the flags of the sub.f */ | 
|  | blo .Ldbl1_gt /* DBL1 has the larger exponent */ | 
|  | brhs r4,r9,.Linf_nan /* DBL0 is Inf or NaN */ | 
|  | brne r12,0,.Lsmall_shift /* DBL0 has the larger exponent */ | 
|  | brge r10,0,.Ladd_same_exp ; r12 == 0 | 
|  | /* Equal exponents, different signs: subtract the magnitudes. | 
|  | After subtracting, we need to normalize; when shifting to place the | 
|  | leading 1 into position for the implicit 1 and adding that to DBL0, | 
|  | we increment the exponent.  Thus, we have to subtract one more than | 
|  | the shift count from the exponent beforehand.  Iff the exponent drops thus | 
|  | below zero (before adding in the fraction with the leading one), we have | 
|  | generated a denormal number.  Denormal handling is basically reducing the | 
|  | shift count so that we produce a zero exponent instead; FWIW, this way | 
|  | the shift count can become zero (if we started out with exponent 1). | 
|  | On the plus side, we don't need to check for denorm input, the result | 
|  | of subtracting these looks just the same as denormals generated during | 
|  | subtraction.  */ | 
|  | bmsk r7,r1,30 /* r7 = DBL1 & 0x7fffffff */ | 
|  | breq	r4,r7,.Lret0 /* equal magnitudes cancel exactly: return 0 */ | 
|  | sub.f r5,r4,r7 /* r5 = magnitude difference; carry set iff r4 < r7 */ | 
|  | lsr r12,r4,23 /* r12 = exponent of DBL0 */ | 
|  | neg.cs r5,r5 /* make the difference positive */ | 
|  | norm r3,r5 /* normalization shift of r5 (see the ISA manual for the exact definition of norm) */ | 
|  | bmsk r2,r0,22 /* r2 = fraction bits of DBL0 */ | 
|  | sub_s r3,r3,6 /* r3 = one more than the left shift that puts the leading 1 at bit 23 */ | 
|  | min r12,r12,r3 /* do not take more off the exponent than it holds (denormal case described above) */ | 
|  | bic r1,r0,r2 /* r1 = sign and exponent of DBL0, fraction cleared */ | 
|  | sub_s r3,r12,1 /* r3 = left shift count */ | 
|  | asl_s r12,r12,23 /* r12 = exponent decrement, positioned in the exponent field */ | 
|  | asl r2,r5,r3 /* r2 = shifted difference */ | 
|  | sub_s r1,r1,r12 /* lower the exponent */ | 
|  | add_s r0,r1,r2 /* add the fraction; a leading 1 at bit 23 bumps the exponent by one */ | 
|  | j_s.d [blink] | 
|  | bxor.cs r0,r0,31 /* (delay slot) flip the sign if the carry of the sub.f above is still set, i.e. DBL1 had the larger magnitude */ | 
|  | .balign 4 | 
|  | .Linf_nan: /* Reached when r4 >= 0x7f800000, i.e. DBL0 is Inf or NaN. */ | 
|  | ; If both inputs are inf, but with different signs, the result is NaN. | 
|  | asr r12,r10,31 /* r12 = all ones if the signs differ, else 0 */ | 
|  | or_s r1,r1,r12 /* r1 becomes all ones when the signs differ */ | 
|  | j_s.d [blink] | 
|  | or.eq r0,r0,r1 /* (delay slot) eq presumably still stems from the sub.f at function entry (r4 == r6: DBL0 is Inf and DBL1 has an all-ones exponent); only then is r1 merged in, otherwise DBL0 is returned unchanged */ | 
|  | .balign 4 | 
|  | .Ladd_same_exp: | 
|  | /* Same sign, same exponent. | 
|  | This is a special case because we can't test for need to shift | 
|  | down by checking if bit 23 of DBL0 changes.  OTOH, here we know | 
|  | that we always need to shift down.  */ | 
|  | ; adding the two floating point numbers together makes the sign | 
|  | ; cancel out and appear as carry; the exponent is doubled, and the | 
|  | ; fraction also in need of shifting left by one. The two implicit | 
|  | ; ones of the sources make an implicit 1 of the result, again | 
|  | ; non-existent in a place shifted by one. | 
|  | add.f	r0,r0,r1 /* raw sum; the common sign comes out as carry */ | 
|  | btst_s	r0,1 /* Z clear iff bit 1 of the sum is set */ | 
|  | breq	r6,0,.Ldenorm_add /* exponent field zero: both operands are denormal or zero */ | 
|  | add.ne	r0,r0,1 ; round to even. | 
|  | rrc	r0,r0 /* shift right by one, the carry (sign) enters at bit 31 */ | 
|  | bmsk	r1,r9,23 /* r1 = 0x7f800000 & 0x00ffffff = 0x00800000, one exponent step */ | 
|  | add	r0,r0,r1 ; increment exponent | 
|  | bic.f	0,r9,r0; check for overflow -> infinity. | 
|  | jne_l	[blink] /* exponent not all ones: done */ | 
|  | mov_s	r0,r9 /* r0 = 0x7f800000 (infinity) */ | 
|  | j_s.d	[blink] | 
|  | bset.cs	r0,r0,31 /* (delay slot) set the sign bit if carry is set; presumably this is still the carry of the add.f above - confirm that bic.f leaves carry alone */ | 
|  |  | 
|  | .Ldenorm_add: /* both exponent fields are zero: plain integer addition is enough */ | 
|  | j_s.d [blink] | 
|  | add r0,r4,r1 /* (delay slot) r0 = (DBL0 without sign) + DBL1; the sign comes from DBL1 */ | 
|  |  | 
|  | .Lret_dbl0: /* return DBL0 (r0) unchanged */ | 
|  | j_s [blink] | 
|  |  | 
|  | .balign 4 | 
|  | .Lsmall_shift: /* DBL0 has the larger exponent; r12 = exponent difference (nonzero) */ | 
|  | brhi r12,25,.Lret_dbl0 /* exponent difference above 25: return DBL0 unchanged */ | 
|  | breq.d r6,0,.Ldenorm_small_shift /* exponent field of DBL1 is zero */ | 
|  | bmsk_s r1,r1,22 /* (delay slot) r1 = fraction bits of DBL1 */ | 
|  | bset_s r1,r1,23 /* make the implicit leading 1 explicit */ | 
|  | .Lfixed_denorm_small_shift: | 
|  | neg r8,r12 | 
|  | asl r5,r1,r8 /* r5 = guard bits; presumably the shifter uses only the low bits of the negative count, which would leave in r5 exactly the bits that the lsr below drops - confirm against the ISA manual */ | 
|  | brge.d r10,0,.Ladd /* same signs: add */ | 
|  | lsr_l r1,r1,r12 /* (delay slot) r1 = DBL1 fraction shifted into place */ | 
|  | /* subtract, abs(DBL0) > abs(DBL1) */ | 
|  | /* DBL0: original values | 
|  | DBL1: fraction with explicit leading 1, shifted into place | 
|  | r4:  orig. DBL0 & 0x7fffffff | 
|  | r6:  orig. DBL1 & 0x7f800000 | 
|  | r9:  0x7f800000 | 
|  | r10: orig. DBL0H ^ DBL1H | 
|  | r5 : guard bits */ | 
|  | .balign 4 | 
|  | .Lsub: | 
|  | neg.f r12,r5 /* r12 = 0 - guard bits: low word of the difference; presumably sets carry (borrow) iff r5 != 0 */ | 
|  | bmsk r3,r0,22 /* r3 = fraction bits of DBL0 */ | 
|  | bset r5,r3,23 /* r5 = DBL0 fraction with explicit leading 1 */ | 
|  | sbc.f r4,r5,r1 /* r4 = high word of the difference, borrow from the guard word included */ | 
|  | beq.d .Large_cancel_sub /* high word is zero */ | 
|  | bic r7,r0,r3 /* (delay slot) r7 = sign and exponent of DBL0 */ | 
|  | norm r3,r4 /* normalization shift of r4 */ | 
|  | bmsk r6,r7,30 /* r6 = exponent field of the larger operand, sign removed */ | 
|  | .Lsub_done: /* also entered from .Lrsub; expects r4:r12 = unnormalized difference, r7 = sign and exponent, r6 = r7 without the sign, r3 = norm of r4 */ | 
|  | sub_s r3,r3,6 /* r3 = one more than the left shift that puts the leading 1 at bit 23 */ | 
|  | breq r3,1,.Lsub_done_noshift /* leading 1 is already at bit 23 */ | 
|  | asl r5,r3,23 /* r5 = exponent decrement, positioned in the exponent field */ | 
|  | sub_l r3,r3,1 /* r3 = left shift count */ | 
|  | brlo r6,r5,.Ldenorm_sub /* the exponent would drop below zero: denormal result */ | 
|  | sub r0,r7,r5 /* sign and lowered exponent */ | 
|  | neg_s r1,r3 | 
|  | lsr.f r2,r12,r1 /* r2 = top bits of the guard word that move up into the fraction */ | 
|  | asl_s r12,r12,r3 /* r12 = guard bits that remain below the fraction */ | 
|  | btst_s	r2,0 | 
|  | bmsk.eq.f r12,r12,30 /* NOTE(review): lsr.f / btst_s / bmsk.eq.f together with the adc.ne below look like the round-to-nearest-even decision; exact flag flow not re-derived here */ | 
|  | asl r5,r4,r3 /* r5 = high word shifted into place, leading 1 at bit 23 */ | 
|  | add_s r0,r0,r2 | 
|  | adc.ne r0,r0,0 /* conditional rounding increment */ | 
|  | j_s.d [blink] | 
|  | add_l r0,r0,r5 /* (delay slot) add the fraction; the leading 1 at bit 23 bumps the exponent by one */ | 
|  |  | 
|  | .Lret0: /* return 0 (all bits clear) */ | 
|  | j_s.d	[blink] | 
|  | mov_l	r0,0 | 
|  |  | 
|  | .balign 4 | 
|  | .Ldenorm_small_shift: /* DBL1 has a zero exponent field: no implicit 1 is added and the shift count is one less */ | 
|  | brne.d	r12,1,.Lfixed_denorm_small_shift /* tested on the value of r12 before the decrement in the delay slot */ | 
|  | sub_s	r12,r12,1 /* (delay slot) */ | 
|  | brlt.d	r10,0,.Lsub /* signs differ: subtract */ | 
|  | mov_s	r5,r12 ; zero r5, and align following code | 
|  | .Ladd: ; Both bit 23 of DBL1 and bit 0 of r5 are clear. | 
|  | bmsk	r2,r0,22 /* r2 = fraction bits of DBL0 */ | 
|  | add_s	r2,r2,r1 /* r2 = fraction sum */ | 
|  | bbit0.d	r2,23,.Lno_shiftdown /* bit 23 of the fraction sum is clear */ | 
|  | add_s	r0,r0,r1 /* (delay slot) r0 = DBL0 + aligned DBL1 fraction */ | 
|  | bic.f	0,r9,r0; check for overflow -> infinity; eq : infinity | 
|  | bmsk	r1,r2,22 /* r1 = low 23 bits of the fraction sum */ | 
|  | lsr.ne.f r2,r2,2; cc: even ; hi: might round down | 
|  | lsr.ne	r1,r1,1 /* skipped for infinity, so that the final sub clears the fraction */ | 
|  | rcmp.hi	r5,1; hi : round down | 
|  | bclr.hi	r0,r0,0 | 
|  | j_l.d	[blink] | 
|  | sub_s	r0,r0,r1 /* (delay slot) takes r1 back off the fraction field; NOTE(review): together with the bclr.hi above this appears to shift the fraction down one place with rounding - not re-derived here */ | 
|  |  | 
|  | /* DBL1 has the larger exponent field (r4 < r6 at function entry), so DBL0 | 
|  | is the operand that gets shifted into place. | 
|  | r4: DBL0H & 0x7fffffff | 
|  | r6: DBL1H & 0x7f800000 | 
|  | r9: 0x7f800000 | 
|  | r10: sign difference | 
|  | r12: shift count (negative) */ | 
|  | .balign 4 | 
|  | .Ldbl1_gt: | 
|  | brhs r6,r9,.Lret_dbl1 ; inf or NaN | 
|  | neg r8,r12 /* r8 = positive shift count */ | 
|  | brhi r8,25,.Lret_dbl1 /* exponent difference above 25: return DBL1 unchanged */ | 
|  | .Lsmall_shift_dbl0: | 
|  | /* The denormal test has to look at the exponent of DBL0, the operand that | 
|  | is shifted here.  r6 is the exponent field of DBL1 and can never be zero on | 
|  | this path (r4 < r6), so a test of r6 would give even a zero or denormal DBL0 | 
|  | an implicit leading 1 (e.g. 0.0 + 0x00800000 would yield 0x00c00000). | 
|  | Instead, reduce r0 to its fraction first: the exponent field of DBL0 is | 
|  | zero exactly when r4 (DBL0 without sign) equals that fraction.  The branch | 
|  | has no delay slot because bset_s must not run on the denormal path.  The | 
|  | three instructions are meant to take the same number of bytes as the | 
|  | breq.d / bmsk_s / bset_s sequence they replace, so that the layout of the | 
|  | following code does not move.  */ | 
|  | bmsk_s r0,r0,22 /* r0 = fraction bits of DBL0 */ | 
|  | breq r4,r0,.Ldenorm_small_shift_dbl0 /* exponent field of DBL0 is zero */ | 
|  | bset_s r0,r0,23 /* make the implicit leading 1 explicit */ | 
|  | .Lfixed_denorm_small_shift_dbl0: | 
|  | asl r5,r0,r12 /* r5 = guard bits, the part of r0 that the lsr below drops */ | 
|  | brge.d r10,0,.Ladd_dbl1_gt /* same signs: add */ | 
|  | lsr r0,r0,r8 /* (delay slot) r0 = DBL0 fraction shifted into place */ | 
|  | /* subtract, abs(DBL0) < abs(DBL1) */ | 
|  | /* DBL0: fraction with explicit leading 1, shifted into place | 
|  | DBL1: original value | 
|  | r6:  orig. DBL1 & 0x7f800000 | 
|  | r9:  0x7f800000 | 
|  | r5: guard bits */ | 
|  | .balign 4 | 
|  | .Lrsub: | 
|  | neg.f r12,r5 /* r12 = 0 - guard bits: low word of the difference; presumably sets carry (borrow) iff r5 != 0 */ | 
|  | bmsk r5,r1,22 /* r5 = fraction bits of DBL1 */ | 
|  | bic r7,r1,r5 /* r7 = sign and exponent of DBL1 */ | 
|  | bset r5,r5,23 /* explicit leading 1 */ | 
|  | sbc.f r4,r5,r0 /* r4 = high word of the difference, borrow from the guard word included */ | 
|  | bne.d .Lsub_done ; note: r6 is already set up. | 
|  | norm r3,r4 /* (delay slot) normalization shift of r4, as .Lsub_done expects in r3 */ | 
|  | /* Fall through: the high word of the difference is zero */ | 
|  |  | 
|  | /* Reached when the high word (r4) of the difference is zero. | 
|  | r4:r12 : unnormalized result fraction | 
|  | r7: result sign and exponent         */ | 
|  | /* When seeing large cancellation, only the topmost guard bit might be set.  */ | 
|  | .balign 4 | 
|  | .Large_cancel_sub: | 
|  | breq_s	r12,0,.Lret0 /* guard word is zero as well: the result is 0 */ | 
|  | sub	r0,r7,24<<23 /* lower the exponent by 24 */ | 
|  | xor.f	0,r0,r7 ; test if exponent is negative | 
|  | tst.pl	r9,r0  ; test if exponent is zero | 
|  | jpnz	[blink] ; return if non-denormal result | 
|  | bmsk	r6,r7,30 /* r6 = exponent field without the sign */ | 
|  | lsr	r3,r6,23 /* r3 = exponent as a number */ | 
|  | xor	r0,r6,r7 /* r0 = sign bit only */ | 
|  | sub_s	r3,r3,24-22 /* r3 = exponent - 2: position of the single fraction bit */ | 
|  | j_s.d	[blink] | 
|  | bset	r0,r0,r3 /* (delay slot) denormal result: sign plus that one bit */ | 
|  |  | 
|  | ; If a denorm is produced, we have an exact result - | 
|  | ; no need for rounding. | 
|  | .balign 4 | 
|  | .Ldenorm_sub: /* reached from .Lsub_done when the exponent decrement exceeds the exponent in r6 */ | 
|  | sub r3,r6,1 | 
|  | lsr.f r3,r3,23 /* r3 = exponent - 1 for a nonzero exponent: the left shift that is still possible; eq iff that is 0 */ | 
|  | xor r0,r6,r7 /* r0 = sign bit only (exponent field zero) */ | 
|  | neg_s r1,r3 | 
|  | asl.ne r4,r4,r3 /* shift the high word up unless the shift count is 0 */ | 
|  | lsr_s r12,r12,r1 /* guard bits that move up into the fraction */ | 
|  | add_s r0,r0,r4 | 
|  | j_s.d [blink] | 
|  | add.ne r0,r0,r12 /* (delay slot) skipped when nothing was shifted */ | 
|  |  | 
|  | .balign 4 | 
|  | .Lsub_done_noshift: /* the leading 1 of r4 is already at bit 23: only rounding on the guard word r12 is left */ | 
|  | add.f 0,r12,r12 /* carry = top guard bit; eq iff no lower guard bit is set */ | 
|  | btst.eq r4,0 /* in that case eq now tells whether bit 0 of the fraction is clear (even) */ | 
|  | bclr r4,r4,23 /* drop the explicit leading 1; the exponent in r7 accounts for it */ | 
|  | add r0,r7,r4 /* sign, exponent and fraction */ | 
|  | j_s.d [blink] | 
|  | adc.ne r0,r0,0 /* (delay slot) add the carry unless eq: this looks like round to nearest, ties to even - confirm that btst leaves carry alone */ | 
|  |  | 
|  | .balign 4 | 
|  | .Lno_shiftdown: /* r0 already holds the unrounded sum; round it on the guard bits in r5 */ | 
|  | add.f 0,r5,r5 /* carry = top guard bit; eq iff no lower guard bit is set */ | 
|  | btst.eq r0,0 /* in that case eq now tells whether bit 0 of the result is clear (even) */ | 
|  | cmp.eq r5,r5 /* still eq: comparing a register with itself presumably clears carry, which cancels the increment below */ | 
|  | j_s.d [blink] | 
|  | add.cs r0,r0,1 /* (delay slot) rounding increment */ | 
|  |  | 
|  | .Lret_dbl1: /* return DBL1 */ | 
|  | j_s.d [blink] | 
|  | mov_l r0,r1 /* (delay slot) */ | 
|  | .balign 4 | 
|  | .Ldenorm_small_shift_dbl0: /* entered from the branch at .Lsmall_shift_dbl0 with r0 reduced to the DBL0 fraction and no leading 1 added; the shift count is one less here */ | 
|  | sub.f r8,r8,1 /* r8 = positive shift count - 1; eq iff no shift is left */ | 
|  | bne.d .Lfixed_denorm_small_shift_dbl0 | 
|  | add_s r12,r12,1 /* (delay slot) keep the negative count in r12 in step with r8 */ | 
|  | brlt.d r10,0,.Lrsub /* signs differ: subtract */ | 
|  | mov r5,0 /* (delay slot) nothing was shifted out: no guard bits */ | 
|  | .Ladd_dbl1_gt: ; both bit 23 of DBL0 and bit 0 of r5 are clear. | 
|  | bmsk	r2,r1,22 /* r2 = fraction bits of DBL1 */ | 
|  | add_s	r2,r2,r0 /* r2 = fraction sum */ | 
|  | bbit0.d	r2,23,.Lno_shiftdown_dbl1_gt /* bit 23 of the fraction sum is clear */ | 
|  | add_s	r0,r1,r0 /* (delay slot) r0 = DBL1 + aligned DBL0 fraction */ | 
|  | bic.f	0,r9,r0; check for overflow -> infinity; eq : infinity | 
|  | bmsk	r1,r2,22 /* r1 = low 23 bits of the fraction sum */ | 
|  | lsr.ne.f r2,r2,2; cc: even ; hi: might round down | 
|  | lsr.ne	r1,r1,1 /* skipped for infinity, so that the final sub clears the fraction */ | 
|  | rcmp.hi	r5,1; hi : round down | 
|  | bclr.hi	r0,r0,0 | 
|  | j_l.d	[blink] | 
|  | sub_s	r0,r0,r1 /* (delay slot) takes r1 back off the fraction field; NOTE(review): together with the bclr.hi above this appears to shift the fraction down one place with rounding - not re-derived here */ | 
|  |  | 
|  | .balign	4 | 
|  | .Lno_shiftdown_dbl1_gt: /* r0 already holds the unrounded sum; round it on the guard bits in r5 */ | 
|  | add.f	0,r5,r5 /* carry = top guard bit; eq iff no lower guard bit is set */ | 
|  | btst.eq	r0,0 /* in that case eq now tells whether bit 0 of the result is clear (even) */ | 
|  | cmp.eq	r5,r5 /* still eq: comparing a register with itself presumably clears carry, which cancels the increment below */ | 
|  | j_s.d	[blink] | 
|  | add.cs	r0,r0,1 /* (delay slot) rounding increment */ | 
|  | ENDFUNC(__addsf3) | 
|  | ENDFUNC(__subsf3) | 