/*  -*- Mode: Asm -*-  */
;;    Copyright (C) 2012-2020 Free Software Foundation, Inc.
;;    Contributed by Sean D'Epagnier  (sean@depagnier.com)
;;                   Georg-Johann Lay (avr@gjlay.de)

;; This file is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by the
;; Free Software Foundation; either version 3, or (at your option) any
;; later version.

;; In addition to the permissions in the GNU General Public License, the
;; Free Software Foundation gives you unlimited permission to link the
;; compiled version of this file into combinations with other programs,
;; and to distribute those combinations without any restriction coming
;; from the use of this file.  (The General Public License restrictions
;; do apply in other respects; for example, they cover modification of
;; the file, and distribution when not linked into a combine
;; executable.)

;; This file is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with this program; see the file COPYING.  If not, write to
;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
;; Boston, MA 02110-1301, USA.

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Fixed point library routines for AVR
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

#if defined __AVR_TINY__
#define __zero_reg__ r17
#define __tmp_reg__ r16
#else
#define __zero_reg__ r1
#define __tmp_reg__ r0
#endif

.section .text.libgcc.fixed, "ax", @progbits

#ifndef __AVR_TINY__

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Conversions to float
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

#if defined (L_fractqqsf)
DEFUN __fractqqsf
    ;; Move in place for SA -> SF conversion
    clr     r22
    mov     r23, r24
    ;; Sign-extend
    lsl     r24
    sbc     r24, r24
    mov     r25, r24
    XJMP    __fractsasf
ENDF __fractqqsf
#endif  /* L_fractqqsf */

#if defined (L_fractuqqsf)
DEFUN __fractuqqsf
    ;; Move in place for USA -> SF conversion
    clr     r22
    mov     r23, r24
    ;; Zero-extend
    clr     r24
    clr     r25
    XJMP    __fractusasf
ENDF __fractuqqsf
#endif  /* L_fractuqqsf */

#if defined (L_fracthqsf)
DEFUN __fracthqsf
    ;; Move in place for SA -> SF conversion
    wmov    22, 24
    ;; Sign-extend
    lsl     r25
    sbc     r24, r24
    mov     r25, r24
    XJMP    __fractsasf
ENDF __fracthqsf
#endif  /* L_fracthqsf */

#if defined (L_fractuhqsf)
DEFUN __fractuhqsf
    ;; Move in place for USA -> SF conversion
    wmov    22, 24
    ;; Zero-extend
    clr     r24
    clr     r25
    XJMP    __fractusasf
ENDF __fractuhqsf
#endif  /* L_fractuhqsf */

#if defined (L_fracthasf)
DEFUN __fracthasf
    ;; Move in place for SA -> SF conversion
    clr     r22
    mov     r23, r24
    mov     r24, r25
    ;; Sign-extend
    lsl     r25
    sbc     r25, r25
    XJMP    __fractsasf
ENDF __fracthasf
#endif  /* L_fracthasf */

#if defined (L_fractuhasf)
DEFUN __fractuhasf
    ;; Move in place for USA -> SF conversion
    clr     r22
    mov     r23, r24
    mov     r24, r25
    ;; Zero-extend
    clr     r25
    XJMP    __fractusasf
ENDF __fractuhasf
#endif  /* L_fractuhasf */


#if defined (L_fractsqsf)
DEFUN __fractsqsf
    XCALL   __floatsisf
    ;; Divide non-zero results by 2^31 to move the
    ;; decimal point into place
    tst     r25
    breq    0f
    subi    r24, exp_lo (31)
    sbci    r25, exp_hi (31)
0:  ret
ENDF __fractsqsf
#endif  /* L_fractsqsf */

#if defined (L_fractusqsf)
DEFUN __fractusqsf
    XCALL   __floatunsisf
    ;; Divide non-zero results by 2^32 to move the
    ;; decimal point into place
    cpse    r25, __zero_reg__
    subi    r25, exp_hi (32)
    ret
ENDF __fractusqsf
#endif  /* L_fractusqsf */

#if defined (L_fractsasf)
DEFUN __fractsasf
    XCALL   __floatsisf
    ;; Divide non-zero results by 2^15 to move the
    ;; decimal point into place
    tst     r25
    breq    0f
    subi    r24, exp_lo (15)
    sbci    r25, exp_hi (15)
0:  ret
ENDF __fractsasf
#endif  /* L_fractsasf */

#if defined (L_fractusasf)
DEFUN __fractusasf
    XCALL   __floatunsisf
    ;; Divide non-zero results by 2^16 to move the
    ;; decimal point into place
    cpse    r25, __zero_reg__
    subi    r25, exp_hi (16)
    ret
ENDF __fractusasf
#endif  /* L_fractusasf */
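
;; All conversions to float above funnel through __fractsasf /
;; __fractusasf, which rely on one trick: after __floatsisf the value is
;; an IEEE-754 single, so subtracting N from its exponent field divides
;; any non-zero result by 2^N; exp_lo()/exp_hi() select the bytes of the
;; exponent adjustment that overlap R25:R24.  A rough C model of
;; __fractsasf (illustrative only; assumes IEEE-754 single precision):
/*
    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    static float sa_to_float (int32_t x)    // x is an s16.15 bit pattern
    {
      float f = (float) x;                  // __floatsisf
      if (f != 0.0f)                        // 0.0 has no exponent to adjust
        {
          uint32_t u;
          memcpy (&u, &f, sizeof u);
          u -= (uint32_t) 15 << 23;         // exponent -= 15: divide by 2^15
          memcpy (&f, &u, sizeof f);
        }
      return f;
    }

    int main (void)
    {
      assert (sa_to_float (1L << 15) == 1.0f);      // 1.0 in s16.15
      assert (sa_to_float (-(1L << 14)) == -0.5f);  // -0.5 in s16.15
      return 0;
    }
*/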

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Conversions from float
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

#if defined (L_fractsfqq)
DEFUN __fractsfqq
    ;; Multiply with 2^{24+7} to get a QQ result in r25
    subi    r24, exp_lo (-31)
    sbci    r25, exp_hi (-31)
    XCALL   __fixsfsi
    mov     r24, r25
    ret
ENDF __fractsfqq
#endif  /* L_fractsfqq */

#if defined (L_fractsfuqq)
DEFUN __fractsfuqq
    ;; Multiply with 2^{24+8} to get a UQQ result in r25
    subi    r25, exp_hi (-32)
    XCALL   __fixunssfsi
    mov     r24, r25
    ret
ENDF __fractsfuqq
#endif  /* L_fractsfuqq */

#if defined (L_fractsfha)
DEFUN __fractsfha
    ;; Multiply with 2^{16+7} to get a HA result in r25:r24
    subi    r24, exp_lo (-23)
    sbci    r25, exp_hi (-23)
    XJMP    __fixsfsi
ENDF __fractsfha
#endif  /* L_fractsfha */

#if defined (L_fractsfuha)
DEFUN __fractsfuha
    ;; Multiply with 2^24 to get a UHA result in r25:r24
    subi    r25, exp_hi (-24)
    XJMP    __fixunssfsi
ENDF __fractsfuha
#endif  /* L_fractsfuha */

#if defined (L_fractsfhq)
FALIAS __fractsfsq

DEFUN __fractsfhq
    ;; Multiply with 2^{16+15} to get a HQ result in r25:r24
    ;; resp. with 2^31 to get a SQ result in r25:r22
    subi    r24, exp_lo (-31)
    sbci    r25, exp_hi (-31)
    XJMP    __fixsfsi
ENDF __fractsfhq
#endif  /* L_fractsfhq */

#if defined (L_fractsfuhq)
FALIAS __fractsfusq

DEFUN __fractsfuhq
    ;; Multiply with 2^{16+16} to get a UHQ result in r25:r24
    ;; resp. with 2^32 to get a USQ result in r25:r22
    subi    r25, exp_hi (-32)
    XJMP    __fixunssfsi
ENDF __fractsfuhq
#endif  /* L_fractsfuhq */

#if defined (L_fractsfsa)
DEFUN __fractsfsa
    ;; Multiply with 2^15 to get a SA result in r25:r22
    subi    r24, exp_lo (-15)
    sbci    r25, exp_hi (-15)
    XJMP    __fixsfsi
ENDF __fractsfsa
#endif  /* L_fractsfsa */

#if defined (L_fractsfusa)
DEFUN __fractsfusa
    ;; Multiply with 2^16 to get a USA result in r25:r22
    subi    r25, exp_hi (-16)
    XJMP    __fixunssfsi
ENDF __fractsfusa
#endif  /* L_fractsfusa */
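
;; The float-to-fixed direction uses the same exponent trick in reverse:
;; adding N to the exponent multiplies by 2^N before the __fixsfsi /
;; __fixunssfsi call.  No zero test is needed here, because bumping the
;; exponent of 0.0 only produces a tiny value that still fixes to 0.
;; A rough C model of __fractsfsa (illustrative; assumes IEEE-754):
/*
    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    static int32_t float_to_sa (float f)    // result is an s16.15 pattern
    {
      uint32_t u;
      memcpy (&u, &f, sizeof u);
      u += (uint32_t) 15 << 23;             // exponent += 15: multiply by 2^15
      memcpy (&f, &u, sizeof f);
      return (int32_t) f;                   // __fixsfsi truncates
    }

    int main (void)
    {
      assert (float_to_sa (1.0f) == 1L << 15);
      assert (float_to_sa (-0.5f) == -(1L << 14));
      return 0;
    }
*/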


;; For multiplication the functions here are called directly from
;; avr-fixed.md instead of using the standard libcall mechanisms.
;; This can make better code because GCC knows exactly which
;; of the call-used registers (not all of them) are clobbered.

/*******************************************************
    Fractional  Multiplication  8 x 8  without MUL
*******************************************************/

#if defined (L_mulqq3) && !defined (__AVR_HAVE_MUL__)
;;; R23 = R24 * R25
;;; Clobbers: __tmp_reg__, R22, R24, R25
;;; Rounding: ???
DEFUN __mulqq3
    XCALL   __fmuls
    ;; TR 18037 requires that  (-1) * (-1)  does not overflow
    ;; The only input that can produce  -1  is  (-1)^2.
    dec     r23
    brvs    0f
    inc     r23
0:  ret
ENDF  __mulqq3
#endif /* L_mulqq3 && ! HAVE_MUL */
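
;; A rough C model of __mulqq3 (illustrative, not part of the library):
;; QQ is a signed 1.7 fract, the 16-bit product is 2.14, and the result
;; keeps bits 14..7.  Only (-1) * (-1) can overflow; TR 18037 wants it
;; saturated.  Arithmetic >> on negative values is assumed here.
/*
    #include <assert.h>
    #include <stdint.h>

    static int8_t mulqq (int8_t a, int8_t b)
    {
      int16_t q = ((int16_t) a * b) >> 7;   // truncate to 1.7
      if (q == 128)                         // only (-1) * (-1) lands here
        q = 127;                            // saturate to 1 - 2^-7
      return (int8_t) q;
    }

    int main (void)
    {
      assert (mulqq (-128, -128) == 127);   // (-1) * (-1) -> almost 1
      assert (mulqq (64, 64) == 32);        // 0.5 * 0.5 = 0.25
      return 0;
    }
*/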

/*******************************************************
    Fractional Multiply  .16 x .16  with and without MUL
*******************************************************/

#if defined (L_mulhq3)
;;; Same code with and without MUL, but the interfaces differ:
;;; no MUL: (R25:R24) *= (R23:R22)
;;;         Clobbers: ABI, called by optabs
;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
;;;         Clobbers: __tmp_reg__, R22, R23
;;; Rounding:  -0.5 LSB  <= error  <=  0.5 LSB
DEFUN   __mulhq3
    XCALL   __mulhisi3
    ;; Shift result into place
    lsl     r23
    rol     r24
    rol     r25
    brvs    1f
    ;; Round
    sbrc    r23, 7
    adiw    r24, 1
    ret
1:  ;; Overflow.  TR 18037 requires  (-1)^2  not to overflow
    ldi     r24, lo8 (0x7fff)
    ldi     r25, hi8 (0x7fff)
    ret
ENDF __mulhq3
#endif  /* defined (L_mulhq3) */

#if defined (L_muluhq3)
;;; Same code with and without MUL, but the interfaces differ:
;;; no MUL: (R25:R24) *= (R23:R22)
;;;         Clobbers: ABI, called by optabs
;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
;;;         Clobbers: __tmp_reg__, R22, R23
;;; Rounding:  -0.5 LSB  <  error  <=  0.5 LSB
DEFUN   __muluhq3
    XCALL   __umulhisi3
    ;; Round
    sbrc    r23, 7
    adiw    r24, 1
    ret
ENDF __muluhq3
#endif  /* L_muluhq3 */
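
;; Both .16 multiplies widen to a 32-bit product, keep the high part and
;; round on the highest dropped bit.  A rough C model (illustrative;
;; assumes arithmetic >> for negative values):
/*
    #include <assert.h>
    #include <stdint.h>

    static int16_t mulhq (int16_t a, int16_t b)       // s.15 x s.15
    {
      int32_t p = (int32_t) a * b;                    // 2.30 product
      if (p == (int32_t) 1 << 30)                     // only (-1) * (-1)
        return 0x7fff;                                // saturate per TR 18037
      return (int16_t) ((p >> 15) + ((p >> 14) & 1)); // round to nearest
    }

    static uint16_t muluhq (uint16_t a, uint16_t b)   // .16 x .16
    {
      uint32_t p = (uint32_t) a * b;                  // .32 product
      return (uint16_t) ((p >> 16) + ((p >> 15) & 1)); // cannot wrap
    }

    int main (void)
    {
      assert (mulhq (-32768, -32768) == 0x7fff);      // (-1) * (-1)
      assert (muluhq (0x8000, 0x8000) == 0x4000);     // 0.5 * 0.5 = 0.25
      return 0;
    }
*/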


/*******************************************************
    Fixed  Multiply  8.8 x 8.8  with and without MUL
*******************************************************/

#if defined (L_mulha3)
;;; Same code with and without MUL, but the interfaces differ:
;;; no MUL: (R25:R24) *= (R23:R22)
;;;         Clobbers: ABI, called by optabs
;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
;;;         Clobbers: __tmp_reg__, R22, R23
;;; Rounding:  -0.5 LSB  <=  error  <=  0.5 LSB
DEFUN   __mulha3
    XCALL   __mulhisi3
    lsl     r22
    rol     r23
    rol     r24
    XJMP    __muluha3_round
ENDF __mulha3
#endif  /* L_mulha3 */

#if defined (L_muluha3)
;;; Same code with and without MUL, but the interfaces differ:
;;; no MUL: (R25:R24) *= (R23:R22)
;;;         Clobbers: ABI, called by optabs
;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
;;;         Clobbers: __tmp_reg__, R22, R23
;;; Rounding:  -0.5 LSB  <  error  <=  0.5 LSB
DEFUN   __muluha3
    XCALL   __umulhisi3
    XJMP    __muluha3_round
ENDF __muluha3
#endif  /* L_muluha3 */

#if defined (L_muluha3_round)
DEFUN   __muluha3_round
    ;; Shift result into place
    mov     r25, r24
    mov     r24, r23
    ;; Round
    sbrc    r22, 7
    adiw    r24, 1
    ret
ENDF __muluha3_round
#endif  /* L_muluha3_round */
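
;; The accum flavors keep a different window of the same product: UHA is
;; 8.8, HA is s8.7.  A rough C model (illustrative; these multiplies do
;; not saturate, so an out-of-range result simply wraps):
/*
    #include <assert.h>
    #include <stdint.h>

    static uint16_t muluha (uint16_t a, uint16_t b)   // 8.8 x 8.8
    {
      uint32_t p = (uint32_t) a * b;                  // 16.16 product
      return (uint16_t) ((p >> 8) + ((p >> 7) & 1));  // round; may wrap
    }

    static int16_t mulha (int16_t a, int16_t b)       // s8.7 x s8.7
    {
      int32_t p = (int32_t) a * b;                    // s16.14 product
      return (int16_t) ((p >> 7) + ((p >> 6) & 1));   // round; may wrap
    }

    int main (void)
    {
      assert (muluha (0x0180, 0x0200) == 0x0300);     // 1.5 * 2.0 = 3.0
      assert (mulha (0x00C0, 0x0100) == 0x0180);      // 1.5 * 2.0 = 3.0
      return 0;
    }
*/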


/*******************************************************
    Fixed  Multiplication  16.16 x 16.16
*******************************************************/

;; Bits outside the result (below LSB), used in the signed version
#define GUARD __tmp_reg__

#if defined (__AVR_HAVE_MUL__)

;; Multiplier
#define A0  16
#define A1  A0+1
#define A2  A1+1
#define A3  A2+1

;; Multiplicand
#define B0  20
#define B1  B0+1
#define B2  B1+1
#define B3  B2+1

;; Result
#define C0  24
#define C1  C0+1
#define C2  C1+1
#define C3  C2+1

#if defined (L_mulusa3)
;;; (C3:C0) = (A3:A0) * (B3:B0)
DEFUN __mulusa3
    set
    ;; Fallthru
ENDF  __mulusa3

;;; Round for last digit iff T = 1
;;; Return guard bits in GUARD (__tmp_reg__).
;;; Rounding, T = 0:  -1.0 LSB  <  error  <=  0   LSB
;;; Rounding, T = 1:  -0.5 LSB  <  error  <=  0.5 LSB
DEFUN __mulusa3_round
    ;; Some of the MUL instructions have LSBs outside the result.
    ;; Don't ignore these LSBs in order to tame rounding error.
    ;; Use C2/C3 for these LSBs.

    clr C0
    clr C1
    mul A0, B0  $  movw C2, r0

    mul A1, B0  $  add  C3, r0  $  adc C0, r1
    mul A0, B1  $  add  C3, r0  $  adc C0, r1  $  rol C1

    ;; Round if T = 1.  Store the guard bits outside the result; the
    ;; signed version (function below) uses them for its left-shift
    ;; and rounding.
    brtc 0f
    sbrc C3, 7
    adiw C0, 1
0:  push C3

    ;; The following MULs don't have LSBs outside the result.
    ;; C2/C3 is the high part.

    mul  A0, B2  $  add C0, r0  $  adc C1, r1  $  sbc  C2, C2
    mul  A1, B1  $  add C0, r0  $  adc C1, r1  $  sbci C2, 0
    mul  A2, B0  $  add C0, r0  $  adc C1, r1  $  sbci C2, 0
    neg  C2

    mul  A0, B3  $  add C1, r0  $  adc C2, r1  $  sbc  C3, C3
    mul  A1, B2  $  add C1, r0  $  adc C2, r1  $  sbci C3, 0
    mul  A2, B1  $  add C1, r0  $  adc C2, r1  $  sbci C3, 0
    mul  A3, B0  $  add C1, r0  $  adc C2, r1  $  sbci C3, 0
    neg  C3

    mul  A1, B3  $  add C2, r0  $  adc C3, r1
    mul  A2, B2  $  add C2, r0  $  adc C3, r1
    mul  A3, B1  $  add C2, r0  $  adc C3, r1

    mul  A2, B3  $  add C3, r0
    mul  A3, B2  $  add C3, r0

    ;; Guard bits used in the signed version below.
    pop  GUARD
    clr  __zero_reg__
    ret
ENDF __mulusa3_round
#endif /* L_mulusa3 */

#if defined (L_mulsa3)
;;; (C3:C0) = (A3:A0) * (B3:B0)
;;; Clobbers: __tmp_reg__, T
;;; Rounding:  -0.5 LSB  <=  error  <=  0.5 LSB
DEFUN __mulsa3
    clt
    XCALL   __mulusa3_round
    ;; A posteriori sign extension of the operands
    tst     B3
    brpl 1f
    sub     C2, A0
    sbc     C3, A1
1:  sbrs    A3, 7
    rjmp 2f
    sub     C2, B0
    sbc     C3, B1
2:
    ;;  Shift 1 bit left to adjust for 15 fractional bits
    lsl     GUARD
    rol     C0
    rol     C1
    rol     C2
    rol     C3
    ;; Round last digit
    lsl     GUARD
    adc     C0, __zero_reg__
    adc     C1, __zero_reg__
    adc     C2, __zero_reg__
    adc     C3, __zero_reg__
    ret
ENDF __mulsa3
#endif /* L_mulsa3 */
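
;; Value-wise the two routines above compute the full 64-bit product and
;; return a rounded 32-bit window of it; the guard byte in __tmp_reg__
;; exists only because the code materializes just part of the 64
;; product bits.  A rough C model (illustrative, not part of the library):
/*
    #include <assert.h>
    #include <stdint.h>

    static uint32_t mulusa (uint32_t a, uint32_t b)   // 16.16 x 16.16
    {
      uint64_t p = (uint64_t) a * b;                  // 32.32 product
      return (uint32_t) ((p >> 16) + ((p >> 15) & 1));
    }

    static int32_t mulsa (int32_t a, int32_t b)       // s16.15 x s16.15
    {
      int64_t p = (int64_t) a * b;                    // s32.30 product
      return (int32_t) ((p >> 15) + ((p >> 14) & 1));
    }

    int main (void)
    {
      assert (mulusa (0x00018000, 0x00028000) == 0x0003C000); // 1.5 * 2.5
      assert (mulsa (0x0000C000, -0x00010000) == -0x00018000); // 1.5 * -2
      return 0;
    }
*/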

#undef A0
#undef A1
#undef A2
#undef A3
#undef B0
#undef B1
#undef B2
#undef B3
#undef C0
#undef C1
#undef C2
#undef C3

#else /* __AVR_HAVE_MUL__ */

#define A0 18
#define A1 A0+1
#define A2 A0+2
#define A3 A0+3

#define B0 22
#define B1 B0+1
#define B2 B0+2
#define B3 B0+3

#define C0  22
#define C1  C0+1
#define C2  C0+2
#define C3  C0+3

;; __tmp_reg__
#define CC0  0
;; __zero_reg__
#define CC1  1
#define CC2  16
#define CC3  17

#define AA0  26
#define AA1  AA0+1
#define AA2  30
#define AA3  AA2+1

#if defined (L_mulsa3)
;;; (R25:R22)  *=  (R21:R18)
;;; Clobbers: ABI, called by optabs
;;; Rounding:  -1 LSB  <=  error  <=  1 LSB
DEFUN   __mulsa3
    push    B0
    push    B1
    push    B3
    clt
    XCALL   __mulusa3_round
    pop     r30
    ;; sign-extend B
    bst     r30, 7
    brtc 1f
    ;; A1, A0 survived in  R27:R26
    sub     C2, AA0
    sbc     C3, AA1
1:
    pop     AA1  ;; B1
    pop     AA0  ;; B0

    ;; sign-extend A.  A3 survived in  R31
    bst     AA3, 7
    brtc 2f
    sub     C2, AA0
    sbc     C3, AA1
2:
    ;;  Shift 1 bit left to adjust for 15 fractional bits
    lsl     GUARD
    rol     C0
    rol     C1
    rol     C2
    rol     C3
    ;; Round last digit
    lsl     GUARD
    adc     C0, __zero_reg__
    adc     C1, __zero_reg__
    adc     C2, __zero_reg__
    adc     C3, __zero_reg__
    ret
ENDF __mulsa3
#endif  /* L_mulsa3 */

#if defined (L_mulusa3)
;;; (R25:R22)  *=  (R21:R18)
;;; Clobbers: ABI, called by optabs
;;; Rounding:  -1 LSB  <=  error  <=  1 LSB
DEFUN __mulusa3
    set
    ;; Fallthru
ENDF  __mulusa3

;;; A[] survives in 26, 27, 30, 31
;;; Also used by __mulsa3 with T = 0
;;; Round if T = 1
;;; Return Guard bits in GUARD (__tmp_reg__), used by signed version.
DEFUN __mulusa3_round
    push    CC2
    push    CC3
    ; clear result
    clr     __tmp_reg__
    wmov    CC2, CC0
    ; save multiplicand
    wmov    AA0, A0
    wmov    AA2, A2
    rjmp 3f

    ;; Loop the integral part

1:  ;; CC += A * 2^n;  n >= 0
    add  CC0,A0  $  adc CC1,A1  $  adc  CC2,A2  $  adc  CC3,A3

2:  ;; A <<= 1
    lsl  A0      $  rol A1      $  rol  A2      $  rol  A3

3:  ;; IBIT(B) >>= 1
    ;; Carry = n-th bit of B;  n >= 0
    lsr     B3
    ror     B2
    brcs 1b
    sbci    B3, 0
    brne 2b

    ;; Loop the fractional part
    ;; B2/B3 is 0 now, use as guard bits for rounding
    ;; Restore multiplicand
    wmov    A0, AA0
    wmov    A2, AA2
    rjmp 5f

4:  ;; CC += A:Guard * 2^n;  n < 0
    add  B3,B2 $  adc  CC0,A0  $  adc  CC1,A1  $  adc  CC2,A2  $  adc  CC3,A3
5:
    ;; A:Guard >>= 1
    lsr  A3   $  ror  A2  $  ror  A1  $  ror   A0  $   ror  B2

    ;; FBIT(B) <<= 1
    ;; Carry = n-th bit of B;  n < 0
    lsl     B0
    rol     B1
    brcs 4b
    sbci    B0, 0
    brne 5b

    ;; Save guard bits and set carry for rounding
    push    B3
    lsl     B3
    ;; Move result into place
    wmov    C2, CC2
    wmov    C0, CC0
    clr     __zero_reg__
    brtc 6f
    ;; Round iff T = 1
    adc     C0, __zero_reg__
    adc     C1, __zero_reg__
    adc     C2, __zero_reg__
    adc     C3, __zero_reg__
6:
    pop     GUARD
    ;; Epilogue
    pop     CC3
    pop     CC2
    ret
ENDF __mulusa3_round
#endif  /* L_mulusa3 */
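
;; The no-MUL routine above is classic shift-and-add long multiplication:
;; integral bits of B add left-shifted copies of A, fractional bits add
;; right-shifted copies, and one extra byte below the LSB (B2, summed
;; into B3) serves as guard bits whose carries propagate into the
;; result.  A rough C model (illustrative, names invented here):
/*
    #include <assert.h>
    #include <stdint.h>

    static uint32_t mulusa_shift_add (uint32_t a, uint32_t b,
                                      uint8_t *guard)
    {
      uint32_t cc = 0;                     // result accumulator (CC)
      uint8_t g = 0;                       // guard accumulator (B3)
      uint8_t ag = 0;                      // guard byte of shifted A (B2)

      // Integral half of b: add a << n for every set bit, n >= 0.
      uint16_t bi = (uint16_t) (b >> 16);
      for (uint32_t aa = a; bi; bi >>= 1, aa <<= 1)
        if (bi & 1)
          cc += aa;

      // Fractional half of b, MSB first: A:ag shifts right one bit per
      // step; the carry out of the guard addition feeds the result.
      uint16_t bf = (uint16_t) b;
      for (uint32_t aa = a; bf; bf = (uint16_t) (bf << 1))
        {
          ag = (uint8_t) ((ag >> 1) | ((aa & 1) << 7));
          aa >>= 1;
          if (bf & 0x8000)
            {
              unsigned s = (unsigned) g + ag;
              g = (uint8_t) s;
              cc += aa + (s >> 8);         // add A and the guard carry
            }
        }
      *guard = g;
      return cc;                           // truncated 16.16 product
    }

    int main (void)
    {
      uint8_t g;
      assert (mulusa_shift_add (0x00018000, 0x00028000, &g) == 0x0003C000);
      assert (mulusa_shift_add (1, 0x00008000, &g) == 0 && g == 0x80);
      return 0;
    }
*/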

#undef A0
#undef A1
#undef A2
#undef A3
#undef B0
#undef B1
#undef B2
#undef B3
#undef C0
#undef C1
#undef C2
#undef C3
#undef AA0
#undef AA1
#undef AA2
#undef AA3
#undef CC0
#undef CC1
#undef CC2
#undef CC3

#endif /* __AVR_HAVE_MUL__ */

#undef GUARD

/***********************************************************
    Fixed  unsigned saturated Multiplication  8.8 x 8.8
***********************************************************/

#define C0  22
#define C1  C0+1
#define C2  C0+2
#define C3  C0+3
#define SS __tmp_reg__

#if defined (L_usmuluha3)
DEFUN __usmuluha3
    ;; Widening multiply
#ifdef __AVR_HAVE_MUL__
    ;; Adjust interface
    movw    R26, R22
    movw    R18, R24
#endif /* HAVE MUL */
    XCALL   __umulhisi3
    tst     C3
    brne .Lmax
    ;; Round, target is in C1..C2
    lsl     C0
    adc     C1, __zero_reg__
    adc     C2, __zero_reg__
    brcs .Lmax
    ;; Move result into place
    mov     C3, C2
    mov     C2, C1
    ret
.Lmax:
    ;; Saturate
    ldi     C2, 0xff
    ldi     C3, 0xff
    ret
ENDF  __usmuluha3
#endif /* L_usmuluha3 */
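
;; A rough C model of the saturated 8.8 multiply above (illustrative):
;; one comparison covers both the "high byte non-zero" test and the
;; carry out of the rounding step.
/*
    #include <assert.h>
    #include <stdint.h>

    static uint16_t usmuluha (uint16_t a, uint16_t b)
    {
      uint32_t p = (uint32_t) a * b;                  // 16.16 product
      uint32_t q = (p >> 8) + ((p >> 7) & 1);         // round to 8.8
      return q > 0xffff ? 0xffff : (uint16_t) q;      // saturate at max
    }

    int main (void)
    {
      assert (usmuluha (0x1000, 0x1000) == 0xffff);   // 16 * 16 saturates
      assert (usmuluha (0x0180, 0x0200) == 0x0300);   // 1.5 * 2.0 = 3.0
      return 0;
    }
*/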

/***********************************************************
    Fixed signed saturated Multiplication  s8.7 x s8.7
***********************************************************/

#if defined (L_ssmulha3)
DEFUN __ssmulha3
    ;; Widening multiply
#ifdef __AVR_HAVE_MUL__
    ;; Adjust interface
    movw    R26, R22
    movw    R18, R24
#endif /* HAVE MUL */
    XCALL   __mulhisi3
    ;; Adjust decimal point
    lsl     C0
    rol     C1
    rol     C2
    brvs .LsatC3.3
    ;; The 9 MSBs must be the same
    rol     C3
    sbc     SS, SS
    cp      C3, SS
    brne .LsatSS
    ;; Round
    lsl     C0
    adc     C1, __zero_reg__
    adc     C2, __zero_reg__
    brvs .Lmax
    ;; Move result into place
    mov    C3, C2
    mov    C2, C1
    ret
.Lmax:
    ;; Load 0x7fff
    clr     C3
.LsatC3.3:
    ;; C3 <  0 -->  0x8000
    ;; C3 >= 0 -->  0x7fff
    mov     SS, C3
.LsatSS:
    ;; Load min / max value:
    ;; SS = -1  -->  0x8000
    ;; SS =  0  -->  0x7fff
    ldi     C3, 0x7f
    ldi     C2, 0xff
    sbrc    SS, 7
    adiw    C2, 1
    ret
ENDF  __ssmulha3
#endif /* L_ssmulha3 */
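
;; A rough C model of the saturated s8.7 multiply above (illustrative;
;; assumes arithmetic >>).  The "9 MSBs must be the same" test in the
;; assembly is exactly the range check written out below:
/*
    #include <assert.h>
    #include <stdint.h>

    static int16_t ssmulha (int16_t a, int16_t b)
    {
      int32_t p = (int32_t) a * b;                    // s16.14 product
      int32_t q = (p >> 7) + ((p >> 6) & 1);          // round to s8.7
      if (q > INT16_MAX) return INT16_MAX;            // 0x7fff
      if (q < INT16_MIN) return INT16_MIN;            // 0x8000
      return (int16_t) q;
    }

    int main (void)
    {
      assert (ssmulha (0x4000, 0x4000) == INT16_MAX);   // 128 * 128
      assert (ssmulha (-0x4000, 0x4000) == INT16_MIN);  // -128 * 128
      assert (ssmulha (0x00C0, -0x0100) == -0x0180);    // 1.5 * -2 = -3
      return 0;
    }
*/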

#undef C0
#undef C1
#undef C2
#undef C3
#undef SS

/***********************************************************
    Fixed  unsigned saturated Multiplication  16.16 x 16.16
***********************************************************/

#define C0  18
#define C1  C0+1
#define C2  C0+2
#define C3  C0+3
#define C4  C0+4
#define C5  C0+5
#define C6  C0+6
#define C7  C0+7
#define SS __tmp_reg__

#if defined (L_usmulusa3)
;; R22[4] = R22[4] *{usat} R18[4]
;; Ordinary ABI function
DEFUN __usmulusa3
    ;; Widening multiply
    XCALL   __umulsidi3
    or      C7, C6
    brne .Lmax
    ;; Round, target is in C2..C5
    lsl     C1
    adc     C2, __zero_reg__
    adc     C3, __zero_reg__
    adc     C4, __zero_reg__
    adc     C5, __zero_reg__
    brcs .Lmax
    ;; Move result into place
    wmov    C6, C4
    wmov    C4, C2
    ret
.Lmax:
    ;; Saturate
    ldi     C7, 0xff
    ldi     C6, 0xff
    wmov    C4, C6
    ret
ENDF  __usmulusa3
#endif /* L_usmulusa3 */

/***********************************************************
    Fixed signed saturated Multiplication  s16.15 x s16.15
***********************************************************/

#if defined (L_ssmulsa3)
;; R22[4] = R22[4] *{ssat} R18[4]
;; Ordinary ABI function
DEFUN __ssmulsa3
    ;; Widening multiply
    XCALL   __mulsidi3
    ;; Adjust decimal point
    lsl     C1
    rol     C2
    rol     C3
    rol     C4
    rol     C5
    brvs .LsatC7.7
    ;; The 17 MSBs must be the same
    rol     C6
    rol     C7
    sbc     SS, SS
    cp      C6, SS
    cpc     C7, SS
    brne .LsatSS
    ;; Round
    lsl     C1
    adc     C2, __zero_reg__
    adc     C3, __zero_reg__
    adc     C4, __zero_reg__
    adc     C5, __zero_reg__
    brvs .Lmax
    ;; Move result into place
    wmov    C6, C4
    wmov    C4, C2
    ret

.Lmax:
    ;; Load 0x7fffffff
    clr     C7
.LsatC7.7:
    ;; C7 <  0 -->  0x80000000
    ;; C7 >= 0 -->  0x7fffffff
    lsl     C7
    sbc     SS, SS
.LsatSS:
    ;; Load min / max value:
    ;; SS = -1  -->  0x80000000
    ;; SS =  0  -->  0x7fffffff
    com     SS
    mov     C4, SS
    mov     C5, C4
    wmov    C6, C4
    subi    C7, 0x80
    ret
ENDF  __ssmulsa3
#endif /* L_ssmulsa3 */
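
;; A rough C model of both 32-bit saturated multiplies (illustrative;
;; assumes arithmetic >> on negative values):
/*
    #include <assert.h>
    #include <stdint.h>

    static uint32_t usmulusa (uint32_t a, uint32_t b)   // 16.16, usat
    {
      uint64_t p = (uint64_t) a * b;                    // 32.32 product
      uint64_t q = (p >> 16) + ((p >> 15) & 1);         // round
      return q > UINT32_MAX ? UINT32_MAX : (uint32_t) q;
    }

    static int32_t ssmulsa (int32_t a, int32_t b)       // s16.15, ssat
    {
      int64_t p = (int64_t) a * b;                      // s32.30 product
      int64_t q = (p >> 15) + ((p >> 14) & 1);          // round
      if (q > INT32_MAX) return INT32_MAX;
      if (q < INT32_MIN) return INT32_MIN;
      return (int32_t) q;
    }

    int main (void)
    {
      assert (usmulusa (0x10000000, 0x00100000) == UINT32_MAX);
      assert (ssmulsa (0x40000000, 0x40000000) == INT32_MAX);
      assert (ssmulsa (0x0000C000, -0x00010000) == -0x00018000);
      return 0;
    }
*/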

#undef C0
#undef C1
#undef C2
#undef C3
#undef C4
#undef C5
#undef C6
#undef C7
#undef SS

/*******************************************************
    Fractional Division 8 / 8
*******************************************************/

#define r_divd  r25     /* dividend */
#define r_quo   r24     /* quotient */
#define r_div   r22     /* divisor */
#define r_sign  __tmp_reg__

#if defined (L_divqq3)
DEFUN   __divqq3
    mov     r_sign, r_divd
    eor     r_sign, r_div
    sbrc    r_div, 7
    neg     r_div
    sbrc    r_divd, 7
    neg     r_divd
    XCALL   __divqq_helper
    lsr     r_quo
    sbrc    r_sign, 7   ; negate result if needed
    neg     r_quo
    ret
ENDF __divqq3
#endif  /* L_divqq3 */

#if defined (L_udivuqq3)
DEFUN   __udivuqq3
    cp      r_divd, r_div
    brsh    0f
    XJMP __divqq_helper
    ;; Result is out of [0, 1)  ==>  Return 1 - eps.
0:  ldi     r_quo, 0xff
    ret
ENDF __udivuqq3
#endif  /* L_udivuqq3 */


#if defined (L_divqq_helper)
DEFUN   __divqq_helper
    clr     r_quo           ; clear quotient
    inc     __zero_reg__    ; init loop counter, used per shift
__udivuqq3_loop:
    lsl     r_divd          ; shift dividend
    brcs    0f              ; dividend overflow
    cp      r_divd,r_div    ; compare dividend & divisor
    brcc    0f              ; dividend >= divisor
    rol     r_quo           ; shift quotient (with CARRY)
    rjmp    __udivuqq3_cont
0:
    sub     r_divd,r_div    ; restore dividend
    lsl     r_quo           ; shift quotient (without CARRY)
__udivuqq3_cont:
    lsl     __zero_reg__    ; shift loop-counter bit
    brne    __udivuqq3_loop
    com     r_quo           ; complement result
                            ; because C flag was complemented in loop
    ret
ENDF __divqq_helper
#endif  /* L_divqq_helper */
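
;; The helper above is plain restoring division that produces one
;; fraction bit per step; the loop stores complemented quotient bits,
;; hence the final com.  A rough, value-equivalent C model
;; (illustrative):
/*
    #include <assert.h>
    #include <stdint.h>

    static uint8_t udivuqq (uint8_t dividend, uint8_t divisor)
    {
      uint8_t q = 0;                       // requires dividend < divisor
      for (int i = 0; i < 8; i++)
        {
          unsigned d = (unsigned) dividend << 1;  // bring down next bit
          q = (uint8_t) (q << 1);
          if (d >= divisor)                // next quotient bit is 1
            {
              d -= divisor;
              q |= 1;
            }
          dividend = (uint8_t) d;
        }
      return q;                            // .8 quotient, truncated
    }

    int main (void)
    {
      assert (udivuqq (1, 2) == 0x80);     // 0.5
      assert (udivuqq (1, 3) == 0x55);     // 0.01010101b, 1/3 truncated
      return 0;
    }
*/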

#undef  r_divd
#undef  r_quo
#undef  r_div
#undef  r_sign


/*******************************************************
    Fractional Division 16 / 16
*******************************************************/
#define r_divdL 26     /* dividend Low */
#define r_divdH 27     /* dividend High */
#define r_quoL  24     /* quotient Low */
#define r_quoH  25     /* quotient High */
#define r_divL  22     /* divisor */
#define r_divH  23     /* divisor */
#define r_cnt   21

#if defined (L_divhq3)
DEFUN   __divhq3
    mov     r0, r_divdH
    eor     r0, r_divH
    sbrs    r_divH, 7
    rjmp    1f
    NEG2    r_divL
1:
    sbrs    r_divdH, 7
    rjmp    2f
    NEG2    r_divdL
2:
    cp      r_divdL, r_divL
    cpc     r_divdH, r_divH
    breq    __divhq3_minus1  ; if equal return -1
    XCALL   __udivuhq3
    lsr     r_quoH
    ror     r_quoL
    tst     r0
    brpl    9f
    ;; negate result if needed
    NEG2    r_quoL
9:
    ret
__divhq3_minus1:
    ldi     r_quoH, 0x80
    clr     r_quoL
    ret
ENDF __divhq3
#endif  /* defined (L_divhq3) */

#if defined (L_udivuhq3)
DEFUN   __udivuhq3
    sub     r_quoH,r_quoH   ; clear quotient and carry
    ;; FALLTHRU
ENDF __udivuhq3

DEFUN   __udivuha3_common
    clr     r_quoL          ; clear quotient
    ldi     r_cnt,16        ; init loop counter
__udivuhq3_loop:
    rol     r_divdL         ; shift dividend (with CARRY)
    rol     r_divdH
    brcs    __udivuhq3_ep   ; dividend overflow
    cp      r_divdL,r_divL  ; compare dividend & divisor
    cpc     r_divdH,r_divH
    brcc    __udivuhq3_ep   ; dividend >= divisor
    rol     r_quoL          ; shift quotient (with CARRY)
    rjmp    __udivuhq3_cont
__udivuhq3_ep:
    sub     r_divdL,r_divL  ; restore dividend
    sbc     r_divdH,r_divH
    lsl     r_quoL          ; shift quotient (without CARRY)
__udivuhq3_cont:
    rol     r_quoH          ; shift quotient
    dec     r_cnt           ; decrement loop counter
    brne    __udivuhq3_loop
    com     r_quoL          ; complement result
    com     r_quoH          ; because C flag was complemented in loop
    ret
ENDF __udivuha3_common
#endif  /* defined (L_udivuhq3) */

/*******************************************************
    Fixed Division 8.8 / 8.8
*******************************************************/
#if defined (L_divha3)
DEFUN   __divha3
    mov     r0, r_divdH
    eor     r0, r_divH
    sbrs    r_divH, 7
    rjmp    1f
    NEG2    r_divL
1:
    sbrs    r_divdH, 7
    rjmp    2f
    NEG2    r_divdL
2:
    XCALL   __udivuha3
    lsr     r_quoH  ; adjust to 7 fractional bits
    ror     r_quoL
    sbrs    r0, 7   ; negate result if needed
    ret
    NEG2    r_quoL
    ret
ENDF __divha3
#endif  /* defined (L_divha3) */

#if defined (L_udivuha3)
DEFUN   __udivuha3
    mov     r_quoH, r_divdL
    mov     r_divdL, r_divdH
    clr     r_divdH
    lsl     r_quoH     ; shift quotient into carry
    XJMP    __udivuha3_common ; same as fractional after rearrange
ENDF __udivuha3
#endif  /* defined (L_udivuha3) */
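
;; The accum division needs no second loop: an 8.8 quotient is just the
;; .16 fractional quotient of rearranged operands, i.e. (a << 8) / b.
;; A rough C model (illustrative; ignores quotients >= 2^8, which wrap):
/*
    #include <assert.h>
    #include <stdint.h>

    static uint16_t udivuha (uint16_t a, uint16_t b)  // 8.8 / 8.8
    {
      return (uint16_t) ((((uint32_t) a) << 8) / b);  // truncating
    }

    int main (void)
    {
      assert (udivuha (0x0300, 0x0200) == 0x0180);    // 3.0 / 2.0 = 1.5
      assert (udivuha (0x0100, 0x0300) == 0x0055);    // 1.0 / 3.0
      return 0;
    }
*/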

#undef  r_divdL
#undef  r_divdH
#undef  r_quoL
#undef  r_quoH
#undef  r_divL
#undef  r_divH
#undef  r_cnt

/*******************************************************
    Fixed Division 16.16 / 16.16
*******************************************************/

#define r_arg1L  24    /* arg1 gets passed already in place */
#define r_arg1H  25
#define r_arg1HL 26
#define r_arg1HH 27
#define r_divdL  26    /* dividend Low */
#define r_divdH  27
#define r_divdHL 30
#define r_divdHH 31    /* dividend High */
#define r_quoL   22    /* quotient Low */
#define r_quoH   23
#define r_quoHL  24
#define r_quoHH  25    /* quotient High */
#define r_divL   18    /* divisor Low */
#define r_divH   19
#define r_divHL  20
#define r_divHH  21    /* divisor High */
#define r_cnt  __zero_reg__  /* loop count (0 after the loop!) */

#if defined (L_divsa3)
DEFUN   __divsa3
    mov     r0, r_arg1HH
    eor     r0, r_divHH
    sbrs    r_divHH, 7
    rjmp    1f
    NEG4    r_divL
1:
    sbrs    r_arg1HH, 7
    rjmp    2f
    NEG4    r_arg1L
2:
    XCALL   __udivusa3
    lsr     r_quoHH ; adjust to 15 fractional bits
    ror     r_quoHL
    ror     r_quoH
    ror     r_quoL
    sbrs    r0, 7   ; negate result if needed
    ret
    ;; negate r_quoL
    XJMP    __negsi2
ENDF __divsa3
#endif  /* defined (L_divsa3) */

#if defined (L_udivusa3)
DEFUN   __udivusa3
    ldi     r_divdHL, 32    ; init loop counter
    mov     r_cnt, r_divdHL
    clr     r_divdHL
    clr     r_divdHH
    wmov    r_quoL, r_divdHL
    lsl     r_quoHL         ; shift quotient into carry
    rol     r_quoHH
__udivusa3_loop:
    rol     r_divdL         ; shift dividend (with CARRY)
    rol     r_divdH
    rol     r_divdHL
    rol     r_divdHH
    brcs    __udivusa3_ep   ; dividend overflow
    cp      r_divdL,r_divL  ; compare dividend & divisor
    cpc     r_divdH,r_divH
    cpc     r_divdHL,r_divHL
    cpc     r_divdHH,r_divHH
    brcc    __udivusa3_ep   ; dividend >= divisor
    rol     r_quoL          ; shift quotient (with CARRY)
    rjmp    __udivusa3_cont
__udivusa3_ep:
    sub     r_divdL,r_divL  ; restore dividend
    sbc     r_divdH,r_divH
    sbc     r_divdHL,r_divHL
    sbc     r_divdHH,r_divHH
    lsl     r_quoL          ; shift quotient (without CARRY)
__udivusa3_cont:
    rol     r_quoH          ; shift quotient
    rol     r_quoHL
    rol     r_quoHH
    dec     r_cnt           ; decrement loop counter
    brne    __udivusa3_loop
    com     r_quoL          ; complement result
    com     r_quoH          ; because C flag was complemented in loop
    com     r_quoHL
    com     r_quoHH
    ret
ENDF __udivusa3
#endif  /* defined (L_udivusa3) */

#undef  r_arg1L
#undef  r_arg1H
#undef  r_arg1HL
#undef  r_arg1HH
#undef  r_divdL
#undef  r_divdH
#undef  r_divdHL
#undef  r_divdHH
#undef  r_quoL
#undef  r_quoH
#undef  r_quoHL
#undef  r_quoHH
#undef  r_divL
#undef  r_divH
#undef  r_divHL
#undef  r_divHH
#undef  r_cnt


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Saturation, 1 Byte
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; First Argument and Return Register
#define A0  24

#if defined (L_ssabs_1)
DEFUN __ssabs_1
    sbrs    A0, 7
    ret
    neg     A0
    sbrc    A0,7
    dec     A0
    ret
ENDF __ssabs_1
#endif /* L_ssabs_1 */
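
;; A rough C model (illustrative): only -1.0 (0x80) needs fixing, since
;; neg leaves 0x80 unchanged and the following dec turns it into 0x7f.
/*
    #include <assert.h>
    #include <stdint.h>

    static int8_t ssabs_1 (int8_t a)
    {
      if (a >= 0)
        return a;
      return a == -128 ? 127 : (int8_t) -a;   // saturate -(-1.0)
    }

    int main (void)
    {
      assert (ssabs_1 (-128) == 127);     // |-1.0| -> 1 - 2^-7
      assert (ssabs_1 (-64) == 64);
      return 0;
    }
*/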

#undef A0


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Saturation, 2 Bytes
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; First Argument and Return Register
#define A0  24
#define A1  A0+1

#if defined (L_ssneg_2)
DEFUN __ssneg_2
    NEG2    A0
    brvc 0f
    sbiw    A0, 1
0:  ret
ENDF __ssneg_2
#endif /* L_ssneg_2 */

#if defined (L_ssabs_2)
DEFUN __ssabs_2
    sbrs    A1, 7
    ret
    XJMP    __ssneg_2
ENDF __ssabs_2
#endif /* L_ssabs_2 */

#undef A0
#undef A1


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Saturation, 4 Bytes
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; First Argument and Return Register
#define A0  22
#define A1  A0+1
#define A2  A0+2
#define A3  A0+3

#if defined (L_ssneg_4)
DEFUN __ssneg_4
    XCALL   __negsi2
    brvc 0f
    ldi     A3, 0x7f
    ldi     A2, 0xff
    ldi     A1, 0xff
    ldi     A0, 0xff
0:  ret
ENDF __ssneg_4
#endif /* L_ssneg_4 */

#if defined (L_ssabs_4)
DEFUN __ssabs_4
    sbrs    A3, 7
    ret
    XJMP    __ssneg_4
ENDF __ssabs_4
#endif /* L_ssabs_4 */

#undef A0
#undef A1
#undef A2
#undef A3


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Saturation, 8 Bytes
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; First Argument and Return Register
#define A0  18
#define A1  A0+1
#define A2  A0+2
#define A3  A0+3
#define A4  A0+4
#define A5  A0+5
#define A6  A0+6
#define A7  A0+7

#if defined (L_clr_8)
FALIAS __usneguta2
FALIAS __usneguda2
FALIAS __usnegudq2

;; Clear Carry and all Bytes
DEFUN __clr_8
    ;; Clear Carry and set Z
    sub     A7, A7
    ;; FALLTHRU
ENDF  __clr_8
;; Propagate Carry to all Bytes, Carry unaltered
DEFUN __sbc_8
    sbc     A7, A7
    sbc     A6, A6
    wmov    A4, A6
    wmov    A2, A6
    wmov    A0, A6
    ret
ENDF __sbc_8
#endif /* L_clr_8 */

#if defined (L_ssneg_8)
FALIAS __ssnegta2
FALIAS __ssnegda2
FALIAS __ssnegdq2

DEFUN __ssneg_8
    XCALL   __negdi2
    brvc 0f
    ;; A[] = 0x7fffffffffffffff
    sec
    XCALL   __sbc_8
    ldi     A7, 0x7f
0:  ret
ENDF __ssneg_8
#endif /* L_ssneg_8 */

#if defined (L_ssabs_8)
FALIAS __ssabsta2
FALIAS __ssabsda2
FALIAS __ssabsdq2

DEFUN __ssabs_8
    sbrs    A7, 7
    ret
    XJMP    __ssneg_8
ENDF __ssabs_8
#endif /* L_ssabs_8 */

;; Second Argument
#define B0  10
#define B1  B0+1
#define B2  B0+2
#define B3  B0+3
#define B4  B0+4
#define B5  B0+5
#define B6  B0+6
#define B7  B0+7

#if defined (L_usadd_8)
FALIAS __usadduta3
FALIAS __usadduda3
FALIAS __usaddudq3

DEFUN __usadd_8
    XCALL   __adddi3
    brcs 0f
    ret
0:  ;; A[] = 0xffffffffffffffff
    XJMP    __sbc_8
ENDF __usadd_8
#endif /* L_usadd_8 */

#if defined (L_ussub_8)
FALIAS __ussubuta3
FALIAS __ussubuda3
FALIAS __ussubudq3

DEFUN __ussub_8
    XCALL   __subdi3
    brcs 0f
    ret
0:  ;; A[] = 0
    XJMP    __clr_8
ENDF __ussub_8
#endif /* L_ussub_8 */

#if defined (L_ssadd_8)
FALIAS __ssaddta3
FALIAS __ssaddda3
FALIAS __ssadddq3

DEFUN __ssadd_8
    XCALL   __adddi3
    brvc 0f
    ;; A = (B >= 0) ? INT64_MAX : INT64_MIN
    cpi     B7, 0x80
    XCALL   __sbc_8
    subi    A7, 0x80
0:  ret
ENDF __ssadd_8
#endif /* L_ssadd_8 */

#if defined (L_sssub_8)
FALIAS __sssubta3
FALIAS __sssubda3
FALIAS __sssubdq3

DEFUN __sssub_8
    XCALL   __subdi3
    brvc 0f
    ;; A = (B < 0) ? INT64_MAX : INT64_MIN
    ldi     A7, 0x7f
    cp      A7, B7
    XCALL   __sbc_8
    subi    A7, 0x80
0:  ret
ENDF __sssub_8
#endif /* L_sssub_8 */
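
;; All four saturated 64-bit operations share one trick: __sbc_8 turns
;; the carry into 0x00...00 or 0xff...ff across all eight bytes, and a
;; final subi on the top byte converts that into INT64_MAX or INT64_MIN.
;; A rough C model of __ssadd_8 (illustrative):
/*
    #include <assert.h>
    #include <stdint.h>

    static int64_t ssadd_8 (int64_t a, int64_t b)
    {
      int64_t s = (int64_t) ((uint64_t) a + (uint64_t) b);
      // Signed overflow iff a and b share a sign that s lacks; the
      // sign of b then selects which rail to return.
      if (((a ^ s) & (b ^ s)) < 0)
        return b >= 0 ? INT64_MAX : INT64_MIN;
      return s;
    }

    int main (void)
    {
      assert (ssadd_8 (INT64_MAX, 1) == INT64_MAX);
      assert (ssadd_8 (INT64_MIN, -1) == INT64_MIN);
      assert (ssadd_8 (1, 2) == 3);
      return 0;
    }
*/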

#undef A0
#undef A1
#undef A2
#undef A3
#undef A4
#undef A5
#undef A6
#undef A7
#undef B0
#undef B1
#undef B2
#undef B3
#undef B4
#undef B5
#undef B6
#undef B7


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Rounding Helpers
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

#ifdef L_mask1

#define AA 24
#define CC 25

;; R25 = 1 << (R24 & 7)
;; CC  = 1 << (AA  & 7)
;; Clobbers: None
DEFUN __mask1
    ;; CC = 2 ^ AA.1
    ldi     CC, 1 << 2
    sbrs    AA, 1
    ldi     CC, 1 << 0
    ;; CC *= 2 ^ AA.0
    sbrc    AA, 0
    lsl     CC
    ;; CC *= 2 ^ AA.2
    sbrc    AA, 2
    swap    CC
    ret
ENDF __mask1

#undef AA
#undef CC
#endif /* L_mask1 */
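
;; A rough C model of __mask1 (illustrative): a variable shift would
;; need a loop, so the three low address bits are handled one at a time,
;; with swap standing in for "multiply by 16".
/*
    #include <assert.h>
    #include <stdint.h>

    static uint8_t mask1 (uint8_t n)        // computes 1 << (n & 7)
    {
      uint8_t c = (n & 2) ? 1 << 2 : 1;     // bit 1 contributes * 4
      if (n & 1)                            // bit 0 contributes * 2
        c = (uint8_t) (c << 1);
      if (n & 4)                            // bit 2 contributes * 16
        c = (uint8_t) ((c << 4) | (c >> 4)); // nibble swap
      return c;
    }

    int main (void)
    {
      for (int n = 0; n < 8; n++)
        assert (mask1 ((uint8_t) n) == 1u << n);
      return 0;
    }
*/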

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; The rounding point. Any bits smaller than
;; 2^{-RP} will be cleared.
#define RP R24

#define A0 22
#define A1 A0 + 1

#define C0 24
#define C1 C0 + 1

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Rounding, 1 Byte
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

#ifdef L_roundqq3

;; R24 = round (R22, R24)
;; Clobbers: R22, __tmp_reg__
DEFUN  __roundqq3
    mov     __tmp_reg__, C1
    subi    RP, __QQ_FBIT__ - 1
    neg     RP
    ;; R25 = 1 << RP  (Total offset is FBIT-1 - RP)
    XCALL   __mask1
    mov     C0, C1
    ;; Add-Saturate 2^{-RP-1}
    add     A0, C0
    brvc 0f
    ldi     C0, 0x7f
    rjmp 9f
0:  ;; Mask out bits beyond RP
    lsl     C0
    neg     C0
    and     C0, A0
9:  mov     C1, __tmp_reg__
    ret
ENDF  __roundqq3
#endif /* L_roundqq3 */

#ifdef L_rounduqq3

;; R24 = round (R22, R24)
;; Clobbers: R22, __tmp_reg__
DEFUN  __rounduqq3
    mov     __tmp_reg__, C1
    subi    RP, __UQQ_FBIT__ - 1
    neg     RP
    ;; R25 = 1 << RP  (Total offset is FBIT-1 - RP)
    XCALL   __mask1
    mov     C0, C1
    ;; Add-Saturate 2^{-RP-1}
    add     A0, C0
    brcc 0f
    ldi     C0, 0xff
    rjmp 9f
0:  ;; Mask out bits beyond RP
    lsl     C0
    neg     C0
    and     C0, A0
9:  mov     C1, __tmp_reg__
    ret
ENDF  __rounduqq3
#endif /* L_rounduqq3 */

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Rounding, 2 Bytes
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

#ifdef L_addmask_2

;; [ R25:R24 =  1 << (R24 & 15)
;;   R23:R22 += 1 << (R24 & 15) ]
;; SREG is set according to the addition
DEFUN __addmask_2
    ;; R25 = 1 << (R24 & 7)
    XCALL   __mask1
    cpi     RP, 1 << 3
    sbc     C0, C0
    ;; Swap C0 and C1 if RP.3 was not set
    and     C0, C1
    eor     C1, C0
    ;; Finally, add the power-of-two:  A[] += C[]
    add     A0, C0
    adc     A1, C1
    ret
ENDF  __addmask_2
#endif /* L_addmask_2 */

#ifdef L_round_s2

;; R25:R24 = round (R23:R22, R24)
;; Clobbers: R23, R22
DEFUN  __roundhq3
    subi    RP, __HQ_FBIT__ - __HA_FBIT__
ENDF   __roundhq3
DEFUN  __roundha3
    subi    RP, __HA_FBIT__ - 1
    neg     RP
    ;; [ R25:R24  = 1 << (FBIT-1 - RP)
    ;;   R23:R22 += 1 << (FBIT-1 - RP) ]
    XCALL   __addmask_2
    XJMP    __round_s2_const
ENDF  __roundha3

#endif /* L_round_s2 */

#ifdef L_round_u2

;; R25:R24 = round (R23:R22, R24)
;; Clobbers: R23, R22
DEFUN  __rounduhq3
    subi    RP, __UHQ_FBIT__ - __UHA_FBIT__
ENDF   __rounduhq3
DEFUN  __rounduha3
    subi    RP, __UHA_FBIT__ - 1
    neg     RP
    ;; [ R25:R24  = 1 << (FBIT-1 - RP)
    ;;   R23:R22 += 1 << (FBIT-1 - RP) ]
    XCALL   __addmask_2
    XJMP    __round_u2_const
ENDF  __rounduha3

#endif /* L_round_u2 */


#ifdef L_round_2_const

;; Helpers for 2 byte wide rounding

DEFUN  __round_s2_const
    brvc 2f
    ldi     C1, 0x7f
    rjmp 1f
    ;; FALLTHRU (Barrier)
ENDF  __round_s2_const

DEFUN __round_u2_const
    brcc 2f
    ldi     C1, 0xff
1:
    ldi     C0, 0xff
    rjmp 9f
2:
    ;; Saturation is performed now.
    ;; Currently, we have C[] = 2^{-RP-1}
    ;; C[] = 2^{-RP}
    lsl     C0
    rol     C1
    ;;
    NEG2    C0
    ;; Clear the bits beyond the rounding point.
    and     C0, A0
    and     C1, A1
9:  ret
ENDF  __round_u2_const

#endif /* L_round_2_const */
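
;; Every rounding routine in this file, from 1 to 8 bytes, implements
;; the same formula: add half of the last kept bit, saturate if that
;; addition carries or overflows, then clear all bits below 2^{-RP}.
;; A rough C model of the signed 2-byte case (illustrative; HA has
;; FBIT = 7, and arithmetic >>/& on negative values is assumed):
/*
    #include <assert.h>
    #include <stdint.h>

    static int16_t roundha (int16_t x, int rp)   // 0 <= rp < 7
    {
      int add  = 1 << (7 - 1 - rp);              // value 2^{-rp-1}
      int mask = -(1 << (7 - rp));               // keeps bits >= 2^{-rp}
      int32_t s = (int32_t) x + add;             // may exceed int16
      if (s > INT16_MAX)
        return INT16_MAX;                        // saturate like the asm
      return (int16_t) (s & mask);
    }

    int main (void)
    {
      assert (roundha (0x0040, 0) == 0x0080);    // round (0.5) -> 1.0
      assert (roundha (0x00DF, 1) == 0x00C0);    // 1.742... -> 1.5
      assert (roundha (0x7FFF, 0) == INT16_MAX); // saturates at max
      return 0;
    }
*/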

#undef A0
#undef A1
#undef C0
#undef C1

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Rounding, 4 Bytes
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

#define A0 18
#define A1 A0 + 1
#define A2 A0 + 2
#define A3 A0 + 3

#define C0 22
#define C1 C0 + 1
#define C2 C0 + 2
#define C3 C0 + 3

#ifdef L_addmask_4

;; [ R25:R22 =  1 << (R24 & 31)
;;   R21:R18 += 1 << (R24 & 31) ]
;; SREG is set according to the addition
DEFUN __addmask_4
    ;; R25 = 1 << (R24 & 7)
    XCALL   __mask1
    cpi     RP, 1 << 4
    sbc     C0, C0
    sbc     C1, C1
    ;; Swap C2 with C3 if RP.3 is not set
    cpi     RP, 1 << 3
    sbc     C2, C2
    and     C2, C3
    eor     C3, C2
    ;; Swap C3:C2 with C1:C0 if RP.4 is not set
    and     C0, C2  $  eor     C2, C0
    and     C1, C3  $  eor     C3, C1
    ;; Finally, add the power-of-two:  A[] += C[]
    add     A0, C0
    adc     A1, C1
    adc     A2, C2
    adc     A3, C3
    ret
ENDF  __addmask_4
#endif /* L_addmask_4 */

#ifdef L_round_s4

;; R25:R22 = round (R21:R18, R24)
;; Clobbers: R18...R21
DEFUN  __roundsq3
    subi    RP, __SQ_FBIT__ - __SA_FBIT__
ENDF   __roundsq3
DEFUN  __roundsa3
    subi    RP, __SA_FBIT__ - 1
    neg     RP
    ;; [ R25:R22  = 1 << (FBIT-1 - RP)
    ;;   R21:R18 += 1 << (FBIT-1 - RP) ]
    XCALL   __addmask_4
    XJMP    __round_s4_const
ENDF  __roundsa3

#endif /* L_round_s4 */

#ifdef L_round_u4

;; R25:R22 = round (R21:R18, R24)
;; Clobbers: R18...R21
DEFUN  __roundusq3
    subi    RP, __USQ_FBIT__ - __USA_FBIT__
ENDF   __roundusq3
DEFUN  __roundusa3
    subi    RP, __USA_FBIT__ - 1
    neg     RP
    ;; [ R25:R22  = 1 << (FBIT-1 - RP)
    ;;   R21:R18 += 1 << (FBIT-1 - RP) ]
    XCALL   __addmask_4
    XJMP    __round_u4_const
ENDF  __roundusa3

#endif /* L_round_u4 */


#ifdef L_round_4_const

;; Helpers for 4 byte wide rounding

DEFUN  __round_s4_const
    brvc 2f
    ldi     C3, 0x7f
    rjmp 1f
    ;; FALLTHRU (Barrier)
ENDF  __round_s4_const

DEFUN __round_u4_const
    brcc 2f
    ldi     C3, 0xff
1:
    ldi     C2, 0xff
    ldi     C1, 0xff
    ldi     C0, 0xff
    rjmp 9f
2:
    ;; Saturation is performed now.
    ;; Currently, we have C[] = 2^{-RP-1}
    ;; C[] = 2^{-RP}
    lsl     C0
    rol     C1
    rol     C2
    rol     C3
    XCALL   __negsi2
    ;; Clear the bits beyond the rounding point.
    and     C0, A0
    and     C1, A1
    and     C2, A2
    and     C3, A3
9:  ret
ENDF  __round_u4_const

#endif /* L_round_4_const */

#undef A0
#undef A1
#undef A2
#undef A3
#undef C0
#undef C1
#undef C2
#undef C3

#undef RP

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Rounding, 8 Bytes
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

#define RP     16
#define FBITm1 31

#define C0 18
#define C1 C0 + 1
#define C2 C0 + 2
#define C3 C0 + 3
#define C4 C0 + 4
#define C5 C0 + 5
#define C6 C0 + 6
#define C7 C0 + 7

#define A0 16
#define A1 17
#define A2 26
#define A3 27
#define A4 28
#define A5 29
#define A6 30
#define A7 31


#ifdef L_rounddq3
;; R25:R18 = round (R25:R18, R16)
;; Clobbers: ABI
DEFUN  __rounddq3
    ldi     FBITm1, __DQ_FBIT__ - 1
    clt
    XJMP    __round_x8
ENDF  __rounddq3
#endif /* L_rounddq3 */

#ifdef L_roundudq3
;; R25:R18 = round (R25:R18, R16)
;; Clobbers: ABI
DEFUN  __roundudq3
    ldi     FBITm1, __UDQ_FBIT__ - 1
    set
    XJMP    __round_x8
ENDF  __roundudq3
#endif /* L_roundudq3 */

#ifdef L_roundda3
;; R25:R18 = round (R25:R18, R16)
;; Clobbers: ABI
DEFUN  __roundda3
    ldi     FBITm1, __DA_FBIT__ - 1
    clt
    XJMP    __round_x8
ENDF  __roundda3
#endif /* L_roundda3 */

#ifdef L_rounduda3
;; R25:R18 = round (R25:R18, R16)
;; Clobbers: ABI
DEFUN  __rounduda3
    ldi     FBITm1, __UDA_FBIT__ - 1
    set
    XJMP    __round_x8
ENDF  __rounduda3
#endif /* L_rounduda3 */

#ifdef L_roundta3
;; R25:R18 = round (R25:R18, R16)
;; Clobbers: ABI
DEFUN  __roundta3
    ldi     FBITm1, __TA_FBIT__ - 1
    clt
    XJMP    __round_x8
ENDF  __roundta3
#endif /* L_roundta3 */

#ifdef L_rounduta3
;; R25:R18 = round (R25:R18, R16)
;; Clobbers: ABI
DEFUN  __rounduta3
    ldi     FBITm1, __UTA_FBIT__ - 1
    set
    XJMP    __round_x8
ENDF  __rounduta3
#endif /* L_rounduta3 */


#ifdef L_round_x8
DEFUN __round_x8
    push r16
    push r17
    push r28
    push r29
    ;; Compute log2 of addend from rounding point
    sub     RP, FBITm1
    neg     RP
    ;; Move input to work register A[]
    push    C0
    mov     A1, C1
    wmov    A2, C2
    wmov    A4, C4
    wmov    A6, C6
    ;; C[] = 1 << (FBIT-1 - RP)
    XCALL   __clr_8
    inc     C0
    XCALL   __ashldi3
    pop     A0
    ;; A[] += C[]
    add     A0, C0
    adc     A1, C1
    adc     A2, C2
    adc     A3, C3
    adc     A4, C4
    adc     A5, C5
    adc     A6, C6
    adc     A7, C7
    brts    1f
    ;; Signed
    brvc    3f
    ;; Signed overflow: A[] = 0x7f...
    brvs    2f
1:  ;; Unsigned
    brcc    3f
    ;; Unsigned overflow: A[] = 0xff...
2:  ldi     C7, 0xff
    ldi     C6, 0xff
    wmov    C0, C6
    wmov    C2, C6
    wmov    C4, C6
    bld     C7, 7
    rjmp 9f
3:
    ;;  C[] = -C[] - C[]
    push    A0
    ldi     r16, 1
    XCALL   __ashldi3
    pop     A0
    XCALL   __negdi2
    ;; Clear the bits beyond the rounding point.
    and     C0, A0
    and     C1, A1
    and     C2, A2
    and     C3, A3
    and     C4, A4
    and     C5, A5
    and     C6, A6
    and     C7, A7
9:  ;; Epilogue
    pop r29
    pop r28
    pop r17
    pop r16
    ret
ENDF  __round_x8

#endif /* L_round_x8 */

#undef A0
#undef A1
#undef A2
#undef A3
#undef A4
#undef A5
#undef A6
#undef A7

#undef C0
#undef C1
#undef C2
#undef C3
#undef C4
#undef C5
#undef C6
#undef C7

#undef RP
#undef FBITm1


;; Supply implementations / symbols for the bit-banging functions
;; __builtin_avr_bitsfx and __builtin_avr_fxbits
#ifdef L_ret
DEFUN __ret
    ret
ENDF  __ret
#endif /* L_ret */

#endif /* if not __AVR_TINY__ */