// libc/src/math/generic/expxf16.h - rust-lang/llvm-project - Git at Google

 //===-- Common utilities for half-precision exponential functions ---------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//

 #ifndef LLVM_LIBC_SRC_MATH_GENERIC_EXPXF16_H
 #define LLVM_LIBC_SRC_MATH_GENERIC_EXPXF16_H

 #include "src/__support/CPP/array.h"
 #include "src/__support/FPUtil/FPBits.h"
 #include "src/__support/FPUtil/PolyEval.h"
 #include "src/__support/FPUtil/cast.h"
 #include "src/__support/FPUtil/multiply_add.h"
 #include "src/__support/FPUtil/nearest_integer.h"
 #include "src/__support/macros/attributes.h"
 #include "src/__support/macros/config.h"
 #include <stdint.h>

 namespace LIBC_NAMESPACE_DECL {

 // Lookup table of exp(i) for the integers i in [-18, 12], each rounded to the
 // nearest single-precision value: EXP_HI[i + 18] ~ exp(i).
 // exp_range_reduction() indexes this table with the integer part (hi) of its
 // reduced argument, offset by 18.
 // Generated by Sollya with the following commands:
 //   > display = hexadecimal;
 //   > for i from -18 to 12 do print(round(exp(i), SG, RN));
 static constexpr cpp::array<float, 31> EXP_HI = {
     0x1.05a628p-26f, 0x1.639e32p-25f, 0x1.e355bcp-24f, 0x1.4875cap-22f,
     0x1.be6c7p-21f,  0x1.2f6054p-19f, 0x1.9c54c4p-18f, 0x1.183542p-16f,
     0x1.7cd79cp-15f, 0x1.02cf22p-13f, 0x1.5fc21p-12f,  0x1.de16bap-11f,
     0x1.44e52p-9f,   0x1.b993fep-8f,  0x1.2c155cp-6f,  0x1.97db0cp-5f,
     0x1.152aaap-3f,  0x1.78b564p-2f,  0x1p+0f,         0x1.5bf0a8p+1f,
     0x1.d8e64cp+2f,  0x1.415e5cp+4f,  0x1.b4c902p+5f,  0x1.28d38ap+7f,
     0x1.936dc6p+8f,  0x1.122886p+10f, 0x1.749ea8p+11f, 0x1.fa7158p+12f,
     0x1.5829dcp+14f, 0x1.d3c448p+15f, 0x1.3de166p+17f,
 };

 // Lookup table of exp(i / 8) for i = 0..7, each rounded to the nearest
 // single-precision value: EXP_MID[i] ~ exp(i * 2^(-3)).
 // exp_range_reduction() indexes this table with the low three bits of
 // round(x * 2^3).
 // Generated by Sollya with the following commands:
 //   > display = hexadecimal;
 //   > for i from 0 to 7 do print(round(exp(i * 2^-3), SG, RN));
 static constexpr cpp::array<float, 8> EXP_MID = {
     0x1p+0f,        0x1.221604p+0f, 0x1.48b5e4p+0f, 0x1.747a52p+0f,
     0x1.a61298p+0f, 0x1.de455ep+0f, 0x1.0ef9dcp+1f, 0x1.330e58p+1f,
 };

 // Result of one of the *_range_reduction() helpers in this header: two
 // single-precision factors that the helpers return separately instead of
 // multiplying them together.
 struct ExpRangeReduction {
   // Factor contributed by the coarse (hi + mid) part of the reduced argument.
   float exp_hi_mid;
   // Polynomial approximation contributed by the small remainder lo.
   float exp_lo;
 };

 LIBC_INLINE ExpRangeReduction exp_range_reduction(float16 x) {
   // Range reduction for exp(x), intended for -18 < x < 12.
   //
   // Write x = hi + mid + lo, in which
   //     hi is an integer,
   //     mid * 2^3 is an integer with 0 <= mid * 2^3 < 8,
   //     |lo| <= 2^(-4),
   // by taking
   //     k = round(x * 2^3),  hi + mid = k * 2^(-3),  lo = x - k * 2^(-3).
   // Then,
   //     exp(x) = exp(hi + mid + lo) = exp(hi) * exp(mid) * exp(lo).
   // exp(hi) and exp(mid) are read from the lookup tables EXP_HI and EXP_MID;
   // exp(lo) is approximated by a degree-3 minimax polynomial.
   // The two factors {exp(hi) * exp(mid), exp(lo)} are returned separately.

   float x_f32 = x;
   float k_f32 = fputil::nearest_integer(x_f32 * 0x1.0p+3f);
   int k = static_cast<int>(k_f32);
   // The low three bits of k select mid; the remaining bits form hi.
   // NOTE(review): relies on >> being an arithmetic shift for negative k.
   int mid_idx = k & 0x7;
   int hi = k >> 3;
   // lo = x - (hi + mid) = k * (-2^(-3)) + x
   float lo = fputil::multiply_add(k_f32, -0x1.0p-3f, x_f32);

   // Degree-3 minimax polynomial generated by Sollya with the following
   // commands:
   //   > display = hexadecimal;
   //   > P = fpminimax(expm1(x)/x, 2, [|SG...|], [-2^-4, 2^-4]);
   //   > 1 + x * P;
   float exp_lo =
       fputil::polyeval(lo, 0x1p+0f, 0x1p+0f, 0x1.001p-1f, 0x1.555ddep-3f);

   // EXP_HI starts at hi = -18, hence the index offset.
   float exp_hi_mid = EXP_HI[hi + 18] * EXP_MID[mid_idx];
   return {exp_hi_mid, exp_lo};
 }

 // Lookup table of 2^(i / 8) for i = 0..7, stored as the 32-bit patterns of
 // the single-precision values (e.g. 0x3f80'0000 is 1.0f) so that an integer
 // can be added directly to the exponent field.  Used by
 // exp2_range_reduction() and exp10_range_reduction().
 // Generated by Sollya with the following commands:
 //   > display = hexadecimal;
 //   > for i from 0 to 7 do printsingle(round(2^(i * 2^-3), SG, RN));
 constexpr cpp::array<uint32_t, 8> EXP2_MID_BITS = {
     0x3f80'0000U, 0x3f8b'95c2U, 0x3f98'37f0U, 0x3fa5'fed7U,
     0x3fb5'04f3U, 0x3fc5'672aU, 0x3fd7'44fdU, 0x3fea'c0c7U,
 };

 LIBC_INLINE ExpRangeReduction exp2_range_reduction(float16 x) {
   // Range reduction for 2^x, intended for -25 < x < 16.
   //
   // Write x = hi + mid + lo, in which
   //     hi is an integer,
   //     mid * 2^3 is an integer with 0 <= mid * 2^3 < 8,
   //     |lo| <= 2^(-4),
   // by taking
   //     k = round(x * 2^3),  hi + mid = k * 2^(-3),  lo = x - k * 2^(-3).
   // Then,
   //     2^x = 2^(hi + mid + lo) = 2^hi * 2^mid * 2^lo.
   // 2^mid is read (as a bit pattern) from EXP2_MID_BITS and 2^hi * 2^mid is
   // formed by adding hi to the exponent field of 2^mid.  2^lo is approximated
   // by a degree-3 minimax polynomial.
   // The two factors {2^hi * 2^mid, 2^lo} are returned separately.
   using FloatBits = fputil::FPBits<float>;

   float x_f32 = x;
   float k_f32 = fputil::nearest_integer(x_f32 * 0x1.0p+3f);
   // Work on the unsigned image of k so that the shifts and the exponent
   // adjustment below wrap modulo 2^32 instead of overflowing a signed int.
   unsigned k_bits = static_cast<unsigned>(static_cast<int>(k_f32));
   unsigned hi = k_bits >> 3;
   unsigned mid_idx = k_bits & 0x7;
   // lo = x - (hi + mid) = k * (-2^(-3)) + x
   float lo = fputil::multiply_add(k_f32, -0x1.0p-3f, x_f32);

   // Move hi into the position of the exponent field and add it to the bit
   // pattern of 2^mid.
   uint32_t hi_in_exponent =
       static_cast<uint32_t>(hi << FloatBits::FRACTION_LEN);
   uint32_t exp2_hi_mid_bits = EXP2_MID_BITS[mid_idx] + hi_in_exponent;
   float exp2_hi_mid = FloatBits(exp2_hi_mid_bits).get_val();

   // Degree-3 minimax polynomial generated by Sollya with the following
   // commands:
   //   > display = hexadecimal;
   //   > P = fpminimax((2^x - 1)/x, 2, [|SG...|], [-2^-4, 2^-4]);
   //   > 1 + x * P;
   float exp2_lo = fputil::polyeval(lo, 0x1p+0f, 0x1.62e43p-1f, 0x1.ec0aa6p-3f,
                                    0x1.c6b4a6p-5f);
   return {exp2_hi_mid, exp2_lo};
 }

 // log2(10) rounded to the nearest single-precision value.  Used by
 // exp10_range_reduction() to scale its argument before rounding.
 // Generated by Sollya with the following commands:
 //   > display = hexadecimal;
 //   > round(log2(10), SG, RN);
 static constexpr float LOG2F_10 = 0x1.a934fp+1f;

 // log10(2) rounded to the nearest single-precision value.  Used by
 // exp10_range_reduction() to compute the remainder lo.
 // Generated by Sollya with the following commands:
 //   > display = hexadecimal;
 //   > round(log10(2), SG, RN);
 static constexpr float LOG10F_2 = 0x1.344136p-2f;

 LIBC_INLINE ExpRangeReduction exp10_range_reduction(float16 x) {
   // Range reduction for 10^x, intended for -8 < x < 5.
   //
   // Write x = (hi + mid) * log10(2) + lo, in which
   //     hi is an integer,
   //     mid * 2^3 is an integer with 0 <= mid * 2^3 < 8,
   //     |lo * log2(10)| <= 2^(-4), hence |lo| < 2^(-4),
   // by taking
   //     k = round(x * log2(10) * 2^3),  hi + mid = k * 2^(-3),
   //     lo = x - k * 2^(-3) * log10(2).
   // Then,
   //     10^x = 10^((hi + mid) * log10(2) + lo) = 2^hi * 2^mid * 10^lo.
   // 2^mid is read (as a bit pattern) from EXP2_MID_BITS and 2^hi * 2^mid is
   // formed by adding hi to the exponent field of 2^mid.  10^lo is
   // approximated by a degree-4 minimax polynomial.
   // The two factors {2^hi * 2^mid, 10^lo} are returned separately.
   using FloatBits = fputil::FPBits<float>;

   float x_f32 = x;
   float k_f32 = fputil::nearest_integer(x_f32 * (LOG2F_10 * 0x1.0p+3f));
   // Work on the unsigned image of k so that the shifts and the exponent
   // adjustment below wrap modulo 2^32 instead of overflowing a signed int.
   unsigned k_bits = static_cast<unsigned>(static_cast<int>(k_f32));
   unsigned hi = k_bits >> 3;
   unsigned mid_idx = k_bits & 0x7;
   // lo = x - (hi + mid) * log10(2) = k * log10(2) * (-2^(-3)) + x
   float lo = fputil::multiply_add(k_f32, LOG10F_2 * -0x1.0p-3f, x_f32);

   // Move hi into the position of the exponent field and add it to the bit
   // pattern of 2^mid.
   uint32_t hi_in_exponent =
       static_cast<uint32_t>(hi << FloatBits::FRACTION_LEN);
   uint32_t exp2_hi_mid_bits = EXP2_MID_BITS[mid_idx] + hi_in_exponent;
   float exp2_hi_mid = FloatBits(exp2_hi_mid_bits).get_val();

   // Degree-4 minimax polynomial generated by Sollya with the following
   // commands:
   //   > display = hexadecimal;
   //   > P = fpminimax((10^x - 1)/x, 3, [|SG...|], [-2^-4, 2^-4]);
   //   > 1 + x * P;
   float exp10_lo = fputil::polyeval(lo, 0x1p+0f, 0x1.26bb14p+1f, 0x1.53526p+1f,
                                     0x1.04b434p+1f, 0x1.2bcf9ep+0f);
   return {exp2_hi_mid, exp10_lo};
 }

 // log2(e) rounded to the nearest single-precision value.  Used by
 // eval_sinh_or_cosh() to scale its argument before rounding.
 // Generated by Sollya with the following commands:
 //   > display = hexadecimal;
 //   > round(log2(exp(1)), SG, RN);
 static constexpr float LOG2F_E = 0x1.715476p+0f;

 // log(2) rounded to the nearest single-precision value.  Used by
 // eval_sinh_or_cosh() to compute the remainder lo.
 // Generated by Sollya with the following commands:
 //   > display = hexadecimal;
 //   > round(log(2), SG, RN);
 static constexpr float LOGF_2 = 0x1.62e43p-1f;

 // Lookup table of 2^(i / 32) for i = 0..31, stored as the 32-bit patterns of
 // the single-precision values (e.g. 0x3f80'0000 is 1.0f) so that an integer
 // can be added directly to the exponent field.  Used by eval_sinh_or_cosh().
 // Generated by Sollya with the following commands:
 //   > display = hexadecimal;
 //   > for i from 0 to 31 do printsingle(round(2^(i * 2^-5), SG, RN));
 static constexpr cpp::array<uint32_t, 32> EXP2_MID_5_BITS = {
     0x3f80'0000U, 0x3f82'cd87U, 0x3f85'aac3U, 0x3f88'980fU, 0x3f8b'95c2U,
     0x3f8e'a43aU, 0x3f91'c3d3U, 0x3f94'f4f0U, 0x3f98'37f0U, 0x3f9b'8d3aU,
     0x3f9e'f532U, 0x3fa2'7043U, 0x3fa5'fed7U, 0x3fa9'a15bU, 0x3fad'583fU,
     0x3fb1'23f6U, 0x3fb5'04f3U, 0x3fb8'fbafU, 0x3fbd'08a4U, 0x3fc1'2c4dU,
     0x3fc5'672aU, 0x3fc9'b9beU, 0x3fce'248cU, 0x3fd2'a81eU, 0x3fd7'44fdU,
     0x3fdb'fbb8U, 0x3fe0'ccdfU, 0x3fe5'b907U, 0x3fea'c0c7U, 0x3fef'e4baU,
     0x3ff5'257dU, 0x3ffa'83b3U,
 };

 // Evaluates sinh(x) (IsSinh == true) or cosh(x) (IsSinh == false) by
 // computing e^x and e^(-x) simultaneously.
 // To compute e^x, we perform the following range reduction:
 // find hi, mid, lo such that:
 //   x = (hi + mid) * log(2) + lo, in which
 //     hi is an integer,
 //     0 <= mid * 2^5 < 32 is an integer
 //     -2^(-5) <= lo * log2(e) <= 2^-5.
 // In particular,
 //   hi + mid = round(x * log2(e) * 2^5) * 2^(-5).
 // Then,
 //   e^x = 2^(hi + mid) * e^lo = 2^hi * 2^mid * e^lo.
 // We store 2^mid in the lookup table EXP2_MID_5_BITS, and compute 2^hi * 2^mid
 // by adding hi to the exponent field of 2^mid.
 // e^lo is computed using a degree-3 minimax polynomial generated by Sollya,
 // split into its even and odd parts:
 //   e^lo ~ P(lo)
 //        = 1 + lo + c2 * lo^2 + c3 * lo^3
 //        = (1 + c2*lo^2) + lo * (1 + c3*lo^2)
 //        = P_even + lo * P_odd
 // To compute e^(-x), notice that:
 //   e^(-x) = 2^(-(hi + mid)) * e^(-lo)
 //          ~ 2^(-(hi + mid)) * P(-lo)
 //          = 2^(-(hi + mid)) * (P_even - lo * P_odd)
 // So:
 //   sinh(x) = (e^x - e^(-x)) / 2
 //           ~ 0.5 * (2^(hi + mid) * (P_even + lo * P_odd) -
 //                    2^(-(hi + mid)) * (P_even - lo * P_odd))
 //           = 0.5 * (P_even * (2^(hi + mid) - 2^(-(hi + mid))) +
 //                    lo * P_odd * (2^(hi + mid) + 2^(-(hi + mid))))
 // And similarly:
 //   cosh(x) = (e^x + e^(-x)) / 2
 //           ~ 0.5 * (P_even * (2^(hi + mid) + 2^(-(hi + mid))) +
 //                    lo * P_odd * (2^(hi + mid) - 2^(-(hi + mid))))
 // The point of these formulas is that the polynomial work for the lower parts
 // of e^x and e^(-x) is shared and only done once.
 template <bool IsSinh> LIBC_INLINE float16 eval_sinh_or_cosh(float16 x) {
   using FloatBits = fputil::FPBits<float>;

   float x_f32 = x;
   float k_f32 = fputil::nearest_integer(x_f32 * (LOG2F_E * 0x1.0p+5f));
   int k_pos = static_cast<int>(k_f32);
   int k_neg = -k_pos;
   // Unsigned images of +k and -k: the shifts and exponent adjustments below
   // then wrap modulo 2^32.
   unsigned k_pos_bits = static_cast<unsigned>(k_pos);
   unsigned k_neg_bits = static_cast<unsigned>(k_neg);

   // Low five bits select 2^mid from the table; the remaining bits (hi) are
   // added to the exponent field.
   uint32_t pow2_pos_bits =
       EXP2_MID_5_BITS[k_pos_bits & 0x1f] +
       static_cast<uint32_t>((k_pos_bits >> 5) << FloatBits::FRACTION_LEN);
   uint32_t pow2_neg_bits =
       EXP2_MID_5_BITS[k_neg_bits & 0x1f] +
       static_cast<uint32_t>((k_neg_bits >> 5) << FloatBits::FRACTION_LEN);
   // pow2_pos = 2^(hi + mid), pow2_neg = 2^(-(hi + mid))
   float pow2_pos = FloatBits(pow2_pos_bits).get_val();
   float pow2_neg = FloatBits(pow2_neg_bits).get_val();

   // pow2_sum = 2^(hi + mid) + 2^(-(hi + mid))
   float pow2_sum = pow2_pos + pow2_neg;
   // pow2_diff = 2^(hi + mid) - 2^(-(hi + mid))
   float pow2_diff = pow2_pos - pow2_neg;

   // lo = x - (hi + mid) * log(2) = k * log(2) * (-2^(-5)) + x
   float lo = fputil::multiply_add(k_f32, LOGF_2 * -0x1.0p-5f, x_f32);
   float lo_sq = lo * lo;

   // Degree-3 minimax polynomial generated by Sollya with the following
   // commands:
   //   > display = hexadecimal;
   //   > P = fpminimax(expm1(x)/x, 2, [|SG...|], [-2^-5, 2^-5]);
   //   > 1 + x * P;
   constexpr cpp::array<float, 4> COEFFS = {0x1p+0f, 0x1p+0f, 0x1.0004p-1f,
                                            0x1.555778p-3f};
   // 0.5 * P_odd and 0.5 * P_even, both evaluated in lo^2.
   float half_p_odd =
       fputil::polyeval(lo_sq, COEFFS[1] * 0.5f, COEFFS[3] * 0.5f);
   float half_p_even =
       fputil::polyeval(lo_sq, COEFFS[0] * 0.5f, COEFFS[2] * 0.5f);

   // sinh pairs P_odd with the sum and P_even with the difference;
   // cosh pairs them the other way round.
   float odd_factor;
   float even_factor;
   if constexpr (IsSinh) {
     odd_factor = pow2_sum;
     even_factor = pow2_diff;
   } else {
     odd_factor = pow2_diff;
     even_factor = pow2_sum;
   }

   // result = lo * (0.5 * P_odd * odd_factor) + (0.5 * P_even * even_factor)
   return fputil::cast<float16>(fputil::multiply_add(
       lo, half_p_odd * odd_factor, half_p_even * even_factor));
 }

 // Lookup table of natural logarithms: LOGF_F[i] ~ log(1 + i * 2^(-5)) for
 // i = 0..31, each rounded to the nearest single-precision value.
 // NOTE(review): not referenced in this header; presumably consumed by the
 // half-precision logarithm implementations - confirm against callers.
 // Generated by Sollya with the following commands:
 //   > display = hexadecimal;
 //   > for i from 0 to 31 do print(round(log(1 + i * 2^-5), SG, RN));
 constexpr cpp::array<float, 32> LOGF_F = {
     0x0p+0f,        0x1.f829bp-6f,  0x1.f0a30cp-5f, 0x1.6f0d28p-4f,
     0x1.e27076p-4f, 0x1.29553p-3f,  0x1.5ff308p-3f, 0x1.9525aap-3f,
     0x1.c8ff7cp-3f, 0x1.fb9186p-3f, 0x1.1675cap-2f, 0x1.2e8e2cp-2f,
     0x1.4618bcp-2f, 0x1.5d1bdcp-2f, 0x1.739d8p-2f,  0x1.89a338p-2f,
     0x1.9f323ep-2f, 0x1.b44f78p-2f, 0x1.c8ff7cp-2f, 0x1.dd46ap-2f,
     0x1.f128f6p-2f, 0x1.02552ap-1f, 0x1.0be72ep-1f, 0x1.154c3ep-1f,
     0x1.1e85f6p-1f, 0x1.2795e2p-1f, 0x1.307d74p-1f, 0x1.393e0ep-1f,
     0x1.41d8fep-1f, 0x1.4a4f86p-1f, 0x1.52a2d2p-1f, 0x1.5ad404p-1f,
 };

 // Lookup table of base-2 logarithms: LOG2F_F[i] ~ log2(1 + i * 2^(-5)) for
 // i = 0..31, each rounded to the nearest single-precision value.
 // NOTE(review): not referenced in this header; presumably consumed by the
 // half-precision logarithm implementations - confirm against callers.
 // Generated by Sollya with the following commands:
 //   > display = hexadecimal;
 //   > for i from 0 to 31 do print(round(log2(1 + i * 2^-5), SG, RN));
 constexpr cpp::array<float, 32> LOG2F_F = {
     0x0p+0f,        0x1.6bad38p-5f, 0x1.663f7p-4f,  0x1.08c588p-3f,
     0x1.5c01a4p-3f, 0x1.acf5e2p-3f, 0x1.fbc16cp-3f, 0x1.24407ap-2f,
     0x1.49a784p-2f, 0x1.6e221cp-2f, 0x1.91bba8p-2f, 0x1.b47ecp-2f,
     0x1.d6753ep-2f, 0x1.f7a856p-2f, 0x1.0c105p-1f,  0x1.1bf312p-1f,
     0x1.2b8034p-1f, 0x1.3abb4p-1f,  0x1.49a784p-1f, 0x1.584822p-1f,
     0x1.66a008p-1f, 0x1.74b1fep-1f, 0x1.82809ep-1f, 0x1.900e62p-1f,
     0x1.9d5dap-1f,  0x1.aa709p-1f,  0x1.b74948p-1f, 0x1.c3e9cap-1f,
     0x1.d053f6p-1f, 0x1.dc899ap-1f, 0x1.e88c6cp-1f, 0x1.f45e08p-1f,
 };

 // Lookup table of base-10 logarithms: LOG10F_F[i] ~ log10(1 + i * 2^(-5)) for
 // i = 0..31, each rounded to the nearest single-precision value.
 // NOTE(review): not referenced in this header; presumably consumed by the
 // half-precision logarithm implementations - confirm against callers.
 // Generated by Sollya with the following commands:
 //   > display = hexadecimal;
 //   > for i from 0 to 31 do print(round(log10(1 + i * 2^-5), SG, RN));
 constexpr cpp::array<float, 32> LOG10F_F = {
     0x0p+0f,        0x1.b5e908p-7f, 0x1.af5f92p-6f, 0x1.3ed11ap-5f,
     0x1.a30a9ep-5f, 0x1.02428cp-4f, 0x1.31b306p-4f, 0x1.5fe804p-4f,
     0x1.8cf184p-4f, 0x1.b8de4ep-4f, 0x1.e3bc1ap-4f, 0x1.06cbd6p-3f,
     0x1.1b3e72p-3f, 0x1.2f3b6ap-3f, 0x1.42c7e8p-3f, 0x1.55e8c6p-3f,
     0x1.68a288p-3f, 0x1.7af974p-3f, 0x1.8cf184p-3f, 0x1.9e8e7cp-3f,
     0x1.afd3e4p-3f, 0x1.c0c514p-3f, 0x1.d1653p-3f,  0x1.e1b734p-3f,
     0x1.f1bdeep-3f, 0x1.00be06p-2f, 0x1.087a08p-2f, 0x1.101432p-2f,
     0x1.178da6p-2f, 0x1.1ee778p-2f, 0x1.2622bp-2f,  0x1.2d404cp-2f,
 };

 // Lookup table of reciprocals: ONE_OVER_F_F[i] ~ 1 / (1 + i * 2^(-5)) for
 // i = 0..31, each rounded to the nearest single-precision value.
 // NOTE(review): not referenced in this header; presumably consumed by the
 // half-precision logarithm implementations - confirm against callers.
 // Generated by Sollya with the following commands:
 //   > display = hexadecimal;
 //   > for i from 0 to 31 do print(round(1 / (1 + i * 2^-5), SG, RN));
 constexpr cpp::array<float, 32> ONE_OVER_F_F = {
     0x1p+0f,        0x1.f07c2p-1f,  0x1.e1e1e2p-1f, 0x1.d41d42p-1f,
     0x1.c71c72p-1f, 0x1.bacf92p-1f, 0x1.af286cp-1f, 0x1.a41a42p-1f,
     0x1.99999ap-1f, 0x1.8f9c18p-1f, 0x1.861862p-1f, 0x1.7d05f4p-1f,
     0x1.745d18p-1f, 0x1.6c16c2p-1f, 0x1.642c86p-1f, 0x1.5c9882p-1f,
     0x1.555556p-1f, 0x1.4e5e0ap-1f, 0x1.47ae14p-1f, 0x1.414142p-1f,
     0x1.3b13b2p-1f, 0x1.3521dp-1f,  0x1.2f684cp-1f, 0x1.29e412p-1f,
     0x1.24924ap-1f, 0x1.1f7048p-1f, 0x1.1a7b96p-1f, 0x1.15b1e6p-1f,
     0x1.111112p-1f, 0x1.0c9714p-1f, 0x1.08421p-1f,  0x1.041042p-1f,
 };

 } // namespace LIBC_NAMESPACE_DECL

 #endif // LLVM_LIBC_SRC_MATH_GENERIC_EXPXF16_H
	//===-- Common utilities for half-precision exponential functions ---------===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//

	#ifndef LLVM_LIBC_SRC_MATH_GENERIC_EXPXF16_H
	#define LLVM_LIBC_SRC_MATH_GENERIC_EXPXF16_H

	#include "src/__support/CPP/array.h"
	#include "src/__support/FPUtil/FPBits.h"
	#include "src/__support/FPUtil/PolyEval.h"
	#include "src/__support/FPUtil/cast.h"
	#include "src/__support/FPUtil/multiply_add.h"
	#include "src/__support/FPUtil/nearest_integer.h"
	#include "src/__support/macros/attributes.h"
	#include "src/__support/macros/config.h"
	#include <stdint.h>

	namespace LIBC_NAMESPACE_DECL {

	// Lookup table of exp(i) for the integers i in [-18, 12], each rounded to the
	// nearest single-precision value: EXP_HI[i + 18] ~ exp(i).
	// exp_range_reduction() indexes this table with the integer part (hi) of its
	// reduced argument, offset by 18.
	// Generated by Sollya with the following commands:
	//   > display = hexadecimal;
	//   > for i from -18 to 12 do print(round(exp(i), SG, RN));
	static constexpr cpp::array<float, 31> EXP_HI = {
	0x1.05a628p-26f, 0x1.639e32p-25f, 0x1.e355bcp-24f, 0x1.4875cap-22f,
	0x1.be6c7p-21f, 0x1.2f6054p-19f, 0x1.9c54c4p-18f, 0x1.183542p-16f,
	0x1.7cd79cp-15f, 0x1.02cf22p-13f, 0x1.5fc21p-12f, 0x1.de16bap-11f,
	0x1.44e52p-9f, 0x1.b993fep-8f, 0x1.2c155cp-6f, 0x1.97db0cp-5f,
	0x1.152aaap-3f, 0x1.78b564p-2f, 0x1p+0f, 0x1.5bf0a8p+1f,
	0x1.d8e64cp+2f, 0x1.415e5cp+4f, 0x1.b4c902p+5f, 0x1.28d38ap+7f,
	0x1.936dc6p+8f, 0x1.122886p+10f, 0x1.749ea8p+11f, 0x1.fa7158p+12f,
	0x1.5829dcp+14f, 0x1.d3c448p+15f, 0x1.3de166p+17f,
	};

	// Lookup table of exp(i / 8) for i = 0..7, each rounded to the nearest
	// single-precision value: EXP_MID[i] ~ exp(i * 2^(-3)).
	// exp_range_reduction() indexes this table with the low three bits of
	// round(x * 2^3).
	// Generated by Sollya with the following commands:
	//   > display = hexadecimal;
	//   > for i from 0 to 7 do print(round(exp(i * 2^-3), SG, RN));
	static constexpr cpp::array<float, 8> EXP_MID = {
	0x1p+0f, 0x1.221604p+0f, 0x1.48b5e4p+0f, 0x1.747a52p+0f,
	0x1.a61298p+0f, 0x1.de455ep+0f, 0x1.0ef9dcp+1f, 0x1.330e58p+1f,
	};

	// Result of one of the *_range_reduction() helpers in this header: two
	// single-precision factors that the helpers return separately instead of
	// multiplying them together.
	struct ExpRangeReduction {
	// Factor contributed by the coarse (hi + mid) part of the reduced argument.
	float exp_hi_mid;
	// Polynomial approximation contributed by the small remainder lo.
	float exp_lo;
	};

	LIBC_INLINE ExpRangeReduction exp_range_reduction(float16 x) {
	  // Range reduction for exp(x), intended for -18 < x < 12.
	  //
	  // Write x = hi + mid + lo, in which
	  //     hi is an integer,
	  //     mid * 2^3 is an integer with 0 <= mid * 2^3 < 8,
	  //     |lo| <= 2^(-4),
	  // by taking
	  //     k = round(x * 2^3),  hi + mid = k * 2^(-3),  lo = x - k * 2^(-3).
	  // Then,
	  //     exp(x) = exp(hi + mid + lo) = exp(hi) * exp(mid) * exp(lo).
	  // exp(hi) and exp(mid) are read from the lookup tables EXP_HI and EXP_MID;
	  // exp(lo) is approximated by a degree-3 minimax polynomial.
	  // The two factors {exp(hi) * exp(mid), exp(lo)} are returned separately.

	  float x_f32 = x;
	  float k_f32 = fputil::nearest_integer(x_f32 * 0x1.0p+3f);
	  int k = static_cast<int>(k_f32);
	  // The low three bits of k select mid; the remaining bits form hi.
	  // NOTE(review): relies on >> being an arithmetic shift for negative k.
	  int mid_idx = k & 0x7;
	  int hi = k >> 3;
	  // lo = x - (hi + mid) = k * (-2^(-3)) + x
	  float lo = fputil::multiply_add(k_f32, -0x1.0p-3f, x_f32);

	  // Degree-3 minimax polynomial generated by Sollya with the following
	  // commands:
	  //   > display = hexadecimal;
	  //   > P = fpminimax(expm1(x)/x, 2, [|SG...|], [-2^-4, 2^-4]);
	  //   > 1 + x * P;
	  float exp_lo =
	      fputil::polyeval(lo, 0x1p+0f, 0x1p+0f, 0x1.001p-1f, 0x1.555ddep-3f);

	  // EXP_HI starts at hi = -18, hence the index offset.
	  float exp_hi_mid = EXP_HI[hi + 18] * EXP_MID[mid_idx];
	  return {exp_hi_mid, exp_lo};
	}

	// Lookup table of 2^(i / 8) for i = 0..7, stored as the 32-bit patterns of
	// the single-precision values (e.g. 0x3f80'0000 is 1.0f) so that an integer
	// can be added directly to the exponent field.  Used by
	// exp2_range_reduction() and exp10_range_reduction().
	// Generated by Sollya with the following commands:
	//   > display = hexadecimal;
	//   > for i from 0 to 7 do printsingle(round(2^(i * 2^-3), SG, RN));
	constexpr cpp::array<uint32_t, 8> EXP2_MID_BITS = {
	0x3f80'0000U, 0x3f8b'95c2U, 0x3f98'37f0U, 0x3fa5'fed7U,
	0x3fb5'04f3U, 0x3fc5'672aU, 0x3fd7'44fdU, 0x3fea'c0c7U,
	};

	LIBC_INLINE ExpRangeReduction exp2_range_reduction(float16 x) {
	  // Range reduction for 2^x, intended for -25 < x < 16.
	  //
	  // Write x = hi + mid + lo, in which
	  //     hi is an integer,
	  //     mid * 2^3 is an integer with 0 <= mid * 2^3 < 8,
	  //     |lo| <= 2^(-4),
	  // by taking
	  //     k = round(x * 2^3),  hi + mid = k * 2^(-3),  lo = x - k * 2^(-3).
	  // Then,
	  //     2^x = 2^(hi + mid + lo) = 2^hi * 2^mid * 2^lo.
	  // 2^mid is read (as a bit pattern) from EXP2_MID_BITS and 2^hi * 2^mid is
	  // formed by adding hi to the exponent field of 2^mid.  2^lo is approximated
	  // by a degree-3 minimax polynomial.
	  // The two factors {2^hi * 2^mid, 2^lo} are returned separately.
	  using FloatBits = fputil::FPBits<float>;

	  float x_f32 = x;
	  float k_f32 = fputil::nearest_integer(x_f32 * 0x1.0p+3f);
	  // Work on the unsigned image of k so that the shifts and the exponent
	  // adjustment below wrap modulo 2^32 instead of overflowing a signed int.
	  unsigned k_bits = static_cast<unsigned>(static_cast<int>(k_f32));
	  unsigned hi = k_bits >> 3;
	  unsigned mid_idx = k_bits & 0x7;
	  // lo = x - (hi + mid) = k * (-2^(-3)) + x
	  float lo = fputil::multiply_add(k_f32, -0x1.0p-3f, x_f32);

	  // Move hi into the position of the exponent field and add it to the bit
	  // pattern of 2^mid.
	  uint32_t hi_in_exponent =
	      static_cast<uint32_t>(hi << FloatBits::FRACTION_LEN);
	  uint32_t exp2_hi_mid_bits = EXP2_MID_BITS[mid_idx] + hi_in_exponent;
	  float exp2_hi_mid = FloatBits(exp2_hi_mid_bits).get_val();

	  // Degree-3 minimax polynomial generated by Sollya with the following
	  // commands:
	  //   > display = hexadecimal;
	  //   > P = fpminimax((2^x - 1)/x, 2, [|SG...|], [-2^-4, 2^-4]);
	  //   > 1 + x * P;
	  float exp2_lo = fputil::polyeval(lo, 0x1p+0f, 0x1.62e43p-1f, 0x1.ec0aa6p-3f,
	                                   0x1.c6b4a6p-5f);
	  return {exp2_hi_mid, exp2_lo};
	}

	// log2(10) rounded to the nearest single-precision value.  Used by
	// exp10_range_reduction() to scale its argument before rounding.
	// Generated by Sollya with the following commands:
	//   > display = hexadecimal;
	//   > round(log2(10), SG, RN);
	static constexpr float LOG2F_10 = 0x1.a934fp+1f;

	// log10(2) rounded to the nearest single-precision value.  Used by
	// exp10_range_reduction() to compute the remainder lo.
	// Generated by Sollya with the following commands:
	//   > display = hexadecimal;
	//   > round(log10(2), SG, RN);
	static constexpr float LOG10F_2 = 0x1.344136p-2f;

	LIBC_INLINE ExpRangeReduction exp10_range_reduction(float16 x) {
	  // Range reduction for 10^x, intended for -8 < x < 5.
	  //
	  // Write x = (hi + mid) * log10(2) + lo, in which
	  //     hi is an integer,
	  //     mid * 2^3 is an integer with 0 <= mid * 2^3 < 8,
	  //     |lo * log2(10)| <= 2^(-4), hence |lo| < 2^(-4),
	  // by taking
	  //     k = round(x * log2(10) * 2^3),  hi + mid = k * 2^(-3),
	  //     lo = x - k * 2^(-3) * log10(2).
	  // Then,
	  //     10^x = 10^((hi + mid) * log10(2) + lo) = 2^hi * 2^mid * 10^lo.
	  // 2^mid is read (as a bit pattern) from EXP2_MID_BITS and 2^hi * 2^mid is
	  // formed by adding hi to the exponent field of 2^mid.  10^lo is
	  // approximated by a degree-4 minimax polynomial.
	  // The two factors {2^hi * 2^mid, 10^lo} are returned separately.
	  using FloatBits = fputil::FPBits<float>;

	  float x_f32 = x;
	  float k_f32 = fputil::nearest_integer(x_f32 * (LOG2F_10 * 0x1.0p+3f));
	  // Work on the unsigned image of k so that the shifts and the exponent
	  // adjustment below wrap modulo 2^32 instead of overflowing a signed int.
	  unsigned k_bits = static_cast<unsigned>(static_cast<int>(k_f32));
	  unsigned hi = k_bits >> 3;
	  unsigned mid_idx = k_bits & 0x7;
	  // lo = x - (hi + mid) * log10(2) = k * log10(2) * (-2^(-3)) + x
	  float lo = fputil::multiply_add(k_f32, LOG10F_2 * -0x1.0p-3f, x_f32);

	  // Move hi into the position of the exponent field and add it to the bit
	  // pattern of 2^mid.
	  uint32_t hi_in_exponent =
	      static_cast<uint32_t>(hi << FloatBits::FRACTION_LEN);
	  uint32_t exp2_hi_mid_bits = EXP2_MID_BITS[mid_idx] + hi_in_exponent;
	  float exp2_hi_mid = FloatBits(exp2_hi_mid_bits).get_val();

	  // Degree-4 minimax polynomial generated by Sollya with the following
	  // commands:
	  //   > display = hexadecimal;
	  //   > P = fpminimax((10^x - 1)/x, 3, [|SG...|], [-2^-4, 2^-4]);
	  //   > 1 + x * P;
	  float exp10_lo = fputil::polyeval(lo, 0x1p+0f, 0x1.26bb14p+1f, 0x1.53526p+1f,
	                                    0x1.04b434p+1f, 0x1.2bcf9ep+0f);
	  return {exp2_hi_mid, exp10_lo};
	}

	// log2(e) rounded to the nearest single-precision value.  Used by
	// eval_sinh_or_cosh() to scale its argument before rounding.
	// Generated by Sollya with the following commands:
	//   > display = hexadecimal;
	//   > round(log2(exp(1)), SG, RN);
	static constexpr float LOG2F_E = 0x1.715476p+0f;

	// log(2) rounded to the nearest single-precision value.  Used by
	// eval_sinh_or_cosh() to compute the remainder lo.
	// Generated by Sollya with the following commands:
	//   > display = hexadecimal;
	//   > round(log(2), SG, RN);
	static constexpr float LOGF_2 = 0x1.62e43p-1f;

	// Lookup table of 2^(i / 32) for i = 0..31, stored as the 32-bit patterns of
	// the single-precision values (e.g. 0x3f80'0000 is 1.0f) so that an integer
	// can be added directly to the exponent field.  Used by eval_sinh_or_cosh().
	// Generated by Sollya with the following commands:
	//   > display = hexadecimal;
	//   > for i from 0 to 31 do printsingle(round(2^(i * 2^-5), SG, RN));
	static constexpr cpp::array<uint32_t, 32> EXP2_MID_5_BITS = {
	0x3f80'0000U, 0x3f82'cd87U, 0x3f85'aac3U, 0x3f88'980fU, 0x3f8b'95c2U,
	0x3f8e'a43aU, 0x3f91'c3d3U, 0x3f94'f4f0U, 0x3f98'37f0U, 0x3f9b'8d3aU,
	0x3f9e'f532U, 0x3fa2'7043U, 0x3fa5'fed7U, 0x3fa9'a15bU, 0x3fad'583fU,
	0x3fb1'23f6U, 0x3fb5'04f3U, 0x3fb8'fbafU, 0x3fbd'08a4U, 0x3fc1'2c4dU,
	0x3fc5'672aU, 0x3fc9'b9beU, 0x3fce'248cU, 0x3fd2'a81eU, 0x3fd7'44fdU,
	0x3fdb'fbb8U, 0x3fe0'ccdfU, 0x3fe5'b907U, 0x3fea'c0c7U, 0x3fef'e4baU,
	0x3ff5'257dU, 0x3ffa'83b3U,
	};

	// Evaluates sinh(x) (IsSinh == true) or cosh(x) (IsSinh == false) by
	// computing e^x and e^(-x) simultaneously.
	// To compute e^x, we perform the following range reduction:
	// find hi, mid, lo such that:
	//   x = (hi + mid) * log(2) + lo, in which
	//     hi is an integer,
	//     0 <= mid * 2^5 < 32 is an integer
	//     -2^(-5) <= lo * log2(e) <= 2^-5.
	// In particular,
	//   hi + mid = round(x * log2(e) * 2^5) * 2^(-5).
	// Then,
	//   e^x = 2^(hi + mid) * e^lo = 2^hi * 2^mid * e^lo.
	// We store 2^mid in the lookup table EXP2_MID_5_BITS, and compute 2^hi * 2^mid
	// by adding hi to the exponent field of 2^mid.
	// e^lo is computed using a degree-3 minimax polynomial generated by Sollya,
	// split into its even and odd parts:
	//   e^lo ~ P(lo)
	//        = 1 + lo + c2 * lo^2 + c3 * lo^3
	//        = (1 + c2*lo^2) + lo * (1 + c3*lo^2)
	//        = P_even + lo * P_odd
	// To compute e^(-x), notice that:
	//   e^(-x) = 2^(-(hi + mid)) * e^(-lo)
	//          ~ 2^(-(hi + mid)) * P(-lo)
	//          = 2^(-(hi + mid)) * (P_even - lo * P_odd)
	// So:
	//   sinh(x) = (e^x - e^(-x)) / 2
	//           ~ 0.5 * (2^(hi + mid) * (P_even + lo * P_odd) -
	//                    2^(-(hi + mid)) * (P_even - lo * P_odd))
	//           = 0.5 * (P_even * (2^(hi + mid) - 2^(-(hi + mid))) +
	//                    lo * P_odd * (2^(hi + mid) + 2^(-(hi + mid))))
	// And similarly:
	//   cosh(x) = (e^x + e^(-x)) / 2
	//           ~ 0.5 * (P_even * (2^(hi + mid) + 2^(-(hi + mid))) +
	//                    lo * P_odd * (2^(hi + mid) - 2^(-(hi + mid))))
	// The point of these formulas is that the polynomial work for the lower parts
	// of e^x and e^(-x) is shared and only done once.
	template <bool IsSinh> LIBC_INLINE float16 eval_sinh_or_cosh(float16 x) {
	  using FloatBits = fputil::FPBits<float>;

	  float x_f32 = x;
	  float k_f32 = fputil::nearest_integer(x_f32 * (LOG2F_E * 0x1.0p+5f));
	  int k_pos = static_cast<int>(k_f32);
	  int k_neg = -k_pos;
	  // Unsigned images of +k and -k: the shifts and exponent adjustments below
	  // then wrap modulo 2^32.
	  unsigned k_pos_bits = static_cast<unsigned>(k_pos);
	  unsigned k_neg_bits = static_cast<unsigned>(k_neg);

	  // Low five bits select 2^mid from the table; the remaining bits (hi) are
	  // added to the exponent field.
	  uint32_t pow2_pos_bits =
	      EXP2_MID_5_BITS[k_pos_bits & 0x1f] +
	      static_cast<uint32_t>((k_pos_bits >> 5) << FloatBits::FRACTION_LEN);
	  uint32_t pow2_neg_bits =
	      EXP2_MID_5_BITS[k_neg_bits & 0x1f] +
	      static_cast<uint32_t>((k_neg_bits >> 5) << FloatBits::FRACTION_LEN);
	  // pow2_pos = 2^(hi + mid), pow2_neg = 2^(-(hi + mid))
	  float pow2_pos = FloatBits(pow2_pos_bits).get_val();
	  float pow2_neg = FloatBits(pow2_neg_bits).get_val();

	  // pow2_sum = 2^(hi + mid) + 2^(-(hi + mid))
	  float pow2_sum = pow2_pos + pow2_neg;
	  // pow2_diff = 2^(hi + mid) - 2^(-(hi + mid))
	  float pow2_diff = pow2_pos - pow2_neg;

	  // lo = x - (hi + mid) * log(2) = k * log(2) * (-2^(-5)) + x
	  float lo = fputil::multiply_add(k_f32, LOGF_2 * -0x1.0p-5f, x_f32);
	  float lo_sq = lo * lo;

	  // Degree-3 minimax polynomial generated by Sollya with the following
	  // commands:
	  //   > display = hexadecimal;
	  //   > P = fpminimax(expm1(x)/x, 2, [|SG...|], [-2^-5, 2^-5]);
	  //   > 1 + x * P;
	  constexpr cpp::array<float, 4> COEFFS = {0x1p+0f, 0x1p+0f, 0x1.0004p-1f,
	                                           0x1.555778p-3f};
	  // 0.5 * P_odd and 0.5 * P_even, both evaluated in lo^2.
	  float half_p_odd =
	      fputil::polyeval(lo_sq, COEFFS[1] * 0.5f, COEFFS[3] * 0.5f);
	  float half_p_even =
	      fputil::polyeval(lo_sq, COEFFS[0] * 0.5f, COEFFS[2] * 0.5f);

	  // sinh pairs P_odd with the sum and P_even with the difference;
	  // cosh pairs them the other way round.
	  float odd_factor;
	  float even_factor;
	  if constexpr (IsSinh) {
	    odd_factor = pow2_sum;
	    even_factor = pow2_diff;
	  } else {
	    odd_factor = pow2_diff;
	    even_factor = pow2_sum;
	  }

	  // result = lo * (0.5 * P_odd * odd_factor) + (0.5 * P_even * even_factor)
	  return fputil::cast<float16>(fputil::multiply_add(
	      lo, half_p_odd * odd_factor, half_p_even * even_factor));
	}

	// Lookup table of natural logarithms: LOGF_F[i] ~ log(1 + i * 2^(-5)) for
	// i = 0..31, each rounded to the nearest single-precision value.
	// NOTE(review): not referenced in this header; presumably consumed by the
	// half-precision logarithm implementations - confirm against callers.
	// Generated by Sollya with the following commands:
	//   > display = hexadecimal;
	//   > for i from 0 to 31 do print(round(log(1 + i * 2^-5), SG, RN));
	constexpr cpp::array<float, 32> LOGF_F = {
	0x0p+0f, 0x1.f829bp-6f, 0x1.f0a30cp-5f, 0x1.6f0d28p-4f,
	0x1.e27076p-4f, 0x1.29553p-3f, 0x1.5ff308p-3f, 0x1.9525aap-3f,
	0x1.c8ff7cp-3f, 0x1.fb9186p-3f, 0x1.1675cap-2f, 0x1.2e8e2cp-2f,
	0x1.4618bcp-2f, 0x1.5d1bdcp-2f, 0x1.739d8p-2f, 0x1.89a338p-2f,
	0x1.9f323ep-2f, 0x1.b44f78p-2f, 0x1.c8ff7cp-2f, 0x1.dd46ap-2f,
	0x1.f128f6p-2f, 0x1.02552ap-1f, 0x1.0be72ep-1f, 0x1.154c3ep-1f,
	0x1.1e85f6p-1f, 0x1.2795e2p-1f, 0x1.307d74p-1f, 0x1.393e0ep-1f,
	0x1.41d8fep-1f, 0x1.4a4f86p-1f, 0x1.52a2d2p-1f, 0x1.5ad404p-1f,
	};

	// Lookup table of base-2 logarithms: LOG2F_F[i] ~ log2(1 + i * 2^(-5)) for
	// i = 0..31, each rounded to the nearest single-precision value.
	// NOTE(review): not referenced in this header; presumably consumed by the
	// half-precision logarithm implementations - confirm against callers.
	// Generated by Sollya with the following commands:
	//   > display = hexadecimal;
	//   > for i from 0 to 31 do print(round(log2(1 + i * 2^-5), SG, RN));
	constexpr cpp::array<float, 32> LOG2F_F = {
	0x0p+0f, 0x1.6bad38p-5f, 0x1.663f7p-4f, 0x1.08c588p-3f,
	0x1.5c01a4p-3f, 0x1.acf5e2p-3f, 0x1.fbc16cp-3f, 0x1.24407ap-2f,
	0x1.49a784p-2f, 0x1.6e221cp-2f, 0x1.91bba8p-2f, 0x1.b47ecp-2f,
	0x1.d6753ep-2f, 0x1.f7a856p-2f, 0x1.0c105p-1f, 0x1.1bf312p-1f,
	0x1.2b8034p-1f, 0x1.3abb4p-1f, 0x1.49a784p-1f, 0x1.584822p-1f,
	0x1.66a008p-1f, 0x1.74b1fep-1f, 0x1.82809ep-1f, 0x1.900e62p-1f,
	0x1.9d5dap-1f, 0x1.aa709p-1f, 0x1.b74948p-1f, 0x1.c3e9cap-1f,
	0x1.d053f6p-1f, 0x1.dc899ap-1f, 0x1.e88c6cp-1f, 0x1.f45e08p-1f,
	};

	// Lookup table of base-10 logarithms: LOG10F_F[i] ~ log10(1 + i * 2^(-5)) for
	// i = 0..31, each rounded to the nearest single-precision value.
	// NOTE(review): not referenced in this header; presumably consumed by the
	// half-precision logarithm implementations - confirm against callers.
	// Generated by Sollya with the following commands:
	//   > display = hexadecimal;
	//   > for i from 0 to 31 do print(round(log10(1 + i * 2^-5), SG, RN));
	constexpr cpp::array<float, 32> LOG10F_F = {
	0x0p+0f, 0x1.b5e908p-7f, 0x1.af5f92p-6f, 0x1.3ed11ap-5f,
	0x1.a30a9ep-5f, 0x1.02428cp-4f, 0x1.31b306p-4f, 0x1.5fe804p-4f,
	0x1.8cf184p-4f, 0x1.b8de4ep-4f, 0x1.e3bc1ap-4f, 0x1.06cbd6p-3f,
	0x1.1b3e72p-3f, 0x1.2f3b6ap-3f, 0x1.42c7e8p-3f, 0x1.55e8c6p-3f,
	0x1.68a288p-3f, 0x1.7af974p-3f, 0x1.8cf184p-3f, 0x1.9e8e7cp-3f,
	0x1.afd3e4p-3f, 0x1.c0c514p-3f, 0x1.d1653p-3f, 0x1.e1b734p-3f,
	0x1.f1bdeep-3f, 0x1.00be06p-2f, 0x1.087a08p-2f, 0x1.101432p-2f,
	0x1.178da6p-2f, 0x1.1ee778p-2f, 0x1.2622bp-2f, 0x1.2d404cp-2f,
	};

	// Lookup table of reciprocals: ONE_OVER_F_F[i] ~ 1 / (1 + i * 2^(-5)) for
	// i = 0..31, each rounded to the nearest single-precision value.
	// NOTE(review): not referenced in this header; presumably consumed by the
	// half-precision logarithm implementations - confirm against callers.
	// Generated by Sollya with the following commands:
	//   > display = hexadecimal;
	//   > for i from 0 to 31 do print(round(1 / (1 + i * 2^-5), SG, RN));
	constexpr cpp::array<float, 32> ONE_OVER_F_F = {
	0x1p+0f, 0x1.f07c2p-1f, 0x1.e1e1e2p-1f, 0x1.d41d42p-1f,
	0x1.c71c72p-1f, 0x1.bacf92p-1f, 0x1.af286cp-1f, 0x1.a41a42p-1f,
	0x1.99999ap-1f, 0x1.8f9c18p-1f, 0x1.861862p-1f, 0x1.7d05f4p-1f,
	0x1.745d18p-1f, 0x1.6c16c2p-1f, 0x1.642c86p-1f, 0x1.5c9882p-1f,
	0x1.555556p-1f, 0x1.4e5e0ap-1f, 0x1.47ae14p-1f, 0x1.414142p-1f,
	0x1.3b13b2p-1f, 0x1.3521dp-1f, 0x1.2f684cp-1f, 0x1.29e412p-1f,
	0x1.24924ap-1f, 0x1.1f7048p-1f, 0x1.1a7b96p-1f, 0x1.15b1e6p-1f,
	0x1.111112p-1f, 0x1.0c9714p-1f, 0x1.08421p-1f, 0x1.041042p-1f,
	};

	} // namespace LIBC_NAMESPACE_DECL

	#endif // LLVM_LIBC_SRC_MATH_GENERIC_EXPXF16_H