//===-- Single-precision general exp/log functions ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H
#define LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H

#include "math_utils.h"
#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/PolyEval.h"
#include "src/__support/FPUtil/nearest_integer.h"
#include "src/__support/common.h"
#include <src/__support/FPUtil/NearestIntegerOperations.h>

#include <errno.h>

namespace __llvm_libc {

struct ExpBase {
  // Base = e
  static constexpr int MID_BITS = 5;
  static constexpr int MID_MASK = (1 << MID_BITS) - 1;
  // log2(e) * 2^5
  static constexpr double LOG2_B = 0x1.71547652b82fep+0 * (1 << MID_BITS);
  // High and low parts of -log(2) * 2^(-5)
  static constexpr double M_LOGB_2_HI = -0x1.62e42fefa0000p-1 / (1 << MID_BITS);
  static constexpr double M_LOGB_2_LO =
      -0x1.cf79abc9e3b3ap-40 / (1 << MID_BITS);
  // Look up table for bit fields of 2^(i/32) for i = 0..31, generated by Sollya
  // with:
  // > for i from 0 to 31 do printdouble(round(2^(i/32), D, RN));
  static constexpr int64_t EXP_2_MID[1 << MID_BITS] = {
      0x3ff0000000000000, 0x3ff059b0d3158574, 0x3ff0b5586cf9890f,
      0x3ff11301d0125b51, 0x3ff172b83c7d517b, 0x3ff1d4873168b9aa,
      0x3ff2387a6e756238, 0x3ff29e9df51fdee1, 0x3ff306fe0a31b715,
      0x3ff371a7373aa9cb, 0x3ff3dea64c123422, 0x3ff44e086061892d,
      0x3ff4bfdad5362a27, 0x3ff5342b569d4f82, 0x3ff5ab07dd485429,
      0x3ff6247eb03a5585, 0x3ff6a09e667f3bcd, 0x3ff71f75e8ec5f74,
      0x3ff7a11473eb0187, 0x3ff82589994cce13, 0x3ff8ace5422aa0db,
      0x3ff93737b0cdc5e5, 0x3ff9c49182a3f090, 0x3ffa5503b23e255d,
      0x3ffae89f995ad3ad, 0x3ffb7f76f2fb5e47, 0x3ffc199bdd85529c,
      0x3ffcb720dcef9069, 0x3ffd5818dcfba487, 0x3ffdfc97337b9b5f,
      0x3ffea4afa2a490da, 0x3fff50765b6e4540,
  };

  // Approximating e^dx with degree-5 minimax polynomial generated by Sollya:
  // > Q = fpminimax(expm1(x)/x, 4, [|1, D...|], [-log(2)/64, log(2)/64]);
  // Then:
  //   e^dx ~ P(dx) = 1 + dx + COEFFS[0] * dx^2 + ... + COEFFS[3] * dx^5.
  static constexpr double COEFFS[4] = {
      0x1.ffffffffe5bc8p-2, 0x1.555555555cd67p-3, 0x1.5555c2a9b48b4p-5,
      0x1.11112a0e34bdbp-7};

  static double powb_lo(double dx) {
    using fputil::multiply_add;
    double dx2 = dx * dx;
    double c0 = 1.0 + dx;
    // c1 = COEFFS[0] + COEFFS[1] * dx
    double c1 = multiply_add(dx, ExpBase::COEFFS[1], ExpBase::COEFFS[0]);
    // c2 = COEFFS[2] + COEFFS[3] * dx
    double c2 = multiply_add(dx, ExpBase::COEFFS[3], ExpBase::COEFFS[2]);
    // r = c4 + c5 * dx^4
    //   = 1 + dx + COEFFS[0] * dx^2 + ... + COEFFS[5] * dx^7
    return fputil::polyeval(dx2, c0, c1, c2);
  }
};

struct Exp10Base : public ExpBase {
  // log2(10) * 2^5
  static constexpr double LOG2_B = 0x1.a934f0979a371p1 * (1 << MID_BITS);
  // High and low parts of -log10(2) * 2^(-5).
  // Notice that since |x * log2(10)| < 150:
  //   |k| = |round(x * log2(10) * 2^5)| < 2^8 * 2^5 = 2^13
  // So when the FMA instructions are not available, in order for the product
  //   k * M_LOGB_2_HI
  // to be exact, we only store the high part of log10(2) up to 38 bits
  // (= 53 - 15) of precision.
  // It is generated by Sollya with:
  // > round(log10(2), 44, RN);
  static constexpr double M_LOGB_2_HI = -0x1.34413509f8p-2 / (1 << MID_BITS);
  // > round(log10(2) - 0x1.34413509f8p-2, D, RN);
  static constexpr double M_LOGB_2_LO = 0x1.80433b83b532ap-44 / (1 << MID_BITS);

  // Approximating 10^dx with degree-5 minimax polynomial generated by Sollya:
  // > Q = fpminimax((10^x - 1)/x, 4, [|D...|], [-log10(2)/2^6, log10(2)/2^6]);
  // Then:
  //   10^dx ~ P(dx) = 1 + COEFFS[0] * dx + ... + COEFFS[4] * dx^5.
  static constexpr double COEFFS[5] = {0x1.26bb1bbb55515p1, 0x1.53524c73bd3eap1,
                                       0x1.0470591dff149p1, 0x1.2bd7c0a9fbc4dp0,
                                       0x1.1429e74a98f43p-1};

  static double powb_lo(double dx) {
    using fputil::multiply_add;
    double dx2 = dx * dx;
    // c0 = 1 + COEFFS[0] * dx
    double c0 = multiply_add(dx, Exp10Base::COEFFS[0], 1.0);
    // c1 = COEFFS[1] + COEFFS[2] * dx
    double c1 = multiply_add(dx, Exp10Base::COEFFS[2], Exp10Base::COEFFS[1]);
    // c2 = COEFFS[3] + COEFFS[4] * dx
    double c2 = multiply_add(dx, Exp10Base::COEFFS[4], Exp10Base::COEFFS[3]);
    // r = c0 + dx^2 * (c1 + c2 * dx^2)
    //   = c0 + c1 * dx^2 + c2 * dx^4
    //   = 1 + COEFFS[0] * dx + ... + COEFFS[4] * dx^5.
    return fputil::polyeval(dx2, c0, c1, c2);
  }
};

constexpr int LOG_P1_BITS = 6;
constexpr int LOG_P1_SIZE = 1 << LOG_P1_BITS;

// N[Table[Log[2, 1 + x], {x, 0/64, 63/64, 1/64}], 40]
extern const double LOG_P1_LOG2[LOG_P1_SIZE];

// N[Table[1/(1 + x), {x, 0/64, 63/64, 1/64}], 40]
extern const double LOG_P1_1_OVER[LOG_P1_SIZE];

// Taylor series expansion for Log[2, 1 + x] splitted to EVEN AND ODD numbers
// K_LOG2_ODD starts from x^3
extern const double K_LOG2_ODD[4];
extern const double K_LOG2_EVEN[4];

// Output of range reduction for exp_b: (2^(mid + hi), lo)
// where:
//   b^x = 2^(mid + hi) * b^lo
struct exp_b_reduc_t {
  double mh; // 2^(mid + hi)
  double lo;
};

// The function correctly calculates b^x value with at least float precision
// in a limited range.
// Range reduction:
//   b^x = 2^(hi + mid) * b^lo
// where:
//   x = (hi + mid) * log_b(2) + lo
//   hi is an integer,
//   0 <= mid * 2^MID_BITS < 2^MID_BITS is an integer
//   -2^(-MID_BITS - 1) <= lo * log2(b) <= 2^(-MID_BITS - 1)
// Base class needs to provide the following constants:
//   - MID_BITS    : number of bits after decimal points used for mid
//   - MID_MASK    : 2^MID_BITS - 1, mask to extract mid bits
//   - LOG2_B      : log2(b) * 2^MID_BITS for scaling
//   - M_LOGB_2_HI : high part of -log_b(2) * 2^(-MID_BITS)
//   - M_LOGB_2_LO : low part of -log_b(2) * 2^(-MID_BITS)
//   - EXP_2_MID   : look up table for bit fields of 2^mid
// Return:
//   { 2^(hi + mid), lo }
template <class Base> static inline exp_b_reduc_t exp_b_range_reduc(float x) {
  double xd = static_cast<double>(x);
  // kd = round((hi + mid) * log2(b) * 2^MID_BITS)
  double kd = fputil::nearest_integer(Base::LOG2_B * xd);
  // k = round((hi + mid) * log2(b) * 2^MID_BITS)
  int k = static_cast<int>(kd);
  // hi = floor(kd * 2^(-MID_BITS))
  // exp_hi = shift hi to the exponent field of double precision.
  int64_t exp_hi = static_cast<int64_t>((k >> Base::MID_BITS))
                   << fputil::FloatProperties<double>::MANTISSA_WIDTH;
  // mh = 2^hi * 2^mid
  // mh_bits = bit field of mh
  int64_t mh_bits = Base::EXP_2_MID[k & Base::MID_MASK] + exp_hi;
  double mh = fputil::FPBits<double>(uint64_t(mh_bits)).get_val();
  // dx = lo = x - (hi + mid) * log(2)
  double dx = fputil::multiply_add(
      kd, Base::M_LOGB_2_LO, fputil::multiply_add(kd, Base::M_LOGB_2_HI, xd));
  return {mh, dx};
}

// The function correctly calculates sinh(x) and cosh(x) by calculating exp(x)
// and exp(-x) simultaneously.
// To compute e^x, we perform the following range
// reduction: find hi, mid, lo such that:
//   x = (hi + mid) * log(2) + lo, in which
//     hi is an integer,
//     0 <= mid * 2^5 < 32 is an integer
//     -2^(-6) <= lo * log2(e) <= 2^-6.
// In particular,
//   hi + mid = round(x * log2(e) * 2^5) * 2^(-5).
// Then,
//   e^x = 2^(hi + mid) * e^lo = 2^hi * 2^mid * e^lo.
// 2^mid is stored in the lookup table of 32 elements.
// e^lo is computed using a degree-5 minimax polynomial
// generated by Sollya:
//   e^lo ~ P(lo) = 1 + lo + c2 * lo^2 + ... + c5 * lo^5
//        = (1 + c2*lo^2 + c4*lo^4) + lo * (1 + c3*lo^2 + c5*lo^4)
//        = P_even + lo * P_odd
// We perform 2^hi * 2^mid by simply add hi to the exponent field
// of 2^mid.
// To compute e^(-x), notice that:
//   e^(-x) = 2^(-(hi + mid)) * e^(-lo)
//          ~ 2^(-(hi + mid)) * P(-lo)
//          = 2^(-(hi + mid)) * (P_even - lo * P_odd)
// So:
//   sinh(x) = (e^x - e^(-x)) / 2
//           ~ 0.5 * (2^(hi + mid) * (P_even + lo * P_odd) -
//                    2^(-(hi + mid)) * (P_even - lo * P_odd))
//           = 0.5 * (P_even * (2^(hi + mid) - 2^(-(hi + mid))) +
//                    lo * P_odd * (2^(hi + mid) + 2^(-(hi + mid))))
// And similarly:
//   cosh(x) = (e^x + e^(-x)) / 2
//           ~ 0.5 * (P_even * (2^(hi + mid) + 2^(-(hi + mid))) +
//                    lo * P_odd * (2^(hi + mid) - 2^(-(hi + mid))))
// The main point of these formulas is that the expensive part of calculating
// the polynomials approximating lower parts of e^(x) and e^(-x) are shared
// and only done once.
template <bool is_sinh> static inline double exp_pm_eval(float x) {
  double xd = static_cast<double>(x);

  // round(x * log2(e) * 2^5)
  double kd = fputil::nearest_integer(ExpBase::LOG2_B * xd);

  // k_p = round(x * log2(e) * 2^5)
  int k_p = static_cast<int>(kd);
  // k_m = round(-x * log2(e) * 2^5)
  int k_m = -k_p;

  // hi = floor(kf * 2^(-5))
  // exp_hi = shift hi to the exponent field of double precision.
  int64_t exp_hi_p = static_cast<int64_t>((k_p >> ExpBase::MID_BITS))
                     << fputil::FloatProperties<double>::MANTISSA_WIDTH;
  int64_t exp_hi_m = static_cast<int64_t>((k_m >> ExpBase::MID_BITS))
                     << fputil::FloatProperties<double>::MANTISSA_WIDTH;
  // mh_p = 2^(hi + mid)
  // mh_m = 2^(-(hi + mid))
  // mh_bits_* = bit field of mh_*
  int64_t mh_bits_p = ExpBase::EXP_2_MID[k_p & ExpBase::MID_MASK] + exp_hi_p;
  int64_t mh_bits_m = ExpBase::EXP_2_MID[k_m & ExpBase::MID_MASK] + exp_hi_m;
  double mh_p = fputil::FPBits<double>(uint64_t(mh_bits_p)).get_val();
  double mh_m = fputil::FPBits<double>(uint64_t(mh_bits_m)).get_val();
  // mh_sum = 2^(hi + mid) + 2^(-(hi + mid))
  double mh_sum = mh_p + mh_m;
  // mh_diff = 2^(hi + mid) - 2^(-(hi + mid))
  double mh_diff = mh_p - mh_m;

  // dx = lo = x - (hi + mid) * log(2)
  double dx =
      fputil::multiply_add(kd, ExpBase::M_LOGB_2_LO,
                           fputil::multiply_add(kd, ExpBase::M_LOGB_2_HI, xd));
  double dx2 = dx * dx;

  // c0 = 1 + COEFFS[0] * lo^2
  // P_even = 1 + COEFFS[0] * lo^2 + COEFFS[2] * lo^4
  double p_even =
      fputil::polyeval(dx2, 1.0, ExpBase::COEFFS[0], ExpBase::COEFFS[2]);
  // P_odd = 1 + COEFFS[1] * lo^2 + COEFFS[3] * lo^4
  double p_odd =
      fputil::polyeval(dx2, 1.0, ExpBase::COEFFS[1], ExpBase::COEFFS[3]);

  double r;
  if constexpr (is_sinh)
    r = fputil::multiply_add(dx * mh_sum, p_odd, p_even * mh_diff);
  else
    r = fputil::multiply_add(dx * mh_diff, p_odd, p_even * mh_sum);
  return 0.5 * r;
}

// x should be positive, normal finite value
inline static double log2_eval(double x) {
  using FPB = fputil::FPBits<double>;
  FPB bs(x);

  double result = 0;
  result += bs.get_exponent();

  int p1 =
      (bs.get_mantissa() >> (FPB::FloatProp::MANTISSA_WIDTH - LOG_P1_BITS)) &
      (LOG_P1_SIZE - 1);

  bs.bits &= FPB::FloatProp::MANTISSA_MASK >> LOG_P1_BITS;
  bs.set_unbiased_exponent(FPB::FloatProp::EXPONENT_BIAS);
  double dx = (bs.get_val() - 1.0) * LOG_P1_1_OVER[p1];

  // Taylor series for log(2,1+x)
  double c1 = fputil::multiply_add(dx, K_LOG2_ODD[0], K_LOG2_EVEN[0]);
  double c2 = fputil::multiply_add(dx, K_LOG2_ODD[1], K_LOG2_EVEN[1]);
  double c3 = fputil::multiply_add(dx, K_LOG2_ODD[2], K_LOG2_EVEN[2]);
  double c4 = fputil::multiply_add(dx, K_LOG2_ODD[3], K_LOG2_EVEN[3]);

  // c0 = dx * (1.0 / ln(2)) + LOG_P1_LOG2[p1]
  double c0 = fputil::multiply_add(dx, 0x1.71547652b82fep+0, LOG_P1_LOG2[p1]);
  result += __llvm_libc::fputil::polyeval(dx * dx, c0, c1, c2, c3, c4);
  return result;
}

// x should be positive, normal finite value
inline static double log_eval(double x) {
  // ln(x) = log[2,x] * ln(2)
  return log2_eval(x) * 0x1.62e42fefa39efp-1;
}

} // namespace __llvm_libc

#endif // LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H
