author     magh <magh@maghmogh.com>  2023-03-06 18:44:55 -0600
committer  magh <magh@maghmogh.com>  2023-03-06 18:44:55 -0600
commit     e80d9d8871b325a04b18f90a9ea4bb7fd148fb25 (patch)
tree       79dbdb8506b7ff1e92549188d1b94cfc0b3503ae /tools/src/math
add m1n1 (HEAD, master)
Diffstat (limited to 'tools/src/math')
-rw-r--r--  tools/src/math/exp2f_data.c   42
-rw-r--r--  tools/src/math/exp2f_data.h   22
-rw-r--r--  tools/src/math/expf.c         83
-rw-r--r--  tools/src/math/libm.h        271
4 files changed, 418 insertions, 0 deletions
diff --git a/tools/src/math/exp2f_data.c b/tools/src/math/exp2f_data.c
new file mode 100644
index 0000000..38c9333
--- /dev/null
+++ b/tools/src/math/exp2f_data.c
@@ -0,0 +1,42 @@
+/*
+ * Shared data between expf, exp2f and powf.
+ *
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "exp2f_data.h"
+
+#define N (1 << EXP2F_TABLE_BITS)
+
+const struct exp2f_data __exp2f_data = {
+ /* tab[i] = uint(2^(i/N)) - (i << 52-BITS)
+ used for computing 2^(k/N) for an int |k| < 150 N as
+ double(tab[k%N] + (k << 52-BITS)) */
+ .tab =
+ {
+ 0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, 0x3fef9301d0125b51,
+ 0x3fef72b83c7d517b, 0x3fef54873168b9aa, 0x3fef387a6e756238, 0x3fef1e9df51fdee1,
+ 0x3fef06fe0a31b715, 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d,
+ 0x3feebfdad5362a27, 0x3feeb42b569d4f82, 0x3feeab07dd485429, 0x3feea47eb03a5585,
+ 0x3feea09e667f3bcd, 0x3fee9f75e8ec5f74, 0x3feea11473eb0187, 0x3feea589994cce13,
+ 0x3feeace5422aa0db, 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d,
+ 0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, 0x3fef3720dcef9069,
+ 0x3fef5818dcfba487, 0x3fef7c97337b9b5f, 0x3fefa4afa2a490da, 0x3fefd0765b6e4540,
+ },
+ .shift_scaled = 0x1.8p+52 / N,
+ .poly =
+ {
+ 0x1.c6af84b912394p-5,
+ 0x1.ebfce50fac4f3p-3,
+ 0x1.62e42ff0c52d6p-1,
+ },
+ .shift = 0x1.8p+52,
+ .invln2_scaled = 0x1.71547652b82fep+0 * N,
+ .poly_scaled =
+ {
+ 0x1.c6af84b912394p-5 / N / N / N,
+ 0x1.ebfce50fac4f3p-3 / N / N,
+ 0x1.62e42ff0c52d6p-1 / N,
+ },
+};
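
Note on the table above: each entry packs the mantissa of 2^(i/N) together with a per-index exponent offset, so a single integer add at run time reconstructs 2^(k/N) for any k with k % N == i. The standalone program below is a hypothetical sketch, not part of the commit; the file name check_tab.c and the use of the host exp2() are assumptions. It regenerates an entry from the formula in the comment and checks that reconstruction.

/* check_tab.c -- hypothetical sketch, not part of this commit.
 * Rebuild tab[i] from the comment's formula and verify that
 * double(tab[k % N] + (k << (52 - EXP2F_TABLE_BITS))) == 2^(k/N).
 * Build with e.g.:  cc check_tab.c -lm */
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define BITS 5
#define N (1 << BITS)

static uint64_t bits_of(double x) { uint64_t u; memcpy(&u, &x, 8); return u; }
static double double_of(uint64_t u) { double x; memcpy(&x, &u, 8); return x; }

int main(void)
{
    for (int i = 0; i < N; i++) {
        /* tab[i] = uint(2^(i/N)) - (i << (52 - BITS)) */
        uint64_t tab_i = bits_of(exp2((double)i / N)) - ((uint64_t)i << (52 - BITS));

        /* pick a k in the same residue class, here k = 3*N + i */
        uint64_t k = 3 * N + i;
        double s = double_of(tab_i + (k << (52 - BITS)));

        /* adding (k - i) << 47 bumps the exponent field by (k - i)/N = 3,
           so s must equal 2^(i/N) scaled by 2^3 exactly */
        if (s != exp2((double)i / N) * 8.0)
            printf("mismatch at i = %d\n", i);
    }
    puts("table reconstruction checked");
    return 0;
}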
diff --git a/tools/src/math/exp2f_data.h b/tools/src/math/exp2f_data.h
new file mode 100644
index 0000000..a88c6ce
--- /dev/null
+++ b/tools/src/math/exp2f_data.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#ifndef _EXP2F_DATA_H
+#define _EXP2F_DATA_H
+
+#include <stdint.h>
+
+/* Shared between expf, exp2f and powf. */
+#define EXP2F_TABLE_BITS 5
+#define EXP2F_POLY_ORDER 3
+extern const struct exp2f_data {
+ uint64_t tab[1 << EXP2F_TABLE_BITS];
+ double shift_scaled;
+ double poly[EXP2F_POLY_ORDER];
+ double shift;
+ double invln2_scaled;
+ double poly_scaled[EXP2F_POLY_ORDER];
+} __exp2f_data;
+
+#endif
diff --git a/tools/src/math/expf.c b/tools/src/math/expf.c
new file mode 100644
index 0000000..c9f1b3f
--- /dev/null
+++ b/tools/src/math/expf.c
@@ -0,0 +1,83 @@
+/*
+ * Single-precision e^x function.
+ *
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include <math.h>
+#include <stdint.h>
+
+#include "exp2f_data.h"
+#include "libm.h"
+
+#define double_t double
+
+/*
+EXP2F_TABLE_BITS = 5
+EXP2F_POLY_ORDER = 3
+
+ULP error: 0.502 (nearest rounding.)
+Relative error: 1.69 * 2^-34 in [-ln2/64, ln2/64] (before rounding.)
+Wrong count: 170635 (all nearest rounding wrong results with fma.)
+Non-nearest ULP error: 1 (rounded ULP error)
+*/
+
+#define N (1 << EXP2F_TABLE_BITS)
+#define InvLn2N __exp2f_data.invln2_scaled
+#define T __exp2f_data.tab
+#define C __exp2f_data.poly_scaled
+
+static inline uint32_t top12(float x)
+{
+ return asuint(x) >> 20;
+}
+
+float expf(float x)
+{
+ uint32_t abstop;
+ uint64_t ki, t;
+ double_t kd, xd, z, r, r2, y, s;
+
+ xd = (double_t)x;
+ abstop = top12(x) & 0x7ff;
+ if (predict_false(abstop >= top12(88.0f))) {
+ /* |x| >= 88 or x is nan. */
+ if (asuint(x) == asuint(-INFINITY))
+ return 0.0f;
+ if (abstop >= top12(INFINITY))
+ return x + x;
+ if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */
+ return __math_oflowf(0);
+ if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */
+ return __math_uflowf(0);
+ }
+
+ /* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k. */
+ z = InvLn2N * xd;
+
+ /* Round and convert z to int, the result is in [-150*N, 128*N] and
+ ideally ties-to-even rule is used, otherwise the magnitude of r
+ can be bigger which gives larger approximation error. */
+#if TOINT_INTRINSICS
+ kd = roundtoint(z);
+ ki = converttoint(z);
+#else
+#define SHIFT __exp2f_data.shift
+ kd = eval_as_double(z + SHIFT);
+ ki = asuint64(kd);
+ kd -= SHIFT;
+#endif
+ r = z - kd;
+
+ /* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
+ t = T[ki % N];
+ t += ki << (52 - EXP2F_TABLE_BITS);
+ s = asdouble(t);
+ z = C[0] * r + C[1];
+ r2 = r * r;
+ y = C[2] * r + 1;
+ y = z * r2 + y;
+ y = y * s;
+ return eval_as_float(y);
+}
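
The shift trick in the #else branch above is the part that most often raises questions: adding 0x1.8p+52 pushes the integer part of z into the low mantissa bits of a double, so one addition yields both round(z) (after subtracting the shift back) and its bit pattern for the modular table indexing. Below is a minimal standalone illustration, not part of the commit, assuming round-to-nearest and no x87 excess precision (which eval_as_double guards against in the real code).

/* shift_round.c -- hypothetical sketch, not part of this commit. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    const double SHIFT = 0x1.8p+52;      /* same constant as __exp2f_data.shift */
    double z = -37.6;                    /* a scaled argument, as in x*N/Ln2 */

    double kd = z + SHIFT;               /* integer part lands in the low mantissa bits */
    uint64_t ki;
    memcpy(&ki, &kd, sizeof ki);         /* low bits hold round(z) modulo a power of two */
    kd -= SHIFT;                         /* kd is now round(z) as a double */

    /* round(-37.6) = -38; the low 8 bits read 218 = 256 - 38, i.e. -38 mod 256,
       which is all expf needs for ki % N and ki << (52 - EXP2F_TABLE_BITS). */
    printf("round(z) = %.0f, low 8 bits of ki = %u\n", kd, (unsigned)(ki & 0xff));
    return 0;
}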
diff --git a/tools/src/math/libm.h b/tools/src/math/libm.h
new file mode 100644
index 0000000..1616c74
--- /dev/null
+++ b/tools/src/math/libm.h
@@ -0,0 +1,271 @@
+#ifndef _LIBM_H
+#define _LIBM_H
+
+#include <endian.h>
+#include <float.h>
+#include <math.h>
+#include <stdint.h>
+
+#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
+#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __LITTLE_ENDIAN
+union ldshape {
+ long double f;
+ struct {
+ uint64_t m;
+ uint16_t se;
+ } i;
+};
+#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __BIG_ENDIAN
+/* This is the m68k variant of 80-bit long double, and this definition only works
+ * on archs where the alignment requirement of uint64_t is <= 4. */
+union ldshape {
+ long double f;
+ struct {
+ uint16_t se;
+ uint16_t pad;
+ uint64_t m;
+ } i;
+};
+#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __LITTLE_ENDIAN
+union ldshape {
+ long double f;
+ struct {
+ uint64_t lo;
+ uint32_t mid;
+ uint16_t top;
+ uint16_t se;
+ } i;
+ struct {
+ uint64_t lo;
+ uint64_t hi;
+ } i2;
+};
+#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __BIG_ENDIAN
+union ldshape {
+ long double f;
+ struct {
+ uint16_t se;
+ uint16_t top;
+ uint32_t mid;
+ uint64_t lo;
+ } i;
+ struct {
+ uint64_t hi;
+ uint64_t lo;
+ } i2;
+};
+#else
+#error Unsupported long double representation
+#endif
+
+/* Support non-nearest rounding mode. */
+#define WANT_ROUNDING 1
+/* Support signaling NaNs. */
+#define WANT_SNAN 0
+
+#if WANT_SNAN
+#error SNaN is unsupported
+#else
+#define issignalingf_inline(x) 0
+#define issignaling_inline(x) 0
+#endif
+
+#ifndef TOINT_INTRINSICS
+#define TOINT_INTRINSICS 0
+#endif
+
+#if TOINT_INTRINSICS
+/* Round x to nearest int in all rounding modes, ties have to be rounded
+ consistently with converttoint so the results match. If the result
+ would be outside of [-2^31, 2^31-1] then the semantics is unspecified. */
+static double_t roundtoint(double_t);
+
+/* Convert x to nearest int in all rounding modes, ties have to be rounded
+   consistently with roundtoint. If the result is not representable in an
+   int32_t then the semantics is unspecified. */
+static int32_t converttoint(double_t);
+#endif
+
+/* Helps static branch prediction so hot path can be better optimized. */
+#ifdef __GNUC__
+#define predict_true(x) __builtin_expect(!!(x), 1)
+#define predict_false(x) __builtin_expect(!!(x), 0)
+#else
+#define predict_true(x) (x)
+#define predict_false(x) (x)
+#endif
+
+/* Evaluate an expression as the specified type. With standard excess
+ precision handling a type cast or assignment is enough (with
+ -ffloat-store an assignment is required, in old compilers argument
+ passing and return statement may not drop excess precision). */
+
+static inline float eval_as_float(float x)
+{
+ float y = x;
+ return y;
+}
+
+static inline double eval_as_double(double x)
+{
+ double y = x;
+ return y;
+}
+
+/* fp_barrier returns its input, but limits code transformations
+ as if it had a side-effect (e.g. observable io) and returned
+ an arbitrary value. */
+
+#ifndef fp_barrierf
+#define fp_barrierf fp_barrierf
+static inline float fp_barrierf(float x)
+{
+ volatile float y = x;
+ return y;
+}
+#endif
+
+#ifndef fp_barrier
+#define fp_barrier fp_barrier
+static inline double fp_barrier(double x)
+{
+ volatile double y = x;
+ return y;
+}
+#endif
+
+#ifndef fp_barrierl
+#define fp_barrierl fp_barrierl
+static inline long double fp_barrierl(long double x)
+{
+ volatile long double y = x;
+ return y;
+}
+#endif
+
+/* fp_force_eval ensures that the input value is computed when that's
+ otherwise unused. To prevent the constant folding of the input
+ expression, an additional fp_barrier may be needed or a compilation
+ mode that does so (e.g. -frounding-math in gcc). Then it can be
+ used to evaluate an expression for its fenv side-effects only. */
+
+#ifndef fp_force_evalf
+#define fp_force_evalf fp_force_evalf
+static inline void fp_force_evalf(float x)
+{
+ volatile float y;
+ y = x;
+ (void)y;
+}
+#endif
+
+#ifndef fp_force_eval
+#define fp_force_eval fp_force_eval
+static inline void fp_force_eval(double x)
+{
+ volatile double y;
+ y = x;
+ (void)y;
+}
+#endif
+
+#ifndef fp_force_evall
+#define fp_force_evall fp_force_evall
+static inline void fp_force_evall(long double x)
+{
+ volatile long double y;
+ y = x;
+ (void)y;
+}
+#endif
+
+#define FORCE_EVAL(x) \
+ do { \
+ if (sizeof(x) == sizeof(float)) { \
+ fp_force_evalf(x); \
+ } else if (sizeof(x) == sizeof(double)) { \
+ fp_force_eval(x); \
+ } else { \
+ fp_force_evall(x); \
+ } \
+ } while (0)
+
+#define asuint(f) \
+ ((union { \
+ float _f; \
+ uint32_t _i; \
+ }){f}) \
+ ._i
+#define asfloat(i) \
+ ((union { \
+ uint32_t _i; \
+ float _f; \
+ }){i}) \
+ ._f
+#define asuint64(f) \
+ ((union { \
+ double _f; \
+ uint64_t _i; \
+ }){f}) \
+ ._i
+#define asdouble(i) \
+ ((union { \
+ uint64_t _i; \
+ double _f; \
+ }){i}) \
+ ._f
+
+#define EXTRACT_WORDS(hi, lo, d) \
+ do { \
+ uint64_t __u = asuint64(d); \
+ (hi) = __u >> 32; \
+ (lo) = (uint32_t)__u; \
+ } while (0)
+
+#define GET_HIGH_WORD(hi, d) \
+ do { \
+ (hi) = asuint64(d) >> 32; \
+ } while (0)
+
+#define GET_LOW_WORD(lo, d) \
+ do { \
+ (lo) = (uint32_t)asuint64(d); \
+ } while (0)
+
+#define INSERT_WORDS(d, hi, lo) \
+ do { \
+ (d) = asdouble(((uint64_t)(hi) << 32) | (uint32_t)(lo)); \
+ } while (0)
+
+#define SET_HIGH_WORD(d, hi) INSERT_WORDS(d, hi, (uint32_t)asuint64(d))
+
+#define SET_LOW_WORD(d, lo) INSERT_WORDS(d, asuint64(d) >> 32, lo)
+
+#define GET_FLOAT_WORD(w, d) \
+ do { \
+ (w) = asuint(d); \
+ } while (0)
+
+#define SET_FLOAT_WORD(d, w) \
+ do { \
+ (d) = asfloat(w); \
+ } while (0)
+
+/* error handling functions */
+
+static inline float __math_xflowf(uint32_t sign, float y)
+{
+ return eval_as_float(fp_barrierf(sign ? -y : y) * y);
+}
+
+static inline float __math_oflowf(uint32_t sign)
+{
+ return __math_xflowf(sign, 0x1p97f);
+}
+
+static inline float __math_uflowf(uint32_t sign)
+{
+ return __math_xflowf(sign, 0x1p-95f);
+}
+
+#endif
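
As a rough usage illustration of the helpers in libm.h (not part of the commit; relying on fenv flags without the FENV_ACCESS pragma is an assumption of this sketch), the snippet below exercises the error path and the bit-cast macros: __math_oflowf() multiplies two large finite floats through fp_barrierf so the overflow exception is raised at run time instead of being folded away, and asuint() exposes the raw IEEE-754 encoding, e.g. the sign bit of -0.0f.

/* libm_demo.c -- hypothetical sketch, not part of this commit.
 * Build with e.g.:  cc libm_demo.c -lm */
#include <fenv.h>
#include <stdio.h>

#include "libm.h"

int main(void)
{
    feclearexcept(FE_ALL_EXCEPT);

    /* 0x1p97f * 0x1p97f overflows float: returns +inf and sets FE_OVERFLOW */
    float huge = __math_oflowf(0);
    printf("__math_oflowf(0) = %f, FE_OVERFLOW raised: %d\n",
           (double)huge, fetestexcept(FE_OVERFLOW) != 0);

    /* asuint() gives the raw encoding; -0.0f keeps only the sign bit */
    float mz = -0.0f;
    printf("asuint(-0.0f) = 0x%08x (sign bit = %u)\n",
           (unsigned)asuint(mz), (unsigned)(asuint(mz) >> 31));
    return 0;
}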