lib/libc/mingw/math/fmal.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88

/**
 * This file has no copyright assigned and is placed in the Public Domain.
 * This file is part of the mingw-w64 runtime package.
 * No warranty is given; refer to the file DISCLAIMER.PD within this package.
 */
long double fmal(long double x, long double y, long double z);

#if __SIZEOF_LONG_DOUBLE__ == __SIZEOF_DOUBLE__

double fma(double x, double y, double z);

/* On ARM `long double` is 64 bits. And ARM has hardware FMA. */
long double fmal(long double x, long double y, long double z){
  return fma(x, y, z);
}

#elif defined(_AMD64_) || defined(__x86_64__) || defined(_X86_) || defined(__i386__)

/**
 * x87-specific software-emulated FMA by LH_Mouse (lh_mouse at 126 dot com).
 * This file is donated to the mingw-w64 project.
 * Note: This file requires C99 support to compile.
 */

#include <math.h>
#include <stdint.h>

/* See <https://en.wikipedia.org/wiki/Extended_precision#x86_extended_precision_format>.
 * Note the higher half of the mantissa has fewer significant bits than the lower
 * half, which reduces rounding errors in the more significant position but increases
 * them in the other end.
 */
typedef union x87reg_ {
  struct __attribute__((__packed__)) {
    uint64_t mlo : 33;
    uint64_t mhi : 31;
    uint16_t exp : 15;
    uint16_t sgn :  1;
  };
  long double f;
} x87reg;

static inline void break_down(x87reg *restrict lo, x87reg *restrict hi, long double x) {
  hi->f = x;
  /* Erase low-order significant bits. `hi->f` now has only 31 significant bits. */
  hi->mlo = 0;
  /* Store the low-order half. It will be normalized by the hardware. */
  lo->f = x - hi->f;
  /* Preserve signness in case of zero. */
  lo->sgn = hi->sgn;
}

long double fmal(long double x, long double y, long double z) {
  /*
    POSIX-2013:
    1. If x or y are NaN, a NaN shall be returned.
    2. If x multiplied by y is an exact infinity and z is also an infinity
       but with the opposite sign, a domain error shall occur, and either a NaN
       (if supported), or an implementation-defined value shall be returned.
    3. If one of x and y is infinite, the other is zero, and z is not a NaN,
       a domain error shall occur, and either a NaN (if supported), or an
       implementation-defined value shall be returned.
    4. If one of x and y is infinite, the other is zero, and z is a NaN, a NaN
       shall be returned and a domain error may occur.
    5. If x* y is not 0*Inf nor Inf*0 and z is a NaN, a NaN shall be returned.
  */
  /* Check whether the result is finite. */
  long double ret = x * y + z;
  if(!isfinite(ret)) {
    return ret; /* If this naive check doesn't yield a finite value, the FMA isn't
                   likely to return one either. Forward the value as is. */
  }
  x87reg xlo, xhi, ylo, yhi;
  break_down(&xlo, &xhi, x);
  break_down(&ylo, &yhi, y);
  /* The order of these four statements is essential. Don't move them around. */
  ret = z;
  ret += xhi.f * yhi.f;                 /* The most significant item comes first. */
  ret += xhi.f * ylo.f + xlo.f * yhi.f; /* They are equally significant. */
  ret += xlo.f * ylo.f;                 /* The least significant item comes last. */
  return ret;
}

#else

#error Please add FMA implementation for this platform.

#endif