Merge branch 'for-4.14-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj...

[sfrench/cifs-2.6.git] / arch / mips / math-emu / dp_maddf.c
diff --git a/arch/mips/math-emu/dp_maddf.c b/arch/mips/math-emu/dp_maddf.c

index caa62f20a888d120dc22ec187aa5693470b9ec22..e0d9be5fbf4cd541406b93c5d2add6814a82a9eb 100644 (file)
--- a/arch/mips/math-emu/dp_maddf.c
+++ b/arch/mips/math-emu/dp_maddf.c
@@ -14,22 +14,45 @@
  
  #include "ieee754dp.h"
  
-enum maddf_flags {
-       maddf_negate_product    = 1 << 0,
-};
+
+/* 128 bits shift right logical with rounding. */
+void srl128(u64 *hptr, u64 *lptr, int count)
+{
+       u64 low;
+
+       if (count >= 128) {
+               *lptr = *hptr != 0 || *lptr != 0;
+               *hptr = 0;
+       } else if (count >= 64) {
+               if (count == 64) {
+                       *lptr = *hptr | (*lptr != 0);
+               } else {
+                       low = *lptr;
+                       *lptr = *hptr >> (count - 64);
+                       *lptr |= (*hptr << (128 - count)) != 0 || low != 0;
+               }
+               *hptr = 0;
+       } else {
+               low = *lptr;
+               *lptr = low >> count | *hptr << (64 - count);
+               *lptr |= (low << (64 - count)) != 0;
+               *hptr = *hptr >> count;
+       }
+}
  
  static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
                                  union ieee754dp y, enum maddf_flags flags)
  {
         int re;
         int rs;
-       u64 rm;
         unsigned lxm;
         unsigned hxm;
         unsigned lym;
         unsigned hym;
         u64 lrm;
         u64 hrm;
+       u64 lzm;
+       u64 hzm;
         u64 t;
         u64 at;
         int s;
@@ -48,52 +71,34 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
  
         ieee754_clearcx();
  
-       switch (zc) {
-       case IEEE754_CLASS_SNAN:
-               ieee754_setcx(IEEE754_INVALID_OPERATION);
+       /*
+        * Handle the cases when at least one of x, y or z is a NaN.
+        * Order of precedence is sNaN, qNaN and z, x, y.
+        */
+       if (zc == IEEE754_CLASS_SNAN)
                 return ieee754dp_nanxcpt(z);
-       case IEEE754_CLASS_DNORM:
-               DPDNORMZ;
-       /* QNAN and ZERO cases are handled separately below */
-       }
-
-       switch (CLPAIR(xc, yc)) {
-       case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN):
-       case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN):
-       case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN):
-       case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN):
-       case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN):
-               return ieee754dp_nanxcpt(y);
-
-       case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
-       case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
-       case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO):
-       case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
-       case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
-       case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
+       if (xc == IEEE754_CLASS_SNAN)
                 return ieee754dp_nanxcpt(x);
-
-       case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
-       case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
-       case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
-       case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
+       if (yc == IEEE754_CLASS_SNAN)
+               return ieee754dp_nanxcpt(y);
+       if (zc == IEEE754_CLASS_QNAN)
+               return z;
+       if (xc == IEEE754_CLASS_QNAN)
+               return x;
+       if (yc == IEEE754_CLASS_QNAN)
                 return y;
  
-       case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
-       case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
-       case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
-       case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
-       case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF):
-               return x;
+       if (zc == IEEE754_CLASS_DNORM)
+               DPDNORMZ;
+       /* ZERO z cases are handled separately below */
  
+       switch (CLPAIR(xc, yc)) {
  
         /*
          * Infinity handling
          */
         case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
         case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
-               if (zc == IEEE754_CLASS_QNAN)
-                       return z;
                 ieee754_setcx(IEEE754_INVALID_OPERATION);
                 return ieee754dp_indef();
  
@@ -102,9 +107,27 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
         case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
         case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
         case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
-               if (zc == IEEE754_CLASS_QNAN)
-                       return z;
-               return ieee754dp_inf(xs ^ ys);
+               if ((zc == IEEE754_CLASS_INF) &&
+                   ((!(flags & MADDF_NEGATE_PRODUCT) && (zs != (xs ^ ys))) ||
+                    ((flags & MADDF_NEGATE_PRODUCT) && (zs == (xs ^ ys))))) {
+                       /*
+                        * Cases of addition of infinities with opposite signs
+                        * or subtraction of infinities with same signs.
+                        */
+                       ieee754_setcx(IEEE754_INVALID_OPERATION);
+                       return ieee754dp_indef();
+               }
+               /*
+                * z is here either not an infinity, or an infinity having the
+                * same sign as product (x*y) (in case of MADDF.D instruction)
+                * or product -(x*y) (in MSUBF.D case). The result must be an
+                * infinity, and its sign is determined only by the value of
+                * (flags & MADDF_NEGATE_PRODUCT) and the signs of x and y.
+                */
+               if (flags & MADDF_NEGATE_PRODUCT)
+                       return ieee754dp_inf(1 ^ (xs ^ ys));
+               else
+                       return ieee754dp_inf(xs ^ ys);
  
         case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
         case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
@@ -113,32 +136,42 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
         case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
                 if (zc == IEEE754_CLASS_INF)
                         return ieee754dp_inf(zs);
-               /* Multiplication is 0 so just return z */
+               if (zc == IEEE754_CLASS_ZERO) {
+                       /* Handle cases +0 + (-0) and similar ones. */
+                       if ((!(flags & MADDF_NEGATE_PRODUCT)
+                                       && (zs == (xs ^ ys))) ||
+                           ((flags & MADDF_NEGATE_PRODUCT)
+                                       && (zs != (xs ^ ys))))
+                               /*
+                                * Cases of addition of zeros of equal signs
+                                * or subtraction of zeroes of opposite signs.
+                                * The sign of the resulting zero is in any
+                                * such case determined only by the sign of z.
+                                */
+                               return z;
+
+                       return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
+               }
+               /* x*y is here 0, and z is not 0, so just return z */
                 return z;
  
         case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
                 DPDNORMX;
  
         case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
-               if (zc == IEEE754_CLASS_QNAN)
-                       return z;
-               else if (zc == IEEE754_CLASS_INF)
+               if (zc == IEEE754_CLASS_INF)
                         return ieee754dp_inf(zs);
                 DPDNORMY;
                 break;
  
         case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
-               if (zc == IEEE754_CLASS_QNAN)
-                       return z;
-               else if (zc == IEEE754_CLASS_INF)
+               if (zc == IEEE754_CLASS_INF)
                         return ieee754dp_inf(zs);
                 DPDNORMX;
                 break;
  
         case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
-               if (zc == IEEE754_CLASS_QNAN)
-                       return z;
-               else if (zc == IEEE754_CLASS_INF)
+               if (zc == IEEE754_CLASS_INF)
                         return ieee754dp_inf(zs);
                 /* fall through to real computations */
         }
@@ -157,7 +190,7 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
  
         re = xe + ye;
         rs = xs ^ ys;
-       if (flags & maddf_negate_product)
+       if (flags & MADDF_NEGATE_PRODUCT)
                 rs ^= 1;
  
         /* shunt to top of word */
@@ -165,7 +198,7 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
         ym <<= 64 - (DP_FBITS + 1);
  
         /*
-        * Multiply 64 bits xm, ym to give high 64 bits rm with stickness.
+        * Multiply 64 bits xm and ym to give 128 bits result in hrm:lrm.
          */
  
         /* 32 * 32 => 64 */
@@ -195,81 +228,110 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
  
         hrm = hrm + (t >> 32);
  
-       rm = hrm | (lrm != 0);
-
-       /*
-        * Sticky shift down to normal rounding precision.
-        */
-       if ((s64) rm < 0) {
-               rm = (rm >> (64 - (DP_FBITS + 1 + 3))) |
-                    ((rm << (DP_FBITS + 1 + 3)) != 0);
+       /* Put explicit bit at bit 126 if necessary */
+       if ((int64_t)hrm < 0) {
+               lrm = (hrm << 63) | (lrm >> 1);
+               hrm = hrm >> 1;
                 re++;
-       } else {
-               rm = (rm >> (64 - (DP_FBITS + 1 + 3 + 1))) |
-                    ((rm << (DP_FBITS + 1 + 3 + 1)) != 0);
         }
-       assert(rm & (DP_HIDDEN_BIT << 3));
  
-       if (zc == IEEE754_CLASS_ZERO)
-               return ieee754dp_format(rs, re, rm);
+       assert(hrm & (1 << 62));
  
-       /* And now the addition */
-       assert(zm & DP_HIDDEN_BIT);
+       if (zc == IEEE754_CLASS_ZERO) {
+               /*
+                * Move explicit bit from bit 126 to bit 55 since the
+                * ieee754dp_format code expects the mantissa to be
+                * 56 bits wide (53 + 3 rounding bits).
+                */
+               srl128(&hrm, &lrm, (126 - 55));
+               return ieee754dp_format(rs, re, lrm);
+       }
  
-       /*
-        * Provide guard,round and stick bit space.
-        */
-       zm <<= 3;
+       /* Move explicit bit from bit 52 to bit 126 */
+       lzm = 0;
+       hzm = zm << 10;
+       assert(hzm & (1 << 62));
  
+       /* Make the exponents the same */
         if (ze > re) {
                 /*
                  * Have to shift y fraction right to align.
                  */
                 s = ze - re;
-               rm = XDPSRS(rm, s);
+               srl128(&hrm, &lrm, s);
                 re += s;
         } else if (re > ze) {
                 /*
                  * Have to shift x fraction right to align.
                  */
                 s = re - ze;
-               zm = XDPSRS(zm, s);
+               srl128(&hzm, &lzm, s);
                 ze += s;
         }
         assert(ze == re);
         assert(ze <= DP_EMAX);
  
+       /* Do the addition */
         if (zs == rs) {
                 /*
-                * Generate 28 bit result of adding two 27 bit numbers
-                * leaving result in xm, xs and xe.
+                * Generate 128 bit result by adding two 127 bit numbers
+                * leaving result in hzm:lzm, zs and ze.
                  */
-               zm = zm + rm;
-
-               if (zm >> (DP_FBITS + 1 + 3)) { /* carry out */
-                       zm = XDPSRS1(zm);
+               hzm = hzm + hrm + (lzm > (lzm + lrm));
+               lzm = lzm + lrm;
+               if ((int64_t)hzm < 0) {        /* carry out */
+                       srl128(&hzm, &lzm, 1);
                         ze++;
                 }
         } else {
-               if (zm >= rm) {
-                       zm = zm - rm;
+               if (hzm > hrm || (hzm == hrm && lzm >= lrm)) {
+                       hzm = hzm - hrm - (lzm < lrm);
+                       lzm = lzm - lrm;
                 } else {
-                       zm = rm - zm;
+                       hzm = hrm - hzm - (lrm < lzm);
+                       lzm = lrm - lzm;
                         zs = rs;
                 }
-               if (zm == 0)
+               if (lzm == 0 && hzm == 0)
                         return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
  
                 /*
-                * Normalize to rounding precision.
+                * Put explicit bit at bit 126 if necessary.
                  */
-               while ((zm >> (DP_FBITS + 3)) == 0) {
-                       zm <<= 1;
-                       ze--;
+               if (hzm == 0) {
+                       /* left shift by 63 or 64 bits */
+                       if ((int64_t)lzm < 0) {
+                               /* MSB of lzm is the explicit bit */
+                               hzm = lzm >> 1;
+                               lzm = lzm << 63;
+                               ze -= 63;
+                       } else {
+                               hzm = lzm;
+                               lzm = 0;
+                               ze -= 64;
+                       }
+               }
+
+               t = 0;
+               while ((hzm >> (62 - t)) == 0)
+                       t++;
+
+               assert(t <= 62);
+               if (t) {
+                       hzm = hzm << t | lzm >> (64 - t);
+                       lzm = lzm << t;
+                       ze -= t;
                 }
         }
  
-       return ieee754dp_format(zs, ze, zm);
+       /*
+        * Move explicit bit from bit 126 to bit 55 since the
+        * ieee754dp_format code expects the mantissa to be
+        * 56 bits wide (53 + 3 rounding bits).
+        */
+       srl128(&hzm, &lzm, (126 - 55));
+
+       return ieee754dp_format(zs, ze, lzm);
  }
  
  union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x,
@@ -281,5 +343,5 @@ union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x,
  union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x,
                                 union ieee754dp y)
  {
-       return _dp_maddf(z, x, y, maddf_negate_product);
+       return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
  }