Merge remote-tracking branch 'asoc/topic/rcar' into asoc-next
[sfrench/cifs-2.6.git] / arch / mips / math-emu / sp_maddf.c
1 /*
2  * IEEE754 floating point arithmetic
3  * single precision: MADDF.f (Fused Multiply Add)
4  * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
5  *
6  * MIPS floating point support
7  * Copyright (C) 2015 Imagination Technologies, Ltd.
8  * Author: Markos Chandras <markos.chandras@imgtec.com>
9  *
10  *  This program is free software; you can distribute it and/or modify it
11  *  under the terms of the GNU General Public License as published by the
12  *  Free Software Foundation; version 2 of the License.
13  */
14
15 #include "ieee754sp.h"
16
17 enum maddf_flags {
18         maddf_negate_product    = 1 << 0,
19 };
20
21 static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
22                                  union ieee754sp y, enum maddf_flags flags)
23 {
24         int re;
25         int rs;
26         unsigned rm;
27         unsigned short lxm;
28         unsigned short hxm;
29         unsigned short lym;
30         unsigned short hym;
31         unsigned lrm;
32         unsigned hrm;
33         unsigned t;
34         unsigned at;
35         int s;
36
37         COMPXSP;
38         COMPYSP;
39         COMPZSP;
40
41         EXPLODEXSP;
42         EXPLODEYSP;
43         EXPLODEZSP;
44
45         FLUSHXSP;
46         FLUSHYSP;
47         FLUSHZSP;
48
49         ieee754_clearcx();
50
51         switch (zc) {
52         case IEEE754_CLASS_SNAN:
53                 ieee754_setcx(IEEE754_INVALID_OPERATION);
54                 return ieee754sp_nanxcpt(z);
55         case IEEE754_CLASS_DNORM:
56                 SPDNORMZ;
57         /* QNAN and ZERO cases are handled separately below */
58         }
59
60         switch (CLPAIR(xc, yc)) {
61         case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN):
62         case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN):
63         case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN):
64         case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN):
65         case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN):
66                 return ieee754sp_nanxcpt(y);
67
68         case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
69         case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
70         case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO):
71         case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
72         case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
73         case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
74                 return ieee754sp_nanxcpt(x);
75
76         case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
77         case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
78         case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
79         case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
80                 return y;
81
82         case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
83         case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
84         case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
85         case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
86         case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF):
87                 return x;
88
89         /*
90          * Infinity handling
91          */
92         case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
93         case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
94                 if (zc == IEEE754_CLASS_QNAN)
95                         return z;
96                 ieee754_setcx(IEEE754_INVALID_OPERATION);
97                 return ieee754sp_indef();
98
99         case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
100         case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
101         case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
102         case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
103         case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
104                 if (zc == IEEE754_CLASS_QNAN)
105                         return z;
106                 return ieee754sp_inf(xs ^ ys);
107
108         case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
109         case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
110         case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
111         case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
112         case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
113                 if (zc == IEEE754_CLASS_INF)
114                         return ieee754sp_inf(zs);
115                 /* Multiplication is 0 so just return z */
116                 return z;
117
118         case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
119                 SPDNORMX;
120
121         case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
122                 if (zc == IEEE754_CLASS_QNAN)
123                         return z;
124                 else if (zc == IEEE754_CLASS_INF)
125                         return ieee754sp_inf(zs);
126                 SPDNORMY;
127                 break;
128
129         case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
130                 if (zc == IEEE754_CLASS_QNAN)
131                         return z;
132                 else if (zc == IEEE754_CLASS_INF)
133                         return ieee754sp_inf(zs);
134                 SPDNORMX;
135                 break;
136
137         case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
138                 if (zc == IEEE754_CLASS_QNAN)
139                         return z;
140                 else if (zc == IEEE754_CLASS_INF)
141                         return ieee754sp_inf(zs);
142                 /* fall through to real computations */
143         }
144
145         /* Finally get to do some computation */
146
147         /*
148          * Do the multiplication bit first
149          *
150          * rm = xm * ym, re = xe + ye basically
151          *
152          * At this point xm and ym should have been normalized.
153          */
154
155         /* rm = xm * ym, re = xe+ye basically */
156         assert(xm & SP_HIDDEN_BIT);
157         assert(ym & SP_HIDDEN_BIT);
158
159         re = xe + ye;
160         rs = xs ^ ys;
161         if (flags & maddf_negate_product)
162                 rs ^= 1;
163
164         /* shunt to top of word */
165         xm <<= 32 - (SP_FBITS + 1);
166         ym <<= 32 - (SP_FBITS + 1);
167
168         /*
169          * Multiply 32 bits xm, ym to give high 32 bits rm with stickness.
170          */
171         lxm = xm & 0xffff;
172         hxm = xm >> 16;
173         lym = ym & 0xffff;
174         hym = ym >> 16;
175
176         lrm = lxm * lym;        /* 16 * 16 => 32 */
177         hrm = hxm * hym;        /* 16 * 16 => 32 */
178
179         t = lxm * hym; /* 16 * 16 => 32 */
180         at = lrm + (t << 16);
181         hrm += at < lrm;
182         lrm = at;
183         hrm = hrm + (t >> 16);
184
185         t = hxm * lym; /* 16 * 16 => 32 */
186         at = lrm + (t << 16);
187         hrm += at < lrm;
188         lrm = at;
189         hrm = hrm + (t >> 16);
190
191         rm = hrm | (lrm != 0);
192
193         /*
194          * Sticky shift down to normal rounding precision.
195          */
196         if ((int) rm < 0) {
197                 rm = (rm >> (32 - (SP_FBITS + 1 + 3))) |
198                     ((rm << (SP_FBITS + 1 + 3)) != 0);
199                 re++;
200         } else {
201                 rm = (rm >> (32 - (SP_FBITS + 1 + 3 + 1))) |
202                      ((rm << (SP_FBITS + 1 + 3 + 1)) != 0);
203         }
204         assert(rm & (SP_HIDDEN_BIT << 3));
205
206         if (zc == IEEE754_CLASS_ZERO)
207                 return ieee754sp_format(rs, re, rm);
208
209         /* And now the addition */
210
211         assert(zm & SP_HIDDEN_BIT);
212
213         /*
214          * Provide guard,round and stick bit space.
215          */
216         zm <<= 3;
217
218         if (ze > re) {
219                 /*
220                  * Have to shift r fraction right to align.
221                  */
222                 s = ze - re;
223                 rm = XSPSRS(rm, s);
224                 re += s;
225         } else if (re > ze) {
226                 /*
227                  * Have to shift z fraction right to align.
228                  */
229                 s = re - ze;
230                 zm = XSPSRS(zm, s);
231                 ze += s;
232         }
233         assert(ze == re);
234         assert(ze <= SP_EMAX);
235
236         if (zs == rs) {
237                 /*
238                  * Generate 28 bit result of adding two 27 bit numbers
239                  * leaving result in zm, zs and ze.
240                  */
241                 zm = zm + rm;
242
243                 if (zm >> (SP_FBITS + 1 + 3)) { /* carry out */
244                         zm = XSPSRS1(zm);
245                         ze++;
246                 }
247         } else {
248                 if (zm >= rm) {
249                         zm = zm - rm;
250                 } else {
251                         zm = rm - zm;
252                         zs = rs;
253                 }
254                 if (zm == 0)
255                         return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
256
257                 /*
258                  * Normalize in extended single precision
259                  */
260                 while ((zm >> (SP_MBITS + 3)) == 0) {
261                         zm <<= 1;
262                         ze--;
263                 }
264
265         }
266         return ieee754sp_format(zs, ze, zm);
267 }
268
269 union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x,
270                                 union ieee754sp y)
271 {
272         return _sp_maddf(z, x, y, 0);
273 }
274
275 union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x,
276                                 union ieee754sp y)
277 {
278         return _sp_maddf(z, x, y, maddf_negate_product);
279 }