[AArch64] Fix FP_ROUNDMODE.
[jlayton/glibc.git] / soft-fp / extended.h
1 /* Software floating-point emulation.
2    Definitions for IEEE Extended Precision.
3    Copyright (C) 1999-2014 Free Software Foundation, Inc.
4    This file is part of the GNU C Library.
5    Contributed by Jakub Jelinek (jj@ultra.linux.cz).
6
7    The GNU C Library is free software; you can redistribute it and/or
8    modify it under the terms of the GNU Lesser General Public
9    License as published by the Free Software Foundation; either
10    version 2.1 of the License, or (at your option) any later version.
11
12    In addition to the permissions in the GNU Lesser General Public
13    License, the Free Software Foundation gives you unlimited
14    permission to link the compiled version of this file into
15    combinations with other programs, and to distribute those
16    combinations without any restriction coming from the use of this
17    file.  (The Lesser General Public License restrictions do apply in
18    other respects; for example, they cover modification of the file,
19    and distribution when not linked into a combine executable.)
20
21    The GNU C Library is distributed in the hope that it will be useful,
22    but WITHOUT ANY WARRANTY; without even the implied warranty of
23    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
24    Lesser General Public License for more details.
25
26    You should have received a copy of the GNU Lesser General Public
27    License along with the GNU C Library; if not, see
28    <http://www.gnu.org/licenses/>.  */
29
/* Refuse to build when the soft-fp word is narrower than 32 bits.  */
30 #if _FP_W_TYPE_SIZE < 32
31 # error "Here's a nickel, kid. Go buy yourself a real computer."
32 #endif
33
/* Total fraction storage, in bits: four words (eight for the
   double-width form) when the word is narrower than 64 bits, otherwise
   two words (four for the double-width form).  */
34 #if _FP_W_TYPE_SIZE < 64
35 # define _FP_FRACTBITS_E        (4*_FP_W_TYPE_SIZE)
36 # define _FP_FRACTBITS_DW_E     (8*_FP_W_TYPE_SIZE)
37 #else
38 # define _FP_FRACTBITS_E        (2*_FP_W_TYPE_SIZE)
39 # define _FP_FRACTBITS_DW_E     (4*_FP_W_TYPE_SIZE)
40 #endif
41
/* Format parameters for extended precision: 64 fraction bits, 15
   exponent bits, an exponent bias of 16383 and a maximum biased
   exponent of 32767.  _FP_WFRACBITS_E is the fraction width plus
   _FP_WORKBITS; the two *XBITS values are the storage bits of
   _FP_FRACTBITS_E left over beyond _FP_FRACBITS_E and beyond
   _FP_WFRACBITS_E respectively.  */
42 #define _FP_FRACBITS_E          64
43 #define _FP_FRACXBITS_E         (_FP_FRACTBITS_E - _FP_FRACBITS_E)
44 #define _FP_WFRACBITS_E         (_FP_WORKBITS + _FP_FRACBITS_E)
45 #define _FP_WFRACXBITS_E        (_FP_FRACTBITS_E - _FP_WFRACBITS_E)
46 #define _FP_EXPBITS_E           15
47 #define _FP_EXPBIAS_E           16383
48 #define _FP_EXPMAX_E            32767
49
/* Single-bit masks of type _FP_W_TYPE.  Each shift count is a bit
   position within the fraction reduced modulo _FP_W_TYPE_SIZE, so the
   mask is relative to the word that contains that bit:
     _FP_QNANBIT_E   bit _FP_FRACBITS_E - 2,
     _FP_IMPLBIT_E   bit _FP_FRACBITS_E - 1,
     _FP_OVERFLOW_E  bit _FP_WFRACBITS_E.
   The _SH variants are the same bits moved up by _FP_WORKBITS.  */
50 #define _FP_QNANBIT_E           \
51         ((_FP_W_TYPE) 1 << (_FP_FRACBITS_E-2) % _FP_W_TYPE_SIZE)
52 #define _FP_QNANBIT_SH_E                \
53         ((_FP_W_TYPE) 1 << (_FP_FRACBITS_E-2+_FP_WORKBITS) % _FP_W_TYPE_SIZE)
54 #define _FP_IMPLBIT_E           \
55         ((_FP_W_TYPE) 1 << (_FP_FRACBITS_E-1) % _FP_W_TYPE_SIZE)
56 #define _FP_IMPLBIT_SH_E                \
57         ((_FP_W_TYPE) 1 << (_FP_FRACBITS_E-1+_FP_WORKBITS) % _FP_W_TYPE_SIZE)
58 #define _FP_OVERFLOW_E          \
59         ((_FP_W_TYPE) 1 << (_FP_WFRACBITS_E % _FP_W_TYPE_SIZE))
60
/* Double-width counterparts: twice the working fraction width, the
   storage bits of _FP_FRACTBITS_DW_E left over beyond it, and a mask
   for bit _FP_WFRACBITS_DW_E - 1 within the word that holds it.  */
61 #define _FP_WFRACBITS_DW_E      (2 * _FP_WFRACBITS_E)
62 #define _FP_WFRACXBITS_DW_E     (_FP_FRACTBITS_DW_E - _FP_WFRACBITS_DW_E)
63 #define _FP_HIGHBIT_DW_E        \
64   ((_FP_W_TYPE) 1 << (_FP_WFRACBITS_DW_E - 1) % _FP_W_TYPE_SIZE)
65
/* Floating type selected with the compiler's mode (XF) attribute; it is
   the "flt" member of union _FP_UNION_E below.  */
66 typedef float XFtype __attribute__ ((mode (XF)));
67
68 #if _FP_W_TYPE_SIZE < 64
69
/* Overlay of an XFtype value with its bit fields, used when the word
   is narrower than 64 bits.  The fraction is held in two
   _FP_W_TYPE_SIZE-bit fields (frac0 is copied to fraction word 0 and
   frac1 to fraction word 1 by the unpack macros), followed by a
   15-bit exponent and a one-bit sign.  The big-endian layout lists the
   fields in the opposite order and starts with two padding fields.
   The struct is declared packed.  */
70 union _FP_UNION_E
71 {
72   XFtype flt;
73   struct _FP_STRUCT_LAYOUT
74   {
75 # if __BYTE_ORDER == __BIG_ENDIAN
76     unsigned long pad1 : _FP_W_TYPE_SIZE;
77     unsigned long pad2 : (_FP_W_TYPE_SIZE - 1 - _FP_EXPBITS_E);
78     unsigned long sign : 1;
79     unsigned long exp : _FP_EXPBITS_E;
80     unsigned long frac1 : _FP_W_TYPE_SIZE;
81     unsigned long frac0 : _FP_W_TYPE_SIZE;
82 # else
83     unsigned long frac0 : _FP_W_TYPE_SIZE;
84     unsigned long frac1 : _FP_W_TYPE_SIZE;
85     unsigned exp : _FP_EXPBITS_E;
86     unsigned sign : 1;
87 # endif /* not bigendian */
88   } bits __attribute__ ((packed));
89 };
90
91
/* Expands to _FP_DECL with a word count of 4.  Presumably this declares
   the X_f[0..3], X_e and X_s objects the macros below operate on;
   _FP_DECL is defined outside this file.  */
92 # define FP_DECL_E(X)           _FP_DECL (4, X)
93
/* Split the XFtype value VAL into the raw fields of X.  Fraction words
   0 and 1 receive frac0 and frac1, words 2 and 3 are cleared, and the
   exponent and sign fields are copied as stored.  VAL is evaluated
   once.  */
94 # define FP_UNPACK_RAW_E(X, val)                \
95   do                                            \
96     {                                           \
97       union _FP_UNION_E _flo;                   \
98       _flo.flt = (val);                         \
99                                                 \
100       X##_f[2] = 0;                             \
101       X##_f[3] = 0;                             \
102       X##_f[0] = _flo.bits.frac0;               \
103       X##_f[1] = _flo.bits.frac1;               \
104       X##_e  = _flo.bits.exp;                   \
105       X##_s  = _flo.bits.sign;                  \
106     }                                           \
107   while (0)
108
/* Pointer form of FP_UNPACK_RAW_E: VAL is cast to union _FP_UNION_E *
   and the fields are read directly from the object it points to.  */
109 # define FP_UNPACK_RAW_EP(X, val)                               \
110   do                                                            \
111     {                                                           \
112       union _FP_UNION_E *_flo = (union _FP_UNION_E *) (val);    \
113                                                                 \
114       X##_f[2] = 0;                                             \
115       X##_f[3] = 0;                                             \
116       X##_f[0] = _flo->bits.frac0;                              \
117       X##_f[1] = _flo->bits.frac1;                              \
118       X##_e  = _flo->bits.exp;                                  \
119       X##_s  = _flo->bits.sign;                                 \
120     }                                                           \
121   while (0)
122
/* Assemble the raw fields of X into the XFtype lvalue VAL.  Before the
   fields are stored, the _FP_IMPLBIT_E bit of fraction word 1 is set
   when the exponent is nonzero and cleared when it is zero; this
   modifies X itself.  Only fraction words 0 and 1 are written out.  */
123 # define FP_PACK_RAW_E(val, X)                  \
124   do                                            \
125     {                                           \
126       union _FP_UNION_E _flo;                   \
127                                                 \
128       if (X##_e)                                \
129         X##_f[1] |= _FP_IMPLBIT_E;              \
130       else                                      \
131         X##_f[1] &= ~(_FP_IMPLBIT_E);           \
132       _flo.bits.frac0 = X##_f[0];               \
133       _flo.bits.frac1 = X##_f[1];               \
134       _flo.bits.exp   = X##_e;                  \
135       _flo.bits.sign  = X##_s;                  \
136                                                 \
137       (val) = _flo.flt;                         \
138     }                                           \
139   while (0)
140
/* Pointer form of FP_PACK_RAW_E: the fields are stored through VAL,
   which is cast to union _FP_UNION_E *.  When FP_INHIBIT_RESULTS is
   nonzero the whole body is skipped, so neither the destination nor X
   is modified.  */
141 # define FP_PACK_RAW_EP(val, X)                                         \
142   do                                                                    \
143     {                                                                   \
144       if (!FP_INHIBIT_RESULTS)                                          \
145         {                                                               \
146           union _FP_UNION_E *_flo = (union _FP_UNION_E *) (val);        \
147                                                                         \
148           if (X##_e)                                                    \
149             X##_f[1] |= _FP_IMPLBIT_E;                                  \
150           else                                                          \
151             X##_f[1] &= ~(_FP_IMPLBIT_E);                               \
152           _flo->bits.frac0 = X##_f[0];                                  \
153           _flo->bits.frac1 = X##_f[1];                                  \
154           _flo->bits.exp   = X##_e;                                     \
155           _flo->bits.sign  = X##_s;                                     \
156         }                                                               \
157     }                                                                   \
158   while (0)
159
/* Full unpack helpers.  Each one first performs the raw unpack (from a
   value for the _E forms, through a pointer for the _EP forms) and
   then hands X to _FP_UNPACK_CANONICAL or _FP_UNPACK_SEMIRAW with a
   word count of 4.  Those two macros are defined outside this file.  */
160 # define FP_UNPACK_E(X, val)                    \
161   do                                            \
162     {                                           \
163       FP_UNPACK_RAW_E (X, val);                 \
164       _FP_UNPACK_CANONICAL (E, 4, X);           \
165     }                                           \
166   while (0)
167
168 # define FP_UNPACK_EP(X, val)                   \
169   do                                            \
170     {                                           \
171       FP_UNPACK_RAW_EP (X, val);                \
172       _FP_UNPACK_CANONICAL (E, 4, X);           \
173     }                                           \
174   while (0)
175
176 # define FP_UNPACK_SEMIRAW_E(X, val)            \
177   do                                            \
178     {                                           \
179       FP_UNPACK_RAW_E (X, val);                 \
180       _FP_UNPACK_SEMIRAW (E, 4, X);             \
181     }                                           \
182   while (0)
183
184 # define FP_UNPACK_SEMIRAW_EP(X, val)           \
185   do                                            \
186     {                                           \
187       FP_UNPACK_RAW_EP (X, val);                \
188       _FP_UNPACK_SEMIRAW (E, 4, X);             \
189     }                                           \
190   while (0)
191
/* Full pack helpers, mirroring the unpack helpers above.  Each one
   first applies _FP_PACK_CANONICAL or _FP_PACK_SEMIRAW to X with a word
   count of 4 and then performs the raw pack, into an lvalue for the _E
   forms and through a pointer for the _EP forms.  */
192 # define FP_PACK_E(val, X)                      \
193   do                                            \
194     {                                           \
195       _FP_PACK_CANONICAL (E, 4, X);             \
196       FP_PACK_RAW_E (val, X);                   \
197     }                                           \
198   while (0)
199
200 # define FP_PACK_EP(val, X)                     \
201   do                                            \
202     {                                           \
203       _FP_PACK_CANONICAL (E, 4, X);             \
204       FP_PACK_RAW_EP (val, X);                  \
205     }                                           \
206   while (0)
207
208 # define FP_PACK_SEMIRAW_E(val, X)              \
209   do                                            \
210     {                                           \
211       _FP_PACK_SEMIRAW (E, 4, X);               \
212       FP_PACK_RAW_E (val, X);                   \
213     }                                           \
214   while (0)
215
216 # define FP_PACK_SEMIRAW_EP(val, X)             \
217   do                                            \
218     {                                           \
219       _FP_PACK_SEMIRAW (E, 4, X);               \
220       FP_PACK_RAW_EP (val, X);                  \
221     }                                           \
222   while (0)
223
/* Extended-precision operations, each forwarded to the generic macro
   of the same name with format tag E and a word count of 4.  _FP_FMA
   additionally receives 8, presumably the word count of the
   double-width intermediate (compare _FP_FRACTBITS_DW_E).  */
224 # define FP_ISSIGNAN_E(X)       _FP_ISSIGNAN (E, 4, X)
225 # define FP_NEG_E(R, X)         _FP_NEG (E, 4, R, X)
226 # define FP_ADD_E(R, X, Y)      _FP_ADD (E, 4, R, X, Y)
227 # define FP_SUB_E(R, X, Y)      _FP_SUB (E, 4, R, X, Y)
228 # define FP_MUL_E(R, X, Y)      _FP_MUL (E, 4, R, X, Y)
229 # define FP_DIV_E(R, X, Y)      _FP_DIV (E, 4, R, X, Y)
230 # define FP_SQRT_E(R, X)        _FP_SQRT (E, 4, R, X)
231 # define FP_FMA_E(R, X, Y, Z)   _FP_FMA (E, 4, 8, R, X, Y, Z)
232
233 /*
234  * Square root algorithms:
235  * We have just one right now, maybe Newton approximation
236  * should be added for those machines where division is fast.
237  * This has a special _E version because the standard _4 square
238  * root would not work (it has to start normally with the
239  * second word and not the first), but as we have to do it
240  * anyway, we optimize it by doing most of the calculations
241  * in two UWtype registers instead of four.
242  */
243
/* Bit-by-bit square root core for the four-word representation.
   R collects the result bits, S and T are working values, X is the
   operand (consumed as the remainder) and q is a one-bit mask that is
   walked from the top bit of a word down to zero.

   X is first shifted right by _FP_WORKBITS.  The first loop operates
   on word 1 only; the second loop operates on word 0 while comparing
   and subtracting across words 0 and 1 and propagating the carry from
   S word 0 into S word 1.  Afterwards R is shifted left by
   _FP_WORKBITS and, if any remainder is left in X words 0 and 1,
   _FP_WORK_STICKY is set in R word 0, together with _FP_WORK_ROUND
   when the two-word value of S is below that of X.  */
244 # define _FP_SQRT_MEAT_E(R, S, T, X, q)                 \
245   do                                                    \
246     {                                                   \
247       q = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1);      \
248       _FP_FRAC_SRL_4 (X, (_FP_WORKBITS));               \
249       while (q)                                         \
250         {                                               \
251           T##_f[1] = S##_f[1] + q;                      \
252           if (T##_f[1] <= X##_f[1])                     \
253             {                                           \
254               S##_f[1] = T##_f[1] + q;                  \
255               X##_f[1] -= T##_f[1];                     \
256               R##_f[1] += q;                            \
257             }                                           \
258           _FP_FRAC_SLL_2 (X, 1);                        \
259           q >>= 1;                                      \
260         }                                               \
261       q = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1);      \
262       while (q)                                         \
263         {                                               \
264           T##_f[0] = S##_f[0] + q;                      \
265           T##_f[1] = S##_f[1];                          \
266           if (T##_f[1] < X##_f[1]                       \
267               || (T##_f[1] == X##_f[1]                  \
268                   && T##_f[0] <= X##_f[0]))             \
269             {                                           \
270               S##_f[0] = T##_f[0] + q;                  \
271               S##_f[1] += (T##_f[0] > S##_f[0]);        \
272               _FP_FRAC_DEC_2 (X, T);                    \
273               R##_f[0] += q;                            \
274             }                                           \
275           _FP_FRAC_SLL_2 (X, 1);                        \
276           q >>= 1;                                      \
277         }                                               \
278       _FP_FRAC_SLL_4 (R, (_FP_WORKBITS));               \
279       if (X##_f[0] | X##_f[1])                          \
280         {                                               \
281           if (S##_f[1] < X##_f[1]                       \
282               || (S##_f[1] == X##_f[1]                  \
283                   && S##_f[0] < X##_f[0]))              \
284             R##_f[0] |= _FP_WORK_ROUND;                 \
285           R##_f[0] |= _FP_WORK_STICKY;                  \
286         }                                               \
287     }                                                   \
288   while (0)
289
/* Comparison and integer-conversion entry points, each forwarded to
   the generic macro of the same name with format tag E and a word
   count of 4.  The remaining arguments are passed through unchanged.  */
290 # define FP_CMP_E(r, X, Y, un)          _FP_CMP (E, 4, r, X, Y, un)
291 # define FP_CMP_EQ_E(r, X, Y)           _FP_CMP_EQ (E, 4, r, X, Y)
292 # define FP_CMP_UNORD_E(r, X, Y)        _FP_CMP_UNORD (E, 4, r, X, Y)
293
294 # define FP_TO_INT_E(r, X, rsz, rsg)    _FP_TO_INT (E, 4, r, X, rsz, rsg)
295 # define FP_FROM_INT_E(X, r, rs, rt)    _FP_FROM_INT (E, 4, X, r, rs, rt)
296
/* Fraction word selected as the "high" word of X: word 2 for the
   working form, word 1 for the raw form (the word in which
   FP_PACK_RAW_E sets or clears _FP_IMPLBIT_E) and word 4 for the
   double-width form.  */
297 # define _FP_FRAC_HIGH_E(X)     (X##_f[2])
298 # define _FP_FRAC_HIGH_RAW_E(X) (X##_f[1])
299
300 # define _FP_FRAC_HIGH_DW_E(X)  (X##_f[4])
301
302 #else   /* not _FP_W_TYPE_SIZE < 64 */
/* Overlay of an XFtype value with its bit fields, used when the word
   is at least 64 bits wide.  The fraction occupies a single
   _FP_W_TYPE_SIZE-bit field, followed by a 15-bit exponent and a
   one-bit sign; the big-endian layout lists the fields in the opposite
   order after a padding field.  Unlike the narrow-word variant this
   struct is not declared packed.  */
303 union _FP_UNION_E
304 {
305   XFtype flt;
306   struct _FP_STRUCT_LAYOUT
307   {
308 # if __BYTE_ORDER == __BIG_ENDIAN
309     _FP_W_TYPE pad  : (_FP_W_TYPE_SIZE - 1 - _FP_EXPBITS_E);
310     unsigned sign   : 1;
311     unsigned exp    : _FP_EXPBITS_E;
312     _FP_W_TYPE frac : _FP_W_TYPE_SIZE;
313 # else
314     _FP_W_TYPE frac : _FP_W_TYPE_SIZE;
315     unsigned exp    : _FP_EXPBITS_E;
316     unsigned sign   : 1;
317 # endif
318   } bits;
319 };
320
/* Expands to _FP_DECL with a word count of 2.  Presumably this declares
   the X_f0, X_f1, X_e and X_s objects the macros below operate on;
   _FP_DECL is defined outside this file.  */
321 # define FP_DECL_E(X)           _FP_DECL (2, X)
322
/* Split the XFtype value VAL into the raw fields of X.  The whole
   fraction field goes into X_f0, X_f1 is cleared, and the exponent and
   sign fields are copied as stored.  VAL is evaluated once.  */
323 # define FP_UNPACK_RAW_E(X, val)                \
324   do                                            \
325     {                                           \
326       union _FP_UNION_E _flo;                   \
327       _flo.flt = (val);                         \
328                                                 \
329       X##_f0 = _flo.bits.frac;                  \
330       X##_f1 = 0;                               \
331       X##_e = _flo.bits.exp;                    \
332       X##_s = _flo.bits.sign;                   \
333     }                                           \
334   while (0)
335
/* Pointer form of FP_UNPACK_RAW_E: VAL is cast to union _FP_UNION_E *
   and the fields are read directly from the object it points to.  */
336 # define FP_UNPACK_RAW_EP(X, val)                               \
337   do                                                            \
338     {                                                           \
339       union _FP_UNION_E *_flo = (union _FP_UNION_E *) (val);    \
340                                                                 \
341       X##_f0 = _flo->bits.frac;                                 \
342       X##_f1 = 0;                                               \
343       X##_e = _flo->bits.exp;                                   \
344       X##_s = _flo->bits.sign;                                  \
345     }                                                           \
346   while (0)
347
/* Assemble the raw fields of X into the XFtype lvalue VAL.  Before the
   fields are stored, the _FP_IMPLBIT_E bit of X_f0 is set when the
   exponent is nonzero and cleared when it is zero; this modifies X
   itself.  Only X_f0 is written out as the fraction.  */
348 # define FP_PACK_RAW_E(val, X)                  \
349   do                                            \
350     {                                           \
351       union _FP_UNION_E _flo;                   \
352                                                 \
353       if (X##_e)                                \
354         X##_f0 |= _FP_IMPLBIT_E;                \
355       else                                      \
356         X##_f0 &= ~(_FP_IMPLBIT_E);             \
357       _flo.bits.frac = X##_f0;                  \
358       _flo.bits.exp  = X##_e;                   \
359       _flo.bits.sign = X##_s;                   \
360                                                 \
361       (val) = _flo.flt;                         \
362     }                                           \
363   while (0)
364
365 # define FP_PACK_RAW_EP(fs, val, X)                                     \
366   do                                                                    \
367     {                                                                   \
368       if (!FP_INHIBIT_RESULTS)                                          \
369         {                                                               \
370           union _FP_UNION_E *_flo = (union _FP_UNION_E *) (val);        \
371                                                                         \
372           if (X##_e)                                                    \
373             X##_f0 |= _FP_IMPLBIT_E;                                    \
374           else                                                          \
375             X##_f0 &= ~(_FP_IMPLBIT_E);                                 \
376           _flo->bits.frac = X##_f0;                                     \
377           _flo->bits.exp  = X##_e;                                      \
378           _flo->bits.sign = X##_s;                                      \
379         }                                                               \
380     }                                                                   \
381   while (0)
382
383
/* Full unpack helpers.  Each one first performs the raw unpack (from a
   value for the _E forms, through a pointer for the _EP forms) and
   then hands X to _FP_UNPACK_CANONICAL or _FP_UNPACK_SEMIRAW with a
   word count of 2.  Those two macros are defined outside this file.  */
384 # define FP_UNPACK_E(X, val)                    \
385   do                                            \
386     {                                           \
387       FP_UNPACK_RAW_E (X, val);                 \
388       _FP_UNPACK_CANONICAL (E, 2, X);           \
389     }                                           \
390   while (0)
391
392 # define FP_UNPACK_EP(X, val)                   \
393   do                                            \
394     {                                           \
395       FP_UNPACK_RAW_EP (X, val);                \
396       _FP_UNPACK_CANONICAL (E, 2, X);           \
397     }                                           \
398   while (0)
399
400 # define FP_UNPACK_SEMIRAW_E(X, val)            \
401   do                                            \
402     {                                           \
403       FP_UNPACK_RAW_E (X, val);                 \
404       _FP_UNPACK_SEMIRAW (E, 2, X);             \
405     }                                           \
406   while (0)
407
408 # define FP_UNPACK_SEMIRAW_EP(X, val)           \
409   do                                            \
410     {                                           \
411       FP_UNPACK_RAW_EP (X, val);                \
412       _FP_UNPACK_SEMIRAW (E, 2, X);             \
413     }                                           \
414   while (0)
415
/* Full pack helpers, mirroring the unpack helpers above.  Each one
   first applies _FP_PACK_CANONICAL or _FP_PACK_SEMIRAW to X with a word
   count of 2 and then performs the raw pack, into an lvalue for the _E
   forms and through a pointer for the _EP forms.  The _EP forms invoke
   FP_PACK_RAW_EP with two arguments, (val, X).  */
416 # define FP_PACK_E(val, X)                      \
417   do                                            \
418     {                                           \
419       _FP_PACK_CANONICAL (E, 2, X);             \
420       FP_PACK_RAW_E (val, X);                   \
421     }                                           \
422   while (0)
423
424 # define FP_PACK_EP(val, X)                     \
425   do                                            \
426     {                                           \
427       _FP_PACK_CANONICAL (E, 2, X);             \
428       FP_PACK_RAW_EP (val, X);                  \
429     }                                           \
430   while (0)
431
432 # define FP_PACK_SEMIRAW_E(val, X)              \
433   do                                            \
434     {                                           \
435       _FP_PACK_SEMIRAW (E, 2, X);               \
436       FP_PACK_RAW_E (val, X);                   \
437     }                                           \
438   while (0)
439
440 # define FP_PACK_SEMIRAW_EP(val, X)             \
441   do                                            \
442     {                                           \
443       _FP_PACK_SEMIRAW (E, 2, X);               \
444       FP_PACK_RAW_EP (val, X);                  \
445     }                                           \
446   while (0)
447
/* Extended-precision operations, each forwarded to the generic macro
   of the same name with format tag E and a word count of 2.  _FP_FMA
   additionally receives 4, presumably the word count of the
   double-width intermediate (compare _FP_FRACTBITS_DW_E).  */
448 # define FP_ISSIGNAN_E(X)       _FP_ISSIGNAN (E, 2, X)
449 # define FP_NEG_E(R, X)         _FP_NEG (E, 2, R, X)
450 # define FP_ADD_E(R, X, Y)      _FP_ADD (E, 2, R, X, Y)
451 # define FP_SUB_E(R, X, Y)      _FP_SUB (E, 2, R, X, Y)
452 # define FP_MUL_E(R, X, Y)      _FP_MUL (E, 2, R, X, Y)
453 # define FP_DIV_E(R, X, Y)      _FP_DIV (E, 2, R, X, Y)
454 # define FP_SQRT_E(R, X)        _FP_SQRT (E, 2, R, X)
455 # define FP_FMA_E(R, X, Y, Z)   _FP_FMA (E, 2, 4, R, X, Y, Z)
456
457 /*
458  * Square root algorithms:
459  * We have just one right now, maybe Newton approximation
460  * should be added for those machines where division is fast.
461  * We optimize it by doing most of the calculations
462  * in one UWtype register instead of two, although we don't
463  * have to.
464  */
/* Bit-by-bit square root core for the two-word representation.
   R collects the result bits, S and T are working values, X is the
   operand (consumed as the remainder) and q is a one-bit mask that is
   walked from the top bit of a word down to zero.

   X is first shifted right by _FP_WORKBITS, after which the single
   loop works on the low word (_f0) only.  Afterwards R is shifted left
   by _FP_WORKBITS and, if any remainder is left in X_f0,
   _FP_WORK_STICKY is set in R_f0, together with _FP_WORK_ROUND when
   S_f0 is below X_f0.  */
465 # define _FP_SQRT_MEAT_E(R, S, T, X, q)                 \
466   do                                                    \
467     {                                                   \
468       q = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1);      \
469       _FP_FRAC_SRL_2 (X, (_FP_WORKBITS));               \
470       while (q)                                         \
471         {                                               \
472           T##_f0 = S##_f0 + q;                          \
473           if (T##_f0 <= X##_f0)                         \
474             {                                           \
475               S##_f0 = T##_f0 + q;                      \
476               X##_f0 -= T##_f0;                         \
477               R##_f0 += q;                              \
478             }                                           \
479           _FP_FRAC_SLL_1 (X, 1);                        \
480           q >>= 1;                                      \
481         }                                               \
482       _FP_FRAC_SLL_2 (R, (_FP_WORKBITS));               \
483       if (X##_f0)                                       \
484         {                                               \
485           if (S##_f0 < X##_f0)                          \
486             R##_f0 |= _FP_WORK_ROUND;                   \
487           R##_f0 |= _FP_WORK_STICKY;                    \
488         }                                               \
489     }                                                   \
490   while (0)
491
/* Comparison and integer-conversion entry points, each forwarded to
   the generic macro of the same name with format tag E and a word
   count of 2.  The remaining arguments are passed through unchanged.  */
492 # define FP_CMP_E(r, X, Y, un)          _FP_CMP (E, 2, r, X, Y, un)
493 # define FP_CMP_EQ_E(r, X, Y)           _FP_CMP_EQ (E, 2, r, X, Y)
494 # define FP_CMP_UNORD_E(r, X, Y)        _FP_CMP_UNORD (E, 2, r, X, Y)
495
496 # define FP_TO_INT_E(r, X, rsz, rsg)    _FP_TO_INT (E, 2, r, X, rsz, rsg)
497 # define FP_FROM_INT_E(X, r, rs, rt)    _FP_FROM_INT (E, 2, X, r, rs, rt)
498
/* Fraction word selected as the "high" word of X: X_f1 for the working
   form, X_f0 for the raw form (the word in which FP_PACK_RAW_E sets or
   clears _FP_IMPLBIT_E) and array element 2 for the double-width form.
   Note the double-width accessor uses the indexed X_f[] spelling while
   the other two use the X_f0/X_f1 spelling.  */
499 # define _FP_FRAC_HIGH_E(X)     (X##_f1)
500 # define _FP_FRAC_HIGH_RAW_E(X) (X##_f0)
501
502 # define _FP_FRAC_HIGH_DW_E(X)  (X##_f[2])
503
504 #endif /* not _FP_W_TYPE_SIZE < 64 */