Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
[sfrench/cifs-2.6.git] / arch / sh / kernel / cpu / sh2a / fpu.c
1 /*
2  * Save/restore floating point context for signal handlers.
3  *
4  * Copyright (C) 1999, 2000  Kaz Kojima & Niibe Yutaka
5  *
6  * This file is subject to the terms and conditions of the GNU General Public
7  * License.  See the file "COPYING" in the main directory of this archive
8  * for more details.
9  *
10  * FIXME! These routines can be optimized in big endian case.
11  */
12 #include <linux/sched.h>
13 #include <linux/signal.h>
14 #include <asm/processor.h>
15 #include <asm/io.h>
16 #include <asm/fpu.h>
17
18 /* The PR (precision) bit in the FP Status Register must be clear when
19  * an frchg instruction is executed, otherwise the instruction is undefined.
20  * Executing frchg with PR set causes a trap on some SH4 implementations.
21  */
22
/*
 * Passed as an extra input operand to the inline asm in save_fpu(),
 * restore_fpu() and fpu_init(); none of those asm templates reference
 * the operand, and its value here is zero.
 */
#define FPSCR_RCHG 0x00000000
24
25
26 /*
27  * Save FPU registers onto task structure.
28  */
29 void
30 save_fpu(struct task_struct *tsk)
31 {
32         unsigned long dummy;
33
34         enable_fpu();
35         asm volatile("sts.l     fpul, @-%0\n\t"
36                      "sts.l     fpscr, @-%0\n\t"
37                      "fmov.s    fr15, @-%0\n\t"
38                      "fmov.s    fr14, @-%0\n\t"
39                      "fmov.s    fr13, @-%0\n\t"
40                      "fmov.s    fr12, @-%0\n\t"
41                      "fmov.s    fr11, @-%0\n\t"
42                      "fmov.s    fr10, @-%0\n\t"
43                      "fmov.s    fr9, @-%0\n\t"
44                      "fmov.s    fr8, @-%0\n\t"
45                      "fmov.s    fr7, @-%0\n\t"
46                      "fmov.s    fr6, @-%0\n\t"
47                      "fmov.s    fr5, @-%0\n\t"
48                      "fmov.s    fr4, @-%0\n\t"
49                      "fmov.s    fr3, @-%0\n\t"
50                      "fmov.s    fr2, @-%0\n\t"
51                      "fmov.s    fr1, @-%0\n\t"
52                      "fmov.s    fr0, @-%0\n\t"
53                      "lds       %3, fpscr\n\t"
54                      : "=r" (dummy)
55                      : "0" ((char *)(&tsk->thread.fpu.hard.status)),
56                        "r" (FPSCR_RCHG),
57                        "r" (FPSCR_INIT)
58                      : "memory");
59
60         disable_fpu();
61 }
62
63 static void
64 restore_fpu(struct task_struct *tsk)
65 {
66         unsigned long dummy;
67
68         enable_fpu();
69         asm volatile("fmov.s    @%0+, fr0\n\t"
70                      "fmov.s    @%0+, fr1\n\t"
71                      "fmov.s    @%0+, fr2\n\t"
72                      "fmov.s    @%0+, fr3\n\t"
73                      "fmov.s    @%0+, fr4\n\t"
74                      "fmov.s    @%0+, fr5\n\t"
75                      "fmov.s    @%0+, fr6\n\t"
76                      "fmov.s    @%0+, fr7\n\t"
77                      "fmov.s    @%0+, fr8\n\t"
78                      "fmov.s    @%0+, fr9\n\t"
79                      "fmov.s    @%0+, fr10\n\t"
80                      "fmov.s    @%0+, fr11\n\t"
81                      "fmov.s    @%0+, fr12\n\t"
82                      "fmov.s    @%0+, fr13\n\t"
83                      "fmov.s    @%0+, fr14\n\t"
84                      "fmov.s    @%0+, fr15\n\t"
85                      "lds.l     @%0+, fpscr\n\t"
86                      "lds.l     @%0+, fpul\n\t"
87                      : "=r" (dummy)
88                      : "0" (&tsk->thread.fpu), "r" (FPSCR_RCHG)
89                      : "memory");
90         disable_fpu();
91 }
92
93 /*
94  * Load the FPU with signalling NANS.  This bit pattern we're using
95  * has the property that no matter wether considered as single or as
96  * double precission represents signaling NANS.
97  */
98
99 static void
100 fpu_init(void)
101 {
102         enable_fpu();
103         asm volatile("lds       %0, fpul\n\t"
104                      "fsts      fpul, fr0\n\t"
105                      "fsts      fpul, fr1\n\t"
106                      "fsts      fpul, fr2\n\t"
107                      "fsts      fpul, fr3\n\t"
108                      "fsts      fpul, fr4\n\t"
109                      "fsts      fpul, fr5\n\t"
110                      "fsts      fpul, fr6\n\t"
111                      "fsts      fpul, fr7\n\t"
112                      "fsts      fpul, fr8\n\t"
113                      "fsts      fpul, fr9\n\t"
114                      "fsts      fpul, fr10\n\t"
115                      "fsts      fpul, fr11\n\t"
116                      "fsts      fpul, fr12\n\t"
117                      "fsts      fpul, fr13\n\t"
118                      "fsts      fpul, fr14\n\t"
119                      "fsts      fpul, fr15\n\t"
120                      "lds       %2, fpscr\n\t"
121                      : /* no output */
122                      : "r" (0), "r" (FPSCR_RCHG), "r" (FPSCR_INIT));
123         disable_fpu();
124 }
125
126 /*
127  *      Emulate arithmetic ops on denormalized number for some FPU insns.
128  */
129
/*
 * denormalized float * float
 *
 * hx and hy are raw single-precision bit patterns.  If hx has a zero
 * magnitude, or hy's magnitude is below the smallest normal number, a
 * correctly signed zero is returned.  Otherwise the low 23 bits of hx are
 * multiplied by hy's significand (with the hidden bit added) and the result
 * is re-packed, truncating any bits that do not fit.
 */
static int denormal_mulf(int hx, int hy)
{
        const unsigned int sign = (hx ^ hy) & 0x80000000;
        const unsigned int mag_x = hx & 0x7fffffff;
        const unsigned int mag_y = hy & 0x7fffffff;
        unsigned long long prod, scan;
        unsigned int packed;
        int e, top;

        if (mag_x == 0 || mag_y < 0x00800000)
                return sign;

        e = (mag_y & 0x7f800000) >> 23;
        prod = (unsigned long long)(mag_x & 0x007fffff)
                * ((mag_y & 0x007fffff) | 0x00800000);

        /* top = index of the most significant set bit of prod */
        top = -1;
        for (scan = prod; scan != 0; scan >>= 1)
                top++;

        /* FIXME: use guard bits */
        e += top - 126 - 46;
        if (e > 0)
                /* normal result: drop the leading one, keep 23 bits below it */
                packed = ((int) (prod >> (top - 23)) & 0x007fffff) | (e << 23);
        else if (e >= -22)
                /* denormal result: shift further right by the exponent deficit */
                packed = (int) (prod >> (top - 22 - e)) & 0x007fffff;
        else
                packed = 0;

        return packed | sign;
}
162
163 /* denormalized double * double */
/*
 * 64 x 64 -> 128 bit unsigned multiply built from four 32 x 32 partial
 * products.  The upper 64 bits of the product are stored through highp and
 * the lower 64 bits through lowp.
 */
static void mult64(unsigned long long x, unsigned long long y,
                unsigned long long *highp, unsigned long long *lowp)
{
        const unsigned long long x_hi = x >> 32;
        const unsigned long long x_lo = x & 0xffffffffLL;
        const unsigned long y_hi = (unsigned long) (y >> 32);
        const unsigned long y_lo = (unsigned long) (y & 0xffffffffLL);

        const unsigned long long p_hh = x_hi * y_hi;
        const unsigned long long p_lh = x_lo * y_hi;
        const unsigned long long p_hl = x_hi * y_lo;
        unsigned long long acc = x_lo * y_lo;
        unsigned long long carries = 0;
        unsigned long long before;

        /* Add the low halves of the cross terms, counting wrap-arounds. */
        before = acc;
        acc += p_lh << 32;
        if (acc < before)
                carries++;

        before = acc;
        acc += p_hl << 32;
        if (acc < before)
                carries++;

        *lowp = acc;
        *highp = carries + (p_lh >> 32) + (p_hl >> 32) + p_hh;
}
189
190 static inline long long rshift64(unsigned long long mh,
191                 unsigned long long ml, int n)
192 {
193         if (n >= 64)
194                 return mh >> (n - 64);
195         return (mh << (64 - n)) | (ml >> n);
196 }
197
/*
 * Double-precision counterpart of denormal_mulf().
 *
 * hx and hy are raw double-precision bit patterns.  If hx has a zero
 * magnitude, or hy's magnitude is below the smallest normal number, a
 * correctly signed zero is returned.  Otherwise the low 52 bits of hx are
 * multiplied by hy's significand (with the hidden bit added) via mult64()
 * and the 128-bit product is re-packed, truncating bits that do not fit.
 */
static long long denormal_muld(long long hx, long long hy)
{
        const unsigned long long sign = (hx ^ hy) & 0x8000000000000000LL;
        const unsigned long long mag_x = hx & 0x7fffffffffffffffLL;
        const unsigned long long mag_y = hy & 0x7fffffffffffffffLL;
        unsigned long long prod_hi, prod_lo, scan, packed;
        int e, top;

        if (mag_x == 0 || mag_y < 0x0010000000000000LL)
                return sign;

        e = (mag_y & 0x7ff0000000000000LL) >> 52;
        mult64(mag_x & 0x000fffffffffffffLL,
               (mag_y & 0x000fffffffffffffLL) | 0x0010000000000000LL,
               &prod_hi, &prod_lo);

        /* top = index of the most significant set bit of prod_hi:prod_lo */
        top = -1;
        if (prod_hi) {
                for (scan = prod_hi; scan; scan >>= 1)
                        top++;
                top += 64;
        } else {
                for (scan = prod_lo; scan; scan >>= 1)
                        top++;
        }

        /* FIXME: use guard bits */
        e += top - 1022 - 52 * 2;
        if (e > 0)
                /* normal result: drop the leading one, keep 52 bits below it */
                packed = (rshift64(prod_hi, prod_lo, top - 52)
                                & 0x000fffffffffffffLL)
                        | ((long long)e << 52);
        else if (e >= -51)
                /* denormal result: shift further right by the exponent deficit */
                packed = rshift64(prod_hi, prod_lo, top - 51 - e)
                                & 0x000fffffffffffffLL;
        else
                packed = 0;

        return packed | sign;
}
235
/*
 * ix - iy where iy: denormal and ix, iy >= 0
 *
 * Both arguments are sign-less single-precision bit patterns.  When ix is
 * itself below the smallest normal number the result is the plain integer
 * difference (which may be negative).  Otherwise iy is aligned to ix's
 * exponent, subtracted from ix's significand, and the result renormalized.
 */
static int denormal_subf1(unsigned int ix, unsigned int iy)
{
        int mant;
        int e;

        if (ix < 0x00800000)
                return ix - iy;

        e = (ix & 0x7f800000) >> 23;
        if (e > 32)             /* alignment shift would exceed 31 bits */
                return ix;

        iy >>= e - 1;
        if (!iy)                /* iy vanished after alignment */
                return ix;

        mant = ((ix & 0x007fffff) | 0x00800000) - iy;

        /* Shift left until the hidden bit is back, or we fall to denormal. */
        for (; mant < 0x00800000; mant <<= 1) {
                if (--e == 0)
                        return mant;
        }

        return (e << 23) | (mant & 0x007fffff);
}
262
/*
 * ix + iy where iy: denormal and ix, iy >= 0
 *
 * Both arguments are sign-less single-precision bit patterns.  When ix is
 * itself below the smallest normal number the result is the plain integer
 * sum.  Otherwise iy is aligned to ix's exponent and added to ix's
 * significand, bumping the exponent if the sum carries out.
 */
static int denormal_addf1(unsigned int ix, unsigned int iy)
{
        int mant;
        int e;

        if (ix < 0x00800000)
                return ix + iy;

        e = (ix & 0x7f800000) >> 23;
        if (e > 32)             /* alignment shift would exceed 31 bits */
                return ix;

        iy >>= e - 1;
        if (!iy)                /* iy vanished after alignment */
                return ix;

        mant = ((ix & 0x007fffff) | 0x00800000) + iy;
        if (mant >= 0x01000000) {
                /* carry out of the significand */
                mant >>= 1;
                e++;
        }

        return (e << 23) | (mant & 0x007fffff);
}
288
/*
 * Single-precision add of two raw bit patterns where at least one operand
 * is expected to be denormal.  The magnitudes are combined with
 * denormal_addf1() (same signs) or denormal_subf1() (opposite signs); in
 * both cases the operand below the smallest normal number is passed second.
 */
static int denormal_addf(int hx, int hy)
{
        const unsigned int mag_x = hx & 0x7fffffff;
        const unsigned int mag_y = hy & 0x7fffffff;
        int sign = hx & 0x80000000;
        unsigned int res;

        if (!((hx ^ hy) & 0x80000000)) {
                /* Same sign: magnitudes add, sign of hx is kept. */
                if (mag_y < 0x00800000)
                        res = denormal_addf1(mag_x, mag_y);
                else
                        res = denormal_addf1(mag_y, mag_x);
                return sign | res;
        }

        /* Opposite signs: magnitudes subtract. */
        if (mag_y < 0x00800000) {
                res = denormal_subf1(mag_x, mag_y);
                if ((int) res < 0) {
                        /* |hy| was larger: negate and take hy's sign */
                        res = -res;
                        sign ^= 0x80000000;
                }
        } else {
                /* hy is the normal operand, so the result takes its sign */
                res = denormal_subf1(mag_y, mag_x);
                sign ^= 0x80000000;
        }

        return sign | res;
}
320
/*
 * ix - iy where iy: denormal and ix, iy >= 0
 *
 * Double-precision counterpart of denormal_subf1(): both arguments are
 * sign-less bit patterns.  When ix is below the smallest normal number the
 * result is the plain integer difference (which may be negative).
 * Otherwise iy is aligned to ix's exponent, subtracted from ix's
 * significand, and the result renormalized.
 */
static long long denormal_subd1(unsigned long long ix, unsigned long long iy)
{
        long long mant;
        int e;

        if (ix < 0x0010000000000000LL)
                return ix - iy;

        e = (ix & 0x7ff0000000000000LL) >> 52;
        if (e > 64)             /* alignment shift would exceed 63 bits */
                return ix;

        iy >>= e - 1;
        if (!iy)                /* iy vanished after alignment */
                return ix;

        mant = ((ix & 0x000fffffffffffffLL) | 0x0010000000000000LL) - iy;

        /* Shift left until the hidden bit is back, or we fall to denormal. */
        for (; mant < 0x0010000000000000LL; mant <<= 1) {
                if (--e == 0)
                        return mant;
        }

        return ((long long)e << 52) | (mant & 0x000fffffffffffffLL);
}
347
/*
 * ix + iy where iy: denormal and ix, iy >= 0
 *
 * Double-precision counterpart of denormal_addf1(): both arguments are
 * sign-less bit patterns.  When ix is below the smallest normal number the
 * result is the plain integer sum.  Otherwise iy is aligned to ix's
 * exponent and added to ix's significand, bumping the exponent if the sum
 * carries out.
 */
static long long denormal_addd1(unsigned long long ix, unsigned long long iy)
{
        long long mant;
        long long e;

        if (ix < 0x0010000000000000LL)
                return ix + iy;

        e = (ix & 0x7ff0000000000000LL) >> 52;
        if (e > 64)             /* alignment shift would exceed 63 bits */
                return ix;

        iy >>= e - 1;
        if (!iy)                /* iy vanished after alignment */
                return ix;

        mant = ((ix & 0x000fffffffffffffLL) | 0x0010000000000000LL) + iy;
        if (mant >= 0x0020000000000000LL) {
                /* carry out of the significand */
                mant >>= 1;
                e++;
        }

        return (e << 52) | (mant & 0x000fffffffffffffLL);
}
373
/*
 * Double-precision add of two raw bit patterns where at least one operand
 * is expected to be denormal.  The magnitudes are combined with
 * denormal_addd1() (same signs) or denormal_subd1() (opposite signs); in
 * both cases the operand below the smallest normal number is passed second.
 *
 * Returns the raw bit pattern of the result.
 */
static long long denormal_addd(long long hx, long long hy)
{
        unsigned long long ix, iy;
        long long sign;

        sign = hx & 0x8000000000000000LL;
        ix = hx & 0x7fffffffffffffffLL;
        iy = hy & 0x7fffffffffffffffLL;

        if ((hx ^ hy) & 0x8000000000000000LL) {
                /* Opposite signs: magnitudes subtract. */
                if (iy < 0x0010000000000000LL) {
                        ix = denormal_subd1(ix, iy);
                        /*
                         * The difference is a 64-bit quantity, so its sign
                         * must be tested at full width; a cast to int would
                         * look at bit 31 instead of bit 63.
                         */
                        if ((long long) ix < 0) {
                                ix = -ix;
                                sign ^= 0x8000000000000000LL;
                        }
                } else {
                        /* hy is the normal operand, so the result takes its sign */
                        ix = denormal_subd1(iy, ix);
                        sign ^= 0x8000000000000000LL;
                }
        } else {
                /* Same sign: magnitudes add, sign of hx is kept. */
                if (iy < 0x0010000000000000LL)
                        ix = denormal_addd1(ix, iy);
                else
                        ix = denormal_addd1(iy, ix);
        }

        return sign | ix;
}
405
406 /**
407  *      denormal_to_double - Given denormalized float number,
408  *                           store double float
409  *
410  *      @fpu: Pointer to sh_fpu_hard structure
411  *      @n: Index to FP register
412  */
413 static void
414 denormal_to_double (struct sh_fpu_hard_struct *fpu, int n)
415 {
416         unsigned long du, dl;
417         unsigned long x = fpu->fpul;
418         int exp = 1023 - 126;
419
420         if (x != 0 && (x & 0x7f800000) == 0) {
421                 du = (x & 0x80000000);
422                 while ((x & 0x00800000) == 0) {
423                         x <<= 1;
424                         exp--;
425                 }
426                 x &= 0x007fffff;
427                 du |= (exp << 20) | (x >> 3);
428                 dl = x << 29;
429
430                 fpu->fp_regs[n] = du;
431                 fpu->fp_regs[n+1] = dl;
432         }
433 }
434
435 /**
436  *      ieee_fpe_handler - Handle denormalized number exception
437  *
438  *      @regs: Pointer to register structure
439  *
440  *      Returns 1 when it's handled (should not cause exception).
441  */
442 static int
443 ieee_fpe_handler (struct pt_regs *regs)
444 {
445         unsigned short insn = *(unsigned short *) regs->pc;
446         unsigned short finsn;
447         unsigned long nextpc;
448         int nib[4] = {
449                 (insn >> 12) & 0xf,
450                 (insn >> 8) & 0xf,
451                 (insn >> 4) & 0xf,
452                 insn & 0xf};
453
454         if (nib[0] == 0xb ||
455             (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb)) /* bsr & jsr */
456                 regs->pr = regs->pc + 4;
457         if (nib[0] == 0xa || nib[0] == 0xb) { /* bra & bsr */
458                 nextpc = regs->pc + 4 + ((short) ((insn & 0xfff) << 4) >> 3);
459                 finsn = *(unsigned short *) (regs->pc + 2);
460         } else if (nib[0] == 0x8 && nib[1] == 0xd) { /* bt/s */
461                 if (regs->sr & 1)
462                         nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1);
463                 else
464                         nextpc = regs->pc + 4;
465                 finsn = *(unsigned short *) (regs->pc + 2);
466         } else if (nib[0] == 0x8 && nib[1] == 0xf) { /* bf/s */
467                 if (regs->sr & 1)
468                         nextpc = regs->pc + 4;
469                 else
470                         nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1);
471                 finsn = *(unsigned short *) (regs->pc + 2);
472         } else if (nib[0] == 0x4 && nib[3] == 0xb &&
473                  (nib[2] == 0x0 || nib[2] == 0x2)) { /* jmp & jsr */
474                 nextpc = regs->regs[nib[1]];
475                 finsn = *(unsigned short *) (regs->pc + 2);
476         } else if (nib[0] == 0x0 && nib[3] == 0x3 &&
477                  (nib[2] == 0x0 || nib[2] == 0x2)) { /* braf & bsrf */
478                 nextpc = regs->pc + 4 + regs->regs[nib[1]];
479                 finsn = *(unsigned short *) (regs->pc + 2);
480         } else if (insn == 0x000b) { /* rts */
481                 nextpc = regs->pr;
482                 finsn = *(unsigned short *) (regs->pc + 2);
483         } else {
484                 nextpc = regs->pc + 2;
485                 finsn = insn;
486         }
487
488 #define FPSCR_FPU_ERROR (1 << 17)
489
490         if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */
491                 struct task_struct *tsk = current;
492
493                 if ((tsk->thread.fpu.hard.fpscr & FPSCR_FPU_ERROR)) {
494                         /* FPU error */
495                         denormal_to_double (&tsk->thread.fpu.hard,
496                                             (finsn >> 8) & 0xf);
497                 } else
498                         return 0;
499
500                 regs->pc = nextpc;
501                 return 1;
502         } else if ((finsn & 0xf00f) == 0xf002) { /* fmul */
503                 struct task_struct *tsk = current;
504                 int fpscr;
505                 int n, m, prec;
506                 unsigned int hx, hy;
507
508                 n = (finsn >> 8) & 0xf;
509                 m = (finsn >> 4) & 0xf;
510                 hx = tsk->thread.fpu.hard.fp_regs[n];
511                 hy = tsk->thread.fpu.hard.fp_regs[m];
512                 fpscr = tsk->thread.fpu.hard.fpscr;
513                 prec = fpscr & (1 << 19);
514
515                 if ((fpscr & FPSCR_FPU_ERROR)
516                      && (prec && ((hx & 0x7fffffff) < 0x00100000
517                                    || (hy & 0x7fffffff) < 0x00100000))) {
518                         long long llx, lly;
519
520                         /* FPU error because of denormal */
521                         llx = ((long long) hx << 32)
522                                | tsk->thread.fpu.hard.fp_regs[n+1];
523                         lly = ((long long) hy << 32)
524                                | tsk->thread.fpu.hard.fp_regs[m+1];
525                         if ((hx & 0x7fffffff) >= 0x00100000)
526                                 llx = denormal_muld(lly, llx);
527                         else
528                                 llx = denormal_muld(llx, lly);
529                         tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
530                         tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff;
531                 } else if ((fpscr & FPSCR_FPU_ERROR)
532                      && (!prec && ((hx & 0x7fffffff) < 0x00800000
533                                    || (hy & 0x7fffffff) < 0x00800000))) {
534                         /* FPU error because of denormal */
535                         if ((hx & 0x7fffffff) >= 0x00800000)
536                                 hx = denormal_mulf(hy, hx);
537                         else
538                                 hx = denormal_mulf(hx, hy);
539                         tsk->thread.fpu.hard.fp_regs[n] = hx;
540                 } else
541                         return 0;
542
543                 regs->pc = nextpc;
544                 return 1;
545         } else if ((finsn & 0xf00e) == 0xf000) { /* fadd, fsub */
546                 struct task_struct *tsk = current;
547                 int fpscr;
548                 int n, m, prec;
549                 unsigned int hx, hy;
550
551                 n = (finsn >> 8) & 0xf;
552                 m = (finsn >> 4) & 0xf;
553                 hx = tsk->thread.fpu.hard.fp_regs[n];
554                 hy = tsk->thread.fpu.hard.fp_regs[m];
555                 fpscr = tsk->thread.fpu.hard.fpscr;
556                 prec = fpscr & (1 << 19);
557
558                 if ((fpscr & FPSCR_FPU_ERROR)
559                      && (prec && ((hx & 0x7fffffff) < 0x00100000
560                                    || (hy & 0x7fffffff) < 0x00100000))) {
561                         long long llx, lly;
562
563                         /* FPU error because of denormal */
564                         llx = ((long long) hx << 32)
565                                | tsk->thread.fpu.hard.fp_regs[n+1];
566                         lly = ((long long) hy << 32)
567                                | tsk->thread.fpu.hard.fp_regs[m+1];
568                         if ((finsn & 0xf00f) == 0xf000)
569                                 llx = denormal_addd(llx, lly);
570                         else
571                                 llx = denormal_addd(llx, lly ^ (1LL << 63));
572                         tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
573                         tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff;
574                 } else if ((fpscr & FPSCR_FPU_ERROR)
575                      && (!prec && ((hx & 0x7fffffff) < 0x00800000
576                                    || (hy & 0x7fffffff) < 0x00800000))) {
577                         /* FPU error because of denormal */
578                         if ((finsn & 0xf00f) == 0xf000)
579                                 hx = denormal_addf(hx, hy);
580                         else
581                                 hx = denormal_addf(hx, hy ^ 0x80000000);
582                         tsk->thread.fpu.hard.fp_regs[n] = hx;
583                 } else
584                         return 0;
585
586                 regs->pc = nextpc;
587                 return 1;
588         }
589
590         return 0;
591 }
592
593 BUILD_TRAP_HANDLER(fpu_error)
594 {
595         struct task_struct *tsk = current;
596         TRAP_HANDLER_DECL;
597
598         __unlazy_fpu(tsk, regs);
599         if (ieee_fpe_handler(regs)) {
600                 tsk->thread.fpu.hard.fpscr &=
601                         ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
602                 grab_fpu(regs);
603                 restore_fpu(tsk);
604                 task_thread_info(tsk)->status |= TS_USEDFPU;
605                 return;
606         }
607
608         force_sig(SIGFPE, tsk);
609 }
610
611 void fpu_state_restore(struct pt_regs *regs)
612 {
613         struct task_struct *tsk = current;
614
615         grab_fpu(regs);
616         if (unlikely(!user_mode(regs))) {
617                 printk(KERN_ERR "BUG: FPU is used in kernel mode.\n");
618                 BUG();
619                 return;
620         }
621
622         if (likely(used_math())) {
623                 /* Using the FPU again.  */
624                 restore_fpu(tsk);
625         } else  {
626                 /* First time FPU user.  */
627                 fpu_init();
628                 set_used_math();
629         }
630         task_thread_info(tsk)->status |= TS_USEDFPU;
631         tsk->fpu_counter++;
632 }
633
/*
 * Trap entry point that simply forwards to fpu_state_restore() with the
 * regs made available by TRAP_HANDLER_DECL.
 */
BUILD_TRAP_HANDLER(fpu_state_restore)
{
        TRAP_HANDLER_DECL;

        fpu_state_restore(regs);
}