arch/sh/kernel/cpu/sh4/fpu.c

   1 /*
   2  * Save/restore floating point context for signal handlers.
   3  *
   4  * This file is subject to the terms and conditions of the GNU General Public
   5  * License.  See the file "COPYING" in the main directory of this archive
   6  * for more details.
   7  *
   8  * Copyright (C) 1999, 2000  Kaz Kojima & Niibe Yutaka
   9  * Copyright (C) 2006  ST Microelectronics Ltd. (denorm support)
  10  *
  11  * FIXME! These routines have not been tested for big endian case.
  12  */
  13 #include <linux/sched.h>
  14 #include <linux/signal.h>
  15 #include <linux/io.h>
  16 #include <cpu/fpu.h>
  17 #include <asm/processor.h>
  18 #include <asm/system.h>
  19 #include <asm/fpu.h>
  20
  21 /* The PR (precision) bit in the FP Status Register must be clear when
  22  * an frchg instruction is executed, otherwise the instruction is undefined.
  23  * Executing frchg with PR set causes a trap on some SH4 implementations.
  24  */
  25
     /*
      * Value loaded into FPSCR right before the frchg sequences in this file.
      * It is all zeroes, so the PR bit described in the comment above is clear
      * while frchg executes.
      */
  26 #define FPSCR_RCHG 0x00000000
     /*
      * Arithmetic helpers defined outside this file.  ieee_fpe_handler() hands
      * them raw 32-bit / 64-bit register images taken from the saved FPU
      * context and stores the returned image back into that context.
      */
  27 extern unsigned long long float64_div(unsigned long long a,
  28                                       unsigned long long b);
  29 extern unsigned long int float32_div(unsigned long int a, unsigned long int b);
  30 extern unsigned long long float64_mul(unsigned long long a,
  31                                       unsigned long long b);
  32 extern unsigned long int float32_mul(unsigned long int a, unsigned long int b);
  33 extern unsigned long long float64_add(unsigned long long a,
  34                                       unsigned long long b);
  35 extern unsigned long int float32_add(unsigned long int a, unsigned long int b);
  36 extern unsigned long long float64_sub(unsigned long long a,
  37                                       unsigned long long b);
  38 extern unsigned long int float32_sub(unsigned long int a, unsigned long int b);
  39 extern unsigned long int float64_to_float32(unsigned long long a);
     /*
      * Flags accumulated through float_raise().  The fpu_error trap handler
      * zeroes this before each emulation attempt and afterwards ORs it into
      * the saved FPSCR image.
      */
  40 static unsigned int fpu_exception_flags;
  41
  42 /*
  43  * Save FPU registers onto task structure.
  44  * Assume called with FPU enabled (SR.FD=0).
      *
      * @tsk:  task whose thread.fpu.hard area receives the register dump
      * @regs: trap frame passed on to release_fpu() once the dump is done
      *
      * The function clears TIF_USEDFPU on @tsk, calls enable_fpu() itself, and
      * then writes the dump with pre-decrement stores, starting at the address
      * of thread.fpu.hard.status and working downwards: FPUL, FPSCR, sixteen
      * registers (fr15..fr0) after a first frchg, and sixteen more (fr15..fr0)
      * after a second frchg.  restore_fpu() reads the same area upwards from
      * &thread.fpu, so the two routines must be kept in step.
      *
      * On exit FPSCR holds FPSCR_INIT, not the value that was saved; the saved
      * value lives only in the task structure from here on.
  45  */
  46 void save_fpu(struct task_struct *tsk, struct pt_regs *regs)
  47 {
     /* Receives the final value of the store pointer; never read. */
  48         unsigned long dummy;
  49
  50         clear_tsk_thread_flag(tsk, TIF_USEDFPU);
  51         enable_fpu();
  52         asm volatile ("sts.l    fpul, @-%0\n\t"
  53                       "sts.l    fpscr, @-%0\n\t"
     /* %2 is FPSCR_RCHG: FPSCR is rewritten before frchg is executed. */
  54                       "lds      %2, fpscr\n\t"
  55                       "frchg\n\t"
     /* First group of sixteen, stored highest register first. */
  56                       "fmov.s   fr15, @-%0\n\t"
  57                       "fmov.s   fr14, @-%0\n\t"
  58                       "fmov.s   fr13, @-%0\n\t"
  59                       "fmov.s   fr12, @-%0\n\t"
  60                       "fmov.s   fr11, @-%0\n\t"
  61                       "fmov.s   fr10, @-%0\n\t"
  62                       "fmov.s   fr9, @-%0\n\t"
  63                       "fmov.s   fr8, @-%0\n\t"
  64                       "fmov.s   fr7, @-%0\n\t"
  65                       "fmov.s   fr6, @-%0\n\t"
  66                       "fmov.s   fr5, @-%0\n\t"
  67                       "fmov.s   fr4, @-%0\n\t"
  68                       "fmov.s   fr3, @-%0\n\t"
  69                       "fmov.s   fr2, @-%0\n\t"
  70                       "fmov.s   fr1, @-%0\n\t"
  71                       "fmov.s   fr0, @-%0\n\t"
     /* Second frchg, then the second group of sixteen. */
  72                       "frchg\n\t"
  73                       "fmov.s   fr15, @-%0\n\t"
  74                       "fmov.s   fr14, @-%0\n\t"
  75                       "fmov.s   fr13, @-%0\n\t"
  76                       "fmov.s   fr12, @-%0\n\t"
  77                       "fmov.s   fr11, @-%0\n\t"
  78                       "fmov.s   fr10, @-%0\n\t"
  79                       "fmov.s   fr9, @-%0\n\t"
  80                       "fmov.s   fr8, @-%0\n\t"
  81                       "fmov.s   fr7, @-%0\n\t"
  82                       "fmov.s   fr6, @-%0\n\t"
  83                       "fmov.s   fr5, @-%0\n\t"
  84                       "fmov.s   fr4, @-%0\n\t"
  85                       "fmov.s   fr3, @-%0\n\t"
  86                       "fmov.s   fr2, @-%0\n\t"
  87                       "fmov.s   fr1, @-%0\n\t"
  88                       "fmov.s   fr0, @-%0\n\t"
     /* %3 is FPSCR_INIT: leave the FPU with the initial FPSCR value. */
  89                       "lds      %3, fpscr\n\t":"=r" (dummy)
  90                       :"0"((char *)(&tsk->thread.fpu.hard.status)),
  91                       "r"(FPSCR_RCHG), "r"(FPSCR_INIT)
  92                       :"memory");
  93
  94         disable_fpu();
     /* NOTE(review): presumably marks the FPU as no longer owned in @regs -- confirm against release_fpu(). */
  95         release_fpu(regs);
  96 }
  97
     /*
      * Reload the FPU from the dump that save_fpu() left in @tsk.
      *
      * @tsk: task whose thread.fpu area is read
      *
      * Reads upwards from &tsk->thread.fpu with post-increment loads: sixteen
      * registers (fr0..fr15), frchg, sixteen more (fr0..fr15), frchg, then
      * FPSCR and FPUL.  FPSCR is first set to FPSCR_RCHG so that frchg runs
      * with the PR bit clear, and only receives the task's saved value at the
      * very end.  The FPU is enabled for the duration and disabled again
      * before returning.
      */
  98 static void restore_fpu(struct task_struct *tsk)
  99 {
     /* Receives the final value of the load pointer; never read. */
 100         unsigned long dummy;
 101
 102         enable_fpu();
 103         asm volatile ("lds      %2, fpscr\n\t"
 104                       "fmov.s   @%0+, fr0\n\t"
 105                       "fmov.s   @%0+, fr1\n\t"
 106                       "fmov.s   @%0+, fr2\n\t"
 107                       "fmov.s   @%0+, fr3\n\t"
 108                       "fmov.s   @%0+, fr4\n\t"
 109                       "fmov.s   @%0+, fr5\n\t"
 110                       "fmov.s   @%0+, fr6\n\t"
 111                       "fmov.s   @%0+, fr7\n\t"
 112                       "fmov.s   @%0+, fr8\n\t"
 113                       "fmov.s   @%0+, fr9\n\t"
 114                       "fmov.s   @%0+, fr10\n\t"
 115                       "fmov.s   @%0+, fr11\n\t"
 116                       "fmov.s   @%0+, fr12\n\t"
 117                       "fmov.s   @%0+, fr13\n\t"
 118                       "fmov.s   @%0+, fr14\n\t"
 119                       "fmov.s   @%0+, fr15\n\t"
     /* Switch, then load the second group of sixteen. */
 120                       "frchg\n\t"
 121                       "fmov.s   @%0+, fr0\n\t"
 122                       "fmov.s   @%0+, fr1\n\t"
 123                       "fmov.s   @%0+, fr2\n\t"
 124                       "fmov.s   @%0+, fr3\n\t"
 125                       "fmov.s   @%0+, fr4\n\t"
 126                       "fmov.s   @%0+, fr5\n\t"
 127                       "fmov.s   @%0+, fr6\n\t"
 128                       "fmov.s   @%0+, fr7\n\t"
 129                       "fmov.s   @%0+, fr8\n\t"
 130                       "fmov.s   @%0+, fr9\n\t"
 131                       "fmov.s   @%0+, fr10\n\t"
 132                       "fmov.s   @%0+, fr11\n\t"
 133                       "fmov.s   @%0+, fr12\n\t"
 134                       "fmov.s   @%0+, fr13\n\t"
 135                       "fmov.s   @%0+, fr14\n\t"
 136                       "fmov.s   @%0+, fr15\n\t"
 137                       "frchg\n\t"
     /* The task's own FPSCR and FPUL are loaded last. */
 138                       "lds.l    @%0+, fpscr\n\t"
 139                       "lds.l    @%0+, fpul\n\t"
 140                       :"=r" (dummy)
 141                       :"0"(&tsk->thread.fpu), "r"(FPSCR_RCHG)
 142                       :"memory");
 143         disable_fpu();
 144 }
 145
 146 /*
 147  * Load the FPU with signalling NANS.  This bit pattern we're using
 148  * has the property that no matter whether considered as single or as
 149  * double precision represents signaling NANS.
      *
      * NOTE(review): the code below loads the constant 0 into FPUL and copies
      * FPUL into every register, so the registers end up holding an all-zero
      * bit pattern rather than a NaN pattern.  The description above looks
      * stale -- confirm which of the two is intended.
      *
      * Sequence: enable the FPU, set FPUL to 0 and FPSCR to FPSCR_RCHG, fill
      * fr0..fr15 from FPUL, frchg, fill fr0..fr15 again, frchg, set FPSCR to
      * FPSCR_INIT, disable the FPU.
 150  */
 151
 152 static void fpu_init(void)
 153 {
 154         enable_fpu();
 155         asm volatile (  "lds    %0, fpul\n\t"
 156                         "lds    %1, fpscr\n\t"
 157                         "fsts   fpul, fr0\n\t"
 158                         "fsts   fpul, fr1\n\t"
 159                         "fsts   fpul, fr2\n\t"
 160                         "fsts   fpul, fr3\n\t"
 161                         "fsts   fpul, fr4\n\t"
 162                         "fsts   fpul, fr5\n\t"
 163                         "fsts   fpul, fr6\n\t"
 164                         "fsts   fpul, fr7\n\t"
 165                         "fsts   fpul, fr8\n\t"
 166                         "fsts   fpul, fr9\n\t"
 167                         "fsts   fpul, fr10\n\t"
 168                         "fsts   fpul, fr11\n\t"
 169                         "fsts   fpul, fr12\n\t"
 170                         "fsts   fpul, fr13\n\t"
 171                         "fsts   fpul, fr14\n\t"
 172                         "fsts   fpul, fr15\n\t"
     /* Switch, then fill the second group of sixteen. */
 173                         "frchg\n\t"
 174                         "fsts   fpul, fr0\n\t"
 175                         "fsts   fpul, fr1\n\t"
 176                         "fsts   fpul, fr2\n\t"
 177                         "fsts   fpul, fr3\n\t"
 178                         "fsts   fpul, fr4\n\t"
 179                         "fsts   fpul, fr5\n\t"
 180                         "fsts   fpul, fr6\n\t"
 181                         "fsts   fpul, fr7\n\t"
 182                         "fsts   fpul, fr8\n\t"
 183                         "fsts   fpul, fr9\n\t"
 184                         "fsts   fpul, fr10\n\t"
 185                         "fsts   fpul, fr11\n\t"
 186                         "fsts   fpul, fr12\n\t"
 187                         "fsts   fpul, fr13\n\t"
 188                         "fsts   fpul, fr14\n\t"
 189                         "fsts   fpul, fr15\n\t"
 190                         "frchg\n\t"
     /* %2 is FPSCR_INIT: the state a first-time FPU user starts with. */
 191                         "lds    %2, fpscr\n\t"
 192                         :       /* no output */
 193                         :"r" (0), "r"(FPSCR_RCHG), "r"(FPSCR_INIT));
 194         disable_fpu();
 195 }
 196
 197 /**
 198  *      denormal_to_double - Given denormalized float number,
 199  *                           store double float
 200  *
 201  *      @fpu: Pointer to sh_fpu_hard structure
 202  *      @n: Index to FP register
 203  */
 204 static void denormal_to_double(struct sh_fpu_hard_struct *fpu, int n)
 205 {
 206         unsigned long du, dl;
 207         unsigned long x = fpu->fpul;
 208         int exp = 1023 - 126;
 209
 210         if (x != 0 && (x & 0x7f800000) == 0) {
 211                 du = (x & 0x80000000);
 212                 while ((x & 0x00800000) == 0) {
 213                         x <<= 1;
 214                         exp--;
 215                 }
 216                 x &= 0x007fffff;
 217                 du |= (exp << 20) | (x >> 3);
 218                 dl = x << 29;
 219
 220                 fpu->fp_regs[n] = du;
 221                 fpu->fp_regs[n + 1] = dl;
 222         }
 223 }
 224
 225 /**
 226  *      ieee_fpe_handler - Handle denormalized number exception
 227  *
 228  *      @regs: Pointer to register structure
 229  *
 230  *      Returns 1 when it's handled (should not cause exception).
 231  */
 232 static int ieee_fpe_handler(struct pt_regs *regs)
 233 {
 234         unsigned short insn = *(unsigned short *)regs->pc;
 235         unsigned short finsn;
 236         unsigned long nextpc;
 237         int nib[4] = {
 238                 (insn >> 12) & 0xf,
 239                 (insn >> 8) & 0xf,
 240                 (insn >> 4) & 0xf,
 241                 insn & 0xf
 242         };
 243
 244         if (nib[0] == 0xb || (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb))
 245                 regs->pr = regs->pc + 4;  /* bsr & jsr */
 246
 247         if (nib[0] == 0xa || nib[0] == 0xb) {
 248                 /* bra & bsr */
 249                 nextpc = regs->pc + 4 + ((short)((insn & 0xfff) << 4) >> 3);
 250                 finsn = *(unsigned short *)(regs->pc + 2);
 251         } else if (nib[0] == 0x8 && nib[1] == 0xd) {
 252                 /* bt/s */
 253                 if (regs->sr & 1)
 254                         nextpc = regs->pc + 4 + ((char)(insn & 0xff) << 1);
 255                 else
 256                         nextpc = regs->pc + 4;
 257                 finsn = *(unsigned short *)(regs->pc + 2);
 258         } else if (nib[0] == 0x8 && nib[1] == 0xf) {
 259                 /* bf/s */
 260                 if (regs->sr & 1)
 261                         nextpc = regs->pc + 4;
 262                 else
 263                         nextpc = regs->pc + 4 + ((char)(insn & 0xff) << 1);
 264                 finsn = *(unsigned short *)(regs->pc + 2);
 265         } else if (nib[0] == 0x4 && nib[3] == 0xb &&
 266                    (nib[2] == 0x0 || nib[2] == 0x2)) {
 267                 /* jmp & jsr */
 268                 nextpc = regs->regs[nib[1]];
 269                 finsn = *(unsigned short *)(regs->pc + 2);
 270         } else if (nib[0] == 0x0 && nib[3] == 0x3 &&
 271                    (nib[2] == 0x0 || nib[2] == 0x2)) {
 272                 /* braf & bsrf */
 273                 nextpc = regs->pc + 4 + regs->regs[nib[1]];
 274                 finsn = *(unsigned short *)(regs->pc + 2);
 275         } else if (insn == 0x000b) {
 276                 /* rts */
 277                 nextpc = regs->pr;
 278                 finsn = *(unsigned short *)(regs->pc + 2);
 279         } else {
 280                 nextpc = regs->pc + instruction_size(insn);
 281                 finsn = insn;
 282         }
 283
 284         if ((finsn & 0xf1ff) == 0xf0ad) {
 285                 /* fcnvsd */
 286                 struct task_struct *tsk = current;
 287
 288                 save_fpu(tsk, regs);
 289                 if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR))
 290                         /* FPU error */
 291                         denormal_to_double(&tsk->thread.fpu.hard,
 292                                            (finsn >> 8) & 0xf);
 293                 else
 294                         return 0;
 295
 296                 regs->pc = nextpc;
 297                 return 1;
 298         } else if ((finsn & 0xf00f) == 0xf002) {
 299                 /* fmul */
 300                 struct task_struct *tsk = current;
 301                 int fpscr;
 302                 int n, m, prec;
 303                 unsigned int hx, hy;
 304
 305                 n = (finsn >> 8) & 0xf;
 306                 m = (finsn >> 4) & 0xf;
 307                 hx = tsk->thread.fpu.hard.fp_regs[n];
 308                 hy = tsk->thread.fpu.hard.fp_regs[m];
 309                 fpscr = tsk->thread.fpu.hard.fpscr;
 310                 prec = fpscr & FPSCR_DBL_PRECISION;
 311
 312                 if ((fpscr & FPSCR_CAUSE_ERROR)
 313                     && (prec && ((hx & 0x7fffffff) < 0x00100000
 314                                  || (hy & 0x7fffffff) < 0x00100000))) {
 315                         long long llx, lly;
 316
 317                         /* FPU error because of denormal (doubles) */
 318                         llx = ((long long)hx << 32)
 319                             | tsk->thread.fpu.hard.fp_regs[n + 1];
 320                         lly = ((long long)hy << 32)
 321                             | tsk->thread.fpu.hard.fp_regs[m + 1];
 322                         llx = float64_mul(llx, lly);
 323                         tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
 324                         tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
 325                 } else if ((fpscr & FPSCR_CAUSE_ERROR)
 326                            && (!prec && ((hx & 0x7fffffff) < 0x00800000
 327                                          || (hy & 0x7fffffff) < 0x00800000))) {
 328                         /* FPU error because of denormal (floats) */
 329                         hx = float32_mul(hx, hy);
 330                         tsk->thread.fpu.hard.fp_regs[n] = hx;
 331                 } else
 332                         return 0;
 333
 334                 regs->pc = nextpc;
 335                 return 1;
 336         } else if ((finsn & 0xf00e) == 0xf000) {
 337                 /* fadd, fsub */
 338                 struct task_struct *tsk = current;
 339                 int fpscr;
 340                 int n, m, prec;
 341                 unsigned int hx, hy;
 342
 343                 n = (finsn >> 8) & 0xf;
 344                 m = (finsn >> 4) & 0xf;
 345                 hx = tsk->thread.fpu.hard.fp_regs[n];
 346                 hy = tsk->thread.fpu.hard.fp_regs[m];
 347                 fpscr = tsk->thread.fpu.hard.fpscr;
 348                 prec = fpscr & FPSCR_DBL_PRECISION;
 349
 350                 if ((fpscr & FPSCR_CAUSE_ERROR)
 351                     && (prec && ((hx & 0x7fffffff) < 0x00100000
 352                                  || (hy & 0x7fffffff) < 0x00100000))) {
 353                         long long llx, lly;
 354
 355                         /* FPU error because of denormal (doubles) */
 356                         llx = ((long long)hx << 32)
 357                             | tsk->thread.fpu.hard.fp_regs[n + 1];
 358                         lly = ((long long)hy << 32)
 359                             | tsk->thread.fpu.hard.fp_regs[m + 1];
 360                         if ((finsn & 0xf00f) == 0xf000)
 361                                 llx = float64_add(llx, lly);
 362                         else
 363                                 llx = float64_sub(llx, lly);
 364                         tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
 365                         tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
 366                 } else if ((fpscr & FPSCR_CAUSE_ERROR)
 367                            && (!prec && ((hx & 0x7fffffff) < 0x00800000
 368                                          || (hy & 0x7fffffff) < 0x00800000))) {
 369                         /* FPU error because of denormal (floats) */
 370                         if ((finsn & 0xf00f) == 0xf000)
 371                                 hx = float32_add(hx, hy);
 372                         else
 373                                 hx = float32_sub(hx, hy);
 374                         tsk->thread.fpu.hard.fp_regs[n] = hx;
 375                 } else
 376                         return 0;
 377
 378                 regs->pc = nextpc;
 379                 return 1;
 380         } else if ((finsn & 0xf003) == 0xf003) {
 381                 /* fdiv */
 382                 struct task_struct *tsk = current;
 383                 int fpscr;
 384                 int n, m, prec;
 385                 unsigned int hx, hy;
 386
 387                 n = (finsn >> 8) & 0xf;
 388                 m = (finsn >> 4) & 0xf;
 389                 hx = tsk->thread.fpu.hard.fp_regs[n];
 390                 hy = tsk->thread.fpu.hard.fp_regs[m];
 391                 fpscr = tsk->thread.fpu.hard.fpscr;
 392                 prec = fpscr & FPSCR_DBL_PRECISION;
 393
 394                 if ((fpscr & FPSCR_CAUSE_ERROR)
 395                     && (prec && ((hx & 0x7fffffff) < 0x00100000
 396                                  || (hy & 0x7fffffff) < 0x00100000))) {
 397                         long long llx, lly;
 398
 399                         /* FPU error because of denormal (doubles) */
 400                         llx = ((long long)hx << 32)
 401                             | tsk->thread.fpu.hard.fp_regs[n + 1];
 402                         lly = ((long long)hy << 32)
 403                             | tsk->thread.fpu.hard.fp_regs[m + 1];
 404
 405                         llx = float64_div(llx, lly);
 406
 407                         tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
 408                         tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
 409                 } else if ((fpscr & FPSCR_CAUSE_ERROR)
 410                            && (!prec && ((hx & 0x7fffffff) < 0x00800000
 411                                          || (hy & 0x7fffffff) < 0x00800000))) {
 412                         /* FPU error because of denormal (floats) */
 413                         hx = float32_div(hx, hy);
 414                         tsk->thread.fpu.hard.fp_regs[n] = hx;
 415                 } else
 416                         return 0;
 417
 418                 regs->pc = nextpc;
 419                 return 1;
 420         } else if ((finsn & 0xf0bd) == 0xf0bd) {
 421                 /* fcnvds - double to single precision convert */
 422                 struct task_struct *tsk = current;
 423                 int m;
 424                 unsigned int hx;
 425
 426                 m = (finsn >> 8) & 0x7;
 427                 hx = tsk->thread.fpu.hard.fp_regs[m];
 428
 429                 if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR)
 430                         && ((hx & 0x7fffffff) < 0x00100000)) {
 431                         /* subnormal double to float conversion */
 432                         long long llx;
 433
 434                         llx = ((long long)tsk->thread.fpu.hard.fp_regs[m] << 32)
 435                             | tsk->thread.fpu.hard.fp_regs[m + 1];
 436
 437                         tsk->thread.fpu.hard.fpul = float64_to_float32(llx);
 438                 } else
 439                         return 0;
 440
 441                 regs->pc = nextpc;
 442                 return 1;
 443         }
 444
 445         return 0;
 446 }
 447
 448 void float_raise(unsigned int flags)
 449 {
 450         fpu_exception_flags |= flags;
 451 }
 452
 453 int float_rounding_mode(void)
 454 {
 455         struct task_struct *tsk = current;
 456         int roundingMode = FPSCR_ROUNDING_MODE(tsk->thread.fpu.hard.fpscr);
 457         return roundingMode;
 458 }
 459
 460 BUILD_TRAP_HANDLER(fpu_error)
 461 {
 462         struct task_struct *tsk = current;
 463         TRAP_HANDLER_DECL;
 464
 465         save_fpu(tsk, regs);
 466         fpu_exception_flags = 0;
 467         if (ieee_fpe_handler(regs)) {
 468                 tsk->thread.fpu.hard.fpscr &=
 469                     ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
 470                 tsk->thread.fpu.hard.fpscr |= fpu_exception_flags;
 471                 /* Set the FPSCR flag as well as cause bits - simply
 472                  * replicate the cause */
 473                 tsk->thread.fpu.hard.fpscr |= (fpu_exception_flags >> 10);
 474                 grab_fpu(regs);
 475                 restore_fpu(tsk);
 476                 set_tsk_thread_flag(tsk, TIF_USEDFPU);
 477                 if ((((tsk->thread.fpu.hard.fpscr & FPSCR_ENABLE_MASK) >> 7) &
 478                      (fpu_exception_flags >> 2)) == 0) {
 479                         return;
 480                 }
 481         }
 482
 483         force_sig(SIGFPE, tsk);
 484 }
 485
 486 BUILD_TRAP_HANDLER(fpu_state_restore)
 487 {
 488         struct task_struct *tsk = current;
 489         TRAP_HANDLER_DECL;
 490
 491         grab_fpu(regs);
 492         if (!user_mode(regs)) {
 493                 printk(KERN_ERR "BUG: FPU is used in kernel mode.\n");
 494                 return;
 495         }
 496
 497         if (used_math()) {
 498                 /* Using the FPU again.  */
 499                 restore_fpu(tsk);
 500         } else {
 501                 /* First time FPU user.  */
 502                 fpu_init();
 503                 set_used_math();
 504         }
 505         set_tsk_thread_flag(tsk, TIF_USEDFPU);
 506 }