arch/x86/math-emu/div_Xsig.S

   1 /* SPDX-License-Identifier: GPL-2.0 */
   2         .file   "div_Xsig.S"
   3 /*---------------------------------------------------------------------------+
   4  |  div_Xsig.S                                                               |
   5  |                                                                           |
   6  | Division subroutine for 96 bit quantities                                 |
   7  |                                                                           |
   8  | Copyright (C) 1994,1995                                                   |
   9  |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
  10  |                       Australia.  E-mail billm@jacobi.maths.monash.edu.au |
  11  |                                                                           |
  12  |                                                                           |
  13  +---------------------------------------------------------------------------*/
  14
  15 /*---------------------------------------------------------------------------+
  16  | Divide the 96 bit quantity pointed to by a, by that pointed to by b, and  |
  17  | put the 96 bit result at the location d.                                  |
  18  |                                                                           |
  19  | The result may not be accurate to 96 bits. It is intended for use where   |
  20  | a result better than 64 bits is required. The result should usually be    |
  21  | good to at least 94 bits.                                                 |
  22  | The returned result is actually divided by one half. This is done to      |
  23  | prevent overflow.                                                         |
  24  |                                                                           |
  25  |  .aaaaaaaaaaaaaa / .bbbbbbbbbbbbb  ->  .dddddddddddd                      |
  26  |                                                                           |
  27  |  void div_Xsig(Xsig *a, Xsig *b, Xsig *dest)                              |
  28  |                                                                           |
  29  +---------------------------------------------------------------------------*/
  30
  31 #include "exception.h"
  32 #include "fpu_emu.h"
  33
  34
  35 #define XsigLL(x)       (x)
  36 #define XsigL(x)        4(x)
  37 #define XsigH(x)        8(x)
  38
  39
  40 #ifndef NON_REENTRANT_FPU
  41 /*
  42         Local storage on the stack:
  43         Accumulator:    FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
  44  */
  45 #define FPU_accum_3     -4(%ebp)
  46 #define FPU_accum_2     -8(%ebp)
  47 #define FPU_accum_1     -12(%ebp)
  48 #define FPU_accum_0     -16(%ebp)
  49 #define FPU_result_3    -20(%ebp)
  50 #define FPU_result_2    -24(%ebp)
  51 #define FPU_result_1    -28(%ebp)
  52
  53 #else
  54 .data
  55 /*
  56         Local storage in a static area:
  57         Accumulator:    FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
  58  */
  59         .align 4,0
  60 FPU_accum_3:
  61         .long   0
  62 FPU_accum_2:
  63         .long   0
  64 FPU_accum_1:
  65         .long   0
  66 FPU_accum_0:
  67         .long   0
  68 FPU_result_3:
  69         .long   0
  70 FPU_result_2:
  71         .long   0
  72 FPU_result_1:
  73         .long   0
  74 #endif /* NON_REENTRANT_FPU */
  75
  76
  77 .text
  78 ENTRY(div_Xsig)
  79         pushl   %ebp
  80         movl    %esp,%ebp
  81 #ifndef NON_REENTRANT_FPU
  82         subl    $28,%esp
  83 #endif /* NON_REENTRANT_FPU */
  84
  85         pushl   %esi
  86         pushl   %edi
  87         pushl   %ebx
  88
  89         movl    PARAM1,%esi     /* pointer to num */
  90         movl    PARAM2,%ebx     /* pointer to denom */
  91
  92 #ifdef PARANOID
  93         testl   $0x80000000, XsigH(%ebx)        /* Divisor */
  94         je      L_bugged
  95 #endif /* PARANOID */
  96
  97
  98 /*---------------------------------------------------------------------------+
  99  |  Divide:   Return  arg1/arg2 to arg3.                                     |
 100  |                                                                           |
 101  |  The maximum returned value is (ignoring exponents)                       |
 102  |               .ffffffff ffffffff                                          |
 103  |               ------------------  =  1.ffffffff fffffffe                  |
 104  |               .80000000 00000000                                          |
 105  | and the minimum is                                                        |
 106  |               .80000000 00000000                                          |
 107  |               ------------------  =  .80000000 00000001   (rounded)       |
 108  |               .ffffffff ffffffff                                          |
 109  |                                                                           |
 110  +---------------------------------------------------------------------------*/
 111
 112         /* Save extended dividend in local register */
 113
 114         /* Divide by 2 to prevent overflow */
 115         clc
 116         movl    XsigH(%esi),%eax
 117         rcrl    %eax
 118         movl    %eax,FPU_accum_3
 119         movl    XsigL(%esi),%eax
 120         rcrl    %eax
 121         movl    %eax,FPU_accum_2
 122         movl    XsigLL(%esi),%eax
 123         rcrl    %eax
 124         movl    %eax,FPU_accum_1
 125         movl    $0,%eax
 126         rcrl    %eax
 127         movl    %eax,FPU_accum_0
 128
 129         movl    FPU_accum_2,%eax        /* Get the current num */
 130         movl    FPU_accum_3,%edx
 131
 132 /*----------------------------------------------------------------------*/
 133 /* Initialization done.
 134    Do the first 32 bits. */
 135
 136         /* We will divide by a number which is too large */
 137         movl    XsigH(%ebx),%ecx
 138         addl    $1,%ecx
 139         jnc     LFirst_div_not_1
 140
 141         /* here we need to divide by 100000000h,
 142            i.e., no division at all.. */
 143         mov     %edx,%eax
 144         jmp     LFirst_div_done
 145
 146 LFirst_div_not_1:
 147         divl    %ecx            /* Divide the numerator by the augmented
 148                                    denom ms dw */
 149
 150 LFirst_div_done:
 151         movl    %eax,FPU_result_3       /* Put the result in the answer */
 152
 153         mull    XsigH(%ebx)     /* mul by the ms dw of the denom */
 154
 155         subl    %eax,FPU_accum_2        /* Subtract from the num local reg */
 156         sbbl    %edx,FPU_accum_3
 157
 158         movl    FPU_result_3,%eax       /* Get the result back */
 159         mull    XsigL(%ebx)     /* now mul the ls dw of the denom */
 160
 161         subl    %eax,FPU_accum_1        /* Subtract from the num local reg */
 162         sbbl    %edx,FPU_accum_2
 163         sbbl    $0,FPU_accum_3
 164         je      LDo_2nd_32_bits         /* Must check for non-zero result here */
 165
 166 #ifdef PARANOID
 167         jb      L_bugged_1
 168 #endif /* PARANOID */
 169
 170         /* need to subtract another once of the denom */
 171         incl    FPU_result_3    /* Correct the answer */
 172
 173         movl    XsigL(%ebx),%eax
 174         movl    XsigH(%ebx),%edx
 175         subl    %eax,FPU_accum_1        /* Subtract from the num local reg */
 176         sbbl    %edx,FPU_accum_2
 177
 178 #ifdef PARANOID
 179         sbbl    $0,FPU_accum_3
 180         jne     L_bugged_1      /* Must check for non-zero result here */
 181 #endif /* PARANOID */
 182
 183 /*----------------------------------------------------------------------*/
 184 /* Half of the main problem is done, there is just a reduced numerator
 185    to handle now.
 186    Work with the second 32 bits, FPU_accum_0 not used from now on */
 187 LDo_2nd_32_bits:
 188         movl    FPU_accum_2,%edx        /* get the reduced num */
 189         movl    FPU_accum_1,%eax
 190
 191         /* need to check for possible subsequent overflow */
 192         cmpl    XsigH(%ebx),%edx
 193         jb      LDo_2nd_div
 194         ja      LPrevent_2nd_overflow
 195
 196         cmpl    XsigL(%ebx),%eax
 197         jb      LDo_2nd_div
 198
 199 LPrevent_2nd_overflow:
 200 /* The numerator is greater or equal, would cause overflow */
 201         /* prevent overflow */
 202         subl    XsigL(%ebx),%eax
 203         sbbl    XsigH(%ebx),%edx
 204         movl    %edx,FPU_accum_2
 205         movl    %eax,FPU_accum_1
 206
 207         incl    FPU_result_3    /* Reflect the subtraction in the answer */
 208
 209 #ifdef PARANOID
 210         je      L_bugged_2      /* Can't bump the result to 1.0 */
 211 #endif /* PARANOID */
 212
 213 LDo_2nd_div:
 214         cmpl    $0,%ecx         /* augmented denom msw */
 215         jnz     LSecond_div_not_1
 216
 217         /* %ecx == 0, we are dividing by 1.0 */
 218         mov     %edx,%eax
 219         jmp     LSecond_div_done
 220
 221 LSecond_div_not_1:
 222         divl    %ecx            /* Divide the numerator by the denom ms dw */
 223
 224 LSecond_div_done:
 225         movl    %eax,FPU_result_2       /* Put the result in the answer */
 226
 227         mull    XsigH(%ebx)     /* mul by the ms dw of the denom */
 228
 229         subl    %eax,FPU_accum_1        /* Subtract from the num local reg */
 230         sbbl    %edx,FPU_accum_2
 231
 232 #ifdef PARANOID
 233         jc      L_bugged_2
 234 #endif /* PARANOID */
 235
 236         movl    FPU_result_2,%eax       /* Get the result back */
 237         mull    XsigL(%ebx)     /* now mul the ls dw of the denom */
 238
 239         subl    %eax,FPU_accum_0        /* Subtract from the num local reg */
 240         sbbl    %edx,FPU_accum_1        /* Subtract from the num local reg */
 241         sbbl    $0,FPU_accum_2
 242
 243 #ifdef PARANOID
 244         jc      L_bugged_2
 245 #endif /* PARANOID */
 246
 247         jz      LDo_3rd_32_bits
 248
 249 #ifdef PARANOID
 250         cmpl    $1,FPU_accum_2
 251         jne     L_bugged_2
 252 #endif /* PARANOID */
 253
 254         /* need to subtract another once of the denom */
 255         movl    XsigL(%ebx),%eax
 256         movl    XsigH(%ebx),%edx
 257         subl    %eax,FPU_accum_0        /* Subtract from the num local reg */
 258         sbbl    %edx,FPU_accum_1
 259         sbbl    $0,FPU_accum_2
 260
 261 #ifdef PARANOID
 262         jc      L_bugged_2
 263         jne     L_bugged_2
 264 #endif /* PARANOID */
 265
 266         addl    $1,FPU_result_2 /* Correct the answer */
 267         adcl    $0,FPU_result_3
 268
 269 #ifdef PARANOID
 270         jc      L_bugged_2      /* Must check for non-zero result here */
 271 #endif /* PARANOID */
 272
 273 /*----------------------------------------------------------------------*/
 274 /* The division is essentially finished here, we just need to perform
 275    tidying operations.
 276    Deal with the 3rd 32 bits */
 277 LDo_3rd_32_bits:
 278         /* We use an approximation for the third 32 bits.
 279         To take account of the 3rd 32 bits of the divisor
 280         (call them del), we subtract  del * (a/b) */
 281
 282         movl    FPU_result_3,%eax       /* a/b */
 283         mull    XsigLL(%ebx)            /* del */
 284
 285         subl    %edx,FPU_accum_1
 286
 287         /* A borrow indicates that the result is negative */
 288         jnb     LTest_over
 289
 290         movl    XsigH(%ebx),%edx
 291         addl    %edx,FPU_accum_1
 292
 293         subl    $1,FPU_result_2         /* Adjust the answer */
 294         sbbl    $0,FPU_result_3
 295
 296         /* The above addition might not have been enough, check again. */
 297         movl    FPU_accum_1,%edx        /* get the reduced num */
 298         cmpl    XsigH(%ebx),%edx        /* denom */
 299         jb      LDo_3rd_div
 300
 301         movl    XsigH(%ebx),%edx
 302         addl    %edx,FPU_accum_1
 303
 304         subl    $1,FPU_result_2         /* Adjust the answer */
 305         sbbl    $0,FPU_result_3
 306         jmp     LDo_3rd_div
 307
 308 LTest_over:
 309         movl    FPU_accum_1,%edx        /* get the reduced num */
 310
 311         /* need to check for possible subsequent overflow */
 312         cmpl    XsigH(%ebx),%edx        /* denom */
 313         jb      LDo_3rd_div
 314
 315         /* prevent overflow */
 316         subl    XsigH(%ebx),%edx
 317         movl    %edx,FPU_accum_1
 318
 319         addl    $1,FPU_result_2 /* Reflect the subtraction in the answer */
 320         adcl    $0,FPU_result_3
 321
 322 LDo_3rd_div:
 323         movl    FPU_accum_0,%eax
 324         movl    FPU_accum_1,%edx
 325         divl    XsigH(%ebx)
 326
 327         movl    %eax,FPU_result_1       /* Rough estimate of third word */
 328
 329         movl    PARAM3,%esi             /* pointer to answer */
 330
 331         movl    FPU_result_1,%eax
 332         movl    %eax,XsigLL(%esi)
 333         movl    FPU_result_2,%eax
 334         movl    %eax,XsigL(%esi)
 335         movl    FPU_result_3,%eax
 336         movl    %eax,XsigH(%esi)
 337
 338 L_exit:
 339         popl    %ebx
 340         popl    %edi
 341         popl    %esi
 342
 343         leave
 344         ret
 345
 346
 347 #ifdef PARANOID
 348 /* The logic is wrong if we got here */
 349 L_bugged:
 350         pushl   EX_INTERNAL|0x240
 351         call    EXCEPTION
 352         pop     %ebx
 353         jmp     L_exit
 354
 355 L_bugged_1:
 356         pushl   EX_INTERNAL|0x241
 357         call    EXCEPTION
 358         pop     %ebx
 359         jmp     L_exit
 360
 361 L_bugged_2:
 362         pushl   EX_INTERNAL|0x242
 363         call    EXCEPTION
 364         pop     %ebx
 365         jmp     L_exit
 366 #endif /* PARANOID */
 367 ENDPROC(div_Xsig)