Update copyright notices with scripts/update-copyrights
[jlayton/glibc.git] / stdlib / longlong.h
1 /* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
2    Copyright (C) 1991-2014 Free Software Foundation, Inc.
3
4    This file is part of the GNU C Library.
5
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Lesser General Public
8    License as published by the Free Software Foundation; either
9    version 2.1 of the License, or (at your option) any later version.
10
11    In addition to the permissions in the GNU Lesser General Public
12    License, the Free Software Foundation gives you unlimited
13    permission to link the compiled version of this file into
14    combinations with other programs, and to distribute those
15    combinations without any restriction coming from the use of this
16    file.  (The Lesser General Public License restrictions do apply in
17    other respects; for example, they cover modification of the file,
18    and distribution when not linked into a combine executable.)
19
20    The GNU C Library is distributed in the hope that it will be useful,
21    but WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23    Lesser General Public License for more details.
24
25    You should have received a copy of the GNU Lesser General Public
26    License along with the GNU C Library; if not, see
27    <http://www.gnu.org/licenses/>.  */
28
29 /* You have to define the following before including this file:
30
31    UWtype -- An unsigned type, default type for operations (typically a "word")
32    UHWtype -- An unsigned type, at least half the size of UWtype.
33    UDWtype -- An unsigned type, at least twice as large a UWtype
34    W_TYPE_SIZE -- size in bits of UWtype
35
36    UQItype -- Unsigned 8 bit type.
37    SItype, USItype -- Signed and unsigned 32 bit types.
38    DItype, UDItype -- Signed and unsigned 64 bit types.
39
40    On a 32 bit machine UWtype should typically be USItype;
41    on a 64 bit machine, UWtype should typically be UDItype.  */
42
43 #define __BITS4 (W_TYPE_SIZE / 4)
44 #define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
45 #define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
46 #define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
47
48 #ifndef W_TYPE_SIZE
49 #define W_TYPE_SIZE     32
50 #define UWtype          USItype
51 #define UHWtype         USItype
52 #define UDWtype         UDItype
53 #endif
54
55 /* Used in glibc only.  */
56 #ifndef attribute_hidden
57 #define attribute_hidden
58 #endif
59
60 extern const UQItype __clz_tab[256] attribute_hidden;
61
62 /* Define auxiliary asm macros.
63
64    1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
65    UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
66    word product in HIGH_PROD and LOW_PROD.
67
68    2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
69    UDWtype product.  This is just a variant of umul_ppmm.
70
71    3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
72    denominator) divides a UDWtype, composed by the UWtype integers
73    HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
74    in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR must be less
75    than DENOMINATOR for correct operation.  If, in addition, the most
76    significant bit of DENOMINATOR must be 1, then the pre-processor symbol
77    UDIV_NEEDS_NORMALIZATION is defined to 1.
78
79    4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
80    denominator).  Like udiv_qrnnd but the numbers are signed.  The quotient
81    is rounded towards 0.
82
83    5) count_leading_zeros(count, x) counts the number of zero-bits from the
84    msb to the first nonzero bit in the UWtype X.  This is the number of
85    steps X needs to be shifted left to set the msb.  Undefined for X == 0,
86    unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
87
88    6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
89    from the least significant end.
90
91    7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
92    high_addend_2, low_addend_2) adds two UWtype integers, composed by
93    HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
94    respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
95    (i.e. carry out) is not stored anywhere, and is lost.
96
97    8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
98    high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
99    composed by HIGH_MINUEND_1 and LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and
100    LOW_SUBTRAHEND_2 respectively.  The result is placed in HIGH_DIFFERENCE
101    and LOW_DIFFERENCE.  Overflow (i.e. carry out) is not stored anywhere,
102    and is lost.
103
104    If any of these macros are left undefined for a particular CPU,
105    C macros are used.  */
106
107 /* The CPUs come in alphabetical order below.
108
109    Please add support for more CPUs here, or improve the current support
110    for the CPUs below!
111    (E.g. WE32100, IBM360.)  */
112
113 #if defined (__GNUC__) && !defined (NO_ASM)
114
115 /* We sometimes need to clobber "cc" with gcc2, but that would not be
116    understood by gcc1.  Use cpp to avoid major code duplication.  */
117 #if __GNUC__ < 2
118 #define __CLOBBER_CC
119 #define __AND_CLOBBER_CC
120 #else /* __GNUC__ >= 2 */
121 #define __CLOBBER_CC : "cc"
122 #define __AND_CLOBBER_CC , "cc"
123 #endif /* __GNUC__ < 2 */
124
125 #if defined (__alpha) && W_TYPE_SIZE == 64
126 #define umul_ppmm(ph, pl, m0, m1) \
127   do {                                                                  \
128     UDItype __m0 = (m0), __m1 = (m1);                                   \
129     (ph) = __builtin_alpha_umulh (__m0, __m1);                          \
130     (pl) = __m0 * __m1;                                                 \
131   } while (0)
132 #define UMUL_TIME 46
133 #ifndef LONGLONG_STANDALONE
134 #define udiv_qrnnd(q, r, n1, n0, d) \
135   do { UDItype __r;                                                     \
136     (q) = __udiv_qrnnd (&__r, (n1), (n0), (d));                         \
137     (r) = __r;                                                          \
138   } while (0)
139 extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype);
140 #define UDIV_TIME 220
141 #endif /* LONGLONG_STANDALONE */
142 #ifdef __alpha_cix__
143 #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clzl (X))
144 #define count_trailing_zeros(COUNT,X)   ((COUNT) = __builtin_ctzl (X))
145 #define COUNT_LEADING_ZEROS_0 64
146 #else
147 #define count_leading_zeros(COUNT,X) \
148   do {                                                                  \
149     UDItype __xr = (X), __t, __a;                                       \
150     __t = __builtin_alpha_cmpbge (0, __xr);                             \
151     __a = __clz_tab[__t ^ 0xff] - 1;                                    \
152     __t = __builtin_alpha_extbl (__xr, __a);                            \
153     (COUNT) = 64 - (__clz_tab[__t] + __a*8);                            \
154   } while (0)
155 #define count_trailing_zeros(COUNT,X) \
156   do {                                                                  \
157     UDItype __xr = (X), __t, __a;                                       \
158     __t = __builtin_alpha_cmpbge (0, __xr);                             \
159     __t = ~__t & -~__t;                                                 \
160     __a = ((__t & 0xCC) != 0) * 2;                                      \
161     __a += ((__t & 0xF0) != 0) * 4;                                     \
162     __a += ((__t & 0xAA) != 0);                                         \
163     __t = __builtin_alpha_extbl (__xr, __a);                            \
164     __a <<= 3;                                                          \
165     __t &= -__t;                                                        \
166     __a += ((__t & 0xCC) != 0) * 2;                                     \
167     __a += ((__t & 0xF0) != 0) * 4;                                     \
168     __a += ((__t & 0xAA) != 0);                                         \
169     (COUNT) = __a;                                                      \
170   } while (0)
171 #endif /* __alpha_cix__ */
172 #endif /* __alpha */
173
174 #if defined (__arc__) && W_TYPE_SIZE == 32
175 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
176   __asm__ ("add.f       %1, %4, %5\n\tadc       %0, %2, %3"             \
177            : "=r" ((USItype) (sh)),                                     \
178              "=&r" ((USItype) (sl))                                     \
179            : "%r" ((USItype) (ah)),                                     \
180              "rIJ" ((USItype) (bh)),                                    \
181              "%r" ((USItype) (al)),                                     \
182              "rIJ" ((USItype) (bl)))
183 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
184   __asm__ ("sub.f       %1, %4, %5\n\tsbc       %0, %2, %3"             \
185            : "=r" ((USItype) (sh)),                                     \
186              "=&r" ((USItype) (sl))                                     \
187            : "r" ((USItype) (ah)),                                      \
188              "rIJ" ((USItype) (bh)),                                    \
189              "r" ((USItype) (al)),                                      \
190              "rIJ" ((USItype) (bl)))
191
192 #define __umulsidi3(u,v) ((UDItype)(USItype)u*(USItype)v)
193 #ifdef __ARC_NORM__
194 #define count_leading_zeros(count, x) \
195   do                                                                    \
196     {                                                                   \
197       SItype c_;                                                        \
198                                                                         \
199       __asm__ ("norm.f\t%0,%1\n\tmov.mi\t%0,-1" : "=r" (c_) : "r" (x) : "cc");\
200       (count) = c_ + 1;                                                 \
201     }                                                                   \
202   while (0)
203 #define COUNT_LEADING_ZEROS_0 32
204 #endif
205 #endif
206
207 #if defined (__arm__) && (defined (__thumb2__) || !defined (__thumb__)) \
208  && W_TYPE_SIZE == 32
209 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
210   __asm__ ("adds        %1, %4, %5\n\tadc       %0, %2, %3"             \
211            : "=r" ((USItype) (sh)),                                     \
212              "=&r" ((USItype) (sl))                                     \
213            : "%r" ((USItype) (ah)),                                     \
214              "rI" ((USItype) (bh)),                                     \
215              "%r" ((USItype) (al)),                                     \
216              "rI" ((USItype) (bl)) __CLOBBER_CC)
217 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
218   __asm__ ("subs        %1, %4, %5\n\tsbc       %0, %2, %3"             \
219            : "=r" ((USItype) (sh)),                                     \
220              "=&r" ((USItype) (sl))                                     \
221            : "r" ((USItype) (ah)),                                      \
222              "rI" ((USItype) (bh)),                                     \
223              "r" ((USItype) (al)),                                      \
224              "rI" ((USItype) (bl)) __CLOBBER_CC)
225 # if defined(__ARM_ARCH_2__) || defined(__ARM_ARCH_2A__) \
226      || defined(__ARM_ARCH_3__)
227 #  define umul_ppmm(xh, xl, a, b)                                       \
228   do {                                                                  \
229     register USItype __t0, __t1, __t2;                                  \
230     __asm__ ("%@ Inlined umul_ppmm\n"                                   \
231            "    mov     %2, %5, lsr #16\n"                              \
232            "    mov     %0, %6, lsr #16\n"                              \
233            "    bic     %3, %5, %2, lsl #16\n"                          \
234            "    bic     %4, %6, %0, lsl #16\n"                          \
235            "    mul     %1, %3, %4\n"                                   \
236            "    mul     %4, %2, %4\n"                                   \
237            "    mul     %3, %0, %3\n"                                   \
238            "    mul     %0, %2, %0\n"                                   \
239            "    adds    %3, %4, %3\n"                                   \
240            "    addcs   %0, %0, #65536\n"                               \
241            "    adds    %1, %1, %3, lsl #16\n"                          \
242            "    adc     %0, %0, %3, lsr #16"                            \
243            : "=&r" ((USItype) (xh)),                                    \
244              "=r" ((USItype) (xl)),                                     \
245              "=&r" (__t0), "=&r" (__t1), "=r" (__t2)                    \
246            : "r" ((USItype) (a)),                                       \
247              "r" ((USItype) (b)) __CLOBBER_CC );                        \
248   } while (0)
249 #  define UMUL_TIME 20
250 # else
251 #  define umul_ppmm(xh, xl, a, b)                                       \
252   do {                                                                  \
253     /* Generate umull, under compiler control.  */                      \
254     register UDItype __t0 = (UDItype)(USItype)(a) * (USItype)(b);       \
255     (xl) = (USItype)__t0;                                               \
256     (xh) = (USItype)(__t0 >> 32);                                       \
257   } while (0)
258 #  define UMUL_TIME 3
259 # endif
260 # define UDIV_TIME 100
261 #endif /* __arm__ */
262
263 #if defined(__arm__)
264 /* Let gcc decide how best to implement count_leading_zeros.  */
265 #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clz (X))
266 #define count_trailing_zeros(COUNT,X)   ((COUNT) = __builtin_ctz (X))
267 #define COUNT_LEADING_ZEROS_0 32
268 #endif
269
270 #if defined (__AVR__)
271
272 #if W_TYPE_SIZE == 16
273 #define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clz (X))
274 #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctz (X))
275 #define COUNT_LEADING_ZEROS_0 16
276 #endif /* W_TYPE_SIZE == 16 */
277
278 #if W_TYPE_SIZE == 32
279 #define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clzl (X))
280 #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X))
281 #define COUNT_LEADING_ZEROS_0 32
282 #endif /* W_TYPE_SIZE == 32 */
283
284 #if W_TYPE_SIZE == 64
285 #define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clzll (X))
286 #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzll (X))
287 #define COUNT_LEADING_ZEROS_0 64
288 #endif /* W_TYPE_SIZE == 64 */
289
290 #endif /* defined (__AVR__) */
291
292 #if defined (__CRIS__)
293
294 #if __CRIS_arch_version >= 3
295 #define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
296 #define COUNT_LEADING_ZEROS_0 32
297 #endif /* __CRIS_arch_version >= 3 */
298
299 #if __CRIS_arch_version >= 8
300 #define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
301 #endif /* __CRIS_arch_version >= 8 */
302
303 #if __CRIS_arch_version >= 10
304 #define __umulsidi3(u,v) ((UDItype)(USItype) (u) * (UDItype)(USItype) (v))
305 #else
306 #define __umulsidi3 __umulsidi3
307 extern UDItype __umulsidi3 (USItype, USItype);
308 #endif /* __CRIS_arch_version >= 10 */
309
310 #define umul_ppmm(w1, w0, u, v)         \
311   do {                                  \
312     UDItype __x = __umulsidi3 (u, v);   \
313     (w0) = (USItype) (__x);             \
314     (w1) = (USItype) (__x >> 32);       \
315   } while (0)
316
317 /* FIXME: defining add_ssaaaa and sub_ddmmss should be advantageous for
318    DFmode ("double" intrinsics, avoiding two of the three insns handling
319    carry), but defining them as open-code C composing and doing the
320    operation in DImode (UDImode) shows that the DImode needs work:
321    register pressure from requiring neighboring registers and the
322    traffic to and from them come to dominate, in the 4.7 series.  */
323
324 #endif /* defined (__CRIS__) */
325
326 #if defined (__hppa) && W_TYPE_SIZE == 32
327 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
328   __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0"                              \
329            : "=r" ((USItype) (sh)),                                     \
330              "=&r" ((USItype) (sl))                                     \
331            : "%rM" ((USItype) (ah)),                                    \
332              "rM" ((USItype) (bh)),                                     \
333              "%rM" ((USItype) (al)),                                    \
334              "rM" ((USItype) (bl)))
335 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
336   __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0"                              \
337            : "=r" ((USItype) (sh)),                                     \
338              "=&r" ((USItype) (sl))                                     \
339            : "rM" ((USItype) (ah)),                                     \
340              "rM" ((USItype) (bh)),                                     \
341              "rM" ((USItype) (al)),                                     \
342              "rM" ((USItype) (bl)))
343 #if defined (_PA_RISC1_1)
344 #define umul_ppmm(w1, w0, u, v) \
345   do {                                                                  \
346     union                                                               \
347       {                                                                 \
348         UDItype __f;                                                    \
349         struct {USItype __w1, __w0;} __w1w0;                            \
350       } __t;                                                            \
351     __asm__ ("xmpyu %1,%2,%0"                                           \
352              : "=x" (__t.__f)                                           \
353              : "x" ((USItype) (u)),                                     \
354                "x" ((USItype) (v)));                                    \
355     (w1) = __t.__w1w0.__w1;                                             \
356     (w0) = __t.__w1w0.__w0;                                             \
357      } while (0)
358 #define UMUL_TIME 8
359 #else
360 #define UMUL_TIME 30
361 #endif
362 #define UDIV_TIME 40
363 #define count_leading_zeros(count, x) \
364   do {                                                                  \
365     USItype __tmp;                                                      \
366     __asm__ (                                                           \
367        "ldi             1,%0\n"                                         \
368 "       extru,=         %1,15,16,%%r0           ; Bits 31..16 zero?\n"  \
369 "       extru,tr        %1,15,16,%1             ; No.  Shift down, skip add.\n"\
370 "       ldo             16(%0),%0               ; Yes.  Perform add.\n" \
371 "       extru,=         %1,23,8,%%r0            ; Bits 15..8 zero?\n"   \
372 "       extru,tr        %1,23,8,%1              ; No.  Shift down, skip add.\n"\
373 "       ldo             8(%0),%0                ; Yes.  Perform add.\n" \
374 "       extru,=         %1,27,4,%%r0            ; Bits 7..4 zero?\n"    \
375 "       extru,tr        %1,27,4,%1              ; No.  Shift down, skip add.\n"\
376 "       ldo             4(%0),%0                ; Yes.  Perform add.\n" \
377 "       extru,=         %1,29,2,%%r0            ; Bits 3..2 zero?\n"    \
378 "       extru,tr        %1,29,2,%1              ; No.  Shift down, skip add.\n"\
379 "       ldo             2(%0),%0                ; Yes.  Perform add.\n" \
380 "       extru           %1,30,1,%1              ; Extract bit 1.\n"     \
381 "       sub             %0,%1,%0                ; Subtract it.\n"       \
382         : "=r" (count), "=r" (__tmp) : "1" (x));                        \
383   } while (0)
384 #endif
385
386 #if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32
387 #if !defined (__zarch__)
388 #define smul_ppmm(xh, xl, m0, m1) \
389   do {                                                                  \
390     union {DItype __ll;                                                 \
391            struct {USItype __h, __l;} __i;                              \
392           } __x;                                                        \
393     __asm__ ("lr %N0,%1\n\tmr %0,%2"                                    \
394              : "=&r" (__x.__ll)                                         \
395              : "r" (m0), "r" (m1));                                     \
396     (xh) = __x.__i.__h; (xl) = __x.__i.__l;                             \
397   } while (0)
398 #define sdiv_qrnnd(q, r, n1, n0, d) \
399   do {                                                                  \
400     union {DItype __ll;                                                 \
401            struct {USItype __h, __l;} __i;                              \
402           } __x;                                                        \
403     __x.__i.__h = n1; __x.__i.__l = n0;                                 \
404     __asm__ ("dr %0,%2"                                                 \
405              : "=r" (__x.__ll)                                          \
406              : "0" (__x.__ll), "r" (d));                                \
407     (q) = __x.__i.__l; (r) = __x.__i.__h;                               \
408   } while (0)
409 #else
410 #define smul_ppmm(xh, xl, m0, m1) \
411   do {                                                                  \
412     register SItype __r0 __asm__ ("0");                                 \
413     register SItype __r1 __asm__ ("1") = (m0);                          \
414                                                                         \
415     __asm__ ("mr\t%%r0,%3"                                              \
416              : "=r" (__r0), "=r" (__r1)                                 \
417              : "r"  (__r1),  "r" (m1));                                 \
418     (xh) = __r0; (xl) = __r1;                                           \
419   } while (0)
420
421 #define sdiv_qrnnd(q, r, n1, n0, d) \
422   do {                                                                  \
423     register SItype __r0 __asm__ ("0") = (n1);                          \
424     register SItype __r1 __asm__ ("1") = (n0);                          \
425                                                                         \
426     __asm__ ("dr\t%%r0,%4"                                              \
427              : "=r" (__r0), "=r" (__r1)                                 \
428              : "r" (__r0), "r" (__r1), "r" (d));                        \
429     (q) = __r1; (r) = __r0;                                             \
430   } while (0)
431 #endif /* __zarch__ */
432 #endif
433
434 #if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
435 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
436   __asm__ ("add{l} {%5,%1|%1,%5}\n\tadc{l} {%3,%0|%0,%3}"               \
437            : "=r" ((USItype) (sh)),                                     \
438              "=&r" ((USItype) (sl))                                     \
439            : "%0" ((USItype) (ah)),                                     \
440              "g" ((USItype) (bh)),                                      \
441              "%1" ((USItype) (al)),                                     \
442              "g" ((USItype) (bl)))
443 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
444   __asm__ ("sub{l} {%5,%1|%1,%5}\n\tsbb{l} {%3,%0|%0,%3}"               \
445            : "=r" ((USItype) (sh)),                                     \
446              "=&r" ((USItype) (sl))                                     \
447            : "0" ((USItype) (ah)),                                      \
448              "g" ((USItype) (bh)),                                      \
449              "1" ((USItype) (al)),                                      \
450              "g" ((USItype) (bl)))
451 #define umul_ppmm(w1, w0, u, v) \
452   __asm__ ("mul{l} %3"                                                  \
453            : "=a" ((USItype) (w0)),                                     \
454              "=d" ((USItype) (w1))                                      \
455            : "%0" ((USItype) (u)),                                      \
456              "rm" ((USItype) (v)))
457 #define udiv_qrnnd(q, r, n1, n0, dv) \
458   __asm__ ("div{l} %4"                                                  \
459            : "=a" ((USItype) (q)),                                      \
460              "=d" ((USItype) (r))                                       \
461            : "0" ((USItype) (n0)),                                      \
462              "1" ((USItype) (n1)),                                      \
463              "rm" ((USItype) (dv)))
464 #define count_leading_zeros(count, x)   ((count) = __builtin_clz (x))
465 #define count_trailing_zeros(count, x)  ((count) = __builtin_ctz (x))
466 #define UMUL_TIME 40
467 #define UDIV_TIME 40
468 #endif /* 80x86 */
469
470 #if (defined (__x86_64__) || defined (__i386__)) && W_TYPE_SIZE == 64
471 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
472   __asm__ ("add{q} {%5,%1|%1,%5}\n\tadc{q} {%3,%0|%0,%3}"               \
473            : "=r" ((UDItype) (sh)),                                     \
474              "=&r" ((UDItype) (sl))                                     \
475            : "%0" ((UDItype) (ah)),                                     \
476              "rme" ((UDItype) (bh)),                                    \
477              "%1" ((UDItype) (al)),                                     \
478              "rme" ((UDItype) (bl)))
479 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
480   __asm__ ("sub{q} {%5,%1|%1,%5}\n\tsbb{q} {%3,%0|%0,%3}"               \
481            : "=r" ((UDItype) (sh)),                                     \
482              "=&r" ((UDItype) (sl))                                     \
483            : "0" ((UDItype) (ah)),                                      \
484              "rme" ((UDItype) (bh)),                                    \
485              "1" ((UDItype) (al)),                                      \
486              "rme" ((UDItype) (bl)))
487 #define umul_ppmm(w1, w0, u, v) \
488   __asm__ ("mul{q} %3"                                                  \
489            : "=a" ((UDItype) (w0)),                                     \
490              "=d" ((UDItype) (w1))                                      \
491            : "%0" ((UDItype) (u)),                                      \
492              "rm" ((UDItype) (v)))
493 #define udiv_qrnnd(q, r, n1, n0, dv) \
494   __asm__ ("div{q} %4"                                                  \
495            : "=a" ((UDItype) (q)),                                      \
496              "=d" ((UDItype) (r))                                       \
497            : "0" ((UDItype) (n0)),                                      \
498              "1" ((UDItype) (n1)),                                      \
499              "rm" ((UDItype) (dv)))
500 #define count_leading_zeros(count, x)   ((count) = __builtin_clzll (x))
501 #define count_trailing_zeros(count, x)  ((count) = __builtin_ctzll (x))
502 #define UMUL_TIME 40
503 #define UDIV_TIME 40
504 #endif /* x86_64 */
505
506 #if defined (__i960__) && W_TYPE_SIZE == 32
507 #define umul_ppmm(w1, w0, u, v) \
508   ({union {UDItype __ll;                                                \
509            struct {USItype __l, __h;} __i;                              \
510           } __xx;                                                       \
511   __asm__ ("emul        %2,%1,%0"                                       \
512            : "=d" (__xx.__ll)                                           \
513            : "%dI" ((USItype) (u)),                                     \
514              "dI" ((USItype) (v)));                                     \
515   (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
516 #define __umulsidi3(u, v) \
517   ({UDItype __w;                                                        \
518     __asm__ ("emul      %2,%1,%0"                                       \
519              : "=d" (__w)                                               \
520              : "%dI" ((USItype) (u)),                                   \
521                "dI" ((USItype) (v)));                                   \
522     __w; })
523 #endif /* __i960__ */
524
525 #if defined (__ia64) && W_TYPE_SIZE == 64
526 /* This form encourages gcc (pre-release 3.4 at least) to emit predicated
527    "sub r=r,r" and "sub r=r,r,1", giving a 2 cycle latency.  The generic
528    code using "al<bl" arithmetically comes out making an actual 0 or 1 in a
529    register, which takes an extra cycle.  */
530 #define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
531   do {                                                                  \
532     UWtype __x;                                                         \
533     __x = (al) - (bl);                                                  \
534     if ((al) < (bl))                                                    \
535       (sh) = (ah) - (bh) - 1;                                           \
536     else                                                                \
537       (sh) = (ah) - (bh);                                               \
538     (sl) = __x;                                                         \
539   } while (0)
540
541 /* Do both product parts in assembly, since that gives better code with
542    all gcc versions.  Some callers will just use the upper part, and in
543    that situation we waste an instruction, but not any cycles.  */
544 #define umul_ppmm(ph, pl, m0, m1)                                       \
545   __asm__ ("xma.hu %0 = %2, %3, f0\n\txma.l %1 = %2, %3, f0"            \
546            : "=&f" (ph), "=f" (pl)                                      \
547            : "f" (m0), "f" (m1))
548 #define count_leading_zeros(count, x)                                   \
549   do {                                                                  \
550     UWtype _x = (x), _y, _a, _c;                                        \
551     __asm__ ("mux1 %0 = %1, @rev" : "=r" (_y) : "r" (_x));              \
552     __asm__ ("czx1.l %0 = %1" : "=r" (_a) : "r" (-_y | _y));            \
553     _c = (_a - 1) << 3;                                                 \
554     _x >>= _c;                                                          \
555     if (_x >= 1 << 4)                                                   \
556       _x >>= 4, _c += 4;                                                \
557     if (_x >= 1 << 2)                                                   \
558       _x >>= 2, _c += 2;                                                \
559     _c += _x >> 1;                                                      \
560     (count) =  W_TYPE_SIZE - 1 - _c;                                    \
561   } while (0)
562 /* similar to what gcc does for __builtin_ffs, but 0 based rather than 1
563    based, and we don't need a special case for x==0 here */
564 #define count_trailing_zeros(count, x)                                  \
565   do {                                                                  \
566     UWtype __ctz_x = (x);                                               \
567     __asm__ ("popcnt %0 = %1"                                           \
568              : "=r" (count)                                             \
569              : "r" ((__ctz_x-1) & ~__ctz_x));                           \
570   } while (0)
571 #define UMUL_TIME 14
572 #endif
573
574 #if defined (__M32R__) && W_TYPE_SIZE == 32
575 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
576   /* The cmp clears the condition bit.  */ \
577   __asm__ ("cmp %0,%0\n\taddx %1,%5\n\taddx %0,%3"                      \
578            : "=r" ((USItype) (sh)),                                     \
579              "=&r" ((USItype) (sl))                                     \
580            : "0" ((USItype) (ah)),                                      \
581              "r" ((USItype) (bh)),                                      \
582              "1" ((USItype) (al)),                                      \
583              "r" ((USItype) (bl))                                       \
584            : "cbit")
585 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
586   /* The cmp clears the condition bit.  */ \
587   __asm__ ("cmp %0,%0\n\tsubx %1,%5\n\tsubx %0,%3"                      \
588            : "=r" ((USItype) (sh)),                                     \
589              "=&r" ((USItype) (sl))                                     \
590            : "0" ((USItype) (ah)),                                      \
591              "r" ((USItype) (bh)),                                      \
592              "1" ((USItype) (al)),                                      \
593              "r" ((USItype) (bl))                                       \
594            : "cbit")
595 #endif /* __M32R__ */
596
597 #if defined (__mc68000__) && W_TYPE_SIZE == 32
598 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
599   __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0"                              \
600            : "=d" ((USItype) (sh)),                                     \
601              "=&d" ((USItype) (sl))                                     \
602            : "%0" ((USItype) (ah)),                                     \
603              "d" ((USItype) (bh)),                                      \
604              "%1" ((USItype) (al)),                                     \
605              "g" ((USItype) (bl)))
606 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
607   __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0"                              \
608            : "=d" ((USItype) (sh)),                                     \
609              "=&d" ((USItype) (sl))                                     \
610            : "0" ((USItype) (ah)),                                      \
611              "d" ((USItype) (bh)),                                      \
612              "1" ((USItype) (al)),                                      \
613              "g" ((USItype) (bl)))
614
615 /* The '020, '030, '040, '060 and CPU32 have 32x32->64 and 64/32->32q-32r.  */
616 #if (defined (__mc68020__) && !defined (__mc68060__))
617 #define umul_ppmm(w1, w0, u, v) \
618   __asm__ ("mulu%.l %3,%1:%0"                                           \
619            : "=d" ((USItype) (w0)),                                     \
620              "=d" ((USItype) (w1))                                      \
621            : "%0" ((USItype) (u)),                                      \
622              "dmi" ((USItype) (v)))
623 #define UMUL_TIME 45
624 #define udiv_qrnnd(q, r, n1, n0, d) \
625   __asm__ ("divu%.l %4,%1:%0"                                           \
626            : "=d" ((USItype) (q)),                                      \
627              "=d" ((USItype) (r))                                       \
628            : "0" ((USItype) (n0)),                                      \
629              "1" ((USItype) (n1)),                                      \
630              "dmi" ((USItype) (d)))
631 #define UDIV_TIME 90
632 #define sdiv_qrnnd(q, r, n1, n0, d) \
633   __asm__ ("divs%.l %4,%1:%0"                                           \
634            : "=d" ((USItype) (q)),                                      \
635              "=d" ((USItype) (r))                                       \
636            : "0" ((USItype) (n0)),                                      \
637              "1" ((USItype) (n1)),                                      \
638              "dmi" ((USItype) (d)))
639
640 #elif defined (__mcoldfire__) /* not mc68020 */
641
642 #define umul_ppmm(xh, xl, a, b) \
643   __asm__ ("| Inlined umul_ppmm\n"                                      \
644            "    move%.l %2,%/d0\n"                                      \
645            "    move%.l %3,%/d1\n"                                      \
646            "    move%.l %/d0,%/d2\n"                                    \
647            "    swap    %/d0\n"                                         \
648            "    move%.l %/d1,%/d3\n"                                    \
649            "    swap    %/d1\n"                                         \
650            "    move%.w %/d2,%/d4\n"                                    \
651            "    mulu    %/d3,%/d4\n"                                    \
652            "    mulu    %/d1,%/d2\n"                                    \
653            "    mulu    %/d0,%/d3\n"                                    \
654            "    mulu    %/d0,%/d1\n"                                    \
655            "    move%.l %/d4,%/d0\n"                                    \
656            "    clr%.w  %/d0\n"                                         \
657            "    swap    %/d0\n"                                         \
658            "    add%.l  %/d0,%/d2\n"                                    \
659            "    add%.l  %/d3,%/d2\n"                                    \
660            "    jcc     1f\n"                                           \
661            "    add%.l  %#65536,%/d1\n"                                 \
662            "1:  swap    %/d2\n"                                         \
663            "    moveq   %#0,%/d0\n"                                     \
664            "    move%.w %/d2,%/d0\n"                                    \
665            "    move%.w %/d4,%/d2\n"                                    \
666            "    move%.l %/d2,%1\n"                                      \
667            "    add%.l  %/d1,%/d0\n"                                    \
668            "    move%.l %/d0,%0"                                        \
669            : "=g" ((USItype) (xh)),                                     \
670              "=g" ((USItype) (xl))                                      \
671            : "g" ((USItype) (a)),                                       \
672              "g" ((USItype) (b))                                        \
673            : "d0", "d1", "d2", "d3", "d4")
674 #define UMUL_TIME 100
675 #define UDIV_TIME 400
676 #else /* not ColdFire */
677 /* %/ inserts REGISTER_PREFIX, %# inserts IMMEDIATE_PREFIX.  */
678 #define umul_ppmm(xh, xl, a, b) \
679   __asm__ ("| Inlined umul_ppmm\n"                                      \
680            "    move%.l %2,%/d0\n"                                      \
681            "    move%.l %3,%/d1\n"                                      \
682            "    move%.l %/d0,%/d2\n"                                    \
683            "    swap    %/d0\n"                                         \
684            "    move%.l %/d1,%/d3\n"                                    \
685            "    swap    %/d1\n"                                         \
686            "    move%.w %/d2,%/d4\n"                                    \
687            "    mulu    %/d3,%/d4\n"                                    \
688            "    mulu    %/d1,%/d2\n"                                    \
689            "    mulu    %/d0,%/d3\n"                                    \
690            "    mulu    %/d0,%/d1\n"                                    \
691            "    move%.l %/d4,%/d0\n"                                    \
692            "    eor%.w  %/d0,%/d0\n"                                    \
693            "    swap    %/d0\n"                                         \
694            "    add%.l  %/d0,%/d2\n"                                    \
695            "    add%.l  %/d3,%/d2\n"                                    \
696            "    jcc     1f\n"                                           \
697            "    add%.l  %#65536,%/d1\n"                                 \
698            "1:  swap    %/d2\n"                                         \
699            "    moveq   %#0,%/d0\n"                                     \
700            "    move%.w %/d2,%/d0\n"                                    \
701            "    move%.w %/d4,%/d2\n"                                    \
702            "    move%.l %/d2,%1\n"                                      \
703            "    add%.l  %/d1,%/d0\n"                                    \
704            "    move%.l %/d0,%0"                                        \
705            : "=g" ((USItype) (xh)),                                     \
706              "=g" ((USItype) (xl))                                      \
707            : "g" ((USItype) (a)),                                       \
708              "g" ((USItype) (b))                                        \
709            : "d0", "d1", "d2", "d3", "d4")
710 #define UMUL_TIME 100
711 #define UDIV_TIME 400
712
713 #endif /* not mc68020 */
714
715 /* The '020, '030, '040 and '060 have bitfield insns.
716    cpu32 disguises as a 68020, but lacks them.  */
717 #if defined (__mc68020__) && !defined (__mcpu32__)
718 #define count_leading_zeros(count, x) \
719   __asm__ ("bfffo %1{%b2:%b2},%0"                                       \
720            : "=d" ((USItype) (count))                                   \
721            : "od" ((USItype) (x)), "n" (0))
722 /* Some ColdFire architectures have a ff1 instruction supported via
723    __builtin_clz. */
724 #elif defined (__mcfisaaplus__) || defined (__mcfisac__)
725 #define count_leading_zeros(count,x) ((count) = __builtin_clz (x))
726 #define COUNT_LEADING_ZEROS_0 32
727 #endif
728 #endif /* mc68000 */
729
730 #if defined (__m88000__) && W_TYPE_SIZE == 32
731 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
732   __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3"                   \
733            : "=r" ((USItype) (sh)),                                     \
734              "=&r" ((USItype) (sl))                                     \
735            : "%rJ" ((USItype) (ah)),                                    \
736              "rJ" ((USItype) (bh)),                                     \
737              "%rJ" ((USItype) (al)),                                    \
738              "rJ" ((USItype) (bl)))
739 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
740   __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3"                   \
741            : "=r" ((USItype) (sh)),                                     \
742              "=&r" ((USItype) (sl))                                     \
743            : "rJ" ((USItype) (ah)),                                     \
744              "rJ" ((USItype) (bh)),                                     \
745              "rJ" ((USItype) (al)),                                     \
746              "rJ" ((USItype) (bl)))
747 #define count_leading_zeros(count, x) \
748   do {                                                                  \
749     USItype __cbtmp;                                                    \
750     __asm__ ("ff1 %0,%1"                                                \
751              : "=r" (__cbtmp)                                           \
752              : "r" ((USItype) (x)));                                    \
753     (count) = __cbtmp ^ 31;                                             \
754   } while (0)
755 #define COUNT_LEADING_ZEROS_0 63 /* sic */
756 #if defined (__mc88110__)
757 #define umul_ppmm(wh, wl, u, v) \
758   do {                                                                  \
759     union {UDItype __ll;                                                \
760            struct {USItype __h, __l;} __i;                              \
761           } __xx;                                                       \
762     __asm__ ("mulu.d    %0,%1,%2"                                       \
763              : "=r" (__xx.__ll)                                         \
764              : "r" ((USItype) (u)),                                     \
765                "r" ((USItype) (v)));                                    \
766     (wh) = __xx.__i.__h;                                                \
767     (wl) = __xx.__i.__l;                                                \
768   } while (0)
769 #define udiv_qrnnd(q, r, n1, n0, d) \
770   ({union {UDItype __ll;                                                \
771            struct {USItype __h, __l;} __i;                              \
772           } __xx;                                                       \
773   USItype __q;                                                          \
774   __xx.__i.__h = (n1); __xx.__i.__l = (n0);                             \
775   __asm__ ("divu.d %0,%1,%2"                                            \
776            : "=r" (__q)                                                 \
777            : "r" (__xx.__ll),                                           \
778              "r" ((USItype) (d)));                                      \
779   (r) = (n0) - __q * (d); (q) = __q; })
780 #define UMUL_TIME 5
781 #define UDIV_TIME 25
782 #else
783 #define UMUL_TIME 17
784 #define UDIV_TIME 150
785 #endif /* __mc88110__ */
786 #endif /* __m88000__ */
787
788 #if defined (__mn10300__)
789 # if defined (__AM33__)
790 #  define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clz (X))
791 #  define umul_ppmm(w1, w0, u, v)               \
792     asm("mulu %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
793 #  define smul_ppmm(w1, w0, u, v)               \
794     asm("mul %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
795 # else
796 #  define umul_ppmm(w1, w0, u, v)               \
797     asm("nop; nop; mulu %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
798 #  define smul_ppmm(w1, w0, u, v)               \
799     asm("nop; nop; mul %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
800 # endif
801 # define add_ssaaaa(sh, sl, ah, al, bh, bl)     \
802   do {                                          \
803     DWunion __s, __a, __b;                      \
804     __a.s.low = (al); __a.s.high = (ah);        \
805     __b.s.low = (bl); __b.s.high = (bh);        \
806     __s.ll = __a.ll + __b.ll;                   \
807     (sl) = __s.s.low; (sh) = __s.s.high;        \
808   } while (0)
809 # define sub_ddmmss(sh, sl, ah, al, bh, bl)     \
810   do {                                          \
811     DWunion __s, __a, __b;                      \
812     __a.s.low = (al); __a.s.high = (ah);        \
813     __b.s.low = (bl); __b.s.high = (bh);        \
814     __s.ll = __a.ll - __b.ll;                   \
815     (sl) = __s.s.low; (sh) = __s.s.high;        \
816   } while (0)
817 # define udiv_qrnnd(q, r, nh, nl, d)            \
818   asm("divu %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
819 # define sdiv_qrnnd(q, r, nh, nl, d)            \
820   asm("div %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
821 # define UMUL_TIME 3
822 # define UDIV_TIME 38
823 #endif
824
825 #if defined (__mips__) && W_TYPE_SIZE == 32
826 #define umul_ppmm(w1, w0, u, v)                                         \
827   do {                                                                  \
828     UDItype __x = (UDItype) (USItype) (u) * (USItype) (v);              \
829     (w1) = (USItype) (__x >> 32);                                       \
830     (w0) = (USItype) (__x);                                             \
831   } while (0)
832 #define UMUL_TIME 10
833 #define UDIV_TIME 100
834
835 #if (__mips == 32 || __mips == 64) && ! __mips16
836 #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clz (X))
837 #define COUNT_LEADING_ZEROS_0 32
838 #endif
839 #endif /* __mips__ */
840
841 #if defined (__ns32000__) && W_TYPE_SIZE == 32
842 #define umul_ppmm(w1, w0, u, v) \
843   ({union {UDItype __ll;                                                \
844            struct {USItype __l, __h;} __i;                              \
845           } __xx;                                                       \
846   __asm__ ("meid %2,%0"                                                 \
847            : "=g" (__xx.__ll)                                           \
848            : "%0" ((USItype) (u)),                                      \
849              "g" ((USItype) (v)));                                      \
850   (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
851 #define __umulsidi3(u, v) \
852   ({UDItype __w;                                                        \
853     __asm__ ("meid %2,%0"                                               \
854              : "=g" (__w)                                               \
855              : "%0" ((USItype) (u)),                                    \
856                "g" ((USItype) (v)));                                    \
857     __w; })
858 #define udiv_qrnnd(q, r, n1, n0, d) \
859   ({union {UDItype __ll;                                                \
860            struct {USItype __l, __h;} __i;                              \
861           } __xx;                                                       \
862   __xx.__i.__h = (n1); __xx.__i.__l = (n0);                             \
863   __asm__ ("deid %2,%0"                                                 \
864            : "=g" (__xx.__ll)                                           \
865            : "0" (__xx.__ll),                                           \
866              "g" ((USItype) (d)));                                      \
867   (r) = __xx.__i.__l; (q) = __xx.__i.__h; })
868 #define count_trailing_zeros(count,x) \
869   do {                                                                  \
870     __asm__ ("ffsd     %2,%0"                                           \
871             : "=r" ((USItype) (count))                                  \
872             : "0" ((USItype) 0),                                        \
873               "r" ((USItype) (x)));                                     \
874   } while (0)
875 #endif /* __ns32000__ */
876
877 /* FIXME: We should test _IBMR2 here when we add assembly support for the
878    system vendor compilers.
879    FIXME: What's needed for gcc PowerPC VxWorks?  __vxworks__ is not good
880    enough, since that hits ARM and m68k too.  */
881 #if (defined (_ARCH_PPC)        /* AIX */                               \
882      || defined (__powerpc__)   /* gcc */                               \
883      || defined (__POWERPC__)   /* BEOS */                              \
884      || defined (__ppc__)       /* Darwin */                            \
885      || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */    \
886      || (defined (PPC) && defined (CPU_FAMILY)    /* VxWorks */               \
887          && CPU_FAMILY == PPC)                                                \
888      ) && W_TYPE_SIZE == 32
889 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
890   do {                                                                  \
891     if (__builtin_constant_p (bh) && (bh) == 0)                         \
892       __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2"                \
893              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
894     else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)         \
895       __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2"                \
896              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
897     else                                                                \
898       __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3"              \
899              : "=r" (sh), "=&r" (sl)                                    \
900              : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl));              \
901   } while (0)
902 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
903   do {                                                                  \
904     if (__builtin_constant_p (ah) && (ah) == 0)                         \
905       __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2"      \
906                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
907     else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0)         \
908       __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2"      \
909                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
910     else if (__builtin_constant_p (bh) && (bh) == 0)                    \
911       __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2"               \
912                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
913     else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)         \
914       __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2"               \
915                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
916     else                                                                \
917       __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2"    \
918                : "=r" (sh), "=&r" (sl)                                  \
919                : "r" (ah), "r" (bh), "rI" (al), "r" (bl));              \
920   } while (0)
921 #define count_leading_zeros(count, x) \
922   __asm__ ("cntlzw %0,%1" : "=r" (count) : "r" (x))
923 #define COUNT_LEADING_ZEROS_0 32
924 #if defined (_ARCH_PPC) || defined (__powerpc__) || defined (__POWERPC__) \
925   || defined (__ppc__)                                                    \
926   || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */       \
927   || (defined (PPC) && defined (CPU_FAMILY)    /* VxWorks */                  \
928          && CPU_FAMILY == PPC)
929 #define umul_ppmm(ph, pl, m0, m1) \
930   do {                                                                  \
931     USItype __m0 = (m0), __m1 = (m1);                                   \
932     __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1));      \
933     (pl) = __m0 * __m1;                                                 \
934   } while (0)
935 #define UMUL_TIME 15
936 #define smul_ppmm(ph, pl, m0, m1) \
937   do {                                                                  \
938     SItype __m0 = (m0), __m1 = (m1);                                    \
939     __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1));       \
940     (pl) = __m0 * __m1;                                                 \
941   } while (0)
942 #define SMUL_TIME 14
943 #define UDIV_TIME 120
944 #endif
945 #endif /* 32-bit POWER architecture variants.  */
946
947 /* We should test _IBMR2 here when we add assembly support for the system
948    vendor compilers.  */
949 #if (defined (_ARCH_PPC64) || defined (__powerpc64__)) && W_TYPE_SIZE == 64
950 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
951   do {                                                                  \
952     if (__builtin_constant_p (bh) && (bh) == 0)                         \
953       __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2"                \
954              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
955     else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)         \
956       __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2"                \
957              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
958     else                                                                \
959       __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3"              \
960              : "=r" (sh), "=&r" (sl)                                    \
961              : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl));              \
962   } while (0)
963 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
964   do {                                                                  \
965     if (__builtin_constant_p (ah) && (ah) == 0)                         \
966       __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2"      \
967                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
968     else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0)         \
969       __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2"      \
970                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
971     else if (__builtin_constant_p (bh) && (bh) == 0)                    \
972       __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2"               \
973                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
974     else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)         \
975       __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2"               \
976                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
977     else                                                                \
978       __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2"    \
979                : "=r" (sh), "=&r" (sl)                                  \
980                : "r" (ah), "r" (bh), "rI" (al), "r" (bl));              \
981   } while (0)
982 #define count_leading_zeros(count, x) \
983   __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
984 #define COUNT_LEADING_ZEROS_0 64
985 #define umul_ppmm(ph, pl, m0, m1) \
986   do {                                                                  \
987     UDItype __m0 = (m0), __m1 = (m1);                                   \
988     __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1));      \
989     (pl) = __m0 * __m1;                                                 \
990   } while (0)
991 #define UMUL_TIME 15
992 #define smul_ppmm(ph, pl, m0, m1) \
993   do {                                                                  \
994     DItype __m0 = (m0), __m1 = (m1);                                    \
995     __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1));       \
996     (pl) = __m0 * __m1;                                                 \
997   } while (0)
998 #define SMUL_TIME 14  /* ??? */
999 #define UDIV_TIME 120 /* ??? */
1000 #endif /* 64-bit PowerPC.  */
1001
1002 #if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
1003 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1004   __asm__ ("a %1,%5\n\tae %0,%3"                                        \
1005            : "=r" ((USItype) (sh)),                                     \
1006              "=&r" ((USItype) (sl))                                     \
1007            : "%0" ((USItype) (ah)),                                     \
1008              "r" ((USItype) (bh)),                                      \
1009              "%1" ((USItype) (al)),                                     \
1010              "r" ((USItype) (bl)))
1011 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1012   __asm__ ("s %1,%5\n\tse %0,%3"                                        \
1013            : "=r" ((USItype) (sh)),                                     \
1014              "=&r" ((USItype) (sl))                                     \
1015            : "0" ((USItype) (ah)),                                      \
1016              "r" ((USItype) (bh)),                                      \
1017              "1" ((USItype) (al)),                                      \
1018              "r" ((USItype) (bl)))
1019 #define umul_ppmm(ph, pl, m0, m1) \
1020   do {                                                                  \
1021     USItype __m0 = (m0), __m1 = (m1);                                   \
1022     __asm__ (                                                           \
1023        "s       r2,r2\n"                                                \
1024 "       mts     r10,%2\n"                                               \
1025 "       m       r2,%3\n"                                                \
1026 "       m       r2,%3\n"                                                \
1027 "       m       r2,%3\n"                                                \
1028 "       m       r2,%3\n"                                                \
1029 "       m       r2,%3\n"                                                \
1030 "       m       r2,%3\n"                                                \
1031 "       m       r2,%3\n"                                                \
1032 "       m       r2,%3\n"                                                \
1033 "       m       r2,%3\n"                                                \
1034 "       m       r2,%3\n"                                                \
1035 "       m       r2,%3\n"                                                \
1036 "       m       r2,%3\n"                                                \
1037 "       m       r2,%3\n"                                                \
1038 "       m       r2,%3\n"                                                \
1039 "       m       r2,%3\n"                                                \
1040 "       m       r2,%3\n"                                                \
1041 "       cas     %0,r2,r0\n"                                             \
1042 "       mfs     r10,%1"                                                 \
1043              : "=r" ((USItype) (ph)),                                   \
1044                "=r" ((USItype) (pl))                                    \
1045              : "%r" (__m0),                                             \
1046                 "r" (__m1)                                              \
1047              : "r2");                                                   \
1048     (ph) += ((((SItype) __m0 >> 31) & __m1)                             \
1049              + (((SItype) __m1 >> 31) & __m0));                         \
1050   } while (0)
1051 #define UMUL_TIME 20
1052 #define UDIV_TIME 200
1053 #define count_leading_zeros(count, x) \
1054   do {                                                                  \
1055     if ((x) >= 0x10000)                                                 \
1056       __asm__ ("clz     %0,%1"                                          \
1057                : "=r" ((USItype) (count))                               \
1058                : "r" ((USItype) (x) >> 16));                            \
1059     else                                                                \
1060       {                                                                 \
1061         __asm__ ("clz   %0,%1"                                          \
1062                  : "=r" ((USItype) (count))                             \
1063                  : "r" ((USItype) (x)));                                        \
1064         (count) += 16;                                                  \
1065       }                                                                 \
1066   } while (0)
1067 #endif
1068
1069 #if defined(__sh__) && !__SHMEDIA__ && W_TYPE_SIZE == 32
1070 #ifndef __sh1__
1071 #define umul_ppmm(w1, w0, u, v) \
1072   __asm__ (                                                             \
1073        "dmulu.l %2,%3\n\tsts%M1 macl,%1\n\tsts%M0       mach,%0"        \
1074            : "=r<" ((USItype)(w1)),                                     \
1075              "=r<" ((USItype)(w0))                                      \
1076            : "r" ((USItype)(u)),                                        \
1077              "r" ((USItype)(v))                                         \
1078            : "macl", "mach")
1079 #define UMUL_TIME 5
1080 #endif
1081
1082 /* This is the same algorithm as __udiv_qrnnd_c.  */
1083 #define UDIV_NEEDS_NORMALIZATION 1
1084
1085 #define udiv_qrnnd(q, r, n1, n0, d) \
1086   do {                                                                  \
1087     extern UWtype __udiv_qrnnd_16 (UWtype, UWtype)                      \
1088                         __attribute__ ((visibility ("hidden")));        \
1089     /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */  \
1090     __asm__ (                                                           \
1091         "mov%M4 %4,r5\n"                                                \
1092 "       swap.w %3,r4\n"                                                 \
1093 "       swap.w r5,r6\n"                                                 \
1094 "       jsr @%5\n"                                                      \
1095 "       shll16 r6\n"                                                    \
1096 "       swap.w r4,r4\n"                                                 \
1097 "       jsr @%5\n"                                                      \
1098 "       swap.w r1,%0\n"                                                 \
1099 "       or r1,%0"                                                       \
1100         : "=r" (q), "=&z" (r)                                           \
1101         : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16)          \
1102         : "r1", "r2", "r4", "r5", "r6", "pr", "t");                     \
1103   } while (0)
1104
1105 #define UDIV_TIME 80
1106
1107 #define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
1108   __asm__ ("clrt;subc %5,%1; subc %4,%0"                                \
1109            : "=r" (sh), "=r" (sl)                                       \
1110            : "0" (ah), "1" (al), "r" (bh), "r" (bl) : "t")
1111
1112 #endif /* __sh__ */
1113
1114 #if defined (__SH5__) && __SHMEDIA__ && W_TYPE_SIZE == 32
1115 #define __umulsidi3(u,v) ((UDItype)(USItype)(u)*(USItype)(v))
1116 #define count_leading_zeros(count, x) \
1117   do                                                                    \
1118     {                                                                   \
1119       UDItype x_ = (USItype)(x);                                        \
1120       SItype c_;                                                        \
1121                                                                         \
1122       __asm__ ("nsb %1, %0" : "=r" (c_) : "r" (x_));                    \
1123       (count) = c_ - 31;                                                \
1124     }                                                                   \
1125   while (0)
1126 #define COUNT_LEADING_ZEROS_0 32
1127 #endif
1128
1129 #if defined (__sparc__) && !defined (__arch64__) && !defined (__sparcv9) \
1130     && W_TYPE_SIZE == 32
1131 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1132   __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0"                          \
1133            : "=r" ((USItype) (sh)),                                     \
1134              "=&r" ((USItype) (sl))                                     \
1135            : "%rJ" ((USItype) (ah)),                                    \
1136              "rI" ((USItype) (bh)),                                     \
1137              "%rJ" ((USItype) (al)),                                    \
1138              "rI" ((USItype) (bl))                                      \
1139            __CLOBBER_CC)
1140 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1141   __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0"                          \
1142            : "=r" ((USItype) (sh)),                                     \
1143              "=&r" ((USItype) (sl))                                     \
1144            : "rJ" ((USItype) (ah)),                                     \
1145              "rI" ((USItype) (bh)),                                     \
1146              "rJ" ((USItype) (al)),                                     \
1147              "rI" ((USItype) (bl))                                      \
1148            __CLOBBER_CC)
1149 #if defined (__sparc_v9__)
1150 #define umul_ppmm(w1, w0, u, v) \
1151   do {                                                                  \
1152     register USItype __g1 asm ("g1");                                   \
1153     __asm__ ("umul\t%2,%3,%1\n\t"                                       \
1154              "srlx\t%1, 32, %0"                                         \
1155              : "=r" ((USItype) (w1)),                                   \
1156                "=r" (__g1)                                              \
1157              : "r" ((USItype) (u)),                                     \
1158                "r" ((USItype) (v)));                                    \
1159     (w0) = __g1;                                                        \
1160   } while (0)
1161 #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1162   __asm__ ("mov\t%2,%%y\n\t"                                            \
1163            "udiv\t%3,%4,%0\n\t"                                         \
1164            "umul\t%0,%4,%1\n\t"                                         \
1165            "sub\t%3,%1,%1"                                              \
1166            : "=&r" ((USItype) (__q)),                                   \
1167              "=&r" ((USItype) (__r))                                    \
1168            : "r" ((USItype) (__n1)),                                    \
1169              "r" ((USItype) (__n0)),                                    \
1170              "r" ((USItype) (__d)))
1171 #else
1172 #if defined (__sparc_v8__)
1173 #define umul_ppmm(w1, w0, u, v) \
1174   __asm__ ("umul %2,%3,%1;rd %%y,%0"                                    \
1175            : "=r" ((USItype) (w1)),                                     \
1176              "=r" ((USItype) (w0))                                      \
1177            : "r" ((USItype) (u)),                                       \
1178              "r" ((USItype) (v)))
1179 #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1180   __asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\
1181            : "=&r" ((USItype) (__q)),                                   \
1182              "=&r" ((USItype) (__r))                                    \
1183            : "r" ((USItype) (__n1)),                                    \
1184              "r" ((USItype) (__n0)),                                    \
1185              "r" ((USItype) (__d)))
1186 #else
1187 #if defined (__sparclite__)
1188 /* This has hardware multiply but not divide.  It also has two additional
1189    instructions, scan (find first set bit, from the high end) and divscc.  */
1190 #define umul_ppmm(w1, w0, u, v) \
1191   __asm__ ("umul %2,%3,%1;rd %%y,%0"                                    \
1192            : "=r" ((USItype) (w1)),                                     \
1193              "=r" ((USItype) (w0))                                      \
1194            : "r" ((USItype) (u)),                                       \
1195              "r" ((USItype) (v)))
1196 #define udiv_qrnnd(q, r, n1, n0, d) \
1197   __asm__ ("! Inlined udiv_qrnnd\n"                                     \
1198 "       wr      %%g0,%2,%%y     ! Not a delayed write for sparclite\n"  \
1199 "       tst     %%g0\n"                                                 \
1200 "       divscc  %3,%4,%%g1\n"                                           \
1201 "       divscc  %%g1,%4,%%g1\n"                                         \
1202 "       divscc  %%g1,%4,%%g1\n"                                         \
1203 "       divscc  %%g1,%4,%%g1\n"                                         \
1204 "       divscc  %%g1,%4,%%g1\n"                                         \
1205 "       divscc  %%g1,%4,%%g1\n"                                         \
1206 "       divscc  %%g1,%4,%%g1\n"                                         \
1207 "       divscc  %%g1,%4,%%g1\n"                                         \
1208 "       divscc  %%g1,%4,%%g1\n"                                         \
1209 "       divscc  %%g1,%4,%%g1\n"                                         \
1210 "       divscc  %%g1,%4,%%g1\n"                                         \
1211 "       divscc  %%g1,%4,%%g1\n"                                         \
1212 "       divscc  %%g1,%4,%%g1\n"                                         \
1213 "       divscc  %%g1,%4,%%g1\n"                                         \
1214 "       divscc  %%g1,%4,%%g1\n"                                         \
1215 "       divscc  %%g1,%4,%%g1\n"                                         \
1216 "       divscc  %%g1,%4,%%g1\n"                                         \
1217 "       divscc  %%g1,%4,%%g1\n"                                         \
1218 "       divscc  %%g1,%4,%%g1\n"                                         \
1219 "       divscc  %%g1,%4,%%g1\n"                                         \
1220 "       divscc  %%g1,%4,%%g1\n"                                         \
1221 "       divscc  %%g1,%4,%%g1\n"                                         \
1222 "       divscc  %%g1,%4,%%g1\n"                                         \
1223 "       divscc  %%g1,%4,%%g1\n"                                         \
1224 "       divscc  %%g1,%4,%%g1\n"                                         \
1225 "       divscc  %%g1,%4,%%g1\n"                                         \
1226 "       divscc  %%g1,%4,%%g1\n"                                         \
1227 "       divscc  %%g1,%4,%%g1\n"                                         \
1228 "       divscc  %%g1,%4,%%g1\n"                                         \
1229 "       divscc  %%g1,%4,%%g1\n"                                         \
1230 "       divscc  %%g1,%4,%%g1\n"                                         \
1231 "       divscc  %%g1,%4,%0\n"                                           \
1232 "       rd      %%y,%1\n"                                               \
1233 "       bl,a 1f\n"                                                      \
1234 "       add     %1,%4,%1\n"                                             \
1235 "1:     ! End of inline udiv_qrnnd"                                     \
1236            : "=r" ((USItype) (q)),                                      \
1237              "=r" ((USItype) (r))                                       \
1238            : "r" ((USItype) (n1)),                                      \
1239              "r" ((USItype) (n0)),                                      \
1240              "rI" ((USItype) (d))                                       \
1241            : "g1" __AND_CLOBBER_CC)
1242 #define UDIV_TIME 37
1243 #define count_leading_zeros(count, x) \
1244   do {                                                                  \
1245   __asm__ ("scan %1,1,%0"                                               \
1246            : "=r" ((USItype) (count))                                   \
1247            : "r" ((USItype) (x)));                                      \
1248   } while (0)
1249 /* Early sparclites return 63 for an argument of 0, but the documentation
1250    warns that future implementations might change this.  Therefore, leave
1251    COUNT_LEADING_ZEROS_0 undefined.  */
1252 #else
1253 /* SPARC without integer multiplication and divide instructions.
1254    (i.e. at least Sun4/20,40,60,65,75,110,260,280,330,360,380,470,490) */
1255 #define umul_ppmm(w1, w0, u, v) \
1256   __asm__ ("! Inlined umul_ppmm\n"                                      \
1257 "       wr      %%g0,%2,%%y     ! SPARC has 0-3 delay insn after a wr\n"\
1258 "       sra     %3,31,%%o5      ! Don't move this insn\n"               \
1259 "       and     %2,%%o5,%%o5    ! Don't move this insn\n"               \
1260 "       andcc   %%g0,0,%%g1     ! Don't move this insn\n"               \
1261 "       mulscc  %%g1,%3,%%g1\n"                                         \
1262 "       mulscc  %%g1,%3,%%g1\n"                                         \
1263 "       mulscc  %%g1,%3,%%g1\n"                                         \
1264 "       mulscc  %%g1,%3,%%g1\n"                                         \
1265 "       mulscc  %%g1,%3,%%g1\n"                                         \
1266 "       mulscc  %%g1,%3,%%g1\n"                                         \
1267 "       mulscc  %%g1,%3,%%g1\n"                                         \
1268 "       mulscc  %%g1,%3,%%g1\n"                                         \
1269 "       mulscc  %%g1,%3,%%g1\n"                                         \
1270 "       mulscc  %%g1,%3,%%g1\n"                                         \
1271 "       mulscc  %%g1,%3,%%g1\n"                                         \
1272 "       mulscc  %%g1,%3,%%g1\n"                                         \
1273 "       mulscc  %%g1,%3,%%g1\n"                                         \
1274 "       mulscc  %%g1,%3,%%g1\n"                                         \
1275 "       mulscc  %%g1,%3,%%g1\n"                                         \
1276 "       mulscc  %%g1,%3,%%g1\n"                                         \
1277 "       mulscc  %%g1,%3,%%g1\n"                                         \
1278 "       mulscc  %%g1,%3,%%g1\n"                                         \
1279 "       mulscc  %%g1,%3,%%g1\n"                                         \
1280 "       mulscc  %%g1,%3,%%g1\n"                                         \
1281 "       mulscc  %%g1,%3,%%g1\n"                                         \
1282 "       mulscc  %%g1,%3,%%g1\n"                                         \
1283 "       mulscc  %%g1,%3,%%g1\n"                                         \
1284 "       mulscc  %%g1,%3,%%g1\n"                                         \
1285 "       mulscc  %%g1,%3,%%g1\n"                                         \
1286 "       mulscc  %%g1,%3,%%g1\n"                                         \
1287 "       mulscc  %%g1,%3,%%g1\n"                                         \
1288 "       mulscc  %%g1,%3,%%g1\n"                                         \
1289 "       mulscc  %%g1,%3,%%g1\n"                                         \
1290 "       mulscc  %%g1,%3,%%g1\n"                                         \
1291 "       mulscc  %%g1,%3,%%g1\n"                                         \
1292 "       mulscc  %%g1,%3,%%g1\n"                                         \
1293 "       mulscc  %%g1,0,%%g1\n"                                          \
1294 "       add     %%g1,%%o5,%0\n"                                         \
1295 "       rd      %%y,%1"                                                 \
1296            : "=r" ((USItype) (w1)),                                     \
1297              "=r" ((USItype) (w0))                                      \
1298            : "%rI" ((USItype) (u)),                                     \
1299              "r" ((USItype) (v))                                                \
1300            : "g1", "o5" __AND_CLOBBER_CC)
1301 #define UMUL_TIME 39            /* 39 instructions */
1302 /* This much assembler is well worth it for the sparc:
1303    the default udiv_qrnnd (in C) is more than 10 times slower!  */
1304 #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1305   __asm__ ("! Inlined udiv_qrnnd\n"                                     \
1306 "       mov     32,%%g1\n"                                              \
1307 "       subcc   %1,%2,%%g0\n"                                           \
1308 "1:     bcs     5f\n"                                                   \
1309 "        addxcc %0,%0,%0        ! shift n1n0 and a q-bit in lsb\n"      \
1310 "       sub     %1,%2,%1        ! this kills msb of n\n"                \
1311 "       addx    %1,%1,%1        ! so this can't give carry\n"           \
1312 "       subcc   %%g1,1,%%g1\n"                                          \
1313 "2:     bne     1b\n"                                                   \
1314 "        subcc  %1,%2,%%g0\n"                                           \
1315 "       bcs     3f\n"                                                   \
1316 "        addxcc %0,%0,%0        ! shift n1n0 and a q-bit in lsb\n"      \
1317 "       b       3f\n"                                                   \
1318 "        sub    %1,%2,%1        ! this kills msb of n\n"                \
1319 "4:     sub     %1,%2,%1\n"                                             \
1320 "5:     addxcc  %1,%1,%1\n"                                             \
1321 "       bcc     2b\n"                                                   \
1322 "        subcc  %%g1,1,%%g1\n"                                          \
1323 "! Got carry from n.  Subtract next step to cancel this carry.\n"       \
1324 "       bne     4b\n"                                                   \
1325 "        addcc  %0,%0,%0        ! shift n1n0 and a 0-bit in lsb\n"      \
1326 "       sub     %1,%2,%1\n"                                             \
1327 "3:     xnor    %0,0,%0\n"                                              \
1328 "       ! End of inline udiv_qrnnd"                                     \
1329            : "=&r" ((USItype) (__q)),                                   \
1330              "=&r" ((USItype) (__r))                                    \
1331            : "r" ((USItype) (__d)),                                     \
1332              "1" ((USItype) (__n1)),                                    \
1333              "0" ((USItype) (__n0)) : "g1" __AND_CLOBBER_CC)
1334 #define UDIV_TIME (3+7*32)      /* 7 instructions/iteration. 32 iterations.  */
1335 #endif /* __sparclite__ */
1336 #endif /* __sparc_v8__ */
1337 #endif /* __sparc_v9__ */
1338 #endif /* sparc32 */
1339
1340 #if ((defined (__sparc__) && defined (__arch64__)) || defined (__sparcv9)) \
1341     && W_TYPE_SIZE == 64
1342 #define add_ssaaaa(sh, sl, ah, al, bh, bl)                              \
1343   do {                                                                  \
1344     UDItype __carry = 0;                                                \
1345     __asm__ ("addcc\t%r5,%6,%1\n\t"                                     \
1346              "add\t%r3,%4,%0\n\t"                                       \
1347              "movcs\t%%xcc, 1, %2\n\t"                                  \
1348              "add\t%0, %2, %0"                                          \
1349              : "=r" ((UDItype)(sh)),                                    \
1350                "=&r" ((UDItype)(sl)),                                   \
1351                "+r" (__carry)                                           \
1352              : "%rJ" ((UDItype)(ah)),                                   \
1353                "rI" ((UDItype)(bh)),                                    \
1354                "%rJ" ((UDItype)(al)),                                   \
1355                "rI" ((UDItype)(bl))                                     \
1356              __CLOBBER_CC);                                             \
1357   } while (0)
1358
1359 #define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
1360   do {                                                                  \
1361     UDItype __carry = 0;                                                \
1362     __asm__ ("subcc\t%r5,%6,%1\n\t"                                     \
1363              "sub\t%r3,%4,%0\n\t"                                       \
1364              "movcs\t%%xcc, 1, %2\n\t"                                  \
1365              "sub\t%0, %2, %0"                                          \
1366              : "=r" ((UDItype)(sh)),                                    \
1367                "=&r" ((UDItype)(sl)),                                   \
1368                "+r" (__carry)                                           \
1369              : "%rJ" ((UDItype)(ah)),                                   \
1370                "rI" ((UDItype)(bh)),                                    \
1371                "%rJ" ((UDItype)(al)),                                   \
1372                "rI" ((UDItype)(bl))                                     \
1373              __CLOBBER_CC);                                             \
1374   } while (0)
1375
1376 #define umul_ppmm(wh, wl, u, v)                                         \
1377   do {                                                                  \
1378           UDItype tmp1, tmp2, tmp3, tmp4;                               \
1379           __asm__ __volatile__ (                                        \
1380                    "srl %7,0,%3\n\t"                                    \
1381                    "mulx %3,%6,%1\n\t"                                  \
1382                    "srlx %6,32,%2\n\t"                                  \
1383                    "mulx %2,%3,%4\n\t"                                  \
1384                    "sllx %4,32,%5\n\t"                                  \
1385                    "srl %6,0,%3\n\t"                                    \
1386                    "sub %1,%5,%5\n\t"                                   \
1387                    "srlx %5,32,%5\n\t"                                  \
1388                    "addcc %4,%5,%4\n\t"                                 \
1389                    "srlx %7,32,%5\n\t"                                  \
1390                    "mulx %3,%5,%3\n\t"                                  \
1391                    "mulx %2,%5,%5\n\t"                                  \
1392                    "sethi %%hi(0x80000000),%2\n\t"                      \
1393                    "addcc %4,%3,%4\n\t"                                 \
1394                    "srlx %4,32,%4\n\t"                                  \
1395                    "add %2,%2,%2\n\t"                                   \
1396                    "movcc %%xcc,%%g0,%2\n\t"                            \
1397                    "addcc %5,%4,%5\n\t"                                 \
1398                    "sllx %3,32,%3\n\t"                                  \
1399                    "add %1,%3,%1\n\t"                                   \
1400                    "add %5,%2,%0"                                       \
1401            : "=r" ((UDItype)(wh)),                                      \
1402              "=&r" ((UDItype)(wl)),                                     \
1403              "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4)     \
1404            : "r" ((UDItype)(u)),                                        \
1405              "r" ((UDItype)(v))                                         \
1406            __CLOBBER_CC);                                               \
1407   } while (0)
1408 #define UMUL_TIME 96
1409 #define UDIV_TIME 230
1410 #endif /* sparc64 */
1411
1412 #if defined (__vax__) && W_TYPE_SIZE == 32
1413 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1414   __asm__ ("addl2 %5,%1\n\tadwc %3,%0"                                  \
1415            : "=g" ((USItype) (sh)),                                     \
1416              "=&g" ((USItype) (sl))                                     \
1417            : "%0" ((USItype) (ah)),                                     \
1418              "g" ((USItype) (bh)),                                      \
1419              "%1" ((USItype) (al)),                                     \
1420              "g" ((USItype) (bl)))
1421 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1422   __asm__ ("subl2 %5,%1\n\tsbwc %3,%0"                                  \
1423            : "=g" ((USItype) (sh)),                                     \
1424              "=&g" ((USItype) (sl))                                     \
1425            : "0" ((USItype) (ah)),                                      \
1426              "g" ((USItype) (bh)),                                      \
1427              "1" ((USItype) (al)),                                      \
1428              "g" ((USItype) (bl)))
1429 #define umul_ppmm(xh, xl, m0, m1) \
1430   do {                                                                  \
1431     union {                                                             \
1432         UDItype __ll;                                                   \
1433         struct {USItype __l, __h;} __i;                                 \
1434       } __xx;                                                           \
1435     USItype __m0 = (m0), __m1 = (m1);                                   \
1436     __asm__ ("emul %1,%2,$0,%0"                                         \
1437              : "=r" (__xx.__ll)                                         \
1438              : "g" (__m0),                                              \
1439                "g" (__m1));                                             \
1440     (xh) = __xx.__i.__h;                                                \
1441     (xl) = __xx.__i.__l;                                                \
1442     (xh) += ((((SItype) __m0 >> 31) & __m1)                             \
1443              + (((SItype) __m1 >> 31) & __m0));                         \
1444   } while (0)
1445 #define sdiv_qrnnd(q, r, n1, n0, d) \
1446   do {                                                                  \
1447     union {DItype __ll;                                                 \
1448            struct {SItype __l, __h;} __i;                               \
1449           } __xx;                                                       \
1450     __xx.__i.__h = n1; __xx.__i.__l = n0;                               \
1451     __asm__ ("ediv %3,%2,%0,%1"                                         \
1452              : "=g" (q), "=g" (r)                                       \
1453              : "g" (__xx.__ll), "g" (d));                               \
1454   } while (0)
1455 #endif /* __vax__ */
1456
1457 #ifdef _TMS320C6X
1458 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1459   do                                                                    \
1460     {                                                                   \
1461       UDItype __ll;                                                     \
1462       __asm__ ("addu .l1 %1, %2, %0"                                    \
1463                : "=a" (__ll) : "a" (al), "a" (bl));                     \
1464       (sl) = (USItype)__ll;                                             \
1465       (sh) = ((USItype)(__ll >> 32)) + (ah) + (bh);                     \
1466     }                                                                   \
1467   while (0)
1468
1469 #ifdef _TMS320C6400_PLUS
1470 #define __umulsidi3(u,v) ((UDItype)(USItype)(u)*(USItype)(v))
1471 #define umul_ppmm(w1, w0, u, v)                                         \
1472   do {                                                                  \
1473     UDItype __x = (UDItype) (USItype) (u) * (USItype) (v);              \
1474     (w1) = (USItype) (__x >> 32);                                       \
1475     (w0) = (USItype) (__x);                                             \
1476   } while (0)
1477 #endif  /* _TMS320C6400_PLUS */
1478
1479 #define count_leading_zeros(count, x)   ((count) = __builtin_clz (x))
1480 #ifdef _TMS320C6400
1481 #define count_trailing_zeros(count, x)  ((count) = __builtin_ctz (x))
1482 #endif
1483 #define UMUL_TIME 4
1484 #define UDIV_TIME 40
1485 #endif /* _TMS320C6X */
1486
1487 #if defined (__xtensa__) && W_TYPE_SIZE == 32
1488 /* This code is not Xtensa-configuration-specific, so rely on the compiler
1489    to expand builtin functions depending on what configuration features
1490    are available.  This avoids library calls when the operation can be
1491    performed in-line.  */
1492 #define umul_ppmm(w1, w0, u, v)                                         \
1493   do {                                                                  \
1494     DWunion __w;                                                        \
1495     __w.ll = __builtin_umulsidi3 (u, v);                                \
1496     (w1) = __w.s.high;                                                  \
1497     (w0) = __w.s.low;                                                   \
1498   } while (0)
1499 #define __umulsidi3(u, v)               __builtin_umulsidi3 (u, v)
1500 #define count_leading_zeros(COUNT, X)   ((COUNT) = __builtin_clz (X))
1501 #define count_trailing_zeros(COUNT, X)  ((COUNT) = __builtin_ctz (X))
1502 #endif /* __xtensa__ */
1503
1504 #if defined xstormy16
1505 extern UHItype __stormy16_count_leading_zeros (UHItype);
1506 #define count_leading_zeros(count, x)                                   \
1507   do                                                                    \
1508     {                                                                   \
1509       UHItype size;                                                     \
1510                                                                         \
1511       /* We assume that W_TYPE_SIZE is a multiple of 16...  */          \
1512       for ((count) = 0, size = W_TYPE_SIZE; size; size -= 16)           \
1513         {                                                               \
1514           UHItype c;                                                    \
1515                                                                         \
1516           c = __clzhi2 ((x) >> (size - 16));                            \
1517           (count) += c;                                                 \
1518           if (c != 16)                                                  \
1519             break;                                                      \
1520         }                                                               \
1521     }                                                                   \
1522   while (0)
1523 #define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
1524 #endif
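/* For illustration, with W_TYPE_SIZE == 32 and x == 0x00012345 the loop
   above first passes the high halfword 0x0001 to __clzhi2, which returns
   15; since 15 != 16 the loop stops and (count) is 15.  For an all-zero
   halfword the loop relies on __clzhi2 returning 16, so that the 16
   leading zeros are accumulated and the next halfword is examined.  */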
1525
1526 #if defined (__z8000__) && W_TYPE_SIZE == 16
1527 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1528   __asm__ ("add %H1,%H5\n\tadc  %H0,%H3"                                \
1529            : "=r" ((unsigned int)(sh)),                                 \
1530              "=&r" ((unsigned int)(sl))                                 \
1531            : "%0" ((unsigned int)(ah)),                                 \
1532              "r" ((unsigned int)(bh)),                                  \
1533              "%1" ((unsigned int)(al)),                                 \
1534              "rQR" ((unsigned int)(bl)))
1535 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1536   __asm__ ("sub %H1,%H5\n\tsbc  %H0,%H3"                                \
1537            : "=r" ((unsigned int)(sh)),                                 \
1538              "=&r" ((unsigned int)(sl))                                 \
1539            : "0" ((unsigned int)(ah)),                                  \
1540              "r" ((unsigned int)(bh)),                                  \
1541              "1" ((unsigned int)(al)),                                  \
1542              "rQR" ((unsigned int)(bl)))
1543 #define umul_ppmm(xh, xl, m0, m1) \
1544   do {                                                                  \
1545     union {long int __ll;                                               \
1546            struct {unsigned int __h, __l;} __i;                         \
1547           } __xx;                                                       \
1548     unsigned int __m0 = (m0), __m1 = (m1);                              \
1549     __asm__ ("mult      %S0,%H3"                                        \
1550              : "=r" (__xx.__i.__h),                                     \
1551                "=r" (__xx.__i.__l)                                      \
1552              : "%1" (__m0),                                             \
1553                "rQR" (__m1));                                           \
1554     (xh) = __xx.__i.__h; (xl) = __xx.__i.__l;                           \
1555     (xh) += ((((signed int) __m0 >> 15) & __m1)                         \
1556              + (((signed int) __m1 >> 15) & __m0));                     \
1557   } while (0)
1558 #endif /* __z8000__ */
1559
1560 #endif /* __GNUC__ */
1561
1562 /* If this machine has no inline assembler, use C macros.  */
1563
1564 #if !defined (add_ssaaaa)
1565 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1566   do {                                                                  \
1567     UWtype __x;                                                         \
1568     __x = (al) + (bl);                                                  \
1569     (sh) = (ah) + (bh) + (__x < (al));                                  \
1570     (sl) = __x;                                                         \
1571   } while (0)
1572 #endif
1573
1574 #if !defined (sub_ddmmss)
1575 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1576   do {                                                                  \
1577     UWtype __x;                                                         \
1578     __x = (al) - (bl);                                                  \
1579     (sh) = (ah) - (bh) - (__x > (al));                                  \
1580     (sl) = __x;                                                         \
1581   } while (0)
1582 #endif
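/* For illustration: in the generic macros above the low-word operation is
   performed modulo 2**W_TYPE_SIZE, so in add_ssaaaa the truncated sum
   __x = (al) + (bl) is smaller than either addend exactly when a carry
   out of the low word occurred, and (__x < (al)) yields the 0-or-1 carry
   to propagate; likewise (__x > (al)) in sub_ddmmss yields the borrow.
   A worked example with 32-bit words (values chosen only for illustration):

       al = 0xFFFFFFFE, bl = 0x00000003
       __x = 0x00000001              wrapped around, so __x < al is 1
       (sh) = (ah) + (bh) + 1        the full double-word sum

   so add_ssaaaa (sh, sl, ah, al, bh, bl) leaves sh:sl = ah:al + bh:bl.  */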
1583
1584 /* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of
1585    smul_ppmm.  */
1586 #if !defined (umul_ppmm) && defined (smul_ppmm)
1587 #define umul_ppmm(w1, w0, u, v)                                         \
1588   do {                                                                  \
1589     UWtype __w1;                                                        \
1590     UWtype __xm0 = (u), __xm1 = (v);                                    \
1591     smul_ppmm (__w1, w0, __xm0, __xm1);                                 \
1592     (w1) = __w1 + (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1)               \
1593                 + (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0);              \
1594   } while (0)
1595 #endif
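/* The correction above can be derived by relating the signed and unsigned
   readings of the same bit patterns.  With s_u = __xm0 >> (W_TYPE_SIZE - 1)
   and s_v likewise, the signed values are __xm0 - s_u*2**W_TYPE_SIZE and
   __xm1 - s_v*2**W_TYPE_SIZE, hence

     u*v  =  (signed u)*(signed v)
             + 2**W_TYPE_SIZE * (s_u*__xm1 + s_v*__xm0)
             - s_u*s_v*2**(2*W_TYPE_SIZE),

   where the last term does not affect a double-word result.  The low words
   therefore agree, and the unsigned high word is the signed high word plus
   s_u*__xm1 + s_v*__xm0 modulo 2**W_TYPE_SIZE; the expression
   -(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1 is a branch-free form of
   "s_u ? __xm1 : 0".  */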
1596
1597 /* If we still don't have umul_ppmm, define it using plain C.  */
1598 #if !defined (umul_ppmm)
1599 #define umul_ppmm(w1, w0, u, v)                                         \
1600   do {                                                                  \
1601     UWtype __x0, __x1, __x2, __x3;                                      \
1602     UHWtype __ul, __vl, __uh, __vh;                                     \
1603                                                                         \
1604     __ul = __ll_lowpart (u);                                            \
1605     __uh = __ll_highpart (u);                                           \
1606     __vl = __ll_lowpart (v);                                            \
1607     __vh = __ll_highpart (v);                                           \
1608                                                                         \
1609     __x0 = (UWtype) __ul * __vl;                                        \
1610     __x1 = (UWtype) __ul * __vh;                                        \
1611     __x2 = (UWtype) __uh * __vl;                                        \
1612     __x3 = (UWtype) __uh * __vh;                                        \
1613                                                                         \
1614     __x1 += __ll_highpart (__x0);/* this can't give carry */            \
1615     __x1 += __x2;               /* but this indeed can */               \
1616     if (__x1 < __x2)            /* did we get it? */                    \
1617       __x3 += __ll_B;           /* yes, add it in the proper pos.  */   \
1618                                                                         \
1619     (w1) = __x3 + __ll_highpart (__x1);                                 \
1620     (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0);          \
1621   } while (0)
1622 #endif
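/* The plain C version above is the textbook half-word decomposition.  With
   B = 2**(W_TYPE_SIZE/2), writing u = __uh*B + __ul and v = __vh*B + __vl
   gives

     u*v = __uh*__vh*B*B + (__uh*__vl + __ul*__vh)*B + __ul*__vl,

   and every partial product fits in one word because its factors are half
   words.  Only the sum of the two cross products can overflow, which the
   (__x1 < __x2) test detects; the lost carry is credited to __x3 as
   __ll_B.  A typical call (names here are illustrative only):

       UWtype hi, lo;
       umul_ppmm (hi, lo, u, v);

   after which hi:lo holds the double-word product u*v.  */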
1623
1624 #if !defined (__umulsidi3)
1625 #define __umulsidi3(u, v) \
1626   ({DWunion __w;                                                        \
1627     umul_ppmm (__w.s.high, __w.s.low, u, v);                            \
1628     __w.ll; })
1629 #endif
1630
1631 /* Define this unconditionally, so it can be used for debugging.  */
1632 #define __udiv_qrnnd_c(q, r, n1, n0, d) \
1633   do {                                                                  \
1634     UWtype __d1, __d0, __q1, __q0;                                      \
1635     UWtype __r1, __r0, __m;                                             \
1636     __d1 = __ll_highpart (d);                                           \
1637     __d0 = __ll_lowpart (d);                                            \
1638                                                                         \
1639     __r1 = (n1) % __d1;                                                 \
1640     __q1 = (n1) / __d1;                                                 \
1641     __m = (UWtype) __q1 * __d0;                                         \
1642     __r1 = __r1 * __ll_B | __ll_highpart (n0);                          \
1643     if (__r1 < __m)                                                     \
1644       {                                                                 \
1645         __q1--, __r1 += (d);                                            \
1646         if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
1647           if (__r1 < __m)                                               \
1648             __q1--, __r1 += (d);                                        \
1649       }                                                                 \
1650     __r1 -= __m;                                                        \
1651                                                                         \
1652     __r0 = __r1 % __d1;                                                 \
1653     __q0 = __r1 / __d1;                                                 \
1654     __m = (UWtype) __q0 * __d0;                                         \
1655     __r0 = __r0 * __ll_B | __ll_lowpart (n0);                           \
1656     if (__r0 < __m)                                                     \
1657       {                                                                 \
1658         __q0--, __r0 += (d);                                            \
1659         if (__r0 >= (d))                                                \
1660           if (__r0 < __m)                                               \
1661             __q0--, __r0 += (d);                                        \
1662       }                                                                 \
1663     __r0 -= __m;                                                        \
1664                                                                         \
1665     (q) = (UWtype) __q1 * __ll_B | __q0;                                \
1666     (r) = __r0;                                                         \
1667   } while (0)
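/* The macro above is two rounds of schoolbook division by half-words: each
   round estimates the next half-word quotient digit by dividing by __d1,
   the high half of the divisor, and because the divisor is normalized
   (most significant bit set) the estimate is close enough that the nested
   correction steps in each round suffice.  This is why callers must
   normalize first; see UDIV_NEEDS_NORMALIZATION below.  */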
1668
1669 /* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
1670    __udiv_w_sdiv (defined in libgcc or elsewhere).  */
1671 #if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
1672 #define udiv_qrnnd(q, r, nh, nl, d) \
1673   do {                                                                  \
1674     USItype __r;                                                        \
1675     (q) = __udiv_w_sdiv (&__r, nh, nl, d);                              \
1676     (r) = __r;                                                          \
1677   } while (0)
1678 #endif
1679
1680 /* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c.  */
1681 #if !defined (udiv_qrnnd)
1682 #define UDIV_NEEDS_NORMALIZATION 1
1683 #define udiv_qrnnd __udiv_qrnnd_c
1684 #endif
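/* When UDIV_NEEDS_NORMALIZATION is 1 the caller must shift the divisor so
   that its most significant bit is set, shift the numerator by the same
   amount, and shift the remainder back afterwards; the quotient is not
   affected.  A minimal caller-side sketch for a numerator n1:n0 and a
   divisor d (assuming d != 0 and n1 < d as the interface requires;
   __shift is an illustrative local name, not part of this file):

       UWtype __shift;
       count_leading_zeros (__shift, d);
       if (__shift != 0)
         {
           d = d << __shift;
           n1 = (n1 << __shift) | (n0 >> (W_TYPE_SIZE - __shift));
           n0 = n0 << __shift;
         }
       udiv_qrnnd (q, r, n1, n0, d);
       r = r >> __shift;

   where the final shift undoes the normalization of the remainder.  */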
1685
1686 #if !defined (count_leading_zeros)
1687 #define count_leading_zeros(count, x) \
1688   do {                                                                  \
1689     UWtype __xr = (x);                                                  \
1690     UWtype __a;                                                         \
1691                                                                         \
1692     if (W_TYPE_SIZE <= 32)                                              \
1693       {                                                                 \
1694         __a = __xr < ((UWtype)1<<2*__BITS4)                             \
1695           ? (__xr < ((UWtype)1<<__BITS4) ? 0 : __BITS4)                 \
1696           : (__xr < ((UWtype)1<<3*__BITS4) ?  2*__BITS4 : 3*__BITS4);   \
1697       }                                                                 \
1698     else                                                                \
1699       {                                                                 \
1700         for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8)                  \
1701           if (((__xr >> __a) & 0xff) != 0)                              \
1702             break;                                                      \
1703       }                                                                 \
1704                                                                         \
1705     (count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a);             \
1706   } while (0)
1707 #define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
1708 #endif
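/* The generic version above first locates the highest nonzero chunk of the
   operand (one of four __BITS4-bit chunks via two nested comparisons when
   W_TYPE_SIZE <= 32, otherwise byte by byte from the top), then reads the
   bit length of that chunk from __clz_tab, so that

     (count) = W_TYPE_SIZE - (bit length of top chunk + its bit offset).

   Worked example with W_TYPE_SIZE == 32 and x == 0x00012345 (illustrative
   value): x lies in [1<<16, 1<<24), so __a is 16, __clz_tab[0x01] is 1,
   and (count) = 32 - (1 + 16) = 15, i.e. bit 16 is the highest set bit.  */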
1709
1710 #if !defined (count_trailing_zeros)
1711 /* Define count_trailing_zeros using count_leading_zeros.  The latter might be
1712    defined in asm, but if it is not, the C version above is good enough.  */
1713 #define count_trailing_zeros(count, x) \
1714   do {                                                                  \
1715     UWtype __ctz_x = (x);                                               \
1716     UWtype __ctz_c;                                                     \
1717     count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x);                  \
1718     (count) = W_TYPE_SIZE - 1 - __ctz_c;                                \
1719   } while (0)
1720 #endif
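/* x & -x isolates the lowest set bit of x: if that bit is bit p, then
   count_leading_zeros on it gives W_TYPE_SIZE - 1 - p, and subtracting
   from W_TYPE_SIZE - 1 recovers p.  Worked example with W_TYPE_SIZE == 32
   and x == 0x28 (binary 101000, an illustrative value): x & -x is 0x8,
   count_leading_zeros gives 28, and (count) = 31 - 28 = 3, the index of
   the lowest set bit.  */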
1721
1722 #ifndef UDIV_NEEDS_NORMALIZATION
1723 #define UDIV_NEEDS_NORMALIZATION 0
1724 #endif