// SPDX-License-Identifier: GPL-2.0

/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see the file COPYING, or write
 * to the Free Software Foundation, Inc.
 */

#include <linux/libgcc.h>

#define count_leading_zeros(COUNT, X)   ((COUNT) = __builtin_clz(X))
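
/*
 * Note: __builtin_clz() is undefined when its argument is zero.  Every
 * call site below only passes a nonzero divisor word (a zero divisor is
 * trapped earlier via an intentional division by zero).
 */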

#define W_TYPE_SIZE 32

#define __ll_B ((unsigned long) 1 << (W_TYPE_SIZE / 2))
#define __ll_lowpart(t) ((unsigned long) (t) & (__ll_B - 1))
#define __ll_highpart(t) ((unsigned long) (t) >> (W_TYPE_SIZE / 2))
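
/*
 * Example of the half-word split, with W_TYPE_SIZE == 32: for
 * t == 0x12345678, __ll_B == 0x10000, __ll_highpart(t) == 0x1234 and
 * __ll_lowpart(t) == 0x5678, so
 * t == __ll_highpart(t) * __ll_B + __ll_lowpart(t).
 */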

/* If we still don't have umul_ppmm, define it using plain C. */
#if !defined(umul_ppmm)
#define umul_ppmm(w1, w0, u, v)                                         \
        do {                                                            \
                unsigned long __x0, __x1, __x2, __x3;                   \
                unsigned short __ul, __vl, __uh, __vh;                  \
                                                                        \
                __ul = __ll_lowpart(u);                                 \
                __uh = __ll_highpart(u);                                \
                __vl = __ll_lowpart(v);                                 \
                __vh = __ll_highpart(v);                                \
                                                                        \
                __x0 = (unsigned long) __ul * __vl;                     \
                __x1 = (unsigned long) __ul * __vh;                     \
                __x2 = (unsigned long) __uh * __vl;                     \
                __x3 = (unsigned long) __uh * __vh;                     \
                                                                        \
                __x1 += __ll_highpart(__x0); /* this can't give carry */\
                __x1 += __x2;                /* but this indeed can */  \
                if (__x1 < __x2)             /* did we get it? */       \
                        __x3 += __ll_B; /* yes, add it in the proper position */ \
                                                                        \
                (w1) = __x3 + __ll_highpart(__x1);                      \
                (w0) = __ll_lowpart(__x1) * __ll_B + __ll_lowpart(__x0);\
        } while (0)
#endif
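
/*
 * umul_ppmm() is schoolbook multiplication on half-words: with
 * u = __uh * B + __ul and v = __vh * B + __vl (where B == __ll_B),
 *
 *   u * v = __x3 * B^2 + (__x1 + __x2) * B + __x0
 *
 * with the carry out of the middle sum folded into the high word.
 * For example, 0x10001 * 0x10001 yields w1 == 0x1 and w0 == 0x20001.
 */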

#if !defined(sub_ddmmss)
#define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
        do {                                                            \
                unsigned long __x;                                      \
                __x = (al) - (bl);                                      \
                (sh) = (ah) - (bh) - (__x > (al));                      \
                (sl) = __x;                                             \
        } while (0)
#endif
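
/*
 * sub_ddmmss() computes the double-word difference (ah,al) - (bh,bl).
 * The low-word subtraction wraps modulo 2^32, and a borrow occurred
 * exactly when the wrapped result __x is larger than the original al;
 * that borrow is then propagated into the high-word subtraction.
 */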

/* Define this unconditionally, so it can be used for debugging. */
#define __udiv_qrnnd_c(q, r, n1, n0, d)                                 \
        do {                                                            \
                unsigned long __d1, __d0, __q1, __q0;                   \
                unsigned long __r1, __r0, __m;                          \
                __d1 = __ll_highpart(d);                                \
                __d0 = __ll_lowpart(d);                                 \
                                                                        \
                __r1 = (n1) % __d1;                                     \
                __q1 = (n1) / __d1;                                     \
                __m = (unsigned long) __q1 * __d0;                      \
                __r1 = __r1 * __ll_B | __ll_highpart(n0);               \
                if (__r1 < __m) {                                       \
                        __q1--, __r1 += (d);                            \
                        if (__r1 >= (d)) /* no carry when adding d */   \
                                if (__r1 < __m)                         \
                                        __q1--, __r1 += (d);            \
                }                                                       \
                __r1 -= __m;                                            \
                                                                        \
                __r0 = __r1 % __d1;                                     \
                __q0 = __r1 / __d1;                                     \
                __m = (unsigned long) __q0 * __d0;                      \
                __r0 = __r0 * __ll_B | __ll_lowpart(n0);                \
                if (__r0 < __m) {                                       \
                        __q0--, __r0 += (d);                            \
                        if (__r0 >= (d))                                \
                                if (__r0 < __m)                         \
                                        __q0--, __r0 += (d);            \
                }                                                       \
                __r0 -= __m;                                            \
                                                                        \
                (q) = (unsigned long) __q1 * __ll_B | __q0;             \
                (r) = __r0;                                             \
        } while (0)
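
/*
 * __udiv_qrnnd_c(q, r, n1, n0, d) divides the two-word value (n1,n0) by
 * the single word d, producing one half-word quotient digit at a time,
 * leaving the quotient in q and the remainder in r.  It assumes n1 < d
 * (so the quotient fits in one word) and is only accurate when d is
 * normalized, i.e. has its most significant bit set; hence
 * UDIV_NEEDS_NORMALIZATION below.
 */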

/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c. */
#if !defined(udiv_qrnnd)
#define UDIV_NEEDS_NORMALIZATION 1
#define udiv_qrnnd __udiv_qrnnd_c
#endif

unsigned long long __udivmoddi4(unsigned long long u, unsigned long long v,
                                unsigned long long *rp)
{
        const DWunion nn = {.ll = u };
        const DWunion dd = {.ll = v };
        DWunion rr, ww;
        unsigned long d0, d1, n0, n1, n2;
        unsigned long q0 = 0, q1 = 0;
        unsigned long b, bm;

        d0 = dd.s.low;
        d1 = dd.s.high;
        n0 = nn.s.low;
        n1 = nn.s.high;

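        /*
         * The case comments below use the libgcc notation: one letter
         * per 32-bit word, with "0" for a word known to be zero.  An
         * upper-case letter marks a divisor word that is "big" and a
         * lower-case letter one that is "small" relative to the
         * corresponding numerator word; e.g. "0q = nn / 0D" is the case
         * d1 == 0 && d0 > n1, whose quotient has a zero high word.
         */
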
#if !UDIV_NEEDS_NORMALIZATION

        if (d1 == 0) {
                if (d0 > n1) {
                        /* 0q = nn / 0D */

                        udiv_qrnnd(q0, n0, n1, n0, d0);
                        q1 = 0;

                        /* Remainder in n0. */
                } else {
                        /* qq = NN / 0d */

                        if (d0 == 0)
                                /*
                                 * Divide intentionally by zero, so that
                                 * a zero divisor traps at run time just
                                 * as a hardware 64-bit division would.
                                 */
                                d0 = 1 / d0;

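                        /*
                         * Schoolbook long division, one word at a time:
                         * first q1 = n1 / d0 with remainder r (left in
                         * n1), then q0 = (r * 2^32 + n0) / d0 with the
                         * final remainder left in n0.
                         */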
                        udiv_qrnnd(q1, n1, 0, n1, d0);
                        udiv_qrnnd(q0, n0, n1, n0, d0);

                        /* Remainder in n0. */
                }

                if (rp != 0) {
                        rr.s.low = n0;
                        rr.s.high = 0;
                        *rp = rr.ll;
                }

#else /* UDIV_NEEDS_NORMALIZATION */

        if (d1 == 0) {
                if (d0 > n1) {
                        /* 0q = nn / 0D */

                        count_leading_zeros(bm, d0);

                        if (bm != 0) {
                                /*
                                 * Normalize, i.e. make the most significant bit
                                 * of the denominator set.
                                 */

                                d0 = d0 << bm;
                                n1 = (n1 << bm) | (n0 >> (W_TYPE_SIZE - bm));
                                n0 = n0 << bm;
                        }
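
                        /*
                         * Shifting numerator and denominator left by
                         * the same amount bm leaves the quotient
                         * unchanged and multiplies the remainder by
                         * 2^bm, which the ">> bm" below undoes.
                         */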

                        udiv_qrnnd(q0, n0, n1, n0, d0);
                        q1 = 0;

                        /* Remainder in n0 >> bm. */
                } else {
                        /* qq = NN / 0d */

                        if (d0 == 0)
                                /*
                                 * Divide intentionally by zero, so that
                                 * a zero divisor traps at run time just
                                 * as a hardware 64-bit division would.
                                 */
                                d0 = 1 / d0;

                        count_leading_zeros(bm, d0);

                        if (bm == 0) {
                                /*
                                 * From (n1 >= d0) /\ (the most significant bit
                                 * of d0 is set), conclude (the most significant
                                 * bit of n1 is set) /\ (the leading quotient
                                 * digit q1 = 1).
                                 *
                                 * This special case is necessary, not an
                                 * optimization. (Shift counts of W_TYPE_SIZE
                                 * are undefined.)
                                 */

                                n1 -= d0;
                                q1 = 1;
                        } else {
                                /* Normalize. */

                                b = W_TYPE_SIZE - bm;

                                d0 = d0 << bm;
                                n2 = n1 >> b;
                                n1 = (n1 << bm) | (n0 >> b);
                                n0 = n0 << bm;

                                udiv_qrnnd(q1, n1, n2, n1, d0);
                        }

                        /*
                         * In both cases the partial remainder now
                         * satisfies n1 < d0, so the precondition of the
                         * final udiv_qrnnd below holds.
                         */

                        udiv_qrnnd(q0, n0, n1, n0, d0);

                        /* Remainder in n0 >> bm. */
                }

                if (rp != 0) {
                        rr.s.low = n0 >> bm;
                        rr.s.high = 0;
                        *rp = rr.ll;
                }

#endif /* UDIV_NEEDS_NORMALIZATION */

        } else {
                if (d1 > n1) {
                        /* 00 = nn / DD */

                        q0 = 0;
                        q1 = 0;

                        /* Remainder in n1n0. */
                        if (rp != 0) {
                                rr.s.low = n0;
                                rr.s.high = n1;
                                *rp = rr.ll;
                        }
                } else {
                        /* 0q = NN / dd */

                        count_leading_zeros(bm, d1);
                        if (bm == 0) {
                                /*
                                 * From (n1 >= d1) /\ (the most significant bit
                                 * of d1 is set), conclude (the most significant
                                 * bit of n1 is set) /\ (the quotient digit q0 =
                                 * 0 or 1).
                                 *
                                 * This special case is necessary, not an
                                 * optimization.
                                 */

                                /*
                                 * The condition on the next line takes
                                 * advantage of the fact that n1 >= d1
                                 * (true due to program flow).
                                 */
                                if (n1 > d1 || n0 >= d0) {
                                        q0 = 1;
                                        sub_ddmmss(n1, n0, n1, n0, d1, d0);
                                } else {
                                        q0 = 0;
                                }

                                q1 = 0;

                                if (rp != 0) {
                                        rr.s.low = n0;
                                        rr.s.high = n1;
                                        *rp = rr.ll;
                                }
                        } else {
                                unsigned long m1, m0;
                                /* Normalize. */

                                b = W_TYPE_SIZE - bm;

                                d1 = (d1 << bm) | (d0 >> b);
                                d0 = d0 << bm;
                                n2 = n1 >> b;
                                n1 = (n1 << bm) | (n0 >> b);
                                n0 = n0 << bm;

                                udiv_qrnnd(q0, n1, n2, n1, d1);
                                umul_ppmm(m1, m0, q0, d0);

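                                /*
                                 * q0 was computed against d1 alone, so
                                 * it may exceed the true quotient.
                                 * Compare q0 * d0 against the actual
                                 * partial remainder (n1,n0); with d1
                                 * normalized the estimate is at most
                                 * one too large, so a single downward
                                 * correction suffices.
                                 */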
                                if (m1 > n1 || (m1 == n1 && m0 > n0)) {
                                        q0--;
                                        sub_ddmmss(m1, m0, m1, m0, d1, d0);
                                }

                                q1 = 0;

                                /* Remainder in (n1n0 - m1m0) >> bm. */
                                if (rp != 0) {
                                        sub_ddmmss(n1, n0, n1, n0, m1, m0);
                                        rr.s.low = (n1 << b) | (n0 >> bm);
                                        rr.s.high = n1 >> bm;
                                        *rp = rr.ll;
                                }
                        }
                }
        }

        ww.s.low = q0;
        ww.s.high = q1;

        return ww.ll;
}
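
#ifdef UDIVMODDI4_SELFTEST
/*
 * A minimal userspace self-test sketch, not part of the kernel build:
 * the UDIVMODDI4_SELFTEST guard and the harness below are hypothetical
 * and only illustrate the contract of __udivmoddi4(), namely that for
 * v != 0 it returns u / v and stores u % v through rp (which may be
 * NULL when the remainder is not wanted).
 */
#include <stdio.h>

int main(void)
{
        unsigned long long r;
        /* Cross a 32-bit boundary so the normalization paths run. */
        unsigned long long u = 0x123456789abcdef0ULL, v = 0xfedcba98ULL;
        unsigned long long q = __udivmoddi4(u, v, &r);

        printf("got    q=%llx r=%llx\n", q, r);
        /* Reference result from the compiler's native 64-bit division. */
        printf("expect q=%llx r=%llx\n", u / v, u % v);
        return (q == u / v && r == u % v) ? 0 : 1;
}
#endif /* UDIVMODDI4_SELFTEST */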