treewide: Replace GPLv2 boilerplate/reference with SPDX - rule 500
[sfrench/cifs-2.6.git] / arch / c6x / lib / csum_64plus.S
1 ; SPDX-License-Identifier: GPL-2.0-only
2 ;
3 ;  linux/arch/c6x/lib/csum_64plus.S
4 ;
5 ;  Port on Texas Instruments TMS320C6x architecture
6 ;
7 ;  Copyright (C) 2006, 2009, 2010, 2011 Texas Instruments Incorporated
8 ;  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
9 ;
10 #include <linux/linkage.h>
11
12 ;
13 ;unsigned int csum_partial_copy(const char *src, char * dst,
14 ;                               int len, int sum)
15 ;
16 ; A4:   src
17 ; B4:   dst
18 ; A6:   len
19 ; B6:   sum
20 ; return csum in A4
21 ;
22
23         .text
24 ENTRY(csum_partial_copy)
; Copy len bytes from src to dst while summing the copied data, then return
; the 16-bit folded data sum plus the caller-supplied sum.
;   In:  A4 = src, B4 = dst, A6 = len, B6 = sum
;   Out: A4 = folded data sum + sum (the final addition is not folded again)
; Scratch registers written here: A0-A3, A7-A9, A16, A31, B0, B2, B4, B5,
; B7-B9, B30.  ILC is saved in B30 on entry and restored by the MVC that
; follows the return branch.
25         MVC     .S2     ILC,B30         ; save ILC in B30

27         MV      .D1X    B6,A31          ; A31 = given csum, added back at L10
28         ZERO    .D1     A9              ; csum (a side)
29 ||      ZERO    .D2     B9              ; csum (b side)
30 ||      SHRU    .S2X    A6,2,B5         ; B5 = len / 4 (number of 32-bit words)

32         ;; Check alignment and size
33         AND     .S1     3,A4,A1         ; A1 = src & 3
34 ||      AND     .S2     3,B4,B0         ; B0 = dst & 3
35         OR      .L2X    B0,A1,B0        ; non aligned condition (src or dst)
36 ||      MVC     .S2     B5,ILC          ; ILC = word count for the loops below
37 ||      MVK     .D2     1,B2            ; B2 = 1, multiplier for MPYU below
38 ||      MV      .D1X    B5,A1           ; words condition (A1 = word count)
39   [!A1] B       .S1     L8              ; no full word: go to the tail handling
40    [B0] BNOP    .S1     L6,5            ; unaligned: use the LDNW/STNW loop

42         SPLOOP          1

44         ;; Main loop for aligned words
45         LDW     .D1T1   *A4++,A7        ; A7 = next source word
46         NOP     4
47         MV      .S2X    A7,B7           ; B7 = copy of the word for the store
48 ||      EXTU    .S1     A7,0,16,A16     ; A16 = A7 >> 16 (upper half-word)
49         STW     .D2T2   B7,*B4++        ; write the word to dst
50 ||      MPYU    .M2     B7,B2,B8        ; B8 = B7 * 1; presumably the low half-word (16x16 multiply) - confirm
51 ||      ADD     .L1     A16,A9,A9       ; A9 += upper half-word
52         NOP
53         SPKERNEL        8,0
54 ||      ADD     .L2     B8,B9,B9        ; B9 += B8

56         ZERO    .D1     A1              ; A1 = 0 so the test at L6 skips the unaligned loop
57 ||      ADD     .L1X    A9,B9,A9        ;  add csum from a and b sides

59 L6:
60   [!A1] BNOP    .S1     L8,5            ; A1 == 0: no words to process here

62         ;; Main loop for non-aligned words
63         SPLOOP          2
64  ||     MVK     .L1     1,A2            ; A2 = 1, multiplier for MPYU below

66         LDNW    .D1T1   *A4++,A7        ; A7 = next source word (non-aligned load)
67         NOP             3

69         NOP
70         MV      .S2X    A7,B7           ; B7 = copy of the word for the store
71  ||     EXTU    .S1     A7,0,16,A16     ; A16 = A7 >> 16 (upper half-word)
72  ||     MPYU    .M1     A7,A2,A8        ; A8 = A7 * 1; presumably the low half-word - confirm

74         ADD     .L1     A16,A9,A9       ; A9 += upper half-word
75         SPKERNEL        6,0
76  ||     STNW    .D2T2   B7,*B4++        ; write the word to dst (non-aligned store)
77  ||     ADD     .L1     A8,A9,A9        ; A9 += A8

79 L8:     AND     .S2X    2,A6,B5         ; B5 = len & 2
80         CMPGT   .L2     B5,0,B0         ; B0 = (B5 > 0)
81   [!B0] BNOP    .S1     L82,4           ; no remaining half-word

83         ;; Handle the remaining half-word, one byte at a time
84         ZERO    .L1     A7
85 ||      ZERO    .D1     A8

87 #ifdef CONFIG_CPU_BIG_ENDIAN

89         LDBU    .D1T1   *A4++,A7        ; A7 = first byte
90         LDBU    .D1T1   *A4++,A8        ; A8 = second byte
91         NOP             3
92         SHL     .S1     A7,8,A0         ; A0 = first byte << 8
93         ADD     .S1     A8,A9,A9        ; A9 += second byte
94         STB     .D2T1   A7,*B4++        ; copy first byte
95 ||      ADD     .S1     A0,A9,A9        ; A9 += first byte << 8
96         STB     .D2T1   A8,*B4++        ; copy second byte

98 #else

100         LDBU    .D1T1   *A4++,A7        ; A7 = first byte
101         LDBU    .D1T1   *A4++,A8        ; A8 = second byte
102         NOP             3
103         ADD     .S1     A7,A9,A9        ; A9 += first byte
104         SHL     .S1     A8,8,A0         ; A0 = second byte << 8

106         STB     .D2T1   A7,*B4++        ; copy first byte
107 ||      ADD     .S1     A0,A9,A9        ; A9 += second byte << 8
108         STB     .D2T1   A8,*B4++        ; copy second byte

110 #endif

112         ;; Handle the last byte, if any
113 L82:    AND     .S2X    1,A6,B0         ; B0 = len & 1
114   [!B0] BNOP    .S1     L9,5            ; even length: go to the fold

116 ||      ZERO    .L1     A7

118 L83:    LDBU    .D1T1   *A4++,A7        ; A7 = last byte
119         NOP             4

121         MV      .L2X    A7,B7           ; B7 = copy of the byte for the store

123 #ifdef CONFIG_CPU_BIG_ENDIAN

125         STB     .D2T2   B7,*B4++        ; copy the byte
126 ||      SHL     .S1     A7,8,A7         ; A7 = byte << 8
127         ADD     .S1     A7,A9,A9        ; A9 += byte << 8

129 #else

131         STB     .D2T2   B7,*B4++        ; copy the byte
132 ||      ADD     .S1     A7,A9,A9        ; A9 += byte

134 #endif

136         ;; Fold the csum
137 L9:     SHRU    .S2X    A9,16,B0        ; B0 = A9 >> 16
138   [!B0] BNOP    .S1     L10,5           ; already fits in 16 bits

140 L91:    SHRU    .S2X    A9,16,B4        ; B4 = A9 >> 16 (dst pointer is no longer needed)
141 ||      EXTU    .S1     A9,16,16,A3     ; A3 = A9 & 0xffff
142         ADD     .D1X    A3,B4,A9        ; A9 = low half + high half

144         SHRU    .S1     A9,16,A0        ; A0 = remaining high half
145    [A0] BNOP    .S1     L91,5           ; repeat while the high half is non-zero

147 L10:    ADD     .D1     A31,A9,A9       ; add the given csum saved in A31
148         MV      .D1     A9,A4           ; A4 = return value

150         BNOP    .S2     B3,4            ; branch to the address in B3 (return)
151         MVC     .S2     B30,ILC         ; restore ILC saved on entry
152 ENDPROC(csum_partial_copy)
153
154 ;
155 ;unsigned short
156 ;ip_fast_csum(unsigned char *iph, unsigned int ihl)
157 ;{
158 ;       unsigned int checksum = 0;
159 ;       unsigned short *tosum = (unsigned short *) iph;
160 ;       int len;
161 ;
162 ;       len = ihl*4;
163 ;
164 ;       if (len <= 0)
165 ;               return 0;
166 ;
167 ;       while(len) {
168 ;               len -= 2;
169 ;               checksum += *tosum++;
170 ;       }
171 ;       if (len & 1)
172 ;               checksum += *(unsigned char*) tosum;
173 ;
174 ;       while(checksum >> 16)
175 ;               checksum = (checksum & 0xffff) + (checksum >> 16);
176 ;
177 ;       return ~checksum;
178 ;}
179 ;
180 ; A4:   iph
181 ; B4:   ihl
182 ; return checksum in A4
183 ;
184         .text
185
186 ENTRY(ip_fast_csum)
; Sum ihl*4 bytes at iph as unsigned half-words, fold the sum to 16 bits and
; return its complement masked to 16 bits.  Returns 0 when ihl*4 <= 0.
;   In:  A4 = iph, B4 = ihl
;   Out: A4 = checksum
; Scratch registers written here: A0, A3, A5, B0, B1, B4, B30.  ILC is saved
; in B30 on entry and restored by the MVC that follows the return branch.
187         ZERO    .D1     A5              ; A5 = running sum
188  ||     MVC     .S2     ILC,B30         ; save ILC in B30
189         SHL     .S2     B4,2,B0         ; B0 = len = ihl * 4
190         CMPGT   .L2     B0,0,B1         ; B1 = (len > 0)
191   [!B1] BNOP    .S1     L15,4           ; len <= 0: return
192   [!B1] ZERO    .D1     A3              ; ... with A3 = 0 as the result

194   [!B0] B       .S1     L12             ; len == 0: skip the loop
195         SHRU    .S2     B0,1,B0         ; B0 = len / 2 (half-word count)
196         MVC     .S2     B0,ILC          ; ILC = half-word count
197         NOP     3

199         SPLOOP  1
200         LDHU    .D1T1   *A4++,A3        ; A3 = next unsigned half-word
201         NOP     3
202         NOP
203         SPKERNEL        5,0
204  ||     ADD     .L1     A3,A5,A5        ; A5 += half-word

206 L12:    SHRU    .S1     A5,16,A0        ; A0 = A5 >> 16
207   [!A0] BNOP    .S1     L14,5           ; already fits in 16 bits

209 L13:    SHRU    .S2X    A5,16,B4        ; B4 = A5 >> 16
210         EXTU    .S1     A5,16,16,A3     ; A3 = A5 & 0xffff
211         ADD     .D1X    A3,B4,A5        ; A5 = low half + high half
212         SHRU    .S1     A5,16,A0        ; A0 = remaining high half
213   [A0]  BNOP    .S1     L13,5           ; repeat while the high half is non-zero

215 L14:    NOT     .D1     A5,A3           ; A3 = ~sum
216         EXTU    .S1     A3,16,16,A3     ; keep the low 16 bits only

218 L15:    BNOP    .S2     B3,3            ; branch to the address in B3 (return)
219         MVC     .S2     B30,ILC         ; restore ILC saved on entry
220         MV      .D1     A3,A4           ; A4 = return value
221 ENDPROC(ip_fast_csum)
222
223 ;
224 ;unsigned short
225 ;do_csum(unsigned char *buff, unsigned int len)
226 ;{
227 ;       int odd, count;
228 ;       unsigned int result = 0;
229 ;
230 ;       if (len <= 0)
231 ;               goto out;
232 ;       odd = 1 & (unsigned long) buff;
233 ;       if (odd) {
234 ;#ifdef __LITTLE_ENDIAN
235 ;               result += (*buff << 8);
236 ;#else
237 ;               result = *buff;
238 ;#endif
239 ;               len--;
240 ;               buff++;
241 ;       }
242 ;       count = len >> 1;               /* nr of 16-bit words.. */
243 ;       if (count) {
244 ;               if (2 & (unsigned long) buff) {
245 ;                       result += *(unsigned short *) buff;
246 ;                       count--;
247 ;                       len -= 2;
248 ;                       buff += 2;
249 ;               }
250 ;               count >>= 1;            /* nr of 32-bit words.. */
251 ;               if (count) {
252 ;                       unsigned int carry = 0;
253 ;                       do {
254 ;                               unsigned int w = *(unsigned int *) buff;
255 ;                               count--;
256 ;                               buff += 4;
257 ;                               result += carry;
258 ;                               result += w;
259 ;                               carry = (w > result);
260 ;                       } while (count);
261 ;                       result += carry;
262 ;                       result = (result & 0xffff) + (result >> 16);
263 ;               }
264 ;               if (len & 2) {
265 ;                       result += *(unsigned short *) buff;
266 ;                       buff += 2;
267 ;               }
268 ;       }
269 ;       if (len & 1)
270 ;#ifdef __LITTLE_ENDIAN
271 ;               result += *buff;
272 ;#else
273 ;               result += (*buff << 8);
274 ;#endif
275 ;       result = (result & 0xffff) + (result >> 16);
276 ;       /* add up carry.. */
277 ;       result = (result & 0xffff) + (result >> 16);
278 ;       if (odd)
279 ;               result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
280 ;out:
281 ;       return result;
282 ;}
283 ;
284 ; A4:   buff
285 ; B4:   len
286 ; return checksum in A4
287 ;
288
289 ENTRY(do_csum)
; Assembly version of the do_csum() reference C code quoted above: sums len
; bytes at buff, folds the sum to 16 bits and swaps the two result bytes when
; buff started on an odd address.  Returns 0 when len <= 0.
;   In:  A4 = buff, B4 = len
;   Out: A4 = folded sum
; The return address is moved from B3 to A5 so that B3 can hold the remaining
; length.  Registers written here: A0-A7, B0, B3 and ILC.
; NOTE(review): ILC is loaded below but is not saved or restored in this
; routine, unlike csum_partial_copy and ip_fast_csum - confirm this is intended.
290            CMPGT   .L2     B4,0,B0         ; B0 = (len > 0)
291    [!B0]   BNOP    .S1     L26,3           ; len <= 0: return (A1 is zeroed below before L26 is reached - presumably in the delay slots)
292            EXTU    .S1     A4,31,31,A0     ; A0 = buff & 1 (odd start address)

294            MV      .L1     A0,A3           ; A3 = odd flag, used for the final byte swap
295 ||         MV      .S1X    B3,A5           ; A5 = return address
296 ||         MV      .L2     B4,B3           ; B3 = remaining length
297 ||         ZERO    .D1     A1              ; A1 = result

299 #ifdef CONFIG_CPU_BIG_ENDIAN
300    [A0]    SUB     .L2     B3,1,B3         ; odd start: len--
301 || [A0]    LDBU    .D1T1   *A4++,A1        ; ... and result = first byte
302 #else
303    [!A0]   BNOP    .S1     L21,5           ; even start: nothing to do
304 || [A0]    LDBU    .D1T1   *A4++,A0        ; odd start: A0 = first byte
305            SUB     .L2     B3,1,B3         ; len--
306 ||         SHL     .S1     A0,8,A1         ; result = first byte << 8
307 L21:
308 #endif
309            SHR     .S2     B3,1,B0         ; B0 = count = len >> 1 (16-bit units)
310    [!B0]   BNOP    .S1     L24,3           ; no half-word: go to the last byte
311            MVK     .L1     2,A0
312            AND     .L1     A4,A0,A0        ; A0 = buff & 2

314    [!A0]   BNOP    .S1     L22,5           ; already 4-byte aligned
315 || [A0]    LDHU    .D1T1   *A4++,A0        ; otherwise A0 = leading half-word
316            SUB     .L2     B0,1,B0         ; count--
317 ||         SUB     .S2     B3,2,B3         ; len -= 2
318 ||         ADD     .L1     A0,A1,A1        ; result += leading half-word
319 L22:
320            SHR     .S2     B0,1,B0         ; B0 = count >> 1 (32-bit units)
321 ||         ZERO    .L1     A0              ; A0 = carry = 0

323    [!B0]   BNOP    .S1     L23,5           ; no full word: skip the loop
324 || [B0]    MVC     .S2     B0,ILC          ; ILC = word count

326            SPLOOP  3
327            SPMASK  L1
328 ||         MV      .L1     A1,A2           ; A2 = result so far (SPMASK presumably limits this MV to loop entry - confirm)
329 ||         LDW     .D1T1   *A4++,A1        ; A1 = w = next 32-bit word

331            NOP     4
332            ADD     .L1     A0,A1,A0        ; A0 = carry + w
333            ADD     .L1     A2,A0,A2        ; result += carry + w

335            SPKERNEL 1,2
336 ||         CMPGTU  .L1     A1,A2,A0        ; carry = (w > result)

338            ADD     .L1     A0,A2,A6        ; A6 = result + last carry
339            EXTU    .S1     A6,16,16,A7     ; A7 = A6 & 0xffff
340            SHRU    .S2X    A6,16,B0        ; B0 = A6 >> 16
341            NOP             1
342            ADD     .L1X    A7,B0,A1        ; result = low half + high half
343 L23:
344            MVK     .L2     2,B0
345            AND     .L2     B3,B0,B0        ; B0 = len & 2
346    [B0]    LDHU    .D1T1   *A4++,A0        ; A0 = trailing half-word
347            NOP     4
348    [B0]    ADD     .L1     A0,A1,A1        ; result += trailing half-word
349 L24:
350            EXTU    .S2     B3,31,31,B0     ; B0 = len & 1
351 #ifdef CONFIG_CPU_BIG_ENDIAN
352    [!B0]   BNOP    .S1     L25,4           ; no trailing byte
353 || [B0]    LDBU    .D1T1   *A4,A0          ; A0 = trailing byte
354            SHL     .S1     A0,8,A0         ; A0 = trailing byte << 8
355            ADD     .L1     A0,A1,A1        ; result += trailing byte << 8
356 L25:
357 #else
358    [B0]    LDBU    .D1T1   *A4,A0          ; A0 = trailing byte
359            NOP     4
360    [B0]    ADD     .L1     A0,A1,A1        ; result += trailing byte
361 #endif
362            EXTU    .S1     A1,16,16,A0     ; A0 = result & 0xffff
363            SHRU    .S2X    A1,16,B0        ; B0 = result >> 16
364            NOP     1
365            ADD     .L1X    A0,B0,A0        ; first fold
366            SHRU    .S1     A0,16,A1        ; A1 = carry out of the first fold
367            ADD     .L1     A0,A1,A0        ; add up carry
368            EXTU    .S1     A0,16,16,A1     ; A1 = result & 0xffff
369            EXTU    .S1     A1,16,24,A2     ; A2 = (result >> 8) & 0xff

371            EXTU    .S1     A1,24,16,A0     ; A0 = (result & 0xff) << 8
372 ||         MV      .L2X    A3,B0           ; B0 = odd flag

374    [B0]    OR      .L1     A0,A2,A1        ; odd start: swap the two result bytes
375 L26:
376            NOP     1
377            BNOP    .S2X    A5,4            ; branch to the saved return address
378            MV      .L1     A1,A4           ; A4 = return value
379 ENDPROC(do_csum)
380
381 ;__wsum csum_partial(const void *buff, int len, __wsum wsum)
382 ;{
383 ;       unsigned int sum = (__force unsigned int)wsum;
384 ;       unsigned int result = do_csum(buff, len);
385 ;
386 ;       /* add in old sum, and carry.. */
387 ;       result += sum;
388 ;       if (sum > result)
389 ;               result += 1;
390 ;       return (__force __wsum)result;
391 ;}
392 ;
393 ENTRY(csum_partial)
; Calls do_csum on (buff, len) and adds wsum to its result with an
; end-around carry, as in the reference C code quoted above.
;   In:  A4 = buff and B4 = len (both passed through to do_csum unchanged),
;        A6 = wsum
;   Out: A4 = do_csum result + wsum + carry
; A8 and A9 must survive the call; do_csum as written in this file does not
; write either register.
394            MV      .L1X    B3,A9           ; A9 = our return address
395 ||         CALLP   .S2     do_csum,B3      ; call do_csum; result comes back in A4
396 ||         MV      .S1     A6,A8           ; A8 = sum (wsum)
397            BNOP    .S2X    A9,2            ; branch to the saved return address
398            ADD     .L1     A8,A4,A1        ; result = do_csum value + sum
399            CMPGTU  .L1     A8,A1,A0        ; A0 = (sum > result), i.e. the carry
400            ADD     .L1     A1,A0,A4        ; A4 = result + carry
401 ENDPROC(csum_partial)
402
403 ;unsigned short
404 ;ip_compute_csum(unsigned char *buff, unsigned int len)
405 ;
406 ; A4:   buff
407 ; B4:   len
408 ; return checksum in A4
409
410 ENTRY(ip_compute_csum)
; Calls do_csum on (buff, len) and returns the complement of its result with
; bits 16-31 cleared.  A4 (buff) and B4 (len) are passed through unchanged.
; A9 must survive the call; do_csum as written in this file does not write it.
411            MV      .L1X    B3,A9           ; A9 = our return address
412 ||         CALLP   .S2     do_csum,B3      ; call do_csum; result comes back in A4
413            BNOP    .S2X    A9,3            ; branch to the saved return address
414            NOT     .S1     A4,A4           ; A4 = ~result
415            CLR     .S1     A4,16,31,A4     ; clear bits 16-31, keeping the low 16 bits
416 ENDPROC(ip_compute_csum)