Merge branch 'x86-cleanups-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[sfrench/cifs-2.6.git] / arch / arm / lib / csumpartialcopygeneric.S
1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /*
3  *  linux/arch/arm/lib/csumpartialcopygeneric.S
4  *
5  *  Copyright (C) 1995-2001 Russell King
6  */
7 #include <asm/assembler.h>
8
9 /*
10  * unsigned int
11  * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, ...)
12  *  r0 = src, r1 = dst, r2 = len, r3 = sum
13  *  Returns : r0 = checksum
14  *
15  * Note that 'tst' and 'teq' preserve the carry flag.
16  */
17
/*
 * Symbolic names for the four argument registers, matching the
 * register assignment given in the prototype comment (r0 = src,
 * r1 = dst, r2 = len, r3 = sum).  The code below uses these names
 * instead of the raw register numbers.
 */
18 src     .req    r0
19 dst     .req    r1
20 len     .req    r2
21 sum     .req    r3
22
/*
 * Zero-length exit, reached from .Lless8 when len == 0.  Nothing is
 * copied: the incoming sum is moved to r0 (the return register) as is.
 * load_regs is a macro defined outside this file; presumably it undoes
 * save_regs and returns to the caller -- confirm against the including
 * file.
 */
23 .Lzero:         mov     r0, sum
24                 load_regs
25
26                 /*
27                  * Align an unaligned destination pointer.  We know that
28                  * we have >= 8 bytes here, so we don't need to check
29                  * the length.  Note that the source pointer hasn't been
30                  * aligned yet.
31                  */
                   /*
                    * Subroutine: entered with "blne" from the main entry path
                    * (so lr holds the return address) only when dst & 3 != 0.
                    * Copies 1, 2 or 3 bytes so that dst becomes 4-byte aligned,
                    * adding each byte to sum with adcs.  The carry out of the
                    * last adcs is left in C for the caller; "sub" (no S suffix)
                    * and "tst" are used in between so C is not disturbed.
                    *
                    * load1b/load2b and put_byte_0/put_byte_1 are defined outside
                    * this file.  Presumably loadNb fetches N bytes from src with
                    * post-increment, and put_byte_N is a shift operand placing
                    * the byte in lane N for the current endianness -- confirm.
                    */
32 .Ldst_unaligned:
                   /* Even dst: only a 16-bit step is needed to reach alignment. */
33                 tst     dst, #1
34                 beq     .Ldst_16bit
35
                   /*
                    * Odd dst: copy a single byte.  It is summed with put_byte_1
                    * because it lands on an odd destination address; .Ldone
                    * rotates the final result when the original dst was odd.
                    */
36                 load1b  ip
37                 sub     len, len, #1
38                 adcs    sum, sum, ip, put_byte_1        @ update checksum
39                 strb    ip, [dst], #1
                   /* dst is now even; return if bit 1 is clear as well. */
40                 tst     dst, #2
41                 reteq   lr                      @ dst is now 32bit aligned
42
                   /* dst is 2 (mod 4): copy and sum one byte pair, then return. */
43 .Ldst_16bit:    load2b  r8, ip
44                 sub     len, len, #2
45                 adcs    sum, sum, r8, put_byte_0
46                 strb    r8, [dst], #1
47                 adcs    sum, sum, ip, put_byte_1
48                 strb    ip, [dst], #1
49                 ret     lr                      @ dst is now 32bit aligned
50
51                 /*
52                  * Handle 0 to 7 bytes, with any alignment of source and
53                  * destination pointers.  Note that when we get here, C = 0
54                  */
                   /*
                    * Reached from the entry path via "blo" after "cmp len, #8",
                    * which is why C is clear on arrival.  Everything is copied
                    * bytewise: an optional leading byte to make dst even, then
                    * byte pairs, then an optional trailing byte.  len is only
                    * changed with "sub" (no S suffix) and examined with tst/teq,
                    * so the carry chain between the adcs instructions is intact.
                    *
                    * load1b/load2b and put_byte_0/put_byte_1 come from outside
                    * this file; presumably loadNb reads N bytes from src with
                    * post-increment -- confirm.
                    */
55 .Lless8:        teq     len, #0                 @ check for zero count
56                 beq     .Lzero
57
58                 /* we must have at least one byte. */
59                 tst     dst, #1                 @ dst 16-bit aligned
60                 beq     .Lless8_aligned
61
62                 /* Align dst */
63                 load1b  ip
64                 sub     len, len, #1
65                 adcs    sum, sum, ip, put_byte_1        @ update checksum
66                 strb    ip, [dst], #1
                   /* No full byte pair left (len is 0 or 1): skip the pair loop. */
67                 tst     len, #6
68                 beq     .Lless8_byteonly
69
                   /* Copy and sum two bytes per pass while len & 6 is non-zero. */
70 1:              load2b  r8, ip
71                 sub     len, len, #2
72                 adcs    sum, sum, r8, put_byte_0
73                 strb    r8, [dst], #1
74                 adcs    sum, sum, ip, put_byte_1
75                 strb    ip, [dst], #1
76 .Lless8_aligned:
77                 tst     len, #6
78                 bne     1b
                   /* At most one byte remains; copy and sum it if len is odd. */
79 .Lless8_byteonly:
80                 tst     len, #1
81                 beq     .Ldone
82                 load1b  r8
83                 adcs    sum, sum, r8, put_byte_0        @ update checksum
84                 strb    r8, [dst], #1
85                 b       .Ldone
86
/*
 * Function entry and the fast path for a word-aligned source.
 *
 * In:  src (r0), dst (r1), len (r2), sum (r3)
 * Out: r0 = checksum accumulated over the copied bytes plus the
 *      incoming sum (see .Ldone for the final fix-up)
 *
 * FN_ENTRY, save_regs, load_regs, load1l/load2l/load4l, get_byte_N,
 * put_byte_N and lspush are not defined in this file.  Presumably
 * FN_ENTRY emits the entry label, save_regs pushes the working
 * registers with the original dst at [sp, #0] (that slot is read back
 * at .Ldone), and loadNl fetches N words from src with post-increment
 * -- confirm against the including file.
 *
 * The checksum is a chain of adcs instructions; every instruction placed
 * between two adcs is one the code relies on to leave C untouched
 * (sub without S, tst, teq, mov, loads and stores, and the bics/ands
 * with small immediates).
 */
87 FN_ENTRY
88                 save_regs
89
90                 cmp     len, #8                 @ Ensure that we have at least
91                 blo     .Lless8                 @ 8 bytes to copy.
92
93                 adds    sum, sum, #0            @ C = 0
94                 tst     dst, #3                 @ Test destination alignment
95                 blne    .Ldst_unaligned         @ align destination, return here
96
97                 /*
98                  * Ok, the dst pointer is now 32bit aligned, and we know
99                  * that we must have more than 4 bytes to copy.  Note
100                  * that C contains the carry from the dst alignment above.
101                  */
102
103                 tst     src, #3                 @ Test source alignment
104                 bne     .Lsrc_not_aligned
105
106                 /* Routine for src & dst aligned */
107
                    /* ip = len rounded down to a multiple of 16; skip if zero. */
108                 bics    ip, len, #15
109                 beq     2f
110
                    /* Main loop: copy and sum four words (16 bytes) per pass. */
111 1:              load4l  r4, r5, r6, r7
112                 stmia   dst!, {r4, r5, r6, r7}
113                 adcs    sum, sum, r4
114                 adcs    sum, sum, r5
115                 adcs    sum, sum, r6
116                 adcs    sum, sum, r7
117                 sub     ip, ip, #16
118                 teq     ip, #0
119                 bne     1b
120
                    /*
                     * ip = len & 12: up to three whole words remain.  Handle
                     * the 8-byte part (if bit 3 is set), then the 4-byte part
                     * (if bit 2 is set).
                     */
121 2:              ands    ip, len, #12
122                 beq     4f
123                 tst     ip, #8
124                 beq     3f
125                 load2l  r4, r5
126                 stmia   dst!, {r4, r5}
127                 adcs    sum, sum, r4
128                 adcs    sum, sum, r5
129                 tst     ip, #4
130                 beq     4f
131
132 3:              load1l  r4
133                 str     r4, [dst], #4
134                 adcs    sum, sum, r4
135
                    /*
                     * len = len & 3: one to three trailing bytes.  A whole word
                     * is loaded into r4 and the needed bytes are extracted
                     * from it with get_byte_N.  With two or three bytes left,
                     * the first two are summed in one step (r4 with lspush #16)
                     * and stored individually; r5 is left holding the third
                     * byte for .Lexit.  With one byte left, r5 holds byte 0.
                     */
136 4:              ands    len, len, #3
137                 beq     .Ldone
138                 load1l  r4
139                 tst     len, #2
140                 mov     r5, r4, get_byte_0
141                 beq     .Lexit
142                 adcs    sum, sum, r4, lspush #16
143                 strb    r5, [dst], #1
144                 mov     r5, r4, get_byte_1
145                 strb    r5, [dst], #1
146                 mov     r5, r4, get_byte_2
                    /*
                     * Shared tail: if len is odd, r5 holds the last byte (maybe
                     * with other bits above it, hence the mask with 255).
                     * Store it and add it to the checksum.
                     */
147 .Lexit:         tst     len, #1
148                 strbne  r5, [dst], #1
149                 andne   r5, r5, #255
150                 adcsne  sum, sum, r5, put_byte_0
151
152                 /*
153                  * If the dst pointer was not 16-bit aligned, we
154                  * need to rotate the checksum here to get around
155                  * the inefficient byte manipulations in the
156                  * architecture independent code.
157                  */
                    /*
                     * Common exit: fold the outstanding carry into the result,
                     * reload the original dst from the stack (sum is reused as
                     * a scratch register here) and rotate r0 by 8 bits if that
                     * dst was odd.
                     */
158 .Ldone:         adc     r0, sum, #0
159                 ldr     sum, [sp, #0]           @ dst
160                 tst     sum, #1
161                 movne   r0, r0, ror #8
162                 load_regs
163
/*
 * Source is not word aligned (dst already is).  Fold the pending carry
 * into sum, round src down to a word boundary and load the first
 * aligned word into r5, then dispatch on the byte offset in ip:
 * offset 2 -> .Lsrc2_aligned, offset 3 -> .Lsrc3_aligned, offset 1
 * falls through to the code below.
 *
 * In the offset-1 code r4 always holds the source bytes that have been
 * loaded but not yet stored.  Each output word is built from r4 merged
 * with the leading part of the next source word.
 *
 * load1l/load2l/load4l, lspull, lspush and get_byte_N are defined
 * outside this file; presumably lspull/lspush are endian-dependent
 * logical shifts and loadNl reads N words from src with post-increment
 * -- confirm.
 */
164 .Lsrc_not_aligned:
165                 adc     sum, sum, #0            @ include C from dst alignment
166                 and     ip, src, #3
167                 bic     src, src, #3
168                 load1l  r5
169                 cmp     ip, #2
170                 beq     .Lsrc2_aligned
171                 bhi     .Lsrc3_aligned
                    /* Offset 1: the cmp above (1 - 2) left the carry clear. */
172                 mov     r4, r5, lspull #8               @ C = 0
173                 bics    ip, len, #15
174                 beq     2f
                    /* Main loop: realign, store and sum 16 bytes per pass. */
175 1:              load4l  r5, r6, r7, r8
176                 orr     r4, r4, r5, lspush #24
177                 mov     r5, r5, lspull #8
178                 orr     r5, r5, r6, lspush #24
179                 mov     r6, r6, lspull #8
180                 orr     r6, r6, r7, lspush #24
181                 mov     r7, r7, lspull #8
182                 orr     r7, r7, r8, lspush #24
183                 stmia   dst!, {r4, r5, r6, r7}
184                 adcs    sum, sum, r4
185                 adcs    sum, sum, r5
186                 adcs    sum, sum, r6
187                 adcs    sum, sum, r7
188                 mov     r4, r8, lspull #8
189                 sub     ip, ip, #16
190                 teq     ip, #0
191                 bne     1b
                    /* Remaining whole words: 8-byte part, then 4-byte part. */
192 2:              ands    ip, len, #12
193                 beq     4f
194                 tst     ip, #8
195                 beq     3f
196                 load2l  r5, r6
197                 orr     r4, r4, r5, lspush #24
198                 mov     r5, r5, lspull #8
199                 orr     r5, r5, r6, lspush #24
200                 stmia   dst!, {r4, r5}
201                 adcs    sum, sum, r4
202                 adcs    sum, sum, r5
203                 mov     r4, r6, lspull #8
204                 tst     ip, #4
205                 beq     4f
206 3:              load1l  r5
207                 orr     r4, r4, r5, lspush #24
208                 str     r4, [dst], #4
209                 adcs    sum, sum, r4
210                 mov     r4, r5, lspull #8
                    /*
                     * Trailing 1-3 bytes.  No further load is issued on this
                     * path: the bytes are taken from r4.  With two or three
                     * bytes left, two are summed at once and stored; r5 is left
                     * holding the candidate third byte for .Lexit.
                     */
211 4:              ands    len, len, #3
212                 beq     .Ldone
213                 mov     r5, r4, get_byte_0
214                 tst     len, #2
215                 beq     .Lexit
216                 adcs    sum, sum, r4, lspush #16
217                 strb    r5, [dst], #1
218                 mov     r5, r4, get_byte_1
219                 strb    r5, [dst], #1
220                 mov     r5, r4, get_byte_2
221                 b       .Lexit
222
/*
 * Source is at byte offset 2 within its word (dst is word aligned).
 * Entered from .Lsrc_not_aligned with r5 = first aligned source word
 * and C set by the "cmp ip, #2" there, so C is cleared again first.
 *
 * r4 holds the source bytes loaded but not yet stored; each output
 * word is r4 merged with the leading half of the next source word.
 *
 * load1b/load1l/load2l/load4l, lspull, lspush and get_byte_N are
 * defined outside this file; presumably lspull/lspush are
 * endian-dependent logical shifts -- confirm.
 */
223 .Lsrc2_aligned: mov     r4, r5, lspull #16
224                 adds    sum, sum, #0
225                 bics    ip, len, #15
226                 beq     2f
                    /* Main loop: realign, store and sum 16 bytes per pass. */
227 1:              load4l  r5, r6, r7, r8
228                 orr     r4, r4, r5, lspush #16
229                 mov     r5, r5, lspull #16
230                 orr     r5, r5, r6, lspush #16
231                 mov     r6, r6, lspull #16
232                 orr     r6, r6, r7, lspush #16
233                 mov     r7, r7, lspull #16
234                 orr     r7, r7, r8, lspush #16
235                 stmia   dst!, {r4, r5, r6, r7}
236                 adcs    sum, sum, r4
237                 adcs    sum, sum, r5
238                 adcs    sum, sum, r6
239                 adcs    sum, sum, r7
240                 mov     r4, r8, lspull #16
241                 sub     ip, ip, #16
242                 teq     ip, #0
243                 bne     1b
                    /* Remaining whole words: 8-byte part, then 4-byte part. */
244 2:              ands    ip, len, #12
245                 beq     4f
246                 tst     ip, #8
247                 beq     3f
248                 load2l  r5, r6
249                 orr     r4, r4, r5, lspush #16
250                 mov     r5, r5, lspull #16
251                 orr     r5, r5, r6, lspush #16
252                 stmia   dst!, {r4, r5}
253                 adcs    sum, sum, r4
254                 adcs    sum, sum, r5
255                 mov     r4, r6, lspull #16
256                 tst     ip, #4
257                 beq     4f
258 3:              load1l  r5
259                 orr     r4, r4, r5, lspush #16
260                 str     r4, [dst], #4
261                 adcs    sum, sum, r4
262                 mov     r4, r5, lspull #16
                    /*
                     * Trailing 1-3 bytes.  With a single byte left it is taken
                     * from r4 and handed to .Lexit in r5.  With two or three
                     * left, r4 is summed as is and two bytes from it are
                     * stored; a third byte, if needed, is fetched with load1b
                     * and finished off by .Lexit.
                     */
263 4:              ands    len, len, #3
264                 beq     .Ldone
265                 mov     r5, r4, get_byte_0
266                 tst     len, #2
267                 beq     .Lexit
268                 adcs    sum, sum, r4
269                 strb    r5, [dst], #1
270                 mov     r5, r4, get_byte_1
271                 strb    r5, [dst], #1
272                 tst     len, #1
273                 beq     .Ldone
274                 load1b  r5
275                 b       .Lexit
276
/*
 * Source is at byte offset 3 within its word (dst is word aligned).
 * Entered from .Lsrc_not_aligned with r5 = first aligned source word
 * and C set by the "cmp ip, #2" there, so C is cleared again first.
 *
 * r4 holds the source bytes loaded but not yet stored; each output
 * word is r4 merged with the leading part of the next source word.
 *
 * load1l/load2l/load4l, lspull, lspush, get_byte_N and FN_EXIT are
 * defined outside this file; presumably lspull/lspush are
 * endian-dependent logical shifts and FN_EXIT closes the function
 * opened by FN_ENTRY -- confirm.
 */
277 .Lsrc3_aligned: mov     r4, r5, lspull #24
278                 adds    sum, sum, #0
279                 bics    ip, len, #15
280                 beq     2f
                    /* Main loop: realign, store and sum 16 bytes per pass. */
281 1:              load4l  r5, r6, r7, r8
282                 orr     r4, r4, r5, lspush #8
283                 mov     r5, r5, lspull #24
284                 orr     r5, r5, r6, lspush #8
285                 mov     r6, r6, lspull #24
286                 orr     r6, r6, r7, lspush #8
287                 mov     r7, r7, lspull #24
288                 orr     r7, r7, r8, lspush #8
289                 stmia   dst!, {r4, r5, r6, r7}
290                 adcs    sum, sum, r4
291                 adcs    sum, sum, r5
292                 adcs    sum, sum, r6
293                 adcs    sum, sum, r7
294                 mov     r4, r8, lspull #24
295                 sub     ip, ip, #16
296                 teq     ip, #0
297                 bne     1b
                    /* Remaining whole words: 8-byte part, then 4-byte part. */
298 2:              ands    ip, len, #12
299                 beq     4f
300                 tst     ip, #8
301                 beq     3f
302                 load2l  r5, r6
303                 orr     r4, r4, r5, lspush #8
304                 mov     r5, r5, lspull #24
305                 orr     r5, r5, r6, lspush #8
306                 stmia   dst!, {r4, r5}
307                 adcs    sum, sum, r4
308                 adcs    sum, sum, r5
309                 mov     r4, r6, lspull #24
310                 tst     ip, #4
311                 beq     4f
312 3:              load1l  r5
313                 orr     r4, r4, r5, lspush #8
314                 str     r4, [dst], #4
315                 adcs    sum, sum, r4
316                 mov     r4, r5, lspull #24
                    /*
                     * Trailing 1-3 bytes.  With a single byte left it is taken
                     * from r4 and handed to .Lexit in r5.  With two or three
                     * left, that byte is stored and summed here, a fresh word
                     * is loaded to supply the second byte (stored and summed
                     * via lspush #24), and r5 is left holding the candidate
                     * third byte for .Lexit.
                     */
317 4:              ands    len, len, #3
318                 beq     .Ldone
319                 mov     r5, r4, get_byte_0
320                 tst     len, #2
321                 beq     .Lexit
322                 strb    r5, [dst], #1
323                 adcs    sum, sum, r4
324                 load1l  r4
325                 mov     r5, r4, get_byte_0
326                 strb    r5, [dst], #1
327                 adcs    sum, sum, r4, lspush #24
328                 mov     r5, r4, get_byte_1
329                 b       .Lexit
330 FN_EXIT