Merge tag 'spdx-5.2-rc3-1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh...
[sfrench/cifs-2.6.git] / arch / openrisc / lib / memset.S
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 /*
3  * OpenRISC memset.S
4  *
5  * Hand-optimized assembler version of memset for OpenRISC.
6  * Algorithm inspired by several other arch-specific memset routines
7  * in the kernel tree
8  *
9  * Copyright (C) 2015 Olof Kindgren <olof.kindgren@gmail.com>
10  */
11
12         .global memset
13         .type   memset, @function
14 memset:
15         /* memset(s, c, n): fill n bytes starting at s with the byte c. Arguments:
16          * r3 = *s (destination pointer; never modified here)
17          * r4 = c (only the low 8 bits are used)
18          * r5 = n (byte count)
19          * r13, r15, r17, r19 used as temp regs:
20          * r13 = fill pattern, r15 = scratch, r17 = bytes left, r19 = write pointer */
21         /* Delay slots: this code relies on the instruction after each l.bf/l.jr also running when the branch is taken. */
22         /* Exit if n == 0 */
23         l.sfeqi         r5, 0  // flag = (n == 0)
24         l.bf            4f
25
26         /* Truncate c to char (sits in the delay slot of the branch above; harmless when n == 0) */
27         l.andi          r13, r4, 0xff
28
29         /* Skip word extension if c is 0 (r13 is then already the all-zero word) */
30         l.sfeqi         r13, 0
31         l.bf            1f
32         /* Delay slot: flag = (n <= 7), i.e. fewer than two whole words (8 bytes); consumed by "l.bf 3f" below */
33          l.sfleui       r5, 7
34
35         /* Extend char c to 32-bit word cccc in r13 (none of these four instructions is a compare, so the flag survives) */
36         l.slli          r15, r13, 16  // r13 = 000c, r15 = 0c00
37         l.or            r13, r13, r15 // r13 = 0c0c, r15 = 0c00
38         l.slli          r15, r13, 8   // r13 = 0c0c, r15 = c0c0
39         l.or            r13, r13, r15 // r13 = cccc, r15 = c0c0
40
41 1:      l.addi          r19, r3, 0 // r19 = s (write pointer used by the byte loop)
42         /* Jump to byte store loop if n < 8 (flag still holds the l.sfleui result) */
43         l.bf            3f
44          l.or           r17, r5, r0 // Delay slot: r17 = n (bytes left), set on both paths
45
46         /* Mask out two LSBs of s to check alignment */
47         l.andi          r15, r3, 0x3
48
49         /* lsb == 00, already word aligned: jump to word store loop */
50         l.sfeqi         r15, 0
51         l.bf            2f
52          l.addi         r19, r3, 0 // Delay slot: r19 = s (same value as set at label 1)
53
54         /* lsb == 01,10 or 11: store leading bytes until the pointer is word aligned */
55         l.sb            0(r3), r13   // *s = c
56         l.addi          r17, r17, -1 // Decrease n
57
58         l.sfeqi         r15, 3       // lsb == 11: one byte was enough
59         l.bf            2f
60          l.addi         r19, r3, 1  // Delay slot: r19 = s + 1 (overwritten below if not taken)
61
62         /* lsb == 01 or 10 */
63         l.sb            1(r3), r13   // *(s+1) = c
64         l.addi          r17, r17, -1 // Decrease n
65
66         l.sfeqi         r15, 2       // lsb == 10: two bytes were enough
67         l.bf            2f
68          l.addi         r19, r3, 2  // Delay slot: r19 = s + 2 (overwritten below if not taken)
69
70         /* lsb == 01: a third byte is needed */
71         l.sb            2(r3), r13   // *(s+2) = c
72         l.addi          r17, r17, -1 // Decrease n
73         l.addi          r19, r3, 3   // r19 = s + 3, now word aligned
74
75         /* Word store loop: entered with r19 word aligned and r17 >= 5 (n >= 8 minus at most 3 bytes) */
76 2:      l.sw            0(r19), r13  // *r19 = cccc
77         l.addi          r17, r17, -4 // Decrease n by one word
78         l.sfgeui        r17, 4       // Another whole word left?
79         l.bf            2b
80          l.addi         r19, r19, 4  // Delay slot: advance r19 (also runs on loop exit)
81
82         /* When n > 0 (1..3 bytes left), store the remaining bytes, otherwise jump to exit */
83         l.sfeqi         r17, 0
84         l.bf            4f
85         /* The l.addi at label 3 doubles as the delay slot of the branch above; r17 is dead on the exit path */
86         /* Byte store loop: runs r17 times; r17 is never 0 on entry */
87 3:      l.addi          r17, r17, -1 // Decrease n
88         l.sb            0(r19), r13  // *r19 = c (l.sb stores only the low byte of r13)
89         l.sfnei         r17, 0       // Bytes left?
90         l.bf            3b
91          l.addi         r19, r19, 1  // Delay slot: advance r19
92
93 4:      l.jr            r9           // Return to the address in r9
94          l.ori          r11, r3, 0   // Delay slot: r11 = s (presumably the ABI return-value register)