arch/xtensa/lib/usercopy.S

   1 /*
   2  *  arch/xtensa/lib/usercopy.S
   3  *
   4  *  Copy to/from user space (derived from arch/xtensa/lib/hal/memcopy.S)
   5  *
   6  *  DO NOT COMBINE this function with <arch/xtensa/lib/hal/memcopy.S>.
   7  *  It needs to remain separate and distinct.  The hal files are part
   8  *  of the Xtensa link-time HAL, and those files may differ per
   9  *  processor configuration.  Patching the kernel for another
  10  *  processor configuration includes replacing the hal files, and we
  11  *  could lose the special functionality for accessing user-space
  12  *  memory during such a patch.  We sacrifice a little code space here
  13  *  in favor to simplify code maintenance.
  14  *
  15  *  This file is subject to the terms and conditions of the GNU General
  16  *  Public License.  See the file "COPYING" in the main directory of
  17  *  this archive for more details.
  18  *
  19  *  Copyright (C) 2002 Tensilica Inc.
  20  */
  21
  22
  23 /*
  24  * size_t __xtensa_copy_user (void *dst, const void *src, size_t len);
  25  *
  26  * The returned value is the number of bytes not copied.  Implies zero
  27  * is success.
  28  *
  29  * The general case algorithm is as follows:
  30  *   If the destination and source are both aligned,
  31  *     do 16B chunks with a loop, and then finish up with
  32  *     8B, 4B, 2B, and 1B copies conditional on the length.
  33  *   If destination is aligned and source unaligned,
  34  *     do the same, but use SRC to align the source data.
  35  *   If destination is unaligned, align it by conditionally
  36  *     copying 1B and 2B and then retest.
  37  *   This code tries to use fall-through braches for the common
  38  *     case of aligned destinations (except for the branches to
  39  *     the alignment label).
  40  *
  41  * Register use:
  42  *      a0/ return address
  43  *      a1/ stack pointer
  44  *      a2/ return value
  45  *      a3/ src
  46  *      a4/ length
  47  *      a5/ dst
  48  *      a6/ tmp
  49  *      a7/ tmp
  50  *      a8/ tmp
  51  *      a9/ tmp
  52  *      a10/ tmp
  53  *      a11/ original length
  54  */
  55
  56 #include <linux/linkage.h>
  57 #include <variant/core.h>
  58 #include <asm/asmmacro.h>
  59
  60         .text
  61 ENTRY(__xtensa_copy_user)
  62         # Entry and alignment dispatch: pick the copy strategy from the low bits of dst and src.
  63         entry   sp, 16          # minimal stack frame
  64         # a2/ dst, a3/ src, a4/ len
  65         mov     a5, a2          # a5 = working dst; a2 keeps the original dst for the fixup and later holds the return value
  66         mov     a11, a4         # preserve original len for error case
  67 .Lcommon:
  68         bbsi.l  a2, 0, .Ldst1mod2 # if dst is 1 mod 2
  69         bbsi.l  a2, 1, .Ldst2mod4 # if dst is 2 mod 4
  70 .Ldstaligned:   # return here from .Ldst1mod2/.Ldst2mod4 once dst is word aligned
  71         srli    a7, a4, 4       # a7 = len / 16 = number of loop iterations
  72                                 # with 16B per iteration
  73         movi    a8, 3             # if source is also aligned,
  74         bnone   a3, a8, .Laligned # then use word copy
  75         __ssa8  a3              # set shift amount from byte offset
  76         bnez    a4, .Lsrcunaligned # a8 (== 3) stays live: .Lsrcunaligned uses it as the offset mask
  77         movi    a2, 0           # return success for len==0
  78         retw
  79
  80 /*
  81  * Destination is unaligned: copy 1 and/or 2 leading bytes until dst is word aligned, then rejoin .Ldstaligned
  82  */
  83
  84 .Ldst1mod2:     # dst is only byte aligned
  85         bltui   a4, 7, .Lbytecopy       # do short copies (len < 7) byte by byte
  86
  87         # copy 1 byte
  88 EX(10f) l8ui    a6, a3, 0
  89         addi    a3, a3,  1
  90 EX(10f) s8i     a6, a5,  0
  91         addi    a5, a5,  1
  92         addi    a4, a4, -1      # a4 = bytes still to copy
  93         bbci.l  a5, 1, .Ldstaligned     # if dst is now aligned, then
  94                                         # return to main algorithm
  95 .Ldst2mod4:     # dst 16-bit aligned
  96         # copy 2 bytes
  97         bltui   a4, 6, .Lbytecopy       # do short copies (len < 6) byte by byte
  98 EX(10f) l8ui    a6, a3, 0
  99 EX(10f) l8ui    a7, a3, 1       # a7 is scratch here; .Ldstaligned recomputes it from a4
 100         addi    a3, a3,  2
 101 EX(10f) s8i     a6, a5,  0
 102 EX(10f) s8i     a7, a5,  1
 103         addi    a5, a5,  2
 104         addi    a4, a4, -2      # a4 = bytes still to copy
 105         j       .Ldstaligned    # dst is now aligned, return to main algorithm
 106
 107 /*
 108  * Byte by byte copy of the a4 remaining bytes (used for short copies to an unaligned dst)
 109  */
 110         .align  4
 111         .byte   0               # 1 mod 4 alignment for LOOPNEZ
 112                                 # (0 mod 4 alignment for LBEG)
 113 .Lbytecopy:
 114 #if XCHAL_HAVE_LOOPS
 115         loopnez a4, .Lbytecopydone      # run the body a4 times; skip it when a4 == 0
 116 #else /* !XCHAL_HAVE_LOOPS */
 117         beqz    a4, .Lbytecopydone
 118         add     a7, a3, a4      # a7 = end address for source
 119 #endif /* !XCHAL_HAVE_LOOPS */
 120 .Lnextbyte:
 121 EX(10f) l8ui    a6, a3, 0
 122         addi    a3, a3, 1
 123 EX(10f) s8i     a6, a5, 0
 124         addi    a5, a5, 1       # dst advances only after the store, so the fixup count stays exact
 125 #if !XCHAL_HAVE_LOOPS
 126         bltu    a3, a7, .Lnextbyte      # unsigned compare: addresses are not signed, a range may cross 0x80000000
 127 #endif /* !XCHAL_HAVE_LOOPS */
 128 .Lbytecopydone:
 129         movi    a2, 0           # return success for len bytes copied
 130         retw
 131
 132 /*
 133  * Destination and source are word-aligned: 16B loop (a7 iterations), then 8B/4B/2B/1B tails chosen by bits 3..0 of a4.
 134  */
 135         # copy 16 bytes per iteration for word-aligned dst and word-aligned src
 136         .align  4               # 1 mod 4 alignment for LOOPNEZ
 137         .byte   0               # (0 mod 4 alignment for LBEG)
 138 .Laligned:
 139 #if XCHAL_HAVE_LOOPS
 140         loopnez a7, .Loop1done  # a7 = len / 16; a7 is reused as scratch inside the loop body
 141 #else /* !XCHAL_HAVE_LOOPS */
 142         beqz    a7, .Loop1done
 143         slli    a8, a7, 4       # a8 = 16 * iterations
 144         add     a8, a8, a3      # a8 = end of last 16B source chunk
 145 #endif /* !XCHAL_HAVE_LOOPS */
 146 .Loop1:
 147 EX(10f) l32i    a6, a3,  0
 148 EX(10f) l32i    a7, a3,  4
 149 EX(10f) s32i    a6, a5,  0
 150 EX(10f) l32i    a6, a3,  8
 151 EX(10f) s32i    a7, a5,  4
 152 EX(10f) l32i    a7, a3, 12
 153 EX(10f) s32i    a6, a5,  8
 154         addi    a3, a3, 16
 155 EX(10f) s32i    a7, a5, 12
 156         addi    a5, a5, 16      # dst advances only after the whole 16B chunk is stored
 157 #if !XCHAL_HAVE_LOOPS
 158         bltu    a3, a8, .Loop1  # unsigned compare: addresses are not signed, a range may cross 0x80000000
 159 #endif /* !XCHAL_HAVE_LOOPS */
 160 .Loop1done:
 161         bbci.l  a4, 3, .L2      # skip unless bit 3 of the remaining length is set
 162         # copy 8 bytes
 163 EX(10f) l32i    a6, a3,  0
 164 EX(10f) l32i    a7, a3,  4
 165         addi    a3, a3,  8
 166 EX(10f) s32i    a6, a5,  0
 167 EX(10f) s32i    a7, a5,  4
 168         addi    a5, a5,  8
 169 .L2:
 170         bbci.l  a4, 2, .L3      # skip unless bit 2 of the remaining length is set
 171         # copy 4 bytes
 172 EX(10f) l32i    a6, a3,  0
 173         addi    a3, a3,  4
 174 EX(10f) s32i    a6, a5,  0
 175         addi    a5, a5,  4
 176 .L3:
 177         bbci.l  a4, 1, .L4      # skip unless bit 1 of the remaining length is set
 178         # copy 2 bytes
 179 EX(10f) l16ui   a6, a3,  0
 180         addi    a3, a3,  2
 181 EX(10f) s16i    a6, a5,  0
 182         addi    a5, a5,  2
 183 .L4:
 184         bbci.l  a4, 0, .L5      # skip unless bit 0 of the remaining length is set
 185         # copy 1 byte
 186 EX(10f) l8ui    a6, a3,  0
 187 EX(10f) s8i     a6, a5,  0
 188 .L5:
 189         movi    a2, 0           # return success for len bytes copied
 190         retw
 191
 192 /*
 193  * Destination is aligned, Source is unaligned: load aligned source words and merge adjacent pairs with __src_b before storing.
 194  */
 195         # NOTE(review): __src_b presumably shifts a register pair by the amount set with __ssa8 - confirm in <asm/asmmacro.h>
 196         .align  4
 197         .byte   0               # 1 mod 4 alignment for LOOPNEZ
 198                                 # (0 mod 4 alignment for LBEG)
 199 .Lsrcunaligned:
 200         # copy 16 bytes per iteration for word-aligned dst and unaligned src
 201         and     a10, a3, a8     # save unalignment offset for below (a8 == 3 on entry)
 202         sub     a3, a3, a10     # align a3 (to avoid sim warnings only; not needed for hardware)
 203 EX(10f) l32i    a6, a3, 0       # load first word
 204 #if XCHAL_HAVE_LOOPS
 205         loopnez a7, .Loop2done  # a7 = len / 16; a7 is reused as scratch inside the loop body
 206 #else /* !XCHAL_HAVE_LOOPS */
 207         beqz    a7, .Loop2done
 208         slli    a12, a7, 4      # a12 = 16 * iterations (a12 is an extra tmp used only in this configuration)
 209         add     a12, a12, a3    # a12 = end of last 16B source chunk
 210 #endif /* !XCHAL_HAVE_LOOPS */
 211 .Loop2:
 212 EX(10f) l32i    a7, a3,  4
 213 EX(10f) l32i    a8, a3,  8
 214         __src_b a6, a6, a7
 215 EX(10f) s32i    a6, a5,  0
 216 EX(10f) l32i    a9, a3, 12
 217         __src_b a7, a7, a8
 218 EX(10f) s32i    a7, a5,  4
 219 EX(10f) l32i    a6, a3, 16      # a6 = first source word of the next chunk
 220         __src_b a8, a8, a9
 221 EX(10f) s32i    a8, a5,  8
 222         addi    a3, a3, 16
 223         __src_b a9, a9, a6
 224 EX(10f) s32i    a9, a5, 12
 225         addi    a5, a5, 16      # dst advances only after the whole 16B chunk is stored
 226 #if !XCHAL_HAVE_LOOPS
 227         bltu    a3, a12, .Loop2 # unsigned compare: addresses are not signed, a range may cross 0x80000000
 228 #endif /* !XCHAL_HAVE_LOOPS */
 229 .Loop2done:
 230         bbci.l  a4, 3, .L12     # skip unless bit 3 of the remaining length is set
 231         # copy 8 bytes
 232 EX(10f) l32i    a7, a3,  4
 233 EX(10f) l32i    a8, a3,  8
 234         __src_b a6, a6, a7
 235 EX(10f) s32i    a6, a5,  0
 236         addi    a3, a3,  8
 237         __src_b a7, a7, a8
 238 EX(10f) s32i    a7, a5,  4
 239         addi    a5, a5,  8
 240         mov     a6, a8          # carry the last loaded word into the next step
 241 .L12:
 242         bbci.l  a4, 2, .L13     # skip unless bit 2 of the remaining length is set
 243         # copy 4 bytes
 244 EX(10f) l32i    a7, a3,  4
 245         addi    a3, a3,  4
 246         __src_b a6, a6, a7
 247 EX(10f) s32i    a6, a5,  0
 248         addi    a5, a5,  4
 249         mov     a6, a7          # carry the last loaded word (overwritten by the byte copies below)
 250 .L13:
 251         add     a3, a3, a10     # readjust a3 with correct misalignment
 252         bbci.l  a4, 1, .L14     # skip unless bit 1 of the remaining length is set
 253         # copy 2 bytes
 254 EX(10f) l8ui    a6, a3,  0
 255 EX(10f) l8ui    a7, a3,  1
 256         addi    a3, a3,  2
 257 EX(10f) s8i     a6, a5,  0
 258 EX(10f) s8i     a7, a5,  1
 259         addi    a5, a5,  2
 260 .L14:
 261         bbci.l  a4, 0, .L15     # skip unless bit 0 of the remaining length is set
 262         # copy 1 byte
 263 EX(10f) l8ui    a6, a3,  0
 264 EX(10f) s8i     a6, a5,  0
 265 .L15:
 266         movi    a2, 0           # return success for len bytes copied
 267         retw
 268
 269 ENDPROC(__xtensa_copy_user)
 270
 271         .section .fixup, "ax"
 272         .align  4
 273
 274 /* a2 = original dst; a5 = current dst; a11= original len
 275  * bytes_copied = a5 - a2
 276  * retval = bytes_not_copied = original len - bytes_copied
 277  * retval = a11 - (a5 - a2)
 278  */
 279 /* Target named by every EX(10f) above; presumably entered when a tagged load/store faults - confirm EX in <asm/asmmacro.h>. */
 280 /* a5 is advanced only after a chunk has been stored, so a partially stored chunk is counted as not copied. */
 281 10:
 282         sub     a2, a5, a2      /* a2 <-- bytes copied */
 283         sub     a2, a11, a2     /* a2 <-- bytes not copied */
 284         retw                    /* return the not-copied count to the caller of __xtensa_copy_user */