arch/x86/lib/copy_user_64.S

   1 /*
   2  * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
   3  * Copyright 2002 Andi Kleen, SuSE Labs.
   4  * Subject to the GNU Public License v2.
   5  *
   6  * Functions to copy from and to user space.
   7  */
   8
   9 #include <linux/linkage.h>
  10 #include <asm/current.h>
  11 #include <asm/asm-offsets.h>
  12 #include <asm/thread_info.h>
  13 #include <asm/cpufeatures.h>
  14 #include <asm/alternative-asm.h>
  15 #include <asm/asm.h>
  16 #include <asm/smap.h>
  17 #include <asm/export.h>
  18
  19 .macro ALIGN_DESTINATION
  20         /*
              * Check for bad alignment of destination and, if it is not
              * 8-byte aligned, copy single bytes until it is.
              *
              * In:    rdi destination, rsi source, edx count.  Every user of
              *        this macro in this file checks edx >= 8 first, so the
              *        "subl %ecx,%edx" below cannot wrap.
              * Out:   rdi/rsi advanced past the bytes copied here, edx
              *        reduced by the same amount.
              * Uses:  al, ecx, flags, and the local labels 100-103.
              *
              * The _ASM_EXTABLE_UA entries at the end pair the load (100)
              * and the store (101) with the fixup code at 103.
              */
  21         movl %edi,%ecx
  22         andl $7,%ecx                    /* ecx = destination & 7 */
  23         jz 102f                         /* already aligned */
  24         subl $8,%ecx
  25         negl %ecx                       /* ecx = 8 - (destination & 7) */
  26         subl %ecx,%edx                  /* edx = bytes left after aligning */
  27 100:    movb (%rsi),%al
  28 101:    movb %al,(%rdi)
  29         incq %rsi
  30         incq %rdi
  31         decl %ecx
  32         jnz 100b
  33 102:
  34         .section .fixup,"ax"
  35 103:    addl %ecx,%edx                  /* edx = unaligned bytes left + rest */
  36         jmp copy_user_handle_tail       /* it reloads ecx from edx itself */
  37         .previous
  38
  39         _ASM_EXTABLE_UA(100b, 103b)
  40         _ASM_EXTABLE_UA(101b, 103b)
  41         .endm
  42
  43 /*
  44  * copy_user_generic_unrolled - memory copy with exception handling.
  45  * This version is for CPUs like P4 that don't have efficient micro
  46  * code for rep movsq
  47  *
  48  * Input:
  49  * rdi destination
  50  * rsi source
  51  * rdx count
  52  *
  53  * Output:
  54  * eax uncopied bytes or 0 if successful.
  55  */
  56 ENTRY(copy_user_generic_unrolled)
  57         ASM_STAC
  58         cmpl $8,%edx
  59         jb 20f          /* less then 8 bytes, go to byte copy loop */
  60         ALIGN_DESTINATION
  61         movl %edx,%ecx
  62         andl $63,%edx
  63         shrl $6,%ecx
  64         jz .L_copy_short_string
  65 1:      movq (%rsi),%r8
  66 2:      movq 1*8(%rsi),%r9
  67 3:      movq 2*8(%rsi),%r10
  68 4:      movq 3*8(%rsi),%r11
  69 5:      movq %r8,(%rdi)
  70 6:      movq %r9,1*8(%rdi)
  71 7:      movq %r10,2*8(%rdi)
  72 8:      movq %r11,3*8(%rdi)
  73 9:      movq 4*8(%rsi),%r8
  74 10:     movq 5*8(%rsi),%r9
  75 11:     movq 6*8(%rsi),%r10
  76 12:     movq 7*8(%rsi),%r11
  77 13:     movq %r8,4*8(%rdi)
  78 14:     movq %r9,5*8(%rdi)
  79 15:     movq %r10,6*8(%rdi)
  80 16:     movq %r11,7*8(%rdi)
  81         leaq 64(%rsi),%rsi
  82         leaq 64(%rdi),%rdi
  83         decl %ecx
  84         jnz 1b
  85 .L_copy_short_string:
  86         movl %edx,%ecx
  87         andl $7,%edx
  88         shrl $3,%ecx
  89         jz 20f
  90 18:     movq (%rsi),%r8
  91 19:     movq %r8,(%rdi)
  92         leaq 8(%rsi),%rsi
  93         leaq 8(%rdi),%rdi
  94         decl %ecx
  95         jnz 18b
  96 20:     andl %edx,%edx
  97         jz 23f
  98         movl %edx,%ecx
  99 21:     movb (%rsi),%al
 100 22:     movb %al,(%rdi)
 101         incq %rsi
 102         incq %rdi
 103         decl %ecx
 104         jnz 21b
 105 23:     xor %eax,%eax
 106         ASM_CLAC
 107         ret
 108
 109         .section .fixup,"ax"
 110 30:     shll $6,%ecx
 111         addl %ecx,%edx
 112         jmp 60f
 113 40:     leal (%rdx,%rcx,8),%edx
 114         jmp 60f
 115 50:     movl %ecx,%edx
 116 60:     jmp copy_user_handle_tail /* ecx is zerorest also */
 117         .previous
 118
 119         _ASM_EXTABLE_UA(1b, 30b)
 120         _ASM_EXTABLE_UA(2b, 30b)
 121         _ASM_EXTABLE_UA(3b, 30b)
 122         _ASM_EXTABLE_UA(4b, 30b)
 123         _ASM_EXTABLE_UA(5b, 30b)
 124         _ASM_EXTABLE_UA(6b, 30b)
 125         _ASM_EXTABLE_UA(7b, 30b)
 126         _ASM_EXTABLE_UA(8b, 30b)
 127         _ASM_EXTABLE_UA(9b, 30b)
 128         _ASM_EXTABLE_UA(10b, 30b)
 129         _ASM_EXTABLE_UA(11b, 30b)
 130         _ASM_EXTABLE_UA(12b, 30b)
 131         _ASM_EXTABLE_UA(13b, 30b)
 132         _ASM_EXTABLE_UA(14b, 30b)
 133         _ASM_EXTABLE_UA(15b, 30b)
 134         _ASM_EXTABLE_UA(16b, 30b)
 135         _ASM_EXTABLE_UA(18b, 40b)
 136         _ASM_EXTABLE_UA(19b, 40b)
 137         _ASM_EXTABLE_UA(21b, 50b)
 138         _ASM_EXTABLE_UA(22b, 50b)
 139 ENDPROC(copy_user_generic_unrolled)
 140 EXPORT_SYMBOL(copy_user_generic_unrolled)
 141
 142 /* Some CPUs run faster using the string copy instructions.
 143  * This is also a lot simpler. Use them when possible.
 144  *
 145  * Only 4GB of copy is supported. This shouldn't be a problem
 146  * because the kernel normally only writes from/to page sized chunks
 147  * even if user space passed a longer buffer.
 148  * And more would be dangerous because both Intel and AMD have
 149  * errata with rep movsq > 4GB. If someone feels the need to fix
 150  * this please consider this.
 151  *
 152  * Input:
 153  * rdi destination
 154  * rsi source
 155  * rdx count
      *     (only the low 32 bits, edx, are looked at)
 156  *
 157  * Output:
 158  * eax uncopied bytes or 0 if successful.
      *
      * Registers written: rax, rcx, rdx, rsi, rdi and the flags.
 159  */
 160 ENTRY(copy_user_generic_string)
 161         ASM_STAC
 162         cmpl $8,%edx
 163         jb 2f           /* less than 8 bytes, go to byte copy loop */
 164         ALIGN_DESTINATION
 165         movl %edx,%ecx
 166         shrl $3,%ecx                    /* ecx = number of 8-byte words */
 167         andl $7,%edx                    /* edx = count % 8 */
 168 1:      rep
 169         movsq
 170 2:      movl %edx,%ecx                  /* ecx = trailing byte count */
 171 3:      rep
 172         movsb
 173         xorl %eax,%eax                  /* success: 0 bytes uncopied */
 174         ASM_CLAC
 175         ret
 176
             /*
              * Fixups:
              *  11: fault in rep movsq, ecx = words not yet moved, so
              *      ecx * 8 + edx bytes remain; falls through to 12.
              *  12: fault in rep movsb, ecx = bytes not yet moved.
              */
 177         .section .fixup,"ax"
 178 11:     leal (%rdx,%rcx,8),%ecx
 179 12:     movl %ecx,%edx          /* edx = uncopied byte count */
 180         jmp copy_user_handle_tail
 181         .previous
 182
 183         _ASM_EXTABLE_UA(1b, 11b)
 184         _ASM_EXTABLE_UA(3b, 12b)
 185 ENDPROC(copy_user_generic_string)
 186 EXPORT_SYMBOL(copy_user_generic_string)
 187
 188 /*
 189  * Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
 190  * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
 191  *
 192  * Input:
 193  * rdi destination
 194  * rsi source
 195  * rdx count
 196  *
 197  * Output:
 198  * eax uncopied bytes or 0 if successful.
 199  */
 200 ENTRY(copy_user_enhanced_fast_string)
 201         ASM_STAC
 202         cmpl $64,%edx
 203         jb .L_copy_short_string /* less then 64 bytes, avoid the costly 'rep' */
 204         movl %edx,%ecx
 205 1:      rep
 206         movsb
 207         xorl %eax,%eax
 208         ASM_CLAC
 209         ret
 210
 211         .section .fixup,"ax"
 212 12:     movl %ecx,%edx          /* ecx is zerorest also */
 213         jmp copy_user_handle_tail
 214         .previous
 215
 216         _ASM_EXTABLE_UA(1b, 12b)
 217 ENDPROC(copy_user_enhanced_fast_string)
 218 EXPORT_SYMBOL(copy_user_enhanced_fast_string)
 219
 220 /*
 221  * Try to copy last bytes and clear the rest if needed.
 222  * Since protection fault in copy_from/to_user is not a normal situation,
 223  * it is not necessary to optimize tail handling.
 224  *
 225  * Input:
 226  * rdi destination
 227  * rsi source
 228  * rdx count
 229  *
 230  * Output:
 231  * eax uncopied bytes or 0 if successful.
 232  */
 233 ALIGN;
 234 copy_user_handle_tail:
 235         movl %edx,%ecx
 236 1:      rep movsb
 237 2:      mov %ecx,%eax
 238         ASM_CLAC
 239         ret
 240
 241         _ASM_EXTABLE_UA(1b, 2b)
 242 ENDPROC(copy_user_handle_tail)
 243
 244 /*
 245  * copy_user_nocache - Uncached memory copy with exception handling
 246  * This will force destination out of cache for more performance.
 247  *
 248  * Note: Cached memory copy is used when destination or size is not
 249  * naturally aligned. That is:
 250  *  - Require 8-byte alignment when size is 8 bytes or larger.
 251  *  - Require 4-byte alignment when size is 4 bytes.
 252  */
 253 ENTRY(__copy_user_nocache)
 254         ASM_STAC
 255
 256         /* If size is less than 8 bytes, go to 4-byte copy */
 257         cmpl $8,%edx
 258         jb .L_4b_nocache_copy_entry
 259
 260         /* If destination is not 8-byte aligned, "cache" copy to align it */
 261         ALIGN_DESTINATION
 262
 263         /* Set 4x8-byte copy count and remainder */
 264         movl %edx,%ecx
 265         andl $63,%edx
 266         shrl $6,%ecx
 267         jz .L_8b_nocache_copy_entry     /* jump if count is 0 */
 268
 269         /* Perform 4x8-byte nocache loop-copy */
 270 .L_4x8b_nocache_copy_loop:
 271 1:      movq (%rsi),%r8
 272 2:      movq 1*8(%rsi),%r9
 273 3:      movq 2*8(%rsi),%r10
 274 4:      movq 3*8(%rsi),%r11
 275 5:      movnti %r8,(%rdi)
 276 6:      movnti %r9,1*8(%rdi)
 277 7:      movnti %r10,2*8(%rdi)
 278 8:      movnti %r11,3*8(%rdi)
 279 9:      movq 4*8(%rsi),%r8
 280 10:     movq 5*8(%rsi),%r9
 281 11:     movq 6*8(%rsi),%r10
 282 12:     movq 7*8(%rsi),%r11
 283 13:     movnti %r8,4*8(%rdi)
 284 14:     movnti %r9,5*8(%rdi)
 285 15:     movnti %r10,6*8(%rdi)
 286 16:     movnti %r11,7*8(%rdi)
 287         leaq 64(%rsi),%rsi
 288         leaq 64(%rdi),%rdi
 289         decl %ecx
 290         jnz .L_4x8b_nocache_copy_loop
 291
 292         /* Set 8-byte copy count and remainder */
 293 .L_8b_nocache_copy_entry:
 294         movl %edx,%ecx
 295         andl $7,%edx
 296         shrl $3,%ecx
 297         jz .L_4b_nocache_copy_entry     /* jump if count is 0 */
 298
 299         /* Perform 8-byte nocache loop-copy */
 300 .L_8b_nocache_copy_loop:
 301 20:     movq (%rsi),%r8
 302 21:     movnti %r8,(%rdi)
 303         leaq 8(%rsi),%rsi
 304         leaq 8(%rdi),%rdi
 305         decl %ecx
 306         jnz .L_8b_nocache_copy_loop
 307
 308         /* If no byte left, we're done */
 309 .L_4b_nocache_copy_entry:
 310         andl %edx,%edx
 311         jz .L_finish_copy
 312
 313         /* If destination is not 4-byte aligned, go to byte copy: */
 314         movl %edi,%ecx
 315         andl $3,%ecx
 316         jnz .L_1b_cache_copy_entry
 317
 318         /* Set 4-byte copy count (1 or 0) and remainder */
 319         movl %edx,%ecx
 320         andl $3,%edx
 321         shrl $2,%ecx
 322         jz .L_1b_cache_copy_entry       /* jump if count is 0 */
 323
 324         /* Perform 4-byte nocache copy: */
 325 30:     movl (%rsi),%r8d
 326 31:     movnti %r8d,(%rdi)
 327         leaq 4(%rsi),%rsi
 328         leaq 4(%rdi),%rdi
 329
 330         /* If no bytes left, we're done: */
 331         andl %edx,%edx
 332         jz .L_finish_copy
 333
 334         /* Perform byte "cache" loop-copy for the remainder */
 335 .L_1b_cache_copy_entry:
 336         movl %edx,%ecx
 337 .L_1b_cache_copy_loop:
 338 40:     movb (%rsi),%al
 339 41:     movb %al,(%rdi)
 340         incq %rsi
 341         incq %rdi
 342         decl %ecx
 343         jnz .L_1b_cache_copy_loop
 344
 345         /* Finished copying; fence the prior stores */
 346 .L_finish_copy:
 347         xorl %eax,%eax
 348         ASM_CLAC
 349         sfence
 350         ret
 351
 352         .section .fixup,"ax"
 353 .L_fixup_4x8b_copy:
 354         shll $6,%ecx
 355         addl %ecx,%edx
 356         jmp .L_fixup_handle_tail
 357 .L_fixup_8b_copy:
 358         lea (%rdx,%rcx,8),%rdx
 359         jmp .L_fixup_handle_tail
 360 .L_fixup_4b_copy:
 361         lea (%rdx,%rcx,4),%rdx
 362         jmp .L_fixup_handle_tail
 363 .L_fixup_1b_copy:
 364         movl %ecx,%edx
 365 .L_fixup_handle_tail:
 366         sfence
 367         jmp copy_user_handle_tail
 368         .previous
 369
 370         _ASM_EXTABLE_UA(1b, .L_fixup_4x8b_copy)
 371         _ASM_EXTABLE_UA(2b, .L_fixup_4x8b_copy)
 372         _ASM_EXTABLE_UA(3b, .L_fixup_4x8b_copy)
 373         _ASM_EXTABLE_UA(4b, .L_fixup_4x8b_copy)
 374         _ASM_EXTABLE_UA(5b, .L_fixup_4x8b_copy)
 375         _ASM_EXTABLE_UA(6b, .L_fixup_4x8b_copy)
 376         _ASM_EXTABLE_UA(7b, .L_fixup_4x8b_copy)
 377         _ASM_EXTABLE_UA(8b, .L_fixup_4x8b_copy)
 378         _ASM_EXTABLE_UA(9b, .L_fixup_4x8b_copy)
 379         _ASM_EXTABLE_UA(10b, .L_fixup_4x8b_copy)
 380         _ASM_EXTABLE_UA(11b, .L_fixup_4x8b_copy)
 381         _ASM_EXTABLE_UA(12b, .L_fixup_4x8b_copy)
 382         _ASM_EXTABLE_UA(13b, .L_fixup_4x8b_copy)
 383         _ASM_EXTABLE_UA(14b, .L_fixup_4x8b_copy)
 384         _ASM_EXTABLE_UA(15b, .L_fixup_4x8b_copy)
 385         _ASM_EXTABLE_UA(16b, .L_fixup_4x8b_copy)
 386         _ASM_EXTABLE_UA(20b, .L_fixup_8b_copy)
 387         _ASM_EXTABLE_UA(21b, .L_fixup_8b_copy)
 388         _ASM_EXTABLE_UA(30b, .L_fixup_4b_copy)
 389         _ASM_EXTABLE_UA(31b, .L_fixup_4b_copy)
 390         _ASM_EXTABLE_UA(40b, .L_fixup_1b_copy)
 391         _ASM_EXTABLE_UA(41b, .L_fixup_1b_copy)
 392 ENDPROC(__copy_user_nocache)
 393 EXPORT_SYMBOL(__copy_user_nocache)