x86: Clean up csum-copy_64.S a bit

[sfrench/cifs-2.6.git] / arch / x86 / lib / csum-copy_64.S
diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S

index ebf753e48ba99a1a7ebe5e7fc0046c23f1ccf38e..fb903b758da8534847ef3c4c97840d91972abdf4 100644 (file)
--- a/arch/x86/lib/csum-copy_64.S
+++ b/arch/x86/lib/csum-copy_64.S
@@ -1,6 +1,6 @@
  /*
- * Copyright 2002,2003 Andi Kleen, SuSE Labs.
- *     
+ * Copyright 2002, 2003 Andi Kleen, SuSE Labs.
+ *
   * This file is subject to the terms and conditions of the GNU General Public
   * License.  See the file COPYING in the main directory of this archive
   * for more details. No warranty for anything given at all.
@@ -11,82 +11,82 @@
  
  /*
   * Checksum copy with exception handling.
- * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the 
+ * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the
   * destination is zeroed.
- * 
+ *
   * Input
   * rdi  source
   * rsi  destination
   * edx  len (32bit)
- * ecx  sum (32bit) 
+ * ecx  sum (32bit)
   * r8   src_err_ptr (int)
   * r9   dst_err_ptr (int)
   *
   * Output
   * eax  32bit sum (folded from the 64bit accumulator). undefined in case of exception.
- * 
- * Wrappers need to take care of valid exception sum and zeroing.               
+ *
+ * Wrappers need to take care of valid exception sum and zeroing.
   * They also should align source or destination to 8 bytes.
   */
  
         .macro source
  10:
-       .section __ex_table,"a"
+       .section __ex_table, "a"
         .align 8
-       .quad 10b,.Lbad_source
+       .quad 10b, .Lbad_source
         .previous
         .endm
-               
+
         .macro dest
  20:
-       .section __ex_table,"a"
+       .section __ex_table, "a"
         .align 8
-       .quad 20b,.Lbad_dest
+       .quad 20b, .Lbad_dest
         .previous
         .endm
-                       
+
         .macro ignore L=.Lignore
  30:
-       .section __ex_table,"a"
+       .section __ex_table, "a"
         .align 8
-       .quad 30b,\L
+       .quad 30b, \L
         .previous
         .endm
-       
-                               
+
+
  ENTRY(csum_partial_copy_generic)
         CFI_STARTPROC
-       cmpl     $3*64,%edx
-       jle      .Lignore
+       cmpl    $3*64, %edx
+       jle     .Lignore
  
-.Lignore:              
-       subq  $7*8,%rsp
+.Lignore:
+       subq  $7*8, %rsp
         CFI_ADJUST_CFA_OFFSET 7*8
-       movq  %rbx,2*8(%rsp)
+       movq  %rbx, 2*8(%rsp)
         CFI_REL_OFFSET rbx, 2*8
-       movq  %r12,3*8(%rsp)
+       movq  %r12, 3*8(%rsp)
         CFI_REL_OFFSET r12, 3*8
-       movq  %r14,4*8(%rsp)
+       movq  %r14, 4*8(%rsp)
         CFI_REL_OFFSET r14, 4*8
-       movq  %r13,5*8(%rsp)
+       movq  %r13, 5*8(%rsp)
         CFI_REL_OFFSET r13, 5*8
-       movq  %rbp,6*8(%rsp)
+       movq  %rbp, 6*8(%rsp)
         CFI_REL_OFFSET rbp, 6*8
  
-       movq  %r8,(%rsp)
-       movq  %r9,1*8(%rsp)
-       
-       movl  %ecx,%eax
-       movl  %edx,%ecx
+       movq  %r8, (%rsp)
+       movq  %r9, 1*8(%rsp)
  
-       xorl  %r9d,%r9d
-       movq  %rcx,%r12
+       movl  %ecx, %eax
+       movl  %edx, %ecx
  
-       shrq  $6,%r12
-       jz    .Lhandle_tail       /* < 64 */
+       xorl  %r9d, %r9d
+       movq  %rcx, %r12
+
+       shrq  $6, %r12
+       jz      .Lhandle_tail       /* < 64 */
  
         clc
-       
+
         /* main loop. copy and checksum in 64 byte blocks */
         /* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
         /* r11: temp3, rdx: temp4, r12 loopcnt */
@@ -94,156 +94,156 @@ ENTRY(csum_partial_copy_generic)
         .p2align 4
  .Lloop:
         source
-       movq  (%rdi),%rbx
+       movq  (%rdi), %rbx
         source
-       movq  8(%rdi),%r8
+       movq  8(%rdi), %r8
         source
-       movq  16(%rdi),%r11
+       movq  16(%rdi), %r11
         source
-       movq  24(%rdi),%rdx
+       movq  24(%rdi), %rdx
  
         source
-       movq  32(%rdi),%r10
+       movq  32(%rdi), %r10
         source
-       movq  40(%rdi),%rbp
+       movq  40(%rdi), %rbp
         source
-       movq  48(%rdi),%r14
+       movq  48(%rdi), %r14
         source
-       movq  56(%rdi),%r13
-               
+       movq  56(%rdi), %r13
+
         ignore 2f
         prefetcht0 5*64(%rdi)
-2:                                                     
-       adcq  %rbx,%rax
-       adcq  %r8,%rax
-       adcq  %r11,%rax
-       adcq  %rdx,%rax
-       adcq  %r10,%rax
-       adcq  %rbp,%rax
-       adcq  %r14,%rax
-       adcq  %r13,%rax
+2:
+       adcq  %rbx, %rax
+       adcq  %r8, %rax
+       adcq  %r11, %rax
+       adcq  %rdx, %rax
+       adcq  %r10, %rax
+       adcq  %rbp, %rax
+       adcq  %r14, %rax
+       adcq  %r13, %rax
  
         decl %r12d
-       
+
         dest
-       movq %rbx,(%rsi)
+       movq %rbx, (%rsi)
         dest
-       movq %r8,8(%rsi)
+       movq %r8, 8(%rsi)
         dest
-       movq %r11,16(%rsi)
+       movq %r11, 16(%rsi)
         dest
-       movq %rdx,24(%rsi)
+       movq %rdx, 24(%rsi)
  
         dest
-       movq %r10,32(%rsi)
+       movq %r10, 32(%rsi)
         dest
-       movq %rbp,40(%rsi)
+       movq %rbp, 40(%rsi)
         dest
-       movq %r14,48(%rsi)
+       movq %r14, 48(%rsi)
         dest
-       movq %r13,56(%rsi)
-       
+       movq %r13, 56(%rsi)
+
  3:
-       
-       leaq 64(%rdi),%rdi
-       leaq 64(%rsi),%rsi
  
-       jnz   .Lloop
+       leaq 64(%rdi), %rdi
+       leaq 64(%rsi), %rsi
  
-       adcq  %r9,%rax
+       jnz     .Lloop
+
+       adcq  %r9, %rax
  
         /* do last up to 56 bytes */
  .Lhandle_tail:
         /* ecx: count */
-       movl %ecx,%r10d
-       andl $63,%ecx
-       shrl $3,%ecx
-       jz       .Lfold
+       movl %ecx, %r10d
+       andl $63, %ecx
+       shrl $3, %ecx
+       jz      .Lfold
         clc
         .p2align 4
-.Lloop_8:      
+.Lloop_8:
         source
-       movq (%rdi),%rbx
-       adcq %rbx,%rax
+       movq (%rdi), %rbx
+       adcq %rbx, %rax
         decl %ecx
         dest
-       movq %rbx,(%rsi)
-       leaq 8(%rsi),%rsi /* preserve carry */
-       leaq 8(%rdi),%rdi
+       movq %rbx, (%rsi)
+       leaq 8(%rsi), %rsi /* preserve carry */
+       leaq 8(%rdi), %rdi
         jnz     .Lloop_8
-       adcq %r9,%rax   /* add in carry */
+       adcq %r9, %rax  /* add in carry */
  
  .Lfold:
         /* reduce checksum to 32bits */
-       movl %eax,%ebx
-       shrq $32,%rax
-       addl %ebx,%eax
-       adcl %r9d,%eax
+       movl %eax, %ebx
+       shrq $32, %rax
+       addl %ebx, %eax
+       adcl %r9d, %eax
  
-       /* do last up to 6 bytes */     
+       /* do last up to 6 bytes */
  .Lhandle_7:
-       movl %r10d,%ecx
-       andl $7,%ecx
-       shrl $1,%ecx
+       movl %r10d, %ecx
+       andl $7, %ecx
+       shrl $1, %ecx
         jz   .Lhandle_1
-       movl $2,%edx
-       xorl %ebx,%ebx
-       clc  
+       movl $2, %edx
+       xorl %ebx, %ebx
+       clc
         .p2align 4
-.Lloop_1:      
+.Lloop_1:
         source
-       movw (%rdi),%bx
-       adcl %ebx,%eax
+       movw (%rdi), %bx
+       adcl %ebx, %eax
         decl %ecx
         dest
-       movw %bx,(%rsi)
-       leaq 2(%rdi),%rdi
-       leaq 2(%rsi),%rsi
+       movw %bx, (%rsi)
+       leaq 2(%rdi), %rdi
+       leaq 2(%rsi), %rsi
         jnz .Lloop_1
-       adcl %r9d,%eax  /* add in carry */
-       
+       adcl %r9d, %eax /* add in carry */
+
         /* handle last odd byte */
  .Lhandle_1:
-       testl $1,%r10d
+       testl $1, %r10d
         jz    .Lende
-       xorl  %ebx,%ebx
+       xorl  %ebx, %ebx
         source
-       movb (%rdi),%bl
+       movb (%rdi), %bl
         dest
-       movb %bl,(%rsi)
-       addl %ebx,%eax
-       adcl %r9d,%eax          /* carry */
-                       
+       movb %bl, (%rsi)
+       addl %ebx, %eax
+       adcl %r9d, %eax         /* carry */
+
         CFI_REMEMBER_STATE
  .Lende:
-       movq 2*8(%rsp),%rbx
+       movq 2*8(%rsp), %rbx
         CFI_RESTORE rbx
-       movq 3*8(%rsp),%r12
+       movq 3*8(%rsp), %r12
         CFI_RESTORE r12
-       movq 4*8(%rsp),%r14
+       movq 4*8(%rsp), %r14
         CFI_RESTORE r14
-       movq 5*8(%rsp),%r13
+       movq 5*8(%rsp), %r13
         CFI_RESTORE r13
-       movq 6*8(%rsp),%rbp
+       movq 6*8(%rsp), %rbp
         CFI_RESTORE rbp
-       addq $7*8,%rsp
+       addq $7*8, %rsp
         CFI_ADJUST_CFA_OFFSET -7*8
         ret
         CFI_RESTORE_STATE
  
         /* Exception handlers. Very simple, zeroing is done in the wrappers */
  .Lbad_source:
-       movq (%rsp),%rax
-       testq %rax,%rax
+       movq (%rsp), %rax
+       testq %rax, %rax
         jz   .Lende
-       movl $-EFAULT,(%rax)
+       movl $-EFAULT, (%rax)
         jmp  .Lende
-       
+
  .Lbad_dest:
-       movq 8(%rsp),%rax
-       testq %rax,%rax
-       jz   .Lende     
-       movl $-EFAULT,(%rax)
+       movq 8(%rsp), %rax
+       testq %rax, %rax
+       jz   .Lende
+       movl $-EFAULT, (%rax)
         jmp .Lende
         CFI_ENDPROC
  ENDPROC(csum_partial_copy_generic)