#! /usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0

# This code is taken from CRYPTOGAMs[1] and is included here using the option
# in the license to distribute the code under the GPL. Therefore this program
# is free software; you can redistribute it and/or modify it under the terms of
# the GNU General Public License version 2 as published by the Free Software
# Foundation.
#
# [1] https://www.openssl.org/~appro/cryptogams/

# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
#       * Redistributions of source code must retain copyright notices,
#         this list of conditions and the following disclaimer.
#
#       * Redistributions in binary form must reproduce the above
#         copyright notice, this list of conditions and the following
#         disclaimer in the documentation and/or other materials
#         provided with the distribution.
#
#       * Neither the name of the CRYPTOGAMS nor the names of its
#         copyright holder and contributors may be used to endorse or
#         promote products derived from this software without specific
#         prior written permission.
#
# ALTERNATIVELY, provided that this notice is retained in full, this
# product may be distributed under the terms of the GNU General Public
# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
# those given above.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by the POWER8 processor.
# The module is endian-agnostic in the sense that it supports both big-
# and little-endian cases. Data alignment in parallelizable modes is
# handled with VSX loads and stores, which implies the MSR.VSX flag being
# set. It should also be noted that the ISA specification doesn't prohibit
# alignment exceptions for these instructions on page boundaries.
# Initially alignment was handled in pure AltiVec/VMX way [when data
# is aligned programmatically, which in turn guarantees exception-
# free execution], but it turned out to hamper performance when vcipher
# instructions are interleaved. It's reckoned that eventual
# misalignment penalties at page boundaries are on average lower
# than the additional overhead of the pure AltiVec approach.
#
# May 2016
#
# Added an XTS subroutine; a 9x improvement on little-endian and a 12x
# improvement on big-endian systems were measured.
#
######################################################################
# Current large-block performance in cycles per byte processed with
# 128-bit key (less is better).
#
#               CBC en-/decrypt CTR     XTS
# POWER8[le]    3.96/0.72       0.74    1.1
# POWER8[be]    3.75/0.65       0.66    1.0

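# The generated entry points take their arguments in r3 and up per the
# PPC ELF ABIs. As a sketch (argument order inferred from the register
# maps below; the authoritative C prototypes live with the callers, and
# the key type here is a placeholder):
#
#   int  aes_p8_set_encrypt_key(const unsigned char *inp, int bits,
#                               void *key);
#   int  aes_p8_set_decrypt_key(const unsigned char *inp, int bits,
#                               void *key);
#   void aes_p8_encrypt(const unsigned char *in, unsigned char *out,
#                       const void *key);
#   void aes_p8_decrypt(const unsigned char *in, unsigned char *out,
#                       const void *key);
#   void aes_p8_cbc_encrypt(const unsigned char *in, unsigned char *out,
#                           size_t len, const void *key,
#                           unsigned char *iv, int enc);
#   void aes_p8_ctr32_encrypt_blocks(const unsigned char *in,
#                                    unsigned char *out, size_t len,
#                                    const void *key,
#                                    const unsigned char *iv);
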
$flavour = shift;

if ($flavour =~ /64/) {
        $SIZE_T =8;
        $LRSAVE =2*$SIZE_T;
        $STU    ="stdu";
        $POP    ="ld";
        $PUSH   ="std";
        $UCMP   ="cmpld";
        $SHL    ="sldi";
} elsif ($flavour =~ /32/) {
        $SIZE_T =4;
        $LRSAVE =$SIZE_T;
        $STU    ="stwu";
        $POP    ="lwz";
        $PUSH   ="stw";
        $UCMP   ="cmplw";
        $SHL    ="slwi";
} else { die "nonsense $flavour"; }
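# The macros above abstract over the 32- and 64-bit ELF ABIs: pointer-sized
# stack-update/load/store mnemonics, the link-register save offset, and the
# matching unsigned compares and word shifts.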

$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";

$FRAME=8*$SIZE_T;
$prefix="aes_p8";

$sp="r1";
$vrsave="r12";

#########################################################################
{{{     # Key setup procedures                                          #
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

$code.=<<___;
.machine        "any"

.text

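# Constants for the key schedule: the first two quads hold the AES round
# constants 0x01 and 0x1b replicated across all lanes, the third is a vperm
# index vector that rotates-and-splats the last word of a round key, and the
# ?rev/?asis tags tell ppc-xlate.pl whether to byte-swap the constants for
# little-endian builds. Lconsts materializes the table's address
# position-independently via the bcl 20,31 / mflr idiom.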
.align  7
rcon:
.long   0x01000000, 0x01000000, 0x01000000, 0x01000000  ?rev
.long   0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000  ?rev
.long   0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c  ?rev
.long   0,0,0,0                                         ?asis
Lconsts:
        mflr    r0
        bcl     20,31,\$+4
        mflr    $ptr     # distance between . and rcon
        addi    $ptr,$ptr,-0x48
        mtlr    r0
        blr
        .long   0
        .byte   0,12,0x14,0,0,0,0,0
.asciz  "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl  .${prefix}_set_encrypt_key
Lset_encrypt_key:
        mflr            r11
        $PUSH           r11,$LRSAVE($sp)

        li              $ptr,-1
        ${UCMP}i        $inp,0
        beq-            Lenc_key_abort          # if ($inp==0) return -1;
        ${UCMP}i        $out,0
        beq-            Lenc_key_abort          # if ($out==0) return -1;
        li              $ptr,-2
        cmpwi           $bits,128
        blt-            Lenc_key_abort
        cmpwi           $bits,256
        bgt-            Lenc_key_abort
        andi.           r0,$bits,0x3f
        bne-            Lenc_key_abort

        lis             r0,0xfff0
        mfspr           $vrsave,256
        mtspr           256,r0

        bl              Lconsts
        mtlr            r11

        neg             r9,$inp
        lvx             $in0,0,$inp
        addi            $inp,$inp,15            # 15 is not a typo
        lvsr            $key,0,r9               # borrow $key
        li              r8,0x20
        cmpwi           $bits,192
        lvx             $in1,0,$inp
        le?vspltisb     $mask,0x0f              # borrow $mask
        lvx             $rcon,0,$ptr
        le?vxor         $key,$key,$mask         # adjust for byte swap
        lvx             $mask,r8,$ptr
        addi            $ptr,$ptr,0x10
        vperm           $in0,$in0,$in1,$key     # align [and byte swap in LE]
        li              $cnt,8
        vxor            $zero,$zero,$zero
        mtctr           $cnt

        ?lvsr           $outperm,0,$out
        vspltisb        $outmask,-1
        lvx             $outhead,0,$out
        ?vperm          $outmask,$zero,$outmask,$outperm

        blt             Loop128
        addi            $inp,$inp,8
        beq             L192
        addi            $inp,$inp,8
        b               L256

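# Dispatch above is on the earlier cmpwi $bits,192 (lt -> 128, eq -> 192,
# gt -> 256). Each Loop128 iteration derives one round key: the vperm
# rotate-n-splat replicates RotWord(w[i-1]) into every lane, and because all
# four words are then identical, ShiftRows inside vcipherlast degenerates to
# a no-op, so vcipherlast yields SubWord(RotWord(w)) ^ rcon in one shot. The
# vsldoi/vxor ladder then forms the running prefix-XOR of the previous round
# key's words.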
.align  4
Loop128:
        vperm           $key,$in0,$in0,$mask    # rotate-n-splat
        vsldoi          $tmp,$zero,$in0,12      # >>32
         vperm          $outtail,$in0,$in0,$outperm     # rotate
         vsel           $stage,$outhead,$outtail,$outmask
         vmr            $outhead,$outtail
        vcipherlast     $key,$key,$rcon
         stvx           $stage,0,$out
         addi           $out,$out,16

        vxor            $in0,$in0,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
        vxor            $in0,$in0,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
        vxor            $in0,$in0,$tmp
         vadduwm        $rcon,$rcon,$rcon
        vxor            $in0,$in0,$key
        bdnz            Loop128

        lvx             $rcon,0,$ptr            # last two round keys

        vperm           $key,$in0,$in0,$mask    # rotate-n-splat
        vsldoi          $tmp,$zero,$in0,12      # >>32
         vperm          $outtail,$in0,$in0,$outperm     # rotate
         vsel           $stage,$outhead,$outtail,$outmask
         vmr            $outhead,$outtail
        vcipherlast     $key,$key,$rcon
         stvx           $stage,0,$out
         addi           $out,$out,16

        vxor            $in0,$in0,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
        vxor            $in0,$in0,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
        vxor            $in0,$in0,$tmp
         vadduwm        $rcon,$rcon,$rcon
        vxor            $in0,$in0,$key

        vperm           $key,$in0,$in0,$mask    # rotate-n-splat
        vsldoi          $tmp,$zero,$in0,12      # >>32
         vperm          $outtail,$in0,$in0,$outperm     # rotate
         vsel           $stage,$outhead,$outtail,$outmask
         vmr            $outhead,$outtail
        vcipherlast     $key,$key,$rcon
         stvx           $stage,0,$out
         addi           $out,$out,16

        vxor            $in0,$in0,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
        vxor            $in0,$in0,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
        vxor            $in0,$in0,$tmp
        vxor            $in0,$in0,$key
         vperm          $outtail,$in0,$in0,$outperm     # rotate
         vsel           $stage,$outhead,$outtail,$outmask
         vmr            $outhead,$outtail
         stvx           $stage,0,$out

        addi            $inp,$out,15            # 15 is not a typo
        addi            $out,$out,0x50

        li              $rounds,10
        b               Ldone

.align  4
L192:
        lvx             $tmp,0,$inp
        li              $cnt,4
         vperm          $outtail,$in0,$in0,$outperm     # rotate
         vsel           $stage,$outhead,$outtail,$outmask
         vmr            $outhead,$outtail
         stvx           $stage,0,$out
         addi           $out,$out,16
        vperm           $in1,$in1,$tmp,$key     # align [and byte swap in LE]
        vspltisb        $key,8                  # borrow $key
        mtctr           $cnt
        vsububm         $mask,$mask,$key        # adjust the mask

Loop192:
        vperm           $key,$in1,$in1,$mask    # rotate-n-splat
        vsldoi          $tmp,$zero,$in0,12      # >>32
        vcipherlast     $key,$key,$rcon

        vxor            $in0,$in0,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
        vxor            $in0,$in0,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
        vxor            $in0,$in0,$tmp

         vsldoi         $stage,$zero,$in1,8
        vspltw          $tmp,$in0,3
        vxor            $tmp,$tmp,$in1
        vsldoi          $in1,$zero,$in1,12      # >>32
         vadduwm        $rcon,$rcon,$rcon
        vxor            $in1,$in1,$tmp
        vxor            $in0,$in0,$key
        vxor            $in1,$in1,$key
         vsldoi         $stage,$stage,$in0,8

        vperm           $key,$in1,$in1,$mask    # rotate-n-splat
        vsldoi          $tmp,$zero,$in0,12      # >>32
         vperm          $outtail,$stage,$stage,$outperm # rotate
         vsel           $stage,$outhead,$outtail,$outmask
         vmr            $outhead,$outtail
        vcipherlast     $key,$key,$rcon
         stvx           $stage,0,$out
         addi           $out,$out,16

         vsldoi         $stage,$in0,$in1,8
        vxor            $in0,$in0,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
         vperm          $outtail,$stage,$stage,$outperm # rotate
         vsel           $stage,$outhead,$outtail,$outmask
         vmr            $outhead,$outtail
        vxor            $in0,$in0,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
        vxor            $in0,$in0,$tmp
         stvx           $stage,0,$out
         addi           $out,$out,16

        vspltw          $tmp,$in0,3
        vxor            $tmp,$tmp,$in1
        vsldoi          $in1,$zero,$in1,12      # >>32
         vadduwm        $rcon,$rcon,$rcon
        vxor            $in1,$in1,$tmp
        vxor            $in0,$in0,$key
        vxor            $in1,$in1,$key
         vperm          $outtail,$in0,$in0,$outperm     # rotate
         vsel           $stage,$outhead,$outtail,$outmask
         vmr            $outhead,$outtail
         stvx           $stage,0,$out
         addi           $inp,$out,15            # 15 is not a typo
         addi           $out,$out,16
        bdnz            Loop192

        li              $rounds,12
        addi            $out,$out,0x20
        b               Ldone

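# AES-256 alternates two derivation steps per Loop256 iteration: the first
# half is the same rotate-splat-vcipherlast round as Loop128; the second
# half needs SubWord without a rotate or rcon, so it splats word 3 and
# applies vsbox, the plain SubBytes instruction.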
.align  4
L256:
        lvx             $tmp,0,$inp
        li              $cnt,7
        li              $rounds,14
         vperm          $outtail,$in0,$in0,$outperm     # rotate
         vsel           $stage,$outhead,$outtail,$outmask
         vmr            $outhead,$outtail
         stvx           $stage,0,$out
         addi           $out,$out,16
        vperm           $in1,$in1,$tmp,$key     # align [and byte swap in LE]
        mtctr           $cnt

Loop256:
        vperm           $key,$in1,$in1,$mask    # rotate-n-splat
        vsldoi          $tmp,$zero,$in0,12      # >>32
         vperm          $outtail,$in1,$in1,$outperm     # rotate
         vsel           $stage,$outhead,$outtail,$outmask
         vmr            $outhead,$outtail
        vcipherlast     $key,$key,$rcon
         stvx           $stage,0,$out
         addi           $out,$out,16

        vxor            $in0,$in0,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
        vxor            $in0,$in0,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
        vxor            $in0,$in0,$tmp
         vadduwm        $rcon,$rcon,$rcon
        vxor            $in0,$in0,$key
         vperm          $outtail,$in0,$in0,$outperm     # rotate
         vsel           $stage,$outhead,$outtail,$outmask
         vmr            $outhead,$outtail
         stvx           $stage,0,$out
         addi           $inp,$out,15            # 15 is not a typo
         addi           $out,$out,16
        bdz             Ldone

        vspltw          $key,$in0,3             # just splat
        vsldoi          $tmp,$zero,$in1,12      # >>32
        vsbox           $key,$key

        vxor            $in1,$in1,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
        vxor            $in1,$in1,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
        vxor            $in1,$in1,$tmp

        vxor            $in1,$in1,$key
        b               Loop256

.align  4
Ldone:
        lvx             $in1,0,$inp             # redundant in aligned case
        vsel            $in1,$outhead,$in1,$outmask
        stvx            $in1,0,$inp
        li              $ptr,0
        mtspr           256,$vrsave
        stw             $rounds,0($out)

Lenc_key_abort:
        mr              r3,$ptr
        blr
        .long           0
        .byte           0,12,0x14,1,0,0,3,0
        .long           0
.size   .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl  .${prefix}_set_decrypt_key
        $STU            $sp,-$FRAME($sp)
        mflr            r10
        $PUSH           r10,$FRAME+$LRSAVE($sp)
        bl              Lset_encrypt_key
        mtlr            r10

        cmpwi           r3,0
        bne-            Ldec_key_abort

        slwi            $cnt,$rounds,4
        subi            $inp,$out,240           # first round key
        srwi            $rounds,$rounds,1
        add             $out,$inp,$cnt          # last round key
        mtctr           $rounds

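# Swap the round keys end-for-end in place: vncipher consumes the same
# schedule as encryption, only walked backwards, so no InvMixColumns
# transform of the keys is needed. Each iteration exchanges one 16-byte key
# from the front with its mirror at the back, four words at a time.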
Ldeckey:
        lwz             r0, 0($inp)
        lwz             r6, 4($inp)
        lwz             r7, 8($inp)
        lwz             r8, 12($inp)
        addi            $inp,$inp,16
        lwz             r9, 0($out)
        lwz             r10,4($out)
        lwz             r11,8($out)
        lwz             r12,12($out)
        stw             r0, 0($out)
        stw             r6, 4($out)
        stw             r7, 8($out)
        stw             r8, 12($out)
        subi            $out,$out,16
        stw             r9, -16($inp)
        stw             r10,-12($inp)
        stw             r11,-8($inp)
        stw             r12,-4($inp)
        bdnz            Ldeckey

        xor             r3,r3,r3                # return value
Ldec_key_abort:
        addi            $sp,$sp,$FRAME
        blr
        .long           0
        .byte           0,12,4,1,0x80,0,3,0
        .long           0
.size   .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
#########################################################################
{{{     # Single block en- and decrypt procedures                       #
sub gen_block () {
my $dir = shift;
my $n   = $dir eq "de" ? "n" : "";
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

$code.=<<___;
.globl  .${prefix}_${dir}crypt
        lwz             $rounds,240($key)
        lis             r0,0xfc00
        mfspr           $vrsave,256
        li              $idx,15                 # 15 is not a typo
        mtspr           256,r0

        lvx             v0,0,$inp
        neg             r11,$out
        lvx             v1,$idx,$inp
        lvsl            v2,0,$inp               # inpperm
        le?vspltisb     v4,0x0f
        ?lvsl           v3,0,r11                # outperm
        le?vxor         v2,v2,v4
        li              $idx,16
        vperm           v0,v0,v1,v2             # align [and byte swap in LE]
        lvx             v1,0,$key
        ?lvsl           v5,0,$key               # keyperm
        srwi            $rounds,$rounds,1
        lvx             v2,$idx,$key
        addi            $idx,$idx,16
        subi            $rounds,$rounds,1
        ?vperm          v1,v1,v2,v5             # align round key

        vxor            v0,v0,v1
        lvx             v1,$idx,$key
        addi            $idx,$idx,16
        mtctr           $rounds

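# Two AES rounds per loop iteration: the count register was loaded with
# rounds/2-1, and the final pair (vcipher + vcipherlast, or the vncipher
# forms for decrypt) is peeled off after the loop. Each round key is
# realigned on the fly with ?vperm since $key may be unaligned.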
Loop_${dir}c:
        ?vperm          v2,v2,v1,v5
        v${n}cipher     v0,v0,v2
        lvx             v2,$idx,$key
        addi            $idx,$idx,16
        ?vperm          v1,v1,v2,v5
        v${n}cipher     v0,v0,v1
        lvx             v1,$idx,$key
        addi            $idx,$idx,16
        bdnz            Loop_${dir}c

        ?vperm          v2,v2,v1,v5
        v${n}cipher     v0,v0,v2
        lvx             v2,$idx,$key
        ?vperm          v1,v1,v2,v5
        v${n}cipherlast v0,v0,v1

        vspltisb        v2,-1
        vxor            v1,v1,v1
        li              $idx,15                 # 15 is not a typo
        ?vperm          v2,v1,v2,v3             # outmask
        le?vxor         v3,v3,v4
        lvx             v1,0,$out               # outhead
        vperm           v0,v0,v0,v3             # rotate [and byte swap in LE]
        vsel            v1,v1,v0,v2
        lvx             v4,$idx,$out
        stvx            v1,0,$out
        vsel            v0,v0,v4,v2
        stvx            v0,$idx,$out

        mtspr           256,$vrsave
        blr
        .long           0
        .byte           0,12,0x14,0,0,0,3,0
        .long           0
.size   .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
&gen_block("en");
&gen_block("de");
}}}
#########################################################################
{{{     # CBC en- and decrypt procedures                                #
my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=             map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
                                                map("v$_",(4..10));
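# CBC encryption is inherently serial (each ciphertext block chains into
# the next), so it runs one block at a time; decryption has no such
# dependency and branches to the 8x interleaved path below once at least
# 128 bytes remain.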
$code.=<<___;
.globl  .${prefix}_cbc_encrypt
        ${UCMP}i        $len,16
        bltlr-

        cmpwi           $enc,0                  # test direction
        lis             r0,0xffe0
        mfspr           $vrsave,256
        mtspr           256,r0

        li              $idx,15
        vxor            $rndkey0,$rndkey0,$rndkey0
        le?vspltisb     $tmp,0x0f

        lvx             $ivec,0,$ivp            # load [unaligned] iv
        lvsl            $inpperm,0,$ivp
        lvx             $inptail,$idx,$ivp
        le?vxor         $inpperm,$inpperm,$tmp
        vperm           $ivec,$ivec,$inptail,$inpperm

        neg             r11,$inp
        ?lvsl           $keyperm,0,$key         # prepare for unaligned key
        lwz             $rounds,240($key)

        lvsr            $inpperm,0,r11          # prepare for unaligned load
        lvx             $inptail,0,$inp
        addi            $inp,$inp,15            # 15 is not a typo
        le?vxor         $inpperm,$inpperm,$tmp

        ?lvsr           $outperm,0,$out         # prepare for unaligned store
        vspltisb        $outmask,-1
        lvx             $outhead,0,$out
        ?vperm          $outmask,$rndkey0,$outmask,$outperm
        le?vxor         $outperm,$outperm,$tmp

        srwi            $rounds,$rounds,1
        li              $idx,16
        subi            $rounds,$rounds,1
        beq             Lcbc_dec

Lcbc_enc:
        vmr             $inout,$inptail
        lvx             $inptail,0,$inp
        addi            $inp,$inp,16
        mtctr           $rounds
        subi            $len,$len,16            # len-=16

        lvx             $rndkey0,0,$key
         vperm          $inout,$inout,$inptail,$inpperm
        lvx             $rndkey1,$idx,$key
        addi            $idx,$idx,16
        ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
        vxor            $inout,$inout,$rndkey0
        lvx             $rndkey0,$idx,$key
        addi            $idx,$idx,16
        vxor            $inout,$inout,$ivec

Loop_cbc_enc:
        ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
        vcipher         $inout,$inout,$rndkey1
        lvx             $rndkey1,$idx,$key
        addi            $idx,$idx,16
        ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
        vcipher         $inout,$inout,$rndkey0
        lvx             $rndkey0,$idx,$key
        addi            $idx,$idx,16
        bdnz            Loop_cbc_enc

        ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
        vcipher         $inout,$inout,$rndkey1
        lvx             $rndkey1,$idx,$key
        li              $idx,16
        ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
        vcipherlast     $ivec,$inout,$rndkey0
        ${UCMP}i        $len,16

        vperm           $tmp,$ivec,$ivec,$outperm
        vsel            $inout,$outhead,$tmp,$outmask
        vmr             $outhead,$tmp
        stvx            $inout,0,$out
        addi            $out,$out,16
        bge             Lcbc_enc

        b               Lcbc_done

.align  4
Lcbc_dec:
        ${UCMP}i        $len,128
        bge             _aesp8_cbc_decrypt8x
        vmr             $tmp,$inptail
        lvx             $inptail,0,$inp
        addi            $inp,$inp,16
        mtctr           $rounds
        subi            $len,$len,16            # len-=16

        lvx             $rndkey0,0,$key
         vperm          $tmp,$tmp,$inptail,$inpperm
        lvx             $rndkey1,$idx,$key
        addi            $idx,$idx,16
        ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
        vxor            $inout,$tmp,$rndkey0
        lvx             $rndkey0,$idx,$key
        addi            $idx,$idx,16

Loop_cbc_dec:
        ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
        vncipher        $inout,$inout,$rndkey1
        lvx             $rndkey1,$idx,$key
        addi            $idx,$idx,16
        ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
        vncipher        $inout,$inout,$rndkey0
        lvx             $rndkey0,$idx,$key
        addi            $idx,$idx,16
        bdnz            Loop_cbc_dec

        ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
        vncipher        $inout,$inout,$rndkey1
        lvx             $rndkey1,$idx,$key
        li              $idx,16
        ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
        vncipherlast    $inout,$inout,$rndkey0
        ${UCMP}i        $len,16

        vxor            $inout,$inout,$ivec
        vmr             $ivec,$tmp
        vperm           $tmp,$inout,$inout,$outperm
        vsel            $inout,$outhead,$tmp,$outmask
        vmr             $outhead,$tmp
        stvx            $inout,0,$out
        addi            $out,$out,16
        bge             Lcbc_dec

Lcbc_done:
        addi            $out,$out,-1
        lvx             $inout,0,$out           # redundant in aligned case
        vsel            $inout,$outhead,$inout,$outmask
        stvx            $inout,0,$out

        neg             $enc,$ivp               # write [unaligned] iv
        li              $idx,15                 # 15 is not a typo
        vxor            $rndkey0,$rndkey0,$rndkey0
        vspltisb        $outmask,-1
        le?vspltisb     $tmp,0x0f
        ?lvsl           $outperm,0,$enc
        ?vperm          $outmask,$rndkey0,$outmask,$outperm
        le?vxor         $outperm,$outperm,$tmp
        lvx             $outhead,0,$ivp
        vperm           $ivec,$ivec,$ivec,$outperm
        vsel            $inout,$outhead,$ivec,$outmask
        lvx             $inptail,$idx,$ivp
        stvx            $inout,0,$ivp
        vsel            $inout,$ivec,$inptail,$outmask
        stvx            $inout,$idx,$ivp

        mtspr           256,$vrsave
        blr
        .long           0
        .byte           0,12,0x14,0,0,0,6,0
        .long           0
___
#########################################################################
{{      # Optimized CBC decrypt procedure                               #
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
my $rndkey0="v23";      # v24-v25 rotating buffer for first round keys
                        # v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
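# Eight blocks are kept in flight to hide vncipher latency; v24/v25 act as a
# two-entry rotating window over the middle round keys stashed on the stack,
# while v26-v31 pin the last six rounds in registers.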

$code.=<<___;
.align  5
_aesp8_cbc_decrypt8x:
        $STU            $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
        li              r10,`$FRAME+8*16+15`
        li              r11,`$FRAME+8*16+31`
        stvx            v20,r10,$sp             # ABI says so
        addi            r10,r10,32
        stvx            v21,r11,$sp
        addi            r11,r11,32
        stvx            v22,r10,$sp
        addi            r10,r10,32
        stvx            v23,r11,$sp
        addi            r11,r11,32
        stvx            v24,r10,$sp
        addi            r10,r10,32
        stvx            v25,r11,$sp
        addi            r11,r11,32
        stvx            v26,r10,$sp
        addi            r10,r10,32
        stvx            v27,r11,$sp
        addi            r11,r11,32
        stvx            v28,r10,$sp
        addi            r10,r10,32
        stvx            v29,r11,$sp
        addi            r11,r11,32
        stvx            v30,r10,$sp
        stvx            v31,r11,$sp
        li              r0,-1
        stw             $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
        li              $x10,0x10
        $PUSH           r26,`$FRAME+21*16+0*$SIZE_T`($sp)
        li              $x20,0x20
        $PUSH           r27,`$FRAME+21*16+1*$SIZE_T`($sp)
        li              $x30,0x30
        $PUSH           r28,`$FRAME+21*16+2*$SIZE_T`($sp)
        li              $x40,0x40
        $PUSH           r29,`$FRAME+21*16+3*$SIZE_T`($sp)
        li              $x50,0x50
        $PUSH           r30,`$FRAME+21*16+4*$SIZE_T`($sp)
        li              $x60,0x60
        $PUSH           r31,`$FRAME+21*16+5*$SIZE_T`($sp)
        li              $x70,0x70
        mtspr           256,r0

        subi            $rounds,$rounds,3       # -4 in total
        subi            $len,$len,128           # bias

        lvx             $rndkey0,$x00,$key      # load key schedule
        lvx             v30,$x10,$key
        addi            $key,$key,0x20
        lvx             v31,$x00,$key
        ?vperm          $rndkey0,$rndkey0,v30,$keyperm
        addi            $key_,$sp,$FRAME+15
        mtctr           $rounds

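# Align each round key once with ?vperm and stash the aligned copies in the
# on-stack buffer at $key_; the decrypt loop can then refetch them with
# plain (aligned) lvx, two keys per iteration.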
Load_cbc_dec_key:
        ?vperm          v24,v30,v31,$keyperm
        lvx             v30,$x10,$key
        addi            $key,$key,0x20
        stvx            v24,$x00,$key_          # off-load round[1]
        ?vperm          v25,v31,v30,$keyperm
        lvx             v31,$x00,$key
        stvx            v25,$x10,$key_          # off-load round[2]
        addi            $key_,$key_,0x20
        bdnz            Load_cbc_dec_key

        lvx             v26,$x10,$key
        ?vperm          v24,v30,v31,$keyperm
        lvx             v27,$x20,$key
        stvx            v24,$x00,$key_          # off-load round[3]
        ?vperm          v25,v31,v26,$keyperm
        lvx             v28,$x30,$key
        stvx            v25,$x10,$key_          # off-load round[4]
        addi            $key_,$sp,$FRAME+15     # rewind $key_
        ?vperm          v26,v26,v27,$keyperm
        lvx             v29,$x40,$key
        ?vperm          v27,v27,v28,$keyperm
        lvx             v30,$x50,$key
        ?vperm          v28,v28,v29,$keyperm
        lvx             v31,$x60,$key
        ?vperm          v29,v29,v30,$keyperm
        lvx             $out0,$x70,$key         # borrow $out0
        ?vperm          v30,v30,v31,$keyperm
        lvx             v24,$x00,$key_          # pre-load round[1]
        ?vperm          v31,v31,$out0,$keyperm
        lvx             v25,$x10,$key_          # pre-load round[2]

        #lvx            $inptail,0,$inp         # "caller" already did this
        #addi           $inp,$inp,15            # 15 is not a typo
        subi            $inp,$inp,15            # undo "caller"

         le?li          $idx,8
        lvx_u           $in0,$x00,$inp          # load first 8 "words"
         le?lvsl        $inpperm,0,$idx
         le?vspltisb    $tmp,0x0f
        lvx_u           $in1,$x10,$inp
         le?vxor        $inpperm,$inpperm,$tmp  # transform for lvx_u/stvx_u
        lvx_u           $in2,$x20,$inp
         le?vperm       $in0,$in0,$in0,$inpperm
        lvx_u           $in3,$x30,$inp
         le?vperm       $in1,$in1,$in1,$inpperm
        lvx_u           $in4,$x40,$inp
         le?vperm       $in2,$in2,$in2,$inpperm
        vxor            $out0,$in0,$rndkey0
        lvx_u           $in5,$x50,$inp
         le?vperm       $in3,$in3,$in3,$inpperm
        vxor            $out1,$in1,$rndkey0
        lvx_u           $in6,$x60,$inp
         le?vperm       $in4,$in4,$in4,$inpperm
        vxor            $out2,$in2,$rndkey0
        lvx_u           $in7,$x70,$inp
        addi            $inp,$inp,0x80
         le?vperm       $in5,$in5,$in5,$inpperm
        vxor            $out3,$in3,$rndkey0
         le?vperm       $in6,$in6,$in6,$inpperm
        vxor            $out4,$in4,$rndkey0
         le?vperm       $in7,$in7,$in7,$inpperm
        vxor            $out5,$in5,$rndkey0
        vxor            $out6,$in6,$rndkey0
        vxor            $out7,$in7,$rndkey0

        mtctr           $rounds
        b               Loop_cbc_dec8x
.align  5
Loop_cbc_dec8x:
        vncipher        $out0,$out0,v24
        vncipher        $out1,$out1,v24
        vncipher        $out2,$out2,v24
        vncipher        $out3,$out3,v24
        vncipher        $out4,$out4,v24
        vncipher        $out5,$out5,v24
        vncipher        $out6,$out6,v24
        vncipher        $out7,$out7,v24
        lvx             v24,$x20,$key_          # round[3]
        addi            $key_,$key_,0x20

        vncipher        $out0,$out0,v25
        vncipher        $out1,$out1,v25
        vncipher        $out2,$out2,v25
        vncipher        $out3,$out3,v25
        vncipher        $out4,$out4,v25
        vncipher        $out5,$out5,v25
        vncipher        $out6,$out6,v25
        vncipher        $out7,$out7,v25
        lvx             v25,$x10,$key_          # round[4]
        bdnz            Loop_cbc_dec8x

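# Branchless tail accounting: subic sets CA only when $len >= 128, so
# subfe. r0,r0,r0 yields 0 (another full batch ahead) or -1 (short tail).
# The and/add below then bias $inp backwards by the shortfall so the eight
# loads at the bottom of the loop pick up the final blocks.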
        subic           $len,$len,128           # $len-=128
        vncipher        $out0,$out0,v24
        vncipher        $out1,$out1,v24
        vncipher        $out2,$out2,v24
        vncipher        $out3,$out3,v24
        vncipher        $out4,$out4,v24
        vncipher        $out5,$out5,v24
        vncipher        $out6,$out6,v24
        vncipher        $out7,$out7,v24

        subfe.          r0,r0,r0                # borrow?-1:0
        vncipher        $out0,$out0,v25
        vncipher        $out1,$out1,v25
        vncipher        $out2,$out2,v25
        vncipher        $out3,$out3,v25
        vncipher        $out4,$out4,v25
        vncipher        $out5,$out5,v25
        vncipher        $out6,$out6,v25
        vncipher        $out7,$out7,v25

        and             r0,r0,$len
        vncipher        $out0,$out0,v26
        vncipher        $out1,$out1,v26
        vncipher        $out2,$out2,v26
        vncipher        $out3,$out3,v26
        vncipher        $out4,$out4,v26
        vncipher        $out5,$out5,v26
        vncipher        $out6,$out6,v26
        vncipher        $out7,$out7,v26

        add             $inp,$inp,r0            # $inp is adjusted in such a
                                                # way that at exit from the
                                                # loop inX-in7 are loaded
                                                # with the last "words"
        vncipher        $out0,$out0,v27
        vncipher        $out1,$out1,v27
        vncipher        $out2,$out2,v27
        vncipher        $out3,$out3,v27
        vncipher        $out4,$out4,v27
        vncipher        $out5,$out5,v27
        vncipher        $out6,$out6,v27
        vncipher        $out7,$out7,v27

        addi            $key_,$sp,$FRAME+15     # rewind $key_
        vncipher        $out0,$out0,v28
        vncipher        $out1,$out1,v28
        vncipher        $out2,$out2,v28
        vncipher        $out3,$out3,v28
        vncipher        $out4,$out4,v28
        vncipher        $out5,$out5,v28
        vncipher        $out6,$out6,v28
        vncipher        $out7,$out7,v28
        lvx             v24,$x00,$key_          # re-pre-load round[1]

        vncipher        $out0,$out0,v29
        vncipher        $out1,$out1,v29
        vncipher        $out2,$out2,v29
        vncipher        $out3,$out3,v29
        vncipher        $out4,$out4,v29
        vncipher        $out5,$out5,v29
        vncipher        $out6,$out6,v29
        vncipher        $out7,$out7,v29
        lvx             v25,$x10,$key_          # re-pre-load round[2]

        vncipher        $out0,$out0,v30
         vxor           $ivec,$ivec,v31         # xor with last round key
        vncipher        $out1,$out1,v30
         vxor           $in0,$in0,v31
        vncipher        $out2,$out2,v30
         vxor           $in1,$in1,v31
        vncipher        $out3,$out3,v30
         vxor           $in2,$in2,v31
        vncipher        $out4,$out4,v30
         vxor           $in3,$in3,v31
        vncipher        $out5,$out5,v30
         vxor           $in4,$in4,v31
        vncipher        $out6,$out6,v30
         vxor           $in5,$in5,v31
        vncipher        $out7,$out7,v30
         vxor           $in6,$in6,v31

        vncipherlast    $out0,$out0,$ivec
        vncipherlast    $out1,$out1,$in0
         lvx_u          $in0,$x00,$inp          # load next input block
        vncipherlast    $out2,$out2,$in1
         lvx_u          $in1,$x10,$inp
        vncipherlast    $out3,$out3,$in2
         le?vperm       $in0,$in0,$in0,$inpperm
         lvx_u          $in2,$x20,$inp
        vncipherlast    $out4,$out4,$in3
         le?vperm       $in1,$in1,$in1,$inpperm
         lvx_u          $in3,$x30,$inp
        vncipherlast    $out5,$out5,$in4
         le?vperm       $in2,$in2,$in2,$inpperm
         lvx_u          $in4,$x40,$inp
        vncipherlast    $out6,$out6,$in5
         le?vperm       $in3,$in3,$in3,$inpperm
         lvx_u          $in5,$x50,$inp
        vncipherlast    $out7,$out7,$in6
         le?vperm       $in4,$in4,$in4,$inpperm
         lvx_u          $in6,$x60,$inp
        vmr             $ivec,$in7
         le?vperm       $in5,$in5,$in5,$inpperm
         lvx_u          $in7,$x70,$inp
         addi           $inp,$inp,0x80

        le?vperm        $out0,$out0,$out0,$inpperm
        le?vperm        $out1,$out1,$out1,$inpperm
        stvx_u          $out0,$x00,$out
         le?vperm       $in6,$in6,$in6,$inpperm
         vxor           $out0,$in0,$rndkey0
        le?vperm        $out2,$out2,$out2,$inpperm
        stvx_u          $out1,$x10,$out
         le?vperm       $in7,$in7,$in7,$inpperm
         vxor           $out1,$in1,$rndkey0
        le?vperm        $out3,$out3,$out3,$inpperm
        stvx_u          $out2,$x20,$out
         vxor           $out2,$in2,$rndkey0
        le?vperm        $out4,$out4,$out4,$inpperm
        stvx_u          $out3,$x30,$out
         vxor           $out3,$in3,$rndkey0
        le?vperm        $out5,$out5,$out5,$inpperm
        stvx_u          $out4,$x40,$out
         vxor           $out4,$in4,$rndkey0
        le?vperm        $out6,$out6,$out6,$inpperm
        stvx_u          $out5,$x50,$out
         vxor           $out5,$in5,$rndkey0
        le?vperm        $out7,$out7,$out7,$inpperm
        stvx_u          $out6,$x60,$out
         vxor           $out6,$in6,$rndkey0
        stvx_u          $out7,$x70,$out
        addi            $out,$out,0x80
         vxor           $out7,$in7,$rndkey0

        mtctr           $rounds
        beq             Loop_cbc_dec8x          # did $len-=128 borrow?

        addic.          $len,$len,128
        beq             Lcbc_dec8x_done
        nop
        nop

Loop_cbc_dec8x_tail:                            # up to 7 "words" tail...
        vncipher        $out1,$out1,v24
        vncipher        $out2,$out2,v24
        vncipher        $out3,$out3,v24
        vncipher        $out4,$out4,v24
        vncipher        $out5,$out5,v24
        vncipher        $out6,$out6,v24
        vncipher        $out7,$out7,v24
        lvx             v24,$x20,$key_          # round[3]
        addi            $key_,$key_,0x20

        vncipher        $out1,$out1,v25
        vncipher        $out2,$out2,v25
        vncipher        $out3,$out3,v25
        vncipher        $out4,$out4,v25
        vncipher        $out5,$out5,v25
        vncipher        $out6,$out6,v25
        vncipher        $out7,$out7,v25
        lvx             v25,$x10,$key_          # round[4]
        bdnz            Loop_cbc_dec8x_tail

        vncipher        $out1,$out1,v24
        vncipher        $out2,$out2,v24
        vncipher        $out3,$out3,v24
        vncipher        $out4,$out4,v24
        vncipher        $out5,$out5,v24
        vncipher        $out6,$out6,v24
        vncipher        $out7,$out7,v24

        vncipher        $out1,$out1,v25
        vncipher        $out2,$out2,v25
        vncipher        $out3,$out3,v25
        vncipher        $out4,$out4,v25
        vncipher        $out5,$out5,v25
        vncipher        $out6,$out6,v25
        vncipher        $out7,$out7,v25

        vncipher        $out1,$out1,v26
        vncipher        $out2,$out2,v26
        vncipher        $out3,$out3,v26
        vncipher        $out4,$out4,v26
        vncipher        $out5,$out5,v26
        vncipher        $out6,$out6,v26
        vncipher        $out7,$out7,v26

        vncipher        $out1,$out1,v27
        vncipher        $out2,$out2,v27
        vncipher        $out3,$out3,v27
        vncipher        $out4,$out4,v27
        vncipher        $out5,$out5,v27
        vncipher        $out6,$out6,v27
        vncipher        $out7,$out7,v27

        vncipher        $out1,$out1,v28
        vncipher        $out2,$out2,v28
        vncipher        $out3,$out3,v28
        vncipher        $out4,$out4,v28
        vncipher        $out5,$out5,v28
        vncipher        $out6,$out6,v28
        vncipher        $out7,$out7,v28

        vncipher        $out1,$out1,v29
        vncipher        $out2,$out2,v29
        vncipher        $out3,$out3,v29
        vncipher        $out4,$out4,v29
        vncipher        $out5,$out5,v29
        vncipher        $out6,$out6,v29
        vncipher        $out7,$out7,v29

        vncipher        $out1,$out1,v30
         vxor           $ivec,$ivec,v31         # last round key
        vncipher        $out2,$out2,v30
         vxor           $in1,$in1,v31
        vncipher        $out3,$out3,v30
         vxor           $in2,$in2,v31
        vncipher        $out4,$out4,v30
         vxor           $in3,$in3,v31
        vncipher        $out5,$out5,v30
         vxor           $in4,$in4,v31
        vncipher        $out6,$out6,v30
         vxor           $in5,$in5,v31
        vncipher        $out7,$out7,v30
         vxor           $in6,$in6,v31

        cmplwi          $len,32                 # switch($len)
        blt             Lcbc_dec8x_one
        nop
        beq             Lcbc_dec8x_two
        cmplwi          $len,64
        blt             Lcbc_dec8x_three
        nop
        beq             Lcbc_dec8x_four
        cmplwi          $len,96
        blt             Lcbc_dec8x_five
        nop
        beq             Lcbc_dec8x_six

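# The compare ladder above dispatches on the 16..112 bytes that remain,
# finishing 1-7 blocks; in every branch $ivec ends up holding the last
# ciphertext block, which is written back as the IV for the next call.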
Lcbc_dec8x_seven:
        vncipherlast    $out1,$out1,$ivec
        vncipherlast    $out2,$out2,$in1
        vncipherlast    $out3,$out3,$in2
        vncipherlast    $out4,$out4,$in3
        vncipherlast    $out5,$out5,$in4
        vncipherlast    $out6,$out6,$in5
        vncipherlast    $out7,$out7,$in6
        vmr             $ivec,$in7

        le?vperm        $out1,$out1,$out1,$inpperm
        le?vperm        $out2,$out2,$out2,$inpperm
        stvx_u          $out1,$x00,$out
        le?vperm        $out3,$out3,$out3,$inpperm
        stvx_u          $out2,$x10,$out
        le?vperm        $out4,$out4,$out4,$inpperm
        stvx_u          $out3,$x20,$out
        le?vperm        $out5,$out5,$out5,$inpperm
        stvx_u          $out4,$x30,$out
        le?vperm        $out6,$out6,$out6,$inpperm
        stvx_u          $out5,$x40,$out
        le?vperm        $out7,$out7,$out7,$inpperm
        stvx_u          $out6,$x50,$out
        stvx_u          $out7,$x60,$out
        addi            $out,$out,0x70
        b               Lcbc_dec8x_done

.align  5
Lcbc_dec8x_six:
        vncipherlast    $out2,$out2,$ivec
        vncipherlast    $out3,$out3,$in2
        vncipherlast    $out4,$out4,$in3
        vncipherlast    $out5,$out5,$in4
        vncipherlast    $out6,$out6,$in5
        vncipherlast    $out7,$out7,$in6
        vmr             $ivec,$in7

        le?vperm        $out2,$out2,$out2,$inpperm
        le?vperm        $out3,$out3,$out3,$inpperm
        stvx_u          $out2,$x00,$out
        le?vperm        $out4,$out4,$out4,$inpperm
        stvx_u          $out3,$x10,$out
        le?vperm        $out5,$out5,$out5,$inpperm
        stvx_u          $out4,$x20,$out
        le?vperm        $out6,$out6,$out6,$inpperm
        stvx_u          $out5,$x30,$out
        le?vperm        $out7,$out7,$out7,$inpperm
        stvx_u          $out6,$x40,$out
        stvx_u          $out7,$x50,$out
        addi            $out,$out,0x60
        b               Lcbc_dec8x_done

.align  5
Lcbc_dec8x_five:
        vncipherlast    $out3,$out3,$ivec
        vncipherlast    $out4,$out4,$in3
        vncipherlast    $out5,$out5,$in4
        vncipherlast    $out6,$out6,$in5
        vncipherlast    $out7,$out7,$in6
        vmr             $ivec,$in7

        le?vperm        $out3,$out3,$out3,$inpperm
        le?vperm        $out4,$out4,$out4,$inpperm
        stvx_u          $out3,$x00,$out
        le?vperm        $out5,$out5,$out5,$inpperm
        stvx_u          $out4,$x10,$out
        le?vperm        $out6,$out6,$out6,$inpperm
        stvx_u          $out5,$x20,$out
        le?vperm        $out7,$out7,$out7,$inpperm
        stvx_u          $out6,$x30,$out
        stvx_u          $out7,$x40,$out
        addi            $out,$out,0x50
        b               Lcbc_dec8x_done

.align  5
Lcbc_dec8x_four:
        vncipherlast    $out4,$out4,$ivec
        vncipherlast    $out5,$out5,$in4
        vncipherlast    $out6,$out6,$in5
        vncipherlast    $out7,$out7,$in6
        vmr             $ivec,$in7

        le?vperm        $out4,$out4,$out4,$inpperm
        le?vperm        $out5,$out5,$out5,$inpperm
        stvx_u          $out4,$x00,$out
        le?vperm        $out6,$out6,$out6,$inpperm
        stvx_u          $out5,$x10,$out
        le?vperm        $out7,$out7,$out7,$inpperm
        stvx_u          $out6,$x20,$out
        stvx_u          $out7,$x30,$out
        addi            $out,$out,0x40
        b               Lcbc_dec8x_done

.align  5
Lcbc_dec8x_three:
        vncipherlast    $out5,$out5,$ivec
        vncipherlast    $out6,$out6,$in5
        vncipherlast    $out7,$out7,$in6
        vmr             $ivec,$in7

        le?vperm        $out5,$out5,$out5,$inpperm
        le?vperm        $out6,$out6,$out6,$inpperm
        stvx_u          $out5,$x00,$out
        le?vperm        $out7,$out7,$out7,$inpperm
        stvx_u          $out6,$x10,$out
        stvx_u          $out7,$x20,$out
        addi            $out,$out,0x30
        b               Lcbc_dec8x_done

.align  5
Lcbc_dec8x_two:
        vncipherlast    $out6,$out6,$ivec
        vncipherlast    $out7,$out7,$in6
        vmr             $ivec,$in7

        le?vperm        $out6,$out6,$out6,$inpperm
        le?vperm        $out7,$out7,$out7,$inpperm
        stvx_u          $out6,$x00,$out
        stvx_u          $out7,$x10,$out
        addi            $out,$out,0x20
        b               Lcbc_dec8x_done

.align  5
Lcbc_dec8x_one:
        vncipherlast    $out7,$out7,$ivec
        vmr             $ivec,$in7

        le?vperm        $out7,$out7,$out7,$inpperm
        stvx_u          $out7,0,$out
        addi            $out,$out,0x10

Lcbc_dec8x_done:
        le?vperm        $ivec,$ivec,$ivec,$inpperm
        stvx_u          $ivec,0,$ivp            # write [unaligned] iv

        li              r10,`$FRAME+15`
        li              r11,`$FRAME+31`
        stvx            $inpperm,r10,$sp        # wipe copies of round keys
        addi            r10,r10,32
        stvx            $inpperm,r11,$sp
        addi            r11,r11,32
        stvx            $inpperm,r10,$sp
        addi            r10,r10,32
        stvx            $inpperm,r11,$sp
        addi            r11,r11,32
        stvx            $inpperm,r10,$sp
        addi            r10,r10,32
        stvx            $inpperm,r11,$sp
        addi            r11,r11,32
        stvx            $inpperm,r10,$sp
        addi            r10,r10,32
        stvx            $inpperm,r11,$sp
        addi            r11,r11,32

        mtspr           256,$vrsave
        lvx             v20,r10,$sp             # ABI says so
        addi            r10,r10,32
        lvx             v21,r11,$sp
        addi            r11,r11,32
        lvx             v22,r10,$sp
        addi            r10,r10,32
        lvx             v23,r11,$sp
        addi            r11,r11,32
        lvx             v24,r10,$sp
        addi            r10,r10,32
        lvx             v25,r11,$sp
        addi            r11,r11,32
        lvx             v26,r10,$sp
        addi            r10,r10,32
        lvx             v27,r11,$sp
        addi            r11,r11,32
        lvx             v28,r10,$sp
        addi            r10,r10,32
        lvx             v29,r11,$sp
        addi            r11,r11,32
        lvx             v30,r10,$sp
        lvx             v31,r11,$sp
        $POP            r26,`$FRAME+21*16+0*$SIZE_T`($sp)
        $POP            r27,`$FRAME+21*16+1*$SIZE_T`($sp)
        $POP            r28,`$FRAME+21*16+2*$SIZE_T`($sp)
        $POP            r29,`$FRAME+21*16+3*$SIZE_T`($sp)
        $POP            r30,`$FRAME+21*16+4*$SIZE_T`($sp)
        $POP            r31,`$FRAME+21*16+5*$SIZE_T`($sp)
        addi            $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
        blr
        .long           0
        .byte           0,12,0x14,0,0x80,6,6,0
        .long           0
.size   .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
___
}}      }}}

#########################################################################
{{{     # CTR procedure[s]                                              #
my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=             map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
                                                map("v$_",(4..11));
my $dat=$tmp;
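# The 32-bit counter lives in the last word of the IV block: $one is built
# as 0...01 (vspltisb then vsldoi by one byte), so vadduwm bumps only that
# word, matching ctr32 semantics.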

$code.=<<___;
.globl  .${prefix}_ctr32_encrypt_blocks
        ${UCMP}i        $len,1
        bltlr-

        lis             r0,0xfff0
        mfspr           $vrsave,256
        mtspr           256,r0

        li              $idx,15
        vxor            $rndkey0,$rndkey0,$rndkey0
        le?vspltisb     $tmp,0x0f

        lvx             $ivec,0,$ivp            # load [unaligned] iv
        lvsl            $inpperm,0,$ivp
        lvx             $inptail,$idx,$ivp
         vspltisb       $one,1
        le?vxor         $inpperm,$inpperm,$tmp
        vperm           $ivec,$ivec,$inptail,$inpperm
         vsldoi         $one,$rndkey0,$one,1

        neg             r11,$inp
        ?lvsl           $keyperm,0,$key         # prepare for unaligned key
        lwz             $rounds,240($key)

        lvsr            $inpperm,0,r11          # prepare for unaligned load
        lvx             $inptail,0,$inp
        addi            $inp,$inp,15            # 15 is not a typo
1323         le?vxor         $inpperm,$inpperm,$tmp
1324
1325         srwi            $rounds,$rounds,1
1326         li              $idx,16
1327         subi            $rounds,$rounds,1
1328
1329         ${UCMP}i        $len,8
1330         bge             _aesp8_ctr32_encrypt8x
1331
1332         ?lvsr           $outperm,0,$out         # prepare for unaligned store
1333         vspltisb        $outmask,-1
1334         lvx             $outhead,0,$out
1335         ?vperm          $outmask,$rndkey0,$outmask,$outperm
1336         le?vxor         $outperm,$outperm,$tmp
1337
1338         lvx             $rndkey0,0,$key
1339         mtctr           $rounds
1340         lvx             $rndkey1,$idx,$key
1341         addi            $idx,$idx,16
1342         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
1343         vxor            $inout,$ivec,$rndkey0
1344         lvx             $rndkey0,$idx,$key
1345         addi            $idx,$idx,16
1346         b               Loop_ctr32_enc
1347
1348 .align  5
1349 Loop_ctr32_enc:
1350         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
1351         vcipher         $inout,$inout,$rndkey1
1352         lvx             $rndkey1,$idx,$key
1353         addi            $idx,$idx,16
1354         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
1355         vcipher         $inout,$inout,$rndkey0
1356         lvx             $rndkey0,$idx,$key
1357         addi            $idx,$idx,16
1358         bdnz            Loop_ctr32_enc
1359
1360         vadduqm         $ivec,$ivec,$one        # quadword add, so the carry
                                                     # crosses the whole counter
                                                     # (matches the 8x path)
1361          vmr            $dat,$inptail
1362          lvx            $inptail,0,$inp
1363          addi           $inp,$inp,16
1364          subic.         $len,$len,1             # blocks--
1365
1366         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
1367         vcipher         $inout,$inout,$rndkey1
1368         lvx             $rndkey1,$idx,$key
1369          vperm          $dat,$dat,$inptail,$inpperm
1370          li             $idx,16
1371         ?vperm          $rndkey1,$rndkey0,$rndkey1,$keyperm
1372          lvx            $rndkey0,0,$key
1373         vxor            $dat,$dat,$rndkey1      # last round key
1374         vcipherlast     $inout,$inout,$dat
1375
1376          lvx            $rndkey1,$idx,$key
1377          addi           $idx,$idx,16
1378         vperm           $inout,$inout,$inout,$outperm
1379         vsel            $dat,$outhead,$inout,$outmask
1380          mtctr          $rounds
1381          ?vperm         $rndkey0,$rndkey0,$rndkey1,$keyperm
1382         vmr             $outhead,$inout
1383          vxor           $inout,$ivec,$rndkey0
1384          lvx            $rndkey0,$idx,$key
1385          addi           $idx,$idx,16
1386         stvx            $dat,0,$out
1387         addi            $out,$out,16
1388         bne             Loop_ctr32_enc
1389
1390         addi            $out,$out,-1
1391         lvx             $inout,0,$out           # redundant in aligned case
1392         vsel            $inout,$outhead,$inout,$outmask
1393         stvx            $inout,0,$out
1394
1395         mtspr           256,$vrsave
1396         blr
1397         .long           0
1398         .byte           0,12,0x14,0,0,0,6,0
1399         .long           0
1400 ___
1401 #########################################################################
1402 {{      # Optimized CTR procedure                                       #
1403 my $key_="r11";
1404 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
1405 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
1406 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
1407 my $rndkey0="v23";      # v24-v25 rotating buffer for the streamed round keys
1408                         # v26-v31 last 6 round keys
1409 my ($tmp,$keyperm)=($in3,$in4); # aliases with the single-block "caller" above; redundant assignment
1410 my ($two,$three,$four)=($outhead,$outperm,$outmask);
1411
1412 $code.=<<___;
1413 .align  5
1414 _aesp8_ctr32_encrypt8x:
1415         $STU            $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
1416         li              r10,`$FRAME+8*16+15`
1417         li              r11,`$FRAME+8*16+31`
1418         stvx            v20,r10,$sp             # ABI says so
1419         addi            r10,r10,32
1420         stvx            v21,r11,$sp
1421         addi            r11,r11,32
1422         stvx            v22,r10,$sp
1423         addi            r10,r10,32
1424         stvx            v23,r11,$sp
1425         addi            r11,r11,32
1426         stvx            v24,r10,$sp
1427         addi            r10,r10,32
1428         stvx            v25,r11,$sp
1429         addi            r11,r11,32
1430         stvx            v26,r10,$sp
1431         addi            r10,r10,32
1432         stvx            v27,r11,$sp
1433         addi            r11,r11,32
1434         stvx            v28,r10,$sp
1435         addi            r10,r10,32
1436         stvx            v29,r11,$sp
1437         addi            r11,r11,32
1438         stvx            v30,r10,$sp
1439         stvx            v31,r11,$sp
1440         li              r0,-1
1441         stw             $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
1442         li              $x10,0x10
1443         $PUSH           r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1444         li              $x20,0x20
1445         $PUSH           r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1446         li              $x30,0x30
1447         $PUSH           r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1448         li              $x40,0x40
1449         $PUSH           r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1450         li              $x50,0x50
1451         $PUSH           r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1452         li              $x60,0x60
1453         $PUSH           r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1454         li              $x70,0x70
1455         mtspr           256,r0
1456
1457         subi            $rounds,$rounds,3       # -4 in total
1458
1459         lvx             $rndkey0,$x00,$key      # load key schedule
1460         lvx             v30,$x10,$key
1461         addi            $key,$key,0x20
1462         lvx             v31,$x00,$key
1463         ?vperm          $rndkey0,$rndkey0,v30,$keyperm
1464         addi            $key_,$sp,$FRAME+15
1465         mtctr           $rounds
1466
1467 Load_ctr32_enc_key:
1468         ?vperm          v24,v30,v31,$keyperm
1469         lvx             v30,$x10,$key
1470         addi            $key,$key,0x20
1471         stvx            v24,$x00,$key_          # off-load round[1]
1472         ?vperm          v25,v31,v30,$keyperm
1473         lvx             v31,$x00,$key
1474         stvx            v25,$x10,$key_          # off-load round[2]
1475         addi            $key_,$key_,0x20
1476         bdnz            Load_ctr32_enc_key
1477
1478         lvx             v26,$x10,$key
1479         ?vperm          v24,v30,v31,$keyperm
1480         lvx             v27,$x20,$key
1481         stvx            v24,$x00,$key_          # off-load round[3]
1482         ?vperm          v25,v31,v26,$keyperm
1483         lvx             v28,$x30,$key
1484         stvx            v25,$x10,$key_          # off-load round[4]
1485         addi            $key_,$sp,$FRAME+15     # rewind $key_
1486         ?vperm          v26,v26,v27,$keyperm
1487         lvx             v29,$x40,$key
1488         ?vperm          v27,v27,v28,$keyperm
1489         lvx             v30,$x50,$key
1490         ?vperm          v28,v28,v29,$keyperm
1491         lvx             v31,$x60,$key
1492         ?vperm          v29,v29,v30,$keyperm
1493         lvx             $out0,$x70,$key         # borrow $out0
1494         ?vperm          v30,v30,v31,$keyperm
1495         lvx             v24,$x00,$key_          # pre-load round[1]
1496         ?vperm          v31,v31,$out0,$keyperm
1497         lvx             v25,$x10,$key_          # pre-load round[2]
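# The whole key schedule is now vperm-adjusted: round[1] and onwards
# sit in the aligned stack area at $key_ for plain lvx in the loop,
# while the last 6 round keys stay resident in v26-v31 and round[0]
# in $rndkey0.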
1498
1499         vadduqm         $two,$one,$one
1500         subi            $inp,$inp,15            # undo "caller"
1501         $SHL            $len,$len,4
1502
1503         vadduqm         $out1,$ivec,$one        # counter values ...
1504         vadduqm         $out2,$ivec,$two
1505         vxor            $out0,$ivec,$rndkey0    # ... xored with rndkey[0]
1506          le?li          $idx,8
1507         vadduqm         $out3,$out1,$two
1508         vxor            $out1,$out1,$rndkey0
1509          le?lvsl        $inpperm,0,$idx
1510         vadduqm         $out4,$out2,$two
1511         vxor            $out2,$out2,$rndkey0
1512          le?vspltisb    $tmp,0x0f
1513         vadduqm         $out5,$out3,$two
1514         vxor            $out3,$out3,$rndkey0
1515          le?vxor        $inpperm,$inpperm,$tmp  # transform for lvx_u/stvx_u
1516         vadduqm         $out6,$out4,$two
1517         vxor            $out4,$out4,$rndkey0
1518         vadduqm         $out7,$out5,$two
1519         vxor            $out5,$out5,$rndkey0
1520         vadduqm         $ivec,$out6,$two        # next counter value
1521         vxor            $out6,$out6,$rndkey0
1522         vxor            $out7,$out7,$rndkey0
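# The eight counter values are generated with 128-bit additions
# (vadduqm) and pre-whitened with round key[0] above, so the round
# loop below can start directly at round[1].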
1523
1524         mtctr           $rounds
1525         b               Loop_ctr32_enc8x
1526 .align  5
1527 Loop_ctr32_enc8x:
1528         vcipher         $out0,$out0,v24
1529         vcipher         $out1,$out1,v24
1530         vcipher         $out2,$out2,v24
1531         vcipher         $out3,$out3,v24
1532         vcipher         $out4,$out4,v24
1533         vcipher         $out5,$out5,v24
1534         vcipher         $out6,$out6,v24
1535         vcipher         $out7,$out7,v24
1536 Loop_ctr32_enc8x_middle:
1537         lvx             v24,$x20,$key_          # round[3]
1538         addi            $key_,$key_,0x20
1539
1540         vcipher         $out0,$out0,v25
1541         vcipher         $out1,$out1,v25
1542         vcipher         $out2,$out2,v25
1543         vcipher         $out3,$out3,v25
1544         vcipher         $out4,$out4,v25
1545         vcipher         $out5,$out5,v25
1546         vcipher         $out6,$out6,v25
1547         vcipher         $out7,$out7,v25
1548         lvx             v25,$x10,$key_          # round[4]
1549         bdnz            Loop_ctr32_enc8x
1550
1551         subic           r11,$len,256            # $len-256, borrow $key_
1552         vcipher         $out0,$out0,v24
1553         vcipher         $out1,$out1,v24
1554         vcipher         $out2,$out2,v24
1555         vcipher         $out3,$out3,v24
1556         vcipher         $out4,$out4,v24
1557         vcipher         $out5,$out5,v24
1558         vcipher         $out6,$out6,v24
1559         vcipher         $out7,$out7,v24
1560
1561         subfe           r0,r0,r0                # borrow?-1:0
1562         vcipher         $out0,$out0,v25
1563         vcipher         $out1,$out1,v25
1564         vcipher         $out2,$out2,v25
1565         vcipher         $out3,$out3,v25
1566         vcipher         $out4,$out4,v25
1567         vcipher         $out5,$out5,v25
1568         vcipher         $out6,$out6,v25
1569         vcipher         $out7,$out7,v25
1570
1571         and             r0,r0,r11
1572         addi            $key_,$sp,$FRAME+15     # rewind $key_
1573         vcipher         $out0,$out0,v26
1574         vcipher         $out1,$out1,v26
1575         vcipher         $out2,$out2,v26
1576         vcipher         $out3,$out3,v26
1577         vcipher         $out4,$out4,v26
1578         vcipher         $out5,$out5,v26
1579         vcipher         $out6,$out6,v26
1580         vcipher         $out7,$out7,v26
1581         lvx             v24,$x00,$key_          # re-pre-load round[1]
1582
1583         subic           $len,$len,129           # $len-=129
1584         vcipher         $out0,$out0,v27
1585         addi            $len,$len,1             # $len-=128 really
1586         vcipher         $out1,$out1,v27
1587         vcipher         $out2,$out2,v27
1588         vcipher         $out3,$out3,v27
1589         vcipher         $out4,$out4,v27
1590         vcipher         $out5,$out5,v27
1591         vcipher         $out6,$out6,v27
1592         vcipher         $out7,$out7,v27
1593         lvx             v25,$x10,$key_          # re-pre-load round[2]
1594
1595         vcipher         $out0,$out0,v28
1596          lvx_u          $in0,$x00,$inp          # load input
1597         vcipher         $out1,$out1,v28
1598          lvx_u          $in1,$x10,$inp
1599         vcipher         $out2,$out2,v28
1600          lvx_u          $in2,$x20,$inp
1601         vcipher         $out3,$out3,v28
1602          lvx_u          $in3,$x30,$inp
1603         vcipher         $out4,$out4,v28
1604          lvx_u          $in4,$x40,$inp
1605         vcipher         $out5,$out5,v28
1606          lvx_u          $in5,$x50,$inp
1607         vcipher         $out6,$out6,v28
1608          lvx_u          $in6,$x60,$inp
1609         vcipher         $out7,$out7,v28
1610          lvx_u          $in7,$x70,$inp
1611          addi           $inp,$inp,0x80
1612
1613         vcipher         $out0,$out0,v29
1614          le?vperm       $in0,$in0,$in0,$inpperm
1615         vcipher         $out1,$out1,v29
1616          le?vperm       $in1,$in1,$in1,$inpperm
1617         vcipher         $out2,$out2,v29
1618          le?vperm       $in2,$in2,$in2,$inpperm
1619         vcipher         $out3,$out3,v29
1620          le?vperm       $in3,$in3,$in3,$inpperm
1621         vcipher         $out4,$out4,v29
1622          le?vperm       $in4,$in4,$in4,$inpperm
1623         vcipher         $out5,$out5,v29
1624          le?vperm       $in5,$in5,$in5,$inpperm
1625         vcipher         $out6,$out6,v29
1626          le?vperm       $in6,$in6,$in6,$inpperm
1627         vcipher         $out7,$out7,v29
1628          le?vperm       $in7,$in7,$in7,$inpperm
1629
1630         add             $inp,$inp,r0            # $inp is adjusted in such a
1631                                                 # way that at exit from the
1632                                                 # loop inX-in7 are loaded
1633                                                 # with the last "words"
1634         subfe.          r0,r0,r0                # borrow?-1:0
1635         vcipher         $out0,$out0,v30
1636          vxor           $in0,$in0,v31           # xor with last round key
1637         vcipher         $out1,$out1,v30
1638          vxor           $in1,$in1,v31
1639         vcipher         $out2,$out2,v30
1640          vxor           $in2,$in2,v31
1641         vcipher         $out3,$out3,v30
1642          vxor           $in3,$in3,v31
1643         vcipher         $out4,$out4,v30
1644          vxor           $in4,$in4,v31
1645         vcipher         $out5,$out5,v30
1646          vxor           $in5,$in5,v31
1647         vcipher         $out6,$out6,v30
1648          vxor           $in6,$in6,v31
1649         vcipher         $out7,$out7,v30
1650          vxor           $in7,$in7,v31
1651
1652         bne             Lctr32_enc8x_break      # did $len-129 borrow?
1653
1654         vcipherlast     $in0,$out0,$in0
1655         vcipherlast     $in1,$out1,$in1
1656          vadduqm        $out1,$ivec,$one        # counter values ...
1657         vcipherlast     $in2,$out2,$in2
1658          vadduqm        $out2,$ivec,$two
1659          vxor           $out0,$ivec,$rndkey0    # ... xored with rndkey[0]
1660         vcipherlast     $in3,$out3,$in3
1661          vadduqm        $out3,$out1,$two
1662          vxor           $out1,$out1,$rndkey0
1663         vcipherlast     $in4,$out4,$in4
1664          vadduqm        $out4,$out2,$two
1665          vxor           $out2,$out2,$rndkey0
1666         vcipherlast     $in5,$out5,$in5
1667          vadduqm        $out5,$out3,$two
1668          vxor           $out3,$out3,$rndkey0
1669         vcipherlast     $in6,$out6,$in6
1670          vadduqm        $out6,$out4,$two
1671          vxor           $out4,$out4,$rndkey0
1672         vcipherlast     $in7,$out7,$in7
1673          vadduqm        $out7,$out5,$two
1674          vxor           $out5,$out5,$rndkey0
1675         le?vperm        $in0,$in0,$in0,$inpperm
1676          vadduqm        $ivec,$out6,$two        # next counter value
1677          vxor           $out6,$out6,$rndkey0
1678         le?vperm        $in1,$in1,$in1,$inpperm
1679          vxor           $out7,$out7,$rndkey0
1680         mtctr           $rounds
1681
1682          vcipher        $out0,$out0,v24
1683         stvx_u          $in0,$x00,$out
1684         le?vperm        $in2,$in2,$in2,$inpperm
1685          vcipher        $out1,$out1,v24
1686         stvx_u          $in1,$x10,$out
1687         le?vperm        $in3,$in3,$in3,$inpperm
1688          vcipher        $out2,$out2,v24
1689         stvx_u          $in2,$x20,$out
1690         le?vperm        $in4,$in4,$in4,$inpperm
1691          vcipher        $out3,$out3,v24
1692         stvx_u          $in3,$x30,$out
1693         le?vperm        $in5,$in5,$in5,$inpperm
1694          vcipher        $out4,$out4,v24
1695         stvx_u          $in4,$x40,$out
1696         le?vperm        $in6,$in6,$in6,$inpperm
1697          vcipher        $out5,$out5,v24
1698         stvx_u          $in5,$x50,$out
1699         le?vperm        $in7,$in7,$in7,$inpperm
1700          vcipher        $out6,$out6,v24
1701         stvx_u          $in6,$x60,$out
1702          vcipher        $out7,$out7,v24
1703         stvx_u          $in7,$x70,$out
1704         addi            $out,$out,0x80
1705
1706         b               Loop_ctr32_enc8x_middle
1707
1708 .align  5
1709 Lctr32_enc8x_break:
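# Here $len holds (bytes remaining - 128), a multiple of 16 in the
# range -0x70..0x00; the compares below dispatch to the path that
# finishes the corresponding one to eight final blocks.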
1710         cmpwi           $len,-0x60
1711         blt             Lctr32_enc8x_one
1712         nop
1713         beq             Lctr32_enc8x_two
1714         cmpwi           $len,-0x40
1715         blt             Lctr32_enc8x_three
1716         nop
1717         beq             Lctr32_enc8x_four
1718         cmpwi           $len,-0x20
1719         blt             Lctr32_enc8x_five
1720         nop
1721         beq             Lctr32_enc8x_six
1722         cmpwi           $len,0x00
1723         blt             Lctr32_enc8x_seven
1724
1725 Lctr32_enc8x_eight:
1726         vcipherlast     $out0,$out0,$in0
1727         vcipherlast     $out1,$out1,$in1
1728         vcipherlast     $out2,$out2,$in2
1729         vcipherlast     $out3,$out3,$in3
1730         vcipherlast     $out4,$out4,$in4
1731         vcipherlast     $out5,$out5,$in5
1732         vcipherlast     $out6,$out6,$in6
1733         vcipherlast     $out7,$out7,$in7
1734
1735         le?vperm        $out0,$out0,$out0,$inpperm
1736         le?vperm        $out1,$out1,$out1,$inpperm
1737         stvx_u          $out0,$x00,$out
1738         le?vperm        $out2,$out2,$out2,$inpperm
1739         stvx_u          $out1,$x10,$out
1740         le?vperm        $out3,$out3,$out3,$inpperm
1741         stvx_u          $out2,$x20,$out
1742         le?vperm        $out4,$out4,$out4,$inpperm
1743         stvx_u          $out3,$x30,$out
1744         le?vperm        $out5,$out5,$out5,$inpperm
1745         stvx_u          $out4,$x40,$out
1746         le?vperm        $out6,$out6,$out6,$inpperm
1747         stvx_u          $out5,$x50,$out
1748         le?vperm        $out7,$out7,$out7,$inpperm
1749         stvx_u          $out6,$x60,$out
1750         stvx_u          $out7,$x70,$out
1751         addi            $out,$out,0x80
1752         b               Lctr32_enc8x_done
1753
1754 .align  5
1755 Lctr32_enc8x_seven:
1756         vcipherlast     $out0,$out0,$in1
1757         vcipherlast     $out1,$out1,$in2
1758         vcipherlast     $out2,$out2,$in3
1759         vcipherlast     $out3,$out3,$in4
1760         vcipherlast     $out4,$out4,$in5
1761         vcipherlast     $out5,$out5,$in6
1762         vcipherlast     $out6,$out6,$in7
1763
1764         le?vperm        $out0,$out0,$out0,$inpperm
1765         le?vperm        $out1,$out1,$out1,$inpperm
1766         stvx_u          $out0,$x00,$out
1767         le?vperm        $out2,$out2,$out2,$inpperm
1768         stvx_u          $out1,$x10,$out
1769         le?vperm        $out3,$out3,$out3,$inpperm
1770         stvx_u          $out2,$x20,$out
1771         le?vperm        $out4,$out4,$out4,$inpperm
1772         stvx_u          $out3,$x30,$out
1773         le?vperm        $out5,$out5,$out5,$inpperm
1774         stvx_u          $out4,$x40,$out
1775         le?vperm        $out6,$out6,$out6,$inpperm
1776         stvx_u          $out5,$x50,$out
1777         stvx_u          $out6,$x60,$out
1778         addi            $out,$out,0x70
1779         b               Lctr32_enc8x_done
1780
1781 .align  5
1782 Lctr32_enc8x_six:
1783         vcipherlast     $out0,$out0,$in2
1784         vcipherlast     $out1,$out1,$in3
1785         vcipherlast     $out2,$out2,$in4
1786         vcipherlast     $out3,$out3,$in5
1787         vcipherlast     $out4,$out4,$in6
1788         vcipherlast     $out5,$out5,$in7
1789
1790         le?vperm        $out0,$out0,$out0,$inpperm
1791         le?vperm        $out1,$out1,$out1,$inpperm
1792         stvx_u          $out0,$x00,$out
1793         le?vperm        $out2,$out2,$out2,$inpperm
1794         stvx_u          $out1,$x10,$out
1795         le?vperm        $out3,$out3,$out3,$inpperm
1796         stvx_u          $out2,$x20,$out
1797         le?vperm        $out4,$out4,$out4,$inpperm
1798         stvx_u          $out3,$x30,$out
1799         le?vperm        $out5,$out5,$out5,$inpperm
1800         stvx_u          $out4,$x40,$out
1801         stvx_u          $out5,$x50,$out
1802         addi            $out,$out,0x60
1803         b               Lctr32_enc8x_done
1804
1805 .align  5
1806 Lctr32_enc8x_five:
1807         vcipherlast     $out0,$out0,$in3
1808         vcipherlast     $out1,$out1,$in4
1809         vcipherlast     $out2,$out2,$in5
1810         vcipherlast     $out3,$out3,$in6
1811         vcipherlast     $out4,$out4,$in7
1812
1813         le?vperm        $out0,$out0,$out0,$inpperm
1814         le?vperm        $out1,$out1,$out1,$inpperm
1815         stvx_u          $out0,$x00,$out
1816         le?vperm        $out2,$out2,$out2,$inpperm
1817         stvx_u          $out1,$x10,$out
1818         le?vperm        $out3,$out3,$out3,$inpperm
1819         stvx_u          $out2,$x20,$out
1820         le?vperm        $out4,$out4,$out4,$inpperm
1821         stvx_u          $out3,$x30,$out
1822         stvx_u          $out4,$x40,$out
1823         addi            $out,$out,0x50
1824         b               Lctr32_enc8x_done
1825
1826 .align  5
1827 Lctr32_enc8x_four:
1828         vcipherlast     $out0,$out0,$in4
1829         vcipherlast     $out1,$out1,$in5
1830         vcipherlast     $out2,$out2,$in6
1831         vcipherlast     $out3,$out3,$in7
1832
1833         le?vperm        $out0,$out0,$out0,$inpperm
1834         le?vperm        $out1,$out1,$out1,$inpperm
1835         stvx_u          $out0,$x00,$out
1836         le?vperm        $out2,$out2,$out2,$inpperm
1837         stvx_u          $out1,$x10,$out
1838         le?vperm        $out3,$out3,$out3,$inpperm
1839         stvx_u          $out2,$x20,$out
1840         stvx_u          $out3,$x30,$out
1841         addi            $out,$out,0x40
1842         b               Lctr32_enc8x_done
1843
1844 .align  5
1845 Lctr32_enc8x_three:
1846         vcipherlast     $out0,$out0,$in5
1847         vcipherlast     $out1,$out1,$in6
1848         vcipherlast     $out2,$out2,$in7
1849
1850         le?vperm        $out0,$out0,$out0,$inpperm
1851         le?vperm        $out1,$out1,$out1,$inpperm
1852         stvx_u          $out0,$x00,$out
1853         le?vperm        $out2,$out2,$out2,$inpperm
1854         stvx_u          $out1,$x10,$out
1855         stvx_u          $out2,$x20,$out
1856         addi            $out,$out,0x30
1857         b               Lctr32_enc8x_done
1858
1859 .align  5
1860 Lctr32_enc8x_two:
1861         vcipherlast     $out0,$out0,$in6
1862         vcipherlast     $out1,$out1,$in7
1863
1864         le?vperm        $out0,$out0,$out0,$inpperm
1865         le?vperm        $out1,$out1,$out1,$inpperm
1866         stvx_u          $out0,$x00,$out
1867         stvx_u          $out1,$x10,$out
1868         addi            $out,$out,0x20
1869         b               Lctr32_enc8x_done
1870
1871 .align  5
1872 Lctr32_enc8x_one:
1873         vcipherlast     $out0,$out0,$in7
1874
1875         le?vperm        $out0,$out0,$out0,$inpperm
1876         stvx_u          $out0,0,$out
1877         addi            $out,$out,0x10
1878
1879 Lctr32_enc8x_done:
1880         li              r10,`$FRAME+15`
1881         li              r11,`$FRAME+31`
1882         stvx            $inpperm,r10,$sp        # wipe copies of round keys
1883         addi            r10,r10,32
1884         stvx            $inpperm,r11,$sp
1885         addi            r11,r11,32
1886         stvx            $inpperm,r10,$sp
1887         addi            r10,r10,32
1888         stvx            $inpperm,r11,$sp
1889         addi            r11,r11,32
1890         stvx            $inpperm,r10,$sp
1891         addi            r10,r10,32
1892         stvx            $inpperm,r11,$sp
1893         addi            r11,r11,32
1894         stvx            $inpperm,r10,$sp
1895         addi            r10,r10,32
1896         stvx            $inpperm,r11,$sp
1897         addi            r11,r11,32
1898
1899         mtspr           256,$vrsave
1900         lvx             v20,r10,$sp             # ABI says so
1901         addi            r10,r10,32
1902         lvx             v21,r11,$sp
1903         addi            r11,r11,32
1904         lvx             v22,r10,$sp
1905         addi            r10,r10,32
1906         lvx             v23,r11,$sp
1907         addi            r11,r11,32
1908         lvx             v24,r10,$sp
1909         addi            r10,r10,32
1910         lvx             v25,r11,$sp
1911         addi            r11,r11,32
1912         lvx             v26,r10,$sp
1913         addi            r10,r10,32
1914         lvx             v27,r11,$sp
1915         addi            r11,r11,32
1916         lvx             v28,r10,$sp
1917         addi            r10,r10,32
1918         lvx             v29,r11,$sp
1919         addi            r11,r11,32
1920         lvx             v30,r10,$sp
1921         lvx             v31,r11,$sp
1922         $POP            r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1923         $POP            r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1924         $POP            r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1925         $POP            r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1926         $POP            r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1927         $POP            r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1928         addi            $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1929         blr
1930         .long           0
1931         .byte           0,12,0x14,0,0x80,6,6,0
1932         .long           0
1933 .size   .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
1934 ___
1935 }}      }}}
1936
1937 #########################################################################
1938 {{{     # XTS procedures                                                #
1939 # int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len,   #
1940 #                             const AES_KEY *key1, const AES_KEY *key2, #
1941 #                             [const] unsigned char iv[16]);            #
1942 # If $key2 is NULL, then a "tweak chaining" mode is engaged, in which   #
1943 # the input tweak value is assumed to be encrypted already, and the     #
1944 # last tweak value, one suitable for a consecutive call on the same     #
1945 # chunk of data, is written back to the original buffer. In addition,   #
1946 # in "tweak chaining" mode only complete input blocks are processed.    #
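# A caller can therefore split one data unit across several calls; a
# hypothetical C caller, with tweak[] pre-encrypted under key2, could
# chain like this:
#
#       aes_p8_xts_encrypt(in,      out,      n1, key1, NULL, tweak);
#       aes_p8_xts_encrypt(in + n1, out + n1, n2, key1, NULL, tweak);
#
# where n1 is a multiple of 16 and each call leaves the tweak for the
# next call in tweak[].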
1947
1948 my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) =     map("r$_",(3..10));
1949 my ($rndkey0,$rndkey1,$inout) =                         map("v$_",(0..2));
1950 my ($output,$inptail,$inpperm,$leperm,$keyperm) =       map("v$_",(3..7));
1951 my ($tweak,$seven,$eighty7,$tmp,$tweak1) =              map("v$_",(8..12));
1952 my $taillen = $key2;
1953
1954    ($inp,$idx) = ($idx,$inp);                           # reassign
1955
1956 $code.=<<___;
1957 .globl  .${prefix}_xts_encrypt
1958         mr              $inp,r3                         # reassign
1959         li              r3,-1
1960         ${UCMP}i        $len,16
1961         bltlr-
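# r3 doubles as the return value: -1 for input shorter than one block,
# 0 on the normal exit (Lxts_enc_ret).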
1962
1963         lis             r0,0xfff0
1964         mfspr           r12,256                         # save vrsave
1965         li              r11,0
1966         mtspr           256,r0
1967
1968         vspltisb        $seven,0x07                     # 0x070707..07
1969         le?lvsl         $leperm,r11,r11
1970         le?vspltisb     $tmp,0x0f
1971         le?vxor         $leperm,$leperm,$seven
1972
1973         li              $idx,15
1974         lvx             $tweak,0,$ivp                   # load [unaligned] iv
1975         lvsl            $inpperm,0,$ivp
1976         lvx             $inptail,$idx,$ivp
1977         le?vxor         $inpperm,$inpperm,$tmp
1978         vperm           $tweak,$tweak,$inptail,$inpperm
1979
1980         neg             r11,$inp
1981         lvsr            $inpperm,0,r11                  # prepare for unaligned load
1982         lvx             $inout,0,$inp
1983         addi            $inp,$inp,15                    # 15 is not a typo
1984         le?vxor         $inpperm,$inpperm,$tmp
1985
1986         ${UCMP}i        $key2,0                         # key2==NULL?
1987         beq             Lxts_enc_no_key2
1988
1989         ?lvsl           $keyperm,0,$key2                # prepare for unaligned key
1990         lwz             $rounds,240($key2)
1991         srwi            $rounds,$rounds,1
1992         subi            $rounds,$rounds,1
1993         li              $idx,16
1994
1995         lvx             $rndkey0,0,$key2
1996         lvx             $rndkey1,$idx,$key2
1997         addi            $idx,$idx,16
1998         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
1999         vxor            $tweak,$tweak,$rndkey0
2000         lvx             $rndkey0,$idx,$key2
2001         addi            $idx,$idx,16
2002         mtctr           $rounds
2003
2004 Ltweak_xts_enc:
2005         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2006         vcipher         $tweak,$tweak,$rndkey1
2007         lvx             $rndkey1,$idx,$key2
2008         addi            $idx,$idx,16
2009         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2010         vcipher         $tweak,$tweak,$rndkey0
2011         lvx             $rndkey0,$idx,$key2
2012         addi            $idx,$idx,16
2013         bdnz            Ltweak_xts_enc
2014
2015         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2016         vcipher         $tweak,$tweak,$rndkey1
2017         lvx             $rndkey1,$idx,$key2
2018         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2019         vcipherlast     $tweak,$tweak,$rndkey0
2020
2021         li              $ivp,0                          # don't chain the tweak
2022         b               Lxts_enc
2023
2024 Lxts_enc_no_key2:
2025         li              $idx,-16
2026         and             $len,$len,$idx                  # in "tweak chaining"
2027                                                         # mode only complete
2028                                                         # blocks are processed
2029 Lxts_enc:
2030         lvx             $inptail,0,$inp
2031         addi            $inp,$inp,16
2032
2033         ?lvsl           $keyperm,0,$key1                # prepare for unaligned key
2034         lwz             $rounds,240($key1)
2035         srwi            $rounds,$rounds,1
2036         subi            $rounds,$rounds,1
2037         li              $idx,16
2038
2039         vslb            $eighty7,$seven,$seven          # 0x808080..80
2040         vor             $eighty7,$eighty7,$seven        # 0x878787..87
2041         vspltisb        $tmp,1                          # 0x010101..01
2042         vsldoi          $eighty7,$eighty7,$tmp,15       # 0x870101..01
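# The "next tweak value" sequences below multiply the tweak by x in
# GF(2^128) modulo x^128+x^7+x^2+x+1 (hence 0x87): vaddubm doubles
# every byte but drops the inter-byte carries, vsrab+vsldoi rebuild a
# mask from the bits that were shifted out, and vand with 0x870101..01
# turns that mask into the +1 carries for the inner bytes and the 0x87
# reduction for the carry out of the top bit.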
2043
2044         ${UCMP}i        $len,96
2045         bge             _aesp8_xts_encrypt6x
2046
2047         andi.           $taillen,$len,15
2048         subic           r0,$len,32
2049         subi            $taillen,$taillen,16
2050         subfe           r0,r0,r0
2051         and             r0,r0,$taillen
2052         add             $inp,$inp,r0
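# Branchless pointer fix-up: if fewer than 32 bytes remain, pull $inp
# back by 16-taillen bytes so the final quadword load stays within the
# buffer.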
2053
2054         lvx             $rndkey0,0,$key1
2055         lvx             $rndkey1,$idx,$key1
2056         addi            $idx,$idx,16
2057         vperm           $inout,$inout,$inptail,$inpperm
2058         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2059         vxor            $inout,$inout,$tweak
2060         vxor            $inout,$inout,$rndkey0
2061         lvx             $rndkey0,$idx,$key1
2062         addi            $idx,$idx,16
2063         mtctr           $rounds
2064         b               Loop_xts_enc
2065
2066 .align  5
2067 Loop_xts_enc:
2068         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2069         vcipher         $inout,$inout,$rndkey1
2070         lvx             $rndkey1,$idx,$key1
2071         addi            $idx,$idx,16
2072         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2073         vcipher         $inout,$inout,$rndkey0
2074         lvx             $rndkey0,$idx,$key1
2075         addi            $idx,$idx,16
2076         bdnz            Loop_xts_enc
2077
2078         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2079         vcipher         $inout,$inout,$rndkey1
2080         lvx             $rndkey1,$idx,$key1
2081         li              $idx,16
2082         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2083         vxor            $rndkey0,$rndkey0,$tweak
2084         vcipherlast     $output,$inout,$rndkey0
2085
2086         le?vperm        $tmp,$output,$output,$leperm
2087         be?nop
2088         le?stvx_u       $tmp,0,$out
2089         be?stvx_u       $output,0,$out
2090         addi            $out,$out,16
2091
2092         subic.          $len,$len,16
2093         beq             Lxts_enc_done
2094
2095         vmr             $inout,$inptail
2096         lvx             $inptail,0,$inp
2097         addi            $inp,$inp,16
2098         lvx             $rndkey0,0,$key1
2099         lvx             $rndkey1,$idx,$key1
2100         addi            $idx,$idx,16
2101
2102         subic           r0,$len,32
2103         subfe           r0,r0,r0
2104         and             r0,r0,$taillen
2105         add             $inp,$inp,r0
2106
2107         vsrab           $tmp,$tweak,$seven              # next tweak value
2108         vaddubm         $tweak,$tweak,$tweak
2109         vsldoi          $tmp,$tmp,$tmp,15
2110         vand            $tmp,$tmp,$eighty7
2111         vxor            $tweak,$tweak,$tmp
2112
2113         vperm           $inout,$inout,$inptail,$inpperm
2114         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2115         vxor            $inout,$inout,$tweak
2116         vxor            $output,$output,$rndkey0        # just in case $len<16
2117         vxor            $inout,$inout,$rndkey0
2118         lvx             $rndkey0,$idx,$key1
2119         addi            $idx,$idx,16
2120
2121         mtctr           $rounds
2122         ${UCMP}i        $len,16
2123         bge             Loop_xts_enc
2124
2125         vxor            $output,$output,$tweak
2126         lvsr            $inpperm,0,$len                 # $inpperm is no longer needed
2127         vxor            $inptail,$inptail,$inptail      # $inptail is no longer needed
2128         vspltisb        $tmp,-1
2129         vperm           $inptail,$inptail,$tmp,$inpperm
2130         vsel            $inout,$inout,$output,$inptail
2131
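# Ciphertext stealing: the byte loop below copies the head of the last
# complete ciphertext block forward to serve as the short output tail,
# while $inout (merged above from the short input tail and that same
# ciphertext block) goes through Loop_xts_enc once more under the
# final tweak.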
2132         subi            r11,$out,17
2133         subi            $out,$out,16
2134         mtctr           $len
2135         li              $len,16
2136 Loop_xts_enc_steal:
2137         lbzu            r0,1(r11)
2138         stb             r0,16(r11)
2139         bdnz            Loop_xts_enc_steal
2140
2141         mtctr           $rounds
2142         b               Loop_xts_enc                    # one more time...
2143
2144 Lxts_enc_done:
2145         ${UCMP}i        $ivp,0
2146         beq             Lxts_enc_ret
2147
2148         vsrab           $tmp,$tweak,$seven              # next tweak value
2149         vaddubm         $tweak,$tweak,$tweak
2150         vsldoi          $tmp,$tmp,$tmp,15
2151         vand            $tmp,$tmp,$eighty7
2152         vxor            $tweak,$tweak,$tmp
2153
2154         le?vperm        $tweak,$tweak,$tweak,$leperm
2155         stvx_u          $tweak,0,$ivp
2156
2157 Lxts_enc_ret:
2158         mtspr           256,r12                         # restore vrsave
2159         li              r3,0
2160         blr
2161         .long           0
2162         .byte           0,12,0x04,0,0x80,6,6,0
2163         .long           0
2164 .size   .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
2165
2166 .globl  .${prefix}_xts_decrypt
2167         mr              $inp,r3                         # reassign
2168         li              r3,-1
2169         ${UCMP}i        $len,16
2170         bltlr-
2171
2172         lis             r0,0xfff8
2173         mfspr           r12,256                         # save vrsave
2174         li              r11,0
2175         mtspr           256,r0
2176
2177         andi.           r0,$len,15
2178         neg             r0,r0
2179         andi.           r0,r0,16
2180         sub             $len,$len,r0
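# If the length is not block-aligned, set aside one complete block
# (subtract 16 from $len): decrypt-side ciphertext stealing processes
# that block together with the tail at Ltail_xts_dec.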
2181
2182         vspltisb        $seven,0x07                     # 0x070707..07
2183         le?lvsl         $leperm,r11,r11
2184         le?vspltisb     $tmp,0x0f
2185         le?vxor         $leperm,$leperm,$seven
2186
2187         li              $idx,15
2188         lvx             $tweak,0,$ivp                   # load [unaligned] iv
2189         lvsl            $inpperm,0,$ivp
2190         lvx             $inptail,$idx,$ivp
2191         le?vxor         $inpperm,$inpperm,$tmp
2192         vperm           $tweak,$tweak,$inptail,$inpperm
2193
2194         neg             r11,$inp
2195         lvsr            $inpperm,0,r11                  # prepare for unaligned load
2196         lvx             $inout,0,$inp
2197         addi            $inp,$inp,15                    # 15 is not a typo
2198         le?vxor         $inpperm,$inpperm,$tmp
2199
2200         ${UCMP}i        $key2,0                         # key2==NULL?
2201         beq             Lxts_dec_no_key2
2202
2203         ?lvsl           $keyperm,0,$key2                # prepare for unaligned key
2204         lwz             $rounds,240($key2)
2205         srwi            $rounds,$rounds,1
2206         subi            $rounds,$rounds,1
2207         li              $idx,16
2208
2209         lvx             $rndkey0,0,$key2
2210         lvx             $rndkey1,$idx,$key2
2211         addi            $idx,$idx,16
2212         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2213         vxor            $tweak,$tweak,$rndkey0
2214         lvx             $rndkey0,$idx,$key2
2215         addi            $idx,$idx,16
2216         mtctr           $rounds
2217
2218 Ltweak_xts_dec:
2219         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2220         vcipher         $tweak,$tweak,$rndkey1
2221         lvx             $rndkey1,$idx,$key2
2222         addi            $idx,$idx,16
2223         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2224         vcipher         $tweak,$tweak,$rndkey0
2225         lvx             $rndkey0,$idx,$key2
2226         addi            $idx,$idx,16
2227         bdnz            Ltweak_xts_dec
2228
2229         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2230         vcipher         $tweak,$tweak,$rndkey1
2231         lvx             $rndkey1,$idx,$key2
2232         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2233         vcipherlast     $tweak,$tweak,$rndkey0
2234
2235         li              $ivp,0                          # don't chain the tweak
2236         b               Lxts_dec
2237
2238 Lxts_dec_no_key2:
2239         neg             $idx,$len
2240         andi.           $idx,$idx,15
2241         add             $len,$len,$idx                  # in "tweak chaining"
2242                                                         # mode only complete
2243                                                         # blocks are processed
2244 Lxts_dec:
2245         lvx             $inptail,0,$inp
2246         addi            $inp,$inp,16
2247
2248         ?lvsl           $keyperm,0,$key1                # prepare for unaligned key
2249         lwz             $rounds,240($key1)
2250         srwi            $rounds,$rounds,1
2251         subi            $rounds,$rounds,1
2252         li              $idx,16
2253
2254         vslb            $eighty7,$seven,$seven          # 0x808080..80
2255         vor             $eighty7,$eighty7,$seven        # 0x878787..87
2256         vspltisb        $tmp,1                          # 0x010101..01
2257         vsldoi          $eighty7,$eighty7,$tmp,15       # 0x870101..01
2258
2259         ${UCMP}i        $len,96
2260         bge             _aesp8_xts_decrypt6x
2261
2262         lvx             $rndkey0,0,$key1
2263         lvx             $rndkey1,$idx,$key1
2264         addi            $idx,$idx,16
2265         vperm           $inout,$inout,$inptail,$inpperm
2266         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2267         vxor            $inout,$inout,$tweak
2268         vxor            $inout,$inout,$rndkey0
2269         lvx             $rndkey0,$idx,$key1
2270         addi            $idx,$idx,16
2271         mtctr           $rounds
2272
2273         ${UCMP}i        $len,16
2274         blt             Ltail_xts_dec
2275         be?b            Loop_xts_dec
2276
2277 .align  5
2278 Loop_xts_dec:
2279         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2280         vncipher        $inout,$inout,$rndkey1
2281         lvx             $rndkey1,$idx,$key1
2282         addi            $idx,$idx,16
2283         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2284         vncipher        $inout,$inout,$rndkey0
2285         lvx             $rndkey0,$idx,$key1
2286         addi            $idx,$idx,16
2287         bdnz            Loop_xts_dec
2288
2289         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2290         vncipher        $inout,$inout,$rndkey1
2291         lvx             $rndkey1,$idx,$key1
2292         li              $idx,16
2293         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2294         vxor            $rndkey0,$rndkey0,$tweak
2295         vncipherlast    $output,$inout,$rndkey0
2296
2297         le?vperm        $tmp,$output,$output,$leperm
2298         be?nop
2299         le?stvx_u       $tmp,0,$out
2300         be?stvx_u       $output,0,$out
2301         addi            $out,$out,16
2302
2303         subic.          $len,$len,16
2304         beq             Lxts_dec_done
2305
2306         vmr             $inout,$inptail
2307         lvx             $inptail,0,$inp
2308         addi            $inp,$inp,16
2309         lvx             $rndkey0,0,$key1
2310         lvx             $rndkey1,$idx,$key1
2311         addi            $idx,$idx,16
2312
2313         vsrab           $tmp,$tweak,$seven              # next tweak value
2314         vaddubm         $tweak,$tweak,$tweak
2315         vsldoi          $tmp,$tmp,$tmp,15
2316         vand            $tmp,$tmp,$eighty7
2317         vxor            $tweak,$tweak,$tmp
2318
2319         vperm           $inout,$inout,$inptail,$inpperm
2320         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2321         vxor            $inout,$inout,$tweak
2322         vxor            $inout,$inout,$rndkey0
2323         lvx             $rndkey0,$idx,$key1
2324         addi            $idx,$idx,16
2325
2326         mtctr           $rounds
2327         ${UCMP}i        $len,16
2328         bge             Loop_xts_dec
2329
2330 Ltail_xts_dec:
2331         vsrab           $tmp,$tweak,$seven              # next tweak value
2332         vaddubm         $tweak1,$tweak,$tweak
2333         vsldoi          $tmp,$tmp,$tmp,15
2334         vand            $tmp,$tmp,$eighty7
2335         vxor            $tweak1,$tweak1,$tmp
2336
2337         subi            $inp,$inp,16
2338         add             $inp,$inp,$len
2339
2340         vxor            $inout,$inout,$tweak            # :-(
2341         vxor            $inout,$inout,$tweak1           # :-)
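# Stealing order for decryption: the last complete block has to be
# deciphered under the *next* tweak, so the $tweak xor applied at load
# time is undone and $tweak1 applied instead.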
2342
2343 Loop_xts_dec_short:
2344         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2345         vncipher        $inout,$inout,$rndkey1
2346         lvx             $rndkey1,$idx,$key1
2347         addi            $idx,$idx,16
2348         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2349         vncipher        $inout,$inout,$rndkey0
2350         lvx             $rndkey0,$idx,$key1
2351         addi            $idx,$idx,16
2352         bdnz            Loop_xts_dec_short
2353
2354         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2355         vncipher        $inout,$inout,$rndkey1
2356         lvx             $rndkey1,$idx,$key1
2357         li              $idx,16
2358         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2359         vxor            $rndkey0,$rndkey0,$tweak1
2360         vncipherlast    $output,$inout,$rndkey0
2361
2362         le?vperm        $tmp,$output,$output,$leperm
2363         be?nop
2364         le?stvx_u       $tmp,0,$out
2365         be?stvx_u       $output,0,$out
2366
2367         vmr             $inout,$inptail
2368         lvx             $inptail,0,$inp
2369         #addi           $inp,$inp,16
2370         lvx             $rndkey0,0,$key1
2371         lvx             $rndkey1,$idx,$key1
2372         addi            $idx,$idx,16
2373         vperm           $inout,$inout,$inptail,$inpperm
2374         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2375
2376         lvsr            $inpperm,0,$len                 # $inpperm is no longer needed
2377         vxor            $inptail,$inptail,$inptail      # $inptail is no longer needed
2378         vspltisb        $tmp,-1
2379         vperm           $inptail,$inptail,$tmp,$inpperm
2380         vsel            $inout,$inout,$output,$inptail
2381
2382         vxor            $rndkey0,$rndkey0,$tweak
2383         vxor            $inout,$inout,$rndkey0
2384         lvx             $rndkey0,$idx,$key1
2385         addi            $idx,$idx,16
2386
2387         subi            r11,$out,1
2388         mtctr           $len
2389         li              $len,16
2390 Loop_xts_dec_steal:
2391         lbzu            r0,1(r11)
2392         stb             r0,16(r11)
2393         bdnz            Loop_xts_dec_steal
2394
2395         mtctr           $rounds
2396         b               Loop_xts_dec                    # one more time...
2397
2398 Lxts_dec_done:
2399         ${UCMP}i        $ivp,0
2400         beq             Lxts_dec_ret
2401
2402         vsrab           $tmp,$tweak,$seven              # next tweak value
2403         vaddubm         $tweak,$tweak,$tweak
2404         vsldoi          $tmp,$tmp,$tmp,15
2405         vand            $tmp,$tmp,$eighty7
2406         vxor            $tweak,$tweak,$tmp
2407
2408         le?vperm        $tweak,$tweak,$tweak,$leperm
2409         stvx_u          $tweak,0,$ivp
2410
2411 Lxts_dec_ret:
2412         mtspr           256,r12                         # restore vrsave
2413         li              r3,0
2414         blr
2415         .long           0
2416         .byte           0,12,0x04,0,0x80,6,6,0
2417         .long           0
2418 .size   .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
2419 ___
2420 #########################################################################
2421 {{      # Optimized XTS procedures                                      #
2422 my $key_=$key2;
2423 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
2424     $x00=0 if ($flavour =~ /osx/);
2425 my ($in0,  $in1,  $in2,  $in3,  $in4,  $in5 )=map("v$_",(0..5));
2426 my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
2427 my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
2428 my $rndkey0="v23";      # v24-v25 rotating buffer for the streamed round keys
2429                         # v26-v31 last 6 round keys
2430 my ($keyperm)=($out0);  # aliases with the single-block "caller" above; redundant assignment
2431 my $taillen=$x70;
2432
2433 $code.=<<___;
2434 .align  5
2435 _aesp8_xts_encrypt6x:
2436         $STU            $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
2437         mflr            r11
2438         li              r7,`$FRAME+8*16+15`
2439         li              r3,`$FRAME+8*16+31`
2440         $PUSH           r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
2441         stvx            v20,r7,$sp              # ABI says so
2442         addi            r7,r7,32
2443         stvx            v21,r3,$sp
2444         addi            r3,r3,32
2445         stvx            v22,r7,$sp
2446         addi            r7,r7,32
2447         stvx            v23,r3,$sp
2448         addi            r3,r3,32
2449         stvx            v24,r7,$sp
2450         addi            r7,r7,32
2451         stvx            v25,r3,$sp
2452         addi            r3,r3,32
2453         stvx            v26,r7,$sp
2454         addi            r7,r7,32
2455         stvx            v27,r3,$sp
2456         addi            r3,r3,32
2457         stvx            v28,r7,$sp
2458         addi            r7,r7,32
2459         stvx            v29,r3,$sp
2460         addi            r3,r3,32
2461         stvx            v30,r7,$sp
2462         stvx            v31,r3,$sp
2463         li              r0,-1
2464         stw             $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
2465         li              $x10,0x10
2466         $PUSH           r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2467         li              $x20,0x20
2468         $PUSH           r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2469         li              $x30,0x30
2470         $PUSH           r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2471         li              $x40,0x40
2472         $PUSH           r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2473         li              $x50,0x50
2474         $PUSH           r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2475         li              $x60,0x60
2476         $PUSH           r31,`$FRAME+21*16+5*$SIZE_T`($sp)
2477         li              $x70,0x70
2478         mtspr           256,r0
2479
2480         subi            $rounds,$rounds,3       # -4 in total
2481
2482         lvx             $rndkey0,$x00,$key1     # load key schedule
2483         lvx             v30,$x10,$key1
2484         addi            $key1,$key1,0x20
2485         lvx             v31,$x00,$key1
2486         ?vperm          $rndkey0,$rndkey0,v30,$keyperm
2487         addi            $key_,$sp,$FRAME+15
2488         mtctr           $rounds
2489
2490 Load_xts_enc_key:
2491         ?vperm          v24,v30,v31,$keyperm
2492         lvx             v30,$x10,$key1
2493         addi            $key1,$key1,0x20
2494         stvx            v24,$x00,$key_          # off-load round[1]
2495         ?vperm          v25,v31,v30,$keyperm
2496         lvx             v31,$x00,$key1
2497         stvx            v25,$x10,$key_          # off-load round[2]
2498         addi            $key_,$key_,0x20
2499         bdnz            Load_xts_enc_key
2500
2501         lvx             v26,$x10,$key1
2502         ?vperm          v24,v30,v31,$keyperm
2503         lvx             v27,$x20,$key1
2504         stvx            v24,$x00,$key_          # off-load round[3]
2505         ?vperm          v25,v31,v26,$keyperm
2506         lvx             v28,$x30,$key1
2507         stvx            v25,$x10,$key_          # off-load round[4]
2508         addi            $key_,$sp,$FRAME+15     # rewind $key_
2509         ?vperm          v26,v26,v27,$keyperm
2510         lvx             v29,$x40,$key1
2511         ?vperm          v27,v27,v28,$keyperm
2512         lvx             v30,$x50,$key1
2513         ?vperm          v28,v28,v29,$keyperm
2514         lvx             v31,$x60,$key1
2515         ?vperm          v29,v29,v30,$keyperm
2516         lvx             $twk5,$x70,$key1        # borrow $twk5
2517         ?vperm          v30,v30,v31,$keyperm
2518         lvx             v24,$x00,$key_          # pre-load round[1]
2519         ?vperm          v31,v31,$twk5,$keyperm
2520         lvx             v25,$x10,$key_          # pre-load round[2]
2521
2522          vperm          $in0,$inout,$inptail,$inpperm
2523          subi           $inp,$inp,31            # undo "caller"
2524         vxor            $twk0,$tweak,$rndkey0
2525         vsrab           $tmp,$tweak,$seven      # next tweak value
2526         vaddubm         $tweak,$tweak,$tweak
2527         vsldoi          $tmp,$tmp,$tmp,15
2528         vand            $tmp,$tmp,$eighty7
2529          vxor           $out0,$in0,$twk0
2530         vxor            $tweak,$tweak,$tmp
2531
2532          lvx_u          $in1,$x10,$inp
2533         vxor            $twk1,$tweak,$rndkey0
2534         vsrab           $tmp,$tweak,$seven      # next tweak value
2535         vaddubm         $tweak,$tweak,$tweak
2536         vsldoi          $tmp,$tmp,$tmp,15
2537          le?vperm       $in1,$in1,$in1,$leperm
2538         vand            $tmp,$tmp,$eighty7
2539          vxor           $out1,$in1,$twk1
2540         vxor            $tweak,$tweak,$tmp
2541
2542          lvx_u          $in2,$x20,$inp
2543          andi.          $taillen,$len,15
2544         vxor            $twk2,$tweak,$rndkey0
2545         vsrab           $tmp,$tweak,$seven      # next tweak value
2546         vaddubm         $tweak,$tweak,$tweak
2547         vsldoi          $tmp,$tmp,$tmp,15
2548          le?vperm       $in2,$in2,$in2,$leperm
2549         vand            $tmp,$tmp,$eighty7
2550          vxor           $out2,$in2,$twk2
2551         vxor            $tweak,$tweak,$tmp
2552
2553          lvx_u          $in3,$x30,$inp
2554          sub            $len,$len,$taillen
2555         vxor            $twk3,$tweak,$rndkey0
2556         vsrab           $tmp,$tweak,$seven      # next tweak value
2557         vaddubm         $tweak,$tweak,$tweak
2558         vsldoi          $tmp,$tmp,$tmp,15
2559          le?vperm       $in3,$in3,$in3,$leperm
2560         vand            $tmp,$tmp,$eighty7
2561          vxor           $out3,$in3,$twk3
2562         vxor            $tweak,$tweak,$tmp
2563
2564          lvx_u          $in4,$x40,$inp
2565          subi           $len,$len,0x60
2566         vxor            $twk4,$tweak,$rndkey0
2567         vsrab           $tmp,$tweak,$seven      # next tweak value
2568         vaddubm         $tweak,$tweak,$tweak
2569         vsldoi          $tmp,$tmp,$tmp,15
2570          le?vperm       $in4,$in4,$in4,$leperm
2571         vand            $tmp,$tmp,$eighty7
2572          vxor           $out4,$in4,$twk4
2573         vxor            $tweak,$tweak,$tmp
2574
2575          lvx_u          $in5,$x50,$inp
2576          addi           $inp,$inp,0x60
2577         vxor            $twk5,$tweak,$rndkey0
2578         vsrab           $tmp,$tweak,$seven      # next tweak value
2579         vaddubm         $tweak,$tweak,$tweak
2580         vsldoi          $tmp,$tmp,$tmp,15
2581          le?vperm       $in5,$in5,$in5,$leperm
2582         vand            $tmp,$tmp,$eighty7
2583          vxor           $out5,$in5,$twk5
2584         vxor            $tweak,$tweak,$tmp
2585
2586         vxor            v31,v31,$rndkey0
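# v31 (the last round key) is xored with round key[0] here; since each
# $twkN already carries round key[0], $twkN^v31 collapses to
# tweak^lastkey, letting vcipherlast apply the last AddRoundKey and
# the output tweak xor in one go.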
2587         mtctr           $rounds
2588         b               Loop_xts_enc6x
2589
2590 .align  5
2591 Loop_xts_enc6x:
2592         vcipher         $out0,$out0,v24
2593         vcipher         $out1,$out1,v24
2594         vcipher         $out2,$out2,v24
2595         vcipher         $out3,$out3,v24
2596         vcipher         $out4,$out4,v24
2597         vcipher         $out5,$out5,v24
2598         lvx             v24,$x20,$key_          # round[3]
2599         addi            $key_,$key_,0x20
2600
2601         vcipher         $out0,$out0,v25
2602         vcipher         $out1,$out1,v25
2603         vcipher         $out2,$out2,v25
2604         vcipher         $out3,$out3,v25
2605         vcipher         $out4,$out4,v25
2606         vcipher         $out5,$out5,v25
2607         lvx             v25,$x10,$key_          # round[4]
2608         bdnz            Loop_xts_enc6x
2609
2610         subic           $len,$len,96            # $len-=96
2611          vxor           $in0,$twk0,v31          # xor with last round key
2612         vcipher         $out0,$out0,v24
2613         vcipher         $out1,$out1,v24
2614          vsrab          $tmp,$tweak,$seven      # next tweak value
2615          vxor           $twk0,$tweak,$rndkey0
2616          vaddubm        $tweak,$tweak,$tweak
2617         vcipher         $out2,$out2,v24
2618         vcipher         $out3,$out3,v24
2619          vsldoi         $tmp,$tmp,$tmp,15
2620         vcipher         $out4,$out4,v24
2621         vcipher         $out5,$out5,v24
2622
2623         subfe.          r0,r0,r0                # borrow?-1:0
2624          vand           $tmp,$tmp,$eighty7
2625         vcipher         $out0,$out0,v25
2626         vcipher         $out1,$out1,v25
2627          vxor           $tweak,$tweak,$tmp
2628         vcipher         $out2,$out2,v25
2629         vcipher         $out3,$out3,v25
2630          vxor           $in1,$twk1,v31
2631          vsrab          $tmp,$tweak,$seven      # next tweak value
2632          vxor           $twk1,$tweak,$rndkey0
2633         vcipher         $out4,$out4,v25
2634         vcipher         $out5,$out5,v25
2635
2636         and             r0,r0,$len
2637          vaddubm        $tweak,$tweak,$tweak
2638          vsldoi         $tmp,$tmp,$tmp,15
2639         vcipher         $out0,$out0,v26
2640         vcipher         $out1,$out1,v26
2641          vand           $tmp,$tmp,$eighty7
2642         vcipher         $out2,$out2,v26
2643         vcipher         $out3,$out3,v26
2644          vxor           $tweak,$tweak,$tmp
2645         vcipher         $out4,$out4,v26
2646         vcipher         $out5,$out5,v26
2647
2648         add             $inp,$inp,r0            # $inp is adjusted in such a
2649                                                 # way that at exit from the
2650                                                 # loop in0-in5 are loaded
2651                                                 # with the last "words"
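        # (Branch-free tail fix-up: after subfe./and, r0 holds the negative
        #  shortfall, i.e. the post-subtract $len, when the subtraction
        #  borrowed, and 0 otherwise, so the six trailing lvx_u loads
        #  re-read the last full blocks without running past the end.)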
2652          vxor           $in2,$twk2,v31
2653          vsrab          $tmp,$tweak,$seven      # next tweak value
2654          vxor           $twk2,$tweak,$rndkey0
2655          vaddubm        $tweak,$tweak,$tweak
2656         vcipher         $out0,$out0,v27
2657         vcipher         $out1,$out1,v27
2658          vsldoi         $tmp,$tmp,$tmp,15
2659         vcipher         $out2,$out2,v27
2660         vcipher         $out3,$out3,v27
2661          vand           $tmp,$tmp,$eighty7
2662         vcipher         $out4,$out4,v27
2663         vcipher         $out5,$out5,v27
2664
2665         addi            $key_,$sp,$FRAME+15     # rewind $key_
2666          vxor           $tweak,$tweak,$tmp
2667         vcipher         $out0,$out0,v28
2668         vcipher         $out1,$out1,v28
2669          vxor           $in3,$twk3,v31
2670          vsrab          $tmp,$tweak,$seven      # next tweak value
2671          vxor           $twk3,$tweak,$rndkey0
2672         vcipher         $out2,$out2,v28
2673         vcipher         $out3,$out3,v28
2674          vaddubm        $tweak,$tweak,$tweak
2675          vsldoi         $tmp,$tmp,$tmp,15
2676         vcipher         $out4,$out4,v28
2677         vcipher         $out5,$out5,v28
2678         lvx             v24,$x00,$key_          # re-pre-load round[1]
2679          vand           $tmp,$tmp,$eighty7
2680
2681         vcipher         $out0,$out0,v29
2682         vcipher         $out1,$out1,v29
2683          vxor           $tweak,$tweak,$tmp
2684         vcipher         $out2,$out2,v29
2685         vcipher         $out3,$out3,v29
2686          vxor           $in4,$twk4,v31
2687          vsrab          $tmp,$tweak,$seven      # next tweak value
2688          vxor           $twk4,$tweak,$rndkey0
2689         vcipher         $out4,$out4,v29
2690         vcipher         $out5,$out5,v29
2691         lvx             v25,$x10,$key_          # re-pre-load round[2]
2692          vaddubm        $tweak,$tweak,$tweak
2693          vsldoi         $tmp,$tmp,$tmp,15
2694
2695         vcipher         $out0,$out0,v30
2696         vcipher         $out1,$out1,v30
2697          vand           $tmp,$tmp,$eighty7
2698         vcipher         $out2,$out2,v30
2699         vcipher         $out3,$out3,v30
2700          vxor           $tweak,$tweak,$tmp
2701         vcipher         $out4,$out4,v30
2702         vcipher         $out5,$out5,v30
2703          vxor           $in5,$twk5,v31
2704          vsrab          $tmp,$tweak,$seven      # next tweak value
2705          vxor           $twk5,$tweak,$rndkey0
2706
2707         vcipherlast     $out0,$out0,$in0
2708          lvx_u          $in0,$x00,$inp          # load next input block
2709          vaddubm        $tweak,$tweak,$tweak
2710          vsldoi         $tmp,$tmp,$tmp,15
2711         vcipherlast     $out1,$out1,$in1
2712          lvx_u          $in1,$x10,$inp
2713         vcipherlast     $out2,$out2,$in2
2714          le?vperm       $in0,$in0,$in0,$leperm
2715          lvx_u          $in2,$x20,$inp
2716          vand           $tmp,$tmp,$eighty7
2717         vcipherlast     $out3,$out3,$in3
2718          le?vperm       $in1,$in1,$in1,$leperm
2719          lvx_u          $in3,$x30,$inp
2720         vcipherlast     $out4,$out4,$in4
2721          le?vperm       $in2,$in2,$in2,$leperm
2722          lvx_u          $in4,$x40,$inp
2723          vxor           $tweak,$tweak,$tmp
2724         vcipherlast     $tmp,$out5,$in5         # last block might be needed
2725                                                 # in stealing mode
2726          le?vperm       $in3,$in3,$in3,$leperm
2727          lvx_u          $in5,$x50,$inp
2728          addi           $inp,$inp,0x60
2729          le?vperm       $in4,$in4,$in4,$leperm
2730          le?vperm       $in5,$in5,$in5,$leperm
2731
2732         le?vperm        $out0,$out0,$out0,$leperm
2733         le?vperm        $out1,$out1,$out1,$leperm
2734         stvx_u          $out0,$x00,$out         # store output
2735          vxor           $out0,$in0,$twk0
2736         le?vperm        $out2,$out2,$out2,$leperm
2737         stvx_u          $out1,$x10,$out
2738          vxor           $out1,$in1,$twk1
2739         le?vperm        $out3,$out3,$out3,$leperm
2740         stvx_u          $out2,$x20,$out
2741          vxor           $out2,$in2,$twk2
2742         le?vperm        $out4,$out4,$out4,$leperm
2743         stvx_u          $out3,$x30,$out
2744          vxor           $out3,$in3,$twk3
2745         le?vperm        $out5,$tmp,$tmp,$leperm
2746         stvx_u          $out4,$x40,$out
2747          vxor           $out4,$in4,$twk4
2748         le?stvx_u       $out5,$x50,$out
2749         be?stvx_u       $tmp,$x50,$out
2750          vxor           $out5,$in5,$twk5
2751         addi            $out,$out,0x60
2752
2753         mtctr           $rounds
2754         beq             Loop_xts_enc6x          # did $len-=96 borrow?
2755
2756         addic.          $len,$len,0x60
2757         beq             Lxts_enc6x_zero
2758         cmpwi           $len,0x20
2759         blt             Lxts_enc6x_one
2760         nop
2761         beq             Lxts_enc6x_two
2762         cmpwi           $len,0x40
2763         blt             Lxts_enc6x_three
2764         nop
2765         beq             Lxts_enc6x_four
2766
2767 Lxts_enc6x_five:
2768         vxor            $out0,$in1,$twk0
2769         vxor            $out1,$in2,$twk1
2770         vxor            $out2,$in3,$twk2
2771         vxor            $out3,$in4,$twk3
2772         vxor            $out4,$in5,$twk4
2773
2774         bl              _aesp8_xts_enc5x
2775
2776         le?vperm        $out0,$out0,$out0,$leperm
2777         vmr             $twk0,$twk5             # unused tweak
2778         le?vperm        $out1,$out1,$out1,$leperm
2779         stvx_u          $out0,$x00,$out         # store output
2780         le?vperm        $out2,$out2,$out2,$leperm
2781         stvx_u          $out1,$x10,$out
2782         le?vperm        $out3,$out3,$out3,$leperm
2783         stvx_u          $out2,$x20,$out
2784         vxor            $tmp,$out4,$twk5        # last block prep for stealing
2785         le?vperm        $out4,$out4,$out4,$leperm
2786         stvx_u          $out3,$x30,$out
2787         stvx_u          $out4,$x40,$out
2788         addi            $out,$out,0x50
2789         bne             Lxts_enc6x_steal
2790         b               Lxts_enc6x_done
2791
2792 .align  4
2793 Lxts_enc6x_four:
2794         vxor            $out0,$in2,$twk0
2795         vxor            $out1,$in3,$twk1
2796         vxor            $out2,$in4,$twk2
2797         vxor            $out3,$in5,$twk3
2798         vxor            $out4,$out4,$out4
2799
2800         bl              _aesp8_xts_enc5x
2801
2802         le?vperm        $out0,$out0,$out0,$leperm
2803         vmr             $twk0,$twk4             # unused tweak
2804         le?vperm        $out1,$out1,$out1,$leperm
2805         stvx_u          $out0,$x00,$out         # store output
2806         le?vperm        $out2,$out2,$out2,$leperm
2807         stvx_u          $out1,$x10,$out
2808         vxor            $tmp,$out3,$twk4        # last block prep for stealing
2809         le?vperm        $out3,$out3,$out3,$leperm
2810         stvx_u          $out2,$x20,$out
2811         stvx_u          $out3,$x30,$out
2812         addi            $out,$out,0x40
2813         bne             Lxts_enc6x_steal
2814         b               Lxts_enc6x_done
2815
2816 .align  4
2817 Lxts_enc6x_three:
2818         vxor            $out0,$in3,$twk0
2819         vxor            $out1,$in4,$twk1
2820         vxor            $out2,$in5,$twk2
2821         vxor            $out3,$out3,$out3
2822         vxor            $out4,$out4,$out4
2823
2824         bl              _aesp8_xts_enc5x
2825
2826         le?vperm        $out0,$out0,$out0,$leperm
2827         vmr             $twk0,$twk3             # unused tweak
2828         le?vperm        $out1,$out1,$out1,$leperm
2829         stvx_u          $out0,$x00,$out         # store output
2830         vxor            $tmp,$out2,$twk3        # last block prep for stealing
2831         le?vperm        $out2,$out2,$out2,$leperm
2832         stvx_u          $out1,$x10,$out
2833         stvx_u          $out2,$x20,$out
2834         addi            $out,$out,0x30
2835         bne             Lxts_enc6x_steal
2836         b               Lxts_enc6x_done
2837
2838 .align  4
2839 Lxts_enc6x_two:
2840         vxor            $out0,$in4,$twk0
2841         vxor            $out1,$in5,$twk1
2842         vxor            $out2,$out2,$out2
2843         vxor            $out3,$out3,$out3
2844         vxor            $out4,$out4,$out4
2845
2846         bl              _aesp8_xts_enc5x
2847
2848         le?vperm        $out0,$out0,$out0,$leperm
2849         vmr             $twk0,$twk2             # unused tweak
2850         vxor            $tmp,$out1,$twk2        # last block prep for stealing
2851         le?vperm        $out1,$out1,$out1,$leperm
2852         stvx_u          $out0,$x00,$out         # store output
2853         stvx_u          $out1,$x10,$out
2854         addi            $out,$out,0x20
2855         bne             Lxts_enc6x_steal
2856         b               Lxts_enc6x_done
2857
2858 .align  4
2859 Lxts_enc6x_one:
2860         vxor            $out0,$in5,$twk0
2861         nop
2862 Loop_xts_enc1x:
2863         vcipher         $out0,$out0,v24
2864         lvx             v24,$x20,$key_          # round[3]
2865         addi            $key_,$key_,0x20
2866
2867         vcipher         $out0,$out0,v25
2868         lvx             v25,$x10,$key_          # round[4]
2869         bdnz            Loop_xts_enc1x
2870
2871         add             $inp,$inp,$taillen
2872         cmpwi           $taillen,0
2873         vcipher         $out0,$out0,v24
2874
2875         subi            $inp,$inp,16
2876         vcipher         $out0,$out0,v25
2877
2878         lvsr            $inpperm,0,$taillen
2879         vcipher         $out0,$out0,v26
2880
2881         lvx_u           $in0,0,$inp
2882         vcipher         $out0,$out0,v27
2883
2884         addi            $key_,$sp,$FRAME+15     # rewind $key_
2885         vcipher         $out0,$out0,v28
2886         lvx             v24,$x00,$key_          # re-pre-load round[1]
2887
2888         vcipher         $out0,$out0,v29
2889         lvx             v25,$x10,$key_          # re-pre-load round[2]
2890          vxor           $twk0,$twk0,v31
2891
2892         le?vperm        $in0,$in0,$in0,$leperm
2893         vcipher         $out0,$out0,v30
2894
2895         vperm           $in0,$in0,$in0,$inpperm
2896         vcipherlast     $out0,$out0,$twk0
2897
2898         vmr             $twk0,$twk1             # unused tweak
2899         vxor            $tmp,$out0,$twk1        # last block prep for stealing
2900         le?vperm        $out0,$out0,$out0,$leperm
2901         stvx_u          $out0,$x00,$out         # store output
2902         addi            $out,$out,0x10
2903         bne             Lxts_enc6x_steal
2904         b               Lxts_enc6x_done
2905
2906 .align  4
2907 Lxts_enc6x_zero:
2908         cmpwi           $taillen,0
2909         beq             Lxts_enc6x_done
2910
2911         add             $inp,$inp,$taillen
2912         subi            $inp,$inp,16
2913         lvx_u           $in0,0,$inp
2914         lvsr            $inpperm,0,$taillen     # $in5 is no more
2915         le?vperm        $in0,$in0,$in0,$leperm
2916         vperm           $in0,$in0,$in0,$inpperm
2917         vxor            $tmp,$tmp,$twk0
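        # Ciphertext stealing for a ragged tail: $tmp still holds the last
        # full block's ciphertext.  Roughly, vperm/vsel below splice the
        # remaining bytes of that ciphertext after the $taillen plaintext
        # bytes, the byte-copy loop re-emits the block's leading bytes as
        # the final partial output, and the merged block takes one more
        # trip through Loop_xts_enc1x into the slot of the last full block
        # (note $out is rewound by 16 first).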
2918 Lxts_enc6x_steal:
2919         vxor            $in0,$in0,$twk0
2920         vxor            $out0,$out0,$out0
2921         vspltisb        $out1,-1
2922         vperm           $out0,$out0,$out1,$inpperm
2923         vsel            $out0,$in0,$tmp,$out0   # $tmp is last block, remember?
2924
2925         subi            r30,$out,17
2926         subi            $out,$out,16
2927         mtctr           $taillen
2928 Loop_xts_enc6x_steal:
2929         lbzu            r0,1(r30)
2930         stb             r0,16(r30)
2931         bdnz            Loop_xts_enc6x_steal
2932
2933         li              $taillen,0
2934         mtctr           $rounds
2935         b               Loop_xts_enc1x          # one more time...
2936
2937 .align  4
2938 Lxts_enc6x_done:
2939         ${UCMP}i        $ivp,0
2940         beq             Lxts_enc6x_ret
2941
2942         vxor            $tweak,$twk0,$rndkey0
2943         le?vperm        $tweak,$tweak,$tweak,$leperm
2944         stvx_u          $tweak,0,$ivp
2945
2946 Lxts_enc6x_ret:
2947         mtlr            r11
2948         li              r10,`$FRAME+15`
2949         li              r11,`$FRAME+31`
2950         stvx            $seven,r10,$sp          # wipe copies of round keys
2951         addi            r10,r10,32
2952         stvx            $seven,r11,$sp
2953         addi            r11,r11,32
2954         stvx            $seven,r10,$sp
2955         addi            r10,r10,32
2956         stvx            $seven,r11,$sp
2957         addi            r11,r11,32
2958         stvx            $seven,r10,$sp
2959         addi            r10,r10,32
2960         stvx            $seven,r11,$sp
2961         addi            r11,r11,32
2962         stvx            $seven,r10,$sp
2963         addi            r10,r10,32
2964         stvx            $seven,r11,$sp
2965         addi            r11,r11,32
2966
2967         mtspr           256,$vrsave
2968         lvx             v20,r10,$sp             # ABI says so
2969         addi            r10,r10,32
2970         lvx             v21,r11,$sp
2971         addi            r11,r11,32
2972         lvx             v22,r10,$sp
2973         addi            r10,r10,32
2974         lvx             v23,r11,$sp
2975         addi            r11,r11,32
2976         lvx             v24,r10,$sp
2977         addi            r10,r10,32
2978         lvx             v25,r11,$sp
2979         addi            r11,r11,32
2980         lvx             v26,r10,$sp
2981         addi            r10,r10,32
2982         lvx             v27,r11,$sp
2983         addi            r11,r11,32
2984         lvx             v28,r10,$sp
2985         addi            r10,r10,32
2986         lvx             v29,r11,$sp
2987         addi            r11,r11,32
2988         lvx             v30,r10,$sp
2989         lvx             v31,r11,$sp
2990         $POP            r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2991         $POP            r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2992         $POP            r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2993         $POP            r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2994         $POP            r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2995         $POP            r31,`$FRAME+21*16+5*$SIZE_T`($sp)
2996         addi            $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
2997         blr
2998         .long           0
2999         .byte           0,12,0x04,1,0x80,6,6,0
3000         .long           0
3001
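        # _aesp8_xts_enc5x: tail helper that carries five blocks
        # ($out0-$out4, already xor-ed with their tweaks) through the
        # remaining rounds; the closing vcipherlast folds in the
        # tweak-masked final round key ($twk0, $in1-$in4).  It also
        # pre-loads and pre-permutes $in0 in case ciphertext stealing
        # follows.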
3002 .align  5
3003 _aesp8_xts_enc5x:
3004         vcipher         $out0,$out0,v24
3005         vcipher         $out1,$out1,v24
3006         vcipher         $out2,$out2,v24
3007         vcipher         $out3,$out3,v24
3008         vcipher         $out4,$out4,v24
3009         lvx             v24,$x20,$key_          # round[3]
3010         addi            $key_,$key_,0x20
3011
3012         vcipher         $out0,$out0,v25
3013         vcipher         $out1,$out1,v25
3014         vcipher         $out2,$out2,v25
3015         vcipher         $out3,$out3,v25
3016         vcipher         $out4,$out4,v25
3017         lvx             v25,$x10,$key_          # round[4]
3018         bdnz            _aesp8_xts_enc5x
3019
3020         add             $inp,$inp,$taillen
3021         cmpwi           $taillen,0
3022         vcipher         $out0,$out0,v24
3023         vcipher         $out1,$out1,v24
3024         vcipher         $out2,$out2,v24
3025         vcipher         $out3,$out3,v24
3026         vcipher         $out4,$out4,v24
3027
3028         subi            $inp,$inp,16
3029         vcipher         $out0,$out0,v25
3030         vcipher         $out1,$out1,v25
3031         vcipher         $out2,$out2,v25
3032         vcipher         $out3,$out3,v25
3033         vcipher         $out4,$out4,v25
3034          vxor           $twk0,$twk0,v31
3035
3036         vcipher         $out0,$out0,v26
3037         lvsr            $inpperm,0,$taillen     # $in5 is no more
3038         vcipher         $out1,$out1,v26
3039         vcipher         $out2,$out2,v26
3040         vcipher         $out3,$out3,v26
3041         vcipher         $out4,$out4,v26
3042          vxor           $in1,$twk1,v31
3043
3044         vcipher         $out0,$out0,v27
3045         lvx_u           $in0,0,$inp
3046         vcipher         $out1,$out1,v27
3047         vcipher         $out2,$out2,v27
3048         vcipher         $out3,$out3,v27
3049         vcipher         $out4,$out4,v27
3050          vxor           $in2,$twk2,v31
3051
3052         addi            $key_,$sp,$FRAME+15     # rewind $key_
3053         vcipher         $out0,$out0,v28
3054         vcipher         $out1,$out1,v28
3055         vcipher         $out2,$out2,v28
3056         vcipher         $out3,$out3,v28
3057         vcipher         $out4,$out4,v28
3058         lvx             v24,$x00,$key_          # re-pre-load round[1]
3059          vxor           $in3,$twk3,v31
3060
3061         vcipher         $out0,$out0,v29
3062         le?vperm        $in0,$in0,$in0,$leperm
3063         vcipher         $out1,$out1,v29
3064         vcipher         $out2,$out2,v29
3065         vcipher         $out3,$out3,v29
3066         vcipher         $out4,$out4,v29
3067         lvx             v25,$x10,$key_          # re-pre-load round[2]
3068          vxor           $in4,$twk4,v31
3069
3070         vcipher         $out0,$out0,v30
3071         vperm           $in0,$in0,$in0,$inpperm
3072         vcipher         $out1,$out1,v30
3073         vcipher         $out2,$out2,v30
3074         vcipher         $out3,$out3,v30
3075         vcipher         $out4,$out4,v30
3076
3077         vcipherlast     $out0,$out0,$twk0
3078         vcipherlast     $out1,$out1,$in1
3079         vcipherlast     $out2,$out2,$in2
3080         vcipherlast     $out3,$out3,$in3
3081         vcipherlast     $out4,$out4,$in4
3082         blr
3083         .long           0
3084         .byte           0,12,0x14,0,0,0,0,0
3085
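        # _aesp8_xts_decrypt6x: decryption twin of _aesp8_xts_encrypt6x:
        # same stack frame, key off-load and tweak schedule, but with
        # vncipher/vncipherlast in place of vcipher/vcipherlast and with
        # the tail tweak ordering adjusted for decrypt-side stealing.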
3086 .align  5
3087 _aesp8_xts_decrypt6x:
3088         $STU            $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
3089         mflr            r11
3090         li              r7,`$FRAME+8*16+15`
3091         li              r3,`$FRAME+8*16+31`
3092         $PUSH           r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
3093         stvx            v20,r7,$sp              # ABI says so
3094         addi            r7,r7,32
3095         stvx            v21,r3,$sp
3096         addi            r3,r3,32
3097         stvx            v22,r7,$sp
3098         addi            r7,r7,32
3099         stvx            v23,r3,$sp
3100         addi            r3,r3,32
3101         stvx            v24,r7,$sp
3102         addi            r7,r7,32
3103         stvx            v25,r3,$sp
3104         addi            r3,r3,32
3105         stvx            v26,r7,$sp
3106         addi            r7,r7,32
3107         stvx            v27,r3,$sp
3108         addi            r3,r3,32
3109         stvx            v28,r7,$sp
3110         addi            r7,r7,32
3111         stvx            v29,r3,$sp
3112         addi            r3,r3,32
3113         stvx            v30,r7,$sp
3114         stvx            v31,r3,$sp
3115         li              r0,-1
3116         stw             $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
3117         li              $x10,0x10
3118         $PUSH           r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3119         li              $x20,0x20
3120         $PUSH           r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3121         li              $x30,0x30
3122         $PUSH           r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3123         li              $x40,0x40
3124         $PUSH           r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3125         li              $x50,0x50
3126         $PUSH           r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3127         li              $x60,0x60
3128         $PUSH           r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3129         li              $x70,0x70
3130         mtspr           256,r0
3131
3132         subi            $rounds,$rounds,3       # -4 in total
3133
3134         lvx             $rndkey0,$x00,$key1     # load key schedule
3135         lvx             v30,$x10,$key1
3136         addi            $key1,$key1,0x20
3137         lvx             v31,$x00,$key1
3138         ?vperm          $rndkey0,$rndkey0,v30,$keyperm
3139         addi            $key_,$sp,$FRAME+15
3140         mtctr           $rounds
3141
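        # Off-load the vperm-aligned key schedule to the stack so that the
        # inner loops can stream the round keys back with plain (aligned)
        # lvx loads instead of re-aligning the caller's schedule each pass.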
3142 Load_xts_dec_key:
3143         ?vperm          v24,v30,v31,$keyperm
3144         lvx             v30,$x10,$key1
3145         addi            $key1,$key1,0x20
3146         stvx            v24,$x00,$key_          # off-load round[1]
3147         ?vperm          v25,v31,v30,$keyperm
3148         lvx             v31,$x00,$key1
3149         stvx            v25,$x10,$key_          # off-load round[2]
3150         addi            $key_,$key_,0x20
3151         bdnz            Load_xts_dec_key
3152
3153         lvx             v26,$x10,$key1
3154         ?vperm          v24,v30,v31,$keyperm
3155         lvx             v27,$x20,$key1
3156         stvx            v24,$x00,$key_          # off-load round[3]
3157         ?vperm          v25,v31,v26,$keyperm
3158         lvx             v28,$x30,$key1
3159         stvx            v25,$x10,$key_          # off-load round[4]
3160         addi            $key_,$sp,$FRAME+15     # rewind $key_
3161         ?vperm          v26,v26,v27,$keyperm
3162         lvx             v29,$x40,$key1
3163         ?vperm          v27,v27,v28,$keyperm
3164         lvx             v30,$x50,$key1
3165         ?vperm          v28,v28,v29,$keyperm
3166         lvx             v31,$x60,$key1
3167         ?vperm          v29,v29,v30,$keyperm
3168         lvx             $twk5,$x70,$key1        # borrow $twk5
3169         ?vperm          v30,v30,v31,$keyperm
3170         lvx             v24,$x00,$key_          # pre-load round[1]
3171         ?vperm          v31,v31,$twk5,$keyperm
3172         lvx             v25,$x10,$key_          # pre-load round[2]
3173
3174          vperm          $in0,$inout,$inptail,$inpperm
3175          subi           $inp,$inp,31            # undo "caller"
3176         vxor            $twk0,$tweak,$rndkey0
3177         vsrab           $tmp,$tweak,$seven      # next tweak value
3178         vaddubm         $tweak,$tweak,$tweak
3179         vsldoi          $tmp,$tmp,$tmp,15
3180         vand            $tmp,$tmp,$eighty7
3181          vxor           $out0,$in0,$twk0
3182         vxor            $tweak,$tweak,$tmp
3183
3184          lvx_u          $in1,$x10,$inp
3185         vxor            $twk1,$tweak,$rndkey0
3186         vsrab           $tmp,$tweak,$seven      # next tweak value
3187         vaddubm         $tweak,$tweak,$tweak
3188         vsldoi          $tmp,$tmp,$tmp,15
3189          le?vperm       $in1,$in1,$in1,$leperm
3190         vand            $tmp,$tmp,$eighty7
3191          vxor           $out1,$in1,$twk1
3192         vxor            $tweak,$tweak,$tmp
3193
3194          lvx_u          $in2,$x20,$inp
3195          andi.          $taillen,$len,15
3196         vxor            $twk2,$tweak,$rndkey0
3197         vsrab           $tmp,$tweak,$seven      # next tweak value
3198         vaddubm         $tweak,$tweak,$tweak
3199         vsldoi          $tmp,$tmp,$tmp,15
3200          le?vperm       $in2,$in2,$in2,$leperm
3201         vand            $tmp,$tmp,$eighty7
3202          vxor           $out2,$in2,$twk2
3203         vxor            $tweak,$tweak,$tmp
3204
3205          lvx_u          $in3,$x30,$inp
3206          sub            $len,$len,$taillen
3207         vxor            $twk3,$tweak,$rndkey0
3208         vsrab           $tmp,$tweak,$seven      # next tweak value
3209         vaddubm         $tweak,$tweak,$tweak
3210         vsldoi          $tmp,$tmp,$tmp,15
3211          le?vperm       $in3,$in3,$in3,$leperm
3212         vand            $tmp,$tmp,$eighty7
3213          vxor           $out3,$in3,$twk3
3214         vxor            $tweak,$tweak,$tmp
3215
3216          lvx_u          $in4,$x40,$inp
3217          subi           $len,$len,0x60
3218         vxor            $twk4,$tweak,$rndkey0
3219         vsrab           $tmp,$tweak,$seven      # next tweak value
3220         vaddubm         $tweak,$tweak,$tweak
3221         vsldoi          $tmp,$tmp,$tmp,15
3222          le?vperm       $in4,$in4,$in4,$leperm
3223         vand            $tmp,$tmp,$eighty7
3224          vxor           $out4,$in4,$twk4
3225         vxor            $tweak,$tweak,$tmp
3226
3227          lvx_u          $in5,$x50,$inp
3228          addi           $inp,$inp,0x60
3229         vxor            $twk5,$tweak,$rndkey0
3230         vsrab           $tmp,$tweak,$seven      # next tweak value
3231         vaddubm         $tweak,$tweak,$tweak
3232         vsldoi          $tmp,$tmp,$tmp,15
3233          le?vperm       $in5,$in5,$in5,$leperm
3234         vand            $tmp,$tmp,$eighty7
3235          vxor           $out5,$in5,$twk5
3236         vxor            $tweak,$tweak,$tmp
3237
3238         vxor            v31,v31,$rndkey0
3239         mtctr           $rounds
3240         b               Loop_xts_dec6x
3241
3242 .align  5
3243 Loop_xts_dec6x:
3244         vncipher        $out0,$out0,v24
3245         vncipher        $out1,$out1,v24
3246         vncipher        $out2,$out2,v24
3247         vncipher        $out3,$out3,v24
3248         vncipher        $out4,$out4,v24
3249         vncipher        $out5,$out5,v24
3250         lvx             v24,$x20,$key_          # round[3]
3251         addi            $key_,$key_,0x20
3252
3253         vncipher        $out0,$out0,v25
3254         vncipher        $out1,$out1,v25
3255         vncipher        $out2,$out2,v25
3256         vncipher        $out3,$out3,v25
3257         vncipher        $out4,$out4,v25
3258         vncipher        $out5,$out5,v25
3259         lvx             v25,$x10,$key_          # round[4]
3260         bdnz            Loop_xts_dec6x
3261
3262         subic           $len,$len,96            # $len-=96
3263          vxor           $in0,$twk0,v31          # xor with last round key
3264         vncipher        $out0,$out0,v24
3265         vncipher        $out1,$out1,v24
3266          vsrab          $tmp,$tweak,$seven      # next tweak value
3267          vxor           $twk0,$tweak,$rndkey0
3268          vaddubm        $tweak,$tweak,$tweak
3269         vncipher        $out2,$out2,v24
3270         vncipher        $out3,$out3,v24
3271          vsldoi         $tmp,$tmp,$tmp,15
3272         vncipher        $out4,$out4,v24
3273         vncipher        $out5,$out5,v24
3274
3275         subfe.          r0,r0,r0                # borrow?-1:0
3276          vand           $tmp,$tmp,$eighty7
3277         vncipher        $out0,$out0,v25
3278         vncipher        $out1,$out1,v25
3279          vxor           $tweak,$tweak,$tmp
3280         vncipher        $out2,$out2,v25
3281         vncipher        $out3,$out3,v25
3282          vxor           $in1,$twk1,v31
3283          vsrab          $tmp,$tweak,$seven      # next tweak value
3284          vxor           $twk1,$tweak,$rndkey0
3285         vncipher        $out4,$out4,v25
3286         vncipher        $out5,$out5,v25
3287
3288         and             r0,r0,$len
3289          vaddubm        $tweak,$tweak,$tweak
3290          vsldoi         $tmp,$tmp,$tmp,15
3291         vncipher        $out0,$out0,v26
3292         vncipher        $out1,$out1,v26
3293          vand           $tmp,$tmp,$eighty7
3294         vncipher        $out2,$out2,v26
3295         vncipher        $out3,$out3,v26
3296          vxor           $tweak,$tweak,$tmp
3297         vncipher        $out4,$out4,v26
3298         vncipher        $out5,$out5,v26
3299
3300         add             $inp,$inp,r0            # $inp is adjusted in such a
3301                                                 # way that at exit from the
3302                                                 # loop in0-in5 are loaded
3303                                                 # with the last "words"
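        # (Same branch-free rewind as in the encrypt path: r0 is the
        #  negative shortfall on borrow, else 0, keeping the trailing
        #  loads in bounds.)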
3304          vxor           $in2,$twk2,v31
3305          vsrab          $tmp,$tweak,$seven      # next tweak value
3306          vxor           $twk2,$tweak,$rndkey0
3307          vaddubm        $tweak,$tweak,$tweak
3308         vncipher        $out0,$out0,v27
3309         vncipher        $out1,$out1,v27
3310          vsldoi         $tmp,$tmp,$tmp,15
3311         vncipher        $out2,$out2,v27
3312         vncipher        $out3,$out3,v27
3313          vand           $tmp,$tmp,$eighty7
3314         vncipher        $out4,$out4,v27
3315         vncipher        $out5,$out5,v27
3316
3317         addi            $key_,$sp,$FRAME+15     # rewind $key_
3318          vxor           $tweak,$tweak,$tmp
3319         vncipher        $out0,$out0,v28
3320         vncipher        $out1,$out1,v28
3321          vxor           $in3,$twk3,v31
3322          vsrab          $tmp,$tweak,$seven      # next tweak value
3323          vxor           $twk3,$tweak,$rndkey0
3324         vncipher        $out2,$out2,v28
3325         vncipher        $out3,$out3,v28
3326          vaddubm        $tweak,$tweak,$tweak
3327          vsldoi         $tmp,$tmp,$tmp,15
3328         vncipher        $out4,$out4,v28
3329         vncipher        $out5,$out5,v28
3330         lvx             v24,$x00,$key_          # re-pre-load round[1]
3331          vand           $tmp,$tmp,$eighty7
3332
3333         vncipher        $out0,$out0,v29
3334         vncipher        $out1,$out1,v29
3335          vxor           $tweak,$tweak,$tmp
3336         vncipher        $out2,$out2,v29
3337         vncipher        $out3,$out3,v29
3338          vxor           $in4,$twk4,v31
3339          vsrab          $tmp,$tweak,$seven      # next tweak value
3340          vxor           $twk4,$tweak,$rndkey0
3341         vncipher        $out4,$out4,v29
3342         vncipher        $out5,$out5,v29
3343         lvx             v25,$x10,$key_          # re-pre-load round[2]
3344          vaddubm        $tweak,$tweak,$tweak
3345          vsldoi         $tmp,$tmp,$tmp,15
3346
3347         vncipher        $out0,$out0,v30
3348         vncipher        $out1,$out1,v30
3349          vand           $tmp,$tmp,$eighty7
3350         vncipher        $out2,$out2,v30
3351         vncipher        $out3,$out3,v30
3352          vxor           $tweak,$tweak,$tmp
3353         vncipher        $out4,$out4,v30
3354         vncipher        $out5,$out5,v30
3355          vxor           $in5,$twk5,v31
3356          vsrab          $tmp,$tweak,$seven      # next tweak value
3357          vxor           $twk5,$tweak,$rndkey0
3358
3359         vncipherlast    $out0,$out0,$in0
3360          lvx_u          $in0,$x00,$inp          # load next input block
3361          vaddubm        $tweak,$tweak,$tweak
3362          vsldoi         $tmp,$tmp,$tmp,15
3363         vncipherlast    $out1,$out1,$in1
3364          lvx_u          $in1,$x10,$inp
3365         vncipherlast    $out2,$out2,$in2
3366          le?vperm       $in0,$in0,$in0,$leperm
3367          lvx_u          $in2,$x20,$inp
3368          vand           $tmp,$tmp,$eighty7
3369         vncipherlast    $out3,$out3,$in3
3370          le?vperm       $in1,$in1,$in1,$leperm
3371          lvx_u          $in3,$x30,$inp
3372         vncipherlast    $out4,$out4,$in4
3373          le?vperm       $in2,$in2,$in2,$leperm
3374          lvx_u          $in4,$x40,$inp
3375          vxor           $tweak,$tweak,$tmp
3376         vncipherlast    $out5,$out5,$in5
3377          le?vperm       $in3,$in3,$in3,$leperm
3378          lvx_u          $in5,$x50,$inp
3379          addi           $inp,$inp,0x60
3380          le?vperm       $in4,$in4,$in4,$leperm
3381          le?vperm       $in5,$in5,$in5,$leperm
3382
3383         le?vperm        $out0,$out0,$out0,$leperm
3384         le?vperm        $out1,$out1,$out1,$leperm
3385         stvx_u          $out0,$x00,$out         # store output
3386          vxor           $out0,$in0,$twk0
3387         le?vperm        $out2,$out2,$out2,$leperm
3388         stvx_u          $out1,$x10,$out
3389          vxor           $out1,$in1,$twk1
3390         le?vperm        $out3,$out3,$out3,$leperm
3391         stvx_u          $out2,$x20,$out
3392          vxor           $out2,$in2,$twk2
3393         le?vperm        $out4,$out4,$out4,$leperm
3394         stvx_u          $out3,$x30,$out
3395          vxor           $out3,$in3,$twk3
3396         le?vperm        $out5,$out5,$out5,$leperm
3397         stvx_u          $out4,$x40,$out
3398          vxor           $out4,$in4,$twk4
3399         stvx_u          $out5,$x50,$out
3400          vxor           $out5,$in5,$twk5
3401         addi            $out,$out,0x60
3402
3403         mtctr           $rounds
3404         beq             Loop_xts_dec6x          # did $len-=96 borrow?
3405
3406         addic.          $len,$len,0x60
3407         beq             Lxts_dec6x_zero
3408         cmpwi           $len,0x20
3409         blt             Lxts_dec6x_one
3410         nop
3411         beq             Lxts_dec6x_two
3412         cmpwi           $len,0x40
3413         blt             Lxts_dec6x_three
3414         nop
3415         beq             Lxts_dec6x_four
3416
3417 Lxts_dec6x_five:
3418         vxor            $out0,$in1,$twk0
3419         vxor            $out1,$in2,$twk1
3420         vxor            $out2,$in3,$twk2
3421         vxor            $out3,$in4,$twk3
3422         vxor            $out4,$in5,$twk4
3423
3424         bl              _aesp8_xts_dec5x
3425
3426         le?vperm        $out0,$out0,$out0,$leperm
3427         vmr             $twk0,$twk5             # unused tweak
3428         vxor            $twk1,$tweak,$rndkey0
3429         le?vperm        $out1,$out1,$out1,$leperm
3430         stvx_u          $out0,$x00,$out         # store output
3431         vxor            $out0,$in0,$twk1
3432         le?vperm        $out2,$out2,$out2,$leperm
3433         stvx_u          $out1,$x10,$out
3434         le?vperm        $out3,$out3,$out3,$leperm
3435         stvx_u          $out2,$x20,$out
3436         le?vperm        $out4,$out4,$out4,$leperm
3437         stvx_u          $out3,$x30,$out
3438         stvx_u          $out4,$x40,$out
3439         addi            $out,$out,0x50
3440         bne             Lxts_dec6x_steal
3441         b               Lxts_dec6x_done
3442
3443 .align  4
3444 Lxts_dec6x_four:
3445         vxor            $out0,$in2,$twk0
3446         vxor            $out1,$in3,$twk1
3447         vxor            $out2,$in4,$twk2
3448         vxor            $out3,$in5,$twk3
3449         vxor            $out4,$out4,$out4
3450
3451         bl              _aesp8_xts_dec5x
3452
3453         le?vperm        $out0,$out0,$out0,$leperm
3454         vmr             $twk0,$twk4             # unused tweak
3455         vmr             $twk1,$twk5
3456         le?vperm        $out1,$out1,$out1,$leperm
3457         stvx_u          $out0,$x00,$out         # store output
3458         vxor            $out0,$in0,$twk5
3459         le?vperm        $out2,$out2,$out2,$leperm
3460         stvx_u          $out1,$x10,$out
3461         le?vperm        $out3,$out3,$out3,$leperm
3462         stvx_u          $out2,$x20,$out
3463         stvx_u          $out3,$x30,$out
3464         addi            $out,$out,0x40
3465         bne             Lxts_dec6x_steal
3466         b               Lxts_dec6x_done
3467
3468 .align  4
3469 Lxts_dec6x_three:
3470         vxor            $out0,$in3,$twk0
3471         vxor            $out1,$in4,$twk1
3472         vxor            $out2,$in5,$twk2
3473         vxor            $out3,$out3,$out3
3474         vxor            $out4,$out4,$out4
3475
3476         bl              _aesp8_xts_dec5x
3477
3478         le?vperm        $out0,$out0,$out0,$leperm
3479         vmr             $twk0,$twk3             # unused tweak
3480         vmr             $twk1,$twk4
3481         le?vperm        $out1,$out1,$out1,$leperm
3482         stvx_u          $out0,$x00,$out         # store output
3483         vxor            $out0,$in0,$twk4
3484         le?vperm        $out2,$out2,$out2,$leperm
3485         stvx_u          $out1,$x10,$out
3486         stvx_u          $out2,$x20,$out
3487         addi            $out,$out,0x30
3488         bne             Lxts_dec6x_steal
3489         b               Lxts_dec6x_done
3490
3491 .align  4
3492 Lxts_dec6x_two:
3493         vxor            $out0,$in4,$twk0
3494         vxor            $out1,$in5,$twk1
3495         vxor            $out2,$out2,$out2
3496         vxor            $out3,$out3,$out3
3497         vxor            $out4,$out4,$out4
3498
3499         bl              _aesp8_xts_dec5x
3500
3501         le?vperm        $out0,$out0,$out0,$leperm
3502         vmr             $twk0,$twk2             # unused tweak
3503         vmr             $twk1,$twk3
3504         le?vperm        $out1,$out1,$out1,$leperm
3505         stvx_u          $out0,$x00,$out         # store output
3506         vxor            $out0,$in0,$twk3
3507         stvx_u          $out1,$x10,$out
3508         addi            $out,$out,0x20
3509         bne             Lxts_dec6x_steal
3510         b               Lxts_dec6x_done
3511
3512 .align  4
3513 Lxts_dec6x_one:
3514         vxor            $out0,$in5,$twk0
3515         nop
3516 Loop_xts_dec1x:
3517         vncipher        $out0,$out0,v24
3518         lvx             v24,$x20,$key_          # round[3]
3519         addi            $key_,$key_,0x20
3520
3521         vncipher        $out0,$out0,v25
3522         lvx             v25,$x10,$key_          # round[4]
3523         bdnz            Loop_xts_dec1x
3524
3525         subi            r0,$taillen,1
3526         vncipher        $out0,$out0,v24
3527
3528         andi.           r0,r0,16
3529         cmpwi           $taillen,0
3530         vncipher        $out0,$out0,v25
3531
3532         sub             $inp,$inp,r0
3533         vncipher        $out0,$out0,v26
3534
3535         lvx_u           $in0,0,$inp
3536         vncipher        $out0,$out0,v27
3537
3538         addi            $key_,$sp,$FRAME+15     # rewind $key_
3539         vncipher        $out0,$out0,v28
3540         lvx             v24,$x00,$key_          # re-pre-load round[1]
3541
3542         vncipher        $out0,$out0,v29
3543         lvx             v25,$x10,$key_          # re-pre-load round[2]
3544          vxor           $twk0,$twk0,v31
3545
3546         le?vperm        $in0,$in0,$in0,$leperm
3547         vncipher        $out0,$out0,v30
3548
3549         mtctr           $rounds
3550         vncipherlast    $out0,$out0,$twk0
3551
3552         vmr             $twk0,$twk1             # unused tweak
3553         vmr             $twk1,$twk2
3554         le?vperm        $out0,$out0,$out0,$leperm
3555         stvx_u          $out0,$x00,$out         # store output
3556         addi            $out,$out,0x10
3557         vxor            $out0,$in0,$twk2
3558         bne             Lxts_dec6x_steal
3559         b               Lxts_dec6x_done
3560
3561 .align  4
3562 Lxts_dec6x_zero:
3563         cmpwi           $taillen,0
3564         beq             Lxts_dec6x_done
3565
3566         lvx_u           $in0,0,$inp
3567         le?vperm        $in0,$in0,$in0,$leperm
3568         vxor            $out0,$in0,$twk1
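        # Decrypt-side stealing runs the two final blocks in swapped tweak
        # order: the last full block is first deciphered under the
        # following tweak ($twk1, applied at vncipherlast); its leading
        # bytes are copied out below as the partial tail, and the
        # re-assembled block (partial ciphertext spliced into $tmp via
        # vsel) goes once more through Loop_xts_dec1x under the preceding
        # tweak ($twk0).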
3569 Lxts_dec6x_steal:
3570         vncipher        $out0,$out0,v24
3571         lvx             v24,$x20,$key_          # round[3]
3572         addi            $key_,$key_,0x20
3573
3574         vncipher        $out0,$out0,v25
3575         lvx             v25,$x10,$key_          # round[4]
3576         bdnz            Lxts_dec6x_steal
3577
3578         add             $inp,$inp,$taillen
3579         vncipher        $out0,$out0,v24
3580
3581         cmpwi           $taillen,0
3582         vncipher        $out0,$out0,v25
3583
3584         lvx_u           $in0,0,$inp
3585         vncipher        $out0,$out0,v26
3586
3587         lvsr            $inpperm,0,$taillen     # $in5 is no more
3588         vncipher        $out0,$out0,v27
3589
3590         addi            $key_,$sp,$FRAME+15     # rewind $key_
3591         vncipher        $out0,$out0,v28
3592         lvx             v24,$x00,$key_          # re-pre-load round[1]
3593
3594         vncipher        $out0,$out0,v29
3595         lvx             v25,$x10,$key_          # re-pre-load round[2]
3596          vxor           $twk1,$twk1,v31
3597
3598         le?vperm        $in0,$in0,$in0,$leperm
3599         vncipher        $out0,$out0,v30
3600
3601         vperm           $in0,$in0,$in0,$inpperm
3602         vncipherlast    $tmp,$out0,$twk1
3603
3604         le?vperm        $out0,$tmp,$tmp,$leperm
3605         le?stvx_u       $out0,0,$out
3606         be?stvx_u       $tmp,0,$out
3607
3608         vxor            $out0,$out0,$out0
3609         vspltisb        $out1,-1
3610         vperm           $out0,$out0,$out1,$inpperm
3611         vsel            $out0,$in0,$tmp,$out0
3612         vxor            $out0,$out0,$twk0
3613
3614         subi            r30,$out,1
3615         mtctr           $taillen
3616 Loop_xts_dec6x_steal:
3617         lbzu            r0,1(r30)
3618         stb             r0,16(r30)
3619         bdnz            Loop_xts_dec6x_steal
3620
3621         li              $taillen,0
3622         mtctr           $rounds
3623         b               Loop_xts_dec1x          # one more time...
3624
3625 .align  4
3626 Lxts_dec6x_done:
3627         ${UCMP}i        $ivp,0
3628         beq             Lxts_dec6x_ret
3629
3630         vxor            $tweak,$twk0,$rndkey0
3631         le?vperm        $tweak,$tweak,$tweak,$leperm
3632         stvx_u          $tweak,0,$ivp
3633
3634 Lxts_dec6x_ret:
3635         mtlr            r11
3636         li              r10,`$FRAME+15`
3637         li              r11,`$FRAME+31`
3638         stvx            $seven,r10,$sp          # wipe copies of round keys
3639         addi            r10,r10,32
3640         stvx            $seven,r11,$sp
3641         addi            r11,r11,32
3642         stvx            $seven,r10,$sp
3643         addi            r10,r10,32
3644         stvx            $seven,r11,$sp
3645         addi            r11,r11,32
3646         stvx            $seven,r10,$sp
3647         addi            r10,r10,32
3648         stvx            $seven,r11,$sp
3649         addi            r11,r11,32
3650         stvx            $seven,r10,$sp
3651         addi            r10,r10,32
3652         stvx            $seven,r11,$sp
3653         addi            r11,r11,32
3654
3655         mtspr           256,$vrsave
3656         lvx             v20,r10,$sp             # ABI says so
3657         addi            r10,r10,32
3658         lvx             v21,r11,$sp
3659         addi            r11,r11,32
3660         lvx             v22,r10,$sp
3661         addi            r10,r10,32
3662         lvx             v23,r11,$sp
3663         addi            r11,r11,32
3664         lvx             v24,r10,$sp
3665         addi            r10,r10,32
3666         lvx             v25,r11,$sp
3667         addi            r11,r11,32
3668         lvx             v26,r10,$sp
3669         addi            r10,r10,32
3670         lvx             v27,r11,$sp
3671         addi            r11,r11,32
3672         lvx             v28,r10,$sp
3673         addi            r10,r10,32
3674         lvx             v29,r11,$sp
3675         addi            r11,r11,32
3676         lvx             v30,r10,$sp
3677         lvx             v31,r11,$sp
3678         $POP            r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3679         $POP            r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3680         $POP            r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3681         $POP            r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3682         $POP            r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3683         $POP            r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3684         addi            $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
3685         blr
3686         .long           0
3687         .byte           0,12,0x04,1,0x80,6,6,0
3688         .long           0
3689
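        # _aesp8_xts_dec5x: vncipher twin of _aesp8_xts_enc5x above.  The
        # subi/andi. arithmetic on $taillen makes r0 equal 16 only when
        # there is no ragged tail, steering the $in0 pre-load to the right
        # trailing block; ctr is re-armed with $rounds before returning.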
3690 .align  5
3691 _aesp8_xts_dec5x:
3692         vncipher        $out0,$out0,v24
3693         vncipher        $out1,$out1,v24
3694         vncipher        $out2,$out2,v24
3695         vncipher        $out3,$out3,v24
3696         vncipher        $out4,$out4,v24
3697         lvx             v24,$x20,$key_          # round[3]
3698         addi            $key_,$key_,0x20
3699
3700         vncipher        $out0,$out0,v25
3701         vncipher        $out1,$out1,v25
3702         vncipher        $out2,$out2,v25
3703         vncipher        $out3,$out3,v25
3704         vncipher        $out4,$out4,v25
3705         lvx             v25,$x10,$key_          # round[4]
3706         bdnz            _aesp8_xts_dec5x
3707
3708         subi            r0,$taillen,1
3709         vncipher        $out0,$out0,v24
3710         vncipher        $out1,$out1,v24
3711         vncipher        $out2,$out2,v24
3712         vncipher        $out3,$out3,v24
3713         vncipher        $out4,$out4,v24
3714
3715         andi.           r0,r0,16
3716         cmpwi           $taillen,0
3717         vncipher        $out0,$out0,v25
3718         vncipher        $out1,$out1,v25
3719         vncipher        $out2,$out2,v25
3720         vncipher        $out3,$out3,v25
3721         vncipher        $out4,$out4,v25
3722          vxor           $twk0,$twk0,v31
3723
3724         sub             $inp,$inp,r0
3725         vncipher        $out0,$out0,v26
3726         vncipher        $out1,$out1,v26
3727         vncipher        $out2,$out2,v26
3728         vncipher        $out3,$out3,v26
3729         vncipher        $out4,$out4,v26
3730          vxor           $in1,$twk1,v31
3731
3732         vncipher        $out0,$out0,v27
3733         lvx_u           $in0,0,$inp
3734         vncipher        $out1,$out1,v27
3735         vncipher        $out2,$out2,v27
3736         vncipher        $out3,$out3,v27
3737         vncipher        $out4,$out4,v27
3738          vxor           $in2,$twk2,v31
3739
3740         addi            $key_,$sp,$FRAME+15     # rewind $key_
3741         vncipher        $out0,$out0,v28
3742         vncipher        $out1,$out1,v28
3743         vncipher        $out2,$out2,v28
3744         vncipher        $out3,$out3,v28
3745         vncipher        $out4,$out4,v28
3746         lvx             v24,$x00,$key_          # re-pre-load round[1]
3747          vxor           $in3,$twk3,v31
3748
3749         vncipher        $out0,$out0,v29
3750         le?vperm        $in0,$in0,$in0,$leperm
3751         vncipher        $out1,$out1,v29
3752         vncipher        $out2,$out2,v29
3753         vncipher        $out3,$out3,v29
3754         vncipher        $out4,$out4,v29
3755         lvx             v25,$x10,$key_          # re-pre-load round[2]
3756          vxor           $in4,$twk4,v31
3757
3758         vncipher        $out0,$out0,v30
3759         vncipher        $out1,$out1,v30
3760         vncipher        $out2,$out2,v30
3761         vncipher        $out3,$out3,v30
3762         vncipher        $out4,$out4,v30
3763
3764         vncipherlast    $out0,$out0,$twk0
3765         vncipherlast    $out1,$out1,$in1
3766         vncipherlast    $out2,$out2,$in2
3767         vncipherlast    $out3,$out3,$in3
3768         vncipherlast    $out4,$out4,$in4
3769         mtctr           $rounds
3770         blr
3771         .long           0
3772         .byte           0,12,0x14,0,0,0,0,0
3773 ___
3774 }}      }}}
3775
3776 my $consts=1;
3777 foreach(split("\n",$code)) {
3778         s/\`([^\`]*)\`/eval($1)/geo;
3779
3780         # constants table endian-specific conversion
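        # e.g. a table entry ".long 0x01020304 ?rev" is emitted MSB-first
        # (0x01,0x02,0x03,0x04) for big-endian flavours but reversed for
        # little-endian ones, while "?inv" entries get each byte XORed
        # with 0x0f (mirrored vperm indices).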
3781         if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
3782             my $conv=$3;
3783             my @bytes=();
3784
3785             # convert to endian-agnostic format
3786             if ($1 eq "long") {
3787               foreach (split(/,\s*/,$2)) {
3788                 my $l = /^0/?oct:int;
3789                 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
3790               }
3791             } else {
3792                 @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
3793             }
3794
3795             # little-endian conversion
3796             if ($flavour =~ /le$/o) {
3797                 SWITCH: for($conv)  {
3798                     /\?inv/ && do   { @bytes=map($_^0xf,@bytes); last; };
3799                     /\?rev/ && do   { @bytes=reverse(@bytes);    last; };
3800                 }
3801             }
3802
3803             # emit
3804             print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
3805             next;
3806         }
3807         $consts=0 if (m/Lconsts:/o);    # end of table
3808
3809         # instructions prefixed with '?' are endian-specific and need
3810         # to be adjusted accordingly...
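        # e.g. on little-endian flavours "?vperm vD,vA,vB,vC" is rewritten
        # as "vperm vD,vB,vA,vC" (source operands swapped), "?lvsr" and
        # "?lvsl" trade places, and "?vsldoi ...,N" swaps its sources and
        # becomes a shift by 16-N; "be?" lines are commented out while
        # "le?" prefixes are simply stripped.  On big-endian flavours the
        # roles flip and bare '?' markers are simply dropped.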
3811         if ($flavour =~ /le$/o) {       # little-endian
3812             s/le\?//o           or
3813             s/be\?/#be#/o       or
3814             s/\?lvsr/lvsl/o     or
3815             s/\?lvsl/lvsr/o     or
3816             s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
3817             s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
3818             s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
3819         } else {                        # big-endian
3820             s/le\?/#le#/o       or
3821             s/be\?//o           or
3822             s/\?([a-z]+)/$1/o;
3823         }
3824
3825         print $_,"\n";
3826 }
3827
3828 close STDOUT;