* larger and would only be 0.5% faster (on sandy-bridge).
*/
.align 8
-roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd:
+SYM_FUNC_START_LOCAL(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
roundsm16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15,
%rcx, (%r9));
ret;
-ENDPROC(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
+SYM_FUNC_END(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
.align 8
-roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab:
+SYM_FUNC_START_LOCAL(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
roundsm16(%xmm4, %xmm5, %xmm6, %xmm7, %xmm0, %xmm1, %xmm2, %xmm3,
%xmm12, %xmm13, %xmm14, %xmm15, %xmm8, %xmm9, %xmm10, %xmm11,
%rax, (%r9));
ret;
-ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
+SYM_FUNC_END(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
/*
* IN/OUT:
.text
.align 8
-__camellia_enc_blk16:
+SYM_FUNC_START_LOCAL(__camellia_enc_blk16)
/* input:
* %rdi: ctx, CTX
* %rax: temporary storage, 256 bytes
%xmm15, %rax, %rcx, 24);
jmp .Lenc_done;
-ENDPROC(__camellia_enc_blk16)
+SYM_FUNC_END(__camellia_enc_blk16)
.align 8
-__camellia_dec_blk16:
+SYM_FUNC_START_LOCAL(__camellia_dec_blk16)
/* input:
* %rdi: ctx, CTX
* %rax: temporary storage, 256 bytes
((key_table + (24) * 8) + 4)(CTX));
jmp .Ldec_max24;
-ENDPROC(__camellia_dec_blk16)
+SYM_FUNC_END(__camellia_dec_blk16)
-ENTRY(camellia_ecb_enc_16way)
+SYM_FUNC_START(camellia_ecb_enc_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
FRAME_END
ret;
-ENDPROC(camellia_ecb_enc_16way)
+SYM_FUNC_END(camellia_ecb_enc_16way)
-ENTRY(camellia_ecb_dec_16way)
+SYM_FUNC_START(camellia_ecb_dec_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
FRAME_END
ret;
-ENDPROC(camellia_ecb_dec_16way)
+SYM_FUNC_END(camellia_ecb_dec_16way)
-ENTRY(camellia_cbc_dec_16way)
+SYM_FUNC_START(camellia_cbc_dec_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
FRAME_END
ret;
-ENDPROC(camellia_cbc_dec_16way)
+SYM_FUNC_END(camellia_cbc_dec_16way)
#define inc_le128(x, minus_one, tmp) \
vpcmpeqq minus_one, x, tmp; \
vpslldq $8, tmp, tmp; \
vpsubq tmp, x, x;
-ENTRY(camellia_ctr_16way)
+SYM_FUNC_START(camellia_ctr_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
FRAME_END
ret;
-ENDPROC(camellia_ctr_16way)
+SYM_FUNC_END(camellia_ctr_16way)
#define gf128mul_x_ble(iv, mask, tmp) \
vpsrad $31, iv, tmp; \
vpxor tmp, iv, iv;
.align 8
-camellia_xts_crypt_16way:
+SYM_FUNC_START_LOCAL(camellia_xts_crypt_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
FRAME_END
ret;
-ENDPROC(camellia_xts_crypt_16way)
+SYM_FUNC_END(camellia_xts_crypt_16way)
-ENTRY(camellia_xts_enc_16way)
+SYM_FUNC_START(camellia_xts_enc_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
leaq __camellia_enc_blk16, %r9;
jmp camellia_xts_crypt_16way;
-ENDPROC(camellia_xts_enc_16way)
+SYM_FUNC_END(camellia_xts_enc_16way)
-ENTRY(camellia_xts_dec_16way)
+SYM_FUNC_START(camellia_xts_dec_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
leaq __camellia_dec_blk16, %r9;
jmp camellia_xts_crypt_16way;
-ENDPROC(camellia_xts_dec_16way)
+SYM_FUNC_END(camellia_xts_dec_16way)