// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#include <ring-core/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)
.text

// ChaCha20_ctr32_ssse3
//
// Syntax/target: AT&T (GNU as) syntax, 32-bit x86, ELF, run through the C
// preprocessor. Uses SSE2 plus the SSSE3 pshufb instruction.
//
// Arguments are read from the stack after the four register pushes below:
//   20(%esp) -> %edi  destination pointer (all output stores go through it)
//   24(%esp) -> %esi  source pointer (all input loads go through it)
//   28(%esp) -> %ecx  number of bytes to process
//   32(%esp) -> %edx  pointer to 32 bytes loaded as two 16-byte state rows
//                     (presumably the key; confirm against the C prototype)
//   36(%esp) -> %ebx  pointer to 16 bytes loaded as the last state row; its
//                     first dword is the value that is stepped per block
//
// Preserved: %ebp, %ebx, %esi, %edi (pushed here, popped at .L003done).
// Clobbered: %eax, %ecx, %edx, %xmm0-%xmm7, flags.
//
// Stack frame set up here (%esp is aligned down to 64 bytes):
//   0..255(%esp)    working rows for four blocks, addressed -128..127(%ebx)
//   256..511(%esp)  input rows for four blocks, addressed -128..127(%ebp)
//   512(%esp)       caller %esp, reloaded at .L003done
//   516(%esp)       saved copy of the %edx argument
//   520(%esp)       saved copy of the %ebx argument
//
// NOTE(review): no test for a zero byte count is visible in this function.
// With %ecx == 0 the byte-wise tail loop would be entered and its decl would
// wrap the count. Confirm that callers never pass a zero length.
.globl ChaCha20_ctr32_ssse3
.hidden ChaCha20_ctr32_ssse3
.type ChaCha20_ctr32_ssse3,@function
.align 16
ChaCha20_ctr32_ssse3:
.L_ChaCha20_ctr32_ssse3_begin:
    pushl %ebp
    pushl %ebx
    pushl %esi
    pushl %edi
    // call/pop pair: %eax = run-time address of .Lpic_point, used below to
    // reach .Lssse3_data without an absolute relocation.
    call .Lpic_point
.Lpic_point:
    popl %eax
    movl 20(%esp),%edi
    movl 24(%esp),%esi
    movl 28(%esp),%ecx
    movl 32(%esp),%edx
    movl 36(%esp),%ebx
    // Reserve the scratch frame, align it to 64 bytes, remember the old %esp.
    movl %esp,%ebp
    subl $524,%esp
    andl $-64,%esp
    movl %ebp,512(%esp)
    // %eax = address of .Lssse3_data for the rest of the function.
    leal .Lssse3_data-.Lpic_point(%eax),%eax
    // %xmm3 = the 16-byte row behind the fifth argument.
    movdqu (%ebx),%xmm3
    // Fewer than 256 bytes: go straight to the one-block-at-a-time path.
    cmpl $256,%ecx
    jb .L0001x
    // The one-block path needs both pointers again after the 4x loop.
    movl %edx,516(%esp)
    movl %ebx,520(%esp)
    // Pre-subtract 256 so the outer loop can branch on the borrow of its
    // own subl.
    subl $256,%ecx
    leal 384(%esp),%ebp
    movdqu (%edx),%xmm7
    // Broadcast each dword of the last row into a register of its own, so
    // every xmm lane belongs to a different block.
    pshufd $0,%xmm3,%xmm0
    pshufd $85,%xmm3,%xmm1
    pshufd $170,%xmm3,%xmm2
    pshufd $255,%xmm3,%xmm3
    // Counter lanes: add {0,1,2,3} and subtract {4,4,4,4}; the outer loop
    // adds {4,4,4,4} back before each batch.
    paddd 48(%eax),%xmm0
    pshufd $0,%xmm7,%xmm4
    pshufd $85,%xmm7,%xmm5
    psubd 64(%eax),%xmm0
    pshufd $170,%xmm7,%xmm6
    pshufd $255,%xmm7,%xmm7
    // Rows 12..15 of the broadcast input state.
    movdqa %xmm0,64(%ebp)
    movdqa %xmm1,80(%ebp)
    movdqa %xmm2,96(%ebp)
    movdqa %xmm3,112(%ebp)
    movdqu 16(%edx),%xmm3
    // Rows 4..7 (first 16 bytes behind %edx).
    movdqa %xmm4,-64(%ebp)
    movdqa %xmm5,-48(%ebp)
    movdqa %xmm6,-32(%ebp)
    movdqa %xmm7,-16(%ebp)
    // %xmm7 = the four constant dwords stored at .Lssse3_data+32.
    movdqa 32(%eax),%xmm7
    leal 128(%esp),%ebx
    pshufd $0,%xmm3,%xmm0
    pshufd $85,%xmm3,%xmm1
    pshufd $170,%xmm3,%xmm2
    pshufd $255,%xmm3,%xmm3
    pshufd $0,%xmm7,%xmm4
    pshufd $85,%xmm7,%xmm5
    pshufd $170,%xmm7,%xmm6
    pshufd $255,%xmm7,%xmm7
    // Rows 8..11 (second 16 bytes behind %edx).
    movdqa %xmm0,(%ebp)
    movdqa %xmm1,16(%ebp)
    movdqa %xmm2,32(%ebp)
    movdqa %xmm3,48(%ebp)
    // Rows 0..3 (the constants).
    movdqa %xmm4,-128(%ebp)
    movdqa %xmm5,-112(%ebp)
    movdqa %xmm6,-96(%ebp)
    movdqa %xmm7,-80(%ebp)
    // Bias both data pointers by 128 so the four 64-byte blocks of a batch
    // are reachable at displacements -128, -64, 0 and 64.
    leal 128(%esi),%esi
    leal 128(%edi),%edi
    jmp .L001outer_loop
.align 16
// Start of one 256-byte batch (four blocks, one per xmm lane).
// Copies the input rows at (%ebp) into the working area at (%ebx), steps the
// counter row by {4,4,4,4}, and leaves these rows in registers for the round
// loop: xmm0 = row 0, xmm3 = row 4, xmm4 = row 8, xmm5 = row 9,
// xmm6 = row 12 (the stepped counters). %edx = 10 loop iterations.
.L001outer_loop:
    // Rows 1..3 and 5..7 go to the working area unchanged.
    movdqa -112(%ebp),%xmm1
    movdqa -96(%ebp),%xmm2
    movdqa -80(%ebp),%xmm3
    movdqa -48(%ebp),%xmm5
    movdqa -32(%ebp),%xmm6
    movdqa -16(%ebp),%xmm7
    movdqa %xmm1,-112(%ebx)
    movdqa %xmm2,-96(%ebx)
    movdqa %xmm3,-80(%ebx)
    movdqa %xmm5,-48(%ebx)
    movdqa %xmm6,-32(%ebx)
    movdqa %xmm7,-16(%ebx)
    // Rows 10..15; row 12 holds the per-lane block counters.
    movdqa 32(%ebp),%xmm2
    movdqa 48(%ebp),%xmm3
    movdqa 64(%ebp),%xmm4
    movdqa 80(%ebp),%xmm5
    movdqa 96(%ebp),%xmm6
    movdqa 112(%ebp),%xmm7
    // Step every counter lane by 4 (64(%eax) is {4,4,4,4}).
    paddd 64(%eax),%xmm4
    movdqa %xmm2,32(%ebx)
    movdqa %xmm3,48(%ebx)
    movdqa %xmm4,64(%ebx)
    movdqa %xmm5,80(%ebx)
    movdqa %xmm6,96(%ebx)
    movdqa %xmm7,112(%ebx)
    // Write the stepped counters back to the input state as well, so the
    // final add after the rounds uses the same values.
    movdqa %xmm4,64(%ebp)
    // Rows kept in registers across the first quarter-round.
    movdqa -128(%ebp),%xmm0
    movdqa %xmm4,%xmm6
    movdqa -64(%ebp),%xmm3
    movdqa (%ebp),%xmm4
    movdqa 16(%ebp),%xmm5
    movl $10,%edx
    nop
.align 16
// One iteration = eight quarter-rounds (four on columns, four on diagonals)
// over four blocks in parallel; %edx counts the iterations.
// Working rows at (%ebx):
//   a0..a3 at -128,-112,-96,-80    b0..b3 at -64,-48,-32,-16
//   c0..c3 at 0,16,32,48           d0..d3 at 64,80,96,112
// At the top of every iteration: xmm0=a0, xmm3=b0, xmm4=c0, xmm5=c1, xmm6=d0.
// (%eax) is the byte shuffle that rotates each dword left by 16, 16(%eax) the
// one that rotates left by 8; the 12- and 7-bit rotates use pslld/psrld/por.
// The quarter-rounds are interleaved: loads and stores that belong to the
// neighbouring quarter-round are mixed into the current one, so the markers
// below only show where each a += b chain begins.
.L002loop:
    // column quarter-round (a0,b0,c0,d0)
    paddd %xmm3,%xmm0
    movdqa %xmm3,%xmm2
    pxor %xmm0,%xmm6
    pshufb (%eax),%xmm6
    paddd %xmm6,%xmm4
    pxor %xmm4,%xmm2
    movdqa -48(%ebx),%xmm3
    movdqa %xmm2,%xmm1
    pslld $12,%xmm2
    psrld $20,%xmm1
    por %xmm1,%xmm2
    movdqa -112(%ebx),%xmm1
    paddd %xmm2,%xmm0
    movdqa 80(%ebx),%xmm7
    pxor %xmm0,%xmm6
    movdqa %xmm0,-128(%ebx)
    pshufb 16(%eax),%xmm6
    paddd %xmm6,%xmm4
    movdqa %xmm6,64(%ebx)
    pxor %xmm4,%xmm2
    // column quarter-round (a1,b1,c1,d1)
    paddd %xmm3,%xmm1
    movdqa %xmm2,%xmm0
    pslld $7,%xmm2
    psrld $25,%xmm0
    pxor %xmm1,%xmm7
    por %xmm0,%xmm2
    movdqa %xmm4,(%ebx)
    pshufb (%eax),%xmm7
    movdqa %xmm2,-64(%ebx)
    paddd %xmm7,%xmm5
    movdqa 32(%ebx),%xmm4
    pxor %xmm5,%xmm3
    movdqa -32(%ebx),%xmm2
    movdqa %xmm3,%xmm0
    pslld $12,%xmm3
    psrld $20,%xmm0
    por %xmm0,%xmm3
    movdqa -96(%ebx),%xmm0
    paddd %xmm3,%xmm1
    movdqa 96(%ebx),%xmm6
    pxor %xmm1,%xmm7
    movdqa %xmm1,-112(%ebx)
    pshufb 16(%eax),%xmm7
    paddd %xmm7,%xmm5
    movdqa %xmm7,80(%ebx)
    pxor %xmm5,%xmm3
    // column quarter-round (a2,b2,c2,d2)
    paddd %xmm2,%xmm0
    movdqa %xmm3,%xmm1
    pslld $7,%xmm3
    psrld $25,%xmm1
    pxor %xmm0,%xmm6
    por %xmm1,%xmm3
    movdqa %xmm5,16(%ebx)
    pshufb (%eax),%xmm6
    movdqa %xmm3,-48(%ebx)
    paddd %xmm6,%xmm4
    movdqa 48(%ebx),%xmm5
    pxor %xmm4,%xmm2
    movdqa -16(%ebx),%xmm3
    movdqa %xmm2,%xmm1
    pslld $12,%xmm2
    psrld $20,%xmm1
    por %xmm1,%xmm2
    movdqa -80(%ebx),%xmm1
    paddd %xmm2,%xmm0
    movdqa 112(%ebx),%xmm7
    pxor %xmm0,%xmm6
    movdqa %xmm0,-96(%ebx)
    pshufb 16(%eax),%xmm6
    paddd %xmm6,%xmm4
    movdqa %xmm6,96(%ebx)
    pxor %xmm4,%xmm2
    // column quarter-round (a3,b3,c3,d3); c2 (xmm4) and c3 (xmm5) stay in
    // registers because the first two diagonal quarter-rounds use them.
    paddd %xmm3,%xmm1
    movdqa %xmm2,%xmm0
    pslld $7,%xmm2
    psrld $25,%xmm0
    pxor %xmm1,%xmm7
    por %xmm0,%xmm2
    pshufb (%eax),%xmm7
    movdqa %xmm2,-32(%ebx)
    paddd %xmm7,%xmm5
    pxor %xmm5,%xmm3
    movdqa -48(%ebx),%xmm2
    movdqa %xmm3,%xmm0
    pslld $12,%xmm3
    psrld $20,%xmm0
    por %xmm0,%xmm3
    movdqa -128(%ebx),%xmm0
    paddd %xmm3,%xmm1
    pxor %xmm1,%xmm7
    movdqa %xmm1,-80(%ebx)
    pshufb 16(%eax),%xmm7
    paddd %xmm7,%xmm5
    movdqa %xmm7,%xmm6
    pxor %xmm5,%xmm3
    // diagonal quarter-round (a0,b1,c2,d3)
    paddd %xmm2,%xmm0
    movdqa %xmm3,%xmm1
    pslld $7,%xmm3
    psrld $25,%xmm1
    pxor %xmm0,%xmm6
    por %xmm1,%xmm3
    pshufb (%eax),%xmm6
    movdqa %xmm3,-16(%ebx)
    paddd %xmm6,%xmm4
    pxor %xmm4,%xmm2
    movdqa -32(%ebx),%xmm3
    movdqa %xmm2,%xmm1
    pslld $12,%xmm2
    psrld $20,%xmm1
    por %xmm1,%xmm2
    movdqa -112(%ebx),%xmm1
    paddd %xmm2,%xmm0
    movdqa 64(%ebx),%xmm7
    pxor %xmm0,%xmm6
    movdqa %xmm0,-128(%ebx)
    pshufb 16(%eax),%xmm6
    paddd %xmm6,%xmm4
    movdqa %xmm6,112(%ebx)
    pxor %xmm4,%xmm2
    // diagonal quarter-round (a1,b2,c3,d0)
    paddd %xmm3,%xmm1
    movdqa %xmm2,%xmm0
    pslld $7,%xmm2
    psrld $25,%xmm0
    pxor %xmm1,%xmm7
    por %xmm0,%xmm2
    movdqa %xmm4,32(%ebx)
    pshufb (%eax),%xmm7
    movdqa %xmm2,-48(%ebx)
    paddd %xmm7,%xmm5
    movdqa (%ebx),%xmm4
    pxor %xmm5,%xmm3
    movdqa -16(%ebx),%xmm2
    movdqa %xmm3,%xmm0
    pslld $12,%xmm3
    psrld $20,%xmm0
    por %xmm0,%xmm3
    movdqa -96(%ebx),%xmm0
    paddd %xmm3,%xmm1
    movdqa 80(%ebx),%xmm6
    pxor %xmm1,%xmm7
    movdqa %xmm1,-112(%ebx)
    pshufb 16(%eax),%xmm7
    paddd %xmm7,%xmm5
    movdqa %xmm7,64(%ebx)
    pxor %xmm5,%xmm3
    // diagonal quarter-round (a2,b3,c0,d1)
    paddd %xmm2,%xmm0
    movdqa %xmm3,%xmm1
    pslld $7,%xmm3
    psrld $25,%xmm1
    pxor %xmm0,%xmm6
    por %xmm1,%xmm3
    movdqa %xmm5,48(%ebx)
    pshufb (%eax),%xmm6
    movdqa %xmm3,-32(%ebx)
    paddd %xmm6,%xmm4
    movdqa 16(%ebx),%xmm5
    pxor %xmm4,%xmm2
    movdqa -64(%ebx),%xmm3
    movdqa %xmm2,%xmm1
    pslld $12,%xmm2
    psrld $20,%xmm1
    por %xmm1,%xmm2
    movdqa -80(%ebx),%xmm1
    paddd %xmm2,%xmm0
    movdqa 96(%ebx),%xmm7
    pxor %xmm0,%xmm6
    movdqa %xmm0,-96(%ebx)
    pshufb 16(%eax),%xmm6
    paddd %xmm6,%xmm4
    movdqa %xmm6,80(%ebx)
    pxor %xmm4,%xmm2
    // diagonal quarter-round (a3,b0,c1,d2); a0 and d0 are reloaded inside it
    // so the register layout matches the top of the loop again.
    paddd %xmm3,%xmm1
    movdqa %xmm2,%xmm0
    pslld $7,%xmm2
    psrld $25,%xmm0
    pxor %xmm1,%xmm7
    por %xmm0,%xmm2
    pshufb (%eax),%xmm7
    movdqa %xmm2,-16(%ebx)
    paddd %xmm7,%xmm5
    pxor %xmm5,%xmm3
    movdqa %xmm3,%xmm0
    pslld $12,%xmm3
    psrld $20,%xmm0
    por %xmm0,%xmm3
    movdqa -128(%ebx),%xmm0
    paddd %xmm3,%xmm1
    movdqa 64(%ebx),%xmm6
    pxor %xmm1,%xmm7
    movdqa %xmm1,-80(%ebx)
    pshufb 16(%eax),%xmm7
    paddd %xmm7,%xmm5
    movdqa %xmm7,96(%ebx)
    pxor %xmm5,%xmm3
    movdqa %xmm3,%xmm1
    pslld $7,%xmm3
    psrld $25,%xmm1
    por %xmm1,%xmm3
    decl %edx
    jnz .L002loop
movdqa %xmm3,-64(%ebx)
|
|
movdqa %xmm4,(%ebx)
|
|
movdqa %xmm5,16(%ebx)
|
|
movdqa %xmm6,64(%ebx)
|
|
movdqa %xmm7,96(%ebx)
|
|
movdqa -112(%ebx),%xmm1
|
|
movdqa -96(%ebx),%xmm2
|
|
movdqa -80(%ebx),%xmm3
|
|
paddd -128(%ebp),%xmm0
|
|
paddd -112(%ebp),%xmm1
|
|
paddd -96(%ebp),%xmm2
|
|
paddd -80(%ebp),%xmm3
|
|
movdqa %xmm0,%xmm6
|
|
punpckldq %xmm1,%xmm0
|
|
movdqa %xmm2,%xmm7
|
|
punpckldq %xmm3,%xmm2
|
|
punpckhdq %xmm1,%xmm6
|
|
punpckhdq %xmm3,%xmm7
|
|
movdqa %xmm0,%xmm1
|
|
punpcklqdq %xmm2,%xmm0
|
|
movdqa %xmm6,%xmm3
|
|
punpcklqdq %xmm7,%xmm6
|
|
punpckhqdq %xmm2,%xmm1
|
|
punpckhqdq %xmm7,%xmm3
|
|
movdqu -128(%esi),%xmm4
|
|
movdqu -64(%esi),%xmm5
|
|
movdqu (%esi),%xmm2
|
|
movdqu 64(%esi),%xmm7
|
|
leal 16(%esi),%esi
|
|
pxor %xmm0,%xmm4
|
|
movdqa -64(%ebx),%xmm0
|
|
pxor %xmm1,%xmm5
|
|
movdqa -48(%ebx),%xmm1
|
|
pxor %xmm2,%xmm6
|
|
movdqa -32(%ebx),%xmm2
|
|
pxor %xmm3,%xmm7
|
|
movdqa -16(%ebx),%xmm3
|
|
movdqu %xmm4,-128(%edi)
|
|
movdqu %xmm5,-64(%edi)
|
|
movdqu %xmm6,(%edi)
|
|
movdqu %xmm7,64(%edi)
|
|
leal 16(%edi),%edi
|
|
paddd -64(%ebp),%xmm0
|
|
paddd -48(%ebp),%xmm1
|
|
paddd -32(%ebp),%xmm2
|
|
paddd -16(%ebp),%xmm3
|
|
movdqa %xmm0,%xmm6
|
|
punpckldq %xmm1,%xmm0
|
|
movdqa %xmm2,%xmm7
|
|
punpckldq %xmm3,%xmm2
|
|
punpckhdq %xmm1,%xmm6
|
|
punpckhdq %xmm3,%xmm7
|
|
movdqa %xmm0,%xmm1
|
|
punpcklqdq %xmm2,%xmm0
|
|
movdqa %xmm6,%xmm3
|
|
punpcklqdq %xmm7,%xmm6
|
|
punpckhqdq %xmm2,%xmm1
|
|
punpckhqdq %xmm7,%xmm3
|
|
movdqu -128(%esi),%xmm4
|
|
movdqu -64(%esi),%xmm5
|
|
movdqu (%esi),%xmm2
|
|
movdqu 64(%esi),%xmm7
|
|
leal 16(%esi),%esi
|
|
pxor %xmm0,%xmm4
|
|
movdqa (%ebx),%xmm0
|
|
pxor %xmm1,%xmm5
|
|
movdqa 16(%ebx),%xmm1
|
|
pxor %xmm2,%xmm6
|
|
movdqa 32(%ebx),%xmm2
|
|
pxor %xmm3,%xmm7
|
|
movdqa 48(%ebx),%xmm3
|
|
movdqu %xmm4,-128(%edi)
|
|
movdqu %xmm5,-64(%edi)
|
|
movdqu %xmm6,(%edi)
|
|
movdqu %xmm7,64(%edi)
|
|
leal 16(%edi),%edi
|
|
paddd (%ebp),%xmm0
|
|
paddd 16(%ebp),%xmm1
|
|
paddd 32(%ebp),%xmm2
|
|
paddd 48(%ebp),%xmm3
|
|
movdqa %xmm0,%xmm6
|
|
punpckldq %xmm1,%xmm0
|
|
movdqa %xmm2,%xmm7
|
|
punpckldq %xmm3,%xmm2
|
|
punpckhdq %xmm1,%xmm6
|
|
punpckhdq %xmm3,%xmm7
|
|
movdqa %xmm0,%xmm1
|
|
punpcklqdq %xmm2,%xmm0
|
|
movdqa %xmm6,%xmm3
|
|
punpcklqdq %xmm7,%xmm6
|
|
punpckhqdq %xmm2,%xmm1
|
|
punpckhqdq %xmm7,%xmm3
|
|
movdqu -128(%esi),%xmm4
|
|
movdqu -64(%esi),%xmm5
|
|
movdqu (%esi),%xmm2
|
|
movdqu 64(%esi),%xmm7
|
|
leal 16(%esi),%esi
|
|
pxor %xmm0,%xmm4
|
|
movdqa 64(%ebx),%xmm0
|
|
pxor %xmm1,%xmm5
|
|
movdqa 80(%ebx),%xmm1
|
|
pxor %xmm2,%xmm6
|
|
movdqa 96(%ebx),%xmm2
|
|
pxor %xmm3,%xmm7
|
|
movdqa 112(%ebx),%xmm3
|
|
movdqu %xmm4,-128(%edi)
|
|
movdqu %xmm5,-64(%edi)
|
|
movdqu %xmm6,(%edi)
|
|
movdqu %xmm7,64(%edi)
|
|
leal 16(%edi),%edi
|
|
paddd 64(%ebp),%xmm0
|
|
paddd 80(%ebp),%xmm1
|
|
paddd 96(%ebp),%xmm2
|
|
paddd 112(%ebp),%xmm3
|
|
movdqa %xmm0,%xmm6
|
|
punpckldq %xmm1,%xmm0
|
|
movdqa %xmm2,%xmm7
|
|
punpckldq %xmm3,%xmm2
|
|
punpckhdq %xmm1,%xmm6
|
|
punpckhdq %xmm3,%xmm7
|
|
movdqa %xmm0,%xmm1
|
|
punpcklqdq %xmm2,%xmm0
|
|
movdqa %xmm6,%xmm3
|
|
punpcklqdq %xmm7,%xmm6
|
|
punpckhqdq %xmm2,%xmm1
|
|
punpckhqdq %xmm7,%xmm3
|
|
movdqu -128(%esi),%xmm4
|
|
movdqu -64(%esi),%xmm5
|
|
movdqu (%esi),%xmm2
|
|
movdqu 64(%esi),%xmm7
|
|
leal 208(%esi),%esi
|
|
pxor %xmm0,%xmm4
|
|
pxor %xmm1,%xmm5
|
|
pxor %xmm2,%xmm6
|
|
pxor %xmm3,%xmm7
|
|
movdqu %xmm4,-128(%edi)
|
|
movdqu %xmm5,-64(%edi)
|
|
movdqu %xmm6,(%edi)
|
|
movdqu %xmm7,64(%edi)
|
|
leal 208(%edi),%edi
|
|
subl $256,%ecx
|
|
jnc .L001outer_loop
|
|
addl $256,%ecx
|
|
jz .L003done
|
|
movl 520(%esp),%ebx
|
|
leal -128(%esi),%esi
|
|
movl 516(%esp),%edx
|
|
leal -128(%edi),%edi
|
|
movd 64(%ebp),%xmm2
|
|
movdqu (%ebx),%xmm3
|
|
paddd 96(%eax),%xmm2
|
|
pand 112(%eax),%xmm3
|
|
por %xmm2,%xmm3
|
|
// One-block-at-a-time path. Expects: %eax = .Lssse3_data, %edx = pointer to
// the 32 bytes used as rows 1 and 2, %xmm3 = row 3 (counter row), %ecx =
// bytes left, %esi/%edi = unbiased source/destination pointers.
// Builds the 64-byte input state at 0..63(%esp) and leaves the same four rows
// in xmm0..xmm3, with the two byte-shuffle masks in xmm6 (rotate left 16)
// and xmm7 (rotate left 8).
.L0001x:
    movdqa 32(%eax),%xmm0
    movdqu (%edx),%xmm1
    movdqu 16(%edx),%xmm2
    movdqa (%eax),%xmm6
    movdqa 16(%eax),%xmm7
    // NOTE(review): this dword is overwritten by the 16-byte store of %xmm3
    // to 48(%esp) four instructions below, with no read in between.
    movl %ebp,48(%esp)
    movdqa %xmm0,(%esp)
    movdqa %xmm1,16(%esp)
    movdqa %xmm2,32(%esp)
    movdqa %xmm3,48(%esp)
    // %edx is free now; it becomes the iteration counter of .L004loop1x.
    movl $10,%edx
    jmp .L004loop1x
.align 16
// Next 64-byte block on the one-block path: reload the input rows from the
// stack, add {1,0,0,0} (80(%eax)) to the counter row and store it back, then
// restart the 10-iteration round loop.
.L005outer1x:
    movdqa 80(%eax),%xmm3
    movdqa (%esp),%xmm0
    movdqa 16(%esp),%xmm1
    movdqa 32(%esp),%xmm2
    paddd 48(%esp),%xmm3
    movl $10,%edx
    movdqa %xmm3,48(%esp)
    jmp .L004loop1x
.align 16
// Round loop for a single block: xmm0..xmm3 hold state rows 0..3, xmm4 is
// scratch, xmm6/xmm7 hold the rotate-left-16 / rotate-left-8 byte shuffles
// (loaded at .L0001x). Each iteration runs the quarter-round on all four
// columns, rotates rows 1..3 with pshufd so the diagonals line up as
// columns, runs the quarter-round again, and rotates the rows back.
// The register-register pshufb instructions were emitted by the generator
// as raw .byte sequences; they are written as mnemonics here, like the
// memory-operand pshufb forms in the 4x loop, and assemble to the same bytes.
.L004loop1x:
    // column quarter-round
    paddd %xmm1,%xmm0
    pxor %xmm0,%xmm3
    // was: .byte 102,15,56,0,222
    pshufb %xmm6,%xmm3
    paddd %xmm3,%xmm2
    pxor %xmm2,%xmm1
    movdqa %xmm1,%xmm4
    psrld $20,%xmm1
    pslld $12,%xmm4
    por %xmm4,%xmm1
    paddd %xmm1,%xmm0
    pxor %xmm0,%xmm3
    // was: .byte 102,15,56,0,223
    pshufb %xmm7,%xmm3
    paddd %xmm3,%xmm2
    pxor %xmm2,%xmm1
    movdqa %xmm1,%xmm4
    psrld $25,%xmm1
    pslld $7,%xmm4
    por %xmm4,%xmm1
    // Rotate row 2 by two lanes, row 1 by one lane, row 3 by three lanes.
    pshufd $78,%xmm2,%xmm2
    pshufd $57,%xmm1,%xmm1
    pshufd $147,%xmm3,%xmm3
    nop
    // diagonal quarter-round
    paddd %xmm1,%xmm0
    pxor %xmm0,%xmm3
    // was: .byte 102,15,56,0,222
    pshufb %xmm6,%xmm3
    paddd %xmm3,%xmm2
    pxor %xmm2,%xmm1
    movdqa %xmm1,%xmm4
    psrld $20,%xmm1
    pslld $12,%xmm4
    por %xmm4,%xmm1
    paddd %xmm1,%xmm0
    pxor %xmm0,%xmm3
    // was: .byte 102,15,56,0,223
    pshufb %xmm7,%xmm3
    paddd %xmm3,%xmm2
    pxor %xmm2,%xmm1
    movdqa %xmm1,%xmm4
    psrld $25,%xmm1
    pslld $7,%xmm4
    por %xmm4,%xmm1
    // Undo the lane rotation of rows 1..3.
    pshufd $78,%xmm2,%xmm2
    pshufd $147,%xmm1,%xmm1
    pshufd $57,%xmm3,%xmm3
    decl %edx
    jnz .L004loop1x

    // Add the input state saved at 0..63(%esp) to get the 64 output bytes.
    paddd (%esp),%xmm0
    paddd 16(%esp),%xmm1
    paddd 32(%esp),%xmm2
    paddd 48(%esp),%xmm3
    // Fewer than 64 bytes left: finish byte by byte.
    cmpl $64,%ecx
    jb .L006tail
    // Full block: XOR 64 source bytes with xmm0..xmm3 and store them.
    movdqu (%esi),%xmm4
    movdqu 16(%esi),%xmm5
    pxor %xmm4,%xmm0
    movdqu 32(%esi),%xmm4
    pxor %xmm5,%xmm1
    movdqu 48(%esi),%xmm5
    pxor %xmm4,%xmm2
    pxor %xmm5,%xmm3
    leal 64(%esi),%esi
    movdqu %xmm0,(%edi)
    movdqu %xmm1,16(%edi)
    movdqu %xmm2,32(%edi)
    movdqu %xmm3,48(%edi)
    leal 64(%edi),%edi
    subl $64,%ecx
    jnz .L005outer1x
    jmp .L003done
// Final partial block (reached with %ecx < 64): write the 64 bytes held in
// xmm0..xmm3 to 0..63(%esp), then XOR %ecx source bytes with them one at a
// time. %eax and %edx become byte scratch and %ebp the byte index, so the
// table pointer in %eax is gone from here on. Falls through into .L003done.
.L006tail:
    movdqa %xmm0,(%esp)
    movdqa %xmm1,16(%esp)
    movdqa %xmm2,32(%esp)
    movdqa %xmm3,48(%esp)
    xorl %eax,%eax
    xorl %edx,%edx
    xorl %ebp,%ebp
.L007tail_loop:
    movb (%esp,%ebp,1),%al
    movb (%esi,%ebp,1),%dl
    // leal bumps the index without touching flags; the store below
    // compensates with a -1 displacement.
    leal 1(%ebp),%ebp
    xorb %dl,%al
    movb %al,-1(%edi,%ebp,1)
    decl %ecx
    jnz .L007tail_loop
// Common exit: reload the caller %esp saved at 512(%esp) in the prologue and
// pop the four saved registers in reverse push order.
// NOTE(review): nothing between here and ret clears the stack scratch area
// that held the working state; confirm whether that is intended.
.L003done:
    movl 512(%esp),%esp
    popl %edi
    popl %esi
    popl %ebx
    popl %ebp
    ret
.size ChaCha20_ctr32_ssse3,.-.L_ChaCha20_ctr32_ssse3_begin
.align 64
// Constant table, addressed through %eax by the code above. The 64-byte
// alignment keeps every 16-byte row usable as an aligned memory operand.
.Lssse3_data:
// +0: pshufb mask that rotates every dword left by 16 bits
.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
// +16: pshufb mask that rotates every dword left by 8 bits
.byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
// +32: first state row (0x61707865, 0x3320646e, 0x79622d32, 0x6b206574)
.long 1634760805,857760878,2036477234,1797285236
// +48: per-lane counter offsets for the four parallel blocks
.long 0,1,2,3
// +64: counter step per four-block batch
.long 4,4,4,4
// +80: counter step per single block
.long 1,0,0,0
// +96: added to the first counter lane when leaving the 4x loop
.long 4,0,0,0
// +112: mask that clears the first dword of the counter row
.long 0,-1,-1,-1
.align 64
// ASCII, NUL-terminated: ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>
.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte 114,103,62,0
#endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)