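/*
 * ChaCha20 stream cipher for 32-bit x86 (AT&T syntax, GNU as).
 * This appears to be generated CRYPTOGAMS/OpenSSL perlasm output; see the
 * copyright string encoded in the trailing .byte block at the end of the
 * file. It provides an integer-only path and an SSSE3 path.
 */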
#if defined(__i386__)
.file   "chacha-x86.S"
.text
.globl  ChaCha20_ctr32
.hidden ChaCha20_ctr32
.type   ChaCha20_ctr32,@function
.align  16
ChaCha20_ctr32:
.L_ChaCha20_ctr32_begin:
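/*
 * ChaCha20_ctr32(out, inp, len, key[8], counter[4]): with cdecl stack
 * arguments, after the four register pushes below they sit at
 * 20/24/28/32/36(%esp). A zero length returns immediately; otherwise the
 * code dispatches to the SSSE3 shortcut when the CPU supports it and
 * falls back to the integer-only .L001x86 path. (Argument layout is
 * inferred from the offsets used below.)
 */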
        pushl   %ebp
        pushl   %ebx
        pushl   %esi
        pushl   %edi
        xorl    %eax,%eax
        cmpl    28(%esp),%eax
        je      .L000no_data
        call    .Lpic_point
.Lpic_point:
        popl    %eax
        leal    OPENSSL_ia32cap_P-.Lpic_point(%eax),%ebp
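/*
 * Capability check against OPENSSL_ia32cap_P: bit 24 of word 0 (FXSR) and
 * bit 9 of word 1 (SSSE3) must both be set before taking the SSSE3
 * shortcut. (Bit meanings are read off OpenSSL's ia32cap layout.)
 */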
        testl   $16777216,(%ebp)
        jz      .L001x86
        testl   $512,4(%ebp)
        jz      .L001x86
        jmp     .Lssse3_shortcut
.L001x86:
        movl    32(%esp),%esi
        movl    36(%esp),%edi
        subl    $132,%esp
        movl    (%esi),%eax
        movl    4(%esi),%ebx
        movl    8(%esi),%ecx
        movl    12(%esi),%edx
        movl    %eax,80(%esp)
        movl    %ebx,84(%esp)
        movl    %ecx,88(%esp)
        movl    %edx,92(%esp)
        movl    16(%esi),%eax
        movl    20(%esi),%ebx
        movl    24(%esi),%ecx
        movl    28(%esi),%edx
        movl    %eax,96(%esp)
        movl    %ebx,100(%esp)
        movl    %ecx,104(%esp)
        movl    %edx,108(%esp)
        movl    (%edi),%eax
        movl    4(%edi),%ebx
        movl    8(%edi),%ecx
        movl    12(%edi),%edx
        subl    $1,%eax
        movl    %eax,112(%esp)
        movl    %ebx,116(%esp)
        movl    %ecx,120(%esp)
        movl    %edx,124(%esp)
        jmp     .L002entry
.align  16
.L003outer_loop:
        movl    %ebx,156(%esp)
        movl    %eax,152(%esp)
        movl    %ecx,160(%esp)
.L002entry:
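/*
 * Per-block setup: the four immediates below are the ChaCha constants
 * "expa" "nd 3" "2-by" "te k" (sigma). The 16-word working state lives
 * partly in registers and partly at 0..60(%esp); the caller's key,
 * counter and nonce words are cached at 80..124(%esp).
 */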
        movl    $1634760805,%eax
        movl    $857760878,4(%esp)
        movl    $2036477234,8(%esp)
        movl    $1797285236,12(%esp)
        movl    84(%esp),%ebx
        movl    88(%esp),%ebp
        movl    104(%esp),%ecx
        movl    108(%esp),%esi
        movl    116(%esp),%edx
        movl    120(%esp),%edi
        movl    %ebx,20(%esp)
        movl    %ebp,24(%esp)
        movl    %ecx,40(%esp)
        movl    %esi,44(%esp)
        movl    %edx,52(%esp)
        movl    %edi,56(%esp)
        movl    92(%esp),%ebx
        movl    124(%esp),%edi
        movl    112(%esp),%edx
        movl    80(%esp),%ebp
        movl    96(%esp),%ecx
        movl    100(%esp),%esi
        addl    $1,%edx
        movl    %ebx,28(%esp)
        movl    %edi,60(%esp)
        movl    %edx,112(%esp)
        movl    $10,%ebx
        jmp     .L004loop
.align  16
.L004loop:
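/*
 * One pass of this loop is a ChaCha double round (column round followed
 * by diagonal round); %ebx counts down from 10, giving 20 rounds. Each
 * quarter round is the usual add/xor/rotate sequence, with the rotate
 * amounts 16, 12, 8 and 7 visible in the roll instructions below.
 */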
        addl    %ebp,%eax
        movl    %ebx,128(%esp)
        movl    %ebp,%ebx
        xorl    %eax,%edx
        roll    $16,%edx
        addl    %edx,%ecx
        xorl    %ecx,%ebx
        movl    52(%esp),%edi
        roll    $12,%ebx
        movl    20(%esp),%ebp
        addl    %ebx,%eax
        xorl    %eax,%edx
        movl    %eax,(%esp)
        roll    $8,%edx
        movl    4(%esp),%eax
        addl    %edx,%ecx
        movl    %edx,48(%esp)
        xorl    %ecx,%ebx
        addl    %ebp,%eax
        roll    $7,%ebx
        xorl    %eax,%edi
        movl    %ecx,32(%esp)
        roll    $16,%edi
        movl    %ebx,16(%esp)
        addl    %edi,%esi
        movl    40(%esp),%ecx
        xorl    %esi,%ebp
        movl    56(%esp),%edx
        roll    $12,%ebp
        movl    24(%esp),%ebx
        addl    %ebp,%eax
        xorl    %eax,%edi
        movl    %eax,4(%esp)
        roll    $8,%edi
        movl    8(%esp),%eax
        addl    %edi,%esi
        movl    %edi,52(%esp)
        xorl    %esi,%ebp
        addl    %ebx,%eax
        roll    $7,%ebp
        xorl    %eax,%edx
        movl    %esi,36(%esp)
        roll    $16,%edx
        movl    %ebp,20(%esp)
        addl    %edx,%ecx
        movl    44(%esp),%esi
        xorl    %ecx,%ebx
        movl    60(%esp),%edi
        roll    $12,%ebx
        movl    28(%esp),%ebp
        addl    %ebx,%eax
        xorl    %eax,%edx
        movl    %eax,8(%esp)
        roll    $8,%edx
        movl    12(%esp),%eax
        addl    %edx,%ecx
        movl    %edx,56(%esp)
        xorl    %ecx,%ebx
        addl    %ebp,%eax
        roll    $7,%ebx
        xorl    %eax,%edi
        roll    $16,%edi
        movl    %ebx,24(%esp)
        addl    %edi,%esi
        xorl    %esi,%ebp
        roll    $12,%ebp
        movl    20(%esp),%ebx
        addl    %ebp,%eax
        xorl    %eax,%edi
        movl    %eax,12(%esp)
        roll    $8,%edi
        movl    (%esp),%eax
        addl    %edi,%esi
        movl    %edi,%edx
        xorl    %esi,%ebp
        addl    %ebx,%eax
        roll    $7,%ebp
        xorl    %eax,%edx
        roll    $16,%edx
        movl    %ebp,28(%esp)
        addl    %edx,%ecx
        xorl    %ecx,%ebx
        movl    48(%esp),%edi
        roll    $12,%ebx
        movl    24(%esp),%ebp
        addl    %ebx,%eax
        xorl    %eax,%edx
        movl    %eax,(%esp)
        roll    $8,%edx
        movl    4(%esp),%eax
        addl    %edx,%ecx
        movl    %edx,60(%esp)
        xorl    %ecx,%ebx
        addl    %ebp,%eax
        roll    $7,%ebx
        xorl    %eax,%edi
        movl    %ecx,40(%esp)
        roll    $16,%edi
        movl    %ebx,20(%esp)
        addl    %edi,%esi
        movl    32(%esp),%ecx
        xorl    %esi,%ebp
        movl    52(%esp),%edx
        roll    $12,%ebp
        movl    28(%esp),%ebx
        addl    %ebp,%eax
        xorl    %eax,%edi
        movl    %eax,4(%esp)
        roll    $8,%edi
        movl    8(%esp),%eax
        addl    %edi,%esi
        movl    %edi,48(%esp)
        xorl    %esi,%ebp
        addl    %ebx,%eax
        roll    $7,%ebp
        xorl    %eax,%edx
        movl    %esi,44(%esp)
        roll    $16,%edx
        movl    %ebp,24(%esp)
        addl    %edx,%ecx
        movl    36(%esp),%esi
        xorl    %ecx,%ebx
        movl    56(%esp),%edi
        roll    $12,%ebx
        movl    16(%esp),%ebp
        addl    %ebx,%eax
        xorl    %eax,%edx
        movl    %eax,8(%esp)
        roll    $8,%edx
        movl    12(%esp),%eax
        addl    %edx,%ecx
        movl    %edx,52(%esp)
        xorl    %ecx,%ebx
        addl    %ebp,%eax
        roll    $7,%ebx
        xorl    %eax,%edi
        roll    $16,%edi
        movl    %ebx,28(%esp)
        addl    %edi,%esi
        xorl    %esi,%ebp
        movl    48(%esp),%edx
        roll    $12,%ebp
        movl    128(%esp),%ebx
        addl    %ebp,%eax
        xorl    %eax,%edi
        movl    %eax,12(%esp)
        roll    $8,%edi
        movl    (%esp),%eax
        addl    %edi,%esi
        movl    %edi,56(%esp)
        xorl    %esi,%ebp
        roll    $7,%ebp
        decl    %ebx
        jnz     .L004loop
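/*
 * Block finished: add the cached input words back into the state to form
 * the keystream, then XOR a full 64-byte block from the input pointer at
 * 156(%esp) into the output at 152(%esp); 160(%esp) holds the remaining
 * length. Fewer than 64 remaining bytes branch to .L005tail instead.
 */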
        movl    160(%esp),%ebx
        addl    $1634760805,%eax
        addl    80(%esp),%ebp
        addl    96(%esp),%ecx
        addl    100(%esp),%esi
        cmpl    $64,%ebx
        jb      .L005tail
        movl    156(%esp),%ebx
        addl    112(%esp),%edx
        addl    120(%esp),%edi
        xorl    (%ebx),%eax
        xorl    16(%ebx),%ebp
        movl    %eax,(%esp)
        movl    152(%esp),%eax
        xorl    32(%ebx),%ecx
        xorl    36(%ebx),%esi
        xorl    48(%ebx),%edx
        xorl    56(%ebx),%edi
        movl    %ebp,16(%esp)
        movl    (%esp),%ebp
        movl    %ecx,32(%esp)
        movl    %esi,36(%esp)
        movl    %edx,48(%esp)
        movl    %edi,56(%esp)
        movl    %ebp,(%eax)
        movl    4(%esp),%ebp
        movl    8(%esp),%ecx
        movl    12(%esp),%esi
        movl    20(%esp),%edx
        movl    24(%esp),%edi
        addl    $857760878,%ebp
        addl    $2036477234,%ecx
        addl    $1797285236,%esi
        addl    84(%esp),%edx
        addl    88(%esp),%edi
        xorl    4(%ebx),%ebp
        xorl    8(%ebx),%ecx
        xorl    12(%ebx),%esi
        xorl    20(%ebx),%edx
        xorl    24(%ebx),%edi
        movl    %ebp,4(%eax)
        movl    16(%esp),%ebp
        movl    %ecx,8(%eax)
        movl    %esi,12(%eax)
        movl    %ebp,16(%eax)
        movl    %edx,20(%eax)
        movl    %edi,24(%eax)
        movl    28(%esp),%ecx
        movl    32(%esp),%edx
        movl    36(%esp),%edi
        addl    92(%esp),%ecx
        movl    40(%esp),%ebp
        xorl    28(%ebx),%ecx
        movl    44(%esp),%esi
        movl    %ecx,28(%eax)
        movl    %edx,32(%eax)
        movl    %edi,36(%eax)
        addl    104(%esp),%ebp
        addl    108(%esp),%esi
        xorl    40(%ebx),%ebp
        xorl    44(%ebx),%esi
        movl    %ebp,40(%eax)
        movl    %esi,44(%eax)
        movl    48(%esp),%ecx
        movl    56(%esp),%esi
        movl    52(%esp),%edx
        movl    60(%esp),%edi
        addl    116(%esp),%edx
        addl    124(%esp),%edi
        xorl    52(%ebx),%edx
        xorl    60(%ebx),%edi
        leal    64(%ebx),%ebx
        movl    %ecx,48(%eax)
        movl    160(%esp),%ecx
        movl    %edx,52(%eax)
        movl    %esi,56(%eax)
        movl    %edi,60(%eax)
        leal    64(%eax),%eax
        subl    $64,%ecx
        jnz     .L003outer_loop
        jmp     .L006done
.L005tail:
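/*
 * Final partial block: finish the keystream into the stack buffer, then
 * XOR it with the remaining input one byte at a time in .L007tail_loop.
 */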
        addl    112(%esp),%edx
        addl    120(%esp),%edi
        movl    %eax,(%esp)
        movl    %ebp,16(%esp)
        movl    %ecx,32(%esp)
        movl    %esi,36(%esp)
        movl    %edx,48(%esp)
        movl    %edi,56(%esp)
        movl    4(%esp),%ebp
        movl    8(%esp),%ecx
        movl    12(%esp),%esi
        movl    20(%esp),%edx
        movl    24(%esp),%edi
        addl    $857760878,%ebp
        addl    $2036477234,%ecx
        addl    $1797285236,%esi
        addl    84(%esp),%edx
        addl    88(%esp),%edi
        movl    %ebp,4(%esp)
        movl    %ecx,8(%esp)
        movl    %esi,12(%esp)
        movl    %edx,20(%esp)
        movl    %edi,24(%esp)
        movl    28(%esp),%ebp
        movl    40(%esp),%ecx
        movl    44(%esp),%esi
        movl    52(%esp),%edx
        movl    60(%esp),%edi
        addl    92(%esp),%ebp
        addl    104(%esp),%ecx
        addl    108(%esp),%esi
        addl    116(%esp),%edx
        addl    124(%esp),%edi
        movl    %ebp,28(%esp)
        movl    156(%esp),%ebp
        movl    %ecx,40(%esp)
        movl    152(%esp),%ecx
        movl    %esi,44(%esp)
        xorl    %esi,%esi
        movl    %edx,52(%esp)
        movl    %edi,60(%esp)
        xorl    %eax,%eax
        xorl    %edx,%edx
.L007tail_loop:
        movb    (%esi,%ebp,1),%al
        movb    (%esp,%esi,1),%dl
        leal    1(%esi),%esi
        xorb    %dl,%al
        movb    %al,-1(%ecx,%esi,1)
        decl    %ebx
        jnz     .L007tail_loop
.L006done:
        addl    $132,%esp
.L000no_data:
        popl    %edi
        popl    %esi
        popl    %ebx
        popl    %ebp
        ret
.size   ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
.globl  ChaCha20_ssse3
.hidden ChaCha20_ssse3
.type   ChaCha20_ssse3,@function
.align  16
ChaCha20_ssse3:
.L_ChaCha20_ssse3_begin:
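/*
 * SSSE3 path (also reached via .Lssse3_shortcut above with the same
 * argument layout): processes four 64-byte blocks per outer iteration
 * while at least 256 bytes remain, then drops to the single-block code
 * at .L0081x for whatever is left.
 */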
        pushl   %ebp
        pushl   %ebx
        pushl   %esi
        pushl   %edi
.Lssse3_shortcut:
        movl    20(%esp),%edi
        movl    24(%esp),%esi
        movl    28(%esp),%ecx
        movl    32(%esp),%edx
        movl    36(%esp),%ebx
        movl    %esp,%ebp
        subl    $524,%esp
        andl    $-64,%esp
        movl    %ebp,512(%esp)
        leal    .Lssse3_data-.Lpic_point(%eax),%eax
        movdqu  (%ebx),%xmm3
        cmpl    $256,%ecx
        jb      .L0081x
        movl    %edx,516(%esp)
        movl    %ebx,520(%esp)
        subl    $256,%ecx
        leal    384(%esp),%ebp
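/*
 * Build the 4-way state: each key/counter/nonce word is broadcast into
 * its own xmm register (one block per 32-bit lane), the counter lanes
 * are staggered by 0..3 using the constants in .Lssse3_data, and the
 * result is parked in the aligned scratch area addressed via %ebp/%ebx.
 */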
        movdqu  (%edx),%xmm7
        pshufd  $0,%xmm3,%xmm0
        pshufd  $85,%xmm3,%xmm1
        pshufd  $170,%xmm3,%xmm2
        pshufd  $255,%xmm3,%xmm3
        paddd   48(%eax),%xmm0
        pshufd  $0,%xmm7,%xmm4
        pshufd  $85,%xmm7,%xmm5
        psubd   64(%eax),%xmm0
        pshufd  $170,%xmm7,%xmm6
        pshufd  $255,%xmm7,%xmm7
        movdqa  %xmm0,64(%ebp)
        movdqa  %xmm1,80(%ebp)
        movdqa  %xmm2,96(%ebp)
        movdqa  %xmm3,112(%ebp)
        movdqu  16(%edx),%xmm3
        movdqa  %xmm4,-64(%ebp)
        movdqa  %xmm5,-48(%ebp)
        movdqa  %xmm6,-32(%ebp)
        movdqa  %xmm7,-16(%ebp)
        movdqa  32(%eax),%xmm7
        leal    128(%esp),%ebx
        pshufd  $0,%xmm3,%xmm0
        pshufd  $85,%xmm3,%xmm1
        pshufd  $170,%xmm3,%xmm2
        pshufd  $255,%xmm3,%xmm3
        pshufd  $0,%xmm7,%xmm4
        pshufd  $85,%xmm7,%xmm5
        pshufd  $170,%xmm7,%xmm6
        pshufd  $255,%xmm7,%xmm7
        movdqa  %xmm0,(%ebp)
        movdqa  %xmm1,16(%ebp)
        movdqa  %xmm2,32(%ebp)
        movdqa  %xmm3,48(%ebp)
        movdqa  %xmm4,-128(%ebp)
        movdqa  %xmm5,-112(%ebp)
        movdqa  %xmm6,-96(%ebp)
        movdqa  %xmm7,-80(%ebp)
        leal    128(%esi),%esi
        leal    128(%edi),%edi
        jmp     .L009outer_loop
.align  16
.L009outer_loop:
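/*
 * Refresh the working copies (around %ebx) from the saved 4-way input
 * state (around %ebp) and advance the four block counters by 4 via the
 * paddd 64(%eax) below.
 */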
        movdqa  -112(%ebp),%xmm1
        movdqa  -96(%ebp),%xmm2
        movdqa  -80(%ebp),%xmm3
        movdqa  -48(%ebp),%xmm5
        movdqa  -32(%ebp),%xmm6
        movdqa  -16(%ebp),%xmm7
        movdqa  %xmm1,-112(%ebx)
        movdqa  %xmm2,-96(%ebx)
        movdqa  %xmm3,-80(%ebx)
        movdqa  %xmm5,-48(%ebx)
        movdqa  %xmm6,-32(%ebx)
        movdqa  %xmm7,-16(%ebx)
        movdqa  32(%ebp),%xmm2
        movdqa  48(%ebp),%xmm3
        movdqa  64(%ebp),%xmm4
        movdqa  80(%ebp),%xmm5
        movdqa  96(%ebp),%xmm6
        movdqa  112(%ebp),%xmm7
        paddd   64(%eax),%xmm4
        movdqa  %xmm2,32(%ebx)
        movdqa  %xmm3,48(%ebx)
        movdqa  %xmm4,64(%ebx)
        movdqa  %xmm5,80(%ebx)
        movdqa  %xmm6,96(%ebx)
        movdqa  %xmm7,112(%ebx)
        movdqa  %xmm4,64(%ebp)
        movdqa  -128(%ebp),%xmm0
        movdqa  %xmm4,%xmm6
        movdqa  -64(%ebp),%xmm3
        movdqa  (%ebp),%xmm4
        movdqa  16(%ebp),%xmm5
        movl    $10,%edx
        nop
.align  16
.L010loop:
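/*
 * Vectorized double round over four blocks. Rotates by 16 and 8 use
 * pshufb with the byte-shuffle masks at (%eax) and 16(%eax); rotates by
 * 12 and 7 are done with pslld/psrld/por pairs.
 */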
        paddd   %xmm3,%xmm0
        movdqa  %xmm3,%xmm2
        pxor    %xmm0,%xmm6
        pshufb  (%eax),%xmm6
        paddd   %xmm6,%xmm4
        pxor    %xmm4,%xmm2
        movdqa  -48(%ebx),%xmm3
        movdqa  %xmm2,%xmm1
        pslld   $12,%xmm2
        psrld   $20,%xmm1
        por     %xmm1,%xmm2
        movdqa  -112(%ebx),%xmm1
        paddd   %xmm2,%xmm0
        movdqa  80(%ebx),%xmm7
        pxor    %xmm0,%xmm6
        movdqa  %xmm0,-128(%ebx)
        pshufb  16(%eax),%xmm6
        paddd   %xmm6,%xmm4
        movdqa  %xmm6,64(%ebx)
        pxor    %xmm4,%xmm2
        paddd   %xmm3,%xmm1
        movdqa  %xmm2,%xmm0
        pslld   $7,%xmm2
        psrld   $25,%xmm0
        pxor    %xmm1,%xmm7
        por     %xmm0,%xmm2
        movdqa  %xmm4,(%ebx)
        pshufb  (%eax),%xmm7
        movdqa  %xmm2,-64(%ebx)
        paddd   %xmm7,%xmm5
        movdqa  32(%ebx),%xmm4
        pxor    %xmm5,%xmm3
        movdqa  -32(%ebx),%xmm2
        movdqa  %xmm3,%xmm0
        pslld   $12,%xmm3
        psrld   $20,%xmm0
        por     %xmm0,%xmm3
        movdqa  -96(%ebx),%xmm0
        paddd   %xmm3,%xmm1
        movdqa  96(%ebx),%xmm6
        pxor    %xmm1,%xmm7
        movdqa  %xmm1,-112(%ebx)
        pshufb  16(%eax),%xmm7
        paddd   %xmm7,%xmm5
        movdqa  %xmm7,80(%ebx)
        pxor    %xmm5,%xmm3
        paddd   %xmm2,%xmm0
        movdqa  %xmm3,%xmm1
        pslld   $7,%xmm3
        psrld   $25,%xmm1
        pxor    %xmm0,%xmm6
        por     %xmm1,%xmm3
        movdqa  %xmm5,16(%ebx)
        pshufb  (%eax),%xmm6
        movdqa  %xmm3,-48(%ebx)
        paddd   %xmm6,%xmm4
        movdqa  48(%ebx),%xmm5
        pxor    %xmm4,%xmm2
        movdqa  -16(%ebx),%xmm3
        movdqa  %xmm2,%xmm1
        pslld   $12,%xmm2
        psrld   $20,%xmm1
        por     %xmm1,%xmm2
        movdqa  -80(%ebx),%xmm1
        paddd   %xmm2,%xmm0
        movdqa  112(%ebx),%xmm7
        pxor    %xmm0,%xmm6
        movdqa  %xmm0,-96(%ebx)
        pshufb  16(%eax),%xmm6
        paddd   %xmm6,%xmm4
        movdqa  %xmm6,96(%ebx)
        pxor    %xmm4,%xmm2
        paddd   %xmm3,%xmm1
        movdqa  %xmm2,%xmm0
        pslld   $7,%xmm2
        psrld   $25,%xmm0
        pxor    %xmm1,%xmm7
        por     %xmm0,%xmm2
        pshufb  (%eax),%xmm7
        movdqa  %xmm2,-32(%ebx)
        paddd   %xmm7,%xmm5
        pxor    %xmm5,%xmm3
        movdqa  -48(%ebx),%xmm2
        movdqa  %xmm3,%xmm0
        pslld   $12,%xmm3
        psrld   $20,%xmm0
        por     %xmm0,%xmm3
        movdqa  -128(%ebx),%xmm0
        paddd   %xmm3,%xmm1
        pxor    %xmm1,%xmm7
        movdqa  %xmm1,-80(%ebx)
        pshufb  16(%eax),%xmm7
        paddd   %xmm7,%xmm5
        movdqa  %xmm7,%xmm6
        pxor    %xmm5,%xmm3
        paddd   %xmm2,%xmm0
        movdqa  %xmm3,%xmm1
        pslld   $7,%xmm3
        psrld   $25,%xmm1
        pxor    %xmm0,%xmm6
        por     %xmm1,%xmm3
        pshufb  (%eax),%xmm6
        movdqa  %xmm3,-16(%ebx)
        paddd   %xmm6,%xmm4
        pxor    %xmm4,%xmm2
        movdqa  -32(%ebx),%xmm3
        movdqa  %xmm2,%xmm1
        pslld   $12,%xmm2
        psrld   $20,%xmm1
        por     %xmm1,%xmm2
        movdqa  -112(%ebx),%xmm1
        paddd   %xmm2,%xmm0
        movdqa  64(%ebx),%xmm7
        pxor    %xmm0,%xmm6
        movdqa  %xmm0,-128(%ebx)
        pshufb  16(%eax),%xmm6
        paddd   %xmm6,%xmm4
        movdqa  %xmm6,112(%ebx)
        pxor    %xmm4,%xmm2
        paddd   %xmm3,%xmm1
        movdqa  %xmm2,%xmm0
        pslld   $7,%xmm2
        psrld   $25,%xmm0
        pxor    %xmm1,%xmm7
        por     %xmm0,%xmm2
        movdqa  %xmm4,32(%ebx)
        pshufb  (%eax),%xmm7
        movdqa  %xmm2,-48(%ebx)
        paddd   %xmm7,%xmm5
        movdqa  (%ebx),%xmm4
        pxor    %xmm5,%xmm3
        movdqa  -16(%ebx),%xmm2
        movdqa  %xmm3,%xmm0
        pslld   $12,%xmm3
        psrld   $20,%xmm0
        por     %xmm0,%xmm3
        movdqa  -96(%ebx),%xmm0
        paddd   %xmm3,%xmm1
        movdqa  80(%ebx),%xmm6
        pxor    %xmm1,%xmm7
        movdqa  %xmm1,-112(%ebx)
        pshufb  16(%eax),%xmm7
        paddd   %xmm7,%xmm5
        movdqa  %xmm7,64(%ebx)
        pxor    %xmm5,%xmm3
        paddd   %xmm2,%xmm0
        movdqa  %xmm3,%xmm1
        pslld   $7,%xmm3
        psrld   $25,%xmm1
        pxor    %xmm0,%xmm6
        por     %xmm1,%xmm3
        movdqa  %xmm5,48(%ebx)
        pshufb  (%eax),%xmm6
        movdqa  %xmm3,-32(%ebx)
        paddd   %xmm6,%xmm4
        movdqa  16(%ebx),%xmm5
        pxor    %xmm4,%xmm2
        movdqa  -64(%ebx),%xmm3
        movdqa  %xmm2,%xmm1
        pslld   $12,%xmm2
        psrld   $20,%xmm1
        por     %xmm1,%xmm2
        movdqa  -80(%ebx),%xmm1
        paddd   %xmm2,%xmm0
        movdqa  96(%ebx),%xmm7
        pxor    %xmm0,%xmm6
        movdqa  %xmm0,-96(%ebx)
        pshufb  16(%eax),%xmm6
        paddd   %xmm6,%xmm4
        movdqa  %xmm6,80(%ebx)
        pxor    %xmm4,%xmm2
        paddd   %xmm3,%xmm1
        movdqa  %xmm2,%xmm0
        pslld   $7,%xmm2
        psrld   $25,%xmm0
        pxor    %xmm1,%xmm7
        por     %xmm0,%xmm2
        pshufb  (%eax),%xmm7
        movdqa  %xmm2,-16(%ebx)
        paddd   %xmm7,%xmm5
        pxor    %xmm5,%xmm3
        movdqa  %xmm3,%xmm0
        pslld   $12,%xmm3
        psrld   $20,%xmm0
        por     %xmm0,%xmm3
        movdqa  -128(%ebx),%xmm0
        paddd   %xmm3,%xmm1
        movdqa  64(%ebx),%xmm6
        pxor    %xmm1,%xmm7
        movdqa  %xmm1,-80(%ebx)
        pshufb  16(%eax),%xmm7
        paddd   %xmm7,%xmm5
        movdqa  %xmm7,96(%ebx)
        pxor    %xmm5,%xmm3
        movdqa  %xmm3,%xmm1
        pslld   $7,%xmm3
        psrld   $25,%xmm1
        por     %xmm1,%xmm3
        decl    %edx
        jnz     .L010loop
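/*
 * Rounds done: add the saved input state back in, transpose the four
 * lane-sliced blocks back into consecutive 64-byte blocks (the punpck*
 * sequences below), then XOR 256 bytes of input and store the result.
 */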
        movdqa  %xmm3,-64(%ebx)
        movdqa  %xmm4,(%ebx)
        movdqa  %xmm5,16(%ebx)
        movdqa  %xmm6,64(%ebx)
        movdqa  %xmm7,96(%ebx)
        movdqa  -112(%ebx),%xmm1
        movdqa  -96(%ebx),%xmm2
        movdqa  -80(%ebx),%xmm3
        paddd   -128(%ebp),%xmm0
        paddd   -112(%ebp),%xmm1
        paddd   -96(%ebp),%xmm2
        paddd   -80(%ebp),%xmm3
        movdqa  %xmm0,%xmm6
        punpckldq       %xmm1,%xmm0
        movdqa  %xmm2,%xmm7
        punpckldq       %xmm3,%xmm2
        punpckhdq       %xmm1,%xmm6
        punpckhdq       %xmm3,%xmm7
        movdqa  %xmm0,%xmm1
        punpcklqdq      %xmm2,%xmm0
        movdqa  %xmm6,%xmm3
        punpcklqdq      %xmm7,%xmm6
        punpckhqdq      %xmm2,%xmm1
        punpckhqdq      %xmm7,%xmm3
        movdqa  %xmm0,-128(%ebx)
        movdqa  -64(%ebx),%xmm0
        movdqa  %xmm1,-112(%ebx)
        movdqa  %xmm6,-96(%ebx)
        movdqa  %xmm3,-80(%ebx)
        movdqa  -48(%ebx),%xmm1
        movdqa  -32(%ebx),%xmm2
        movdqa  -16(%ebx),%xmm3
        paddd   -64(%ebp),%xmm0
        paddd   -48(%ebp),%xmm1
        paddd   -32(%ebp),%xmm2
        paddd   -16(%ebp),%xmm3
        movdqa  %xmm0,%xmm6
        punpckldq       %xmm1,%xmm0
        movdqa  %xmm2,%xmm7
        punpckldq       %xmm3,%xmm2
        punpckhdq       %xmm1,%xmm6
        punpckhdq       %xmm3,%xmm7
        movdqa  %xmm0,%xmm1
        punpcklqdq      %xmm2,%xmm0
        movdqa  %xmm6,%xmm3
        punpcklqdq      %xmm7,%xmm6
        punpckhqdq      %xmm2,%xmm1
        punpckhqdq      %xmm7,%xmm3
        movdqa  %xmm0,-64(%ebx)
        movdqa  (%ebx),%xmm0
        movdqa  %xmm1,-48(%ebx)
        movdqa  %xmm6,-32(%ebx)
        movdqa  %xmm3,-16(%ebx)
        movdqa  16(%ebx),%xmm1
        movdqa  32(%ebx),%xmm2
        movdqa  48(%ebx),%xmm3
        paddd   (%ebp),%xmm0
        paddd   16(%ebp),%xmm1
        paddd   32(%ebp),%xmm2
        paddd   48(%ebp),%xmm3
        movdqa  %xmm0,%xmm6
        punpckldq       %xmm1,%xmm0
        movdqa  %xmm2,%xmm7
        punpckldq       %xmm3,%xmm2
        punpckhdq       %xmm1,%xmm6
        punpckhdq       %xmm3,%xmm7
        movdqa  %xmm0,%xmm1
        punpcklqdq      %xmm2,%xmm0
        movdqa  %xmm6,%xmm3
        punpcklqdq      %xmm7,%xmm6
        punpckhqdq      %xmm2,%xmm1
        punpckhqdq      %xmm7,%xmm3
        movdqa  %xmm0,(%ebx)
        movdqa  64(%ebx),%xmm0
        movdqa  %xmm1,16(%ebx)
        movdqa  %xmm6,32(%ebx)
        movdqa  %xmm3,48(%ebx)
        movdqa  80(%ebx),%xmm1
        movdqa  96(%ebx),%xmm2
        movdqa  112(%ebx),%xmm3
        paddd   64(%ebp),%xmm0
        paddd   80(%ebp),%xmm1
        paddd   96(%ebp),%xmm2
        paddd   112(%ebp),%xmm3
        movdqa  %xmm0,%xmm6
        punpckldq       %xmm1,%xmm0
        movdqa  %xmm2,%xmm7
        punpckldq       %xmm3,%xmm2
        punpckhdq       %xmm1,%xmm6
        punpckhdq       %xmm3,%xmm7
        movdqa  %xmm0,%xmm1
        punpcklqdq      %xmm2,%xmm0
        movdqa  %xmm6,%xmm3
        punpcklqdq      %xmm7,%xmm6
        punpckhqdq      %xmm2,%xmm1
        punpckhqdq      %xmm7,%xmm3
        movdqa  %xmm0,64(%ebx)
        movdqa  %xmm1,80(%ebx)
        movdqa  %xmm6,96(%ebx)
        movdqa  %xmm3,112(%ebx)
        movdqu  -128(%esi),%xmm0
        movdqu  -112(%esi),%xmm1
        movdqu  -96(%esi),%xmm2
        movdqu  -80(%esi),%xmm3
        pxor    -128(%ebx),%xmm0
        pxor    -64(%ebx),%xmm1
        pxor    (%ebx),%xmm2
        pxor    64(%ebx),%xmm3
        movdqu  %xmm0,-128(%edi)
        movdqu  %xmm1,-112(%edi)
        movdqu  %xmm2,-96(%edi)
        movdqu  %xmm3,-80(%edi)
        movdqu  -64(%esi),%xmm0
        movdqu  -48(%esi),%xmm1
        movdqu  -32(%esi),%xmm2
        movdqu  -16(%esi),%xmm3
        pxor    -112(%ebx),%xmm0
        pxor    -48(%ebx),%xmm1
        pxor    16(%ebx),%xmm2
        pxor    80(%ebx),%xmm3
        movdqu  %xmm0,-64(%edi)
        movdqu  %xmm1,-48(%edi)
        movdqu  %xmm2,-32(%edi)
        movdqu  %xmm3,-16(%edi)
        movdqu  (%esi),%xmm0
        movdqu  16(%esi),%xmm1
        movdqu  32(%esi),%xmm2
        movdqu  48(%esi),%xmm3
        pxor    -96(%ebx),%xmm0
        pxor    -32(%ebx),%xmm1
        pxor    32(%ebx),%xmm2
        pxor    96(%ebx),%xmm3
        movdqu  %xmm0,(%edi)
        movdqu  %xmm1,16(%edi)
        movdqu  %xmm2,32(%edi)
        movdqu  %xmm3,48(%edi)
        movdqu  64(%esi),%xmm0
        movdqu  80(%esi),%xmm1
        movdqu  96(%esi),%xmm2
        movdqu  112(%esi),%xmm3
        pxor    -80(%ebx),%xmm0
        pxor    -16(%ebx),%xmm1
        pxor    48(%ebx),%xmm2
        pxor    112(%ebx),%xmm3
        movdqu  %xmm0,64(%edi)
        movdqu  %xmm1,80(%edi)
        movdqu  %xmm2,96(%edi)
        movdqu  %xmm3,112(%edi)
        leal    256(%esi),%esi
        leal    256(%edi),%edi
        subl    $256,%ecx
        jnc     .L009outer_loop
        addl    $256,%ecx
        jz      .L011done
        movl    520(%esp),%ebx
        leal    -128(%esi),%esi
        movl    516(%esp),%edx
        leal    -128(%edi),%edi
        movd    64(%ebp),%xmm2
        movdqu  (%ebx),%xmm3
        paddd   96(%eax),%xmm2
        pand    112(%eax),%xmm3
        por     %xmm2,%xmm3
.L0081x:
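/*
 * Single-block path: the four state rows go in %xmm0..%xmm3, with the
 * pshufb rotate-by-16 and rotate-by-8 masks preloaded into %xmm6/%xmm7
 * from .Lssse3_data.
 */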
        movdqa  32(%eax),%xmm0
        movdqu  (%edx),%xmm1
        movdqu  16(%edx),%xmm2
        movdqa  (%eax),%xmm6
        movdqa  16(%eax),%xmm7
        movl    %ebp,48(%esp)
        movdqa  %xmm0,(%esp)
        movdqa  %xmm1,16(%esp)
        movdqa  %xmm2,32(%esp)
        movdqa  %xmm3,48(%esp)
        movl    $10,%edx
        jmp     .L012loop1x
.align  16
.L013outer1x:
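/*
 * Next block on the single-block path: add {1,0,0,0} from 80(%eax) to
 * the saved fourth row, i.e. bump the 32-bit block counter by one.
 */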
        movdqa  80(%eax),%xmm3
        movdqa  (%esp),%xmm0
        movdqa  16(%esp),%xmm1
        movdqa  32(%esp),%xmm2
        paddd   48(%esp),%xmm3
        movl    $10,%edx
        movdqa  %xmm3,48(%esp)
        jmp     .L012loop1x
.align  16
.L012loop1x:
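/*
 * Single-block double round. The raw ".byte 102,15,56,0,222" and
 * ".byte 102,15,56,0,223" sequences encode "pshufb %xmm6,%xmm3" and
 * "pshufb %xmm7,%xmm3" (rotate by 16 / rotate by 8); the pshufd $57, $78
 * and $147 shuffles rotate the rows for the diagonal round and back.
 */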
        paddd   %xmm1,%xmm0
        pxor    %xmm0,%xmm3
.byte   102,15,56,0,222
        paddd   %xmm3,%xmm2
        pxor    %xmm2,%xmm1
        movdqa  %xmm1,%xmm4
        psrld   $20,%xmm1
        pslld   $12,%xmm4
        por     %xmm4,%xmm1
        paddd   %xmm1,%xmm0
        pxor    %xmm0,%xmm3
.byte   102,15,56,0,223
        paddd   %xmm3,%xmm2
        pxor    %xmm2,%xmm1
        movdqa  %xmm1,%xmm4
        psrld   $25,%xmm1
        pslld   $7,%xmm4
        por     %xmm4,%xmm1
        pshufd  $78,%xmm2,%xmm2
        pshufd  $57,%xmm1,%xmm1
        pshufd  $147,%xmm3,%xmm3
        nop
        paddd   %xmm1,%xmm0
        pxor    %xmm0,%xmm3
.byte   102,15,56,0,222
        paddd   %xmm3,%xmm2
        pxor    %xmm2,%xmm1
        movdqa  %xmm1,%xmm4
        psrld   $20,%xmm1
        pslld   $12,%xmm4
        por     %xmm4,%xmm1
        paddd   %xmm1,%xmm0
        pxor    %xmm0,%xmm3
.byte   102,15,56,0,223
        paddd   %xmm3,%xmm2
        pxor    %xmm2,%xmm1
        movdqa  %xmm1,%xmm4
        psrld   $25,%xmm1
        pslld   $7,%xmm4
        por     %xmm4,%xmm1
        pshufd  $78,%xmm2,%xmm2
        pshufd  $147,%xmm1,%xmm1
        pshufd  $57,%xmm3,%xmm3
        decl    %edx
        jnz     .L012loop1x
        paddd   (%esp),%xmm0
        paddd   16(%esp),%xmm1
        paddd   32(%esp),%xmm2
        paddd   48(%esp),%xmm3
        cmpl    $64,%ecx
        jb      .L014tail
        movdqu  (%esi),%xmm4
        movdqu  16(%esi),%xmm5
        pxor    %xmm4,%xmm0
        movdqu  32(%esi),%xmm4
        pxor    %xmm5,%xmm1
        movdqu  48(%esi),%xmm5
        pxor    %xmm4,%xmm2
        pxor    %xmm5,%xmm3
        leal    64(%esi),%esi
        movdqu  %xmm0,(%edi)
        movdqu  %xmm1,16(%edi)
        movdqu  %xmm2,32(%edi)
        movdqu  %xmm3,48(%edi)
        leal    64(%edi),%edi
        subl    $64,%ecx
        jnz     .L013outer1x
        jmp     .L011done
.L014tail:
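/*
 * Fewer than 64 bytes left: spill the keystream block to the stack and
 * XOR it with the input byte by byte.
 */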
        movdqa  %xmm0,(%esp)
        movdqa  %xmm1,16(%esp)
        movdqa  %xmm2,32(%esp)
        movdqa  %xmm3,48(%esp)
        xorl    %eax,%eax
        xorl    %edx,%edx
        xorl    %ebp,%ebp
.L015tail_loop:
        movb    (%esp,%ebp,1),%al
        movb    (%esi,%ebp,1),%dl
        leal    1(%ebp),%ebp
        xorb    %dl,%al
        movb    %al,-1(%edi,%ebp,1)
        decl    %ecx
        jnz     .L015tail_loop
.L011done:
        movl    512(%esp),%esp
        popl    %edi
        popl    %esi
        popl    %ebx
        popl    %ebp
        ret
.size   ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin
.align  64
.Lssse3_data:
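/*
 * Constant table, as used above: two pshufb masks (rotate each dword
 * left by 16, then by 8), the sigma constants, per-lane counter offsets
 * {0,1,2,3}, the 4-way counter increment {4,4,4,4}, the single-block
 * increment {1,0,0,0}, and the {4,0,0,0} / {0,-1,-1,-1} pair used when
 * switching from the 4-way path to the single-block path.
 */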
.byte   2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
.byte   3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
.long   1634760805,857760878,2036477234,1797285236
.long   0,1,2,3
.long   4,4,4,4
.long   1,0,0,0
.long   4,0,0,0
.long   0,-1,-1,-1
.align  64
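/*
 * The .byte string below spells "ChaCha20 for x86, CRYPTOGAMS by
 * <appro@openssl.org>", NUL-terminated.
 */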
.byte   67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte   44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte   60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte   114,103,62,0
#endif