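/*
 * Montgomery multiplication for 32-bit x86: the generated output of
 * OpenSSL's crypto/bn/asm/x86-mont.pl perlasm script (CRYPTOGAMS by
 * Andy Polyakov; see the copyright string in the .byte block at the end).
 *
 * bn_mul_mont(rp, ap, bp, np, n0, num) computes
 *   rp[] = ap[] * bp[] * 2^(-32*num) mod np[]
 * on num-word little-endian vectors, where n0 points at
 * -np[0]^(-1) mod 2^32.  It returns 1 on success and 0 when num < 4,
 * in which case the caller is expected to fall back to generic code.
 */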
#if defined(__i386__)
.file   "src/crypto/bn/asm/x86-mont.S"
.text
.globl  bn_mul_mont
.hidden bn_mul_mont
.type   bn_mul_mont,@function
.align  16
bn_mul_mont:
.L_bn_mul_mont_begin:
        pushl   %ebp
        pushl   %ebx
        pushl   %esi
        pushl   %edi
        xorl    %eax,%eax
        movl    40(%esp),%edi
        cmpl    $4,%edi
        jl      .L000just_leave
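        /* alloca(4*(num+2)+32): carve the temporary vector tp[] out of
           the stack, arrange a 2K window between %esp and the ap
           argument to minimize cache contention, then align %esp to a
           64-byte boundary. */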
        leal    20(%esp),%esi
        leal    24(%esp),%edx
        movl    %esp,%ebp
        addl    $2,%edi
        negl    %edi
        leal    -32(%esp,%edi,4),%esp
        negl    %edi
        movl    %esp,%eax
        subl    %edx,%eax
        andl    $2047,%eax
        subl    %eax,%esp
        xorl    %esp,%edx
        andl    $2048,%edx
        xorl    $2048,%edx
        subl    %edx,%esp
        andl    $-64,%esp
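        /* Copy the arguments into fixed frame slots: 4(%esp)=rp,
           8(%esp)=ap, 12(%esp)=bp, 16(%esp)=np, 20(%esp)=n0[0] (note
           the extra dereference), 24(%esp)=saved %esp.  %ebx = num-1
           serves as the loop bound below. */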
        movl    (%esi),%eax
        movl    4(%esi),%ebx
        movl    8(%esi),%ecx
        movl    12(%esi),%edx
        movl    16(%esi),%esi
        movl    (%esi),%esi
        movl    %eax,4(%esp)
        movl    %ebx,8(%esp)
        movl    %ecx,12(%esp)
        movl    %edx,16(%esp)
        movl    %esi,20(%esp)
        leal    -3(%edi),%ebx
        movl    %ebp,24(%esp)
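        /* Position-independent load of OPENSSL_ia32cap_P; bit 26
           (SSE2) selects the PMULUDQ-based path below. */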
        call    .L001PIC_me_up
.L001PIC_me_up:
        popl    %eax
        leal    OPENSSL_ia32cap_P-.L001PIC_me_up(%eax),%eax
        btl     $26,(%eax)
        jnc     .L002non_sse2
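        /* SSE2/MMX path.  %mm7 masks a 64-bit product down to 32 bits,
           %mm4 = bp[0], %mm5 becomes the Montgomery factor
           m = (ap[0]*bp[0] mod 2^32) * n0, and %mm3 accumulates
           np[]*m. */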
        movl    $-1,%eax
        movd    %eax,%mm7
        movl    8(%esp),%esi
        movl    12(%esp),%edi
        movl    16(%esp),%ebp
        xorl    %edx,%edx
        xorl    %ecx,%ecx
        movd    (%edi),%mm4
        movd    (%esi),%mm5
        movd    (%ebp),%mm3
        pmuludq %mm4,%mm5
        movq    %mm5,%mm2
        movq    %mm5,%mm0
        pand    %mm7,%mm0
        pmuludq 20(%esp),%mm5
        pmuludq %mm5,%mm3
        paddq   %mm0,%mm3
        movd    4(%ebp),%mm1
        movd    4(%esi),%mm0
        psrlq   $32,%mm2
        psrlq   $32,%mm3
        incl    %ecx
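        /* First pass (i = 0): tp[j] = ap[j]*bp[0] + np[j]*m, with the
           two 32-bit carry chains kept in %mm2 and %mm3. */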
.align  16
.L0031st:
        pmuludq %mm4,%mm0
        pmuludq %mm5,%mm1
        paddq   %mm0,%mm2
        paddq   %mm1,%mm3
        movq    %mm2,%mm0
        pand    %mm7,%mm0
        movd    4(%ebp,%ecx,4),%mm1
        paddq   %mm0,%mm3
        movd    4(%esi,%ecx,4),%mm0
        psrlq   $32,%mm2
        movd    %mm3,28(%esp,%ecx,4)
        psrlq   $32,%mm3
        leal    1(%ecx),%ecx
        cmpl    %ebx,%ecx
        jl      .L0031st
        pmuludq %mm4,%mm0
        pmuludq %mm5,%mm1
        paddq   %mm0,%mm2
        paddq   %mm1,%mm3
        movq    %mm2,%mm0
        pand    %mm7,%mm0
        paddq   %mm0,%mm3
        movd    %mm3,28(%esp,%ecx,4)
        psrlq   $32,%mm2
        psrlq   $32,%mm3
        paddq   %mm2,%mm3
        movq    %mm3,32(%esp,%ebx,4)
        incl    %edx
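        /* Outer loop over the remaining words bp[i], i = 1..num-1. */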
.L004outer:
        xorl    %ecx,%ecx
        movd    (%edi,%edx,4),%mm4
        movd    (%esi),%mm5
        movd    32(%esp),%mm6
        movd    (%ebp),%mm3
        pmuludq %mm4,%mm5
        paddq   %mm6,%mm5
        movq    %mm5,%mm0
        movq    %mm5,%mm2
        pand    %mm7,%mm0
        pmuludq 20(%esp),%mm5
        pmuludq %mm5,%mm3
        paddq   %mm0,%mm3
        movd    36(%esp),%mm6
        movd    4(%ebp),%mm1
        movd    4(%esi),%mm0
        psrlq   $32,%mm2
        psrlq   $32,%mm3
        paddq   %mm6,%mm2
        incl    %ecx
        decl    %ebx
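        /* Inner loop: tp[j] += ap[j]*bp[i] + np[j]*m, again carrying
           in %mm2 (multiplication) and %mm3 (reduction). */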
.L005inner:
        pmuludq %mm4,%mm0
        pmuludq %mm5,%mm1
        paddq   %mm0,%mm2
        paddq   %mm1,%mm3
        movq    %mm2,%mm0
        movd    36(%esp,%ecx,4),%mm6
        pand    %mm7,%mm0
        movd    4(%ebp,%ecx,4),%mm1
        paddq   %mm0,%mm3
        movd    4(%esi,%ecx,4),%mm0
        psrlq   $32,%mm2
        movd    %mm3,28(%esp,%ecx,4)
        psrlq   $32,%mm3
        paddq   %mm6,%mm2
        decl    %ebx
        leal    1(%ecx),%ecx
        jnz     .L005inner
        movl    %ecx,%ebx
        pmuludq %mm4,%mm0
        pmuludq %mm5,%mm1
        paddq   %mm0,%mm2
        paddq   %mm1,%mm3
        movq    %mm2,%mm0
        pand    %mm7,%mm0
        paddq   %mm0,%mm3
        movd    %mm3,28(%esp,%ecx,4)
        psrlq   $32,%mm2
        psrlq   $32,%mm3
        movd    36(%esp,%ebx,4),%mm6
        paddq   %mm2,%mm3
        paddq   %mm6,%mm3
        movq    %mm3,32(%esp,%ebx,4)
        leal    1(%edx),%edx
        cmpl    %ebx,%edx
        jle     .L004outer
        emms
        jmp     .L006common_tail
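        /* Integer-only (non-SSE2) path.  If ap == bp and num is even,
           branch to the dedicated squaring code at .L007bn_sqr_mont. */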
.align  16
.L002non_sse2:
        movl    8(%esp),%esi
        leal    1(%ebx),%ebp
        movl    12(%esp),%edi
        xorl    %ecx,%ecx
        movl    %esi,%edx
        andl    $1,%ebp
        subl    %edi,%edx
        leal    4(%edi,%ebx,4),%eax
        orl     %edx,%ebp
        movl    (%edi),%edi
        jz      .L007bn_sqr_mont
        movl    %eax,28(%esp)
        movl    (%esi),%eax
        xorl    %edx,%edx
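        /* tp[j] = ap[j]*bp[0], j = 0..num-1 (plain first pass). */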
.align  16
.L008mull:
        movl    %edx,%ebp
        mull    %edi
        addl    %eax,%ebp
        leal    1(%ecx),%ecx
        adcl    $0,%edx
        movl    (%esi,%ecx,4),%eax
        cmpl    %ebx,%ecx
        movl    %ebp,28(%esp,%ecx,4)
        jl      .L008mull
        movl    %edx,%ebp
        mull    %edi
        movl    20(%esp),%edi
        addl    %ebp,%eax
        movl    16(%esp),%esi
        adcl    $0,%edx
        imull   32(%esp),%edi
        movl    %eax,32(%esp,%ebx,4)
        xorl    %ecx,%ecx
        movl    %edx,36(%esp,%ebx,4)
        movl    %ecx,40(%esp,%ebx,4)
        movl    (%esi),%eax
        mull    %edi
        addl    32(%esp),%eax
        movl    4(%esi),%eax
        adcl    $0,%edx
        incl    %ecx
        jmp     .L0092ndmadd
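        /* tp[j] += ap[j]*bp[i] for the current outer word bp[i]. */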
.align  16
.L0101stmadd:
        movl    %edx,%ebp
        mull    %edi
        addl    32(%esp,%ecx,4),%ebp
        leal    1(%ecx),%ecx
        adcl    $0,%edx
        addl    %eax,%ebp
        movl    (%esi,%ecx,4),%eax
        adcl    $0,%edx
        cmpl    %ebx,%ecx
        movl    %ebp,28(%esp,%ecx,4)
        jl      .L0101stmadd
        movl    %edx,%ebp
        mull    %edi
        addl    32(%esp,%ebx,4),%eax
        movl    20(%esp),%edi
        adcl    $0,%edx
        movl    16(%esp),%esi
        addl    %eax,%ebp
        adcl    $0,%edx
        imull   32(%esp),%edi
        xorl    %ecx,%ecx
        addl    36(%esp,%ebx,4),%edx
        movl    %ebp,32(%esp,%ebx,4)
        adcl    $0,%ecx
        movl    (%esi),%eax
        movl    %edx,36(%esp,%ebx,4)
        movl    %ecx,40(%esp,%ebx,4)
        mull    %edi
        addl    32(%esp),%eax
        movl    4(%esi),%eax
        adcl    $0,%edx
        movl    $1,%ecx
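        /* Montgomery reduction: tp[j] += np[j]*m, stored one word
           lower so the vector shifts down as the zeroed word drops
           off. */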
.align  16
.L0092ndmadd:
        movl    %edx,%ebp
        mull    %edi
        addl    32(%esp,%ecx,4),%ebp
        leal    1(%ecx),%ecx
        adcl    $0,%edx
        addl    %eax,%ebp
        movl    (%esi,%ecx,4),%eax
        adcl    $0,%edx
        cmpl    %ebx,%ecx
        movl    %ebp,24(%esp,%ecx,4)
        jl      .L0092ndmadd
        movl    %edx,%ebp
        mull    %edi
        addl    32(%esp,%ebx,4),%ebp
        adcl    $0,%edx
        addl    %eax,%ebp
        adcl    $0,%edx
        movl    %ebp,28(%esp,%ebx,4)
        xorl    %eax,%eax
        movl    12(%esp),%ecx
        addl    36(%esp,%ebx,4),%edx
        adcl    40(%esp,%ebx,4),%eax
        leal    4(%ecx),%ecx
        movl    %edx,32(%esp,%ebx,4)
        cmpl    28(%esp),%ecx
        movl    %eax,36(%esp,%ebx,4)
        je      .L006common_tail
        movl    (%ecx),%edi
        movl    8(%esp),%esi
        movl    %ecx,12(%esp)
        xorl    %ecx,%ecx
        xorl    %edx,%edx
        movl    (%esi),%eax
        jmp     .L0101stmadd
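        /* Squaring path (ap == bp, num even): computes each product
           ap[i]*ap[j] once and doubles the cross products on the
           fly. */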
.align  16
.L007bn_sqr_mont:
        movl    %ebx,(%esp)
        movl    %ecx,12(%esp)
        movl    %edi,%eax
        mull    %edi
        movl    %eax,32(%esp)
        movl    %edx,%ebx
        shrl    $1,%edx
        andl    $1,%ebx
        incl    %ecx
.align  16
.L011sqr:
        movl    (%esi,%ecx,4),%eax
        movl    %edx,%ebp
        mull    %edi
        addl    %ebp,%eax
        leal    1(%ecx),%ecx
        adcl    $0,%edx
        leal    (%ebx,%eax,2),%ebp
        shrl    $31,%eax
        cmpl    (%esp),%ecx
        movl    %eax,%ebx
        movl    %ebp,28(%esp,%ecx,4)
        jl      .L011sqr
        movl    (%esi,%ecx,4),%eax
        movl    %edx,%ebp
        mull    %edi
        addl    %ebp,%eax
        movl    20(%esp),%edi
        adcl    $0,%edx
        movl    16(%esp),%esi
        leal    (%ebx,%eax,2),%ebp
        imull   32(%esp),%edi
        shrl    $31,%eax
        movl    %ebp,32(%esp,%ecx,4)
        leal    (%eax,%edx,2),%ebp
        movl    (%esi),%eax
        shrl    $31,%edx
        movl    %ebp,36(%esp,%ecx,4)
        movl    %edx,40(%esp,%ecx,4)
        mull    %edi
        addl    32(%esp),%eax
        movl    %ecx,%ebx
        adcl    $0,%edx
        movl    4(%esi),%eax
        movl    $1,%ecx
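        /* Reduction loop shared by the squaring path: tp[j] +=
           np[j]*m, unrolled two words per iteration, shifting down as
           above. */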
.align  16
.L0123rdmadd:
        movl    %edx,%ebp
        mull    %edi
        addl    32(%esp,%ecx,4),%ebp
        adcl    $0,%edx
        addl    %eax,%ebp
        movl    4(%esi,%ecx,4),%eax
        adcl    $0,%edx
        movl    %ebp,28(%esp,%ecx,4)
        movl    %edx,%ebp
        mull    %edi
        addl    36(%esp,%ecx,4),%ebp
        leal    2(%ecx),%ecx
        adcl    $0,%edx
        addl    %eax,%ebp
        movl    (%esi,%ecx,4),%eax
        adcl    $0,%edx
        cmpl    %ebx,%ecx
        movl    %ebp,24(%esp,%ecx,4)
        jl      .L0123rdmadd
        movl    %edx,%ebp
        mull    %edi
        addl    32(%esp,%ebx,4),%ebp
        adcl    $0,%edx
        addl    %eax,%ebp
        adcl    $0,%edx
        movl    %ebp,28(%esp,%ebx,4)
        movl    12(%esp),%ecx
        xorl    %eax,%eax
        movl    8(%esp),%esi
        addl    36(%esp,%ebx,4),%edx
        adcl    40(%esp,%ebx,4),%eax
        movl    %edx,32(%esp,%ebx,4)
        cmpl    %ebx,%ecx
        movl    %eax,36(%esp,%ebx,4)
        je      .L006common_tail
        movl    4(%esi,%ecx,4),%edi
        leal    1(%ecx),%ecx
        movl    %edi,%eax
        movl    %ecx,12(%esp)
        mull    %edi
        addl    32(%esp,%ecx,4),%eax
        adcl    $0,%edx
        movl    %eax,32(%esp,%ecx,4)
        xorl    %ebp,%ebp
        cmpl    %ebx,%ecx
        leal    1(%ecx),%ecx
        je      .L013sqrlast
        movl    %edx,%ebx
        shrl    $1,%edx
        andl    $1,%ebx
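        /* Add the doubled cross products 2*ap[j]*ap[i] into tp[],
           carrying the shifted-out top bit in %ebx. */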
.align  16
.L014sqradd:
        movl    (%esi,%ecx,4),%eax
        movl    %edx,%ebp
        mull    %edi
        addl    %ebp,%eax
        leal    (%eax,%eax,1),%ebp
        adcl    $0,%edx
        shrl    $31,%eax
        addl    32(%esp,%ecx,4),%ebp
        leal    1(%ecx),%ecx
        adcl    $0,%eax
        addl    %ebx,%ebp
        adcl    $0,%eax
        cmpl    (%esp),%ecx
        movl    %ebp,28(%esp,%ecx,4)
        movl    %eax,%ebx
        jle     .L014sqradd
        movl    %edx,%ebp
        addl    %edx,%edx
        shrl    $31,%ebp
        addl    %ebx,%edx
        adcl    $0,%ebp
.L013sqrlast:
        movl    20(%esp),%edi
        movl    16(%esp),%esi
        imull   32(%esp),%edi
        addl    32(%esp,%ecx,4),%edx
        movl    (%esi),%eax
        adcl    $0,%ebp
        movl    %edx,32(%esp,%ecx,4)
        movl    %ebp,36(%esp,%ecx,4)
        mull    %edi
        addl    32(%esp),%eax
        leal    -1(%ecx),%ebx
        adcl    $0,%edx
        movl    $1,%ecx
        movl    4(%esi),%eax
        jmp     .L0123rdmadd
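        /* Common tail: subtract the modulus into rp[], then select
           between tp and tp-np without branching. */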
.align  16
.L006common_tail:
        movl    16(%esp),%ebp
        movl    4(%esp),%edi
        leal    32(%esp),%esi
        movl    (%esi),%eax
        movl    %ebx,%ecx
        xorl    %edx,%edx
.align  16
.L015sub:
        sbbl    (%ebp,%edx,4),%eax
        movl    %eax,(%edi,%edx,4)
        decl    %ecx
        movl    4(%esi,%edx,4),%eax
        leal    1(%edx),%edx
        jge     .L015sub
        sbbl    $0,%eax
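        /* Branch-free select: %eax is an all-ones mask when the
           result must be taken from tp (tp < np), zero when it is
           tp-np already in rp[]; the temporary vector is clobbered as
           it is consumed. */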
.align  16
.L016copy:
        movl    (%esi,%ebx,4),%edx
        movl    (%edi,%ebx,4),%ebp
        xorl    %ebp,%edx
        andl    %eax,%edx
        xorl    %ebp,%edx
        movl    %ecx,(%esi,%ebx,4)
        movl    %edx,(%edi,%ebx,4)
        decl    %ebx
        jge     .L016copy
        movl    24(%esp),%esp
        movl    $1,%eax
.L000just_leave:
        popl    %edi
        popl    %esi
        popl    %ebx
        popl    %ebp
        ret
.size   bn_mul_mont,.-.L_bn_mul_mont_begin
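/* The ASCII string "Montgomery Multiplication for x86, CRYPTOGAMS by
   <appro@openssl.org>", stored as raw bytes: */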
.byte   77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
.byte   112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
.byte   54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
.byte   32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte   111,114,103,62,0
#endif
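/*
 * For reference, the word-level (CIOS) algorithm implemented above can
 * be modeled by the C sketch below.  It is an illustrative model only,
 * not part of the generated file: the helper name mont_mul_ref is
 * hypothetical, n0 is passed by value here for simplicity, and 32-bit
 * limbs are assumed.  It is kept inside this comment so the file still
 * assembles.
 *
 *   #include <stdint.h>
 *   #include <string.h>
 *
 *   typedef uint32_t BN_ULONG;
 *
 *   // rp[] = ap[]*bp[] * 2^(-32*num) mod np[]; n0 = -np[0]^(-1) mod 2^32
 *   static void mont_mul_ref(BN_ULONG *rp, const BN_ULONG *ap,
 *                            const BN_ULONG *bp, const BN_ULONG *np,
 *                            BN_ULONG n0, int num)
 *   {
 *       BN_ULONG tp[num + 2];               // temporary vector, as on the stack
 *       memset(tp, 0, sizeof(tp));
 *       for (int i = 0; i < num; i++) {
 *           uint64_t t = 0;
 *           for (int j = 0; j < num; j++) { // tp[] += ap[]*bp[i]
 *               t = (uint64_t)ap[j] * bp[i] + tp[j] + (t >> 32);
 *               tp[j] = (BN_ULONG)t;
 *           }
 *           t = (uint64_t)tp[num] + (t >> 32);
 *           tp[num] = (BN_ULONG)t;
 *           tp[num + 1] = (BN_ULONG)(t >> 32);
 *
 *           BN_ULONG m = (BN_ULONG)(tp[0] * n0);  // Montgomery factor
 *           t = (uint64_t)np[0] * m + tp[0];      // low word becomes 0
 *           for (int j = 1; j < num; j++) {       // tp[] += np[]*m, shifted
 *               t = (uint64_t)np[j] * m + tp[j] + (t >> 32);
 *               tp[j - 1] = (BN_ULONG)t;
 *           }
 *           t = (uint64_t)tp[num] + (t >> 32);
 *           tp[num - 1] = (BN_ULONG)t;
 *           t = (uint64_t)tp[num + 1] + (t >> 32);
 *           tp[num] = (BN_ULONG)t;
 *       }
 *
 *       uint64_t b = 0;                     // conditional tp - np
 *       for (int j = 0; j < num; j++) {
 *           b = (uint64_t)tp[j] - np[j] - b;
 *           rp[j] = (BN_ULONG)b;
 *           b = (b >> 32) & 1;
 *       }
 *       b = (uint64_t)tp[num] - b;
 *       BN_ULONG keep = (BN_ULONG)0 - (BN_ULONG)((b >> 32) & 1);
 *       for (int j = 0; j < num; j++)       // borrow means tp < np: keep tp
 *           rp[j] = (tp[j] & keep) | (rp[j] & ~keep);
 *   }
 */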