nexmon – Blame information for rev 1
?pathlinks?
Rev | Author | Line No. | Line |
---|---|---|---|
; NASM preamble.
; "default rel" makes memory operands RIP-relative unless overridden.
; XMMWORD/YMMWORD/ZMMWORD are defined as empty, so operands written as
; XMMWORD[...] further down assemble without an explicit size keyword.
1 | office | 1 | default rel |
2 | %define XMMWORD |
||
3 | %define YMMWORD |
||
4 | %define ZMMWORD |
||
; All code below is placed in a 64-byte-aligned .text section.
5 | section .text code align=64 |
||
6 | |||
7 | |||
; Imported symbol; declared here but not referenced in the visible part of this file.
8 | EXTERN OPENSSL_ia32cap_P |
||
9 | |||
; bn_mul_mont is the only label declared global in the visible part of this file.
10 | global bn_mul_mont |
||
11 | |||
;-----------------------------------------------------------------------
; bn_mul_mont -- exported entry point, entered with the Win64 calling convention.
; The prologue stores rdi/rsi into the caller-provided home space at
; [rsp+8]/[rsp+16], then moves the six arguments
;   rcx, rdx, r8, r9, [rsp+40], [rsp+48]  ->  rdi, rsi, rdx, rcx, r8, r9.
; NOTE(review): presumably these are (rp, ap, bp, np, n0, num) of OpenSSL's
; bn_mul_mont prototype -- confirm against the C declaration.
; As used below: rdi = output array, rsi and rdx = the two operand arrays,
; rcx = array used for the reduction products and the final subtraction,
; r8 = pointer to one 64-bit constant, r9d = number of 64-bit words.
; Returns rax = 1 on the path implemented in this block.
;-----------------------------------------------------------------------
12 | ALIGN 16 |
||
13 | bn_mul_mont: |
||
14 | mov QWORD[8+rsp],rdi ;WIN64 prologue |
||
15 | mov QWORD[16+rsp],rsi |
||
16 | mov rax,rsp |
||
17 | $L$SEH_begin_bn_mul_mont: |
||
18 | mov rdi,rcx |
||
19 | mov rsi,rdx |
||
20 | mov rdx,r8 |
||
21 | mov rcx,r9 |
||
22 | mov r8,QWORD[40+rsp] |
||
23 | mov r9,QWORD[48+rsp] |
||
24 | |||
25 | |||
; Dispatch on the word count in r9d:
;   not a multiple of 4, or below 8      -> generic path ($L$mul_enter)
;   operands differ (rdx != rsi)         -> $L$mul4x_enter
;   same operand and count multiple of 8 -> $L$sqr8x_enter
;   same operand otherwise               -> $L$mul4x_enter
26 | test r9d,3 |
||
27 | jnz NEAR $L$mul_enter |
||
28 | cmp r9d,8 |
||
29 | jb NEAR $L$mul_enter |
||
30 | cmp rdx,rsi |
||
31 | jne NEAR $L$mul4x_enter |
||
32 | test r9d,7 |
||
33 | jz NEAR $L$sqr8x_enter |
||
34 | jmp NEAR $L$mul4x_enter |
||
35 | |||
36 | ALIGN 16 |
||
; Generic path: save the six callee-saved GPRs used below.
37 | $L$mul_enter: |
||
38 | push rbx |
||
39 | push rbp |
||
40 | push r12 |
||
41 | push r13 |
||
42 | push r14 |
||
43 | push r15 |
||
44 | |||
; Zero-extend the count, reserve (count+2) qwords below rsp and round rsp
; down to a 1024-byte boundary. r11 keeps the pre-allocation rsp, which is
; saved at slot [rsp+8+count*8] so the epilogue and mul_handler can find it.
; NOTE(review): rsp is lowered in one step without touching the pages in
; between -- confirm whether stack probing is needed for large counts.
45 | mov r9d,r9d |
||
46 | lea r10,[2+r9] |
||
47 | mov r11,rsp |
||
48 | neg r10 |
||
49 | lea rsp,[r10*8+rsp] |
||
50 | and rsp,-1024 |
||
51 | |||
52 | mov QWORD[8+r9*8+rsp],r11 |
||
; From here on: r12 = second operand pointer, r8 = the 64-bit constant
; loaded from [r8], rbx = current word of [r12], r14 = outer index,
; r15 = inner index, rsp = base of the temporary word buffer.
53 | $L$mul_body: |
||
54 | mov r12,rdx |
||
55 | mov r8,QWORD[r8] |
||
56 | mov rbx,QWORD[r12] |
||
57 | mov rax,QWORD[rsi] |
||
58 | |||
59 | xor r14,r14 |
||
60 | xor r15,r15 |
||
61 | |||
62 | mov rbp,r8 |
||
63 | mul rbx |
||
64 | mov r10,rax |
||
65 | mov rax,QWORD[rcx] |
||
66 | |||
; rbp = r8 * (low word of the first product), truncated to 64 bits; it is
; the multiplier applied to every word of [rcx] during this pass.
67 | imul rbp,r10 |
||
68 | mov r11,rdx |
||
69 | |||
70 | mul rbp |
||
71 | add r10,rax |
||
72 | mov rax,QWORD[8+rsi] |
||
73 | adc rdx,0 |
||
74 | mov r13,rdx |
||
75 | |||
76 | lea r15,[1+r15] |
||
77 | jmp NEAR $L$1st_enter |
||
78 | |||
79 | ALIGN 16 |
||
; First pass (outer index 0). Each iteration forms [rsi][r15]*rbx and
; [rcx][r15]*rbp, folds in the running carries (r10/r11 and r13), and
; stores the combined low word one slot lower in the stack buffer.
80 | $L$1st: |
||
81 | add r13,rax |
||
82 | mov rax,QWORD[r15*8+rsi] |
||
83 | adc rdx,0 |
||
84 | add r13,r11 |
||
85 | mov r11,r10 |
||
86 | adc rdx,0 |
||
87 | mov QWORD[((-16))+r15*8+rsp],r13 |
||
88 | mov r13,rdx |
||
89 | |||
90 | $L$1st_enter: |
||
91 | mul rbx |
||
92 | add r11,rax |
||
93 | mov rax,QWORD[r15*8+rcx] |
||
94 | adc rdx,0 |
||
95 | lea r15,[1+r15] |
||
96 | mov r10,rdx |
||
97 | |||
98 | mul rbp |
||
99 | cmp r15,r9 |
||
100 | jne NEAR $L$1st |
||
101 | |||
; Tail of the first pass: flush the last word, then write the two top
; words (sum of the carries and its carry-out) at slots count-1 and count.
102 | add r13,rax |
||
103 | mov rax,QWORD[rsi] |
||
104 | adc rdx,0 |
||
105 | add r13,r11 |
||
106 | adc rdx,0 |
||
107 | mov QWORD[((-16))+r15*8+rsp],r13 |
||
108 | mov r13,rdx |
||
109 | mov r11,r10 |
||
110 | |||
111 | xor rdx,rdx |
||
112 | add r13,r11 |
||
113 | adc rdx,0 |
||
114 | mov QWORD[((-8))+r9*8+rsp],r13 |
||
115 | mov QWORD[r9*8+rsp],rdx |
||
116 | |||
117 | lea r14,[1+r14] |
||
118 | jmp NEAR $L$outer |
||
119 | ALIGN 16 |
||
; Outer loop, r14 = 1 .. count-1. Same structure as the first pass, but each
; step also adds the word already held in the stack buffer (loaded into r10).
120 | $L$outer: |
||
121 | mov rbx,QWORD[r14*8+r12] |
||
122 | xor r15,r15 |
||
123 | mov rbp,r8 |
||
124 | mov r10,QWORD[rsp] |
||
125 | mul rbx |
||
126 | add r10,rax |
||
127 | mov rax,QWORD[rcx] |
||
128 | adc rdx,0 |
||
129 | |||
130 | imul rbp,r10 |
||
131 | mov r11,rdx |
||
132 | |||
133 | mul rbp |
||
134 | add r10,rax |
||
135 | mov rax,QWORD[8+rsi] |
||
136 | adc rdx,0 |
||
137 | mov r10,QWORD[8+rsp] |
||
138 | mov r13,rdx |
||
139 | |||
140 | lea r15,[1+r15] |
||
141 | jmp NEAR $L$inner_enter |
||
142 | |||
143 | ALIGN 16 |
||
144 | $L$inner: |
||
145 | add r13,rax |
||
146 | mov rax,QWORD[r15*8+rsi] |
||
147 | adc rdx,0 |
||
148 | add r13,r10 |
||
149 | mov r10,QWORD[r15*8+rsp] |
||
150 | adc rdx,0 |
||
151 | mov QWORD[((-16))+r15*8+rsp],r13 |
||
152 | mov r13,rdx |
||
153 | |||
154 | $L$inner_enter: |
||
155 | mul rbx |
||
156 | add r11,rax |
||
157 | mov rax,QWORD[r15*8+rcx] |
||
158 | adc rdx,0 |
||
159 | add r10,r11 |
||
160 | mov r11,rdx |
||
161 | adc r11,0 |
||
162 | lea r15,[1+r15] |
||
163 | |||
164 | mul rbp |
||
165 | cmp r15,r9 |
||
166 | jne NEAR $L$inner |
||
167 | |||
; Tail of one outer iteration: flush the last word and fold the previous
; top word (slot count) into the new top words.
168 | add r13,rax |
||
169 | mov rax,QWORD[rsi] |
||
170 | adc rdx,0 |
||
171 | add r13,r10 |
||
172 | mov r10,QWORD[r15*8+rsp] |
||
173 | adc rdx,0 |
||
174 | mov QWORD[((-16))+r15*8+rsp],r13 |
||
175 | mov r13,rdx |
||
176 | |||
177 | xor rdx,rdx |
||
178 | add r13,r11 |
||
179 | adc rdx,0 |
||
180 | add r13,r10 |
||
181 | adc rdx,0 |
||
182 | mov QWORD[((-8))+r9*8+rsp],r13 |
||
183 | mov QWORD[r9*8+rsp],rdx |
||
184 | |||
185 | lea r14,[1+r14] |
||
186 | cmp r14,r9 |
||
187 | jb NEAR $L$outer |
||
188 | |||
; Final subtraction: [rdi][i] = buffer[i] - [rcx][i] with a borrow chain.
; "xor r14,r14" clears CF before the first sbb; the mov/lea/dec/jnz inside
; the loop do not modify CF, so the borrow survives between iterations.
189 | xor r14,r14 |
||
190 | mov rax,QWORD[rsp] |
||
191 | lea rsi,[rsp] |
||
192 | mov r15,r9 |
||
193 | jmp NEAR $L$sub |
||
194 | ALIGN 16 |
||
195 | $L$sub: sbb rax,QWORD[r14*8+rcx] |
||
196 | mov QWORD[r14*8+rdi],rax |
||
197 | mov rax,QWORD[8+r14*8+rsi] |
||
198 | lea r14,[1+r14] |
||
199 | dec r15 |
||
200 | jnz NEAR $L$sub |
||
201 | |||
; rax = top buffer word minus the final borrow; it is used as a select mask
; below (expected to be either 0 or all-ones).
202 | sbb rax,0 |
||
203 | xor r14,r14 |
||
204 | mov r15,r9 |
||
205 | ALIGN 16 |
||
; Branch-free select per word: [rdi][i] = ((buffer[i] ^ [rdi][i]) & rax) ^ [rdi][i],
; i.e. the buffer word when rax is all-ones, the subtracted word when rax is 0.
; Each buffer slot is then overwritten with the loop index r14, which
; discards the temporary value.
206 | $L$copy: |
||
207 | mov rsi,QWORD[r14*8+rsp] |
||
208 | mov rcx,QWORD[r14*8+rdi] |
||
209 | xor rsi,rcx |
||
210 | and rsi,rax |
||
211 | xor rsi,rcx |
||
212 | mov QWORD[r14*8+rsp],r14 |
||
213 | mov QWORD[r14*8+rdi],rsi |
||
214 | lea r14,[1+r14] |
||
215 | sub r15,1 |
||
216 | jnz NEAR $L$copy |
||
217 | |||
; Epilogue: rsi = rsp value saved before the allocation; reload the six
; pushed registers from it, release the frame and return 1.
218 | mov rsi,QWORD[8+r9*8+rsp] |
||
219 | mov rax,1 |
||
220 | mov r15,QWORD[rsi] |
||
221 | mov r14,QWORD[8+rsi] |
||
222 | mov r13,QWORD[16+rsi] |
||
223 | mov r12,QWORD[24+rsi] |
||
224 | mov rbp,QWORD[32+rsi] |
||
225 | mov rbx,QWORD[40+rsi] |
||
226 | lea rsp,[48+rsi] |
||
227 | $L$mul_epilogue: |
||
; Reload rdi/rsi from the home space written by the prologue, then return
; (bytes F3 C3 encode "rep ret").
228 | mov rdi,QWORD[8+rsp] ;WIN64 epilogue |
||
229 | mov rsi,QWORD[16+rsp] |
||
230 | DB 0F3h,0C3h ;repret |
||
231 | $L$SEH_end_bn_mul_mont: |
||
232 | |||
;-----------------------------------------------------------------------
; bn_mul4x_mont -- same computation as the generic path of bn_mul_mont, with
; the inner loops unrolled to four words per iteration.
; The label is not declared global in the visible source; bn_mul_mont jumps
; straight to $L$mul4x_enter after doing its own argument shuffle.
; The stand-alone prologue below performs the same Win64 shuffle:
;   rcx, rdx, r8, r9, [rsp+40], [rsp+48]  ->  rdi, rsi, rdx, rcx, r8, r9.
; Returns rax = 1.
; NOTE(review): the unrolled loops step the index by 4, so the word count is
; presumably a multiple of 4 and at least 8 (bn_mul_mont's dispatch only
; enters under those conditions) -- confirm for any other caller.
;-----------------------------------------------------------------------
233 | ALIGN 16 |
||
234 | bn_mul4x_mont: |
||
235 | mov QWORD[8+rsp],rdi ;WIN64 prologue |
||
236 | mov QWORD[16+rsp],rsi |
||
237 | mov rax,rsp |
||
238 | $L$SEH_begin_bn_mul4x_mont: |
||
239 | mov rdi,rcx |
||
240 | mov rsi,rdx |
||
241 | mov rdx,r8 |
||
242 | mov rcx,r9 |
||
243 | mov r8,QWORD[40+rsp] |
||
244 | mov r9,QWORD[48+rsp] |
||
245 | |||
246 | |||
; Save callee-saved GPRs, reserve (count+4) qwords, round rsp down to a
; 1024-byte boundary and store the pre-allocation rsp at [rsp+8+count*8].
; NOTE(review): as in the generic path, rsp is lowered in one step without
; touching intermediate pages -- confirm whether stack probing is needed.
247 | $L$mul4x_enter: |
||
248 | push rbx |
||
249 | push rbp |
||
250 | push r12 |
||
251 | push r13 |
||
252 | push r14 |
||
253 | push r15 |
||
254 | |||
255 | mov r9d,r9d |
||
256 | lea r10,[4+r9] |
||
257 | mov r11,rsp |
||
258 | neg r10 |
||
259 | lea rsp,[r10*8+rsp] |
||
260 | and rsp,-1024 |
||
261 | |||
262 | mov QWORD[8+r9*8+rsp],r11 |
||
; rdi (output pointer) is parked at [rsp+16+count*8] because rdi is reused
; as an accumulator in the loops; it is reloaded before the subtraction.
263 | $L$mul4x_body: |
||
264 | mov QWORD[16+r9*8+rsp],rdi |
||
265 | mov r12,rdx |
||
266 | mov r8,QWORD[r8] |
||
267 | mov rbx,QWORD[r12] |
||
268 | mov rax,QWORD[rsi] |
||
269 | |||
270 | xor r14,r14 |
||
271 | xor r15,r15 |
||
272 | |||
273 | mov rbp,r8 |
||
274 | mul rbx |
||
275 | mov r10,rax |
||
276 | mov rax,QWORD[rcx] |
||
277 | |||
; rbp = r8 * (low word of the first product), truncated to 64 bits.
278 | imul rbp,r10 |
||
279 | mov r11,rdx |
||
280 | |||
281 | mul rbp |
||
282 | add r10,rax |
||
283 | mov rax,QWORD[8+rsi] |
||
284 | adc rdx,0 |
||
285 | mov rdi,rdx |
||
286 | |||
287 | mul rbx |
||
288 | add r11,rax |
||
289 | mov rax,QWORD[8+rcx] |
||
290 | adc rdx,0 |
||
291 | mov r10,rdx |
||
292 | |||
293 | mul rbp |
||
294 | add rdi,rax |
||
295 | mov rax,QWORD[16+rsi] |
||
296 | adc rdx,0 |
||
297 | add rdi,r11 |
||
298 | lea r15,[4+r15] |
||
299 | adc rdx,0 |
||
300 | mov QWORD[rsp],rdi |
||
301 | mov r13,rdx |
||
302 | jmp NEAR $L$1st4x |
||
303 | ALIGN 16 |
||
; First pass, four words per iteration. r10/r11 alternate as the carry of
; the [rsi]*rbx products, r13/rdi alternate as the carry of the [rcx]*rbp
; products; r15 advances by 4 and results land in the stack buffer.
304 | $L$1st4x: |
||
305 | mul rbx |
||
306 | add r10,rax |
||
307 | mov rax,QWORD[((-16))+r15*8+rcx] |
||
308 | adc rdx,0 |
||
309 | mov r11,rdx |
||
310 | |||
311 | mul rbp |
||
312 | add r13,rax |
||
313 | mov rax,QWORD[((-8))+r15*8+rsi] |
||
314 | adc rdx,0 |
||
315 | add r13,r10 |
||
316 | adc rdx,0 |
||
317 | mov QWORD[((-24))+r15*8+rsp],r13 |
||
318 | mov rdi,rdx |
||
319 | |||
320 | mul rbx |
||
321 | add r11,rax |
||
322 | mov rax,QWORD[((-8))+r15*8+rcx] |
||
323 | adc rdx,0 |
||
324 | mov r10,rdx |
||
325 | |||
326 | mul rbp |
||
327 | add rdi,rax |
||
328 | mov rax,QWORD[r15*8+rsi] |
||
329 | adc rdx,0 |
||
330 | add rdi,r11 |
||
331 | adc rdx,0 |
||
332 | mov QWORD[((-16))+r15*8+rsp],rdi |
||
333 | mov r13,rdx |
||
334 | |||
335 | mul rbx |
||
336 | add r10,rax |
||
337 | mov rax,QWORD[r15*8+rcx] |
||
338 | adc rdx,0 |
||
339 | mov r11,rdx |
||
340 | |||
341 | mul rbp |
||
342 | add r13,rax |
||
343 | mov rax,QWORD[8+r15*8+rsi] |
||
344 | adc rdx,0 |
||
345 | add r13,r10 |
||
346 | adc rdx,0 |
||
347 | mov QWORD[((-8))+r15*8+rsp],r13 |
||
348 | mov rdi,rdx |
||
349 | |||
350 | mul rbx |
||
351 | add r11,rax |
||
352 | mov rax,QWORD[8+r15*8+rcx] |
||
353 | adc rdx,0 |
||
354 | lea r15,[4+r15] |
||
355 | mov r10,rdx |
||
356 | |||
357 | mul rbp |
||
358 | add rdi,rax |
||
359 | mov rax,QWORD[((-16))+r15*8+rsi] |
||
360 | adc rdx,0 |
||
361 | add rdi,r11 |
||
362 | adc rdx,0 |
||
363 | mov QWORD[((-32))+r15*8+rsp],rdi |
||
364 | mov r13,rdx |
||
365 | cmp r15,r9 |
||
366 | jb NEAR $L$1st4x |
||
367 | |||
; Tail of the first pass: the last two word positions, then the two top words.
368 | mul rbx |
||
369 | add r10,rax |
||
370 | mov rax,QWORD[((-16))+r15*8+rcx] |
||
371 | adc rdx,0 |
||
372 | mov r11,rdx |
||
373 | |||
374 | mul rbp |
||
375 | add r13,rax |
||
376 | mov rax,QWORD[((-8))+r15*8+rsi] |
||
377 | adc rdx,0 |
||
378 | add r13,r10 |
||
379 | adc rdx,0 |
||
380 | mov QWORD[((-24))+r15*8+rsp],r13 |
||
381 | mov rdi,rdx |
||
382 | |||
383 | mul rbx |
||
384 | add r11,rax |
||
385 | mov rax,QWORD[((-8))+r15*8+rcx] |
||
386 | adc rdx,0 |
||
387 | mov r10,rdx |
||
388 | |||
389 | mul rbp |
||
390 | add rdi,rax |
||
391 | mov rax,QWORD[rsi] |
||
392 | adc rdx,0 |
||
393 | add rdi,r11 |
||
394 | adc rdx,0 |
||
395 | mov QWORD[((-16))+r15*8+rsp],rdi |
||
396 | mov r13,rdx |
||
397 | |||
398 | xor rdi,rdi |
||
399 | add r13,r10 |
||
400 | adc rdi,0 |
||
401 | mov QWORD[((-8))+r15*8+rsp],r13 |
||
402 | mov QWORD[r15*8+rsp],rdi |
||
403 | |||
404 | lea r14,[1+r14] |
||
405 | ALIGN 4 |
||
; Outer loop, r14 = 1 .. count-1: as the first pass, but every product
; column also adds the word already stored in the stack buffer.
406 | $L$outer4x: |
||
407 | mov rbx,QWORD[r14*8+r12] |
||
408 | xor r15,r15 |
||
409 | mov r10,QWORD[rsp] |
||
410 | mov rbp,r8 |
||
411 | mul rbx |
||
412 | add r10,rax |
||
413 | mov rax,QWORD[rcx] |
||
414 | adc rdx,0 |
||
415 | |||
416 | imul rbp,r10 |
||
417 | mov r11,rdx |
||
418 | |||
419 | mul rbp |
||
420 | add r10,rax |
||
421 | mov rax,QWORD[8+rsi] |
||
422 | adc rdx,0 |
||
423 | mov rdi,rdx |
||
424 | |||
425 | mul rbx |
||
426 | add r11,rax |
||
427 | mov rax,QWORD[8+rcx] |
||
428 | adc rdx,0 |
||
429 | add r11,QWORD[8+rsp] |
||
430 | adc rdx,0 |
||
431 | mov r10,rdx |
||
432 | |||
433 | mul rbp |
||
434 | add rdi,rax |
||
435 | mov rax,QWORD[16+rsi] |
||
436 | adc rdx,0 |
||
437 | add rdi,r11 |
||
438 | lea r15,[4+r15] |
||
439 | adc rdx,0 |
||
440 | mov QWORD[rsp],rdi |
||
441 | mov r13,rdx |
||
442 | jmp NEAR $L$inner4x |
||
443 | ALIGN 16 |
||
444 | $L$inner4x: |
||
445 | mul rbx |
||
446 | add r10,rax |
||
447 | mov rax,QWORD[((-16))+r15*8+rcx] |
||
448 | adc rdx,0 |
||
449 | add r10,QWORD[((-16))+r15*8+rsp] |
||
450 | adc rdx,0 |
||
451 | mov r11,rdx |
||
452 | |||
453 | mul rbp |
||
454 | add r13,rax |
||
455 | mov rax,QWORD[((-8))+r15*8+rsi] |
||
456 | adc rdx,0 |
||
457 | add r13,r10 |
||
458 | adc rdx,0 |
||
459 | mov QWORD[((-24))+r15*8+rsp],r13 |
||
460 | mov rdi,rdx |
||
461 | |||
462 | mul rbx |
||
463 | add r11,rax |
||
464 | mov rax,QWORD[((-8))+r15*8+rcx] |
||
465 | adc rdx,0 |
||
466 | add r11,QWORD[((-8))+r15*8+rsp] |
||
467 | adc rdx,0 |
||
468 | mov r10,rdx |
||
469 | |||
470 | mul rbp |
||
471 | add rdi,rax |
||
472 | mov rax,QWORD[r15*8+rsi] |
||
473 | adc rdx,0 |
||
474 | add rdi,r11 |
||
475 | adc rdx,0 |
||
476 | mov QWORD[((-16))+r15*8+rsp],rdi |
||
477 | mov r13,rdx |
||
478 | |||
479 | mul rbx |
||
480 | add r10,rax |
||
481 | mov rax,QWORD[r15*8+rcx] |
||
482 | adc rdx,0 |
||
483 | add r10,QWORD[r15*8+rsp] |
||
484 | adc rdx,0 |
||
485 | mov r11,rdx |
||
486 | |||
487 | mul rbp |
||
488 | add r13,rax |
||
489 | mov rax,QWORD[8+r15*8+rsi] |
||
490 | adc rdx,0 |
||
491 | add r13,r10 |
||
492 | adc rdx,0 |
||
493 | mov QWORD[((-8))+r15*8+rsp],r13 |
||
494 | mov rdi,rdx |
||
495 | |||
496 | mul rbx |
||
497 | add r11,rax |
||
498 | mov rax,QWORD[8+r15*8+rcx] |
||
499 | adc rdx,0 |
||
500 | add r11,QWORD[8+r15*8+rsp] |
||
501 | adc rdx,0 |
||
502 | lea r15,[4+r15] |
||
503 | mov r10,rdx |
||
504 | |||
505 | mul rbp |
||
506 | add rdi,rax |
||
507 | mov rax,QWORD[((-16))+r15*8+rsi] |
||
508 | adc rdx,0 |
||
509 | add rdi,r11 |
||
510 | adc rdx,0 |
||
511 | mov QWORD[((-32))+r15*8+rsp],rdi |
||
512 | mov r13,rdx |
||
513 | cmp r15,r9 |
||
514 | jb NEAR $L$inner4x |
||
515 | |||
; Tail of one outer iteration; the outer index r14 is advanced here.
516 | mul rbx |
||
517 | add r10,rax |
||
518 | mov rax,QWORD[((-16))+r15*8+rcx] |
||
519 | adc rdx,0 |
||
520 | add r10,QWORD[((-16))+r15*8+rsp] |
||
521 | adc rdx,0 |
||
522 | mov r11,rdx |
||
523 | |||
524 | mul rbp |
||
525 | add r13,rax |
||
526 | mov rax,QWORD[((-8))+r15*8+rsi] |
||
527 | adc rdx,0 |
||
528 | add r13,r10 |
||
529 | adc rdx,0 |
||
530 | mov QWORD[((-24))+r15*8+rsp],r13 |
||
531 | mov rdi,rdx |
||
532 | |||
533 | mul rbx |
||
534 | add r11,rax |
||
535 | mov rax,QWORD[((-8))+r15*8+rcx] |
||
536 | adc rdx,0 |
||
537 | add r11,QWORD[((-8))+r15*8+rsp] |
||
538 | adc rdx,0 |
||
539 | lea r14,[1+r14] |
||
540 | mov r10,rdx |
||
541 | |||
542 | mul rbp |
||
543 | add rdi,rax |
||
544 | mov rax,QWORD[rsi] |
||
545 | adc rdx,0 |
||
546 | add rdi,r11 |
||
547 | adc rdx,0 |
||
548 | mov QWORD[((-16))+r15*8+rsp],rdi |
||
549 | mov r13,rdx |
||
550 | |||
; Fold the previous top word (slot count) into the new top words.
551 | xor rdi,rdi |
||
552 | add r13,r10 |
||
553 | adc rdi,0 |
||
554 | add r13,QWORD[r9*8+rsp] |
||
555 | adc rdi,0 |
||
556 | mov QWORD[((-8))+r15*8+rsp],r13 |
||
557 | mov QWORD[r15*8+rsp],rdi |
||
558 | |||
559 | cmp r14,r9 |
||
560 | jb NEAR $L$outer4x |
; Final subtraction, four words per iteration. rdi is reloaded from its
; parking slot, r9 becomes count/4, and r15 = count/4 - 1 loop iterations are
; followed by a straight-line tail. "sub rax,[rcx]" starts the borrow chain;
; the mov/lea/dec/jnz between the sbb instructions leave CF untouched.
||
561 | mov rdi,QWORD[16+r9*8+rsp] |
||
562 | mov rax,QWORD[rsp] |
||
563 | mov rdx,QWORD[8+rsp] |
||
564 | shr r9,2 |
||
565 | lea rsi,[rsp] |
||
566 | xor r14,r14 |
||
567 | |||
568 | sub rax,QWORD[rcx] |
||
569 | mov rbx,QWORD[16+rsi] |
||
570 | mov rbp,QWORD[24+rsi] |
||
571 | sbb rdx,QWORD[8+rcx] |
||
572 | lea r15,[((-1))+r9] |
||
573 | jmp NEAR $L$sub4x |
||
574 | ALIGN 16 |
||
575 | $L$sub4x: |
||
576 | mov QWORD[r14*8+rdi],rax |
||
577 | mov QWORD[8+r14*8+rdi],rdx |
||
578 | sbb rbx,QWORD[16+r14*8+rcx] |
||
579 | mov rax,QWORD[32+r14*8+rsi] |
||
580 | mov rdx,QWORD[40+r14*8+rsi] |
||
581 | sbb rbp,QWORD[24+r14*8+rcx] |
||
582 | mov QWORD[16+r14*8+rdi],rbx |
||
583 | mov QWORD[24+r14*8+rdi],rbp |
||
584 | sbb rax,QWORD[32+r14*8+rcx] |
||
585 | mov rbx,QWORD[48+r14*8+rsi] |
||
586 | mov rbp,QWORD[56+r14*8+rsi] |
||
587 | sbb rdx,QWORD[40+r14*8+rcx] |
||
588 | lea r14,[4+r14] |
||
589 | dec r15 |
||
590 | jnz NEAR $L$sub4x |
||
591 | |||
592 | mov QWORD[r14*8+rdi],rax |
||
593 | mov rax,QWORD[32+r14*8+rsi] |
||
594 | sbb rbx,QWORD[16+r14*8+rcx] |
||
595 | mov QWORD[8+r14*8+rdi],rdx |
||
596 | sbb rbp,QWORD[24+r14*8+rcx] |
||
597 | mov QWORD[16+r14*8+rdi],rbx |
||
598 | |||
; rax = top buffer word minus the final borrow (select mask, expected to be
; 0 or all-ones). The DB bytes 66 48 0F 6E C0 encode "movq xmm0,rax";
; punpcklqdq then copies the mask into both qword lanes of xmm0.
599 | sbb rax,0 |
||
600 | DB 66h, 48h, 0fh, 6eh, 0c0h |
||
601 | punpcklqdq xmm0,xmm0 |
||
602 | mov QWORD[24+r14*8+rdi],rbp |
||
603 | xor r14,r14 |
||
604 | |||
605 | mov r15,r9 |
||
606 | pxor xmm5,xmm5 |
||
607 | jmp NEAR $L$copy4x |
||
608 | ALIGN 16 |
||
; Branch-free select, 32 bytes per iteration (r14 is a byte offset, r15 counts
; count/4 iterations): out = ((buffer ^ out) & mask) ^ out. The buffer is
; then zeroed with xmm5; movdqa is usable there because rsp was rounded to a
; 1024-byte boundary and the offsets are multiples of 16.
609 | $L$copy4x: |
||
610 | movdqu xmm2,XMMWORD[r14*1+rsp] |
||
611 | movdqu xmm4,XMMWORD[16+r14*1+rsp] |
||
612 | movdqu xmm1,XMMWORD[r14*1+rdi] |
||
613 | movdqu xmm3,XMMWORD[16+r14*1+rdi] |
||
614 | pxor xmm2,xmm1 |
||
615 | pxor xmm4,xmm3 |
||
616 | pand xmm2,xmm0 |
||
617 | pand xmm4,xmm0 |
||
618 | pxor xmm2,xmm1 |
||
619 | pxor xmm4,xmm3 |
||
620 | movdqu XMMWORD[r14*1+rdi],xmm2 |
||
621 | movdqu XMMWORD[16+r14*1+rdi],xmm4 |
||
622 | movdqa XMMWORD[r14*1+rsp],xmm5 |
||
623 | movdqa XMMWORD[16+r14*1+rsp],xmm5 |
||
624 | |||
625 | lea r14,[32+r14] |
||
626 | dec r15 |
||
627 | jnz NEAR $L$copy4x |
||
628 | |||
; Epilogue: "shl r9,2" undoes the earlier "shr r9,2" so the saved-rsp slot
; can be addressed again; then restore registers, release the frame, return 1.
629 | shl r9,2 |
||
630 | mov rsi,QWORD[8+r9*8+rsp] |
||
631 | mov rax,1 |
||
632 | mov r15,QWORD[rsi] |
||
633 | mov r14,QWORD[8+rsi] |
||
634 | mov r13,QWORD[16+rsi] |
||
635 | mov r12,QWORD[24+rsi] |
||
636 | mov rbp,QWORD[32+rsi] |
||
637 | mov rbx,QWORD[40+rsi] |
||
638 | lea rsp,[48+rsi] |
||
639 | $L$mul4x_epilogue: |
||
640 | mov rdi,QWORD[8+rsp] ;WIN64 epilogue |
||
641 | mov rsi,QWORD[16+rsp] |
||
642 | DB 0F3h,0C3h ;repret |
||
643 | $L$SEH_end_bn_mul4x_mont: |
||
;-----------------------------------------------------------------------
; bn_sqr8x_mont -- wrapper that sets up a stack frame, calls the external
; routine bn_sqr8x_internal, then performs the final subtraction and a
; branch-free conditional copy into the output array.
; The label is not declared global in the visible source; bn_mul_mont jumps
; to $L$sqr8x_enter when both operand pointers are equal and the word count
; is a multiple of 8. The stand-alone prologue performs the same Win64
; argument shuffle as the other entry points. Returns rax = 1.
; NOTE(review): everything after the call relies on register values left by
; bn_sqr8x_internal (rbp, rax, r9, rdi, rdx), which is defined elsewhere --
; verify against that routine.
;-----------------------------------------------------------------------
644 | EXTERN bn_sqr8x_internal |
||
645 | |||
646 | |||
647 | ALIGN 32 |
||
648 | bn_sqr8x_mont: |
||
649 | mov QWORD[8+rsp],rdi ;WIN64 prologue |
||
650 | mov QWORD[16+rsp],rsi |
||
651 | mov rax,rsp |
||
652 | $L$SEH_begin_bn_sqr8x_mont: |
||
653 | mov rdi,rcx |
||
654 | mov rsi,rdx |
||
655 | mov rdx,r8 |
||
656 | mov rcx,r9 |
||
657 | mov r8,QWORD[40+rsp] |
||
658 | mov r9,QWORD[48+rsp] |
||
659 | |||
660 | |||
; rax keeps the rsp value from before the pushes; it is stored in the frame
; at [rsp+40] and used by the epilogue and by sqr_handler.
661 | $L$sqr8x_enter: |
||
662 | mov rax,rsp |
||
663 | push rbx |
||
664 | push rbp |
||
665 | push r12 |
||
666 | push r13 |
||
667 | push r14 |
||
668 | push r15 |
||
669 | |||
; r9 = -(count*8) (negated byte length), r10 = count*32.
670 | mov r10d,r9d |
||
671 | shl r9d,3 |
||
672 | shl r10,3+2 |
||
673 | neg r9 |
||
674 | |||
675 | |||
676 | |||
677 | |||
678 | |||
679 | |||
; Frame placement: r11 = (candidate frame address - rsi) mod 4096. Depending
; on how r11 compares with count*32, rsp is lowered by an extra amount so
; the frame sits at a chosen offset from the input array modulo 4096.
; NOTE(review): presumably this avoids 4 KiB aliasing between the frame and
; the input operand -- confirm the intent.
680 | lea r11,[((-64))+r9*2+rsp] |
||
681 | mov r8,QWORD[r8] |
||
682 | sub r11,rsi |
||
683 | and r11,4095 |
||
684 | cmp r10,r11 |
||
685 | jb NEAR $L$sqr8x_sp_alt |
||
686 | sub rsp,r11 |
||
687 | lea rsp,[((-64))+r9*2+rsp] |
||
688 | jmp NEAR $L$sqr8x_sp_done |
||
689 | |||
690 | ALIGN 32 |
||
; Alternative placement: the extra adjustment is clamped to 0 when the
; subtraction borrows. "mov r10,0" (rather than xor) leaves CF from the
; preceding sub intact for the cmovc.
691 | $L$sqr8x_sp_alt: |
||
692 | lea r10,[((4096-64))+r9*2] |
||
693 | lea rsp,[((-64))+r9*2+rsp] |
||
694 | sub r11,r10 |
||
695 | mov r10,0 |
||
696 | cmovc r11,r10 |
||
697 | sub rsp,r11 |
||
; Align the frame to 64 bytes; r10 = -(count*8), r9 = count*8.
; Frame slots: [rsp+32] = constant loaded from [r8], [rsp+40] = original rsp.
698 | $L$sqr8x_sp_done: |
||
699 | and rsp,-64 |
||
700 | mov r10,r9 |
||
701 | neg r9 |
||
702 | |||
703 | mov QWORD[32+rsp],r8 |
||
704 | mov QWORD[40+rsp],rax |
||
705 | $L$sqr8x_body: |
||
706 | |||
; Hand-encoded movq instructions that pass values in XMM registers:
;   66 48 0F 6E D1 = movq xmm2,rcx
;   66 48 0F 6E CF = movq xmm1,rdi   (output pointer, recovered after the call)
;   66 49 0F 6E DA = movq xmm3,r10
707 | DB 102,72,15,110,209 |
||
708 | pxor xmm0,xmm0 |
||
709 | DB 102,72,15,110,207 |
||
710 | DB 102,73,15,110,218 |
||
711 | call bn_sqr8x_internal |
||
712 | |||
713 | |||
714 | |||
715 | |||
; After the call: rbx = rdi + r9, rcx = r9 / 32 (arithmetic shift),
; rdx = r9, and rdi is restored from xmm1 (66 48 0F 7E CF = movq rdi,xmm1).
; The loops below count rcx and r9 up to zero, so r9 is presumably negative
; (-(count*8)) on return from bn_sqr8x_internal -- confirm.
; "sar" leaves CF = last bit shifted out; assuming r9 is a multiple of 32
; that is 0, so the sbb chain below starts without a borrow.
716 | lea rbx,[r9*1+rdi] |
||
717 | mov rcx,r9 |
||
718 | mov rdx,r9 |
||
719 | DB 102,72,15,126,207 |
||
720 | sar rcx,3+2 |
||
721 | jmp NEAR $L$sqr8x_sub |
||
722 | |||
723 | ALIGN 32 |
||
; Subtract the array at rbp from the array at rbx, four words per iteration,
; writing to [rdi]. lea/mov/inc do not modify CF, so the borrow chain
; continues across iterations.
724 | $L$sqr8x_sub: |
||
725 | mov r12,QWORD[rbx] |
||
726 | mov r13,QWORD[8+rbx] |
||
727 | mov r14,QWORD[16+rbx] |
||
728 | mov r15,QWORD[24+rbx] |
||
729 | lea rbx,[32+rbx] |
||
730 | sbb r12,QWORD[rbp] |
||
731 | sbb r13,QWORD[8+rbp] |
||
732 | sbb r14,QWORD[16+rbp] |
||
733 | sbb r15,QWORD[24+rbp] |
||
734 | lea rbp,[32+rbp] |
||
735 | mov QWORD[rdi],r12 |
||
736 | mov QWORD[8+rdi],r13 |
||
737 | mov QWORD[16+rdi],r14 |
||
738 | mov QWORD[24+rdi],r15 |
||
739 | lea rdi,[32+rdi] |
||
740 | inc rcx |
||
741 | jnz NEAR $L$sqr8x_sub |
||
742 | |||
; rax minus the final borrow becomes the select mask (expected 0 or
; all-ones); rbx and rdi are rewound by adding r9.
743 | sbb rax,0 |
||
744 | lea rbx,[r9*1+rbx] |
||
745 | lea rdi,[r9*1+rdi] |
||
746 | |||
; 66 48 0F 6E C8 = movq xmm1,rax; pshufd with selector 0 copies the low dword
; of the mask into all four dword lanes. rsi = original rsp from the frame.
747 | DB 102,72,15,110,200 |
||
748 | pxor xmm0,xmm0 |
||
749 | pshufd xmm1,xmm1,0 |
||
750 | mov rsi,QWORD[40+rsp] |
||
751 | jmp NEAR $L$sqr8x_cond_copy |
||
752 | |||
753 | ALIGN 32 |
||
; Conditional copy, 32 bytes per iteration:
;   out = (tmp AND mask) OR (out AND NOT mask)
; where NOT mask is produced by "pcmpeqd xmm0,xmm1" against zero (valid when
; the mask is 0 or all-ones). The temporary words at rbx and at rbx+rdx are
; overwritten with zeros as they are consumed.
754 | $L$sqr8x_cond_copy: |
||
755 | movdqa xmm2,XMMWORD[rbx] |
||
756 | movdqa xmm3,XMMWORD[16+rbx] |
||
757 | lea rbx,[32+rbx] |
||
758 | movdqu xmm4,XMMWORD[rdi] |
||
759 | movdqu xmm5,XMMWORD[16+rdi] |
||
760 | lea rdi,[32+rdi] |
||
761 | movdqa XMMWORD[(-32)+rbx],xmm0 |
||
762 | movdqa XMMWORD[(-16)+rbx],xmm0 |
||
763 | movdqa XMMWORD[(-32)+rdx*1+rbx],xmm0 |
||
764 | movdqa XMMWORD[(-16)+rdx*1+rbx],xmm0 |
||
765 | pcmpeqd xmm0,xmm1 |
||
766 | pand xmm2,xmm1 |
||
767 | pand xmm3,xmm1 |
||
768 | pand xmm4,xmm0 |
||
769 | pand xmm5,xmm0 |
||
770 | pxor xmm0,xmm0 |
||
771 | por xmm4,xmm2 |
||
772 | por xmm5,xmm3 |
||
773 | movdqu XMMWORD[(-32)+rdi],xmm4 |
||
774 | movdqu XMMWORD[(-16)+rdi],xmm5 |
||
775 | add r9,32 |
||
776 | jnz NEAR $L$sqr8x_cond_copy |
||
777 | |||
; Epilogue: rsi holds the original rsp; the six pushed registers sit just
; below it. Restore them, release the frame and return 1.
778 | mov rax,1 |
||
779 | mov r15,QWORD[((-48))+rsi] |
||
780 | mov r14,QWORD[((-40))+rsi] |
||
781 | mov r13,QWORD[((-32))+rsi] |
||
782 | mov r12,QWORD[((-24))+rsi] |
||
783 | mov rbp,QWORD[((-16))+rsi] |
||
784 | mov rbx,QWORD[((-8))+rsi] |
||
785 | lea rsp,[rsi] |
||
786 | $L$sqr8x_epilogue: |
||
787 | mov rdi,QWORD[8+rsp] ;WIN64 epilogue |
||
788 | mov rsi,QWORD[16+rsp] |
||
789 | DB 0F3h,0C3h ;repret |
||
790 | $L$SEH_end_bn_sqr8x_mont: |
||
; NUL-terminated ASCII identification string embedded in the code section:
; "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>"
791 | DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 |
||
792 | DB 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 |
||
793 | DB 54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83 |
||
794 | DB 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 |
||
795 | DB 115,108,46,111,114,103,62,0 |
||
796 | ALIGN 16 |
||
; Import-table pointer to RtlVirtualUnwind, called indirectly from the SEH tail.
797 | EXTERN __imp_RtlVirtualUnwind |
||
798 | |||
;-----------------------------------------------------------------------
; mul_handler -- SEH language-specific handler registered in .xdata for
; bn_mul_mont and bn_mul4x_mont.
; In:  r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT* (third and fourth Win64 args).
; It works out the routine's frame base into rax, patches the saved
; non-volatile registers into the CONTEXT when the fault lies between the
; body and epilogue labels, and finishes in $L$common_seh_tail.
; The numeric offsets follow the Win64 CONTEXT layout (120=Rax, 144=Rbx,
; 152=Rsp, 160=Rbp, 192=R9, 216..240=R12..R15, 248=Rip) and the
; DISPATCHER_CONTEXT layout (8=ImageBase, 56=HandlerData).
;-----------------------------------------------------------------------
799 | ALIGN 16 |
||
800 | mul_handler: |
||
801 | push rsi |
||
802 | push rdi |
||
803 | push rbx |
||
804 | push rbp |
||
805 | push r12 |
||
806 | push r13 |
||
807 | push r14 |
||
808 | push r15 |
||
809 | pushfq |
||
; 64 bytes: 32 bytes of shadow space plus four stack arguments for the
; RtlVirtualUnwind call made in the shared tail.
810 | sub rsp,64 |
||
811 | |||
; rax = context->Rax (the routines copy the entry rsp into rax in their
; prologue), rbx = context->Rip.
812 | mov rax,QWORD[120+r8] |
||
813 | mov rbx,QWORD[248+r8] |
||
814 | |||
; rsi = disp->ImageBase, r11 = disp->HandlerData (two RVAs: body, epilogue).
815 | mov rsi,QWORD[8+r9] |
||
816 | mov r11,QWORD[56+r9] |
||
817 | |||
; Rip before the body label: the frame is not set up yet, use context->Rax.
818 | mov r10d,DWORD[r11] |
||
819 | lea r10,[r10*1+rsi] |
||
820 | cmp rbx,r10 |
||
821 | jb NEAR $L$common_seh_tail |
||
822 | |||
; rax = context->Rsp.
823 | mov rax,QWORD[152+r8] |
||
824 | |||
; Rip at or past the epilogue label: registers are already restored and rsp
; is back at its entry value.
825 | mov r10d,DWORD[4+r11] |
||
826 | lea r10,[r10*1+rsi] |
||
827 | cmp rbx,r10 |
||
828 | jae NEAR $L$common_seh_tail |
||
829 | |||
; Inside the body: r10 = context->R9 (word count); the routine stored its
; pre-allocation rsp at [rsp+8+count*8]. Adding 48 skips the six pushed
; registers, giving the rsp value at routine entry.
830 | mov r10,QWORD[192+r8] |
||
831 | mov rax,QWORD[8+r10*8+rax] |
||
832 | lea rax,[48+rax] |
||
833 | |||
; Recover the pushed registers and write them into the CONTEXT.
834 | mov rbx,QWORD[((-8))+rax] |
||
835 | mov rbp,QWORD[((-16))+rax] |
||
836 | mov r12,QWORD[((-24))+rax] |
||
837 | mov r13,QWORD[((-32))+rax] |
||
838 | mov r14,QWORD[((-40))+rax] |
||
839 | mov r15,QWORD[((-48))+rax] |
||
840 | mov QWORD[144+r8],rbx |
||
841 | mov QWORD[160+r8],rbp |
||
842 | mov QWORD[216+r8],r12 |
||
843 | mov QWORD[224+r8],r13 |
||
844 | mov QWORD[232+r8],r14 |
||
845 | mov QWORD[240+r8],r15 |
||
846 | |||
847 | jmp NEAR $L$common_seh_tail |
||
848 | |||
849 | |||
850 | |||
;-----------------------------------------------------------------------
; sqr_handler -- SEH language-specific handler registered in .xdata for
; bn_sqr8x_mont, followed by $L$common_seh_tail, which mul_handler also
; jumps to.
; In:  r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT*.
; Out: eax = 1 (ExceptionContinueSearch).
; Same structure as mul_handler; the only difference is that the original
; rsp is read from the routine's frame slot at [context->Rsp + 40].
; Offsets follow the Win64 CONTEXT layout (120=Rax, 144=Rbx, 152=Rsp,
; 160=Rbp, 168=Rsi, 176=Rdi, 216..240=R12..R15, 248=Rip) and the
; DISPATCHER_CONTEXT layout (0=ControlPc, 8=ImageBase, 16=FunctionEntry,
; 24=EstablisherFrame, 40=ContextRecord, 56=HandlerData).
;-----------------------------------------------------------------------
851 | ALIGN 16 |
||
852 | sqr_handler: |
||
853 | push rsi |
||
854 | push rdi |
||
855 | push rbx |
||
856 | push rbp |
||
857 | push r12 |
||
858 | push r13 |
||
859 | push r14 |
||
860 | push r15 |
||
861 | pushfq |
||
862 | sub rsp,64 |
||
863 | |||
; rax = context->Rax, rbx = context->Rip.
864 | mov rax,QWORD[120+r8] |
||
865 | mov rbx,QWORD[248+r8] |
||
866 | |||
; rsi = disp->ImageBase, r11 = disp->HandlerData (body RVA, epilogue RVA).
867 | mov rsi,QWORD[8+r9] |
||
868 | mov r11,QWORD[56+r9] |
||
869 | |||
; Rip before the body label: use context->Rax as the frame base.
870 | mov r10d,DWORD[r11] |
||
871 | lea r10,[r10*1+rsi] |
||
872 | cmp rbx,r10 |
||
873 | jb NEAR $L$common_seh_tail |
||
874 | |||
; rax = context->Rsp.
875 | mov rax,QWORD[152+r8] |
||
876 | |||
; Rip at or past the epilogue label: nothing left to restore.
877 | mov r10d,DWORD[4+r11] |
||
878 | lea r10,[r10*1+rsi] |
||
879 | cmp rbx,r10 |
||
880 | jae NEAR $L$common_seh_tail |
||
881 | |||
; Inside the body: the routine stored its original rsp at [rsp+40].
882 | mov rax,QWORD[40+rax] |
||
883 | |||
; Recover the six pushed registers and write them into the CONTEXT.
884 | mov rbx,QWORD[((-8))+rax] |
||
885 | mov rbp,QWORD[((-16))+rax] |
||
886 | mov r12,QWORD[((-24))+rax] |
||
887 | mov r13,QWORD[((-32))+rax] |
||
888 | mov r14,QWORD[((-40))+rax] |
||
889 | mov r15,QWORD[((-48))+rax] |
||
890 | mov QWORD[144+r8],rbx |
||
891 | mov QWORD[160+r8],rbp |
||
892 | mov QWORD[216+r8],r12 |
||
893 | mov QWORD[224+r8],r13 |
||
894 | mov QWORD[232+r8],r14 |
||
895 | mov QWORD[240+r8],r15 |
||
896 | |||
; Shared tail. rax = rsp value at routine entry. The routines saved rdi/rsi
; in the home space at [rax+8]/[rax+16]; publish Rsp, Rsi and Rdi into the
; CONTEXT.
897 | $L$common_seh_tail: |
||
898 | mov rdi,QWORD[8+rax] |
||
899 | mov rsi,QWORD[16+rax] |
||
900 | mov QWORD[152+r8],rax |
||
901 | mov QWORD[168+r8],rsi |
||
902 | mov QWORD[176+r8],rdi |
||
903 | |||
; Copy the patched CONTEXT (154 qwords) into disp->ContextRecord.
; The DD emits bytes FC F3 48 A5 = "cld; rep movsq".
904 | mov rdi,QWORD[40+r9] |
||
905 | mov rsi,r8 |
||
906 | mov ecx,154 |
||
907 | DD 0xa548f3fc |
||
908 | |||
; Call RtlVirtualUnwind with:
;   rcx = 0, rdx = disp->ImageBase, r8 = disp->ControlPc,
;   r9 = disp->FunctionEntry, [rsp+32] = disp->ContextRecord,
;   [rsp+40] = &disp->HandlerData, [rsp+48] = &disp->EstablisherFrame,
;   [rsp+56] = 0.
909 | mov rsi,r9 |
||
910 | xor rcx,rcx |
||
911 | mov rdx,QWORD[8+rsi] |
||
912 | mov r8,QWORD[rsi] |
||
913 | mov r9,QWORD[16+rsi] |
||
914 | mov r10,QWORD[40+rsi] |
||
915 | lea r11,[56+rsi] |
||
916 | lea r12,[24+rsi] |
||
917 | mov QWORD[32+rsp],r10 |
||
918 | mov QWORD[40+rsp],r11 |
||
919 | mov QWORD[48+rsp],r12 |
||
920 | mov QWORD[56+rsp],rcx |
||
921 | call QWORD[__imp_RtlVirtualUnwind] |
||
922 | |||
; Return 1 and restore everything pushed on entry, in reverse order.
923 | mov eax,1 |
||
924 | add rsp,64 |
||
925 | popfq |
||
926 | pop r15 |
||
927 | pop r14 |
||
928 | pop r13 |
||
929 | pop r12 |
||
930 | pop rbp |
||
931 | pop rbx |
||
932 | pop rdi |
||
933 | pop rsi |
||
934 | DB 0F3h,0C3h ;repret |
||
935 | |||
936 | |||
; Win64 unwind tables.
; .pdata holds one three-dword record per routine: begin RVA, end RVA and
; the RVA of its unwind-info record in .xdata ("wrt ..imagebase" emits
; image-relative addresses).
937 | section .pdata rdata align=4 |
||
938 | ALIGN 4 |
||
939 | DD $L$SEH_begin_bn_mul_mont wrt ..imagebase |
||
940 | DD $L$SEH_end_bn_mul_mont wrt ..imagebase |
||
941 | DD $L$SEH_info_bn_mul_mont wrt ..imagebase |
||
942 | |||
943 | DD $L$SEH_begin_bn_mul4x_mont wrt ..imagebase |
||
944 | DD $L$SEH_end_bn_mul4x_mont wrt ..imagebase |
||
945 | DD $L$SEH_info_bn_mul4x_mont wrt ..imagebase |
||
946 | |||
947 | DD $L$SEH_begin_bn_sqr8x_mont wrt ..imagebase |
||
948 | DD $L$SEH_end_bn_sqr8x_mont wrt ..imagebase |
||
949 | DD $L$SEH_info_bn_sqr8x_mont wrt ..imagebase |
; .xdata records. First byte 9 = version 1 with the exception-handler flag
; set; the remaining header bytes are 0 (no prolog size, no unwind codes,
; no frame register). Next comes the handler RVA, then the handler data:
; the body and epilogue label RVAs that the handlers read through
; HandlerData[0] and HandlerData[1].
950 | section .xdata rdata align=8 |
||
951 | ALIGN 8 |
||
952 | $L$SEH_info_bn_mul_mont: |
||
953 | DB 9,0,0,0 |
||
954 | DD mul_handler wrt ..imagebase |
||
955 | DD $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase |
||
956 | $L$SEH_info_bn_mul4x_mont: |
||
957 | DB 9,0,0,0 |
||
958 | DD mul_handler wrt ..imagebase |
||
959 | DD $L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase |
||
960 | $L$SEH_info_bn_sqr8x_mont: |
||
961 | DB 9,0,0,0 |
||
962 | DD sqr_handler wrt ..imagebase |
||
963 | DD $L$sqr8x_body wrt ..imagebase,$L$sqr8x_epilogue wrt ..imagebase |