nexmon – Blame information for rev 1
?pathlinks?
Rev | Author | Line No. | Line |
---|---|---|---|
1 | office | 1 | default rel |
2 | %define XMMWORD |
||
3 | %define YMMWORD |
||
4 | %define ZMMWORD |
||
5 | section .text code align=64 |
||
6 | |||
7 | |||
8 | EXTERN OPENSSL_ia32cap_P |
||
9 | |||
;-----------------------------------------------------------------------
; rsaz_512_sqr -- Win64 entry point.
; Repeatedly squares an 8-qword value and reduces it with the helpers
; __rsaz_512_reduce / __rsaz_512_subtract.
; Win64 args as used by the code below:
;   rcx      -> rdi : destination, 8 qwords (written by __rsaz_512_subtract)
;   rdx      -> rsi : source, 8 qwords
;   r8       -> rbp : pointer to 8 qwords read by the reduce/subtract helpers
;                     (presumably the modulus -- confirm against the C prototype)
;   r9       -> [128+rsp] : multiplier read by __rsaz_512_reduce
;   [40+rsp] -> r8d : number of squarings to perform
; Frame: [rsp..127+rsp] 16-qword square, [128+rsp] 4th arg,
;        [128+8+rsp] remaining iteration count.
;-----------------------------------------------------------------------
10 | global rsaz_512_sqr |
||
11 | |||
12 | ALIGN 32 |
||
13 | rsaz_512_sqr: |
||
; rdi/rsi are callee-saved on Win64: park them in the caller-provided
; slots above the return address.
14 | mov QWORD[8+rsp],rdi ;WIN64 prologue |
||
15 | mov QWORD[16+rsp],rsi |
||
16 | mov rax,rsp |
||
17 | $L$SEH_begin_rsaz_512_sqr: |
||
; Move the Win64 argument registers into rdi,rsi,rdx,rcx,r8.
18 | mov rdi,rcx |
||
19 | mov rsi,rdx |
||
20 | mov rdx,r8 |
||
21 | mov rcx,r9 |
||
22 | mov r8,QWORD[40+rsp] |
||
23 | |||
24 | |||
; Save the remaining callee-saved integer registers (6 pushes = 48 bytes).
25 | push rbx |
||
26 | push rbp |
||
27 | push r12 |
||
28 | push r13 |
||
29 | push r14 |
||
30 | push r15 |
||
31 | |||
32 | sub rsp,128+24 |
||
33 | $L$sqr_body: |
||
; rbp = 3rd argument; rdx,rax = a[0],a[1], as expected at the loop top.
34 | mov rbp,rdx |
||
35 | mov rdx,QWORD[rsi] |
||
36 | mov rax,QWORD[8+rsi] |
||
37 | mov QWORD[128+rsp],rcx |
||
38 | jmp NEAR $L$oop_sqr |
||
39 | |||
40 | ALIGN 32 |
||
; One pass = one squaring of the 8 qwords at rsi (a[0..7]).
; Cross products a[i]*a[j] (i<j) are accumulated once, doubled piecewise
; (add/adc, lea [x*2+bit], shr 63 to keep the shifted-out bit) and the
; squares a[i]^2 are added in; product limbs go to [rsp+8*k].
; NOTE(review): upstream OpenSSL later reworked the carry handling of this
; routine (CVE-2019-1551, "overflow bug in rsaz_512_sqr"); this listing
; looks like the earlier form -- confirm against upstream before reuse.
41 | $L$oop_sqr: |
||
42 | mov DWORD[((128+8))+rsp],r8d |
||
43 | |||
; a[0]*a[1..7] -> r8..r15 (product limbs 1..8).
44 | mov rbx,rdx |
||
45 | mul rdx |
||
46 | mov r8,rax |
||
47 | mov rax,QWORD[16+rsi] |
||
48 | mov r9,rdx |
||
49 | |||
50 | mul rbx |
||
51 | add r9,rax |
||
52 | mov rax,QWORD[24+rsi] |
||
53 | mov r10,rdx |
||
54 | adc r10,0 |
||
55 | |||
56 | mul rbx |
||
57 | add r10,rax |
||
58 | mov rax,QWORD[32+rsi] |
||
59 | mov r11,rdx |
||
60 | adc r11,0 |
||
61 | |||
62 | mul rbx |
||
63 | add r11,rax |
||
64 | mov rax,QWORD[40+rsi] |
||
65 | mov r12,rdx |
||
66 | adc r12,0 |
||
67 | |||
68 | mul rbx |
||
69 | add r12,rax |
||
70 | mov rax,QWORD[48+rsi] |
||
71 | mov r13,rdx |
||
72 | adc r13,0 |
||
73 | |||
74 | mul rbx |
||
75 | add r13,rax |
||
76 | mov rax,QWORD[56+rsi] |
||
77 | mov r14,rdx |
||
78 | adc r14,0 |
||
79 | |||
80 | mul rbx |
||
81 | add r14,rax |
||
82 | mov rax,rbx |
||
83 | mov r15,rdx |
||
84 | adc r15,0 |
||
85 | |||
; Double r8,r9, add a[0]^2, store product limbs 0 and 1.
; rcx keeps the top bit of the undoubled r9 for the next doubling step.
86 | add r8,r8 |
||
87 | mov rcx,r9 |
||
88 | adc r9,r9 |
||
89 | |||
90 | mul rax |
||
91 | mov QWORD[rsp],rax |
||
92 | add r8,rdx |
||
93 | adc r9,0 |
||
94 | |||
95 | mov QWORD[8+rsp],r8 |
||
96 | shr rcx,63 |
||
97 | |||
98 | |||
; a[1]*a[2..7] added into r10..r15, new top limb in r8.
99 | mov r8,QWORD[8+rsi] |
||
100 | mov rax,QWORD[16+rsi] |
||
101 | mul r8 |
||
102 | add r10,rax |
||
103 | mov rax,QWORD[24+rsi] |
||
104 | mov rbx,rdx |
||
105 | adc rbx,0 |
||
106 | |||
107 | mul r8 |
||
108 | add r11,rax |
||
109 | mov rax,QWORD[32+rsi] |
||
110 | adc rdx,0 |
||
111 | add r11,rbx |
||
112 | mov rbx,rdx |
||
113 | adc rbx,0 |
||
114 | |||
115 | mul r8 |
||
116 | add r12,rax |
||
117 | mov rax,QWORD[40+rsi] |
||
118 | adc rdx,0 |
||
119 | add r12,rbx |
||
120 | mov rbx,rdx |
||
121 | adc rbx,0 |
||
122 | |||
123 | mul r8 |
||
124 | add r13,rax |
||
125 | mov rax,QWORD[48+rsi] |
||
126 | adc rdx,0 |
||
127 | add r13,rbx |
||
128 | mov rbx,rdx |
||
129 | adc rbx,0 |
||
130 | |||
131 | mul r8 |
||
132 | add r14,rax |
||
133 | mov rax,QWORD[56+rsi] |
||
134 | adc rdx,0 |
||
135 | add r14,rbx |
||
136 | mov rbx,rdx |
||
137 | adc rbx,0 |
||
138 | |||
139 | mul r8 |
||
140 | add r15,rax |
||
141 | mov rax,r8 |
||
142 | adc rdx,0 |
||
143 | add r15,rbx |
||
144 | mov r8,rdx |
||
145 | mov rdx,r10 |
||
146 | adc r8,0 |
||
147 | |||
; "add rdx,rdx" only produces CF = top bit of r10 for the adc that doubles
; r11; lea doubles r10 and inserts the saved bit without touching flags.
; Then a[1]^2 is added and product limbs 2 and 3 are stored.
148 | add rdx,rdx |
||
149 | lea r10,[r10*2+rcx] |
||
150 | mov rbx,r11 |
||
151 | adc r11,r11 |
||
152 | |||
153 | mul rax |
||
154 | add r9,rax |
||
155 | adc r10,rdx |
||
156 | adc r11,0 |
||
157 | |||
158 | mov QWORD[16+rsp],r9 |
||
159 | mov QWORD[24+rsp],r10 |
||
160 | shr rbx,63 |
||
161 | |||
162 | |||
; a[2]*a[3..7] added into r12..r15,r8, new top limb in r9;
; doubling of r12/r13 is interleaved with the multiplies.
163 | mov r9,QWORD[16+rsi] |
||
164 | mov rax,QWORD[24+rsi] |
||
165 | mul r9 |
||
166 | add r12,rax |
||
167 | mov rax,QWORD[32+rsi] |
||
168 | mov rcx,rdx |
||
169 | adc rcx,0 |
||
170 | |||
171 | mul r9 |
||
172 | add r13,rax |
||
173 | mov rax,QWORD[40+rsi] |
||
174 | adc rdx,0 |
||
175 | add r13,rcx |
||
176 | mov rcx,rdx |
||
177 | adc rcx,0 |
||
178 | |||
179 | mul r9 |
||
180 | add r14,rax |
||
181 | mov rax,QWORD[48+rsi] |
||
182 | adc rdx,0 |
||
183 | add r14,rcx |
||
184 | mov rcx,rdx |
||
185 | adc rcx,0 |
||
186 | |||
187 | mul r9 |
||
188 | mov r10,r12 |
||
189 | lea r12,[r12*2+rbx] |
||
190 | add r15,rax |
||
191 | mov rax,QWORD[56+rsi] |
||
192 | adc rdx,0 |
||
193 | add r15,rcx |
||
194 | mov rcx,rdx |
||
195 | adc rcx,0 |
||
196 | |||
197 | mul r9 |
||
198 | shr r10,63 |
||
199 | add r8,rax |
||
200 | mov rax,r9 |
||
201 | adc rdx,0 |
||
202 | add r8,rcx |
||
203 | mov r9,rdx |
||
204 | adc r9,0 |
||
205 | |||
206 | mov rcx,r13 |
||
207 | lea r13,[r13*2+r10] |
||
208 | |||
; Add a[2]^2; store product limbs 4 and 5.
209 | mul rax |
||
210 | add r11,rax |
||
211 | adc r12,rdx |
||
212 | adc r13,0 |
||
213 | |||
214 | mov QWORD[32+rsp],r11 |
||
215 | mov QWORD[40+rsp],r12 |
||
216 | shr rcx,63 |
||
217 | |||
218 | |||
; a[3]*a[4..7] added into r14,r15,r8,r9, new top limb in r10.
219 | mov r10,QWORD[24+rsi] |
||
220 | mov rax,QWORD[32+rsi] |
||
221 | mul r10 |
||
222 | add r14,rax |
||
223 | mov rax,QWORD[40+rsi] |
||
224 | mov rbx,rdx |
||
225 | adc rbx,0 |
||
226 | |||
227 | mul r10 |
||
228 | add r15,rax |
||
229 | mov rax,QWORD[48+rsi] |
||
230 | adc rdx,0 |
||
231 | add r15,rbx |
||
232 | mov rbx,rdx |
||
233 | adc rbx,0 |
||
234 | |||
235 | mul r10 |
||
236 | mov r12,r14 |
||
237 | lea r14,[r14*2+rcx] |
||
238 | add r8,rax |
||
239 | mov rax,QWORD[56+rsi] |
||
240 | adc rdx,0 |
||
241 | add r8,rbx |
||
242 | mov rbx,rdx |
||
243 | adc rbx,0 |
||
244 | |||
245 | mul r10 |
||
246 | shr r12,63 |
||
247 | add r9,rax |
||
248 | mov rax,r10 |
||
249 | adc rdx,0 |
||
250 | add r9,rbx |
||
251 | mov r10,rdx |
||
252 | adc r10,0 |
||
253 | |||
254 | mov rbx,r15 |
||
255 | lea r15,[r15*2+r12] |
||
256 | |||
; Add a[3]^2; store product limbs 6 and 7.
257 | mul rax |
||
258 | add r13,rax |
||
259 | adc r14,rdx |
||
260 | adc r15,0 |
||
261 | |||
262 | mov QWORD[48+rsp],r13 |
||
263 | mov QWORD[56+rsp],r14 |
||
264 | shr rbx,63 |
||
265 | |||
266 | |||
; a[4]*a[5..7] added into r8,r9,r10, new top limb in r11.
267 | mov r11,QWORD[32+rsi] |
||
268 | mov rax,QWORD[40+rsi] |
||
269 | mul r11 |
||
270 | add r8,rax |
||
271 | mov rax,QWORD[48+rsi] |
||
272 | mov rcx,rdx |
||
273 | adc rcx,0 |
||
274 | |||
275 | mul r11 |
||
276 | add r9,rax |
||
277 | mov rax,QWORD[56+rsi] |
||
278 | adc rdx,0 |
||
279 | mov r12,r8 |
||
280 | lea r8,[r8*2+rbx] |
||
281 | add r9,rcx |
||
282 | mov rcx,rdx |
||
283 | adc rcx,0 |
||
284 | |||
285 | mul r11 |
||
286 | shr r12,63 |
||
287 | add r10,rax |
||
288 | mov rax,r11 |
||
289 | adc rdx,0 |
||
290 | add r10,rcx |
||
291 | mov r11,rdx |
||
292 | adc r11,0 |
||
293 | |||
294 | mov rcx,r9 |
||
295 | lea r9,[r9*2+r12] |
||
296 | |||
; Add a[4]^2; store product limbs 8 and 9.
297 | mul rax |
||
298 | add r15,rax |
||
299 | adc r8,rdx |
||
300 | adc r9,0 |
||
301 | |||
302 | mov QWORD[64+rsp],r15 |
||
303 | mov QWORD[72+rsp],r8 |
||
304 | shr rcx,63 |
||
305 | |||
306 | |||
; a[5]*a[6..7] added into r10,r11, new top limb in r12.
307 | mov r12,QWORD[40+rsi] |
||
308 | mov rax,QWORD[48+rsi] |
||
309 | mul r12 |
||
310 | add r10,rax |
||
311 | mov rax,QWORD[56+rsi] |
||
312 | mov rbx,rdx |
||
313 | adc rbx,0 |
||
314 | |||
315 | mul r12 |
||
316 | add r11,rax |
||
317 | mov rax,r12 |
||
318 | mov r15,r10 |
||
319 | lea r10,[r10*2+rcx] |
||
320 | adc rdx,0 |
||
321 | shr r15,63 |
||
322 | add r11,rbx |
||
323 | mov r12,rdx |
||
324 | adc r12,0 |
||
325 | |||
326 | mov rbx,r11 |
||
327 | lea r11,[r11*2+r15] |
||
328 | |||
; Add a[5]^2; store product limbs 10 and 11.
329 | mul rax |
||
330 | add r9,rax |
||
331 | adc r10,rdx |
||
332 | adc r11,0 |
||
333 | |||
334 | mov QWORD[80+rsp],r9 |
||
335 | mov QWORD[88+rsp],r10 |
||
336 | |||
337 | |||
; a[6]*a[7] added into r12, top limb in r13.
338 | mov r13,QWORD[48+rsi] |
||
339 | mov rax,QWORD[56+rsi] |
||
340 | mul r13 |
||
341 | add r12,rax |
||
342 | mov rax,r13 |
||
343 | mov r13,rdx |
||
344 | adc r13,0 |
||
345 | |||
; "shl rbx,1" moves the saved top bit of the undoubled r11 into CF; the
; adc chain doubles r12:r13 with that carry-in and collects the carry-out
; in r14.
346 | xor r14,r14 |
||
347 | shl rbx,1 |
||
348 | adc r12,r12 |
||
349 | adc r13,r13 |
||
350 | adc r14,r14 |
||
351 | |||
; Add a[6]^2; store product limbs 12 and 13.
352 | mul rax |
||
353 | add r11,rax |
||
354 | adc r12,rdx |
||
355 | adc r13,0 |
||
356 | |||
357 | mov QWORD[96+rsp],r11 |
||
358 | mov QWORD[104+rsp],r12 |
||
359 | |||
360 | |||
; Add a[7]^2; store product limbs 14 and 15.
361 | mov rax,QWORD[56+rsi] |
||
362 | mul rax |
||
363 | add r13,rax |
||
364 | adc rdx,0 |
||
365 | |||
366 | add r14,rdx |
||
367 | |||
368 | mov QWORD[112+rsp],r13 |
||
369 | mov QWORD[120+rsp],r14 |
||
370 | |||
; Low 8 limbs of the square -> r8..r15, the input of __rsaz_512_reduce.
371 | mov r8,QWORD[rsp] |
||
372 | mov r9,QWORD[8+rsp] |
||
373 | mov r10,QWORD[16+rsp] |
||
374 | mov r11,QWORD[24+rsp] |
||
375 | mov r12,QWORD[32+rsp] |
||
376 | mov r13,QWORD[40+rsp] |
||
377 | mov r14,QWORD[48+rsp] |
||
378 | mov r15,QWORD[56+rsp] |
||
379 | |||
380 | call __rsaz_512_reduce |
||
381 | |||
; Add the high 8 limbs of the square; sbb turns the final carry into
; rcx = 0 or -1, the mask consumed by __rsaz_512_subtract.
382 | add r8,QWORD[64+rsp] |
||
383 | adc r9,QWORD[72+rsp] |
||
384 | adc r10,QWORD[80+rsp] |
||
385 | adc r11,QWORD[88+rsp] |
||
386 | adc r12,QWORD[96+rsp] |
||
387 | adc r13,QWORD[104+rsp] |
||
388 | adc r14,QWORD[112+rsp] |
||
389 | adc r15,QWORD[120+rsp] |
||
390 | sbb rcx,rcx |
||
391 | |||
392 | call __rsaz_512_subtract |
||
393 | |||
; __rsaz_512_subtract returns with r8,r9 = result limbs 0,1: preload them
; as a[0],a[1] for the next pass and make the result the next input.
394 | mov rdx,r8 |
||
395 | mov rax,r9 |
||
396 | mov r8d,DWORD[((128+8))+rsp] |
||
397 | mov rsi,rdi |
||
398 | |||
399 | dec r8d |
||
400 | jnz NEAR $L$oop_sqr |
||
401 | |||
; rax = rsp value at function entry (152-byte frame + 48 bytes of pushes);
; reload the callee-saved registers from just below it.
402 | lea rax,[((128+24+48))+rsp] |
||
403 | mov r15,QWORD[((-48))+rax] |
||
404 | mov r14,QWORD[((-40))+rax] |
||
405 | mov r13,QWORD[((-32))+rax] |
||
406 | mov r12,QWORD[((-24))+rax] |
||
407 | mov rbp,QWORD[((-16))+rax] |
||
408 | mov rbx,QWORD[((-8))+rax] |
||
409 | lea rsp,[rax] |
||
410 | $L$sqr_epilogue: |
||
411 | mov rdi,QWORD[8+rsp] ;WIN64 epilogue |
||
412 | mov rsi,QWORD[16+rsp] |
||
413 | DB 0F3h,0C3h ;repret |
||
414 | $L$SEH_end_rsaz_512_sqr: |
||
;-----------------------------------------------------------------------
; rsaz_512_mul -- Win64 entry point.
; Multiplies two 8-qword values with __rsaz_512_mul, then reduces the
; 16-qword product with __rsaz_512_reduce / __rsaz_512_subtract.
; Win64 args as used by the code below:
;   rcx      -> rdi : destination, 8 qwords
;   rdx      -> rsi : first operand, 8 qwords
;   r8       -> rdx : second operand, 8 qwords (walked via rbp in the helper)
;   r9       -> rcx : pointer handed to reduce/subtract in rbp
;                     (presumably the modulus -- confirm)
;   [40+rsp] -> r8  : multiplier stored at [128+rsp] for __rsaz_512_reduce
;-----------------------------------------------------------------------
415 | global rsaz_512_mul |
||
416 | |||
417 | ALIGN 32 |
||
418 | rsaz_512_mul: |
||
419 | mov QWORD[8+rsp],rdi ;WIN64 prologue |
||
420 | mov QWORD[16+rsp],rsi |
||
421 | mov rax,rsp |
||
422 | $L$SEH_begin_rsaz_512_mul: |
||
423 | mov rdi,rcx |
||
424 | mov rsi,rdx |
||
425 | mov rdx,r8 |
||
426 | mov rcx,r9 |
||
427 | mov r8,QWORD[40+rsp] |
||
428 | |||
429 | |||
430 | push rbx |
||
431 | push rbp |
||
432 | push r12 |
||
433 | push r13 |
||
434 | push r14 |
||
435 | push r15 |
||
436 | |||
437 | sub rsp,128+24 |
||
438 | $L$mul_body: |
||
; Hand-encoded "movq xmm0,rdi" and "movq xmm1,rcx": __rsaz_512_mul
; overwrites rdi, rcx and rbp, so the two pointers are kept in xmm0/xmm1.
439 | DB 102,72,15,110,199 |
||
440 | DB 102,72,15,110,201 |
||
441 | mov QWORD[128+rsp],r8 |
||
; Helper inputs: rbx = first qword of the second operand, rbp = its address.
442 | mov rbx,QWORD[rdx] |
||
443 | mov rbp,rdx |
||
444 | call __rsaz_512_mul |
||
445 | |||
; Hand-encoded "movq rdi,xmm0" and "movq rbp,xmm1".
446 | DB 102,72,15,126,199 |
||
447 | DB 102,72,15,126,205 |
||
448 | |||
; Low 8 limbs of the product -> r8..r15 for __rsaz_512_reduce.
449 | mov r8,QWORD[rsp] |
||
450 | mov r9,QWORD[8+rsp] |
||
451 | mov r10,QWORD[16+rsp] |
||
452 | mov r11,QWORD[24+rsp] |
||
453 | mov r12,QWORD[32+rsp] |
||
454 | mov r13,QWORD[40+rsp] |
||
455 | mov r14,QWORD[48+rsp] |
||
456 | mov r15,QWORD[56+rsp] |
||
457 | |||
458 | call __rsaz_512_reduce |
||
; Add the high 8 limbs; rcx = 0 or -1 from the final carry.
459 | add r8,QWORD[64+rsp] |
||
460 | adc r9,QWORD[72+rsp] |
||
461 | adc r10,QWORD[80+rsp] |
||
462 | adc r11,QWORD[88+rsp] |
||
463 | adc r12,QWORD[96+rsp] |
||
464 | adc r13,QWORD[104+rsp] |
||
465 | adc r14,QWORD[112+rsp] |
||
466 | adc r15,QWORD[120+rsp] |
||
467 | sbb rcx,rcx |
||
468 | |||
469 | call __rsaz_512_subtract |
||
470 | |||
; rax = rsp at entry; restore callee-saved registers and unwind the frame.
471 | lea rax,[((128+24+48))+rsp] |
||
472 | mov r15,QWORD[((-48))+rax] |
||
473 | mov r14,QWORD[((-40))+rax] |
||
474 | mov r13,QWORD[((-32))+rax] |
||
475 | mov r12,QWORD[((-24))+rax] |
||
476 | mov rbp,QWORD[((-16))+rax] |
||
477 | mov rbx,QWORD[((-8))+rax] |
||
478 | lea rsp,[rax] |
||
479 | $L$mul_epilogue: |
||
480 | mov rdi,QWORD[8+rsp] ;WIN64 epilogue |
||
481 | mov rsi,QWORD[16+rsp] |
||
482 | DB 0F3h,0C3h ;repret |
||
483 | $L$SEH_end_rsaz_512_mul: |
||
;-----------------------------------------------------------------------
; rsaz_512_mul_gather4 -- Win64 entry point.
; Multiplies the 8 qwords at rsi by a value selected from a table, one
; qword per 128-byte table row, then reduces the 16-qword product.
; Win64 args as used by the code below:
;   rcx      -> rdi : destination, 8 qwords (saved at [128+8+rsp])
;   rdx      -> rsi : multiplicand, 8 qwords
;   r8       -> rdx : table base; rows are read with movdqa
;   r9       -> rcx : pointer saved at [128+16+rsp], reloaded into rbp for
;                     reduce/subtract (presumably the modulus -- confirm)
;   [40+rsp] -> r8  : multiplier stored at [128+rsp] for __rsaz_512_reduce
;   [48+rsp] -> r9d : table index
; Frame (328 bytes): [rsp..127+rsp] product, [128..151+rsp] saved values,
;                    [160..319+rsp] xmm6..xmm15 (callee-saved on Win64).
;-----------------------------------------------------------------------
484 | global rsaz_512_mul_gather4 |
||
485 | |||
486 | ALIGN 32 |
||
487 | rsaz_512_mul_gather4: |
||
488 | mov QWORD[8+rsp],rdi ;WIN64 prologue |
||
489 | mov QWORD[16+rsp],rsi |
||
490 | mov rax,rsp |
||
491 | $L$SEH_begin_rsaz_512_mul_gather4: |
||
492 | mov rdi,rcx |
||
493 | mov rsi,rdx |
||
494 | mov rdx,r8 |
||
495 | mov rcx,r9 |
||
496 | mov r8,QWORD[40+rsp] |
||
497 | mov r9,QWORD[48+rsp] |
||
498 | |||
499 | |||
500 | push rbx |
||
501 | push rbp |
||
502 | push r12 |
||
503 | push r13 |
||
504 | push r14 |
||
505 | push r15 |
||
506 | |||
507 | sub rsp,328 |
||
508 | movaps XMMWORD[160+rsp],xmm6 |
||
509 | movaps XMMWORD[176+rsp],xmm7 |
||
510 | movaps XMMWORD[192+rsp],xmm8 |
||
511 | movaps XMMWORD[208+rsp],xmm9 |
||
512 | movaps XMMWORD[224+rsp],xmm10 |
||
513 | movaps XMMWORD[240+rsp],xmm11 |
||
514 | movaps XMMWORD[256+rsp],xmm12 |
||
515 | movaps XMMWORD[272+rsp],xmm13 |
||
516 | movaps XMMWORD[288+rsp],xmm14 |
||
517 | movaps XMMWORD[304+rsp],xmm15 |
||
518 | $L$mul_gather4_body: |
||
; Build eight selection masks in xmm0..xmm7:
;   xmm8 = index broadcast to all dwords,
;   xmm0 = [$L$inc], xmm(k) = xmm(k-1) + [$L$inc+16]   ($L$inc is defined
;   outside this view), each then replaced by its pcmpeqd against xmm8.
519 | movd xmm8,r9d |
||
520 | movdqa xmm1,XMMWORD[(($L$inc+16))] |
||
521 | movdqa xmm0,XMMWORD[$L$inc] |
||
522 | |||
523 | pshufd xmm8,xmm8,0 |
||
524 | movdqa xmm7,xmm1 |
||
525 | movdqa xmm2,xmm1 |
||
526 | paddd xmm1,xmm0 |
||
527 | pcmpeqd xmm0,xmm8 |
||
528 | movdqa xmm3,xmm7 |
||
529 | paddd xmm2,xmm1 |
||
530 | pcmpeqd xmm1,xmm8 |
||
531 | movdqa xmm4,xmm7 |
||
532 | paddd xmm3,xmm2 |
||
533 | pcmpeqd xmm2,xmm8 |
||
534 | movdqa xmm5,xmm7 |
||
535 | paddd xmm4,xmm3 |
||
536 | pcmpeqd xmm3,xmm8 |
||
537 | movdqa xmm6,xmm7 |
||
538 | paddd xmm5,xmm4 |
||
539 | pcmpeqd xmm4,xmm8 |
||
540 | paddd xmm6,xmm5 |
||
541 | pcmpeqd xmm5,xmm8 |
||
542 | paddd xmm7,xmm6 |
||
543 | pcmpeqd xmm6,xmm8 |
||
544 | pcmpeqd xmm7,xmm8 |
||
545 | |||
; Select qword 0 of the multiplier: the whole 128-byte row is loaded,
; masked and OR-ed together regardless of the index (presumably so the
; memory access pattern does not depend on it). rbp -> next row.
546 | movdqa xmm8,XMMWORD[rdx] |
||
547 | movdqa xmm9,XMMWORD[16+rdx] |
||
548 | movdqa xmm10,XMMWORD[32+rdx] |
||
549 | movdqa xmm11,XMMWORD[48+rdx] |
||
550 | pand xmm8,xmm0 |
||
551 | movdqa xmm12,XMMWORD[64+rdx] |
||
552 | pand xmm9,xmm1 |
||
553 | movdqa xmm13,XMMWORD[80+rdx] |
||
554 | pand xmm10,xmm2 |
||
555 | movdqa xmm14,XMMWORD[96+rdx] |
||
556 | pand xmm11,xmm3 |
||
557 | movdqa xmm15,XMMWORD[112+rdx] |
||
558 | lea rbp,[128+rdx] |
||
559 | pand xmm12,xmm4 |
||
560 | pand xmm13,xmm5 |
||
561 | pand xmm14,xmm6 |
||
562 | pand xmm15,xmm7 |
||
563 | por xmm8,xmm10 |
||
564 | por xmm9,xmm11 |
||
565 | por xmm8,xmm12 |
||
566 | por xmm9,xmm13 |
||
567 | por xmm8,xmm14 |
||
568 | por xmm9,xmm15 |
||
569 | |||
; Fold the two 64-bit halves (pshufd 0x4e swaps them), then the
; hand-encoded "movq rbx,xmm8" puts the selected qword into rbx.
570 | por xmm8,xmm9 |
||
571 | pshufd xmm9,xmm8,0x4e |
||
572 | por xmm8,xmm9 |
||
573 | DB 102,76,15,126,195 |
||
574 | |||
575 | mov QWORD[128+rsp],r8 |
||
576 | mov QWORD[((128+8))+rsp],rdi |
||
577 | mov QWORD[((128+16))+rsp],rcx |
||
578 | |||
; First row of the schoolbook product: a[0..7] * rbx.
; Limb 0 goes to [rsp]; limbs 1..8 stay in r8..r15.
579 | mov rax,QWORD[rsi] |
||
580 | mov rcx,QWORD[8+rsi] |
||
581 | mul rbx |
||
582 | mov QWORD[rsp],rax |
||
583 | mov rax,rcx |
||
584 | mov r8,rdx |
||
585 | |||
586 | mul rbx |
||
587 | add r8,rax |
||
588 | mov rax,QWORD[16+rsi] |
||
589 | mov r9,rdx |
||
590 | adc r9,0 |
||
591 | |||
592 | mul rbx |
||
593 | add r9,rax |
||
594 | mov rax,QWORD[24+rsi] |
||
595 | mov r10,rdx |
||
596 | adc r10,0 |
||
597 | |||
598 | mul rbx |
||
599 | add r10,rax |
||
600 | mov rax,QWORD[32+rsi] |
||
601 | mov r11,rdx |
||
602 | adc r11,0 |
||
603 | |||
604 | mul rbx |
||
605 | add r11,rax |
||
606 | mov rax,QWORD[40+rsi] |
||
607 | mov r12,rdx |
||
608 | adc r12,0 |
||
609 | |||
610 | mul rbx |
||
611 | add r12,rax |
||
612 | mov rax,QWORD[48+rsi] |
||
613 | mov r13,rdx |
||
614 | adc r13,0 |
||
615 | |||
616 | mul rbx |
||
617 | add r13,rax |
||
618 | mov rax,QWORD[56+rsi] |
||
619 | mov r14,rdx |
||
620 | adc r14,0 |
||
621 | |||
622 | mul rbx |
||
623 | add r14,rax |
||
624 | mov rax,QWORD[rsi] |
||
625 | mov r15,rdx |
||
626 | adc r15,0 |
||
627 | |||
; rdi = address of the next product limb to store; 7 rows remain.
628 | lea rdi,[8+rsp] |
||
629 | mov ecx,7 |
||
630 | jmp NEAR $L$oop_mul_gather |
||
631 | |||
632 | ALIGN 32 |
||
; Per pass: select the next multiplier qword from the row at rbp (same
; mask/OR/fold sequence as above), then add a[0..7]*rbx to the running
; window r8..r15, storing the completed low limb to [rdi].
633 | $L$oop_mul_gather: |
||
634 | movdqa xmm8,XMMWORD[rbp] |
||
635 | movdqa xmm9,XMMWORD[16+rbp] |
||
636 | movdqa xmm10,XMMWORD[32+rbp] |
||
637 | movdqa xmm11,XMMWORD[48+rbp] |
||
638 | pand xmm8,xmm0 |
||
639 | movdqa xmm12,XMMWORD[64+rbp] |
||
640 | pand xmm9,xmm1 |
||
641 | movdqa xmm13,XMMWORD[80+rbp] |
||
642 | pand xmm10,xmm2 |
||
643 | movdqa xmm14,XMMWORD[96+rbp] |
||
644 | pand xmm11,xmm3 |
||
645 | movdqa xmm15,XMMWORD[112+rbp] |
||
646 | lea rbp,[128+rbp] |
||
647 | pand xmm12,xmm4 |
||
648 | pand xmm13,xmm5 |
||
649 | pand xmm14,xmm6 |
||
650 | pand xmm15,xmm7 |
||
651 | por xmm8,xmm10 |
||
652 | por xmm9,xmm11 |
||
653 | por xmm8,xmm12 |
||
654 | por xmm9,xmm13 |
||
655 | por xmm8,xmm14 |
||
656 | por xmm9,xmm15 |
||
657 | |||
658 | por xmm8,xmm9 |
||
659 | pshufd xmm9,xmm8,0x4e |
||
660 | por xmm8,xmm9 |
||
661 | DB 102,76,15,126,195 |
||
662 | |||
663 | mul rbx |
||
664 | add r8,rax |
||
665 | mov rax,QWORD[8+rsi] |
||
666 | mov QWORD[rdi],r8 |
||
667 | mov r8,rdx |
||
668 | adc r8,0 |
||
669 | |||
670 | mul rbx |
||
671 | add r9,rax |
||
672 | mov rax,QWORD[16+rsi] |
||
673 | adc rdx,0 |
||
674 | add r8,r9 |
||
675 | mov r9,rdx |
||
676 | adc r9,0 |
||
677 | |||
678 | mul rbx |
||
679 | add r10,rax |
||
680 | mov rax,QWORD[24+rsi] |
||
681 | adc rdx,0 |
||
682 | add r9,r10 |
||
683 | mov r10,rdx |
||
684 | adc r10,0 |
||
685 | |||
686 | mul rbx |
||
687 | add r11,rax |
||
688 | mov rax,QWORD[32+rsi] |
||
689 | adc rdx,0 |
||
690 | add r10,r11 |
||
691 | mov r11,rdx |
||
692 | adc r11,0 |
||
693 | |||
694 | mul rbx |
||
695 | add r12,rax |
||
696 | mov rax,QWORD[40+rsi] |
||
697 | adc rdx,0 |
||
698 | add r11,r12 |
||
699 | mov r12,rdx |
||
700 | adc r12,0 |
||
701 | |||
702 | mul rbx |
||
703 | add r13,rax |
||
704 | mov rax,QWORD[48+rsi] |
||
705 | adc rdx,0 |
||
706 | add r12,r13 |
||
707 | mov r13,rdx |
||
708 | adc r13,0 |
||
709 | |||
710 | mul rbx |
||
711 | add r14,rax |
||
712 | mov rax,QWORD[56+rsi] |
||
713 | adc rdx,0 |
||
714 | add r13,r14 |
||
715 | mov r14,rdx |
||
716 | adc r14,0 |
||
717 | |||
718 | mul rbx |
||
719 | add r15,rax |
||
720 | mov rax,QWORD[rsi] |
||
721 | adc rdx,0 |
||
722 | add r14,r15 |
||
723 | mov r15,rdx |
||
724 | adc r15,0 |
||
725 | |||
726 | lea rdi,[8+rdi] |
||
727 | |||
728 | dec ecx |
||
729 | jnz NEAR $L$oop_mul_gather |
||
730 | |||
; Flush the remaining 8 limbs (product limbs 8..15).
731 | mov QWORD[rdi],r8 |
||
732 | mov QWORD[8+rdi],r9 |
||
733 | mov QWORD[16+rdi],r10 |
||
734 | mov QWORD[24+rdi],r11 |
||
735 | mov QWORD[32+rdi],r12 |
||
736 | mov QWORD[40+rdi],r13 |
||
737 | mov QWORD[48+rdi],r14 |
||
738 | mov QWORD[56+rdi],r15 |
||
739 | |||
; Reload destination (rdi) and the 4th argument (rbp) for the helpers.
740 | mov rdi,QWORD[((128+8))+rsp] |
||
741 | mov rbp,QWORD[((128+16))+rsp] |
||
742 | |||
743 | mov r8,QWORD[rsp] |
||
744 | mov r9,QWORD[8+rsp] |
||
745 | mov r10,QWORD[16+rsp] |
||
746 | mov r11,QWORD[24+rsp] |
||
747 | mov r12,QWORD[32+rsp] |
||
748 | mov r13,QWORD[40+rsp] |
||
749 | mov r14,QWORD[48+rsp] |
||
750 | mov r15,QWORD[56+rsp] |
||
751 | |||
752 | call __rsaz_512_reduce |
||
; Add the high 8 limbs; rcx = 0 or -1 from the final carry.
753 | add r8,QWORD[64+rsp] |
||
754 | adc r9,QWORD[72+rsp] |
||
755 | adc r10,QWORD[80+rsp] |
||
756 | adc r11,QWORD[88+rsp] |
||
757 | adc r12,QWORD[96+rsp] |
||
758 | adc r13,QWORD[104+rsp] |
||
759 | adc r14,QWORD[112+rsp] |
||
760 | adc r15,QWORD[120+rsp] |
||
761 | sbb rcx,rcx |
||
762 | |||
763 | call __rsaz_512_subtract |
||
764 | |||
; rax = rsp+200, so (N-200)+rax addresses the xmm save slot at N+rsp.
; After "lea rax,[176+rax]" rax = rsp+376 = rsp value at entry
; (328-byte frame + 48 bytes of pushes).
765 | lea rax,[((128+24+48))+rsp] |
||
766 | movaps xmm6,XMMWORD[((160-200))+rax] |
||
767 | movaps xmm7,XMMWORD[((176-200))+rax] |
||
768 | movaps xmm8,XMMWORD[((192-200))+rax] |
||
769 | movaps xmm9,XMMWORD[((208-200))+rax] |
||
770 | movaps xmm10,XMMWORD[((224-200))+rax] |
||
771 | movaps xmm11,XMMWORD[((240-200))+rax] |
||
772 | movaps xmm12,XMMWORD[((256-200))+rax] |
||
773 | movaps xmm13,XMMWORD[((272-200))+rax] |
||
774 | movaps xmm14,XMMWORD[((288-200))+rax] |
||
775 | movaps xmm15,XMMWORD[((304-200))+rax] |
||
776 | lea rax,[176+rax] |
||
777 | mov r15,QWORD[((-48))+rax] |
||
778 | mov r14,QWORD[((-40))+rax] |
||
779 | mov r13,QWORD[((-32))+rax] |
||
780 | mov r12,QWORD[((-24))+rax] |
||
781 | mov rbp,QWORD[((-16))+rax] |
||
782 | mov rbx,QWORD[((-8))+rax] |
||
783 | lea rsp,[rax] |
||
784 | $L$mul_gather4_epilogue: |
||
785 | mov rdi,QWORD[8+rsp] ;WIN64 epilogue |
||
786 | mov rsi,QWORD[16+rsp] |
||
787 | DB 0F3h,0C3h ;repret |
||
788 | $L$SEH_end_rsaz_512_mul_gather4: |
||
;-----------------------------------------------------------------------
; rsaz_512_mul_scatter4 -- Win64 entry point.
; Multiplies the 8 qwords at rsi by the 8 qwords currently stored at the
; destination (rdi), reduces, writes the result back to rdi and also
; scatters it into a table: limb k goes to table + 8*index + 128*k.
; Win64 args as used by the code below:
;   rcx      -> rdi : in/out value, 8 qwords
;   rdx      -> rsi : multiplicand, 8 qwords
;   r8       -> rdx : pointer handed to reduce/subtract in rbp
;                     (presumably the modulus -- confirm)
;   r9       -> rcx : multiplier stored at [128+rsp] for __rsaz_512_reduce
;   [40+rsp] -> r8  : table base
;   [48+rsp] -> r9d : table index
;-----------------------------------------------------------------------
789 | global rsaz_512_mul_scatter4 |
||
790 | |||
791 | ALIGN 32 |
||
792 | rsaz_512_mul_scatter4: |
||
793 | mov QWORD[8+rsp],rdi ;WIN64 prologue |
||
794 | mov QWORD[16+rsp],rsi |
||
795 | mov rax,rsp |
||
796 | $L$SEH_begin_rsaz_512_mul_scatter4: |
||
797 | mov rdi,rcx |
||
798 | mov rsi,rdx |
||
799 | mov rdx,r8 |
||
800 | mov rcx,r9 |
||
801 | mov r8,QWORD[40+rsp] |
||
802 | mov r9,QWORD[48+rsp] |
||
803 | |||
804 | |||
805 | push rbx |
||
806 | push rbp |
||
807 | push r12 |
||
808 | push r13 |
||
809 | push r14 |
||
810 | push r15 |
||
811 | |||
; Writing r9d zero-extends the 32-bit index into r9 for the lea below.
812 | mov r9d,r9d |
||
813 | sub rsp,128+24 |
||
814 | $L$mul_scatter4_body: |
||
; r8 = table + 8*index. The three DB lines are hand-encoded
; "movq xmm0,rdi", "movq xmm1,rdx", "movq xmm2,r8": the values are parked
; in xmm registers because the helpers overwrite rdi, rbp and r8.
815 | lea r8,[r9*8+r8] |
||
816 | DB 102,72,15,110,199 |
||
817 | DB 102,72,15,110,202 |
||
818 | DB 102,73,15,110,208 |
||
819 | mov QWORD[128+rsp],rcx |
||
820 | |||
; The second operand of the multiply is the current content of the
; destination: rbp = rdi, rbx = its first qword.
821 | mov rbp,rdi |
||
822 | mov rbx,QWORD[rdi] |
||
823 | call __rsaz_512_mul |
||
824 | |||
; Hand-encoded "movq rdi,xmm0" and "movq rbp,xmm1".
825 | DB 102,72,15,126,199 |
||
826 | DB 102,72,15,126,205 |
||
827 | |||
828 | mov r8,QWORD[rsp] |
||
829 | mov r9,QWORD[8+rsp] |
||
830 | mov r10,QWORD[16+rsp] |
||
831 | mov r11,QWORD[24+rsp] |
||
832 | mov r12,QWORD[32+rsp] |
||
833 | mov r13,QWORD[40+rsp] |
||
834 | mov r14,QWORD[48+rsp] |
||
835 | mov r15,QWORD[56+rsp] |
||
836 | |||
837 | call __rsaz_512_reduce |
||
838 | add r8,QWORD[64+rsp] |
||
839 | adc r9,QWORD[72+rsp] |
||
840 | adc r10,QWORD[80+rsp] |
||
841 | adc r11,QWORD[88+rsp] |
||
842 | adc r12,QWORD[96+rsp] |
||
843 | adc r13,QWORD[104+rsp] |
||
844 | adc r14,QWORD[112+rsp] |
||
845 | adc r15,QWORD[120+rsp] |
||
; Hand-encoded "movq rsi,xmm2" (table slot address); it does not modify
; flags, so the carry from the adc chain still reaches the sbb below.
846 | DB 102,72,15,126,214 |
||
847 | sbb rcx,rcx |
||
848 | |||
849 | call __rsaz_512_subtract |
||
850 | |||
; Scatter the result limbs (still in r8..r15) with a 128-byte stride.
851 | mov QWORD[rsi],r8 |
||
852 | mov QWORD[128+rsi],r9 |
||
853 | mov QWORD[256+rsi],r10 |
||
854 | mov QWORD[384+rsi],r11 |
||
855 | mov QWORD[512+rsi],r12 |
||
856 | mov QWORD[640+rsi],r13 |
||
857 | mov QWORD[768+rsi],r14 |
||
858 | mov QWORD[896+rsi],r15 |
||
859 | |||
; rax = rsp at entry; restore callee-saved registers and unwind the frame.
860 | lea rax,[((128+24+48))+rsp] |
||
861 | mov r15,QWORD[((-48))+rax] |
||
862 | mov r14,QWORD[((-40))+rax] |
||
863 | mov r13,QWORD[((-32))+rax] |
||
864 | mov r12,QWORD[((-24))+rax] |
||
865 | mov rbp,QWORD[((-16))+rax] |
||
866 | mov rbx,QWORD[((-8))+rax] |
||
867 | lea rsp,[rax] |
||
868 | $L$mul_scatter4_epilogue: |
||
869 | mov rdi,QWORD[8+rsp] ;WIN64 epilogue |
||
870 | mov rsi,QWORD[16+rsp] |
||
871 | DB 0F3h,0C3h ;repret |
||
872 | $L$SEH_end_rsaz_512_mul_scatter4: |
||
;-----------------------------------------------------------------------
; rsaz_512_mul_by_one -- Win64 entry point.
; Loads 8 qwords from the source, runs __rsaz_512_reduce on them and
; stores r8..r15 to the destination. Unlike the other entry points it
; does not call __rsaz_512_subtract afterwards.
; Win64 args as used by the code below:
;   rcx -> rdi : destination, 8 qwords
;   rdx -> rsi : source, 8 qwords
;   r8  -> rbp : pointer read by __rsaz_512_reduce
;                (presumably the modulus -- confirm)
;   r9  -> [128+rsp] : multiplier read by __rsaz_512_reduce
;-----------------------------------------------------------------------
873 | global rsaz_512_mul_by_one |
||
874 | |||
875 | ALIGN 32 |
||
876 | rsaz_512_mul_by_one: |
||
877 | mov QWORD[8+rsp],rdi ;WIN64 prologue |
||
878 | mov QWORD[16+rsp],rsi |
||
879 | mov rax,rsp |
||
880 | $L$SEH_begin_rsaz_512_mul_by_one: |
||
881 | mov rdi,rcx |
||
882 | mov rsi,rdx |
||
883 | mov rdx,r8 |
||
884 | mov rcx,r9 |
||
885 | |||
886 | |||
887 | push rbx |
||
888 | push rbp |
||
889 | push r12 |
||
890 | push r13 |
||
891 | push r14 |
||
892 | push r15 |
||
893 | |||
894 | sub rsp,128+24 |
||
895 | $L$mul_by_one_body: |
||
896 | mov rbp,rdx |
||
897 | mov QWORD[128+rsp],rcx |
||
898 | |||
; r8..r15 = the 8 input qwords (input of __rsaz_512_reduce).
899 | mov r8,QWORD[rsi] |
||
900 | pxor xmm0,xmm0 |
||
901 | mov r9,QWORD[8+rsi] |
||
902 | mov r10,QWORD[16+rsi] |
||
903 | mov r11,QWORD[24+rsi] |
||
904 | mov r12,QWORD[32+rsi] |
||
905 | mov r13,QWORD[40+rsi] |
||
906 | mov r14,QWORD[48+rsi] |
||
907 | mov r15,QWORD[56+rsi] |
||
908 | |||
; Zero-fill [rsp..111+rsp]. Nothing in this function or in
; __rsaz_512_reduce reads that area back -- purpose not evident from here.
909 | movdqa XMMWORD[rsp],xmm0 |
||
910 | movdqa XMMWORD[16+rsp],xmm0 |
||
911 | movdqa XMMWORD[32+rsp],xmm0 |
||
912 | movdqa XMMWORD[48+rsp],xmm0 |
||
913 | movdqa XMMWORD[64+rsp],xmm0 |
||
914 | movdqa XMMWORD[80+rsp],xmm0 |
||
915 | movdqa XMMWORD[96+rsp],xmm0 |
||
916 | call __rsaz_512_reduce |
||
917 | mov QWORD[rdi],r8 |
||
918 | mov QWORD[8+rdi],r9 |
||
919 | mov QWORD[16+rdi],r10 |
||
920 | mov QWORD[24+rdi],r11 |
||
921 | mov QWORD[32+rdi],r12 |
||
922 | mov QWORD[40+rdi],r13 |
||
923 | mov QWORD[48+rdi],r14 |
||
924 | mov QWORD[56+rdi],r15 |
||
925 | |||
; rax = rsp at entry; restore callee-saved registers and unwind the frame.
926 | lea rax,[((128+24+48))+rsp] |
||
927 | mov r15,QWORD[((-48))+rax] |
||
928 | mov r14,QWORD[((-40))+rax] |
||
929 | mov r13,QWORD[((-32))+rax] |
||
930 | mov r12,QWORD[((-24))+rax] |
||
931 | mov rbp,QWORD[((-16))+rax] |
||
932 | mov rbx,QWORD[((-8))+rax] |
||
933 | lea rsp,[rax] |
||
934 | $L$mul_by_one_epilogue: |
||
935 | mov rdi,QWORD[8+rsp] ;WIN64 epilogue |
||
936 | mov rsi,QWORD[16+rsp] |
||
937 | DB 0F3h,0C3h ;repret |
||
938 | $L$SEH_end_rsaz_512_mul_by_one: |
||
939 | |||
940 | ALIGN 32 |
||
;-----------------------------------------------------------------------
; __rsaz_512_reduce -- internal helper (register-based, not Win64 ABI).
; In:   r8..r15       8-qword value, r8 = least significant limb
;       rbp           pointer to 8 qwords m[0..7]
;                     (presumably the modulus -- confirm)
;       [128+8+rsp]   multiplier k; because the call pushed a return
;                     address this is the caller's [128+rsp]
; Out:  r8..r15       value after 8 reduction steps
; Clobbers: rax, rbx, rcx, rdx, rsi, flags
; Each of the 8 passes computes rbx = r8*k, adds m[0..7]*rbx to the
; window r8..r15 and shifts the window down by one limb.
;-----------------------------------------------------------------------
941 | __rsaz_512_reduce: |
||
942 | mov rbx,r8 |
||
943 | imul rbx,QWORD[((128+8))+rsp] |
||
944 | mov rax,QWORD[rbp] |
||
945 | mov ecx,8 |
||
946 | jmp NEAR $L$reduction_loop |
||
947 | |||
948 | ALIGN 32 |
||
949 | $L$reduction_loop: |
||
; The low half of m[0]*rbx is never added to r8. "neg r8" sets CF when r8
; is non-zero and that CF is used as the carry into the next limb --
; presumably k is chosen so that r8 + lo(m[0]*rbx) == 0 mod 2^64 (confirm).
950 | mul rbx |
||
951 | mov rax,QWORD[8+rbp] |
||
952 | neg r8 |
||
953 | mov r8,rdx |
||
954 | adc r8,0 |
||
955 | |||
956 | mul rbx |
||
957 | add r9,rax |
||
958 | mov rax,QWORD[16+rbp] |
||
959 | adc rdx,0 |
||
960 | add r8,r9 |
||
961 | mov r9,rdx |
||
962 | adc r9,0 |
||
963 | |||
964 | mul rbx |
||
965 | add r10,rax |
||
966 | mov rax,QWORD[24+rbp] |
||
967 | adc rdx,0 |
||
968 | add r9,r10 |
||
969 | mov r10,rdx |
||
970 | adc r10,0 |
||
971 | |||
972 | mul rbx |
||
973 | add r11,rax |
||
974 | mov rax,QWORD[32+rbp] |
||
975 | adc rdx,0 |
||
976 | add r10,r11 |
||
; rsi = k; the mov does not alter flags, so the carry of the add above
; is still consumed by the adc that follows.
977 | mov rsi,QWORD[((128+8))+rsp] |
||
978 | |||
979 | |||
980 | adc rdx,0 |
||
981 | mov r11,rdx |
||
982 | |||
983 | mul rbx |
||
984 | add r12,rax |
||
985 | mov rax,QWORD[40+rbp] |
||
986 | adc rdx,0 |
||
; r8 already holds its final value for this pass: rsi = k * r8 is the
; multiplier for the next pass.
987 | imul rsi,r8 |
||
988 | add r11,r12 |
||
989 | mov r12,rdx |
||
990 | adc r12,0 |
||
991 | |||
992 | mul rbx |
||
993 | add r13,rax |
||
994 | mov rax,QWORD[48+rbp] |
||
995 | adc rdx,0 |
||
996 | add r12,r13 |
||
997 | mov r13,rdx |
||
998 | adc r13,0 |
||
999 | |||
1000 | mul rbx |
||
1001 | add r14,rax |
||
1002 | mov rax,QWORD[56+rbp] |
||
1003 | adc rdx,0 |
||
1004 | add r13,r14 |
||
1005 | mov r14,rdx |
||
1006 | adc r14,0 |
||
1007 | |||
; Last multiply of the pass; switch rbx to the next multiplier and
; preload m[0] into rax for the next pass.
1008 | mul rbx |
||
1009 | mov rbx,rsi |
||
1010 | add r15,rax |
||
1011 | mov rax,QWORD[rbp] |
||
1012 | adc rdx,0 |
||
1013 | add r14,r15 |
||
1014 | mov r15,rdx |
||
1015 | adc r15,0 |
||
1016 | |||
1017 | dec ecx |
||
1018 | jne NEAR $L$reduction_loop |
||
1019 | |||
1020 | DB 0F3h,0C3h ;repret |
||
1021 | |||
1022 | |||
1023 | ALIGN 32 |
||
;-----------------------------------------------------------------------
; __rsaz_512_subtract -- internal helper (register-based).
; In:   r8..r15  8-qword value
;       rdi      destination, 8 qwords
;       rbp      pointer to 8 qwords m[0..7]
;       rcx      mask: callers pass 0 or -1 (from "sbb rcx,rcx")
; Out:  [rdi] and r8..r15 = value + ((neg m[0], not m[1..7]) AND rcx)
; With rcx = -1 the addend is the two's complement of m as long as m[0]
; is non-zero, i.e. the result is value - m mod 2^512; with rcx = 0 the
; value is stored unchanged. No branch depends on rcx.
;-----------------------------------------------------------------------
1024 | __rsaz_512_subtract: |
||
; Store the unmodified value first; it is re-read as the second addend.
1025 | mov QWORD[rdi],r8 |
||
1026 | mov QWORD[8+rdi],r9 |
||
1027 | mov QWORD[16+rdi],r10 |
||
1028 | mov QWORD[24+rdi],r11 |
||
1029 | mov QWORD[32+rdi],r12 |
||
1030 | mov QWORD[40+rdi],r13 |
||
1031 | mov QWORD[48+rdi],r14 |
||
1032 | mov QWORD[56+rdi],r15 |
||
1033 | |||
; Build the masked negation of m in r8..r15: limb 0 via neg (= not + 1),
; limbs 1..7 via not.
1034 | mov r8,QWORD[rbp] |
||
1035 | mov r9,QWORD[8+rbp] |
||
1036 | neg r8 |
||
1037 | not r9 |
||
1038 | and r8,rcx |
||
1039 | mov r10,QWORD[16+rbp] |
||
1040 | and r9,rcx |
||
1041 | not r10 |
||
1042 | mov r11,QWORD[24+rbp] |
||
1043 | and r10,rcx |
||
1044 | not r11 |
||
1045 | mov r12,QWORD[32+rbp] |
||
1046 | and r11,rcx |
||
1047 | not r12 |
||
1048 | mov r13,QWORD[40+rbp] |
||
1049 | and r12,rcx |
||
1050 | not r13 |
||
1051 | mov r14,QWORD[48+rbp] |
||
1052 | and r13,rcx |
||
1053 | not r14 |
||
1054 | mov r15,QWORD[56+rbp] |
||
1055 | and r14,rcx |
||
1056 | not r15 |
||
1057 | and r15,rcx |
||
1058 | |||
; Add the stored value with a carry chain across all 8 limbs.
1059 | add r8,QWORD[rdi] |
||
1060 | adc r9,QWORD[8+rdi] |
||
1061 | adc r10,QWORD[16+rdi] |
||
1062 | adc r11,QWORD[24+rdi] |
||
1063 | adc r12,QWORD[32+rdi] |
||
1064 | adc r13,QWORD[40+rdi] |
||
1065 | adc r14,QWORD[48+rdi] |
||
1066 | adc r15,QWORD[56+rdi] |
||
1067 | |||
1068 | mov QWORD[rdi],r8 |
||
1069 | mov QWORD[8+rdi],r9 |
||
1070 | mov QWORD[16+rdi],r10 |
||
1071 | mov QWORD[24+rdi],r11 |
||
1072 | mov QWORD[32+rdi],r12 |
||
1073 | mov QWORD[40+rdi],r13 |
||
1074 | mov QWORD[48+rdi],r14 |
||
1075 | mov QWORD[56+rdi],r15 |
||
1076 | |||
1077 | DB 0F3h,0C3h ;repret |
||
1078 | |||
1079 | |||
1080 | ALIGN 32 |
||
;-----------------------------------------------------------------------
; __rsaz_512_mul -- internal helper (register-based).
; Schoolbook 8x8-qword multiply.
; In:   rsi  pointer to a[0..7]
;       rbp  pointer to b[0..7]
;       rbx  b[0] (already loaded by the caller)
; Out:  16-qword product at [8+rsp] as seen here, i.e. at the caller's
;       [rsp..127+rsp] (the call pushed an 8-byte return address)
; Clobbers: rax, rbx, rcx, rdx, rdi, rbp, r8..r15, flags
;-----------------------------------------------------------------------
1081 | __rsaz_512_mul: |
||
1082 | lea rdi,[8+rsp] |
||
1083 | |||
; First row: a[0..7]*b[0]. Limb 0 is stored; limbs 1..8 stay in r8..r15.
1084 | mov rax,QWORD[rsi] |
||
1085 | mul rbx |
||
1086 | mov QWORD[rdi],rax |
||
1087 | mov rax,QWORD[8+rsi] |
||
1088 | mov r8,rdx |
||
1089 | |||
1090 | mul rbx |
||
1091 | add r8,rax |
||
1092 | mov rax,QWORD[16+rsi] |
||
1093 | mov r9,rdx |
||
1094 | adc r9,0 |
||
1095 | |||
1096 | mul rbx |
||
1097 | add r9,rax |
||
1098 | mov rax,QWORD[24+rsi] |
||
1099 | mov r10,rdx |
||
1100 | adc r10,0 |
||
1101 | |||
1102 | mul rbx |
||
1103 | add r10,rax |
||
1104 | mov rax,QWORD[32+rsi] |
||
1105 | mov r11,rdx |
||
1106 | adc r11,0 |
||
1107 | |||
1108 | mul rbx |
||
1109 | add r11,rax |
||
1110 | mov rax,QWORD[40+rsi] |
||
1111 | mov r12,rdx |
||
1112 | adc r12,0 |
||
1113 | |||
1114 | mul rbx |
||
1115 | add r12,rax |
||
1116 | mov rax,QWORD[48+rsi] |
||
1117 | mov r13,rdx |
||
1118 | adc r13,0 |
||
1119 | |||
1120 | mul rbx |
||
1121 | add r13,rax |
||
1122 | mov rax,QWORD[56+rsi] |
||
1123 | mov r14,rdx |
||
1124 | adc r14,0 |
||
1125 | |||
1126 | mul rbx |
||
1127 | add r14,rax |
||
1128 | mov rax,QWORD[rsi] |
||
1129 | mov r15,rdx |
||
1130 | adc r15,0 |
||
1131 | |||
; Advance to b[1] and to the next output limb; 7 rows remain.
1132 | lea rbp,[8+rbp] |
||
1133 | lea rdi,[8+rdi] |
||
1134 | |||
1135 | mov ecx,7 |
||
1136 | jmp NEAR $L$oop_mul |
||
1137 | |||
1138 | ALIGN 32 |
||
; Per pass: rbx = next b[i]; add a[0..7]*rbx to the window r8..r15,
; store the completed low limb to [rdi] and slide the window by one limb.
1139 | $L$oop_mul: |
||
1140 | mov rbx,QWORD[rbp] |
||
1141 | mul rbx |
||
1142 | add r8,rax |
||
1143 | mov rax,QWORD[8+rsi] |
||
1144 | mov QWORD[rdi],r8 |
||
1145 | mov r8,rdx |
||
1146 | adc r8,0 |
||
1147 | |||
1148 | mul rbx |
||
1149 | add r9,rax |
||
1150 | mov rax,QWORD[16+rsi] |
||
1151 | adc rdx,0 |
||
1152 | add r8,r9 |
||
1153 | mov r9,rdx |
||
1154 | adc r9,0 |
||
1155 | |||
1156 | mul rbx |
||
1157 | add r10,rax |
||
1158 | mov rax,QWORD[24+rsi] |
||
1159 | adc rdx,0 |
||
1160 | add r9,r10 |
||
1161 | mov r10,rdx |
||
1162 | adc r10,0 |
||
1163 | |||
1164 | mul rbx |
||
1165 | add r11,rax |
||
1166 | mov rax,QWORD[32+rsi] |
||
1167 | adc rdx,0 |
||
1168 | add r10,r11 |
||
1169 | mov r11,rdx |
||
1170 | adc r11,0 |
||
1171 | |||
1172 | mul rbx |
||
1173 | add r12,rax |
||
1174 | mov rax,QWORD[40+rsi] |
||
1175 | adc rdx,0 |
||
1176 | add r11,r12 |
||
1177 | mov r12,rdx |
||
1178 | adc r12,0 |
||
1179 | |||
1180 | mul rbx |
||
1181 | add r13,rax |
||
1182 | mov rax,QWORD[48+rsi] |
||
1183 | adc rdx,0 |
||
1184 | add r12,r13 |
||
1185 | mov r13,rdx |
||
1186 | adc r13,0 |
||
1187 | |||
1188 | mul rbx |
||
1189 | add r14,rax |
||
1190 | mov rax,QWORD[56+rsi] |
||
1191 | adc rdx,0 |
||
1192 | add r13,r14 |
||
1193 | mov r14,rdx |
||
; lea advances the b pointer without touching the carry needed by the
; adc that follows.
1194 | lea rbp,[8+rbp] |
||
1195 | adc r14,0 |
||
1196 | |||
1197 | mul rbx |
||
1198 | add r15,rax |
||
1199 | mov rax,QWORD[rsi] |
||
1200 | adc rdx,0 |
||
1201 | add r14,r15 |
||
1202 | mov r15,rdx |
||
1203 | adc r15,0 |
||
1204 | |||
1205 | lea rdi,[8+rdi] |
||
1206 | |||
1207 | dec ecx |
||
1208 | jnz NEAR $L$oop_mul |
||
1209 | |||
; Flush the remaining 8 limbs (product limbs 8..15).
1210 | mov QWORD[rdi],r8 |
||
1211 | mov QWORD[8+rdi],r9 |
||
1212 | mov QWORD[16+rdi],r10 |
||
1213 | mov QWORD[24+rdi],r11 |
||
1214 | mov QWORD[32+rdi],r12 |
||
1215 | mov QWORD[40+rdi],r13 |
||
1216 | mov QWORD[48+rdi],r14 |
||
1217 | mov QWORD[56+rdi],r15 |
||
1218 | |||
1219 | DB 0F3h,0C3h ;repret |
||
1220 | |||
;-----------------------------------------------------------------------
; rsaz_512_scatter4 -- Win64 leaf function; uses the argument registers
; directly and only volatile registers (rax, rcx, rdx, r9).
; In:   rcx = table base, rdx = source (8 qwords), r8 = index
; Copies source qword k to table + 8*index + 128*k for k = 0..7.
; NOTE(review): r8 is used as a full 64-bit index here -- confirm that
; callers pass it zero- or sign-extended as intended.
;-----------------------------------------------------------------------
1221 | global rsaz_512_scatter4 |
||
1222 | |||
1223 | ALIGN 16 |
||
1224 | rsaz_512_scatter4: |
||
; rcx = address of the first slot; r9d = number of qwords left to copy.
1225 | lea rcx,[r8*8+rcx] |
||
1226 | mov r9d,8 |
||
1227 | jmp NEAR $L$oop_scatter |
||
1228 | ALIGN 16 |
||
1229 | $L$oop_scatter: |
||
1230 | mov rax,QWORD[rdx] |
||
1231 | lea rdx,[8+rdx] |
||
1232 | mov QWORD[rcx],rax |
||
1233 | lea rcx,[128+rcx] |
||
1234 | dec r9d |
||
1235 | jnz NEAR $L$oop_scatter |
||
1236 | DB 0F3h,0C3h ;repret |
||
1237 | |||
1238 | |||
;-----------------------------------------------------------------------
; rsaz_512_gather4 -- Win64 function; uses the argument registers
; directly.
; In:   rcx = destination (8 qwords), rdx = table base (rows are read
;       with movdqa), r8d = index
; For each of the 8 rows of 128 bytes, the whole row is loaded, masked
; with xmm0..xmm7 and OR-ed together, and one qword is written out
; (presumably so the memory access pattern does not depend on the index).
; xmm6..xmm15 are callee-saved on Win64 and are spilled to a 0xa8-byte
; frame.
;-----------------------------------------------------------------------
1239 | global rsaz_512_gather4 |
||
1240 | |||
1241 | ALIGN 16 |
||
1242 | rsaz_512_gather4: |
||
1243 | $L$SEH_begin_rsaz_512_gather4: |
||
; Hand-encoded prologue: "sub rsp,0xa8" followed by movaps stores of
; xmm6..xmm15 to [rsp], [rsp+0x10], ... [rsp+0x90] (mirrored by the
; movaps loads and "add rsp,0xa8" at the end).
1244 | DB 0x48,0x81,0xec,0xa8,0x00,0x00,0x00 |
||
1245 | DB 0x0f,0x29,0x34,0x24 |
||
1246 | DB 0x0f,0x29,0x7c,0x24,0x10 |
||
1247 | DB 0x44,0x0f,0x29,0x44,0x24,0x20 |
||
1248 | DB 0x44,0x0f,0x29,0x4c,0x24,0x30 |
||
1249 | DB 0x44,0x0f,0x29,0x54,0x24,0x40 |
||
1250 | DB 0x44,0x0f,0x29,0x5c,0x24,0x50 |
||
1251 | DB 0x44,0x0f,0x29,0x64,0x24,0x60 |
||
1252 | DB 0x44,0x0f,0x29,0x6c,0x24,0x70 |
||
1253 | DB 0x44,0x0f,0x29,0xb4,0x24,0x80,0,0,0 |
||
1254 | DB 0x44,0x0f,0x29,0xbc,0x24,0x90,0,0,0 |
||
; Build eight selection masks in xmm0..xmm7:
;   xmm8 = index broadcast to all dwords,
;   xmm0 = [$L$inc], xmm(k) = xmm(k-1) + [$L$inc+16]   ($L$inc is defined
;   outside this view), each then replaced by its pcmpeqd against xmm8.
1255 | movd xmm8,r8d |
||
1256 | movdqa xmm1,XMMWORD[(($L$inc+16))] |
||
1257 | movdqa xmm0,XMMWORD[$L$inc] |
||
1258 | |||
1259 | pshufd xmm8,xmm8,0 |
||
1260 | movdqa xmm7,xmm1 |
||
1261 | movdqa xmm2,xmm1 |
||
1262 | paddd xmm1,xmm0 |
||
1263 | pcmpeqd xmm0,xmm8 |
||
1264 | movdqa xmm3,xmm7 |
||
1265 | paddd xmm2,xmm1 |
||
1266 | pcmpeqd xmm1,xmm8 |
||
1267 | movdqa xmm4,xmm7 |
||
1268 | paddd xmm3,xmm2 |
||
1269 | pcmpeqd xmm2,xmm8 |
||
1270 | movdqa xmm5,xmm7 |
||
1271 | paddd xmm4,xmm3 |
||
1272 | pcmpeqd xmm3,xmm8 |
||
1273 | movdqa xmm6,xmm7 |
||
1274 | paddd xmm5,xmm4 |
||
1275 | pcmpeqd xmm4,xmm8 |
||
1276 | paddd xmm6,xmm5 |
||
1277 | pcmpeqd xmm5,xmm8 |
||
1278 | paddd xmm7,xmm6 |
||
1279 | pcmpeqd xmm6,xmm8 |
||
1280 | pcmpeqd xmm7,xmm8 |
||
; r9d = number of output qwords (table rows) left.
1281 | mov r9d,8 |
||
1282 | jmp NEAR $L$oop_gather |
||
1283 | ALIGN 16 |
||
1284 | $L$oop_gather: |
||
1285 | movdqa xmm8,XMMWORD[rdx] |
||
1286 | movdqa xmm9,XMMWORD[16+rdx] |
||
1287 | movdqa xmm10,XMMWORD[32+rdx] |
||
1288 | movdqa xmm11,XMMWORD[48+rdx] |
||
1289 | pand xmm8,xmm0 |
||
1290 | movdqa xmm12,XMMWORD[64+rdx] |
||
1291 | pand xmm9,xmm1 |
||
1292 | movdqa xmm13,XMMWORD[80+rdx] |
||
1293 | pand xmm10,xmm2 |
||
1294 | movdqa xmm14,XMMWORD[96+rdx] |
||
1295 | pand xmm11,xmm3 |
||
1296 | movdqa xmm15,XMMWORD[112+rdx] |
||
1297 | lea rdx,[128+rdx] |
||
1298 | pand xmm12,xmm4 |
||
1299 | pand xmm13,xmm5 |
||
1300 | pand xmm14,xmm6 |
||
1301 | pand xmm15,xmm7 |
||
1302 | por xmm8,xmm10 |
||
1303 | por xmm9,xmm11 |
||
1304 | por xmm8,xmm12 |
||
1305 | por xmm9,xmm13 |
||
1306 | por xmm8,xmm14 |
||
1307 | por xmm9,xmm15 |
||
1308 | |||
; Fold the two 64-bit halves (pshufd 0x4e swaps them) and store the
; selected qword.
1309 | por xmm8,xmm9 |
||
1310 | pshufd xmm9,xmm8,0x4e |
||
1311 | por xmm8,xmm9 |
||
1312 | movq QWORD[rcx],xmm8 |
||
1313 | lea rcx,[8+rcx] |
||
1314 | dec r9d |
||
1315 | jnz NEAR $L$oop_gather |
||
; Restore xmm6..xmm15 and release the frame.
1316 | movaps xmm6,XMMWORD[rsp] |
||
1317 | movaps xmm7,XMMWORD[16+rsp] |
||
1318 | movaps xmm8,XMMWORD[32+rsp] |
||
1319 | movaps xmm9,XMMWORD[48+rsp] |
||
1320 | movaps xmm10,XMMWORD[64+rsp] |
||
1321 | movaps xmm11,XMMWORD[80+rsp] |
||
1322 | movaps xmm12,XMMWORD[96+rsp] |
||
1323 | movaps xmm13,XMMWORD[112+rsp] |
||
1324 | movaps xmm14,XMMWORD[128+rsp] |
||
1325 | movaps xmm15,XMMWORD[144+rsp] |
||
1326 | add rsp,0xa8 |
||
1327 | DB 0F3h,0C3h ;repret |
||
1328 | $L$SEH_end_rsaz_512_gather4: |
||
1329 | |||
1330 | |||
; $L$inc: two 16-byte constants loaded by rsaz_512_gather4 with movdqa
; (which needs 16-byte alignment; ALIGN 64 provides it).
; First row  {0,0,1,1}: slot numbers covered by the first mask register.
; Second row {2,2,2,2}: amount added to get each following register's
; slot numbers.
1331 | ALIGN 64 |
||
1332 | $L$inc: |
||
1333 | DD 0,0,1,1 |
||
1334 | DD 2,2,2,2 |
||
1335 | EXTERN __imp_RtlVirtualUnwind |
||
1336 | |||
; se_handler: Win64 language-specific exception handler named by the
; .xdata entries of the rsaz_512_* routines.
; r8 is used as the CONTEXT pointer and r9 as the DISPATCHER_CONTEXT
; pointer; the numeric offsets below follow the Win64 SDK layouts of those
; two structures.
; It works out the stack pointer the faulting routine had on entry,
; writes the routine's saved non-volatile registers back into the CONTEXT,
; copies the CONTEXT into the dispatcher's context record, calls
; RtlVirtualUnwind, and returns 1 (ExceptionContinueSearch) in eax.
1337 | ALIGN 16 |
||
1338 | se_handler: |
||
; preserve every register this handler touches, plus flags
1339 | push rsi |
||
1340 | push rdi |
||
1341 | push rbx |
||
1342 | push rbp |
||
1343 | push r12 |
||
1344 | push r13 |
||
1345 | push r14 |
||
1346 | push r15 |
||
1347 | pushfq |
||
; 64 bytes: 32 of shadow space plus four stack-passed arguments for the
; RtlVirtualUnwind call below
1348 | sub rsp,64 |
||
1349 | |||
; rax = context->Rax, rbx = context->Rip
1350 | mov rax,QWORD[120+r8] |
||
1351 | mov rbx,QWORD[248+r8] |
||
1352 | |||
; rsi = disp->ImageBase, r11 = disp->HandlerData (the two label RVAs that
; follow the handler RVA in .xdata)
1353 | mov rsi,QWORD[8+r9] |
||
1354 | mov r11,QWORD[56+r9] |
||
1355 | |||
; HandlerData[0] = RVA of the routine's body label. If Rip is below it the
; fault is in the prologue, and context->Rax is taken as the entry stack
; pointer (the visible rsaz_512_sqr prologue does mov rax,rsp first).
1356 | mov r10d,DWORD[r11] |
||
1357 | lea r10,[r10*1+rsi] |
||
1358 | cmp rbx,r10 |
||
1359 | jb NEAR $L$common_seh_tail |
||
1360 | |||
; rax = context->Rsp
1361 | mov rax,QWORD[152+r8] |
||
1362 | |||
; HandlerData[1] = RVA of the epilogue label. At or past it, context->Rsp
; is used unchanged.
1363 | mov r10d,DWORD[4+r11] |
||
1364 | lea r10,[r10*1+rsi] |
||
1365 | cmp rbx,r10 |
||
1366 | jae NEAR $L$common_seh_tail |
||
1367 | |||
; Inside the body: step over the 128+24-byte local frame and the six
; pushed registers (48 bytes) to reach the entry stack pointer.
1368 | lea rax,[((128+24+48))+rax] |
||
1369 | |||
; r10 still holds the epilogue address; equality with
; $L$mul_gather4_epilogue identifies rsaz_512_mul_gather4.
1370 | lea rbx,[$L$mul_gather4_epilogue] |
||
1371 | cmp rbx,r10 |
||
1372 | jne NEAR $L$se_not_in_mul_gather4 |
||
1373 | |||
; mul_gather4 only: skip a further 176 bytes (presumably its xmm save
; area; that prologue is not shown here -- confirm against the routine).
1374 | lea rax,[176+rax] |
||
1375 | |||
; copy 20 qwords (ten 16-byte registers) from rax-48-168 into the CONTEXT
; at offset 512 (Xmm6 onward)
1376 | lea rsi,[((-48-168))+rax] |
||
1377 | lea rdi,[512+r8] |
||
1378 | mov ecx,20 |
||
; bytes FC F3 48 A5 = cld ; rep movsq
1379 | DD 0xa548f3fc |
||
1380 | |||
1381 | $L$se_not_in_mul_gather4: |
||
; reload the six pushed registers from just below the entry stack pointer
1382 | mov rbx,QWORD[((-8))+rax] |
||
1383 | mov rbp,QWORD[((-16))+rax] |
||
1384 | mov r12,QWORD[((-24))+rax] |
||
1385 | mov r13,QWORD[((-32))+rax] |
||
1386 | mov r14,QWORD[((-40))+rax] |
||
1387 | mov r15,QWORD[((-48))+rax] |
||
; and store them as context->Rbx, Rbp, R12, R13, R14, R15
1388 | mov QWORD[144+r8],rbx |
||
1389 | mov QWORD[160+r8],rbp |
||
1390 | mov QWORD[216+r8],r12 |
||
1391 | mov QWORD[224+r8],r13 |
||
1392 | mov QWORD[232+r8],r14 |
||
1393 | mov QWORD[240+r8],r15 |
||
1394 | |||
1395 | $L$common_seh_tail: |
||
; rdi/rsi were saved at [8+rsp]/[16+rsp] on entry by the WIN64 prologue
; (visible in rsaz_512_sqr); rax is that entry rsp
1396 | mov rdi,QWORD[8+rax] |
||
1397 | mov rsi,QWORD[16+rax] |
||
; context->Rsp = rax, context->Rsi = rsi, context->Rdi = rdi
1398 | mov QWORD[152+r8],rax |
||
1399 | mov QWORD[168+r8],rsi |
||
1400 | mov QWORD[176+r8],rdi |
||
1401 | |||
; copy the whole CONTEXT (154 qwords) into disp->ContextRecord
1402 | mov rdi,QWORD[40+r9] |
||
1403 | mov rsi,r8 |
||
1404 | mov ecx,154 |
||
; bytes FC F3 48 A5 = cld ; rep movsq
1405 | DD 0xa548f3fc |
||
1406 | |||
; Set up RtlVirtualUnwind arguments from the DISPATCHER_CONTEXT:
;   rcx = 0 (handler type), rdx = ImageBase, r8 = ControlPc,
;   r9 = FunctionEntry, [32+rsp] = ContextRecord,
;   [40+rsp] = &HandlerData, [48+rsp] = &EstablisherFrame, [56+rsp] = 0
1407 | mov rsi,r9 |
||
1408 | xor rcx,rcx |
||
1409 | mov rdx,QWORD[8+rsi] |
||
1410 | mov r8,QWORD[rsi] |
||
1411 | mov r9,QWORD[16+rsi] |
||
1412 | mov r10,QWORD[40+rsi] |
||
1413 | lea r11,[56+rsi] |
||
1414 | lea r12,[24+rsi] |
||
1415 | mov QWORD[32+rsp],r10 |
||
1416 | mov QWORD[40+rsp],r11 |
||
1417 | mov QWORD[48+rsp],r12 |
||
1418 | mov QWORD[56+rsp],rcx |
||
1419 | call QWORD[__imp_RtlVirtualUnwind] |
||
1420 | |||
; return value 1; then undo the handler's own frame in reverse order
1421 | mov eax,1 |
||
1422 | add rsp,64 |
||
1423 | popfq |
||
1424 | pop r15 |
||
1425 | pop r14 |
||
1426 | pop r13 |
||
1427 | pop r12 |
||
1428 | pop rbp |
||
1429 | pop rbx |
||
1430 | pop rdi |
||
1431 | pop rsi |
||
1432 | DB 0F3h,0C3h ;repret |
||
1433 | |||
1434 | |||
; .pdata: one three-dword record per routine with unwind data, each dword
; an image-relative address (wrt ..imagebase):
;   start of routine, end of routine, its unwind-info record in .xdata.
1435 | section .pdata rdata align=4 |
||
1436 | ALIGN 4 |
||
; rsaz_512_sqr
1437 | DD $L$SEH_begin_rsaz_512_sqr wrt ..imagebase |
||
1438 | DD $L$SEH_end_rsaz_512_sqr wrt ..imagebase |
||
1439 | DD $L$SEH_info_rsaz_512_sqr wrt ..imagebase |
||
1440 | |||
; rsaz_512_mul
1441 | DD $L$SEH_begin_rsaz_512_mul wrt ..imagebase |
||
1442 | DD $L$SEH_end_rsaz_512_mul wrt ..imagebase |
||
1443 | DD $L$SEH_info_rsaz_512_mul wrt ..imagebase |
||
1444 | |||
; rsaz_512_mul_gather4
1445 | DD $L$SEH_begin_rsaz_512_mul_gather4 wrt ..imagebase |
||
1446 | DD $L$SEH_end_rsaz_512_mul_gather4 wrt ..imagebase |
||
1447 | DD $L$SEH_info_rsaz_512_mul_gather4 wrt ..imagebase |
||
1448 | |||
; rsaz_512_mul_scatter4
1449 | DD $L$SEH_begin_rsaz_512_mul_scatter4 wrt ..imagebase |
||
1450 | DD $L$SEH_end_rsaz_512_mul_scatter4 wrt ..imagebase |
||
1451 | DD $L$SEH_info_rsaz_512_mul_scatter4 wrt ..imagebase |
||
1452 | |||
; rsaz_512_mul_by_one
1453 | DD $L$SEH_begin_rsaz_512_mul_by_one wrt ..imagebase |
||
1454 | DD $L$SEH_end_rsaz_512_mul_by_one wrt ..imagebase |
||
1455 | DD $L$SEH_info_rsaz_512_mul_by_one wrt ..imagebase |
||
1456 | |||
; rsaz_512_gather4
1457 | DD $L$SEH_begin_rsaz_512_gather4 wrt ..imagebase |
||
1458 | DD $L$SEH_end_rsaz_512_gather4 wrt ..imagebase |
||
1459 | DD $L$SEH_info_rsaz_512_gather4 wrt ..imagebase |
||
1460 | |||
; .xdata: unwind-info records referenced from .pdata.
; The first five records share one shape:
;   DB 9,0,0,0  -> version 1 with the exception-handler flag set
;                  (9 = 1 | 1<<3), prologue size 0, no unwind codes,
;                  no frame register
;   DD handler  -> image-relative address of se_handler
;   DD body, epilogue -> handler data: the two label RVAs se_handler reads
;                  from disp->HandlerData to classify the faulting Rip
1461 | section .xdata rdata align=8 |
||
1462 | ALIGN 8 |
||
1463 | $L$SEH_info_rsaz_512_sqr: |
||
1464 | DB 9,0,0,0 |
||
1465 | DD se_handler wrt ..imagebase |
||
1466 | DD $L$sqr_body wrt ..imagebase,$L$sqr_epilogue wrt ..imagebase |
||
1467 | $L$SEH_info_rsaz_512_mul: |
||
1468 | DB 9,0,0,0 |
||
1469 | DD se_handler wrt ..imagebase |
||
1470 | DD $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase |
||
1471 | $L$SEH_info_rsaz_512_mul_gather4: |
||
1472 | DB 9,0,0,0 |
||
1473 | DD se_handler wrt ..imagebase |
||
1474 | DD $L$mul_gather4_body wrt ..imagebase,$L$mul_gather4_epilogue wrt ..imagebase |
||
1475 | $L$SEH_info_rsaz_512_mul_scatter4: |
||
1476 | DB 9,0,0,0 |
||
1477 | DD se_handler wrt ..imagebase |
||
1478 | DD $L$mul_scatter4_body wrt ..imagebase,$L$mul_scatter4_epilogue wrt ..imagebase |
||
1479 | $L$SEH_info_rsaz_512_mul_by_one: |
||
1480 | DB 9,0,0,0 |
||
1481 | DD se_handler wrt ..imagebase |
||
1482 | DD $L$mul_by_one_body wrt ..imagebase,$L$mul_by_one_epilogue wrt ..imagebase |
||
; rsaz_512_gather4 has no handler; its prologue is described directly with
; unwind codes. Each row below is: prologue offset after the instruction,
; (register<<4 | opcode), then a 16-bit scaled operand.
1483 | $L$SEH_info_rsaz_512_gather4: |
||
; version 1, no flags; prologue is 0x46 bytes; 0x16 unwind-code slots
1484 | DB 0x01,0x46,0x16,0x00 |
||
; at 0x46: xmm15 saved at rsp+0x90 (operand is offset/16)
1485 | DB 0x46,0xf8,0x09,0x00 |
||
; at 0x3d: xmm14 saved at rsp+0x80
1486 | DB 0x3d,0xe8,0x08,0x00 |
||
; at 0x34: xmm13 saved at rsp+0x70
1487 | DB 0x34,0xd8,0x07,0x00 |
||
; at 0x2e: xmm12 saved at rsp+0x60
1488 | DB 0x2e,0xc8,0x06,0x00 |
||
; at 0x28: xmm11 saved at rsp+0x50
1489 | DB 0x28,0xb8,0x05,0x00 |
||
; at 0x22: xmm10 saved at rsp+0x40
1490 | DB 0x22,0xa8,0x04,0x00 |
||
; at 0x1c: xmm9 saved at rsp+0x30
1491 | DB 0x1c,0x98,0x03,0x00 |
||
; at 0x16: xmm8 saved at rsp+0x20
1492 | DB 0x16,0x88,0x02,0x00 |
||
; at 0x10: xmm7 saved at rsp+0x10
1493 | DB 0x10,0x78,0x01,0x00 |
||
; at 0x0b: xmm6 saved at rsp+0x00
1494 | DB 0x0b,0x68,0x00,0x00 |
||
; at 0x07: stack allocation of 0x15*8 = 0xa8 bytes (sub rsp,0xa8)
1495 | DB 0x07,0x01,0x15,0x00 |