summaryrefslogtreecommitdiff
path: root/nacl/crypto_onetimeauth/poly1305/53/auth.c
diff options
context:
space:
mode:
Diffstat (limited to 'nacl/crypto_onetimeauth/poly1305/53/auth.c')
-rw-r--r--nacl/crypto_onetimeauth/poly1305/53/auth.c1616
1 files changed, 1616 insertions, 0 deletions
diff --git a/nacl/crypto_onetimeauth/poly1305/53/auth.c b/nacl/crypto_onetimeauth/poly1305/53/auth.c
new file mode 100644
index 00000000..a4a9c3f6
--- /dev/null
+++ b/nacl/crypto_onetimeauth/poly1305/53/auth.c
@@ -0,0 +1,1616 @@
1/*
220080910
3D. J. Bernstein
4Public domain.
5*/
6
7#include "crypto_onetimeauth.h"
8
/*
 * Short integer aliases used throughout this file.
 * NOTE(review): the names imply that int / unsigned int are 32 bits wide and
 * long long / unsigned long long are 64 bits wide; the code below shifts
 * int64 values left by 51 and int32 values right by 31, so confirm these
 * widths hold on the target platform.
 */
9typedef unsigned char uchar;
10typedef int int32;
11typedef unsigned int uint32;
12typedef long long int64;
13typedef unsigned long long uint64;
14
/*
 * Table of double-precision constants used by crypto_onetimeauth.
 *
 * The function does not index this array; it casts its address to char * and
 * reads each entry at a fixed byte offset.  With 8-byte doubles (which those
 * offsets assume) the layout is:
 *
 *     +0  alpham80    +8  alpham48   +16  alpham16   +24  alpha0
 *    +32  alpha18    +40  alpha32    +48  alpha50    +56  alpha64
 *    +64  alpha82    +72  alpha96    +80  alpha112   +88  alpha130
 *    +96  scale     +104  offset0   +112  offset1   +120  offset2
 *   +128  offset3
 *
 * Each alphaN entry is 3 * 2^k for the k given in its trailing comment.
 * The entries must stay in this order, since reordering would silently
 * change which constant each offset selects.
 */
15static const double poly1305_53_constants[] = {
16 0.00000000558793544769287109375 /* alpham80 = 3 2^(-29) */
17, 24.0 /* alpham48 = 3 2^3 */
18, 103079215104.0 /* alpham16 = 3 2^35 */
19, 6755399441055744.0 /* alpha0 = 3 2^51 */
20, 1770887431076116955136.0 /* alpha18 = 3 2^69 */
21, 29014219670751100192948224.0 /* alpha32 = 3 2^83 */
22, 7605903601369376408980219232256.0 /* alpha50 = 3 2^101 */
23, 124615124604835863084731911901282304.0 /* alpha64 = 3 2^115 */
24, 32667107224410092492483962313449748299776.0 /* alpha82 = 3 2^133 */
25, 535217884764734955396857238543560676143529984.0 /* alpha96 = 3 2^147 */
26, 35076039295941670036888435985190792471742381031424.0 /* alpha112 = 3 2^163 */
27, 9194973245195333150150082162901855101712434733101613056.0 /* alpha130 = 3 2^181 */
28, 0.0000000000000000000000000000000000000036734198463196484624023016788195177431833298649127735047148490821200539357960224151611328125 /* scale = 5 2^(-130) */
29, 6755408030990331.0 /* offset0 = alpha0 + 2^33 - 5 */
30, 29014256564239239022116864.0 /* offset1 = alpha32 + 2^65 - 2^33 */
31, 124615283061160854719918951570079744.0 /* offset2 = alpha64 + 2^97 - 2^65 */
32, 535219245894202480694386063513315216128475136.0 /* offset3 = alpha96 + 2^130 - 2^97 */
33} ;
34
/*
 * crypto_onetimeauth: compute a 16-byte Poly1305 one-time authenticator
 * using double-precision floating-point arithmetic.
 *
 * out: receives the authenticator; bytes out[0..15] are written.
 * m:   message bytes.
 * l:   message length in bytes.  l == 0 is accepted: control goes straight
 *      to addatmost15bytes and from there to nomorebytes.
 * k:   32-byte one-time key.  k[0..15] is r (masked as it is loaded) and
 *      k[16..31] is s, which is added to the result just before output.
 *
 * Always returns 0.
 *
 * NOTE(review): the carry steps of the form (h + alphaN) - alphaN appear to
 * rely on IEEE-754 double arithmetic with round-to-nearest and no excess
 * precision; confirm compiler flags and FPU mode on the target.
 * NOTE(review): int64 <-> double reinterpretation is done through pointer
 * casts on d0..d3, which depends on the compiler tolerating this aliasing.
 */
35int crypto_onetimeauth(unsigned char *out,const unsigned char *m,unsigned long long l,const unsigned char *k)
36{
37 register const unsigned char *r = k;
38 register const unsigned char *s = k + 16;
/* Saved copies of the split r and scale*r values, reloaded in every multiply step. */
39 double r0high_stack;
40 double r1high_stack;
41 double r1low_stack;
42 double sr1high_stack;
43 double r2low_stack;
44 double sr2high_stack;
45 double r0low_stack;
46 double sr1low_stack;
47 double r2high_stack;
48 double sr2low_stack;
49 double r3high_stack;
50 double sr3high_stack;
51 double r3low_stack;
52 double sr3low_stack;
/* Scratch words whose storage is read both as int64 and (via pointer cast) as double. */
53 int64 d0;
54 int64 d1;
55 int64 d2;
56 int64 d3;
57 register double scale;
58 register double alpha0;
59 register double alpha32;
60 register double alpha64;
61 register double alpha96;
62 register double alpha130;
63 register double h0;
64 register double h1;
65 register double h2;
66 register double h3;
67 register double h4;
68 register double h5;
69 register double h6;
70 register double h7;
71 register double y7;
72 register double y6;
73 register double y1;
74 register double y0;
75 register double y5;
76 register double y4;
77 register double x7;
78 register double x6;
79 register double x1;
80 register double x0;
81 register double y3;
82 register double y2;
83 register double r3low;
84 register double r0low;
85 register double r3high;
86 register double r0high;
87 register double sr1low;
88 register double x5;
89 register double r3lowx0;
90 register double sr1high;
91 register double x4;
92 register double r0lowx6;
93 register double r1low;
94 register double x3;
95 register double r3highx0;
96 register double r1high;
97 register double x2;
98 register double r0highx6;
99 register double sr2low;
100 register double r0lowx0;
101 register double sr2high;
102 register double sr1lowx6;
103 register double r2low;
104 register double r0highx0;
105 register double r2high;
106 register double sr1highx6;
107 register double sr3low;
108 register double r1lowx0;
109 register double sr3high;
110 register double sr2lowx6;
111 register double r1highx0;
112 register double sr2highx6;
113 register double r2lowx0;
114 register double sr3lowx6;
115 register double r2highx0;
116 register double sr3highx6;
117 register double r1highx4;
118 register double r1lowx4;
119 register double r0highx4;
120 register double r0lowx4;
121 register double sr3highx4;
122 register double sr3lowx4;
123 register double sr2highx4;
124 register double sr2lowx4;
125 register double r0lowx2;
126 register double r0highx2;
127 register double r1lowx2;
128 register double r1highx2;
129 register double r2lowx2;
130 register double r2highx2;
131 register double sr3lowx2;
132 register double sr3highx2;
133 register double z0;
134 register double z1;
135 register double z2;
136 register double z3;
137 register int64 r0;
138 register int64 r1;
139 register int64 r2;
140 register int64 r3;
141 register uint32 r00;
142 register uint32 r01;
143 register uint32 r02;
144 register uint32 r03;
145 register uint32 r10;
146 register uint32 r11;
147 register uint32 r12;
148 register uint32 r13;
149 register uint32 r20;
150 register uint32 r21;
151 register uint32 r22;
152 register uint32 r23;
153 register uint32 r30;
154 register uint32 r31;
155 register uint32 r32;
156 register uint32 r33;
157 register int64 m0;
158 register int64 m1;
159 register int64 m2;
160 register int64 m3;
161 register uint32 m00;
162 register uint32 m01;
163 register uint32 m02;
164 register uint32 m03;
165 register uint32 m10;
166 register uint32 m11;
167 register uint32 m12;
168 register uint32 m13;
169 register uint32 m20;
170 register uint32 m21;
171 register uint32 m22;
172 register uint32 m23;
173 register uint32 m30;
174 register uint32 m31;
175 register uint32 m32;
176 register uint64 m33;
177 register char *constants;
178 register int32 lbelow2;
179 register int32 lbelow3;
180 register int32 lbelow4;
181 register int32 lbelow5;
182 register int32 lbelow6;
183 register int32 lbelow7;
184 register int32 lbelow8;
185 register int32 lbelow9;
186 register int32 lbelow10;
187 register int32 lbelow11;
188 register int32 lbelow12;
189 register int32 lbelow13;
190 register int32 lbelow14;
191 register int32 lbelow15;
192 register double alpham80;
193 register double alpham48;
194 register double alpham16;
195 register double alpha18;
196 register double alpha50;
197 register double alpha82;
198 register double alpha112;
199 register double offset0;
200 register double offset1;
201 register double offset2;
202 register double offset3;
203 register uint32 s00;
204 register uint32 s01;
205 register uint32 s02;
206 register uint32 s03;
207 register uint32 s10;
208 register uint32 s11;
209 register uint32 s12;
210 register uint32 s13;
211 register uint32 s20;
212 register uint32 s21;
213 register uint32 s22;
214 register uint32 s23;
215 register uint32 s30;
216 register uint32 s31;
217 register uint32 s32;
218 register uint32 s33;
219 register uint64 bits32;
220 register uint64 f;
221 register uint64 f0;
222 register uint64 f1;
223 register uint64 f2;
224 register uint64 f3;
225 register uint64 f4;
226 register uint64 g;
227 register uint64 g0;
228 register uint64 g1;
229 register uint64 g2;
230 register uint64 g3;
231 register uint64 g4;
232
/*
 * Load r = k[0..15] as four little-endian 32-bit words.  The masks (& 15 on
 * bytes 3, 7, 11, 15 and & 252 on bytes 4, 8, 12) clear the top four bits of
 * every word and the bottom two bits of words 1..3.
 *
 * Each word is added to 2151 << 51, 2215 << 51, 2279 << 51 or 2343 << 51.
 * Read as an IEEE-754 double, those bit patterns are alpha0, alpha32, alpha64
 * and alpha96, so the stored int64 reinterprets as alphaN + word * 2^N and
 * subtracting alphaN afterwards leaves the word as a double.
 *
 * "constants" is a byte pointer to the table (the cast drops its const
 * qualifier); all loads below use byte offsets that assume 8-byte doubles.
 */
233 r00 = *(uchar *) (r + 0);
234 constants = (char *) &poly1305_53_constants;
235
236 r01 = *(uchar *) (r + 1);
237
238 r02 = *(uchar *) (r + 2);
239 r0 = 2151;
240
241 r03 = *(uchar *) (r + 3); r03 &= 15;
242 r0 <<= 51;
243
244 r10 = *(uchar *) (r + 4); r10 &= 252;
245 r01 <<= 8;
246 r0 += r00;
247
248 r11 = *(uchar *) (r + 5);
249 r02 <<= 16;
250 r0 += r01;
251
252 r12 = *(uchar *) (r + 6);
253 r03 <<= 24;
254 r0 += r02;
255
256 r13 = *(uchar *) (r + 7); r13 &= 15;
257 r1 = 2215;
258 r0 += r03;
259
260 d0 = r0;
261 r1 <<= 51;
262 r2 = 2279;
263
264 r20 = *(uchar *) (r + 8); r20 &= 252;
265 r11 <<= 8;
266 r1 += r10;
267
268 r21 = *(uchar *) (r + 9);
269 r12 <<= 16;
270 r1 += r11;
271
272 r22 = *(uchar *) (r + 10);
273 r13 <<= 24;
274 r1 += r12;
275
276 r23 = *(uchar *) (r + 11); r23 &= 15;
277 r2 <<= 51;
278 r1 += r13;
279
280 d1 = r1;
281 r21 <<= 8;
282 r2 += r20;
283
284 r30 = *(uchar *) (r + 12); r30 &= 252;
285 r22 <<= 16;
286 r2 += r21;
287
288 r31 = *(uchar *) (r + 13);
289 r23 <<= 24;
290 r2 += r22;
291
292 r32 = *(uchar *) (r + 14);
293 r2 += r23;
294 r3 = 2343;
295
296 d2 = r2;
297 r3 <<= 51;
298 alpha32 = *(double *) (constants + 40);
299
300 r33 = *(uchar *) (r + 15); r33 &= 15;
301 r31 <<= 8;
302 r3 += r30;
303
304 r32 <<= 16;
305 r3 += r31;
306
307 r33 <<= 24;
308 r3 += r32;
309
/*
 * The accumulator h0..h7 is zeroed by computing alpha32 - alpha32.  These
 * statements are interleaved with converting r to doubles, splitting each
 * r word into a high part ((x + alphaK) - alphaK) and a low remainder, and
 * forming the matching scale * r values (sr1..sr3), which are split the same
 * way.  The results are stored in the *_stack variables.
 */
310 r3 += r33;
311 h0 = alpha32 - alpha32;
312
313 d3 = r3;
314 h1 = alpha32 - alpha32;
315
316 alpha0 = *(double *) (constants + 24);
317 h2 = alpha32 - alpha32;
318
319 alpha64 = *(double *) (constants + 56);
320 h3 = alpha32 - alpha32;
321
322 alpha18 = *(double *) (constants + 32);
323 h4 = alpha32 - alpha32;
324
325 r0low = *(double *) &d0;
326 h5 = alpha32 - alpha32;
327
328 r1low = *(double *) &d1;
329 h6 = alpha32 - alpha32;
330
331 r2low = *(double *) &d2;
332 h7 = alpha32 - alpha32;
333
334 alpha50 = *(double *) (constants + 48);
335 r0low -= alpha0;
336
337 alpha82 = *(double *) (constants + 64);
338 r1low -= alpha32;
339
340 scale = *(double *) (constants + 96);
341 r2low -= alpha64;
342
343 alpha96 = *(double *) (constants + 72);
344 r0high = r0low + alpha18;
345
346 r3low = *(double *) &d3;
347
348 alpham80 = *(double *) (constants + 0);
349 r1high = r1low + alpha50;
350 sr1low = scale * r1low;
351
352 alpham48 = *(double *) (constants + 8);
353 r2high = r2low + alpha82;
354 sr2low = scale * r2low;
355
356 r0high -= alpha18;
357 r0high_stack = r0high;
358
359 r3low -= alpha96;
360
361 r1high -= alpha50;
362 r1high_stack = r1high;
363
364 sr1high = sr1low + alpham80;
365
366 alpha112 = *(double *) (constants + 80);
367 r0low -= r0high;
368
369 alpham16 = *(double *) (constants + 16);
370 r2high -= alpha82;
371 sr3low = scale * r3low;
372
373 alpha130 = *(double *) (constants + 88);
374 sr2high = sr2low + alpham48;
375
376 r1low -= r1high;
377 r1low_stack = r1low;
378
379 sr1high -= alpham80;
380 sr1high_stack = sr1high;
381
382 r2low -= r2high;
383 r2low_stack = r2low;
384
385 sr2high -= alpham48;
386 sr2high_stack = sr2high;
387
388 r3high = r3low + alpha112;
389 r0low_stack = r0low;
390
391 sr1low -= sr1high;
392 sr1low_stack = sr1low;
393
394 sr3high = sr3low + alpham16;
395 r2high_stack = r2high;
396
397 sr2low -= sr2high;
398 sr2low_stack = sr2low;
399
400 r3high -= alpha112;
401 r3high_stack = r3high;
402
403
404 sr3high -= alpham16;
405 sr3high_stack = sr3high;
406
407
408 r3low -= r3high;
409 r3low_stack = r3low;
410
411
412 sr3low -= sr3high;
413 sr3low_stack = sr3low;
414
/* Messages shorter than one full block skip directly to the partial-block code. */
415if (l < 16) goto addatmost15bytes;
416
/*
 * First full 16-byte block: load it as four little-endian words using the
 * same exponent-bias trick as for r.  "m33 += 256" before the shift by 24
 * sets bit 32 of the top word, i.e. appends a 1 bit just above the 16 bytes.
 * The block is then added into h0, h1, h3 and h5.
 */
417 m00 = *(uchar *) (m + 0);
418 m0 = 2151;
419
420 m0 <<= 51;
421 m1 = 2215;
422 m01 = *(uchar *) (m + 1);
423
424 m1 <<= 51;
425 m2 = 2279;
426 m02 = *(uchar *) (m + 2);
427
428 m2 <<= 51;
429 m3 = 2343;
430 m03 = *(uchar *) (m + 3);
431
432 m10 = *(uchar *) (m + 4);
433 m01 <<= 8;
434 m0 += m00;
435
436 m11 = *(uchar *) (m + 5);
437 m02 <<= 16;
438 m0 += m01;
439
440 m12 = *(uchar *) (m + 6);
441 m03 <<= 24;
442 m0 += m02;
443
444 m13 = *(uchar *) (m + 7);
445 m3 <<= 51;
446 m0 += m03;
447
448 m20 = *(uchar *) (m + 8);
449 m11 <<= 8;
450 m1 += m10;
451
452 m21 = *(uchar *) (m + 9);
453 m12 <<= 16;
454 m1 += m11;
455
456 m22 = *(uchar *) (m + 10);
457 m13 <<= 24;
458 m1 += m12;
459
460 m23 = *(uchar *) (m + 11);
461 m1 += m13;
462
463 m30 = *(uchar *) (m + 12);
464 m21 <<= 8;
465 m2 += m20;
466
467 m31 = *(uchar *) (m + 13);
468 m22 <<= 16;
469 m2 += m21;
470
471 m32 = *(uchar *) (m + 14);
472 m23 <<= 24;
473 m2 += m22;
474
475 m33 = *(uchar *) (m + 15);
476 m2 += m23;
477
478 d0 = m0;
479 m31 <<= 8;
480 m3 += m30;
481
482 d1 = m1;
483 m32 <<= 16;
484 m3 += m31;
485
486 d2 = m2;
487 m33 += 256;
488
489 m33 <<= 24;
490 m3 += m32;
491
492 m3 += m33;
493 d3 = m3;
494
495 m += 16;
496 l -= 16;
497
498 z0 = *(double *) &d0;
499
500 z1 = *(double *) &d1;
501
502 z2 = *(double *) &d2;
503
504 z3 = *(double *) &d3;
505
506 z0 -= alpha0;
507
508 z1 -= alpha32;
509
510 z2 -= alpha64;
511
512 z3 -= alpha96;
513
514 h0 += z0;
515
516 h1 += z1;
517
518 h3 += z2;
519
520 h5 += z3;
521
522if (l < 16) goto multiplyaddatmost15bytes;
523
/*
 * Main loop, entered only while at least 16 more message bytes remain.
 * Each pass interleaves three things:
 *   - loading the next 16-byte block into d0..d3 (with the extra 1 bit);
 *   - carrying h: each yN = (hN + alpha) - alpha is the rounded-off upper
 *     part of hN and xN = hN - yN the remainder; the parts of h6/h7 split
 *     off at alpha130 are multiplied by scale and folded into x0/x1;
 *   - multiplying the carried value (x0, x2, x4, x6 after pairwise sums) by
 *     r, using the sr* values for the products that wrap around.
 * The freshly loaded block is added to h at the end of the pass.
 */
524multiplyaddatleast16bytes:;
525
526 m2 = 2279;
527 m20 = *(uchar *) (m + 8);
528 y7 = h7 + alpha130;
529
530 m2 <<= 51;
531 m3 = 2343;
532 m21 = *(uchar *) (m + 9);
533 y6 = h6 + alpha130;
534
535 m3 <<= 51;
536 m0 = 2151;
537 m22 = *(uchar *) (m + 10);
538 y1 = h1 + alpha32;
539
540 m0 <<= 51;
541 m1 = 2215;
542 m23 = *(uchar *) (m + 11);
543 y0 = h0 + alpha32;
544
545 m1 <<= 51;
546 m30 = *(uchar *) (m + 12);
547 y7 -= alpha130;
548
549 m21 <<= 8;
550 m2 += m20;
551 m31 = *(uchar *) (m + 13);
552 y6 -= alpha130;
553
554 m22 <<= 16;
555 m2 += m21;
556 m32 = *(uchar *) (m + 14);
557 y1 -= alpha32;
558
559 m23 <<= 24;
560 m2 += m22;
561 m33 = *(uchar *) (m + 15);
562 y0 -= alpha32;
563
564 m2 += m23;
565 m00 = *(uchar *) (m + 0);
566 y5 = h5 + alpha96;
567
568 m31 <<= 8;
569 m3 += m30;
570 m01 = *(uchar *) (m + 1);
571 y4 = h4 + alpha96;
572
573 m32 <<= 16;
574 m02 = *(uchar *) (m + 2);
575 x7 = h7 - y7;
576 y7 *= scale;
577
578 m33 += 256;
579 m03 = *(uchar *) (m + 3);
580 x6 = h6 - y6;
581 y6 *= scale;
582
583 m33 <<= 24;
584 m3 += m31;
585 m10 = *(uchar *) (m + 4);
586 x1 = h1 - y1;
587
588 m01 <<= 8;
589 m3 += m32;
590 m11 = *(uchar *) (m + 5);
591 x0 = h0 - y0;
592
593 m3 += m33;
594 m0 += m00;
595 m12 = *(uchar *) (m + 6);
596 y5 -= alpha96;
597
598 m02 <<= 16;
599 m0 += m01;
600 m13 = *(uchar *) (m + 7);
601 y4 -= alpha96;
602
603 m03 <<= 24;
604 m0 += m02;
605 d2 = m2;
606 x1 += y7;
607
608 m0 += m03;
609 d3 = m3;
610 x0 += y6;
611
612 m11 <<= 8;
613 m1 += m10;
614 d0 = m0;
615 x7 += y5;
616
617 m12 <<= 16;
618 m1 += m11;
619 x6 += y4;
620
621 m13 <<= 24;
622 m1 += m12;
623 y3 = h3 + alpha64;
624
625 m1 += m13;
626 d1 = m1;
627 y2 = h2 + alpha64;
628
629 x0 += x1;
630
631 x6 += x7;
632
633 y3 -= alpha64;
634 r3low = r3low_stack;
635
636 y2 -= alpha64;
637 r0low = r0low_stack;
638
639 x5 = h5 - y5;
640 r3lowx0 = r3low * x0;
641 r3high = r3high_stack;
642
643 x4 = h4 - y4;
644 r0lowx6 = r0low * x6;
645 r0high = r0high_stack;
646
647 x3 = h3 - y3;
648 r3highx0 = r3high * x0;
649 sr1low = sr1low_stack;
650
651 x2 = h2 - y2;
652 r0highx6 = r0high * x6;
653 sr1high = sr1high_stack;
654
655 x5 += y3;
656 r0lowx0 = r0low * x0;
657 r1low = r1low_stack;
658
659 h6 = r3lowx0 + r0lowx6;
660 sr1lowx6 = sr1low * x6;
661 r1high = r1high_stack;
662
663 x4 += y2;
664 r0highx0 = r0high * x0;
665 sr2low = sr2low_stack;
666
667 h7 = r3highx0 + r0highx6;
668 sr1highx6 = sr1high * x6;
669 sr2high = sr2high_stack;
670
671 x3 += y1;
672 r1lowx0 = r1low * x0;
673 r2low = r2low_stack;
674
675 h0 = r0lowx0 + sr1lowx6;
676 sr2lowx6 = sr2low * x6;
677 r2high = r2high_stack;
678
679 x2 += y0;
680 r1highx0 = r1high * x0;
681 sr3low = sr3low_stack;
682
683 h1 = r0highx0 + sr1highx6;
684 sr2highx6 = sr2high * x6;
685 sr3high = sr3high_stack;
686
687 x4 += x5;
688 r2lowx0 = r2low * x0;
689 z2 = *(double *) &d2;
690
691 h2 = r1lowx0 + sr2lowx6;
692 sr3lowx6 = sr3low * x6;
693
694 x2 += x3;
695 r2highx0 = r2high * x0;
696 z3 = *(double *) &d3;
697
698 h3 = r1highx0 + sr2highx6;
699 sr3highx6 = sr3high * x6;
700
701 r1highx4 = r1high * x4;
702 z2 -= alpha64;
703
704 h4 = r2lowx0 + sr3lowx6;
705 r1lowx4 = r1low * x4;
706
707 r0highx4 = r0high * x4;
708 z3 -= alpha96;
709
710 h5 = r2highx0 + sr3highx6;
711 r0lowx4 = r0low * x4;
712
713 h7 += r1highx4;
714 sr3highx4 = sr3high * x4;
715
716 h6 += r1lowx4;
717 sr3lowx4 = sr3low * x4;
718
719 h5 += r0highx4;
720 sr2highx4 = sr2high * x4;
721
722 h4 += r0lowx4;
723 sr2lowx4 = sr2low * x4;
724
725 h3 += sr3highx4;
726 r0lowx2 = r0low * x2;
727
728 h2 += sr3lowx4;
729 r0highx2 = r0high * x2;
730
731 h1 += sr2highx4;
732 r1lowx2 = r1low * x2;
733
734 h0 += sr2lowx4;
735 r1highx2 = r1high * x2;
736
737 h2 += r0lowx2;
738 r2lowx2 = r2low * x2;
739
740 h3 += r0highx2;
741 r2highx2 = r2high * x2;
742
743 h4 += r1lowx2;
744 sr3lowx2 = sr3low * x2;
745
746 h5 += r1highx2;
747 sr3highx2 = sr3high * x2;
748 alpha0 = *(double *) (constants + 24);
749
750 m += 16;
751 h6 += r2lowx2;
752
753 l -= 16;
754 h7 += r2highx2;
755
756 z1 = *(double *) &d1;
757 h0 += sr3lowx2;
758
759 z0 = *(double *) &d0;
760 h1 += sr3highx2;
761
762 z1 -= alpha32;
763
764 z0 -= alpha0;
765
766 h5 += z3;
767
768 h3 += z2;
769
770 h1 += z1;
771
772 h0 += z0;
773
774if (l >= 16) goto multiplyaddatleast16bytes;
775
/*
 * Fewer than 16 bytes remain but h still holds a block that has been added
 * and not yet multiplied: perform the same carry and multiply by r as in the
 * loop above, without loading any message bytes.
 */
776multiplyaddatmost15bytes:;
777
778 y7 = h7 + alpha130;
779
780 y6 = h6 + alpha130;
781
782 y1 = h1 + alpha32;
783
784 y0 = h0 + alpha32;
785
786 y7 -= alpha130;
787
788 y6 -= alpha130;
789
790 y1 -= alpha32;
791
792 y0 -= alpha32;
793
794 y5 = h5 + alpha96;
795
796 y4 = h4 + alpha96;
797
798 x7 = h7 - y7;
799 y7 *= scale;
800
801 x6 = h6 - y6;
802 y6 *= scale;
803
804 x1 = h1 - y1;
805
806 x0 = h0 - y0;
807
808 y5 -= alpha96;
809
810 y4 -= alpha96;
811
812 x1 += y7;
813
814 x0 += y6;
815
816 x7 += y5;
817
818 x6 += y4;
819
820 y3 = h3 + alpha64;
821
822 y2 = h2 + alpha64;
823
824 x0 += x1;
825
826 x6 += x7;
827
828 y3 -= alpha64;
829 r3low = r3low_stack;
830
831 y2 -= alpha64;
832 r0low = r0low_stack;
833
834 x5 = h5 - y5;
835 r3lowx0 = r3low * x0;
836 r3high = r3high_stack;
837
838 x4 = h4 - y4;
839 r0lowx6 = r0low * x6;
840 r0high = r0high_stack;
841
842 x3 = h3 - y3;
843 r3highx0 = r3high * x0;
844 sr1low = sr1low_stack;
845
846 x2 = h2 - y2;
847 r0highx6 = r0high * x6;
848 sr1high = sr1high_stack;
849
850 x5 += y3;
851 r0lowx0 = r0low * x0;
852 r1low = r1low_stack;
853
854 h6 = r3lowx0 + r0lowx6;
855 sr1lowx6 = sr1low * x6;
856 r1high = r1high_stack;
857
858 x4 += y2;
859 r0highx0 = r0high * x0;
860 sr2low = sr2low_stack;
861
862 h7 = r3highx0 + r0highx6;
863 sr1highx6 = sr1high * x6;
864 sr2high = sr2high_stack;
865
866 x3 += y1;
867 r1lowx0 = r1low * x0;
868 r2low = r2low_stack;
869
870 h0 = r0lowx0 + sr1lowx6;
871 sr2lowx6 = sr2low * x6;
872 r2high = r2high_stack;
873
874 x2 += y0;
875 r1highx0 = r1high * x0;
876 sr3low = sr3low_stack;
877
878 h1 = r0highx0 + sr1highx6;
879 sr2highx6 = sr2high * x6;
880 sr3high = sr3high_stack;
881
882 x4 += x5;
883 r2lowx0 = r2low * x0;
884
885 h2 = r1lowx0 + sr2lowx6;
886 sr3lowx6 = sr3low * x6;
887
888 x2 += x3;
889 r2highx0 = r2high * x0;
890
891 h3 = r1highx0 + sr2highx6;
892 sr3highx6 = sr3high * x6;
893
894 r1highx4 = r1high * x4;
895
896 h4 = r2lowx0 + sr3lowx6;
897 r1lowx4 = r1low * x4;
898
899 r0highx4 = r0high * x4;
900
901 h5 = r2highx0 + sr3highx6;
902 r0lowx4 = r0low * x4;
903
904 h7 += r1highx4;
905 sr3highx4 = sr3high * x4;
906
907 h6 += r1lowx4;
908 sr3lowx4 = sr3low * x4;
909
910 h5 += r0highx4;
911 sr2highx4 = sr2high * x4;
912
913 h4 += r0lowx4;
914 sr2lowx4 = sr2low * x4;
915
916 h3 += sr3highx4;
917 r0lowx2 = r0low * x2;
918
919 h2 += sr3lowx4;
920 r0highx2 = r0high * x2;
921
922 h1 += sr2highx4;
923 r1lowx2 = r1low * x2;
924
925 h0 += sr2lowx4;
926 r1highx2 = r1high * x2;
927
928 h2 += r0lowx2;
929 r2lowx2 = r2low * x2;
930
931 h3 += r0highx2;
932 r2highx2 = r2high * x2;
933
934 h4 += r1lowx2;
935 sr3lowx2 = sr3low * x2;
936
937 h5 += r1highx2;
938 sr3highx2 = sr3high * x2;
939
940 h6 += r2lowx2;
941
942 h7 += r2highx2;
943
944 h0 += sr3lowx2;
945
946 h1 += sr3highx2;
947
/*
 * Final partial block of 1..15 bytes (skipped entirely when l == 0),
 * assembled without branching on l.
 *
 * lbelowN = (l - N) >> 31 is intended to be -1 when l < N and 0 otherwise;
 * NOTE(review): this relies on an arithmetic right shift of a negative
 * int32 -- confirm for the target compiler.  Adding lbelowN to m steps the
 * pointer back so the fixed-offset loads stay inside the message; bytes at
 * positions >= l are then cleared with "&= ~lbelowN", and the difference
 * lbelow(N) - lbelow(N+1) places a single 1 byte at position l as padding
 * (m33 = lbelow15 + 1 covers l == 15).
 */
948addatmost15bytes:;
949
950if (l == 0) goto nomorebytes;
951
952 lbelow2 = l - 2;
953
954 lbelow3 = l - 3;
955
956 lbelow2 >>= 31;
957 lbelow4 = l - 4;
958
959 m00 = *(uchar *) (m + 0);
960 lbelow3 >>= 31;
961 m += lbelow2;
962
963 m01 = *(uchar *) (m + 1);
964 lbelow4 >>= 31;
965 m += lbelow3;
966
967 m02 = *(uchar *) (m + 2);
968 m += lbelow4;
969 m0 = 2151;
970
971 m03 = *(uchar *) (m + 3);
972 m0 <<= 51;
973 m1 = 2215;
974
975 m0 += m00;
976 m01 &= ~lbelow2;
977
978 m02 &= ~lbelow3;
979 m01 -= lbelow2;
980
981 m01 <<= 8;
982 m03 &= ~lbelow4;
983
984 m0 += m01;
985 lbelow2 -= lbelow3;
986
987 m02 += lbelow2;
988 lbelow3 -= lbelow4;
989
990 m02 <<= 16;
991 m03 += lbelow3;
992
993 m03 <<= 24;
994 m0 += m02;
995
996 m0 += m03;
997 lbelow5 = l - 5;
998
999 lbelow6 = l - 6;
1000 lbelow7 = l - 7;
1001
1002 lbelow5 >>= 31;
1003 lbelow8 = l - 8;
1004
1005 lbelow6 >>= 31;
1006 m += lbelow5;
1007
1008 m10 = *(uchar *) (m + 4);
1009 lbelow7 >>= 31;
1010 m += lbelow6;
1011
1012 m11 = *(uchar *) (m + 5);
1013 lbelow8 >>= 31;
1014 m += lbelow7;
1015
1016 m12 = *(uchar *) (m + 6);
1017 m1 <<= 51;
1018 m += lbelow8;
1019
1020 m13 = *(uchar *) (m + 7);
1021 m10 &= ~lbelow5;
1022 lbelow4 -= lbelow5;
1023
1024 m10 += lbelow4;
1025 lbelow5 -= lbelow6;
1026
1027 m11 &= ~lbelow6;
1028 m11 += lbelow5;
1029
1030 m11 <<= 8;
1031 m1 += m10;
1032
1033 m1 += m11;
1034 m12 &= ~lbelow7;
1035
1036 lbelow6 -= lbelow7;
1037 m13 &= ~lbelow8;
1038
1039 m12 += lbelow6;
1040 lbelow7 -= lbelow8;
1041
1042 m12 <<= 16;
1043 m13 += lbelow7;
1044
1045 m13 <<= 24;
1046 m1 += m12;
1047
1048 m1 += m13;
1049 m2 = 2279;
1050
1051 lbelow9 = l - 9;
1052 m3 = 2343;
1053
1054 lbelow10 = l - 10;
1055 lbelow11 = l - 11;
1056
1057 lbelow9 >>= 31;
1058 lbelow12 = l - 12;
1059
1060 lbelow10 >>= 31;
1061 m += lbelow9;
1062
1063 m20 = *(uchar *) (m + 8);
1064 lbelow11 >>= 31;
1065 m += lbelow10;
1066
1067 m21 = *(uchar *) (m + 9);
1068 lbelow12 >>= 31;
1069 m += lbelow11;
1070
1071 m22 = *(uchar *) (m + 10);
1072 m2 <<= 51;
1073 m += lbelow12;
1074
1075 m23 = *(uchar *) (m + 11);
1076 m20 &= ~lbelow9;
1077 lbelow8 -= lbelow9;
1078
1079 m20 += lbelow8;
1080 lbelow9 -= lbelow10;
1081
1082 m21 &= ~lbelow10;
1083 m21 += lbelow9;
1084
1085 m21 <<= 8;
1086 m2 += m20;
1087
1088 m2 += m21;
1089 m22 &= ~lbelow11;
1090
1091 lbelow10 -= lbelow11;
1092 m23 &= ~lbelow12;
1093
1094 m22 += lbelow10;
1095 lbelow11 -= lbelow12;
1096
1097 m22 <<= 16;
1098 m23 += lbelow11;
1099
1100 m23 <<= 24;
1101 m2 += m22;
1102
1103 m3 <<= 51;
1104 lbelow13 = l - 13;
1105
1106 lbelow13 >>= 31;
1107 lbelow14 = l - 14;
1108
1109 lbelow14 >>= 31;
1110 m += lbelow13;
1111 lbelow15 = l - 15;
1112
1113 m30 = *(uchar *) (m + 12);
1114 lbelow15 >>= 31;
1115 m += lbelow14;
1116
1117 m31 = *(uchar *) (m + 13);
1118 m += lbelow15;
1119 m2 += m23;
1120
1121 m32 = *(uchar *) (m + 14);
1122 m30 &= ~lbelow13;
1123 lbelow12 -= lbelow13;
1124
1125 m30 += lbelow12;
1126 lbelow13 -= lbelow14;
1127
1128 m3 += m30;
1129 m31 &= ~lbelow14;
1130
1131 m31 += lbelow13;
1132 m32 &= ~lbelow15;
1133
1134 m31 <<= 8;
1135 lbelow14 -= lbelow15;
1136
1137 m3 += m31;
1138 m32 += lbelow14;
1139 d0 = m0;
1140
1141 m32 <<= 16;
1142 m33 = lbelow15 + 1;
1143 d1 = m1;
1144
1145 m33 <<= 24;
1146 m3 += m32;
1147 d2 = m2;
1148
1149 m3 += m33;
1150 d3 = m3;
1151
/* Convert the padded partial block to doubles and add it into h. */
1152 alpha0 = *(double *) (constants + 24);
1153
1154 z3 = *(double *) &d3;
1155
1156 z2 = *(double *) &d2;
1157
1158 z1 = *(double *) &d1;
1159
1160 z0 = *(double *) &d0;
1161
1162 z3 -= alpha96;
1163
1164 z2 -= alpha64;
1165
1166 z1 -= alpha32;
1167
1168 z0 -= alpha0;
1169
1170 h5 += z3;
1171
1172 h3 += z2;
1173
1174 h1 += z1;
1175
1176 h0 += z0;
1177
/* Carry and multiply by r once more, for the partial block just added. */
1178 y7 = h7 + alpha130;
1179
1180 y6 = h6 + alpha130;
1181
1182 y1 = h1 + alpha32;
1183
1184 y0 = h0 + alpha32;
1185
1186 y7 -= alpha130;
1187
1188 y6 -= alpha130;
1189
1190 y1 -= alpha32;
1191
1192 y0 -= alpha32;
1193
1194 y5 = h5 + alpha96;
1195
1196 y4 = h4 + alpha96;
1197
1198 x7 = h7 - y7;
1199 y7 *= scale;
1200
1201 x6 = h6 - y6;
1202 y6 *= scale;
1203
1204 x1 = h1 - y1;
1205
1206 x0 = h0 - y0;
1207
1208 y5 -= alpha96;
1209
1210 y4 -= alpha96;
1211
1212 x1 += y7;
1213
1214 x0 += y6;
1215
1216 x7 += y5;
1217
1218 x6 += y4;
1219
1220 y3 = h3 + alpha64;
1221
1222 y2 = h2 + alpha64;
1223
1224 x0 += x1;
1225
1226 x6 += x7;
1227
1228 y3 -= alpha64;
1229 r3low = r3low_stack;
1230
1231 y2 -= alpha64;
1232 r0low = r0low_stack;
1233
1234 x5 = h5 - y5;
1235 r3lowx0 = r3low * x0;
1236 r3high = r3high_stack;
1237
1238 x4 = h4 - y4;
1239 r0lowx6 = r0low * x6;
1240 r0high = r0high_stack;
1241
1242 x3 = h3 - y3;
1243 r3highx0 = r3high * x0;
1244 sr1low = sr1low_stack;
1245
1246 x2 = h2 - y2;
1247 r0highx6 = r0high * x6;
1248 sr1high = sr1high_stack;
1249
1250 x5 += y3;
1251 r0lowx0 = r0low * x0;
1252 r1low = r1low_stack;
1253
1254 h6 = r3lowx0 + r0lowx6;
1255 sr1lowx6 = sr1low * x6;
1256 r1high = r1high_stack;
1257
1258 x4 += y2;
1259 r0highx0 = r0high * x0;
1260 sr2low = sr2low_stack;
1261
1262 h7 = r3highx0 + r0highx6;
1263 sr1highx6 = sr1high * x6;
1264 sr2high = sr2high_stack;
1265
1266 x3 += y1;
1267 r1lowx0 = r1low * x0;
1268 r2low = r2low_stack;
1269
1270 h0 = r0lowx0 + sr1lowx6;
1271 sr2lowx6 = sr2low * x6;
1272 r2high = r2high_stack;
1273
1274 x2 += y0;
1275 r1highx0 = r1high * x0;
1276 sr3low = sr3low_stack;
1277
1278 h1 = r0highx0 + sr1highx6;
1279 sr2highx6 = sr2high * x6;
1280 sr3high = sr3high_stack;
1281
1282 x4 += x5;
1283 r2lowx0 = r2low * x0;
1284
1285 h2 = r1lowx0 + sr2lowx6;
1286 sr3lowx6 = sr3low * x6;
1287
1288 x2 += x3;
1289 r2highx0 = r2high * x0;
1290
1291 h3 = r1highx0 + sr2highx6;
1292 sr3highx6 = sr3high * x6;
1293
1294 r1highx4 = r1high * x4;
1295
1296 h4 = r2lowx0 + sr3lowx6;
1297 r1lowx4 = r1low * x4;
1298
1299 r0highx4 = r0high * x4;
1300
1301 h5 = r2highx0 + sr3highx6;
1302 r0lowx4 = r0low * x4;
1303
1304 h7 += r1highx4;
1305 sr3highx4 = sr3high * x4;
1306
1307 h6 += r1lowx4;
1308 sr3lowx4 = sr3low * x4;
1309
1310 h5 += r0highx4;
1311 sr2highx4 = sr2high * x4;
1312
1313 h4 += r0lowx4;
1314 sr2lowx4 = sr2low * x4;
1315
1316 h3 += sr3highx4;
1317 r0lowx2 = r0low * x2;
1318
1319 h2 += sr3lowx4;
1320 r0highx2 = r0high * x2;
1321
1322 h1 += sr2highx4;
1323 r1lowx2 = r1low * x2;
1324
1325 h0 += sr2lowx4;
1326 r1highx2 = r1high * x2;
1327
1328 h2 += r0lowx2;
1329 r2lowx2 = r2low * x2;
1330
1331 h3 += r0highx2;
1332 r2highx2 = r2high * x2;
1333
1334 h4 += r1lowx2;
1335 sr3lowx2 = sr3low * x2;
1336
1337 h5 += r1highx2;
1338 sr3highx2 = sr3high * x2;
1339
1340 h6 += r2lowx2;
1341
1342 h7 += r2highx2;
1343
1344 h0 += sr3lowx2;
1345
1346 h1 += sr3highx2;
1347
1348
/*
 * Finalization.  A last carry pass is done (here the remainder of h7 is
 * added into h6 before h6 itself is split), the eight limbs are summed
 * pairwise into x0, x2, x4, x6, and offset0..offset3 are added so that,
 * once stored through the double view of d0..d3, each int64 carries its
 * limb in the low 32 bits with carry bits above them.
 */
1349nomorebytes:;
1350
1351 offset0 = *(double *) (constants + 104);
1352 y7 = h7 + alpha130;
1353
1354 offset1 = *(double *) (constants + 112);
1355 y0 = h0 + alpha32;
1356
1357 offset2 = *(double *) (constants + 120);
1358 y1 = h1 + alpha32;
1359
1360 offset3 = *(double *) (constants + 128);
1361 y2 = h2 + alpha64;
1362
1363 y7 -= alpha130;
1364
1365 y3 = h3 + alpha64;
1366
1367 y4 = h4 + alpha96;
1368
1369 y5 = h5 + alpha96;
1370
1371 x7 = h7 - y7;
1372 y7 *= scale;
1373
1374 y0 -= alpha32;
1375
1376 y1 -= alpha32;
1377
1378 y2 -= alpha64;
1379
1380 h6 += x7;
1381
1382 y3 -= alpha64;
1383
1384 y4 -= alpha96;
1385
1386 y5 -= alpha96;
1387
1388 y6 = h6 + alpha130;
1389
1390 x0 = h0 - y0;
1391
1392 x1 = h1 - y1;
1393
1394 x2 = h2 - y2;
1395
1396 y6 -= alpha130;
1397
1398 x0 += y7;
1399
1400 x3 = h3 - y3;
1401
1402 x4 = h4 - y4;
1403
1404 x5 = h5 - y5;
1405
1406 x6 = h6 - y6;
1407
1408 y6 *= scale;
1409
1410 x2 += y0;
1411
1412 x3 += y1;
1413
1414 x4 += y2;
1415
1416 x0 += y6;
1417
1418 x5 += y3;
1419
1420 x6 += y4;
1421
1422 x2 += x3;
1423
1424 x0 += x1;
1425
1426 x4 += x5;
1427
1428 x6 += y5;
1429
1430 x2 += offset1;
1431 *(double *) &d1 = x2;
1432
1433 x0 += offset0;
1434 *(double *) &d0 = x0;
1435
1436 x4 += offset2;
1437 *(double *) &d2 = x4;
1438
1439 x6 += offset3;
1440 *(double *) &d3 = x6;
1441
1442
1443
1444
/*
 * Integer tail.  f0..f3 are reduced to 32-bit limbs, propagating the carry
 * bits (masked to 8 bits) upward into f4.  g0..g4 is the same value plus 5,
 * with 4 subtracted from the top limb.  f = (int64) g4 >> 63 is an all-ones
 * mask when g4 is negative; it selects the f limbs in that case and the g
 * limbs otherwise, without a branch.  Presumably this is the final reduction
 * modulo 2^130 - 5 -- TODO confirm against the algorithm description.
 * The loads of s = k[16..31] are interleaved with this selection.
 */
1445 f0 = d0;
1446
1447 f1 = d1;
1448 bits32 = -1;
1449
1450 f2 = d2;
1451 bits32 >>= 32;
1452
1453 f3 = d3;
1454 f = f0 >> 32;
1455
1456 f0 &= bits32;
1457 f &= 255;
1458
1459 f1 += f;
1460 g0 = f0 + 5;
1461
1462 g = g0 >> 32;
1463 g0 &= bits32;
1464
1465 f = f1 >> 32;
1466 f1 &= bits32;
1467
1468 f &= 255;
1469 g1 = f1 + g;
1470
1471 g = g1 >> 32;
1472 f2 += f;
1473
1474 f = f2 >> 32;
1475 g1 &= bits32;
1476
1477 f2 &= bits32;
1478 f &= 255;
1479
1480 f3 += f;
1481 g2 = f2 + g;
1482
1483 g = g2 >> 32;
1484 g2 &= bits32;
1485
1486 f4 = f3 >> 32;
1487 f3 &= bits32;
1488
1489 f4 &= 255;
1490 g3 = f3 + g;
1491
1492 g = g3 >> 32;
1493 g3 &= bits32;
1494
1495 g4 = f4 + g;
1496
1497 g4 = g4 - 4;
1498 s00 = *(uchar *) (s + 0);
1499
1500 f = (int64) g4 >> 63;
1501 s01 = *(uchar *) (s + 1);
1502
1503 f0 &= f;
1504 g0 &= ~f;
1505 s02 = *(uchar *) (s + 2);
1506
1507 f1 &= f;
1508 f0 |= g0;
1509 s03 = *(uchar *) (s + 3);
1510
1511 g1 &= ~f;
1512 f2 &= f;
1513 s10 = *(uchar *) (s + 4);
1514
1515 f3 &= f;
1516 g2 &= ~f;
1517 s11 = *(uchar *) (s + 5);
1518
1519 g3 &= ~f;
1520 f1 |= g1;
1521 s12 = *(uchar *) (s + 6);
1522
1523 f2 |= g2;
1524 f3 |= g3;
1525 s13 = *(uchar *) (s + 7);
1526
/* Add s to the selected limbs, one little-endian 32-bit word per limb. */
1527 s01 <<= 8;
1528 f0 += s00;
1529 s20 = *(uchar *) (s + 8);
1530
1531 s02 <<= 16;
1532 f0 += s01;
1533 s21 = *(uchar *) (s + 9);
1534
1535 s03 <<= 24;
1536 f0 += s02;
1537 s22 = *(uchar *) (s + 10);
1538
1539 s11 <<= 8;
1540 f1 += s10;
1541 s23 = *(uchar *) (s + 11);
1542
1543 s12 <<= 16;
1544 f1 += s11;
1545 s30 = *(uchar *) (s + 12);
1546
1547 s13 <<= 24;
1548 f1 += s12;
1549 s31 = *(uchar *) (s + 13);
1550
1551 f0 += s03;
1552 f1 += s13;
1553 s32 = *(uchar *) (s + 14);
1554
1555 s21 <<= 8;
1556 f2 += s20;
1557 s33 = *(uchar *) (s + 15);
1558
1559 s22 <<= 16;
1560 f2 += s21;
1561
1562 s23 <<= 24;
1563 f2 += s22;
1564
1565 s31 <<= 8;
1566 f3 += s30;
1567
1568 s32 <<= 16;
1569 f3 += s31;
1570
1571 s33 <<= 24;
1572 f3 += s32;
1573
1574 f2 += s23;
1575 f3 += s33;
1576
/*
 * Write the 16 output bytes in little-endian order.  After four bytes of a
 * limb have been shifted out, what is left is the carry from adding s, and
 * it is added to the next limb.  Any carry out of f3 is discarded.
 */
1577 *(uchar *) (out + 0) = f0;
1578 f0 >>= 8;
1579 *(uchar *) (out + 1) = f0;
1580 f0 >>= 8;
1581 *(uchar *) (out + 2) = f0;
1582 f0 >>= 8;
1583 *(uchar *) (out + 3) = f0;
1584 f0 >>= 8;
1585 f1 += f0;
1586
1587 *(uchar *) (out + 4) = f1;
1588 f1 >>= 8;
1589 *(uchar *) (out + 5) = f1;
1590 f1 >>= 8;
1591 *(uchar *) (out + 6) = f1;
1592 f1 >>= 8;
1593 *(uchar *) (out + 7) = f1;
1594 f1 >>= 8;
1595 f2 += f1;
1596
1597 *(uchar *) (out + 8) = f2;
1598 f2 >>= 8;
1599 *(uchar *) (out + 9) = f2;
1600 f2 >>= 8;
1601 *(uchar *) (out + 10) = f2;
1602 f2 >>= 8;
1603 *(uchar *) (out + 11) = f2;
1604 f2 >>= 8;
1605 f3 += f2;
1606
1607 *(uchar *) (out + 12) = f3;
1608 f3 >>= 8;
1609 *(uchar *) (out + 13) = f3;
1610 f3 >>= 8;
1611 *(uchar *) (out + 14) = f3;
1612 f3 >>= 8;
1613 *(uchar *) (out + 15) = f3;
1614
1615 return 0;
1616}