summaryrefslogtreecommitdiff
path: root/nacl/crypto_stream/aes128ctr/portable/afternm.c
diff options
context:
space:
mode:
Diffstat (limited to 'nacl/crypto_stream/aes128ctr/portable/afternm.c')
-rw-r--r--nacl/crypto_stream/aes128ctr/portable/afternm.c158
1 files changed, 158 insertions, 0 deletions
diff --git a/nacl/crypto_stream/aes128ctr/portable/afternm.c b/nacl/crypto_stream/aes128ctr/portable/afternm.c
new file mode 100644
index 00000000..93c96e42
--- /dev/null
+++ b/nacl/crypto_stream/aes128ctr/portable/afternm.c
@@ -0,0 +1,158 @@
1/* Author: Peter Schwabe, ported from an assembly implementation by Emilia Käsper
2 * Date: 2009-03-19
3 * Public domain */
4
5#include "int128.h"
6#include "common.h"
7#include "consts.h"
8#include "crypto_stream.h"
9
10int crypto_stream_afternm(unsigned char *outp, unsigned long long len, const unsigned char *noncep, const unsigned char *c)
11{
12
13 int128 xmm0;
14 int128 xmm1;
15 int128 xmm2;
16 int128 xmm3;
17 int128 xmm4;
18 int128 xmm5;
19 int128 xmm6;
20 int128 xmm7;
21
22 int128 xmm8;
23 int128 xmm9;
24 int128 xmm10;
25 int128 xmm11;
26 int128 xmm12;
27 int128 xmm13;
28 int128 xmm14;
29 int128 xmm15;
30
31 int128 nonce_stack;
32 unsigned long long lensav;
33 unsigned char bl[128];
34 unsigned char *blp;
35 unsigned char b;
36
37 uint32 tmp;
38
39 /* Copy nonce on the stack */
40 copy2(&nonce_stack, (int128 *) (noncep + 0));
41 unsigned char *np = (unsigned char *)&nonce_stack;
42
43 enc_block:
44
45 xmm0 = *(int128 *) (np + 0);
46 copy2(&xmm1, &xmm0);
47 shufb(&xmm1, SWAP32);
48 copy2(&xmm2, &xmm1);
49 copy2(&xmm3, &xmm1);
50 copy2(&xmm4, &xmm1);
51 copy2(&xmm5, &xmm1);
52 copy2(&xmm6, &xmm1);
53 copy2(&xmm7, &xmm1);
54
55 add_uint32_big(&xmm1, 1);
56 add_uint32_big(&xmm2, 2);
57 add_uint32_big(&xmm3, 3);
58 add_uint32_big(&xmm4, 4);
59 add_uint32_big(&xmm5, 5);
60 add_uint32_big(&xmm6, 6);
61 add_uint32_big(&xmm7, 7);
62
63 shufb(&xmm0, M0);
64 shufb(&xmm1, M0SWAP);
65 shufb(&xmm2, M0SWAP);
66 shufb(&xmm3, M0SWAP);
67 shufb(&xmm4, M0SWAP);
68 shufb(&xmm5, M0SWAP);
69 shufb(&xmm6, M0SWAP);
70 shufb(&xmm7, M0SWAP);
71
72 bitslice(xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0, xmm8)
73
74 aesround( 1, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
75 aesround( 2, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
76 aesround( 3, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
77 aesround( 4, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
78 aesround( 5, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
79 aesround( 6, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
80 aesround( 7, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
81 aesround( 8, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
82 aesround( 9, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
83 lastround(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
84
85 bitslice(xmm13, xmm10, xmm15, xmm11, xmm14, xmm12, xmm9, xmm8, xmm0)
86
87 if(len < 128) goto partial;
88 if(len == 128) goto full;
89
90 tmp = load32_bigendian(np + 12);
91 tmp += 8;
92 store32_bigendian(np + 12, tmp);
93
94 *(int128 *) (outp + 0) = xmm8;
95 *(int128 *) (outp + 16) = xmm9;
96 *(int128 *) (outp + 32) = xmm12;
97 *(int128 *) (outp + 48) = xmm14;
98 *(int128 *) (outp + 64) = xmm11;
99 *(int128 *) (outp + 80) = xmm15;
100 *(int128 *) (outp + 96) = xmm10;
101 *(int128 *) (outp + 112) = xmm13;
102
103 len -= 128;
104 outp += 128;
105
106 goto enc_block;
107
108 partial:
109
110 lensav = len;
111 len >>= 4;
112
113 tmp = load32_bigendian(np + 12);
114 tmp += len;
115 store32_bigendian(np + 12, tmp);
116
117 blp = bl;
118 *(int128 *)(blp + 0) = xmm8;
119 *(int128 *)(blp + 16) = xmm9;
120 *(int128 *)(blp + 32) = xmm12;
121 *(int128 *)(blp + 48) = xmm14;
122 *(int128 *)(blp + 64) = xmm11;
123 *(int128 *)(blp + 80) = xmm15;
124 *(int128 *)(blp + 96) = xmm10;
125 *(int128 *)(blp + 112) = xmm13;
126
127 bytes:
128
129 if(lensav == 0) goto end;
130
131 b = blp[0];
132 *(unsigned char *)(outp + 0) = b;
133
134 blp += 1;
135 outp +=1;
136 lensav -= 1;
137
138 goto bytes;
139
140 full:
141
142 tmp = load32_bigendian(np + 12);
143 tmp += 8;
144 store32_bigendian(np + 12, tmp);
145
146 *(int128 *) (outp + 0) = xmm8;
147 *(int128 *) (outp + 16) = xmm9;
148 *(int128 *) (outp + 32) = xmm12;
149 *(int128 *) (outp + 48) = xmm14;
150 *(int128 *) (outp + 64) = xmm11;
151 *(int128 *) (outp + 80) = xmm15;
152 *(int128 *) (outp + 96) = xmm10;
153 *(int128 *) (outp + 112) = xmm13;
154
155 end:
156 return 0;
157
158}