summaryrefslogtreecommitdiff
path: root/nacl/crypto_stream/aes128ctr/portable
diff options
context:
space:
mode:
author: irungentoo <irungentoo@gmail.com> 2013-07-13 10:09:38 -0400
committer: irungentoo <irungentoo@gmail.com> 2013-07-13 10:09:38 -0400
commitd4fe483efd3e0062f12430efe9deb66d43d914d7 (patch)
treee6aa9ac716ae82cdb15c6e6cb5d9d1d9d29f053b /nacl/crypto_stream/aes128ctr/portable
parent835ef0320d47372eac14bef31c979b8217d04498 (diff)
NaCl moved to other repo.
Diffstat (limited to 'nacl/crypto_stream/aes128ctr/portable')
-rw-r--r--nacl/crypto_stream/aes128ctr/portable/afternm.c158
-rw-r--r--nacl/crypto_stream/aes128ctr/portable/api.h3
-rw-r--r--nacl/crypto_stream/aes128ctr/portable/beforenm.c59
-rw-r--r--nacl/crypto_stream/aes128ctr/portable/common.c64
-rw-r--r--nacl/crypto_stream/aes128ctr/portable/common.h788
-rw-r--r--nacl/crypto_stream/aes128ctr/portable/consts.c14
-rw-r--r--nacl/crypto_stream/aes128ctr/portable/consts.h28
-rw-r--r--nacl/crypto_stream/aes128ctr/portable/int128.c128
-rw-r--r--nacl/crypto_stream/aes128ctr/portable/int128.h47
-rw-r--r--nacl/crypto_stream/aes128ctr/portable/stream.c28
-rw-r--r--nacl/crypto_stream/aes128ctr/portable/types.h10
-rw-r--r--nacl/crypto_stream/aes128ctr/portable/xor_afternm.c180
12 files changed, 0 insertions, 1507 deletions
diff --git a/nacl/crypto_stream/aes128ctr/portable/afternm.c b/nacl/crypto_stream/aes128ctr/portable/afternm.c
deleted file mode 100644
index 93c96e42..00000000
--- a/nacl/crypto_stream/aes128ctr/portable/afternm.c
+++ /dev/null
@@ -1,158 +0,0 @@
1/* Author: Peter Schwabe, ported from an assembly implementation by Emilia Käsper
2 * Date: 2009-03-19
3 * Public domain */
4
5#include "int128.h"
6#include "common.h"
7#include "consts.h"
8#include "crypto_stream.h"
9
/*
 * crypto_stream_afternm: write `len` bytes of AES-128-CTR keystream into
 * `outp`.  `noncep` points at a 16-byte counter block whose bytes 12..15
 * are used as a 32-bit big-endian block counter; `c` is the bitsliced
 * key schedule produced by crypto_stream_beforenm.
 *
 * The bitsliced kernel encrypts 8 counter blocks (128 bytes of keystream)
 * per pass, then either stores them whole or byte-copies the tail.
 */
int crypto_stream_afternm(unsigned char *outp, unsigned long long len, const unsigned char *noncep, const unsigned char *c)
{

  /* Bitsliced working state, named after the SSE registers of the
     assembly implementation this was ported from. */
  int128 xmm0;
  int128 xmm1;
  int128 xmm2;
  int128 xmm3;
  int128 xmm4;
  int128 xmm5;
  int128 xmm6;
  int128 xmm7;

  int128 xmm8;
  int128 xmm9;
  int128 xmm10;
  int128 xmm11;
  int128 xmm12;
  int128 xmm13;
  int128 xmm14;
  int128 xmm15;

  int128 nonce_stack;          /* mutable local copy of the counter block */
  unsigned long long lensav;   /* bytes remaining in the final partial pass */
  unsigned char bl[128];       /* staging buffer for a partial pass */
  unsigned char *blp;
  unsigned char b;

  uint32 tmp;

  /* Copy nonce on the stack */
  copy2(&nonce_stack, (int128 *) (noncep + 0));
  unsigned char *np = (unsigned char *)&nonce_stack;

  enc_block:

  /* Build 8 consecutive counter blocks from the current counter value. */
  xmm0 = *(int128 *) (np + 0);
  copy2(&xmm1, &xmm0);
  shufb(&xmm1, SWAP32);
  copy2(&xmm2, &xmm1);
  copy2(&xmm3, &xmm1);
  copy2(&xmm4, &xmm1);
  copy2(&xmm5, &xmm1);
  copy2(&xmm6, &xmm1);
  copy2(&xmm7, &xmm1);

  add_uint32_big(&xmm1, 1);
  add_uint32_big(&xmm2, 2);
  add_uint32_big(&xmm3, 3);
  add_uint32_big(&xmm4, 4);
  add_uint32_big(&xmm5, 5);
  add_uint32_big(&xmm6, 6);
  add_uint32_big(&xmm7, 7);

  /* Permute bytes into bitslice order (block 0 was never byte-swapped,
     so it takes M0; blocks 1..7 undo SWAP32 via M0SWAP). */
  shufb(&xmm0, M0);
  shufb(&xmm1, M0SWAP);
  shufb(&xmm2, M0SWAP);
  shufb(&xmm3, M0SWAP);
  shufb(&xmm4, M0SWAP);
  shufb(&xmm5, M0SWAP);
  shufb(&xmm6, M0SWAP);
  shufb(&xmm7, M0SWAP);

  bitslice(xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0, xmm8)

  /* 10 AES rounds over the 8 bitsliced blocks; the state ping-pongs
     between the xmm0..7 and xmm8..15 register banks each round. */
  aesround( 1, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
  aesround( 2, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
  aesround( 3, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
  aesround( 4, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
  aesround( 5, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
  aesround( 6, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
  aesround( 7, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
  aesround( 8, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
  aesround( 9, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
  lastround(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)

  /* Un-bitslice; the register order reflects the permutation the rounds
     apply to the banks. */
  bitslice(xmm13, xmm10, xmm15, xmm11, xmm14, xmm12, xmm9, xmm8, xmm0)

  if(len < 128) goto partial;
  if(len == 128) goto full;

  /* More than one pass remains: advance the big-endian block counter by
     the 8 blocks just consumed. */
  tmp = load32_bigendian(np + 12);
  tmp += 8;
  store32_bigendian(np + 12, tmp);

  *(int128 *) (outp + 0) = xmm8;
  *(int128 *) (outp + 16) = xmm9;
  *(int128 *) (outp + 32) = xmm12;
  *(int128 *) (outp + 48) = xmm14;
  *(int128 *) (outp + 64) = xmm11;
  *(int128 *) (outp + 80) = xmm15;
  *(int128 *) (outp + 96) = xmm10;
  *(int128 *) (outp + 112) = xmm13;

  len -= 128;
  outp += 128;

  goto enc_block;

  partial:

  /* Final, partial pass: bump the counter by the number of whole 16-byte
     blocks consumed, stage the keystream in bl[], then byte-copy the tail. */
  lensav = len;
  len >>= 4;

  tmp = load32_bigendian(np + 12);
  tmp += len;
  store32_bigendian(np + 12, tmp);

  blp = bl;
  *(int128 *)(blp + 0) = xmm8;
  *(int128 *)(blp + 16) = xmm9;
  *(int128 *)(blp + 32) = xmm12;
  *(int128 *)(blp + 48) = xmm14;
  *(int128 *)(blp + 64) = xmm11;
  *(int128 *)(blp + 80) = xmm15;
  *(int128 *)(blp + 96) = xmm10;
  *(int128 *)(blp + 112) = xmm13;

  bytes:

  if(lensav == 0) goto end;

  b = blp[0];
  *(unsigned char *)(outp + 0) = b;

  blp += 1;
  outp +=1;
  lensav -= 1;

  goto bytes;

  full:

  /* Exactly 128 bytes remain: one last whole pass. */
  tmp = load32_bigendian(np + 12);
  tmp += 8;
  store32_bigendian(np + 12, tmp);

  *(int128 *) (outp + 0) = xmm8;
  *(int128 *) (outp + 16) = xmm9;
  *(int128 *) (outp + 32) = xmm12;
  *(int128 *) (outp + 48) = xmm14;
  *(int128 *) (outp + 64) = xmm11;
  *(int128 *) (outp + 80) = xmm15;
  *(int128 *) (outp + 96) = xmm10;
  *(int128 *) (outp + 112) = xmm13;

  end:
  return 0;

}
diff --git a/nacl/crypto_stream/aes128ctr/portable/api.h b/nacl/crypto_stream/aes128ctr/portable/api.h
deleted file mode 100644
index 62fc8d88..00000000
--- a/nacl/crypto_stream/aes128ctr/portable/api.h
+++ /dev/null
@@ -1,3 +0,0 @@
1#define CRYPTO_KEYBYTES 16
2#define CRYPTO_NONCEBYTES 16
3#define CRYPTO_BEFORENMBYTES 1408
diff --git a/nacl/crypto_stream/aes128ctr/portable/beforenm.c b/nacl/crypto_stream/aes128ctr/portable/beforenm.c
deleted file mode 100644
index 8fa2673d..00000000
--- a/nacl/crypto_stream/aes128ctr/portable/beforenm.c
+++ /dev/null
@@ -1,59 +0,0 @@
1/* Author: Peter Schwabe, ported from an assembly implementation by Emilia Käsper
2 * Date: 2009-03-19
3 * Public domain */
4
5#include "consts.h"
6#include "int128.h"
7#include "common.h"
8#include "crypto_stream.h"
9
/*
 * crypto_stream_beforenm: expand the 16-byte AES-128 key `k` into the
 * bitsliced round-key schedule `c` (CRYPTO_BEFORENMBYTES = 1408 bytes:
 * 11 round keys of 128 bytes each; see api.h).
 */
int crypto_stream_beforenm(unsigned char *c, const unsigned char *k)
{

  /*
  int64 x0;
  int64 x1;
  int64 x2;
  int64 x3;
  int64 e;
  int64 q0;
  int64 q1;
  int64 q2;
  int64 q3;
  */

  /* These locals are referenced by name from inside the key-expansion
     macros (bitslicekey0 uses xmm0..xmm7 and t). */
  int128 xmm0;
  int128 xmm1;
  int128 xmm2;
  int128 xmm3;
  int128 xmm4;
  int128 xmm5;
  int128 xmm6;
  int128 xmm7;
  int128 xmm8;
  int128 xmm9;
  int128 xmm10;
  int128 xmm11;
  int128 xmm12;
  int128 xmm13;
  int128 xmm14;
  int128 xmm15;
  int128 t;

  /* Round key 0: the raw key, bitsliced into c[0..127]. */
  bitslicekey0(k, c)

  /* Round keys 1..10.  Each invocation threads the register banks in the
     permuted order produced by the previous round; the rcon argument of
     keyexpbs selects which registers absorb the round constant. */
  keyexpbs1(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
  keyexpbs(xmm0, xmm1, xmm4, xmm6, xmm3, xmm7, xmm2, xmm5, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xor_rcon(&xmm1);, 2,c)
  keyexpbs(xmm0, xmm1, xmm3, xmm2, xmm6, xmm5, xmm4, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xor_rcon(&xmm6);, 3,c)
  keyexpbs(xmm0, xmm1, xmm6, xmm4, xmm2, xmm7, xmm3, xmm5, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xor_rcon(&xmm3);, 4,c)

  keyexpbs(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xor_rcon(&xmm3);, 5,c)
  keyexpbs(xmm0, xmm1, xmm4, xmm6, xmm3, xmm7, xmm2, xmm5, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xor_rcon(&xmm5);, 6,c)
  keyexpbs(xmm0, xmm1, xmm3, xmm2, xmm6, xmm5, xmm4, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xor_rcon(&xmm3);, 7,c)
  keyexpbs(xmm0, xmm1, xmm6, xmm4, xmm2, xmm7, xmm3, xmm5, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xor_rcon(&xmm7);, 8,c)

  keyexpbs(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xor_rcon(&xmm0); xor_rcon(&xmm1); xor_rcon(&xmm6); xor_rcon(&xmm3);, 9,c)
  keyexpbs10(xmm0, xmm1, xmm4, xmm6, xmm3, xmm7, xmm2, xmm5, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)

  return 0;
}
diff --git a/nacl/crypto_stream/aes128ctr/portable/common.c b/nacl/crypto_stream/aes128ctr/portable/common.c
deleted file mode 100644
index 14a28cc6..00000000
--- a/nacl/crypto_stream/aes128ctr/portable/common.c
+++ /dev/null
@@ -1,64 +0,0 @@
1#include "common.h"
2
3uint32 load32_bigendian(const unsigned char *x)
4{
5 return
6 (uint32) (x[3]) \
7 | (((uint32) (x[2])) << 8) \
8 | (((uint32) (x[1])) << 16) \
9 | (((uint32) (x[0])) << 24)
10 ;
11}
12
13void store32_bigendian(unsigned char *x,uint32 u)
14{
15 x[3] = u; u >>= 8;
16 x[2] = u; u >>= 8;
17 x[1] = u; u >>= 8;
18 x[0] = u;
19}
20
21uint32 load32_littleendian(const unsigned char *x)
22{
23 return
24 (uint32) (x[0]) \
25 | (((uint32) (x[1])) << 8) \
26 | (((uint32) (x[2])) << 16) \
27 | (((uint32) (x[3])) << 24)
28 ;
29}
30
31void store32_littleendian(unsigned char *x,uint32 u)
32{
33 x[0] = u; u >>= 8;
34 x[1] = u; u >>= 8;
35 x[2] = u; u >>= 8;
36 x[3] = u;
37}
38
39
40uint64 load64_littleendian(const unsigned char *x)
41{
42 return
43 (uint64) (x[0]) \
44 | (((uint64) (x[1])) << 8) \
45 | (((uint64) (x[2])) << 16) \
46 | (((uint64) (x[3])) << 24)
47 | (((uint64) (x[4])) << 32)
48 | (((uint64) (x[5])) << 40)
49 | (((uint64) (x[6])) << 48)
50 | (((uint64) (x[7])) << 56)
51 ;
52}
53
54void store64_littleendian(unsigned char *x,uint64 u)
55{
56 x[0] = u; u >>= 8;
57 x[1] = u; u >>= 8;
58 x[2] = u; u >>= 8;
59 x[3] = u; u >>= 8;
60 x[4] = u; u >>= 8;
61 x[5] = u; u >>= 8;
62 x[6] = u; u >>= 8;
63 x[7] = u;
64}
diff --git a/nacl/crypto_stream/aes128ctr/portable/common.h b/nacl/crypto_stream/aes128ctr/portable/common.h
deleted file mode 100644
index 0f723332..00000000
--- a/nacl/crypto_stream/aes128ctr/portable/common.h
+++ /dev/null
@@ -1,788 +0,0 @@
/* Author: Peter Schwabe, ported from an assembly implementation by Emilia Käsper
   Date: 2009-03-19
   Public domain */
#ifndef COMMON_H
#define COMMON_H

#include "types.h"

/* Endian-explicit load/store helpers, implemented in common.c.  The
   #defines namespace the symbols to this (portable) implementation. */

#define load32_bigendian crypto_stream_aes128ctr_portable_load32_bigendian
uint32 load32_bigendian(const unsigned char *x);

#define store32_bigendian crypto_stream_aes128ctr_portable_store32_bigendian
void store32_bigendian(unsigned char *x,uint32 u);

#define load32_littleendian crypto_stream_aes128ctr_portable_load32_littleendian
uint32 load32_littleendian(const unsigned char *x);

#define store32_littleendian crypto_stream_aes128ctr_portable_store32_littleendian
void store32_littleendian(unsigned char *x,uint32 u);

#define load64_littleendian crypto_stream_aes128ctr_portable_load64_littleendian
uint64 load64_littleendian(const unsigned char *x);

#define store64_littleendian crypto_stream_aes128ctr_portable_store64_littleendian
void store64_littleendian(unsigned char *x,uint64 u);
26
/* Macros required only for key expansion */

/* keyexpbs1: derive bitsliced round key 1 from round key 0 (bskey+0..127)
   and store it at bskey+128..255.  b0..b7 are the bitsliced key registers,
   t0..t7 scratch.  The repeated rshift32/xor ladder reproduces, in
   bitsliced form, the word-chaining of the scalar AES key schedule.
   NOTE(review): b0 goes through shufb(...,EXPB0) twice (before b1 and
   again after b5) — presumably intentional; confirm against the SSE2
   implementation. */
#define keyexpbs1(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7, bskey) \
  rotbyte(&b0);\
  rotbyte(&b1);\
  rotbyte(&b2);\
  rotbyte(&b3);\
  rotbyte(&b4);\
  rotbyte(&b5);\
  rotbyte(&b6);\
  rotbyte(&b7);\
  ;\
  sbox(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7);\
  ;\
  xor_rcon(&b0);\
  shufb(&b0, EXPB0);\
  shufb(&b1, EXPB0);\
  shufb(&b4, EXPB0);\
  shufb(&b6, EXPB0);\
  shufb(&b3, EXPB0);\
  shufb(&b7, EXPB0);\
  shufb(&b2, EXPB0);\
  shufb(&b5, EXPB0);\
  shufb(&b0, EXPB0);\
  ;\
  t0 = *(int128 *)(bskey + 0);\
  t1 = *(int128 *)(bskey + 16);\
  t2 = *(int128 *)(bskey + 32);\
  t3 = *(int128 *)(bskey + 48);\
  t4 = *(int128 *)(bskey + 64);\
  t5 = *(int128 *)(bskey + 80);\
  t6 = *(int128 *)(bskey + 96);\
  t7 = *(int128 *)(bskey + 112);\
  ;\
  xor2(&b0, &t0);\
  xor2(&b1, &t1);\
  xor2(&b4, &t2);\
  xor2(&b6, &t3);\
  xor2(&b3, &t4);\
  xor2(&b7, &t5);\
  xor2(&b2, &t6);\
  xor2(&b5, &t7);\
  ;\
  rshift32_littleendian(&t0, 8);\
  rshift32_littleendian(&t1, 8);\
  rshift32_littleendian(&t2, 8);\
  rshift32_littleendian(&t3, 8);\
  rshift32_littleendian(&t4, 8);\
  rshift32_littleendian(&t5, 8);\
  rshift32_littleendian(&t6, 8);\
  rshift32_littleendian(&t7, 8);\
  ;\
  xor2(&b0, &t0);\
  xor2(&b1, &t1);\
  xor2(&b4, &t2);\
  xor2(&b6, &t3);\
  xor2(&b3, &t4);\
  xor2(&b7, &t5);\
  xor2(&b2, &t6);\
  xor2(&b5, &t7);\
  ;\
  rshift32_littleendian(&t0, 8);\
  rshift32_littleendian(&t1, 8);\
  rshift32_littleendian(&t2, 8);\
  rshift32_littleendian(&t3, 8);\
  rshift32_littleendian(&t4, 8);\
  rshift32_littleendian(&t5, 8);\
  rshift32_littleendian(&t6, 8);\
  rshift32_littleendian(&t7, 8);\
  ;\
  xor2(&b0, &t0);\
  xor2(&b1, &t1);\
  xor2(&b4, &t2);\
  xor2(&b6, &t3);\
  xor2(&b3, &t4);\
  xor2(&b7, &t5);\
  xor2(&b2, &t6);\
  xor2(&b5, &t7);\
  ;\
  rshift32_littleendian(&t0, 8);\
  rshift32_littleendian(&t1, 8);\
  rshift32_littleendian(&t2, 8);\
  rshift32_littleendian(&t3, 8);\
  rshift32_littleendian(&t4, 8);\
  rshift32_littleendian(&t5, 8);\
  rshift32_littleendian(&t6, 8);\
  rshift32_littleendian(&t7, 8);\
  ;\
  xor2(&b0, &t0);\
  xor2(&b1, &t1);\
  xor2(&b4, &t2);\
  xor2(&b6, &t3);\
  xor2(&b3, &t4);\
  xor2(&b7, &t5);\
  xor2(&b2, &t6);\
  xor2(&b5, &t7);\
  ;\
  *(int128 *)(bskey + 128) = b0;\
  *(int128 *)(bskey + 144) = b1;\
  *(int128 *)(bskey + 160) = b4;\
  *(int128 *)(bskey + 176) = b6;\
  *(int128 *)(bskey + 192) = b3;\
  *(int128 *)(bskey + 208) = b7;\
  *(int128 *)(bskey + 224) = b2;\
  *(int128 *)(bskey + 240) = b5;\
132
/* keyexpbs10: derive the final (10th) bitsliced round key from round key
   9 and store it at bskey+1280..1407.  Unlike earlier rounds, the input
   and output are re-inverted with toggle() (undoing the sbox output
   inversion baked into stored round keys) and the result is shuffled by
   M0 so lastround's SRM0 pattern applies cleanly. */
#define keyexpbs10(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7, bskey) ;\
  toggle(&b0);\
  toggle(&b1);\
  toggle(&b5);\
  toggle(&b6);\
  rotbyte(&b0);\
  rotbyte(&b1);\
  rotbyte(&b2);\
  rotbyte(&b3);\
  rotbyte(&b4);\
  rotbyte(&b5);\
  rotbyte(&b6);\
  rotbyte(&b7);\
  ;\
  sbox(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7);\
  ;\
  xor_rcon(&b1);\
  xor_rcon(&b4);\
  xor_rcon(&b3);\
  xor_rcon(&b7);\
  shufb(&b0, EXPB0);\
  shufb(&b1, EXPB0);\
  shufb(&b4, EXPB0);\
  shufb(&b6, EXPB0);\
  shufb(&b3, EXPB0);\
  shufb(&b7, EXPB0);\
  shufb(&b2, EXPB0);\
  shufb(&b5, EXPB0);\
  ;\
  t0 = *(int128 *)(bskey + 9 * 128 + 0);\
  t1 = *(int128 *)(bskey + 9 * 128 + 16);\
  t2 = *(int128 *)(bskey + 9 * 128 + 32);\
  t3 = *(int128 *)(bskey + 9 * 128 + 48);\
  t4 = *(int128 *)(bskey + 9 * 128 + 64);\
  t5 = *(int128 *)(bskey + 9 * 128 + 80);\
  t6 = *(int128 *)(bskey + 9 * 128 + 96);\
  t7 = *(int128 *)(bskey + 9 * 128 + 112);\
  ;\
  toggle(&t0);\
  toggle(&t1);\
  toggle(&t5);\
  toggle(&t6);\
  ;\
  xor2(&b0, &t0);\
  xor2(&b1, &t1);\
  xor2(&b4, &t2);\
  xor2(&b6, &t3);\
  xor2(&b3, &t4);\
  xor2(&b7, &t5);\
  xor2(&b2, &t6);\
  xor2(&b5, &t7);\
  ;\
  rshift32_littleendian(&t0, 8);\
  rshift32_littleendian(&t1, 8);\
  rshift32_littleendian(&t2, 8);\
  rshift32_littleendian(&t3, 8);\
  rshift32_littleendian(&t4, 8);\
  rshift32_littleendian(&t5, 8);\
  rshift32_littleendian(&t6, 8);\
  rshift32_littleendian(&t7, 8);\
  ;\
  xor2(&b0, &t0);\
  xor2(&b1, &t1);\
  xor2(&b4, &t2);\
  xor2(&b6, &t3);\
  xor2(&b3, &t4);\
  xor2(&b7, &t5);\
  xor2(&b2, &t6);\
  xor2(&b5, &t7);\
  ;\
  rshift32_littleendian(&t0, 8);\
  rshift32_littleendian(&t1, 8);\
  rshift32_littleendian(&t2, 8);\
  rshift32_littleendian(&t3, 8);\
  rshift32_littleendian(&t4, 8);\
  rshift32_littleendian(&t5, 8);\
  rshift32_littleendian(&t6, 8);\
  rshift32_littleendian(&t7, 8);\
  ;\
  xor2(&b0, &t0);\
  xor2(&b1, &t1);\
  xor2(&b4, &t2);\
  xor2(&b6, &t3);\
  xor2(&b3, &t4);\
  xor2(&b7, &t5);\
  xor2(&b2, &t6);\
  xor2(&b5, &t7);\
  ;\
  rshift32_littleendian(&t0, 8);\
  rshift32_littleendian(&t1, 8);\
  rshift32_littleendian(&t2, 8);\
  rshift32_littleendian(&t3, 8);\
  rshift32_littleendian(&t4, 8);\
  rshift32_littleendian(&t5, 8);\
  rshift32_littleendian(&t6, 8);\
  rshift32_littleendian(&t7, 8);\
  ;\
  xor2(&b0, &t0);\
  xor2(&b1, &t1);\
  xor2(&b4, &t2);\
  xor2(&b6, &t3);\
  xor2(&b3, &t4);\
  xor2(&b7, &t5);\
  xor2(&b2, &t6);\
  xor2(&b5, &t7);\
  ;\
  shufb(&b0, M0);\
  shufb(&b1, M0);\
  shufb(&b2, M0);\
  shufb(&b3, M0);\
  shufb(&b4, M0);\
  shufb(&b5, M0);\
  shufb(&b6, M0);\
  shufb(&b7, M0);\
  ;\
  *(int128 *)(bskey + 1280) = b0;\
  *(int128 *)(bskey + 1296) = b1;\
  *(int128 *)(bskey + 1312) = b4;\
  *(int128 *)(bskey + 1328) = b6;\
  *(int128 *)(bskey + 1344) = b3;\
  *(int128 *)(bskey + 1360) = b7;\
  *(int128 *)(bskey + 1376) = b2;\
  *(int128 *)(bskey + 1392) = b5;\
256
257
/* keyexpbs: derive bitsliced round key `i` (2 <= i <= 9) from round key
   i-1 and store it at bskey + i*128.  `rcon` is a statement (or statement
   sequence) applying xor_rcon() to the registers that absorb this round's
   round constant; the caller threads b0..b7 in the bank permutation left
   over from the previous round. */
#define keyexpbs(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7, rcon, i, bskey) \
  toggle(&b0);\
  toggle(&b1);\
  toggle(&b5);\
  toggle(&b6);\
  rotbyte(&b0);\
  rotbyte(&b1);\
  rotbyte(&b2);\
  rotbyte(&b3);\
  rotbyte(&b4);\
  rotbyte(&b5);\
  rotbyte(&b6);\
  rotbyte(&b7);\
  ;\
  sbox(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7);\
  ;\
  rcon;\
  shufb(&b0, EXPB0);\
  shufb(&b1, EXPB0);\
  shufb(&b4, EXPB0);\
  shufb(&b6, EXPB0);\
  shufb(&b3, EXPB0);\
  shufb(&b7, EXPB0);\
  shufb(&b2, EXPB0);\
  shufb(&b5, EXPB0);\
  ;\
  t0 = *(int128 *)(bskey + (i-1) * 128 + 0);\
  t1 = *(int128 *)(bskey + (i-1) * 128 + 16);\
  t2 = *(int128 *)(bskey + (i-1) * 128 + 32);\
  t3 = *(int128 *)(bskey + (i-1) * 128 + 48);\
  t4 = *(int128 *)(bskey + (i-1) * 128 + 64);\
  t5 = *(int128 *)(bskey + (i-1) * 128 + 80);\
  t6 = *(int128 *)(bskey + (i-1) * 128 + 96);\
  t7 = *(int128 *)(bskey + (i-1) * 128 + 112);\
  ;\
  toggle(&t0);\
  toggle(&t1);\
  toggle(&t5);\
  toggle(&t6);\
  ;\
  xor2(&b0, &t0);\
  xor2(&b1, &t1);\
  xor2(&b4, &t2);\
  xor2(&b6, &t3);\
  xor2(&b3, &t4);\
  xor2(&b7, &t5);\
  xor2(&b2, &t6);\
  xor2(&b5, &t7);\
  ;\
  rshift32_littleendian(&t0, 8);\
  rshift32_littleendian(&t1, 8);\
  rshift32_littleendian(&t2, 8);\
  rshift32_littleendian(&t3, 8);\
  rshift32_littleendian(&t4, 8);\
  rshift32_littleendian(&t5, 8);\
  rshift32_littleendian(&t6, 8);\
  rshift32_littleendian(&t7, 8);\
  ;\
  xor2(&b0, &t0);\
  xor2(&b1, &t1);\
  xor2(&b4, &t2);\
  xor2(&b6, &t3);\
  xor2(&b3, &t4);\
  xor2(&b7, &t5);\
  xor2(&b2, &t6);\
  xor2(&b5, &t7);\
  ;\
  rshift32_littleendian(&t0, 8);\
  rshift32_littleendian(&t1, 8);\
  rshift32_littleendian(&t2, 8);\
  rshift32_littleendian(&t3, 8);\
  rshift32_littleendian(&t4, 8);\
  rshift32_littleendian(&t5, 8);\
  rshift32_littleendian(&t6, 8);\
  rshift32_littleendian(&t7, 8);\
  ;\
  xor2(&b0, &t0);\
  xor2(&b1, &t1);\
  xor2(&b4, &t2);\
  xor2(&b6, &t3);\
  xor2(&b3, &t4);\
  xor2(&b7, &t5);\
  xor2(&b2, &t6);\
  xor2(&b5, &t7);\
  ;\
  rshift32_littleendian(&t0, 8);\
  rshift32_littleendian(&t1, 8);\
  rshift32_littleendian(&t2, 8);\
  rshift32_littleendian(&t3, 8);\
  rshift32_littleendian(&t4, 8);\
  rshift32_littleendian(&t5, 8);\
  rshift32_littleendian(&t6, 8);\
  rshift32_littleendian(&t7, 8);\
  ;\
  xor2(&b0, &t0);\
  xor2(&b1, &t1);\
  xor2(&b4, &t2);\
  xor2(&b6, &t3);\
  xor2(&b3, &t4);\
  xor2(&b7, &t5);\
  xor2(&b2, &t6);\
  xor2(&b5, &t7);\
  ;\
  *(int128 *)(bskey + i*128 + 0) = b0;\
  *(int128 *)(bskey + i*128 + 16) = b1;\
  *(int128 *)(bskey + i*128 + 32) = b4;\
  *(int128 *)(bskey + i*128 + 48) = b6;\
  *(int128 *)(bskey + i*128 + 64) = b3;\
  *(int128 *)(bskey + i*128 + 80) = b7;\
  *(int128 *)(bskey + i*128 + 96) = b2;\
  *(int128 *)(bskey + i*128 + 112) = b5;\
369
/* Macros used in multiple contexts */

/* bitslicekey0: shuffle the raw 16-byte key at `key` into bitslice byte
   order (M0), replicate it across xmm0..xmm7, bitslice, and store as
   round key 0 at bskey+0..127.  Uses locals xmm0..xmm7 and t from the
   enclosing function. */
#define bitslicekey0(key, bskey) \
  xmm0 = *(int128 *) (key + 0);\
  shufb(&xmm0, M0);\
  copy2(&xmm1, &xmm0);\
  copy2(&xmm2, &xmm0);\
  copy2(&xmm3, &xmm0);\
  copy2(&xmm4, &xmm0);\
  copy2(&xmm5, &xmm0);\
  copy2(&xmm6, &xmm0);\
  copy2(&xmm7, &xmm0);\
  ;\
  bitslice(xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0, t);\
  ;\
  *(int128 *) (bskey + 0) = xmm0;\
  *(int128 *) (bskey + 16) = xmm1;\
  *(int128 *) (bskey + 32) = xmm2;\
  *(int128 *) (bskey + 48) = xmm3;\
  *(int128 *) (bskey + 64) = xmm4;\
  *(int128 *) (bskey + 80) = xmm5;\
  *(int128 *) (bskey + 96) = xmm6;\
  *(int128 *) (bskey + 112) = xmm7;\
393
394
/* bitslicekey10: bitslice a last-round key at `key` (no M0 pre-shuffle,
   unlike bitslicekey0), fold the sbox output inversion in via toggle(),
   and store at bskey+1280..1407.  Uses locals xmm0..xmm7 and t.
   Fix: the copy2() calls passed int128 values (copy2(xmm1, xmm0)) where
   every other copy2 use in this header takes addresses — a type error if
   the macro were ever instantiated; now passes &xmm1, &xmm0 etc., matching
   bitslicekey0 and bitslicekey. */
#define bitslicekey10(key, bskey) \
  xmm0 = *(int128 *) (key + 0);\
  copy2(&xmm1, &xmm0);\
  copy2(&xmm2, &xmm0);\
  copy2(&xmm3, &xmm0);\
  copy2(&xmm4, &xmm0);\
  copy2(&xmm5, &xmm0);\
  copy2(&xmm6, &xmm0);\
  copy2(&xmm7, &xmm0);\
  ;\
  bitslice(xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0, t);\
  ;\
  toggle(&xmm6);\
  toggle(&xmm5);\
  toggle(&xmm1);\
  toggle(&xmm0);\
  ;\
  *(int128 *) (bskey + 0 + 1280) = xmm0;\
  *(int128 *) (bskey + 16 + 1280) = xmm1;\
  *(int128 *) (bskey + 32 + 1280) = xmm2;\
  *(int128 *) (bskey + 48 + 1280) = xmm3;\
  *(int128 *) (bskey + 64 + 1280) = xmm4;\
  *(int128 *) (bskey + 80 + 1280) = xmm5;\
  *(int128 *) (bskey + 96 + 1280) = xmm6;\
  *(int128 *) (bskey + 112 + 1280) = xmm7;\
420
421
/* bitslicekey: generic variant of bitslicekey0 for round `i`: shuffle the
   16-byte key by M0, bitslice, apply the sbox output inversion via
   toggle(), and store at bskey + 128*i.  Uses locals xmm0..xmm7 and t. */
#define bitslicekey(i,key,bskey) \
  xmm0 = *(int128 *) (key + 0);\
  shufb(&xmm0, M0);\
  copy2(&xmm1, &xmm0);\
  copy2(&xmm2, &xmm0);\
  copy2(&xmm3, &xmm0);\
  copy2(&xmm4, &xmm0);\
  copy2(&xmm5, &xmm0);\
  copy2(&xmm6, &xmm0);\
  copy2(&xmm7, &xmm0);\
  ;\
  bitslice(xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0, t);\
  ;\
  toggle(&xmm6);\
  toggle(&xmm5);\
  toggle(&xmm1);\
  toggle(&xmm0);\
  ;\
  *(int128 *) (bskey + 0 + 128*i) = xmm0;\
  *(int128 *) (bskey + 16 + 128*i) = xmm1;\
  *(int128 *) (bskey + 32 + 128*i) = xmm2;\
  *(int128 *) (bskey + 48 + 128*i) = xmm3;\
  *(int128 *) (bskey + 64 + 128*i) = xmm4;\
  *(int128 *) (bskey + 80 + 128*i) = xmm5;\
  *(int128 *) (bskey + 96 + 128*i) = xmm6;\
  *(int128 *) (bskey + 112 + 128*i) = xmm7;\
448
449
/* bitslice: transpose the 8 registers x0..x7 into bitsliced form (and
   back — the transform is an involution) via three swapmove layers with
   strides 1, 2, 4; t is scratch. */
#define bitslice(x0, x1, x2, x3, x4, x5, x6, x7, t) \
  swapmove(x0, x1, 1, BS0, t);\
  swapmove(x2, x3, 1, BS0, t);\
  swapmove(x4, x5, 1, BS0, t);\
  swapmove(x6, x7, 1, BS0, t);\
  ;\
  swapmove(x0, x2, 2, BS1, t);\
  swapmove(x1, x3, 2, BS1, t);\
  swapmove(x4, x6, 2, BS1, t);\
  swapmove(x5, x7, 2, BS1, t);\
  ;\
  swapmove(x0, x4, 4, BS2, t);\
  swapmove(x1, x5, 4, BS2, t);\
  swapmove(x2, x6, 4, BS2, t);\
  swapmove(x3, x7, 4, BS2, t);\
465
466
/* swapmove: exchange the bits of a and b selected by mask m, with b's
   bits shifted down by n — the classic bit-matrix transpose step.
   t is scratch. */
#define swapmove(a, b, n, m, t) \
  copy2(&t, &b);\
  rshift64_littleendian(&t, n);\
  xor2(&t, &a);\
  and2(&t, &m);\
  xor2(&a, &t);\
  lshift64_littleendian(&t, n);\
  xor2(&b, &t);
475
/* rotbyte: rotate each key word's bytes via the ROTB shuffle pattern
   (the RotWord step of AES key expansion); x is an int128 pointer. */
#define rotbyte(x) \
  shufb(x, ROTB) /* TODO: Make faster */
478
479
/* Macros used for encryption (and decryption) */

/* shiftrows: XOR round key i-1 (at bskey + 128*(i-1)) into the state and
   apply the row-shift byte permutation M (SR for middle rounds, SRM0 for
   the last) to each register. */
#define shiftrows(x0, x1, x2, x3, x4, x5, x6, x7, i, M, bskey) \
  xor2(&x0, (int128 *)(bskey + 128*(i-1) + 0));\
  shufb(&x0, M);\
  xor2(&x1, (int128 *)(bskey + 128*(i-1) + 16));\
  shufb(&x1, M);\
  xor2(&x2, (int128 *)(bskey + 128*(i-1) + 32));\
  shufb(&x2, M);\
  xor2(&x3, (int128 *)(bskey + 128*(i-1) + 48));\
  shufb(&x3, M);\
  xor2(&x4, (int128 *)(bskey + 128*(i-1) + 64));\
  shufb(&x4, M);\
  xor2(&x5, (int128 *)(bskey + 128*(i-1) + 80));\
  shufb(&x5, M);\
  xor2(&x6, (int128 *)(bskey + 128*(i-1) + 96));\
  shufb(&x6, M);\
  xor2(&x7, (int128 *)(bskey + 128*(i-1) + 112));\
  shufb(&x7, M);\
499
500
/* mixcolumns: the bitsliced AES MixColumns step over x0..x7 with scratch
   t0..t7.  The 0x93/0x4e shufd constants rotate 32-bit columns; the extra
   xors of x7 into t1/t3/t4 implement the xtime (multiply-by-2) feedback
   of the AES polynomial. */
#define mixcolumns(x0, x1, x2, x3, x4, x5, x6, x7, t0, t1, t2, t3, t4, t5, t6, t7) \
  shufd(&t0, &x0, 0x93);\
  shufd(&t1, &x1, 0x93);\
  shufd(&t2, &x2, 0x93);\
  shufd(&t3, &x3, 0x93);\
  shufd(&t4, &x4, 0x93);\
  shufd(&t5, &x5, 0x93);\
  shufd(&t6, &x6, 0x93);\
  shufd(&t7, &x7, 0x93);\
  ;\
  xor2(&x0, &t0);\
  xor2(&x1, &t1);\
  xor2(&x2, &t2);\
  xor2(&x3, &t3);\
  xor2(&x4, &t4);\
  xor2(&x5, &t5);\
  xor2(&x6, &t6);\
  xor2(&x7, &t7);\
  ;\
  xor2(&t0, &x7);\
  xor2(&t1, &x0);\
  xor2(&t2, &x1);\
  xor2(&t1, &x7);\
  xor2(&t3, &x2);\
  xor2(&t4, &x3);\
  xor2(&t5, &x4);\
  xor2(&t3, &x7);\
  xor2(&t6, &x5);\
  xor2(&t7, &x6);\
  xor2(&t4, &x7);\
  ;\
  shufd(&x0, &x0, 0x4e);\
  shufd(&x1, &x1, 0x4e);\
  shufd(&x2, &x2, 0x4e);\
  shufd(&x3, &x3, 0x4e);\
  shufd(&x4, &x4, 0x4e);\
  shufd(&x5, &x5, 0x4e);\
  shufd(&x6, &x6, 0x4e);\
  shufd(&x7, &x7, 0x4e);\
  ;\
  xor2(&t0, &x0);\
  xor2(&t1, &x1);\
  xor2(&t2, &x2);\
  xor2(&t3, &x3);\
  xor2(&t4, &x4);\
  xor2(&t5, &x5);\
  xor2(&t6, &x6);\
  xor2(&t7, &x7);\
549
550
/* aesround: one full bitsliced AES round (AddRoundKey+ShiftRows, SubBytes,
   MixColumns) for round i.  Output lands in t0..t7 — note the permuted
   order in the mixcolumns call; callers swap banks each round. */
#define aesround(i, b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7, bskey) \
  shiftrows(b0, b1, b2, b3, b4, b5, b6, b7, i, SR, bskey);\
  sbox(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7);\
  mixcolumns(b0, b1, b4, b6, b3, b7, b2, b5, t0, t1, t2, t3, t4, t5, t6, t7);\
555
556
/* lastround: final AES round — no MixColumns; SRM0 combines the row shift
   with the un-bitslicing byte order, then round key 10 is XORed in. */
#define lastround(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7, bskey) \
  shiftrows(b0, b1, b2, b3, b4, b5, b6, b7, 10, SRM0, bskey);\
  sbox(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7);\
  xor2(&b0,(int128 *)(bskey + 128*10));\
  xor2(&b1,(int128 *)(bskey + 128*10+16));\
  xor2(&b4,(int128 *)(bskey + 128*10+32));\
  xor2(&b6,(int128 *)(bskey + 128*10+48));\
  xor2(&b3,(int128 *)(bskey + 128*10+64));\
  xor2(&b7,(int128 *)(bskey + 128*10+80));\
  xor2(&b2,(int128 *)(bskey + 128*10+96));\
  xor2(&b5,(int128 *)(bskey + 128*10+112));\
568
569
/* sbox: the bitsliced AES S-box (Käsper–Schwabe construction): change to
   the GF(2^4)^2 tower basis, invert in GF(256), change back.  b0..b7 are
   the state bits, the rest scratch. */
#define sbox(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, s0, s1, s2, s3) \
  InBasisChange(b0, b1, b2, b3, b4, b5, b6, b7); \
  Inv_GF256(b6, b5, b0, b3, b7, b1, b4, b2, t0, t1, t2, t3, s0, s1, s2, s3); \
  OutBasisChange(b7, b1, b4, b2, b6, b5, b0, b3); \
574
575
/* InBasisChange: linear map from the AES polynomial basis into the tower
   basis used by Inv_GF256; pure XOR network, in place on b0..b7. */
#define InBasisChange(b0, b1, b2, b3, b4, b5, b6, b7) \
  xor2(&b5, &b6);\
  xor2(&b2, &b1);\
  xor2(&b5, &b0);\
  xor2(&b6, &b2);\
  xor2(&b3, &b0);\
  ;\
  xor2(&b6, &b3);\
  xor2(&b3, &b7);\
  xor2(&b3, &b4);\
  xor2(&b7, &b5);\
  xor2(&b3, &b1);\
  ;\
  xor2(&b4, &b5);\
  xor2(&b2, &b7);\
  xor2(&b1, &b5);\
592
/* OutBasisChange: inverse linear map back from the tower basis (combined
   with the S-box affine transform); pure XOR network on b0..b7. */
#define OutBasisChange(b0, b1, b2, b3, b4, b5, b6, b7) \
  xor2(&b0, &b6);\
  xor2(&b1, &b4);\
  xor2(&b2, &b0);\
  xor2(&b4, &b6);\
  xor2(&b6, &b1);\
  ;\
  xor2(&b1, &b5);\
  xor2(&b5, &b3);\
  xor2(&b2, &b5);\
  xor2(&b3, &b7);\
  xor2(&b7, &b5);\
  ;\
  xor2(&b4, &b7);\
607
/* Mul_GF4: multiply (x0,x1) by (y0,y1) in GF(4); result replaces (x0,x1),
   t0 is scratch.  y0/y1 are left unmodified. */
#define Mul_GF4(x0, x1, y0, y1, t0) \
  copy2(&t0, &y0);\
  xor2(&t0, &y1);\
  and2(&t0, &x0);\
  xor2(&x0, &x1);\
  and2(&x0, &y1);\
  and2(&x1, &y0);\
  xor2(&x0, &x1);\
  xor2(&x1, &t0);\
617
/* Mul_GF4_N: like Mul_GF4 but with the result additionally scaled by N
   (the GF(4) normal element) — note the swapped final xors relative to
   Mul_GF4. */
#define Mul_GF4_N(x0, x1, y0, y1, t0) \
  copy2(&t0, &y0);\
  xor2(&t0, &y1);\
  and2(&t0, &x0);\
  xor2(&x0, &x1);\
  and2(&x0, &y1);\
  and2(&x1, &y0);\
  xor2(&x1, &x0);\
  xor2(&x0, &t0);\
627
/* Mul_GF4_2: multiply two GF(4) pairs (x0,x1) and (x2,x3) by the shared
   factor (y0,y1); t0/t1 are scratch, results replace x0..x3.
   Fix: the first statement read `copy2(&t0, = y0);` — a stray `= ` that
   is a syntax error on any expansion; it must pass the address of y0,
   exactly as Mul_GF4 and Mul_GF4_N do. */
#define Mul_GF4_2(x0, x1, x2, x3, y0, y1, t0, t1) \
  copy2(&t0, &y0);\
  xor2(&t0, &y1);\
  copy2(&t1, &t0);\
  and2(&t0, &x0);\
  and2(&t1, &x2);\
  xor2(&x0, &x1);\
  xor2(&x2, &x3);\
  and2(&x0, &y1);\
  and2(&x2, &y1);\
  and2(&x1, &y0);\
  and2(&x3, &y0);\
  xor2(&x0, &x1);\
  xor2(&x2, &x3);\
  xor2(&x1, &t0);\
  xor2(&x3, &t1);\
644
/* Mul_GF16: Karatsuba-style GF(16) multiply of (x0..x3) by (y0..y3);
   result replaces x0..x3, t0..t3 scratch.  NOTE: y0/y1 are clobbered
   (xored with y2/y3). */
#define Mul_GF16(x0, x1, x2, x3, y0, y1, y2, y3, t0, t1, t2, t3) \
  copy2(&t0, &x0);\
  copy2(&t1, &x1);\
  Mul_GF4(x0, x1, y0, y1, t2);\
  xor2(&t0, &x2);\
  xor2(&t1, &x3);\
  xor2(&y0, &y2);\
  xor2(&y1, &y3);\
  Mul_GF4_N(t0, t1, y0, y1, t2);\
  Mul_GF4(x2, x3, y2, y3, t3);\
  ;\
  xor2(&x0, &t0);\
  xor2(&x2, &t0);\
  xor2(&x1, &t1);\
  xor2(&x3, &t1);\
660
/* Mul_GF16_2: multiply both GF(16) halves (x0..x3) and (x4..x7) by the
   same factor (y0..y3), sharing the Karatsuba intermediates; results
   replace x0..x7, t0..t3 scratch.  NOTE: y0/y1 are clobbered. */
#define Mul_GF16_2(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, t0, t1, t2, t3) \
  copy2(&t0, &x0);\
  copy2(&t1, &x1);\
  Mul_GF4(x0, x1, y0, y1, t2);\
  xor2(&t0, &x2);\
  xor2(&t1, &x3);\
  xor2(&y0, &y2);\
  xor2(&y1, &y3);\
  Mul_GF4_N(t0, t1, y0, y1, t3);\
  Mul_GF4(x2, x3, y2, y3, t2);\
  ;\
  xor2(&x0, &t0);\
  xor2(&x2, &t0);\
  xor2(&x1, &t1);\
  xor2(&x3, &t1);\
  ;\
  copy2(&t0, &x4);\
  copy2(&t1, &x5);\
  xor2(&t0, &x6);\
  xor2(&t1, &x7);\
  Mul_GF4_N(t0, t1, y0, y1, t3);\
  Mul_GF4(x6, x7, y2, y3, t2);\
  xor2(&y0, &y2);\
  xor2(&y1, &y3);\
  Mul_GF4(x4, x5, y0, y1, t3);\
  ;\
  xor2(&x4, &t0);\
  xor2(&x6, &t0);\
  xor2(&x5, &t1);\
  xor2(&x7, &t1);\
691
/* Inv_GF16: invert the GF(16) element (x0..x3) in place; t0..t3 scratch.
   Computes the inverse's factors then multiplies them in via Mul_GF4_2. */
#define Inv_GF16(x0, x1, x2, x3, t0, t1, t2, t3) \
  copy2(&t0, &x1);\
  copy2(&t1, &x0);\
  and2(&t0, &x3);\
  or2(&t1, &x2);\
  copy2(&t2, &x1);\
  copy2(&t3, &x0);\
  or2(&t2, &x2);\
  or2(&t3, &x3);\
  xor2(&t2, &t3);\
  ;\
  xor2(&t0, &t2);\
  xor2(&t1, &t2);\
  ;\
  Mul_GF4_2(x0, x1, x2, x3, t1, t0, t2, t3);\
707
708
/* Inv_GF256: invert the GF(256) element (x0..x7, tower-basis bits) in
   place; t0..t3 and s0..s3 are scratch.  First builds the GF(16) norm
   and its inverse (an unrolled Inv_GF16 on shared subexpressions), then
   multiplies both halves back via Mul_GF16_2.  Statement order is
   load-bearing — registers are aggressively reused. */
#define Inv_GF256(x0, x1, x2, x3, x4, x5, x6, x7, t0, t1, t2, t3, s0, s1, s2, s3) \
  copy2(&t3, &x4);\
  copy2(&t2, &x5);\
  copy2(&t1, &x1);\
  copy2(&s1, &x7);\
  copy2(&s0, &x0);\
  ;\
  xor2(&t3, &x6);\
  xor2(&t2, &x7);\
  xor2(&t1, &x3);\
  xor2(&s1, &x6);\
  xor2(&s0, &x2);\
  ;\
  copy2(&s2, &t3);\
  copy2(&t0, &t2);\
  copy2(&s3, &t3);\
  ;\
  or2(&t2, &t1);\
  or2(&t3, &s0);\
  xor2(&s3, &t0);\
  and2(&s2, &s0);\
  and2(&t0, &t1);\
  xor2(&s0, &t1);\
  and2(&s3, &s0);\
  copy2(&s0, &x3);\
  xor2(&s0, &x2);\
  and2(&s1, &s0);\
  xor2(&t3, &s1);\
  xor2(&t2, &s1);\
  copy2(&s1, &x4);\
  xor2(&s1, &x5);\
  copy2(&s0, &x1);\
  copy2(&t1, &s1);\
  xor2(&s0, &x0);\
  or2(&t1, &s0);\
  and2(&s1, &s0);\
  xor2(&t0, &s1);\
  xor2(&t3, &s3);\
  xor2(&t2, &s2);\
  xor2(&t1, &s3);\
  xor2(&t0, &s2);\
  xor2(&t1, &s2);\
  copy2(&s0, &x7);\
  copy2(&s1, &x6);\
  copy2(&s2, &x5);\
  copy2(&s3, &x4);\
  and2(&s0, &x3);\
  and2(&s1, &x2);\
  and2(&s2, &x1);\
  or2(&s3, &x0);\
  xor2(&t3, &s0);\
  xor2(&t2, &s1);\
  xor2(&t1, &s2);\
  xor2(&t0, &s3);\
  ;\
  copy2(&s0, &t3);\
  xor2(&s0, &t2);\
  and2(&t3, &t1);\
  copy2(&s2, &t0);\
  xor2(&s2, &t3);\
  copy2(&s3, &s0);\
  and2(&s3, &s2);\
  xor2(&s3, &t2);\
  copy2(&s1, &t1);\
  xor2(&s1, &t0);\
  xor2(&t3, &t2);\
  and2(&s1, &t3);\
  xor2(&s1, &t0);\
  xor2(&t1, &s1);\
  copy2(&t2, &s2);\
  xor2(&t2, &s1);\
  and2(&t2, &t0);\
  xor2(&t1, &t2);\
  xor2(&s2, &t2);\
  and2(&s2, &s3);\
  xor2(&s2, &s0);\
  ;\
  Mul_GF16_2(x0, x1, x2, x3, x4, x5, x6, x7, s3, s2, s1, t1, s0, t0, t2, t3);\
787
788#endif
diff --git a/nacl/crypto_stream/aes128ctr/portable/consts.c b/nacl/crypto_stream/aes128ctr/portable/consts.c
deleted file mode 100644
index ed2835db..00000000
--- a/nacl/crypto_stream/aes128ctr/portable/consts.c
+++ /dev/null
@@ -1,14 +0,0 @@
1#include "consts.h"
2
3const unsigned char ROTB[16] = {0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x08};
4const unsigned char M0[16] = {0x0f, 0x0b, 0x07, 0x03, 0x0e, 0x0a, 0x06, 0x02, 0x0d, 0x09, 0x05, 0x01, 0x0c, 0x08, 0x04, 0x00};
5const unsigned char EXPB0[16] = {0x03, 0x03, 0x03, 0x03, 0x07, 0x07, 0x07, 0x07, 0x0b, 0x0b, 0x0b, 0x0b, 0x0f, 0x0f, 0x0f, 0x0f};
6
7const unsigned char SWAP32[16] = {0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x05, 0x04, 0x0b, 0x0a, 0x09, 0x08, 0x0f, 0x0e, 0x0d, 0x0c};
8const unsigned char M0SWAP[16] = {0x0c, 0x08, 0x04, 0x00, 0x0d, 0x09, 0x05, 0x01, 0x0e, 0x0a, 0x06, 0x02, 0x0f, 0x0b, 0x07, 0x03};
9const unsigned char SR[16] = {0x01, 0x02, 0x03, 0x00, 0x06, 0x07, 0x04, 0x05, 0x0b, 0x08, 0x09, 0x0a, 0x0c, 0x0d, 0x0e, 0x0f};
10const unsigned char SRM0[16] = {0x0f, 0x0a, 0x05, 0x00, 0x0e, 0x09, 0x04, 0x03, 0x0d, 0x08, 0x07, 0x02, 0x0c, 0x0b, 0x06, 0x01};
11
12const int128 BS0 = {0x5555555555555555ULL, 0x5555555555555555ULL};
13const int128 BS1 = {0x3333333333333333ULL, 0x3333333333333333ULL};
14const int128 BS2 = {0x0f0f0f0f0f0f0f0fULL, 0x0f0f0f0f0f0f0f0fULL};
diff --git a/nacl/crypto_stream/aes128ctr/portable/consts.h b/nacl/crypto_stream/aes128ctr/portable/consts.h
deleted file mode 100644
index 4c50360b..00000000
--- a/nacl/crypto_stream/aes128ctr/portable/consts.h
+++ /dev/null
@@ -1,28 +0,0 @@
1#ifndef CONSTS_H
2#define CONSTS_H
3
4#include "int128.h"
5
6#define ROTB crypto_stream_aes128ctr_portable_ROTB
7#define M0 crypto_stream_aes128ctr_portable_M0
8#define EXPB0 crypto_stream_aes128ctr_portable_EXPB0
9#define SWAP32 crypto_stream_aes128ctr_portable_SWAP32
10#define M0SWAP crypto_stream_aes128ctr_portable_M0SWAP
11#define SR crypto_stream_aes128ctr_portable_SR
12#define SRM0 crypto_stream_aes128ctr_portable_SRM0
13#define BS0 crypto_stream_aes128ctr_portable_BS0
14#define BS1 crypto_stream_aes128ctr_portable_BS1
15#define BS2 crypto_stream_aes128ctr_portable_BS2
16
17extern const unsigned char ROTB[16];
18extern const unsigned char M0[16];
19extern const unsigned char EXPB0[16];
20extern const unsigned char SWAP32[16];
21extern const unsigned char M0SWAP[16];
22extern const unsigned char SR[16];
23extern const unsigned char SRM0[16];
24extern const int128 BS0;
25extern const int128 BS1;
26extern const int128 BS2;
27
28#endif
diff --git a/nacl/crypto_stream/aes128ctr/portable/int128.c b/nacl/crypto_stream/aes128ctr/portable/int128.c
deleted file mode 100644
index 25894d42..00000000
--- a/nacl/crypto_stream/aes128ctr/portable/int128.c
+++ /dev/null
@@ -1,128 +0,0 @@
1#include "int128.h"
2#include "common.h"
3
4void xor2(int128 *r, const int128 *x)
5{
6 r->a ^= x->a;
7 r->b ^= x->b;
8}
9
10void and2(int128 *r, const int128 *x)
11{
12 r->a &= x->a;
13 r->b &= x->b;
14}
15
16void or2(int128 *r, const int128 *x)
17{
18 r->a |= x->a;
19 r->b |= x->b;
20}
21
22void copy2(int128 *r, const int128 *x)
23{
24 r->a = x->a;
25 r->b = x->b;
26}
27
28void shufb(int128 *r, const unsigned char *l)
29{
30 int128 t;
31 copy2(&t,r);
32 unsigned char *cr = (unsigned char *)r;
33 unsigned char *ct = (unsigned char *)&t;
34 cr[0] = ct[l[0]];
35 cr[1] = ct[l[1]];
36 cr[2] = ct[l[2]];
37 cr[3] = ct[l[3]];
38 cr[4] = ct[l[4]];
39 cr[5] = ct[l[5]];
40 cr[6] = ct[l[6]];
41 cr[7] = ct[l[7]];
42 cr[8] = ct[l[8]];
43 cr[9] = ct[l[9]];
44 cr[10] = ct[l[10]];
45 cr[11] = ct[l[11]];
46 cr[12] = ct[l[12]];
47 cr[13] = ct[l[13]];
48 cr[14] = ct[l[14]];
49 cr[15] = ct[l[15]];
50}
51
52void shufd(int128 *r, const int128 *x, const unsigned int c)
53{
54 int128 t;
55 uint32 *tp = (uint32 *)&t;
56 uint32 *xp = (uint32 *)x;
57 tp[0] = xp[c&3];
58 tp[1] = xp[(c>>2)&3];
59 tp[2] = xp[(c>>4)&3];
60 tp[3] = xp[(c>>6)&3];
61 copy2(r,&t);
62}
63
64void rshift32_littleendian(int128 *r, const unsigned int n)
65{
66 unsigned char *rp = (unsigned char *)r;
67 uint32 t;
68 t = load32_littleendian(rp);
69 t >>= n;
70 store32_littleendian(rp, t);
71 t = load32_littleendian(rp+4);
72 t >>= n;
73 store32_littleendian(rp+4, t);
74 t = load32_littleendian(rp+8);
75 t >>= n;
76 store32_littleendian(rp+8, t);
77 t = load32_littleendian(rp+12);
78 t >>= n;
79 store32_littleendian(rp+12, t);
80}
81
82void rshift64_littleendian(int128 *r, const unsigned int n)
83{
84 unsigned char *rp = (unsigned char *)r;
85 uint64 t;
86 t = load64_littleendian(rp);
87 t >>= n;
88 store64_littleendian(rp, t);
89 t = load64_littleendian(rp+8);
90 t >>= n;
91 store64_littleendian(rp+8, t);
92}
93
94void lshift64_littleendian(int128 *r, const unsigned int n)
95{
96 unsigned char *rp = (unsigned char *)r;
97 uint64 t;
98 t = load64_littleendian(rp);
99 t <<= n;
100 store64_littleendian(rp, t);
101 t = load64_littleendian(rp+8);
102 t <<= n;
103 store64_littleendian(rp+8, t);
104}
105
106void toggle(int128 *r)
107{
108 r->a ^= 0xffffffffffffffffULL;
109 r->b ^= 0xffffffffffffffffULL;
110}
111
112void xor_rcon(int128 *r)
113{
114 unsigned char *rp = (unsigned char *)r;
115 uint32 t;
116 t = load32_littleendian(rp+12);
117 t ^= 0xffffffff;
118 store32_littleendian(rp+12, t);
119}
120
121void add_uint32_big(int128 *r, uint32 x)
122{
123 unsigned char *rp = (unsigned char *)r;
124 uint32 t;
125 t = load32_littleendian(rp+12);
126 t += x;
127 store32_littleendian(rp+12, t);
128}
diff --git a/nacl/crypto_stream/aes128ctr/portable/int128.h b/nacl/crypto_stream/aes128ctr/portable/int128.h
deleted file mode 100644
index 7099e5b1..00000000
--- a/nacl/crypto_stream/aes128ctr/portable/int128.h
+++ /dev/null
@@ -1,47 +0,0 @@
1#ifndef INT128_H
2#define INT128_H
3
4#include "common.h"
5
6typedef struct{
7 unsigned long long a;
8 unsigned long long b;
9} int128;
10
11#define xor2 crypto_stream_aes128ctr_portable_xor2
12void xor2(int128 *r, const int128 *x);
13
14#define and2 crypto_stream_aes128ctr_portable_and2
15void and2(int128 *r, const int128 *x);
16
17#define or2 crypto_stream_aes128ctr_portable_or2
18void or2(int128 *r, const int128 *x);
19
20#define copy2 crypto_stream_aes128ctr_portable_copy2
21void copy2(int128 *r, const int128 *x);
22
23#define shufb crypto_stream_aes128ctr_portable_shufb
24void shufb(int128 *r, const unsigned char *l);
25
26#define shufd crypto_stream_aes128ctr_portable_shufd
27void shufd(int128 *r, const int128 *x, const unsigned int c);
28
29#define rshift32_littleendian crypto_stream_aes128ctr_portable_rshift32_littleendian
30void rshift32_littleendian(int128 *r, const unsigned int n);
31
32#define rshift64_littleendian crypto_stream_aes128ctr_portable_rshift64_littleendian
33void rshift64_littleendian(int128 *r, const unsigned int n);
34
35#define lshift64_littleendian crypto_stream_aes128ctr_portable_lshift64_littleendian
36void lshift64_littleendian(int128 *r, const unsigned int n);
37
38#define toggle crypto_stream_aes128ctr_portable_toggle
39void toggle(int128 *r);
40
41#define xor_rcon crypto_stream_aes128ctr_portable_xor_rcon
42void xor_rcon(int128 *r);
43
44#define add_uint32_big crypto_stream_aes128ctr_portable_add_uint32_big
45void add_uint32_big(int128 *r, uint32 x);
46
47#endif
diff --git a/nacl/crypto_stream/aes128ctr/portable/stream.c b/nacl/crypto_stream/aes128ctr/portable/stream.c
deleted file mode 100644
index 963fa8c1..00000000
--- a/nacl/crypto_stream/aes128ctr/portable/stream.c
+++ /dev/null
@@ -1,28 +0,0 @@
1#include "crypto_stream.h"
2
3int crypto_stream(
4 unsigned char *out,
5 unsigned long long outlen,
6 const unsigned char *n,
7 const unsigned char *k
8 )
9{
10 unsigned char d[crypto_stream_BEFORENMBYTES];
11 crypto_stream_beforenm(d, k);
12 crypto_stream_afternm(out, outlen, n, d);
13 return 0;
14}
15
16int crypto_stream_xor(
17 unsigned char *out,
18 const unsigned char *in,
19 unsigned long long inlen,
20 const unsigned char *n,
21 const unsigned char *k
22 )
23{
24 unsigned char d[crypto_stream_BEFORENMBYTES];
25 crypto_stream_beforenm(d, k);
26 crypto_stream_xor_afternm(out, in, inlen, n, d);
27 return 0;
28}
diff --git a/nacl/crypto_stream/aes128ctr/portable/types.h b/nacl/crypto_stream/aes128ctr/portable/types.h
deleted file mode 100644
index 6aa502fc..00000000
--- a/nacl/crypto_stream/aes128ctr/portable/types.h
+++ /dev/null
@@ -1,10 +0,0 @@
1#ifndef TYPES_H
2#define TYPES_H
3
4#include "crypto_uint32.h"
5typedef crypto_uint32 uint32;
6
7#include "crypto_uint64.h"
8typedef crypto_uint64 uint64;
9
10#endif
diff --git a/nacl/crypto_stream/aes128ctr/portable/xor_afternm.c b/nacl/crypto_stream/aes128ctr/portable/xor_afternm.c
deleted file mode 100644
index f2ff8ff6..00000000
--- a/nacl/crypto_stream/aes128ctr/portable/xor_afternm.c
+++ /dev/null
@@ -1,180 +0,0 @@
1/* Author: Peter Schwabe, ported from an assembly implementation by Emilia Käsper
2 * Date: 2009-03-19
3 * Public domain */
4
5#include <stdio.h>
6#include "int128.h"
7#include "common.h"
8#include "consts.h"
9#include "crypto_stream.h"
10
11int crypto_stream_xor_afternm(unsigned char *outp, const unsigned char *inp, unsigned long long len, const unsigned char *noncep, const unsigned char *c)
12{
13
14 int128 xmm0;
15 int128 xmm1;
16 int128 xmm2;
17 int128 xmm3;
18 int128 xmm4;
19 int128 xmm5;
20 int128 xmm6;
21 int128 xmm7;
22
23 int128 xmm8;
24 int128 xmm9;
25 int128 xmm10;
26 int128 xmm11;
27 int128 xmm12;
28 int128 xmm13;
29 int128 xmm14;
30 int128 xmm15;
31
32 int128 nonce_stack;
33 unsigned long long lensav;
34 unsigned char bl[128];
35 unsigned char *blp;
36 unsigned char b;
37
38 uint32 tmp;
39
40 /* Copy nonce on the stack */
41 copy2(&nonce_stack, (int128 *) (noncep + 0));
42 unsigned char *np = (unsigned char *)&nonce_stack;
43
44 enc_block:
45
46 xmm0 = *(int128 *) (np + 0);
47 copy2(&xmm1, &xmm0);
48 shufb(&xmm1, SWAP32);
49 copy2(&xmm2, &xmm1);
50 copy2(&xmm3, &xmm1);
51 copy2(&xmm4, &xmm1);
52 copy2(&xmm5, &xmm1);
53 copy2(&xmm6, &xmm1);
54 copy2(&xmm7, &xmm1);
55
56 add_uint32_big(&xmm1, 1);
57 add_uint32_big(&xmm2, 2);
58 add_uint32_big(&xmm3, 3);
59 add_uint32_big(&xmm4, 4);
60 add_uint32_big(&xmm5, 5);
61 add_uint32_big(&xmm6, 6);
62 add_uint32_big(&xmm7, 7);
63
64 shufb(&xmm0, M0);
65 shufb(&xmm1, M0SWAP);
66 shufb(&xmm2, M0SWAP);
67 shufb(&xmm3, M0SWAP);
68 shufb(&xmm4, M0SWAP);
69 shufb(&xmm5, M0SWAP);
70 shufb(&xmm6, M0SWAP);
71 shufb(&xmm7, M0SWAP);
72
73 bitslice(xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0, xmm8)
74
75 aesround( 1, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
76 aesround( 2, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
77 aesround( 3, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
78 aesround( 4, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
79 aesround( 5, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
80 aesround( 6, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
81 aesround( 7, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
82 aesround( 8, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
83 aesround( 9, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
84 lastround(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
85
86 bitslice(xmm13, xmm10, xmm15, xmm11, xmm14, xmm12, xmm9, xmm8, xmm0)
87
88 if(len < 128) goto partial;
89 if(len == 128) goto full;
90
91 tmp = load32_bigendian(np + 12);
92 tmp += 8;
93 store32_bigendian(np + 12, tmp);
94
95 xor2(&xmm8, (int128 *)(inp + 0));
96 xor2(&xmm9, (int128 *)(inp + 16));
97 xor2(&xmm12, (int128 *)(inp + 32));
98 xor2(&xmm14, (int128 *)(inp + 48));
99 xor2(&xmm11, (int128 *)(inp + 64));
100 xor2(&xmm15, (int128 *)(inp + 80));
101 xor2(&xmm10, (int128 *)(inp + 96));
102 xor2(&xmm13, (int128 *)(inp + 112));
103
104 *(int128 *) (outp + 0) = xmm8;
105 *(int128 *) (outp + 16) = xmm9;
106 *(int128 *) (outp + 32) = xmm12;
107 *(int128 *) (outp + 48) = xmm14;
108 *(int128 *) (outp + 64) = xmm11;
109 *(int128 *) (outp + 80) = xmm15;
110 *(int128 *) (outp + 96) = xmm10;
111 *(int128 *) (outp + 112) = xmm13;
112
113 len -= 128;
114 inp += 128;
115 outp += 128;
116
117 goto enc_block;
118
119 partial:
120
121 lensav = len;
122 len >>= 4;
123
124 tmp = load32_bigendian(np + 12);
125 tmp += len;
126 store32_bigendian(np + 12, tmp);
127
128 blp = bl;
129 *(int128 *)(blp + 0) = xmm8;
130 *(int128 *)(blp + 16) = xmm9;
131 *(int128 *)(blp + 32) = xmm12;
132 *(int128 *)(blp + 48) = xmm14;
133 *(int128 *)(blp + 64) = xmm11;
134 *(int128 *)(blp + 80) = xmm15;
135 *(int128 *)(blp + 96) = xmm10;
136 *(int128 *)(blp + 112) = xmm13;
137
138 bytes:
139
140 if(lensav == 0) goto end;
141
142 b = blp[0];
143 b ^= *(unsigned char *)(inp + 0);
144 *(unsigned char *)(outp + 0) = b;
145
146 blp += 1;
147 inp +=1;
148 outp +=1;
149 lensav -= 1;
150
151 goto bytes;
152
153 full:
154
155 tmp = load32_bigendian(np + 12);
156 tmp += 8;
157 store32_bigendian(np + 12, tmp);
158
159 xor2(&xmm8, (int128 *)(inp + 0));
160 xor2(&xmm9, (int128 *)(inp + 16));
161 xor2(&xmm12, (int128 *)(inp + 32));
162 xor2(&xmm14, (int128 *)(inp + 48));
163 xor2(&xmm11, (int128 *)(inp + 64));
164 xor2(&xmm15, (int128 *)(inp + 80));
165 xor2(&xmm10, (int128 *)(inp + 96));
166 xor2(&xmm13, (int128 *)(inp + 112));
167
168 *(int128 *) (outp + 0) = xmm8;
169 *(int128 *) (outp + 16) = xmm9;
170 *(int128 *) (outp + 32) = xmm12;
171 *(int128 *) (outp + 48) = xmm14;
172 *(int128 *) (outp + 64) = xmm11;
173 *(int128 *) (outp + 80) = xmm15;
174 *(int128 *) (outp + 96) = xmm10;
175 *(int128 *) (outp + 112) = xmm13;
176
177 end:
178 return 0;
179
180}