diff options
Diffstat (limited to 'cbits/cryptonite_salsa.c')
-rw-r--r-- | cbits/cryptonite_salsa.c | 297 |
1 files changed, 297 insertions, 0 deletions
diff --git a/cbits/cryptonite_salsa.c b/cbits/cryptonite_salsa.c new file mode 100644 index 00000000..0bd96607 --- /dev/null +++ b/cbits/cryptonite_salsa.c | |||
@@ -0,0 +1,297 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2014-2015 Vincent Hanquez <vincent@snarc.org> | ||
3 | * | ||
4 | * All rights reserved. | ||
5 | * | ||
6 | * Redistribution and use in source and binary forms, with or without | ||
7 | * modification, are permitted provided that the following conditions | ||
8 | * are met: | ||
9 | * 1. Redistributions of source code must retain the above copyright | ||
10 | * notice, this list of conditions and the following disclaimer. | ||
11 | * 2. Redistributions in binary form must reproduce the above copyright | ||
12 | * notice, this list of conditions and the following disclaimer in the | ||
13 | * documentation and/or other materials provided with the distribution. | ||
14 | * 3. Neither the name of the author nor the names of his contributors | ||
15 | * may be used to endorse or promote products derived from this software | ||
16 | * without specific prior written permission. | ||
17 | * | ||
18 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | ||
19 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
20 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
21 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE | ||
22 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
23 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | ||
24 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
25 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||
26 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||
27 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||
28 | * SUCH DAMAGE. | ||
29 | */ | ||
30 | |||
31 | #include <stdint.h> | ||
32 | #include <string.h> | ||
33 | #include <stdio.h> | ||
34 | #include "cryptonite_salsa.h" | ||
35 | #include "cryptonite_bitfn.h" | ||
36 | |||
/*
 * Constant words mixed into the state by cryptonite_salsa_init_core:
 * sigma is selected when keylen == 32, tau for any other key length.
 * Each text is exactly 16 bytes long, so the arrays carry no terminating NUL.
 */
static const uint8_t sigma[16] = "expand 32-byte k";
static const uint8_t tau[16] = "expand 16-byte k";
39 | |||
/*
 * Salsa quarter-round: updates the four 32-bit lvalues b, c, d, a (in that
 * order), each from the sum of the two most recently touched words rotated
 * left by 7, 9, 13 and 18 bits.
 *
 * The arguments are evaluated several times and assigned to, so they must be
 * plain lvalues without side effects. The body is wrapped in do/while(0) and
 * the arguments are parenthesised so that `QR(...);` behaves as exactly one
 * statement wherever it is expanded.
 */
#define QR(a,b,c,d) do { \
		(b) ^= rol32((a) + (d), 7);  \
		(c) ^= rol32((b) + (a), 9);  \
		(d) ^= rol32((c) + (b), 13); \
		(a) ^= rol32((d) + (c), 18); \
	} while (0)
45 | |||
/* Non-zero when the address held by PTR is a multiple of 8 bytes (64 bits). */
#define ALIGNED64(PTR) \
	((((uintptr_t)(const void *)(PTR)) & (uintptr_t) 7) == 0)
48 | |||
/*
 * Round loop shared by salsa_core and cryptonite_salsa_core_xor.
 *
 * This is a textual macro: it expects an int `i`, an int `rounds` and the
 * sixteen working words x0..x15 to be declared at the expansion site.
 *
 * Each iteration counts as two rounds (i decreases by 2): the first four
 * quarter-rounds mix the words along the columns of the state seen as a 4x4
 * row-major matrix, the last four mix along its rows. Because the loop runs
 * while i > 0, an odd `rounds` executes the same number of iterations as
 * rounds + 1.
 */
#define SALSA_CORE_LOOP \
	for (i = rounds; i > 0; i -= 2) { \
		QR (x0,x4,x8,x12); \
		QR (x5,x9,x13,x1); \
		QR (x10,x14,x2,x6); \
		QR (x15,x3,x7,x11); \
		QR (x0,x1,x2,x3); \
		QR (x5,x6,x7,x4); \
		QR (x10,x11,x8,x9); \
		QR (x15,x12,x13,x14); \
	}
60 | |||
/*
 * Read a 32-bit little-endian word starting at p.
 *
 * The word is assembled byte by byte, so p may have any alignment and the
 * result does not depend on the host byte order. This avoids dereferencing a
 * uint8_t buffer through a uint32_t pointer, which is undefined behaviour
 * when the buffer is misaligned and breaks strict aliasing.
 *
 * p must point to at least 4 readable bytes.
 */
static inline uint32_t load32(const uint8_t *p)
{
	return  (uint32_t) p[0]
	     | ((uint32_t) p[1] << 8)
	     | ((uint32_t) p[2] << 16)
	     | ((uint32_t) p[3] << 24);
}
65 | |||
66 | static void salsa_core(int rounds, block *out, const cryptonite_salsa_state *in) | ||
67 | { | ||
68 | uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; | ||
69 | int i; | ||
70 | |||
71 | x0 = in->d[0]; x1 = in->d[1]; x2 = in->d[2]; x3 = in->d[3]; | ||
72 | x4 = in->d[4]; x5 = in->d[5]; x6 = in->d[6]; x7 = in->d[7]; | ||
73 | x8 = in->d[8]; x9 = in->d[9]; x10 = in->d[10]; x11 = in->d[11]; | ||
74 | x12 = in->d[12]; x13 = in->d[13]; x14 = in->d[14]; x15 = in->d[15]; | ||
75 | |||
76 | SALSA_CORE_LOOP; | ||
77 | |||
78 | x0 += in->d[0]; x1 += in->d[1]; x2 += in->d[2]; x3 += in->d[3]; | ||
79 | x4 += in->d[4]; x5 += in->d[5]; x6 += in->d[6]; x7 += in->d[7]; | ||
80 | x8 += in->d[8]; x9 += in->d[9]; x10 += in->d[10]; x11 += in->d[11]; | ||
81 | x12 += in->d[12]; x13 += in->d[13]; x14 += in->d[14]; x15 += in->d[15]; | ||
82 | |||
83 | out->d[0] = cpu_to_le32(x0); | ||
84 | out->d[1] = cpu_to_le32(x1); | ||
85 | out->d[2] = cpu_to_le32(x2); | ||
86 | out->d[3] = cpu_to_le32(x3); | ||
87 | out->d[4] = cpu_to_le32(x4); | ||
88 | out->d[5] = cpu_to_le32(x5); | ||
89 | out->d[6] = cpu_to_le32(x6); | ||
90 | out->d[7] = cpu_to_le32(x7); | ||
91 | out->d[8] = cpu_to_le32(x8); | ||
92 | out->d[9] = cpu_to_le32(x9); | ||
93 | out->d[10] = cpu_to_le32(x10); | ||
94 | out->d[11] = cpu_to_le32(x11); | ||
95 | out->d[12] = cpu_to_le32(x12); | ||
96 | out->d[13] = cpu_to_le32(x13); | ||
97 | out->d[14] = cpu_to_le32(x14); | ||
98 | out->d[15] = cpu_to_le32(x15); | ||
99 | } | ||
100 | |||
101 | void cryptonite_salsa_core_xor(int rounds, block *out, block *in) | ||
102 | { | ||
103 | uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; | ||
104 | int i; | ||
105 | |||
106 | #define LOAD(i) (out->d[i] ^= in->d[i]) | ||
107 | x0 = LOAD(0); x1 = LOAD(1); x2 = LOAD(2); x3 = LOAD(3); | ||
108 | x4 = LOAD(4); x5 = LOAD(5); x6 = LOAD(6); x7 = LOAD(7); | ||
109 | x8 = LOAD(8); x9 = LOAD(9); x10 = LOAD(10); x11 = LOAD(11); | ||
110 | x12 = LOAD(12); x13 = LOAD(13); x14 = LOAD(14); x15 = LOAD(15); | ||
111 | #undef LOAD | ||
112 | |||
113 | SALSA_CORE_LOOP; | ||
114 | |||
115 | out->d[0] += x0; out->d[1] += x1; out->d[2] += x2; out->d[3] += x3; | ||
116 | out->d[4] += x4; out->d[5] += x5; out->d[6] += x6; out->d[7] += x7; | ||
117 | out->d[8] += x8; out->d[9] += x9; out->d[10] += x10; out->d[11] += x11; | ||
118 | out->d[12] += x12; out->d[13] += x13; out->d[14] += x14; out->d[15] += x15; | ||
119 | } | ||
120 | |||
121 | /* only 2 valid values for keylen are 256 (32) and 128 (16) */ | ||
122 | void cryptonite_salsa_init_core(cryptonite_salsa_state *st, | ||
123 | uint32_t keylen, const uint8_t *key, | ||
124 | uint32_t ivlen, const uint8_t *iv) | ||
125 | { | ||
126 | const uint8_t *constants = (keylen == 32) ? sigma : tau; | ||
127 | int i; | ||
128 | |||
129 | st->d[0] = load32(constants + 0); | ||
130 | st->d[5] = load32(constants + 4); | ||
131 | st->d[10] = load32(constants + 8); | ||
132 | st->d[15] = load32(constants + 12); | ||
133 | |||
134 | st->d[1] = load32(key + 0); | ||
135 | st->d[2] = load32(key + 4); | ||
136 | st->d[3] = load32(key + 8); | ||
137 | st->d[4] = load32(key + 12); | ||
138 | /* we repeat the key on 128 bits */ | ||
139 | if (keylen == 32) | ||
140 | key += 16; | ||
141 | st->d[11] = load32(key + 0); | ||
142 | st->d[12] = load32(key + 4); | ||
143 | st->d[13] = load32(key + 8); | ||
144 | st->d[14] = load32(key + 12); | ||
145 | |||
146 | st->d[9] = 0; | ||
147 | switch (ivlen) { | ||
148 | case 8: | ||
149 | st->d[6] = load32(iv + 0); | ||
150 | st->d[7] = load32(iv + 4); | ||
151 | st->d[8] = 0; | ||
152 | break; | ||
153 | case 12: | ||
154 | st->d[6] = load32(iv + 0); | ||
155 | st->d[7] = load32(iv + 4); | ||
156 | st->d[8] = load32(iv + 8); | ||
157 | default: | ||
158 | return; | ||
159 | } | ||
160 | } | ||
161 | |||
162 | void cryptonite_salsa_init(cryptonite_salsa_context *ctx, uint8_t nb_rounds, | ||
163 | uint32_t keylen, const uint8_t *key, | ||
164 | uint32_t ivlen, const uint8_t *iv) | ||
165 | { | ||
166 | memset(ctx, 0, sizeof(*ctx)); | ||
167 | ctx->nb_rounds = nb_rounds; | ||
168 | cryptonite_salsa_init_core(&ctx->st, keylen, key, ivlen, iv); | ||
169 | } | ||
170 | |||
171 | void cryptonite_salsa_combine(uint8_t *dst, cryptonite_salsa_context *ctx, const uint8_t *src, uint32_t bytes) | ||
172 | { | ||
173 | block out; | ||
174 | cryptonite_salsa_state *st; | ||
175 | int i; | ||
176 | |||
177 | if (!bytes) | ||
178 | return; | ||
179 | |||
180 | /* xor the previous buffer first (if any) */ | ||
181 | if (ctx->prev_len > 0) { | ||
182 | int to_copy = (ctx->prev_len < bytes) ? ctx->prev_len : bytes; | ||
183 | for (i = 0; i < to_copy; i++) | ||
184 | dst[i] = src[i] ^ ctx->prev[ctx->prev_ofs+i]; | ||
185 | memset(ctx->prev + ctx->prev_ofs, 0, to_copy); | ||
186 | ctx->prev_len -= to_copy; | ||
187 | ctx->prev_ofs += to_copy; | ||
188 | src += to_copy; | ||
189 | dst += to_copy; | ||
190 | bytes -= to_copy; | ||
191 | } | ||
192 | |||
193 | if (bytes == 0) | ||
194 | return; | ||
195 | |||
196 | st = &ctx->st; | ||
197 | |||
198 | /* xor new 64-bytes chunks and store the left over if any */ | ||
199 | for (; bytes >= 64; bytes -= 64, src += 64, dst += 64) { | ||
200 | /* generate new chunk and update state */ | ||
201 | salsa_core(ctx->nb_rounds, &out, st); | ||
202 | st->d[8] += 1; | ||
203 | if (st->d[8] == 0) | ||
204 | st->d[9] += 1; | ||
205 | |||
206 | for (i = 0; i < 64; ++i) | ||
207 | dst[i] = src[i] ^ out.b[i]; | ||
208 | } | ||
209 | |||
210 | if (bytes > 0) { | ||
211 | /* generate new chunk and update state */ | ||
212 | salsa_core(ctx->nb_rounds, &out, st); | ||
213 | st->d[8] += 1; | ||
214 | if (st->d[8] == 0) | ||
215 | st->d[9] += 1; | ||
216 | |||
217 | /* xor as much as needed */ | ||
218 | for (i = 0; i < bytes; i++) | ||
219 | dst[i] = src[i] ^ out.b[i]; | ||
220 | |||
221 | /* copy the left over in the buffer */ | ||
222 | ctx->prev_len = 64 - bytes; | ||
223 | ctx->prev_ofs = i; | ||
224 | for (; i < 64; i++) { | ||
225 | ctx->prev[i] = out.b[i]; | ||
226 | } | ||
227 | } | ||
228 | } | ||
229 | |||
230 | void cryptonite_salsa_generate(uint8_t *dst, cryptonite_salsa_context *ctx, uint32_t bytes) | ||
231 | { | ||
232 | cryptonite_salsa_state *st; | ||
233 | block out; | ||
234 | int i; | ||
235 | |||
236 | if (!bytes) | ||
237 | return; | ||
238 | |||
239 | /* xor the previous buffer first (if any) */ | ||
240 | if (ctx->prev_len > 0) { | ||
241 | int to_copy = (ctx->prev_len < bytes) ? ctx->prev_len : bytes; | ||
242 | for (i = 0; i < to_copy; i++) | ||
243 | dst[i] = ctx->prev[ctx->prev_ofs+i]; | ||
244 | memset(ctx->prev + ctx->prev_ofs, 0, to_copy); | ||
245 | ctx->prev_len -= to_copy; | ||
246 | ctx->prev_ofs += to_copy; | ||
247 | dst += to_copy; | ||
248 | bytes -= to_copy; | ||
249 | } | ||
250 | |||
251 | if (bytes == 0) | ||
252 | return; | ||
253 | |||
254 | st = &ctx->st; | ||
255 | |||
256 | if (ALIGNED64(dst)) { | ||
257 | /* xor new 64-bytes chunks and store the left over if any */ | ||
258 | for (; bytes >= 64; bytes -= 64, dst += 64) { | ||
259 | /* generate new chunk and update state */ | ||
260 | salsa_core(ctx->nb_rounds, (block *) dst, st); | ||
261 | st->d[8] += 1; | ||
262 | if (st->d[8] == 0) | ||
263 | st->d[9] += 1; | ||
264 | } | ||
265 | } else { | ||
266 | /* xor new 64-bytes chunks and store the left over if any */ | ||
267 | for (; bytes >= 64; bytes -= 64, dst += 64) { | ||
268 | /* generate new chunk and update state */ | ||
269 | salsa_core(ctx->nb_rounds, &out, st); | ||
270 | st->d[8] += 1; | ||
271 | if (st->d[8] == 0) | ||
272 | st->d[9] += 1; | ||
273 | |||
274 | for (i = 0; i < 64; ++i) | ||
275 | dst[i] = out.b[i]; | ||
276 | } | ||
277 | } | ||
278 | |||
279 | if (bytes > 0) { | ||
280 | /* generate new chunk and update state */ | ||
281 | salsa_core(ctx->nb_rounds, &out, st); | ||
282 | st->d[8] += 1; | ||
283 | if (st->d[8] == 0) | ||
284 | st->d[9] += 1; | ||
285 | |||
286 | /* xor as much as needed */ | ||
287 | for (i = 0; i < bytes; i++) | ||
288 | dst[i] = out.b[i]; | ||
289 | |||
290 | /* copy the left over in the buffer */ | ||
291 | ctx->prev_len = 64 - bytes; | ||
292 | ctx->prev_ofs = i; | ||
293 | for (; i < 64; i++) | ||
294 | ctx->prev[i] = out.b[i]; | ||
295 | } | ||
296 | } | ||
297 | |||