diff options
Diffstat (limited to 'toxencryptsave/crypto_pwhash_scryptsalsa208sha256/sse/pwhash_scryptsalsa208sha256_sse.c')
-rw-r--r-- | toxencryptsave/crypto_pwhash_scryptsalsa208sha256/sse/pwhash_scryptsalsa208sha256_sse.c | 398 |
1 files changed, 398 insertions, 0 deletions
diff --git a/toxencryptsave/crypto_pwhash_scryptsalsa208sha256/sse/pwhash_scryptsalsa208sha256_sse.c b/toxencryptsave/crypto_pwhash_scryptsalsa208sha256/sse/pwhash_scryptsalsa208sha256_sse.c new file mode 100644 index 00000000..856a655e --- /dev/null +++ b/toxencryptsave/crypto_pwhash_scryptsalsa208sha256/sse/pwhash_scryptsalsa208sha256_sse.c | |||
@@ -0,0 +1,398 @@ | |||
1 | #ifdef HAVE_CONFIG_H | ||
2 | #include "config.h" | ||
3 | #endif | ||
4 | #ifdef VANILLA_NACL /* toxcore only uses this when libsodium is unavailable */ | ||
5 | |||
6 | /*- | ||
7 | * Copyright 2009 Colin Percival | ||
8 | * Copyright 2012,2013 Alexander Peslyak | ||
9 | * All rights reserved. | ||
10 | * | ||
11 | * Redistribution and use in source and binary forms, with or without | ||
12 | * modification, are permitted provided that the following conditions | ||
13 | * are met: | ||
14 | * 1. Redistributions of source code must retain the above copyright | ||
15 | * notice, this list of conditions and the following disclaimer. | ||
16 | * 2. Redistributions in binary form must reproduce the above copyright | ||
17 | * notice, this list of conditions and the following disclaimer in the | ||
18 | * documentation and/or other materials provided with the distribution. | ||
19 | * | ||
20 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND | ||
21 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
23 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | ||
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | ||
26 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
27 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||
28 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||
29 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||
30 | * SUCH DAMAGE. | ||
31 | * | ||
32 | * This file was originally written by Colin Percival as part of the Tarsnap | ||
33 | * online backup system. | ||
34 | */ | ||
35 | |||
36 | #if defined(HAVE_EMMINTRIN_H) || defined(_MSC_VER) | ||
37 | #if __GNUC__ | ||
38 | # pragma GCC target("sse2") | ||
39 | #endif | ||
40 | #include <emmintrin.h> | ||
41 | #if defined(__XOP__) && defined(DISABLED) | ||
42 | # include <x86intrin.h> | ||
43 | #endif | ||
44 | |||
45 | #include <errno.h> | ||
46 | #include <limits.h> | ||
47 | #include <stdint.h> | ||
48 | #include <stdlib.h> | ||
49 | #include <string.h> | ||
50 | |||
51 | #include "../pbkdf2-sha256.h" | ||
52 | #include "../sysendian.h" | ||
53 | #include "../crypto_scrypt.h" | ||
54 | |||
#if defined(__XOP__) && defined(DISABLED)
/*
 * ARX(out, in1, in2, s):
 * Add-rotate-xor step on four 32-bit lanes: out ^= rotl32(in1 + in2, s).
 * This variant uses the XOP rotate intrinsic; it is only compiled when both
 * __XOP__ and DISABLED are defined.
 */
#define ARX(out, in1, in2, s) \
	out = _mm_xor_si128(out, _mm_roti_epi32(_mm_add_epi32(in1, in2), (s)));
#else
/*
 * ARX(out, in1, in2, s):
 * Add-rotate-xor step on four 32-bit lanes: out ^= rotl32(in1 + in2, s).
 * The rotation is built from a left shift by s and a right shift by 32 - s;
 * the two halves occupy disjoint bits, so XORing them into out one after the
 * other equals XORing in the rotated value.  s is parenthesized so that an
 * expression argument yields the intended shift counts.
 *
 * The macro expands to a braced block and is invoked without a trailing
 * semicolon.
 */
#define ARX(out, in1, in2, s) \
	{ \
		__m128i T = _mm_add_epi32(in1, in2); \
		out = _mm_xor_si128(out, _mm_slli_epi32(T, (s))); \
		out = _mm_xor_si128(out, _mm_srli_epi32(T, 32 - (s))); \
	}
#endif
66 | |||
/*
 * SALSA20_2ROUNDS:
 * One Salsa20 double-round (a "column" round followed by a "row" round) on
 * the state held in the __m128i variables X0 ... X3 of the invoking scope.
 * After each group of four ARX steps the lanes of X1, X2 and X3 are rotated
 * with _mm_shuffle_epi32; the second set of shuffles (0x39, 0x4E, 0x93) is
 * the inverse of the first (0x93, 0x4E, 0x39), so the lane layout on exit
 * matches the layout on entry.
 */
#define SALSA20_2ROUNDS \
	/* Operate on "columns". */ \
	ARX(X1, X0, X3, 7) \
	ARX(X2, X1, X0, 9) \
	ARX(X3, X2, X1, 13) \
	ARX(X0, X3, X2, 18) \
	\
	/* Rearrange data. */ \
	X1 = _mm_shuffle_epi32(X1, 0x93); \
	X2 = _mm_shuffle_epi32(X2, 0x4E); \
	X3 = _mm_shuffle_epi32(X3, 0x39); \
	\
	/* Operate on "rows". */ \
	ARX(X3, X0, X1, 7) \
	ARX(X2, X3, X0, 9) \
	ARX(X1, X2, X3, 13) \
	ARX(X0, X1, X2, 18) \
	\
	/* Rearrange data. */ \
	X1 = _mm_shuffle_epi32(X1, 0x39); \
	X2 = _mm_shuffle_epi32(X2, 0x4E); \
	X3 = _mm_shuffle_epi32(X3, 0x93);
89 | |||
/**
 * SALSA20_8_XOR(in, out):
 * XOR the four vectors in[0 .. 3] into the running state (X0 ... X3), apply
 * the salsa20/8 core (four double-rounds) to the result, add the pre-round
 * state back in lane by lane, and store the outcome both in (X0 ... X3) and
 * in out[0 .. 3].  X0 ... X3 must be __m128i variables in the invoking scope.
 *
 * The macro expands to a braced block and is invoked without a trailing
 * semicolon.
 */
#define SALSA20_8_XOR(in, out) \
	{ \
		__m128i Y0 = X0 = _mm_xor_si128(X0, (in)[0]); \
		__m128i Y1 = X1 = _mm_xor_si128(X1, (in)[1]); \
		__m128i Y2 = X2 = _mm_xor_si128(X2, (in)[2]); \
		__m128i Y3 = X3 = _mm_xor_si128(X3, (in)[3]); \
		SALSA20_2ROUNDS \
		SALSA20_2ROUNDS \
		SALSA20_2ROUNDS \
		SALSA20_2ROUNDS \
		(out)[0] = X0 = _mm_add_epi32(X0, Y0); \
		(out)[1] = X1 = _mm_add_epi32(X1, Y1); \
		(out)[2] = X2 = _mm_add_epi32(X2, Y2); \
		(out)[3] = X3 = _mm_add_epi32(X3, Y3); \
	}
108 | |||
109 | /** | ||
110 | * blockmix_salsa8(Bin, Bout, r): | ||
111 | * Compute Bout = BlockMix_{salsa20/8, r}(Bin). The input Bin must be 128r | ||
112 | * bytes in length; the output Bout must also be the same size. | ||
113 | */ | ||
114 | static inline void | ||
115 | blockmix_salsa8(const __m128i * Bin, __m128i * Bout, size_t r) | ||
116 | { | ||
117 | __m128i X0, X1, X2, X3; | ||
118 | size_t i; | ||
119 | |||
120 | /* 1: X <-- B_{2r - 1} */ | ||
121 | X0 = Bin[8 * r - 4]; | ||
122 | X1 = Bin[8 * r - 3]; | ||
123 | X2 = Bin[8 * r - 2]; | ||
124 | X3 = Bin[8 * r - 1]; | ||
125 | |||
126 | /* 3: X <-- H(X \xor B_i) */ | ||
127 | /* 4: Y_i <-- X */ | ||
128 | /* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */ | ||
129 | SALSA20_8_XOR(Bin, Bout) | ||
130 | |||
131 | /* 2: for i = 0 to 2r - 1 do */ | ||
132 | r--; | ||
133 | for (i = 0; i < r;) { | ||
134 | /* 3: X <-- H(X \xor B_i) */ | ||
135 | /* 4: Y_i <-- X */ | ||
136 | /* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */ | ||
137 | SALSA20_8_XOR(&Bin[i * 8 + 4], &Bout[(r + i) * 4 + 4]) | ||
138 | |||
139 | i++; | ||
140 | |||
141 | /* 3: X <-- H(X \xor B_i) */ | ||
142 | /* 4: Y_i <-- X */ | ||
143 | /* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */ | ||
144 | SALSA20_8_XOR(&Bin[i * 8], &Bout[i * 4]) | ||
145 | } | ||
146 | |||
147 | /* 3: X <-- H(X \xor B_i) */ | ||
148 | /* 4: Y_i <-- X */ | ||
149 | /* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */ | ||
150 | SALSA20_8_XOR(&Bin[i * 8 + 4], &Bout[(r + i) * 4 + 4]) | ||
151 | } | ||
152 | |||
/*
 * XOR4(in):
 * XOR the four vectors in[0 .. 3] into the running state X0 ... X3, which
 * must be __m128i variables in the invoking scope.  Invoked without a
 * trailing semicolon.
 */
#define XOR4(in) \
	X0 = _mm_xor_si128(X0, (in)[0]); \
	X1 = _mm_xor_si128(X1, (in)[1]); \
	X2 = _mm_xor_si128(X2, (in)[2]); \
	X3 = _mm_xor_si128(X3, (in)[3]);

/*
 * XOR4_2(in1, in2):
 * Set the running state X0 ... X3 to in1[k] ^ in2[k] for k = 0 .. 3,
 * overwriting whatever the state held before.  Invoked without a trailing
 * semicolon.
 */
#define XOR4_2(in1, in2) \
	X0 = _mm_xor_si128((in1)[0], (in2)[0]); \
	X1 = _mm_xor_si128((in1)[1], (in2)[1]); \
	X2 = _mm_xor_si128((in1)[2], (in2)[2]); \
	X3 = _mm_xor_si128((in1)[3], (in2)[3]);
164 | |||
165 | static inline uint32_t | ||
166 | blockmix_salsa8_xor(const __m128i * Bin1, const __m128i * Bin2, __m128i * Bout, | ||
167 | size_t r) | ||
168 | { | ||
169 | __m128i X0, X1, X2, X3; | ||
170 | size_t i; | ||
171 | |||
172 | /* 1: X <-- B_{2r - 1} */ | ||
173 | XOR4_2(&Bin1[8 * r - 4], &Bin2[8 * r - 4]) | ||
174 | |||
175 | /* 3: X <-- H(X \xor B_i) */ | ||
176 | /* 4: Y_i <-- X */ | ||
177 | /* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */ | ||
178 | XOR4(Bin1) | ||
179 | SALSA20_8_XOR(Bin2, Bout) | ||
180 | |||
181 | /* 2: for i = 0 to 2r - 1 do */ | ||
182 | r--; | ||
183 | for (i = 0; i < r;) { | ||
184 | /* 3: X <-- H(X \xor B_i) */ | ||
185 | /* 4: Y_i <-- X */ | ||
186 | /* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */ | ||
187 | XOR4(&Bin1[i * 8 + 4]) | ||
188 | SALSA20_8_XOR(&Bin2[i * 8 + 4], &Bout[(r + i) * 4 + 4]) | ||
189 | |||
190 | i++; | ||
191 | |||
192 | /* 3: X <-- H(X \xor B_i) */ | ||
193 | /* 4: Y_i <-- X */ | ||
194 | /* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */ | ||
195 | XOR4(&Bin1[i * 8]) | ||
196 | SALSA20_8_XOR(&Bin2[i * 8], &Bout[i * 4]) | ||
197 | } | ||
198 | |||
199 | /* 3: X <-- H(X \xor B_i) */ | ||
200 | /* 4: Y_i <-- X */ | ||
201 | /* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */ | ||
202 | XOR4(&Bin1[i * 8 + 4]) | ||
203 | SALSA20_8_XOR(&Bin2[i * 8 + 4], &Bout[(r + i) * 4 + 4]) | ||
204 | |||
205 | return _mm_cvtsi128_si32(X0); | ||
206 | } | ||
207 | |||
208 | #undef ARX | ||
209 | #undef SALSA20_2ROUNDS | ||
210 | #undef SALSA20_8_XOR | ||
211 | #undef XOR4 | ||
212 | #undef XOR4_2 | ||
213 | |||
/**
 * integerify(B, r):
 * Return the first 32-bit word of B_{2r-1}, the last 64-byte sub-block of
 * the 128r-byte block B.  The word is read in host byte order, which equals
 * the little-endian value on the little-endian targets this SSE2 code is
 * built for.
 *
 * The word is fetched with memcpy rather than through a casted pointer, so
 * the read does not depend on the effective type of the bytes in B.
 */
static inline uint32_t
integerify(const void * B, size_t r)
{
	const uint8_t * last = (const uint8_t *) B + (2 * r - 1) * 64;
	uint32_t word;

	memcpy(&word, last, sizeof word);
	return word;
}
223 | |||
/**
 * smix(B, r, N, V, XY):
 * Compute B = SMix_r(B, N).  The input B must be 128r bytes in length;
 * the temporary storage V must be 128rN bytes in length; the temporary
 * storage XY must be 256r + 64 bytes in length.  The value N must be a
 * power of 2 greater than 1.  The arrays B, V, and XY must be aligned to a
 * multiple of 64 bytes.
 *
 * B is loaded into V_0 with the sixteen 32-bit words of every 64-byte
 * sub-block permuted (destination word i comes from source word 5i mod 16),
 * and the same permutation is applied in reverse when the result is written
 * back to B at the end.
 */
static void
smix(uint8_t * B, size_t r, uint32_t N, void * V, void * XY)
{
	size_t s = 128 * r;
	__m128i * X = (__m128i *) V, * Y;
	uint32_t * X32 = (uint32_t *) V;
	uint32_t i, j;
	size_t k;

	/* 1: X <-- B */
	/* 3: V_i <-- X */
	for (k = 0; k < 2 * r; k++) {
		for (i = 0; i < 16; i++) {
			X32[k * 16 + i] =
			    le32dec(&B[(k * 16 + (i * 5 % 16)) * 4]);
		}
	}

	/* 2: for i = 0 to N - 1 do */
	/* Two steps per pass: V_{i-1} -> V_i, then V_i -> V_{i+1}. */
	for (i = 1; i < N - 1; i += 2) {
		/* 4: X <-- H(X) */
		/* 3: V_i <-- X */
		Y = (__m128i *)((uint8_t *) V + (size_t) i * s);
		blockmix_salsa8(X, Y, r);

		/* 4: X <-- H(X) */
		/* 3: V_i <-- X */
		X = (__m128i *)((uint8_t *) V + ((size_t) i + 1) * s);
		blockmix_salsa8(Y, X, r);
	}

	/* 4: X <-- H(X) */
	/* 3: V_i <-- X */
	/* Here i == N - 1, so this fills the last slot of V. */
	Y = (__m128i *)((uint8_t *) V + (size_t) i * s);
	blockmix_salsa8(X, Y, r);

	/* 4: X <-- H(X) */
	/* 3: V_i <-- X */
	/* The next value is not stored in V; it goes to the start of XY. */
	X = (__m128i *) XY;
	blockmix_salsa8(Y, X, r);

	X32 = (uint32_t *) XY;
	Y = (__m128i *)((uint8_t *) XY + s);

	/* 7: j <-- Integerify(X) mod N */
	j = integerify(X, r) & (N - 1);

	/* 6: for i = 0 to N - 1 do */
	/* Two steps per pass, alternating between the X and Y halves of XY. */
	for (i = 0; i < N; i += 2) {
		__m128i * V_j = (__m128i *)((uint8_t *) V + (size_t) j * s);

		/* 8: X <-- H(X \xor V_j) */
		/* 7: j <-- Integerify(X) mod N */
		j = blockmix_salsa8_xor(X, V_j, Y, r) & (N - 1);
		V_j = (__m128i *)((uint8_t *) V + (size_t) j * s);

		/* 8: X <-- H(X \xor V_j) */
		/* 7: j <-- Integerify(X) mod N */
		j = blockmix_salsa8_xor(Y, V_j, X, r) & (N - 1);
	}

	/* 10: B' <-- X */
	for (k = 0; k < 2 * r; k++) {
		for (i = 0; i < 16; i++) {
			le32enc(&B[(k * 16 + (i * 5 % 16)) * 4],
			    X32[k * 16 + i]);
		}
	}
}
301 | |||
302 | /** | ||
303 | * escrypt_kdf(local, passwd, passwdlen, salt, saltlen, | ||
304 | * N, r, p, buf, buflen): | ||
305 | * Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r, | ||
306 | * p, buflen) and write the result into buf. The parameters r, p, and buflen | ||
307 | * must satisfy r * p < 2^30 and buflen <= (2^32 - 1) * 32. The parameter N | ||
308 | * must be a power of 2 greater than 1. | ||
309 | * | ||
310 | * Return 0 on success; or -1 on error. | ||
311 | */ | ||
312 | int | ||
313 | escrypt_kdf_sse(escrypt_local_t * local, | ||
314 | const uint8_t * passwd, size_t passwdlen, | ||
315 | const uint8_t * salt, size_t saltlen, | ||
316 | uint64_t N, uint32_t _r, uint32_t _p, | ||
317 | uint8_t * buf, size_t buflen) | ||
318 | { | ||
319 | size_t B_size, V_size, XY_size, need; | ||
320 | uint8_t * B; | ||
321 | uint32_t * V, * XY; | ||
322 | size_t r = _r, p = _p; | ||
323 | uint32_t i; | ||
324 | |||
325 | /* Sanity-check parameters. */ | ||
326 | #if SIZE_MAX > UINT32_MAX | ||
327 | if (buflen > (((uint64_t)(1) << 32) - 1) * 32) { | ||
328 | errno = EFBIG; | ||
329 | return -1; | ||
330 | } | ||
331 | #endif | ||
332 | if ((uint64_t)(r) * (uint64_t)(p) >= (1 << 30)) { | ||
333 | errno = EFBIG; | ||
334 | return -1; | ||
335 | } | ||
336 | if (N > UINT32_MAX) { | ||
337 | errno = EFBIG; | ||
338 | return -1; | ||
339 | } | ||
340 | if (((N & (N - 1)) != 0) || (N < 2)) { | ||
341 | errno = EINVAL; | ||
342 | return -1; | ||
343 | } | ||
344 | if (r == 0 || p == 0) { | ||
345 | errno = EINVAL; | ||
346 | return -1; | ||
347 | } | ||
348 | if ((r > SIZE_MAX / 128 / p) || | ||
349 | #if SIZE_MAX / 256 <= UINT32_MAX | ||
350 | (r > SIZE_MAX / 256) || | ||
351 | #endif | ||
352 | (N > SIZE_MAX / 128 / r)) { | ||
353 | errno = ENOMEM; | ||
354 | return -1; | ||
355 | } | ||
356 | |||
357 | /* Allocate memory. */ | ||
358 | B_size = (size_t)128 * r * p; | ||
359 | V_size = (size_t)128 * r * N; | ||
360 | need = B_size + V_size; | ||
361 | if (need < V_size) { | ||
362 | errno = ENOMEM; | ||
363 | return -1; | ||
364 | } | ||
365 | XY_size = (size_t)256 * r + 64; | ||
366 | need += XY_size; | ||
367 | if (need < XY_size) { | ||
368 | errno = ENOMEM; | ||
369 | return -1; | ||
370 | } | ||
371 | if (local->size < need) { | ||
372 | if (free_region(local)) | ||
373 | return -1; | ||
374 | if (!alloc_region(local, need)) | ||
375 | return -1; | ||
376 | } | ||
377 | B = (uint8_t *)local->aligned; | ||
378 | V = (uint32_t *)((uint8_t *)B + B_size); | ||
379 | XY = (uint32_t *)((uint8_t *)V + V_size); | ||
380 | |||
381 | /* 1: (B_0 ... B_{p-1}) <-- PBKDF2(P, S, 1, p * MFLen) */ | ||
382 | PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, 1, B, B_size); | ||
383 | |||
384 | /* 2: for i = 0 to p - 1 do */ | ||
385 | for (i = 0; i < p; i++) { | ||
386 | /* 3: B_i <-- MF(B_i, N) */ | ||
387 | smix(&B[(size_t)128 * i * r], r, N, V, XY); | ||
388 | } | ||
389 | |||
390 | /* 5: DK <-- PBKDF2(P, B, 1, dkLen) */ | ||
391 | PBKDF2_SHA256(passwd, passwdlen, B, B_size, 1, buf, buflen); | ||
392 | |||
393 | /* Success! */ | ||
394 | return 0; | ||
395 | } | ||
396 | #endif | ||
397 | |||
398 | #endif | ||