summaryrefslogtreecommitdiff
path: root/nacl/crypto_scalarmult/curve25519/donna_c64/smult.c
diff options
context:
space:
mode:
Diffstat (limited to 'nacl/crypto_scalarmult/curve25519/donna_c64/smult.c')
-rw-r--r--nacl/crypto_scalarmult/curve25519/donna_c64/smult.c477
1 files changed, 477 insertions, 0 deletions
diff --git a/nacl/crypto_scalarmult/curve25519/donna_c64/smult.c b/nacl/crypto_scalarmult/curve25519/donna_c64/smult.c
new file mode 100644
index 00000000..6d26956b
--- /dev/null
+++ b/nacl/crypto_scalarmult/curve25519/donna_c64/smult.c
@@ -0,0 +1,477 @@
1/* Copyright 2008, Google Inc.
2 * All rights reserved.
3 *
4 * Code released into the public domain.
5 *
6 * curve25519-donna: Curve25519 elliptic curve, public key function
7 *
8 * http://code.google.com/p/curve25519-donna/
9 *
10 * Adam Langley <agl@imperialviolet.org>
11 *
12 * Derived from public domain C code by Daniel J. Bernstein <djb@cr.yp.to>
13 *
14 * More information about curve25519 can be found here
15 * http://cr.yp.to/ecdh.html
16 *
17 * djb's sample implementation of curve25519 is written in a special assembly
18 * language called qhasm and uses the floating point registers.
19 *
20 * This is, almost, a clean room reimplementation from the curve25519 paper. It
21 * uses many of the tricks described therein. Only the crecip function is taken
22 * from the sample implementation.
23 */
24
25#include <string.h>
26#include <stdint.h>
27#include "crypto_scalarmult.h"
28
29typedef uint8_t u8;
30typedef uint64_t felem;
31// This is a special gcc mode for 128-bit integers. It's implemented on 64-bit
32// platforms only as far as I know.
33typedef unsigned uint128_t __attribute__((mode(TI)));
34
35/* Sum two numbers: output += in */
36static void fsum(felem *output, const felem *in) {
37 unsigned i;
38 for (i = 0; i < 5; ++i) output[i] += in[i];
39}
40
41/* Find the difference of two numbers: output = in - output
42 * (note the order of the arguments!)
43 */
44static void fdifference_backwards(felem *ioutput, const felem *iin) {
45 static const int64_t twotothe51 = (1l << 51);
46 const int64_t *in = (const int64_t *) iin;
47 int64_t *out = (int64_t *) ioutput;
48
49 out[0] = in[0] - out[0];
50 out[1] = in[1] - out[1];
51 out[2] = in[2] - out[2];
52 out[3] = in[3] - out[3];
53 out[4] = in[4] - out[4];
54
55 // An arithmetic shift right of 63 places turns a positive number to 0 and a
56 // negative number to all 1's. This gives us a bitmask that lets us avoid
57 // side-channel prone branches.
58 int64_t t;
59
60#define NEGCHAIN(a,b) \
61 t = out[a] >> 63; \
62 out[a] += twotothe51 & t; \
63 out[b] -= 1 & t;
64
65#define NEGCHAIN19(a,b) \
66 t = out[a] >> 63; \
67 out[a] += twotothe51 & t; \
68 out[b] -= 19 & t;
69
70 NEGCHAIN(0, 1);
71 NEGCHAIN(1, 2);
72 NEGCHAIN(2, 3);
73 NEGCHAIN(3, 4);
74 NEGCHAIN19(4, 0);
75 NEGCHAIN(0, 1);
76 NEGCHAIN(1, 2);
77 NEGCHAIN(2, 3);
78 NEGCHAIN(3, 4);
79}
80
81/* Multiply a number by a scalar: output = in * scalar */
82static void fscalar_product(felem *output, const felem *in, const felem scalar) {
83 uint128_t a;
84
85 a = ((uint128_t) in[0]) * scalar;
86 output[0] = a & 0x7ffffffffffff;
87
88 a = ((uint128_t) in[1]) * scalar + (a >> 51);
89 output[1] = a & 0x7ffffffffffff;
90
91 a = ((uint128_t) in[2]) * scalar + (a >> 51);
92 output[2] = a & 0x7ffffffffffff;
93
94 a = ((uint128_t) in[3]) * scalar + (a >> 51);
95 output[3] = a & 0x7ffffffffffff;
96
97 a = ((uint128_t) in[4]) * scalar + (a >> 51);
98 output[4] = a & 0x7ffffffffffff;
99
100 output[0] += (a >> 51) * 19;
101}
102
103/* Multiply two numbers: output = in2 * in
104 *
105 * output must be distinct to both inputs. The inputs are reduced coefficient
106 * form, the output is not.
107 */
108static void fmul(felem *output, const felem *in2, const felem *in) {
109 uint128_t t[9];
110
111 t[0] = ((uint128_t) in[0]) * in2[0];
112 t[1] = ((uint128_t) in[0]) * in2[1] +
113 ((uint128_t) in[1]) * in2[0];
114 t[2] = ((uint128_t) in[0]) * in2[2] +
115 ((uint128_t) in[2]) * in2[0] +
116 ((uint128_t) in[1]) * in2[1];
117 t[3] = ((uint128_t) in[0]) * in2[3] +
118 ((uint128_t) in[3]) * in2[0] +
119 ((uint128_t) in[1]) * in2[2] +
120 ((uint128_t) in[2]) * in2[1];
121 t[4] = ((uint128_t) in[0]) * in2[4] +
122 ((uint128_t) in[4]) * in2[0] +
123 ((uint128_t) in[3]) * in2[1] +
124 ((uint128_t) in[1]) * in2[3] +
125 ((uint128_t) in[2]) * in2[2];
126 t[5] = ((uint128_t) in[4]) * in2[1] +
127 ((uint128_t) in[1]) * in2[4] +
128 ((uint128_t) in[2]) * in2[3] +
129 ((uint128_t) in[3]) * in2[2];
130 t[6] = ((uint128_t) in[4]) * in2[2] +
131 ((uint128_t) in[2]) * in2[4] +
132 ((uint128_t) in[3]) * in2[3];
133 t[7] = ((uint128_t) in[3]) * in2[4] +
134 ((uint128_t) in[4]) * in2[3];
135 t[8] = ((uint128_t) in[4]) * in2[4];
136
137 t[0] += t[5] * 19;
138 t[1] += t[6] * 19;
139 t[2] += t[7] * 19;
140 t[3] += t[8] * 19;
141
142 t[1] += t[0] >> 51;
143 t[0] &= 0x7ffffffffffff;
144 t[2] += t[1] >> 51;
145 t[1] &= 0x7ffffffffffff;
146 t[3] += t[2] >> 51;
147 t[2] &= 0x7ffffffffffff;
148 t[4] += t[3] >> 51;
149 t[3] &= 0x7ffffffffffff;
150 t[0] += 19 * (t[4] >> 51);
151 t[4] &= 0x7ffffffffffff;
152 t[1] += t[0] >> 51;
153 t[0] &= 0x7ffffffffffff;
154 t[2] += t[1] >> 51;
155 t[1] &= 0x7ffffffffffff;
156
157 output[0] = t[0];
158 output[1] = t[1];
159 output[2] = t[2];
160 output[3] = t[3];
161 output[4] = t[4];
162}
163
164static void
165fsquare(felem *output, const felem *in) {
166 uint128_t t[9];
167
168 t[0] = ((uint128_t) in[0]) * in[0];
169 t[1] = ((uint128_t) in[0]) * in[1] * 2;
170 t[2] = ((uint128_t) in[0]) * in[2] * 2 +
171 ((uint128_t) in[1]) * in[1];
172 t[3] = ((uint128_t) in[0]) * in[3] * 2 +
173 ((uint128_t) in[1]) * in[2] * 2;
174 t[4] = ((uint128_t) in[0]) * in[4] * 2 +
175 ((uint128_t) in[3]) * in[1] * 2 +
176 ((uint128_t) in[2]) * in[2];
177 t[5] = ((uint128_t) in[4]) * in[1] * 2 +
178 ((uint128_t) in[2]) * in[3] * 2;
179 t[6] = ((uint128_t) in[4]) * in[2] * 2 +
180 ((uint128_t) in[3]) * in[3];
181 t[7] = ((uint128_t) in[3]) * in[4] * 2;
182 t[8] = ((uint128_t) in[4]) * in[4];
183
184 t[0] += t[5] * 19;
185 t[1] += t[6] * 19;
186 t[2] += t[7] * 19;
187 t[3] += t[8] * 19;
188
189 t[1] += t[0] >> 51;
190 t[0] &= 0x7ffffffffffff;
191 t[2] += t[1] >> 51;
192 t[1] &= 0x7ffffffffffff;
193 t[3] += t[2] >> 51;
194 t[2] &= 0x7ffffffffffff;
195 t[4] += t[3] >> 51;
196 t[3] &= 0x7ffffffffffff;
197 t[0] += 19 * (t[4] >> 51);
198 t[4] &= 0x7ffffffffffff;
199 t[1] += t[0] >> 51;
200 t[0] &= 0x7ffffffffffff;
201
202 output[0] = t[0];
203 output[1] = t[1];
204 output[2] = t[2];
205 output[3] = t[3];
206 output[4] = t[4];
207}
208
209/* Take a little-endian, 32-byte number and expand it into polynomial form */
210static void
211fexpand(felem *output, const u8 *in) {
212 output[0] = *((const uint64_t *)(in)) & 0x7ffffffffffff;
213 output[1] = (*((const uint64_t *)(in+6)) >> 3) & 0x7ffffffffffff;
214 output[2] = (*((const uint64_t *)(in+12)) >> 6) & 0x7ffffffffffff;
215 output[3] = (*((const uint64_t *)(in+19)) >> 1) & 0x7ffffffffffff;
216 output[4] = (*((const uint64_t *)(in+25)) >> 4) & 0x7ffffffffffff;
217}
218
219/* Take a fully reduced polynomial form number and contract it into a
220 * little-endian, 32-byte array
221 */
222static void
223fcontract(u8 *output, const felem *input) {
224 uint128_t t[5];
225
226 t[0] = input[0];
227 t[1] = input[1];
228 t[2] = input[2];
229 t[3] = input[3];
230 t[4] = input[4];
231
232 t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff;
233 t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff;
234 t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff;
235 t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
236 t[0] += 19 * (t[4] >> 51); t[4] &= 0x7ffffffffffff;
237
238 t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff;
239 t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff;
240 t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff;
241 t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
242 t[0] += 19 * (t[4] >> 51); t[4] &= 0x7ffffffffffff;
243
244 /* now t is between 0 and 2^255-1, properly carried. */
245 /* case 1: between 0 and 2^255-20. case 2: between 2^255-19 and 2^255-1. */
246
247 t[0] += 19;
248
249 t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff;
250 t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff;
251 t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff;
252 t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
253 t[0] += 19 * (t[4] >> 51); t[4] &= 0x7ffffffffffff;
254
255 /* now between 19 and 2^255-1 in both cases, and offset by 19. */
256
257 t[0] += 0x8000000000000 - 19;
258 t[1] += 0x8000000000000 - 1;
259 t[2] += 0x8000000000000 - 1;
260 t[3] += 0x8000000000000 - 1;
261 t[4] += 0x8000000000000 - 1;
262
263 /* now between 2^255 and 2^256-20, and offset by 2^255. */
264
265 t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff;
266 t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff;
267 t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff;
268 t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
269 t[4] &= 0x7ffffffffffff;
270
271 *((uint64_t *)(output)) = t[0] | (t[1] << 51);
272 *((uint64_t *)(output+8)) = (t[1] >> 13) | (t[2] << 38);
273 *((uint64_t *)(output+16)) = (t[2] >> 26) | (t[3] << 25);
274 *((uint64_t *)(output+24)) = (t[3] >> 39) | (t[4] << 12);
275}
276
277/* Input: Q, Q', Q-Q'
278 * Output: 2Q, Q+Q'
279 *
280 * x2 z3: long form
281 * x3 z3: long form
282 * x z: short form, destroyed
283 * xprime zprime: short form, destroyed
284 * qmqp: short form, preserved
285 */
286static void
287fmonty(felem *x2, felem *z2, /* output 2Q */
288 felem *x3, felem *z3, /* output Q + Q' */
289 felem *x, felem *z, /* input Q */
290 felem *xprime, felem *zprime, /* input Q' */
291 const felem *qmqp /* input Q - Q' */) {
292 felem origx[5], origxprime[5], zzz[5], xx[5], zz[5], xxprime[5],
293 zzprime[5], zzzprime[5];
294
295 memcpy(origx, x, 5 * sizeof(felem));
296 fsum(x, z);
297 fdifference_backwards(z, origx); // does x - z
298
299 memcpy(origxprime, xprime, sizeof(felem) * 5);
300 fsum(xprime, zprime);
301 fdifference_backwards(zprime, origxprime);
302 fmul(xxprime, xprime, z);
303 fmul(zzprime, x, zprime);
304 memcpy(origxprime, xxprime, sizeof(felem) * 5);
305 fsum(xxprime, zzprime);
306 fdifference_backwards(zzprime, origxprime);
307 fsquare(x3, xxprime);
308 fsquare(zzzprime, zzprime);
309 fmul(z3, zzzprime, qmqp);
310
311 fsquare(xx, x);
312 fsquare(zz, z);
313 fmul(x2, xx, zz);
314 fdifference_backwards(zz, xx); // does zz = xx - zz
315 fscalar_product(zzz, zz, 121665);
316 fsum(zzz, xx);
317 fmul(z2, zz, zzz);
318}
319
320// -----------------------------------------------------------------------------
321// Maybe swap the contents of two felem arrays (@a and @b), each @len elements
322// long. Perform the swap iff @swap is non-zero.
323//
324// This function performs the swap without leaking any side-channel
325// information.
326// -----------------------------------------------------------------------------
327static void
328swap_conditional(felem *a, felem *b, unsigned len, felem iswap) {
329 unsigned i;
330 const felem swap = -iswap;
331
332 for (i = 0; i < len; ++i) {
333 const felem x = swap & (a[i] ^ b[i]);
334 a[i] ^= x;
335 b[i] ^= x;
336 }
337}
338
339/* Calculates nQ where Q is the x-coordinate of a point on the curve
340 *
341 * resultx/resultz: the x coordinate of the resulting curve point (short form)
342 * n: a little endian, 32-byte number
343 * q: a point of the curve (short form)
344 */
345static void
346cmult(felem *resultx, felem *resultz, const u8 *n, const felem *q) {
347 felem a[5] = {0}, b[5] = {1}, c[5] = {1}, d[5] = {0};
348 felem *nqpqx = a, *nqpqz = b, *nqx = c, *nqz = d, *t;
349 felem e[5] = {0}, f[5] = {1}, g[5] = {0}, h[5] = {1};
350 felem *nqpqx2 = e, *nqpqz2 = f, *nqx2 = g, *nqz2 = h;
351
352 unsigned i, j;
353
354 memcpy(nqpqx, q, sizeof(felem) * 5);
355
356 for (i = 0; i < 32; ++i) {
357 u8 byte = n[31 - i];
358 for (j = 0; j < 8; ++j) {
359 const felem bit = byte >> 7;
360
361 swap_conditional(nqx, nqpqx, 5, bit);
362 swap_conditional(nqz, nqpqz, 5, bit);
363 fmonty(nqx2, nqz2,
364 nqpqx2, nqpqz2,
365 nqx, nqz,
366 nqpqx, nqpqz,
367 q);
368 swap_conditional(nqx2, nqpqx2, 5, bit);
369 swap_conditional(nqz2, nqpqz2, 5, bit);
370
371 t = nqx;
372 nqx = nqx2;
373 nqx2 = t;
374 t = nqz;
375 nqz = nqz2;
376 nqz2 = t;
377 t = nqpqx;
378 nqpqx = nqpqx2;
379 nqpqx2 = t;
380 t = nqpqz;
381 nqpqz = nqpqz2;
382 nqpqz2 = t;
383
384 byte <<= 1;
385 }
386 }
387
388 memcpy(resultx, nqx, sizeof(felem) * 5);
389 memcpy(resultz, nqz, sizeof(felem) * 5);
390}
391
392// -----------------------------------------------------------------------------
393// Shamelessly copied from djb's code
394// -----------------------------------------------------------------------------
395static void
396crecip(felem *out, const felem *z) {
397 felem z2[5];
398 felem z9[5];
399 felem z11[5];
400 felem z2_5_0[5];
401 felem z2_10_0[5];
402 felem z2_20_0[5];
403 felem z2_50_0[5];
404 felem z2_100_0[5];
405 felem t0[5];
406 felem t1[5];
407 int i;
408
409 /* 2 */ fsquare(z2,z);
410 /* 4 */ fsquare(t1,z2);
411 /* 8 */ fsquare(t0,t1);
412 /* 9 */ fmul(z9,t0,z);
413 /* 11 */ fmul(z11,z9,z2);
414 /* 22 */ fsquare(t0,z11);
415 /* 2^5 - 2^0 = 31 */ fmul(z2_5_0,t0,z9);
416
417 /* 2^6 - 2^1 */ fsquare(t0,z2_5_0);
418 /* 2^7 - 2^2 */ fsquare(t1,t0);
419 /* 2^8 - 2^3 */ fsquare(t0,t1);
420 /* 2^9 - 2^4 */ fsquare(t1,t0);
421 /* 2^10 - 2^5 */ fsquare(t0,t1);
422 /* 2^10 - 2^0 */ fmul(z2_10_0,t0,z2_5_0);
423
424 /* 2^11 - 2^1 */ fsquare(t0,z2_10_0);
425 /* 2^12 - 2^2 */ fsquare(t1,t0);
426 /* 2^20 - 2^10 */ for (i = 2;i < 10;i += 2) { fsquare(t0,t1); fsquare(t1,t0); }
427 /* 2^20 - 2^0 */ fmul(z2_20_0,t1,z2_10_0);
428
429 /* 2^21 - 2^1 */ fsquare(t0,z2_20_0);
430 /* 2^22 - 2^2 */ fsquare(t1,t0);
431 /* 2^40 - 2^20 */ for (i = 2;i < 20;i += 2) { fsquare(t0,t1); fsquare(t1,t0); }
432 /* 2^40 - 2^0 */ fmul(t0,t1,z2_20_0);
433
434 /* 2^41 - 2^1 */ fsquare(t1,t0);
435 /* 2^42 - 2^2 */ fsquare(t0,t1);
436 /* 2^50 - 2^10 */ for (i = 2;i < 10;i += 2) { fsquare(t1,t0); fsquare(t0,t1); }
437 /* 2^50 - 2^0 */ fmul(z2_50_0,t0,z2_10_0);
438
439 /* 2^51 - 2^1 */ fsquare(t0,z2_50_0);
440 /* 2^52 - 2^2 */ fsquare(t1,t0);
441 /* 2^100 - 2^50 */ for (i = 2;i < 50;i += 2) { fsquare(t0,t1); fsquare(t1,t0); }
442 /* 2^100 - 2^0 */ fmul(z2_100_0,t1,z2_50_0);
443
444 /* 2^101 - 2^1 */ fsquare(t1,z2_100_0);
445 /* 2^102 - 2^2 */ fsquare(t0,t1);
446 /* 2^200 - 2^100 */ for (i = 2;i < 100;i += 2) { fsquare(t1,t0); fsquare(t0,t1); }
447 /* 2^200 - 2^0 */ fmul(t1,t0,z2_100_0);
448
449 /* 2^201 - 2^1 */ fsquare(t0,t1);
450 /* 2^202 - 2^2 */ fsquare(t1,t0);
451 /* 2^250 - 2^50 */ for (i = 2;i < 50;i += 2) { fsquare(t0,t1); fsquare(t1,t0); }
452 /* 2^250 - 2^0 */ fmul(t0,t1,z2_50_0);
453
454 /* 2^251 - 2^1 */ fsquare(t1,t0);
455 /* 2^252 - 2^2 */ fsquare(t0,t1);
456 /* 2^253 - 2^3 */ fsquare(t1,t0);
457 /* 2^254 - 2^4 */ fsquare(t0,t1);
458 /* 2^255 - 2^5 */ fsquare(t1,t0);
459 /* 2^255 - 21 */ fmul(out,t1,z11);
460}
461
462int
463crypto_scalarmult(u8 *mypublic, const u8 *secret, const u8 *basepoint) {
464 felem bp[5], x[5], z[5], zmone[5];
465 unsigned char e[32];
466 int i;
467 for (i = 0;i < 32;++i) e[i] = secret[i];
468 e[0] &= 248;
469 e[31] &= 127;
470 e[31] |= 64;
471 fexpand(bp, basepoint);
472 cmult(x, z, e, bp);
473 crecip(zmone, z);
474 fmul(z, x, zmone);
475 fcontract(mypublic, z);
476 return 0;
477}