diff options
Diffstat (limited to 'nacl/crypto_scalarmult/curve25519/ref/smult.c')
-rw-r--r-- | nacl/crypto_scalarmult/curve25519/ref/smult.c | 265 |
1 files changed, 265 insertions, 0 deletions
diff --git a/nacl/crypto_scalarmult/curve25519/ref/smult.c b/nacl/crypto_scalarmult/curve25519/ref/smult.c new file mode 100644 index 00000000..6a479558 --- /dev/null +++ b/nacl/crypto_scalarmult/curve25519/ref/smult.c | |||
@@ -0,0 +1,265 @@ | |||
1 | /* | ||
2 | version 20081011 | ||
3 | Matthew Dempsky | ||
4 | Public domain. | ||
5 | Derived from public domain code by D. J. Bernstein. | ||
6 | */ | ||
7 | |||
8 | #include "crypto_scalarmult.h" | ||
9 | |||
10 | static void add(unsigned int out[32],const unsigned int a[32],const unsigned int b[32]) | ||
11 | { | ||
12 | unsigned int j; | ||
13 | unsigned int u; | ||
14 | u = 0; | ||
15 | for (j = 0;j < 31;++j) { u += a[j] + b[j]; out[j] = u & 255; u >>= 8; } | ||
16 | u += a[31] + b[31]; out[31] = u; | ||
17 | } | ||
18 | |||
19 | static void sub(unsigned int out[32],const unsigned int a[32],const unsigned int b[32]) | ||
20 | { | ||
21 | unsigned int j; | ||
22 | unsigned int u; | ||
23 | u = 218; | ||
24 | for (j = 0;j < 31;++j) { | ||
25 | u += a[j] + 65280 - b[j]; | ||
26 | out[j] = u & 255; | ||
27 | u >>= 8; | ||
28 | } | ||
29 | u += a[31] - b[31]; | ||
30 | out[31] = u; | ||
31 | } | ||
32 | |||
33 | static void squeeze(unsigned int a[32]) | ||
34 | { | ||
35 | unsigned int j; | ||
36 | unsigned int u; | ||
37 | u = 0; | ||
38 | for (j = 0;j < 31;++j) { u += a[j]; a[j] = u & 255; u >>= 8; } | ||
39 | u += a[31]; a[31] = u & 127; | ||
40 | u = 19 * (u >> 7); | ||
41 | for (j = 0;j < 31;++j) { u += a[j]; a[j] = u & 255; u >>= 8; } | ||
42 | u += a[31]; a[31] = u; | ||
43 | } | ||
44 | |||
45 | static const unsigned int minusp[32] = { | ||
46 | 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 | ||
47 | } ; | ||
48 | |||
49 | static void freeze(unsigned int a[32]) | ||
50 | { | ||
51 | unsigned int aorig[32]; | ||
52 | unsigned int j; | ||
53 | unsigned int negative; | ||
54 | |||
55 | for (j = 0;j < 32;++j) aorig[j] = a[j]; | ||
56 | add(a,a,minusp); | ||
57 | negative = -((a[31] >> 7) & 1); | ||
58 | for (j = 0;j < 32;++j) a[j] ^= negative & (aorig[j] ^ a[j]); | ||
59 | } | ||
60 | |||
61 | static void mult(unsigned int out[32],const unsigned int a[32],const unsigned int b[32]) | ||
62 | { | ||
63 | unsigned int i; | ||
64 | unsigned int j; | ||
65 | unsigned int u; | ||
66 | |||
67 | for (i = 0;i < 32;++i) { | ||
68 | u = 0; | ||
69 | for (j = 0;j <= i;++j) u += a[j] * b[i - j]; | ||
70 | for (j = i + 1;j < 32;++j) u += 38 * a[j] * b[i + 32 - j]; | ||
71 | out[i] = u; | ||
72 | } | ||
73 | squeeze(out); | ||
74 | } | ||
75 | |||
76 | static void mult121665(unsigned int out[32],const unsigned int a[32]) | ||
77 | { | ||
78 | unsigned int j; | ||
79 | unsigned int u; | ||
80 | |||
81 | u = 0; | ||
82 | for (j = 0;j < 31;++j) { u += 121665 * a[j]; out[j] = u & 255; u >>= 8; } | ||
83 | u += 121665 * a[31]; out[31] = u & 127; | ||
84 | u = 19 * (u >> 7); | ||
85 | for (j = 0;j < 31;++j) { u += out[j]; out[j] = u & 255; u >>= 8; } | ||
86 | u += out[j]; out[j] = u; | ||
87 | } | ||
88 | |||
89 | static void square(unsigned int out[32],const unsigned int a[32]) | ||
90 | { | ||
91 | unsigned int i; | ||
92 | unsigned int j; | ||
93 | unsigned int u; | ||
94 | |||
95 | for (i = 0;i < 32;++i) { | ||
96 | u = 0; | ||
97 | for (j = 0;j < i - j;++j) u += a[j] * a[i - j]; | ||
98 | for (j = i + 1;j < i + 32 - j;++j) u += 38 * a[j] * a[i + 32 - j]; | ||
99 | u *= 2; | ||
100 | if ((i & 1) == 0) { | ||
101 | u += a[i / 2] * a[i / 2]; | ||
102 | u += 38 * a[i / 2 + 16] * a[i / 2 + 16]; | ||
103 | } | ||
104 | out[i] = u; | ||
105 | } | ||
106 | squeeze(out); | ||
107 | } | ||
108 | |||
109 | static void select(unsigned int p[64],unsigned int q[64],const unsigned int r[64],const unsigned int s[64],unsigned int b) | ||
110 | { | ||
111 | unsigned int j; | ||
112 | unsigned int t; | ||
113 | unsigned int bminus1; | ||
114 | |||
115 | bminus1 = b - 1; | ||
116 | for (j = 0;j < 64;++j) { | ||
117 | t = bminus1 & (r[j] ^ s[j]); | ||
118 | p[j] = s[j] ^ t; | ||
119 | q[j] = r[j] ^ t; | ||
120 | } | ||
121 | } | ||
122 | |||
123 | static void mainloop(unsigned int work[64],const unsigned char e[32]) | ||
124 | { | ||
125 | unsigned int xzm1[64]; | ||
126 | unsigned int xzm[64]; | ||
127 | unsigned int xzmb[64]; | ||
128 | unsigned int xzm1b[64]; | ||
129 | unsigned int xznb[64]; | ||
130 | unsigned int xzn1b[64]; | ||
131 | unsigned int a0[64]; | ||
132 | unsigned int a1[64]; | ||
133 | unsigned int b0[64]; | ||
134 | unsigned int b1[64]; | ||
135 | unsigned int c1[64]; | ||
136 | unsigned int r[32]; | ||
137 | unsigned int s[32]; | ||
138 | unsigned int t[32]; | ||
139 | unsigned int u[32]; | ||
140 | unsigned int i; | ||
141 | unsigned int j; | ||
142 | unsigned int b; | ||
143 | int pos; | ||
144 | |||
145 | for (j = 0;j < 32;++j) xzm1[j] = work[j]; | ||
146 | xzm1[32] = 1; | ||
147 | for (j = 33;j < 64;++j) xzm1[j] = 0; | ||
148 | |||
149 | xzm[0] = 1; | ||
150 | for (j = 1;j < 64;++j) xzm[j] = 0; | ||
151 | |||
152 | for (pos = 254;pos >= 0;--pos) { | ||
153 | b = e[pos / 8] >> (pos & 7); | ||
154 | b &= 1; | ||
155 | select(xzmb,xzm1b,xzm,xzm1,b); | ||
156 | add(a0,xzmb,xzmb + 32); | ||
157 | sub(a0 + 32,xzmb,xzmb + 32); | ||
158 | add(a1,xzm1b,xzm1b + 32); | ||
159 | sub(a1 + 32,xzm1b,xzm1b + 32); | ||
160 | square(b0,a0); | ||
161 | square(b0 + 32,a0 + 32); | ||
162 | mult(b1,a1,a0 + 32); | ||
163 | mult(b1 + 32,a1 + 32,a0); | ||
164 | add(c1,b1,b1 + 32); | ||
165 | sub(c1 + 32,b1,b1 + 32); | ||
166 | square(r,c1 + 32); | ||
167 | sub(s,b0,b0 + 32); | ||
168 | mult121665(t,s); | ||
169 | add(u,t,b0); | ||
170 | mult(xznb,b0,b0 + 32); | ||
171 | mult(xznb + 32,s,u); | ||
172 | square(xzn1b,c1); | ||
173 | mult(xzn1b + 32,r,work); | ||
174 | select(xzm,xzm1,xznb,xzn1b,b); | ||
175 | } | ||
176 | |||
177 | for (j = 0;j < 64;++j) work[j] = xzm[j]; | ||
178 | } | ||
179 | |||
180 | static void recip(unsigned int out[32],const unsigned int z[32]) | ||
181 | { | ||
182 | unsigned int z2[32]; | ||
183 | unsigned int z9[32]; | ||
184 | unsigned int z11[32]; | ||
185 | unsigned int z2_5_0[32]; | ||
186 | unsigned int z2_10_0[32]; | ||
187 | unsigned int z2_20_0[32]; | ||
188 | unsigned int z2_50_0[32]; | ||
189 | unsigned int z2_100_0[32]; | ||
190 | unsigned int t0[32]; | ||
191 | unsigned int t1[32]; | ||
192 | int i; | ||
193 | |||
194 | /* 2 */ square(z2,z); | ||
195 | /* 4 */ square(t1,z2); | ||
196 | /* 8 */ square(t0,t1); | ||
197 | /* 9 */ mult(z9,t0,z); | ||
198 | /* 11 */ mult(z11,z9,z2); | ||
199 | /* 22 */ square(t0,z11); | ||
200 | /* 2^5 - 2^0 = 31 */ mult(z2_5_0,t0,z9); | ||
201 | |||
202 | /* 2^6 - 2^1 */ square(t0,z2_5_0); | ||
203 | /* 2^7 - 2^2 */ square(t1,t0); | ||
204 | /* 2^8 - 2^3 */ square(t0,t1); | ||
205 | /* 2^9 - 2^4 */ square(t1,t0); | ||
206 | /* 2^10 - 2^5 */ square(t0,t1); | ||
207 | /* 2^10 - 2^0 */ mult(z2_10_0,t0,z2_5_0); | ||
208 | |||
209 | /* 2^11 - 2^1 */ square(t0,z2_10_0); | ||
210 | /* 2^12 - 2^2 */ square(t1,t0); | ||
211 | /* 2^20 - 2^10 */ for (i = 2;i < 10;i += 2) { square(t0,t1); square(t1,t0); } | ||
212 | /* 2^20 - 2^0 */ mult(z2_20_0,t1,z2_10_0); | ||
213 | |||
214 | /* 2^21 - 2^1 */ square(t0,z2_20_0); | ||
215 | /* 2^22 - 2^2 */ square(t1,t0); | ||
216 | /* 2^40 - 2^20 */ for (i = 2;i < 20;i += 2) { square(t0,t1); square(t1,t0); } | ||
217 | /* 2^40 - 2^0 */ mult(t0,t1,z2_20_0); | ||
218 | |||
219 | /* 2^41 - 2^1 */ square(t1,t0); | ||
220 | /* 2^42 - 2^2 */ square(t0,t1); | ||
221 | /* 2^50 - 2^10 */ for (i = 2;i < 10;i += 2) { square(t1,t0); square(t0,t1); } | ||
222 | /* 2^50 - 2^0 */ mult(z2_50_0,t0,z2_10_0); | ||
223 | |||
224 | /* 2^51 - 2^1 */ square(t0,z2_50_0); | ||
225 | /* 2^52 - 2^2 */ square(t1,t0); | ||
226 | /* 2^100 - 2^50 */ for (i = 2;i < 50;i += 2) { square(t0,t1); square(t1,t0); } | ||
227 | /* 2^100 - 2^0 */ mult(z2_100_0,t1,z2_50_0); | ||
228 | |||
229 | /* 2^101 - 2^1 */ square(t1,z2_100_0); | ||
230 | /* 2^102 - 2^2 */ square(t0,t1); | ||
231 | /* 2^200 - 2^100 */ for (i = 2;i < 100;i += 2) { square(t1,t0); square(t0,t1); } | ||
232 | /* 2^200 - 2^0 */ mult(t1,t0,z2_100_0); | ||
233 | |||
234 | /* 2^201 - 2^1 */ square(t0,t1); | ||
235 | /* 2^202 - 2^2 */ square(t1,t0); | ||
236 | /* 2^250 - 2^50 */ for (i = 2;i < 50;i += 2) { square(t0,t1); square(t1,t0); } | ||
237 | /* 2^250 - 2^0 */ mult(t0,t1,z2_50_0); | ||
238 | |||
239 | /* 2^251 - 2^1 */ square(t1,t0); | ||
240 | /* 2^252 - 2^2 */ square(t0,t1); | ||
241 | /* 2^253 - 2^3 */ square(t1,t0); | ||
242 | /* 2^254 - 2^4 */ square(t0,t1); | ||
243 | /* 2^255 - 2^5 */ square(t1,t0); | ||
244 | /* 2^255 - 21 */ mult(out,t1,z11); | ||
245 | } | ||
246 | |||
247 | int crypto_scalarmult(unsigned char *q, | ||
248 | const unsigned char *n, | ||
249 | const unsigned char *p) | ||
250 | { | ||
251 | unsigned int work[96]; | ||
252 | unsigned char e[32]; | ||
253 | unsigned int i; | ||
254 | for (i = 0;i < 32;++i) e[i] = n[i]; | ||
255 | e[0] &= 248; | ||
256 | e[31] &= 127; | ||
257 | e[31] |= 64; | ||
258 | for (i = 0;i < 32;++i) work[i] = p[i]; | ||
259 | mainloop(work,e); | ||
260 | recip(work + 32,work + 32); | ||
261 | mult(work + 64,work,work + 32); | ||
262 | freeze(work + 64); | ||
263 | for (i = 0;i < 32;++i) q[i] = work[64 + i]; | ||
264 | return 0; | ||
265 | } | ||