1 |
/* crypto/bn/bn_asm.c */ |
2 |
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
3 |
* All rights reserved. |
4 |
* |
5 |
* This package is an SSL implementation written |
6 |
* by Eric Young (eay@cryptsoft.com). |
7 |
* The implementation was written so as to conform with Netscapes SSL. |
8 |
* |
9 |
* This library is free for commercial and non-commercial use as long as |
10 |
* the following conditions are aheared to. The following conditions |
11 |
* apply to all code found in this distribution, be it the RC4, RSA, |
12 |
* lhash, DES, etc., code; not just the SSL code. The SSL documentation |
13 |
* included with this distribution is covered by the same copyright terms |
14 |
* except that the holder is Tim Hudson (tjh@cryptsoft.com). |
15 |
* |
16 |
* Copyright remains Eric Young's, and as such any Copyright notices in |
17 |
* the code are not to be removed. |
18 |
* If this package is used in a product, Eric Young should be given attribution |
19 |
* as the author of the parts of the library used. |
20 |
* This can be in the form of a textual message at program startup or |
21 |
* in documentation (online or textual) provided with the package. |
22 |
* |
23 |
* Redistribution and use in source and binary forms, with or without |
24 |
* modification, are permitted provided that the following conditions |
25 |
* are met: |
26 |
* 1. Redistributions of source code must retain the copyright |
27 |
* notice, this list of conditions and the following disclaimer. |
28 |
* 2. Redistributions in binary form must reproduce the above copyright |
29 |
* notice, this list of conditions and the following disclaimer in the |
30 |
* documentation and/or other materials provided with the distribution. |
31 |
* 3. All advertising materials mentioning features or use of this software |
32 |
* must display the following acknowledgement: |
33 |
* "This product includes cryptographic software written by |
34 |
* Eric Young (eay@cryptsoft.com)" |
35 |
* The word 'cryptographic' can be left out if the rouines from the library |
36 |
* being used are not cryptographic related :-). |
37 |
* 4. If you include any Windows specific code (or a derivative thereof) from |
38 |
* the apps directory (application code) you must include an acknowledgement: |
39 |
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" |
40 |
* |
41 |
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND |
42 |
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
43 |
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
44 |
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
45 |
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
46 |
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
47 |
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
48 |
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
49 |
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
50 |
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
51 |
* SUCH DAMAGE. |
52 |
* |
53 |
* The licence and distribution terms for any publically available version or |
54 |
* derivative of this code cannot be changed. i.e. this code cannot simply be |
55 |
* copied and put under another distribution licence |
56 |
* [including the GNU Public Licence.] |
57 |
*/ |
58 |
|
59 |
#ifndef BN_DEBUG |
60 |
# undef NDEBUG /* avoid conflicting definitions */ |
61 |
# define NDEBUG |
62 |
#endif |
63 |
|
64 |
#include <stdio.h> |
65 |
#include <assert.h> |
66 |
#include "bn_lcl.h" |
67 |
|
68 |
#if defined(BN_LLONG) || defined(BN_UMULT_HIGH) |
69 |
|
70 |
/*
 * Apply mul_add() to each of the first num words of rp/ap with multiplier w,
 * lowest word first, threading one carry word through the whole run.
 *
 * mul_add() comes from bn_lcl.h and is not visible here; from its use it
 * presumably computes rp[i] += ap[i]*w + carry and leaves the new carry in
 * its last argument -- confirm against bn_lcl.h.
 *
 * Returns the carry left after the last word, or 0 when num <= 0 (a negative
 * num additionally trips the assert in BN_DEBUG builds).
 */
BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
{
    BN_ULONG carry = 0;

    assert(num >= 0);
    if (num <= 0)
        return (carry);

    /* The body is always braced because mul_add() is a macro. */
    for (; num > 0; num--, ap++, rp++) {
        mul_add(rp[0], ap[0], w, carry);
    }

    return (carry);
}
94 |
|
95 |
/*
 * Apply mul() to each of the first num words of rp/ap with multiplier w,
 * lowest word first, threading one carry word through the whole run.
 *
 * mul() comes from bn_lcl.h and is not visible here; from its use it
 * presumably stores the low word of ap[i]*w + carry into rp[i] and leaves the
 * high word in its last argument -- confirm against bn_lcl.h.
 *
 * Returns the carry left after the last word, or 0 when num <= 0.
 */
BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
{
    BN_ULONG carry = 0;

    assert(num >= 0);
    if (num <= 0)
        return (carry);

    /* The body is always braced because mul() is a macro. */
    for (; num > 0; num--, ap++, rp++) {
        mul(rp[0], ap[0], w, carry);
    }

    return (carry);
}
118 |
|
119 |
/*
 * For each of the n input words a[i], invoke sqr() with the output pair
 * r[2*i], r[2*i+1].  Nothing is done when n <= 0.
 *
 * sqr() comes from bn_lcl.h and is not visible here; presumably it writes the
 * low and high words of a[i]^2 into its first two arguments -- confirm.
 */
void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n)
{
    assert(n >= 0);
    if (n <= 0)
        return;

    /* One input word consumes two output words. */
    for (; n > 0; n--, a++, r += 2) {
        sqr(r[0], r[1], a[0]);
    }
}
138 |
|
139 |
#else /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */ |
140 |
|
141 |
/*
 * Portable variant (neither BN_LLONG nor BN_UMULT_HIGH is defined).
 *
 * The multiplier w is split once with LBITS()/HBITS(), then the five-argument
 * mul_add() macro is applied to each of the first num words of rp/ap, lowest
 * word first, threading one carry word through the run.
 *
 * LBITS, HBITS and mul_add are defined in bn_lcl.h (not visible here);
 * presumably the net effect is rp[i] += ap[i]*w + carry -- confirm.
 *
 * Returns the final carry word, or 0 when num <= 0.
 */
BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
{
    BN_ULONG carry = 0;
    BN_ULONG w_lo, w_hi;

    assert(num >= 0);
    if (num <= 0)
        return ((BN_ULONG)0);

    w_lo = LBITS(w);
    w_hi = HBITS(w);

    /* The body is always braced because mul_add() is a macro. */
    for (; num > 0; num--, ap++, rp++) {
        mul_add(rp[0], ap[0], w_lo, w_hi, carry);
    }

    return (carry);
}
167 |
|
168 |
/*
 * Portable variant (neither BN_LLONG nor BN_UMULT_HIGH is defined).
 *
 * The multiplier w is split once with LBITS()/HBITS(), then the five-argument
 * mul() macro is applied to each of the first num words of rp/ap, lowest word
 * first, threading one carry word through the run.
 *
 * LBITS, HBITS and mul are defined in bn_lcl.h (not visible here); presumably
 * the net effect is rp[i] = low word of ap[i]*w + carry -- confirm.
 *
 * Returns the final carry word, or 0 when num <= 0.
 */
BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
{
    BN_ULONG carry = 0;
    BN_ULONG w_lo, w_hi;

    assert(num >= 0);
    if (num <= 0)
        return ((BN_ULONG)0);

    w_lo = LBITS(w);
    w_hi = HBITS(w);

    /* The body is always braced because mul() is a macro. */
    for (; num > 0; num--, ap++, rp++) {
        mul(rp[0], ap[0], w_lo, w_hi, carry);
    }

    return (carry);
}
194 |
|
195 |
/*
 * Portable variant (neither BN_LLONG nor BN_UMULT_HIGH is defined).
 *
 * For each of the n input words a[i], invoke sqr64() with the output pair
 * r[2*i], r[2*i+1].  Nothing is done when n <= 0.
 *
 * sqr64() is defined in bn_lcl.h (not visible here); presumably it writes the
 * low and high words of a[i]^2 into its first two arguments -- confirm.
 */
void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n)
{
    assert(n >= 0);
    if (n <= 0)
        return;

    /* One input word consumes two output words. */
    for (; n > 0; n--, a++, r += 2) {
        sqr64(r[0], r[1], a[0]);
    }
}
217 |
|
218 |
#endif /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */ |
219 |
|
220 |
#if defined(BN_LLONG) && defined(BN_DIV2W) |
221 |
|
222 |
/*
 * Divide the double-word value (h:l) by d using the native double-width
 * type and return the quotient truncated to one word.
 *
 * A zero divisor returns BN_MASK2, matching the portable implementation in
 * the #else branch; without this guard the division below would be undefined
 * behaviour.
 */
BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
{
    if (d == 0)
        return (BN_MASK2);

    return ((BN_ULONG)(((((BN_ULLONG)h) << BN_BITS2) | l) / (BN_ULLONG)d));
}
226 |
|
227 |
#else |
228 |
|
229 |
/* Divide h,l by d and return the result. */ |
230 |
/* I need to test this some more :-( */ |
231 |
BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) |
232 |
{ |
233 |
BN_ULONG dh,dl,q,ret=0,th,tl,t; |
234 |
int i,count=2; |
235 |
|
236 |
if (d == 0) return(BN_MASK2); |
237 |
|
238 |
i=BN_num_bits_word(d); |
239 |
assert((i == BN_BITS2) || (h > (BN_ULONG)1<<i)); |
240 |
|
241 |
i=BN_BITS2-i; |
242 |
if (h >= d) h-=d; |
243 |
|
244 |
if (i) |
245 |
{ |
246 |
d<<=i; |
247 |
h=(h<<i)|(l>>(BN_BITS2-i)); |
248 |
l<<=i; |
249 |
} |
250 |
dh=(d&BN_MASK2h)>>BN_BITS4; |
251 |
dl=(d&BN_MASK2l); |
252 |
for (;;) |
253 |
{ |
254 |
if ((h>>BN_BITS4) == dh) |
255 |
q=BN_MASK2l; |
256 |
else |
257 |
q=h/dh; |
258 |
|
259 |
th=q*dh; |
260 |
tl=dl*q; |
261 |
for (;;) |
262 |
{ |
263 |
t=h-th; |
264 |
if ((t&BN_MASK2h) || |
265 |
((tl) <= ( |
266 |
(t<<BN_BITS4)| |
267 |
((l&BN_MASK2h)>>BN_BITS4)))) |
268 |
break; |
269 |
q--; |
270 |
th-=dh; |
271 |
tl-=dl; |
272 |
} |
273 |
t=(tl>>BN_BITS4); |
274 |
tl=(tl<<BN_BITS4)&BN_MASK2h; |
275 |
th+=t; |
276 |
|
277 |
if (l < tl) th++; |
278 |
l-=tl; |
279 |
if (h < th) |
280 |
{ |
281 |
h+=d; |
282 |
q--; |
283 |
} |
284 |
h-=th; |
285 |
|
286 |
if (--count == 0) break; |
287 |
|
288 |
ret=q<<BN_BITS4; |
289 |
h=((h<<BN_BITS4)|(l>>BN_BITS4))&BN_MASK2; |
290 |
l=(l&BN_MASK2l)<<BN_BITS4; |
291 |
} |
292 |
ret|=q; |
293 |
return(ret); |
294 |
} |
295 |
#endif /* !defined(BN_LLONG) && defined(BN_DIV2W) */ |
296 |
|
297 |
#ifdef BN_LLONG |
298 |
/*
 * Word-wise addition r[i] = a[i] + b[i] + carry for i in [0, n), using the
 * double-width BN_ULLONG accumulator to hold each sum and its carry.
 *
 * Returns the carry out of the top word, or 0 when n <= 0.
 */
BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
{
    BN_ULLONG sum = 0;

    assert(n >= 0);
    if (n <= 0)
        return ((BN_ULONG)0);

    for (; n > 0; n--, a++, b++, r++) {
        sum += (BN_ULLONG)a[0] + b[0];
        r[0] = (BN_ULONG)sum & BN_MASK2;
        sum >>= BN_BITS2;       /* what remains is the carry into the next word */
    }

    return ((BN_ULONG)sum);
}
333 |
#else /* !BN_LLONG */ |
334 |
/*
 * Word-wise addition r[i] = a[i] + b[i] + carry for i in [0, n), written
 * without a double-width type: a carry is detected whenever a masked sum
 * comes out smaller than one of its operands.
 *
 * Returns the carry out of the top word, or 0 when n <= 0.
 */
BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
{
    BN_ULONG carry, sum, tmp;

    assert(n >= 0);
    if (n <= 0)
        return ((BN_ULONG)0);

    carry = 0;
    for (; n > 0; n--, a++, b++, r++) {
        tmp = a[0];
        tmp = (tmp + carry) & BN_MASK2;
        carry = (tmp < carry);          /* wrapped while adding the carry-in */
        sum = (tmp + b[0]) & BN_MASK2;
        carry += (sum < tmp);           /* wrapped while adding b[0] */
        r[0] = sum;
    }

    return ((BN_ULONG)carry);
}
382 |
#endif /* !BN_LLONG */ |
383 |
|
384 |
#if 0 |
385 |
/*
 * Word-wise subtraction r[i] = a[i] - b[i] - borrow for i in [0, n).
 * (This definition sits inside an "#if 0" region in this file.)
 *
 * Returns the borrow out of the top word (0 or 1), or 0 when n <= 0.
 */
BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
{
    BN_ULONG x, y;
    int borrow = 0;

    assert(n >= 0);
    if (n <= 0)
        return ((BN_ULONG)0);

    for (; n > 0; n--, a++, b++, r++) {
        x = a[0];
        y = b[0];
        r[0] = (x - y - borrow) & BN_MASK2;
        /* When the words are equal the incoming borrow simply propagates. */
        if (x != y)
            borrow = (x < y);
    }

    return (borrow);
}
421 |
#endif |
422 |
|
423 |
#ifdef BN_MUL_COMBA |
424 |
|
425 |
#undef bn_mul_comba8 |
426 |
#undef bn_mul_comba4 |
427 |
#undef bn_sqr_comba8 |
428 |
#undef bn_sqr_comba4 |
429 |
|
430 |
/* mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0) */ |
431 |
/* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */ |
432 |
/* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */ |
433 |
/* sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
434 |
|
435 |
#ifdef BN_LLONG |
436 |
/*
 * Three-word accumulator helpers, BN_LLONG flavour: c = (c2,c1,c0).
 *
 * These macros use scratch variables from the calling scope: t (BN_ULLONG),
 * t1 and t2 (BN_ULONG); mul_add_c2 additionally needs tt (BN_ULLONG).
 * Lw()/Hw(), BN_MASK and BN_MASK2 come from headers not visible here.
 */

/* c += a*b */
#define mul_add_c(a,b,c0,c1,c2) do { \
        t = (BN_ULLONG)(a) * (b); \
        t1 = (BN_ULONG)Lw(t); \
        t2 = (BN_ULONG)Hw(t); \
        c0 = (c0 + t1) & BN_MASK2; \
        if ((c0) < t1) \
            t2++; \
        c1 = (c1 + t2) & BN_MASK2; \
        if ((c1) < t2) \
            c2++; \
    } while (0)

/* c += 2*a*b; a carry out of the doubling goes straight into c2 */
#define mul_add_c2(a,b,c0,c1,c2) do { \
        t = (BN_ULLONG)(a) * (b); \
        tt = (t + t) & BN_MASK; \
        if (tt < t) \
            c2++; \
        t1 = (BN_ULONG)Lw(tt); \
        t2 = (BN_ULONG)Hw(tt); \
        c0 = (c0 + t1) & BN_MASK2; \
        if ((c0 < t1) && (((++t2) & BN_MASK2) == 0)) \
            c2++; \
        c1 = (c1 + t2) & BN_MASK2; \
        if ((c1) < t2) \
            c2++; \
    } while (0)

/* c += a[i]^2 */
#define sqr_add_c(a,i,c0,c1,c2) do { \
        t = (BN_ULLONG)(a)[i] * (a)[i]; \
        t1 = (BN_ULONG)Lw(t); \
        t2 = (BN_ULONG)Hw(t); \
        c0 = (c0 + t1) & BN_MASK2; \
        if ((c0) < t1) \
            t2++; \
        c1 = (c1 + t2) & BN_MASK2; \
        if ((c1) < t2) \
            c2++; \
    } while (0)

/* c += 2*a[i]*a[j] */
#define sqr_add_c2(a,i,j,c0,c1,c2) \
    mul_add_c2((a)[i], (a)[j], c0, c1, c2)
462 |
|
463 |
#elif defined(BN_UMULT_HIGH) |
464 |
|
465 |
/*
 * c += a*b for the three-word accumulator c = (c2,c1,c0), BN_UMULT_HIGH
 * flavour.  Uses t1 and t2 (BN_ULONG) from the calling scope as scratch.
 * BN_UMULT_HIGH() is defined elsewhere; it is used here as the high word of
 * the double-width product.
 */
#define mul_add_c(a,b,c0,c1,c2) do { \
        BN_ULONG ta = (a), tb = (b); \
        t1 = ta * tb; \
        t2 = BN_UMULT_HIGH(ta, tb); \
        c0 += t1; \
        if (c0 < t1) \
            t2 += 1; \
        c1 += t2; \
        if (c1 < t2) \
            c2 += 1; \
    } while (0)
472 |
|
473 |
/*
 * c += 2*a*b for the three-word accumulator c = (c2,c1,c0), BN_UMULT_HIGH
 * flavour.
 *
 * The product is added twice instead of being doubled first.  The high word
 * of a single-word product is never all-ones, so folding the carry from the
 * low-word addition into it cannot wrap.  The earlier form doubled the
 * product, which could leave the high word all-ones; adding the low-word
 * carry then wrapped it to zero and the carry into c2 was lost, giving wrong
 * squaring results for some inputs.
 *
 * Unlike the neighbouring macros this one needs no scratch variables from the
 * calling scope.  BN_UMULT_HIGH() is defined elsewhere; it is used here as
 * the high word of the double-width product.
 */
#define mul_add_c2(a,b,c0,c1,c2) do { \
        BN_ULONG ta = (a), tb = (b); \
        BN_ULONG lo, hi, tt; \
        lo = ta * tb; \
        hi = BN_UMULT_HIGH(ta, tb); \
        c0 += lo; tt = hi + ((c0 < lo) ? 1 : 0); \
        c1 += tt; c2 += (c1 < tt) ? 1 : 0; \
        c0 += lo; hi += (c0 < lo) ? 1 : 0; \
        c1 += hi; c2 += (c1 < hi) ? 1 : 0; \
    } while (0)
482 |
|
483 |
/*
 * c += a[i]^2 for the three-word accumulator c = (c2,c1,c0), BN_UMULT_HIGH
 * flavour.  Uses t1 and t2 (BN_ULONG) from the calling scope as scratch.
 */
#define sqr_add_c(a,i,c0,c1,c2) do { \
        BN_ULONG ta = (a)[i]; \
        t1 = ta * ta; \
        t2 = BN_UMULT_HIGH(ta, ta); \
        c0 += t1; \
        if (c0 < t1) \
            t2 += 1; \
        c1 += t2; \
        if (c1 < t2) \
            c2 += 1; \
    } while (0)

/* c += 2*a[i]*a[j]; simply forwards to mul_add_c2 */
#define sqr_add_c2(a,i,j,c0,c1,c2) \
    mul_add_c2((a)[i], (a)[j], c0, c1, c2)
493 |
|
494 |
#else /* !BN_LLONG */ |
495 |
/*
 * Three-word accumulator helpers, portable flavour: c = (c2,c1,c0).
 *
 * These macros use scratch variables from the calling scope: t1, t2, bl and
 * bh (all BN_ULONG).  LBITS/HBITS, mul64, sqr64, BN_TBIT and BN_MASK2 come
 * from headers not visible here; mul64(t1,t2,bl,bh) is used as "replace the
 * half-word pair (t1,t2) by the low/high words of the full product".
 */

/* c += a*b */
#define mul_add_c(a,b,c0,c1,c2) do { \
        t1 = LBITS(a); \
        t2 = HBITS(a); \
        bl = LBITS(b); \
        bh = HBITS(b); \
        mul64(t1, t2, bl, bh); \
        c0 = (c0 + t1) & BN_MASK2; \
        if ((c0) < t1) \
            t2++; \
        c1 = (c1 + t2) & BN_MASK2; \
        if ((c1) < t2) \
            c2++; \
    } while (0)

/* c += 2*a*b; the top bit shifted out by the doubling goes into c2 */
#define mul_add_c2(a,b,c0,c1,c2) do { \
        t1 = LBITS(a); \
        t2 = HBITS(a); \
        bl = LBITS(b); \
        bh = HBITS(b); \
        mul64(t1, t2, bl, bh); \
        if (t2 & BN_TBIT) \
            c2++; \
        t2 = (t2 + t2) & BN_MASK2; \
        if (t1 & BN_TBIT) \
            t2++; \
        t1 = (t1 + t1) & BN_MASK2; \
        c0 = (c0 + t1) & BN_MASK2; \
        if ((c0 < t1) && (((++t2) & BN_MASK2) == 0)) \
            c2++; \
        c1 = (c1 + t2) & BN_MASK2; \
        if ((c1) < t2) \
            c2++; \
    } while (0)

/* c += a[i]^2 */
#define sqr_add_c(a,i,c0,c1,c2) do { \
        sqr64(t1, t2, (a)[i]); \
        c0 = (c0 + t1) & BN_MASK2; \
        if ((c0) < t1) \
            t2++; \
        c1 = (c1 + t2) & BN_MASK2; \
        if ((c1) < t2) \
            c2++; \
    } while (0)

/* c += 2*a[i]*a[j] */
#define sqr_add_c2(a,i,j,c0,c1,c2) \
    mul_add_c2((a)[i], (a)[j], c0, c1, c2)
521 |
#endif /* !BN_LLONG */ |
522 |
|
523 |
/*
 * 8x8-word multiplication: r[0..15] = a[0..7] * b[0..7], built column by
 * column.  Output word "col" collects every product a[ia]*b[ib] with
 * ia + ib == col in a three-word accumulator via mul_add_c(); the lowest
 * accumulator word is then stored and the accumulator shifted down a word.
 *
 * t, t1, t2, bl and bh are not used directly: they are the scratch variables
 * that the mul_add_c() macro expects to find in the calling scope.
 */
void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
{
#ifdef BN_LLONG
    BN_ULLONG t;
#else
    BN_ULONG bl, bh;
#endif
    BN_ULONG t1, t2;
    BN_ULONG acc0 = 0, acc1 = 0, acc2 = 0;
    int col, ia, first, last;

    for (col = 0; col < 15; col++) {
        /* Range of a-indices whose partner index col-ia lies in 0..7. */
        first = (col > 7) ? col - 7 : 0;
        last = (col < 7) ? col : 7;

        for (ia = first; ia <= last; ia++) {
            mul_add_c(a[ia], b[col - ia], acc0, acc1, acc2);
        }

        r[col] = acc0;
        acc0 = acc1;
        acc1 = acc2;
        acc2 = 0;
    }
    /* What is left after the last column is the top word. */
    r[15] = acc0;
}
631 |
|
632 |
/*
 * 4x4-word multiplication: r[0..7] = a[0..3] * b[0..3], built column by
 * column.  Output word "col" collects every product a[ia]*b[ib] with
 * ia + ib == col in a three-word accumulator via mul_add_c(); the lowest
 * accumulator word is then stored and the accumulator shifted down a word.
 *
 * t, t1, t2, bl and bh are not used directly: they are the scratch variables
 * that the mul_add_c() macro expects to find in the calling scope.
 */
void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
{
#ifdef BN_LLONG
    BN_ULLONG t;
#else
    BN_ULONG bl, bh;
#endif
    BN_ULONG t1, t2;
    BN_ULONG acc0 = 0, acc1 = 0, acc2 = 0;
    int col, ia, first, last;

    for (col = 0; col < 7; col++) {
        /* Range of a-indices whose partner index col-ia lies in 0..3. */
        first = (col > 3) ? col - 3 : 0;
        last = (col < 3) ? col : 3;

        for (ia = first; ia <= last; ia++) {
            mul_add_c(a[ia], b[col - ia], acc0, acc1, acc2);
        }

        r[col] = acc0;
        acc0 = acc1;
        acc1 = acc2;
        acc2 = 0;
    }
    /* What is left after the last column is the top word. */
    r[7] = acc0;
}
676 |
|
677 |
/*
 * 8-word squaring: r[0..15] = a[0..7]^2, built column by column.
 *
 * Output word "col" receives the square a[col/2]^2 (even columns only, via
 * sqr_add_c) plus twice every cross product a[ia]*a[ib] with ia > ib and
 * ia + ib == col (via sqr_add_c2).  Terms are issued in a fixed order: even
 * columns take the square first and then cross terms with ia ascending, odd
 * columns take cross terms with ia descending.
 *
 * t, tt, t1, t2, bl and bh are not used directly: they are the scratch
 * variables that the sqr_add_c*() macros expect in the calling scope.
 */
void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
{
#ifdef BN_LLONG
    BN_ULLONG t, tt;
#else
    BN_ULONG bl, bh;
#endif
    BN_ULONG t1, t2;
    BN_ULONG acc0 = 0, acc1 = 0, acc2 = 0;
    int col, ia, ib, mid, top;

    for (col = 0; col < 15; col++) {
        top = (col < 7) ? col : 7;      /* largest usable index in this column */
        mid = col / 2;

        if ((col & 1) == 0) {
            sqr_add_c(a, mid, acc0, acc1, acc2);
            for (ia = mid + 1; ia <= top; ia++) {
                ib = col - ia;
                sqr_add_c2(a, ia, ib, acc0, acc1, acc2);
            }
        } else {
            for (ia = top; ia > mid; ia--) {
                ib = col - ia;
                sqr_add_c2(a, ia, ib, acc0, acc1, acc2);
            }
        }

        r[col] = acc0;
        acc0 = acc1;
        acc1 = acc2;
        acc2 = 0;
    }
    /* What is left after the last column is the top word. */
    r[15] = acc0;
}
757 |
|
758 |
/*
 * 4-word squaring: r[0..7] = a[0..3]^2, built column by column.
 *
 * Output word "col" receives the square a[col/2]^2 (even columns only, via
 * sqr_add_c) plus twice every cross product a[ia]*a[ib] with ia > ib and
 * ia + ib == col (via sqr_add_c2).  Terms are issued in a fixed order: even
 * columns take the square first and then cross terms with ia ascending, odd
 * columns take cross terms with ia descending.
 *
 * t, tt, t1, t2, bl and bh are not used directly: they are the scratch
 * variables that the sqr_add_c*() macros expect in the calling scope.
 */
void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
{
#ifdef BN_LLONG
    BN_ULLONG t, tt;
#else
    BN_ULONG bl, bh;
#endif
    BN_ULONG t1, t2;
    BN_ULONG acc0 = 0, acc1 = 0, acc2 = 0;
    int col, ia, ib, mid, top;

    for (col = 0; col < 7; col++) {
        top = (col < 3) ? col : 3;      /* largest usable index in this column */
        mid = col / 2;

        if ((col & 1) == 0) {
            sqr_add_c(a, mid, acc0, acc1, acc2);
            for (ia = mid + 1; ia <= top; ia++) {
                ib = col - ia;
                sqr_add_c2(a, ia, ib, acc0, acc1, acc2);
            }
        } else {
            for (ia = top; ia > mid; ia--) {
                ib = col - ia;
                sqr_add_c2(a, ia, ib, acc0, acc1, acc2);
            }
        }

        r[col] = acc0;
        acc0 = acc1;
        acc1 = acc2;
        acc2 = 0;
    }
    /* What is left after the last column is the top word. */
    r[7] = acc0;
}
796 |
#else /* !BN_MUL_COMBA */ |
797 |
|
798 |
/* hmm... is it faster just to do a multiply? */ |
799 |
#undef bn_sqr_comba4 |
800 |
/*
 * Fallback used when BN_MUL_COMBA is not defined: hand the 4-word squaring to
 * bn_sqr_normal(), passing an 8-word local array as its fourth argument.
 * bn_sqr_normal() is defined elsewhere; the array is presumably its
 * temporary workspace -- confirm against its definition.
 */
void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
{
    BN_ULONG scratch[2 * 4];

    bn_sqr_normal(r, a, 4, scratch);
}
805 |
|
806 |
#undef bn_sqr_comba8 |
807 |
/*
 * Fallback used when BN_MUL_COMBA is not defined: hand the 8-word squaring to
 * bn_sqr_normal(), passing a 16-word local array as its fourth argument.
 * bn_sqr_normal() is defined elsewhere; the array is presumably its
 * temporary workspace -- confirm against its definition.
 */
void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
{
    BN_ULONG scratch[2 * 8];

    bn_sqr_normal(r, a, 8, scratch);
}
812 |
|
813 |
/*
 * Fallback used when BN_MUL_COMBA is not defined: 4x4-word multiplication
 * r[0..7] = a[0..3] * b[0..3] done row by row.  The first row is written with
 * bn_mul_words(); each later row k is accumulated at offset k with
 * bn_mul_add_words().  Every call's returned carry becomes the next higher
 * result word.
 */
void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
{
    int row;

    r[4] = bn_mul_words(&(r[0]), a, 4, b[0]);
    for (row = 1; row < 4; row++)
        r[4 + row] = bn_mul_add_words(&(r[row]), a, 4, b[row]);
}
820 |
|
821 |
/*
 * Fallback used when BN_MUL_COMBA is not defined: 8x8-word multiplication
 * r[0..15] = a[0..7] * b[0..7] done row by row.  The first row is written
 * with bn_mul_words(); each later row k is accumulated at offset k with
 * bn_mul_add_words().  Every call's returned carry becomes the next higher
 * result word.
 */
void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
{
    int row;

    r[8] = bn_mul_words(&(r[0]), a, 8, b[0]);
    for (row = 1; row < 8; row++)
        r[8 + row] = bn_mul_add_words(&(r[row]), a, 8, b[row]);
}
832 |
|
833 |
#endif /* !BN_MUL_COMBA */ |