1 |
/*
 *	PearPC
 *	ppc_fpu.h
 *
 *	Copyright (C) 2003, 2004 Sebastian Biallas (sb@biallas.net)
 *
 *	This program is free software; you can redistribute it and/or modify
 *	it under the terms of the GNU General Public License version 2 as
 *	published by the Free Software Foundation.
 *
 *	This program is distributed in the hope that it will be useful,
 *	but WITHOUT ANY WARRANTY; without even the implied warranty of
 *	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *	GNU General Public License for more details.
 *
 *	You should have received a copy of the GNU General Public License
 *	along with this program; if not, write to the Free Software
 *	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
20 |
|
21 |
#ifndef __PPC_FPU_H__ |
22 |
#define __PPC_FPU_H__ |
23 |
|
24 |
|
25 |
/* Mask selecting the sign bit of a raw 64-bit double image. */
#define FPU_SIGN_BIT (0x8000000000000000ULL)

/*
 * Field extractors for a raw 64-bit double image.
 * FPD_SIGN yields 0 or 1.  FPD_EXP is a plain shift: the sign bit is still
 * attached above the 11 exponent bits, so callers mask with 0x7ff.
 */
#define FPD_SIGN(v) (((v)&FPU_SIGN_BIT)?1:0)
#define FPD_EXP(v) ((v)>>52)
#define FPD_FRAC(v) ((v)&0x000fffffffffffffULL)

/*
 * Field extractors for a raw 32-bit single image.
 * Note: FPS_SIGN yields the raw sign bit (0 or 0x80000000), not 0/1 like
 * FPD_SIGN.  FPS_EXP is a plain shift; callers mask with 0xff.
 */
#define FPS_SIGN(v) ((v)&0x80000000)
#define FPS_EXP(v) ((v)>>23)
#define FPS_FRAC(v) ((v)&0x007fffff)

/*
 * Assemble / split a raw double image from / into sign, biased exponent and
 * fraction.  m must be uint64.
 */
#define FPD_PACK_VAR(f, s, e, m) ((f) = (((s)?FPU_SIGN_BIT:0ULL)|((((uint64)(e))&0x7ff)<<52)|((m)&((1ULL<<52)-1))))
#define FPD_UNPACK_VAR(f, s, e, m) {(s)=FPD_SIGN(f);(e)=FPD_EXP(f)&0x7ff;(m)=FPD_FRAC(f);}

/*
 * Same for a raw single image.  The exponent is masked to its 8 bits so an
 * out-of-range value cannot spill into the sign bit (mirrors FPD_PACK_VAR).
 */
#define FPS_PACK_VAR(f, s, e, m) ((f) = (((s)?0x80000000:0)|(((e)&0xff)<<23)|((m)&0x7fffff)))
#define FPS_UNPACK_VAR(f, s, e, m) {(s)=FPS_SIGN(f);(e)=FPS_EXP(f)&0xff;(m)=FPS_FRAC(f);}

/*
 * Split a raw double image into the s/e/m members of fvar.
 * Must forward to FPD_UNPACK_VAR: a macro that names itself is not
 * re-expanded, so the previous self-reference became a call to an undefined
 * function FPD_UNPACK() taking four arguments.
 */
#define FPD_UNPACK(freg, fvar) FPD_UNPACK_VAR(freg, (fvar).s, (fvar).e, (fvar).m)
43 |
|
44 |
|
45 |
// Defined outside this header; presumably an FPU self-test entry point
// (NOTE(review): confirm against the implementation file).
void ppc_fpu_test();
46 |
|
47 |
// Classification assigned to an unpacked floating-point value.
enum ppc_fpr_type {
	ppc_fpr_norm = 0,	// finite, non-zero (denormals are normalized on unpack)
	ppc_fpr_zero = 1,	// exponent and fraction both zero
	ppc_fpr_NaN = 2,	// maximum exponent, non-zero fraction
	ppc_fpr_Inf = 3,	// maximum exponent, zero fraction
};
53 |
|
54 |
struct ppc_quadro { |
55 |
ppc_fpr_type type; |
56 |
int s; |
57 |
int e; |
58 |
uint64 m0; // most significant |
59 |
uint64 m1; // least significant |
60 |
}; |
61 |
|
62 |
struct ppc_double { |
63 |
ppc_fpr_type type; |
64 |
int s; |
65 |
int e; |
66 |
uint64 m; |
67 |
}; |
68 |
|
69 |
struct ppc_single { |
70 |
ppc_fpr_type type; |
71 |
int s; |
72 |
int e; |
73 |
uint m; |
74 |
}; |
75 |
|
76 |
/*
 *	Counts the zero bits above the most significant set bit of i.
 *	Note: for i == 0 the result is 63, not 64.
 */
inline int ppc_count_leading_zeros(uint64 i)
{
	// Work on the upper half if it holds any set bit, else on the lower
	// half; `zeros` starts at the count that applies when only bit 0 of
	// the chosen half is set.
	uint32 half = (uint32)(i >> 32);
	int zeros = 31;
	if (!half) {
		half = (uint32)i;
		zeros = 63;
	}
	// Binary search for the top set bit, halving the window each step.
	if (half >> 16) { half >>= 16; zeros -= 16; }
	if (half >> 8) { half >>= 8; zeros -= 8; }
	if (half >> 4) { half >>= 4; zeros -= 4; }
	if (half >> 2) { half >>= 2; zeros -= 2; }
	if (half >> 1) { zeros -= 1; }
	return zeros;
}
98 |
|
99 |
/*
 *	Returns the number of leading zero bits of the 128-bit mantissa
 *	m0:m1.  Does not modify d.
 */
inline int ppc_fpu_normalize_quadro(ppc_quadro &d)
{
	if (d.m0) {
		return ppc_count_leading_zeros(d.m0);
	}
	// upper word is empty: all 64 of its bits count as leading zeros
	return 64 + ppc_count_leading_zeros(d.m1);
}
104 |
|
105 |
/*
 *	Returns the number of leading zero bits of d.m.  Does not modify d;
 *	the caller applies the shift.
 */
inline int ppc_fpu_normalize(ppc_double &d)
{
	const int leading = ppc_count_leading_zeros(d.m);
	return leading;
}
109 |
|
110 |
/*
 *	Returns the number of leading zero bits of the 32-bit mantissa s.m
 *	(31 when s.m is 0 or 1).  Does not modify s.
 */
inline int ppc_fpu_normalize_single(ppc_single &s)
{
	uint32 bits = s.m;
	int zeros = 31;
	// Binary search for the top set bit, halving the window each step.
	if (bits >> 16) { bits >>= 16; zeros -= 16; }
	if (bits >> 8) { bits >>= 8; zeros -= 8; }
	if (bits >> 4) { bits >>= 4; zeros -= 4; }
	if (bits >> 2) { bits >>= 2; zeros -= 2; }
	if (bits >> 1) { zeros -= 1; }
	return zeros;
}
122 |
|
123 |
#include "tools/snprintf.h" |
124 |
/*
 *	Splits the raw 64-bit double image d into res and classifies it.
 *
 *	For ppc_fpr_norm results the exponent is unbiased and the mantissa is
 *	left-aligned so that its leading one sits at bit 55 (implied bit 52
 *	shifted up by the three guard bits).  Denormal inputs are normalized
 *	into that same layout.  For the other classes res.e / res.m keep the
 *	raw field values.
 */
inline void ppc_fpu_unpack_double(ppc_double &res, uint64 d)
{
	FPD_UNPACK_VAR(d, res.s, res.e, res.m);
	// .124
	if (res.e == 2047) {
		if (res.m == 0) {
			res.type = ppc_fpr_Inf;
		} else {
			res.type = ppc_fpr_NaN;
		}
	} else if (res.e == 0) {
		if (res.m == 0) {
			res.type = ppc_fpr_zero;
		} else {
			// normalize denormalized exponent
			// A 64-bit word has 11 bits above the implied-bit position
			// (bit 52), so `diff` is the shift that moves the leading one
			// onto bit 52.  (The single-precision variant uses 8 because
			// 32 - 24 == 8; using 8 here put the leading one at bit 58.)
			int diff = ppc_fpu_normalize(res) - 11;
			res.m <<= diff+3;
			res.e -= 1023 - 1 + diff;
			res.type = ppc_fpr_norm;
		}
	} else {
		res.e -= 1023; // unbias exponent
		res.type = ppc_fpr_norm;
		// add implied bit
		res.m |= 1ULL<<52;
		// make room for the three guard bits
		res.m <<= 3;
	}
}
154 |
|
155 |
|
156 |
/*
 *	Splits the raw 32-bit single image d into res and classifies it.
 *
 *	For ppc_fpr_norm results the exponent is unbiased and the mantissa has
 *	its leading one at bit 26 (implied bit 23 shifted up by the three guard
 *	bits).  Denormal inputs are normalized into that same layout.
 */
inline void ppc_fpu_unpack_single(ppc_single &res, uint32 d)
{
	FPS_UNPACK_VAR(d, res.s, res.e, res.m);
	// .124
	if (res.e == 255) {
		// maximum exponent: infinity or NaN depending on the fraction
		res.type = (res.m != 0) ? ppc_fpr_NaN : ppc_fpr_Inf;
		return;
	}
	if (res.e != 0) {
		// ordinary normalized number
		res.type = ppc_fpr_norm;
		res.e -= 127; // unbias exponent
		res.m |= 1<<23; // add implied bit
		res.m <<= 3;
		return;
	}
	if (res.m == 0) {
		res.type = ppc_fpr_zero;
		return;
	}
	// denormal: shift the leading one up to the implied-bit position and
	// compensate in the exponent
	const int diff = ppc_fpu_normalize_single(res) - 8;
	res.type = ppc_fpr_norm;
	res.m <<= diff+3;
	res.e -= 127 - 1 + diff;
}
184 |
|
185 |
/*
 *	Rounds d.m according to the rounding mode in gCPU.fpscr.
 *
 *	The low three bits of d.m are guard bits; the caller discards them
 *	afterwards with a right shift.  Rounding up is done by adding to d.m,
 *	which may carry into the next higher bit.
 *
 *	Returns FPSCR_XX when any guard bit is set (the result is inexact),
 *	otherwise 0.  Previously the directed modes returned 0 when they
 *	truncated an inexact value, losing the inexact flag.
 */
inline uint32 ppc_fpu_round(ppc_double &d)
{
	// .132
	const uint64 guard = d.m & 0x7;
	if (!guard) {
		// exact: nothing to round
		return 0;
	}
	switch (FPSCR_RN(gCPU.fpscr)) {
	case FPSCR_RN_NEAR:
		// Adding half an ulp rounds correctly except on an exact tie
		// (guard == 4), which only rounds up when the result is odd.
		if (guard != 4 || (d.m & 8)) {
			d.m += 4;
		}
		break;
	case FPSCR_RN_ZERO:
		// truncate
		break;
	case FPSCR_RN_PINF:
		if (!d.s) {
			d.m += 8;
		}
		break;
	case FPSCR_RN_MINF:
		if (d.s) {
			d.m += 8;
		}
		break;
	default:
		return 0;
	}
	return FPSCR_XX;
}
219 |
|
220 |
/*
 *	Rounds s.m according to the rounding mode in gCPU.fpscr.
 *
 *	The low three bits of s.m are guard bits; the caller discards them
 *	afterwards with a right shift.  Rounding up is done by adding to s.m,
 *	which may carry into the next higher bit.
 *
 *	Returns FPSCR_XX when any guard bit is set (the result is inexact),
 *	otherwise 0.  Previously the directed modes returned 0 when they
 *	truncated an inexact value, losing the inexact flag.
 */
inline uint32 ppc_fpu_round_single(ppc_single &s)
{
	const uint32 guard = s.m & 0x7;
	if (!guard) {
		// exact: nothing to round
		return 0;
	}
	switch (FPSCR_RN(gCPU.fpscr)) {
	case FPSCR_RN_NEAR:
		// Adding half an ulp rounds correctly except on an exact tie
		// (guard == 4), which only rounds up when the result is odd.
		if (guard != 4 || (s.m & 8)) {
			s.m += 4;
		}
		break;
	case FPSCR_RN_ZERO:
		// truncate
		break;
	case FPSCR_RN_PINF:
		if (!s.s) {
			s.m += 8;
		}
		break;
	case FPSCR_RN_MINF:
		if (s.s) {
			s.m += 8;
		}
		break;
	default:
		return 0;
	}
	return FPSCR_XX;
}
253 |
|
254 |
/*
 *	Overload for a single-precision mantissa held in a ppc_double (used
 *	after the mantissa has been narrowed so its guard bits are again the
 *	low three bits of s.m).  Rounds s.m according to the rounding mode in
 *	gCPU.fpscr.
 *
 *	Returns FPSCR_XX when any guard bit is set (the result is inexact),
 *	otherwise 0.  Previously the directed modes returned 0 when they
 *	truncated an inexact value, losing the inexact flag.
 */
inline uint32 ppc_fpu_round_single(ppc_double &s)
{
	const uint64 guard = s.m & 0x7;
	if (!guard) {
		// exact: nothing to round
		return 0;
	}
	switch (FPSCR_RN(gCPU.fpscr)) {
	case FPSCR_RN_NEAR:
		// Adding half an ulp rounds correctly except on an exact tie
		// (guard == 4), which only rounds up when the result is odd.
		if (guard != 4 || (s.m & 8)) {
			s.m += 4;
		}
		break;
	case FPSCR_RN_ZERO:
		// truncate
		break;
	case FPSCR_RN_PINF:
		if (!s.s) {
			s.m += 8;
		}
		break;
	case FPSCR_RN_MINF:
		if (s.s) {
			s.m += 8;
		}
		break;
	default:
		return 0;
	}
	return FPSCR_XX;
}
287 |
|
288 |
/*
 *	Rounds the unpacked value d and packs it into the raw 64-bit double
 *	image res.  d is modified in the process (d.e / d.m end up holding the
 *	biased exponent and the fraction that were packed).
 *
 *	Returns the FPSCR status bits raised on the way: FPSCR_XX from
 *	rounding, FPSCR_OX when the result overflows to infinity, FPSCR_UX
 *	when it lands in the denormal range or underflows to zero.
 */
inline uint32 ppc_fpu_pack_double(ppc_double &d, uint64 &res)
{
	// .124
	uint32 ret = 0;
	switch (d.type) {
	case ppc_fpr_norm:
		d.e += 1023; // bias exponent
		if (d.e > 0) {
			ret |= ppc_fpu_round(d);
			if (d.m & (1ULL<<56)) {
				// rounding carried out of the mantissa
				d.e++;
				d.m >>= 4;
			} else {
				d.m >>= 3;
			}
			if (d.e >= 2047) {
				// overflow: pack as infinity
				d.e = 2047;
				d.m = 0;
				ret |= FPSCR_OX;
			}
		} else {
			// number is denormalized
			d.e = -d.e+1;
			if (d.e <= 56) {
				// Fold the bits that fall off the right end into the
				// lowest guard bit (sticky) so that rounding and the
				// inexact flag still see them.
				const uint64 lost = d.m & ((1ULL<<d.e)-1);
				d.m >>= d.e;
				if (lost) {
					d.m |= 1;
				}
				ret |= ppc_fpu_round(d);
				d.m <<= 1;
				if (d.m & (1ULL<<56)) {
					// rounded up to the smallest normal number
					d.e = 1;
					d.m = 0;
				} else {
					d.e = 0;
					d.m >>= 4;
					ret |= FPSCR_UX;
				}
			} else {
				// underflow to zero
				d.e = 0;
				d.m = 0;
				ret |= FPSCR_UX;
			}
		}
		break;
	case ppc_fpr_zero:
		d.e = 0;
		d.m = 0;
		break;
	case ppc_fpr_NaN:
		d.e = 2047;
		d.m = 1;
		break;
	case ppc_fpr_Inf:
		d.e = 2047;
		d.m = 0;
		break;
	}
	FPD_PACK_VAR(res, d.s, d.e, d.m);
	return ret;
}
351 |
|
352 |
/*
 *	Rounds the unpacked double-format value d to single precision and
 *	packs it into the raw 32-bit single image res.  d is modified in the
 *	process.
 *
 *	Returns the FPSCR status bits raised on the way: FPSCR_XX from
 *	rounding, FPSCR_OX when the result overflows to infinity, FPSCR_UX
 *	when it lands in the denormal range or underflows to zero.
 */
inline uint32 ppc_fpu_pack_single(ppc_double &d, uint32 &res)
{
	// .124
	uint32 ret = 0;
	switch (d.type) {
	case ppc_fpr_norm: {
		d.e += 127; // bias exponent
		// Narrow the mantissa from double to single width.  Bits that
		// fall off are folded into the lowest guard bit (sticky) so that
		// rounding and the inexact flag still see them.
		const uint64 narrowed_away = d.m & ((1ULL<<29)-1);
		d.m >>= 29;
		if (narrowed_away) {
			d.m |= 1;
		}
		if (d.e > 0) {
			ret |= ppc_fpu_round_single(d);
			if (d.m & (1ULL<<27)) {
				// rounding carried out of the mantissa
				d.e++;
				d.m >>= 4;
			} else {
				d.m >>= 3;
			}
			if (d.e >= 255) {
				// overflow: pack as infinity
				d.e = 255;
				d.m = 0;
				ret |= FPSCR_OX;
			}
		} else {
			// number is denormalized
			d.e = -d.e+1;
			if (d.e <= 27) {
				const uint64 lost = d.m & ((1ULL<<d.e)-1);
				d.m >>= d.e;
				if (lost) {
					d.m |= 1; // sticky
				}
				ret |= ppc_fpu_round_single(d);
				d.m <<= 1;
				if (d.m & (1ULL<<27)) {
					// rounded up to the smallest normal number
					d.e = 1;
					d.m = 0;
				} else {
					d.e = 0;
					d.m >>= 4;
					ret |= FPSCR_UX;
				}
			} else {
				// underflow to zero
				d.e = 0;
				d.m = 0;
				ret |= FPSCR_UX;
			}
		}
		break;
	}
	case ppc_fpr_zero:
		d.e = 0;
		d.m = 0;
		break;
	case ppc_fpr_NaN:
		d.e = 255;
		d.m = 1;
		break;
	case ppc_fpr_Inf:
		d.e = 255;
		d.m = 0;
		break;
	}
	FPS_PACK_VAR(res, d.s, d.e, d.m);
	return ret;
}
415 |
|
416 |
/*
 *	Copies the unpacked single s into the unpacked double d.  Class, sign
 *	and exponent carry over unchanged; the mantissa is widened by 29 bits
 *	so its leading one moves from bit 26 to bit 55.
 */
inline void ppc_fpu_single_to_double(ppc_single &s, ppc_double &d)
{
	const uint64 wide = (uint64)s.m;
	d.type = s.type;
	d.s = s.s;
	d.e = s.e;
	d.m = wide << 29;
}
423 |
|
424 |
/*
 *	Rounds the unpacked value d to single precision, then packs the
 *	rounded value as a raw 64-bit double image into res.  d is modified.
 *
 *	Returns the FPSCR status bits raised on the way (FPSCR_XX, FPSCR_OX,
 *	FPSCR_UX, plus whatever ppc_fpu_pack_double() reports).
 *
 *	Fixes relative to the previous version:
 *	 - overflow / underflow now change d.type, so infinity / zero is really
 *	   packed (before, only the local copy was reclassified and a finite,
 *	   non-zero value was written);
 *	 - results in the single denormal range are re-expanded into a properly
 *	   normalized double mantissa with a matching exponent;
 *	 - bits discarded while narrowing are kept as a sticky bit so rounding
 *	   sees them.
 */
inline uint32 ppc_fpu_pack_double_as_single(ppc_double &d, uint64 &res)
{
	// .757
	ppc_single s;
	s.m = d.m >> 29;
	if (d.m & ((1ULL<<29)-1)) {
		s.m |= 1; // sticky: something non-zero was cut off
	}
	s.e = d.e;
	s.s = d.s;
	s.type = d.type;
	uint32 ret = 0;

	switch (s.type) {
	case ppc_fpr_norm:
		s.e = d.e+127;
		if (s.e > 0) {
			ret |= ppc_fpu_round_single(s);
			if (s.m & (1<<27)) {
				// rounding carried out of the mantissa
				s.e++;
				s.m >>= 4;
			} else {
				s.m >>= 3;
			}
			if (s.e >= 255) {
				// too large for single precision
				d.type = ppc_fpr_Inf;
				ret |= FPSCR_OX;
			} else {
				// leading one is at bit 23 again; move it back to bit 55
				d.e = s.e-127;
				d.m = ((uint64)(s.m))<<32;
			}
		} else {
			// number is denormalized (as a single)
			const int shift = -s.e+1;
			if (shift <= 27) {
				const uint32 lost = s.m & ((1u<<shift)-1);
				s.m >>= shift;
				if (lost) {
					s.m |= 1; // sticky
				}
				ret |= ppc_fpu_round_single(s);
				s.m <<= 1;
				if (s.m & (1<<27)) {
					// rounded up to the smallest normal single, 2^-126
					d.e = -126;
					d.m = 1ULL<<55;
				} else {
					s.m >>= 4;
					ret |= FPSCR_UX;
					if (s.m == 0) {
						// rounded all the way down
						d.type = ppc_fpr_zero;
					} else {
						// s.m is a denormal fraction worth s.m * 2^-149.
						// Renormalize it for the double layout: leading
						// one to bit 55, exponent adjusted to match.
						const int lz = ppc_fpu_normalize_single(s);
						d.e = -118 - lz;
						d.m = ((uint64)(s.m)) << (24 + lz);
					}
				}
			} else {
				// underflow to zero
				d.type = ppc_fpr_zero;
				ret |= FPSCR_UX;
			}
		}
		break;
	case ppc_fpr_zero:
	case ppc_fpr_NaN:
	case ppc_fpr_Inf:
		// ppc_fpu_pack_double() sets exponent and mantissa for these
		// classes itself, based on d.type alone
		break;
	}
	ret |= ppc_fpu_pack_double(d, res);
	return ret;
}
498 |
|
499 |
/*
 *	Converts the unpacked value d to a signed 32-bit integer (returned as
 *	its two's complement bit pattern), rounding according to the rounding
 *	mode in gCPU.fpscr.  d.m is clobbered for ppc_fpr_norm values with
 *	0 <= d.e < 31.
 *
 *	Out-of-range values saturate to 0x7fffffff / 0x80000000; NaN converts
 *	to 0x80000000.
 *
 *	Fixes relative to the previous version:
 *	 - round-to-nearest now breaks exact ties towards the even integer
 *	   instead of always rounding them up;
 *	 - a positive value that rounds up to 2^31 saturates to 0x7fffffff
 *	   instead of wrapping to 0x80000000;
 *	 - NaN yields 0x80000000 regardless of its sign bit;
 *	 - the |value| < 1 path can no longer fall through into the general
 *	   path, where the shift count would exceed 55.
 */
inline uint32 ppc_fpu_double_to_int(ppc_double &d)
{
	switch (d.type) {
	case ppc_fpr_norm: {
		if (d.e < 0) {
			// |value| < 1: the result is 0 or +-1
			switch (FPSCR_RN(gCPU.fpscr)) {
			case FPSCR_RN_NEAR:
				// below 0.5, or exactly 0.5 (tie -> even, i.e. 0)
				if (d.e < -1 || (d.m & ((1ULL<<55)-1)) == 0) {
					return 0;
				}
				return d.s ? (uint32)-1 : 1;
			case FPSCR_RN_ZERO:
				return 0;
			case FPSCR_RN_PINF:
				return d.s ? 0 : 1;
			case FPSCR_RN_MINF:
				return d.s ? (uint32)-1 : 0;
			}
			return 0;
		}
		if (d.e >= 31) {
			// magnitude is at least 2^31: saturate
			return d.s ? 0x80000000 : 0x7fffffff;
		}
		// The leading one is at bit 55, so the integer part starts
		// `shift` bits up and everything below it is fraction.
		const int shift = 56 - d.e - 1;
		const uint64 frac = d.m & ((1ULL<<shift)-1);
		const uint64 half = 1ULL<<(shift-1);
		uint64 magnitude = d.m >> shift;
		int i = 0;
		// we have to round
		switch (FPSCR_RN(gCPU.fpscr)) {
		case FPSCR_RN_NEAR:
			if (frac > half || (frac == half && (magnitude & 1))) {
				i = 1;
			}
			break;
		case FPSCR_RN_ZERO:
			break;
		case FPSCR_RN_PINF:
			if (!d.s && frac) {
				i = 1;
			}
			break;
		case FPSCR_RN_MINF:
			if (d.s && frac) {
				i = 1;
			}
			break;
		}
		magnitude += i;
		d.m = magnitude;
		if (!d.s && magnitude > 0x7fffffff) {
			// rounded up to 2^31, which does not fit
			return 0x7fffffff;
		}
		return d.s ? -d.m : d.m;
	}
	case ppc_fpr_zero:
		return 0;
	case ppc_fpr_Inf:
		return d.s ? 0x80000000 : 0x7fffffff;
	case ppc_fpr_NaN:
		return 0x80000000;
	}
	return 0;
}
574 |
|
575 |
double ppc_fpu_get_double(uint64 d); |
576 |
double ppc_fpu_get_double(ppc_double &d); |
577 |
|
578 |
// Floating-point opcode handlers, defined outside this header.
// The names appear to follow the PowerPC mnemonics (NOTE(review): confirm).
void ppc_opc_fabsx();
void ppc_opc_faddx();
void ppc_opc_faddsx();
void ppc_opc_fcmpo();
void ppc_opc_fcmpu();
void ppc_opc_fctiwx();
void ppc_opc_fctiwzx();
void ppc_opc_fdivx();
void ppc_opc_fdivsx();
void ppc_opc_fmaddx();
void ppc_opc_fmaddsx();
void ppc_opc_fmrx();
void ppc_opc_fmsubx();
void ppc_opc_fmsubsx();
void ppc_opc_fmulx();
void ppc_opc_fmulsx();
void ppc_opc_fnabsx();
void ppc_opc_fnegx();
void ppc_opc_fnmaddx();
void ppc_opc_fnmaddsx();
void ppc_opc_fnmsubx();
void ppc_opc_fnmsubsx();
void ppc_opc_fresx();
void ppc_opc_frspx();
void ppc_opc_frsqrtex();
void ppc_opc_fselx();
void ppc_opc_fsqrtx();
void ppc_opc_fsqrtsx();
void ppc_opc_fsubx();
void ppc_opc_fsubsx();
608 |
|
609 |
#endif |