1 |
dpavlin |
1 |
/* |
2 |
|
|
* PearPC |
3 |
|
|
* ppc_fpu.cc |
4 |
|
|
* |
5 |
|
|
* Copyright (C) 2003, 2004 Sebastian Biallas (sb@biallas.net) |
6 |
|
|
* Copyright (C) 2003, 2004 Stefan Weyergraf |
7 |
|
|
* |
8 |
|
|
* This program is free software; you can redistribute it and/or modify |
9 |
|
|
* it under the terms of the GNU General Public License version 2 as |
10 |
|
|
* published by the Free Software Foundation. |
11 |
|
|
* |
12 |
|
|
* This program is distributed in the hope that it will be useful, |
13 |
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 |
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 |
|
|
* GNU General Public License for more details. |
16 |
|
|
* |
17 |
|
|
* You should have received a copy of the GNU General Public License |
18 |
|
|
* along with this program; if not, write to the Free Software |
19 |
|
|
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
20 |
|
|
*/ |
21 |
|
|
|
22 |
|
|
#include <string.h>

#include "debug/tracers.h"
#include "ppc_cpu.h"
#include "ppc_dec.h"
#include "ppc_fpu.h"
26 |
|
|
|
27 |
|
|
// .121 |
28 |
|
|
// Combine two operand class values into a single integer (first operand in
// bits 8 and up, second operand in the low bits) so that a pair of classes
// can be dispatched with one switch statement.
#define PPC_FPR_TYPE2(a, b) ( ((a) << 8) | (b) )
29 |
|
|
|
30 |
|
|
/*
 *	Return a short, human readable name for an operand class.
 *	Unknown values yield "???".
 */
const char *ppc_fpu_get_fpr_type(ppc_fpr_type t)
{
	if (t == ppc_fpr_norm) return "norm";
	if (t == ppc_fpr_zero) return "zero";
	if (t == ppc_fpr_NaN) return "NaN";
	if (t == ppc_fpr_Inf) return "Inf";
	return "???";
}
40 |
|
|
|
41 |
|
|
/*
 *	res := a + b on unpacked operands.
 *
 *	NOTE: for two normalized operands the mantissa of the operand with the
 *	smaller exponent is shifted right IN PLACE, i.e. `a' or `b' is modified.
 *	Not every field of `res' is written in every case (e.g. the mantissa is
 *	left untouched for Inf and for freshly generated NaN results).
 *
 *	The sign of an exact zero result depends on the FPSCR rounding mode:
 *	the signs are OR-ed when rounding toward -infinity and AND-ed otherwise.
 *	This rule is applied both to exact cancellation of normalized operands
 *	and to zero + zero.
 */
inline void ppc_fpu_add(ppc_double &res, ppc_double &a, ppc_double &b)
{
	switch (PPC_FPR_TYPE2(a.type, b.type)) {
	case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_norm): {
		// align the mantissas to the larger exponent
		int diff = a.e - b.e;
		if (diff<0) {
			diff = -diff;
			if (diff <= 56) {
				a.m >>= diff;
			} else if (a.m != 0) {
				// shifted out completely: keep a single low bit set
				a.m = 1;
			} else {
				a.m = 0;
			}
			res.e = b.e;
		} else {
			if (diff <= 56) {
				b.m >>= diff;
			} else if (b.m != 0) {
				// shifted out completely: keep a single low bit set
				b.m = 1;
			} else {
				b.m = 0;
			}
			res.e = a.e;
		}
		res.type = ppc_fpr_norm;
		if (a.s == b.s) {
			// same sign: add magnitudes, renormalize on carry into bit 56
			res.s = a.s;
			res.m = a.m + b.m;
			if (res.m & (1ULL<<56)) {
				res.m >>= 1;
				res.e++;
			}
		} else {
			// different signs: subtract magnitudes
			res.s = a.s;
			res.m = a.m - b.m;
			if (!res.m) {
				// exact cancellation: sign of zero depends on rounding mode
				if (FPSCR_RN(gCPU.fpscr) == FPSCR_RN_MINF) {
					res.s |= b.s;
				} else {
					res.s &= b.s;
				}
				res.type = ppc_fpr_zero;
			} else {
				if ((sint64)res.m < 0) {
					// |b| > |a|: result takes the sign of b
					res.m = b.m - a.m;
					res.s = b.s;
				}
				// presumably ppc_fpu_normalize() returns the number of
				// leading zero bits; 8 of them are expected for a
				// normalized 56 bit mantissa -- TODO confirm
				diff = ppc_fpu_normalize(res) - 8;
				res.e -= diff;
				res.m <<= diff;
			}
		}
		break;
	}
	case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_NaN):
		res.s = a.s;
		res.type = ppc_fpr_NaN;
		break;
	case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_zero):
		res.e = a.e;
		// fall-thru
	case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_norm):
	case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_Inf):
	case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_zero):
		// result is a
		res.s = a.s;
		res.m = a.m;
		res.type = a.type;
		break;
	case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_norm):
		res.e = b.e;
		// fall-thru
	case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_NaN):
	case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_NaN):
	case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_NaN):
		// result is b
		res.s = b.s;
		res.m = b.m;
		res.type = b.type;
		break;
	case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_Inf):
		if (a.s != b.s) {
			// +oo + -oo == NaN
			res.s = a.s ^ b.s;
			res.type = ppc_fpr_NaN;
			break;
		}
		// fall-thru
	case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_norm):
	case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_zero):
		res.s = a.s;
		res.type = a.type;
		break;
	case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_Inf):
	case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_Inf):
		res.s = b.s;
		res.type = b.type;
		break;
	case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_zero):
		// Equal signs keep that sign. (+0) + (-0) is -0 only when
		// rounding toward -infinity, +0 in every other rounding mode.
		res.type = ppc_fpr_zero;
		if (FPSCR_RN(gCPU.fpscr) == FPSCR_RN_MINF) {
			res.s = a.s | b.s;
		} else {
			res.s = a.s & b.s;
		}
		break;
	}
}
145 |
|
|
|
146 |
|
|
/*
 *	Shift the 128 bit mantissa of q (m0 = high word, m1 = low word) right
 *	by `exp' bits. Bits shifted out at the low end are discarded.
 *
 *	exp <= 0 leaves q untouched and exp >= 128 clears the mantissa; both
 *	are handled explicitly because shifting a 64 bit value by 64 or more
 *	(or by a negative amount) is undefined behaviour.
 */
inline void ppc_fpu_quadro_mshr(ppc_quadro &q, int exp)
{
	if (exp <= 0) return;
	if (exp >= 128) {
		q.m0 = 0;
		q.m1 = 0;
		return;
	}
	if (exp >= 64) {
		// whole-word shift
		q.m1 = q.m0;
		q.m0 = 0;
		exp -= 64;
	}
	// nothing left to do for exp == 64; avoids `t << 64' below
	if (exp == 0) return;
	// bits moving from the high word into the low word
	uint64 t = q.m0 & ((1ULL<<exp)-1);
	q.m0 >>= exp;
	q.m1 >>= exp;
	q.m1 |= t<<(64-exp);
}
158 |
|
|
|
159 |
|
|
/*
 *	Shift the 128 bit mantissa of q (m0 = high word, m1 = low word) left
 *	by `exp' bits. Bits shifted out at the high end are discarded.
 *
 *	exp <= 0 leaves q untouched and exp >= 128 clears the mantissa; both
 *	are handled explicitly because shifting a 64 bit value by 64 or more
 *	(or by a negative amount) is undefined behaviour.
 */
inline void ppc_fpu_quadro_mshl(ppc_quadro &q, int exp)
{
	if (exp <= 0) return;
	if (exp >= 128) {
		q.m0 = 0;
		q.m1 = 0;
		return;
	}
	if (exp >= 64) {
		// whole-word shift
		q.m0 = q.m1;
		q.m1 = 0;
		exp -= 64;
	}
	// nothing left to do for exp == 64; avoids `q.m1 >> 64' below
	if (exp == 0) return;
	// bits moving from the low word into the high word
	uint64 t = (q.m1 >> (64-exp)) & ((1ULL<<exp)-1);
	q.m0 <<= exp;
	q.m1 <<= exp;
	q.m0 |= t;
}
171 |
|
|
|
172 |
|
|
/*
 *	128 bit mantissa addition: res.(m0|m1) := a.(m0|m1) + b.(m0|m1).
 *	Only the mantissa words of res are written.
 */
inline void ppc_fpu_add_quadro_m(ppc_quadro &res, const ppc_quadro &a, const ppc_quadro &b)
{
	res.m1 = a.m1 + b.m1;
	// the low word wrapped around exactly when the sum is below an addend
	const uint64 carry = (res.m1 < a.m1) ? 1 : 0;
	res.m0 = a.m0 + b.m0 + carry;
}
181 |
|
|
|
182 |
|
|
/*
 *	128 bit mantissa subtraction: res.(m0|m1) := a.(m0|m1) - b.(m0|m1).
 *	Only the mantissa words of res are written.
 */
inline void ppc_fpu_sub_quadro_m(ppc_quadro &res, const ppc_quadro &a, const ppc_quadro &b)
{
	res.m1 = a.m1 - b.m1;
	// borrow from the high word when the low word of a is the smaller one
	const uint64 borrow = (a.m1 < b.m1) ? 1 : 0;
	res.m0 = a.m0 - b.m0 - borrow;
}
191 |
|
|
|
192 |
|
|
// res has 107 significant bits. a, b have 106 significant bits each. |
193 |
|
|
inline void ppc_fpu_add_quadro(ppc_quadro &res, ppc_quadro &a, ppc_quadro &b) |
194 |
|
|
{ |
195 |
|
|
// treat as 107 bit mantissa |
196 |
|
|
if (a.type == ppc_fpr_norm) ppc_fpu_quadro_mshl(a, 1); |
197 |
|
|
if (b.type == ppc_fpr_norm) ppc_fpu_quadro_mshl(b, 1); |
198 |
|
|
switch (PPC_FPR_TYPE2(a.type, b.type)) { |
199 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_norm): { |
200 |
|
|
int diff = a.e - b.e; |
201 |
|
|
if (diff < 0) { |
202 |
|
|
diff = -diff; |
203 |
|
|
if (diff <= 107) { |
204 |
|
|
// FIXME: may set x_prime |
205 |
|
|
ppc_fpu_quadro_mshr(a, diff); |
206 |
|
|
} else if (a.m0 || a.m1) { |
207 |
|
|
a.m0 = 0; |
208 |
|
|
a.m1 = 1; |
209 |
|
|
} else { |
210 |
|
|
a.m0 = 0; |
211 |
|
|
a.m1 = 0; |
212 |
|
|
} |
213 |
|
|
res.e = b.e; |
214 |
|
|
} else { |
215 |
|
|
if (diff <= 107) { |
216 |
|
|
// FIXME: may set x_prime |
217 |
|
|
ppc_fpu_quadro_mshr(b, diff); |
218 |
|
|
} else if (b.m0 || b.m1) { |
219 |
|
|
b.m0 = 0; |
220 |
|
|
b.m1 = 1; |
221 |
|
|
} else { |
222 |
|
|
b.m0 = 0; |
223 |
|
|
b.m1 = 0; |
224 |
|
|
} |
225 |
|
|
res.e = a.e; |
226 |
|
|
} |
227 |
|
|
res.type = ppc_fpr_norm; |
228 |
|
|
if (a.s == b.s) { |
229 |
|
|
res.s = a.s; |
230 |
|
|
ppc_fpu_add_quadro_m(res, a, b); |
231 |
|
|
int X_prime = res.m1 & 1; |
232 |
|
|
if (res.m0 & (1ULL<<(107-64))) { |
233 |
|
|
ppc_fpu_quadro_mshr(res, 1); |
234 |
|
|
res.e++; |
235 |
|
|
} |
236 |
|
|
// res = [107] |
237 |
|
|
res.m1 = (res.m1 & 0xfffffffffffffffeULL) | X_prime; |
238 |
|
|
} else { |
239 |
|
|
res.s = a.s; |
240 |
|
|
int cmp; |
241 |
|
|
if (a.m0 < b.m0) { |
242 |
|
|
cmp = -1; |
243 |
|
|
} else if (a.m0 > b.m0) { |
244 |
|
|
cmp = +1; |
245 |
|
|
} else { |
246 |
|
|
if (a.m1 < b.m1) { |
247 |
|
|
cmp = -1; |
248 |
|
|
} else if (a.m1 > b.m1) { |
249 |
|
|
cmp = +1; |
250 |
|
|
} else { |
251 |
|
|
cmp = 0; |
252 |
|
|
} |
253 |
|
|
} |
254 |
|
|
if (!cmp) { |
255 |
|
|
if (FPSCR_RN(gCPU.fpscr) == FPSCR_RN_MINF) { |
256 |
|
|
res.s |= b.s; |
257 |
|
|
} else { |
258 |
|
|
res.s &= b.s; |
259 |
|
|
} |
260 |
|
|
res.type = ppc_fpr_zero; |
261 |
|
|
} else { |
262 |
|
|
if (cmp < 0) { |
263 |
|
|
ppc_fpu_sub_quadro_m(res, b, a); |
264 |
|
|
res.s = b.s; |
265 |
|
|
} else { |
266 |
|
|
ppc_fpu_sub_quadro_m(res, a, b); |
267 |
|
|
} |
268 |
|
|
diff = ppc_fpu_normalize_quadro(res) - (128-107); |
269 |
|
|
int X_prime = res.m1 & 1; |
270 |
|
|
res.m1 &= 0xfffffffffffffffeULL; |
271 |
|
|
ppc_fpu_quadro_mshl(res, diff); |
272 |
|
|
res.e -= diff; |
273 |
|
|
res.m1 |= X_prime; |
274 |
|
|
} |
275 |
|
|
// res = [107] |
276 |
|
|
} |
277 |
|
|
break; |
278 |
|
|
} |
279 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_NaN): |
280 |
|
|
res.s = a.s; |
281 |
|
|
res.type = ppc_fpr_NaN; |
282 |
|
|
break; |
283 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_zero): |
284 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_norm): |
285 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_Inf): |
286 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_zero): |
287 |
|
|
res.e = a.e; |
288 |
|
|
res.s = a.s; |
289 |
|
|
res.m0 = a.m0; |
290 |
|
|
res.m1 = a.m1; |
291 |
|
|
res.type = a.type; |
292 |
|
|
break; |
293 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_norm): |
294 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_NaN): |
295 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_NaN): |
296 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_NaN): |
297 |
|
|
res.e = b.e; |
298 |
|
|
res.s = b.s; |
299 |
|
|
res.m0 = b.m0; |
300 |
|
|
res.m1 = b.m1; |
301 |
|
|
res.type = b.type; |
302 |
|
|
break; |
303 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_Inf): |
304 |
|
|
if (a.s != b.s) { |
305 |
|
|
// +oo + -oo == NaN |
306 |
|
|
res.s = a.s ^ b.s; |
307 |
|
|
res.type = ppc_fpr_NaN; |
308 |
|
|
break; |
309 |
|
|
} |
310 |
|
|
// fall-thru |
311 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_norm): |
312 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_zero): |
313 |
|
|
res.s = a.s; |
314 |
|
|
res.type = a.type; |
315 |
|
|
break; |
316 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_Inf): |
317 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_Inf): |
318 |
|
|
res.s = b.s; |
319 |
|
|
res.type = b.type; |
320 |
|
|
break; |
321 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_zero): |
322 |
|
|
// round bla |
323 |
|
|
res.type = ppc_fpr_zero; |
324 |
|
|
res.s = a.s && b.s; |
325 |
|
|
break; |
326 |
|
|
} |
327 |
|
|
} |
328 |
|
|
|
329 |
|
|
/*
 *	a += b, with `carry' set to 1 if the 64 bit addition wrapped around
 *	and to 0 otherwise.
 */
inline void ppc_fpu_add_uint64_carry(uint64 &a, uint64 b, uint64 &carry)
{
	if (a+b < a) {
		carry = 1;
	} else {
		carry = 0;
	}
	a += b;
}
334 |
|
|
|
335 |
|
|
// 'res' has 56 significant bits on return, a + b have 56 significant bits each |
336 |
|
|
inline void ppc_fpu_mul(ppc_double &res, const ppc_double &a, const ppc_double &b) |
337 |
|
|
{ |
338 |
|
|
res.s = a.s ^ b.s; |
339 |
|
|
switch (PPC_FPR_TYPE2(a.type, b.type)) { |
340 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_norm): { |
341 |
|
|
res.type = ppc_fpr_norm; |
342 |
|
|
res.e = a.e + b.e; |
343 |
|
|
// printf("new exp: %d\n", res.e); |
344 |
|
|
// ht_printf("MUL:\na.m: %qb\nb.m: %qb\n", a.m, b.m); |
345 |
|
|
uint64 fH, fM1, fM2, fL; |
346 |
|
|
fL = (a.m & 0xffffffff) * (b.m & 0xffffffff); // [32] * [32] = [63,64] |
347 |
|
|
fM1 = (a.m >> 32) * (b.m & 0xffffffff); // [24] * [32] = [55,56] |
348 |
|
|
fM2 = (a.m & 0xffffffff) * (b.m >> 32); // [32] * [24] = [55,56] |
349 |
|
|
fH = (a.m >> 32) * (b.m >> 32); // [24] * [24] = [47,48] |
350 |
|
|
// ht_printf("fH: %qx fM1: %qx fM2: %qx fL: %qx\n", fH, fM1, fM2, fL); |
351 |
|
|
|
352 |
|
|
// calulate fH * 2^64 + (fM1 + fM2) * 2^32 + fL |
353 |
|
|
uint64 rL, rH; |
354 |
|
|
rL = fL; // rL = rH = [63,64] |
355 |
|
|
rH = fH; // rH = fH = [47,48] |
356 |
|
|
uint64 split; |
357 |
|
|
split = fM1 + fM2; |
358 |
|
|
uint64 carry; |
359 |
|
|
ppc_fpu_add_uint64_carry(rL, (split & 0xffffffff) << 32, carry); // rL = [63,64] |
360 |
|
|
rH += carry; // rH = [0 .. 2^48] |
361 |
|
|
rH += split >> 32; // rH = [0:48], where 46, 47 or 48 set |
362 |
|
|
|
363 |
|
|
// res.m = [0 0 .. 0 | rH_48 rH_47 .. rH_0 | rL_63 rL_62 .. rL_55] |
364 |
|
|
// [---------------------------------------------------------] |
365 |
|
|
// bit = [63 62 .. 58 | 57 56 .. 9 | 8 7 0 ] |
366 |
|
|
// [---------------------------------------------------------] |
367 |
|
|
// [15 bits zero | 49 bits rH | 8 most sign.bits rL ] |
368 |
|
|
res.m = rH << 9; |
369 |
|
|
res.m |= rL >> (64-9); |
370 |
|
|
// res.m = [58] |
371 |
|
|
|
372 |
|
|
// ht_printf("fH: %qx fM1: %qx fM2: %qx fL: %qx\n", fH, fM1, fM2, fL); |
373 |
|
|
if (res.m & (1ULL << 57)) { |
374 |
|
|
res.m >>= 2; |
375 |
|
|
res.e += 2; |
376 |
|
|
} else if (res.m & (1ULL << 56)) { |
377 |
|
|
res.m >>= 1; |
378 |
|
|
res.e++; |
379 |
|
|
} |
380 |
|
|
// res.m = [56] |
381 |
|
|
break; |
382 |
|
|
} |
383 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_NaN): |
384 |
|
|
res.type = a.type; |
385 |
|
|
res.e = a.e; |
386 |
|
|
break; |
387 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_norm): |
388 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_Inf): |
389 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_zero): |
390 |
|
|
res.s = a.s; |
391 |
|
|
// fall-thru |
392 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_Inf): |
393 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_norm): |
394 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_norm): |
395 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_zero): |
396 |
|
|
res.type = a.type; |
397 |
|
|
break; |
398 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_NaN): |
399 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_NaN): |
400 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_NaN): |
401 |
|
|
res.s = b.s; |
402 |
|
|
// fall-thru |
403 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_Inf): |
404 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_zero): |
405 |
|
|
res.type = b.type; |
406 |
|
|
break; |
407 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_Inf): |
408 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_zero): |
409 |
|
|
res.type = ppc_fpr_NaN; |
410 |
|
|
break; |
411 |
|
|
} |
412 |
|
|
} |
413 |
|
|
|
414 |
|
|
// 'res' has 'prec' significant bits on return, a + b have 56 significant bits each |
415 |
|
|
// for 111 >= prec >= 64 |
416 |
|
|
inline void ppc_fpu_mul_quadro(ppc_quadro &res, ppc_double &a, ppc_double &b, int prec) |
417 |
|
|
{ |
418 |
|
|
res.s = a.s ^ b.s; |
419 |
|
|
switch (PPC_FPR_TYPE2(a.type, b.type)) { |
420 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_norm): { |
421 |
|
|
res.type = ppc_fpr_norm; |
422 |
|
|
res.e = a.e + b.e; |
423 |
|
|
// printf("new exp: %d\n", res.e); |
424 |
|
|
// ht_printf("MUL:\na.m: %016qx\nb.m: %016qx\n", a.m, b.m); |
425 |
|
|
uint64 fH, fM1, fM2, fL; |
426 |
|
|
fL = (a.m & 0xffffffff) * (b.m & 0xffffffff); // [32] * [32] = [63,64] |
427 |
|
|
fM1 = (a.m >> 32) * (b.m & 0xffffffff); // [24] * [32] = [55,56] |
428 |
|
|
fM2 = (a.m & 0xffffffff) * (b.m >> 32); // [32] * [24] = [55,56] |
429 |
|
|
fH = (a.m >> 32) * (b.m >> 32); // [24] * [24] = [47,48] |
430 |
|
|
// ht_printf("fH: %016qx fM1: %016qx fM2: %016qx fL: %016qx\n", fH, fM1, fM2, fL); |
431 |
|
|
|
432 |
|
|
// calulate fH * 2^64 + (fM1 + fM2) * 2^32 + fL |
433 |
|
|
uint64 rL, rH; |
434 |
|
|
rL = fL; // rL = rH = [63,64] |
435 |
|
|
rH = fH; // rH = fH = [47,48] |
436 |
|
|
uint64 split; |
437 |
|
|
split = fM1 + fM2; |
438 |
|
|
uint64 carry; |
439 |
|
|
ppc_fpu_add_uint64_carry(rL, (split & 0xffffffff) << 32, carry); // rL = [63,64] |
440 |
|
|
rH += carry; // rH = [0 .. 2^48] |
441 |
|
|
rH += split >> 32; // rH = [0:48], where 46, 47 or 48 set |
442 |
|
|
|
443 |
|
|
// res.m0 = [0 0 .. 0 | rH_48 rH_47 .. rH_0 | rL_63 rL_62 .. rL_0] |
444 |
|
|
// [-----------------------------------------------------------] |
445 |
|
|
// log.bit= [127 126 .. 113 | 112 64 | 63 62 0 ] |
446 |
|
|
// [-----------------------------------------------------------] |
447 |
|
|
// [ 15 bits zero | 49 bits rH | 64 bits rL ] |
448 |
|
|
res.m0 = rH; |
449 |
|
|
res.m1 = rL; |
450 |
|
|
// res.m0|res.m1 = [111,112,113] |
451 |
|
|
|
452 |
|
|
// ht_printf("res = %016qx%016qx\n", res.m0, res.m1); |
453 |
|
|
if (res.m0 & (1ULL << 48)) { |
454 |
|
|
ppc_fpu_quadro_mshr(res, 2+(111-prec)); |
455 |
|
|
res.e += 2; |
456 |
|
|
} else if (res.m0 & (1ULL << 47)) { |
457 |
|
|
ppc_fpu_quadro_mshr(res, 1+(111-prec)); |
458 |
|
|
res.e += 1; |
459 |
|
|
} else { |
460 |
|
|
ppc_fpu_quadro_mshr(res, 111-prec); |
461 |
|
|
} |
462 |
|
|
// res.m0|res.m1 = [prec] |
463 |
|
|
break; |
464 |
|
|
} |
465 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_NaN): |
466 |
|
|
res.type = a.type; |
467 |
|
|
res.e = a.e; |
468 |
|
|
break; |
469 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_norm): |
470 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_Inf): |
471 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_zero): |
472 |
|
|
res.s = a.s; |
473 |
|
|
// fall-thru |
474 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_Inf): |
475 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_norm): |
476 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_norm): |
477 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_zero): |
478 |
|
|
res.type = a.type; |
479 |
|
|
break; |
480 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_NaN): |
481 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_NaN): |
482 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_NaN): |
483 |
|
|
res.s = b.s; |
484 |
|
|
// fall-thru |
485 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_Inf): |
486 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_zero): |
487 |
|
|
res.type = b.type; |
488 |
|
|
break; |
489 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_Inf): |
490 |
|
|
case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_zero): |
491 |
|
|
res.type = ppc_fpr_NaN; |
492 |
|
|
break; |
493 |
|
|
} |
494 |
|
|
} |
495 |
|
|
|
496 |
|
|
// calculate one of these: |
497 |
|
|
// + m1 * m2 + s |
498 |
|
|
// + m1 * m2 - s |
499 |
|
|
// - m1 * m2 + s |
500 |
|
|
// - m1 * m2 - s |
501 |
|
|
// using a 106 bit accumulator |
502 |
|
|
// |
503 |
|
|
// .752 |
504 |
|
|
// |
505 |
|
|
inline void ppc_fpu_mul_add(ppc_double &res, ppc_double &m1, ppc_double &m2, |
506 |
|
|
ppc_double &s) |
507 |
|
|
{ |
508 |
|
|
ppc_quadro p; |
509 |
|
|
/* ht_printf("m1 = %d * %016qx * 2^%d, %s\n", m1.s, m1.m, m1.e, |
510 |
|
|
ppc_fpu_get_fpr_type(m1.type)); |
511 |
|
|
ht_printf("m2 = %d * %016qx * 2^%d, %s\n", m2.s, m2.m, m2.e, |
512 |
|
|
ppc_fpu_get_fpr_type(m2.type));*/ |
513 |
|
|
// create product with 106 significant bits |
514 |
|
|
ppc_fpu_mul_quadro(p, m1, m2, 106); |
515 |
|
|
/* ht_printf("p = %d * %016qx%016qx * 2^%d, %s\n", p.s, p.m0, p.m1, p.e, |
516 |
|
|
ppc_fpu_get_fpr_type(p.type));*/ |
517 |
|
|
// convert s into ppc_quadro |
518 |
|
|
/* ht_printf("s = %d * %016qx * 2^%d %s\n", s.s, s.m, s.e, |
519 |
|
|
ppc_fpu_get_fpr_type(s.type));*/ |
520 |
|
|
ppc_quadro q; |
521 |
|
|
q.e = s.e; |
522 |
|
|
q.s = s.s; |
523 |
|
|
q.type = s.type; |
524 |
|
|
q.m0 = 0; |
525 |
|
|
q.m1 = s.m; |
526 |
|
|
// .. with 106 significant bits |
527 |
|
|
ppc_fpu_quadro_mshl(q, 106-56); |
528 |
|
|
/* ht_printf("q = %d * %016qx%016qx * 2^%d %s\n", q.s, q.m0, q.m1, q.e, |
529 |
|
|
ppc_fpu_get_fpr_type(q.type));*/ |
530 |
|
|
// now we must add p, q. |
531 |
|
|
ppc_quadro x; |
532 |
|
|
ppc_fpu_add_quadro(x, p, q); |
533 |
|
|
// x = [107] |
534 |
|
|
/* ht_printf("x = %d * %016qx%016qx * 2^%d %s\n", x.s, x.m0, x.m1, x.e, |
535 |
|
|
ppc_fpu_get_fpr_type(x.type));*/ |
536 |
|
|
res.type = x.type; |
537 |
|
|
res.s = x.s; |
538 |
|
|
res.e = x.e; |
539 |
|
|
if (x.type == ppc_fpr_norm) { |
540 |
|
|
res.m = x.m0 << 13; // 43 bits from m0 |
541 |
|
|
res.m |= (x.m1 >> (64-12)) << 1; // 12 bits from m1 |
542 |
|
|
res.m |= x.m1 & 1; // X' bit from m1 |
543 |
|
|
} |
544 |
|
|
/* ht_printf("res = %d * %016qx * 2^%d %s\n", res.s, res.m, res.e, |
545 |
|
|
ppc_fpu_get_fpr_type(res.type));*/ |
546 |
|
|
} |
547 |
|
|
|
548 |
|
|
/*
 *	res := a / b on unpacked operands.
 *
 *	norm / norm uses bitwise long division on the mantissas, producing at
 *	most 56 quotient bits. The sign is preset to a.s ^ b.s; the NaN cases
 *	overwrite it with the sign of the NaN operand that is propagated.
 *	The result mantissa is only written for norm / norm.
 */
inline void ppc_fpu_div(ppc_double &res, const ppc_double &a, const ppc_double &b)
{
	res.s = a.s ^ b.s;
	switch (PPC_FPR_TYPE2(a.type, b.type)) {
	case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_norm): {
		res.type = ppc_fpr_norm;
		res.e = a.e - b.e;
		res.m = 0;
		uint64 rem = a.m;
		const uint64 divisor = b.m;
		uint steps = 0;
		// one quotient bit per step, stop early once the remainder is 0
		for (; rem && (steps<56); steps++) {
			res.m <<= 1;
			if (rem >= divisor) {
				res.m |= 1;
				rem -= divisor;
			}
			rem <<= 1;
		}
		// left-justify the quotient bits produced so far
		res.m <<= 57-steps;
		if (res.m & (1ULL << 56)) {
			res.m >>= 1;
		} else {
			res.e--;
		}
		break;
	}
	case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_NaN):
		res.e = a.e;
		// fall-thru
	case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_norm):
	case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_Inf):
	case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_zero):
		// a is a NaN: take its sign as well
		res.s = a.s;
		// fall-thru
	case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_norm):
	case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_norm):
		// class of the result is the class of a
		res.type = a.type;
		break;
	case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_NaN):
	case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_NaN):
	case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_NaN):
		// b is the NaN
		res.s = b.s;
		res.type = b.type;
		break;
	case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_Inf):
		// x / oo == 0
		res.type = ppc_fpr_zero;
		break;
	case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_zero):
		// x / 0 == oo
		res.type = ppc_fpr_Inf;
		break;
	case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_Inf):
	case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_zero):
	case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_Inf):
	case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_zero):
		// every other Inf / zero combination yields NaN
		res.type = ppc_fpr_NaN;
		break;
	}
}
610 |
|
|
|
611 |
|
|
/*
 *	D := sqrt(B) on unpacked operands.
 *
 *	Negative normalized numbers and -oo yield NaN and set FPSCR_VXSQRT.
 *	Zero keeps its sign, +oo stays +oo, NaN stays NaN (only D.type is
 *	written for NaN results).
 *	Normalized input: start from B with a halved exponent and run six
 *	iterations of  D := 1/2 (D_old + B / D_old).
 */
inline void ppc_fpu_sqrt(ppc_double &D, const ppc_double &B)
{
	switch (B.type) {
	case ppc_fpr_norm: {
		if (B.s) {
			D.type = ppc_fpr_NaN;
			gCPU.fpscr |= FPSCR_VXSQRT;
			break;
		}
		// initial guess
		D = B;
		D.e /= 2;
		int rounds = 6;
		while (rounds-- > 0) {
			ppc_double prev = D;
			ppc_double quot;
			ppc_fpu_div(quot, B, prev);
			ppc_fpu_add(D, prev, quot);
			// halve the sum
			D.e--;
		}
		break;
	}
	case ppc_fpr_zero:
		D.type = ppc_fpr_zero;
		D.s = B.s;
		break;
	case ppc_fpr_Inf:
		if (!B.s) {
			D.type = ppc_fpr_Inf;
			D.s = 0;
		} else {
			D.type = ppc_fpr_NaN;
			gCPU.fpscr |= FPSCR_VXSQRT;
		}
		break;
	case ppc_fpr_NaN:
		D.type = ppc_fpr_NaN;
		break;
	}
}
654 |
|
|
|
655 |
|
|
void ppc_fpu_test() |
656 |
|
|
{ |
657 |
|
|
double bb = 1.0; |
658 |
|
|
uint64 b = *(uint64 *)&bb; |
659 |
|
|
ppc_double B; |
660 |
|
|
ppc_double D; |
661 |
|
|
ppc_fpu_unpack_double(B, b); |
662 |
|
|
ht_printf("%d\n", B.e); |
663 |
|
|
ppc_fpu_sqrt(D, B); |
664 |
|
|
uint64 d; |
665 |
|
|
gCPU.fpscr |= ppc_fpu_pack_double(D, d); |
666 |
|
|
printf("%f\n", *(double *)&d); |
667 |
|
|
/* ppc_double A, B, C, D, E; |
668 |
|
|
ppc_fpu_unpack_double(A, 0xc00fafcd6c40e500ULL); |
669 |
|
|
ppc_fpu_unpack_double(B, 0xc00fafcd6c40e4beULL); |
670 |
|
|
B.s ^= 1; |
671 |
|
|
ppc_fpu_add(E, A, B); |
672 |
|
|
uint64 e; |
673 |
|
|
ppc_fpu_pack_double(E, e); |
674 |
|
|
ht_printf("%qx\n", e); |
675 |
|
|
ppc_fpu_add(D, E, B);*/ |
676 |
|
|
|
677 |
|
|
/* ppc_double A, B, C; |
678 |
|
|
double a, b, c; |
679 |
|
|
A.type = B.type = ppc_fpr_norm; |
680 |
|
|
A.s = 1; |
681 |
|
|
A.e = 0; |
682 |
|
|
A.m = 0; |
683 |
|
|
A.m = ((1ULL<<56)-1)-((1ULL<<10)-1); |
684 |
|
|
ht_printf("%qb\n", A.m); |
685 |
|
|
B.s = 1; |
686 |
|
|
B.e = 0; |
687 |
|
|
B.m = 0; |
688 |
|
|
B.m = ((1ULL<<56)-1)-((1ULL<<50)-1); |
689 |
|
|
a = ppc_fpu_get_double(A); |
690 |
|
|
b = ppc_fpu_get_double(B); |
691 |
|
|
printf("%f + %f = \n", a, b); |
692 |
|
|
ppc_fpu_add(C, A, B); |
693 |
|
|
uint64 d; |
694 |
|
|
uint32 s; |
695 |
|
|
ppc_fpu_pack_double_as_single(C, d); |
696 |
|
|
ht_printf("%064qb\n", d); |
697 |
|
|
ppc_fpu_unpack_double(C, d); |
698 |
|
|
ppc_fpu_pack_single(C, s); |
699 |
|
|
ht_printf("single: %032b\n", s); |
700 |
|
|
ppc_single Cs; |
701 |
|
|
ppc_fpu_unpack_single(Cs, s); |
702 |
|
|
ppc_fpu_single_to_double(Cs, C); |
703 |
|
|
// ht_printf("%d\n", ppc_fpu_double_to_int(C)); |
704 |
|
|
c = ppc_fpu_get_double(C); |
705 |
|
|
printf("%f\n", c);*/ |
706 |
|
|
} |
707 |
|
|
|
708 |
|
|
/* |
709 |
|
|
* a and b must not be NaNs |
710 |
|
|
*/ |
711 |
|
|
inline uint32 ppc_fpu_compare(ppc_double &a, ppc_double &b) |
712 |
|
|
{ |
713 |
|
|
if (a.type == ppc_fpr_zero) { |
714 |
|
|
if (b.type == ppc_fpr_zero) return 2; |
715 |
|
|
return (b.s) ? 4: 8; |
716 |
|
|
} |
717 |
|
|
if (b.type == ppc_fpr_zero) return (a.s) ? 8: 4; |
718 |
|
|
if (a.s != b.s) return (a.s) ? 8: 4; |
719 |
|
|
if (a.e > b.e) return (a.s) ? 8: 4; |
720 |
|
|
if (a.e < b.e) return (a.s) ? 4: 8; |
721 |
|
|
if (a.m > b.m) return (a.s) ? 8: 4; |
722 |
|
|
if (a.m < b.m) return (a.s) ? 4: 8; |
723 |
|
|
return 2; |
724 |
|
|
} |
725 |
|
|
|
726 |
|
|
/*
 *	Convert a packed 64 bit value to a host double by unpacking it and
 *	delegating to the ppc_double overload.
 */
double ppc_fpu_get_double(uint64 d)
{
	ppc_double unpacked;
	ppc_fpu_unpack_double(unpacked, d);
	const double value = ppc_fpu_get_double(unpacked);
	return value;
}
732 |
|
|
|
733 |
|
|
/*
 *	Convert an unpacked number to a host double.
 *
 *	Only normalized numbers are converted: the mantissa is scaled down by
 *	2^55 and then scaled by 2^e, one factor of two at a time. Every other
 *	class (zero, Inf, NaN) yields 0.0.
 */
double ppc_fpu_get_double(ppc_double &d)
{
	if (d.type != ppc_fpr_norm) {
		return 0.0;
	}
	double r = d.m;
	for (int n = 55; n > 0; n--) {
		r = r / 2.0;
	}
	if (d.e < 0) {
		for (int n = d.e; n < 0; n++) {
			r = r / 2.0;
		}
	} else {
		for (int n = d.e; n > 0; n--) {
			r = r * 2.0;
		}
	}
	return (d.s) ? -r : r;
}
755 |
|
|
|
756 |
|
|
/*********************************************************************************** |
757 |
|
|
* |
758 |
|
|
*/ |
759 |
|
|
|
760 |
|
|
/*
 *	Exchange the roles of the two source operands: swaps frA/frB, a/b and
 *	op/rop of the enclosing scope.
 *	No trailing semicolon after while(0): the user supplies it (`SWAP;'),
 *	so the macro behaves like a single statement, also in front of `else'.
 */
#define SWAP do { \
	int tmp = frA; frA = frB; frB = tmp; \
	tmp = a; a = b; b = tmp; \
	X86FloatArithOp tmpop = op; op = rop; rop = tmpop; \
} while(0)
765 |
|
|
|
766 |
|
|
|
767 |
|
|
/*
 *	Emit code for  frD := frA (op) frB.
 *
 *	`op' and `rop' are the operation and its reversed-operand counterpart:
 *		register form:	op  == st(i)/st(0),	rop == st(0)/st(i)
 *		memory form:	op  == st(0)/mem,	rop == mem/st(0)
 *	so that  frA (op) frB == frB (rop) frA.
 *
 *	The emitted sequence depends on which of frA/frB currently have a
 *	float register mapping and on whether frD coincides with frA or frB.
 */
static void ppc_opc_gen_binary_floatop(X86FloatArithOp op, X86FloatArithOp rop, int frD, int frA, int frB)
{
	jitcFloatRegisterClobberAll();
//	jitcSetFPUPrecision(53);

	// make sure client float register aren't mapped to integer registers
	jitcClobberClientRegisterForFloat(frA);
	jitcClobberClientRegisterForFloat(frB);
	jitcInvalidateClientRegisterForFloat(frD);

	JitcFloatReg a = jitcGetClientFloatRegisterMapping(frA);
	JitcFloatReg b = jitcGetClientFloatRegisterMapping(frB);
	if (a == JITC_FLOAT_REG_NONE && b != JITC_FLOAT_REG_NONE) {
		// only b is mapped: exchange the operands so that a is the mapped one
		SWAP;
	}

	if (a == JITC_FLOAT_REG_NONE) {
		// neither a nor b is mapped
		if (frB == frD && frA != frD) {
			// frB = frA (op) frB
			b = jitcGetClientFloatRegister(frB);
			jitcFloatRegisterDirty(b);
			modrm_o modrm;
			asmFArith(rop, x86_mem2(modrm, &gCPU.fpr[frA]));
		} else if (frA == frD) {
			// frA = frA (op) frB
			a = jitcGetClientFloatRegister(frA);
			jitcFloatRegisterDirty(a);
			modrm_o modrm;
			asmFArith(op, x86_mem2(modrm, &gCPU.fpr[frB]));
		} else {
			// frA != frD != frB
			a = jitcGetClientFloatRegisterUnmapped(frA);
			modrm_o modrm;
			asmFArith(op, x86_mem2(modrm, &gCPU.fpr[frB]));
			JitcFloatReg d = jitcGetClientFloatRegisterMapping(frD);
			if (d != JITC_FLOAT_REG_NONE) {
				jitcFloatRegisterStoreAndPopTOP(d);
				jitcFloatRegisterDirty(d);
			} else {
				jitcMapClientFloatRegisterDirty(frD, a);
			}
		}
		return;
	}

	// from here on a is mapped
	if (frB == frD && frA != frD) {
		// b = st(a) (op) b
		b = jitcGetClientFloatRegister(frB, a);
		if (jitcFloatRegisterIsTOP(b)) {
			asmFArith_ST0(op, jitcFloatRegisterToNative(a));
		} else {
			jitcFloatRegisterXCHGToFront(a);
			asmFArith_STi(rop, jitcFloatRegisterToNative(b));
		}
		jitcFloatRegisterDirty(b);
		return;
	}

	if (frA == frD) {
		// st(a) = st(a) (op) b
		b = jitcGetClientFloatRegister(frB, a);
		if (jitcFloatRegisterIsTOP(b)) {
			asmFArith_STi(op, jitcFloatRegisterToNative(a));
		} else {
			jitcFloatRegisterXCHGToFront(a);
			asmFArith_ST0(rop, jitcFloatRegisterToNative(b));
		}
		jitcFloatRegisterDirty(a);
		return;
	}

	// frA != frD != frB (and frA is mapped, frD isn't mapped)
	a = jitcFloatRegisterDup(a, b);
	// now a is TOP
	if (b == JITC_FLOAT_REG_NONE) {
		modrm_o modrm;
		asmFArith(op, x86_mem2(modrm, &gCPU.fpr[frB]));
	} else {
		asmFArith_ST0(rop, jitcFloatRegisterToNative(b));
	}
	JitcFloatReg d = jitcGetClientFloatRegisterMapping(frD);
	if (d != JITC_FLOAT_REG_NONE) {
		jitcFloatRegisterStoreAndPopTOP(d);
		jitcFloatRegisterDirty(d);
	} else {
		jitcMapClientFloatRegisterDirty(frD, a);
	}
}
870 |
|
|
|
871 |
|
|
/*
 *	Emits x87 code for a one-operand operation, frD = op(frA).
 *
 *	op	x87 instruction applied to ST(0) through asmFSimple()
 *	frD	destination client floating point register
 *	frA	source client floating point register
 */
static inline void ppc_opc_gen_unary_floatop(X86FloatOp op, int frD, int frA)
{
	jitcClobberClientRegisterForFloat(frA);
	jitcInvalidateClientRegisterForFloat(frD);

	if (frD == frA) {
		// in place: operate on the register that backs frA
		JitcFloatReg reg = jitcGetClientFloatRegister(frA);
		jitcFloatRegisterDirty(reg);
		jitcFloatRegisterXCHGToFront(reg);
		asmFSimple(op);
		return;
	}

	// distinct destination: put a working copy of frA on top of the stack
	JitcFloatReg src = jitcGetClientFloatRegisterMapping(frA);
	JitcFloatReg top;
	if (src != JITC_FLOAT_REG_NONE) {
		top = jitcFloatRegisterDup(src);
	} else {
		top = jitcGetClientFloatRegisterUnmapped(frA);
	}

	JitcFloatReg dst = jitcGetClientFloatRegisterMapping(frD);
	if (dst != JITC_FLOAT_REG_NONE) {
		// frD already owns a register: compute, then store into it and pop
		asmFSimple(op);
		jitcFloatRegisterStoreAndPopTOP(dst);
		jitcFloatRegisterDirty(dst);
		return;
	}

	// frD has no register yet: the working copy becomes frD
	jitcMapClientFloatRegisterDirty(frD, top);
	asmFSimple(op);
}
898 |
|
|
|
899 |
|
|
|
900 |
|
|
/* |
901 |
|
|
fmadd FADD false |
902 |
|
|
fmsub FSUB false |
903 |
|
|
fnmadd FADD true |
904 |
|
|
fnmsub FSUBR false |
905 |
|
|
*/ |
906 |
|
|
|
907 |
|
|
static void ppc_opc_gen_ternary_floatop(X86FloatArithOp op, X86FloatArithOp rop, bool chs, int frD, int frA, int frC, int frB) |
908 |
|
|
{ |
909 |
|
|
jitcFloatRegisterClobberAll(); |
910 |
|
|
// jitcSetFPUPrecision(64); |
911 |
|
|
jitcClobberClientRegisterForFloat(frA); |
912 |
|
|
jitcClobberClientRegisterForFloat(frC); |
913 |
|
|
jitcClobberClientRegisterForFloat(frB); |
914 |
|
|
jitcInvalidateClientRegisterForFloat(frD); |
915 |
|
|
|
916 |
|
|
JitcFloatReg a = jitcGetClientFloatRegisterMapping(frA); |
917 |
|
|
if (a != JITC_FLOAT_REG_NONE) { |
918 |
|
|
ht_printf("askf lsa flsd\n"); |
919 |
|
|
a = jitcFloatRegisterDup(a, jitcGetClientFloatRegisterMapping(frC)); |
920 |
|
|
} else { |
921 |
|
|
a = jitcGetClientFloatRegisterUnmapped(frA, jitcGetClientFloatRegisterMapping(frC), jitcGetClientFloatRegisterMapping(frB)); |
922 |
|
|
} |
923 |
|
|
// a is TOP now |
924 |
|
|
JitcFloatReg c = jitcGetClientFloatRegisterMapping(frC); |
925 |
|
|
if (c != JITC_FLOAT_REG_NONE) { |
926 |
|
|
asmFArith_ST0(X86_FMUL, jitcFloatRegisterToNative(c)); |
927 |
|
|
} else { |
928 |
|
|
modrm_o modrm; |
929 |
|
|
asmFArith(X86_FMUL, x86_mem2(modrm, &gCPU.fpr[frC])); |
930 |
|
|
} |
931 |
|
|
JitcFloatReg b = jitcGetClientFloatRegisterMapping(frB); |
932 |
|
|
if (b != JITC_FLOAT_REG_NONE) { |
933 |
|
|
asmFArith_ST0(rop, jitcFloatRegisterToNative(b)); |
934 |
|
|
} else { |
935 |
|
|
modrm_o modrm; |
936 |
|
|
asmFArith(op, x86_mem2(modrm, &gCPU.fpr[frB])); |
937 |
|
|
} |
938 |
|
|
if (chs) { |
939 |
|
|
asmFSimple(FCHS); |
940 |
|
|
} |
941 |
|
|
JitcFloatReg d = jitcGetClientFloatRegisterMapping(frD); |
942 |
|
|
if (d == JITC_FLOAT_REG_NONE) { |
943 |
|
|
jitcMapClientFloatRegisterDirty(frD, a); |
944 |
|
|
} else { |
945 |
|
|
jitcFloatRegisterStoreAndPopTOP(d); |
946 |
|
|
jitcFloatRegisterDirty(d); |
947 |
|
|
} |
948 |
|
|
} |
949 |
|
|
|
950 |
|
|
#define JITC |
951 |
|
|
|
952 |
|
|
/*
 *	Call target of the code emitted by ppc_opc_gen_update_cr1().
 *	Reports the given opcode name through PPC_FPU_ERR.
 */
static void FASTCALL ppc_opc_gen_update_cr1_output_err(const char *err)
{
	PPC_FPU_ERR("%s\n", err);
}
956 |
|
|
|
957 |
|
|
static void ppc_opc_gen_update_cr1(const char *err) |
958 |
|
|
{ |
959 |
|
|
asmALU(X86_MOV, EAX, (uint32)err); |
960 |
|
|
asmCALL((NativeAddress)ppc_opc_gen_update_cr1_output_err); |
961 |
|
|
} |
962 |
|
|
|
963 |
|
|
/* |
964 |
|
|
* fabsx Floating Absolute Value |
965 |
|
|
* .484 |
966 |
|
|
*/ |
967 |
|
|
void ppc_opc_fabsx() |
968 |
|
|
{ |
969 |
|
|
int frD, frA, frB; |
970 |
|
|
PPC_OPC_TEMPL_X(gCPU.current_opc, frD, frA, frB); |
971 |
|
|
PPC_OPC_ASSERT(frA==0); |
972 |
|
|
gCPU.fpr[frD] = gCPU.fpr[frB] & ~FPU_SIGN_BIT; |
973 |
|
|
if (gCPU.current_opc & PPC_OPC_Rc) { |
974 |
|
|
// update cr1 flags |
975 |
|
|
PPC_FPU_ERR("fabs.\n"); |
976 |
|
|
} |
977 |
|
|
} |
978 |
|
|
/*
 *	JIT generator for fabsx.
 *	If frB is held in an x87 register, FABS is emitted; otherwise the sign
 *	bit is cleared with integer instructions on the upper word of the value.
 */
JITCFlow ppc_opc_gen_fabsx()
{
	int frD, frA, frB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, frD, frA, frB);

	if (jitcGetClientFloatRegisterMapping(frB) != JITC_FLOAT_REG_NONE) {
		ppc_opc_gen_unary_floatop(FABS, frD, frB);
	} else {
		// integer path: any x87 copy of frD is dropped first
		JitcFloatReg d = jitcGetClientFloatRegisterMapping(frD);
		if (d != JITC_FLOAT_REG_NONE) jitcFloatRegisterInvalidate(d);
		jitcClobberCarryAndFlags();
		if (frD == frB) {
			NativeReg upper = jitcGetClientRegisterDirty(PPC_FPR_U(frB));
			asmALU(X86_AND, upper, 0x7fffffff);
		} else {
			NativeReg srcUpper = jitcGetClientRegister(PPC_FPR_U(frB));
			NativeReg srcLower = jitcGetClientRegister(PPC_FPR_L(frB));
			NativeReg dstUpper = jitcMapClientRegisterDirty(PPC_FPR_U(frD));
			NativeReg dstLower = jitcMapClientRegisterDirty(PPC_FPR_L(frD));
			asmALU(X86_MOV, dstUpper, srcUpper);
			asmALU(X86_MOV, dstLower, srcLower);
			asmALU(X86_AND, dstUpper, 0x7fffffff);
		}
	}

	const bool record = (gJITC.current_opc & PPC_OPC_Rc) != 0;
	if (record) {
		// CR1 update is not generated, only a report call
		ppc_opc_gen_update_cr1("fabs.\n");
	}
	return flowContinue;
}
1007 |
|
|
/* |
1008 |
|
|
* faddx Floating Add (Double-Precision) |
1009 |
|
|
* .485 |
1010 |
|
|
*/ |
1011 |
|
|
void ppc_opc_faddx() |
1012 |
|
|
{ |
1013 |
|
|
int frD, frA, frB, frC; |
1014 |
|
|
PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC); |
1015 |
|
|
PPC_OPC_ASSERT(frC==0); |
1016 |
|
|
ppc_double A, B, D; |
1017 |
|
|
ppc_fpu_unpack_double(A, gCPU.fpr[frA]); |
1018 |
|
|
ppc_fpu_unpack_double(B, gCPU.fpr[frB]); |
1019 |
|
|
if (A.s != B.s && A.type == ppc_fpr_Inf && B.type == ppc_fpr_Inf) { |
1020 |
|
|
gCPU.fpscr |= FPSCR_VXISI; |
1021 |
|
|
} |
1022 |
|
|
ppc_fpu_add(D, A, B); |
1023 |
|
|
gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]); |
1024 |
|
|
if (gCPU.current_opc & PPC_OPC_Rc) { |
1025 |
|
|
// update cr1 flags |
1026 |
|
|
PPC_FPU_ERR("fadd.\n"); |
1027 |
|
|
} |
1028 |
|
|
} |
1029 |
|
|
/*
 *	JIT generator for faddx.
 *	With JITC defined, native code is emitted through
 *	ppc_opc_gen_binary_floatop(); otherwise the interpreter routine is used.
 */
JITCFlow ppc_opc_gen_faddx()
{
#ifndef JITC
	ppc_opc_gen_interpret(ppc_opc_faddx);
	return flowEndBlock;
#else
	int frD, frA, frB, frC;
	PPC_OPC_TEMPL_A(gJITC.current_opc, frD, frA, frB, frC);
	PPC_OPC_ASSERT(frC==0);

	// FADD is passed for both operation arguments
	ppc_opc_gen_binary_floatop(X86_FADD, X86_FADD, frD, frA, frB);

	const bool record = (gJITC.current_opc & PPC_OPC_Rc) != 0;
	if (record) {
		// CR1 update is not generated, only a report call
		ppc_opc_gen_update_cr1("fadd.\n");
	}
	return flowContinue;
#endif
}
1046 |
|
|
/* |
1047 |
|
|
* faddsx Floating Add Single |
1048 |
|
|
* .486 |
1049 |
|
|
*/ |
1050 |
|
|
void ppc_opc_faddsx() |
1051 |
|
|
{ |
1052 |
|
|
int frD, frA, frB, frC; |
1053 |
|
|
PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC); |
1054 |
|
|
PPC_OPC_ASSERT(frC==0); |
1055 |
|
|
ppc_double A, B, D; |
1056 |
|
|
ppc_fpu_unpack_double(A, gCPU.fpr[frA]); |
1057 |
|
|
ppc_fpu_unpack_double(B, gCPU.fpr[frB]); |
1058 |
|
|
if (A.s != B.s && A.type == ppc_fpr_Inf && B.type == ppc_fpr_Inf) { |
1059 |
|
|
gCPU.fpscr |= FPSCR_VXISI; |
1060 |
|
|
} |
1061 |
|
|
ppc_fpu_add(D, A, B); |
1062 |
|
|
gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]); |
1063 |
|
|
if (gCPU.current_opc & PPC_OPC_Rc) { |
1064 |
|
|
// update cr1 flags |
1065 |
|
|
PPC_FPU_ERR("fadds.\n"); |
1066 |
|
|
} |
1067 |
|
|
} |
1068 |
|
|
/*
 *	JIT generator for faddsx: no native code path, the interpreter routine
 *	is handed to ppc_opc_gen_interpret() and flowEndBlock is returned.
 */
JITCFlow ppc_opc_gen_faddsx()
{
	ppc_opc_gen_interpret(ppc_opc_faddsx);
	return flowEndBlock;
}
1073 |
|
|
/* |
1074 |
|
|
* fcmpo Floating Compare Ordered |
1075 |
|
|
* .488 |
1076 |
|
|
*/ |
1077 |
|
|
/*
 *	AND masks that clear one 4-bit field of a 32-bit word.
 *	Entry i clears bits 4*i .. 4*i+3; fcmpo/fcmpu index it with 7-crfD
 *	to clear the target field of gCPU.cr.
 */
static uint32 ppc_fpu_cmp_and_mask[8] = {
	~(0xfU <<  0),	// 0xfffffff0
	~(0xfU <<  4),	// 0xffffff0f
	~(0xfU <<  8),	// 0xfffff0ff
	~(0xfU << 12),	// 0xffff0fff
	~(0xfU << 16),	// 0xfff0ffff
	~(0xfU << 20),	// 0xff0fffff
	~(0xfU << 24),	// 0xf0ffffff
	~(0xfU << 28),	// 0x0fffffff
};
1087 |
|
|
void ppc_opc_fcmpo() |
1088 |
|
|
{ |
1089 |
|
|
int crfD, frA, frB; |
1090 |
|
|
PPC_OPC_TEMPL_X(gCPU.current_opc, crfD, frA, frB); |
1091 |
|
|
crfD >>= 2; |
1092 |
|
|
ppc_double A, B; |
1093 |
|
|
ppc_fpu_unpack_double(A, gCPU.fpr[frA]); |
1094 |
|
|
ppc_fpu_unpack_double(B, gCPU.fpr[frB]); |
1095 |
|
|
uint32 cmp; |
1096 |
|
|
if (A.type == ppc_fpr_NaN || B.type == ppc_fpr_NaN) { |
1097 |
|
|
gCPU.fpscr |= FPSCR_VXSNAN; |
1098 |
|
|
/*if (bla)*/ gCPU.fpscr |= FPSCR_VXVC; |
1099 |
|
|
cmp = 1; |
1100 |
|
|
} else { |
1101 |
|
|
cmp = ppc_fpu_compare(A, B); |
1102 |
|
|
} |
1103 |
|
|
crfD = 7-crfD; |
1104 |
|
|
gCPU.fpscr &= ~0x1f000; |
1105 |
|
|
gCPU.fpscr |= (cmp << 12); |
1106 |
|
|
gCPU.cr &= ppc_fpu_cmp_and_mask[crfD]; |
1107 |
|
|
gCPU.cr |= (cmp << (crfD * 4)); |
1108 |
|
|
} |
1109 |
|
|
/*
 *	JIT generator for fcmpo: no native code path, the interpreter routine
 *	is handed to ppc_opc_gen_interpret() and flowEndBlock is returned.
 */
JITCFlow ppc_opc_gen_fcmpo()
{
	ppc_opc_gen_interpret(ppc_opc_fcmpo);
	return flowEndBlock;
}
1114 |
|
|
/* |
1115 |
|
|
* fcmpu Floating Compare Unordered |
1116 |
|
|
* .489 |
1117 |
|
|
*/ |
1118 |
|
|
void ppc_opc_fcmpu() |
1119 |
|
|
{ |
1120 |
|
|
int crfD, frA, frB; |
1121 |
|
|
PPC_OPC_TEMPL_X(gCPU.current_opc, crfD, frA, frB); |
1122 |
|
|
crfD >>= 2; |
1123 |
|
|
ppc_double A, B; |
1124 |
|
|
ppc_fpu_unpack_double(A, gCPU.fpr[frA]); |
1125 |
|
|
ppc_fpu_unpack_double(B, gCPU.fpr[frB]); |
1126 |
|
|
uint32 cmp; |
1127 |
|
|
if (A.type == ppc_fpr_NaN || B.type == ppc_fpr_NaN) { |
1128 |
|
|
gCPU.fpscr |= FPSCR_VXSNAN; |
1129 |
|
|
cmp = 1; |
1130 |
|
|
} else { |
1131 |
|
|
cmp = ppc_fpu_compare(A, B); |
1132 |
|
|
} |
1133 |
|
|
crfD = 7-crfD; |
1134 |
|
|
gCPU.fpscr &= ~0x1f000; |
1135 |
|
|
gCPU.fpscr |= (cmp << 12); |
1136 |
|
|
gCPU.cr &= ppc_fpu_cmp_and_mask[crfD]; |
1137 |
|
|
gCPU.cr |= (cmp << (crfD * 4)); |
1138 |
|
|
} |
1139 |
|
|
/*
 *	JIT generator for fcmpu: no native code path, the interpreter routine
 *	is handed to ppc_opc_gen_interpret() and flowEndBlock is returned.
 */
JITCFlow ppc_opc_gen_fcmpu()
{
	ppc_opc_gen_interpret(ppc_opc_fcmpu);
	return flowEndBlock;
}
1144 |
|
|
/* |
1145 |
|
|
* fctiwx Floating Convert to Integer Word |
1146 |
|
|
* .492 |
1147 |
|
|
*/ |
1148 |
|
|
void ppc_opc_fctiwx() |
1149 |
|
|
{ |
1150 |
|
|
int frD, frA, frB; |
1151 |
|
|
PPC_OPC_TEMPL_X(gCPU.current_opc, frD, frA, frB); |
1152 |
|
|
PPC_OPC_ASSERT(frA==0); |
1153 |
|
|
ppc_double B; |
1154 |
|
|
ppc_fpu_unpack_double(B, gCPU.fpr[frB]); |
1155 |
|
|
gCPU.fpr[frD] = ppc_fpu_double_to_int(B); |
1156 |
|
|
if (gCPU.current_opc & PPC_OPC_Rc) { |
1157 |
|
|
// update cr1 flags |
1158 |
|
|
PPC_FPU_ERR("fctiw.\n"); |
1159 |
|
|
} |
1160 |
|
|
} |
1161 |
|
|
/*
 *	JIT generator for fctiwx.
 *	Loads frB onto the x87 stack and stores it as an integer directly into
 *	the memory image of frD (asmFISTP_D), popping the stack.
 */
JITCFlow ppc_opc_gen_fctiwx()
{
	int frD, frA, frB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, frD, frA, frB);
	PPC_OPC_ASSERT(frA==0);
	jitcClobberClientRegisterForFloat(frB);
	jitcInvalidateClientRegisterForFloat(frD);

	// the result goes to memory, so a register still mapped to frD is
	// released without being written back
	JitcFloatReg dst = jitcGetClientFloatRegisterMapping(frD);
	const bool releaseDst = (frB != frD) && (dst != JITC_FLOAT_REG_NONE);
	if (releaseDst) {
		jitcFloatRegisterXCHGToFront(dst);
		asmFFREEP(Float_ST0);
		gJITC.nativeFloatRegState[dst] = rsUnused;
		gJITC.clientFloatReg[frD] = JITC_FLOAT_REG_NONE;
		gJITC.nativeFloatTOP--;
	}

	modrm_o modrm;
	JitcFloatReg src = jitcGetClientFloatRegisterUnmapped(frB);
	asmFISTP_D(x86_mem2(modrm, &gCPU.fpr[frD]));
	// the store popped the stack: update the allocator state by hand
	gJITC.nativeFloatRegState[src] = rsUnused;
	gJITC.clientFloatReg[frB] = JITC_FLOAT_REG_NONE;
	gJITC.nativeFloatTOP--;

	const bool record = (gJITC.current_opc & PPC_OPC_Rc) != 0;
	if (record) {
		// CR1 update is not generated, only a report call
		ppc_opc_gen_update_cr1("fctiw.\n");
	}
	return flowContinue;
}
1191 |
|
|
/* |
1192 |
|
|
* fctiwzx Floating Convert to Integer Word with Round toward Zero |
1193 |
|
|
* .493 |
1194 |
|
|
*/ |
1195 |
|
|
void ppc_opc_fctiwzx() |
1196 |
|
|
{ |
1197 |
|
|
int frD, frA, frB; |
1198 |
|
|
PPC_OPC_TEMPL_X(gCPU.current_opc, frD, frA, frB); |
1199 |
|
|
PPC_OPC_ASSERT(frA==0); |
1200 |
|
|
uint32 oldfpscr = gCPU.fpscr; |
1201 |
|
|
gCPU.fpscr &= ~3; |
1202 |
|
|
gCPU.fpscr |= 1; |
1203 |
|
|
ppc_double B; |
1204 |
|
|
ppc_fpu_unpack_double(B, gCPU.fpr[frB]); |
1205 |
|
|
gCPU.fpr[frD] = ppc_fpu_double_to_int(B); |
1206 |
|
|
gCPU.fpscr = oldfpscr; |
1207 |
|
|
if (gCPU.current_opc & PPC_OPC_Rc) { |
1208 |
|
|
// update cr1 flags |
1209 |
|
|
PPC_FPU_ERR("fctiwz.\n"); |
1210 |
|
|
} |
1211 |
|
|
} |
1212 |
|
|
/*
 *	JIT generator for fctiwzx.
 *	With SSE3 the conversion is emitted with asmFISTTP. Without it the
 *	x87 control word is switched to the static value cw around an
 *	asmFISTP_D and then reloaded from gCPU.x87cw.
 */
JITCFlow ppc_opc_gen_fctiwzx()
{
	int frD, frA, frB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, frD, frA, frB);
	PPC_OPC_ASSERT(frA==0);

	// temporary control word; the generated code references its address
	static uint16 cw = 0xfff;

	modrm_o modrm;
	const bool haveSSE3 = gJITC.hostCPUCaps.sse3;
	if (!haveSSE3) {
		asmFLDCW(x86_mem2(modrm, &cw));
	}

	jitcClobberClientRegisterForFloat(frB);
	jitcInvalidateClientRegisterForFloat(frD);

	// the result goes to memory, so a register still mapped to frD is
	// released without being written back
	JitcFloatReg dst = jitcGetClientFloatRegisterMapping(frD);
	const bool releaseDst = (frB != frD) && (dst != JITC_FLOAT_REG_NONE);
	if (releaseDst) {
		jitcFloatRegisterXCHGToFront(dst);
		asmFFREEP(Float_ST0);
		gJITC.nativeFloatRegState[dst] = rsUnused;
		gJITC.clientFloatReg[frD] = JITC_FLOAT_REG_NONE;
		gJITC.nativeFloatTOP--;
	}

	JitcFloatReg src = jitcGetClientFloatRegisterUnmapped(frB);
	if (haveSSE3) {
		asmFISTTP(x86_mem2(modrm, &gCPU.fpr[frD]));
	} else {
		asmFISTP_D(x86_mem2(modrm, &gCPU.fpr[frD]));
	}
	// the store popped the stack: update the allocator state by hand
	gJITC.nativeFloatRegState[src] = rsUnused;
	gJITC.clientFloatReg[frB] = JITC_FLOAT_REG_NONE;
	gJITC.nativeFloatTOP--;

	if (!haveSSE3) {
		// back to the regular control word
		asmFLDCW(x86_mem2(modrm, &gCPU.x87cw));
	}

	const bool record = (gJITC.current_opc & PPC_OPC_Rc) != 0;
	if (record) {
		// CR1 update is not generated, only a report call
		ppc_opc_gen_update_cr1("fctiwz.\n");
	}
	return flowContinue;
}
1257 |
|
|
/* |
1258 |
|
|
* fdivx Floating Divide (Double-Precision) |
1259 |
|
|
* .494 |
1260 |
|
|
*/ |
1261 |
|
|
void ppc_opc_fdivx() |
1262 |
|
|
{ |
1263 |
|
|
int frD, frA, frB, frC; |
1264 |
|
|
PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC); |
1265 |
|
|
PPC_OPC_ASSERT(frC==0); |
1266 |
|
|
ppc_double A, B, D; |
1267 |
|
|
ppc_fpu_unpack_double(A, gCPU.fpr[frA]); |
1268 |
|
|
ppc_fpu_unpack_double(B, gCPU.fpr[frB]); |
1269 |
|
|
if (A.type == ppc_fpr_zero && B.type == ppc_fpr_zero) { |
1270 |
|
|
gCPU.fpscr |= FPSCR_VXZDZ; |
1271 |
|
|
} |
1272 |
|
|
if (A.type == ppc_fpr_Inf && B.type == ppc_fpr_Inf) { |
1273 |
|
|
gCPU.fpscr |= FPSCR_VXIDI; |
1274 |
|
|
} |
1275 |
|
|
if (B.type == ppc_fpr_zero && A.type != ppc_fpr_zero) { |
1276 |
|
|
// FIXME:: |
1277 |
|
|
gCPU.fpscr |= FPSCR_VXIDI; |
1278 |
|
|
} |
1279 |
|
|
ppc_fpu_div(D, A, B); |
1280 |
|
|
gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]); |
1281 |
|
|
if (gCPU.current_opc & PPC_OPC_Rc) { |
1282 |
|
|
// update cr1 flags |
1283 |
|
|
PPC_FPU_ERR("fdiv.\n"); |
1284 |
|
|
} |
1285 |
|
|
} |
1286 |
|
|
/*
 *	JIT generator for fdivx.
 *	With JITC defined, native code is emitted through
 *	ppc_opc_gen_binary_floatop(); otherwise the interpreter routine is used.
 */
JITCFlow ppc_opc_gen_fdivx()
{
#ifdef JITC
	int frD, frA, frB, frC;
	PPC_OPC_TEMPL_A(gJITC.current_opc, frD, frA, frB, frC);
	PPC_OPC_ASSERT(frC==0);

	// division is not commutative: FDIV and FDIVR are both supplied
	ppc_opc_gen_binary_floatop(X86_FDIV, X86_FDIVR, frD, frA, frB);

	const bool record = (gJITC.current_opc & PPC_OPC_Rc) != 0;
	if (record) {
		// CR1 update is not generated, only a report call
		ppc_opc_gen_update_cr1("fdiv.\n");
	}
	return flowContinue;
#else
	ppc_opc_gen_interpret(ppc_opc_fdivx);
	return flowEndBlock;
#endif
}
1303 |
|
|
/* |
1304 |
|
|
* fdivsx Floating Divide Single |
1305 |
|
|
* .495 |
1306 |
|
|
*/ |
1307 |
|
|
void ppc_opc_fdivsx() |
1308 |
|
|
{ |
1309 |
|
|
int frD, frA, frB, frC; |
1310 |
|
|
PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC); |
1311 |
|
|
PPC_OPC_ASSERT(frC==0); |
1312 |
|
|
ppc_double A, B, D; |
1313 |
|
|
ppc_fpu_unpack_double(A, gCPU.fpr[frA]); |
1314 |
|
|
ppc_fpu_unpack_double(B, gCPU.fpr[frB]); |
1315 |
|
|
if (A.type == ppc_fpr_zero && B.type == ppc_fpr_zero) { |
1316 |
|
|
gCPU.fpscr |= FPSCR_VXZDZ; |
1317 |
|
|
} |
1318 |
|
|
if (A.type == ppc_fpr_Inf && B.type == ppc_fpr_Inf) { |
1319 |
|
|
gCPU.fpscr |= FPSCR_VXIDI; |
1320 |
|
|
} |
1321 |
|
|
if (B.type == ppc_fpr_zero && A.type != ppc_fpr_zero) { |
1322 |
|
|
// FIXME:: |
1323 |
|
|
gCPU.fpscr |= FPSCR_VXIDI; |
1324 |
|
|
} |
1325 |
|
|
ppc_fpu_div(D, A, B); |
1326 |
|
|
gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]); |
1327 |
|
|
if (gCPU.current_opc & PPC_OPC_Rc) { |
1328 |
|
|
// update cr1 flags |
1329 |
|
|
PPC_FPU_ERR("fdivs.\n"); |
1330 |
|
|
} |
1331 |
|
|
} |
1332 |
|
|
/*
 *	JIT generator for fdivsx: no native code path, the interpreter routine
 *	is handed to ppc_opc_gen_interpret() and flowEndBlock is returned.
 */
JITCFlow ppc_opc_gen_fdivsx()
{
	ppc_opc_gen_interpret(ppc_opc_fdivsx);
	return flowEndBlock;
}
1337 |
|
|
/* |
1338 |
|
|
* fmaddx Floating Multiply-Add (Double-Precision) |
1339 |
|
|
* .496 |
1340 |
|
|
*/ |
1341 |
|
|
void ppc_opc_fmaddx() |
1342 |
|
|
{ |
1343 |
|
|
int frD, frA, frB, frC; |
1344 |
|
|
PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC); |
1345 |
|
|
ppc_double A, B, C, D; |
1346 |
|
|
ppc_fpu_unpack_double(A, gCPU.fpr[frA]); |
1347 |
|
|
ppc_fpu_unpack_double(B, gCPU.fpr[frB]); |
1348 |
|
|
ppc_fpu_unpack_double(C, gCPU.fpr[frC]); |
1349 |
|
|
ppc_fpu_mul_add(D, A, C, B); |
1350 |
|
|
gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]); |
1351 |
|
|
if (gCPU.current_opc & PPC_OPC_Rc) { |
1352 |
|
|
// update cr1 flags |
1353 |
|
|
PPC_FPU_ERR("fmadd.\n"); |
1354 |
|
|
} |
1355 |
|
|
} |
1356 |
|
|
/*
 *	JIT generator for fmaddx: ternary float op with FADD for both
 *	operation arguments and no sign change.
 */
JITCFlow ppc_opc_gen_fmaddx()
{
	int frD, frA, frB, frC;
	PPC_OPC_TEMPL_A(gJITC.current_opc, frD, frA, frB, frC);

	ppc_opc_gen_ternary_floatop(X86_FADD, X86_FADD, false, frD, frA, frC, frB);

	const bool record = (gJITC.current_opc & PPC_OPC_Rc) != 0;
	if (record) {
		// CR1 update is not generated, only a report call
		ppc_opc_gen_update_cr1("fmadd.\n");
	}
	return flowContinue;
}
1367 |
|
|
/* |
1368 |
|
|
* fmaddx Floating Multiply-Add Single |
1369 |
|
|
* .497 |
1370 |
|
|
*/ |
1371 |
|
|
void ppc_opc_fmaddsx() |
1372 |
|
|
{ |
1373 |
|
|
int frD, frA, frB, frC; |
1374 |
|
|
PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC); |
1375 |
|
|
ppc_double A, B, C, D; |
1376 |
|
|
ppc_fpu_unpack_double(A, gCPU.fpr[frA]); |
1377 |
|
|
ppc_fpu_unpack_double(B, gCPU.fpr[frB]); |
1378 |
|
|
ppc_fpu_unpack_double(C, gCPU.fpr[frC]); |
1379 |
|
|
ppc_fpu_mul_add(D, A, C, B); |
1380 |
|
|
gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]); |
1381 |
|
|
if (gCPU.current_opc & PPC_OPC_Rc) { |
1382 |
|
|
// update cr1 flags |
1383 |
|
|
PPC_FPU_ERR("fmadds.\n"); |
1384 |
|
|
} |
1385 |
|
|
} |
1386 |
|
|
/*
 *	JIT generator for fmaddsx: no native code path, the interpreter routine
 *	is handed to ppc_opc_gen_interpret() and flowEndBlock is returned.
 */
JITCFlow ppc_opc_gen_fmaddsx()
{
	ppc_opc_gen_interpret(ppc_opc_fmaddsx);
	return flowEndBlock;
}
1391 |
|
|
/* |
1392 |
|
|
* fmrx Floating Move Register |
1393 |
|
|
* .498 |
1394 |
|
|
*/ |
1395 |
|
|
void ppc_opc_fmrx() |
1396 |
|
|
{ |
1397 |
|
|
int frD, rA, frB; |
1398 |
|
|
PPC_OPC_TEMPL_X(gCPU.current_opc, frD, rA, frB); |
1399 |
|
|
PPC_OPC_ASSERT(rA==0); |
1400 |
|
|
gCPU.fpr[frD] = gCPU.fpr[frB]; |
1401 |
|
|
if (gCPU.current_opc & PPC_OPC_Rc) { |
1402 |
|
|
// update cr1 flags |
1403 |
|
|
PPC_FPU_ERR("fmr.\n"); |
1404 |
|
|
} |
1405 |
|
|
} |
1406 |
|
|
/*
 *	JIT generator for fmrx (frD = frB).
 *	Nothing is emitted for the move when frD == frB. Otherwise:
 *	- frB not in an x87 register: both words are copied with integer MOVs
 *	  and any x87 register mapped to frD is invalidated;
 *	- frB in an x87 register: frD either receives a duplicate of it, or,
 *	  if frD already has a register, frB is brought to the top and stored
 *	  into frD's register.
 */
JITCFlow ppc_opc_gen_fmrx()
{
	int frD, frA, frB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, frD, frA, frB);
	if (frD != frB) {
		JitcFloatReg a = jitcGetClientFloatRegisterMapping(frB);
		if (a == JITC_FLOAT_REG_NONE) {
			JitcFloatReg d = jitcGetClientFloatRegisterMapping(frD);
			if (d != JITC_FLOAT_REG_NONE) jitcFloatRegisterInvalidate(d);
			NativeReg bu = jitcGetClientRegister(PPC_FPR_U(frB));
			NativeReg bl = jitcGetClientRegister(PPC_FPR_L(frB));
			NativeReg du = jitcMapClientRegisterDirty(PPC_FPR_U(frD));
			NativeReg dl = jitcMapClientRegisterDirty(PPC_FPR_L(frD));
			asmALU(X86_MOV, du, bu);
			asmALU(X86_MOV, dl, bl);
		} else {
			jitcInvalidateClientRegisterForFloat(frD);
			JitcFloatReg d = jitcGetClientFloatRegisterMapping(frD);
			if (d == JITC_FLOAT_REG_NONE) {
				d = jitcFloatRegisterDup(a);
				jitcMapClientFloatRegisterDirty(frD, d);
			} else {
				jitcFloatRegisterXCHGToFront(a);
				asmFST(jitcFloatRegisterToNative(d));
				jitcFloatRegisterDirty(d);
			}
		}
	}
	if (gJITC.current_opc & PPC_OPC_Rc) {
		// CR1 update is not generated, only a report call. The message
		// names this opcode (it used to say "fabs." by mistake).
		ppc_opc_gen_update_cr1("fmr.\n");
	}
	return flowContinue;
}
1440 |
|
|
/* |
1441 |
|
|
* fmsubx Floating Multiply-Subtract (Double-Precision) |
1442 |
|
|
* .499 |
1443 |
|
|
*/ |
1444 |
|
|
void ppc_opc_fmsubx() |
1445 |
|
|
{ |
1446 |
|
|
int frD, frA, frB, frC; |
1447 |
|
|
PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC); |
1448 |
|
|
ppc_double A, B, C, D; |
1449 |
|
|
ppc_fpu_unpack_double(A, gCPU.fpr[frA]); |
1450 |
|
|
ppc_fpu_unpack_double(B, gCPU.fpr[frB]); |
1451 |
|
|
ppc_fpu_unpack_double(C, gCPU.fpr[frC]); |
1452 |
|
|
B.s ^= 1; |
1453 |
|
|
ppc_fpu_mul_add(D, A, C, B); |
1454 |
|
|
gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]); |
1455 |
|
|
if (gCPU.current_opc & PPC_OPC_Rc) { |
1456 |
|
|
// update cr1 flags |
1457 |
|
|
PPC_FPU_ERR("fmsub.\n"); |
1458 |
|
|
} |
1459 |
|
|
} |
1460 |
|
|
/*
 *	JIT generator for fmsubx: ternary float op with FSUB (memory form) and
 *	FSUBR (register form), no sign change.
 */
JITCFlow ppc_opc_gen_fmsubx()
{
	int frD, frA, frB, frC;
	PPC_OPC_TEMPL_A(gJITC.current_opc, frD, frA, frB, frC);

	ppc_opc_gen_ternary_floatop(X86_FSUB, X86_FSUBR, false, frD, frA, frC, frB);

	const bool record = (gJITC.current_opc & PPC_OPC_Rc) != 0;
	if (record) {
		// CR1 update is not generated, only a report call
		ppc_opc_gen_update_cr1("fmsub.\n");
	}
	return flowContinue;
}
1471 |
|
|
/* |
1472 |
|
|
* fmsubsx Floating Multiply-Subtract Single |
1473 |
|
|
* .500 |
1474 |
|
|
*/ |
1475 |
|
|
void ppc_opc_fmsubsx() |
1476 |
|
|
{ |
1477 |
|
|
int frD, frA, frB, frC; |
1478 |
|
|
PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC); |
1479 |
|
|
ppc_double A, B, C, D; |
1480 |
|
|
ppc_fpu_unpack_double(A, gCPU.fpr[frA]); |
1481 |
|
|
ppc_fpu_unpack_double(B, gCPU.fpr[frB]); |
1482 |
|
|
ppc_fpu_unpack_double(C, gCPU.fpr[frC]); |
1483 |
|
|
B.s ^= 1; |
1484 |
|
|
ppc_fpu_mul_add(D, A, C, B); |
1485 |
|
|
gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]); |
1486 |
|
|
if (gCPU.current_opc & PPC_OPC_Rc) { |
1487 |
|
|
// update cr1 flags |
1488 |
|
|
PPC_FPU_ERR("fmsubs.\n"); |
1489 |
|
|
} |
1490 |
|
|
} |
1491 |
|
|
/*
 *	JIT generator for fmsubsx: no native code path, the interpreter routine
 *	is handed to ppc_opc_gen_interpret() and flowEndBlock is returned.
 */
JITCFlow ppc_opc_gen_fmsubsx()
{
	ppc_opc_gen_interpret(ppc_opc_fmsubsx);
	return flowEndBlock;
}
1496 |
|
|
/* |
1497 |
|
|
* fmulx Floating Multiply (Double-Precision) |
1498 |
|
|
* .501 |
1499 |
|
|
*/ |
1500 |
|
|
void ppc_opc_fmulx() |
1501 |
|
|
{ |
1502 |
|
|
int frD, frA, frB, frC; |
1503 |
|
|
PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC); |
1504 |
|
|
PPC_OPC_ASSERT(frB==0); |
1505 |
|
|
ppc_double A, C, D; |
1506 |
|
|
ppc_fpu_unpack_double(A, gCPU.fpr[frA]); |
1507 |
|
|
ppc_fpu_unpack_double(C, gCPU.fpr[frC]); |
1508 |
|
|
if ((A.type == ppc_fpr_Inf && C.type == ppc_fpr_zero) |
1509 |
|
|
|| (A.type == ppc_fpr_zero && C.type == ppc_fpr_Inf)) { |
1510 |
|
|
gCPU.fpscr |= FPSCR_VXIMZ; |
1511 |
|
|
} |
1512 |
|
|
ppc_fpu_mul(D, A, C); |
1513 |
|
|
gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]); |
1514 |
|
|
if (gCPU.current_opc & PPC_OPC_Rc) { |
1515 |
|
|
// update cr1 flags |
1516 |
|
|
PPC_FPU_ERR("fmul.\n"); |
1517 |
|
|
} |
1518 |
|
|
} |
1519 |
|
|
/*
 *	JIT generator for fmulx.
 *	With JITC defined, native code is emitted through
 *	ppc_opc_gen_binary_floatop() on frA and frC; otherwise the interpreter
 *	routine is used.
 */
JITCFlow ppc_opc_gen_fmulx()
{
#ifndef JITC
	ppc_opc_gen_interpret(ppc_opc_fmulx);
	return flowEndBlock;
#else
	int frD, frA, frB, frC;
	PPC_OPC_TEMPL_A(gJITC.current_opc, frD, frA, frB, frC);
	PPC_OPC_ASSERT(frB==0);

	// FMUL is passed for both operation arguments
	ppc_opc_gen_binary_floatop(X86_FMUL, X86_FMUL, frD, frA, frC);

	const bool record = (gJITC.current_opc & PPC_OPC_Rc) != 0;
	if (record) {
		// CR1 update is not generated, only a report call
		ppc_opc_gen_update_cr1("fmul.\n");
	}
	return flowContinue;
#endif
}
1536 |
|
|
/* |
1537 |
|
|
* fmulsx Floating Multiply Single |
1538 |
|
|
* .502 |
1539 |
|
|
*/ |
1540 |
|
|
void ppc_opc_fmulsx() |
1541 |
|
|
{ |
1542 |
|
|
int frD, frA, frB, frC; |
1543 |
|
|
PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC); |
1544 |
|
|
PPC_OPC_ASSERT(frB==0); |
1545 |
|
|
ppc_double A, C, D; |
1546 |
|
|
ppc_fpu_unpack_double(A, gCPU.fpr[frA]); |
1547 |
|
|
ppc_fpu_unpack_double(C, gCPU.fpr[frC]); |
1548 |
|
|
if ((A.type == ppc_fpr_Inf && C.type == ppc_fpr_zero) |
1549 |
|
|
|| (A.type == ppc_fpr_zero && C.type == ppc_fpr_Inf)) { |
1550 |
|
|
gCPU.fpscr |= FPSCR_VXIMZ; |
1551 |
|
|
} |
1552 |
|
|
ppc_fpu_mul(D, A, C); |
1553 |
|
|
gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]); |
1554 |
|
|
if (gCPU.current_opc & PPC_OPC_Rc) { |
1555 |
|
|
// update cr1 flags |
1556 |
|
|
PPC_FPU_ERR("fmuls.\n"); |
1557 |
|
|
} |
1558 |
|
|
} |
1559 |
|
|
/*
 *	JIT generator for fmulsx: no native code path, the interpreter routine
 *	is handed to ppc_opc_gen_interpret() and flowEndBlock is returned.
 */
JITCFlow ppc_opc_gen_fmulsx()
{
	ppc_opc_gen_interpret(ppc_opc_fmulsx);
	return flowEndBlock;
}
1564 |
|
|
/* |
1565 |
|
|
* fnabsx Floating Negative Absolute Value |
1566 |
|
|
* .503 |
1567 |
|
|
*/ |
1568 |
|
|
void ppc_opc_fnabsx() |
1569 |
|
|
{ |
1570 |
|
|
int frD, frA, frB; |
1571 |
|
|
PPC_OPC_TEMPL_X(gCPU.current_opc, frD, frA, frB); |
1572 |
|
|
PPC_OPC_ASSERT(frA==0); |
1573 |
|
|
gCPU.fpr[frD] = gCPU.fpr[frB] | FPU_SIGN_BIT; |
1574 |
|
|
if (gCPU.current_opc & PPC_OPC_Rc) { |
1575 |
|
|
// update cr1 flags |
1576 |
|
|
PPC_FPU_ERR("fnabs.\n"); |
1577 |
|
|
} |
1578 |
|
|
} |
1579 |
|
|
/*
 *	JIT generator for fnabsx (frD = -|frB|).
 *	If frB is not held in an x87 register, the sign bit is set with integer
 *	instructions on the upper word. Otherwise FABS is emitted through
 *	ppc_opc_gen_unary_floatop(), followed by FCHS on frD's register.
 */
JITCFlow ppc_opc_gen_fnabsx()
{
	int frD, frA, frB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, frD, frA, frB);
	if (jitcGetClientFloatRegisterMapping(frB) == JITC_FLOAT_REG_NONE) {
		JitcFloatReg d = jitcGetClientFloatRegisterMapping(frD);
		if (d != JITC_FLOAT_REG_NONE) jitcFloatRegisterInvalidate(d);
		jitcClobberCarryAndFlags();
		if (frD != frB) {
			NativeReg bh = jitcGetClientRegister(PPC_FPR_U(frB));
			NativeReg bl = jitcGetClientRegister(PPC_FPR_L(frB));
			NativeReg dh = jitcMapClientRegisterDirty(PPC_FPR_U(frD));
			NativeReg dl = jitcMapClientRegisterDirty(PPC_FPR_L(frD));
			asmALU(X86_MOV, dh, bh);
			asmALU(X86_MOV, dl, bl);
			asmALU(X86_OR, dh, 0x80000000);
		} else {
			NativeReg b = jitcGetClientRegisterDirty(PPC_FPR_U(frB));
			asmALU(X86_OR, b, 0x80000000);
		}
	} else {
		ppc_opc_gen_unary_floatop(FABS, frD, frB);
		// FCHS works on ST(0). If frD already had its own x87 register,
		// the helper ends with a store-and-pop into it, so ST(0) is then
		// some other value. Bring frD's register (mapped and dirty on
		// every path of the helper) to the top before negating.
		JitcFloatReg d = jitcGetClientFloatRegisterMapping(frD);
		jitcFloatRegisterXCHGToFront(d);
		asmFSimple(FCHS);
	}
	if (gJITC.current_opc & PPC_OPC_Rc) {
		// CR1 update is not generated, only a report call
		ppc_opc_gen_update_cr1("fnabs.\n");
	}
	return flowContinue;
}
1609 |
|
|
/* |
1610 |
|
|
* fnegx Floating Negate |
1611 |
|
|
* .504 |
1612 |
|
|
*/ |
1613 |
|
|
void ppc_opc_fnegx() |
1614 |
|
|
{ |
1615 |
|
|
int frD, frA, frB; |
1616 |
|
|
PPC_OPC_TEMPL_X(gCPU.current_opc, frD, frA, frB); |
1617 |
|
|
PPC_OPC_ASSERT(frA==0); |
1618 |
|
|
gCPU.fpr[frD] = gCPU.fpr[frB] ^ FPU_SIGN_BIT; |
1619 |
|
|
if (gCPU.current_opc & PPC_OPC_Rc) { |
1620 |
|
|
// update cr1 flags |
1621 |
|
|
PPC_FPU_ERR("fneg.\n"); |
1622 |
|
|
} |
1623 |
|
|
} |
1624 |
|
|
/*
 *	JIT generator for fnegx.
 *	If frB is held in an x87 register, FCHS is emitted; otherwise the sign
 *	bit is inverted with integer instructions on the upper word.
 */
JITCFlow ppc_opc_gen_fnegx()
{
	int frD, frA, frB;
	PPC_OPC_TEMPL_X(gJITC.current_opc, frD, frA, frB);

	if (jitcGetClientFloatRegisterMapping(frB) != JITC_FLOAT_REG_NONE) {
		ppc_opc_gen_unary_floatop(FCHS, frD, frB);
	} else {
		// integer path: any x87 copy of frD is dropped first
		JitcFloatReg d = jitcGetClientFloatRegisterMapping(frD);
		if (d != JITC_FLOAT_REG_NONE) jitcFloatRegisterInvalidate(d);
		jitcClobberCarryAndFlags();
		if (frD == frB) {
			NativeReg upper = jitcGetClientRegisterDirty(PPC_FPR_U(frB));
			asmALU(X86_XOR, upper, 0x80000000);
		} else {
			NativeReg srcUpper = jitcGetClientRegister(PPC_FPR_U(frB));
			NativeReg srcLower = jitcGetClientRegister(PPC_FPR_L(frB));
			NativeReg dstUpper = jitcMapClientRegisterDirty(PPC_FPR_U(frD));
			NativeReg dstLower = jitcMapClientRegisterDirty(PPC_FPR_L(frD));
			asmALU(X86_MOV, dstUpper, srcUpper);
			asmALU(X86_MOV, dstLower, srcLower);
			asmALU(X86_XOR, dstUpper, 0x80000000);
		}
	}

	const bool record = (gJITC.current_opc & PPC_OPC_Rc) != 0;
	if (record) {
		// CR1 update is not generated, only a report call
		ppc_opc_gen_update_cr1("fneg.\n");
	}
	return flowContinue;
}
1653 |
|
|
/* |
1654 |
|
|
* fnmaddx Floating Negative Multiply-Add (Double-Precision) |
1655 |
|
|
* .505 |
1656 |
|
|
*/ |
1657 |
|
|
void ppc_opc_fnmaddx() |
1658 |
|
|
{ |
1659 |
|
|
int frD, frA, frB, frC; |
1660 |
|
|
PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC); |
1661 |
|
|
ppc_double A, B, C, D; |
1662 |
|
|
ppc_fpu_unpack_double(A, gCPU.fpr[frA]); |
1663 |
|
|
ppc_fpu_unpack_double(B, gCPU.fpr[frB]); |
1664 |
|
|
ppc_fpu_unpack_double(C, gCPU.fpr[frC]); |
1665 |
|
|
ppc_fpu_mul_add(D, A, C, B); |
1666 |
|
|
D.s ^= 1; |
1667 |
|
|
gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]); |
1668 |
|
|
if (gCPU.current_opc & PPC_OPC_Rc) { |
1669 |
|
|
// update cr1 flags |
1670 |
|
|
PPC_FPU_ERR("fnmadd.\n"); |
1671 |
|
|
} |
1672 |
|
|
} |
1673 |
|
|
/*
 *	JIT generator for fnmaddx: ternary float op with FADD for both
 *	operation arguments, followed by a sign change (chs = true).
 */
JITCFlow ppc_opc_gen_fnmaddx()
{
	int frD, frA, frB, frC;
	PPC_OPC_TEMPL_A(gJITC.current_opc, frD, frA, frB, frC);

	ppc_opc_gen_ternary_floatop(X86_FADD, X86_FADD, true, frD, frA, frC, frB);

	const bool record = (gJITC.current_opc & PPC_OPC_Rc) != 0;
	if (record) {
		// CR1 update is not generated, only a report call
		ppc_opc_gen_update_cr1("fnmadd.\n");
	}
	return flowContinue;
}
1684 |
|
|
/* |
1685 |
|
|
* fnmaddsx Floating Negative Multiply-Add Single |
1686 |
|
|
* .506 |
1687 |
|
|
*/ |
1688 |
|
|
void ppc_opc_fnmaddsx() |
1689 |
|
|
{ |
1690 |
|
|
int frD, frA, frB, frC; |
1691 |
|
|
PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC); |
1692 |
|
|
ppc_double A, B, C, D; |
1693 |
|
|
ppc_fpu_unpack_double(A, gCPU.fpr[frA]); |
1694 |
|
|
ppc_fpu_unpack_double(B, gCPU.fpr[frB]); |
1695 |
|
|
ppc_fpu_unpack_double(C, gCPU.fpr[frC]); |
1696 |
|
|
ppc_fpu_mul_add(D, A, C, B); |
1697 |
|
|
D.s ^= 1; |
1698 |
|
|
gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]); |
1699 |
|
|
if (gCPU.current_opc & PPC_OPC_Rc) { |
1700 |
|
|
// update cr1 flags |
1701 |
|
|
PPC_FPU_ERR("fnmadds.\n"); |
1702 |
|
|
} |
1703 |
|
|
} |
1704 |
|
|
/*
 *	JIT generator for fnmaddsx: no native code path, the interpreter routine
 *	is handed to ppc_opc_gen_interpret() and flowEndBlock is returned.
 */
JITCFlow ppc_opc_gen_fnmaddsx()
{
	ppc_opc_gen_interpret(ppc_opc_fnmaddsx);
	return flowEndBlock;
}
1709 |
|
|
/* |
1710 |
|
|
* fnmsubx Floating Negative Multiply-Subtract (Double-Precision) |
1711 |
|
|
* .507 |
1712 |
|
|
*/ |
1713 |
|
|
void ppc_opc_fnmsubx() |
1714 |
|
|
{ |
1715 |
|
|
int frD, frA, frB, frC; |
1716 |
|
|
PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC); |
1717 |
|
|
ppc_double A, B, C, D; |
1718 |
|
|
ppc_fpu_unpack_double(A, gCPU.fpr[frA]); |
1719 |
|
|
ppc_fpu_unpack_double(B, gCPU.fpr[frB]); |
1720 |
|
|
ppc_fpu_unpack_double(C, gCPU.fpr[frC]); |
1721 |
|
|
B.s ^= 1; |
1722 |
|
|
ppc_fpu_mul_add(D, A, C, B); |
1723 |
|
|
D.s ^= 1; |
1724 |
|
|
gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]); |
1725 |
|
|
if (gCPU.current_opc & PPC_OPC_Rc) { |
1726 |
|
|
// update cr1 flags |
1727 |
|
|
PPC_FPU_ERR("fnmsub.\n"); |
1728 |
|
|
} |
1729 |
|
|
} |
1730 |
|
|
/*
 *	JIT code generator for fnmsubx.
 *
 *	Decodes the A-form operands from gJITC.current_opc and hands them to
 *	ppc_opc_gen_ternary_floatop() with X86_FSUBR / X86_FSUB and the
 *	boolean argument set to false (presumably the reversed subtraction
 *	already yields the negated value, so no extra sign change is
 *	requested -- confirm against ppc_opc_gen_ternary_floatop).
 *	For the record form (Rc set) ppc_opc_gen_update_cr1() is called too.
 *	Always returns flowContinue.
 */
JITCFlow ppc_opc_gen_fnmsubx()
{
	int dst, srcA, srcB, srcC;
	PPC_OPC_TEMPL_A(gJITC.current_opc, dst, srcA, srcB, srcC);

	// operand order is (frA, frC, frB), matching the interpreter's mul_add call
	ppc_opc_gen_ternary_floatop(X86_FSUBR, X86_FSUB, false, dst, srcA, srcC, srcB);

	const bool recordForm = (gJITC.current_opc & PPC_OPC_Rc) != 0;
	if (recordForm) {
		// update cr1 flags
		ppc_opc_gen_update_cr1("fnmsub.\n");
	}
	return flowContinue;
}
1741 |
|
|
/* |
1742 |
|
|
* fnmsubsx Floating Negative Multiply-Subtract Single |
1743 |
|
|
* .508 |
1744 |
|
|
*/ |
1745 |
|
|
void ppc_opc_fnmsubsx() |
1746 |
|
|
{ |
1747 |
|
|
int frD, frA, frB, frC; |
1748 |
|
|
PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC); |
1749 |
|
|
ppc_double A, B, C, D; |
1750 |
|
|
ppc_fpu_unpack_double(A, gCPU.fpr[frA]); |
1751 |
|
|
ppc_fpu_unpack_double(B, gCPU.fpr[frB]); |
1752 |
|
|
ppc_fpu_unpack_double(C, gCPU.fpr[frC]); |
1753 |
|
|
B.s ^= 1; |
1754 |
|
|
ppc_fpu_mul_add(D, A, C, B); |
1755 |
|
|
D.s ^= 1; |
1756 |
|
|
gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]); |
1757 |
|
|
if (gCPU.current_opc & PPC_OPC_Rc) { |
1758 |
|
|
// update cr1 flags |
1759 |
|
|
PPC_FPU_ERR("fnmsubs.\n"); |
1760 |
|
|
} |
1761 |
|
|
} |
1762 |
|
|
/*
 *	JIT code generator for fnmsubsx.
 *
 *	There is no native translation: the instruction is delegated to the
 *	interpreter handler ppc_opc_fnmsubsx via ppc_opc_gen_interpret(),
 *	and flowEndBlock is returned.
 */
JITCFlow ppc_opc_gen_fnmsubsx()
{
	ppc_opc_gen_interpret(ppc_opc_fnmsubsx);

	return flowEndBlock;
}
1767 |
|
|
/* |
1768 |
|
|
* fresx Floating Reciprocal Estimate Single |
1769 |
|
|
* .509 |
1770 |
|
|
*/ |
1771 |
|
|
void ppc_opc_fresx() |
1772 |
|
|
{ |
1773 |
|
|
int frD, frA, frB, frC; |
1774 |
|
|
PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC); |
1775 |
|
|
PPC_OPC_ASSERT(frA==0 && frC==0); |
1776 |
|
|
if (gCPU.current_opc & PPC_OPC_Rc) { |
1777 |
|
|
// update cr1 flags |
1778 |
|
|
PPC_FPU_ERR("fres.\n"); |
1779 |
|
|
} |
1780 |
|
|
PPC_FPU_ERR("fres\n"); |
1781 |
|
|
} |
1782 |
|
|
/*
 *	JIT code generator for fresx.
 *
 *	There is no native translation: the instruction is delegated to the
 *	interpreter handler ppc_opc_fresx via ppc_opc_gen_interpret(),
 *	and flowEndBlock is returned.
 */
JITCFlow ppc_opc_gen_fresx()
{
	ppc_opc_gen_interpret(ppc_opc_fresx);

	return flowEndBlock;
}
1787 |
|
|
/* |
1788 |
|
|
* frspx Floating Round to Single |
1789 |
|
|
* .511 |
1790 |
|
|
*/ |
1791 |
|
|
void ppc_opc_frspx() |
1792 |
|
|
{ |
1793 |
|
|
int frD, frA, frB; |
1794 |
|
|
PPC_OPC_TEMPL_X(gCPU.current_opc, frD, frA, frB); |
1795 |
|
|
PPC_OPC_ASSERT(frA==0); |
1796 |
|
|
ppc_double B; |
1797 |
|
|
ppc_fpu_unpack_double(B, gCPU.fpr[frB]); |
1798 |
|
|
gCPU.fpscr |= ppc_fpu_pack_double_as_single(B, gCPU.fpr[frD]); |
1799 |
|
|
if (gCPU.current_opc & PPC_OPC_Rc) { |
1800 |
|
|
// update cr1 flags |
1801 |
|
|
PPC_FPU_ERR("frsp.\n"); |
1802 |
|
|
} |
1803 |
|
|
} |
1804 |
|
|
/*
 *	JIT code generator for frspx.
 *
 *	There is no native translation: the instruction is delegated to the
 *	interpreter handler ppc_opc_frspx via ppc_opc_gen_interpret(),
 *	and flowEndBlock is returned.
 */
JITCFlow ppc_opc_gen_frspx()
{
	ppc_opc_gen_interpret(ppc_opc_frspx);

	return flowEndBlock;
}
1809 |
|
|
/* |
1810 |
|
|
* frsqrtex Floating Reciprocal Square Root Estimate |
1811 |
|
|
* .512 |
1812 |
|
|
*/ |
1813 |
|
|
void ppc_opc_frsqrtex() |
1814 |
|
|
{ |
1815 |
|
|
int frD, frA, frB, frC; |
1816 |
|
|
PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC); |
1817 |
|
|
PPC_OPC_ASSERT(frA==0 && frC==0); |
1818 |
|
|
ppc_double B; |
1819 |
|
|
ppc_double D; |
1820 |
|
|
ppc_double E; |
1821 |
|
|
ppc_double Q; |
1822 |
|
|
ppc_fpu_unpack_double(B, gCPU.fpr[frB]); |
1823 |
|
|
ppc_fpu_sqrt(Q, B); |
1824 |
|
|
E.type = ppc_fpr_norm; E.s = 0; E.e = 0; E.m = 0x80000000000000ULL; |
1825 |
|
|
ppc_fpu_div(D, E, Q); |
1826 |
|
|
gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]); |
1827 |
|
|
if (gCPU.current_opc & PPC_OPC_Rc) { |
1828 |
|
|
// update cr1 flags |
1829 |
|
|
PPC_FPU_ERR("frsqrte.\n"); |
1830 |
|
|
} |
1831 |
|
|
} |
1832 |
|
|
/*
 *	JIT code generator for frsqrtex.
 *
 *	Emits an FSQRT of frB into frD via ppc_opc_gen_unary_floatop(), then
 *	emits FLD1 followed by a popping X86_FDIVR against the native
 *	register mapped to frD (presumably leaving 1.0 / sqrt(frB) there).
 *	gJITC.nativeFloatTOP is incremented around the FLD1 and decremented
 *	after the popping divide; when it is already 8 beforehand, the
 *	register mapped to frD is popped first via jitcPopFloatStack()
 *	(presumably to make room on the x87 stack -- confirm).
 *	The statement order below is significant and must not be changed.
 *
 *	For the record form (Rc set) ppc_opc_gen_update_cr1() is called too.
 *	Always returns flowContinue.  frA and frC must be 0.
 */
JITCFlow ppc_opc_gen_frsqrtex()
{
	int dst, unusedA, src, unusedC;
	PPC_OPC_TEMPL_A(gJITC.current_opc, dst, unusedA, src, unusedC);
	PPC_OPC_ASSERT(unusedA==0 && unusedC==0);

	// frD = sqrt(frB)
	ppc_opc_gen_unary_floatop(FSQRT, dst, src);

	if (gJITC.nativeFloatTOP == 8) {
		jitcPopFloatStack(jitcGetClientFloatRegisterMapping(dst), JITC_FLOAT_REG_NONE);
	}

	// track the temporary that FLD1 pushes
	gJITC.nativeFloatTOP++;
	asmFSimple(FLD1);
	asmFArithP_STi(X86_FDIVR, jitcFloatRegisterToNative(jitcGetClientFloatRegisterMapping(dst)));
	gJITC.nativeFloatTOP--;

	if (gJITC.current_opc & PPC_OPC_Rc) {
		// update cr1 flags
		ppc_opc_gen_update_cr1("frsqrte.\n");
	}
	return flowContinue;
}
1851 |
|
|
/* |
1852 |
|
|
* fselx Floating Select |
1853 |
|
|
* .514 |
1854 |
|
|
*/ |
1855 |
|
|
void ppc_opc_fselx() |
1856 |
|
|
{ |
1857 |
|
|
int frD, frA, frB, frC; |
1858 |
|
|
PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC); |
1859 |
|
|
ppc_double A; |
1860 |
|
|
ppc_fpu_unpack_double(A, gCPU.fpr[frA]); |
1861 |
|
|
if (A.type == ppc_fpr_NaN || (A.type != ppc_fpr_zero && A.s)) { |
1862 |
|
|
gCPU.fpr[frD] = gCPU.fpr[frB]; |
1863 |
|
|
} else { |
1864 |
|
|
gCPU.fpr[frD] = gCPU.fpr[frC]; |
1865 |
|
|
} |
1866 |
|
|
if (gCPU.current_opc & PPC_OPC_Rc) { |
1867 |
|
|
// update cr1 flags |
1868 |
|
|
PPC_FPU_ERR("fsel.\n"); |
1869 |
|
|
} |
1870 |
|
|
} |
1871 |
|
|
/*
 *	JIT code generator for fselx.
 *
 *	There is no native translation: the instruction is delegated to the
 *	interpreter handler ppc_opc_fselx via ppc_opc_gen_interpret(),
 *	and flowEndBlock is returned.
 */
JITCFlow ppc_opc_gen_fselx()
{
	ppc_opc_gen_interpret(ppc_opc_fselx);

	return flowEndBlock;
}
1876 |
|
|
/* |
1877 |
|
|
* fsqrtx Floating Square Root (Double-Precision) |
1878 |
|
|
* .515 |
1879 |
|
|
*/ |
1880 |
|
|
void ppc_opc_fsqrtx() |
1881 |
|
|
{ |
1882 |
|
|
int frD, frA, frB, frC; |
1883 |
|
|
PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC); |
1884 |
|
|
PPC_OPC_ASSERT(frA==0 && frC==0); |
1885 |
|
|
ppc_double B; |
1886 |
|
|
ppc_double D; |
1887 |
|
|
ppc_fpu_unpack_double(B, gCPU.fpr[frB]); |
1888 |
|
|
ppc_fpu_sqrt(D, B); |
1889 |
|
|
gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]); |
1890 |
|
|
if (gCPU.current_opc & PPC_OPC_Rc) { |
1891 |
|
|
// update cr1 flags |
1892 |
|
|
PPC_FPU_ERR("fsqrt.\n"); |
1893 |
|
|
} |
1894 |
|
|
} |
1895 |
|
|
/*
 *	JIT code generator for fsqrtx.
 *
 *	Decodes the A-form operands from gJITC.current_opc and emits an
 *	FSQRT of frB into frD via ppc_opc_gen_unary_floatop().
 *	For the record form (Rc set) ppc_opc_gen_update_cr1() is called too.
 *	Always returns flowContinue.  frA and frC must be 0.
 */
JITCFlow ppc_opc_gen_fsqrtx()
{
	int dst, unusedA, src, unusedC;
	PPC_OPC_TEMPL_A(gJITC.current_opc, dst, unusedA, src, unusedC);
	PPC_OPC_ASSERT(unusedA==0 && unusedC==0);

	ppc_opc_gen_unary_floatop(FSQRT, dst, src);

	const bool recordForm = (gJITC.current_opc & PPC_OPC_Rc) != 0;
	if (recordForm) {
		// update cr1 flags
		ppc_opc_gen_update_cr1("fsqrt.\n");
	}
	return flowContinue;
}
1907 |
|
|
/* |
1908 |
|
|
* fsqrtsx Floating Square Root Single |
1909 |
|
|
* .515 |
1910 |
|
|
*/ |
1911 |
|
|
void ppc_opc_fsqrtsx() |
1912 |
|
|
{ |
1913 |
|
|
int frD, frA, frB, frC; |
1914 |
|
|
PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC); |
1915 |
|
|
PPC_OPC_ASSERT(frA==0 && frC==0); |
1916 |
|
|
if (gCPU.current_opc & PPC_OPC_Rc) { |
1917 |
|
|
// update cr1 flags |
1918 |
|
|
PPC_FPU_ERR("fsqrts.\n"); |
1919 |
|
|
} |
1920 |
|
|
PPC_FPU_ERR("fsqrts\n"); |
1921 |
|
|
} |
1922 |
|
|
/*
 *	JIT code generator for fsqrtsx.
 *
 *	There is no native translation: the instruction is delegated to the
 *	interpreter handler ppc_opc_fsqrtsx via ppc_opc_gen_interpret(),
 *	and flowEndBlock is returned.
 */
JITCFlow ppc_opc_gen_fsqrtsx()
{
	ppc_opc_gen_interpret(ppc_opc_fsqrtsx);

	return flowEndBlock;
}
1927 |
|
|
/* |
1928 |
|
|
* fsubx Floating Subtract (Double-Precision) |
1929 |
|
|
* .517 |
1930 |
|
|
*/ |
1931 |
|
|
void ppc_opc_fsubx() |
1932 |
|
|
{ |
1933 |
|
|
int frD, frA, frB, frC; |
1934 |
|
|
PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC); |
1935 |
|
|
PPC_OPC_ASSERT(frC==0); |
1936 |
|
|
ppc_double A, B, D; |
1937 |
|
|
ppc_fpu_unpack_double(A, gCPU.fpr[frA]); |
1938 |
|
|
ppc_fpu_unpack_double(B, gCPU.fpr[frB]); |
1939 |
|
|
if (B.type != ppc_fpr_NaN) { |
1940 |
|
|
B.s ^= 1; |
1941 |
|
|
} |
1942 |
|
|
if (A.s != B.s && A.type == ppc_fpr_Inf && B.type == ppc_fpr_Inf) { |
1943 |
|
|
gCPU.fpscr |= FPSCR_VXISI; |
1944 |
|
|
} |
1945 |
|
|
ppc_fpu_add(D, A, B); |
1946 |
|
|
gCPU.fpscr |= ppc_fpu_pack_double(D, gCPU.fpr[frD]); |
1947 |
|
|
if (gCPU.current_opc & PPC_OPC_Rc) { |
1948 |
|
|
// update cr1 flags |
1949 |
|
|
PPC_FPU_ERR("fsub.\n"); |
1950 |
|
|
} |
1951 |
|
|
} |
1952 |
|
|
/*
 *	JIT code generator for fsubx.
 *
 *	With JITC defined: decodes the A-form operands from
 *	gJITC.current_opc, emits the subtraction through
 *	ppc_opc_gen_binary_floatop() (X86_FSUB / X86_FSUBR), calls
 *	jitcFloatRegisterClobberAll(), calls ppc_opc_gen_update_cr1() for
 *	the record form, and returns flowContinue.  frC must be 0.
 *
 *	Without JITC: delegates to the interpreter handler ppc_opc_fsubx via
 *	ppc_opc_gen_interpret() and returns flowEndBlock.
 */
JITCFlow ppc_opc_gen_fsubx()
{
#ifdef JITC
	int dst, srcA, srcB, unusedC;
	PPC_OPC_TEMPL_A(gJITC.current_opc, dst, srcA, srcB, unusedC);
	PPC_OPC_ASSERT(unusedC==0);

	ppc_opc_gen_binary_floatop(X86_FSUB, X86_FSUBR, dst, srcA, srcB);

	/*
	 *	FIXME: This call works around a floating point bug;
	 *	the root cause is not understood.
	 */
	jitcFloatRegisterClobberAll();

	const bool recordForm = (gJITC.current_opc & PPC_OPC_Rc) != 0;
	if (recordForm) {
		// update cr1 flags
		ppc_opc_gen_update_cr1("fsub.\n");
	}
	return flowContinue;
#else
	ppc_opc_gen_interpret(ppc_opc_fsubx);
	return flowEndBlock;
#endif
}
1974 |
|
|
/* |
1975 |
|
|
* fsubsx Floating Subtract Single |
1976 |
|
|
* .518 |
1977 |
|
|
*/ |
1978 |
|
|
void ppc_opc_fsubsx() |
1979 |
|
|
{ |
1980 |
|
|
int frD, frA, frB, frC; |
1981 |
|
|
PPC_OPC_TEMPL_A(gCPU.current_opc, frD, frA, frB, frC); |
1982 |
|
|
PPC_OPC_ASSERT(frC==0); |
1983 |
|
|
ppc_double A, B, D; |
1984 |
|
|
ppc_fpu_unpack_double(A, gCPU.fpr[frA]); |
1985 |
|
|
ppc_fpu_unpack_double(B, gCPU.fpr[frB]); |
1986 |
|
|
if (B.type != ppc_fpr_NaN) { |
1987 |
|
|
B.s ^= 1; |
1988 |
|
|
} |
1989 |
|
|
if (A.s != B.s && A.type == ppc_fpr_Inf && B.type == ppc_fpr_Inf) { |
1990 |
|
|
gCPU.fpscr |= FPSCR_VXISI; |
1991 |
|
|
} |
1992 |
|
|
ppc_fpu_add(D, A, B); |
1993 |
|
|
gCPU.fpscr |= ppc_fpu_pack_double_as_single(D, gCPU.fpr[frD]); |
1994 |
|
|
if (gCPU.current_opc & PPC_OPC_Rc) { |
1995 |
|
|
// update cr1 flags |
1996 |
|
|
PPC_FPU_ERR("fsubs.\n"); |
1997 |
|
|
} |
1998 |
|
|
} |
1999 |
|
|
/*
 *	JIT code generator for fsubsx.
 *
 *	There is no native translation: the instruction is delegated to the
 *	interpreter handler ppc_opc_fsubsx via ppc_opc_gen_interpret(),
 *	and flowEndBlock is returned.
 */
JITCFlow ppc_opc_gen_fsubsx()
{
	ppc_opc_gen_interpret(ppc_opc_fsubsx);

	return flowEndBlock;
}
2004 |
|
|
|