1 |
/* |
2 |
* PearPC |
3 |
* ppc_vec.cc |
4 |
* |
5 |
* Copyright (C) 2004 Daniel Foesch (dfoesch@cs.nsmu.edu) |
6 |
* |
7 |
* This program is free software; you can redistribute it and/or modify |
8 |
* it under the terms of the GNU General Public License version 2 as |
9 |
* published by the Free Software Foundation. |
10 |
* |
11 |
* This program is distributed in the hope that it will be useful, |
12 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 |
* GNU General Public License for more details. |
15 |
* |
16 |
* You should have received a copy of the GNU General Public License |
17 |
* along with this program; if not, write to the Free Software |
18 |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
19 |
*/ |
20 |
|
21 |
/* Pages marked: v.??? |
22 |
* From: IBM PowerPC MicroProcessor Family: Altivec(tm) Technology... |
23 |
* Programming Environments Manual |
24 |
*/ |
25 |
|
26 |
#include <math.h> |
27 |
|
28 |
/*
 *	FIXME: put somewhere appropriate
 *
 *	Fallbacks for hosts without log2()/exp2(); each one is only defined
 *	when the matching HAS_* macro has not been set.
 *
 *	The expansions are parenthesized so that they act as a single operand
 *	inside a larger expression: with a bare "log(x)/log(2)" expansion,
 *	"a / log2(x)" would be evaluated as "(a / log(x)) / log(2)".
 */
#ifndef HAS_LOG2
#define log2(x) (log(x) / log(2))
#endif /* HAS_LOG2 */

#ifndef HAS_EXP2
#define exp2(x) pow(2, (x))
#endif /* HAS_EXP2 */
38 |
|
39 |
#include "debug/tracers.h" |
40 |
#include "ppc_cpu.h" |
41 |
#include "ppc_dec.h" |
42 |
#include "ppc_fpu.h" |
43 |
#include "ppc_vec.h" |
44 |
|
45 |
#define SIGN32 0x80000000 |
46 |
|
47 |
/* PACK_PIXEL Packs a uint32 pixel to uint16 pixel |
48 |
* v.219 |
49 |
*/ |
50 |
static inline uint16 PACK_PIXEL(uint32 clr) |
51 |
{ |
52 |
return (((clr & 0x000000f8) >> 3) | \ |
53 |
((clr & 0x0000f800) >> 6) | \ |
54 |
((clr & 0x01f80000) >> 9)); |
55 |
} |
56 |
|
57 |
/* UNPACK_PIXEL Unpacks a uint16 pixel to uint32 pixel |
58 |
* v.276 & v.279 |
59 |
*/ |
60 |
static inline uint32 UNPACK_PIXEL(uint16 clr) |
61 |
{ |
62 |
return (((uint32)(clr & 0x001f)) | \ |
63 |
((uint32)(clr & 0x03E0) << 3) | \ |
64 |
((uint32)(clr & 0x7c00) << 6) | \ |
65 |
(((clr) & 0x8000) ? 0xff000000 : 0)); |
66 |
} |
67 |
|
68 |
/*	SATURATE_UB	Clamp an unsigned half word to an unsigned byte.
 *
 *	Values above 0xff yield 0xff and set VSCR_SAT in gCPU.vscr;
 *	anything else is returned unchanged.
 */
static inline uint8 SATURATE_UB(uint16 val)
{
	if (val <= 0xff) {
		return val;
	}

	gCPU.vscr |= VSCR_SAT;
	return 0xff;
}
76 |
/*	SATURATE_0B	Like SATURATE_UB, but an out-of-range value yields 0.
 *
 *	Values above 0xff yield 0 and set VSCR_SAT in gCPU.vscr;
 *	anything else is returned unchanged.
 */
static inline uint8 SATURATE_0B(uint16 val)
{
	if (val <= 0xff) {
		return val;
	}

	gCPU.vscr |= VSCR_SAT;
	return 0;
}
84 |
|
85 |
/*	SATURATE_UH	Clamp an unsigned word to an unsigned half word.
 *
 *	Values above 0xffff yield 0xffff and set VSCR_SAT in gCPU.vscr;
 *	anything else is returned unchanged.
 */
static inline uint16 SATURATE_UH(uint32 val)
{
	if (val <= 0xffff) {
		return val;
	}

	gCPU.vscr |= VSCR_SAT;
	return 0xffff;
}
93 |
|
94 |
/*	SATURATE_0H	Like SATURATE_UH, but an out-of-range value yields 0.
 *
 *	Values above 0xffff yield 0 and set VSCR_SAT in gCPU.vscr;
 *	anything else is returned unchanged.
 */
static inline uint16 SATURATE_0H(uint32 val)
{
	if (val <= 0xffff) {
		return val;
	}

	gCPU.vscr |= VSCR_SAT;
	return 0;
}
102 |
|
103 |
/*	SATURATE_SB	Clamp a signed half word to a signed byte.
 *
 *	Values outside -128..127 yield the nearer bound and set VSCR_SAT in
 *	gCPU.vscr; values inside the range are returned unchanged.
 */
static inline sint8 SATURATE_SB(sint16 val)
{
	if (val >= -128 && val <= 127) {
		return val;
	}

	gCPU.vscr |= VSCR_SAT;
	return (val > 127) ? 127 : -128;	// 0x7F : 0x80
}
114 |
|
115 |
/*	SATURATE_USB	Clamp a signed half word to an unsigned byte.
 *
 *	Negative values yield 0 and values above 0xff yield 0xff; both cases
 *	set VSCR_SAT in gCPU.vscr.  In-range values are returned unchanged.
 */
static inline uint8 SATURATE_USB(sint16 val)
{
	uint8 bound;

	if (val > 0xff) {
		bound = 0xff;
	} else if (val < 0) {
		bound = 0;
	} else {
		return (uint8)val;
	}

	gCPU.vscr |= VSCR_SAT;
	return bound;
}
126 |
|
127 |
/*	SATURATE_SH	Clamp a signed word to a signed half word.
 *
 *	Values outside -32768..32767 yield the nearer bound and set VSCR_SAT
 *	in gCPU.vscr; values inside the range are returned unchanged.
 */
static inline sint16 SATURATE_SH(sint32 val)
{
	if (val >= -32768 && val <= 32767) {
		return val;
	}

	gCPU.vscr |= VSCR_SAT;
	return (val > 32767) ? 32767 : -32768;	// 0x7fff : 0x8000
}
138 |
|
139 |
/*	SATURATE_USH	Clamp a signed word to an unsigned half word.
 *
 *	Negative values yield 0 and values above 0xffff yield 0xffff; both
 *	cases set VSCR_SAT in gCPU.vscr.  In-range values are returned
 *	unchanged.
 */
static inline uint16 SATURATE_USH(sint32 val)
{
	uint16 bound;

	if (val > 0xffff) {
		bound = 0xffff;
	} else if (val < 0) {
		bound = 0;
	} else {
		return (uint16)val;
	}

	gCPU.vscr |= VSCR_SAT;
	return bound;
}
150 |
|
151 |
/*	SATURATE_UW	Clamp a 64-bit value to the unsigned word maximum.
 *
 *	Values above 0xffffffff yield 0xffffffff and set VSCR_SAT in
 *	gCPU.vscr.  Only the upper bound is checked; negative inputs are
 *	passed through.  Note that the result is converted to the sint32
 *	return type in every case.
 */
static inline sint32 SATURATE_UW(sint64 val)
{
	if (val <= 0xffffffffLL) {
		return val;
	}

	gCPU.vscr |= VSCR_SAT;
	return 0xffffffffLL;
}
159 |
|
160 |
/*	SATURATE_SW	Clamp a 64-bit signed value to a signed word.
 *
 *	Values outside -2147483648..2147483647 yield the nearer bound and set
 *	VSCR_SAT in gCPU.vscr; values inside the range are returned unchanged.
 */
static inline sint32 SATURATE_SW(sint64 val)
{
	if (val >= -2147483648LL && val <= 2147483647LL) {
		return val;
	}

	gCPU.vscr |= VSCR_SAT;
	if (val > 2147483647LL) {		// 0x7fffffff
		return 2147483647LL;
	}
	return -2147483648LL;			// 0x80000000
}
171 |
|
172 |
/* vperm Vector Permutation |
173 |
* v.218 |
174 |
*/ |
175 |
void ppc_opc_vperm() |
176 |
{ |
177 |
VECTOR_DEBUG_COMMON; |
178 |
int vrD, vrA, vrB, vrC; |
179 |
int sel; |
180 |
Vector_t r; |
181 |
PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC); |
182 |
for (int i=0; i<16; i++) { |
183 |
sel = gCPU.vr[vrC].b[i]; |
184 |
if (sel & 0x10) |
185 |
r.b[i] = VECT_B(gCPU.vr[vrB], sel & 0xf); |
186 |
else |
187 |
r.b[i] = VECT_B(gCPU.vr[vrA], sel & 0xf); |
188 |
} |
189 |
|
190 |
gCPU.vr[vrD] = r; |
191 |
} |
192 |
|
193 |
/* vsel Vector Select |
194 |
* v.238 |
195 |
*/ |
196 |
void ppc_opc_vsel() |
197 |
{ |
198 |
VECTOR_DEBUG; |
199 |
int vrD, vrA, vrB, vrC; |
200 |
uint64 mask, val; |
201 |
PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC); |
202 |
|
203 |
mask = gCPU.vr[vrC].d[0]; |
204 |
val = gCPU.vr[vrB].d[0] & mask; |
205 |
val |= gCPU.vr[vrA].d[0] & ~mask; |
206 |
gCPU.vr[vrD].d[0] = val; |
207 |
|
208 |
mask = gCPU.vr[vrC].d[1]; |
209 |
val = gCPU.vr[vrB].d[1] & mask; |
210 |
val |= gCPU.vr[vrA].d[1] & ~mask; |
211 |
gCPU.vr[vrD].d[1] = val; |
212 |
} |
213 |
|
214 |
/* vsrb Vector Shift Right Byte |
215 |
* v.256 |
216 |
*/ |
217 |
void ppc_opc_vsrb() |
218 |
{ |
219 |
VECTOR_DEBUG; |
220 |
int vrD, vrA, vrB; |
221 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
222 |
for (int i=0; i<16; i++) { |
223 |
gCPU.vr[vrD].b[i] = gCPU.vr[vrA].b[i] >> (gCPU.vr[vrB].b[i] & 0x7); |
224 |
} |
225 |
} |
226 |
|
227 |
/* vsrh Vector Shift Right Half Word |
228 |
* v.257 |
229 |
*/ |
230 |
void ppc_opc_vsrh() |
231 |
{ |
232 |
VECTOR_DEBUG; |
233 |
int vrD, vrA, vrB; |
234 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
235 |
for (int i=0; i<8; i++) { |
236 |
gCPU.vr[vrD].h[i] = gCPU.vr[vrA].h[i] >> (gCPU.vr[vrB].h[i] & 0xf); |
237 |
} |
238 |
} |
239 |
|
240 |
/* vsrw Vector Shift Right Word |
241 |
* v.259 |
242 |
*/ |
243 |
void ppc_opc_vsrw() |
244 |
{ |
245 |
VECTOR_DEBUG; |
246 |
int vrD, vrA, vrB; |
247 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
248 |
for (int i=0; i<4; i++) { |
249 |
gCPU.vr[vrD].w[i] = gCPU.vr[vrA].w[i] >> (gCPU.vr[vrB].w[i] & 0x1f); |
250 |
} |
251 |
} |
252 |
|
253 |
/* vsrab Vector Shift Right Arithmetic Byte |
254 |
* v.253 |
255 |
*/ |
256 |
void ppc_opc_vsrab() |
257 |
{ |
258 |
VECTOR_DEBUG; |
259 |
int vrD, vrA, vrB; |
260 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
261 |
for (int i=0; i<16; i++) { |
262 |
gCPU.vr[vrD].sb[i] = gCPU.vr[vrA].sb[i] >> (gCPU.vr[vrB].b[i] & 0x7); |
263 |
} |
264 |
} |
265 |
|
266 |
/* vsrah Vector Shift Right Arithmetic Half Word |
267 |
* v.254 |
268 |
*/ |
269 |
void ppc_opc_vsrah() |
270 |
{ |
271 |
VECTOR_DEBUG; |
272 |
int vrD, vrA, vrB; |
273 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
274 |
for (int i=0; i<8; i++) { |
275 |
gCPU.vr[vrD].sh[i] = gCPU.vr[vrA].sh[i] >> (gCPU.vr[vrB].h[i] & 0xf); |
276 |
} |
277 |
} |
278 |
|
279 |
/* vsraw Vector Shift Right Arithmetic Word |
280 |
* v.255 |
281 |
*/ |
282 |
void ppc_opc_vsraw() |
283 |
{ |
284 |
VECTOR_DEBUG; |
285 |
int vrD, vrA, vrB; |
286 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
287 |
for (int i=0; i<4; i++) { |
288 |
gCPU.vr[vrD].sw[i] = gCPU.vr[vrA].sw[i] >> (gCPU.vr[vrB].w[i] & 0x1f); |
289 |
} |
290 |
} |
291 |
|
292 |
/* vslb Vector Shift Left Byte |
293 |
* v.240 |
294 |
*/ |
295 |
void ppc_opc_vslb() |
296 |
{ |
297 |
VECTOR_DEBUG; |
298 |
int vrD, vrA, vrB; |
299 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
300 |
for (int i=0; i<16; i++) { |
301 |
gCPU.vr[vrD].b[i] = gCPU.vr[vrA].b[i] << (gCPU.vr[vrB].b[i] & 0x7); |
302 |
} |
303 |
} |
304 |
|
305 |
/* vslh Vector Shift Left Half Word |
306 |
* v.242 |
307 |
*/ |
308 |
void ppc_opc_vslh() |
309 |
{ |
310 |
VECTOR_DEBUG; |
311 |
int vrD, vrA, vrB; |
312 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
313 |
for (int i=0; i<8; i++) { |
314 |
gCPU.vr[vrD].h[i] = gCPU.vr[vrA].h[i] << (gCPU.vr[vrB].h[i] & 0xf); |
315 |
} |
316 |
} |
317 |
|
318 |
/* vslw Vector Shift Left Word |
319 |
* v.244 |
320 |
*/ |
321 |
void ppc_opc_vslw() |
322 |
{ |
323 |
VECTOR_DEBUG; |
324 |
int vrD, vrA, vrB; |
325 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
326 |
for (int i=0; i<4; i++) { |
327 |
gCPU.vr[vrD].w[i] = gCPU.vr[vrA].w[i] << (gCPU.vr[vrB].w[i] & 0x1f); |
328 |
} |
329 |
} |
330 |
|
331 |
/* vsr Vector Shift Right |
332 |
* v.251 |
333 |
*/ |
334 |
void ppc_opc_vsr() |
335 |
{ |
336 |
VECTOR_DEBUG; |
337 |
int vrD, vrA, vrB; |
338 |
Vector_t r; |
339 |
int shift; |
340 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
341 |
|
342 |
/* Specs say that the low-order 3 bits of all byte elements in vB |
343 |
* must be the same, or the result is undefined. So we can just |
344 |
* use the same low-order 3 bits for all of our shifts. |
345 |
*/ |
346 |
shift = gCPU.vr[vrB].w[0] & 0x7; |
347 |
|
348 |
r.d[0] = gCPU.vr[vrA].d[0] >> shift; |
349 |
r.d[1] = gCPU.vr[vrA].d[1] >> shift; |
350 |
|
351 |
VECT_D(r, 1) |= VECT_D(gCPU.vr[vrA], 0) << (64 - shift); |
352 |
|
353 |
gCPU.vr[vrD] = r; |
354 |
} |
355 |
|
356 |
/* vsro Vector Shift Right Octet |
357 |
* v.258 |
358 |
*/ |
359 |
void ppc_opc_vsro() |
360 |
{ |
361 |
VECTOR_DEBUG; |
362 |
int vrD, vrA, vrB; |
363 |
Vector_t r; |
364 |
int shift, i; |
365 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
366 |
|
367 |
shift = (gCPU.vr[vrB].w[0] >> 3) & 0xf; |
368 |
#if HOST_ENDIANESS == HOST_ENDIANESS_LE |
369 |
for (i=0; i<(16-shift); i++) { |
370 |
r.b[i] = gCPU.vr[vrA].b[i+shift]; |
371 |
} |
372 |
|
373 |
for (; i<16; i++) { |
374 |
r.b[i] = 0; |
375 |
} |
376 |
#elif HOST_ENDIANESS == HOST_ENDIANESS_BE |
377 |
for (i=0; i<shift; i++) { |
378 |
r.b[i] = 0; |
379 |
} |
380 |
|
381 |
for (; i<16; i++) { |
382 |
r.b[i] = gCPU.vr[vrA].b[i-shift]; |
383 |
} |
384 |
#else |
385 |
#error Endianess not supported! |
386 |
#endif |
387 |
|
388 |
gCPU.vr[vrD] = r; |
389 |
} |
390 |
|
391 |
/* vsl Vector Shift Left |
392 |
* v.239 |
393 |
*/ |
394 |
void ppc_opc_vsl() |
395 |
{ |
396 |
VECTOR_DEBUG; |
397 |
int vrD, vrA, vrB; |
398 |
Vector_t r; |
399 |
int shift; |
400 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
401 |
|
402 |
/* Specs say that the low-order 3 bits of all byte elements in vB |
403 |
* must be the same, or the result is undefined. So we can just |
404 |
* use the same low-order 3 bits for all of our shifts. |
405 |
*/ |
406 |
shift = gCPU.vr[vrB].w[0] & 0x7; |
407 |
|
408 |
r.d[0] = gCPU.vr[vrA].d[0] << shift; |
409 |
r.d[1] = gCPU.vr[vrA].d[1] << shift; |
410 |
|
411 |
VECT_D(r, 0) |= VECT_D(gCPU.vr[vrA], 1) >> (64 - shift); |
412 |
|
413 |
gCPU.vr[vrD] = r; |
414 |
} |
415 |
|
416 |
/* vslo Vector Shift Left Octet |
417 |
* v.243 |
418 |
*/ |
419 |
void ppc_opc_vslo() |
420 |
{ |
421 |
VECTOR_DEBUG; |
422 |
int vrD, vrA, vrB; |
423 |
Vector_t r; |
424 |
int shift, i; |
425 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
426 |
|
427 |
shift = (gCPU.vr[vrB].w[0] >> 3) & 0xf; |
428 |
#if HOST_ENDIANESS == HOST_ENDIANESS_LE |
429 |
for (i=0; i<shift; i++) { |
430 |
r.b[i] = 0; |
431 |
} |
432 |
|
433 |
for (; i<16; i++) { |
434 |
r.b[i] = gCPU.vr[vrA].b[i-shift]; |
435 |
} |
436 |
#elif HOST_ENDIANESS == HOST_ENDIANESS_BE |
437 |
for (i=0; i<(16-shift); i++) { |
438 |
r.b[i] = gCPU.vr[vrA].b[i+shift]; |
439 |
} |
440 |
|
441 |
for (; i<16; i++) { |
442 |
r.b[i] = 0; |
443 |
} |
444 |
#else |
445 |
#error Endianess not supported! |
446 |
#endif |
447 |
|
448 |
gCPU.vr[vrD] = r; |
449 |
} |
450 |
|
451 |
/* vsldoi Vector Shift Left Double by Octet Immediate |
452 |
* v.241 |
453 |
*/ |
454 |
void ppc_opc_vsldoi() |
455 |
{ |
456 |
VECTOR_DEBUG_COMMON; |
457 |
int vrD, vrA, vrB, shift, ashift; |
458 |
int i; |
459 |
Vector_t r; |
460 |
PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, shift); |
461 |
|
462 |
shift &= 0xf; |
463 |
ashift = 16 - shift; |
464 |
|
465 |
#if HOST_ENDIANESS == HOST_ENDIANESS_LE |
466 |
for (i=0; i<shift; i++) { |
467 |
r.b[i] = gCPU.vr[vrB].b[i+ashift]; |
468 |
} |
469 |
|
470 |
for (; i<16; i++) { |
471 |
r.b[i] = gCPU.vr[vrA].b[i-shift]; |
472 |
} |
473 |
#elif HOST_ENDIANESS == HOST_ENDIANESS_BE |
474 |
for (i=0; i<ashift; i++) { |
475 |
r.b[i] = gCPU.vr[vrA].b[i+shift]; |
476 |
} |
477 |
|
478 |
for (; i<16; i++) { |
479 |
r.b[i] = gCPU.vr[vrB].b[i-ashift]; |
480 |
} |
481 |
#else |
482 |
#error Endianess not supported! |
483 |
#endif |
484 |
|
485 |
gCPU.vr[vrD] = r; |
486 |
} |
487 |
|
488 |
/* vrlb Vector Rotate Left Byte |
489 |
* v.234 |
490 |
*/ |
491 |
void ppc_opc_vrlb() |
492 |
{ |
493 |
VECTOR_DEBUG; |
494 |
int vrD, vrA, vrB, shift; |
495 |
Vector_t r; |
496 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
497 |
|
498 |
for (int i=0; i<16; i++) { |
499 |
shift = (gCPU.vr[vrB].b[i] & 0x7); |
500 |
|
501 |
r.b[i] = gCPU.vr[vrA].b[i] << shift; |
502 |
r.b[i] |= gCPU.vr[vrA].b[i] >> (8 - shift); |
503 |
} |
504 |
|
505 |
gCPU.vr[vrD] = r; |
506 |
} |
507 |
|
508 |
/* vrlh Vector Rotate Left Half Word |
509 |
* v.235 |
510 |
*/ |
511 |
void ppc_opc_vrlh() |
512 |
{ |
513 |
VECTOR_DEBUG; |
514 |
int vrD, vrA, vrB, shift; |
515 |
Vector_t r; |
516 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
517 |
|
518 |
for (int i=0; i<8; i++) { |
519 |
shift = (gCPU.vr[vrB].h[i] & 0xf); |
520 |
|
521 |
r.h[i] = gCPU.vr[vrA].h[i] << shift; |
522 |
r.h[i] |= gCPU.vr[vrA].h[i] >> (16 - shift); |
523 |
} |
524 |
|
525 |
gCPU.vr[vrD] = r; |
526 |
} |
527 |
|
528 |
/* vrlw Vector Rotate Left Word |
529 |
* v.236 |
530 |
*/ |
531 |
void ppc_opc_vrlw() |
532 |
{ |
533 |
VECTOR_DEBUG; |
534 |
int vrD, vrA, vrB, shift; |
535 |
Vector_t r; |
536 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
537 |
|
538 |
for (int i=0; i<4; i++) { |
539 |
shift = (gCPU.vr[vrB].w[i] & 0x1F); |
540 |
|
541 |
r.w[i] = gCPU.vr[vrA].w[i] << shift; |
542 |
r.w[i] |= gCPU.vr[vrA].w[i] >> (32 - shift); |
543 |
} |
544 |
|
545 |
gCPU.vr[vrD] = r; |
546 |
} |
547 |
|
548 |
/* With the merges, I just don't see any point in risking that a compiler |
549 |
* might generate actual alu code to calculate anything when it's |
550 |
* compile-time known. Plus, it's easier to validate it like this. |
551 |
*/ |
552 |
|
553 |
/* vmrghb Vector Merge High Byte |
554 |
* v.195 |
555 |
*/ |
556 |
void ppc_opc_vmrghb() |
557 |
{ |
558 |
VECTOR_DEBUG; |
559 |
int vrD, vrA, vrB; |
560 |
Vector_t r; |
561 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
562 |
|
563 |
VECT_B(r, 0) = VECT_B(gCPU.vr[vrA], 0); |
564 |
VECT_B(r, 1) = VECT_B(gCPU.vr[vrB], 0); |
565 |
VECT_B(r, 2) = VECT_B(gCPU.vr[vrA], 1); |
566 |
VECT_B(r, 3) = VECT_B(gCPU.vr[vrB], 1); |
567 |
VECT_B(r, 4) = VECT_B(gCPU.vr[vrA], 2); |
568 |
VECT_B(r, 5) = VECT_B(gCPU.vr[vrB], 2); |
569 |
VECT_B(r, 6) = VECT_B(gCPU.vr[vrA], 3); |
570 |
VECT_B(r, 7) = VECT_B(gCPU.vr[vrB], 3); |
571 |
VECT_B(r, 8) = VECT_B(gCPU.vr[vrA], 4); |
572 |
VECT_B(r, 9) = VECT_B(gCPU.vr[vrB], 4); |
573 |
VECT_B(r,10) = VECT_B(gCPU.vr[vrA], 5); |
574 |
VECT_B(r,11) = VECT_B(gCPU.vr[vrB], 5); |
575 |
VECT_B(r,12) = VECT_B(gCPU.vr[vrA], 6); |
576 |
VECT_B(r,13) = VECT_B(gCPU.vr[vrB], 6); |
577 |
VECT_B(r,14) = VECT_B(gCPU.vr[vrA], 7); |
578 |
VECT_B(r,15) = VECT_B(gCPU.vr[vrB], 7); |
579 |
|
580 |
gCPU.vr[vrD] = r; |
581 |
} |
582 |
|
583 |
/* vmrghh Vector Merge High Half Word |
584 |
* v.196 |
585 |
*/ |
586 |
void ppc_opc_vmrghh() |
587 |
{ |
588 |
VECTOR_DEBUG; |
589 |
int vrD, vrA, vrB; |
590 |
Vector_t r; |
591 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
592 |
|
593 |
VECT_H(r, 0) = VECT_H(gCPU.vr[vrA], 0); |
594 |
VECT_H(r, 1) = VECT_H(gCPU.vr[vrB], 0); |
595 |
VECT_H(r, 2) = VECT_H(gCPU.vr[vrA], 1); |
596 |
VECT_H(r, 3) = VECT_H(gCPU.vr[vrB], 1); |
597 |
VECT_H(r, 4) = VECT_H(gCPU.vr[vrA], 2); |
598 |
VECT_H(r, 5) = VECT_H(gCPU.vr[vrB], 2); |
599 |
VECT_H(r, 6) = VECT_H(gCPU.vr[vrA], 3); |
600 |
VECT_H(r, 7) = VECT_H(gCPU.vr[vrB], 3); |
601 |
|
602 |
gCPU.vr[vrD] = r; |
603 |
} |
604 |
|
605 |
/* vmrghw Vector Merge High Word |
606 |
* v.197 |
607 |
*/ |
608 |
void ppc_opc_vmrghw() |
609 |
{ |
610 |
VECTOR_DEBUG; |
611 |
int vrD, vrA, vrB; |
612 |
Vector_t r; |
613 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
614 |
|
615 |
VECT_W(r, 0) = VECT_W(gCPU.vr[vrA], 0); |
616 |
VECT_W(r, 1) = VECT_W(gCPU.vr[vrB], 0); |
617 |
VECT_W(r, 2) = VECT_W(gCPU.vr[vrA], 1); |
618 |
VECT_W(r, 3) = VECT_W(gCPU.vr[vrB], 1); |
619 |
|
620 |
gCPU.vr[vrD] = r; |
621 |
} |
622 |
|
623 |
/* vmrglb Vector Merge Low Byte |
624 |
* v.198 |
625 |
*/ |
626 |
void ppc_opc_vmrglb() |
627 |
{ |
628 |
VECTOR_DEBUG; |
629 |
int vrD, vrA, vrB; |
630 |
Vector_t r; |
631 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
632 |
|
633 |
VECT_B(r, 0) = VECT_B(gCPU.vr[vrA], 8); |
634 |
VECT_B(r, 1) = VECT_B(gCPU.vr[vrB], 8); |
635 |
VECT_B(r, 2) = VECT_B(gCPU.vr[vrA], 9); |
636 |
VECT_B(r, 3) = VECT_B(gCPU.vr[vrB], 9); |
637 |
VECT_B(r, 4) = VECT_B(gCPU.vr[vrA],10); |
638 |
VECT_B(r, 5) = VECT_B(gCPU.vr[vrB],10); |
639 |
VECT_B(r, 6) = VECT_B(gCPU.vr[vrA],11); |
640 |
VECT_B(r, 7) = VECT_B(gCPU.vr[vrB],11); |
641 |
VECT_B(r, 8) = VECT_B(gCPU.vr[vrA],12); |
642 |
VECT_B(r, 9) = VECT_B(gCPU.vr[vrB],12); |
643 |
VECT_B(r,10) = VECT_B(gCPU.vr[vrA],13); |
644 |
VECT_B(r,11) = VECT_B(gCPU.vr[vrB],13); |
645 |
VECT_B(r,12) = VECT_B(gCPU.vr[vrA],14); |
646 |
VECT_B(r,13) = VECT_B(gCPU.vr[vrB],14); |
647 |
VECT_B(r,14) = VECT_B(gCPU.vr[vrA],15); |
648 |
VECT_B(r,15) = VECT_B(gCPU.vr[vrB],15); |
649 |
|
650 |
gCPU.vr[vrD] = r; |
651 |
} |
652 |
|
653 |
/* vmrglh Vector Merge Low Half Word |
654 |
* v.199 |
655 |
*/ |
656 |
void ppc_opc_vmrglh() |
657 |
{ |
658 |
VECTOR_DEBUG; |
659 |
int vrD, vrA, vrB; |
660 |
Vector_t r; |
661 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
662 |
|
663 |
VECT_H(r, 0) = VECT_H(gCPU.vr[vrA], 4); |
664 |
VECT_H(r, 1) = VECT_H(gCPU.vr[vrB], 4); |
665 |
VECT_H(r, 2) = VECT_H(gCPU.vr[vrA], 5); |
666 |
VECT_H(r, 3) = VECT_H(gCPU.vr[vrB], 5); |
667 |
VECT_H(r, 4) = VECT_H(gCPU.vr[vrA], 6); |
668 |
VECT_H(r, 5) = VECT_H(gCPU.vr[vrB], 6); |
669 |
VECT_H(r, 6) = VECT_H(gCPU.vr[vrA], 7); |
670 |
VECT_H(r, 7) = VECT_H(gCPU.vr[vrB], 7); |
671 |
|
672 |
gCPU.vr[vrD] = r; |
673 |
} |
674 |
|
675 |
/* vmrglw Vector Merge Low Word |
676 |
* v.200 |
677 |
*/ |
678 |
void ppc_opc_vmrglw() |
679 |
{ |
680 |
VECTOR_DEBUG; |
681 |
int vrD, vrA, vrB; |
682 |
Vector_t r; |
683 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
684 |
|
685 |
VECT_W(r, 0) = VECT_W(gCPU.vr[vrA], 2); |
686 |
VECT_W(r, 1) = VECT_W(gCPU.vr[vrB], 2); |
687 |
VECT_W(r, 2) = VECT_W(gCPU.vr[vrA], 3); |
688 |
VECT_W(r, 3) = VECT_W(gCPU.vr[vrB], 3); |
689 |
|
690 |
gCPU.vr[vrD] = r; |
691 |
} |
692 |
|
693 |
/* vspltb Vector Splat Byte |
694 |
* v.245 |
695 |
*/ |
696 |
void ppc_opc_vspltb() |
697 |
{ |
698 |
VECTOR_DEBUG; |
699 |
int vrD, vrB; |
700 |
uint32 uimm; |
701 |
uint64 val; |
702 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, uimm, vrB); |
703 |
|
704 |
/* The documentation doesn't stipulate what a value higher than 0xf |
705 |
* will do. Thus, this is by default an undefined value. We |
706 |
* are thus doing this the fastest way that won't crash us. |
707 |
*/ |
708 |
val = VECT_B(gCPU.vr[vrB], uimm & 0xf); |
709 |
val |= (val << 8); |
710 |
val |= (val << 16); |
711 |
val |= (val << 32); |
712 |
|
713 |
gCPU.vr[vrD].d[0] = val; |
714 |
gCPU.vr[vrD].d[1] = val; |
715 |
} |
716 |
|
717 |
/* vsplth Vector Splat Half Word |
718 |
* v.246 |
719 |
*/ |
720 |
void ppc_opc_vsplth() |
721 |
{ |
722 |
VECTOR_DEBUG; |
723 |
int vrD, vrB; |
724 |
uint32 uimm; |
725 |
uint64 val; |
726 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, uimm, vrB); |
727 |
|
728 |
/* The documentation doesn't stipulate what a value higher than 0x7 |
729 |
* will do. Thus, this is by default an undefined value. We |
730 |
* are thus doing this the fastest way that won't crash us. |
731 |
*/ |
732 |
val = VECT_H(gCPU.vr[vrB], uimm & 0x7); |
733 |
val |= (val << 16); |
734 |
val |= (val << 32); |
735 |
|
736 |
gCPU.vr[vrD].d[0] = val; |
737 |
gCPU.vr[vrD].d[1] = val; |
738 |
} |
739 |
|
740 |
/* vspltw Vector Splat Word |
741 |
* v.250 |
742 |
*/ |
743 |
void ppc_opc_vspltw() |
744 |
{ |
745 |
VECTOR_DEBUG; |
746 |
int vrD, vrB; |
747 |
uint32 uimm; |
748 |
uint64 val; |
749 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, uimm, vrB); |
750 |
|
751 |
/* The documentation doesn't stipulate what a value higher than 0x3 |
752 |
* will do. Thus, this is by default an undefined value. We |
753 |
* are thus doing this the fastest way that won't crash us. |
754 |
*/ |
755 |
val = VECT_W(gCPU.vr[vrB], uimm & 0x3); |
756 |
val |= (val << 32); |
757 |
|
758 |
gCPU.vr[vrD].d[0] = val; |
759 |
gCPU.vr[vrD].d[1] = val; |
760 |
} |
761 |
|
762 |
/* vspltisb Vector Splat Immediate Signed Byte |
763 |
* v.247 |
764 |
*/ |
765 |
void ppc_opc_vspltisb() |
766 |
{ |
767 |
VECTOR_DEBUG_COMMON; |
768 |
int vrD, vrB; |
769 |
uint32 simm; |
770 |
uint64 val; |
771 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, simm, vrB); |
772 |
PPC_OPC_ASSERT(vrB==0); |
773 |
|
774 |
val = (simm & 0x10) ? (simm | 0xE0) : simm; |
775 |
val |= (val << 8); |
776 |
val |= (val << 16); |
777 |
val |= (val << 32); |
778 |
|
779 |
gCPU.vr[vrD].d[0] = val; |
780 |
gCPU.vr[vrD].d[1] = val; |
781 |
} |
782 |
|
783 |
/* vspltish Vector Splat Immediate Signed Half Word |
784 |
* v.248 |
785 |
*/ |
786 |
void ppc_opc_vspltish() |
787 |
{ |
788 |
VECTOR_DEBUG_COMMON; |
789 |
int vrD, vrB; |
790 |
uint32 simm; |
791 |
uint64 val; |
792 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, simm, vrB); |
793 |
PPC_OPC_ASSERT(vrB==0); |
794 |
|
795 |
val = (simm & 0x10) ? (simm | 0xFFE0) : simm; |
796 |
val |= (val << 16); |
797 |
val |= (val << 32); |
798 |
|
799 |
gCPU.vr[vrD].d[0] = val; |
800 |
gCPU.vr[vrD].d[1] = val; |
801 |
} |
802 |
|
803 |
/* vspltisw Vector Splat Immediate Signed Word |
804 |
* v.249 |
805 |
*/ |
806 |
void ppc_opc_vspltisw() |
807 |
{ |
808 |
VECTOR_DEBUG_COMMON; |
809 |
int vrD, vrB; |
810 |
uint32 simm; |
811 |
uint64 val; |
812 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, simm, vrB); |
813 |
PPC_OPC_ASSERT(vrB==0); |
814 |
|
815 |
val = (simm & 0x10) ? (simm | 0xFFFFFFE0) : simm; |
816 |
val |= (val << 32); |
817 |
|
818 |
gCPU.vr[vrD].d[0] = val; |
819 |
gCPU.vr[vrD].d[1] = val; |
820 |
} |
821 |
|
822 |
/* mfvscr Move from Vector Status and Control Register |
823 |
* v.129 |
824 |
*/ |
825 |
void ppc_opc_mfvscr() |
826 |
{ |
827 |
VECTOR_DEBUG_COMMON; |
828 |
int vrD, vrA, vrB; |
829 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
830 |
PPC_OPC_ASSERT(vrA==0); |
831 |
PPC_OPC_ASSERT(vrB==0); |
832 |
|
833 |
VECT_W(gCPU.vr[vrD], 3) = gCPU.vscr; |
834 |
VECT_W(gCPU.vr[vrD], 2) = 0; |
835 |
VECT_D(gCPU.vr[vrD], 0) = 0; |
836 |
} |
837 |
|
838 |
/* mtvscr Move to Vector Status and Control Register |
839 |
* v.130 |
840 |
*/ |
841 |
void ppc_opc_mtvscr() |
842 |
{ |
843 |
VECTOR_DEBUG_COMMON; |
844 |
int vrD, vrA, vrB; |
845 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
846 |
PPC_OPC_ASSERT(vrA==0); |
847 |
PPC_OPC_ASSERT(vrD==0); |
848 |
|
849 |
gCPU.vscr = VECT_W(gCPU.vr[vrB], 3); |
850 |
} |
851 |
|
852 |
/* vpkuhum Vector Pack Unsigned Half Word Unsigned Modulo |
853 |
* v.224 |
854 |
*/ |
855 |
void ppc_opc_vpkuhum() |
856 |
{ |
857 |
VECTOR_DEBUG; |
858 |
int vrD, vrA, vrB; |
859 |
Vector_t r; |
860 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
861 |
|
862 |
VECT_B(r, 0) = VECT_B(gCPU.vr[vrA], 1); |
863 |
VECT_B(r, 1) = VECT_B(gCPU.vr[vrA], 3); |
864 |
VECT_B(r, 2) = VECT_B(gCPU.vr[vrA], 5); |
865 |
VECT_B(r, 3) = VECT_B(gCPU.vr[vrA], 7); |
866 |
VECT_B(r, 4) = VECT_B(gCPU.vr[vrA], 9); |
867 |
VECT_B(r, 5) = VECT_B(gCPU.vr[vrA],11); |
868 |
VECT_B(r, 6) = VECT_B(gCPU.vr[vrA],13); |
869 |
VECT_B(r, 7) = VECT_B(gCPU.vr[vrA],15); |
870 |
|
871 |
VECT_B(r, 8) = VECT_B(gCPU.vr[vrB], 1); |
872 |
VECT_B(r, 9) = VECT_B(gCPU.vr[vrB], 3); |
873 |
VECT_B(r,10) = VECT_B(gCPU.vr[vrB], 5); |
874 |
VECT_B(r,11) = VECT_B(gCPU.vr[vrB], 7); |
875 |
VECT_B(r,12) = VECT_B(gCPU.vr[vrB], 9); |
876 |
VECT_B(r,13) = VECT_B(gCPU.vr[vrB],11); |
877 |
VECT_B(r,14) = VECT_B(gCPU.vr[vrB],13); |
878 |
VECT_B(r,15) = VECT_B(gCPU.vr[vrB],15); |
879 |
|
880 |
gCPU.vr[vrD] = r; |
881 |
} |
882 |
|
883 |
/* vpkuwum Vector Pack Unsigned Word Unsigned Modulo |
884 |
* v.226 |
885 |
*/ |
886 |
void ppc_opc_vpkuwum() |
887 |
{ |
888 |
VECTOR_DEBUG; |
889 |
int vrD, vrA, vrB; |
890 |
Vector_t r; |
891 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
892 |
|
893 |
VECT_H(r, 0) = VECT_H(gCPU.vr[vrA], 1); |
894 |
VECT_H(r, 1) = VECT_H(gCPU.vr[vrA], 3); |
895 |
VECT_H(r, 2) = VECT_H(gCPU.vr[vrA], 5); |
896 |
VECT_H(r, 3) = VECT_H(gCPU.vr[vrA], 7); |
897 |
|
898 |
VECT_H(r, 4) = VECT_H(gCPU.vr[vrB], 1); |
899 |
VECT_H(r, 5) = VECT_H(gCPU.vr[vrB], 3); |
900 |
VECT_H(r, 6) = VECT_H(gCPU.vr[vrB], 5); |
901 |
VECT_H(r, 7) = VECT_H(gCPU.vr[vrB], 7); |
902 |
|
903 |
gCPU.vr[vrD] = r; |
904 |
} |
905 |
|
906 |
/* vpkpx Vector Pack Pixel32 |
907 |
* v.219 |
908 |
*/ |
909 |
void ppc_opc_vpkpx() |
910 |
{ |
911 |
VECTOR_DEBUG; |
912 |
int vrD, vrA, vrB; |
913 |
Vector_t r; |
914 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
915 |
|
916 |
VECT_H(r, 0) = PACK_PIXEL(VECT_W(gCPU.vr[vrA], 0)); |
917 |
VECT_H(r, 1) = PACK_PIXEL(VECT_W(gCPU.vr[vrA], 1)); |
918 |
VECT_H(r, 2) = PACK_PIXEL(VECT_W(gCPU.vr[vrA], 2)); |
919 |
VECT_H(r, 3) = PACK_PIXEL(VECT_W(gCPU.vr[vrA], 3)); |
920 |
|
921 |
VECT_H(r, 4) = PACK_PIXEL(VECT_W(gCPU.vr[vrB], 0)); |
922 |
VECT_H(r, 5) = PACK_PIXEL(VECT_W(gCPU.vr[vrB], 1)); |
923 |
VECT_H(r, 6) = PACK_PIXEL(VECT_W(gCPU.vr[vrB], 2)); |
924 |
VECT_H(r, 7) = PACK_PIXEL(VECT_W(gCPU.vr[vrB], 3)); |
925 |
|
926 |
gCPU.vr[vrD] = r; |
927 |
} |
928 |
|
929 |
|
930 |
/* vpkuhus Vector Pack Unsigned Half Word Unsigned Saturate |
931 |
* v.225 |
932 |
*/ |
933 |
void ppc_opc_vpkuhus() |
934 |
{ |
935 |
VECTOR_DEBUG; |
936 |
int vrD, vrA, vrB; |
937 |
Vector_t r; |
938 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
939 |
|
940 |
VECT_B(r, 0) = SATURATE_UB(VECT_H(gCPU.vr[vrA], 0)); |
941 |
VECT_B(r, 1) = SATURATE_UB(VECT_H(gCPU.vr[vrA], 1)); |
942 |
VECT_B(r, 2) = SATURATE_UB(VECT_H(gCPU.vr[vrA], 2)); |
943 |
VECT_B(r, 3) = SATURATE_UB(VECT_H(gCPU.vr[vrA], 3)); |
944 |
VECT_B(r, 4) = SATURATE_UB(VECT_H(gCPU.vr[vrA], 4)); |
945 |
VECT_B(r, 5) = SATURATE_UB(VECT_H(gCPU.vr[vrA], 5)); |
946 |
VECT_B(r, 6) = SATURATE_UB(VECT_H(gCPU.vr[vrA], 6)); |
947 |
VECT_B(r, 7) = SATURATE_UB(VECT_H(gCPU.vr[vrA], 7)); |
948 |
|
949 |
VECT_B(r, 8) = SATURATE_UB(VECT_H(gCPU.vr[vrB], 0)); |
950 |
VECT_B(r, 9) = SATURATE_UB(VECT_H(gCPU.vr[vrB], 1)); |
951 |
VECT_B(r,10) = SATURATE_UB(VECT_H(gCPU.vr[vrB], 2)); |
952 |
VECT_B(r,11) = SATURATE_UB(VECT_H(gCPU.vr[vrB], 3)); |
953 |
VECT_B(r,12) = SATURATE_UB(VECT_H(gCPU.vr[vrB], 4)); |
954 |
VECT_B(r,13) = SATURATE_UB(VECT_H(gCPU.vr[vrB], 5)); |
955 |
VECT_B(r,14) = SATURATE_UB(VECT_H(gCPU.vr[vrB], 6)); |
956 |
VECT_B(r,15) = SATURATE_UB(VECT_H(gCPU.vr[vrB], 7)); |
957 |
|
958 |
gCPU.vr[vrD] = r; |
959 |
} |
960 |
|
961 |
/* vpkshss Vector Pack Signed Half Word Signed Saturate |
962 |
* v.220 |
963 |
*/ |
964 |
void ppc_opc_vpkshss() |
965 |
{ |
966 |
VECTOR_DEBUG; |
967 |
int vrD, vrA, vrB; |
968 |
Vector_t r; |
969 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
970 |
|
971 |
VECT_B(r, 0) = SATURATE_SB(VECT_H(gCPU.vr[vrA], 0)); |
972 |
VECT_B(r, 1) = SATURATE_SB(VECT_H(gCPU.vr[vrA], 1)); |
973 |
VECT_B(r, 2) = SATURATE_SB(VECT_H(gCPU.vr[vrA], 2)); |
974 |
VECT_B(r, 3) = SATURATE_SB(VECT_H(gCPU.vr[vrA], 3)); |
975 |
VECT_B(r, 4) = SATURATE_SB(VECT_H(gCPU.vr[vrA], 4)); |
976 |
VECT_B(r, 5) = SATURATE_SB(VECT_H(gCPU.vr[vrA], 5)); |
977 |
VECT_B(r, 6) = SATURATE_SB(VECT_H(gCPU.vr[vrA], 6)); |
978 |
VECT_B(r, 7) = SATURATE_SB(VECT_H(gCPU.vr[vrA], 7)); |
979 |
|
980 |
VECT_B(r, 8) = SATURATE_SB(VECT_H(gCPU.vr[vrB], 0)); |
981 |
VECT_B(r, 9) = SATURATE_SB(VECT_H(gCPU.vr[vrB], 1)); |
982 |
VECT_B(r,10) = SATURATE_SB(VECT_H(gCPU.vr[vrB], 2)); |
983 |
VECT_B(r,11) = SATURATE_SB(VECT_H(gCPU.vr[vrB], 3)); |
984 |
VECT_B(r,12) = SATURATE_SB(VECT_H(gCPU.vr[vrB], 4)); |
985 |
VECT_B(r,13) = SATURATE_SB(VECT_H(gCPU.vr[vrB], 5)); |
986 |
VECT_B(r,14) = SATURATE_SB(VECT_H(gCPU.vr[vrB], 6)); |
987 |
VECT_B(r,15) = SATURATE_SB(VECT_H(gCPU.vr[vrB], 7)); |
988 |
|
989 |
gCPU.vr[vrD] = r; |
990 |
} |
991 |
|
992 |
/* vpkuwus Vector Pack Unsigned Word Unsigned Saturate |
993 |
* v.227 |
994 |
*/ |
995 |
void ppc_opc_vpkuwus() |
996 |
{ |
997 |
VECTOR_DEBUG; |
998 |
int vrD, vrA, vrB; |
999 |
Vector_t r; |
1000 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
1001 |
|
1002 |
VECT_H(r, 0) = SATURATE_UH(VECT_W(gCPU.vr[vrA], 0)); |
1003 |
VECT_H(r, 1) = SATURATE_UH(VECT_W(gCPU.vr[vrA], 1)); |
1004 |
VECT_H(r, 2) = SATURATE_UH(VECT_W(gCPU.vr[vrA], 2)); |
1005 |
VECT_H(r, 3) = SATURATE_UH(VECT_W(gCPU.vr[vrA], 3)); |
1006 |
|
1007 |
VECT_H(r, 4) = SATURATE_UH(VECT_W(gCPU.vr[vrB], 0)); |
1008 |
VECT_H(r, 5) = SATURATE_UH(VECT_W(gCPU.vr[vrB], 1)); |
1009 |
VECT_H(r, 6) = SATURATE_UH(VECT_W(gCPU.vr[vrB], 2)); |
1010 |
VECT_H(r, 7) = SATURATE_UH(VECT_W(gCPU.vr[vrB], 3)); |
1011 |
|
1012 |
gCPU.vr[vrD] = r; |
1013 |
} |
1014 |
|
1015 |
/* vpkswss Vector Pack Signed Word Signed Saturate |
1016 |
* v.222 |
1017 |
*/ |
1018 |
void ppc_opc_vpkswss() |
1019 |
{ |
1020 |
VECTOR_DEBUG; |
1021 |
int vrD, vrA, vrB; |
1022 |
Vector_t r; |
1023 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
1024 |
|
1025 |
VECT_H(r, 0) = SATURATE_SH(VECT_W(gCPU.vr[vrA], 0)); |
1026 |
VECT_H(r, 1) = SATURATE_SH(VECT_W(gCPU.vr[vrA], 1)); |
1027 |
VECT_H(r, 2) = SATURATE_SH(VECT_W(gCPU.vr[vrA], 2)); |
1028 |
VECT_H(r, 3) = SATURATE_SH(VECT_W(gCPU.vr[vrA], 3)); |
1029 |
|
1030 |
VECT_H(r, 4) = SATURATE_SH(VECT_W(gCPU.vr[vrB], 0)); |
1031 |
VECT_H(r, 5) = SATURATE_SH(VECT_W(gCPU.vr[vrB], 1)); |
1032 |
VECT_H(r, 6) = SATURATE_SH(VECT_W(gCPU.vr[vrB], 2)); |
1033 |
VECT_H(r, 7) = SATURATE_SH(VECT_W(gCPU.vr[vrB], 3)); |
1034 |
|
1035 |
gCPU.vr[vrD] = r; |
1036 |
} |
1037 |
|
1038 |
/* vpkshus Vector Pack Signed Half Word Unsigned Saturate |
1039 |
* v.221 |
1040 |
*/ |
1041 |
void ppc_opc_vpkshus() |
1042 |
{ |
1043 |
VECTOR_DEBUG; |
1044 |
int vrD, vrA, vrB; |
1045 |
Vector_t r; |
1046 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
1047 |
|
1048 |
VECT_B(r, 0) = SATURATE_USB(VECT_H(gCPU.vr[vrA], 0)); |
1049 |
VECT_B(r, 1) = SATURATE_USB(VECT_H(gCPU.vr[vrA], 1)); |
1050 |
VECT_B(r, 2) = SATURATE_USB(VECT_H(gCPU.vr[vrA], 2)); |
1051 |
VECT_B(r, 3) = SATURATE_USB(VECT_H(gCPU.vr[vrA], 3)); |
1052 |
VECT_B(r, 4) = SATURATE_USB(VECT_H(gCPU.vr[vrA], 4)); |
1053 |
VECT_B(r, 5) = SATURATE_USB(VECT_H(gCPU.vr[vrA], 5)); |
1054 |
VECT_B(r, 6) = SATURATE_USB(VECT_H(gCPU.vr[vrA], 6)); |
1055 |
VECT_B(r, 7) = SATURATE_USB(VECT_H(gCPU.vr[vrA], 7)); |
1056 |
|
1057 |
VECT_B(r, 8) = SATURATE_USB(VECT_H(gCPU.vr[vrB], 0)); |
1058 |
VECT_B(r, 9) = SATURATE_USB(VECT_H(gCPU.vr[vrB], 1)); |
1059 |
VECT_B(r,10) = SATURATE_USB(VECT_H(gCPU.vr[vrB], 2)); |
1060 |
VECT_B(r,11) = SATURATE_USB(VECT_H(gCPU.vr[vrB], 3)); |
1061 |
VECT_B(r,12) = SATURATE_USB(VECT_H(gCPU.vr[vrB], 4)); |
1062 |
VECT_B(r,13) = SATURATE_USB(VECT_H(gCPU.vr[vrB], 5)); |
1063 |
VECT_B(r,14) = SATURATE_USB(VECT_H(gCPU.vr[vrB], 6)); |
1064 |
VECT_B(r,15) = SATURATE_USB(VECT_H(gCPU.vr[vrB], 7)); |
1065 |
|
1066 |
gCPU.vr[vrD] = r; |
1067 |
} |
1068 |
|
1069 |
/* vpkswus Vector Pack Signed Word Unsigned Saturate |
1070 |
* v.223 |
1071 |
*/ |
1072 |
void ppc_opc_vpkswus() |
1073 |
{ |
1074 |
VECTOR_DEBUG; |
1075 |
int vrD, vrA, vrB; |
1076 |
Vector_t r; |
1077 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
1078 |
|
1079 |
VECT_H(r, 0) = SATURATE_USH(VECT_W(gCPU.vr[vrA], 0)); |
1080 |
VECT_H(r, 1) = SATURATE_USH(VECT_W(gCPU.vr[vrA], 1)); |
1081 |
VECT_H(r, 2) = SATURATE_USH(VECT_W(gCPU.vr[vrA], 2)); |
1082 |
VECT_H(r, 3) = SATURATE_USH(VECT_W(gCPU.vr[vrA], 3)); |
1083 |
|
1084 |
VECT_H(r, 4) = SATURATE_USH(VECT_W(gCPU.vr[vrB], 0)); |
1085 |
VECT_H(r, 5) = SATURATE_USH(VECT_W(gCPU.vr[vrB], 1)); |
1086 |
VECT_H(r, 6) = SATURATE_USH(VECT_W(gCPU.vr[vrB], 2)); |
1087 |
VECT_H(r, 7) = SATURATE_USH(VECT_W(gCPU.vr[vrB], 3)); |
1088 |
|
1089 |
gCPU.vr[vrD] = r; |
1090 |
} |
1091 |
|
1092 |
/* vupkhsb Vector Unpack High Signed Byte |
1093 |
* v.277 |
1094 |
*/ |
1095 |
void ppc_opc_vupkhsb() |
1096 |
{ |
1097 |
VECTOR_DEBUG; |
1098 |
int vrD, vrA, vrB; |
1099 |
Vector_t r; |
1100 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
1101 |
PPC_OPC_ASSERT(vrA==0); |
1102 |
|
1103 |
VECT_SH(r, 0) = VECT_SB(gCPU.vr[vrB], 0); |
1104 |
VECT_SH(r, 1) = VECT_SB(gCPU.vr[vrB], 1); |
1105 |
VECT_SH(r, 2) = VECT_SB(gCPU.vr[vrB], 2); |
1106 |
VECT_SH(r, 3) = VECT_SB(gCPU.vr[vrB], 3); |
1107 |
VECT_SH(r, 4) = VECT_SB(gCPU.vr[vrB], 4); |
1108 |
VECT_SH(r, 5) = VECT_SB(gCPU.vr[vrB], 5); |
1109 |
VECT_SH(r, 6) = VECT_SB(gCPU.vr[vrB], 6); |
1110 |
VECT_SH(r, 7) = VECT_SB(gCPU.vr[vrB], 7); |
1111 |
|
1112 |
gCPU.vr[vrD] = r; |
1113 |
} |
1114 |
|
1115 |
/* vupkhpx Vector Unpack High Pixel32 |
1116 |
* v.279 |
1117 |
*/ |
1118 |
void ppc_opc_vupkhpx() |
1119 |
{ |
1120 |
VECTOR_DEBUG; |
1121 |
int vrD, vrA, vrB; |
1122 |
Vector_t r; |
1123 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
1124 |
PPC_OPC_ASSERT(vrA==0); |
1125 |
|
1126 |
VECT_W(r, 0) = UNPACK_PIXEL(VECT_H(gCPU.vr[vrB], 0)); |
1127 |
VECT_W(r, 1) = UNPACK_PIXEL(VECT_H(gCPU.vr[vrB], 1)); |
1128 |
VECT_W(r, 2) = UNPACK_PIXEL(VECT_H(gCPU.vr[vrB], 2)); |
1129 |
VECT_W(r, 3) = UNPACK_PIXEL(VECT_H(gCPU.vr[vrB], 3)); |
1130 |
|
1131 |
gCPU.vr[vrD] = r; |
1132 |
} |
1133 |
|
1134 |
/* vupkhsh Vector Unpack High Signed Half Word |
1135 |
* v.278 |
1136 |
*/ |
1137 |
void ppc_opc_vupkhsh() |
1138 |
{ |
1139 |
VECTOR_DEBUG; |
1140 |
int vrD, vrA, vrB; |
1141 |
Vector_t r; |
1142 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
1143 |
PPC_OPC_ASSERT(vrA==0); |
1144 |
|
1145 |
VECT_SW(r, 0) = VECT_SH(gCPU.vr[vrB], 0); |
1146 |
VECT_SW(r, 1) = VECT_SH(gCPU.vr[vrB], 1); |
1147 |
VECT_SW(r, 2) = VECT_SH(gCPU.vr[vrB], 2); |
1148 |
VECT_SW(r, 3) = VECT_SH(gCPU.vr[vrB], 3); |
1149 |
|
1150 |
gCPU.vr[vrD] = r; |
1151 |
} |
1152 |
|
1153 |
/* vupklsb Vector Unpack Low Signed Byte |
1154 |
* v.280 |
1155 |
*/ |
1156 |
void ppc_opc_vupklsb() |
1157 |
{ |
1158 |
VECTOR_DEBUG; |
1159 |
int vrD, vrA, vrB; |
1160 |
Vector_t r; |
1161 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
1162 |
PPC_OPC_ASSERT(vrA==0); |
1163 |
|
1164 |
VECT_SH(r, 0) = VECT_SB(gCPU.vr[vrB], 8); |
1165 |
VECT_SH(r, 1) = VECT_SB(gCPU.vr[vrB], 9); |
1166 |
VECT_SH(r, 2) = VECT_SB(gCPU.vr[vrB],10); |
1167 |
VECT_SH(r, 3) = VECT_SB(gCPU.vr[vrB],11); |
1168 |
VECT_SH(r, 4) = VECT_SB(gCPU.vr[vrB],12); |
1169 |
VECT_SH(r, 5) = VECT_SB(gCPU.vr[vrB],13); |
1170 |
VECT_SH(r, 6) = VECT_SB(gCPU.vr[vrB],14); |
1171 |
VECT_SH(r, 7) = VECT_SB(gCPU.vr[vrB],15); |
1172 |
|
1173 |
gCPU.vr[vrD] = r; |
1174 |
} |
1175 |
|
1176 |
/* vupklpx Vector Unpack Low Pixel32 |
1177 |
* v.279 |
1178 |
*/ |
1179 |
void ppc_opc_vupklpx() |
1180 |
{ |
1181 |
VECTOR_DEBUG; |
1182 |
int vrD, vrA, vrB; |
1183 |
Vector_t r; |
1184 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
1185 |
PPC_OPC_ASSERT(vrA==0); |
1186 |
|
1187 |
VECT_W(r, 0) = UNPACK_PIXEL(VECT_H(gCPU.vr[vrB], 4)); |
1188 |
VECT_W(r, 1) = UNPACK_PIXEL(VECT_H(gCPU.vr[vrB], 5)); |
1189 |
VECT_W(r, 2) = UNPACK_PIXEL(VECT_H(gCPU.vr[vrB], 6)); |
1190 |
VECT_W(r, 3) = UNPACK_PIXEL(VECT_H(gCPU.vr[vrB], 7)); |
1191 |
|
1192 |
gCPU.vr[vrD] = r; |
1193 |
} |
1194 |
|
1195 |
/* vupklsh Vector Unpack Low Signed Half Word |
1196 |
* v.281 |
1197 |
*/ |
1198 |
void ppc_opc_vupklsh() |
1199 |
{ |
1200 |
VECTOR_DEBUG; |
1201 |
int vrD, vrA, vrB; |
1202 |
Vector_t r; |
1203 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
1204 |
PPC_OPC_ASSERT(vrA==0); |
1205 |
|
1206 |
VECT_SW(r, 0) = VECT_SH(gCPU.vr[vrB], 4); |
1207 |
VECT_SW(r, 1) = VECT_SH(gCPU.vr[vrB], 5); |
1208 |
VECT_SW(r, 2) = VECT_SH(gCPU.vr[vrB], 6); |
1209 |
VECT_SW(r, 3) = VECT_SH(gCPU.vr[vrB], 7); |
1210 |
|
1211 |
gCPU.vr[vrD] = r; |
1212 |
} |
1213 |
|
1214 |
/* vaddubm Vector Add Unsigned Byte Modulo |
1215 |
* v.141 |
1216 |
*/ |
1217 |
void ppc_opc_vaddubm() |
1218 |
{ |
1219 |
VECTOR_DEBUG; |
1220 |
int vrD, vrA, vrB; |
1221 |
uint8 res; |
1222 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
1223 |
|
1224 |
for (int i=0; i<16; i++) { |
1225 |
res = gCPU.vr[vrA].b[i] + gCPU.vr[vrB].b[i]; |
1226 |
gCPU.vr[vrD].b[i] = res; |
1227 |
} |
1228 |
} |
1229 |
|
1230 |
/* vadduhm Vector Add Unsigned Half Word Modulo |
1231 |
* v.143 |
1232 |
*/ |
1233 |
void ppc_opc_vadduhm() |
1234 |
{ |
1235 |
VECTOR_DEBUG; |
1236 |
int vrD, vrA, vrB; |
1237 |
uint16 res; |
1238 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
1239 |
|
1240 |
for (int i=0; i<8; i++) { |
1241 |
res = gCPU.vr[vrA].h[i] + gCPU.vr[vrB].h[i]; |
1242 |
gCPU.vr[vrD].h[i] = res; |
1243 |
} |
1244 |
} |
1245 |
|
1246 |
/* vadduwm Vector Add Unsigned Word Modulo |
1247 |
* v.145 |
1248 |
*/ |
1249 |
void ppc_opc_vadduwm() |
1250 |
{ |
1251 |
VECTOR_DEBUG; |
1252 |
int vrD, vrA, vrB; |
1253 |
uint32 res; |
1254 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
1255 |
|
1256 |
for (int i=0; i<4; i++) { |
1257 |
res = gCPU.vr[vrA].w[i] + gCPU.vr[vrB].w[i]; |
1258 |
gCPU.vr[vrD].w[i] = res; |
1259 |
} |
1260 |
} |
1261 |
|
1262 |
/* vaddfp Vector Add Float Point |
1263 |
* v.137 |
1264 |
*/ |
1265 |
void ppc_opc_vaddfp() |
1266 |
{ |
1267 |
VECTOR_DEBUG; |
1268 |
int vrD, vrA, vrB; |
1269 |
float res; |
1270 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
1271 |
|
1272 |
for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP |
1273 |
res = gCPU.vr[vrA].f[i] + gCPU.vr[vrB].f[i]; |
1274 |
gCPU.vr[vrD].f[i] = res; |
1275 |
} |
1276 |
} |
1277 |
|
1278 |
/* vaddcuw Vector Add Carryout Unsigned Word |
1279 |
* v.136 |
1280 |
*/ |
1281 |
void ppc_opc_vaddcuw() |
1282 |
{ |
1283 |
VECTOR_DEBUG; |
1284 |
int vrD, vrA, vrB; |
1285 |
uint32 res; |
1286 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
1287 |
|
1288 |
for (int i=0; i<4; i++) { |
1289 |
res = gCPU.vr[vrA].w[i] + gCPU.vr[vrB].w[i]; |
1290 |
gCPU.vr[vrD].w[i] = (res < gCPU.vr[vrA].w[i]) ? 1 : 0; |
1291 |
} |
1292 |
} |
1293 |
|
1294 |
/* vaddubs Vector Add Unsigned Byte Saturate |
1295 |
* v.142 |
1296 |
*/ |
1297 |
void ppc_opc_vaddubs() |
1298 |
{ |
1299 |
VECTOR_DEBUG; |
1300 |
int vrD, vrA, vrB; |
1301 |
uint16 res; |
1302 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
1303 |
|
1304 |
for (int i=0; i<16; i++) { |
1305 |
res = (uint16)gCPU.vr[vrA].b[i] + (uint16)gCPU.vr[vrB].b[i]; |
1306 |
gCPU.vr[vrD].b[i] = SATURATE_UB(res); |
1307 |
} |
1308 |
} |
1309 |
|
1310 |
/* vaddsbs Vector Add Signed Byte Saturate |
1311 |
* v.138 |
1312 |
*/ |
1313 |
void ppc_opc_vaddsbs() |
1314 |
{ |
1315 |
VECTOR_DEBUG; |
1316 |
int vrD, vrA, vrB; |
1317 |
sint16 res; |
1318 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
1319 |
|
1320 |
for (int i=0; i<16; i++) { |
1321 |
res = (sint16)gCPU.vr[vrA].sb[i] + (sint16)gCPU.vr[vrB].sb[i]; |
1322 |
gCPU.vr[vrD].b[i] = SATURATE_SB(res); |
1323 |
} |
1324 |
} |
1325 |
|
1326 |
/* vadduhs Vector Add Unsigned Half Word Saturate |
1327 |
* v.144 |
1328 |
*/ |
1329 |
void ppc_opc_vadduhs() |
1330 |
{ |
1331 |
VECTOR_DEBUG; |
1332 |
int vrD, vrA, vrB; |
1333 |
uint32 res; |
1334 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
1335 |
|
1336 |
for (int i=0; i<8; i++) { |
1337 |
res = (uint32)gCPU.vr[vrA].h[i] + (uint32)gCPU.vr[vrB].h[i]; |
1338 |
gCPU.vr[vrD].h[i] = SATURATE_UH(res); |
1339 |
} |
1340 |
} |
1341 |
|
1342 |
/* vaddshs Vector Add Signed Half Word Saturate |
1343 |
* v.139 |
1344 |
*/ |
1345 |
void ppc_opc_vaddshs() |
1346 |
{ |
1347 |
VECTOR_DEBUG; |
1348 |
int vrD, vrA, vrB; |
1349 |
sint32 res; |
1350 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
1351 |
|
1352 |
for (int i=0; i<8; i++) { |
1353 |
res = (sint32)gCPU.vr[vrA].sh[i] + (sint32)gCPU.vr[vrB].sh[i]; |
1354 |
gCPU.vr[vrD].h[i] = SATURATE_SH(res); |
1355 |
} |
1356 |
} |
1357 |
|
1358 |
/* vadduws Vector Add Unsigned Word Saturate |
1359 |
* v.146 |
1360 |
*/ |
1361 |
void ppc_opc_vadduws() |
1362 |
{ |
1363 |
VECTOR_DEBUG; |
1364 |
int vrD, vrA, vrB; |
1365 |
uint32 res; |
1366 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
1367 |
|
1368 |
for (int i=0; i<4; i++) { |
1369 |
res = gCPU.vr[vrA].w[i] + gCPU.vr[vrB].w[i]; |
1370 |
|
1371 |
// We do this to prevent us from having to do 64-bit math |
1372 |
if (res < gCPU.vr[vrA].w[i]) { |
1373 |
res = 0xFFFFFFFF; |
1374 |
gCPU.vscr |= VSCR_SAT; |
1375 |
} |
1376 |
|
1377 |
/* 64-bit math | 32-bit hack |
1378 |
* ------------------------+------------------------------------- |
1379 |
* add, addc (a+b) | add (a+b) |
1380 |
* sub, subb (r>ub) | sub (r<a) |
1381 |
*/ |
1382 |
|
1383 |
gCPU.vr[vrD].w[i] = res; |
1384 |
} |
1385 |
} |
1386 |
|
1387 |
/* vaddsws Vector Add Signed Word Saturate |
1388 |
* v.140 |
1389 |
*/ |
1390 |
void ppc_opc_vaddsws() |
1391 |
{ |
1392 |
VECTOR_DEBUG; |
1393 |
int vrD, vrA, vrB; |
1394 |
uint32 res; |
1395 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
1396 |
|
1397 |
for (int i=0; i<4; i++) { |
1398 |
res = gCPU.vr[vrA].w[i] + gCPU.vr[vrB].w[i]; |
1399 |
|
1400 |
// We do this to prevent us from having to do 64-bit math |
1401 |
if (((gCPU.vr[vrA].w[i] ^ gCPU.vr[vrB].w[i]) & SIGN32) == 0) { |
1402 |
// the signs of both operands are the same |
1403 |
|
1404 |
if (((res ^ gCPU.vr[vrA].w[i]) & SIGN32) != 0) { |
1405 |
// sign of result != sign of operands |
1406 |
|
1407 |
// if res is negative, should have been positive |
1408 |
res = (res & SIGN32) ? (SIGN32 - 1) : SIGN32; |
1409 |
gCPU.vscr |= VSCR_SAT; |
1410 |
} |
1411 |
} |
1412 |
|
1413 |
/* 64-bit math | 32-bit hack |
1414 |
* ------------------------+------------------------------------- |
1415 |
* add, addc (a+b) | add (a+b) |
1416 |
* sub, subb (r>ub) | xor, and (sign == sign) |
1417 |
* sub, subb (r<lb) | xor, and (sign != sign) |
1418 |
* | and (which) |
1419 |
*/ |
1420 |
|
1421 |
gCPU.vr[vrD].w[i] = res; |
1422 |
} |
1423 |
} |
1424 |
|
1425 |
/* vsububm Vector Subtract Unsigned Byte Modulo |
1426 |
* v.265 |
1427 |
*/ |
1428 |
void ppc_opc_vsububm() |
1429 |
{ |
1430 |
VECTOR_DEBUG; |
1431 |
int vrD, vrA, vrB; |
1432 |
uint8 res; |
1433 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
1434 |
|
1435 |
for (int i=0; i<16; i++) { |
1436 |
res = gCPU.vr[vrA].b[i] - gCPU.vr[vrB].b[i]; |
1437 |
gCPU.vr[vrD].b[i] = res; |
1438 |
} |
1439 |
} |
1440 |
|
1441 |
/* vsubuhm Vector Subtract Unsigned Half Word Modulo |
1442 |
* v.267 |
1443 |
*/ |
1444 |
void ppc_opc_vsubuhm() |
1445 |
{ |
1446 |
VECTOR_DEBUG; |
1447 |
int vrD, vrA, vrB; |
1448 |
uint16 res; |
1449 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
1450 |
|
1451 |
for (int i=0; i<8; i++) { |
1452 |
res = gCPU.vr[vrA].h[i] - gCPU.vr[vrB].h[i]; |
1453 |
gCPU.vr[vrD].h[i] = res; |
1454 |
} |
1455 |
} |
1456 |
|
1457 |
/* vsubuwm Vector Subtract Unsigned Word Modulo |
1458 |
* v.269 |
1459 |
*/ |
1460 |
void ppc_opc_vsubuwm() |
1461 |
{ |
1462 |
VECTOR_DEBUG; |
1463 |
int vrD, vrA, vrB; |
1464 |
uint32 res; |
1465 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
1466 |
|
1467 |
for (int i=0; i<4; i++) { |
1468 |
res = gCPU.vr[vrA].w[i] - gCPU.vr[vrB].w[i]; |
1469 |
gCPU.vr[vrD].w[i] = res; |
1470 |
} |
1471 |
} |
1472 |
|
1473 |
/* vsubfp Vector Subtract Float Point |
1474 |
* v.261 |
1475 |
*/ |
1476 |
void ppc_opc_vsubfp() |
1477 |
{ |
1478 |
VECTOR_DEBUG; |
1479 |
int vrD, vrA, vrB; |
1480 |
float res; |
1481 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
1482 |
|
1483 |
for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP |
1484 |
res = gCPU.vr[vrA].f[i] - gCPU.vr[vrB].f[i]; |
1485 |
gCPU.vr[vrD].f[i] = res; |
1486 |
} |
1487 |
} |
1488 |
|
1489 |
/* vsubcuw Vector Subtract Carryout Unsigned Word |
1490 |
* v.260 |
1491 |
*/ |
1492 |
void ppc_opc_vsubcuw() |
1493 |
{ |
1494 |
VECTOR_DEBUG; |
1495 |
int vrD, vrA, vrB; |
1496 |
uint32 res; |
1497 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
1498 |
|
1499 |
for (int i=0; i<4; i++) { |
1500 |
res = gCPU.vr[vrA].w[i] - gCPU.vr[vrB].w[i]; |
1501 |
gCPU.vr[vrD].w[i] = (res <= gCPU.vr[vrA].w[i]) ? 1 : 0; |
1502 |
} |
1503 |
} |
1504 |
|
1505 |
/* vsububs Vector Subtract Unsigned Byte Saturate |
1506 |
* v.266 |
1507 |
*/ |
1508 |
void ppc_opc_vsububs() |
1509 |
{ |
1510 |
VECTOR_DEBUG; |
1511 |
int vrD, vrA, vrB; |
1512 |
uint16 res; |
1513 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
1514 |
|
1515 |
for (int i=0; i<16; i++) { |
1516 |
res = (uint16)gCPU.vr[vrA].b[i] - (uint16)gCPU.vr[vrB].b[i]; |
1517 |
|
1518 |
gCPU.vr[vrD].b[i] = SATURATE_0B(res); |
1519 |
} |
1520 |
} |
1521 |
|
1522 |
/* vsubsbs Vector Subtract Signed Byte Saturate |
1523 |
* v.262 |
1524 |
*/ |
1525 |
void ppc_opc_vsubsbs() |
1526 |
{ |
1527 |
VECTOR_DEBUG; |
1528 |
int vrD, vrA, vrB; |
1529 |
sint16 res; |
1530 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
1531 |
|
1532 |
for (int i=0; i<16; i++) { |
1533 |
res = (sint16)gCPU.vr[vrA].sb[i] - (sint16)gCPU.vr[vrB].sb[i]; |
1534 |
|
1535 |
gCPU.vr[vrD].sb[i] = SATURATE_SB(res); |
1536 |
} |
1537 |
} |
1538 |
|
1539 |
/* vsubuhs Vector Subtract Unsigned Half Word Saturate |
1540 |
* v.268 |
1541 |
*/ |
1542 |
void ppc_opc_vsubuhs() |
1543 |
{ |
1544 |
VECTOR_DEBUG; |
1545 |
int vrD, vrA, vrB; |
1546 |
uint32 res; |
1547 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
1548 |
|
1549 |
for (int i=0; i<8; i++) { |
1550 |
res = (uint32)gCPU.vr[vrA].h[i] - (uint32)gCPU.vr[vrB].h[i]; |
1551 |
|
1552 |
gCPU.vr[vrD].h[i] = SATURATE_0H(res); |
1553 |
} |
1554 |
} |
1555 |
|
1556 |
/* vsubshs Vector Subtract Signed Half Word Saturate |
1557 |
* v.263 |
1558 |
*/ |
1559 |
void ppc_opc_vsubshs() |
1560 |
{ |
1561 |
VECTOR_DEBUG; |
1562 |
int vrD, vrA, vrB; |
1563 |
sint32 res; |
1564 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
1565 |
|
1566 |
for (int i=0; i<8; i++) { |
1567 |
res = (sint32)gCPU.vr[vrA].sh[i] - (sint32)gCPU.vr[vrB].sh[i]; |
1568 |
|
1569 |
gCPU.vr[vrD].sh[i] = SATURATE_SH(res); |
1570 |
} |
1571 |
} |
1572 |
|
1573 |
/* vsubuws Vector Subtract Unsigned Word Saturate |
1574 |
* v.270 |
1575 |
*/ |
1576 |
void ppc_opc_vsubuws() |
1577 |
{ |
1578 |
VECTOR_DEBUG; |
1579 |
int vrD, vrA, vrB; |
1580 |
uint32 res; |
1581 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
1582 |
|
1583 |
for (int i=0; i<4; i++) { |
1584 |
res = gCPU.vr[vrA].w[i] - gCPU.vr[vrB].w[i]; |
1585 |
|
1586 |
// We do this to prevent us from having to do 64-bit math |
1587 |
if (res > gCPU.vr[vrA].w[i]) { |
1588 |
res = 0; |
1589 |
gCPU.vscr |= VSCR_SAT; |
1590 |
} |
1591 |
|
1592 |
/* 64-bit math | 32-bit hack |
1593 |
* ------------------------+------------------------------------- |
1594 |
* sub, subb (a+b) | sub (a+b) |
1595 |
* sub, subb (r>ub) | sub (r<a) |
1596 |
*/ |
1597 |
|
1598 |
gCPU.vr[vrD].w[i] = res; |
1599 |
} |
1600 |
} |
1601 |
|
1602 |
/* vsubsws Vector Subtract Signed Word Saturate |
1603 |
* v.264 |
1604 |
*/ |
1605 |
void ppc_opc_vsubsws() |
1606 |
{ |
1607 |
VECTOR_DEBUG; |
1608 |
int vrD, vrA, vrB; |
1609 |
uint32 res, tmp; |
1610 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
1611 |
|
1612 |
for (int i=0; i<4; i++) { |
1613 |
tmp = -gCPU.vr[vrB].w[i]; |
1614 |
res = gCPU.vr[vrA].w[i] + tmp; |
1615 |
|
1616 |
// We do this to prevent us from having to do 64-bit math |
1617 |
if (((gCPU.vr[vrA].w[i] ^ tmp) & SIGN32) == 0) { |
1618 |
// the signs of both operands are the same |
1619 |
|
1620 |
if (((res ^ tmp) & SIGN32) != 0) { |
1621 |
// sign of result != sign of operands |
1622 |
|
1623 |
// if res is negative, should have been positive |
1624 |
res = (res & SIGN32) ? (SIGN32 - 1) : SIGN32; |
1625 |
gCPU.vscr |= VSCR_SAT; |
1626 |
} |
1627 |
} |
1628 |
|
1629 |
/* 64-bit math | 32-bit hack |
1630 |
* ------------------------+------------------------------------- |
1631 |
* sub, subc (a+b) | neg, add (a-b) |
1632 |
* sub, subb (r>ub) | xor, and (sign == sign) |
1633 |
* sub, subb (r<lb) | xor, and (sign != sign) |
1634 |
* | and (which) |
1635 |
*/ |
1636 |
|
1637 |
gCPU.vr[vrD].w[i] = res; |
1638 |
} |
1639 |
} |
1640 |
|
1641 |
/* vmuleub Vector Multiply Even Unsigned Byte |
1642 |
* v.209 |
1643 |
*/ |
1644 |
void ppc_opc_vmuleub() |
1645 |
{ |
1646 |
VECTOR_DEBUG; |
1647 |
int vrD, vrA, vrB; |
1648 |
uint16 res; |
1649 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
1650 |
|
1651 |
for (int i=0; i<8; i++) { |
1652 |
res = (uint16)gCPU.vr[vrA].b[VECT_EVEN(i)] * |
1653 |
(uint16)gCPU.vr[vrB].b[VECT_EVEN(i)]; |
1654 |
|
1655 |
gCPU.vr[vrD].h[i] = res; |
1656 |
} |
1657 |
} |
1658 |
|
1659 |
/* vmulesb Vector Multiply Even Signed Byte |
1660 |
* v.207 |
1661 |
*/ |
1662 |
void ppc_opc_vmulesb() |
1663 |
{ |
1664 |
VECTOR_DEBUG; |
1665 |
int vrD, vrA, vrB; |
1666 |
sint16 res; |
1667 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
1668 |
|
1669 |
for (int i=0; i<8; i++) { |
1670 |
res = (sint16)gCPU.vr[vrA].sb[VECT_EVEN(i)] * |
1671 |
(sint16)gCPU.vr[vrB].sb[VECT_EVEN(i)]; |
1672 |
|
1673 |
gCPU.vr[vrD].sh[i] = res; |
1674 |
} |
1675 |
} |
1676 |
|
1677 |
/* vmuleuh Vector Multiply Even Unsigned Half Word |
1678 |
* v.210 |
1679 |
*/ |
1680 |
void ppc_opc_vmuleuh() |
1681 |
{ |
1682 |
VECTOR_DEBUG; |
1683 |
int vrD, vrA, vrB; |
1684 |
uint32 res; |
1685 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
1686 |
|
1687 |
for (int i=0; i<4; i++) { |
1688 |
res = (uint32)gCPU.vr[vrA].h[VECT_EVEN(i)] * |
1689 |
(uint32)gCPU.vr[vrB].h[VECT_EVEN(i)]; |
1690 |
|
1691 |
gCPU.vr[vrD].w[i] = res; |
1692 |
} |
1693 |
} |
1694 |
|
1695 |
/* vmulesh Vector Multiply Even Signed Half Word |
1696 |
* v.208 |
1697 |
*/ |
1698 |
void ppc_opc_vmulesh() |
1699 |
{ |
1700 |
VECTOR_DEBUG; |
1701 |
int vrD, vrA, vrB; |
1702 |
sint32 res; |
1703 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
1704 |
|
1705 |
for (int i=0; i<4; i++) { |
1706 |
res = (sint32)gCPU.vr[vrA].sh[VECT_EVEN(i)] * |
1707 |
(sint32)gCPU.vr[vrB].sh[VECT_EVEN(i)]; |
1708 |
|
1709 |
gCPU.vr[vrD].sw[i] = res; |
1710 |
} |
1711 |
} |
1712 |
|
1713 |
/* vmuloub Vector Multiply Odd Unsigned Byte |
1714 |
* v.213 |
1715 |
*/ |
1716 |
void ppc_opc_vmuloub() |
1717 |
{ |
1718 |
VECTOR_DEBUG; |
1719 |
int vrD, vrA, vrB; |
1720 |
uint16 res; |
1721 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
1722 |
|
1723 |
for (int i=0; i<8; i++) { |
1724 |
res = (uint16)gCPU.vr[vrA].b[VECT_ODD(i)] * |
1725 |
(uint16)gCPU.vr[vrB].b[VECT_ODD(i)]; |
1726 |
|
1727 |
gCPU.vr[vrD].h[i] = res; |
1728 |
} |
1729 |
} |
1730 |
|
1731 |
/* vmulosb Vector Multiply Odd Signed Byte |
1732 |
* v.211 |
1733 |
*/ |
1734 |
void ppc_opc_vmulosb() |
1735 |
{ |
1736 |
VECTOR_DEBUG; |
1737 |
int vrD, vrA, vrB; |
1738 |
sint16 res; |
1739 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
1740 |
|
1741 |
for (int i=0; i<8; i++) { |
1742 |
res = (sint16)gCPU.vr[vrA].sb[VECT_ODD(i)] * |
1743 |
(sint16)gCPU.vr[vrB].sb[VECT_ODD(i)]; |
1744 |
|
1745 |
gCPU.vr[vrD].sh[i] = res; |
1746 |
} |
1747 |
} |
1748 |
|
1749 |
/* vmulouh Vector Multiply Odd Unsigned Half Word |
1750 |
* v.214 |
1751 |
*/ |
1752 |
void ppc_opc_vmulouh() |
1753 |
{ |
1754 |
VECTOR_DEBUG; |
1755 |
int vrD, vrA, vrB; |
1756 |
uint32 res; |
1757 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
1758 |
|
1759 |
for (int i=0; i<4; i++) { |
1760 |
res = (uint32)gCPU.vr[vrA].h[VECT_ODD(i)] * |
1761 |
(uint32)gCPU.vr[vrB].h[VECT_ODD(i)]; |
1762 |
|
1763 |
gCPU.vr[vrD].w[i] = res; |
1764 |
} |
1765 |
} |
1766 |
|
1767 |
/* vmulosh Vector Multiply Odd Signed Half Word |
1768 |
* v.212 |
1769 |
*/ |
1770 |
void ppc_opc_vmulosh() |
1771 |
{ |
1772 |
VECTOR_DEBUG; |
1773 |
int vrD, vrA, vrB; |
1774 |
sint32 res; |
1775 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
1776 |
|
1777 |
for (int i=0; i<4; i++) { |
1778 |
res = (sint32)gCPU.vr[vrA].sh[VECT_ODD(i)] * |
1779 |
(sint32)gCPU.vr[vrB].sh[VECT_ODD(i)]; |
1780 |
|
1781 |
gCPU.vr[vrD].sw[i] = res; |
1782 |
} |
1783 |
} |
1784 |
|
1785 |
/* vmaddfp Vector Multiply Add Floating Point |
1786 |
* v.177 |
1787 |
*/ |
1788 |
void ppc_opc_vmaddfp() |
1789 |
{ |
1790 |
VECTOR_DEBUG; |
1791 |
int vrD, vrA, vrB, vrC; |
1792 |
double res; |
1793 |
PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC); |
1794 |
|
1795 |
for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP |
1796 |
res = (double)gCPU.vr[vrA].f[i] * (double)gCPU.vr[vrC].f[i]; |
1797 |
|
1798 |
res = (double)gCPU.vr[vrB].f[i] + res; |
1799 |
|
1800 |
gCPU.vr[vrD].f[i] = (float)res; |
1801 |
} |
1802 |
} |
1803 |
|
1804 |
/* vmhaddshs Vector Multiply High and Add Signed Half Word Saturate |
1805 |
* v.185 |
1806 |
*/ |
1807 |
void ppc_opc_vmhaddshs() |
1808 |
{ |
1809 |
VECTOR_DEBUG; |
1810 |
int vrD, vrA, vrB, vrC; |
1811 |
sint32 prod; |
1812 |
PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC); |
1813 |
|
1814 |
for (int i=0; i<8; i++) { |
1815 |
prod = (sint32)gCPU.vr[vrA].sh[i] * (sint32)gCPU.vr[vrB].sh[i]; |
1816 |
|
1817 |
prod = (prod >> 15) + (sint32)gCPU.vr[vrC].sh[i]; |
1818 |
|
1819 |
gCPU.vr[vrD].sh[i] = SATURATE_SH(prod); |
1820 |
} |
1821 |
} |
1822 |
|
1823 |
/* vmladduhm Vector Multiply Low and Add Unsigned Half Word Modulo |
1824 |
* v.194 |
1825 |
*/ |
1826 |
void ppc_opc_vmladduhm() |
1827 |
{ |
1828 |
VECTOR_DEBUG; |
1829 |
int vrD, vrA, vrB, vrC; |
1830 |
uint32 prod; |
1831 |
PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC); |
1832 |
|
1833 |
for (int i=0; i<8; i++) { |
1834 |
prod = (uint32)gCPU.vr[vrA].h[i] * (uint32)gCPU.vr[vrB].h[i]; |
1835 |
|
1836 |
prod = prod + (uint32)gCPU.vr[vrC].h[i]; |
1837 |
|
1838 |
gCPU.vr[vrD].h[i] = prod; |
1839 |
} |
1840 |
} |
1841 |
|
1842 |
/* vmhraddshs Vector Multiply High Round and Add Signed Half Word Saturate |
1843 |
* v.186 |
1844 |
*/ |
1845 |
void ppc_opc_vmhraddshs() |
1846 |
{ |
1847 |
VECTOR_DEBUG; |
1848 |
int vrD, vrA, vrB, vrC; |
1849 |
sint32 prod; |
1850 |
PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC); |
1851 |
|
1852 |
for (int i=0; i<8; i++) { |
1853 |
prod = (sint32)gCPU.vr[vrA].sh[i] * (sint32)gCPU.vr[vrB].sh[i]; |
1854 |
|
1855 |
prod += 0x4000; |
1856 |
prod = (prod >> 15) + (sint32)gCPU.vr[vrC].sh[i]; |
1857 |
|
1858 |
gCPU.vr[vrD].sh[i] = SATURATE_SH(prod); |
1859 |
} |
1860 |
} |
1861 |
|
1862 |
/* vmsumubm Vector Multiply Sum Unsigned Byte Modulo |
1863 |
* v.204 |
1864 |
*/ |
1865 |
void ppc_opc_vmsumubm() |
1866 |
{ |
1867 |
VECTOR_DEBUG; |
1868 |
int vrD, vrA, vrB, vrC; |
1869 |
uint32 temp; |
1870 |
PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC); |
1871 |
|
1872 |
for (int i=0; i<4; i++) { |
1873 |
temp = gCPU.vr[vrC].w[i]; |
1874 |
|
1875 |
temp += (uint16)gCPU.vr[vrA].b[i<<2] * |
1876 |
(uint16)gCPU.vr[vrB].b[i<<2]; |
1877 |
|
1878 |
temp += (uint16)gCPU.vr[vrA].b[(i<<2)+1] * |
1879 |
(uint16)gCPU.vr[vrB].b[(i<<2)+1]; |
1880 |
|
1881 |
temp += (uint16)gCPU.vr[vrA].b[(i<<2)+2] * |
1882 |
(uint16)gCPU.vr[vrB].b[(i<<2)+2]; |
1883 |
|
1884 |
temp += (uint16)gCPU.vr[vrA].b[(i<<2)+3] * |
1885 |
(uint16)gCPU.vr[vrB].b[(i<<2)+3]; |
1886 |
|
1887 |
gCPU.vr[vrD].w[i] = temp; |
1888 |
} |
1889 |
} |
1890 |
|
1891 |
/* vmsumuhm Vector Multiply Sum Unsigned Half Word Modulo |
1892 |
* v.205 |
1893 |
*/ |
1894 |
void ppc_opc_vmsumuhm() |
1895 |
{ |
1896 |
VECTOR_DEBUG; |
1897 |
int vrD, vrA, vrB, vrC; |
1898 |
uint32 temp; |
1899 |
PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC); |
1900 |
|
1901 |
for (int i=0; i<4; i++) { |
1902 |
temp = gCPU.vr[vrC].w[i]; |
1903 |
|
1904 |
temp += (uint32)gCPU.vr[vrA].h[i<<1] * |
1905 |
(uint32)gCPU.vr[vrB].h[i<<1]; |
1906 |
temp += (uint32)gCPU.vr[vrA].h[(i<<1)+1] * |
1907 |
(uint32)gCPU.vr[vrB].h[(i<<1)+1]; |
1908 |
|
1909 |
gCPU.vr[vrD].w[i] = temp; |
1910 |
} |
1911 |
} |
1912 |
|
1913 |
/* vmsummbm Vector Multiply Sum Mixed-Sign Byte Modulo |
1914 |
* v.201 |
1915 |
*/ |
1916 |
void ppc_opc_vmsummbm() |
1917 |
{ |
1918 |
VECTOR_DEBUG; |
1919 |
int vrD, vrA, vrB, vrC; |
1920 |
sint32 temp; |
1921 |
PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC); |
1922 |
|
1923 |
for (int i=0; i<4; i++) { |
1924 |
temp = gCPU.vr[vrC].sw[i]; |
1925 |
|
1926 |
temp += (sint16)gCPU.vr[vrA].sb[i<<2] * |
1927 |
(uint16)gCPU.vr[vrB].b[i<<2]; |
1928 |
temp += (sint16)gCPU.vr[vrA].sb[(i<<2)+1] * |
1929 |
(uint16)gCPU.vr[vrB].b[(i<<2)+1]; |
1930 |
temp += (sint16)gCPU.vr[vrA].sb[(i<<2)+2] * |
1931 |
(uint16)gCPU.vr[vrB].b[(i<<2)+2]; |
1932 |
temp += (sint16)gCPU.vr[vrA].sb[(i<<2)+3] * |
1933 |
(uint16)gCPU.vr[vrB].b[(i<<2)+3]; |
1934 |
|
1935 |
gCPU.vr[vrD].sw[i] = temp; |
1936 |
} |
1937 |
} |
1938 |
|
1939 |
/* vmsumshm Vector Multiply Sum Signed Half Word Modulo |
1940 |
* v.202 |
1941 |
*/ |
1942 |
void ppc_opc_vmsumshm() |
1943 |
{ |
1944 |
VECTOR_DEBUG; |
1945 |
int vrD, vrA, vrB, vrC; |
1946 |
sint32 temp; |
1947 |
PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC); |
1948 |
|
1949 |
for (int i=0; i<4; i++) { |
1950 |
temp = gCPU.vr[vrC].sw[i]; |
1951 |
|
1952 |
temp += (sint32)gCPU.vr[vrA].sh[i<<1] * |
1953 |
(sint32)gCPU.vr[vrB].sh[i<<1]; |
1954 |
temp += (sint32)gCPU.vr[vrA].sh[(i<<1)+1] * |
1955 |
(sint32)gCPU.vr[vrB].sh[(i<<1)+1]; |
1956 |
|
1957 |
gCPU.vr[vrD].sw[i] = temp; |
1958 |
} |
1959 |
} |
1960 |
|
1961 |
/* vmsumuhs Vector Multiply Sum Unsigned Half Word Saturate |
1962 |
* v.206 |
1963 |
*/ |
1964 |
void ppc_opc_vmsumuhs() |
1965 |
{ |
1966 |
VECTOR_DEBUG; |
1967 |
int vrD, vrA, vrB, vrC; |
1968 |
uint64 temp; |
1969 |
PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC); |
1970 |
|
1971 |
/* For this, there's no way to get around 64-bit math. If we use |
1972 |
* the hacks used before, then we have to do it so often, that |
1973 |
* we'll outpace the 64-bit math in execution time. |
1974 |
*/ |
1975 |
for (int i=0; i<4; i++) { |
1976 |
temp = gCPU.vr[vrC].w[i]; |
1977 |
|
1978 |
temp += (uint32)gCPU.vr[vrA].h[i<<1] * |
1979 |
(uint32)gCPU.vr[vrB].h[i<<1]; |
1980 |
|
1981 |
temp += (uint32)gCPU.vr[vrA].h[(i<<1)+1] * |
1982 |
(uint32)gCPU.vr[vrB].h[(i<<1)+1]; |
1983 |
|
1984 |
gCPU.vr[vrD].w[i] = SATURATE_UW(temp); |
1985 |
} |
1986 |
} |
1987 |
|
1988 |
/* vmsumshs Vector Multiply Sum Signed Half Word Saturate |
1989 |
* v.203 |
1990 |
*/ |
1991 |
void ppc_opc_vmsumshs() |
1992 |
{ |
1993 |
VECTOR_DEBUG; |
1994 |
int vrD, vrA, vrB, vrC; |
1995 |
sint64 temp; |
1996 |
PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC); |
1997 |
|
1998 |
/* For this, there's no way to get around 64-bit math. If we use |
1999 |
* the hacks used before, then we have to do it so often, that |
2000 |
* we'll outpace the 64-bit math in execution time. |
2001 |
*/ |
2002 |
|
2003 |
for (int i=0; i<4; i++) { |
2004 |
temp = gCPU.vr[vrC].sw[i]; |
2005 |
|
2006 |
temp += (sint32)gCPU.vr[vrA].sh[i<<1] * |
2007 |
(sint32)gCPU.vr[vrB].sh[i<<1]; |
2008 |
temp += (sint32)gCPU.vr[vrA].sh[(i<<1)+1] * |
2009 |
(sint32)gCPU.vr[vrB].sh[(i<<1)+1]; |
2010 |
|
2011 |
gCPU.vr[vrD].sw[i] = SATURATE_SW(temp); |
2012 |
} |
2013 |
} |
2014 |
|
2015 |
/* vsum4ubs Vector Sum Across Partial (1/4) Unsigned Byte Saturate |
2016 |
* v.275 |
2017 |
*/ |
2018 |
void ppc_opc_vsum4ubs() |
2019 |
{ |
2020 |
VECTOR_DEBUG; |
2021 |
int vrD, vrA, vrB; |
2022 |
uint64 res; |
2023 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2024 |
|
2025 |
/* For this, there's no way to get around 64-bit math. If we use |
2026 |
* the hacks used before, then we have to do it so often, that |
2027 |
* we'll outpace the 64-bit math in execution time. |
2028 |
*/ |
2029 |
|
2030 |
for (int i=0; i<4; i++) { |
2031 |
res = (uint64)gCPU.vr[vrB].w[i]; |
2032 |
|
2033 |
res += (uint64)gCPU.vr[vrA].b[(i<<2)]; |
2034 |
res += (uint64)gCPU.vr[vrA].b[(i<<2)+1]; |
2035 |
res += (uint64)gCPU.vr[vrA].b[(i<<2)+2]; |
2036 |
res += (uint64)gCPU.vr[vrA].b[(i<<2)+3]; |
2037 |
|
2038 |
gCPU.vr[vrD].w[i] = SATURATE_UW(res); |
2039 |
} |
2040 |
} |
2041 |
|
2042 |
/* vsum4sbs Vector Sum Across Partial (1/4) Signed Byte Saturate |
2043 |
* v.273 |
2044 |
*/ |
2045 |
void ppc_opc_vsum4sbs() |
2046 |
{ |
2047 |
VECTOR_DEBUG; |
2048 |
int vrD, vrA, vrB; |
2049 |
sint64 res; |
2050 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2051 |
|
2052 |
for (int i=0; i<4; i++) { |
2053 |
res = (sint64)gCPU.vr[vrB].sw[i]; |
2054 |
|
2055 |
res += (sint64)gCPU.vr[vrA].sb[(i<<2)]; |
2056 |
res += (sint64)gCPU.vr[vrA].sb[(i<<2)+1]; |
2057 |
res += (sint64)gCPU.vr[vrA].sb[(i<<2)+2]; |
2058 |
res += (sint64)gCPU.vr[vrA].sb[(i<<2)+3]; |
2059 |
|
2060 |
gCPU.vr[vrD].sw[i] = SATURATE_SW(res); |
2061 |
} |
2062 |
} |
2063 |
|
2064 |
/* vsum4shs Vector Sum Across Partial (1/4) Signed Half Word Saturate |
2065 |
* v.274 |
2066 |
*/ |
2067 |
void ppc_opc_vsum4shs() |
2068 |
{ |
2069 |
VECTOR_DEBUG; |
2070 |
int vrD, vrA, vrB; |
2071 |
sint64 res; |
2072 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2073 |
|
2074 |
for (int i=0; i<4; i++) { |
2075 |
res = (sint64)gCPU.vr[vrB].sw[i]; |
2076 |
|
2077 |
res += (sint64)gCPU.vr[vrA].sh[(i<<1)]; |
2078 |
res += (sint64)gCPU.vr[vrA].sh[(i<<1)+1]; |
2079 |
|
2080 |
gCPU.vr[vrD].sw[i] = SATURATE_SW(res); |
2081 |
} |
2082 |
} |
2083 |
|
2084 |
/* vsum2sws Vector Sum Across Partial (1/2) Signed Word Saturate |
2085 |
* v.272 |
2086 |
*/ |
2087 |
void ppc_opc_vsum2sws() |
2088 |
{ |
2089 |
VECTOR_DEBUG; |
2090 |
int vrD, vrA, vrB; |
2091 |
sint64 res; |
2092 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2093 |
|
2094 |
res = (sint64)gCPU.vr[vrA].sw[0] + (sint64)gCPU.vr[vrA].sw[1]; |
2095 |
res += (sint64)gCPU.vr[vrB].sw[VECT_ODD(0)]; |
2096 |
|
2097 |
gCPU.vr[vrD].w[VECT_ODD(0)] = SATURATE_SW(res); |
2098 |
gCPU.vr[vrD].w[VECT_EVEN(0)] = 0; |
2099 |
|
2100 |
res = (sint64)gCPU.vr[vrA].sw[2] + (sint64)gCPU.vr[vrA].sw[3]; |
2101 |
res += (sint64)gCPU.vr[vrB].sw[VECT_ODD(1)]; |
2102 |
|
2103 |
gCPU.vr[vrD].w[VECT_ODD(1)] = SATURATE_SW(res); |
2104 |
gCPU.vr[vrD].w[VECT_EVEN(1)] = 0; |
2105 |
} |
2106 |
|
2107 |
/* vsumsws Vector Sum Across Signed Word Saturate |
2108 |
* v.271 |
2109 |
*/ |
2110 |
void ppc_opc_vsumsws() |
2111 |
{ |
2112 |
VECTOR_DEBUG; |
2113 |
int vrD, vrA, vrB; |
2114 |
sint64 res; |
2115 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2116 |
|
2117 |
res = (sint64)gCPU.vr[vrA].sw[0] + (sint64)gCPU.vr[vrA].sw[1]; |
2118 |
res += (sint64)gCPU.vr[vrA].sw[2] + (sint64)gCPU.vr[vrA].sw[3]; |
2119 |
|
2120 |
res += (sint64)VECT_W(gCPU.vr[vrB], 3); |
2121 |
|
2122 |
VECT_W(gCPU.vr[vrD], 3) = SATURATE_SW(res); |
2123 |
VECT_W(gCPU.vr[vrD], 2) = 0; |
2124 |
VECT_W(gCPU.vr[vrD], 1) = 0; |
2125 |
VECT_W(gCPU.vr[vrD], 0) = 0; |
2126 |
} |
2127 |
|
2128 |
/* vnmsubfp Vector Negative Multiply-Subtract Floating Point |
2129 |
* v.215 |
2130 |
*/ |
2131 |
void ppc_opc_vnmsubfp() |
2132 |
{ |
2133 |
VECTOR_DEBUG; |
2134 |
int vrD, vrA, vrB, vrC; |
2135 |
double res; |
2136 |
PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC); |
2137 |
|
2138 |
for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP |
2139 |
res = (double)gCPU.vr[vrA].f[i] * (double)gCPU.vr[vrC].f[i]; |
2140 |
|
2141 |
res = (double)gCPU.vr[vrB].f[i] - res; |
2142 |
|
2143 |
gCPU.vr[vrD].f[i] = (float)res; |
2144 |
} |
2145 |
} |
2146 |
|
2147 |
/* vavgub Vector Average Unsigned Byte |
2148 |
* v.152 |
2149 |
*/ |
2150 |
void ppc_opc_vavgub() |
2151 |
{ |
2152 |
VECTOR_DEBUG; |
2153 |
int vrD, vrA, vrB; |
2154 |
uint16 res; |
2155 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2156 |
|
2157 |
for (int i=0; i<16; i++) { |
2158 |
res = (uint16)gCPU.vr[vrA].b[i] + |
2159 |
(uint16)gCPU.vr[vrB].b[i] + 1; |
2160 |
|
2161 |
gCPU.vr[vrD].b[i] = (res >> 1); |
2162 |
} |
2163 |
} |
2164 |
|
2165 |
/* vavguh Vector Average Unsigned Half Word |
2166 |
* v.153 |
2167 |
*/ |
2168 |
void ppc_opc_vavguh() |
2169 |
{ |
2170 |
VECTOR_DEBUG; |
2171 |
int vrD, vrA, vrB; |
2172 |
uint32 res; |
2173 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2174 |
|
2175 |
for (int i=0; i<8; i++) { |
2176 |
res = (uint32)gCPU.vr[vrA].h[i] + |
2177 |
(uint32)gCPU.vr[vrB].h[i] + 1; |
2178 |
|
2179 |
gCPU.vr[vrD].h[i] = (res >> 1); |
2180 |
} |
2181 |
} |
2182 |
|
2183 |
/* vavguw Vector Average Unsigned Word |
2184 |
* v.154 |
2185 |
*/ |
2186 |
void ppc_opc_vavguw() |
2187 |
{ |
2188 |
VECTOR_DEBUG; |
2189 |
int vrD, vrA, vrB; |
2190 |
uint64 res; |
2191 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2192 |
|
2193 |
for (int i=0; i<4; i++) { |
2194 |
res = (uint64)gCPU.vr[vrA].w[i] + |
2195 |
(uint64)gCPU.vr[vrB].w[i] + 1; |
2196 |
|
2197 |
gCPU.vr[vrD].w[i] = (res >> 1); |
2198 |
} |
2199 |
} |
2200 |
|
2201 |
/* vavgsb Vector Average Signed Byte |
2202 |
* v.149 |
2203 |
*/ |
2204 |
void ppc_opc_vavgsb() |
2205 |
{ |
2206 |
VECTOR_DEBUG; |
2207 |
int vrD, vrA, vrB; |
2208 |
sint16 res; |
2209 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2210 |
|
2211 |
for (int i=0; i<16; i++) { |
2212 |
res = (sint16)gCPU.vr[vrA].sb[i] + |
2213 |
(sint16)gCPU.vr[vrB].sb[i] + 1; |
2214 |
|
2215 |
gCPU.vr[vrD].sb[i] = (res >> 1); |
2216 |
} |
2217 |
} |
2218 |
|
2219 |
/* vavgsh Vector Average Signed Half Word |
2220 |
* v.150 |
2221 |
*/ |
2222 |
void ppc_opc_vavgsh() |
2223 |
{ |
2224 |
VECTOR_DEBUG; |
2225 |
int vrD, vrA, vrB; |
2226 |
sint32 res; |
2227 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2228 |
|
2229 |
for (int i=0; i<8; i++) { |
2230 |
res = (sint32)gCPU.vr[vrA].sh[i] + |
2231 |
(sint32)gCPU.vr[vrB].sh[i] + 1; |
2232 |
|
2233 |
gCPU.vr[vrD].sh[i] = (res >> 1); |
2234 |
} |
2235 |
} |
2236 |
|
2237 |
/* vavgsw Vector Average Signed Word |
2238 |
* v.151 |
2239 |
*/ |
2240 |
void ppc_opc_vavgsw() |
2241 |
{ |
2242 |
VECTOR_DEBUG; |
2243 |
int vrD, vrA, vrB; |
2244 |
sint64 res; |
2245 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2246 |
|
2247 |
for (int i=0; i<4; i++) { |
2248 |
res = (sint64)gCPU.vr[vrA].sw[i] + |
2249 |
(sint64)gCPU.vr[vrB].sw[i] + 1; |
2250 |
|
2251 |
gCPU.vr[vrD].sw[i] = (res >> 1); |
2252 |
} |
2253 |
} |
2254 |
|
2255 |
/* vmaxub Vector Maximum Unsigned Byte |
2256 |
* v.182 |
2257 |
*/ |
2258 |
void ppc_opc_vmaxub() |
2259 |
{ |
2260 |
VECTOR_DEBUG; |
2261 |
int vrD, vrA, vrB; |
2262 |
uint8 res; |
2263 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2264 |
|
2265 |
for (int i=0; i<16; i++) { |
2266 |
res = gCPU.vr[vrA].b[i]; |
2267 |
|
2268 |
if (res < gCPU.vr[vrB].b[i]) |
2269 |
res = gCPU.vr[vrB].b[i]; |
2270 |
|
2271 |
gCPU.vr[vrD].b[i] = res; |
2272 |
} |
2273 |
} |
2274 |
|
2275 |
/* vmaxuh Vector Maximum Unsigned Half Word |
2276 |
* v.183 |
2277 |
*/ |
2278 |
void ppc_opc_vmaxuh() |
2279 |
{ |
2280 |
VECTOR_DEBUG; |
2281 |
int vrD, vrA, vrB; |
2282 |
uint16 res; |
2283 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2284 |
|
2285 |
for (int i=0; i<8; i++) { |
2286 |
res = gCPU.vr[vrA].h[i]; |
2287 |
|
2288 |
if (res < gCPU.vr[vrB].h[i]) |
2289 |
res = gCPU.vr[vrB].h[i]; |
2290 |
|
2291 |
gCPU.vr[vrD].h[i] = res; |
2292 |
} |
2293 |
} |
2294 |
|
2295 |
/* vmaxuw Vector Maximum Unsigned Word |
2296 |
* v.184 |
2297 |
*/ |
2298 |
void ppc_opc_vmaxuw() |
2299 |
{ |
2300 |
VECTOR_DEBUG; |
2301 |
int vrD, vrA, vrB; |
2302 |
uint32 res; |
2303 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2304 |
|
2305 |
for (int i=0; i<4; i++) { |
2306 |
res = gCPU.vr[vrA].w[i]; |
2307 |
|
2308 |
if (res < gCPU.vr[vrB].w[i]) |
2309 |
res = gCPU.vr[vrB].w[i]; |
2310 |
|
2311 |
gCPU.vr[vrD].w[i] = res; |
2312 |
} |
2313 |
} |
2314 |
|
2315 |
/* vmaxsb Vector Maximum Signed Byte |
2316 |
* v.179 |
2317 |
*/ |
2318 |
void ppc_opc_vmaxsb() |
2319 |
{ |
2320 |
VECTOR_DEBUG; |
2321 |
int vrD, vrA, vrB; |
2322 |
sint8 res; |
2323 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2324 |
|
2325 |
for (int i=0; i<16; i++) { |
2326 |
res = gCPU.vr[vrA].sb[i]; |
2327 |
|
2328 |
if (res < gCPU.vr[vrB].sb[i]) |
2329 |
res = gCPU.vr[vrB].sb[i]; |
2330 |
|
2331 |
gCPU.vr[vrD].sb[i] = res; |
2332 |
} |
2333 |
} |
2334 |
|
2335 |
/* vmaxsh Vector Maximum Signed Half Word |
2336 |
* v.180 |
2337 |
*/ |
2338 |
void ppc_opc_vmaxsh() |
2339 |
{ |
2340 |
VECTOR_DEBUG; |
2341 |
int vrD, vrA, vrB; |
2342 |
sint16 res; |
2343 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2344 |
|
2345 |
for (int i=0; i<8; i++) { |
2346 |
res = gCPU.vr[vrA].sh[i]; |
2347 |
|
2348 |
if (res < gCPU.vr[vrB].sh[i]) |
2349 |
res = gCPU.vr[vrB].sh[i]; |
2350 |
|
2351 |
gCPU.vr[vrD].sh[i] = res; |
2352 |
} |
2353 |
} |
2354 |
|
2355 |
/* vmaxsw Vector Maximum Signed Word |
2356 |
* v.181 |
2357 |
*/ |
2358 |
void ppc_opc_vmaxsw() |
2359 |
{ |
2360 |
VECTOR_DEBUG; |
2361 |
int vrD, vrA, vrB; |
2362 |
sint32 res; |
2363 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2364 |
|
2365 |
for (int i=0; i<4; i++) { |
2366 |
res = gCPU.vr[vrA].sw[i]; |
2367 |
|
2368 |
if (res < gCPU.vr[vrB].sw[i]) |
2369 |
res = gCPU.vr[vrB].sw[i]; |
2370 |
|
2371 |
gCPU.vr[vrD].sw[i] = res; |
2372 |
} |
2373 |
} |
2374 |
|
2375 |
/* vmaxfp Vector Maximum Floating Point |
2376 |
* v.178 |
2377 |
*/ |
2378 |
void ppc_opc_vmaxfp() |
2379 |
{ |
2380 |
VECTOR_DEBUG; |
2381 |
int vrD, vrA, vrB; |
2382 |
float res; |
2383 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2384 |
|
2385 |
for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP |
2386 |
res = gCPU.vr[vrA].f[i]; |
2387 |
|
2388 |
if (res < gCPU.vr[vrB].f[i]) |
2389 |
res = gCPU.vr[vrB].f[i]; |
2390 |
|
2391 |
gCPU.vr[vrD].f[i] = res; |
2392 |
} |
2393 |
} |
2394 |
|
2395 |
/* vminub Vector Minimum Unsigned Byte |
2396 |
* v.191 |
2397 |
*/ |
2398 |
void ppc_opc_vminub() |
2399 |
{ |
2400 |
VECTOR_DEBUG; |
2401 |
int vrD, vrA, vrB; |
2402 |
uint8 res; |
2403 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2404 |
|
2405 |
for (int i=0; i<16; i++) { |
2406 |
res = gCPU.vr[vrA].b[i]; |
2407 |
|
2408 |
if (res > gCPU.vr[vrB].b[i]) |
2409 |
res = gCPU.vr[vrB].b[i]; |
2410 |
|
2411 |
gCPU.vr[vrD].b[i] = res; |
2412 |
} |
2413 |
} |
2414 |
|
2415 |
/* vminuh Vector Minimum Unsigned Half Word |
2416 |
* v.192 |
2417 |
*/ |
2418 |
void ppc_opc_vminuh() |
2419 |
{ |
2420 |
VECTOR_DEBUG; |
2421 |
int vrD, vrA, vrB; |
2422 |
uint16 res; |
2423 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2424 |
|
2425 |
for (int i=0; i<8; i++) { |
2426 |
res = gCPU.vr[vrA].h[i]; |
2427 |
|
2428 |
if (res > gCPU.vr[vrB].h[i]) |
2429 |
res = gCPU.vr[vrB].h[i]; |
2430 |
|
2431 |
gCPU.vr[vrD].h[i] = res; |
2432 |
} |
2433 |
} |
2434 |
|
2435 |
/* vminuw Vector Minimum Unsigned Word |
2436 |
* v.193 |
2437 |
*/ |
2438 |
void ppc_opc_vminuw() |
2439 |
{ |
2440 |
VECTOR_DEBUG; |
2441 |
int vrD, vrA, vrB; |
2442 |
uint32 res; |
2443 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2444 |
|
2445 |
for (int i=0; i<4; i++) { |
2446 |
res = gCPU.vr[vrA].w[i]; |
2447 |
|
2448 |
if (res > gCPU.vr[vrB].w[i]) |
2449 |
res = gCPU.vr[vrB].w[i]; |
2450 |
|
2451 |
gCPU.vr[vrD].w[i] = res; |
2452 |
} |
2453 |
} |
2454 |
|
2455 |
/* vminsb Vector Minimum Signed Byte |
2456 |
* v.188 |
2457 |
*/ |
2458 |
void ppc_opc_vminsb() |
2459 |
{ |
2460 |
VECTOR_DEBUG; |
2461 |
int vrD, vrA, vrB; |
2462 |
sint8 res; |
2463 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2464 |
|
2465 |
for (int i=0; i<16; i++) { |
2466 |
res = gCPU.vr[vrA].sb[i]; |
2467 |
|
2468 |
if (res > gCPU.vr[vrB].sb[i]) |
2469 |
res = gCPU.vr[vrB].sb[i]; |
2470 |
|
2471 |
gCPU.vr[vrD].sb[i] = res; |
2472 |
} |
2473 |
} |
2474 |
|
2475 |
/* vminsh Vector Minimum Signed Half Word |
2476 |
* v.189 |
2477 |
*/ |
2478 |
void ppc_opc_vminsh() |
2479 |
{ |
2480 |
VECTOR_DEBUG; |
2481 |
int vrD, vrA, vrB; |
2482 |
sint16 res; |
2483 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2484 |
|
2485 |
for (int i=0; i<8; i++) { |
2486 |
res = gCPU.vr[vrA].sh[i]; |
2487 |
|
2488 |
if (res > gCPU.vr[vrB].sh[i]) |
2489 |
res = gCPU.vr[vrB].sh[i]; |
2490 |
|
2491 |
gCPU.vr[vrD].sh[i] = res; |
2492 |
} |
2493 |
} |
2494 |
|
2495 |
/* vminsw Vector Minimum Signed Word |
2496 |
* v.190 |
2497 |
*/ |
2498 |
void ppc_opc_vminsw() |
2499 |
{ |
2500 |
VECTOR_DEBUG; |
2501 |
int vrD, vrA, vrB; |
2502 |
sint32 res; |
2503 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2504 |
|
2505 |
for (int i=0; i<4; i++) { |
2506 |
res = gCPU.vr[vrA].sw[i]; |
2507 |
|
2508 |
if (res > gCPU.vr[vrB].sw[i]) |
2509 |
res = gCPU.vr[vrB].sw[i]; |
2510 |
|
2511 |
gCPU.vr[vrD].sw[i] = res; |
2512 |
} |
2513 |
} |
2514 |
|
2515 |
/* vminfp Vector Minimum Floating Point |
2516 |
* v.187 |
2517 |
*/ |
2518 |
void ppc_opc_vminfp() |
2519 |
{ |
2520 |
VECTOR_DEBUG; |
2521 |
int vrD, vrA, vrB; |
2522 |
float res; |
2523 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2524 |
|
2525 |
for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP |
2526 |
res = gCPU.vr[vrA].f[i]; |
2527 |
|
2528 |
if (res > gCPU.vr[vrB].f[i]) |
2529 |
res = gCPU.vr[vrB].f[i]; |
2530 |
|
2531 |
gCPU.vr[vrD].f[i] = res; |
2532 |
} |
2533 |
} |
2534 |
|
2535 |
/* vrfin Vector Round to Floating-Point Integer Nearest |
2536 |
* v.231 |
2537 |
*/ |
2538 |
void ppc_opc_vrfin() |
2539 |
{ |
2540 |
VECTOR_DEBUG; |
2541 |
int vrD, vrA, vrB; |
2542 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2543 |
PPC_OPC_ASSERT(vrA==0); |
2544 |
|
2545 |
/* Documentation doesn't dictate how this instruction should |
2546 |
* round from a middle point. With a test on a real G4, it was |
2547 |
* found to be round to nearest, with bias to even if equidistant. |
2548 |
* |
2549 |
* This is covered by the function rint() |
2550 |
*/ |
2551 |
for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP |
2552 |
gCPU.vr[vrD].f[i] = rintf(gCPU.vr[vrB].f[i]); |
2553 |
} |
2554 |
} |
2555 |
|
2556 |
/* vrfip Vector Round to Floating-Point Integer toward Plus Infinity |
2557 |
* v.232 |
2558 |
*/ |
2559 |
void ppc_opc_vrfip() |
2560 |
{ |
2561 |
VECTOR_DEBUG; |
2562 |
int vrD, vrA, vrB; |
2563 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2564 |
PPC_OPC_ASSERT(vrA==0); |
2565 |
|
2566 |
for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP |
2567 |
gCPU.vr[vrD].f[i] = ceilf(gCPU.vr[vrB].f[i]); |
2568 |
} |
2569 |
} |
2570 |
|
2571 |
/* vrfim Vector Round to Floating-Point Integer toward Minus Infinity |
2572 |
* v.230 |
2573 |
*/ |
2574 |
void ppc_opc_vrfim() |
2575 |
{ |
2576 |
VECTOR_DEBUG; |
2577 |
int vrD, vrA, vrB; |
2578 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2579 |
PPC_OPC_ASSERT(vrA==0); |
2580 |
|
2581 |
for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP |
2582 |
gCPU.vr[vrD].f[i] = floorf(gCPU.vr[vrB].f[i]); |
2583 |
} |
2584 |
} |
2585 |
|
2586 |
/* vrfiz Vector Round to Floating-Point Integer toward Zero |
2587 |
* v.233 |
2588 |
*/ |
2589 |
void ppc_opc_vrfiz() |
2590 |
{ |
2591 |
VECTOR_DEBUG; |
2592 |
int vrD, vrA, vrB; |
2593 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2594 |
PPC_OPC_ASSERT(vrA==0); |
2595 |
|
2596 |
for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP |
2597 |
gCPU.vr[vrD].f[i] = truncf(gCPU.vr[vrD].f[i]); |
2598 |
} |
2599 |
} |
2600 |
|
2601 |
/* vrefp Vector Reciprocal Estimate Floating Point |
2602 |
* v.228 |
2603 |
*/ |
2604 |
void ppc_opc_vrefp() |
2605 |
{ |
2606 |
VECTOR_DEBUG; |
2607 |
int vrD, vrA, vrB; |
2608 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2609 |
PPC_OPC_ASSERT(vrA==0); |
2610 |
|
2611 |
/* This emulation generates an exact value, instead of an estimate. |
2612 |
* This is technically within specs, but some test-suites expect the |
2613 |
* exact estimate value returned by G4s. These anomolous failures |
2614 |
* should be ignored. |
2615 |
*/ |
2616 |
|
2617 |
for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP |
2618 |
gCPU.vr[vrD].f[i] = 1 / gCPU.vr[vrB].f[i]; |
2619 |
} |
2620 |
} |
2621 |
|
2622 |
/* vrsqrtefp Vector Reciprocal Square Root Estimate Floating Point |
2623 |
* v.237 |
2624 |
*/ |
2625 |
void ppc_opc_vrsqrtefp() |
2626 |
{ |
2627 |
VECTOR_DEBUG; |
2628 |
int vrD, vrA, vrB; |
2629 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2630 |
PPC_OPC_ASSERT(vrA==0); |
2631 |
|
2632 |
/* This emulation generates an exact value, instead of an estimate. |
2633 |
* This is technically within specs, but some test-suites expect the |
2634 |
* exact estimate value returned by G4s. These anomolous failures |
2635 |
* should be ignored. |
2636 |
*/ |
2637 |
|
2638 |
for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP |
2639 |
gCPU.vr[vrD].f[i] = 1 / sqrt(gCPU.vr[vrB].f[i]); |
2640 |
} |
2641 |
} |
2642 |
|
2643 |
/* vlogefp Vector Log2 Estimate Floating Point |
2644 |
* v.175 |
2645 |
*/ |
2646 |
void ppc_opc_vlogefp() |
2647 |
{ |
2648 |
VECTOR_DEBUG; |
2649 |
int vrD, vrA, vrB; |
2650 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2651 |
PPC_OPC_ASSERT(vrA==0); |
2652 |
|
2653 |
/* This emulation generates an exact value, instead of an estimate. |
2654 |
* This is technically within specs, but some test-suites expect the |
2655 |
* exact estimate value returned by G4s. These anomolous failures |
2656 |
* should be ignored. |
2657 |
*/ |
2658 |
|
2659 |
for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP |
2660 |
gCPU.vr[vrD].f[i] = log2(gCPU.vr[vrB].f[i]); |
2661 |
} |
2662 |
} |
2663 |
|
2664 |
/* vexptefp Vector 2 Raised to the Exponent Estimate Floating Point |
2665 |
* v.173 |
2666 |
*/ |
2667 |
void ppc_opc_vexptefp() |
2668 |
{ |
2669 |
VECTOR_DEBUG; |
2670 |
int vrD, vrA, vrB; |
2671 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2672 |
PPC_OPC_ASSERT(vrA==0); |
2673 |
|
2674 |
/* This emulation generates an exact value, instead of an estimate. |
2675 |
* This is technically within specs, but some test-suites expect the |
2676 |
* exact estimate value returned by G4s. These anomolous failures |
2677 |
* should be ignored. |
2678 |
*/ |
2679 |
|
2680 |
for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP |
2681 |
gCPU.vr[vrD].f[i] = exp2(gCPU.vr[vrB].f[i]); |
2682 |
} |
2683 |
} |
2684 |
|
2685 |
/* vcfux Vector Convert from Unsigned Fixed-Point Word |
2686 |
* v.156 |
2687 |
*/ |
2688 |
void ppc_opc_vcfux() |
2689 |
{ |
2690 |
VECTOR_DEBUG; |
2691 |
int vrD, vrB; |
2692 |
uint32 uimm; |
2693 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, uimm, vrB); |
2694 |
|
2695 |
for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP |
2696 |
gCPU.vr[vrD].f[i] = ((float)gCPU.vr[vrB].w[i]) / (1 << uimm); |
2697 |
} |
2698 |
} |
2699 |
|
2700 |
/* vcfsx Vector Convert from Signed Fixed-Point Word |
2701 |
* v.155 |
2702 |
*/ |
2703 |
void ppc_opc_vcfsx() |
2704 |
{ |
2705 |
VECTOR_DEBUG; |
2706 |
int vrD, vrB; |
2707 |
uint32 uimm; |
2708 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, uimm, vrB); |
2709 |
|
2710 |
for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP |
2711 |
gCPU.vr[vrD].f[i] = ((float)gCPU.vr[vrB].sw[i]) / (1 << uimm); |
2712 |
} |
2713 |
} |
2714 |
|
2715 |
/* vctsxs Vector Convert To Signed Fixed-Point Word Saturate |
2716 |
* v.171 |
2717 |
*/ |
2718 |
void ppc_opc_vctsxs() |
2719 |
{ |
2720 |
VECTOR_DEBUG; |
2721 |
int vrD, vrB; |
2722 |
uint32 uimm; |
2723 |
float ftmp; |
2724 |
sint32 tmp; |
2725 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, uimm, vrB); |
2726 |
|
2727 |
for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP |
2728 |
ftmp = gCPU.vr[vrB].f[i] * (float)(1 << uimm); |
2729 |
ftmp = truncf(ftmp); |
2730 |
|
2731 |
tmp = (sint32)ftmp; |
2732 |
|
2733 |
if (ftmp > 2147483647.0) { |
2734 |
tmp = 2147483647; // 0x7fffffff |
2735 |
gCPU.vscr |= VSCR_SAT; |
2736 |
} else if (ftmp < -2147483648.0) { |
2737 |
tmp = -2147483648LL; // 0x80000000 |
2738 |
gCPU.vscr |= VSCR_SAT; |
2739 |
} |
2740 |
|
2741 |
gCPU.vr[vrD].sw[i] = tmp; |
2742 |
} |
2743 |
} |
2744 |
|
2745 |
/* vctuxs Vector Convert to Unsigned Fixed-Point Word Saturate |
2746 |
* v.172 |
2747 |
*/ |
2748 |
void ppc_opc_vctuxs() |
2749 |
{ |
2750 |
VECTOR_DEBUG; |
2751 |
int vrD, vrB; |
2752 |
uint32 tmp, uimm; |
2753 |
float ftmp; |
2754 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, uimm, vrB); |
2755 |
|
2756 |
for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP |
2757 |
ftmp = gCPU.vr[vrB].f[i] * (float)(1 << uimm); |
2758 |
ftmp = truncf(ftmp); |
2759 |
|
2760 |
tmp = (uint32)ftmp; |
2761 |
|
2762 |
if (ftmp > 4294967295.0) { |
2763 |
tmp = 0xffffffff; |
2764 |
gCPU.vscr |= VSCR_SAT; |
2765 |
} else if (ftmp < 0) { |
2766 |
tmp = 0; |
2767 |
gCPU.vscr |= VSCR_SAT; |
2768 |
} |
2769 |
|
2770 |
gCPU.vr[vrD].w[i] = tmp; |
2771 |
} |
2772 |
} |
2773 |
|
2774 |
/* vand Vector Logical AND |
2775 |
* v.147 |
2776 |
*/ |
2777 |
void ppc_opc_vand() |
2778 |
{ |
2779 |
VECTOR_DEBUG; |
2780 |
int vrD, vrA, vrB; |
2781 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2782 |
|
2783 |
gCPU.vr[vrD].d[0] = gCPU.vr[vrA].d[0] & gCPU.vr[vrB].d[0]; |
2784 |
gCPU.vr[vrD].d[1] = gCPU.vr[vrA].d[1] & gCPU.vr[vrB].d[1]; |
2785 |
} |
2786 |
|
2787 |
/* vandc Vector Logical AND with Complement |
2788 |
* v.148 |
2789 |
*/ |
2790 |
void ppc_opc_vandc() |
2791 |
{ |
2792 |
VECTOR_DEBUG; |
2793 |
int vrD, vrA, vrB; |
2794 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2795 |
|
2796 |
gCPU.vr[vrD].d[0] = gCPU.vr[vrA].d[0] & ~gCPU.vr[vrB].d[0]; |
2797 |
gCPU.vr[vrD].d[1] = gCPU.vr[vrA].d[1] & ~gCPU.vr[vrB].d[1]; |
2798 |
} |
2799 |
|
2800 |
/* vor Vector Logical OR |
2801 |
* v.217 |
2802 |
*/ |
2803 |
void ppc_opc_vor() |
2804 |
{ |
2805 |
VECTOR_DEBUG_COMMON; |
2806 |
int vrD, vrA, vrB; |
2807 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2808 |
|
2809 |
gCPU.vr[vrD].d[0] = gCPU.vr[vrA].d[0] | gCPU.vr[vrB].d[0]; |
2810 |
gCPU.vr[vrD].d[1] = gCPU.vr[vrA].d[1] | gCPU.vr[vrB].d[1]; |
2811 |
} |
2812 |
|
2813 |
/* vnor Vector Logical NOR |
2814 |
* v.216 |
2815 |
*/ |
2816 |
void ppc_opc_vnor() |
2817 |
{ |
2818 |
VECTOR_DEBUG; |
2819 |
int vrD, vrA, vrB; |
2820 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2821 |
|
2822 |
gCPU.vr[vrD].d[0] = ~(gCPU.vr[vrA].d[0] | gCPU.vr[vrB].d[0]); |
2823 |
gCPU.vr[vrD].d[1] = ~(gCPU.vr[vrA].d[1] | gCPU.vr[vrB].d[1]); |
2824 |
} |
2825 |
|
2826 |
/* vxor Vector Logical XOR |
2827 |
* v.282 |
2828 |
*/ |
2829 |
void ppc_opc_vxor() |
2830 |
{ |
2831 |
VECTOR_DEBUG_COMMON; |
2832 |
int vrD, vrA, vrB; |
2833 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2834 |
|
2835 |
gCPU.vr[vrD].d[0] = gCPU.vr[vrA].d[0] ^ gCPU.vr[vrB].d[0]; |
2836 |
gCPU.vr[vrD].d[1] = gCPU.vr[vrA].d[1] ^ gCPU.vr[vrB].d[1]; |
2837 |
} |
2838 |
|
2839 |
/* Masks for the four bits of gCPU.cr that the vector compare
 * instructions below rewrite when their record bit is set.  CR_CR6
 * covers the whole 4-bit field; the other four name its single bits,
 * from the most significant (CR_CR6_EQ) down to the least
 * significant (CR_CR6_EQ_SOME).
 */
#define CR_CR6		(0x00f0)
#define CR_CR6_EQ	(0x0080)
#define CR_CR6_NE_SOME	(0x0040)
#define CR_CR6_NE	(0x0020)
#define CR_CR6_EQ_SOME	(0x0010)
2844 |
|
2845 |
/* vcmpequbx Vector Compare Equal-to Unsigned Byte |
2846 |
* v.160 |
2847 |
*/ |
2848 |
void ppc_opc_vcmpequbx() |
2849 |
{ |
2850 |
VECTOR_DEBUG; |
2851 |
int vrD, vrA, vrB; |
2852 |
int tf=CR_CR6_EQ | CR_CR6_NE; |
2853 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2854 |
|
2855 |
for (int i=0; i<16; i++) { |
2856 |
if (gCPU.vr[vrA].b[i] == gCPU.vr[vrB].b[i]) { |
2857 |
gCPU.vr[vrD].b[i] = 0xff; |
2858 |
tf &= ~CR_CR6_NE; |
2859 |
tf |= CR_CR6_EQ_SOME; |
2860 |
} else { |
2861 |
gCPU.vr[vrD].b[i] = 0; |
2862 |
tf &= ~CR_CR6_EQ; |
2863 |
tf |= CR_CR6_NE_SOME; |
2864 |
} |
2865 |
} |
2866 |
|
2867 |
if (PPC_OPC_VRc & gCPU.current_opc) { |
2868 |
gCPU.cr &= ~CR_CR6; |
2869 |
gCPU.cr |= tf; |
2870 |
} |
2871 |
} |
2872 |
|
2873 |
/* vcmpequhx Vector Compare Equal-to Unsigned Half Word |
2874 |
* v.161 |
2875 |
*/ |
2876 |
void ppc_opc_vcmpequhx() |
2877 |
{ |
2878 |
VECTOR_DEBUG; |
2879 |
int vrD, vrA, vrB; |
2880 |
int tf=CR_CR6_EQ | CR_CR6_NE; |
2881 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2882 |
|
2883 |
for (int i=0; i<8; i++) { |
2884 |
if (gCPU.vr[vrA].h[i] == gCPU.vr[vrB].h[i]) { |
2885 |
gCPU.vr[vrD].h[i] = 0xffff; |
2886 |
tf &= ~CR_CR6_NE; |
2887 |
tf |= CR_CR6_EQ_SOME; |
2888 |
} else { |
2889 |
gCPU.vr[vrD].h[i] = 0; |
2890 |
tf &= ~CR_CR6_EQ; |
2891 |
tf |= CR_CR6_NE_SOME; |
2892 |
} |
2893 |
} |
2894 |
|
2895 |
if (PPC_OPC_VRc & gCPU.current_opc) { |
2896 |
gCPU.cr &= ~CR_CR6; |
2897 |
gCPU.cr |= tf; |
2898 |
} |
2899 |
} |
2900 |
|
2901 |
/* vcmpequwx Vector Compare Equal-to Unsigned Word |
2902 |
* v.162 |
2903 |
*/ |
2904 |
void ppc_opc_vcmpequwx() |
2905 |
{ |
2906 |
VECTOR_DEBUG; |
2907 |
int vrD, vrA, vrB; |
2908 |
int tf=CR_CR6_EQ | CR_CR6_NE; |
2909 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2910 |
|
2911 |
for (int i=0; i<4; i++) { |
2912 |
if (gCPU.vr[vrA].w[i] == gCPU.vr[vrB].w[i]) { |
2913 |
gCPU.vr[vrD].w[i] = 0xffffffff; |
2914 |
tf &= ~CR_CR6_NE; |
2915 |
tf |= CR_CR6_EQ_SOME; |
2916 |
} else { |
2917 |
gCPU.vr[vrD].w[i] = 0; |
2918 |
tf &= ~CR_CR6_EQ; |
2919 |
tf |= CR_CR6_NE_SOME; |
2920 |
} |
2921 |
} |
2922 |
|
2923 |
if (PPC_OPC_VRc & gCPU.current_opc) { |
2924 |
gCPU.cr &= ~CR_CR6; |
2925 |
gCPU.cr |= tf; |
2926 |
} |
2927 |
} |
2928 |
|
2929 |
/* vcmpeqfpx Vector Compare Equal-to-Floating Point |
2930 |
* v.159 |
2931 |
*/ |
2932 |
void ppc_opc_vcmpeqfpx() |
2933 |
{ |
2934 |
VECTOR_DEBUG; |
2935 |
int vrD, vrA, vrB; |
2936 |
int tf=CR_CR6_EQ | CR_CR6_NE; |
2937 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2938 |
|
2939 |
for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP |
2940 |
if (gCPU.vr[vrA].f[i] == gCPU.vr[vrB].f[i]) { |
2941 |
gCPU.vr[vrD].w[i] = 0xffffffff; |
2942 |
tf &= ~CR_CR6_NE; |
2943 |
tf |= CR_CR6_EQ_SOME; |
2944 |
} else { |
2945 |
gCPU.vr[vrD].w[i] = 0; |
2946 |
tf &= ~CR_CR6_EQ; |
2947 |
tf |= CR_CR6_NE_SOME; |
2948 |
} |
2949 |
} |
2950 |
|
2951 |
if (PPC_OPC_VRc & gCPU.current_opc) { |
2952 |
gCPU.cr &= ~CR_CR6; |
2953 |
gCPU.cr |= tf; |
2954 |
} |
2955 |
} |
2956 |
|
2957 |
/* vcmpgtubx Vector Compare Greater-Than Unsigned Byte |
2958 |
* v.168 |
2959 |
*/ |
2960 |
void ppc_opc_vcmpgtubx() |
2961 |
{ |
2962 |
VECTOR_DEBUG; |
2963 |
int vrD, vrA, vrB; |
2964 |
int tf=CR_CR6_EQ | CR_CR6_NE; |
2965 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2966 |
|
2967 |
for (int i=0; i<16; i++) { |
2968 |
if (gCPU.vr[vrA].b[i] > gCPU.vr[vrB].b[i]) { |
2969 |
gCPU.vr[vrD].b[i] = 0xff; |
2970 |
tf &= ~CR_CR6_NE; |
2971 |
tf |= CR_CR6_EQ_SOME; |
2972 |
} else { |
2973 |
gCPU.vr[vrD].b[i] = 0; |
2974 |
tf &= ~CR_CR6_EQ; |
2975 |
tf |= CR_CR6_NE_SOME; |
2976 |
} |
2977 |
} |
2978 |
|
2979 |
if (PPC_OPC_VRc & gCPU.current_opc) { |
2980 |
gCPU.cr &= ~CR_CR6; |
2981 |
gCPU.cr |= tf; |
2982 |
} |
2983 |
} |
2984 |
|
2985 |
/* vcmpgtsbx Vector Compare Greater-Than Signed Byte |
2986 |
* v.165 |
2987 |
*/ |
2988 |
void ppc_opc_vcmpgtsbx() |
2989 |
{ |
2990 |
VECTOR_DEBUG; |
2991 |
int vrD, vrA, vrB; |
2992 |
int tf=CR_CR6_EQ | CR_CR6_NE; |
2993 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
2994 |
|
2995 |
for (int i=0; i<16; i++) { |
2996 |
if (gCPU.vr[vrA].sb[i] > gCPU.vr[vrB].sb[i]) { |
2997 |
gCPU.vr[vrD].b[i] = 0xff; |
2998 |
tf &= ~CR_CR6_NE; |
2999 |
tf |= CR_CR6_EQ_SOME; |
3000 |
} else { |
3001 |
gCPU.vr[vrD].b[i] = 0; |
3002 |
tf &= ~CR_CR6_EQ; |
3003 |
tf |= CR_CR6_NE_SOME; |
3004 |
} |
3005 |
} |
3006 |
|
3007 |
if (PPC_OPC_VRc & gCPU.current_opc) { |
3008 |
gCPU.cr &= ~CR_CR6; |
3009 |
gCPU.cr |= tf; |
3010 |
} |
3011 |
} |
3012 |
|
3013 |
/* vcmpgtuhx Vector Compare Greater-Than Unsigned Half Word |
3014 |
* v.169 |
3015 |
*/ |
3016 |
void ppc_opc_vcmpgtuhx() |
3017 |
{ |
3018 |
VECTOR_DEBUG; |
3019 |
int vrD, vrA, vrB; |
3020 |
int tf=CR_CR6_EQ | CR_CR6_NE; |
3021 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
3022 |
|
3023 |
for (int i=0; i<8; i++) { |
3024 |
if (gCPU.vr[vrA].h[i] > gCPU.vr[vrB].h[i]) { |
3025 |
gCPU.vr[vrD].h[i] = 0xffff; |
3026 |
tf &= ~CR_CR6_NE; |
3027 |
tf |= CR_CR6_EQ_SOME; |
3028 |
} else { |
3029 |
gCPU.vr[vrD].h[i] = 0; |
3030 |
tf &= ~CR_CR6_EQ; |
3031 |
tf |= CR_CR6_NE_SOME; |
3032 |
} |
3033 |
} |
3034 |
|
3035 |
if (PPC_OPC_VRc & gCPU.current_opc) { |
3036 |
gCPU.cr &= ~CR_CR6; |
3037 |
gCPU.cr |= tf; |
3038 |
} |
3039 |
} |
3040 |
|
3041 |
/* vcmpgtshx Vector Compare Greater-Than Signed Half Word |
3042 |
* v.166 |
3043 |
*/ |
3044 |
void ppc_opc_vcmpgtshx() |
3045 |
{ |
3046 |
VECTOR_DEBUG; |
3047 |
int vrD, vrA, vrB; |
3048 |
int tf=CR_CR6_EQ | CR_CR6_NE; |
3049 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
3050 |
|
3051 |
for (int i=0; i<8; i++) { |
3052 |
if (gCPU.vr[vrA].sh[i] > gCPU.vr[vrB].sh[i]) { |
3053 |
gCPU.vr[vrD].h[i] = 0xffff; |
3054 |
tf &= ~CR_CR6_NE; |
3055 |
tf |= CR_CR6_EQ_SOME; |
3056 |
} else { |
3057 |
gCPU.vr[vrD].h[i] = 0; |
3058 |
tf &= ~CR_CR6_EQ; |
3059 |
tf |= CR_CR6_NE_SOME; |
3060 |
} |
3061 |
} |
3062 |
|
3063 |
if (PPC_OPC_VRc & gCPU.current_opc) { |
3064 |
gCPU.cr &= ~CR_CR6; |
3065 |
gCPU.cr |= tf; |
3066 |
} |
3067 |
} |
3068 |
|
3069 |
/* vcmpgtuwx Vector Compare Greater-Than Unsigned Word |
3070 |
* v.170 |
3071 |
*/ |
3072 |
void ppc_opc_vcmpgtuwx() |
3073 |
{ |
3074 |
VECTOR_DEBUG; |
3075 |
int vrD, vrA, vrB; |
3076 |
int tf=CR_CR6_EQ | CR_CR6_NE; |
3077 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
3078 |
|
3079 |
for (int i=0; i<4; i++) { |
3080 |
if (gCPU.vr[vrA].w[i] > gCPU.vr[vrB].w[i]) { |
3081 |
gCPU.vr[vrD].w[i] = 0xffffffff; |
3082 |
tf &= ~CR_CR6_NE; |
3083 |
tf |= CR_CR6_EQ_SOME; |
3084 |
} else { |
3085 |
gCPU.vr[vrD].w[i] = 0; |
3086 |
tf &= ~CR_CR6_EQ; |
3087 |
tf |= CR_CR6_NE_SOME; |
3088 |
} |
3089 |
} |
3090 |
|
3091 |
if (PPC_OPC_VRc & gCPU.current_opc) { |
3092 |
gCPU.cr &= ~CR_CR6; |
3093 |
gCPU.cr |= tf; |
3094 |
} |
3095 |
} |
3096 |
|
3097 |
/* vcmpgtswx Vector Compare Greater-Than Signed Word |
3098 |
* v.167 |
3099 |
*/ |
3100 |
void ppc_opc_vcmpgtswx() |
3101 |
{ |
3102 |
VECTOR_DEBUG; |
3103 |
int vrD, vrA, vrB; |
3104 |
int tf=CR_CR6_EQ | CR_CR6_NE; |
3105 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
3106 |
|
3107 |
for (int i=0; i<4; i++) { |
3108 |
if (gCPU.vr[vrA].sw[i] > gCPU.vr[vrB].sw[i]) { |
3109 |
gCPU.vr[vrD].w[i] = 0xffffffff; |
3110 |
tf &= ~CR_CR6_NE; |
3111 |
tf |= CR_CR6_EQ_SOME; |
3112 |
} else { |
3113 |
gCPU.vr[vrD].w[i] = 0; |
3114 |
tf &= ~CR_CR6_EQ; |
3115 |
tf |= CR_CR6_NE_SOME; |
3116 |
} |
3117 |
} |
3118 |
|
3119 |
if (PPC_OPC_VRc & gCPU.current_opc) { |
3120 |
gCPU.cr &= ~CR_CR6; |
3121 |
gCPU.cr |= tf; |
3122 |
} |
3123 |
} |
3124 |
|
3125 |
/* vcmpgtfpx Vector Compare Greater-Than Floating-Point |
3126 |
* v.164 |
3127 |
*/ |
3128 |
void ppc_opc_vcmpgtfpx() |
3129 |
{ |
3130 |
VECTOR_DEBUG; |
3131 |
int vrD, vrA, vrB; |
3132 |
int tf=CR_CR6_EQ | CR_CR6_NE; |
3133 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
3134 |
|
3135 |
for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP |
3136 |
if (gCPU.vr[vrA].f[i] > gCPU.vr[vrB].f[i]) { |
3137 |
gCPU.vr[vrD].w[i] = 0xffffffff; |
3138 |
tf &= ~CR_CR6_NE; |
3139 |
tf |= CR_CR6_EQ_SOME; |
3140 |
} else { |
3141 |
gCPU.vr[vrD].w[i] = 0; |
3142 |
tf &= ~CR_CR6_EQ; |
3143 |
tf |= CR_CR6_NE_SOME; |
3144 |
} |
3145 |
} |
3146 |
|
3147 |
if (PPC_OPC_VRc & gCPU.current_opc) { |
3148 |
gCPU.cr &= ~CR_CR6; |
3149 |
gCPU.cr |= tf; |
3150 |
} |
3151 |
} |
3152 |
|
3153 |
/* vcmpgefpx Vector Compare Greater-Than-or-Equal-to Floating Point |
3154 |
* v.163 |
3155 |
*/ |
3156 |
void ppc_opc_vcmpgefpx() |
3157 |
{ |
3158 |
VECTOR_DEBUG; |
3159 |
int vrD, vrA, vrB; |
3160 |
int tf=CR_CR6_EQ | CR_CR6_NE; |
3161 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
3162 |
|
3163 |
for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP |
3164 |
if (gCPU.vr[vrA].f[i] >= gCPU.vr[vrB].f[i]) { |
3165 |
gCPU.vr[vrD].w[i] = 0xffffffff; |
3166 |
tf &= ~CR_CR6_NE; |
3167 |
tf |= CR_CR6_EQ_SOME; |
3168 |
} else { |
3169 |
gCPU.vr[vrD].w[i] = 0; |
3170 |
tf &= ~CR_CR6_EQ; |
3171 |
tf |= CR_CR6_NE_SOME; |
3172 |
} |
3173 |
} |
3174 |
|
3175 |
if (PPC_OPC_VRc & gCPU.current_opc) { |
3176 |
gCPU.cr &= ~CR_CR6; |
3177 |
gCPU.cr |= tf; |
3178 |
} |
3179 |
} |
3180 |
|
3181 |
/* vcmpbfpx Vector Compare Bounds Floating Point |
3182 |
* v.157 |
3183 |
*/ |
3184 |
void ppc_opc_vcmpbfpx() |
3185 |
{ |
3186 |
VECTOR_DEBUG; |
3187 |
int vrD, vrA, vrB; |
3188 |
int le, ge; |
3189 |
int ib=CR_CR6_NE; |
3190 |
PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); |
3191 |
|
3192 |
for (int i=0; i<4; i++) { //FIXME: This might not comply with Java FP |
3193 |
le = (gCPU.vr[vrA].f[i] <= gCPU.vr[vrB].f[i]) ? 0 : 0x80000000; |
3194 |
ge = (gCPU.vr[vrA].f[i] >= -gCPU.vr[vrB].f[i]) ? 0 : 0x40000000; |
3195 |
|
3196 |
gCPU.vr[vrD].w[i] = le | ge; |
3197 |
if (le | ge) { |
3198 |
ib = 0; |
3199 |
} |
3200 |
} |
3201 |
|
3202 |
if (PPC_OPC_VRc & gCPU.current_opc) { |
3203 |
gCPU.cr &= ~CR_CR6; |
3204 |
gCPU.cr |= ib; |
3205 |
} |
3206 |
} |