/[gxemul]/upstream/0.3.5/src/bintrans_alpha.c
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /upstream/0.3.5/src/bintrans_alpha.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 13 - (show annotations)
Mon Oct 8 16:18:43 2007 UTC (16 years, 7 months ago) by dpavlin
File MIME type: text/plain
File size: 83282 byte(s)
0.3.5
1 /*
2 * Copyright (C) 2004-2005 Anders Gavare. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * 3. The name of the author may not be used to endorse or promote products
13 * derived from this software without specific prior written permission.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 *
27 *
28 * $Id: bintrans_alpha.c,v 1.125 2005/07/31 08:47:56 debug Exp $
29 *
30 * Alpha specific code for dynamic binary translation.
31 *
32 * See bintrans.c for more information. Included from bintrans.c.
33 *
34 *
35 * Some Alpha registers that are reasonable to use:
36 *
37 * t5..t7 6..8 3
38 * s0..s6 9..15 7
39 * a1..a5 17..21 5
40 * t8..t11 22..25 4
41 *
42 * These can be "mapped" to MIPS registers in the translated code, except a0
43 * which points to the cpu struct, and t0..t4 (or so) which are used by the
44 * translated code as temporaries.
45 *
46 * 3 + 7 + 5 + 4 = 19 available registers. Of course, all (except s0..s6) must
47 * be saved when calling external functions, such as when calling tlbp and
48 * other external functions.
49 *
50 * Which are the 19 most commonly used MIPS registers? (This will include the
51 * pc, and the "current number of executed translated instructions".)
52 *
53 * The current allocation is as follows:
54 *
55 * Alpha: MIPS:
56 * ------ -----
57 *
58 * t5 pc (64-bit)
59 * t6 bintrans_instructions_executed (32-bit int)
60 * t7 a0 (mips register 4) (64-bit)
61 * t8 a1 (mips register 5) (64-bit)
62 * t9 s0 (mips register 16) (64-bit)
63 * t10 load table base cached
64 * t11 v0 (mips register 2) (64-bit)
65 * s0 delay_slot (32-bit int)
66 * s1 delay_jmpaddr (64-bit)
67 * s2 sp (mips register 29) (64-bit)
68 * s3 ra (mips register 31) (64-bit)
69 * s4 t0 (mips register 8) (64-bit)
70 * s5 t1 (mips register 9) (64-bit)
71 * s6 store table base cached
72 */
73
74 #define MIPSREG_PC -3 /* pseudo register number selecting the MIPS pc (kept in Alpha t5) */
75 #define MIPSREG_DELAY_SLOT -2 /* pseudo register number selecting delay_slot (kept in Alpha s0) */
76 #define MIPSREG_DELAY_JMPADDR -1 /* pseudo register number selecting delay_jmpaddr (kept in Alpha s1) */
77
78 #define ALPHA_T0 1 /* ALPHA_*: Alpha integer register numbers, as OR-ed into emitted instruction words */
79 #define ALPHA_T1 2
80 #define ALPHA_T2 3
81 #define ALPHA_T3 4
82 #define ALPHA_T4 5
83 #define ALPHA_T5 6
84 #define ALPHA_T6 7
85 #define ALPHA_T7 8
86 #define ALPHA_S0 9
87 #define ALPHA_S1 10
88 #define ALPHA_S2 11
89 #define ALPHA_S3 12
90 #define ALPHA_S4 13
91 #define ALPHA_S5 14
92 #define ALPHA_S6 15
93 #define ALPHA_A0 16 /* a0 holds the pointer to the cpu struct in translated code */
94 #define ALPHA_A1 17
95 #define ALPHA_A2 18
96 #define ALPHA_A3 19
97 #define ALPHA_A4 20
98 #define ALPHA_A5 21
99 #define ALPHA_T8 22
100 #define ALPHA_T9 23
101 #define ALPHA_T10 24
102 #define ALPHA_T11 25
103 #define ALPHA_ZERO 31 /* used as the mapping for MIPS register 0 */
104
105 static int map_MIPS_to_Alpha[32] = { /* index: MIPS GPR number; value: the Alpha register holding that GPR in translated code, or -1 when the GPR is only kept in the cpu struct and must be loaded/stored through a0 */
106 ALPHA_ZERO, -1, ALPHA_T11, -1, /* 0 .. 3 */
107 ALPHA_T7, ALPHA_T8, -1, -1, /* 4 .. 7 */
108 ALPHA_S4, ALPHA_S5, -1, -1, /* 8 .. 11 */
109 -1, -1, -1, -1, /* 12 .. 15 */
110 ALPHA_T9, -1, -1, -1, /* 16 .. 19 */
111 -1, -1, -1, -1, /* 20 .. 23 */
112 -1, -1, -1, -1, /* 24 .. 27 */
113 -1, ALPHA_S2, -1, ALPHA_S3, /* 28 .. 31 */
114 };
115
116
117 struct cpu dummy_cpu; /* dummy object: its field addresses are subtracted from its own address to obtain struct cpu field offsets (see the ofs_* macros) */
118 struct mips_coproc dummy_coproc; /* dummy object; not referenced in the part of the file visible here */
119 struct vth32_table dummy_vth32_table; /* dummy object used by ofs_c0 to obtain the offset of bintrans_chunks[0] */
120
121 unsigned char bintrans_alpha_imb[32] = { /* raw Alpha machine code, four bytes per instruction: imb, ret, then nop/unop padding; called as a function by bintrans_host_cacheinvalidate() */
122 0x86, 0x00, 0x00, 0x00, /* imb */
123 0x01, 0x80, 0xfa, 0x6b, /* ret */
124 0x1f, 0x04, 0xff, 0x47, /* nop */
125 0x00, 0x00, 0xfe, 0x2e, /* unop */
126 0x1f, 0x04, 0xff, 0x47, /* nop */
127 0x00, 0x00, 0xfe, 0x2e, /* unop */
128 0x1f, 0x04, 0xff, 0x47, /* nop */
129 0x00, 0x00, 0xfe, 0x2e /* unop */
130 };
131
132
133 /*
134 * bintrans_host_cacheinvalidate()
135 *
136 * Invalidate the host's instruction cache. On Alpha, we do this by
137 * executing an imb instruction.
138 *
139 * NOTE: A simple asm("imb"); would be enough here, but not all
140 * compilers have such simple constructs, so an entire function has to
141 * be written as bintrans_alpha_imb[] above.
142 */
143 static void bintrans_host_cacheinvalidate(unsigned char *p, size_t len)
144 {
145 /* Long form of ``asm("imb");'' */
146
147 void (*f)(void);
148 f = (void *)&bintrans_alpha_imb[0];
149 f();
150 }
151
152
153 /* note: offsetof (in stddef.h) could possibly be used, but I'm not sure
154 if it will take care of the compiler problems...

Each ofs_* macro below evaluates to the byte offset of one field from the
start of its structure, computed by subtracting the address of a dummy
object from the address of the field inside that object. ofs_c0 is
relative to struct vth32_table; all the others are relative to struct cpu. */
155 #define ofs_pc (((size_t)&dummy_cpu.pc) - ((size_t)&dummy_cpu))
156 #define ofs_pc_last (((size_t)&dummy_cpu.cd.mips.pc_last) - ((size_t)&dummy_cpu))
157 #define ofs_n (((size_t)&dummy_cpu.cd.mips.bintrans_instructions_executed) - ((size_t)&dummy_cpu))
158 #define ofs_ds (((size_t)&dummy_cpu.cd.mips.delay_slot) - ((size_t)&dummy_cpu))
159 #define ofs_ja (((size_t)&dummy_cpu.cd.mips.delay_jmpaddr) - ((size_t)&dummy_cpu))
160 #define ofs_sp (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_SP]) - ((size_t)&dummy_cpu))
161 #define ofs_ra (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_RA]) - ((size_t)&dummy_cpu))
162 #define ofs_a0 (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_A0]) - ((size_t)&dummy_cpu))
163 #define ofs_a1 (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_A1]) - ((size_t)&dummy_cpu))
164 #define ofs_t0 (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_T0]) - ((size_t)&dummy_cpu))
165 #define ofs_t1 (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_T1]) - ((size_t)&dummy_cpu))
166 #define ofs_t2 (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_T2]) - ((size_t)&dummy_cpu))
167 #define ofs_v0 (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_V0]) - ((size_t)&dummy_cpu))
168 #define ofs_s0 (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_S0]) - ((size_t)&dummy_cpu))
169 #define ofs_tbl0 (((size_t)&dummy_cpu.cd.mips.vaddr_to_hostaddr_table0) - ((size_t)&dummy_cpu))
170 #define ofs_c0 ((size_t)&dummy_vth32_table.bintrans_chunks[0] - (size_t)&dummy_vth32_table)
171 #define ofs_cb (((size_t)&dummy_cpu.cd.mips.chunk_base_address) - (size_t)&dummy_cpu)
172
173 #define ofs_h_l (((size_t)&dummy_cpu.cd.mips.host_load) - ((size_t)&dummy_cpu))
174 #define ofs_h_s (((size_t)&dummy_cpu.cd.mips.host_store) - ((size_t)&dummy_cpu))
175
176
177 static uint32_t bintrans_alpha_load_32bit[18] = { /* pre-assembled Alpha routine, one instruction word per element.
   Input: guest address in a1; t10 holds the cached load table base.
   Success: t3 = host_load[addr >> 12] + (addr & 4095), returns via t4.
   Failure (table entry is NULL): BINTRANS_DONT_RUN_NEXT is OR-ed into t6
   and the routine executes the plain "ret".
   Only 12 of the 18 elements have initializers; the remaining ones are zero. */
178 0x4a21f622, /* zapnot a1,0xf,t1 */
179 0x209f0fff, /* lda t3,4095 */
180 0x48419682, /* srl t1,0xc,t1 t1 = addr >> 12 */
181 0x46240004, /* and a1,t3,t3 t3 = addr & 4095 */
182 0x40580642, /* s8addq t1,t10,t1 &host_load[t1] */
183 0xa6620000, /* ldq a3,0(t1) a3 = host_load[t1] */
184
185 /* NULL? Then return failure at once. */
186 0xe6600002, /* beq a3, return */
187
188 /* The rest of the load/store code was written with t3 as the address. */
189 /* Add the offset within the page: */
190 0x42640404, /* addq a3,t3,t3 */
191 0x6be58000, /* ret (t4) */
192
193 /* return: */
194 0x243f0000 | (BINTRANS_DONT_RUN_NEXT >> 16), /* ldah t0,256 */
195 0x44270407, /* or t0,t6,t6 */
196 0x6bfa8000 /* ret */
197 };
198
199 static uint32_t bintrans_alpha_store_32bit[18] = { /* pre-assembled Alpha routine, one instruction word per element.
   Same sequence as bintrans_alpha_load_32bit[], except that the table base
   comes from s6 (the cached store table base) instead of t10.
   Success: t3 = host_store[addr >> 12] + (addr & 4095), returns via t4.
   Failure (table entry is NULL): BINTRANS_DONT_RUN_NEXT is OR-ed into t6
   and the routine executes the plain "ret".
   Only 12 of the 18 elements have initializers; the remaining ones are zero. */
200 0x4a21f622, /* zapnot a1,0xf,t1 */
201 0x209f0fff, /* lda t3,4095 */
202 0x48419682, /* srl t1,0xc,t1 t1 = addr >> 12 */
203 0x46240004, /* and a1,t3,t3 t3 = addr & 4095 */
204 0x404f0642, /* s8addq t1,s6,t1 &host_store[t1] */
205 0xa6620000, /* ldq a3,0(t1) a3 = host_store[t1] */
206
207 /* NULL? Then return failure at once. */
208 /* beq a3, return */
209 0xe6600002,
210
211 /* The rest of the load/store code was written with t3 as the address. */
212 0x42640404, /* addq a3,t3,t3 */
213 0x6be58000, /* ret (t4) */
214
215 /* return: */
216 0x243f0000 | (BINTRANS_DONT_RUN_NEXT >> 16), /* ldah t0,256 */
217 0x44270407, /* or t0,t6,t6 */
218 0x6bfa8000 /* ret */
219 };
220
221 static void (*bintrans_runchunk)(struct cpu *, unsigned char *); /* no initializer here, so it starts out NULL; presumably assigned elsewhere in the file (not visible in this part) */
222
223 static void (*bintrans_jump_to_32bit_pc)(struct cpu *); /* no initializer here either; starts out NULL */
224
225 static void (*bintrans_load_32bit) /* points at the pre-assembled code in bintrans_alpha_load_32bit[] */
226 (struct cpu *) = (void *)bintrans_alpha_load_32bit;
227
228 static void (*bintrans_store_32bit) /* points at the pre-assembled code in bintrans_alpha_store_32bit[] */
229 (struct cpu *) = (void *)bintrans_alpha_store_32bit;
230
231
232 /*
233 * bintrans_write_quickjump():
234 */
235 static void bintrans_write_quickjump(struct memory *mem,
236 unsigned char *quickjump_code, uint32_t chunkoffset)
237 {
238 int ofs;
239 uint64_t alpha_addr = chunkoffset +
240 (size_t)mem->translation_code_chunk_space;
241 uint32_t *a = (uint32_t *)quickjump_code;
242
243 ofs = (alpha_addr - ((size_t)a+4)) / 4;
244
245 /* printf("chunkoffset=%i, %016llx %016llx %i\n",
246 chunkoffset, (long long)alpha_addr, (long long)a, ofs); */
247
248 if (ofs > -0xfffff && ofs < 0xfffff) {
249 *a++ = 0xc3e00000 | (ofs & 0x1fffff); /* br <chunk> */
250 }
251 }
252
253
/*
 *  bintrans_write_chunkreturn():
 *
 *  Appends one Alpha "ret" instruction at *addrp and advances *addrp past
 *  the emitted instruction word.
 */
static void bintrans_write_chunkreturn(unsigned char **addrp)
{
	uint32_t *insn = (uint32_t *) *addrp;

	insn[0] = 0x6bfa8000;		/*  ret  */

	*addrp = (unsigned char *) (insn + 1);
}
263
264
265 /*
266 * bintrans_write_chunkreturn_fail():
267 */
268 static void bintrans_write_chunkreturn_fail(unsigned char **addrp)
269 {
270 uint32_t *a = (uint32_t *) *addrp;
271 /* 00 01 3f 24 ldah t0,256 */
272 /* 07 04 27 44 or t0,t6,t6 */
273 *a++ = 0x243f0000 | (BINTRANS_DONT_RUN_NEXT >> 16);
274 *a++ = 0x44270407;
275 *a++ = 0x6bfa8000; /* ret */
276 *addrp = (unsigned char *) a;
277 }
278
279
280 /*
281 * bintrans_move_MIPS_reg_into_Alpha_reg():
282 */
283 static void bintrans_move_MIPS_reg_into_Alpha_reg(unsigned char **addrp, int mipsreg, int alphareg)
284 {
285 uint32_t *a = (uint32_t *) *addrp;
286 int ofs, alpha_mips_reg;
287
288 switch (mipsreg) {
289 case MIPSREG_PC:
290 /* addq t5,0,alphareg */
291 *a++ = 0x40c01400 | alphareg;
292 break;
293 case MIPSREG_DELAY_SLOT:
294 /* addq s0,0,alphareg */
295 *a++ = 0x41201400 | alphareg;
296 break;
297 case MIPSREG_DELAY_JMPADDR:
298 /* addq s1,0,alphareg */
299 *a++ = 0x41401400 | alphareg;
300 break;
301 default:
302 alpha_mips_reg = map_MIPS_to_Alpha[mipsreg];
303 if (alpha_mips_reg < 0) {
304 ofs = ((size_t)&dummy_cpu.cd.mips.gpr[mipsreg]) - (size_t)&dummy_cpu;
305 /* ldq alphareg,gpr[mipsreg](a0) */
306 *a++ = 0xa4100000 | (alphareg << 21) | ofs;
307 } else {
308 /* addq alpha_mips_reg,0,alphareg */
309 *a++ = 0x40001400 | (alpha_mips_reg << 21) | alphareg;
310 }
311 }
312 *addrp = (unsigned char *) a;
313 }
314
315
316 /*
317 * bintrans_move_Alpha_reg_into_MIPS_reg():
318 */
319 static void bintrans_move_Alpha_reg_into_MIPS_reg(unsigned char **addrp, int alphareg, int mipsreg)
320 {
321 uint32_t *a = (uint32_t *) *addrp;
322 int ofs, alpha_mips_reg;
323
324 switch (mipsreg) {
325 case MIPSREG_PC:
326 /* addq alphareg,0,t5 */
327 *a++ = 0x40001406 | (alphareg << 21);
328 break;
329 case MIPSREG_DELAY_SLOT:
330 /* addq alphareg,0,s0 */
331 *a++ = 0x40001409 | (alphareg << 21);
332 break;
333 case MIPSREG_DELAY_JMPADDR:
334 /* addq alphareg,0,s1 */
335 *a++ = 0x4000140a | (alphareg << 21);
336 break;
337 case 0: /* the zero register */
338 break;
339 default:
340 alpha_mips_reg = map_MIPS_to_Alpha[mipsreg];
341 if (alpha_mips_reg < 0) {
342 /* stq alphareg,gpr[mipsreg](a0) */
343 ofs = ((size_t)&dummy_cpu.cd.mips.gpr[mipsreg]) - (size_t)&dummy_cpu;
344 *a++ = 0xb4100000 | (alphareg << 21) | ofs;
345 } else {
346 /* addq alphareg,0,alpha_mips_reg */
347 *a++ = 0x40001400 | (alphareg << 21) | alpha_mips_reg;
348 }
349 }
350 *addrp = (unsigned char *) a;
351 }
352
353
/*
 *  bintrans_write_pc_inc():
 *
 *  Appends two Alpha instructions at *addrp: one adds 1 to t6 (the count
 *  of executed translated instructions) and one adds 4 to t5 (the MIPS
 *  pc). *addrp is advanced past both instruction words.
 */
static void bintrans_write_pc_inc(unsigned char **addrp)
{
	uint32_t *insn = (uint32_t *) *addrp;

	insn[0] = 0x20e70001;		/*  lda t6,1(t6)  */
	insn[1] = 0x20c60004;		/*  lda t5,4(t5)  */

	*addrp = (unsigned char *) (insn + 2);
}
369
370
371 /*
372 * bintrans_write_instruction__addiu_etc():
 *
 * Emits Alpha code at *addrp for a MIPS immediate-operand instruction
 * (ADDIU, DADDIU, ANDI, ORI, XORI, SLTI or SLTIU, selected by
 * instruction_type) which computes MIPS register rt from MIPS register rs
 * and the 16-bit immediate imm. The pc/instruction-count increment is
 * emitted after it, and *addrp is advanced past everything written.
 *
 * The result is computed directly into the Alpha register mapped to rt, or
 * into t0 and then stored to gpr[rt] when rt has no Alpha register.
 * If rt is 0, only the pc increment is emitted.
 *
 * Returns 1 after emitting code. Returns 0 without emitting anything for
 * ADDI and DADDI (overflow detection is not implemented). The mem
 * argument is not used by this function.
373 */
374 static int bintrans_write_instruction__addiu_etc(
375 struct memory *mem, unsigned char **addrp,
376 int rt, int rs, int imm, int instruction_type)
377 {
378 uint32_t *a;
379 unsigned int uimm;
380 int alpha_rs, alpha_rt;
381
382 /* TODO: overflow detection for ADDI and DADDI */
383 switch (instruction_type) {
384 case HI6_ADDI:
385 case HI6_DADDI:
386 return 0; /* nothing has been emitted at this point */
387 }
388
389 a = (uint32_t *) *addrp;
390
391 if (rt == 0)
392 goto rt0; /* writes to the MIPS zero register are dropped; only the pc is advanced */
393
394 uimm = imm & 0xffff; /* the immediate as an unsigned 16-bit field */
395
396 alpha_rs = map_MIPS_to_Alpha[rs];
397 alpha_rt = map_MIPS_to_Alpha[rt];
398
399 if (uimm == 0 && (instruction_type == HI6_ADDI ||
400 instruction_type == HI6_ADDIU || instruction_type == HI6_DADDI ||
401 instruction_type == HI6_DADDIU || instruction_type == HI6_ORI)) { /* adding or OR-ing zero is emitted as a plain register move */
402 if (alpha_rs >= 0 && alpha_rt >= 0) {
403 /* addq rs,0,rt */
404 *a++ = 0x40001400 | (alpha_rs << 21) | alpha_rt;
405 } else {
406 *addrp = (unsigned char *) a;
407 bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rs, ALPHA_T0);
408 bintrans_move_Alpha_reg_into_MIPS_reg(addrp, ALPHA_T0, rt);
409 a = (uint32_t *) *addrp;
410 }
411 goto rt0;
412 }
413
414 if (alpha_rs < 0) {
415 /* ldq t0,"rs"(a0) */
416 *addrp = (unsigned char *) a;
417 bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rs, ALPHA_T0);
418 a = (uint32_t *) *addrp;
419 alpha_rs = ALPHA_T0;
420 }
421
422 if (alpha_rt < 0)
423 alpha_rt = ALPHA_T0; /* compute into t0; it is stored back to gpr[rt] near the end */
424
425 /* Place the result of the calculation in alpha_rt: */
426
427 switch (instruction_type) {
428 case HI6_ADDIU:
429 case HI6_DADDIU:
430 case HI6_ADDI:
431 case HI6_DADDI:
432 if (uimm < 256) { /* small enough for the 8-bit literal form */
433 if (instruction_type == HI6_ADDI ||
434 instruction_type == HI6_ADDIU) {
435 /* addl rs,uimm,rt */
436 *a++ = 0x40001000 | (alpha_rs << 21)
437 | (uimm << 13) | alpha_rt;
438 } else {
439 /* addq rs,uimm,rt */
440 *a++ = 0x40001400 | (alpha_rs << 21)
441 | (uimm << 13) | alpha_rt;
442 }
443 } else {
444 /* lda rt,imm(rs) */
445 *a++ = 0x20000000 | (alpha_rt << 21) | (alpha_rs << 16) | uimm;
446 if (instruction_type == HI6_ADDI ||
447 instruction_type == HI6_ADDIU) {
448 /* sign extend, 32->64 bits: addl t0,zero,t0 */
449 *a++ = 0x40001000 | (alpha_rt << 21) | alpha_rt;
450 }
451 }
452 break;
453 case HI6_ANDI:
454 case HI6_ORI:
455 case HI6_XORI:
456 if (uimm >= 256) { /* too large for a literal: build the zero-extended immediate in t1 */
457 /* lda t1,4660 */
458 *a++ = 0x205f0000 | uimm;
459 if (uimm & 0x8000) {
460 /* 01 00 42 24 ldah t1,1(t1) <-- if negative only */
461 *a++ = 0x24420001;
462 }
463 }
464
465 switch (instruction_type) {
466 case HI6_ANDI:
467 if (uimm < 256) {
468 /* and rs,uimm,rt */
469 *a++ = 0x44001000 | (alpha_rs << 21)
470 | (uimm << 13) | alpha_rt;
471 } else {
472 /* and rs,t1,rt */
473 *a++ = 0x44020000 | (alpha_rs << 21) | alpha_rt;
474 }
475 break;
476 case HI6_ORI:
477 if (uimm < 256) {
478 /* or rs,uimm,rt */
479 *a++ = 0x44001400 | (alpha_rs << 21)
480 | (uimm << 13) | alpha_rt;
481 } else {
482 /* or rs,t1,rt */
483 *a++ = 0x44020400 | (alpha_rs << 21) | alpha_rt;
484 }
485 break;
486 case HI6_XORI:
487 if (uimm < 256) {
488 /* xor rs,uimm,rt */
489 *a++ = 0x44001800 | (alpha_rs << 21)
490 | (uimm << 13) | alpha_rt;
491 } else {
492 /* xor rs,t1,rt */
493 *a++ = 0x44020800 | (alpha_rs << 21) | alpha_rt;
494 }
495 break;
496 }
497 break;
498 case HI6_SLTI:
499 case HI6_SLTIU:
500 /* lda t1,4660 */
501 *a++ = 0x205f0000 | uimm;
502
503 switch (instruction_type) {
504 case HI6_SLTI:
505 /* cmplt rs,t1,rt */
506 *a++ = 0x400209a0 | (alpha_rs << 21) | alpha_rt;
507 break;
508 case HI6_SLTIU:
509 /* cmpult rs,t1,rt */
510 *a++ = 0x400203a0 | (alpha_rs << 21) | alpha_rt;
511 break;
512 }
513 break;
514 }
515
516 if (alpha_rt == ALPHA_T0) { /* rt has no Alpha register: write t0 back to gpr[rt] */
517 *a++ = 0x5fff041f; /* fnop */
518 *addrp = (unsigned char *) a;
519 bintrans_move_Alpha_reg_into_MIPS_reg(addrp, ALPHA_T0, rt);
520 a = (uint32_t *) *addrp;
521 }
522
523 rt0:
524 *addrp = (unsigned char *) a;
525 bintrans_write_pc_inc(addrp);
526 return 1;
527 }
528
529
530 /*
531 * bintrans_write_instruction__addu_etc():
 *
 * Emits Alpha code at *addrp for a MIPS "SPECIAL" register-operand
 * instruction (add/subtract, logic, shifts, SLT/SLTU, MULT/MULTU,
 * MOVZ/MOVN, selected by instruction_type), followed by the
 * pc/instruction-count increment, and advances *addrp.
 *
 * The emitted code first copies rs into t0 and rt into t1. For the
 * instruction types not listed in the load64 switch, both copies are
 * sign-extended from 32 bits with addl. The result is placed in the Alpha
 * register mapped to rd, or in t0 and then stored to gpr[rd] when rd has
 * no Alpha register. Instructions are written byte by byte, lowest byte
 * first. Several per-line mnemonics name t0 as the destination, but where
 * the first byte contains alpha_rd the real destination is alpha_rd.
 *
 * MULT and MULTU store their results to cpu lo/hi instead of a GPR, and
 * are only translated when rd is 0. For all other types, rd == 0 means
 * only the pc increment is emitted.
 *
 * Returns 1 after emitting code; returns 0 without emitting anything for
 * DIV, DIVU, and MULT/MULTU with rd != 0. The mem argument is not used by
 * this function.
532 */
533 static int bintrans_write_instruction__addu_etc(
534 struct memory *mem, unsigned char **addrp,
535 int rd, int rs, int rt, int sa, int instruction_type)
536 {
537 unsigned char *a, *unmodified = NULL;
538 int load64 = 0, store = 1, ofs, alpha_rd = ALPHA_T0;
539
540 alpha_rd = map_MIPS_to_Alpha[rd];
541 if (alpha_rd < 0)
542 alpha_rd = ALPHA_T0;
543
544 switch (instruction_type) {
545 case SPECIAL_DIV:
546 case SPECIAL_DIVU:
547 return 0; /* divisions are not translated here; nothing emitted */
548 }
549
550 switch (instruction_type) {
551 case SPECIAL_DADDU:
552 case SPECIAL_DSUBU:
553 case SPECIAL_OR:
554 case SPECIAL_AND:
555 case SPECIAL_NOR:
556 case SPECIAL_XOR:
557 case SPECIAL_DSLL:
558 case SPECIAL_DSRL:
559 case SPECIAL_DSRA:
560 case SPECIAL_DSLL32:
561 case SPECIAL_DSRL32:
562 case SPECIAL_DSRA32:
563 case SPECIAL_SLT:
564 case SPECIAL_SLTU:
565 case SPECIAL_MOVZ:
566 case SPECIAL_MOVN:
567 load64 = 1; /* operands are used as full 64-bit values, without the addl sign-extension */
568 }
569
570 switch (instruction_type) {
571 case SPECIAL_MULT:
572 case SPECIAL_MULTU:
573 if (rd != 0)
574 return 0; /* nothing emitted */
575 store = 0; /* the result goes to lo/hi, not to gpr[rd] */
576 break;
577 default:
578 if (rd == 0)
579 goto rd0; /* writes to the zero register are dropped; only the pc is advanced */
580 }
581
582 a = *addrp;
583
584 if ((instruction_type == SPECIAL_ADDU || instruction_type == SPECIAL_DADDU
585 || instruction_type == SPECIAL_OR) && rt == 0) { /* adding or OR-ing the zero register: emitted as a move of rs into rd */
586 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0);
587 if (!load64) {
588 *a++ = 0x01; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */
589 }
590 bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rd);
591 *addrp = a;
592 goto rd0;
593 }
594
595 /* t0 = rs, t1 = rt */
596 if (load64) {
597 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0);
598 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T1);
599 } else {
600 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0);
601 *a++ = 0x01; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */
602 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T1);
603 *a++ = 0x02; *a++ = 0x10; *a++ = 0x40; *a++ = 0x40; /* addl t1,0,t1 */
604 }
605
606 switch (instruction_type) {
607 case SPECIAL_ADDU:
608 *a++ = alpha_rd; *a++ = 0x00; *a++ = 0x22; *a++ = 0x40; /* addl t0,t1,rd */
609 break;
610 case SPECIAL_DADDU:
611 *a++ = alpha_rd; *a++ = 0x04; *a++ = 0x22; *a++ = 0x40; /* addq t0,t1,rd */
612 break;
613 case SPECIAL_SUBU:
614 *a++ = 0x20 + alpha_rd; *a++ = 0x01; *a++ = 0x22; *a++ = 0x40; /* subl t0,t1,t0 */
615 break;
616 case SPECIAL_DSUBU:
617 *a++ = 0x20 + alpha_rd; *a++ = 0x05; *a++ = 0x22; *a++ = 0x40; /* subq t0,t1,t0 */
618 break;
619 case SPECIAL_AND:
620 *a++ = alpha_rd; *a++ = 0x00; *a++ = 0x22; *a++ = 0x44; /* and t0,t1,t0 */
621 break;
622 case SPECIAL_OR:
623 *a++ = alpha_rd; *a++ = 0x04; *a++ = 0x22; *a++ = 0x44; /* or t0,t1,t0 */
624 break;
625 case SPECIAL_NOR:
626 *a++ = 0x01; *a++ = 0x04; *a++ = 0x22; *a++ = 0x44; /* or t0,t1,t0 */
627 *a++ = alpha_rd; *a++ = 0x05; *a++ = 0xe1; *a++ = 0x47; /* not t0,t0 */
628 break;
629 case SPECIAL_XOR:
630 *a++ = alpha_rd; *a++ = 0x08; *a++ = 0x22; *a++ = 0x44; /* xor t0,t1,t0 */
631 break;
632 case SPECIAL_SLL:
633 *a++ = 0x21; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sll t1,sa,t0 */
634 *a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */
635 break;
636 case SPECIAL_SLLV:
637 /* rd = rt << (rs&31) (logical) t0 = t1 << (t0&31) */
638 *a++ = 0x01; *a++ = 0xf0; *a++ = 0x23; *a++ = 0x44; /* and t0,31,t0 */
639 *a++ = 0x21; *a++ = 0x07; *a++ = 0x41; *a++ = 0x48; /* sll t1,t0,t0 */
640 *a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */
641 break;
642 case SPECIAL_DSLL:
643 *a++ = 0x20 + alpha_rd; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sll t1,sa,t0 */
644 break;
645 case SPECIAL_DSLL32:
646 sa += 32;
647 *a++ = 0x20 + alpha_rd; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sll t1,sa,t0 */
648 break;
649 case SPECIAL_SRA:
650 *a++ = 0x81; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sra t1,sa,t0 */
651 *a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */
652 break;
653 case SPECIAL_SRAV:
654 /* rd = rt >> (rs&31) (arithmetic) t0 = t1 >> (t0&31) */
655 *a++ = 0x01; *a++ = 0xf0; *a++ = 0x23; *a++ = 0x44; /* and t0,31,t0 */
656 *a++ = 0x81; *a++ = 0x07; *a++ = 0x41; *a++ = 0x48; /* sra t1,t0,t0 */
657 *a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */
658 break;
659 case SPECIAL_DSRA:
660 *a++ = 0x80 + alpha_rd; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sra t1,sa,t0 */
661 break;
662 case SPECIAL_DSRA32:
663 sa += 32;
664 *a++ = 0x80 + alpha_rd; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sra t1,sa,t0 */
665 break;
666 case SPECIAL_SRL:
667 *a++ = 0x22; *a++ = 0xf6; *a++ = 0x41; *a++ = 0x48; /* zapnot t1,0xf,t1 (use only lowest 32 bits) */
668 /* Note: bits of sa are distributed among two different bytes. */
669 *a++ = 0x81; *a++ = 0x16 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48;
670 *a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl */
671 break;
672 case SPECIAL_SRLV:
673 /* rd = rt >> (rs&31) (logical) t0 = t1 >> (t0&31) */
674 *a++ = 0x22; *a++ = 0xf6; *a++ = 0x41; *a++ = 0x48; /* zapnot t1,0xf,t1 (use only lowest 32 bits) */
675 *a++ = 0x01; *a++ = 0xf0; *a++ = 0x23; *a++ = 0x44; /* and t0,31,t0 */
676 *a++ = 0x81; *a++ = 0x06; *a++ = 0x41; *a++ = 0x48; /* srl t1,t0,t0 */
677 *a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */
678 break;
679 case SPECIAL_DSRL:
680 /* Note: bits of sa are distributed among two different bytes. */
681 *a++ = 0x80 + alpha_rd; *a++ = 0x16 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48;
682 break;
683 case SPECIAL_DSRL32:
684 /* Note: bits of sa are distributed among two different bytes. */
685 sa += 32;
686 *a++ = 0x80 + alpha_rd; *a++ = 0x16 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48;
687 break;
688 case SPECIAL_SLT:
689 *a++ = 0xa0 + alpha_rd; *a++ = 0x09; *a++ = 0x22; *a++ = 0x40; /* cmplt t0,t1,t0 */
690 break;
691 case SPECIAL_SLTU:
692 *a++ = 0xa0 + alpha_rd; *a++ = 0x03; *a++ = 0x22; *a++ = 0x40; /* cmpult t0,t1,t0 */
693 break;
694 case SPECIAL_MULT:
695 case SPECIAL_MULTU:
696 if (instruction_type == SPECIAL_MULTU) {
697 /* 21 f6 21 48 zapnot t0,0xf,t0 */
698 /* 22 f6 41 48 zapnot t1,0xf,t1 */
699 *a++ = 0x21; *a++ = 0xf6; *a++ = 0x21; *a++ = 0x48;
700 *a++ = 0x22; *a++ = 0xf6; *a++ = 0x41; *a++ = 0x48;
701 }
702
703 /* 03 04 22 4c mulq t0,t1,t2 */
704 *a++ = 0x03; *a++ = 0x04; *a++ = 0x22; *a++ = 0x4c;
705
706 /* 01 10 60 40 addl t2,0,t0 */
707 *a++ = 0x01; *a++ = 0x10; *a++ = 0x60; *a++ = 0x40;
708
709 ofs = ((size_t)&dummy_cpu.cd.mips.lo) - (size_t)&dummy_cpu;
710 *a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x30; *a++ = 0xb4; /* stq t0,lo(a0): low 32 bits of the product, sign-extended */
711
712 /* 81 17 64 48 sra t2,0x20,t0 */
713 *a++ = 0x81; *a++ = 0x17; *a++ = 0x64; *a++ = 0x48;
714 *a++ = 0x01; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */
715 ofs = ((size_t)&dummy_cpu.cd.mips.hi) - (size_t)&dummy_cpu;
716 *a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x30; *a++ = 0xb4; /* stq t0,hi(a0): high 32 bits of the product, sign-extended */
717 break;
718 case SPECIAL_MOVZ:
719 /* if rt=0 then rd=rs ==> if t1!=0 then t0=unmodified else t0=rd */
720 /* 00 00 40 f4 bne t1,unmodified */
721 unmodified = a; /* remember the branch so its displacement can be patched below */
722 *a++ = 0x00; *a++ = 0x00; *a++ = 0x40; *a++ = 0xf4;
723 alpha_rd = ALPHA_T0; /* forces the store of t0 (= rs) into rd below */
724 break;
725 case SPECIAL_MOVN:
726 /* if rt!=0 then rd=rs ==> if t1=0 then t0=unmodified else t0=rd */
727 /* 00 00 40 e4 beq t1,unmodified */
728 unmodified = a; /* remember the branch so its displacement can be patched below */
729 *a++ = 0x00; *a++ = 0x00; *a++ = 0x40; *a++ = 0xe4;
730 alpha_rd = ALPHA_T0; /* forces the store of t0 (= rs) into rd below */
731 break;
732 }
733
734 if (store && alpha_rd == ALPHA_T0) {
735 bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rd);
736 }
737
738 if (unmodified != NULL)
739 *unmodified = ((size_t)a - (size_t)unmodified - 4) / 4; /* patch the low byte of the branch displacement (counted in instruction words) so the branch skips the store */
740
741 *addrp = a;
742 rd0:
743 bintrans_write_pc_inc(addrp);
744 return 1;
745 }
746
747
748 /*
749 * bintrans_write_instruction__branch():
 *
 * Emits Alpha code at *addrp for a MIPS conditional branch: BEQ, BNE,
 * BLEZ, BGTZ, their "likely" variants (BEQL, BNEL, BLEZL, BGTZL), and the
 * REGIMM branches BLTZ and BGEZ. The pc/instruction-count increment is
 * emitted last, and *addrp is advanced past everything written.
 *
 * The emitted code evaluates the branch condition into t0 and, when the
 * branch is not taken, jumps over the part that sets s1 (delay_jmpaddr)
 * to pc + 4 + (imm << 2) and s0 (delay_slot) to TO_BE_DELAYED.
 * For "likely" branches the not-taken path instead sets
 * cpu->cd.mips.nullify_next to 1, increments the pc and returns from the
 * chunk with the failure flag, so that the next instruction is handled
 * manually.
 *
 * Forward branch displacements are patched by writing only the first byte
 * of the already emitted branch word.
 *
 * NOTE(review): for any instruction_type/regimm_type combination other
 * than the ones tested below, no condition is emitted (b stays NULL) and
 * the jump set-up runs unconditionally; presumably callers only pass the
 * supported types. Confirm against the caller.
 *
 * Always returns 1.
750 */
751 static int bintrans_write_instruction__branch(unsigned char **addrp,
752 int instruction_type, int regimm_type, int rt, int rs, int imm)
753 {
754 uint32_t *a, *b, *c = NULL;
755 int alpha_rs, alpha_rt, likely = 0, ofs;
756
757 alpha_rs = map_MIPS_to_Alpha[rs];
758 alpha_rt = map_MIPS_to_Alpha[rt];
759
760 switch (instruction_type) {
761 case HI6_BEQL:
762 case HI6_BNEL:
763 case HI6_BLEZL:
764 case HI6_BGTZL:
765 likely = 1;
766 }
767
768 /*
769 * t0 = gpr[rt]; t1 = gpr[rs];
770 *
771 * 50 00 30 a4 ldq t0,80(a0)
772 * 58 00 50 a4 ldq t1,88(a0)
773 */
774
775 switch (instruction_type) { /* only the two-register compares need rt */
776 case HI6_BEQ:
777 case HI6_BNE:
778 case HI6_BEQL:
779 case HI6_BNEL:
780 if (alpha_rt < 0) {
781 bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rt, ALPHA_T0);
782 alpha_rt = ALPHA_T0;
783 }
784 }
785
786 if (alpha_rs < 0) {
787 bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rs, ALPHA_T1);
788 alpha_rs = ALPHA_T1;
789 }
790
791 a = (uint32_t *) *addrp;
792
793 /*
794 * Compare alpha_rt (t0) and alpha_rs (t1) for equality (BEQ).
795 * If the result was false (equal to zero), then skip a lot
796 * of instructions:
797 *
798 * a1 05 22 40 cmpeq t0,t1,t0
799 * 01 00 20 e4 beq t0,14 <f+0x14>
800 */
801 b = NULL; /* b will point at the conditional branch word whose displacement is patched later */
802 if ((instruction_type == HI6_BEQ ||
803 instruction_type == HI6_BEQL) && rt != rs) { /* with rt == rs the branch is always taken, so no test is emitted */
804 /* cmpeq rt,rs,t0 */
805 *a++ = 0x400005a1 | (alpha_rt << 21) | (alpha_rs << 16);
806 b = a;
807 *a++ = 0xe4200001; /* beq */
808 }
809 if (instruction_type == HI6_BNE || instruction_type == HI6_BNEL) {
810 /* cmpeq rt,rs,t0 */
811 *a++ = 0x400005a1 | (alpha_rt << 21) | (alpha_rs << 16);
812 b = a;
813 *a++ = 0xf4200001; /* bne */
814 }
815 if (instruction_type == HI6_BLEZ || instruction_type == HI6_BLEZL) {
816 /* cmple rs,0,t0 */
817 *a++ = 0x40001da1 | (alpha_rs << 21);
818 b = a;
819 *a++ = 0xe4200001; /* beq */
820 }
821 if (instruction_type == HI6_BGTZ || instruction_type == HI6_BGTZL) {
822 /* cmple rs,0,t0 */
823 *a++ = 0x40001da1 | (alpha_rs << 21);
824 b = a;
825 *a++ = 0xf4200001; /* bne */
826 }
827 if (instruction_type == HI6_REGIMM && regimm_type == REGIMM_BLTZ) {
828 /* cmplt rs,0,t0 */
829 *a++ = 0x400019a1 | (alpha_rs << 21);
830 b = a;
831 *a++ = 0xe4200001; /* beq */
832 }
833 if (instruction_type == HI6_REGIMM && regimm_type == REGIMM_BGEZ) {
834 *a++ = 0x207fffff; /* lda t2,-1 */
835 /* cmple rs,t2,t0 */
836 *a++ = 0x40030da1 | (alpha_rs << 21);
837 b = a;
838 *a++ = 0xf4200001; /* bne */
839 }
840
841 /*
842 * Perform the jump by setting cpu->delay_slot = TO_BE_DELAYED
843 * and cpu->delay_jmpaddr = pc + 4 + (imm << 2).
844 *
845 * 04 00 26 20 lda t0,4(t5) add 4
846 * c8 01 5f 20 lda t1,456
847 * 4a 04 41 40 s4addq t1,t0,s1 s1 = (t1<<2) + t0
848 */
849
850 *a++ = 0x20260004; /* lda t0,4(t5) */
851 *a++ = 0x205f0000 | (imm & 0xffff); /* lda */
852 *a++ = 0x4041044a; /* s4addq */
853
854 /* 02 00 3f 21 lda s0,TO_BE_DELAYED */
855 *a++ = 0x213f0000 | TO_BE_DELAYED;
856
857 /*
858 * Special case: "likely"-branches:
859 */
860 if (likely) {
861 c = a;
862 *a++ = 0xc3e00001; /* br delayed_ok */
863
864 if (b != NULL)
865 *((unsigned char *)b) = ((size_t)a - (size_t)b - 4) / 4; /* not-taken path lands here: patch the first byte of the conditional branch's displacement */
866
867 /* cpu->cd.mips.nullify_next = 1; */
868 /* 01 00 3f 20 lda t0,1 */
869 *a++ = 0x203f0001;
870 ofs = (size_t)&dummy_cpu.cd.mips.nullify_next - (size_t)&dummy_cpu;
871 *a++ = 0xb0300000 | (ofs & 0xffff);
872
873 /* fail, so that the next instruction is handled manually: */
874 *addrp = (unsigned char *) a;
875 bintrans_write_pc_inc(addrp);
876 bintrans_write_chunkreturn_fail(addrp);
877 a = (uint32_t *) *addrp;
878
879 if (c != NULL)
880 *((unsigned char *)c) = ((size_t)a - (size_t)c - 4) / 4; /* taken path: make the "br" jump past the nullify/fail code */
881 } else {
882 /* Normal (non-likely) exit: */
883 if (b != NULL)
884 *((unsigned char *)b) = ((size_t)a - (size_t)b - 4) / 4; /* not-taken path skips the jump set-up above */
885 }
886
887 *addrp = (unsigned char *) a;
888 bintrans_write_pc_inc(addrp);
889 return 1;
890 }
891
892
893 /*
894 * bintrans_write_instruction__jr():
895 */
896 static int bintrans_write_instruction__jr(unsigned char **addrp, int rs, int rd, int special)
897 {
898 uint32_t *a;
899 int alpha_rd;
900
901 alpha_rd = map_MIPS_to_Alpha[rd];
902 if (alpha_rd < 0)
903 alpha_rd = ALPHA_T0;
904
905 /*
906 * Perform the jump by setting cpu->delay_slot = TO_BE_DELAYED
907 * and cpu->delay_jmpaddr = gpr[rs].
908 */
909
910 bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rs, ALPHA_S1);
911
912 a = (uint32_t *) *addrp;
913 /* 02 00 3f 21 lda s0,TO_BE_DELAYED */
914 *a++ = 0x213f0000 | TO_BE_DELAYED;
915 *addrp = (unsigned char *) a;
916
917 if (special == SPECIAL_JALR && rd != 0) {
918 /* gpr[rd] = retaddr (pc + 8) */
919 a = (uint32_t *) *addrp;
920 /* lda alpha_rd,8(t5) */
921 *a++ = 0x20060008 | (alpha_rd << 21);
922 *addrp = (unsigned char *) a;
923 if (alpha_rd == ALPHA_T0)
924 bintrans_move_Alpha_reg_into_MIPS_reg(addrp, ALPHA_T0, rd);
925 }
926
927 bintrans_write_pc_inc(addrp);
928 return 1;
929 }
930
931
932 /*
933 * bintrans_write_instruction__jal():
934 */
935 static int bintrans_write_instruction__jal(unsigned char **addrp,
936 int imm, int link)
937 {
938 uint32_t *a;
939
940 a = (uint32_t *) *addrp;
941
942 /* gpr[31] = retaddr (NOTE: mips register 31 is in alpha reg s3) */
943 if (link) {
944 *a++ = 0x21860008; /* lda s3,8(t5) */
945 }
946
947 /* Set the jmpaddr to top 4 bits of pc + lowest 28 bits of imm*4: */
948
949 /*
950 * imm = 4*imm;
951 * t0 = ((pc + 4) & ~0x0fffffff) | imm;
952 *
953 * 04 00 26 20 lda t0,4(t5) <-- because the jump is from the delay slot
954 * 23 01 5f 24 ldah t1,291
955 * 67 45 42 20 lda t1,17767(t1)
956 * 00 f0 7f 24 ldah t2,-4096
957 * 04 00 23 44 and t0,t2,t3
958 * 0a 04 44 44 or t1,t3,s1
959 */
960 imm *= 4;
961 *a++ = 0x20260004;
962 *a++ = 0x245f0000 | ((imm >> 16) + (imm & 0x8000? 1 : 0));
963 *a++ = 0x20420000 | (imm & 0xffff);
964 *a++ = 0x247ff000;
965 *a++ = 0x44230004;
966 *a++ = 0x4444040a;
967
968 /* 02 00 3f 21 lda s0,TO_BE_DELAYED */
969 *a++ = 0x213f0000 | TO_BE_DELAYED;
970
971 /* If the machine continues executing here, it will return
972 to the main loop, which is fine. */
973
974 *addrp = (unsigned char *) a;
975 bintrans_write_pc_inc(addrp);
976 return 1;
977 }
978
979
980 /*
981 * bintrans_write_instruction__delayedbranch():
982 */
983 static int bintrans_write_instruction__delayedbranch(
984 struct memory *mem, unsigned char **addrp,
985 uint32_t *potential_chunk_p, uint32_t *chunks,
986 int only_care_about_chunk_p, int p, int forward)
987 {
988 unsigned char *a, *skip=NULL, *generic64bit;
989 int ofs;
990 uint64_t alpha_addr, subaddr;
991
992 a = *addrp;
993
994 if (!only_care_about_chunk_p) {
995 /* Skip all of this if there is no branch: */
996 skip = a;
997 *a++ = 0; *a++ = 0; *a++ = 0x20; *a++ = 0xe5; /* beq s0,skip */
998
999 /*
1000 * Perform the jump by setting cpu->delay_slot = 0
1001 * and pc = cpu->delay_jmpaddr.
1002 */
1003 bintrans_move_MIPS_reg_into_Alpha_reg(&a, MIPSREG_PC, ALPHA_T3);
1004 bintrans_move_MIPS_reg_into_Alpha_reg(&a, MIPSREG_DELAY_JMPADDR, ALPHA_T5);
1005
1006 /* 00 00 3f 21 lda s0,0 */
1007 *a++ = 0; *a++ = 0; *a++ = 0x3f; *a++ = 0x21;
1008
1009 }
1010
1011 if (potential_chunk_p == NULL) {
1012 if (mem->bintrans_32bit_only) {
1013 /* 34 12 70 a7 ldq t12,4660(a0) */
1014 ofs = (size_t)&dummy_cpu.cd.mips.bintrans_jump_to_32bit_pc - (size_t)&dummy_cpu;
1015 *a++ = ofs; *a++ = ofs >> 8; *a++ = 0x70; *a++ = 0xa7;
1016
1017 /* 00 00 fb 6b jmp (t12) */
1018 *a++ = 0; *a++ = 0; *a++ = 0xfb; *a++ = 0x6b;
1019 } else {
1020 /*
1021 * If the highest 32 bits of the address are either
1022 * 0x00000000 or 0xffffffff, then the tables used for
1023 * 32-bit load/stores can be used.
1024 *
1025 * 81 16 24 4a srl a1,0x20,t0
1026 * 03 00 20 e4 beq t0,14 <ok1>
1027 * 01 30 20 40 addl t0,0x1,t0
1028 * 01 00 20 e4 beq t0,14 <ok1>
1029 * 01 00 e0 c3 br 18 <nook>
1030 */
1031 *a++ = 0x81; *a++ = 0x16; *a++ = 0x24; *a++ = 0x4a;
1032 *a++ = 0x03; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4;
1033 *a++ = 0x01; *a++ = 0x30; *a++ = 0x20; *a++ = 0x40;
1034 *a++ = 0x01; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4;
1035 generic64bit = a;
1036 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1037
1038 /* 34 12 70 a7 ldq t12,4660(a0) */
1039 ofs = (size_t)&dummy_cpu.cd.mips.bintrans_jump_to_32bit_pc - (size_t)&dummy_cpu;
1040 *a++ = ofs; *a++ = ofs >> 8; *a++ = 0x70; *a++ = 0xa7;
1041
1042 /* 00 00 fb 6b jmp (t12) */
1043 *a++ = 0; *a++ = 0; *a++ = 0xfb; *a++ = 0x6b;
1044
1045
1046 if (generic64bit != NULL)
1047 *generic64bit = ((size_t)a - (size_t)generic64bit - 4) / 4;
1048
1049 /* Not much we can do here if this wasn't to the same
1050 physical page... */
1051
1052 *a++ = 0xfc; *a++ = 0xff; *a++ = 0x84; *a++ = 0x20; /* lda t3,-4(t3) */
1053
1054 /*
1055 * Compare the old pc (t3) and the new pc (t0). If they are on the
1056 * same virtual page (which means that they are on the same physical
1057 * page), then we can check the right chunk pointer, and if it
1058 * is non-NULL, then we can jump there. Otherwise just return.
1059 *
1060 * 00 f0 5f 20 lda t1,-4096
1061 * 01 00 22 44 and t0,t1,t0
1062 * 04 00 82 44 and t3,t1,t3
1063 * a3 05 24 40 cmpeq t0,t3,t2
1064 * 01 00 60 f4 bne t2,7c <ok2>
1065 * 00 80 fa 6b ret
1066 */
1067 *a++ = 0x00; *a++ = 0xf0; *a++ = 0x5f; *a++ = 0x20; /* lda */
1068 *a++ = 0x01; *a++ = 0x00; *a++ = 0x22; *a++ = 0x44; /* and */
1069 *a++ = 0x04; *a++ = 0x00; *a++ = 0x82; *a++ = 0x44; /* and */
1070 *a++ = 0xa3; *a++ = 0x05; *a++ = 0x24; *a++ = 0x40; /* cmpeq */
1071 *a++ = 0x01; *a++ = 0x00; *a++ = 0x60; *a++ = 0xf4; /* bne */
1072 *a++ = 0x00; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */
1073
1074 /* Don't execute too many instructions. (see comment below) */
1075 *a++ = (N_SAFE_BINTRANS_LIMIT-1)&255; *a++ = ((N_SAFE_BINTRANS_LIMIT-1) >> 8)&255;
1076 *a++ = 0x5f; *a++ = 0x20; /* lda t1,0x1fff */
1077 *a++ = 0xa1; *a++ = 0x0d; *a++ = 0xe2; *a++ = 0x40; /* cmple t6,t1,t0 */
1078 *a++ = 0x01; *a++ = 0x00; *a++ = 0x20; *a++ = 0xf4; /* bne */
1079 *a++ = 0x00; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */
1080
1081 /* 15 bits at a time, which means max 60 bits, but
1082 that should be enough. the top 4 bits are probably
1083 not used by userland alpha code. (TODO: verify this) */
1084 alpha_addr = (size_t)chunks;
1085 subaddr = (alpha_addr >> 45) & 0x7fff;
1086
1087 /*
1088 * 00 00 3f 20 lda t0,0
1089 * 21 f7 21 48 sll t0,0xf,t0
1090 * 34 12 21 20 lda t0,4660(t0)
1091 * 21 f7 21 48 sll t0,0xf,t0
1092 * 34 12 21 20 lda t0,4660(t0)
1093 * 21 f7 21 48 sll t0,0xf,t0
1094 * 34 12 21 20 lda t0,4660(t0)
1095 */
1096
1097 /* Start with the topmost 15 bits: */
1098 *a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x3f; *a++ = 0x20;
1099 *a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */
1100
1101 subaddr = (alpha_addr >> 30) & 0x7fff;
1102 *a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20;
1103 *a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */
1104
1105 subaddr = (alpha_addr >> 15) & 0x7fff;
1106 *a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20;
1107 *a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */
1108
1109 subaddr = alpha_addr & 0x7fff;
1110 *a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20;
1111
1112 /*
1113 * t2 = pc
1114 * t1 = t2 & 0xfff
1115 * t0 += t1
1116 *
1117 * ff 0f 5f 20 lda t1,4095
1118 * 02 00 62 44 and t2,t1,t1
1119 * 01 04 22 40 addq t0,t1,t0
1120 */
1121 bintrans_move_MIPS_reg_into_Alpha_reg(&a, MIPSREG_PC, ALPHA_T2);
1122 *a++ = 0xff; *a++ = 0x0f; *a++ = 0x5f; *a++ = 0x20; /* lda */
1123 *a++ = 0x02; *a++ = 0x00; *a++ = 0x62; *a++ = 0x44; /* and */
1124 *a++ = 0x01; *a++ = 0x04; *a++ = 0x22; *a++ = 0x40; /* addq */
1125
1126 /*
1127 * Load the chunk pointer (actually, a 32-bit offset) into t0.
1128 * If it is zero, then skip the following.
1129 * Add cpu->chunk_base_address to t0.
1130 * Jump to t0.
1131 */
1132
1133 *a++ = 0x00; *a++ = 0x00; *a++ = 0x21; *a++ = 0xa0; /* ldl t0,0(t0) */
1134 *a++ = 0x03; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4; /* beq t0,<skip> */
1135
1136 /* ldq t2,chunk_base_address(a0) */
1137 ofs = ((size_t)&dummy_cpu.cd.mips.chunk_base_address) - (size_t)&dummy_cpu;
1138 *a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x70; *a++ = 0xa4;
1139 /* addq t0,t2,t0 */
1140 *a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x40;
1141
1142 /* 00 00 e1 6b jmp (t0) */
1143 *a++ = 0x00; *a++ = 0x00; *a++ = 0xe1; *a++ = 0x6b; /* jmp (t0) */
1144
1145 /* Failure, then return to the main loop. */
1146 *a++ = 0x00; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */
1147 }
1148 } else {
1149 /*
1150 * Just to make sure that we don't become too unreliant
1151 * on the main program loop, we need to return every once
1152 * in a while (interrupts etc).
1153 *
1154 * Load the "nr of instructions executed" (which is an int)
1155 * and see if it is below a certain threshold. If so, then
1156 * we go on with the fast path (bintrans), otherwise we
1157 * abort by returning.
1158 *
1159 * f4 01 5f 20 lda t1,500 (some low number...)
1160 * a1 0d c2 40 cmple t6,t1,t0
1161 * 01 00 20 f4 bne t0,14 <f+0x14>
1162 */
1163 if (!only_care_about_chunk_p && !forward) {
1164 *a++ = (N_SAFE_BINTRANS_LIMIT-1)&255; *a++ = ((N_SAFE_BINTRANS_LIMIT-1) >> 8)&255;
1165 *a++ = 0x5f; *a++ = 0x20; /* lda t1,0x1fff */
1166 *a++ = 0xa1; *a++ = 0x0d; *a++ = 0xe2; *a++ = 0x40; /* cmple t6,t1,t0 */
1167 *a++ = 0x01; *a++ = 0x00; *a++ = 0x20; *a++ = 0xf4; /* bne */
1168 *a++ = 0x00; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */
1169 }
1170
1171 /*
1172 * potential_chunk_p points to an "uint32_t".
1173 * If this value is non-NULL, then it is a piece of Alpha
1174 * machine language code corresponding to the address
1175 * we're jumping to. Otherwise, those instructions haven't
1176 * been translated yet, so we have to return to the main
1177 * loop. (Actually, we have to add cpu->chunk_base_address,
1178 * because the uint32_t is limited to 32-bit offsets.)
1179 *
1180 * Case 1: The value is non-NULL already at translation
1181 * time. Then we can make a direct (fast) native
1182 * Alpha jump to the code chunk.
1183 *
1184 * Case 2: The value was NULL at translation time, then we
1185 * have to check during runtime.
1186 */
1187
1188 /* Case 1: */
1189 /* printf("%08x ", *potential_chunk_p); */
1190 alpha_addr = *potential_chunk_p + (size_t)mem->translation_code_chunk_space;
1191 ofs = (alpha_addr - ((size_t)a+4)) / 4;
1192 /* printf("%016llx %016llx %i\n", (long long)alpha_addr, (long long)a, ofs); */
1193
1194 if ((*potential_chunk_p) != 0 && ofs > -0xfffff && ofs < 0xfffff) {
1195 *a++ = ofs & 255; *a++ = (ofs >> 8) & 255; *a++ = 0xe0 + ((ofs >> 16) & 0x1f); *a++ = 0xc3; /* br <chunk> */
1196 } else {
1197 /* Case 2: */
1198
1199 bintrans_register_potential_quick_jump(mem, a, p);
1200
1201 /* 15 bits at a time, which means max 60 bits, but
1202 that should be enough. the top 4 bits are probably
1203 not used by userland alpha code. (TODO: verify this) */
1204 alpha_addr = (size_t)potential_chunk_p;
1205 subaddr = (alpha_addr >> 45) & 0x7fff;
1206
1207 /*
1208 * 00 00 3f 20 lda t0,0
1209 * 21 f7 21 48 sll t0,0xf,t0
1210 * 34 12 21 20 lda t0,4660(t0)
1211 * 21 f7 21 48 sll t0,0xf,t0
1212 * 34 12 21 20 lda t0,4660(t0)
1213 * 21 f7 21 48 sll t0,0xf,t0
1214 * 34 12 21 20 lda t0,4660(t0)
1215 */
1216
1217 /* Start with the topmost 15 bits: */
1218 *a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x3f; *a++ = 0x20;
1219 *a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */
1220
1221 subaddr = (alpha_addr >> 30) & 0x7fff;
1222 *a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20;
1223 *a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */
1224
1225 subaddr = (alpha_addr >> 15) & 0x7fff;
1226 *a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20;
1227 *a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */
1228
1229 subaddr = alpha_addr & 0x7fff;
1230 *a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20;
1231
1232 /*
1233 * Load the chunk pointer into t0.
1234 * If it is NULL (zero), then skip the following jump.
1235 * Jump to t0.
1236 */
1237 *a++ = 0x00; *a++ = 0x00; *a++ = 0x21; *a++ = 0xa0; /* ldl t0,0(t0) */
1238 *a++ = 0x03; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4; /* beq t0,<skip> */
1239
1240 /* ldq t2,chunk_base_address(a0) */
1241 ofs = ((size_t)&dummy_cpu.cd.mips.chunk_base_address) - (size_t)&dummy_cpu;
1242 *a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x70; *a++ = 0xa4;
1243 /* addq t0,t2,t0 */
1244 *a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x40;
1245
1246 /* 00 00 e1 6b jmp (t0) */
1247 *a++ = 0x00; *a++ = 0x00; *a++ = 0xe1; *a++ = 0x6b; /* jmp (t0) */
1248
1249 /* "Failure", then let's return to the main loop. */
1250 *a++ = 0x00; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */
1251 }
1252 }
1253
1254 if (skip != NULL) {
1255 *skip = ((size_t)a - (size_t)skip - 4) / 4;
1256 skip ++;
1257 *skip = (((size_t)a - (size_t)skip - 4) / 4) >> 8;
1258 }
1259
1260 *addrp = a;
1261 return 1;
1262 }
1263
1264
1265 /*
1266 * bintrans_write_instruction__loadstore():
1267 */
1268 static int bintrans_write_instruction__loadstore(
1269 struct memory *mem, unsigned char **addrp,
1270 int rt, int imm, int rs, int instruction_type, int bigendian,
1271 int do_alignment_check)
1272 {
1273 unsigned char *a, *fail, *generic64bit = NULL, *generic64bitA = NULL;
1274 unsigned char *doloadstore = NULL,
1275 *ok_unaligned_load3, *ok_unaligned_load2, *ok_unaligned_load1;
1276 uint32_t *b;
1277 int need_imm_lda = 0;
1278 int ofs, alignment, load = 0, alpha_rs, alpha_rt, unaligned = 0;
1279
1280 /* TODO: Not yet: */
1281 if (instruction_type == HI6_LQ_MDMX || instruction_type == HI6_SQ) {
1282 return 0;
1283 }
1284
1285 switch (instruction_type) {
1286 case HI6_LQ_MDMX:
1287 case HI6_LD:
1288 case HI6_LDL:
1289 case HI6_LDR:
1290 case HI6_LWU:
1291 case HI6_LW:
1292 case HI6_LWL:
1293 case HI6_LWR:
1294 case HI6_LHU:
1295 case HI6_LH:
1296 case HI6_LBU:
1297 case HI6_LB:
1298 load = 1;
1299 if (rt == 0)
1300 return 0;
1301 }
1302
1303 switch (instruction_type) {
1304 case HI6_LDL:
1305 case HI6_LDR:
1306 case HI6_LWL:
1307 case HI6_LWR:
1308 case HI6_SDL:
1309 case HI6_SDR:
1310 case HI6_SWL:
1311 case HI6_SWR:
1312 unaligned = 1;
1313 }
1314
1315 a = *addrp;
1316
1317 /*
1318 * a1 = gpr[rs] + imm;
1319 *
1320 * rs_ofs 30 a4 ldq a1,rs(a0)
1321 * imm 21 22 lda a1,imm(a1)
1322 */
1323
1324 alpha_rs = map_MIPS_to_Alpha[rs];
1325 if (alpha_rs < 0) {
1326 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_A1);
1327 alpha_rs = ALPHA_A1;
1328 } else
1329 need_imm_lda = 1;
1330
1331 if (imm != 0 || need_imm_lda) {
1332 *a++ = imm; *a++ = (imm >> 8);
1333 *a++ = 0x20 + alpha_rs; *a++ = 0x22;
1334 }
1335
1336 alignment = 0;
1337 switch (instruction_type) {
1338 case HI6_LQ_MDMX:
1339 case HI6_SQ:
1340 alignment = 15;
1341 break;
1342 case HI6_LD:
1343 case HI6_LDL:
1344 case HI6_LDR:
1345 case HI6_SD:
1346 case HI6_SDL:
1347 case HI6_SDR:
1348 alignment = 7;
1349 break;
1350 case HI6_LW:
1351 case HI6_LWL:
1352 case HI6_LWR:
1353 case HI6_LWU:
1354 case HI6_SW:
1355 case HI6_SWL:
1356 case HI6_SWR:
1357 alignment = 3;
1358 break;
1359 case HI6_LH:
1360 case HI6_LHU:
1361 case HI6_SH:
1362 alignment = 1;
1363 break;
1364 }
1365
1366 if (unaligned) {
1367 /*
1368 * Unaligned load/store: Perform the host load/store at
1369 * an aligned address, and then figure out which bytes to
1370 * actually load into the destination register.
1371 *
1372 * 02 30 20 46 and a1,alignment,t1
1373 * 31 05 22 42 subq a1,t1,a1
1374 */
1375 *a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46;
1376 *a++ = 0x31; *a++ = 0x05; *a++ = 0x22; *a++ = 0x42;
1377 } else if (alignment > 0 && do_alignment_check) {
1378 /*
1379 * Check alignment:
1380 *
1381 * 02 30 20 46 and a1,0x1,t1
1382 * 02 70 20 46 and a1,0x3,t1 (one of these "and"s)
1383 * 02 f0 20 46 and a1,0x7,t1
1384 * 02 f0 21 46 and a1,0xf,t1
1385 * 01 00 40 e4 beq t1,<okalign>
1386 * 00 80 fa 6b ret
1387 */
1388 *a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46;
1389 fail = a;
1390 *a++ = 0x01; *a++ = 0x00; *a++ = 0x40; *a++ = 0xe4;
1391 *addrp = a;
1392 bintrans_write_chunkreturn_fail(addrp);
1393 a = *addrp;
1394 *fail = ((size_t)a - (size_t)fail - 4) / 4;
1395 }
1396
1397 alpha_rt = map_MIPS_to_Alpha[rt];
1398
1399 if (mem->bintrans_32bit_only) {
1400 /* Special case for 32-bit addressing: */
1401
1402 if (load)
1403 ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_load_32bit) - (size_t)&dummy_cpu;
1404 else
1405 ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_store_32bit) - (size_t)&dummy_cpu;
1406 /* ldq t12,bintrans_loadstore_32bit(a0) */
1407 *a++ = ofs; *a++ = ofs >> 8; *a++ = 0x70; *a++ = 0xa7;
1408
1409 /* jsr t4,(t12),<after> */
1410 *a++ = 0x00; *a++ = 0x40; *a++ = 0xbb; *a++ = 0x68;
1411
1412 /* Now: a3 = host page, t3 = address of host load/store */
1413 } else {
1414 /*
1415 * If the highest 33 bits of the address are either all ones
1416 * or all zeroes, then the tables used for 32-bit load/stores
1417 * can be used.
1418 */
1419 *a++ = 0x81; *a++ = 0xf6; *a++ = 0x23; *a++ = 0x4a; /* srl a1,0x1f,t0 */
1420 *a++ = 0x01; *a++ = 0x30; *a++ = 0x20; *a++ = 0x44; /* and t0,0x1,t0 */
1421 *a++ = 0x04; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4; /* beq t0,<noll> */
1422 *a++ = 0x81; *a++ = 0x16; *a++ = 0x24; *a++ = 0x4a; /* srl a1,0x20,t0 */
1423 *a++ = 0x01; *a++ = 0x30; *a++ = 0x20; *a++ = 0x40; /* addl t0,0x1,t0 */
1424 *a++ = 0x04; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4; /* beq t0,<ok> */
1425 generic64bit = a;
1426 *a++ = 0x04; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; /* br <generic> */
1427 /* <noll>: */
1428 *a++ = 0x81; *a++ = 0x16; *a++ = 0x24; *a++ = 0x4a; /* srl a1,0x20,t0 */
1429 *a++ = 0x01; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4; /* beq t0,<ok> */
1430 generic64bitA = a;
1431 *a++ = 0x04; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; /* br <generic> */
1432
1433 if (load)
1434 ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_load_32bit) - (size_t)&dummy_cpu;
1435 else
1436 ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_store_32bit) - (size_t)&dummy_cpu;
1437 /* ldq t12,bintrans_loadstore_32bit(a0) */
1438 *a++ = ofs; *a++ = ofs >> 8; *a++ = 0x70; *a++ = 0xa7;
1439
1440 /* jsr t4,(t12),<after> */
1441 *a++ = 0x00; *a++ = 0x40; *a++ = 0xbb; *a++ = 0x68;
1442
1443 /*
1444 * Now: a3 = host page
1445 * t3 = (potential) address of host load/store
1446 */
1447
1448 doloadstore = a;
1449 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1450
1451
1452 /*
1453 * Generic (64-bit) load/store:
1454 */
1455
1456 if (generic64bit != NULL)
1457 *generic64bit = ((size_t)a - (size_t)generic64bit - 4) / 4;
1458 if (generic64bitA != NULL)
1459 *generic64bitA = ((size_t)a - (size_t)generic64bitA - 4) / 4;
1460
1461 *addrp = a;
1462 b = (uint32_t *) *addrp;
1463
1464 /* Save a0 and the old return address on the stack: */
1465 *b++ = 0x23deff80; /* lda sp,-128(sp) */
1466
1467 *b++ = 0xb75e0000; /* stq ra,0(sp) */
1468 *b++ = 0xb61e0008; /* stq a0,8(sp) */
1469 *b++ = 0xb4de0010; /* stq t5,16(sp) */
1470 *b++ = 0xb0fe0018; /* stl t6,24(sp) */
1471 *b++ = 0xb71e0020; /* stq t10,32(sp) */
1472 *b++ = 0xb73e0028; /* stq t11,40(sp) */
1473 *b++ = 0xb51e0030; /* stq t7,48(sp) */
1474 *b++ = 0xb6de0038; /* stq t8,56(sp) */
1475 *b++ = 0xb6fe0040; /* stq t9,64(sp) */
1476
1477 ofs = ((size_t)&dummy_cpu.cd.mips.fast_vaddr_to_hostaddr) - (size_t)&dummy_cpu;
1478
1479 *b++ = 0xa7700000 | ofs; /* ldq t12,0(a0) */
1480
1481 /* a1 is already vaddr. set a2 = writeflag */
1482 *b++ = 0x225f0000 | (load? 0 : 1);
1483
1484 /* Call fast_vaddr_to_hostaddr: */
1485 *b++ = 0x6b5b4000; /* jsr ra,(t12),<after> */
1486
1487 /* Restore the old return address and a0 from the stack: */
1488 *b++ = 0xa75e0000; /* ldq ra,0(sp) */
1489 *b++ = 0xa61e0008; /* ldq a0,8(sp) */
1490 *b++ = 0xa4de0010; /* ldq t5,16(sp) */
1491 *b++ = 0xa0fe0018; /* ldl t6,24(sp) */
1492 *b++ = 0xa71e0020; /* ldq t10,32(sp) */
1493 *b++ = 0xa73e0028; /* ldq t11,40(sp) */
1494 *b++ = 0xa51e0030; /* ldq t7,48(sp) */
1495 *b++ = 0xa6de0038; /* ldq t8,56(sp) */
1496 *b++ = 0xa6fe0040; /* ldq t9,64(sp) */
1497
1498 *b++ = 0x23de0080; /* lda sp,128(sp) */
1499
1500 *addrp = (unsigned char *) b;
1501 a = *addrp;
1502
1503 /*
1504 * NULL? Then return failure.
1505 * 01 00 00 f4 bne v0,f8 <okzz>
1506 */
1507 fail = a;
1508 *a++ = 0x01; *a++ = 0x00; *a++ = 0x00; *a++ = 0xf4;
1509 bintrans_write_chunkreturn_fail(&a);
1510 *fail = ((size_t)a - (size_t)fail - 4) / 4;
1511
1512 /* The rest of this code was written with t3 as the address. */
1513
1514 /* 04 14 00 40 addq v0,0,t3 */
1515 *a++ = 0x04; *a++ = 0x14; *a++ = 0x00; *a++ = 0x40;
1516
1517 if (doloadstore != NULL)
1518 *doloadstore = ((size_t)a - (size_t)doloadstore - 4) / 4;
1519 }
1520
1521
1522 switch (instruction_type) {
1523 case HI6_LQ_MDMX:
1524 /* TODO */
1525 break;
1526 case HI6_LD:
1527 *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xa4; /* ldq t0,0(t3) */
1528 if (bigendian) {
1529 /* remember original 8 bytes of t0: */
1530 *a++ = 0x05; *a++ = 0x04; *a++ = 0x3f; *a++ = 0x40; /* addq t0,zero,t4 */
1531
1532 /* swap lowest 4 bytes: */
1533 *a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */
1534 *a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */
1535 *a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */
1536 *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1537 *a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */
1538 *a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */
1539 *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1540 *a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */
1541 *a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */
1542
1543 /* save result in (top 4 bytes of) t1, then t4. get back top bits of t4: */
1544 *a++ = 0x22; *a++ = 0x17; *a++ = 0x24; *a++ = 0x48; /* sll t0,0x20,t1 */
1545 *a++ = 0x81; *a++ = 0x16; *a++ = 0xa4; *a++ = 0x48; /* srl t4,0x20,t0 */
1546 *a++ = 0x05; *a++ = 0x14; *a++ = 0x40; *a++ = 0x40; /* addq t1,0,t4 */
1547
1548 /* swap highest 4 bytes: */
1549 *a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */
1550 *a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */
1551 *a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */
1552 *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1553 *a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */
1554 *a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */
1555 *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1556 *a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */
1557 *a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */
1558
1559 /* or the results together: */
1560 *a++ = 0x01; *a++ = 0x04; *a++ = 0xa1; *a++ = 0x44; /* or t4,t0,t0 */
1561 }
1562 bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rt);
1563 break;
1564 case HI6_LW:
1565 case HI6_LWU:
1566 if (alpha_rt < 0 || bigendian || instruction_type == HI6_LWU)
1567 alpha_rt = ALPHA_T0;
1568 /* ldl rt,0(t3) */
1569 *a++ = 0x00; *a++ = 0x00; *a++ = 0x04 | ((alpha_rt & 7) << 5);
1570 *a++ = 0xa0 | ((alpha_rt >> 3) & 3);
1571 if (bigendian) {
1572 *a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */
1573 *a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */
1574 *a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */
1575 *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1576 *a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */
1577 *a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */
1578 *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1579 *a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */
1580 *a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */
1581 *a++ = 0x01; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,zero,t0 (sign extend) 32->64 */
1582 }
1583 if (instruction_type == HI6_LWU) {
1584 /* Use only lowest 32 bits: */
1585 *a++ = 0x21; *a++ = 0xf6; *a++ = 0x21; *a++ = 0x48; /* zapnot t0,0xf,t0 */
1586 }
1587 if (alpha_rt == ALPHA_T0)
1588 bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rt);
1589 break;
1590 case HI6_LHU:
1591 case HI6_LH:
1592 *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0x30; /* ldwu from memory */
1593 if (bigendian) {
1594 *a++ = 0x62; *a++ = 0x31; *a++ = 0x20; *a++ = 0x48; /* insbl t0,1,t1 */
1595 *a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */
1596 *a++ = 0x01; *a++ = 0x04; *a++ = 0x43; *a++ = 0x44; /* or t1,t2,t0 */
1597 }
1598 if (instruction_type == HI6_LH) {
1599 *a++ = 0x21; *a++ = 0x00; *a++ = 0xe1; *a++ = 0x73; /* sextw t0,t0 */
1600 }
1601 bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rt);
1602 break;
1603 case HI6_LBU:
1604 case HI6_LB:
1605 if (alpha_rt < 0)
1606 alpha_rt = ALPHA_T0;
1607 /* ldbu rt,0(t3) */
1608 *a++ = 0x00; *a++ = 0x00; *a++ = 0x04 | ((alpha_rt & 7) << 5);
1609 *a++ = 0x28 | ((alpha_rt >> 3) & 3);
1610 if (instruction_type == HI6_LB) {
1611 /* sextb rt,rt */
1612 *a++ = alpha_rt; *a++ = 0x00; *a++ = 0xe0 + alpha_rt; *a++ = 0x73;
1613 }
1614 if (alpha_rt == ALPHA_T0)
1615 bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rt);
1616 break;
1617
1618 case HI6_LWL:
1619 /* a1 = 0..3 (or 0..7 for 64-bit loads): */
1620 alpha_rs = map_MIPS_to_Alpha[rs];
1621 if (alpha_rs < 0) {
1622 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0);
1623 alpha_rs = ALPHA_T0;
1624 }
1625 *a++ = imm; *a++ = (imm >> 8); *a++ = 0x20 + alpha_rs; *a++ = 0x22;
1626 /* 02 30 20 46 and a1,alignment,t1 */
1627 *a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46;
1628
1629 /* ldl t0,0(t3) */
1630 *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xa0;
1631
1632 if (bigendian) {
1633 /* TODO */
1634 bintrans_write_chunkreturn_fail(&a);
1635 }
1636 /*
1637 * lwl: memory = 0x12 0x34 0x56 0x78
1638 * offset (a1): register rt becomes:
1639 * 0 0x12......
1640 * 1 0x3412....
1641 * 2 0x563412..
1642 * 3 0x78563412
1643 */
1644
1645 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T2);
1646
1647 /*
1648 10: 03 00 9f 20 lda t3,3
1649 14: a5 05 82 40 cmpeq t3,t1,t4
1650 18: 01 00 a0 e4 beq t4,20 <skip>
1651 */
1652 *a++ = 0x03; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20;
1653 *a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40;
1654 *a++ = 0x02; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
1655
1656 /* 03 14 20 40 addq t0,0,t2 */
1657 *a++ = 0x03; *a++ = 0x14; *a++ = 0x20; *a++ = 0x40;
1658
1659 ok_unaligned_load3 = a;
1660 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1661
1662
1663
1664 *a++ = 0x02; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20;
1665 *a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40;
1666 *a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
1667 /*
1668 * 2 0x563412..
1669 2c: 21 17 21 48 sll t0,0x8,t0
1670 30: 01 10 20 40 addl t0,0,t0
1671 34: 03 f0 7f 44 and t2,0xff,t2
1672 38: 03 04 23 44 or t0,t2,t2
1673 */
1674 *a++ = 0x21; *a++ = 0x17; *a++ = 0x21; *a++ = 0x48;
1675 *a++ = 0x01; *a++ = 0x10; *a++ = 0x20; *a++ = 0x40;
1676 *a++ = 0x03; *a++ = 0xf0; *a++ = 0x7f; *a++ = 0x44;
1677 *a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
1678
1679 ok_unaligned_load2 = a;
1680 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1681
1682
1683
1684 *a++ = 0x01; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20;
1685 *a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40;
1686 *a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
1687 /*
1688 * 1 0x3412....
1689 2c: 21 17 22 48 sll t0,0x10,t0
1690 30: 01 10 20 40 addl t0,0,t0
1691 34: 23 76 60 48 zapnot t2,0x3,t2
1692 38: 03 04 23 44 or t0,t2,t2
1693 */
1694 *a++ = 0x21; *a++ = 0x17; *a++ = 0x22; *a++ = 0x48;
1695 *a++ = 0x01; *a++ = 0x10; *a++ = 0x20; *a++ = 0x40;
1696 *a++ = 0x23; *a++ = 0x76; *a++ = 0x60; *a++ = 0x48;
1697 *a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
1698
1699 ok_unaligned_load1 = a;
1700 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1701
1702
1703
1704
1705 /*
1706 * 0 0x12......
1707 2c: 21 17 23 48 sll t0,0x18,t0
1708 30: 01 10 20 40 addl t0,0,t0
1709 34: 23 f6 60 48 zapnot t2,0x7,t2
1710 38: 03 04 23 44 or t0,t2,t2
1711 */
1712 *a++ = 0x21; *a++ = 0x17; *a++ = 0x23; *a++ = 0x48;
1713 *a++ = 0x01; *a++ = 0x10; *a++ = 0x20; *a++ = 0x40;
1714 *a++ = 0x23; *a++ = 0xf6; *a++ = 0x60; *a++ = 0x48;
1715 *a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
1716
1717
1718 *ok_unaligned_load3 = ((size_t)a - (size_t)ok_unaligned_load3 - 4) / 4;
1719 *ok_unaligned_load2 = ((size_t)a - (size_t)ok_unaligned_load2 - 4) / 4;
1720 *ok_unaligned_load1 = ((size_t)a - (size_t)ok_unaligned_load1 - 4) / 4;
1721
1722 /* 03 10 60 40 addl t2,0,t2 */
1723 *a++ = 0x03; *a++ = 0x10; *a++ = 0x60; *a++ = 0x40;
1724
1725 bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T2, rt);
1726 break;
1727
1728 case HI6_LWR:
1729 /* a1 = 0..3 (or 0..7 for 64-bit loads): */
1730 alpha_rs = map_MIPS_to_Alpha[rs];
1731 if (alpha_rs < 0) {
1732 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0);
1733 alpha_rs = ALPHA_T0;
1734 }
1735 *a++ = imm; *a++ = (imm >> 8); *a++ = 0x20 + alpha_rs; *a++ = 0x22;
1736 /* 02 30 20 46 and a1,alignment,t1 */
1737 *a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46;
1738
1739 /* ldl t0,0(t3) */
1740 *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xa0;
1741
1742 if (bigendian) {
1743 /* TODO */
1744 bintrans_write_chunkreturn_fail(&a);
1745 }
1746 /*
1747 * lwr: memory = 0x12 0x34 0x56 0x78
1748 * offset (a1): register rt becomes:
1749 * 0 0x78563412
1750 * 1 0x..785634
1751 * 2 0x....7856
1752 * 3 0x......78
1753 */
1754
1755 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T2);
1756
1757 /*
1758 10: 03 00 9f 20 lda t3,3
1759 14: a5 05 82 40 cmpeq t3,t1,t4
1760 18: 01 00 a0 e4 beq t4,20 <skip>
1761 */
1762 *a++ = 0x03; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20;
1763 *a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40;
1764 *a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
1765
1766 /*
1767 2c: 81 16 23 48 srl t0,0x18,t0
1768 b0: 21 36 20 48 zapnot t0,0x1,t0
1769 34: 23 d6 7f 48 zapnot t2,0xfe,t2
1770 38: 03 04 23 44 or t0,t2,t2
1771 */
1772 *a++ = 0x81; *a++ = 0x16; *a++ = 0x23; *a++ = 0x48;
1773 *a++ = 0x21; *a++ = 0x36; *a++ = 0x20; *a++ = 0x48;
1774 *a++ = 0x23; *a++ = 0xd6; *a++ = 0x7f; *a++ = 0x48;
1775 *a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
1776
1777 ok_unaligned_load3 = a;
1778 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1779
1780
1781
1782 *a++ = 0x02; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20;
1783 *a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40;
1784 *a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
1785 /*
1786 2c: 81 16 22 48 srl t0,0x10,t0
1787 b4: 21 76 20 48 zapnot t0,0x3,t0
1788 34: 23 96 7f 48 zapnot t2,0xfc,t2
1789 38: 03 04 23 44 or t0,t2,t2
1790 */
1791 *a++ = 0x81; *a++ = 0x16; *a++ = 0x22; *a++ = 0x48;
1792 *a++ = 0x21; *a++ = 0x76; *a++ = 0x20; *a++ = 0x48;
1793 *a++ = 0x23; *a++ = 0x96; *a++ = 0x7f; *a++ = 0x48;
1794 *a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
1795
1796 ok_unaligned_load2 = a;
1797 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1798
1799
1800
1801 *a++ = 0x01; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20;
1802 *a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40;
1803 *a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
1804 /*
1805 2c: 81 16 21 48 srl t0,0x8,t0
1806 b8: 21 f6 20 48 zapnot t0,0x7,t0
1807 3c: 23 16 7f 48 zapnot t2,0xf8,t2
1808 40: 03 04 23 44 or t0,t2,t2
1809 */
1810 *a++ = 0x81; *a++ = 0x16; *a++ = 0x21; *a++ = 0x48;
1811 *a++ = 0x21; *a++ = 0xf6; *a++ = 0x20; *a++ = 0x48;
1812 *a++ = 0x23; *a++ = 0x16; *a++ = 0x7f; *a++ = 0x48;
1813 *a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
1814
1815 ok_unaligned_load1 = a;
1816 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1817
1818
1819
1820
1821 /*
1822 * 0 0x12......
1823 */
1824 /* 03 14 20 40 addq t0,0,t2 */
1825 *a++ = 0x03; *a++ = 0x14; *a++ = 0x20; *a++ = 0x40;
1826
1827
1828
1829 *ok_unaligned_load3 = ((size_t)a - (size_t)ok_unaligned_load3 - 4) / 4;
1830 *ok_unaligned_load2 = ((size_t)a - (size_t)ok_unaligned_load2 - 4) / 4;
1831 *ok_unaligned_load1 = ((size_t)a - (size_t)ok_unaligned_load1 - 4) / 4;
1832
1833 /* 03 10 60 40 addl t2,0,t2 */
1834 *a++ = 0x03; *a++ = 0x10; *a++ = 0x60; *a++ = 0x40;
1835
1836 bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T2, rt);
1837 break;
1838
1839 case HI6_SQ:
1840 /* TODO */
1841 break;
1842 case HI6_SD:
1843 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T0);
1844 if (bigendian) {
1845 /* remember original 8 bytes of t0: */
1846 *a++ = 0x05; *a++ = 0x04; *a++ = 0x3f; *a++ = 0x40; /* addq t0,zero,t4 */
1847
1848 /* swap lowest 4 bytes: */
1849 *a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */
1850 *a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */
1851 *a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */
1852 *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1853 *a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */
1854 *a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */
1855 *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1856 *a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */
1857 *a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */
1858
1859 /* save result in (top 4 bytes of) t1, then t4. get back top bits of t4: */
1860 *a++ = 0x22; *a++ = 0x17; *a++ = 0x24; *a++ = 0x48; /* sll t0,0x20,t1 */
1861 *a++ = 0x81; *a++ = 0x16; *a++ = 0xa4; *a++ = 0x48; /* srl t4,0x20,t0 */
1862 *a++ = 0x05; *a++ = 0x14; *a++ = 0x40; *a++ = 0x40; /* addq t1,0,t4 */
1863
1864 /* swap highest 4 bytes: */
1865 *a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */
1866 *a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */
1867 *a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */
1868 *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1869 *a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */
1870 *a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */
1871 *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1872 *a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */
1873 *a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */
1874
1875 /* or the results together: */
1876 *a++ = 0x01; *a++ = 0x04; *a++ = 0xa1; *a++ = 0x44; /* or t4,t0,t0 */
1877 }
1878 *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xb4; /* stq to memory */
1879 break;
1880 case HI6_SW:
1881 if (alpha_rt < 0 || bigendian) {
1882 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T0);
1883 alpha_rt = ALPHA_T0;
1884 }
1885 if (bigendian) {
1886 *a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */
1887 *a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */
1888 *a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */
1889 *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1890 *a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */
1891 *a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */
1892 *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1893 *a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */
1894 *a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */
1895 }
1896 /* stl to memory: stl rt,0(t3) */
1897 *a++ = 0x00; *a++ = 0x00; *a++ = 0x04 | ((alpha_rt & 7) << 5);
1898 *a++ = 0xb0 | ((alpha_rt >> 3) & 3);
1899 break;
1900 case HI6_SH:
1901 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T0);
1902 if (bigendian) {
1903 *a++ = 0x62; *a++ = 0x31; *a++ = 0x20; *a++ = 0x48; /* insbl t0,1,t1 */
1904 *a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */
1905 *a++ = 0x01; *a++ = 0x04; *a++ = 0x43; *a++ = 0x44; /* or t1,t2,t0 */
1906 }
1907 *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0x34; /* stw to memory */
1908 break;
1909 case HI6_SB:
1910 if (alpha_rt < 0) {
1911 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T0);
1912 alpha_rt = ALPHA_T0;
1913 }
1914 /* stb to memory: stb rt,0(t3) */
1915 *a++ = 0x00; *a++ = 0x00; *a++ = 0x04 | ((alpha_rt & 7) << 5);
1916 *a++ = 0x38 | ((alpha_rt >> 3) & 3);
1917 break;
1918
1919 case HI6_SWL:
1920 /* a1 = 0..3 (or 0..7 for 64-bit stores): */
1921 alpha_rs = map_MIPS_to_Alpha[rs];
1922 if (alpha_rs < 0) {
1923 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0);
1924 alpha_rs = ALPHA_T0;
1925 }
1926 *a++ = imm; *a++ = (imm >> 8); *a++ = 0x20 + alpha_rs; *a++ = 0x22;
1927 /* 02 30 20 46 and a1,alignment,t1 */
1928 *a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46;
1929
1930 /* ldl t0,0(t3) */
1931 *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xa0;
1932
1933 if (bigendian) {
1934 /* TODO */
1935 bintrans_write_chunkreturn_fail(&a);
1936 }
1937
1938 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T2);
1939
1940 /*
1941 * swl: memory = 0x12 0x34 0x56 0x78
1942 * register = 0x89abcdef
1943 * offset (a1): memory becomes:
1944 * 0 0x89 0x.. 0x.. 0x..
1945 * 1 0xab 0x89 0x.. 0x..
1946 * 2 0xcd 0xab 0x89 0x..
1947 * 3 0xef 0xcd 0xab 0x89
1948 */
1949
1950 /*
1951 a5 75 40 40 cmpeq t1,0x03,t4
1952 01 00 a0 e4 beq t4,20 <skip>
1953 */
1954 *a++ = 0xa5; *a++ = 0x75; *a++ = 0x40; *a++ = 0x40;
1955 *a++ = 0x02; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
1956
1957 /* 01 10 60 40 addl t2,0,t0 */
1958 *a++ = 0x01; *a++ = 0x10; *a++ = 0x60; *a++ = 0x40;
1959
1960 ok_unaligned_load3 = a;
1961 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1962
1963
1964
1965
1966 *a++ = 0xa5; *a++ = 0x55; *a++ = 0x40; *a++ = 0x40;
1967 *a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
1968 /*
1969 2:
1970 e8: 83 16 61 48 srl t2,0x8,t2
1971 ec: 23 f6 60 48 zapnot t2,0x7,t2
1972 f0: 21 16 3f 48 zapnot t0,0xf8,t0
1973 f4: 01 04 23 44 or t0,t2,t0
1974 */
1975 *a++ = 0x83; *a++ = 0x16; *a++ = 0x61; *a++ = 0x48;
1976 *a++ = 0x23; *a++ = 0xf6; *a++ = 0x60; *a++ = 0x48;
1977 *a++ = 0x21; *a++ = 0x16; *a++ = 0x3f; *a++ = 0x48;
1978 *a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
1979
1980 ok_unaligned_load2 = a;
1981 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1982
1983
1984
1985 *a++ = 0xa5; *a++ = 0x35; *a++ = 0x40; *a++ = 0x40;
1986 *a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
1987 /*
1988 1:
1989 f8: 83 16 62 48 srl t2,0x10,t2
1990 fc: 23 76 60 48 zapnot t2,0x3,t2
1991 100: 21 96 3f 48 zapnot t0,0xfc,t0
1992 104: 01 04 23 44 or t0,t2,t0
1993 */
1994 *a++ = 0x83; *a++ = 0x16; *a++ = 0x62; *a++ = 0x48;
1995 *a++ = 0x23; *a++ = 0x76; *a++ = 0x60; *a++ = 0x48;
1996 *a++ = 0x21; *a++ = 0x96; *a++ = 0x3f; *a++ = 0x48;
1997 *a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
1998
1999 ok_unaligned_load1 = a;
2000 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
2001
2002
2003
2004
2005
2006 /*
2007 0:
2008 108: 83 16 63 48 srl t2,0x18,t2
2009 10c: 23 36 60 48 zapnot t2,0x1,t2
2010 110: 21 d6 3f 48 zapnot t0,0xfe,t0
2011 114: 01 04 23 44 or t0,t2,t0
2012 */
2013 *a++ = 0x83; *a++ = 0x16; *a++ = 0x63; *a++ = 0x48;
2014 *a++ = 0x23; *a++ = 0x36; *a++ = 0x60; *a++ = 0x48;
2015 *a++ = 0x21; *a++ = 0xd6; *a++ = 0x3f; *a++ = 0x48;
2016 *a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
2017
2018
2019 *ok_unaligned_load3 = ((size_t)a - (size_t)ok_unaligned_load3 - 4) / 4;
2020 *ok_unaligned_load2 = ((size_t)a - (size_t)ok_unaligned_load2 - 4) / 4;
2021 *ok_unaligned_load1 = ((size_t)a - (size_t)ok_unaligned_load1 - 4) / 4;
2022
2023 /* sdl t0,0(t3) */
2024 *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xb0;
2025 break;
2026
2027 case HI6_SWR:
2028 /* a1 = 0..3 (or 0..7 for 64-bit stores): */
2029 alpha_rs = map_MIPS_to_Alpha[rs];
2030 if (alpha_rs < 0) {
2031 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0);
2032 alpha_rs = ALPHA_T0;
2033 }
2034 *a++ = imm; *a++ = (imm >> 8); *a++ = 0x20 + alpha_rs; *a++ = 0x22;
2035 /* 02 30 20 46 and a1,alignment,t1 */
2036 *a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46;
2037
2038 /* ldl t0,0(t3) */
2039 *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xa0;
2040
2041 if (bigendian) {
2042 /* TODO */
2043 bintrans_write_chunkreturn_fail(&a);
2044 }
2045
2046 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T2);
2047
2048 /*
2049 * swr: memory = 0x12 0x34 0x56 0x78
2050 * register = 0x89abcdef
2051 * offset (a1): memory becomes:
2052 * 0 0xef 0xcd 0xab 0x89
2053 * 1 0x.. 0xef 0xcd 0xab
2054 * 2 0x.. 0x.. 0xef 0xcd
2055 * 3 0x.. 0x.. 0x.. 0xef
2056 */
2057
2058
2059 /*
2060 a5 75 40 40 cmpeq t1,0x03,t4
2061 01 00 a0 e4 beq t4,20 <skip>
2062 */
2063 *a++ = 0xa5; *a++ = 0x75; *a++ = 0x40; *a++ = 0x40;
2064 *a++ = 0x04; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
2065
2066 /*
2067 118: 23 17 63 48 sll t2,0x18,t2
2068 11c: 21 f6 20 48 zapnot t0,0x7,t0
2069 120: 01 04 23 44 or t0,t2,t0
2070 */
2071 *a++ = 0x23; *a++ = 0x17; *a++ = 0x63; *a++ = 0x48;
2072 *a++ = 0x21; *a++ = 0xf6; *a++ = 0x20; *a++ = 0x48;
2073 *a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
2074
2075 ok_unaligned_load3 = a;
2076 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
2077
2078
2079
2080
2081
2082 *a++ = 0xa5; *a++ = 0x55; *a++ = 0x40; *a++ = 0x40;
2083 *a++ = 0x04; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
2084 /*
2085 2:
2086 124: 23 17 62 48 sll t2,0x10,t2
2087 128: 21 76 20 48 zapnot t0,0x3,t0
2088 12c: 01 04 23 44 or t0,t2,t0
2089 */
2090 *a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48;
2091 *a++ = 0x21; *a++ = 0x76; *a++ = 0x20; *a++ = 0x48;
2092 *a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
2093
2094 ok_unaligned_load2 = a;
2095 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
2096
2097
2098
2099 *a++ = 0xa5; *a++ = 0x35; *a++ = 0x40; *a++ = 0x40;
2100 *a++ = 0x04; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
2101 /*
2102 1:
2103 130: 23 17 61 48 sll t2,0x8,t2
2104 134: 21 36 20 48 zapnot t0,0x1,t0
2105 138: 01 04 23 44 or t0,t2,t0
2106 */
2107 *a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48;
2108 *a++ = 0x21; *a++ = 0x36; *a++ = 0x20; *a++ = 0x48;
2109 *a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
2110
2111 ok_unaligned_load1 = a;
2112 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
2113
2114
2115
2116 /*
2117 0:
2118 13c: 01 10 60 40 addl t2,0,t0
2119 */
2120 *a++ = 0x01; *a++ = 0x10; *a++ = 0x60; *a++ = 0x40;
2121
2122
2123 *ok_unaligned_load3 = ((size_t)a - (size_t)ok_unaligned_load3 - 4) / 4;
2124 *ok_unaligned_load2 = ((size_t)a - (size_t)ok_unaligned_load2 - 4) / 4;
2125 *ok_unaligned_load1 = ((size_t)a - (size_t)ok_unaligned_load1 - 4) / 4;
2126
2127 /* sdl t0,0(t3) */
2128 *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xb0;
2129 break;
2130
2131 default:
2132 ;
2133 }
2134
2135 *addrp = a;
2136 bintrans_write_pc_inc(addrp);
2137 return 1;
2138 }
2139
2140
2141 /*
2142 * bintrans_write_instruction__lui():
2143 */
2144 static int bintrans_write_instruction__lui(unsigned char **addrp,
2145 int rt, int imm)
2146 {
2147 uint32_t *a;
2148
2149 /*
2150 * dc fe 3f 24 ldah t0,-292
2151 * 1f 04 ff 5f fnop
2152 * 88 08 30 b4 stq t0,2184(a0)
2153 */
2154 if (rt != 0) {
2155 int alpha_rt = map_MIPS_to_Alpha[rt];
2156 if (alpha_rt < 0)
2157 alpha_rt = ALPHA_T0;
2158
2159 a = (uint32_t *) *addrp;
2160 *a++ = 0x241f0000 | (alpha_rt << 21) | ((uint32_t)imm & 0xffff);
2161 *addrp = (unsigned char *) a;
2162
2163 if (alpha_rt == ALPHA_T0) {
2164 *a++ = 0x5fff041f; /* fnop */
2165 bintrans_move_Alpha_reg_into_MIPS_reg(addrp, ALPHA_T0, rt);
2166 }
2167 }
2168
2169 bintrans_write_pc_inc(addrp);
2170
2171 return 1;
2172 }
2173
2174
2175 /*
2176 * bintrans_write_instruction__mfmthilo():
2177 */
2178 static int bintrans_write_instruction__mfmthilo(unsigned char **addrp,
2179 int rd, int from_flag, int hi_flag)
2180 {
2181 unsigned char *a;
2182 int ofs;
2183
2184 a = *addrp;
2185
2186 /*
2187 * 18 09 30 a4 ldq t0,hi(a0) (or lo)
2188 * 18 09 30 b4 stq t0,rd(a0)
2189 *
2190 * (or if from_flag is cleared then move the other way, it's
2191 * actually not rd then, but rs...)
2192 */
2193
2194 if (from_flag) {
2195 if (rd != 0) {
2196 /* mfhi or mflo */
2197 if (hi_flag)
2198 ofs = ((size_t)&dummy_cpu.cd.mips.hi) - (size_t)&dummy_cpu;
2199 else
2200 ofs = ((size_t)&dummy_cpu.cd.mips.lo) - (size_t)&dummy_cpu;
2201 *a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x30; *a++ = 0xa4;
2202
2203 bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rd);
2204 }
2205 } else {
2206 /* mthi or mtlo */
2207 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rd, ALPHA_T0);
2208
2209 if (hi_flag)
2210 ofs = ((size_t)&dummy_cpu.cd.mips.hi) - (size_t)&dummy_cpu;
2211 else
2212 ofs = ((size_t)&dummy_cpu.cd.mips.lo) - (size_t)&dummy_cpu;
2213 *a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x30; *a++ = 0xb4;
2214 }
2215
2216 *addrp = a;
2217 bintrans_write_pc_inc(addrp);
2218 return 1;
2219 }
2220
2221
2222 /*
2223 * bintrans_write_instruction__mfc_mtc():
2224 */
2225 static int bintrans_write_instruction__mfc_mtc(struct memory *mem,
2226 unsigned char **addrp, int coproc_nr, int flag64bit, int rt,
2227 int rd, int mtcflag)
2228 {
2229 uint32_t *a, *jump;
2230 int ofs;
2231
2232 /*
2233 * NOTE: Only a few registers are readable without side effects.
2234 */
2235 if (rt == 0 && !mtcflag)
2236 return 0;
2237
2238 if (coproc_nr >= 1)
2239 return 0;
2240
2241 if (rd == COP0_RANDOM || rd == COP0_COUNT)
2242 return 0;
2243
2244
2245 /*************************************************************
2246 *
2247 * TODO: Check for kernel mode, or Coproc X usability bit!
2248 *
2249 *************************************************************/
2250
2251 a = (uint32_t *) *addrp;
2252
2253 ofs = ((size_t)&dummy_cpu.cd.mips.coproc[0]) - (size_t)&dummy_cpu;
2254 *a++ = 0xa4300000 | (ofs & 0xffff); /* ldq t0,coproc[0](a0) */
2255
2256 ofs = ((size_t)&dummy_coproc.reg[rd]) - (size_t)&dummy_coproc;
2257 *a++ = 0xa4410000 | (ofs & 0xffff); /* ldq t1,reg_rd(t0) */
2258
2259 if (mtcflag) {
2260 /* mtc: */
2261 *addrp = (unsigned char *) a;
2262 bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rt, ALPHA_T0);
2263 a = (uint32_t *) *addrp;
2264
2265 if (!flag64bit) {
2266 *a++ = 0x40201001; /* addl t0,0,t0 */
2267 *a++ = 0x40401002; /* addl t1,0,t1 */
2268 }
2269
2270 /*
2271 * In the general case: Only allow mtc if it does NOT
2272 * change the register!!
2273 */
2274
2275 switch (rd) {
2276 case COP0_INDEX:
2277 break;
2278
2279 case COP0_EPC:
2280 break;
2281
2282 /* TODO: Some bits are not writable */
2283 case COP0_ENTRYLO0:
2284 case COP0_ENTRYLO1:
2285 break;
2286
2287 case COP0_ENTRYHI:
2288 /*
2289 * Entryhi is ok to write to, as long as the
2290 * ASID isn't changed. (That would require
2291 * cache invalidations etc. Instead of checking
2292 * for MMU3K vs others, we just assume that all the
2293 * lowest 12 bits must be the same.
2294 */
2295 /* ff 0f bf 20 lda t4,0x0fff */
2296 /* 03 00 25 44 and t0,t4,t2 */
2297 /* 04 00 45 44 and t1,t4,t3 */
2298 /* a3 05 64 40 cmpeq t2,t3,t2 */
2299 /* 01 00 60 f4 bne t2,<ok> */
2300 *a++ = 0x20bf0fff;
2301 *a++ = 0x44250003;
2302 *a++ = 0x44450004;
2303 *a++ = 0x406405a3;
2304 jump = a;
2305 *a++ = 0; /* later */
2306 *addrp = (unsigned char *) a;
2307 bintrans_write_chunkreturn_fail(addrp);
2308 a = (uint32_t *) *addrp;
2309 *jump = 0xf4600000 | (((size_t)a - (size_t)jump - 4) / 4);
2310 break;
2311
2312 case COP0_STATUS:
2313 /* Only allow updates to the status register if
2314 the interrupt enable bits were changed, but no
2315 other bits! */
2316 if (mem->bintrans_32bit_only) {
2317 /* R3000 etc. */
2318 /* t4 = 0x0fe70000; */
2319 *a++ = 0x20bf0000;
2320 *a++ = 0x24a50fe7;
2321 } else {
2322 /* fe 00 bf 20 lda t4,0x00fe */
2323 /* ff ff a5 24 ldah t4,-1(t4) */
2324 *a++ = 0x20bf0000;
2325 *a++ = 0x24a5ffff;
2326 }
2327
2328 /* 03 00 25 44 and t0,t4,t2 */
2329 /* 04 00 45 44 and t1,t4,t3 */
2330 /* a3 05 64 40 cmpeq t2,t3,t2 */
2331 /* 01 00 60 f4 bne t2,<ok> */
2332 *a++ = 0x44250003;
2333 *a++ = 0x44450004;
2334 *a++ = 0x406405a3;
2335 jump = a;
2336 *a++ = 0; /* later */
2337 *addrp = (unsigned char *) a;
2338 bintrans_write_chunkreturn_fail(addrp);
2339 a = (uint32_t *) *addrp;
2340 *jump = 0xf4600000 | (((size_t)a - (size_t)jump - 4) / 4);
2341
2342 /* If enabling interrupt bits would cause an
2343 exception, then don't do it: */
2344 ofs = ((size_t)&dummy_cpu.cd.mips.coproc[0]) - (size_t)&dummy_cpu;
2345 *a++ = 0xa4900000 | (ofs & 0xffff); /* ldq t3,coproc[0](a0) */
2346 ofs = ((size_t)&dummy_coproc.reg[COP0_CAUSE]) - (size_t)&dummy_coproc;
2347 *a++ = 0xa4a40000 | (ofs & 0xffff); /* ldq t4,reg_rd(t3) */
2348
2349 /* 02 00 a1 44 and t4,t0,t1 */
2350 /* 83 16 41 48 srl t1,0x8,t2 */
2351 /* 04 f0 7f 44 and t2,0xff,t3 */
2352 *a++ = 0x44a10002;
2353 *a++ = 0x48411683;
2354 *a++ = 0x447ff004;
2355 /* 01 00 80 e4 beq t3,<ok> */
2356 jump = a;
2357 *a++ = 0; /* later */
2358 *addrp = (unsigned char *) a;
2359 bintrans_write_chunkreturn_fail(addrp);
2360 a = (uint32_t *) *addrp;
2361 *jump = 0xe4800000 | (((size_t)a - (size_t)jump - 4) / 4);
2362 break;
2363
2364 default:
2365 /* a3 05 22 40 cmpeq t0,t1,t2 */
2366 /* 01 00 60 f4 bne t2,<ok> */
2367 *a++ = 0x402205a3;
2368 jump = a;
2369 *a++ = 0; /* later */
2370 *addrp = (unsigned char *) a;
2371 bintrans_write_chunkreturn_fail(addrp);
2372 a = (uint32_t *) *addrp;
2373 *jump = 0xf4600000 | (((size_t)a - (size_t)jump - 4) / 4);
2374 }
2375
2376 *a++ = 0x40201402; /* addq t0,0,t1 */
2377
2378 ofs = ((size_t)&dummy_cpu.cd.mips.coproc[0]) - (size_t)&dummy_cpu;
2379 *a++ = 0xa4300000 | (ofs & 0xffff); /* ldq t0,coproc[0](a0) */
2380 ofs = ((size_t)&dummy_coproc.reg[rd]) - (size_t)&dummy_coproc;
2381 *a++ = 0xb4410000 | (ofs & 0xffff); /* stq t1,reg_rd(t0) */
2382 } else {
2383 /* mfc: */
2384 if (!flag64bit) {
2385 *a++ = 0x40401002; /* addl t1,0,t1 */
2386 }
2387
2388 *addrp = (unsigned char *) a;
2389 bintrans_move_Alpha_reg_into_MIPS_reg(addrp, ALPHA_T1, rt);
2390 a = (uint32_t *) *addrp;
2391 }
2392
2393 *addrp = (unsigned char *) a;
2394
2395 bintrans_write_pc_inc(addrp);
2396 return 1;
2397 }
2398
2399
2400 /*
2401 * bintrans_write_instruction__tlb_rfe_etc():
2402 */
2403 static int bintrans_write_instruction__tlb_rfe_etc(unsigned char **addrp,
2404 int itype)
2405 {
2406 uint32_t *a;
2407 int ofs = 0;
2408
2409 switch (itype) {
2410 case CALL_TLBWI:
2411 case CALL_TLBWR:
2412 case CALL_TLBP:
2413 case CALL_TLBR:
2414 case CALL_RFE:
2415 case CALL_ERET:
2416 case CALL_BREAK:
2417 case CALL_SYSCALL:
2418 break;
2419 default:
2420 return 0;
2421 }
2422
2423 a = (uint32_t *) *addrp;
2424
2425 /* a0 = pointer to the cpu struct */
2426
2427 switch (itype) {
2428 case CALL_TLBWI:
2429 case CALL_TLBWR:
2430 /* a1 = 0 for indexed, 1 for random */
2431 *a++ = 0x223f0000 | (itype == CALL_TLBWR);
2432 break;
2433 case CALL_TLBP:
2434 case CALL_TLBR:
2435 /* a1 = 0 for probe, 1 for read */
2436 *a++ = 0x223f0000 | (itype == CALL_TLBR);
2437 break;
2438 case CALL_BREAK:
2439 case CALL_SYSCALL:
2440 *a++ = 0x223f0000 | (itype == CALL_BREAK? EXCEPTION_BP : EXCEPTION_SYS);
2441 break;
2442 }
2443
2444 /* Put PC into the cpu struct (both pc and pc_last). */
2445 *a++ = 0xb4d00000 | ofs_pc; /* stq t5,"pc"(a0) */
2446 *a++ = 0xb4d00000 | ofs_pc_last;/* stq t5,"pc_last"(a0) */
2447
2448 /* Save a0 and the old return address on the stack: */
2449 *a++ = 0x23deff80; /* lda sp,-128(sp) */
2450
2451 *a++ = 0xb75e0000; /* stq ra,0(sp) */
2452 *a++ = 0xb61e0008; /* stq a0,8(sp) */
2453 *a++ = 0xb0fe0018; /* stl t6,24(sp) */
2454 *a++ = 0xb71e0020; /* stq t10,32(sp) */
2455 *a++ = 0xb73e0028; /* stq t11,40(sp) */
2456 *a++ = 0xb51e0030; /* stq t7,48(sp) */
2457 *a++ = 0xb6de0038; /* stq t8,56(sp) */
2458 *a++ = 0xb6fe0040; /* stq t9,64(sp) */
2459
2460 switch (itype) {
2461 case CALL_TLBP:
2462 case CALL_TLBR:
2463 ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_fast_tlbpr) - (size_t)&dummy_cpu;
2464 break;
2465 case CALL_TLBWR:
2466 case CALL_TLBWI:
2467 ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_fast_tlbwri) - (size_t)&dummy_cpu;
2468 break;
2469 case CALL_RFE:
2470 ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_fast_rfe) - (size_t)&dummy_cpu;
2471 break;
2472 case CALL_ERET:
2473 ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_fast_eret) - (size_t)&dummy_cpu;
2474 break;
2475 case CALL_BREAK:
2476 case CALL_SYSCALL:
2477 ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_simple_exception) - (size_t)&dummy_cpu;
2478 break;
2479 }
2480
2481 *a++ = 0xa7700000 | ofs; /* ldq t12,0(a0) */
2482
2483 /* Call bintrans_fast_tlbwr: */
2484 *a++ = 0x6b5b4000; /* jsr ra,(t12),<after> */
2485
2486 /* Restore the old return address and a0 from the stack: */
2487 *a++ = 0xa75e0000; /* ldq ra,0(sp) */
2488 *a++ = 0xa61e0008; /* ldq a0,8(sp) */
2489 *a++ = 0xa0fe0018; /* ldl t6,24(sp) */
2490 *a++ = 0xa71e0020; /* ldq t10,32(sp) */
2491 *a++ = 0xa73e0028; /* ldq t11,40(sp) */
2492 *a++ = 0xa51e0030; /* ldq t7,48(sp) */
2493 *a++ = 0xa6de0038; /* ldq t8,56(sp) */
2494 *a++ = 0xa6fe0040; /* ldq t9,64(sp) */
2495
2496 *a++ = 0x23de0080; /* lda sp,128(sp) */
2497
2498 /* Load PC from the cpu struct. */
2499 *a++ = 0xa4d00000 | ofs_pc; /* ldq t5,"pc"(a0) */
2500
2501 *addrp = (unsigned char *) a;
2502
2503 switch (itype) {
2504 case CALL_ERET:
2505 case CALL_BREAK:
2506 case CALL_SYSCALL:
2507 /* Increase the nr of instructions: */
2508 a = (uint32_t *) *addrp;
2509 *a++ = 0x20e70001; /* lda t6,1(t6) */
2510 *addrp = (unsigned char *) a;
2511 break;
2512 default:
2513 bintrans_write_pc_inc(addrp);
2514 }
2515
2516 return 1;
2517 }
2518
2519
2520 /*
2521 * bintrans_backend_init():
2522 *
2523 * This is neccessary for broken 2.95.4 compilers on FreeBSD/Alpha 4.9,
2524 * and probably a few others. (For Compaq's CC, and for gcc 3.x, this
2525 * wouldn't be neccessary, and the old code would have worked.)
2526 */
2527 static void bintrans_backend_init(void)
2528 {
2529 int size;
2530 uint32_t *p, *q;
2531
2532
2533 /* "runchunk": */
2534 size = 256; /* NOTE: This MUST be enough, or we fail */
2535 p = (uint32_t *)mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC,
2536 MAP_ANON | MAP_PRIVATE, -1, 0);
2537
2538 /* If mmap() failed, try malloc(): */
2539 if (p == NULL) {
2540 p = malloc(size);
2541 if (p == NULL) {
2542 fprintf(stderr, "bintrans_backend_init(): out of memory\n");
2543 exit(1);
2544 }
2545 }
2546
2547 bintrans_runchunk = (void *)p;
2548
2549 *p++ = 0x23deffa0; /* lda sp,-0x60(sp) */
2550 *p++ = 0xb75e0000; /* stq ra,0(sp) */
2551 *p++ = 0xb53e0008; /* stq s0,8(sp) */
2552 *p++ = 0xb55e0010; /* stq s1,16(sp) */
2553 *p++ = 0xb57e0018; /* stq s2,24(sp) */
2554 *p++ = 0xb59e0020; /* stq s3,32(sp) */
2555 *p++ = 0xb5be0028; /* stq s4,40(sp) */
2556 *p++ = 0xb5de0030; /* stq s5,48(sp) */
2557 *p++ = 0xb5fe0038; /* stq s6,56(sp) */
2558 *p++ = 0xb7be0058; /* stq gp,0x58(sp) */
2559
2560 *p++ = 0xa4d00000 | ofs_pc; /* ldq t5,"pc"(a0) */
2561 *p++ = 0xa0f00000 | ofs_n; /* ldl t6,"bintrans_instructions_executed"(a0) */
2562 *p++ = 0xa5100000 | ofs_a0; /* ldq t7,"a0"(a0) */
2563 *p++ = 0xa6d00000 | ofs_a1; /* ldq t8,"a1"(a0) */
2564 *p++ = 0xa6f00000 | ofs_s0; /* ldq t9,"s0"(a0) */
2565 *p++ = 0xa1300000 | ofs_ds; /* ldl s0,"delay_slot"(a0) */
2566 *p++ = 0xa5500000 | ofs_ja; /* ldq s1,"delay_jmpaddr"(a0) */
2567 *p++ = 0xa5700000 | ofs_sp; /* ldq s2,"gpr[sp]"(a0) */
2568 *p++ = 0xa5900000 | ofs_ra; /* ldq s3,"gpr[ra]"(a0) */
2569 *p++ = 0xa5b00000 | ofs_t0; /* ldq s4,"gpr[t0]"(a0) */
2570 *p++ = 0xa5d00000 | ofs_t1; /* ldq s5,"gpr[t1]"(a0) */
2571 *p++ = 0xa5f00000 | ofs_h_s; /* ldq s6,host_store(a0) */
2572 *p++ = 0xa7100000 | ofs_h_l; /* ldq t10,host_load(a0) */
2573 *p++ = 0xa7300000 | ofs_v0; /* ldq t11,"gpr[v0]"(a0) */
2574
2575 *p++ = 0x6b514000; /* jsr ra,(a1),<back> */
2576
2577 *p++ = 0xb4d00000 | ofs_pc; /* stq t5,"pc"(a0) */
2578 *p++ = 0xb0f00000 | ofs_n; /* stl t6,"bintrans_instructions_executed"(a0) */
2579 *p++ = 0xb5100000 | ofs_a0; /* stq t7,"a0"(a0) */
2580 *p++ = 0xb6d00000 | ofs_a1; /* stq t8,"a1"(a0) */
2581 *p++ = 0xb6f00000 | ofs_s0; /* stq t9,"s0"(a0) */
2582 *p++ = 0xb1300000 | ofs_ds; /* stl s0,"delay_slot"(a0) */
2583 *p++ = 0xb5500000 | ofs_ja; /* stq s1,"delay_jmpaddr"(a0) */
2584 *p++ = 0xb5700000 | ofs_sp; /* stq s2,"gpr[sp]"(a0) */
2585 *p++ = 0xb5900000 | ofs_ra; /* stq s3,"gpr[ra]"(a0) */
2586 *p++ = 0xb5b00000 | ofs_t0; /* stq s4,"gpr[t0]"(a0) */
2587 *p++ = 0xb5d00000 | ofs_t1; /* stq s5,"gpr[t1]"(a0) */
2588 *p++ = 0xb7300000 | ofs_v0; /* stq t11,"gpr[v0]"(a0) */
2589
2590 *p++ = 0xa75e0000; /* ldq ra,0(sp) */
2591 *p++ = 0xa53e0008; /* ldq s0,8(sp) */
2592 *p++ = 0xa55e0010; /* ldq s1,16(sp) */
2593 *p++ = 0xa57e0018; /* ldq s2,24(sp) */
2594 *p++ = 0xa59e0020; /* ldq s3,32(sp) */
2595 *p++ = 0xa5be0028; /* ldq s4,40(sp) */
2596 *p++ = 0xa5de0030; /* ldq s5,48(sp) */
2597 *p++ = 0xa5fe0038; /* ldq s6,56(sp) */
2598 *p++ = 0xa7be0058; /* ldq gp,0x58(sp) */
2599 *p++ = 0x23de0060; /* lda sp,0x60(sp) */
2600 *p++ = 0x6bfa8000; /* ret */
2601
2602
2603 /* "jump to 32bit pc": */
2604 size = 128; /* WARNING! Don't make this too small. */
2605 p = (uint32_t *)mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC,
2606 MAP_ANON | MAP_PRIVATE, -1, 0);
2607
2608 /* If mmap() failed, try malloc(): */
2609 if (p == NULL) {
2610 p = malloc(size);
2611 if (p == NULL) {
2612 fprintf(stderr, "bintrans_backend_init(): out of memory\n");
2613 exit(1);
2614 }
2615 }
2616
2617 bintrans_jump_to_32bit_pc = (void *)p;
2618
2619 /* Don't execute too many instructions: */
2620 *p++ = 0x205f0000 | (N_SAFE_BINTRANS_LIMIT-1); /* lda t1,safe-1 */
2621
2622 *p++ = 0x40e20da1; /* cmple t6,t1,t0 */
2623 q = p; /* *q is updated later */
2624 *p++ = 0xe4200001; /* beq ret (far below) */
2625
2626 /*
2627 * Special case for 32-bit addressing:
2628 *
2629 * t1 = 1023;
2630 * t2 = ((pc >> 22) & t1) * sizeof(void *);
2631 * t3 = ((pc >> 12) & t1) * sizeof(void *);
2632 * t1 = pc & 4095;
2633 */
2634 *p++ = 0x205f1ff8; /* lda t1,1023 * 8 */
2635 *p++ = 0x48c27683; /* srl t5,19,t2 */
2636 *p++ = 0x48c13684; /* srl t5, 9,t3 */
2637
2638 *p++ = 0x44620003; /* and t2,t1,t2 */
2639
2640 /* ldq a2, vaddr_to_hostaddr_table0(a0) */
2641 *p++ = 0xa6500000 | ofs_tbl0;
2642
2643 /* a3 = tbl0[t2] (load entry from tbl0) */
2644 *p++ = 0x40720412; /* addq t2,a2,a2 */
2645 *p++ = 0x44820004; /* and t3,t1,t3 */
2646 *p++ = 0xa6720000; /* ldq a3,0(a2) */
2647 *p++ = 0x205f0ffc; /* lda t1,0xffc */
2648
2649 /*
2650 * a3 = tbl1[t3] (load entry from tbl1 (which is a3))
2651 */
2652 *p++ = 0x42640413; /* addq a3,t3,a3 */
2653
2654 *p++ = 0xa6730000 | ofs_c0; /* ldq a3,chunks[0](a3) */
2655
2656 *p++ = 0x44c20002; /* and t5,t1,t1 */
2657
2658 /*
2659 * NULL? Then just return.
2660 */
2661 *p++ = 0xf6600001; /* bne a3,<ok> */
2662 *p++ = 0x6bfa8001; /* ret */
2663
2664 *p++ = 0x40530402; /* addq t1,a3,t1 */
2665 *p++ = 0xa0220000; /* ldl t0,0(t1) */
2666
2667 *p++ = 0xa4700000 | ofs_cb; /* ldq t2,chunk_base_address(a0) */
2668
2669 /* No translation? Then return. */
2670 *p++ = 0xe4200002; /* beq t0,<skip> */
2671
2672 *p++ = 0x40230401; /* addq t0,t2,t0 */
2673 *p++ = 0x6be10000; /* jmp (t0) */
2674
2675 /* Now, update *q to point here: */
2676 *q = 0xe4200000 | (((size_t)p - (size_t)q)/4 - 1); /* beq ret */
2677
2678 /* Return to the main translation loop. */
2679 *p++ = 0x6bfa8001; /* ret */
2680 }
2681

  ViewVC Help
Powered by ViewVC 1.1.26