/[gxemul]/upstream/0.3.1/src/bintrans_alpha.c
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /upstream/0.3.1/src/bintrans_alpha.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3 - (show annotations)
Mon Oct 8 16:17:52 2007 UTC (16 years, 8 months ago) by dpavlin
File MIME type: text/plain
File size: 83665 byte(s)
0.3.1
1 /*
2 * Copyright (C) 2004-2005 Anders Gavare. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * 3. The name of the author may not be used to endorse or promote products
13 * derived from this software without specific prior written permission.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 *
27 *
28 * $Id: bintrans_alpha.c,v 1.114 2005/03/22 09:12:04 debug Exp $
29 *
30 * Alpha specific code for dynamic binary translation.
31 *
32 * See bintrans.c for more information. Included from bintrans.c.
33 *
34 *
35 * Some Alpha registers that are reasonable to use:
36 *
37 * t5..t7 6..8 3
38 * s0..s6 9..15 7
39 * a1..a5 17..21 5
40 * t8..t11 22..25 4
41 *
42 * These can be "mapped" to MIPS registers in the translated code,
43 * except a0 which points to the cpu struct, and t0..t4 (or so)
44 * which are used by the translated code as temporaries.
45 *
46 * 3 + 7 + 5 + 4 = 19 available registers. Of course, all (except
47 * s0..s6) must be saved when calling external functions, such as
48 * when doing load/store.
49 *
50 * Which are the 19 most commonly used MIPS registers? (This will
51 * include the pc, and the "current number of executed translated
52 * instructions.)
53 *
54 * The current allocation is as follows:
55 *
56 * Alpha: MIPS:
57 * ------ -----
58 *
59 * t5 pc (64-bit)
60 * t6 bintrans_instructions_executed (32-bit int)
61 * t7 a0 (mips register 4) (64-bit)
62 * t8 a1 (mips register 5) (64-bit)
63 * t9 s0 (mips register 16) (64-bit)
64 * t10 table0 cached (for load/store)
65 * t11 v0 (mips register 2) (64-bit)
66 * s0 delay_slot (32-bit int)
67 * s1 delay_jmpaddr (64-bit)
68 * s2 sp (mips register 29) (64-bit)
69 * s3 ra (mips register 31) (64-bit)
70 * s4 t0 (mips register 8) (64-bit)
71 * s5 t1 (mips register 9) (64-bit)
72 * s6 t2 (mips register 10) (64-bit)
73 */
74
75 #define MIPSREG_PC -3
76 #define MIPSREG_DELAY_SLOT -2
77 #define MIPSREG_DELAY_JMPADDR -1
78
79 #define ALPHA_T0 1
80 #define ALPHA_T1 2
81 #define ALPHA_T2 3
82 #define ALPHA_T3 4
83 #define ALPHA_T4 5
84 #define ALPHA_T5 6
85 #define ALPHA_T6 7
86 #define ALPHA_T7 8
87 #define ALPHA_S0 9
88 #define ALPHA_S1 10
89 #define ALPHA_S2 11
90 #define ALPHA_S3 12
91 #define ALPHA_S4 13
92 #define ALPHA_S5 14
93 #define ALPHA_S6 15
94 #define ALPHA_A0 16
95 #define ALPHA_A1 17
96 #define ALPHA_A2 18
97 #define ALPHA_A3 19
98 #define ALPHA_A4 20
99 #define ALPHA_A5 21
100 #define ALPHA_T8 22
101 #define ALPHA_T9 23
102 #define ALPHA_T10 24
103 #define ALPHA_T11 25
104 #define ALPHA_ZERO 31
105
106 static int map_MIPS_to_Alpha[32] = {
107 ALPHA_ZERO, -1, ALPHA_T11, -1, /* 0 .. 3 */
108 ALPHA_T7, ALPHA_T8, -1, -1, /* 4 .. 7 */
109 ALPHA_S4, ALPHA_S5, ALPHA_S6, -1, /* 8 .. 11 */
110 -1, -1, -1, -1, /* 12 .. 15 */
111 ALPHA_T9, -1, -1, -1, /* 16 .. 19 */
112 -1, -1, -1, -1, /* 20 .. 23 */
113 -1, -1, -1, -1, /* 24 .. 27 */
114 -1, ALPHA_S2, -1, ALPHA_S3, /* 28 .. 31 */
115 };
116
117
118 struct cpu dummy_cpu;
119 struct mips_coproc dummy_coproc;
120 struct vth32_table dummy_vth32_table;
121
122 unsigned char bintrans_alpha_imb[32] = {
123 0x86, 0x00, 0x00, 0x00, /* imb */
124 0x01, 0x80, 0xfa, 0x6b, /* ret */
125 0x1f, 0x04, 0xff, 0x47, /* nop */
126 0x00, 0x00, 0xfe, 0x2e, /* unop */
127 0x1f, 0x04, 0xff, 0x47, /* nop */
128 0x00, 0x00, 0xfe, 0x2e, /* unop */
129 0x1f, 0x04, 0xff, 0x47, /* nop */
130 0x00, 0x00, 0xfe, 0x2e /* unop */
131 };
132
133
134 /*
135 * bintrans_host_cacheinvalidate()
136 *
137 * Invalidate the host's instruction cache. On Alpha, we do this by
138 * executing an imb instruction.
139 *
140 * NOTE: A simple asm("imb"); would be enough here, but not all
141 * compilers have such simple constructs, so an entire function has to
142 * be written as bintrans_alpha_imb[] above.
143 */
144 static void bintrans_host_cacheinvalidate(unsigned char *p, size_t len)
145 {
146 /* Long form of ``asm("imb");'' */
147
148 void (*f)(void);
149 f = (void *)&bintrans_alpha_imb[0];
150 f();
151 }
152
153
154 /*
155 * lda sp,-128(sp) some margin
156 * stq ra,0(sp)
157 * stq s0,8(sp)
158 * stq s1,16(sp)
159 * stq s2,24(sp)
160 * stq s3,32(sp)
161 * stq s4,40(sp)
162 * stq s5,48(sp)
163 * stq s6,56(sp)
164 *
165 * jsr ra,(a1),<back>
166 * back:
167 *
168 * ldq ra,0(sp)
169 * ldq s0,8(sp)
170 * ldq s1,16(sp)
171 * ldq s2,24(sp)
172 * ldq s3,32(sp)
173 * ldq s4,40(sp)
174 * ldq s5,48(sp)
175 * ldq s6,56(sp)
176 * lda sp,128(sp)
177 * ret
178 */
179 /* note: offsetof (in stdarg.h) could possibly be used, but I'm not sure
180 if it will take care of the compiler problems... */
181 #define ofs_pc (((size_t)&dummy_cpu.pc) - ((size_t)&dummy_cpu))
182 #define ofs_pc_last (((size_t)&dummy_cpu.cd.mips.pc_last) - ((size_t)&dummy_cpu))
183 #define ofs_n (((size_t)&dummy_cpu.cd.mips.bintrans_instructions_executed) - ((size_t)&dummy_cpu))
184 #define ofs_ds (((size_t)&dummy_cpu.cd.mips.delay_slot) - ((size_t)&dummy_cpu))
185 #define ofs_ja (((size_t)&dummy_cpu.cd.mips.delay_jmpaddr) - ((size_t)&dummy_cpu))
186 #define ofs_sp (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_SP]) - ((size_t)&dummy_cpu))
187 #define ofs_ra (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_RA]) - ((size_t)&dummy_cpu))
188 #define ofs_a0 (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_A0]) - ((size_t)&dummy_cpu))
189 #define ofs_a1 (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_A1]) - ((size_t)&dummy_cpu))
190 #define ofs_t0 (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_T0]) - ((size_t)&dummy_cpu))
191 #define ofs_t1 (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_T1]) - ((size_t)&dummy_cpu))
192 #define ofs_t2 (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_T2]) - ((size_t)&dummy_cpu))
193 #define ofs_v0 (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_V0]) - ((size_t)&dummy_cpu))
194 #define ofs_s0 (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_S0]) - ((size_t)&dummy_cpu))
195 #define ofs_tbl0 (((size_t)&dummy_cpu.cd.mips.vaddr_to_hostaddr_table0) - ((size_t)&dummy_cpu))
196 #define ofs_c0 ((size_t)&dummy_vth32_table.bintrans_chunks[0] - (size_t)&dummy_vth32_table)
197 #define ofs_cb (((size_t)&dummy_cpu.cd.mips.chunk_base_address) - (size_t)&dummy_cpu)
198
199
200 static uint32_t bintrans_alpha_loadstore_32bit[19] = {
201 /*
202 * t1 = 1023;
203 * t2 = ((a1 >> 22) & t1) * sizeof(void *);
204 * t3 = ((a1 >> 12) & t1) * sizeof(void *);
205 * t1 = a1 & 4095;
206 *
207 * f8 1f 5f 20 lda t1,1023 * 8
208 * 83 76 22 4a srl a1,19,t2
209 * 84 36 21 4a srl a1, 9,t3
210 * 03 00 62 44 and t2,t1,t2
211 */
212 0x205f1ff8,
213 0x4a227683,
214 0x4a213684,
215 0x44620003,
216
217 /*
218 * t10 is vaddr_to_hostaddr_table0
219 *
220 * a3 = tbl0[t2] (load entry from tbl0)
221 * 12 04 03 43 addq t10,t2,a2
222 */
223 0x43030412,
224
225 /* 04 00 82 44 and t3,t1,t3 */
226 0x44820004,
227
228 /* 00 00 72 a6 ldq a3,0(a2) */
229 0xa6720000,
230
231 /* ff 0f 5f 20 lda t1,4095 */
232 0x205f0fff,
233
234 /*
235 * a3 = tbl1[t3] (load entry from tbl1 (which is a3))
236 * 13 04 64 42 addq a3,t3,a3
237 */
238 0x42640413,
239
240 /* 02 00 22 46 and a1,t1,t1 */
241 0x46220002,
242
243 /* 00 00 73 a6 ldq a3,0(a3) */
244 0xa6730000,
245
246 /* NULL? Then return failure at once. */
247 /* bne a3, skip */
248 0xf6600003,
249
250 0x243f0000 | (BINTRANS_DONT_RUN_NEXT >> 16), /* ldah t0,256 */
251 0x44270407, /* or t0,t6,t6 */
252 0x6bfa8001, /* ret */
253
254 /* skip: */
255
256 /* 01 30 60 46 and a3,0x1,t0 */
257 0x46603001,
258
259 /* Get rid of the lowest bit: */
260 /* 33 05 61 42 subq a3,t0,a3 */
261 0x42610533,
262
263 /* The rest of the load/store code was written with t3 as the address. */
264
265 /* Add the offset within the page: */
266 /* 04 04 62 42 addq a3,t1,t3 */
267 0x42620404,
268
269 0x6be50000 /* jmp (t4) */
270 };
271
272 static void (*bintrans_runchunk)(struct cpu *, unsigned char *);
273
274 static void (*bintrans_jump_to_32bit_pc)(struct cpu *);
275
276 static void (*bintrans_loadstore_32bit)
277 (struct cpu *) = (void *)bintrans_alpha_loadstore_32bit;
278
279
280 /*
281 * bintrans_write_quickjump():
282 */
283 static void bintrans_write_quickjump(struct memory *mem,
284 unsigned char *quickjump_code, uint32_t chunkoffset)
285 {
286 int ofs;
287 uint64_t alpha_addr = chunkoffset +
288 (size_t)mem->translation_code_chunk_space;
289 uint32_t *a = (uint32_t *)quickjump_code;
290
291 ofs = (alpha_addr - ((size_t)a+4)) / 4;
292
293 /* printf("chunkoffset=%i, %016llx %016llx %i\n",
294 chunkoffset, (long long)alpha_addr, (long long)a, ofs); */
295
296 if (ofs > -0xfffff && ofs < 0xfffff) {
297 *a++ = 0xc3e00000 | (ofs & 0x1fffff); /* br <chunk> */
298 }
299 }
300
301
302 /*
303 * bintrans_write_chunkreturn():
304 */
305 static void bintrans_write_chunkreturn(unsigned char **addrp)
306 {
307 uint32_t *a = (uint32_t *) *addrp;
308 *a++ = 0x6bfa8001; /* ret */
309 *addrp = (unsigned char *) a;
310 }
311
312
313 /*
314 * bintrans_write_chunkreturn_fail():
315 */
316 static void bintrans_write_chunkreturn_fail(unsigned char **addrp)
317 {
318 uint32_t *a = (uint32_t *) *addrp;
319 /* 00 01 3f 24 ldah t0,256 */
320 /* 07 04 27 44 or t0,t6,t6 */
321 *a++ = 0x243f0000 | (BINTRANS_DONT_RUN_NEXT >> 16);
322 *a++ = 0x44270407;
323 *a++ = 0x6bfa8001; /* ret */
324 *addrp = (unsigned char *) a;
325 }
326
327
328 /*
329 * bintrans_move_MIPS_reg_into_Alpha_reg():
330 */
331 static void bintrans_move_MIPS_reg_into_Alpha_reg(unsigned char **addrp, int mipsreg, int alphareg)
332 {
333 uint32_t *a = (uint32_t *) *addrp;
334 int ofs, alpha_mips_reg;
335
336 switch (mipsreg) {
337 case MIPSREG_PC:
338 /* addq t5,0,alphareg */
339 *a++ = 0x40c01400 | alphareg;
340 break;
341 case MIPSREG_DELAY_SLOT:
342 /* addq s0,0,alphareg */
343 *a++ = 0x41201400 | alphareg;
344 break;
345 case MIPSREG_DELAY_JMPADDR:
346 /* addq s1,0,alphareg */
347 *a++ = 0x41401400 | alphareg;
348 break;
349 default:
350 alpha_mips_reg = map_MIPS_to_Alpha[mipsreg];
351 if (alpha_mips_reg < 0) {
352 ofs = ((size_t)&dummy_cpu.cd.mips.gpr[mipsreg]) - (size_t)&dummy_cpu;
353 /* ldq alphareg,gpr[mipsreg](a0) */
354 *a++ = 0xa4100000 | (alphareg << 21) | ofs;
355 } else {
356 /* addq alpha_mips_reg,0,alphareg */
357 *a++ = 0x40001400 | (alpha_mips_reg << 21) | alphareg;
358 }
359 }
360 *addrp = (unsigned char *) a;
361 }
362
363
364 /*
365 * bintrans_move_Alpha_reg_into_MIPS_reg():
366 */
367 static void bintrans_move_Alpha_reg_into_MIPS_reg(unsigned char **addrp, int alphareg, int mipsreg)
368 {
369 uint32_t *a = (uint32_t *) *addrp;
370 int ofs, alpha_mips_reg;
371
372 switch (mipsreg) {
373 case MIPSREG_PC:
374 /* addq alphareg,0,t5 */
375 *a++ = 0x40001406 | (alphareg << 21);
376 break;
377 case MIPSREG_DELAY_SLOT:
378 /* addq alphareg,0,s0 */
379 *a++ = 0x40001409 | (alphareg << 21);
380 break;
381 case MIPSREG_DELAY_JMPADDR:
382 /* addq alphareg,0,s1 */
383 *a++ = 0x4000140a | (alphareg << 21);
384 break;
385 case 0: /* the zero register */
386 break;
387 default:
388 alpha_mips_reg = map_MIPS_to_Alpha[mipsreg];
389 if (alpha_mips_reg < 0) {
390 /* stq alphareg,gpr[mipsreg](a0) */
391 ofs = ((size_t)&dummy_cpu.cd.mips.gpr[mipsreg]) - (size_t)&dummy_cpu;
392 *a++ = 0xb4100000 | (alphareg << 21) | ofs;
393 } else {
394 /* addq alphareg,0,alpha_mips_reg */
395 *a++ = 0x40001400 | (alphareg << 21) | alpha_mips_reg;
396 }
397 }
398 *addrp = (unsigned char *) a;
399 }
400
401
402 /*
403 * bintrans_write_pc_inc():
404 */
405 static void bintrans_write_pc_inc(unsigned char **addrp)
406 {
407 uint32_t *a = (uint32_t *) *addrp;
408
409 /* lda t6,1(t6) */
410 *a++ = 0x20e70001;
411
412 /* lda t5,4(t5) */
413 *a++ = 0x20c60004;
414
415 *addrp = (unsigned char *) a;
416 }
417
418
419 /*
420 * bintrans_write_instruction__addiu_etc():
421 */
422 static int bintrans_write_instruction__addiu_etc(unsigned char **addrp,
423 int rt, int rs, int imm, int instruction_type)
424 {
425 uint32_t *a;
426 unsigned int uimm;
427 int alpha_rs, alpha_rt;
428
429 /* TODO: overflow detection for ADDI and DADDI */
430 switch (instruction_type) {
431 case HI6_ADDI:
432 case HI6_DADDI:
433 return 0;
434 }
435
436 a = (uint32_t *) *addrp;
437
438 if (rt == 0)
439 goto rt0;
440
441 uimm = imm & 0xffff;
442
443 alpha_rs = map_MIPS_to_Alpha[rs];
444 alpha_rt = map_MIPS_to_Alpha[rt];
445
446 if (uimm == 0 && (instruction_type == HI6_ADDI ||
447 instruction_type == HI6_ADDIU || instruction_type == HI6_DADDI ||
448 instruction_type == HI6_DADDIU || instruction_type == HI6_ORI)) {
449 if (alpha_rs >= 0 && alpha_rt >= 0) {
450 /* addq rs,0,rt */
451 *a++ = 0x40001400 | (alpha_rs << 21) | alpha_rt;
452 } else {
453 *addrp = (unsigned char *) a;
454 bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rs, ALPHA_T0);
455 bintrans_move_Alpha_reg_into_MIPS_reg(addrp, ALPHA_T0, rt);
456 a = (uint32_t *) *addrp;
457 }
458 goto rt0;
459 }
460
461 if (alpha_rs < 0) {
462 /* ldq t0,"rs"(a0) */
463 *addrp = (unsigned char *) a;
464 bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rs, ALPHA_T0);
465 a = (uint32_t *) *addrp;
466 alpha_rs = ALPHA_T0;
467 }
468
469 if (alpha_rt < 0)
470 alpha_rt = ALPHA_T0;
471
472 /* Place the result of the calculation in alpha_rt: */
473
474 switch (instruction_type) {
475 case HI6_ADDIU:
476 case HI6_DADDIU:
477 case HI6_ADDI:
478 case HI6_DADDI:
479 if (uimm < 256) {
480 if (instruction_type == HI6_ADDI ||
481 instruction_type == HI6_ADDIU) {
482 /* addl rs,uimm,rt */
483 *a++ = 0x40001000 | (alpha_rs << 21)
484 | (uimm << 13) | alpha_rt;
485 } else {
486 /* addq rs,uimm,rt */
487 *a++ = 0x40001400 | (alpha_rs << 21)
488 | (uimm << 13) | alpha_rt;
489 }
490 } else {
491 /* lda rt,imm(rs) */
492 *a++ = 0x20000000 | (alpha_rt << 21) | (alpha_rs << 16) | uimm;
493 if (instruction_type == HI6_ADDI ||
494 instruction_type == HI6_ADDIU) {
495 /* sign extend, 32->64 bits: addl t0,zero,t0 */
496 *a++ = 0x40001000 | (alpha_rt << 21) | alpha_rt;
497 }
498 }
499 break;
500 case HI6_ANDI:
501 case HI6_ORI:
502 case HI6_XORI:
503 if (uimm >= 256) {
504 /* lda t1,4660 */
505 *a++ = 0x205f0000 | uimm;
506 if (uimm & 0x8000) {
507 /* 01 00 42 24 ldah t1,1(t1) <-- if negative only */
508 *a++ = 0x24420001;
509 }
510 }
511
512 switch (instruction_type) {
513 case HI6_ANDI:
514 if (uimm < 256) {
515 /* and rs,uimm,rt */
516 *a++ = 0x44001000 | (alpha_rs << 21)
517 | (uimm << 13) | alpha_rt;
518 } else {
519 /* and rs,t1,rt */
520 *a++ = 0x44020000 | (alpha_rs << 21) | alpha_rt;
521 }
522 break;
523 case HI6_ORI:
524 if (uimm < 256) {
525 /* or rs,uimm,rt */
526 *a++ = 0x44001400 | (alpha_rs << 21)
527 | (uimm << 13) | alpha_rt;
528 } else {
529 /* or rs,t1,rt */
530 *a++ = 0x44020400 | (alpha_rs << 21) | alpha_rt;
531 }
532 break;
533 case HI6_XORI:
534 if (uimm < 256) {
535 /* xor rs,uimm,rt */
536 *a++ = 0x44001800 | (alpha_rs << 21)
537 | (uimm << 13) | alpha_rt;
538 } else {
539 /* xor rs,t1,rt */
540 *a++ = 0x44020800 | (alpha_rs << 21) | alpha_rt;
541 }
542 break;
543 }
544 break;
545 case HI6_SLTI:
546 case HI6_SLTIU:
547 /* lda t1,4660 */
548 *a++ = 0x205f0000 | uimm;
549
550 switch (instruction_type) {
551 case HI6_SLTI:
552 /* cmplt rs,t1,rt */
553 *a++ = 0x400209a0 | (alpha_rs << 21) | alpha_rt;
554 break;
555 case HI6_SLTIU:
556 /* cmpult rs,t1,rt */
557 *a++ = 0x400203a0 | (alpha_rs << 21) | alpha_rt;
558 break;
559 }
560 break;
561 }
562
563 if (alpha_rt == ALPHA_T0) {
564 *a++ = 0x5fff041f; /* fnop */
565 *addrp = (unsigned char *) a;
566 bintrans_move_Alpha_reg_into_MIPS_reg(addrp, ALPHA_T0, rt);
567 a = (uint32_t *) *addrp;
568 }
569
570 rt0:
571 *addrp = (unsigned char *) a;
572 bintrans_write_pc_inc(addrp);
573 return 1;
574 }
575
576
577 /*
578 * bintrans_write_instruction__addu_etc():
579 */
580 static int bintrans_write_instruction__addu_etc(unsigned char **addrp,
581 int rd, int rs, int rt, int sa, int instruction_type)
582 {
583 unsigned char *a, *unmodified = NULL;
584 int load64 = 0, store = 1, ofs, alpha_rd = ALPHA_T0;
585
586 alpha_rd = map_MIPS_to_Alpha[rd];
587 if (alpha_rd < 0)
588 alpha_rd = ALPHA_T0;
589
590 switch (instruction_type) {
591 case SPECIAL_DIV:
592 case SPECIAL_DIVU:
593 return 0;
594 }
595
596 switch (instruction_type) {
597 case SPECIAL_DADDU:
598 case SPECIAL_DSUBU:
599 case SPECIAL_OR:
600 case SPECIAL_AND:
601 case SPECIAL_NOR:
602 case SPECIAL_XOR:
603 case SPECIAL_DSLL:
604 case SPECIAL_DSRL:
605 case SPECIAL_DSRA:
606 case SPECIAL_DSLL32:
607 case SPECIAL_DSRL32:
608 case SPECIAL_DSRA32:
609 case SPECIAL_SLT:
610 case SPECIAL_SLTU:
611 case SPECIAL_MOVZ:
612 case SPECIAL_MOVN:
613 load64 = 1;
614 }
615
616 switch (instruction_type) {
617 case SPECIAL_MULT:
618 case SPECIAL_MULTU:
619 if (rd != 0)
620 return 0;
621 store = 0;
622 break;
623 default:
624 if (rd == 0)
625 goto rd0;
626 }
627
628 a = *addrp;
629
630 if ((instruction_type == SPECIAL_ADDU || instruction_type == SPECIAL_DADDU
631 || instruction_type == SPECIAL_OR) && rt == 0) {
632 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0);
633 if (!load64) {
634 *a++ = 0x01; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */
635 }
636 bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rd);
637 *addrp = a;
638 goto rd0;
639 }
640
641 /* t0 = rs, t1 = rt */
642 if (load64) {
643 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0);
644 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T1);
645 } else {
646 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0);
647 *a++ = 0x01; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */
648 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T1);
649 *a++ = 0x02; *a++ = 0x10; *a++ = 0x40; *a++ = 0x40; /* addl t1,0,t1 */
650 }
651
652 switch (instruction_type) {
653 case SPECIAL_ADDU:
654 *a++ = alpha_rd; *a++ = 0x00; *a++ = 0x22; *a++ = 0x40; /* addl t0,t1,rd */
655 break;
656 case SPECIAL_DADDU:
657 *a++ = alpha_rd; *a++ = 0x04; *a++ = 0x22; *a++ = 0x40; /* addq t0,t1,rd */
658 break;
659 case SPECIAL_SUBU:
660 *a++ = 0x20 + alpha_rd; *a++ = 0x01; *a++ = 0x22; *a++ = 0x40; /* subl t0,t1,t0 */
661 break;
662 case SPECIAL_DSUBU:
663 *a++ = 0x20 + alpha_rd; *a++ = 0x05; *a++ = 0x22; *a++ = 0x40; /* subq t0,t1,t0 */
664 break;
665 case SPECIAL_AND:
666 *a++ = alpha_rd; *a++ = 0x00; *a++ = 0x22; *a++ = 0x44; /* and t0,t1,t0 */
667 break;
668 case SPECIAL_OR:
669 *a++ = alpha_rd; *a++ = 0x04; *a++ = 0x22; *a++ = 0x44; /* or t0,t1,t0 */
670 break;
671 case SPECIAL_NOR:
672 *a++ = 0x01; *a++ = 0x04; *a++ = 0x22; *a++ = 0x44; /* or t0,t1,t0 */
673 *a++ = alpha_rd; *a++ = 0x05; *a++ = 0xe1; *a++ = 0x47; /* not t0,t0 */
674 break;
675 case SPECIAL_XOR:
676 *a++ = alpha_rd; *a++ = 0x08; *a++ = 0x22; *a++ = 0x44; /* xor t0,t1,t0 */
677 break;
678 case SPECIAL_SLL:
679 *a++ = 0x21; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sll t1,sa,t0 */
680 *a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */
681 break;
682 case SPECIAL_SLLV:
683 /* rd = rt << (rs&31) (logical) t0 = t1 << (t0&31) */
684 *a++ = 0x01; *a++ = 0xf0; *a++ = 0x23; *a++ = 0x44; /* and t0,31,t0 */
685 *a++ = 0x21; *a++ = 0x07; *a++ = 0x41; *a++ = 0x48; /* sll t1,t0,t0 */
686 *a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */
687 break;
688 case SPECIAL_DSLL:
689 *a++ = 0x20 + alpha_rd; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sll t1,sa,t0 */
690 break;
691 case SPECIAL_DSLL32:
692 sa += 32;
693 *a++ = 0x20 + alpha_rd; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sll t1,sa,t0 */
694 break;
695 case SPECIAL_SRA:
696 *a++ = 0x81; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sra t1,sa,t0 */
697 *a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */
698 break;
699 case SPECIAL_SRAV:
700 /* rd = rt >> (rs&31) (arithmetic) t0 = t1 >> (t0&31) */
701 *a++ = 0x01; *a++ = 0xf0; *a++ = 0x23; *a++ = 0x44; /* and t0,31,t0 */
702 *a++ = 0x81; *a++ = 0x07; *a++ = 0x41; *a++ = 0x48; /* sra t1,t0,t0 */
703 *a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */
704 break;
705 case SPECIAL_DSRA:
706 *a++ = 0x80 + alpha_rd; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sra t1,sa,t0 */
707 break;
708 case SPECIAL_DSRA32:
709 sa += 32;
710 *a++ = 0x80 + alpha_rd; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sra t1,sa,t0 */
711 break;
712 case SPECIAL_SRL:
713 *a++ = 0x22; *a++ = 0xf6; *a++ = 0x41; *a++ = 0x48; /* zapnot t1,0xf,t1 (use only lowest 32 bits) */
714 /* Note: bits of sa are distributed among two different bytes. */
715 *a++ = 0x81; *a++ = 0x16 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48;
716 *a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl */
717 break;
718 case SPECIAL_SRLV:
719 /* rd = rt >> (rs&31) (logical) t0 = t1 >> (t0&31) */
720 *a++ = 0x22; *a++ = 0xf6; *a++ = 0x41; *a++ = 0x48; /* zapnot t1,0xf,t1 (use only lowest 32 bits) */
721 *a++ = 0x01; *a++ = 0xf0; *a++ = 0x23; *a++ = 0x44; /* and t0,31,t0 */
722 *a++ = 0x81; *a++ = 0x06; *a++ = 0x41; *a++ = 0x48; /* srl t1,t0,t0 */
723 *a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */
724 break;
725 case SPECIAL_DSRL:
726 /* Note: bits of sa are distributed among two different bytes. */
727 *a++ = 0x80 + alpha_rd; *a++ = 0x16 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48;
728 break;
729 case SPECIAL_DSRL32:
730 /* Note: bits of sa are distributed among two different bytes. */
731 sa += 32;
732 *a++ = 0x80 + alpha_rd; *a++ = 0x16 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48;
733 break;
734 case SPECIAL_SLT:
735 *a++ = 0xa0 + alpha_rd; *a++ = 0x09; *a++ = 0x22; *a++ = 0x40; /* cmplt t0,t1,t0 */
736 break;
737 case SPECIAL_SLTU:
738 *a++ = 0xa0 + alpha_rd; *a++ = 0x03; *a++ = 0x22; *a++ = 0x40; /* cmpult t0,t1,t0 */
739 break;
740 case SPECIAL_MULT:
741 case SPECIAL_MULTU:
742 if (instruction_type == SPECIAL_MULTU) {
743 /* 21 f6 21 48 zapnot t0,0xf,t0 */
744 /* 22 f6 41 48 zapnot t1,0xf,t1 */
745 *a++ = 0x21; *a++ = 0xf6; *a++ = 0x21; *a++ = 0x48;
746 *a++ = 0x22; *a++ = 0xf6; *a++ = 0x41; *a++ = 0x48;
747 }
748
749 /* 03 04 22 4c mulq t0,t1,t2 */
750 *a++ = 0x03; *a++ = 0x04; *a++ = 0x22; *a++ = 0x4c;
751
752 /* 01 10 60 40 addl t2,0,t0 */
753 *a++ = 0x01; *a++ = 0x10; *a++ = 0x60; *a++ = 0x40;
754
755 ofs = ((size_t)&dummy_cpu.cd.mips.lo) - (size_t)&dummy_cpu;
756 *a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x30; *a++ = 0xb4;
757
758 /* 81 17 64 48 sra t2,0x20,t0 */
759 *a++ = 0x81; *a++ = 0x17; *a++ = 0x64; *a++ = 0x48;
760 *a++ = 0x01; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */
761 ofs = ((size_t)&dummy_cpu.cd.mips.hi) - (size_t)&dummy_cpu;
762 *a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x30; *a++ = 0xb4;
763 break;
764 case SPECIAL_MOVZ:
765 /* if rt=0 then rd=rs ==> if t1!=0 then t0=unmodified else t0=rd */
766 /* 00 00 40 f4 bne t1,unmodified */
767 unmodified = a;
768 *a++ = 0x00; *a++ = 0x00; *a++ = 0x40; *a++ = 0xf4;
769 alpha_rd = ALPHA_T0;
770 break;
771 case SPECIAL_MOVN:
772 /* if rt!=0 then rd=rs ==> if t1=0 then t0=unmodified else t0=rd */
773 /* 00 00 40 e4 beq t1,unmodified */
774 unmodified = a;
775 *a++ = 0x00; *a++ = 0x00; *a++ = 0x40; *a++ = 0xe4;
776 alpha_rd = ALPHA_T0;
777 break;
778 }
779
780 if (store && alpha_rd == ALPHA_T0) {
781 bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rd);
782 }
783
784 if (unmodified != NULL)
785 *unmodified = ((size_t)a - (size_t)unmodified - 4) / 4;
786
787 *addrp = a;
788 rd0:
789 bintrans_write_pc_inc(addrp);
790 return 1;
791 }
792
793
794 /*
795 * bintrans_write_instruction__branch():
796 */
797 static int bintrans_write_instruction__branch(unsigned char **addrp,
798 int instruction_type, int regimm_type, int rt, int rs, int imm)
799 {
800 uint32_t *a, *b, *c = NULL;
801 int alpha_rs, alpha_rt, likely = 0, ofs;
802
803 alpha_rs = map_MIPS_to_Alpha[rs];
804 alpha_rt = map_MIPS_to_Alpha[rt];
805
806 switch (instruction_type) {
807 case HI6_BEQL:
808 case HI6_BNEL:
809 case HI6_BLEZL:
810 case HI6_BGTZL:
811 likely = 1;
812 }
813
814 /*
815 * t0 = gpr[rt]; t1 = gpr[rs];
816 *
817 * 50 00 30 a4 ldq t0,80(a0)
818 * 58 00 50 a4 ldq t1,88(a0)
819 */
820
821 switch (instruction_type) {
822 case HI6_BEQ:
823 case HI6_BNE:
824 case HI6_BEQL:
825 case HI6_BNEL:
826 if (alpha_rt < 0) {
827 bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rt, ALPHA_T0);
828 alpha_rt = ALPHA_T0;
829 }
830 }
831
832 if (alpha_rs < 0) {
833 bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rs, ALPHA_T1);
834 alpha_rs = ALPHA_T1;
835 }
836
837 a = (uint32_t *) *addrp;
838
839 /*
840 * Compare alpha_rt (t0) and alpha_rs (t1) for equality (BEQ).
841 * If the result was false (equal to zero), then skip a lot
842 * of instructions:
843 *
844 * a1 05 22 40 cmpeq t0,t1,t0
845 * 01 00 20 e4 beq t0,14 <f+0x14>
846 */
847 b = NULL;
848 if ((instruction_type == HI6_BEQ ||
849 instruction_type == HI6_BEQL) && rt != rs) {
850 /* cmpeq rt,rs,t0 */
851 *a++ = 0x400005a1 | (alpha_rt << 21) | (alpha_rs << 16);
852 b = a;
853 *a++ = 0xe4200001; /* beq */
854 }
855 if (instruction_type == HI6_BNE || instruction_type == HI6_BNEL) {
856 /* cmpeq rt,rs,t0 */
857 *a++ = 0x400005a1 | (alpha_rt << 21) | (alpha_rs << 16);
858 b = a;
859 *a++ = 0xf4200001; /* bne */
860 }
861 if (instruction_type == HI6_BLEZ || instruction_type == HI6_BLEZL) {
862 /* cmple rs,0,t0 */
863 *a++ = 0x40001da1 | (alpha_rs << 21);
864 b = a;
865 *a++ = 0xe4200001; /* beq */
866 }
867 if (instruction_type == HI6_BGTZ || instruction_type == HI6_BGTZL) {
868 /* cmple rs,0,t0 */
869 *a++ = 0x40001da1 | (alpha_rs << 21);
870 b = a;
871 *a++ = 0xf4200001; /* bne */
872 }
873 if (instruction_type == HI6_REGIMM && regimm_type == REGIMM_BLTZ) {
874 /* cmplt rs,0,t0 */
875 *a++ = 0x400019a1 | (alpha_rs << 21);
876 b = a;
877 *a++ = 0xe4200001; /* beq */
878 }
879 if (instruction_type == HI6_REGIMM && regimm_type == REGIMM_BGEZ) {
880 *a++ = 0x207fffff; /* lda t2,-1 */
881 /* cmple rs,t2,t0 */
882 *a++ = 0x40030da1 | (alpha_rs << 21);
883 b = a;
884 *a++ = 0xf4200001; /* bne */
885 }
886
887 /*
888 * Perform the jump by setting cpu->delay_slot = TO_BE_DELAYED
889 * and cpu->delay_jmpaddr = pc + 4 + (imm << 2).
890 *
891 * 04 00 26 20 lda t0,4(t5) add 4
892 * c8 01 5f 20 lda t1,456
893 * 4a 04 41 40 s4addq t1,t0,s1 s1 = (t1<<2) + t0
894 */
895
896 *a++ = 0x20260004; /* lda t0,4(t5) */
897 *a++ = 0x205f0000 | (imm & 0xffff); /* lda */
898 *a++ = 0x4041044a; /* s4addq */
899
900 /* 02 00 3f 21 lda s0,TO_BE_DELAYED */
901 *a++ = 0x213f0000 | TO_BE_DELAYED;
902
903 /*
904 * Special case: "likely"-branches:
905 */
906 if (likely) {
907 c = a;
908 *a++ = 0xc3e00001; /* br delayed_ok */
909
910 if (b != NULL)
911 *((unsigned char *)b) = ((size_t)a - (size_t)b - 4) / 4;
912
913 /* cpu->cd.mips.nullify_next = 1; */
914 /* 01 00 3f 20 lda t0,1 */
915 *a++ = 0x203f0001;
916 ofs = (size_t)&dummy_cpu.cd.mips.nullify_next - (size_t)&dummy_cpu;
917 *a++ = 0xb0300000 | (ofs & 0xffff);
918
919 /* fail, so that the next instruction is handled manually: */
920 *addrp = (unsigned char *) a;
921 bintrans_write_pc_inc(addrp);
922 bintrans_write_chunkreturn_fail(addrp);
923 a = (uint32_t *) *addrp;
924
925 if (c != NULL)
926 *((unsigned char *)c) = ((size_t)a - (size_t)c - 4) / 4;
927 } else {
928 /* Normal (non-likely) exit: */
929 if (b != NULL)
930 *((unsigned char *)b) = ((size_t)a - (size_t)b - 4) / 4;
931 }
932
933 *addrp = (unsigned char *) a;
934 bintrans_write_pc_inc(addrp);
935 return 1;
936 }
937
938
939 /*
940 * bintrans_write_instruction__jr():
941 */
942 static int bintrans_write_instruction__jr(unsigned char **addrp, int rs, int rd, int special)
943 {
944 uint32_t *a;
945 int alpha_rd;
946
947 alpha_rd = map_MIPS_to_Alpha[rd];
948 if (alpha_rd < 0)
949 alpha_rd = ALPHA_T0;
950
951 /*
952 * Perform the jump by setting cpu->delay_slot = TO_BE_DELAYED
953 * and cpu->delay_jmpaddr = gpr[rs].
954 */
955
956 bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rs, ALPHA_S1);
957
958 a = (uint32_t *) *addrp;
959 /* 02 00 3f 21 lda s0,TO_BE_DELAYED */
960 *a++ = 0x213f0000 | TO_BE_DELAYED;
961 *addrp = (unsigned char *) a;
962
963 if (special == SPECIAL_JALR && rd != 0) {
964 /* gpr[rd] = retaddr (pc + 8) */
965 a = (uint32_t *) *addrp;
966 /* lda alpha_rd,8(t5) */
967 *a++ = 0x20060008 | (alpha_rd << 21);
968 *addrp = (unsigned char *) a;
969 if (alpha_rd == ALPHA_T0)
970 bintrans_move_Alpha_reg_into_MIPS_reg(addrp, ALPHA_T0, rd);
971 }
972
973 bintrans_write_pc_inc(addrp);
974 return 1;
975 }
976
977
978 /*
979 * bintrans_write_instruction__jal():
980 */
981 static int bintrans_write_instruction__jal(unsigned char **addrp,
982 int imm, int link)
983 {
984 uint32_t *a;
985
986 a = (uint32_t *) *addrp;
987
988 /* gpr[31] = retaddr (NOTE: mips register 31 is in alpha reg s3) */
989 if (link) {
990 *a++ = 0x21860008; /* lda s3,8(t5) */
991 }
992
993 /* Set the jmpaddr to top 4 bits of pc + lowest 28 bits of imm*4: */
994
995 /*
996 * imm = 4*imm;
997 * t0 = ((pc + 4) & ~0x0fffffff) | imm;
998 *
999 * 04 00 26 20 lda t0,4(t5) <-- because the jump is from the delay slot
1000 * 23 01 5f 24 ldah t1,291
1001 * 67 45 42 20 lda t1,17767(t1)
1002 * 00 f0 7f 24 ldah t2,-4096
1003 * 04 00 23 44 and t0,t2,t3
1004 * 0a 04 44 44 or t1,t3,s1
1005 */
1006 imm *= 4;
1007 *a++ = 0x20260004;
1008 *a++ = 0x245f0000 | ((imm >> 16) + (imm & 0x8000? 1 : 0));
1009 *a++ = 0x20420000 | (imm & 0xffff);
1010 *a++ = 0x247ff000;
1011 *a++ = 0x44230004;
1012 *a++ = 0x4444040a;
1013
1014 /* 02 00 3f 21 lda s0,TO_BE_DELAYED */
1015 *a++ = 0x213f0000 | TO_BE_DELAYED;
1016
1017 /* If the machine continues executing here, it will return
1018 to the main loop, which is fine. */
1019
1020 *addrp = (unsigned char *) a;
1021 bintrans_write_pc_inc(addrp);
1022 return 1;
1023 }
1024
1025
1026 /*
1027 * bintrans_write_instruction__delayedbranch():
1028 */
1029 static int bintrans_write_instruction__delayedbranch(
1030 struct memory *mem, unsigned char **addrp,
1031 uint32_t *potential_chunk_p, uint32_t *chunks,
1032 int only_care_about_chunk_p, int p, int forward)
1033 {
1034 unsigned char *a, *skip=NULL, *generic64bit;
1035 int ofs;
1036 uint64_t alpha_addr, subaddr;
1037
1038 a = *addrp;
1039
1040 if (!only_care_about_chunk_p) {
1041 /* Skip all of this if there is no branch: */
1042 skip = a;
1043 *a++ = 0; *a++ = 0; *a++ = 0x20; *a++ = 0xe5; /* beq s0,skip */
1044
1045 /*
1046 * Perform the jump by setting cpu->delay_slot = 0
1047 * and pc = cpu->delay_jmpaddr.
1048 */
1049 /* 00 00 3f 21 lda s0,0 */
1050 *a++ = 0; *a++ = 0; *a++ = 0x3f; *a++ = 0x21;
1051
1052 bintrans_move_MIPS_reg_into_Alpha_reg(&a, MIPSREG_DELAY_JMPADDR, ALPHA_T0);
1053 bintrans_move_MIPS_reg_into_Alpha_reg(&a, MIPSREG_PC, ALPHA_T3);
1054 bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, MIPSREG_PC);
1055 }
1056
1057 if (potential_chunk_p == NULL) {
1058 if (mem->bintrans_32bit_only) {
1059 /* 34 12 70 a7 ldq t12,4660(a0) */
1060 ofs = (size_t)&dummy_cpu.cd.mips.bintrans_jump_to_32bit_pc - (size_t)&dummy_cpu;
1061 *a++ = ofs; *a++ = ofs >> 8; *a++ = 0x70; *a++ = 0xa7;
1062
1063 /* 00 00 fb 6b jmp (t12) */
1064 *a++ = 0; *a++ = 0; *a++ = 0xfb; *a++ = 0x6b;
1065 } else {
1066 /*
1067 * If the highest 32 bits of the address are either
1068 * 0x00000000 or 0xffffffff, then the tables used for
1069 * 32-bit load/stores can be used.
1070 *
1071 * 81 16 24 4a srl a1,0x20,t0
1072 * 03 00 20 e4 beq t0,14 <ok1>
1073 * 01 30 20 40 addl t0,0x1,t0
1074 * 01 00 20 e4 beq t0,14 <ok1>
1075 * 01 00 e0 c3 br 18 <nook>
1076 */
1077 *a++ = 0x81; *a++ = 0x16; *a++ = 0x24; *a++ = 0x4a;
1078 *a++ = 0x03; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4;
1079 *a++ = 0x01; *a++ = 0x30; *a++ = 0x20; *a++ = 0x40;
1080 *a++ = 0x01; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4;
1081 generic64bit = a;
1082 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1083
1084 /* 34 12 70 a7 ldq t12,4660(a0) */
1085 ofs = (size_t)&dummy_cpu.cd.mips.bintrans_jump_to_32bit_pc - (size_t)&dummy_cpu;
1086 *a++ = ofs; *a++ = ofs >> 8; *a++ = 0x70; *a++ = 0xa7;
1087
1088 /* 00 00 fb 6b jmp (t12) */
1089 *a++ = 0; *a++ = 0; *a++ = 0xfb; *a++ = 0x6b;
1090
1091
1092 if (generic64bit != NULL)
1093 *generic64bit = ((size_t)a - (size_t)generic64bit - 4) / 4;
1094
1095 /* Not much we can do here if this wasn't to the same
1096 physical page... */
1097
1098 *a++ = 0xfc; *a++ = 0xff; *a++ = 0x84; *a++ = 0x20; /* lda t3,-4(t3) */
1099
1100 /*
1101 * Compare the old pc (t3) and the new pc (t0). If they are on the
1102 * same virtual page (which means that they are on the same physical
1103 * page), then we can check the right chunk pointer, and if it
1104 * is non-NULL, then we can jump there. Otherwise just return.
1105 *
1106 * 00 f0 5f 20 lda t1,-4096
1107 * 01 00 22 44 and t0,t1,t0
1108 * 04 00 82 44 and t3,t1,t3
1109 * a3 05 24 40 cmpeq t0,t3,t2
1110 * 01 00 60 f4 bne t2,7c <ok2>
1111 * 01 80 fa 6b ret
1112 */
1113 *a++ = 0x00; *a++ = 0xf0; *a++ = 0x5f; *a++ = 0x20; /* lda */
1114 *a++ = 0x01; *a++ = 0x00; *a++ = 0x22; *a++ = 0x44; /* and */
1115 *a++ = 0x04; *a++ = 0x00; *a++ = 0x82; *a++ = 0x44; /* and */
1116 *a++ = 0xa3; *a++ = 0x05; *a++ = 0x24; *a++ = 0x40; /* cmpeq */
1117 *a++ = 0x01; *a++ = 0x00; *a++ = 0x60; *a++ = 0xf4; /* bne */
1118 *a++ = 0x01; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */
1119
1120 /* Don't execute too many instructions. (see comment below) */
1121 *a++ = (N_SAFE_BINTRANS_LIMIT-1)&255; *a++ = ((N_SAFE_BINTRANS_LIMIT-1) >> 8)&255;
1122 *a++ = 0x5f; *a++ = 0x20; /* lda t1,0x1fff */
1123 *a++ = 0xa1; *a++ = 0x0d; *a++ = 0xe2; *a++ = 0x40; /* cmple t6,t1,t0 */
1124 *a++ = 0x01; *a++ = 0x00; *a++ = 0x20; *a++ = 0xf4; /* bne */
1125 *a++ = 0x01; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */
1126
1127 /* 15 bits at a time, which means max 60 bits, but
1128 that should be enough. the top 4 bits are probably
1129 not used by userland alpha code. (TODO: verify this) */
1130 alpha_addr = (size_t)chunks;
1131 subaddr = (alpha_addr >> 45) & 0x7fff;
1132
1133 /*
1134 * 00 00 3f 20 lda t0,0
1135 * 21 f7 21 48 sll t0,0xf,t0
1136 * 34 12 21 20 lda t0,4660(t0)
1137 * 21 f7 21 48 sll t0,0xf,t0
1138 * 34 12 21 20 lda t0,4660(t0)
1139 * 21 f7 21 48 sll t0,0xf,t0
1140 * 34 12 21 20 lda t0,4660(t0)
1141 */
1142
1143 /* Start with the topmost 15 bits: */
1144 *a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x3f; *a++ = 0x20;
1145 *a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */
1146
1147 subaddr = (alpha_addr >> 30) & 0x7fff;
1148 *a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20;
1149 *a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */
1150
1151 subaddr = (alpha_addr >> 15) & 0x7fff;
1152 *a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20;
1153 *a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */
1154
1155 subaddr = alpha_addr & 0x7fff;
1156 *a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20;
1157
1158 /*
1159 * t2 = pc
1160 * t1 = t2 & 0xfff
1161 * t0 += t1
1162 *
1163 * ff 0f 5f 20 lda t1,4095
1164 * 02 00 62 44 and t2,t1,t1
1165 * 01 04 22 40 addq t0,t1,t0
1166 */
1167 bintrans_move_MIPS_reg_into_Alpha_reg(&a, MIPSREG_PC, ALPHA_T2);
1168 *a++ = 0xff; *a++ = 0x0f; *a++ = 0x5f; *a++ = 0x20; /* lda */
1169 *a++ = 0x02; *a++ = 0x00; *a++ = 0x62; *a++ = 0x44; /* and */
1170 *a++ = 0x01; *a++ = 0x04; *a++ = 0x22; *a++ = 0x40; /* addq */
1171
1172 /*
1173 * Load the chunk pointer (actually, a 32-bit offset) into t0.
1174 * If it is zero, then skip the following.
1175 * Add cpu->chunk_base_address to t0.
1176 * Jump to t0.
1177 */
1178
1179 *a++ = 0x00; *a++ = 0x00; *a++ = 0x21; *a++ = 0xa0; /* ldl t0,0(t0) */
1180 *a++ = 0x03; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4; /* beq t0,<skip> */
1181
1182 /* ldq t2,chunk_base_address(a0) */
1183 ofs = ((size_t)&dummy_cpu.cd.mips.chunk_base_address) - (size_t)&dummy_cpu;
1184 *a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x70; *a++ = 0xa4;
1185 /* addq t0,t2,t0 */
1186 *a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x40;
1187
1188 /* 00 00 e1 6b jmp (t0) */
1189 *a++ = 0x00; *a++ = 0x00; *a++ = 0xe1; *a++ = 0x6b; /* jmp (t0) */
1190
1191 /* Failure, then return to the main loop. */
1192 *a++ = 0x01; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */
1193 }
1194 } else {
1195 /*
1196 * Just to make sure that we don't become too unreliant
1197 * on the main program loop, we need to return every once
1198 * in a while (interrupts etc).
1199 *
1200 * Load the "nr of instructions executed" (which is an int)
1201 * and see if it is below a certain threshold. If so, then
1202 * we go on with the fast path (bintrans), otherwise we
1203 * abort by returning.
1204 *
1205 * f4 01 5f 20 lda t1,500 (some low number...)
1206 * a1 0d c2 40 cmple t6,t1,t0
1207 * 01 00 20 f4 bne t0,14 <f+0x14>
1208 */
1209 if (!only_care_about_chunk_p && !forward) {
1210 *a++ = (N_SAFE_BINTRANS_LIMIT-1)&255; *a++ = ((N_SAFE_BINTRANS_LIMIT-1) >> 8)&255;
1211 *a++ = 0x5f; *a++ = 0x20; /* lda t1,0x1fff */
1212 *a++ = 0xa1; *a++ = 0x0d; *a++ = 0xe2; *a++ = 0x40; /* cmple t6,t1,t0 */
1213 *a++ = 0x01; *a++ = 0x00; *a++ = 0x20; *a++ = 0xf4; /* bne */
1214 *a++ = 0x01; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */
1215 }
1216
1217 /*
1218 * potential_chunk_p points to an "uint32_t".
1219 * If this value is non-NULL, then it is a piece of Alpha
1220 * machine language code corresponding to the address
1221 * we're jumping to. Otherwise, those instructions haven't
1222 * been translated yet, so we have to return to the main
1223 * loop. (Actually, we have to add cpu->chunk_base_address,
1224 * because the uint32_t is limited to 32-bit offsets.)
1225 *
1226 * Case 1: The value is non-NULL already at translation
1227 * time. Then we can make a direct (fast) native
1228 * Alpha jump to the code chunk.
1229 *
1230 * Case 2: The value was NULL at translation time, then we
1231 * have to check during runtime.
1232 */
1233
1234 /* Case 1: */
1235 /* printf("%08x ", *potential_chunk_p); */
1236 alpha_addr = *potential_chunk_p + (size_t)mem->translation_code_chunk_space;
1237 ofs = (alpha_addr - ((size_t)a+4)) / 4;
1238 /* printf("%016llx %016llx %i\n", (long long)alpha_addr, (long long)a, ofs); */
1239
1240 if ((*potential_chunk_p) != 0 && ofs > -0xfffff && ofs < 0xfffff) {
1241 *a++ = ofs & 255; *a++ = (ofs >> 8) & 255; *a++ = 0xe0 + ((ofs >> 16) & 0x1f); *a++ = 0xc3; /* br <chunk> */
1242 } else {
1243 /* Case 2: */
1244
1245 bintrans_register_potential_quick_jump(mem, a, p);
1246
1247 /* 15 bits at a time, which means max 60 bits, but
1248 that should be enough. the top 4 bits are probably
1249 not used by userland alpha code. (TODO: verify this) */
1250 alpha_addr = (size_t)potential_chunk_p;
1251 subaddr = (alpha_addr >> 45) & 0x7fff;
1252
1253 /*
1254 * 00 00 3f 20 lda t0,0
1255 * 21 f7 21 48 sll t0,0xf,t0
1256 * 34 12 21 20 lda t0,4660(t0)
1257 * 21 f7 21 48 sll t0,0xf,t0
1258 * 34 12 21 20 lda t0,4660(t0)
1259 * 21 f7 21 48 sll t0,0xf,t0
1260 * 34 12 21 20 lda t0,4660(t0)
1261 */
1262
1263 /* Start with the topmost 15 bits: */
1264 *a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x3f; *a++ = 0x20;
1265 *a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */
1266
1267 subaddr = (alpha_addr >> 30) & 0x7fff;
1268 *a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20;
1269 *a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */
1270
1271 subaddr = (alpha_addr >> 15) & 0x7fff;
1272 *a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20;
1273 *a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */
1274
1275 subaddr = alpha_addr & 0x7fff;
1276 *a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20;
1277
1278 /*
1279 * Load the chunk pointer into t0.
1280 * If it is NULL (zero), then skip the following jump.
1281 * Jump to t0.
1282 */
1283 *a++ = 0x00; *a++ = 0x00; *a++ = 0x21; *a++ = 0xa0; /* ldl t0,0(t0) */
1284 *a++ = 0x03; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4; /* beq t0,<skip> */
1285
1286 /* ldq t2,chunk_base_address(a0) */
1287 ofs = ((size_t)&dummy_cpu.cd.mips.chunk_base_address) - (size_t)&dummy_cpu;
1288 *a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x70; *a++ = 0xa4;
1289 /* addq t0,t2,t0 */
1290 *a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x40;
1291
1292 /* 00 00 e1 6b jmp (t0) */
1293 *a++ = 0x00; *a++ = 0x00; *a++ = 0xe1; *a++ = 0x6b; /* jmp (t0) */
1294
1295 /* "Failure", then let's return to the main loop. */
1296 *a++ = 0x01; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */
1297 }
1298 }
1299
1300 if (skip != NULL) {
1301 *skip = ((size_t)a - (size_t)skip - 4) / 4;
1302 skip ++;
1303 *skip = (((size_t)a - (size_t)skip - 4) / 4) >> 8;
1304 }
1305
1306 *addrp = a;
1307 return 1;
1308 }
1309
1310
1311 /*
1312 * bintrans_write_instruction__loadstore():
1313 */
1314 static int bintrans_write_instruction__loadstore(
1315 struct memory *mem, unsigned char **addrp,
1316 int rt, int imm, int rs, int instruction_type, int bigendian)
1317 {
1318 unsigned char *a, *fail, *generic64bit = NULL, *generic64bitA = NULL;
1319 unsigned char *doloadstore = NULL,
1320 *ok_unaligned_load3, *ok_unaligned_load2, *ok_unaligned_load1;
1321 uint32_t *b;
1322 int ofs, alignment, load = 0, alpha_rs, alpha_rt, unaligned = 0;
1323
1324 /* TODO: Not yet: */
1325 if (instruction_type == HI6_LQ_MDMX || instruction_type == HI6_SQ) {
1326 return 0;
1327 }
1328
1329 switch (instruction_type) {
1330 case HI6_LQ_MDMX:
1331 case HI6_LD:
1332 case HI6_LDL:
1333 case HI6_LDR:
1334 case HI6_LWU:
1335 case HI6_LW:
1336 case HI6_LWL:
1337 case HI6_LWR:
1338 case HI6_LHU:
1339 case HI6_LH:
1340 case HI6_LBU:
1341 case HI6_LB:
1342 load = 1;
1343 if (rt == 0)
1344 return 0;
1345 }
1346
1347 switch (instruction_type) {
1348 case HI6_LDL:
1349 case HI6_LDR:
1350 case HI6_LWL:
1351 case HI6_LWR:
1352 case HI6_SDL:
1353 case HI6_SDR:
1354 case HI6_SWL:
1355 case HI6_SWR:
1356 unaligned = 1;
1357 }
1358
1359 a = *addrp;
1360
1361 /*
1362 * a1 = gpr[rs] + imm;
1363 *
1364 * 88 08 30 a4 ldq t0,2184(a0)
1365 * 34 12 21 22 lda a1,4660(t0)
1366 */
1367
1368 alpha_rs = map_MIPS_to_Alpha[rs];
1369 if (alpha_rs < 0) {
1370 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0);
1371 alpha_rs = ALPHA_T0;
1372 }
1373 *a++ = imm; *a++ = (imm >> 8); *a++ = 0x20 + alpha_rs; *a++ = 0x22;
1374
1375 alignment = 0;
1376 switch (instruction_type) {
1377 case HI6_LQ_MDMX:
1378 case HI6_SQ:
1379 alignment = 15;
1380 break;
1381 case HI6_LD:
1382 case HI6_LDL:
1383 case HI6_LDR:
1384 case HI6_SD:
1385 case HI6_SDL:
1386 case HI6_SDR:
1387 alignment = 7;
1388 break;
1389 case HI6_LW:
1390 case HI6_LWL:
1391 case HI6_LWR:
1392 case HI6_LWU:
1393 case HI6_SW:
1394 case HI6_SWL:
1395 case HI6_SWR:
1396 alignment = 3;
1397 break;
1398 case HI6_LH:
1399 case HI6_LHU:
1400 case HI6_SH:
1401 alignment = 1;
1402 break;
1403 }
1404
1405 if (unaligned) {
1406 /*
1407 * Unaligned load/store: Perform the host load/store at
1408 * an aligned address, and then figure out which bytes to
1409 * actually load into the destination register.
1410 *
1411 * 02 30 20 46 and a1,alignment,t1
1412 * 31 05 22 42 subq a1,t1,a1
1413 */
1414 *a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46;
1415 *a++ = 0x31; *a++ = 0x05; *a++ = 0x22; *a++ = 0x42;
1416 } else if (alignment > 0) {
1417 /*
1418 * Check alignment:
1419 *
1420 * 02 30 20 46 and a1,0x1,t1
1421 * 02 70 20 46 and a1,0x3,t1 (one of these "and"s)
1422 * 02 f0 20 46 and a1,0x7,t1
1423 * 02 f0 21 46 and a1,0xf,t1
1424 * 01 00 40 e4 beq t1,<okalign>
1425 * 01 80 fa 6b ret
1426 */
1427 *a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46;
1428 fail = a;
1429 *a++ = 0x01; *a++ = 0x00; *a++ = 0x40; *a++ = 0xe4;
1430 *addrp = a;
1431 bintrans_write_chunkreturn_fail(addrp);
1432 a = *addrp;
1433 *fail = ((size_t)a - (size_t)fail - 4) / 4;
1434 }
1435
1436 alpha_rt = map_MIPS_to_Alpha[rt];
1437
1438 if (mem->bintrans_32bit_only) {
1439 /* Special case for 32-bit addressing: */
1440
1441 ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_loadstore_32bit) - (size_t)&dummy_cpu;
1442 /* ldq t12,bintrans_loadstore_32bit(a0) */
1443 *a++ = ofs; *a++ = ofs >> 8; *a++ = 0x70; *a++ = 0xa7;
1444
1445 /* jsr t4,(t12),<after> */
1446 *a++ = 0x00; *a++ = 0x40; *a++ = 0xbb; *a++ = 0x68;
1447
1448 /*
1449 * Now:
1450 * a3 = host page
1451 * t0 = 0 for readonly pages, 1 for read/write pages
1452 * t3 = address of host load/store
1453 */
1454
1455 /* If this is a store, then the lowest bit must be set: */
1456 if (!load) {
1457 /* 01 00 20 f4 bne t0,<okzzz> */
1458 fail = a;
1459 *a++ = 0x01; *a++ = 0x00; *a++ = 0x20; *a++ = 0xf4;
1460 bintrans_write_chunkreturn_fail(&a);
1461 *fail = ((size_t)a - (size_t)fail - 4) / 4;
1462 }
1463 } else {
1464 /*
1465 * If the highest 33 bits of the address are either all ones
1466 * or all zeroes, then the tables used for 32-bit load/stores
1467 * can be used.
1468 */
1469 *a++ = 0x81; *a++ = 0xf6; *a++ = 0x23; *a++ = 0x4a; /* srl a1,0x1f,t0 */
1470 *a++ = 0x01; *a++ = 0x30; *a++ = 0x20; *a++ = 0x44; /* and t0,0x1,t0 */
1471 *a++ = 0x04; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4; /* beq t0,<noll> */
1472 *a++ = 0x81; *a++ = 0x16; *a++ = 0x24; *a++ = 0x4a; /* srl a1,0x20,t0 */
1473 *a++ = 0x01; *a++ = 0x30; *a++ = 0x20; *a++ = 0x40; /* addl t0,0x1,t0 */
1474 *a++ = 0x04; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4; /* beq t0,<ok> */
1475 generic64bit = a;
1476 *a++ = 0x04; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; /* br <generic> */
1477 /* <noll>: */
1478 *a++ = 0x81; *a++ = 0x16; *a++ = 0x24; *a++ = 0x4a; /* srl a1,0x20,t0 */
1479 *a++ = 0x01; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4; /* beq t0,<ok> */
1480 generic64bitA = a;
1481 *a++ = 0x04; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; /* br <generic> */
1482
1483 ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_loadstore_32bit) - (size_t)&dummy_cpu;
1484 /* ldq t12,bintrans_loadstore_32bit(a0) */
1485 *a++ = ofs; *a++ = ofs >> 8; *a++ = 0x70; *a++ = 0xa7;
1486
1487 /* jsr t4,(t12),<after> */
1488 *a++ = 0x00; *a++ = 0x40; *a++ = 0xbb; *a++ = 0x68;
1489
1490 /*
1491 * Now:
1492 * a3 = host page (or NULL if not found)
1493 * t0 = 0 for readonly pages, 1 for read/write pages
1494 * t3 = (potential) address of host load/store
1495 */
1496
1497 /* If this is a store, then the lowest bit must be set: */
1498 if (!load) {
1499 /* 01 00 20 f4 bne t0,<okzzz> */
1500 fail = a;
1501 *a++ = 0x01; *a++ = 0x00; *a++ = 0x20; *a++ = 0xf4;
1502 bintrans_write_chunkreturn_fail(&a);
1503 *fail = ((size_t)a - (size_t)fail - 4) / 4;
1504 }
1505
1506 doloadstore = a;
1507 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1508
1509
1510 /*
1511 * Generic (64-bit) load/store:
1512 */
1513
1514 if (generic64bit != NULL)
1515 *generic64bit = ((size_t)a - (size_t)generic64bit - 4) / 4;
1516 if (generic64bitA != NULL)
1517 *generic64bitA = ((size_t)a - (size_t)generic64bitA - 4) / 4;
1518
1519 *addrp = a;
1520 b = (uint32_t *) *addrp;
1521
1522 /* Save a0 and the old return address on the stack: */
1523 *b++ = 0x23deff80; /* lda sp,-128(sp) */
1524
1525 *b++ = 0xb75e0000; /* stq ra,0(sp) */
1526 *b++ = 0xb61e0008; /* stq a0,8(sp) */
1527 *b++ = 0xb4de0010; /* stq t5,16(sp) */
1528 *b++ = 0xb0fe0018; /* stl t6,24(sp) */
1529 *b++ = 0xb71e0020; /* stq t10,32(sp) */
1530 *b++ = 0xb73e0028; /* stq t11,40(sp) */
1531 *b++ = 0xb51e0030; /* stq t7,48(sp) */
1532 *b++ = 0xb6de0038; /* stq t8,56(sp) */
1533 *b++ = 0xb6fe0040; /* stq t9,64(sp) */
1534
1535 ofs = ((size_t)&dummy_cpu.cd.mips.fast_vaddr_to_hostaddr) - (size_t)&dummy_cpu;
1536
1537 *b++ = 0xa7700000 | ofs; /* ldq t12,0(a0) */
1538
1539 /* a1 is already vaddr. set a2 = writeflag */
1540 *b++ = 0x225f0000 | (load? 0 : 1);
1541
1542 /* Call fast_vaddr_to_hostaddr: */
1543 *b++ = 0x6b5b4000; /* jsr ra,(t12),<after> */
1544
1545 /* Restore the old return address and a0 from the stack: */
1546 *b++ = 0xa75e0000; /* ldq ra,0(sp) */
1547 *b++ = 0xa61e0008; /* ldq a0,8(sp) */
1548 *b++ = 0xa4de0010; /* ldq t5,16(sp) */
1549 *b++ = 0xa0fe0018; /* ldl t6,24(sp) */
1550 *b++ = 0xa71e0020; /* ldq t10,32(sp) */
1551 *b++ = 0xa73e0028; /* ldq t11,40(sp) */
1552 *b++ = 0xa51e0030; /* ldq t7,48(sp) */
1553 *b++ = 0xa6de0038; /* ldq t8,56(sp) */
1554 *b++ = 0xa6fe0040; /* ldq t9,64(sp) */
1555
1556 *b++ = 0x23de0080; /* lda sp,128(sp) */
1557
1558 *addrp = (unsigned char *) b;
1559 a = *addrp;
1560
1561 /*
1562 * NULL? Then return failure.
1563 * 01 00 00 f4 bne v0,f8 <okzz>
1564 */
1565 fail = a;
1566 *a++ = 0x01; *a++ = 0x00; *a++ = 0x00; *a++ = 0xf4;
1567 bintrans_write_chunkreturn_fail(&a);
1568 *fail = ((size_t)a - (size_t)fail - 4) / 4;
1569
1570 /* The rest of this code was written with t3 as the address. */
1571
1572 /* 04 14 00 40 addq v0,0,t3 */
1573 *a++ = 0x04; *a++ = 0x14; *a++ = 0x00; *a++ = 0x40;
1574
1575 if (doloadstore != NULL)
1576 *doloadstore = ((size_t)a - (size_t)doloadstore - 4) / 4;
1577 }
1578
1579
1580 switch (instruction_type) {
1581 case HI6_LQ_MDMX:
1582 /* TODO */
1583 break;
1584 case HI6_LD:
1585 *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xa4; /* ldq t0,0(t3) */
1586 if (bigendian) {
1587 /* remember original 8 bytes of t0: */
1588 *a++ = 0x05; *a++ = 0x04; *a++ = 0x3f; *a++ = 0x40; /* addq t0,zero,t4 */
1589
1590 /* swap lowest 4 bytes: */
1591 *a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */
1592 *a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */
1593 *a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */
1594 *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1595 *a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */
1596 *a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */
1597 *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1598 *a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */
1599 *a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */
1600
1601 /* save result in (top 4 bytes of) t1, then t4. get back top bits of t4: */
1602 *a++ = 0x22; *a++ = 0x17; *a++ = 0x24; *a++ = 0x48; /* sll t0,0x20,t1 */
1603 *a++ = 0x81; *a++ = 0x16; *a++ = 0xa4; *a++ = 0x48; /* srl t4,0x20,t0 */
1604 *a++ = 0x05; *a++ = 0x14; *a++ = 0x40; *a++ = 0x40; /* addq t1,0,t4 */
1605
1606 /* swap highest 4 bytes: */
1607 *a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */
1608 *a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */
1609 *a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */
1610 *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1611 *a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */
1612 *a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */
1613 *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1614 *a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */
1615 *a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */
1616
1617 /* or the results together: */
1618 *a++ = 0x01; *a++ = 0x04; *a++ = 0xa1; *a++ = 0x44; /* or t4,t0,t0 */
1619 }
1620 bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rt);
1621 break;
1622 case HI6_LW:
1623 case HI6_LWU:
1624 if (alpha_rt < 0 || bigendian || instruction_type == HI6_LWU)
1625 alpha_rt = ALPHA_T0;
1626 /* ldl rt,0(t3) */
1627 *a++ = 0x00; *a++ = 0x00; *a++ = 0x04 | ((alpha_rt & 7) << 5);
1628 *a++ = 0xa0 | ((alpha_rt >> 3) & 3);
1629 if (bigendian) {
1630 *a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */
1631 *a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */
1632 *a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */
1633 *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1634 *a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */
1635 *a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */
1636 *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1637 *a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */
1638 *a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */
1639 *a++ = 0x01; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,zero,t0 (sign extend) 32->64 */
1640 }
1641 if (instruction_type == HI6_LWU) {
1642 /* Use only lowest 32 bits: */
1643 *a++ = 0x21; *a++ = 0xf6; *a++ = 0x21; *a++ = 0x48; /* zapnot t0,0xf,t0 */
1644 }
1645 if (alpha_rt == ALPHA_T0)
1646 bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rt);
1647 break;
1648 case HI6_LHU:
1649 case HI6_LH:
1650 *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0x30; /* ldwu from memory */
1651 if (bigendian) {
1652 *a++ = 0x62; *a++ = 0x31; *a++ = 0x20; *a++ = 0x48; /* insbl t0,1,t1 */
1653 *a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */
1654 *a++ = 0x01; *a++ = 0x04; *a++ = 0x43; *a++ = 0x44; /* or t1,t2,t0 */
1655 }
1656 if (instruction_type == HI6_LH) {
1657 *a++ = 0x21; *a++ = 0x00; *a++ = 0xe1; *a++ = 0x73; /* sextw t0,t0 */
1658 }
1659 bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rt);
1660 break;
1661 case HI6_LBU:
1662 case HI6_LB:
1663 if (alpha_rt < 0)
1664 alpha_rt = ALPHA_T0;
1665 /* ldbu rt,0(t3) */
1666 *a++ = 0x00; *a++ = 0x00; *a++ = 0x04 | ((alpha_rt & 7) << 5);
1667 *a++ = 0x28 | ((alpha_rt >> 3) & 3);
1668 if (instruction_type == HI6_LB) {
1669 /* sextb rt,rt */
1670 *a++ = alpha_rt; *a++ = 0x00; *a++ = 0xe0 + alpha_rt; *a++ = 0x73;
1671 }
1672 if (alpha_rt == ALPHA_T0)
1673 bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rt);
1674 break;
1675
1676 case HI6_LWL:
1677 /* a1 = 0..3 (or 0..7 for 64-bit loads): */
1678 alpha_rs = map_MIPS_to_Alpha[rs];
1679 if (alpha_rs < 0) {
1680 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0);
1681 alpha_rs = ALPHA_T0;
1682 }
1683 *a++ = imm; *a++ = (imm >> 8); *a++ = 0x20 + alpha_rs; *a++ = 0x22;
1684 /* 02 30 20 46 and a1,alignment,t1 */
1685 *a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46;
1686
1687 /* ldl t0,0(t3) */
1688 *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xa0;
1689
1690 if (bigendian) {
1691 /* TODO */
1692 bintrans_write_chunkreturn_fail(&a);
1693 }
1694 /*
1695 * lwl: memory = 0x12 0x34 0x56 0x78
1696 * offset (a1): register rt becomes:
1697 * 0 0x12......
1698 * 1 0x3412....
1699 * 2 0x563412..
1700 * 3 0x78563412
1701 */
1702
1703 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T2);
1704
1705 /*
1706 10: 03 00 9f 20 lda t3,3
1707 14: a5 05 82 40 cmpeq t3,t1,t4
1708 18: 01 00 a0 e4 beq t4,20 <skip>
1709 */
1710 *a++ = 0x03; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20;
1711 *a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40;
1712 *a++ = 0x02; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
1713
1714 /* 03 14 20 40 addq t0,0,t2 */
1715 *a++ = 0x03; *a++ = 0x14; *a++ = 0x20; *a++ = 0x40;
1716
1717 ok_unaligned_load3 = a;
1718 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1719
1720
1721
1722 *a++ = 0x02; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20;
1723 *a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40;
1724 *a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
1725 /*
1726 * 2 0x563412..
1727 2c: 21 17 21 48 sll t0,0x8,t0
1728 30: 01 10 20 40 addl t0,0,t0
1729 34: 03 f0 7f 44 and t2,0xff,t2
1730 38: 03 04 23 44 or t0,t2,t2
1731 */
1732 *a++ = 0x21; *a++ = 0x17; *a++ = 0x21; *a++ = 0x48;
1733 *a++ = 0x01; *a++ = 0x10; *a++ = 0x20; *a++ = 0x40;
1734 *a++ = 0x03; *a++ = 0xf0; *a++ = 0x7f; *a++ = 0x44;
1735 *a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
1736
1737 ok_unaligned_load2 = a;
1738 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1739
1740
1741
1742 *a++ = 0x01; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20;
1743 *a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40;
1744 *a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
1745 /*
1746 * 1 0x3412....
1747 2c: 21 17 22 48 sll t0,0x10,t0
1748 30: 01 10 20 40 addl t0,0,t0
1749 34: 23 76 60 48 zapnot t2,0x3,t2
1750 38: 03 04 23 44 or t0,t2,t2
1751 */
1752 *a++ = 0x21; *a++ = 0x17; *a++ = 0x22; *a++ = 0x48;
1753 *a++ = 0x01; *a++ = 0x10; *a++ = 0x20; *a++ = 0x40;
1754 *a++ = 0x23; *a++ = 0x76; *a++ = 0x60; *a++ = 0x48;
1755 *a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
1756
1757 ok_unaligned_load1 = a;
1758 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1759
1760
1761
1762
1763 /*
1764 * 0 0x12......
1765 2c: 21 17 23 48 sll t0,0x18,t0
1766 30: 01 10 20 40 addl t0,0,t0
1767 34: 23 f6 60 48 zapnot t2,0x7,t2
1768 38: 03 04 23 44 or t0,t2,t2
1769 */
1770 *a++ = 0x21; *a++ = 0x17; *a++ = 0x23; *a++ = 0x48;
1771 *a++ = 0x01; *a++ = 0x10; *a++ = 0x20; *a++ = 0x40;
1772 *a++ = 0x23; *a++ = 0xf6; *a++ = 0x60; *a++ = 0x48;
1773 *a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
1774
1775
1776 *ok_unaligned_load3 = ((size_t)a - (size_t)ok_unaligned_load3 - 4) / 4;
1777 *ok_unaligned_load2 = ((size_t)a - (size_t)ok_unaligned_load2 - 4) / 4;
1778 *ok_unaligned_load1 = ((size_t)a - (size_t)ok_unaligned_load1 - 4) / 4;
1779
1780 /* 03 10 60 40 addl t2,0,t2 */
1781 *a++ = 0x03; *a++ = 0x10; *a++ = 0x60; *a++ = 0x40;
1782
1783 bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T2, rt);
1784 break;
1785
1786 case HI6_LWR:
1787 /* a1 = 0..3 (or 0..7 for 64-bit loads): */
1788 alpha_rs = map_MIPS_to_Alpha[rs];
1789 if (alpha_rs < 0) {
1790 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0);
1791 alpha_rs = ALPHA_T0;
1792 }
1793 *a++ = imm; *a++ = (imm >> 8); *a++ = 0x20 + alpha_rs; *a++ = 0x22;
1794 /* 02 30 20 46 and a1,alignment,t1 */
1795 *a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46;
1796
1797 /* ldl t0,0(t3) */
1798 *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xa0;
1799
1800 if (bigendian) {
1801 /* TODO */
1802 bintrans_write_chunkreturn_fail(&a);
1803 }
1804 /*
1805 * lwr: memory = 0x12 0x34 0x56 0x78
1806 * offset (a1): register rt becomes:
1807 * 0 0x78563412
1808 * 1 0x..785634
1809 * 2 0x....7856
1810 * 3 0x......78
1811 */
1812
1813 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T2);
1814
1815 /*
1816 10: 03 00 9f 20 lda t3,3
1817 14: a5 05 82 40 cmpeq t3,t1,t4
1818 18: 01 00 a0 e4 beq t4,20 <skip>
1819 */
1820 *a++ = 0x03; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20;
1821 *a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40;
1822 *a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
1823
1824 /*
1825 2c: 81 16 23 48 srl t0,0x18,t0
1826 b0: 21 36 20 48 zapnot t0,0x1,t0
1827 34: 23 d6 7f 48 zapnot t2,0xfe,t2
1828 38: 03 04 23 44 or t0,t2,t2
1829 */
1830 *a++ = 0x81; *a++ = 0x16; *a++ = 0x23; *a++ = 0x48;
1831 *a++ = 0x21; *a++ = 0x36; *a++ = 0x20; *a++ = 0x48;
1832 *a++ = 0x23; *a++ = 0xd6; *a++ = 0x7f; *a++ = 0x48;
1833 *a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
1834
1835 ok_unaligned_load3 = a;
1836 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1837
1838
1839
1840 *a++ = 0x02; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20;
1841 *a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40;
1842 *a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
1843 /*
1844 2c: 81 16 22 48 srl t0,0x10,t0
1845 b4: 21 76 20 48 zapnot t0,0x3,t0
1846 34: 23 96 7f 48 zapnot t2,0xfc,t2
1847 38: 03 04 23 44 or t0,t2,t2
1848 */
1849 *a++ = 0x81; *a++ = 0x16; *a++ = 0x22; *a++ = 0x48;
1850 *a++ = 0x21; *a++ = 0x76; *a++ = 0x20; *a++ = 0x48;
1851 *a++ = 0x23; *a++ = 0x96; *a++ = 0x7f; *a++ = 0x48;
1852 *a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
1853
1854 ok_unaligned_load2 = a;
1855 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1856
1857
1858
1859 *a++ = 0x01; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20;
1860 *a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40;
1861 *a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
1862 /*
1863 2c: 81 16 21 48 srl t0,0x8,t0
1864 b8: 21 f6 20 48 zapnot t0,0x7,t0
1865 3c: 23 16 7f 48 zapnot t2,0xf8,t2
1866 40: 03 04 23 44 or t0,t2,t2
1867 */
1868 *a++ = 0x81; *a++ = 0x16; *a++ = 0x21; *a++ = 0x48;
1869 *a++ = 0x21; *a++ = 0xf6; *a++ = 0x20; *a++ = 0x48;
1870 *a++ = 0x23; *a++ = 0x16; *a++ = 0x7f; *a++ = 0x48;
1871 *a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
1872
1873 ok_unaligned_load1 = a;
1874 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1875
1876
1877
1878
1879 /*
1880 * 0 0x12......
1881 */
1882 /* 03 14 20 40 addq t0,0,t2 */
1883 *a++ = 0x03; *a++ = 0x14; *a++ = 0x20; *a++ = 0x40;
1884
1885
1886
1887 *ok_unaligned_load3 = ((size_t)a - (size_t)ok_unaligned_load3 - 4) / 4;
1888 *ok_unaligned_load2 = ((size_t)a - (size_t)ok_unaligned_load2 - 4) / 4;
1889 *ok_unaligned_load1 = ((size_t)a - (size_t)ok_unaligned_load1 - 4) / 4;
1890
1891 /* 03 10 60 40 addl t2,0,t2 */
1892 *a++ = 0x03; *a++ = 0x10; *a++ = 0x60; *a++ = 0x40;
1893
1894 bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T2, rt);
1895 break;
1896
1897 case HI6_SQ:
1898 /* TODO */
1899 break;
1900 case HI6_SD:
1901 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T0);
1902 if (bigendian) {
1903 /* remember original 8 bytes of t0: */
1904 *a++ = 0x05; *a++ = 0x04; *a++ = 0x3f; *a++ = 0x40; /* addq t0,zero,t4 */
1905
1906 /* swap lowest 4 bytes: */
1907 *a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */
1908 *a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */
1909 *a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */
1910 *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1911 *a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */
1912 *a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */
1913 *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1914 *a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */
1915 *a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */
1916
1917 /* save result in (top 4 bytes of) t1, then t4. get back top bits of t4: */
1918 *a++ = 0x22; *a++ = 0x17; *a++ = 0x24; *a++ = 0x48; /* sll t0,0x20,t1 */
1919 *a++ = 0x81; *a++ = 0x16; *a++ = 0xa4; *a++ = 0x48; /* srl t4,0x20,t0 */
1920 *a++ = 0x05; *a++ = 0x14; *a++ = 0x40; *a++ = 0x40; /* addq t1,0,t4 */
1921
1922 /* swap highest 4 bytes: */
1923 *a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */
1924 *a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */
1925 *a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */
1926 *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1927 *a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */
1928 *a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */
1929 *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1930 *a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */
1931 *a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */
1932
1933 /* or the results together: */
1934 *a++ = 0x01; *a++ = 0x04; *a++ = 0xa1; *a++ = 0x44; /* or t4,t0,t0 */
1935 }
1936 *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xb4; /* stq to memory */
1937 break;
1938 case HI6_SW:
1939 if (alpha_rt < 0 || bigendian) {
1940 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T0);
1941 alpha_rt = ALPHA_T0;
1942 }
1943 if (bigendian) {
1944 *a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */
1945 *a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */
1946 *a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */
1947 *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1948 *a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */
1949 *a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */
1950 *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1951 *a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */
1952 *a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */
1953 }
1954 /* stl to memory: stl rt,0(t3) */
1955 *a++ = 0x00; *a++ = 0x00; *a++ = 0x04 | ((alpha_rt & 7) << 5);
1956 *a++ = 0xb0 | ((alpha_rt >> 3) & 3);
1957 break;
1958 case HI6_SH:
1959 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T0);
1960 if (bigendian) {
1961 *a++ = 0x62; *a++ = 0x31; *a++ = 0x20; *a++ = 0x48; /* insbl t0,1,t1 */
1962 *a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */
1963 *a++ = 0x01; *a++ = 0x04; *a++ = 0x43; *a++ = 0x44; /* or t1,t2,t0 */
1964 }
1965 *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0x34; /* stw to memory */
1966 break;
1967 case HI6_SB:
1968 if (alpha_rt < 0) {
1969 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T0);
1970 alpha_rt = ALPHA_T0;
1971 }
1972 /* stb to memory: stb rt,0(t3) */
1973 *a++ = 0x00; *a++ = 0x00; *a++ = 0x04 | ((alpha_rt & 7) << 5);
1974 *a++ = 0x38 | ((alpha_rt >> 3) & 3);
1975 break;
1976
1977 case HI6_SWL:
1978 /* a1 = 0..3 (or 0..7 for 64-bit stores): */
1979 alpha_rs = map_MIPS_to_Alpha[rs];
1980 if (alpha_rs < 0) {
1981 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0);
1982 alpha_rs = ALPHA_T0;
1983 }
1984 *a++ = imm; *a++ = (imm >> 8); *a++ = 0x20 + alpha_rs; *a++ = 0x22;
1985 /* 02 30 20 46 and a1,alignment,t1 */
1986 *a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46;
1987
1988 /* ldl t0,0(t3) */
1989 *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xa0;
1990
1991 if (bigendian) {
1992 /* TODO */
1993 bintrans_write_chunkreturn_fail(&a);
1994 }
1995
1996 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T2);
1997
1998 /*
1999 * swl: memory = 0x12 0x34 0x56 0x78
2000 * register = 0x89abcdef
2001 * offset (a1): memory becomes:
2002 * 0 0x89 0x.. 0x.. 0x..
2003 * 1 0xab 0x89 0x.. 0x..
2004 * 2 0xcd 0xab 0x89 0x..
2005 * 3 0xef 0xcd 0xab 0x89
2006 */
2007
2008 /*
2009 a5 75 40 40 cmpeq t1,0x03,t4
2010 01 00 a0 e4 beq t4,20 <skip>
2011 */
2012 *a++ = 0xa5; *a++ = 0x75; *a++ = 0x40; *a++ = 0x40;
2013 *a++ = 0x02; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
2014
2015 /* 01 10 60 40 addl t2,0,t0 */
2016 *a++ = 0x01; *a++ = 0x10; *a++ = 0x60; *a++ = 0x40;
2017
2018 ok_unaligned_load3 = a;
2019 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
2020
2021
2022
2023
2024 *a++ = 0xa5; *a++ = 0x55; *a++ = 0x40; *a++ = 0x40;
2025 *a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
2026 /*
2027 2:
2028 e8: 83 16 61 48 srl t2,0x8,t2
2029 ec: 23 f6 60 48 zapnot t2,0x7,t2
2030 f0: 21 16 3f 48 zapnot t0,0xf8,t0
2031 f4: 01 04 23 44 or t0,t2,t0
2032 */
2033 *a++ = 0x83; *a++ = 0x16; *a++ = 0x61; *a++ = 0x48;
2034 *a++ = 0x23; *a++ = 0xf6; *a++ = 0x60; *a++ = 0x48;
2035 *a++ = 0x21; *a++ = 0x16; *a++ = 0x3f; *a++ = 0x48;
2036 *a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
2037
2038 ok_unaligned_load2 = a;
2039 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
2040
2041
2042
2043 *a++ = 0xa5; *a++ = 0x35; *a++ = 0x40; *a++ = 0x40;
2044 *a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
2045 /*
2046 1:
2047 f8: 83 16 62 48 srl t2,0x10,t2
2048 fc: 23 76 60 48 zapnot t2,0x3,t2
2049 100: 21 96 3f 48 zapnot t0,0xfc,t0
2050 104: 01 04 23 44 or t0,t2,t0
2051 */
2052 *a++ = 0x83; *a++ = 0x16; *a++ = 0x62; *a++ = 0x48;
2053 *a++ = 0x23; *a++ = 0x76; *a++ = 0x60; *a++ = 0x48;
2054 *a++ = 0x21; *a++ = 0x96; *a++ = 0x3f; *a++ = 0x48;
2055 *a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
2056
2057 ok_unaligned_load1 = a;
2058 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
2059
2060
2061
2062
2063
2064 /*
2065 0:
2066 108: 83 16 63 48 srl t2,0x18,t2
2067 10c: 23 36 60 48 zapnot t2,0x1,t2
2068 110: 21 d6 3f 48 zapnot t0,0xfe,t0
2069 114: 01 04 23 44 or t0,t2,t0
2070 */
2071 *a++ = 0x83; *a++ = 0x16; *a++ = 0x63; *a++ = 0x48;
2072 *a++ = 0x23; *a++ = 0x36; *a++ = 0x60; *a++ = 0x48;
2073 *a++ = 0x21; *a++ = 0xd6; *a++ = 0x3f; *a++ = 0x48;
2074 *a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
2075
2076
2077 *ok_unaligned_load3 = ((size_t)a - (size_t)ok_unaligned_load3 - 4) / 4;
2078 *ok_unaligned_load2 = ((size_t)a - (size_t)ok_unaligned_load2 - 4) / 4;
2079 *ok_unaligned_load1 = ((size_t)a - (size_t)ok_unaligned_load1 - 4) / 4;
2080
2081 /* sdl t0,0(t3) */
2082 *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xb0;
2083 break;
2084
2085 case HI6_SWR:
2086 /* a1 = 0..3 (or 0..7 for 64-bit stores): */
2087 alpha_rs = map_MIPS_to_Alpha[rs];
2088 if (alpha_rs < 0) {
2089 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0);
2090 alpha_rs = ALPHA_T0;
2091 }
2092 *a++ = imm; *a++ = (imm >> 8); *a++ = 0x20 + alpha_rs; *a++ = 0x22;
2093 /* 02 30 20 46 and a1,alignment,t1 */
2094 *a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46;
2095
2096 /* ldl t0,0(t3) */
2097 *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xa0;
2098
2099 if (bigendian) {
2100 /* TODO */
2101 bintrans_write_chunkreturn_fail(&a);
2102 }
2103
2104 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T2);
2105
2106 /*
2107 * swr: memory = 0x12 0x34 0x56 0x78
2108 * register = 0x89abcdef
2109 * offset (a1): memory becomes:
2110 * 0 0xef 0xcd 0xab 0x89
2111 * 1 0x.. 0xef 0xcd 0xab
2112 * 2 0x.. 0x.. 0xef 0xcd
2113 * 3 0x.. 0x.. 0x.. 0xef
2114 */
2115
2116
2117 /*
2118 a5 75 40 40 cmpeq t1,0x03,t4
2119 01 00 a0 e4 beq t4,20 <skip>
2120 */
2121 *a++ = 0xa5; *a++ = 0x75; *a++ = 0x40; *a++ = 0x40;
2122 *a++ = 0x04; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
2123
2124 /*
2125 118: 23 17 63 48 sll t2,0x18,t2
2126 11c: 21 f6 20 48 zapnot t0,0x7,t0
2127 120: 01 04 23 44 or t0,t2,t0
2128 */
2129 *a++ = 0x23; *a++ = 0x17; *a++ = 0x63; *a++ = 0x48;
2130 *a++ = 0x21; *a++ = 0xf6; *a++ = 0x20; *a++ = 0x48;
2131 *a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
2132
2133 ok_unaligned_load3 = a;
2134 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
2135
2136
2137
2138
2139
2140 *a++ = 0xa5; *a++ = 0x55; *a++ = 0x40; *a++ = 0x40;
2141 *a++ = 0x04; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
2142 /*
2143 2:
2144 124: 23 17 62 48 sll t2,0x10,t2
2145 128: 21 76 20 48 zapnot t0,0x3,t0
2146 12c: 01 04 23 44 or t0,t2,t0
2147 */
2148 *a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48;
2149 *a++ = 0x21; *a++ = 0x76; *a++ = 0x20; *a++ = 0x48;
2150 *a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
2151
2152 ok_unaligned_load2 = a;
2153 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
2154
2155
2156
2157 *a++ = 0xa5; *a++ = 0x35; *a++ = 0x40; *a++ = 0x40;
2158 *a++ = 0x04; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
2159 /*
2160 1:
2161 130: 23 17 61 48 sll t2,0x8,t2
2162 134: 21 36 20 48 zapnot t0,0x1,t0
2163 138: 01 04 23 44 or t0,t2,t0
2164 */
2165 *a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48;
2166 *a++ = 0x21; *a++ = 0x36; *a++ = 0x20; *a++ = 0x48;
2167 *a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
2168
2169 ok_unaligned_load1 = a;
2170 *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
2171
2172
2173
2174 /*
2175 0:
2176 13c: 01 10 60 40 addl t2,0,t0
2177 */
2178 *a++ = 0x01; *a++ = 0x10; *a++ = 0x60; *a++ = 0x40;
2179
2180
2181 *ok_unaligned_load3 = ((size_t)a - (size_t)ok_unaligned_load3 - 4) / 4;
2182 *ok_unaligned_load2 = ((size_t)a - (size_t)ok_unaligned_load2 - 4) / 4;
2183 *ok_unaligned_load1 = ((size_t)a - (size_t)ok_unaligned_load1 - 4) / 4;
2184
2185 /* sdl t0,0(t3) */
2186 *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xb0;
2187 break;
2188
2189 default:
2190 ;
2191 }
2192
2193 *addrp = a;
2194 bintrans_write_pc_inc(addrp);
2195 return 1;
2196 }
2197
2198
2199 /*
2200 * bintrans_write_instruction__lui():
2201 */
2202 static int bintrans_write_instruction__lui(unsigned char **addrp,
2203 int rt, int imm)
2204 {
2205 uint32_t *a;
2206
2207 /*
2208 * dc fe 3f 24 ldah t0,-292
2209 * 1f 04 ff 5f fnop
2210 * 88 08 30 b4 stq t0,2184(a0)
2211 */
2212 if (rt != 0) {
2213 int alpha_rt = map_MIPS_to_Alpha[rt];
2214 if (alpha_rt < 0)
2215 alpha_rt = ALPHA_T0;
2216
2217 a = (uint32_t *) *addrp;
2218 *a++ = 0x241f0000 | (alpha_rt << 21) | ((uint32_t)imm & 0xffff);
2219 *addrp = (unsigned char *) a;
2220
2221 if (alpha_rt == ALPHA_T0) {
2222 *a++ = 0x5fff041f; /* fnop */
2223 bintrans_move_Alpha_reg_into_MIPS_reg(addrp, ALPHA_T0, rt);
2224 }
2225 }
2226
2227 bintrans_write_pc_inc(addrp);
2228
2229 return 1;
2230 }
2231
2232
2233 /*
2234 * bintrans_write_instruction__mfmthilo():
2235 */
2236 static int bintrans_write_instruction__mfmthilo(unsigned char **addrp,
2237 int rd, int from_flag, int hi_flag)
2238 {
2239 unsigned char *a;
2240 int ofs;
2241
2242 a = *addrp;
2243
2244 /*
2245 * 18 09 30 a4 ldq t0,hi(a0) (or lo)
2246 * 18 09 30 b4 stq t0,rd(a0)
2247 *
2248 * (or if from_flag is cleared then move the other way, it's
2249 * actually not rd then, but rs...)
2250 */
2251
2252 if (from_flag) {
2253 if (rd != 0) {
2254 /* mfhi or mflo */
2255 if (hi_flag)
2256 ofs = ((size_t)&dummy_cpu.cd.mips.hi) - (size_t)&dummy_cpu;
2257 else
2258 ofs = ((size_t)&dummy_cpu.cd.mips.lo) - (size_t)&dummy_cpu;
2259 *a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x30; *a++ = 0xa4;
2260
2261 bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rd);
2262 }
2263 } else {
2264 /* mthi or mtlo */
2265 bintrans_move_MIPS_reg_into_Alpha_reg(&a, rd, ALPHA_T0);
2266
2267 if (hi_flag)
2268 ofs = ((size_t)&dummy_cpu.cd.mips.hi) - (size_t)&dummy_cpu;
2269 else
2270 ofs = ((size_t)&dummy_cpu.cd.mips.lo) - (size_t)&dummy_cpu;
2271 *a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x30; *a++ = 0xb4;
2272 }
2273
2274 *addrp = a;
2275 bintrans_write_pc_inc(addrp);
2276 return 1;
2277 }
2278
2279
2280 /*
2281 * bintrans_write_instruction__mfc_mtc():
2282 */
2283 static int bintrans_write_instruction__mfc_mtc(struct memory *mem,
2284 unsigned char **addrp, int coproc_nr, int flag64bit, int rt,
2285 int rd, int mtcflag)
2286 {
2287 uint32_t *a, *jump;
2288 int ofs;
2289
2290 /*
2291 * NOTE: Only a few registers are readable without side effects.
2292 */
2293 if (rt == 0 && !mtcflag)
2294 return 0;
2295
2296 if (coproc_nr >= 1)
2297 return 0;
2298
2299 if (rd == COP0_RANDOM || rd == COP0_COUNT)
2300 return 0;
2301
2302
2303 /*************************************************************
2304 *
2305 * TODO: Check for kernel mode, or Coproc X usability bit!
2306 *
2307 *************************************************************/
2308
2309 a = (uint32_t *) *addrp;
2310
2311 ofs = ((size_t)&dummy_cpu.cd.mips.coproc[0]) - (size_t)&dummy_cpu;
2312 *a++ = 0xa4300000 | (ofs & 0xffff); /* ldq t0,coproc[0](a0) */
2313
2314 ofs = ((size_t)&dummy_coproc.reg[rd]) - (size_t)&dummy_coproc;
2315 *a++ = 0xa4410000 | (ofs & 0xffff); /* ldq t1,reg_rd(t0) */
2316
2317 if (mtcflag) {
2318 /* mtc: */
2319 *addrp = (unsigned char *) a;
2320 bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rt, ALPHA_T0);
2321 a = (uint32_t *) *addrp;
2322
2323 if (!flag64bit) {
2324 *a++ = 0x40201001; /* addl t0,0,t0 */
2325 *a++ = 0x40401002; /* addl t1,0,t1 */
2326 }
2327
2328 /*
2329 * In the general case: Only allow mtc if it does NOT
2330 * change the register!!
2331 */
2332
2333 switch (rd) {
2334 case COP0_INDEX:
2335 break;
2336
2337 case COP0_EPC:
2338 break;
2339
2340 /* TODO: Some bits are not writable */
2341 case COP0_ENTRYLO0:
2342 case COP0_ENTRYLO1:
2343 break;
2344
2345 case COP0_ENTRYHI:
2346 /*
2347 * Entryhi is ok to write to, as long as the
2348 * ASID isn't changed. (That would require
2349 * cache invalidations etc. Instead of checking
2350 * for MMU3K vs others, we just assume that all the
2351 * lowest 12 bits must be the same.
2352 */
2353 /* ff 0f bf 20 lda t4,0x0fff */
2354 /* 03 00 25 44 and t0,t4,t2 */
2355 /* 04 00 45 44 and t1,t4,t3 */
2356 /* a3 05 64 40 cmpeq t2,t3,t2 */
2357 /* 01 00 60 f4 bne t2,<ok> */
2358 *a++ = 0x20bf0fff;
2359 *a++ = 0x44250003;
2360 *a++ = 0x44450004;
2361 *a++ = 0x406405a3;
2362 jump = a;
2363 *a++ = 0; /* later */
2364 *addrp = (unsigned char *) a;
2365 bintrans_write_chunkreturn_fail(addrp);
2366 a = (uint32_t *) *addrp;
2367 *jump = 0xf4600000 | (((size_t)a - (size_t)jump - 4) / 4);
2368 break;
2369
2370 case COP0_STATUS:
2371 /* Only allow updates to the status register if
2372 the interrupt enable bits were changed, but no
2373 other bits! */
2374 if (mem->bintrans_32bit_only) {
2375 /* R3000 etc. */
2376 /* t4 = 0x0fe70000; */
2377 *a++ = 0x20bf0000;
2378 *a++ = 0x24a50fe7;
2379 } else {
2380 /* fe 00 bf 20 lda t4,0x00fe */
2381 /* ff ff a5 24 ldah t4,-1(t4) */
2382 *a++ = 0x20bf0000;
2383 *a++ = 0x24a5ffff;
2384 }
2385
2386 /* 03 00 25 44 and t0,t4,t2 */
2387 /* 04 00 45 44 and t1,t4,t3 */
2388 /* a3 05 64 40 cmpeq t2,t3,t2 */
2389 /* 01 00 60 f4 bne t2,<ok> */
2390 *a++ = 0x44250003;
2391 *a++ = 0x44450004;
2392 *a++ = 0x406405a3;
2393 jump = a;
2394 *a++ = 0; /* later */
2395 *addrp = (unsigned char *) a;
2396 bintrans_write_chunkreturn_fail(addrp);
2397 a = (uint32_t *) *addrp;
2398 *jump = 0xf4600000 | (((size_t)a - (size_t)jump - 4) / 4);
2399
2400 /* If enabling interrupt bits would cause an
2401 exception, then don't do it: */
2402 ofs = ((size_t)&dummy_cpu.cd.mips.coproc[0]) - (size_t)&dummy_cpu;
2403 *a++ = 0xa4900000 | (ofs & 0xffff); /* ldq t3,coproc[0](a0) */
2404 ofs = ((size_t)&dummy_coproc.reg[COP0_CAUSE]) - (size_t)&dummy_coproc;
2405 *a++ = 0xa4a40000 | (ofs & 0xffff); /* ldq t4,reg_rd(t3) */
2406
2407 /* 02 00 a1 44 and t4,t0,t1 */
2408 /* 83 16 41 48 srl t1,0x8,t2 */
2409 /* 04 f0 7f 44 and t2,0xff,t3 */
2410 *a++ = 0x44a10002;
2411 *a++ = 0x48411683;
2412 *a++ = 0x447ff004;
2413 /* 01 00 80 e4 beq t3,<ok> */
2414 jump = a;
2415 *a++ = 0; /* later */
2416 *addrp = (unsigned char *) a;
2417 bintrans_write_chunkreturn_fail(addrp);
2418 a = (uint32_t *) *addrp;
2419 *jump = 0xe4800000 | (((size_t)a - (size_t)jump - 4) / 4);
2420 break;
2421
2422 default:
2423 /* a3 05 22 40 cmpeq t0,t1,t2 */
2424 /* 01 00 60 f4 bne t2,<ok> */
2425 *a++ = 0x402205a3;
2426 jump = a;
2427 *a++ = 0; /* later */
2428 *addrp = (unsigned char *) a;
2429 bintrans_write_chunkreturn_fail(addrp);
2430 a = (uint32_t *) *addrp;
2431 *jump = 0xf4600000 | (((size_t)a - (size_t)jump - 4) / 4);
2432 }
2433
2434 *a++ = 0x40201402; /* addq t0,0,t1 */
2435
2436 ofs = ((size_t)&dummy_cpu.cd.mips.coproc[0]) - (size_t)&dummy_cpu;
2437 *a++ = 0xa4300000 | (ofs & 0xffff); /* ldq t0,coproc[0](a0) */
2438 ofs = ((size_t)&dummy_coproc.reg[rd]) - (size_t)&dummy_coproc;
2439 *a++ = 0xb4410000 | (ofs & 0xffff); /* stq t1,reg_rd(t0) */
2440 } else {
2441 /* mfc: */
2442 if (!flag64bit) {
2443 *a++ = 0x40401002; /* addl t1,0,t1 */
2444 }
2445
2446 *addrp = (unsigned char *) a;
2447 bintrans_move_Alpha_reg_into_MIPS_reg(addrp, ALPHA_T1, rt);
2448 a = (uint32_t *) *addrp;
2449 }
2450
2451 *addrp = (unsigned char *) a;
2452
2453 bintrans_write_pc_inc(addrp);
2454 return 1;
2455 }
2456
2457
2458 /*
2459 * bintrans_write_instruction__tlb_rfe_etc():
2460 */
2461 static int bintrans_write_instruction__tlb_rfe_etc(unsigned char **addrp,
2462 int itype)
2463 {
2464 uint32_t *a;
2465 int ofs = 0;
2466
2467 switch (itype) {
2468 case CALL_TLBWI:
2469 case CALL_TLBWR:
2470 case CALL_TLBP:
2471 case CALL_TLBR:
2472 case CALL_RFE:
2473 case CALL_ERET:
2474 case CALL_BREAK:
2475 case CALL_SYSCALL:
2476 break;
2477 default:
2478 return 0;
2479 }
2480
2481 a = (uint32_t *) *addrp;
2482
2483 /* a0 = pointer to the cpu struct */
2484
2485 switch (itype) {
2486 case CALL_TLBWI:
2487 case CALL_TLBWR:
2488 /* a1 = 0 for indexed, 1 for random */
2489 *a++ = 0x223f0000 | (itype == CALL_TLBWR);
2490 break;
2491 case CALL_TLBP:
2492 case CALL_TLBR:
2493 /* a1 = 0 for probe, 1 for read */
2494 *a++ = 0x223f0000 | (itype == CALL_TLBR);
2495 break;
2496 case CALL_BREAK:
2497 case CALL_SYSCALL:
2498 *a++ = 0x223f0000 | (itype == CALL_BREAK? EXCEPTION_BP : EXCEPTION_SYS);
2499 break;
2500 }
2501
2502 /* Put PC into the cpu struct (both pc and pc_last). */
2503 *a++ = 0xb4d00000 | ofs_pc; /* stq t5,"pc"(a0) */
2504 *a++ = 0xb4d00000 | ofs_pc_last;/* stq t5,"pc_last"(a0) */
2505
2506 /* Save a0 and the old return address on the stack: */
2507 *a++ = 0x23deff80; /* lda sp,-128(sp) */
2508
2509 *a++ = 0xb75e0000; /* stq ra,0(sp) */
2510 *a++ = 0xb61e0008; /* stq a0,8(sp) */
2511 *a++ = 0xb0fe0018; /* stl t6,24(sp) */
2512 *a++ = 0xb71e0020; /* stq t10,32(sp) */
2513 *a++ = 0xb73e0028; /* stq t11,40(sp) */
2514 *a++ = 0xb51e0030; /* stq t7,48(sp) */
2515 *a++ = 0xb6de0038; /* stq t8,56(sp) */
2516 *a++ = 0xb6fe0040; /* stq t9,64(sp) */
2517
2518 switch (itype) {
2519 case CALL_TLBP:
2520 case CALL_TLBR:
2521 ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_fast_tlbpr) - (size_t)&dummy_cpu;
2522 break;
2523 case CALL_TLBWR:
2524 case CALL_TLBWI:
2525 ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_fast_tlbwri) - (size_t)&dummy_cpu;
2526 break;
2527 case CALL_RFE:
2528 ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_fast_rfe) - (size_t)&dummy_cpu;
2529 break;
2530 case CALL_ERET:
2531 ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_fast_eret) - (size_t)&dummy_cpu;
2532 break;
2533 case CALL_BREAK:
2534 case CALL_SYSCALL:
2535 ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_simple_exception) - (size_t)&dummy_cpu;
2536 break;
2537 }
2538
2539 *a++ = 0xa7700000 | ofs; /* ldq t12,0(a0) */
2540
2541 /* Call bintrans_fast_tlbwr: */
2542 *a++ = 0x6b5b4000; /* jsr ra,(t12),<after> */
2543
2544 /* Restore the old return address and a0 from the stack: */
2545 *a++ = 0xa75e0000; /* ldq ra,0(sp) */
2546 *a++ = 0xa61e0008; /* ldq a0,8(sp) */
2547 *a++ = 0xa0fe0018; /* ldl t6,24(sp) */
2548 *a++ = 0xa71e0020; /* ldq t10,32(sp) */
2549 *a++ = 0xa73e0028; /* ldq t11,40(sp) */
2550 *a++ = 0xa51e0030; /* ldq t7,48(sp) */
2551 *a++ = 0xa6de0038; /* ldq t8,56(sp) */
2552 *a++ = 0xa6fe0040; /* ldq t9,64(sp) */
2553
2554 *a++ = 0x23de0080; /* lda sp,128(sp) */
2555
2556 /* Load PC from the cpu struct. */
2557 *a++ = 0xa4d00000 | ofs_pc; /* ldq t5,"pc"(a0) */
2558
2559 *addrp = (unsigned char *) a;
2560
2561 switch (itype) {
2562 case CALL_ERET:
2563 case CALL_BREAK:
2564 case CALL_SYSCALL:
2565 break;
2566 default:
2567 bintrans_write_pc_inc(addrp);
2568 }
2569
2570 return 1;
2571 }
2572
2573
2574 /*
2575 * bintrans_backend_init():
2576 *
2577 * This is neccessary for broken 2.95.4 compilers on FreeBSD/Alpha 4.9,
2578 * and probably a few others. (For Compaq's CC, and for gcc 3.x, this
2579 * wouldn't be neccessary, and the old code would have worked.)
2580 */
2581 static void bintrans_backend_init(void)
2582 {
2583 int size;
2584 uint32_t *p;
2585
2586
2587 /* "runchunk": */
2588 size = 256; /* NOTE: This MUST be enough, or we fail */
2589 p = (uint32_t *)mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC,
2590 MAP_ANON | MAP_PRIVATE, -1, 0);
2591
2592 /* If mmap() failed, try malloc(): */
2593 if (p == NULL) {
2594 p = malloc(size);
2595 if (p == NULL) {
2596 fprintf(stderr, "bintrans_backend_init(): out of memory\n");
2597 exit(1);
2598 }
2599 }
2600
2601 bintrans_runchunk = (void *)p;
2602
2603 *p++ = 0x23deffa0; /* lda sp,-0x60(sp) */
2604 *p++ = 0xb75e0000; /* stq ra,0(sp) */
2605 *p++ = 0xb53e0008; /* stq s0,8(sp) */
2606 *p++ = 0xb55e0010; /* stq s1,16(sp) */
2607 *p++ = 0xb57e0018; /* stq s2,24(sp) */
2608 *p++ = 0xb59e0020; /* stq s3,32(sp) */
2609 *p++ = 0xb5be0028; /* stq s4,40(sp) */
2610 *p++ = 0xb5de0030; /* stq s5,48(sp) */
2611 *p++ = 0xb5fe0038; /* stq s6,56(sp) */
2612 *p++ = 0xb7be0058; /* stq gp,0x58(sp) */
2613
2614 *p++ = 0xa4d00000 | ofs_pc; /* ldq t5,"pc"(a0) */
2615 *p++ = 0xa0f00000 | ofs_n; /* ldl t6,"bintrans_instructions_executed"(a0) */
2616 *p++ = 0xa5100000 | ofs_a0; /* ldq t7,"a0"(a0) */
2617 *p++ = 0xa6d00000 | ofs_a1; /* ldq t8,"a1"(a0) */
2618 *p++ = 0xa6f00000 | ofs_s0; /* ldq t9,"s0"(a0) */
2619 *p++ = 0xa1300000 | ofs_ds; /* ldl s0,"delay_slot"(a0) */
2620 *p++ = 0xa5500000 | ofs_ja; /* ldq s1,"delay_jmpaddr"(a0) */
2621 *p++ = 0xa5700000 | ofs_sp; /* ldq s2,"gpr[sp]"(a0) */
2622 *p++ = 0xa5900000 | ofs_ra; /* ldq s3,"gpr[ra]"(a0) */
2623 *p++ = 0xa5b00000 | ofs_t0; /* ldq s4,"gpr[t0]"(a0) */
2624 *p++ = 0xa5d00000 | ofs_t1; /* ldq s5,"gpr[t1]"(a0) */
2625 *p++ = 0xa5f00000 | ofs_t2; /* ldq s6,"gpr[t2]"(a0) */
2626 *p++ = 0xa7100000 | ofs_tbl0; /* ldq t10,table0(a0) */
2627 *p++ = 0xa7300000 | ofs_v0; /* ldq t11,"gpr[v0]"(a0) */
2628
2629 *p++ = 0x6b514000; /* jsr ra,(a1),<back> */
2630
2631 *p++ = 0xb4d00000 | ofs_pc; /* stq t5,"pc"(a0) */
2632 *p++ = 0xb0f00000 | ofs_n; /* stl t6,"bintrans_instructions_executed"(a0) */
2633 *p++ = 0xb5100000 | ofs_a0; /* stq t7,"a0"(a0) */
2634 *p++ = 0xb6d00000 | ofs_a1; /* stq t8,"a1"(a0) */
2635 *p++ = 0xb6f00000 | ofs_s0; /* stq t9,"s0"(a0) */
2636 *p++ = 0xb1300000 | ofs_ds; /* stl s0,"delay_slot"(a0) */
2637 *p++ = 0xb5500000 | ofs_ja; /* stq s1,"delay_jmpaddr"(a0) */
2638 *p++ = 0xb5700000 | ofs_sp; /* stq s2,"gpr[sp]"(a0) */
2639 *p++ = 0xb5900000 | ofs_ra; /* stq s3,"gpr[ra]"(a0) */
2640 *p++ = 0xb5b00000 | ofs_t0; /* stq s4,"gpr[t0]"(a0) */
2641 *p++ = 0xb5d00000 | ofs_t1; /* stq s5,"gpr[t1]"(a0) */
2642 *p++ = 0xb5f00000 | ofs_t2; /* stq s6,"gpr[t2]"(a0) */
2643 *p++ = 0xb7300000 | ofs_v0; /* stq t11,"gpr[v0]"(a0) */
2644
2645 *p++ = 0xa75e0000; /* ldq ra,0(sp) */
2646 *p++ = 0xa53e0008; /* ldq s0,8(sp) */
2647 *p++ = 0xa55e0010; /* ldq s1,16(sp) */
2648 *p++ = 0xa57e0018; /* ldq s2,24(sp) */
2649 *p++ = 0xa59e0020; /* ldq s3,32(sp) */
2650 *p++ = 0xa5be0028; /* ldq s4,40(sp) */
2651 *p++ = 0xa5de0030; /* ldq s5,48(sp) */
2652 *p++ = 0xa5fe0038; /* ldq s6,56(sp) */
2653 *p++ = 0xa7be0058; /* ldq gp,0x58(sp) */
2654 *p++ = 0x23de0060; /* lda sp,0x60(sp) */
2655 *p++ = 0x6bfa8001; /* ret */
2656
2657
2658 /* "jump to 32bit pc": */
2659 size = 128; /* WARNING! Don't make this too small. */
2660 p = (uint32_t *)mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC,
2661 MAP_ANON | MAP_PRIVATE, -1, 0);
2662
2663 /* If mmap() failed, try malloc(): */
2664 if (p == NULL) {
2665 p = malloc(size);
2666 if (p == NULL) {
2667 fprintf(stderr, "bintrans_backend_init(): out of memory\n");
2668 exit(1);
2669 }
2670 }
2671
2672 bintrans_jump_to_32bit_pc = (void *)p;
2673
2674 /* Don't execute too many instructions: */
2675 *p++ = 0x205f0000 | (N_SAFE_BINTRANS_LIMIT-1); /* lda t1,safe-1 */
2676
2677 *p++ = 0x40e20da1; /* cmple t6,t1,t0 */
2678 *p++ = 0xf4200001; /* bne */
2679 *p++ = 0x6bfa8001; /* ret */
2680
2681 *p++ = 0x40c01411; /* addq t5,0,a1 */
2682
2683 /*
2684 * Special case for 32-bit addressing:
2685 *
2686 * t1 = 1023;
2687 * t2 = ((a1 >> 22) & t1) * sizeof(void *);
2688 * t3 = ((a1 >> 12) & t1) * sizeof(void *);
2689 * t1 = a1 & 4095;
2690 */
2691 *p++ = 0x205f1ff8; /* lda t1,1023 * 8 */
2692 *p++ = 0x4a227683; /* srl a1,19,t2 */
2693 *p++ = 0x4a213684; /* srl a1, 9,t3 */
2694 *p++ = 0x44620003; /* and t2,t1,t2 */
2695
2696 /*
2697 * t10 is vaddr_to_hostaddr_table0
2698 *
2699 * a3 = tbl0[t2] (load entry from tbl0)
2700 */
2701 *p++ = 0x43030412; /* addq t10,t2,a2 */
2702 *p++ = 0x44820004; /* and t3,t1,t3 */
2703 *p++ = 0xa6720000; /* ldq a3,0(a2) */
2704 *p++ = 0x205f0ffc; /* lda t1,0xffc */
2705
2706 /*
2707 * a3 = tbl1[t3] (load entry from tbl1 (whic is a3))
2708 */
2709 *p++ = 0x42640413; /* addq a3,t3,a3 */
2710 *p++ = 0x46220002; /* and a1,t1,t1 */
2711
2712 *p++ = 0xa6730000 | ofs_c0; /* ldq a3,chunks[0](a3) */
2713
2714 /*
2715 * NULL? Then just return.
2716 */
2717 *p++ = 0xf6600001; /* bne a3,<ok> */
2718 *p++ = 0x6bfa8001; /* ret */
2719
2720 *p++ = 0x40530402; /* addq t1,a3,t1 */
2721 *p++ = 0xa0220000; /* ldl t0,0(t1) */
2722
2723 /* No translation? Then return. */
2724 *p++ = 0xe4200003; /* beq t0,<skip> */
2725
2726 *p++ = 0xa4700000 | ofs_cb; /* ldq t2,chunk_base_address(a0) */
2727
2728 *p++ = 0x40230401; /* addq t0,t2,t0 */
2729 *p++ = 0x6be10000; /* jmp (t0) */
2730
2731 /* Return to the main translation loop. */
2732 *p++ = 0x6bfa8001; /* ret */
2733 }
2734

  ViewVC Help
Powered by ViewVC 1.1.26