/[gxemul]/upstream/0.3.3.2/src/bintrans_alpha.c
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /upstream/0.3.3.2/src/bintrans_alpha.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 9 - (hide annotations)
Mon Oct 8 16:18:22 2007 UTC (16 years, 8 months ago) by dpavlin
File MIME type: text/plain
File size: 83821 byte(s)
0.3.3.2
1 dpavlin 2 /*
2     * Copyright (C) 2004-2005 Anders Gavare. All rights reserved.
3     *
4     * Redistribution and use in source and binary forms, with or without
5     * modification, are permitted provided that the following conditions are met:
6     *
7     * 1. Redistributions of source code must retain the above copyright
8     * notice, this list of conditions and the following disclaimer.
9     * 2. Redistributions in binary form must reproduce the above copyright
10     * notice, this list of conditions and the following disclaimer in the
11     * documentation and/or other materials provided with the distribution.
12     * 3. The name of the author may not be used to endorse or promote products
13     * derived from this software without specific prior written permission.
14     *
15     * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16     * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17     * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18     * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19     * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20     * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21     * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22     * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23     * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24     * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25     * SUCH DAMAGE.
26     *
27     *
28 dpavlin 4 * $Id: bintrans_alpha.c,v 1.118 2005/04/18 22:30:31 debug Exp $
29 dpavlin 2 *
30     * Alpha specific code for dynamic binary translation.
31     *
32     * See bintrans.c for more information. Included from bintrans.c.
33     *
34     *
35     * Some Alpha registers that are reasonable to use:
36     *
37     * t5..t7 6..8 3
38     * s0..s6 9..15 7
39     * a1..a5 17..21 5
40     * t8..t11 22..25 4
41     *
42 dpavlin 4 * These can be "mapped" to MIPS registers in the translated code, except a0
43     * which points to the cpu struct, and t0..t4 (or so) which are used by the
44     * translated code as temporaries.
45 dpavlin 2 *
46 dpavlin 4 * 3 + 7 + 5 + 4 = 19 available registers. Of course, all (except s0..s6) must
47     * be saved when calling external functions, such as when calling tlbp and
48     * other external functions.
49 dpavlin 2 *
50 dpavlin 4 * Which are the 19 most commonly used MIPS registers? (This will include the
51     * pc, and the "current number of executed translated instructions".)
52 dpavlin 2 *
53     * The current allocation is as follows:
54     *
55     * Alpha: MIPS:
56     * ------ -----
57     *
58     * t5 pc (64-bit)
59     * t6 bintrans_instructions_executed (32-bit int)
60     * t7 a0 (mips register 4) (64-bit)
61     * t8 a1 (mips register 5) (64-bit)
62     * t9 s0 (mips register 16) (64-bit)
63     * t10 table0 cached (for load/store)
64     * t11 v0 (mips register 2) (64-bit)
65     * s0 delay_slot (32-bit int)
66     * s1 delay_jmpaddr (64-bit)
67     * s2 sp (mips register 29) (64-bit)
68     * s3 ra (mips register 31) (64-bit)
69     * s4 t0 (mips register 8) (64-bit)
70     * s5 t1 (mips register 9) (64-bit)
71     * s6 t2 (mips register 10) (64-bit)
72     */
73    
/*
 *  Pseudo "MIPS register" numbers for state that is not a general purpose
 *  register.  They are negative, so they are distinct from the GPR numbers
 *  0..31 that are used to index map_MIPS_to_Alpha[].
 */
#define	MIPSREG_PC			-3
#define	MIPSREG_DELAY_SLOT		-2
#define	MIPSREG_DELAY_JMPADDR		-1

/*
 *  Alpha integer register numbers, by their software names.  These values
 *  are OR:ed (after shifting) into the instruction words that the code
 *  generator emits.
 */
#define	ALPHA_T0		1
#define	ALPHA_T1		2
#define	ALPHA_T2		3
#define	ALPHA_T3		4
#define	ALPHA_T4		5
#define	ALPHA_T5		6
#define	ALPHA_T6		7
#define	ALPHA_T7		8
#define	ALPHA_S0		9
#define	ALPHA_S1		10
#define	ALPHA_S2		11
#define	ALPHA_S3		12
#define	ALPHA_S4		13
#define	ALPHA_S5		14
#define	ALPHA_S6		15
#define	ALPHA_A0		16
#define	ALPHA_A1		17
#define	ALPHA_A2		18
#define	ALPHA_A3		19
#define	ALPHA_A4		20
#define	ALPHA_A5		21
#define	ALPHA_T8		22
#define	ALPHA_T9		23
#define	ALPHA_T10		24
#define	ALPHA_T11		25
#define	ALPHA_ZERO		31
104    
105     static int map_MIPS_to_Alpha[32] = {
106     ALPHA_ZERO, -1, ALPHA_T11, -1, /* 0 .. 3 */
107     ALPHA_T7, ALPHA_T8, -1, -1, /* 4 .. 7 */
108     ALPHA_S4, ALPHA_S5, ALPHA_S6, -1, /* 8 .. 11 */
109     -1, -1, -1, -1, /* 12 .. 15 */
110     ALPHA_T9, -1, -1, -1, /* 16 .. 19 */
111     -1, -1, -1, -1, /* 20 .. 23 */
112     -1, -1, -1, -1, /* 24 .. 27 */
113     -1, ALPHA_S2, -1, ALPHA_S3, /* 28 .. 31 */
114     };
115    
116    
117     struct cpu dummy_cpu;
118     struct mips_coproc dummy_coproc;
119     struct vth32_table dummy_vth32_table;
120    
/*
 *  Hand-assembled Alpha machine code (little-endian byte order): an imb
 *  instruction followed by ret, padded with nop/unop.  It is called as a
 *  function by bintrans_host_cacheinvalidate().
 */
unsigned char bintrans_alpha_imb[32] = {
	0x86, 0x00, 0x00, 0x00,		/*  imb   */
	0x01, 0x80, 0xfa, 0x6b,		/*  ret   */
	0x1f, 0x04, 0xff, 0x47,		/*  nop   */
	0x00, 0x00, 0xfe, 0x2e,		/*  unop  */
	0x1f, 0x04, 0xff, 0x47,		/*  nop   */
	0x00, 0x00, 0xfe, 0x2e,		/*  unop  */
	0x1f, 0x04, 0xff, 0x47,		/*  nop   */
	0x00, 0x00, 0xfe, 0x2e		/*  unop  */
};
131    
132    
133     /*
134     * bintrans_host_cacheinvalidate()
135     *
136     * Invalidate the host's instruction cache. On Alpha, we do this by
137     * executing an imb instruction.
138     *
139     * NOTE: A simple asm("imb"); would be enough here, but not all
140     * compilers have such simple constructs, so an entire function has to
141     * be written as bintrans_alpha_imb[] above.
142     */
143     static void bintrans_host_cacheinvalidate(unsigned char *p, size_t len)
144     {
145     /* Long form of ``asm("imb");'' */
146    
147     void (*f)(void);
148     f = (void *)&bintrans_alpha_imb[0];
149     f();
150     }
151    
152    
/*
 *  lda     sp,-128(sp)	some margin
 *  stq     ra,0(sp)
 *  stq     s0,8(sp)
 *  stq     s1,16(sp)
 *  stq     s2,24(sp)
 *  stq     s3,32(sp)
 *  stq     s4,40(sp)
 *  stq     s5,48(sp)
 *  stq     s6,56(sp)
 *
 *  jsr     ra,(a1),<back>
 *  back:
 *
 *  ldq     ra,0(sp)
 *  ldq     s0,8(sp)
 *  ldq     s1,16(sp)
 *  ldq     s2,24(sp)
 *  ldq     s3,32(sp)
 *  ldq     s4,40(sp)
 *  ldq     s5,48(sp)
 *  ldq     s6,56(sp)
 *  lda     sp,128(sp)
 *  ret
 */

/*
 *  Byte offsets of various members, computed as the address of the member
 *  minus the address of the enclosing dummy object.
 *
 *  note: offsetof (in stddef.h) could possibly be used, but I'm not sure
 *  if it will take care of the compiler problems...
 */
#define ofs_pc	(((size_t)&dummy_cpu.pc) - ((size_t)&dummy_cpu))
#define ofs_pc_last	(((size_t)&dummy_cpu.cd.mips.pc_last) - ((size_t)&dummy_cpu))
#define ofs_n	(((size_t)&dummy_cpu.cd.mips.bintrans_instructions_executed) - ((size_t)&dummy_cpu))
#define ofs_ds	(((size_t)&dummy_cpu.cd.mips.delay_slot) - ((size_t)&dummy_cpu))
#define ofs_ja	(((size_t)&dummy_cpu.cd.mips.delay_jmpaddr) - ((size_t)&dummy_cpu))
#define ofs_sp	(((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_SP]) - ((size_t)&dummy_cpu))
#define ofs_ra	(((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_RA]) - ((size_t)&dummy_cpu))
#define ofs_a0	(((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_A0]) - ((size_t)&dummy_cpu))
#define ofs_a1	(((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_A1]) - ((size_t)&dummy_cpu))
#define ofs_t0	(((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_T0]) - ((size_t)&dummy_cpu))
#define ofs_t1	(((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_T1]) - ((size_t)&dummy_cpu))
#define ofs_t2	(((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_T2]) - ((size_t)&dummy_cpu))
#define ofs_v0	(((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_V0]) - ((size_t)&dummy_cpu))
#define ofs_s0	(((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_S0]) - ((size_t)&dummy_cpu))
#define ofs_tbl0	(((size_t)&dummy_cpu.cd.mips.vaddr_to_hostaddr_table0) - ((size_t)&dummy_cpu))
#define ofs_c0	((size_t)&dummy_vth32_table.bintrans_chunks[0] - (size_t)&dummy_vth32_table)
#define ofs_cb	(((size_t)&dummy_cpu.cd.mips.chunk_base_address) - (size_t)&dummy_cpu)
197    
198    
199     static uint32_t bintrans_alpha_loadstore_32bit[19] = {
200     /*
201     * t1 = 1023;
202     * t2 = ((a1 >> 22) & t1) * sizeof(void *);
203     * t3 = ((a1 >> 12) & t1) * sizeof(void *);
204     * t1 = a1 & 4095;
205     *
206     * f8 1f 5f 20 lda t1,1023 * 8
207     * 83 76 22 4a srl a1,19,t2
208     * 84 36 21 4a srl a1, 9,t3
209     * 03 00 62 44 and t2,t1,t2
210     */
211     0x205f1ff8,
212     0x4a227683,
213     0x4a213684,
214     0x44620003,
215    
216     /*
217     * t10 is vaddr_to_hostaddr_table0
218     *
219     * a3 = tbl0[t2] (load entry from tbl0)
220     * 12 04 03 43 addq t10,t2,a2
221     */
222     0x43030412,
223    
224     /* 04 00 82 44 and t3,t1,t3 */
225     0x44820004,
226    
227     /* 00 00 72 a6 ldq a3,0(a2) */
228     0xa6720000,
229    
230     /* ff 0f 5f 20 lda t1,4095 */
231     0x205f0fff,
232    
233     /*
234     * a3 = tbl1[t3] (load entry from tbl1 (which is a3))
235     * 13 04 64 42 addq a3,t3,a3
236     */
237     0x42640413,
238    
239     /* 02 00 22 46 and a1,t1,t1 */
240     0x46220002,
241    
242     /* 00 00 73 a6 ldq a3,0(a3) */
243     0xa6730000,
244    
245     /* NULL? Then return failure at once. */
246 dpavlin 4 /* beq a3, return */
247     0xe6600004,
248 dpavlin 2
249     /* 01 30 60 46 and a3,0x1,t0 */
250     0x46603001,
251    
252     /* Get rid of the lowest bit: */
253     /* 33 05 61 42 subq a3,t0,a3 */
254     0x42610533,
255    
256     /* The rest of the load/store code was written with t3 as the address. */
257    
258     /* Add the offset within the page: */
259     /* 04 04 62 42 addq a3,t1,t3 */
260     0x42620404,
261    
262 dpavlin 4 0x6be50000, /* jmp (t4) */
263    
264     /* return: */
265     0x243f0000 | (BINTRANS_DONT_RUN_NEXT >> 16), /* ldah t0,256 */
266     0x44270407, /* or t0,t6,t6 */
267     0x6bfa8001 /* ret */
268 dpavlin 2 };
269    
270     static void (*bintrans_runchunk)(struct cpu *, unsigned char *);
271    
272     static void (*bintrans_jump_to_32bit_pc)(struct cpu *);
273    
274     static void (*bintrans_loadstore_32bit)
275     (struct cpu *) = (void *)bintrans_alpha_loadstore_32bit;
276    
277    
278     /*
279     * bintrans_write_quickjump():
280     */
281     static void bintrans_write_quickjump(struct memory *mem,
282     unsigned char *quickjump_code, uint32_t chunkoffset)
283     {
284     int ofs;
285     uint64_t alpha_addr = chunkoffset +
286     (size_t)mem->translation_code_chunk_space;
287     uint32_t *a = (uint32_t *)quickjump_code;
288    
289     ofs = (alpha_addr - ((size_t)a+4)) / 4;
290    
291     /* printf("chunkoffset=%i, %016llx %016llx %i\n",
292     chunkoffset, (long long)alpha_addr, (long long)a, ofs); */
293    
294     if (ofs > -0xfffff && ofs < 0xfffff) {
295     *a++ = 0xc3e00000 | (ofs & 0x1fffff); /* br <chunk> */
296     }
297     }
298    
299    
/*
 *  bintrans_write_chunkreturn():
 *
 *  Emits a single Alpha "ret" instruction at *addrp, and advances *addrp
 *  past it (by 4 bytes).
 */
static void bintrans_write_chunkreturn(unsigned char **addrp)
{
	uint32_t *a = (uint32_t *) *addrp;

	*a++ = 0x6bfa8001;	/*  ret  */

	*addrp = (unsigned char *) a;
}
309    
310    
311     /*
312     * bintrans_write_chunkreturn_fail():
313     */
314     static void bintrans_write_chunkreturn_fail(unsigned char **addrp)
315     {
316     uint32_t *a = (uint32_t *) *addrp;
317     /* 00 01 3f 24 ldah t0,256 */
318     /* 07 04 27 44 or t0,t6,t6 */
319     *a++ = 0x243f0000 | (BINTRANS_DONT_RUN_NEXT >> 16);
320     *a++ = 0x44270407;
321     *a++ = 0x6bfa8001; /* ret */
322     *addrp = (unsigned char *) a;
323     }
324    
325    
326     /*
327     * bintrans_move_MIPS_reg_into_Alpha_reg():
328     */
329     static void bintrans_move_MIPS_reg_into_Alpha_reg(unsigned char **addrp, int mipsreg, int alphareg)
330     {
331     uint32_t *a = (uint32_t *) *addrp;
332     int ofs, alpha_mips_reg;
333    
334     switch (mipsreg) {
335     case MIPSREG_PC:
336     /* addq t5,0,alphareg */
337     *a++ = 0x40c01400 | alphareg;
338     break;
339     case MIPSREG_DELAY_SLOT:
340     /* addq s0,0,alphareg */
341     *a++ = 0x41201400 | alphareg;
342     break;
343     case MIPSREG_DELAY_JMPADDR:
344     /* addq s1,0,alphareg */
345     *a++ = 0x41401400 | alphareg;
346     break;
347     default:
348     alpha_mips_reg = map_MIPS_to_Alpha[mipsreg];
349     if (alpha_mips_reg < 0) {
350     ofs = ((size_t)&dummy_cpu.cd.mips.gpr[mipsreg]) - (size_t)&dummy_cpu;
351     /* ldq alphareg,gpr[mipsreg](a0) */
352     *a++ = 0xa4100000 | (alphareg << 21) | ofs;
353     } else {
354     /* addq alpha_mips_reg,0,alphareg */
355     *a++ = 0x40001400 | (alpha_mips_reg << 21) | alphareg;
356     }
357     }
358     *addrp = (unsigned char *) a;
359     }
360    
361    
362     /*
363     * bintrans_move_Alpha_reg_into_MIPS_reg():
364     */
365     static void bintrans_move_Alpha_reg_into_MIPS_reg(unsigned char **addrp, int alphareg, int mipsreg)
366     {
367     uint32_t *a = (uint32_t *) *addrp;
368     int ofs, alpha_mips_reg;
369    
370     switch (mipsreg) {
371     case MIPSREG_PC:
372     /* addq alphareg,0,t5 */
373     *a++ = 0x40001406 | (alphareg << 21);
374     break;
375     case MIPSREG_DELAY_SLOT:
376     /* addq alphareg,0,s0 */
377     *a++ = 0x40001409 | (alphareg << 21);
378     break;
379     case MIPSREG_DELAY_JMPADDR:
380     /* addq alphareg,0,s1 */
381     *a++ = 0x4000140a | (alphareg << 21);
382     break;
383     case 0: /* the zero register */
384     break;
385     default:
386     alpha_mips_reg = map_MIPS_to_Alpha[mipsreg];
387     if (alpha_mips_reg < 0) {
388     /* stq alphareg,gpr[mipsreg](a0) */
389     ofs = ((size_t)&dummy_cpu.cd.mips.gpr[mipsreg]) - (size_t)&dummy_cpu;
390     *a++ = 0xb4100000 | (alphareg << 21) | ofs;
391     } else {
392     /* addq alphareg,0,alpha_mips_reg */
393     *a++ = 0x40001400 | (alphareg << 21) | alpha_mips_reg;
394     }
395     }
396     *addrp = (unsigned char *) a;
397     }
398    
399    
/*
 *  bintrans_write_pc_inc():
 *
 *  Emits two instructions at *addrp: one which adds 1 to t6 (the number of
 *  executed translated instructions) and one which adds 4 to t5 (the pc).
 *  *addrp is advanced past the emitted code (8 bytes).
 */
static void bintrans_write_pc_inc(unsigned char **addrp)
{
	uint32_t *a = (uint32_t *) *addrp;

	/*  lda t6,1(t6)  */
	*a++ = 0x20e70001;

	/*  lda t5,4(t5)  */
	*a++ = 0x20c60004;

	*addrp = (unsigned char *) a;
}
415    
416    
417     /*
418     * bintrans_write_instruction__addiu_etc():
419     */
420     static int bintrans_write_instruction__addiu_etc(unsigned char **addrp,
421     int rt, int rs, int imm, int instruction_type)
422     {
423     uint32_t *a;
424     unsigned int uimm;
425     int alpha_rs, alpha_rt;
426    
427     /* TODO: overflow detection for ADDI and DADDI */
428     switch (instruction_type) {
429     case HI6_ADDI:
430     case HI6_DADDI:
431     return 0;
432     }
433    
434     a = (uint32_t *) *addrp;
435    
436     if (rt == 0)
437     goto rt0;
438    
439     uimm = imm & 0xffff;
440    
441     alpha_rs = map_MIPS_to_Alpha[rs];
442     alpha_rt = map_MIPS_to_Alpha[rt];
443    
444     if (uimm == 0 && (instruction_type == HI6_ADDI ||
445     instruction_type == HI6_ADDIU || instruction_type == HI6_DADDI ||
446     instruction_type == HI6_DADDIU || instruction_type == HI6_ORI)) {
447     if (alpha_rs >= 0 && alpha_rt >= 0) {
448     /* addq rs,0,rt */
449     *a++ = 0x40001400 | (alpha_rs << 21) | alpha_rt;
450     } else {
451     *addrp = (unsigned char *) a;
452     bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rs, ALPHA_T0);
453     bintrans_move_Alpha_reg_into_MIPS_reg(addrp, ALPHA_T0, rt);
454     a = (uint32_t *) *addrp;
455     }
456     goto rt0;
457     }
458    
459     if (alpha_rs < 0) {
460     /* ldq t0,"rs"(a0) */
461     *addrp = (unsigned char *) a;
462     bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rs, ALPHA_T0);
463     a = (uint32_t *) *addrp;
464     alpha_rs = ALPHA_T0;
465     }
466    
467     if (alpha_rt < 0)
468     alpha_rt = ALPHA_T0;
469    
470     /* Place the result of the calculation in alpha_rt: */
471    
472     switch (instruction_type) {
473     case HI6_ADDIU:
474     case HI6_DADDIU:
475     case HI6_ADDI:
476     case HI6_DADDI:
477     if (uimm < 256) {
478     if (instruction_type == HI6_ADDI ||
479     instruction_type == HI6_ADDIU) {
480     /* addl rs,uimm,rt */
481     *a++ = 0x40001000 | (alpha_rs << 21)
482     | (uimm << 13) | alpha_rt;
483     } else {
484     /* addq rs,uimm,rt */
485     *a++ = 0x40001400 | (alpha_rs << 21)
486     | (uimm << 13) | alpha_rt;
487     }
488     } else {
489     /* lda rt,imm(rs) */
490     *a++ = 0x20000000 | (alpha_rt << 21) | (alpha_rs << 16) | uimm;
491     if (instruction_type == HI6_ADDI ||
492     instruction_type == HI6_ADDIU) {
493     /* sign extend, 32->64 bits: addl t0,zero,t0 */
494     *a++ = 0x40001000 | (alpha_rt << 21) | alpha_rt;
495     }
496     }
497     break;
498     case HI6_ANDI:
499     case HI6_ORI:
500     case HI6_XORI:
501     if (uimm >= 256) {
502     /* lda t1,4660 */
503     *a++ = 0x205f0000 | uimm;
504     if (uimm & 0x8000) {
505     /* 01 00 42 24 ldah t1,1(t1) <-- if negative only */
506     *a++ = 0x24420001;
507     }
508     }
509    
510     switch (instruction_type) {
511     case HI6_ANDI:
512     if (uimm < 256) {
513     /* and rs,uimm,rt */
514     *a++ = 0x44001000 | (alpha_rs << 21)
515     | (uimm << 13) | alpha_rt;
516     } else {
517     /* and rs,t1,rt */
518     *a++ = 0x44020000 | (alpha_rs << 21) | alpha_rt;
519     }
520     break;
521     case HI6_ORI:
522     if (uimm < 256) {
523     /* or rs,uimm,rt */
524     *a++ = 0x44001400 | (alpha_rs << 21)
525     | (uimm << 13) | alpha_rt;
526     } else {
527     /* or rs,t1,rt */
528     *a++ = 0x44020400 | (alpha_rs << 21) | alpha_rt;
529     }
530     break;
531     case HI6_XORI:
532     if (uimm < 256) {
533     /* xor rs,uimm,rt */
534     *a++ = 0x44001800 | (alpha_rs << 21)
535     | (uimm << 13) | alpha_rt;
536     } else {
537     /* xor rs,t1,rt */
538     *a++ = 0x44020800 | (alpha_rs << 21) | alpha_rt;
539     }
540     break;
541     }
542     break;
543     case HI6_SLTI:
544     case HI6_SLTIU:
545     /* lda t1,4660 */
546     *a++ = 0x205f0000 | uimm;
547    
548     switch (instruction_type) {
549     case HI6_SLTI:
550     /* cmplt rs,t1,rt */
551     *a++ = 0x400209a0 | (alpha_rs << 21) | alpha_rt;
552     break;
553     case HI6_SLTIU:
554     /* cmpult rs,t1,rt */
555     *a++ = 0x400203a0 | (alpha_rs << 21) | alpha_rt;
556     break;
557     }
558     break;
559     }
560    
561     if (alpha_rt == ALPHA_T0) {
562     *a++ = 0x5fff041f; /* fnop */
563     *addrp = (unsigned char *) a;
564     bintrans_move_Alpha_reg_into_MIPS_reg(addrp, ALPHA_T0, rt);
565     a = (uint32_t *) *addrp;
566     }
567    
568     rt0:
569     *addrp = (unsigned char *) a;
570     bintrans_write_pc_inc(addrp);
571     return 1;
572     }
573    
574    
575     /*
576     * bintrans_write_instruction__addu_etc():
577     */
578     static int bintrans_write_instruction__addu_etc(unsigned char **addrp,
579     int rd, int rs, int rt, int sa, int instruction_type)
580     {
581     unsigned char *a, *unmodified = NULL;
582     int load64 = 0, store = 1, ofs, alpha_rd = ALPHA_T0;
583    
584     alpha_rd = map_MIPS_to_Alpha[rd];
585     if (alpha_rd < 0)
586     alpha_rd = ALPHA_T0;
587    
588     switch (instruction_type) {
589     case SPECIAL_DIV:
590     case SPECIAL_DIVU:
591     return 0;
592     }
593    
594     switch (instruction_type) {
595     case SPECIAL_DADDU:
596     case SPECIAL_DSUBU:
597     case SPECIAL_OR:
598     case SPECIAL_AND:
599     case SPECIAL_NOR:
600     case SPECIAL_XOR:
601     case SPECIAL_DSLL:
602     case SPECIAL_DSRL:
603     case SPECIAL_DSRA:
604     case SPECIAL_DSLL32:
605     case SPECIAL_DSRL32:
606     case SPECIAL_DSRA32:
607     case SPECIAL_SLT:
608     case SPECIAL_SLTU:
609     case SPECIAL_MOVZ:
610     case SPECIAL_MOVN:
611     load64 = 1;
612     }
613    
614     switch (instruction_type) {
615     case SPECIAL_MULT:
616     case SPECIAL_MULTU:
617     if (rd != 0)
618     return 0;
619     store = 0;
620     break;
621     default:
622     if (rd == 0)
623     goto rd0;
624     }
625    
626     a = *addrp;
627    
628     if ((instruction_type == SPECIAL_ADDU || instruction_type == SPECIAL_DADDU
629     || instruction_type == SPECIAL_OR) && rt == 0) {
630     bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0);
631     if (!load64) {
632     *a++ = 0x01; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */
633     }
634     bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rd);
635     *addrp = a;
636     goto rd0;
637     }
638    
639     /* t0 = rs, t1 = rt */
640     if (load64) {
641     bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0);
642     bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T1);
643     } else {
644     bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0);
645     *a++ = 0x01; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */
646     bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T1);
647     *a++ = 0x02; *a++ = 0x10; *a++ = 0x40; *a++ = 0x40; /* addl t1,0,t1 */
648     }
649    
650     switch (instruction_type) {
651     case SPECIAL_ADDU:
652     *a++ = alpha_rd; *a++ = 0x00; *a++ = 0x22; *a++ = 0x40; /* addl t0,t1,rd */
653     break;
654     case SPECIAL_DADDU:
655     *a++ = alpha_rd; *a++ = 0x04; *a++ = 0x22; *a++ = 0x40; /* addq t0,t1,rd */
656     break;
657     case SPECIAL_SUBU:
658     *a++ = 0x20 + alpha_rd; *a++ = 0x01; *a++ = 0x22; *a++ = 0x40; /* subl t0,t1,t0 */
659     break;
660     case SPECIAL_DSUBU:
661     *a++ = 0x20 + alpha_rd; *a++ = 0x05; *a++ = 0x22; *a++ = 0x40; /* subq t0,t1,t0 */
662     break;
663     case SPECIAL_AND:
664     *a++ = alpha_rd; *a++ = 0x00; *a++ = 0x22; *a++ = 0x44; /* and t0,t1,t0 */
665     break;
666     case SPECIAL_OR:
667     *a++ = alpha_rd; *a++ = 0x04; *a++ = 0x22; *a++ = 0x44; /* or t0,t1,t0 */
668     break;
669     case SPECIAL_NOR:
670     *a++ = 0x01; *a++ = 0x04; *a++ = 0x22; *a++ = 0x44; /* or t0,t1,t0 */
671     *a++ = alpha_rd; *a++ = 0x05; *a++ = 0xe1; *a++ = 0x47; /* not t0,t0 */
672     break;
673     case SPECIAL_XOR:
674     *a++ = alpha_rd; *a++ = 0x08; *a++ = 0x22; *a++ = 0x44; /* xor t0,t1,t0 */
675     break;
676     case SPECIAL_SLL:
677     *a++ = 0x21; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sll t1,sa,t0 */
678     *a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */
679     break;
680     case SPECIAL_SLLV:
681     /* rd = rt << (rs&31) (logical) t0 = t1 << (t0&31) */
682     *a++ = 0x01; *a++ = 0xf0; *a++ = 0x23; *a++ = 0x44; /* and t0,31,t0 */
683     *a++ = 0x21; *a++ = 0x07; *a++ = 0x41; *a++ = 0x48; /* sll t1,t0,t0 */
684     *a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */
685     break;
686     case SPECIAL_DSLL:
687     *a++ = 0x20 + alpha_rd; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sll t1,sa,t0 */
688     break;
689     case SPECIAL_DSLL32:
690     sa += 32;
691     *a++ = 0x20 + alpha_rd; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sll t1,sa,t0 */
692     break;
693     case SPECIAL_SRA:
694     *a++ = 0x81; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sra t1,sa,t0 */
695     *a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */
696     break;
697     case SPECIAL_SRAV:
698     /* rd = rt >> (rs&31) (arithmetic) t0 = t1 >> (t0&31) */
699     *a++ = 0x01; *a++ = 0xf0; *a++ = 0x23; *a++ = 0x44; /* and t0,31,t0 */
700     *a++ = 0x81; *a++ = 0x07; *a++ = 0x41; *a++ = 0x48; /* sra t1,t0,t0 */
701     *a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */
702     break;
703     case SPECIAL_DSRA:
704     *a++ = 0x80 + alpha_rd; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sra t1,sa,t0 */
705     break;
706     case SPECIAL_DSRA32:
707     sa += 32;
708     *a++ = 0x80 + alpha_rd; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sra t1,sa,t0 */
709     break;
710     case SPECIAL_SRL:
711     *a++ = 0x22; *a++ = 0xf6; *a++ = 0x41; *a++ = 0x48; /* zapnot t1,0xf,t1 (use only lowest 32 bits) */
712     /* Note: bits of sa are distributed among two different bytes. */
713     *a++ = 0x81; *a++ = 0x16 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48;
714     *a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl */
715     break;
716     case SPECIAL_SRLV:
717     /* rd = rt >> (rs&31) (logical) t0 = t1 >> (t0&31) */
718     *a++ = 0x22; *a++ = 0xf6; *a++ = 0x41; *a++ = 0x48; /* zapnot t1,0xf,t1 (use only lowest 32 bits) */
719     *a++ = 0x01; *a++ = 0xf0; *a++ = 0x23; *a++ = 0x44; /* and t0,31,t0 */
720     *a++ = 0x81; *a++ = 0x06; *a++ = 0x41; *a++ = 0x48; /* srl t1,t0,t0 */
721     *a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */
722     break;
723     case SPECIAL_DSRL:
724     /* Note: bits of sa are distributed among two different bytes. */
725     *a++ = 0x80 + alpha_rd; *a++ = 0x16 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48;
726     break;
727     case SPECIAL_DSRL32:
728     /* Note: bits of sa are distributed among two different bytes. */
729     sa += 32;
730     *a++ = 0x80 + alpha_rd; *a++ = 0x16 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48;
731     break;
732     case SPECIAL_SLT:
733     *a++ = 0xa0 + alpha_rd; *a++ = 0x09; *a++ = 0x22; *a++ = 0x40; /* cmplt t0,t1,t0 */
734     break;
735     case SPECIAL_SLTU:
736     *a++ = 0xa0 + alpha_rd; *a++ = 0x03; *a++ = 0x22; *a++ = 0x40; /* cmpult t0,t1,t0 */
737     break;
738     case SPECIAL_MULT:
739     case SPECIAL_MULTU:
740     if (instruction_type == SPECIAL_MULTU) {
741     /* 21 f6 21 48 zapnot t0,0xf,t0 */
742     /* 22 f6 41 48 zapnot t1,0xf,t1 */
743     *a++ = 0x21; *a++ = 0xf6; *a++ = 0x21; *a++ = 0x48;
744     *a++ = 0x22; *a++ = 0xf6; *a++ = 0x41; *a++ = 0x48;
745     }
746    
747     /* 03 04 22 4c mulq t0,t1,t2 */
748     *a++ = 0x03; *a++ = 0x04; *a++ = 0x22; *a++ = 0x4c;
749    
750     /* 01 10 60 40 addl t2,0,t0 */
751     *a++ = 0x01; *a++ = 0x10; *a++ = 0x60; *a++ = 0x40;
752    
753     ofs = ((size_t)&dummy_cpu.cd.mips.lo) - (size_t)&dummy_cpu;
754     *a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x30; *a++ = 0xb4;
755    
756     /* 81 17 64 48 sra t2,0x20,t0 */
757     *a++ = 0x81; *a++ = 0x17; *a++ = 0x64; *a++ = 0x48;
758     *a++ = 0x01; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */
759     ofs = ((size_t)&dummy_cpu.cd.mips.hi) - (size_t)&dummy_cpu;
760     *a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x30; *a++ = 0xb4;
761     break;
762     case SPECIAL_MOVZ:
763     /* if rt=0 then rd=rs ==> if t1!=0 then t0=unmodified else t0=rd */
764     /* 00 00 40 f4 bne t1,unmodified */
765     unmodified = a;
766     *a++ = 0x00; *a++ = 0x00; *a++ = 0x40; *a++ = 0xf4;
767     alpha_rd = ALPHA_T0;
768     break;
769     case SPECIAL_MOVN:
770     /* if rt!=0 then rd=rs ==> if t1=0 then t0=unmodified else t0=rd */
771     /* 00 00 40 e4 beq t1,unmodified */
772     unmodified = a;
773     *a++ = 0x00; *a++ = 0x00; *a++ = 0x40; *a++ = 0xe4;
774     alpha_rd = ALPHA_T0;
775     break;
776     }
777    
778     if (store && alpha_rd == ALPHA_T0) {
779     bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rd);
780     }
781    
782     if (unmodified != NULL)
783     *unmodified = ((size_t)a - (size_t)unmodified - 4) / 4;
784    
785     *addrp = a;
786     rd0:
787     bintrans_write_pc_inc(addrp);
788     return 1;
789     }
790    
791    
792     /*
793     * bintrans_write_instruction__branch():
794     */
795     static int bintrans_write_instruction__branch(unsigned char **addrp,
796     int instruction_type, int regimm_type, int rt, int rs, int imm)
797     {
798     uint32_t *a, *b, *c = NULL;
799     int alpha_rs, alpha_rt, likely = 0, ofs;
800    
801     alpha_rs = map_MIPS_to_Alpha[rs];
802     alpha_rt = map_MIPS_to_Alpha[rt];
803    
804     switch (instruction_type) {
805     case HI6_BEQL:
806     case HI6_BNEL:
807     case HI6_BLEZL:
808     case HI6_BGTZL:
809     likely = 1;
810     }
811    
812     /*
813     * t0 = gpr[rt]; t1 = gpr[rs];
814     *
815     * 50 00 30 a4 ldq t0,80(a0)
816     * 58 00 50 a4 ldq t1,88(a0)
817     */
818    
819     switch (instruction_type) {
820     case HI6_BEQ:
821     case HI6_BNE:
822     case HI6_BEQL:
823     case HI6_BNEL:
824     if (alpha_rt < 0) {
825     bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rt, ALPHA_T0);
826     alpha_rt = ALPHA_T0;
827     }
828     }
829    
830     if (alpha_rs < 0) {
831     bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rs, ALPHA_T1);
832     alpha_rs = ALPHA_T1;
833     }
834    
835     a = (uint32_t *) *addrp;
836    
837     /*
838     * Compare alpha_rt (t0) and alpha_rs (t1) for equality (BEQ).
839     * If the result was false (equal to zero), then skip a lot
840     * of instructions:
841     *
842     * a1 05 22 40 cmpeq t0,t1,t0
843     * 01 00 20 e4 beq t0,14 <f+0x14>
844     */
845     b = NULL;
846     if ((instruction_type == HI6_BEQ ||
847     instruction_type == HI6_BEQL) && rt != rs) {
848     /* cmpeq rt,rs,t0 */
849     *a++ = 0x400005a1 | (alpha_rt << 21) | (alpha_rs << 16);
850     b = a;
851     *a++ = 0xe4200001; /* beq */
852     }
853     if (instruction_type == HI6_BNE || instruction_type == HI6_BNEL) {
854     /* cmpeq rt,rs,t0 */
855     *a++ = 0x400005a1 | (alpha_rt << 21) | (alpha_rs << 16);
856     b = a;
857     *a++ = 0xf4200001; /* bne */
858     }
859     if (instruction_type == HI6_BLEZ || instruction_type == HI6_BLEZL) {
860     /* cmple rs,0,t0 */
861     *a++ = 0x40001da1 | (alpha_rs << 21);
862     b = a;
863     *a++ = 0xe4200001; /* beq */
864     }
865     if (instruction_type == HI6_BGTZ || instruction_type == HI6_BGTZL) {
866     /* cmple rs,0,t0 */
867     *a++ = 0x40001da1 | (alpha_rs << 21);
868     b = a;
869     *a++ = 0xf4200001; /* bne */
870     }
871     if (instruction_type == HI6_REGIMM && regimm_type == REGIMM_BLTZ) {
872     /* cmplt rs,0,t0 */
873     *a++ = 0x400019a1 | (alpha_rs << 21);
874     b = a;
875     *a++ = 0xe4200001; /* beq */
876     }
877     if (instruction_type == HI6_REGIMM && regimm_type == REGIMM_BGEZ) {
878     *a++ = 0x207fffff; /* lda t2,-1 */
879     /* cmple rs,t2,t0 */
880     *a++ = 0x40030da1 | (alpha_rs << 21);
881     b = a;
882     *a++ = 0xf4200001; /* bne */
883     }
884    
885     /*
886     * Perform the jump by setting cpu->delay_slot = TO_BE_DELAYED
887     * and cpu->delay_jmpaddr = pc + 4 + (imm << 2).
888     *
889     * 04 00 26 20 lda t0,4(t5) add 4
890     * c8 01 5f 20 lda t1,456
891     * 4a 04 41 40 s4addq t1,t0,s1 s1 = (t1<<2) + t0
892     */
893    
894     *a++ = 0x20260004; /* lda t0,4(t5) */
895     *a++ = 0x205f0000 | (imm & 0xffff); /* lda */
896     *a++ = 0x4041044a; /* s4addq */
897    
898     /* 02 00 3f 21 lda s0,TO_BE_DELAYED */
899     *a++ = 0x213f0000 | TO_BE_DELAYED;
900    
901     /*
902     * Special case: "likely"-branches:
903     */
904     if (likely) {
905     c = a;
906     *a++ = 0xc3e00001; /* br delayed_ok */
907    
908     if (b != NULL)
909     *((unsigned char *)b) = ((size_t)a - (size_t)b - 4) / 4;
910    
911     /* cpu->cd.mips.nullify_next = 1; */
912     /* 01 00 3f 20 lda t0,1 */
913     *a++ = 0x203f0001;
914     ofs = (size_t)&dummy_cpu.cd.mips.nullify_next - (size_t)&dummy_cpu;
915     *a++ = 0xb0300000 | (ofs & 0xffff);
916    
917     /* fail, so that the next instruction is handled manually: */
918     *addrp = (unsigned char *) a;
919     bintrans_write_pc_inc(addrp);
920     bintrans_write_chunkreturn_fail(addrp);
921     a = (uint32_t *) *addrp;
922    
923     if (c != NULL)
924     *((unsigned char *)c) = ((size_t)a - (size_t)c - 4) / 4;
925     } else {
926     /* Normal (non-likely) exit: */
927     if (b != NULL)
928     *((unsigned char *)b) = ((size_t)a - (size_t)b - 4) / 4;
929     }
930    
931     *addrp = (unsigned char *) a;
932     bintrans_write_pc_inc(addrp);
933     return 1;
934     }
935    
936    
937     /*
938     * bintrans_write_instruction__jr():
939     */
940     static int bintrans_write_instruction__jr(unsigned char **addrp, int rs, int rd, int special)
941     {
942     uint32_t *a;
943     int alpha_rd;
944    
945     alpha_rd = map_MIPS_to_Alpha[rd];
946     if (alpha_rd < 0)
947     alpha_rd = ALPHA_T0;
948    
949     /*
950     * Perform the jump by setting cpu->delay_slot = TO_BE_DELAYED
951     * and cpu->delay_jmpaddr = gpr[rs].
952     */
953    
954     bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rs, ALPHA_S1);
955    
956     a = (uint32_t *) *addrp;
957     /* 02 00 3f 21 lda s0,TO_BE_DELAYED */
958     *a++ = 0x213f0000 | TO_BE_DELAYED;
959     *addrp = (unsigned char *) a;
960    
961     if (special == SPECIAL_JALR && rd != 0) {
962     /* gpr[rd] = retaddr (pc + 8) */
963     a = (uint32_t *) *addrp;
964     /* lda alpha_rd,8(t5) */
965     *a++ = 0x20060008 | (alpha_rd << 21);
966     *addrp = (unsigned char *) a;
967     if (alpha_rd == ALPHA_T0)
968     bintrans_move_Alpha_reg_into_MIPS_reg(addrp, ALPHA_T0, rd);
969     }
970    
971     bintrans_write_pc_inc(addrp);
972     return 1;
973     }
974    
975    
976     /*
977     * bintrans_write_instruction__jal():
978     */
979     static int bintrans_write_instruction__jal(unsigned char **addrp,
980     int imm, int link)
981     {
982     uint32_t *a;
983    
984     a = (uint32_t *) *addrp;
985    
986     /* gpr[31] = retaddr (NOTE: mips register 31 is in alpha reg s3) */
987     if (link) {
988     *a++ = 0x21860008; /* lda s3,8(t5) */
989     }
990    
991     /* Set the jmpaddr to top 4 bits of pc + lowest 28 bits of imm*4: */
992    
993     /*
994     * imm = 4*imm;
995     * t0 = ((pc + 4) & ~0x0fffffff) | imm;
996     *
997     * 04 00 26 20 lda t0,4(t5) <-- because the jump is from the delay slot
998     * 23 01 5f 24 ldah t1,291
999     * 67 45 42 20 lda t1,17767(t1)
1000     * 00 f0 7f 24 ldah t2,-4096
1001     * 04 00 23 44 and t0,t2,t3
1002     * 0a 04 44 44 or t1,t3,s1
1003     */
1004     imm *= 4;
1005     *a++ = 0x20260004;
1006     *a++ = 0x245f0000 | ((imm >> 16) + (imm & 0x8000? 1 : 0));
1007     *a++ = 0x20420000 | (imm & 0xffff);
1008     *a++ = 0x247ff000;
1009     *a++ = 0x44230004;
1010     *a++ = 0x4444040a;
1011    
1012     /* 02 00 3f 21 lda s0,TO_BE_DELAYED */
1013     *a++ = 0x213f0000 | TO_BE_DELAYED;
1014    
1015     /* If the machine continues executing here, it will return
1016     to the main loop, which is fine. */
1017    
1018     *addrp = (unsigned char *) a;
1019     bintrans_write_pc_inc(addrp);
1020     return 1;
1021     }
1022    
1023    
1024     /*
1025     * bintrans_write_instruction__delayedbranch():
1026     */
1027     static int bintrans_write_instruction__delayedbranch(
1028     struct memory *mem, unsigned char **addrp,
1029     uint32_t *potential_chunk_p, uint32_t *chunks,
1030     int only_care_about_chunk_p, int p, int forward)
1031     {
1032     unsigned char *a, *skip=NULL, *generic64bit;
1033     int ofs;
1034     uint64_t alpha_addr, subaddr;
1035    
1036     a = *addrp;
1037    
1038     if (!only_care_about_chunk_p) {
1039     /* Skip all of this if there is no branch: */
1040     skip = a;
1041     *a++ = 0; *a++ = 0; *a++ = 0x20; *a++ = 0xe5; /* beq s0,skip */
1042    
1043     /*
1044     * Perform the jump by setting cpu->delay_slot = 0
1045     * and pc = cpu->delay_jmpaddr.
1046     */
1047     /* 00 00 3f 21 lda s0,0 */
1048     *a++ = 0; *a++ = 0; *a++ = 0x3f; *a++ = 0x21;
1049    
1050     bintrans_move_MIPS_reg_into_Alpha_reg(&a, MIPSREG_DELAY_JMPADDR, ALPHA_T0);
1051     bintrans_move_MIPS_reg_into_Alpha_reg(&a, MIPSREG_PC, ALPHA_T3);
1052     bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, MIPSREG_PC);
1053     }
1054    
1055     if (potential_chunk_p == NULL) {
1056     if (mem->bintrans_32bit_only) {
1057     /* 34 12 70 a7 ldq t12,4660(a0) */
1058     ofs = (size_t)&dummy_cpu.cd.mips.bintrans_jump_to_32bit_pc - (size_t)&dummy_cpu;
1059     *a++ = ofs; *a++ = ofs >> 8; *a++ = 0x70; *a++ = 0xa7;
1060    
1061     /* 00 00 fb 6b jmp (t12) */
1062     *a++ = 0; *a++ = 0; *a++ = 0xfb; *a++ = 0x6b;
1063     } else {
1064     /*
1065     * If the highest 32 bits of the address are either
1066     * 0x00000000 or 0xffffffff, then the tables used for
1067     * 32-bit load/stores can be used.
1068     *
1069     * 81 16 24 4a srl a1,0x20,t0
1070     * 03 00 20 e4 beq t0,14 <ok1>
1071     * 01 30 20 40 addl t0,0x1,t0
1072     * 01 00 20 e4 beq t0,14 <ok1>
1073     * 01 00 e0 c3 br 18 <nook>
1074     */
1075     *a++ = 0x81; *a++ = 0x16; *a++ = 0x24; *a++ = 0x4a;
1076     *a++ = 0x03; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4;
1077     *a++ = 0x01; *a++ = 0x30; *a++ = 0x20; *a++ = 0x40;
1078     *a++ = 0x01; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4;
1079     generic64bit = a;
1080     *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1081    
1082     /* 34 12 70 a7 ldq t12,4660(a0) */
1083     ofs = (size_t)&dummy_cpu.cd.mips.bintrans_jump_to_32bit_pc - (size_t)&dummy_cpu;
1084     *a++ = ofs; *a++ = ofs >> 8; *a++ = 0x70; *a++ = 0xa7;
1085    
1086     /* 00 00 fb 6b jmp (t12) */
1087     *a++ = 0; *a++ = 0; *a++ = 0xfb; *a++ = 0x6b;
1088    
1089    
1090     if (generic64bit != NULL)
1091     *generic64bit = ((size_t)a - (size_t)generic64bit - 4) / 4;
1092    
1093     /* Not much we can do here if this wasn't to the same
1094     physical page... */
1095    
1096     *a++ = 0xfc; *a++ = 0xff; *a++ = 0x84; *a++ = 0x20; /* lda t3,-4(t3) */
1097    
1098     /*
1099     * Compare the old pc (t3) and the new pc (t0). If they are on the
1100     * same virtual page (which means that they are on the same physical
1101     * page), then we can check the right chunk pointer, and if it
1102     * is non-NULL, then we can jump there. Otherwise just return.
1103     *
1104     * 00 f0 5f 20 lda t1,-4096
1105     * 01 00 22 44 and t0,t1,t0
1106     * 04 00 82 44 and t3,t1,t3
1107     * a3 05 24 40 cmpeq t0,t3,t2
1108     * 01 00 60 f4 bne t2,7c <ok2>
1109     * 01 80 fa 6b ret
1110     */
1111     *a++ = 0x00; *a++ = 0xf0; *a++ = 0x5f; *a++ = 0x20; /* lda */
1112     *a++ = 0x01; *a++ = 0x00; *a++ = 0x22; *a++ = 0x44; /* and */
1113     *a++ = 0x04; *a++ = 0x00; *a++ = 0x82; *a++ = 0x44; /* and */
1114     *a++ = 0xa3; *a++ = 0x05; *a++ = 0x24; *a++ = 0x40; /* cmpeq */
1115     *a++ = 0x01; *a++ = 0x00; *a++ = 0x60; *a++ = 0xf4; /* bne */
1116     *a++ = 0x01; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */
1117    
1118     /* Don't execute too many instructions. (see comment below) */
1119     *a++ = (N_SAFE_BINTRANS_LIMIT-1)&255; *a++ = ((N_SAFE_BINTRANS_LIMIT-1) >> 8)&255;
1120     *a++ = 0x5f; *a++ = 0x20; /* lda t1,0x1fff */
1121     *a++ = 0xa1; *a++ = 0x0d; *a++ = 0xe2; *a++ = 0x40; /* cmple t6,t1,t0 */
1122     *a++ = 0x01; *a++ = 0x00; *a++ = 0x20; *a++ = 0xf4; /* bne */
1123     *a++ = 0x01; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */
1124    
1125     /* 15 bits at a time, which means max 60 bits, but
1126     that should be enough. the top 4 bits are probably
1127     not used by userland alpha code. (TODO: verify this) */
1128     alpha_addr = (size_t)chunks;
1129     subaddr = (alpha_addr >> 45) & 0x7fff;
1130    
1131     /*
1132     * 00 00 3f 20 lda t0,0
1133     * 21 f7 21 48 sll t0,0xf,t0
1134     * 34 12 21 20 lda t0,4660(t0)
1135     * 21 f7 21 48 sll t0,0xf,t0
1136     * 34 12 21 20 lda t0,4660(t0)
1137     * 21 f7 21 48 sll t0,0xf,t0
1138     * 34 12 21 20 lda t0,4660(t0)
1139     */
1140    
1141     /* Start with the topmost 15 bits: */
1142     *a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x3f; *a++ = 0x20;
1143     *a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */
1144    
1145     subaddr = (alpha_addr >> 30) & 0x7fff;
1146     *a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20;
1147     *a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */
1148    
1149     subaddr = (alpha_addr >> 15) & 0x7fff;
1150     *a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20;
1151     *a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */
1152    
1153     subaddr = alpha_addr & 0x7fff;
1154     *a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20;
1155    
1156     /*
1157     * t2 = pc
1158     * t1 = t2 & 0xfff
1159     * t0 += t1
1160     *
1161     * ff 0f 5f 20 lda t1,4095
1162     * 02 00 62 44 and t2,t1,t1
1163     * 01 04 22 40 addq t0,t1,t0
1164     */
1165     bintrans_move_MIPS_reg_into_Alpha_reg(&a, MIPSREG_PC, ALPHA_T2);
1166     *a++ = 0xff; *a++ = 0x0f; *a++ = 0x5f; *a++ = 0x20; /* lda */
1167     *a++ = 0x02; *a++ = 0x00; *a++ = 0x62; *a++ = 0x44; /* and */
1168     *a++ = 0x01; *a++ = 0x04; *a++ = 0x22; *a++ = 0x40; /* addq */
1169    
1170     /*
1171     * Load the chunk pointer (actually, a 32-bit offset) into t0.
1172     * If it is zero, then skip the following.
1173     * Add cpu->chunk_base_address to t0.
1174     * Jump to t0.
1175     */
1176    
1177     *a++ = 0x00; *a++ = 0x00; *a++ = 0x21; *a++ = 0xa0; /* ldl t0,0(t0) */
1178     *a++ = 0x03; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4; /* beq t0,<skip> */
1179    
1180     /* ldq t2,chunk_base_address(a0) */
1181     ofs = ((size_t)&dummy_cpu.cd.mips.chunk_base_address) - (size_t)&dummy_cpu;
1182     *a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x70; *a++ = 0xa4;
1183     /* addq t0,t2,t0 */
1184     *a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x40;
1185    
1186     /* 00 00 e1 6b jmp (t0) */
1187     *a++ = 0x00; *a++ = 0x00; *a++ = 0xe1; *a++ = 0x6b; /* jmp (t0) */
1188    
1189     /* Failure, then return to the main loop. */
1190     *a++ = 0x01; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */
1191     }
1192     } else {
1193     /*
1194     * Just to make sure that we don't become too unreliant
1195     * on the main program loop, we need to return every once
1196     * in a while (interrupts etc).
1197     *
1198     * Load the "nr of instructions executed" (which is an int)
1199     * and see if it is below a certain threshold. If so, then
1200     * we go on with the fast path (bintrans), otherwise we
1201     * abort by returning.
1202     *
1203     * f4 01 5f 20 lda t1,500 (some low number...)
1204     * a1 0d c2 40 cmple t6,t1,t0
1205     * 01 00 20 f4 bne t0,14 <f+0x14>
1206     */
1207     if (!only_care_about_chunk_p && !forward) {
1208     *a++ = (N_SAFE_BINTRANS_LIMIT-1)&255; *a++ = ((N_SAFE_BINTRANS_LIMIT-1) >> 8)&255;
1209     *a++ = 0x5f; *a++ = 0x20; /* lda t1,0x1fff */
1210     *a++ = 0xa1; *a++ = 0x0d; *a++ = 0xe2; *a++ = 0x40; /* cmple t6,t1,t0 */
1211     *a++ = 0x01; *a++ = 0x00; *a++ = 0x20; *a++ = 0xf4; /* bne */
1212     *a++ = 0x01; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */
1213     }
1214    
1215     /*
1216     * potential_chunk_p points to an "uint32_t".
1217     * If this value is non-NULL, then it is a piece of Alpha
1218     * machine language code corresponding to the address
1219     * we're jumping to. Otherwise, those instructions haven't
1220     * been translated yet, so we have to return to the main
1221     * loop. (Actually, we have to add cpu->chunk_base_address,
1222     * because the uint32_t is limited to 32-bit offsets.)
1223     *
1224     * Case 1: The value is non-NULL already at translation
1225     * time. Then we can make a direct (fast) native
1226     * Alpha jump to the code chunk.
1227     *
1228     * Case 2: The value was NULL at translation time, then we
1229     * have to check during runtime.
1230     */
1231    
1232     /* Case 1: */
1233     /* printf("%08x ", *potential_chunk_p); */
1234     alpha_addr = *potential_chunk_p + (size_t)mem->translation_code_chunk_space;
1235     ofs = (alpha_addr - ((size_t)a+4)) / 4;
1236     /* printf("%016llx %016llx %i\n", (long long)alpha_addr, (long long)a, ofs); */
1237    
1238     if ((*potential_chunk_p) != 0 && ofs > -0xfffff && ofs < 0xfffff) {
1239     *a++ = ofs & 255; *a++ = (ofs >> 8) & 255; *a++ = 0xe0 + ((ofs >> 16) & 0x1f); *a++ = 0xc3; /* br <chunk> */
1240     } else {
1241     /* Case 2: */
1242    
1243     bintrans_register_potential_quick_jump(mem, a, p);
1244    
1245     /* 15 bits at a time, which means max 60 bits, but
1246     that should be enough. the top 4 bits are probably
1247     not used by userland alpha code. (TODO: verify this) */
1248     alpha_addr = (size_t)potential_chunk_p;
1249     subaddr = (alpha_addr >> 45) & 0x7fff;
1250    
1251     /*
1252     * 00 00 3f 20 lda t0,0
1253     * 21 f7 21 48 sll t0,0xf,t0
1254     * 34 12 21 20 lda t0,4660(t0)
1255     * 21 f7 21 48 sll t0,0xf,t0
1256     * 34 12 21 20 lda t0,4660(t0)
1257     * 21 f7 21 48 sll t0,0xf,t0
1258     * 34 12 21 20 lda t0,4660(t0)
1259     */
1260    
1261     /* Start with the topmost 15 bits: */
1262     *a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x3f; *a++ = 0x20;
1263     *a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */
1264    
1265     subaddr = (alpha_addr >> 30) & 0x7fff;
1266     *a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20;
1267     *a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */
1268    
1269     subaddr = (alpha_addr >> 15) & 0x7fff;
1270     *a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20;
1271     *a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */
1272    
1273     subaddr = alpha_addr & 0x7fff;
1274     *a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20;
1275    
1276     /*
1277     * Load the chunk pointer into t0.
1278     * If it is NULL (zero), then skip the following jump.
1279     * Jump to t0.
1280     */
1281     *a++ = 0x00; *a++ = 0x00; *a++ = 0x21; *a++ = 0xa0; /* ldl t0,0(t0) */
1282     *a++ = 0x03; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4; /* beq t0,<skip> */
1283    
1284     /* ldq t2,chunk_base_address(a0) */
1285     ofs = ((size_t)&dummy_cpu.cd.mips.chunk_base_address) - (size_t)&dummy_cpu;
1286     *a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x70; *a++ = 0xa4;
1287     /* addq t0,t2,t0 */
1288     *a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x40;
1289    
1290     /* 00 00 e1 6b jmp (t0) */
1291     *a++ = 0x00; *a++ = 0x00; *a++ = 0xe1; *a++ = 0x6b; /* jmp (t0) */
1292    
1293     /* "Failure", then let's return to the main loop. */
1294     *a++ = 0x01; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */
1295     }
1296     }
1297    
1298     if (skip != NULL) {
1299     *skip = ((size_t)a - (size_t)skip - 4) / 4;
1300     skip ++;
1301     *skip = (((size_t)a - (size_t)skip - 4) / 4) >> 8;
1302     }
1303    
1304     *addrp = a;
1305     return 1;
1306     }
1307    
1308    
1309     /*
1310     * bintrans_write_instruction__loadstore():
1311     */
1312     static int bintrans_write_instruction__loadstore(
1313     struct memory *mem, unsigned char **addrp,
1314     int rt, int imm, int rs, int instruction_type, int bigendian)
1315     {
1316     unsigned char *a, *fail, *generic64bit = NULL, *generic64bitA = NULL;
1317     unsigned char *doloadstore = NULL,
1318     *ok_unaligned_load3, *ok_unaligned_load2, *ok_unaligned_load1;
1319     uint32_t *b;
1320     int ofs, alignment, load = 0, alpha_rs, alpha_rt, unaligned = 0;
1321    
1322     /* TODO: Not yet: */
1323     if (instruction_type == HI6_LQ_MDMX || instruction_type == HI6_SQ) {
1324     return 0;
1325     }
1326    
1327     switch (instruction_type) {
1328     case HI6_LQ_MDMX:
1329     case HI6_LD:
1330     case HI6_LDL:
1331     case HI6_LDR:
1332     case HI6_LWU:
1333     case HI6_LW:
1334     case HI6_LWL:
1335     case HI6_LWR:
1336     case HI6_LHU:
1337     case HI6_LH:
1338     case HI6_LBU:
1339     case HI6_LB:
1340     load = 1;
1341     if (rt == 0)
1342     return 0;
1343     }
1344    
1345     switch (instruction_type) {
1346     case HI6_LDL:
1347     case HI6_LDR:
1348     case HI6_LWL:
1349     case HI6_LWR:
1350     case HI6_SDL:
1351     case HI6_SDR:
1352     case HI6_SWL:
1353     case HI6_SWR:
1354     unaligned = 1;
1355     }
1356    
1357     a = *addrp;
1358    
1359     /*
1360     * a1 = gpr[rs] + imm;
1361     *
1362     * 88 08 30 a4 ldq t0,2184(a0)
1363     * 34 12 21 22 lda a1,4660(t0)
1364     */
1365    
1366     alpha_rs = map_MIPS_to_Alpha[rs];
1367     if (alpha_rs < 0) {
1368     bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0);
1369     alpha_rs = ALPHA_T0;
1370     }
1371     *a++ = imm; *a++ = (imm >> 8); *a++ = 0x20 + alpha_rs; *a++ = 0x22;
1372    
1373     alignment = 0;
1374     switch (instruction_type) {
1375     case HI6_LQ_MDMX:
1376     case HI6_SQ:
1377     alignment = 15;
1378     break;
1379     case HI6_LD:
1380     case HI6_LDL:
1381     case HI6_LDR:
1382     case HI6_SD:
1383     case HI6_SDL:
1384     case HI6_SDR:
1385     alignment = 7;
1386     break;
1387     case HI6_LW:
1388     case HI6_LWL:
1389     case HI6_LWR:
1390     case HI6_LWU:
1391     case HI6_SW:
1392     case HI6_SWL:
1393     case HI6_SWR:
1394     alignment = 3;
1395     break;
1396     case HI6_LH:
1397     case HI6_LHU:
1398     case HI6_SH:
1399     alignment = 1;
1400     break;
1401     }
1402    
1403     if (unaligned) {
1404     /*
1405     * Unaligned load/store: Perform the host load/store at
1406     * an aligned address, and then figure out which bytes to
1407     * actually load into the destination register.
1408     *
1409     * 02 30 20 46 and a1,alignment,t1
1410     * 31 05 22 42 subq a1,t1,a1
1411     */
1412     *a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46;
1413     *a++ = 0x31; *a++ = 0x05; *a++ = 0x22; *a++ = 0x42;
1414     } else if (alignment > 0) {
1415     /*
1416     * Check alignment:
1417     *
1418     * 02 30 20 46 and a1,0x1,t1
1419     * 02 70 20 46 and a1,0x3,t1 (one of these "and"s)
1420     * 02 f0 20 46 and a1,0x7,t1
1421     * 02 f0 21 46 and a1,0xf,t1
1422     * 01 00 40 e4 beq t1,<okalign>
1423     * 01 80 fa 6b ret
1424     */
1425     *a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46;
1426     fail = a;
1427     *a++ = 0x01; *a++ = 0x00; *a++ = 0x40; *a++ = 0xe4;
1428     *addrp = a;
1429     bintrans_write_chunkreturn_fail(addrp);
1430     a = *addrp;
1431     *fail = ((size_t)a - (size_t)fail - 4) / 4;
1432     }
1433    
1434     alpha_rt = map_MIPS_to_Alpha[rt];
1435    
1436     if (mem->bintrans_32bit_only) {
1437     /* Special case for 32-bit addressing: */
1438    
1439     ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_loadstore_32bit) - (size_t)&dummy_cpu;
1440     /* ldq t12,bintrans_loadstore_32bit(a0) */
1441     *a++ = ofs; *a++ = ofs >> 8; *a++ = 0x70; *a++ = 0xa7;
1442    
1443     /* jsr t4,(t12),<after> */
1444     *a++ = 0x00; *a++ = 0x40; *a++ = 0xbb; *a++ = 0x68;
1445    
1446     /*
1447     * Now:
1448     * a3 = host page
1449     * t0 = 0 for readonly pages, 1 for read/write pages
1450     * t3 = address of host load/store
1451     */
1452    
1453     /* If this is a store, then the lowest bit must be set: */
1454     if (!load) {
1455     /* 01 00 20 f4 bne t0,<okzzz> */
1456     fail = a;
1457     *a++ = 0x01; *a++ = 0x00; *a++ = 0x20; *a++ = 0xf4;
1458     bintrans_write_chunkreturn_fail(&a);
1459     *fail = ((size_t)a - (size_t)fail - 4) / 4;
1460     }
1461     } else {
1462     /*
1463     * If the highest 33 bits of the address are either all ones
1464     * or all zeroes, then the tables used for 32-bit load/stores
1465     * can be used.
1466     */
1467     *a++ = 0x81; *a++ = 0xf6; *a++ = 0x23; *a++ = 0x4a; /* srl a1,0x1f,t0 */
1468     *a++ = 0x01; *a++ = 0x30; *a++ = 0x20; *a++ = 0x44; /* and t0,0x1,t0 */
1469     *a++ = 0x04; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4; /* beq t0,<noll> */
1470     *a++ = 0x81; *a++ = 0x16; *a++ = 0x24; *a++ = 0x4a; /* srl a1,0x20,t0 */
1471     *a++ = 0x01; *a++ = 0x30; *a++ = 0x20; *a++ = 0x40; /* addl t0,0x1,t0 */
1472     *a++ = 0x04; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4; /* beq t0,<ok> */
1473     generic64bit = a;
1474     *a++ = 0x04; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; /* br <generic> */
1475     /* <noll>: */
1476     *a++ = 0x81; *a++ = 0x16; *a++ = 0x24; *a++ = 0x4a; /* srl a1,0x20,t0 */
1477     *a++ = 0x01; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4; /* beq t0,<ok> */
1478     generic64bitA = a;
1479     *a++ = 0x04; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; /* br <generic> */
1480    
1481     ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_loadstore_32bit) - (size_t)&dummy_cpu;
1482     /* ldq t12,bintrans_loadstore_32bit(a0) */
1483     *a++ = ofs; *a++ = ofs >> 8; *a++ = 0x70; *a++ = 0xa7;
1484    
1485     /* jsr t4,(t12),<after> */
1486     *a++ = 0x00; *a++ = 0x40; *a++ = 0xbb; *a++ = 0x68;
1487    
1488     /*
1489     * Now:
1490     * a3 = host page (or NULL if not found)
1491     * t0 = 0 for readonly pages, 1 for read/write pages
1492     * t3 = (potential) address of host load/store
1493     */
1494    
1495     /* If this is a store, then the lowest bit must be set: */
1496     if (!load) {
1497     /* 01 00 20 f4 bne t0,<okzzz> */
1498     fail = a;
1499     *a++ = 0x01; *a++ = 0x00; *a++ = 0x20; *a++ = 0xf4;
1500     bintrans_write_chunkreturn_fail(&a);
1501     *fail = ((size_t)a - (size_t)fail - 4) / 4;
1502     }
1503    
1504     doloadstore = a;
1505     *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1506    
1507    
1508     /*
1509     * Generic (64-bit) load/store:
1510     */
1511    
1512     if (generic64bit != NULL)
1513     *generic64bit = ((size_t)a - (size_t)generic64bit - 4) / 4;
1514     if (generic64bitA != NULL)
1515     *generic64bitA = ((size_t)a - (size_t)generic64bitA - 4) / 4;
1516    
1517     *addrp = a;
1518     b = (uint32_t *) *addrp;
1519    
1520     /* Save a0 and the old return address on the stack: */
1521     *b++ = 0x23deff80; /* lda sp,-128(sp) */
1522    
1523     *b++ = 0xb75e0000; /* stq ra,0(sp) */
1524     *b++ = 0xb61e0008; /* stq a0,8(sp) */
1525     *b++ = 0xb4de0010; /* stq t5,16(sp) */
1526     *b++ = 0xb0fe0018; /* stl t6,24(sp) */
1527     *b++ = 0xb71e0020; /* stq t10,32(sp) */
1528     *b++ = 0xb73e0028; /* stq t11,40(sp) */
1529     *b++ = 0xb51e0030; /* stq t7,48(sp) */
1530     *b++ = 0xb6de0038; /* stq t8,56(sp) */
1531     *b++ = 0xb6fe0040; /* stq t9,64(sp) */
1532    
1533     ofs = ((size_t)&dummy_cpu.cd.mips.fast_vaddr_to_hostaddr) - (size_t)&dummy_cpu;
1534    
1535     *b++ = 0xa7700000 | ofs; /* ldq t12,0(a0) */
1536    
1537     /* a1 is already vaddr. set a2 = writeflag */
1538     *b++ = 0x225f0000 | (load? 0 : 1);
1539    
1540     /* Call fast_vaddr_to_hostaddr: */
1541     *b++ = 0x6b5b4000; /* jsr ra,(t12),<after> */
1542    
1543     /* Restore the old return address and a0 from the stack: */
1544     *b++ = 0xa75e0000; /* ldq ra,0(sp) */
1545     *b++ = 0xa61e0008; /* ldq a0,8(sp) */
1546     *b++ = 0xa4de0010; /* ldq t5,16(sp) */
1547     *b++ = 0xa0fe0018; /* ldl t6,24(sp) */
1548     *b++ = 0xa71e0020; /* ldq t10,32(sp) */
1549     *b++ = 0xa73e0028; /* ldq t11,40(sp) */
1550     *b++ = 0xa51e0030; /* ldq t7,48(sp) */
1551     *b++ = 0xa6de0038; /* ldq t8,56(sp) */
1552     *b++ = 0xa6fe0040; /* ldq t9,64(sp) */
1553    
1554     *b++ = 0x23de0080; /* lda sp,128(sp) */
1555    
1556     *addrp = (unsigned char *) b;
1557     a = *addrp;
1558    
1559     /*
1560     * NULL? Then return failure.
1561     * 01 00 00 f4 bne v0,f8 <okzz>
1562     */
1563     fail = a;
1564     *a++ = 0x01; *a++ = 0x00; *a++ = 0x00; *a++ = 0xf4;
1565     bintrans_write_chunkreturn_fail(&a);
1566     *fail = ((size_t)a - (size_t)fail - 4) / 4;
1567    
1568     /* The rest of this code was written with t3 as the address. */
1569    
1570     /* 04 14 00 40 addq v0,0,t3 */
1571     *a++ = 0x04; *a++ = 0x14; *a++ = 0x00; *a++ = 0x40;
1572    
1573     if (doloadstore != NULL)
1574     *doloadstore = ((size_t)a - (size_t)doloadstore - 4) / 4;
1575     }
1576    
1577    
1578     switch (instruction_type) {
1579     case HI6_LQ_MDMX:
1580     /* TODO */
1581     break;
1582     case HI6_LD:
1583     *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xa4; /* ldq t0,0(t3) */
1584     if (bigendian) {
1585     /* remember original 8 bytes of t0: */
1586     *a++ = 0x05; *a++ = 0x04; *a++ = 0x3f; *a++ = 0x40; /* addq t0,zero,t4 */
1587    
1588     /* swap lowest 4 bytes: */
1589     *a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */
1590     *a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */
1591     *a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */
1592     *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1593     *a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */
1594     *a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */
1595     *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1596     *a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */
1597     *a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */
1598    
1599     /* save result in (top 4 bytes of) t1, then t4. get back top bits of t4: */
1600     *a++ = 0x22; *a++ = 0x17; *a++ = 0x24; *a++ = 0x48; /* sll t0,0x20,t1 */
1601     *a++ = 0x81; *a++ = 0x16; *a++ = 0xa4; *a++ = 0x48; /* srl t4,0x20,t0 */
1602     *a++ = 0x05; *a++ = 0x14; *a++ = 0x40; *a++ = 0x40; /* addq t1,0,t4 */
1603    
1604     /* swap highest 4 bytes: */
1605     *a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */
1606     *a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */
1607     *a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */
1608     *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1609     *a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */
1610     *a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */
1611     *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1612     *a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */
1613     *a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */
1614    
1615     /* or the results together: */
1616     *a++ = 0x01; *a++ = 0x04; *a++ = 0xa1; *a++ = 0x44; /* or t4,t0,t0 */
1617     }
1618     bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rt);
1619     break;
1620     case HI6_LW:
1621     case HI6_LWU:
1622     if (alpha_rt < 0 || bigendian || instruction_type == HI6_LWU)
1623     alpha_rt = ALPHA_T0;
1624     /* ldl rt,0(t3) */
1625     *a++ = 0x00; *a++ = 0x00; *a++ = 0x04 | ((alpha_rt & 7) << 5);
1626     *a++ = 0xa0 | ((alpha_rt >> 3) & 3);
1627     if (bigendian) {
1628     *a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */
1629     *a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */
1630     *a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */
1631     *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1632     *a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */
1633     *a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */
1634     *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1635     *a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */
1636     *a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */
1637     *a++ = 0x01; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,zero,t0 (sign extend) 32->64 */
1638     }
1639     if (instruction_type == HI6_LWU) {
1640     /* Use only lowest 32 bits: */
1641     *a++ = 0x21; *a++ = 0xf6; *a++ = 0x21; *a++ = 0x48; /* zapnot t0,0xf,t0 */
1642     }
1643     if (alpha_rt == ALPHA_T0)
1644     bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rt);
1645     break;
1646     case HI6_LHU:
1647     case HI6_LH:
1648     *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0x30; /* ldwu from memory */
1649     if (bigendian) {
1650     *a++ = 0x62; *a++ = 0x31; *a++ = 0x20; *a++ = 0x48; /* insbl t0,1,t1 */
1651     *a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */
1652     *a++ = 0x01; *a++ = 0x04; *a++ = 0x43; *a++ = 0x44; /* or t1,t2,t0 */
1653     }
1654     if (instruction_type == HI6_LH) {
1655     *a++ = 0x21; *a++ = 0x00; *a++ = 0xe1; *a++ = 0x73; /* sextw t0,t0 */
1656     }
1657     bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rt);
1658     break;
1659     case HI6_LBU:
1660     case HI6_LB:
1661     if (alpha_rt < 0)
1662     alpha_rt = ALPHA_T0;
1663     /* ldbu rt,0(t3) */
1664     *a++ = 0x00; *a++ = 0x00; *a++ = 0x04 | ((alpha_rt & 7) << 5);
1665     *a++ = 0x28 | ((alpha_rt >> 3) & 3);
1666     if (instruction_type == HI6_LB) {
1667     /* sextb rt,rt */
1668     *a++ = alpha_rt; *a++ = 0x00; *a++ = 0xe0 + alpha_rt; *a++ = 0x73;
1669     }
1670     if (alpha_rt == ALPHA_T0)
1671     bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rt);
1672     break;
1673    
1674     case HI6_LWL:
1675     /* a1 = 0..3 (or 0..7 for 64-bit loads): */
1676     alpha_rs = map_MIPS_to_Alpha[rs];
1677     if (alpha_rs < 0) {
1678     bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0);
1679     alpha_rs = ALPHA_T0;
1680     }
1681     *a++ = imm; *a++ = (imm >> 8); *a++ = 0x20 + alpha_rs; *a++ = 0x22;
1682     /* 02 30 20 46 and a1,alignment,t1 */
1683     *a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46;
1684    
1685     /* ldl t0,0(t3) */
1686     *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xa0;
1687    
1688     if (bigendian) {
1689     /* TODO */
1690     bintrans_write_chunkreturn_fail(&a);
1691     }
1692     /*
1693     * lwl: memory = 0x12 0x34 0x56 0x78
1694     * offset (a1): register rt becomes:
1695     * 0 0x12......
1696     * 1 0x3412....
1697     * 2 0x563412..
1698     * 3 0x78563412
1699     */
1700    
1701     bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T2);
1702    
1703     /*
1704     10: 03 00 9f 20 lda t3,3
1705     14: a5 05 82 40 cmpeq t3,t1,t4
1706     18: 01 00 a0 e4 beq t4,20 <skip>
1707     */
1708     *a++ = 0x03; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20;
1709     *a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40;
1710     *a++ = 0x02; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
1711    
1712     /* 03 14 20 40 addq t0,0,t2 */
1713     *a++ = 0x03; *a++ = 0x14; *a++ = 0x20; *a++ = 0x40;
1714    
1715     ok_unaligned_load3 = a;
1716     *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1717    
1718    
1719    
1720     *a++ = 0x02; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20;
1721     *a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40;
1722     *a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
1723     /*
1724     * 2 0x563412..
1725     2c: 21 17 21 48 sll t0,0x8,t0
1726     30: 01 10 20 40 addl t0,0,t0
1727     34: 03 f0 7f 44 and t2,0xff,t2
1728     38: 03 04 23 44 or t0,t2,t2
1729     */
1730     *a++ = 0x21; *a++ = 0x17; *a++ = 0x21; *a++ = 0x48;
1731     *a++ = 0x01; *a++ = 0x10; *a++ = 0x20; *a++ = 0x40;
1732     *a++ = 0x03; *a++ = 0xf0; *a++ = 0x7f; *a++ = 0x44;
1733     *a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
1734    
1735     ok_unaligned_load2 = a;
1736     *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1737    
1738    
1739    
1740     *a++ = 0x01; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20;
1741     *a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40;
1742     *a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
1743     /*
1744     * 1 0x3412....
1745     2c: 21 17 22 48 sll t0,0x10,t0
1746     30: 01 10 20 40 addl t0,0,t0
1747     34: 23 76 60 48 zapnot t2,0x3,t2
1748     38: 03 04 23 44 or t0,t2,t2
1749     */
1750     *a++ = 0x21; *a++ = 0x17; *a++ = 0x22; *a++ = 0x48;
1751     *a++ = 0x01; *a++ = 0x10; *a++ = 0x20; *a++ = 0x40;
1752     *a++ = 0x23; *a++ = 0x76; *a++ = 0x60; *a++ = 0x48;
1753     *a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
1754    
1755     ok_unaligned_load1 = a;
1756     *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1757    
1758    
1759    
1760    
1761     /*
1762     * 0 0x12......
1763     2c: 21 17 23 48 sll t0,0x18,t0
1764     30: 01 10 20 40 addl t0,0,t0
1765     34: 23 f6 60 48 zapnot t2,0x7,t2
1766     38: 03 04 23 44 or t0,t2,t2
1767     */
1768     *a++ = 0x21; *a++ = 0x17; *a++ = 0x23; *a++ = 0x48;
1769     *a++ = 0x01; *a++ = 0x10; *a++ = 0x20; *a++ = 0x40;
1770     *a++ = 0x23; *a++ = 0xf6; *a++ = 0x60; *a++ = 0x48;
1771     *a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
1772    
1773    
1774     *ok_unaligned_load3 = ((size_t)a - (size_t)ok_unaligned_load3 - 4) / 4;
1775     *ok_unaligned_load2 = ((size_t)a - (size_t)ok_unaligned_load2 - 4) / 4;
1776     *ok_unaligned_load1 = ((size_t)a - (size_t)ok_unaligned_load1 - 4) / 4;
1777    
1778     /* 03 10 60 40 addl t2,0,t2 */
1779     *a++ = 0x03; *a++ = 0x10; *a++ = 0x60; *a++ = 0x40;
1780    
1781     bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T2, rt);
1782     break;
1783    
1784     case HI6_LWR:
1785     /* a1 = 0..3 (or 0..7 for 64-bit loads): */
1786     alpha_rs = map_MIPS_to_Alpha[rs];
1787     if (alpha_rs < 0) {
1788     bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0);
1789     alpha_rs = ALPHA_T0;
1790     }
1791     *a++ = imm; *a++ = (imm >> 8); *a++ = 0x20 + alpha_rs; *a++ = 0x22;
1792     /* 02 30 20 46 and a1,alignment,t1 */
1793     *a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46;
1794    
1795     /* ldl t0,0(t3) */
1796     *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xa0;
1797    
1798     if (bigendian) {
1799     /* TODO */
1800     bintrans_write_chunkreturn_fail(&a);
1801     }
1802     /*
1803     * lwr: memory = 0x12 0x34 0x56 0x78
1804     * offset (a1): register rt becomes:
1805     * 0 0x78563412
1806     * 1 0x..785634
1807     * 2 0x....7856
1808     * 3 0x......78
1809     */
1810    
1811     bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T2);
1812    
1813     /*
1814     10: 03 00 9f 20 lda t3,3
1815     14: a5 05 82 40 cmpeq t3,t1,t4
1816     18: 01 00 a0 e4 beq t4,20 <skip>
1817     */
1818     *a++ = 0x03; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20;
1819     *a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40;
1820     *a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
1821    
1822     /*
1823     2c: 81 16 23 48 srl t0,0x18,t0
1824     b0: 21 36 20 48 zapnot t0,0x1,t0
1825     34: 23 d6 7f 48 zapnot t2,0xfe,t2
1826     38: 03 04 23 44 or t0,t2,t2
1827     */
1828     *a++ = 0x81; *a++ = 0x16; *a++ = 0x23; *a++ = 0x48;
1829     *a++ = 0x21; *a++ = 0x36; *a++ = 0x20; *a++ = 0x48;
1830     *a++ = 0x23; *a++ = 0xd6; *a++ = 0x7f; *a++ = 0x48;
1831     *a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
1832    
1833     ok_unaligned_load3 = a;
1834     *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1835    
1836    
1837    
1838     *a++ = 0x02; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20;
1839     *a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40;
1840     *a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
1841     /*
1842     2c: 81 16 22 48 srl t0,0x10,t0
1843     b4: 21 76 20 48 zapnot t0,0x3,t0
1844     34: 23 96 7f 48 zapnot t2,0xfc,t2
1845     38: 03 04 23 44 or t0,t2,t2
1846     */
1847     *a++ = 0x81; *a++ = 0x16; *a++ = 0x22; *a++ = 0x48;
1848     *a++ = 0x21; *a++ = 0x76; *a++ = 0x20; *a++ = 0x48;
1849     *a++ = 0x23; *a++ = 0x96; *a++ = 0x7f; *a++ = 0x48;
1850     *a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
1851    
1852     ok_unaligned_load2 = a;
1853     *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1854    
1855    
1856    
1857     *a++ = 0x01; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20;
1858     *a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40;
1859     *a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
1860     /*
1861     2c: 81 16 21 48 srl t0,0x8,t0
1862     b8: 21 f6 20 48 zapnot t0,0x7,t0
1863     3c: 23 16 7f 48 zapnot t2,0xf8,t2
1864     40: 03 04 23 44 or t0,t2,t2
1865     */
1866     *a++ = 0x81; *a++ = 0x16; *a++ = 0x21; *a++ = 0x48;
1867     *a++ = 0x21; *a++ = 0xf6; *a++ = 0x20; *a++ = 0x48;
1868     *a++ = 0x23; *a++ = 0x16; *a++ = 0x7f; *a++ = 0x48;
1869     *a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
1870    
1871     ok_unaligned_load1 = a;
1872     *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1873    
1874    
1875    
1876    
1877     /*
1878     * 0 0x12......
1879     */
1880     /* 03 14 20 40 addq t0,0,t2 */
1881     *a++ = 0x03; *a++ = 0x14; *a++ = 0x20; *a++ = 0x40;
1882    
1883    
1884    
1885     *ok_unaligned_load3 = ((size_t)a - (size_t)ok_unaligned_load3 - 4) / 4;
1886     *ok_unaligned_load2 = ((size_t)a - (size_t)ok_unaligned_load2 - 4) / 4;
1887     *ok_unaligned_load1 = ((size_t)a - (size_t)ok_unaligned_load1 - 4) / 4;
1888    
1889     /* 03 10 60 40 addl t2,0,t2 */
1890     *a++ = 0x03; *a++ = 0x10; *a++ = 0x60; *a++ = 0x40;
1891    
1892     bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T2, rt);
1893     break;
1894    
1895     case HI6_SQ:
1896     /* TODO */
1897     break;
1898     case HI6_SD:
1899     bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T0);
1900     if (bigendian) {
1901     /* remember original 8 bytes of t0: */
1902     *a++ = 0x05; *a++ = 0x04; *a++ = 0x3f; *a++ = 0x40; /* addq t0,zero,t4 */
1903    
1904     /* swap lowest 4 bytes: */
1905     *a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */
1906     *a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */
1907     *a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */
1908     *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1909     *a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */
1910     *a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */
1911     *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1912     *a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */
1913     *a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */
1914    
1915     /* save result in (top 4 bytes of) t1, then t4. get back top bits of t4: */
1916     *a++ = 0x22; *a++ = 0x17; *a++ = 0x24; *a++ = 0x48; /* sll t0,0x20,t1 */
1917     *a++ = 0x81; *a++ = 0x16; *a++ = 0xa4; *a++ = 0x48; /* srl t4,0x20,t0 */
1918     *a++ = 0x05; *a++ = 0x14; *a++ = 0x40; *a++ = 0x40; /* addq t1,0,t4 */
1919    
1920     /* swap highest 4 bytes: */
1921     *a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */
1922     *a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */
1923     *a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */
1924     *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1925     *a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */
1926     *a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */
1927     *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1928     *a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */
1929     *a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */
1930    
1931     /* or the results together: */
1932     *a++ = 0x01; *a++ = 0x04; *a++ = 0xa1; *a++ = 0x44; /* or t4,t0,t0 */
1933     }
1934     *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xb4; /* stq to memory */
1935     break;
1936     case HI6_SW:
1937     if (alpha_rt < 0 || bigendian) {
1938     bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T0);
1939     alpha_rt = ALPHA_T0;
1940     }
1941     if (bigendian) {
1942     *a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */
1943     *a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */
1944     *a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */
1945     *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1946     *a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */
1947     *a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */
1948     *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1949     *a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */
1950     *a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */
1951     }
1952     /* stl to memory: stl rt,0(t3) */
1953     *a++ = 0x00; *a++ = 0x00; *a++ = 0x04 | ((alpha_rt & 7) << 5);
1954     *a++ = 0xb0 | ((alpha_rt >> 3) & 3);
1955     break;
1956     case HI6_SH:
1957     bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T0);
1958     if (bigendian) {
1959     *a++ = 0x62; *a++ = 0x31; *a++ = 0x20; *a++ = 0x48; /* insbl t0,1,t1 */
1960     *a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */
1961     *a++ = 0x01; *a++ = 0x04; *a++ = 0x43; *a++ = 0x44; /* or t1,t2,t0 */
1962     }
1963     *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0x34; /* stw to memory */
1964     break;
1965     case HI6_SB:
1966     if (alpha_rt < 0) {
1967     bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T0);
1968     alpha_rt = ALPHA_T0;
1969     }
1970     /* stb to memory: stb rt,0(t3) */
1971     *a++ = 0x00; *a++ = 0x00; *a++ = 0x04 | ((alpha_rt & 7) << 5);
1972     *a++ = 0x38 | ((alpha_rt >> 3) & 3);
1973     break;
1974    
1975     case HI6_SWL:
1976     /* a1 = 0..3 (or 0..7 for 64-bit stores): */
1977     alpha_rs = map_MIPS_to_Alpha[rs];
1978     if (alpha_rs < 0) {
1979     bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0);
1980     alpha_rs = ALPHA_T0;
1981     }
1982     *a++ = imm; *a++ = (imm >> 8); *a++ = 0x20 + alpha_rs; *a++ = 0x22;
1983     /* 02 30 20 46 and a1,alignment,t1 */
1984     *a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46;
1985    
1986     /* ldl t0,0(t3) */
1987     *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xa0;
1988    
1989     if (bigendian) {
1990     /* TODO */
1991     bintrans_write_chunkreturn_fail(&a);
1992     }
1993    
1994     bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T2);
1995    
1996     /*
1997     * swl: memory = 0x12 0x34 0x56 0x78
1998     * register = 0x89abcdef
1999     * offset (a1): memory becomes:
2000     * 0 0x89 0x.. 0x.. 0x..
2001     * 1 0xab 0x89 0x.. 0x..
2002     * 2 0xcd 0xab 0x89 0x..
2003     * 3 0xef 0xcd 0xab 0x89
2004     */
2005    
2006     /*
2007     a5 75 40 40 cmpeq t1,0x03,t4
2008     01 00 a0 e4 beq t4,20 <skip>
2009     */
2010     *a++ = 0xa5; *a++ = 0x75; *a++ = 0x40; *a++ = 0x40;
2011     *a++ = 0x02; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
2012    
2013     /* 01 10 60 40 addl t2,0,t0 */
2014     *a++ = 0x01; *a++ = 0x10; *a++ = 0x60; *a++ = 0x40;
2015    
2016     ok_unaligned_load3 = a;
2017     *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
2018    
2019    
2020    
2021    
2022     *a++ = 0xa5; *a++ = 0x55; *a++ = 0x40; *a++ = 0x40;
2023     *a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
2024     /*
2025     2:
2026     e8: 83 16 61 48 srl t2,0x8,t2
2027     ec: 23 f6 60 48 zapnot t2,0x7,t2
2028     f0: 21 16 3f 48 zapnot t0,0xf8,t0
2029     f4: 01 04 23 44 or t0,t2,t0
2030     */
2031     *a++ = 0x83; *a++ = 0x16; *a++ = 0x61; *a++ = 0x48;
2032     *a++ = 0x23; *a++ = 0xf6; *a++ = 0x60; *a++ = 0x48;
2033     *a++ = 0x21; *a++ = 0x16; *a++ = 0x3f; *a++ = 0x48;
2034     *a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
2035    
2036     ok_unaligned_load2 = a;
2037     *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
2038    
2039    
2040    
2041     *a++ = 0xa5; *a++ = 0x35; *a++ = 0x40; *a++ = 0x40;
2042     *a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
2043     /*
2044     1:
2045     f8: 83 16 62 48 srl t2,0x10,t2
2046     fc: 23 76 60 48 zapnot t2,0x3,t2
2047     100: 21 96 3f 48 zapnot t0,0xfc,t0
2048     104: 01 04 23 44 or t0,t2,t0
2049     */
2050     *a++ = 0x83; *a++ = 0x16; *a++ = 0x62; *a++ = 0x48;
2051     *a++ = 0x23; *a++ = 0x76; *a++ = 0x60; *a++ = 0x48;
2052     *a++ = 0x21; *a++ = 0x96; *a++ = 0x3f; *a++ = 0x48;
2053     *a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
2054    
2055     ok_unaligned_load1 = a;
2056     *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
2057    
2058    
2059    
2060    
2061    
2062     /*
2063     0:
2064     108: 83 16 63 48 srl t2,0x18,t2
2065     10c: 23 36 60 48 zapnot t2,0x1,t2
2066     110: 21 d6 3f 48 zapnot t0,0xfe,t0
2067     114: 01 04 23 44 or t0,t2,t0
2068     */
2069     *a++ = 0x83; *a++ = 0x16; *a++ = 0x63; *a++ = 0x48;
2070     *a++ = 0x23; *a++ = 0x36; *a++ = 0x60; *a++ = 0x48;
2071     *a++ = 0x21; *a++ = 0xd6; *a++ = 0x3f; *a++ = 0x48;
2072     *a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
2073    
2074    
2075     *ok_unaligned_load3 = ((size_t)a - (size_t)ok_unaligned_load3 - 4) / 4;
2076     *ok_unaligned_load2 = ((size_t)a - (size_t)ok_unaligned_load2 - 4) / 4;
2077     *ok_unaligned_load1 = ((size_t)a - (size_t)ok_unaligned_load1 - 4) / 4;
2078    
2079     /* sdl t0,0(t3) */
2080     *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xb0;
2081     break;
2082    
2083     case HI6_SWR:
2084     /* a1 = 0..3 (or 0..7 for 64-bit stores): */
2085     alpha_rs = map_MIPS_to_Alpha[rs];
2086     if (alpha_rs < 0) {
2087     bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0);
2088     alpha_rs = ALPHA_T0;
2089     }
2090     *a++ = imm; *a++ = (imm >> 8); *a++ = 0x20 + alpha_rs; *a++ = 0x22;
2091     /* 02 30 20 46 and a1,alignment,t1 */
2092     *a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46;
2093    
2094     /* ldl t0,0(t3) */
2095     *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xa0;
2096    
2097     if (bigendian) {
2098     /* TODO */
2099     bintrans_write_chunkreturn_fail(&a);
2100     }
2101    
2102     bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T2);
2103    
2104     /*
2105     * swr: memory = 0x12 0x34 0x56 0x78
2106     * register = 0x89abcdef
2107     * offset (a1): memory becomes:
2108     * 0 0xef 0xcd 0xab 0x89
2109     * 1 0x.. 0xef 0xcd 0xab
2110     * 2 0x.. 0x.. 0xef 0xcd
2111     * 3 0x.. 0x.. 0x.. 0xef
2112     */
2113    
2114    
2115     /*
2116     a5 75 40 40 cmpeq t1,0x03,t4
2117     01 00 a0 e4 beq t4,20 <skip>
2118     */
2119     *a++ = 0xa5; *a++ = 0x75; *a++ = 0x40; *a++ = 0x40;
2120     *a++ = 0x04; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
2121    
2122     /*
2123     118: 23 17 63 48 sll t2,0x18,t2
2124     11c: 21 f6 20 48 zapnot t0,0x7,t0
2125     120: 01 04 23 44 or t0,t2,t0
2126     */
2127     *a++ = 0x23; *a++ = 0x17; *a++ = 0x63; *a++ = 0x48;
2128     *a++ = 0x21; *a++ = 0xf6; *a++ = 0x20; *a++ = 0x48;
2129     *a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
2130    
2131     ok_unaligned_load3 = a;
2132     *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
2133    
2134    
2135    
2136    
2137    
2138     *a++ = 0xa5; *a++ = 0x55; *a++ = 0x40; *a++ = 0x40;
2139     *a++ = 0x04; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
2140     /*
2141     2:
2142     124: 23 17 62 48 sll t2,0x10,t2
2143     128: 21 76 20 48 zapnot t0,0x3,t0
2144     12c: 01 04 23 44 or t0,t2,t0
2145     */
2146     *a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48;
2147     *a++ = 0x21; *a++ = 0x76; *a++ = 0x20; *a++ = 0x48;
2148     *a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
2149    
2150     ok_unaligned_load2 = a;
2151     *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
2152    
2153    
2154    
2155     *a++ = 0xa5; *a++ = 0x35; *a++ = 0x40; *a++ = 0x40;
2156     *a++ = 0x04; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
2157     /*
2158     1:
2159     130: 23 17 61 48 sll t2,0x8,t2
2160     134: 21 36 20 48 zapnot t0,0x1,t0
2161     138: 01 04 23 44 or t0,t2,t0
2162     */
2163     *a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48;
2164     *a++ = 0x21; *a++ = 0x36; *a++ = 0x20; *a++ = 0x48;
2165     *a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
2166    
2167     ok_unaligned_load1 = a;
2168     *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
2169    
2170    
2171    
2172     /*
2173     0:
2174     13c: 01 10 60 40 addl t2,0,t0
2175     */
2176     *a++ = 0x01; *a++ = 0x10; *a++ = 0x60; *a++ = 0x40;
2177    
2178    
2179     *ok_unaligned_load3 = ((size_t)a - (size_t)ok_unaligned_load3 - 4) / 4;
2180     *ok_unaligned_load2 = ((size_t)a - (size_t)ok_unaligned_load2 - 4) / 4;
2181     *ok_unaligned_load1 = ((size_t)a - (size_t)ok_unaligned_load1 - 4) / 4;
2182    
2183     /* sdl t0,0(t3) */
2184     *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xb0;
2185     break;
2186    
2187     default:
2188     ;
2189     }
2190    
2191     *addrp = a;
2192     bintrans_write_pc_inc(addrp);
2193     return 1;
2194     }
2195    
2196    
2197     /*
2198     * bintrans_write_instruction__lui():
2199     */
2200     static int bintrans_write_instruction__lui(unsigned char **addrp,
2201     int rt, int imm)
2202     {
2203     uint32_t *a;
2204    
2205     /*
2206     * dc fe 3f 24 ldah t0,-292
2207     * 1f 04 ff 5f fnop
2208     * 88 08 30 b4 stq t0,2184(a0)
2209     */
2210     if (rt != 0) {
2211     int alpha_rt = map_MIPS_to_Alpha[rt];
2212     if (alpha_rt < 0)
2213     alpha_rt = ALPHA_T0;
2214    
2215     a = (uint32_t *) *addrp;
2216     *a++ = 0x241f0000 | (alpha_rt << 21) | ((uint32_t)imm & 0xffff);
2217     *addrp = (unsigned char *) a;
2218    
2219     if (alpha_rt == ALPHA_T0) {
2220     *a++ = 0x5fff041f; /* fnop */
2221     bintrans_move_Alpha_reg_into_MIPS_reg(addrp, ALPHA_T0, rt);
2222     }
2223     }
2224    
2225     bintrans_write_pc_inc(addrp);
2226    
2227     return 1;
2228     }
2229    
2230    
2231     /*
2232     * bintrans_write_instruction__mfmthilo():
2233     */
2234     static int bintrans_write_instruction__mfmthilo(unsigned char **addrp,
2235     int rd, int from_flag, int hi_flag)
2236     {
2237     unsigned char *a;
2238     int ofs;
2239    
2240     a = *addrp;
2241    
2242     /*
2243     * 18 09 30 a4 ldq t0,hi(a0) (or lo)
2244     * 18 09 30 b4 stq t0,rd(a0)
2245     *
2246     * (or if from_flag is cleared then move the other way, it's
2247     * actually not rd then, but rs...)
2248     */
2249    
2250     if (from_flag) {
2251     if (rd != 0) {
2252     /* mfhi or mflo */
2253     if (hi_flag)
2254     ofs = ((size_t)&dummy_cpu.cd.mips.hi) - (size_t)&dummy_cpu;
2255     else
2256     ofs = ((size_t)&dummy_cpu.cd.mips.lo) - (size_t)&dummy_cpu;
2257     *a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x30; *a++ = 0xa4;
2258    
2259     bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rd);
2260     }
2261     } else {
2262     /* mthi or mtlo */
2263     bintrans_move_MIPS_reg_into_Alpha_reg(&a, rd, ALPHA_T0);
2264    
2265     if (hi_flag)
2266     ofs = ((size_t)&dummy_cpu.cd.mips.hi) - (size_t)&dummy_cpu;
2267     else
2268     ofs = ((size_t)&dummy_cpu.cd.mips.lo) - (size_t)&dummy_cpu;
2269     *a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x30; *a++ = 0xb4;
2270     }
2271    
2272     *addrp = a;
2273     bintrans_write_pc_inc(addrp);
2274     return 1;
2275     }
2276    
2277    
2278     /*
2279     * bintrans_write_instruction__mfc_mtc():
2280     */
2281     static int bintrans_write_instruction__mfc_mtc(struct memory *mem,
2282     unsigned char **addrp, int coproc_nr, int flag64bit, int rt,
2283     int rd, int mtcflag)
2284     {
2285     uint32_t *a, *jump;
2286     int ofs;
2287    
2288     /*
2289     * NOTE: Only a few registers are readable without side effects.
2290     */
2291     if (rt == 0 && !mtcflag)
2292     return 0;
2293    
2294     if (coproc_nr >= 1)
2295     return 0;
2296    
2297     if (rd == COP0_RANDOM || rd == COP0_COUNT)
2298     return 0;
2299    
2300    
2301     /*************************************************************
2302     *
2303     * TODO: Check for kernel mode, or Coproc X usability bit!
2304     *
2305     *************************************************************/
2306    
2307     a = (uint32_t *) *addrp;
2308    
2309     ofs = ((size_t)&dummy_cpu.cd.mips.coproc[0]) - (size_t)&dummy_cpu;
2310     *a++ = 0xa4300000 | (ofs & 0xffff); /* ldq t0,coproc[0](a0) */
2311    
2312     ofs = ((size_t)&dummy_coproc.reg[rd]) - (size_t)&dummy_coproc;
2313     *a++ = 0xa4410000 | (ofs & 0xffff); /* ldq t1,reg_rd(t0) */
2314    
2315     if (mtcflag) {
2316     /* mtc: */
2317     *addrp = (unsigned char *) a;
2318     bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rt, ALPHA_T0);
2319     a = (uint32_t *) *addrp;
2320    
2321     if (!flag64bit) {
2322     *a++ = 0x40201001; /* addl t0,0,t0 */
2323     *a++ = 0x40401002; /* addl t1,0,t1 */
2324     }
2325    
2326     /*
2327     * In the general case: Only allow mtc if it does NOT
2328     * change the register!!
2329     */
2330    
2331     switch (rd) {
2332     case COP0_INDEX:
2333     break;
2334    
2335     case COP0_EPC:
2336     break;
2337    
2338     /* TODO: Some bits are not writable */
2339     case COP0_ENTRYLO0:
2340     case COP0_ENTRYLO1:
2341     break;
2342    
2343     case COP0_ENTRYHI:
2344     /*
2345     * Entryhi is ok to write to, as long as the
2346     * ASID isn't changed. (That would require
2347     * cache invalidations etc. Instead of checking
2348     * for MMU3K vs others, we just assume that all the
2349     * lowest 12 bits must be the same.
2350     */
2351     /* ff 0f bf 20 lda t4,0x0fff */
2352     /* 03 00 25 44 and t0,t4,t2 */
2353     /* 04 00 45 44 and t1,t4,t3 */
2354     /* a3 05 64 40 cmpeq t2,t3,t2 */
2355     /* 01 00 60 f4 bne t2,<ok> */
2356     *a++ = 0x20bf0fff;
2357     *a++ = 0x44250003;
2358     *a++ = 0x44450004;
2359     *a++ = 0x406405a3;
2360     jump = a;
2361     *a++ = 0; /* later */
2362     *addrp = (unsigned char *) a;
2363     bintrans_write_chunkreturn_fail(addrp);
2364     a = (uint32_t *) *addrp;
2365     *jump = 0xf4600000 | (((size_t)a - (size_t)jump - 4) / 4);
2366     break;
2367    
2368     case COP0_STATUS:
2369     /* Only allow updates to the status register if
2370     the interrupt enable bits were changed, but no
2371     other bits! */
2372     if (mem->bintrans_32bit_only) {
2373     /* R3000 etc. */
2374     /* t4 = 0x0fe70000; */
2375     *a++ = 0x20bf0000;
2376     *a++ = 0x24a50fe7;
2377     } else {
2378     /* fe 00 bf 20 lda t4,0x00fe */
2379     /* ff ff a5 24 ldah t4,-1(t4) */
2380     *a++ = 0x20bf0000;
2381     *a++ = 0x24a5ffff;
2382     }
2383    
2384     /* 03 00 25 44 and t0,t4,t2 */
2385     /* 04 00 45 44 and t1,t4,t3 */
2386     /* a3 05 64 40 cmpeq t2,t3,t2 */
2387     /* 01 00 60 f4 bne t2,<ok> */
2388     *a++ = 0x44250003;
2389     *a++ = 0x44450004;
2390     *a++ = 0x406405a3;
2391     jump = a;
2392     *a++ = 0; /* later */
2393     *addrp = (unsigned char *) a;
2394     bintrans_write_chunkreturn_fail(addrp);
2395     a = (uint32_t *) *addrp;
2396     *jump = 0xf4600000 | (((size_t)a - (size_t)jump - 4) / 4);
2397    
2398     /* If enabling interrupt bits would cause an
2399     exception, then don't do it: */
2400     ofs = ((size_t)&dummy_cpu.cd.mips.coproc[0]) - (size_t)&dummy_cpu;
2401     *a++ = 0xa4900000 | (ofs & 0xffff); /* ldq t3,coproc[0](a0) */
2402     ofs = ((size_t)&dummy_coproc.reg[COP0_CAUSE]) - (size_t)&dummy_coproc;
2403     *a++ = 0xa4a40000 | (ofs & 0xffff); /* ldq t4,reg_rd(t3) */
2404    
2405     /* 02 00 a1 44 and t4,t0,t1 */
2406     /* 83 16 41 48 srl t1,0x8,t2 */
2407     /* 04 f0 7f 44 and t2,0xff,t3 */
2408     *a++ = 0x44a10002;
2409     *a++ = 0x48411683;
2410     *a++ = 0x447ff004;
2411     /* 01 00 80 e4 beq t3,<ok> */
2412     jump = a;
2413     *a++ = 0; /* later */
2414     *addrp = (unsigned char *) a;
2415     bintrans_write_chunkreturn_fail(addrp);
2416     a = (uint32_t *) *addrp;
2417     *jump = 0xe4800000 | (((size_t)a - (size_t)jump - 4) / 4);
2418     break;
2419    
2420     default:
2421     /* a3 05 22 40 cmpeq t0,t1,t2 */
2422     /* 01 00 60 f4 bne t2,<ok> */
2423     *a++ = 0x402205a3;
2424     jump = a;
2425     *a++ = 0; /* later */
2426     *addrp = (unsigned char *) a;
2427     bintrans_write_chunkreturn_fail(addrp);
2428     a = (uint32_t *) *addrp;
2429     *jump = 0xf4600000 | (((size_t)a - (size_t)jump - 4) / 4);
2430     }
2431    
2432     *a++ = 0x40201402; /* addq t0,0,t1 */
2433    
2434     ofs = ((size_t)&dummy_cpu.cd.mips.coproc[0]) - (size_t)&dummy_cpu;
2435     *a++ = 0xa4300000 | (ofs & 0xffff); /* ldq t0,coproc[0](a0) */
2436     ofs = ((size_t)&dummy_coproc.reg[rd]) - (size_t)&dummy_coproc;
2437     *a++ = 0xb4410000 | (ofs & 0xffff); /* stq t1,reg_rd(t0) */
2438     } else {
2439     /* mfc: */
2440     if (!flag64bit) {
2441     *a++ = 0x40401002; /* addl t1,0,t1 */
2442     }
2443    
2444     *addrp = (unsigned char *) a;
2445     bintrans_move_Alpha_reg_into_MIPS_reg(addrp, ALPHA_T1, rt);
2446     a = (uint32_t *) *addrp;
2447     }
2448    
2449     *addrp = (unsigned char *) a;
2450    
2451     bintrans_write_pc_inc(addrp);
2452     return 1;
2453     }
2454    
2455    
2456     /*
2457     * bintrans_write_instruction__tlb_rfe_etc():
2458     */
2459     static int bintrans_write_instruction__tlb_rfe_etc(unsigned char **addrp,
2460     int itype)
2461     {
2462     uint32_t *a;
2463     int ofs = 0;
2464    
2465     switch (itype) {
2466     case CALL_TLBWI:
2467     case CALL_TLBWR:
2468     case CALL_TLBP:
2469     case CALL_TLBR:
2470     case CALL_RFE:
2471     case CALL_ERET:
2472     case CALL_BREAK:
2473     case CALL_SYSCALL:
2474     break;
2475     default:
2476     return 0;
2477     }
2478    
2479     a = (uint32_t *) *addrp;
2480    
2481     /* a0 = pointer to the cpu struct */
2482    
2483     switch (itype) {
2484     case CALL_TLBWI:
2485     case CALL_TLBWR:
2486     /* a1 = 0 for indexed, 1 for random */
2487     *a++ = 0x223f0000 | (itype == CALL_TLBWR);
2488     break;
2489     case CALL_TLBP:
2490     case CALL_TLBR:
2491     /* a1 = 0 for probe, 1 for read */
2492     *a++ = 0x223f0000 | (itype == CALL_TLBR);
2493     break;
2494     case CALL_BREAK:
2495     case CALL_SYSCALL:
2496     *a++ = 0x223f0000 | (itype == CALL_BREAK? EXCEPTION_BP : EXCEPTION_SYS);
2497     break;
2498     }
2499    
2500     /* Put PC into the cpu struct (both pc and pc_last). */
2501     *a++ = 0xb4d00000 | ofs_pc; /* stq t5,"pc"(a0) */
2502     *a++ = 0xb4d00000 | ofs_pc_last;/* stq t5,"pc_last"(a0) */
2503    
2504     /* Save a0 and the old return address on the stack: */
2505     *a++ = 0x23deff80; /* lda sp,-128(sp) */
2506    
2507     *a++ = 0xb75e0000; /* stq ra,0(sp) */
2508     *a++ = 0xb61e0008; /* stq a0,8(sp) */
2509     *a++ = 0xb0fe0018; /* stl t6,24(sp) */
2510     *a++ = 0xb71e0020; /* stq t10,32(sp) */
2511     *a++ = 0xb73e0028; /* stq t11,40(sp) */
2512     *a++ = 0xb51e0030; /* stq t7,48(sp) */
2513     *a++ = 0xb6de0038; /* stq t8,56(sp) */
2514     *a++ = 0xb6fe0040; /* stq t9,64(sp) */
2515    
2516     switch (itype) {
2517     case CALL_TLBP:
2518     case CALL_TLBR:
2519     ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_fast_tlbpr) - (size_t)&dummy_cpu;
2520     break;
2521     case CALL_TLBWR:
2522     case CALL_TLBWI:
2523     ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_fast_tlbwri) - (size_t)&dummy_cpu;
2524     break;
2525     case CALL_RFE:
2526     ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_fast_rfe) - (size_t)&dummy_cpu;
2527     break;
2528     case CALL_ERET:
2529     ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_fast_eret) - (size_t)&dummy_cpu;
2530     break;
2531     case CALL_BREAK:
2532     case CALL_SYSCALL:
2533     ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_simple_exception) - (size_t)&dummy_cpu;
2534     break;
2535     }
2536    
2537     *a++ = 0xa7700000 | ofs; /* ldq t12,0(a0) */
2538    
2539     /* Call bintrans_fast_tlbwr: */
2540     *a++ = 0x6b5b4000; /* jsr ra,(t12),<after> */
2541    
2542     /* Restore the old return address and a0 from the stack: */
2543     *a++ = 0xa75e0000; /* ldq ra,0(sp) */
2544     *a++ = 0xa61e0008; /* ldq a0,8(sp) */
2545     *a++ = 0xa0fe0018; /* ldl t6,24(sp) */
2546     *a++ = 0xa71e0020; /* ldq t10,32(sp) */
2547     *a++ = 0xa73e0028; /* ldq t11,40(sp) */
2548     *a++ = 0xa51e0030; /* ldq t7,48(sp) */
2549     *a++ = 0xa6de0038; /* ldq t8,56(sp) */
2550     *a++ = 0xa6fe0040; /* ldq t9,64(sp) */
2551    
2552     *a++ = 0x23de0080; /* lda sp,128(sp) */
2553    
2554     /* Load PC from the cpu struct. */
2555     *a++ = 0xa4d00000 | ofs_pc; /* ldq t5,"pc"(a0) */
2556    
2557     *addrp = (unsigned char *) a;
2558    
2559     switch (itype) {
2560     case CALL_ERET:
2561     case CALL_BREAK:
2562     case CALL_SYSCALL:
2563     break;
2564     default:
2565     bintrans_write_pc_inc(addrp);
2566     }
2567    
2568     return 1;
2569     }
2570    
2571    
2572     /*
2573     * bintrans_backend_init():
2574     *
2575     * This is necessary for broken 2.95.4 compilers on FreeBSD/Alpha 4.9,
2576     * and probably a few others. (For Compaq's CC, and for gcc 3.x, this
2577     * wouldn't be necessary, and the old code would have worked.)
2578     */
2579     static void bintrans_backend_init(void)
2580     {
2581     int size;
2582 dpavlin 4 uint32_t *p, *q;
2583 dpavlin 2
2584    
2585     /* "runchunk": */
2586     size = 256; /* NOTE: This MUST be enough, or we fail */
2587     p = (uint32_t *)mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC,
2588     MAP_ANON | MAP_PRIVATE, -1, 0);
2589    
2590     /* If mmap() failed, try malloc(): */
2591     if (p == NULL) {
2592     p = malloc(size);
2593     if (p == NULL) {
2594     fprintf(stderr, "bintrans_backend_init(): out of memory\n");
2595     exit(1);
2596     }
2597     }
2598    
2599     bintrans_runchunk = (void *)p;
2600    
2601     *p++ = 0x23deffa0; /* lda sp,-0x60(sp) */
2602     *p++ = 0xb75e0000; /* stq ra,0(sp) */
2603     *p++ = 0xb53e0008; /* stq s0,8(sp) */
2604     *p++ = 0xb55e0010; /* stq s1,16(sp) */
2605     *p++ = 0xb57e0018; /* stq s2,24(sp) */
2606     *p++ = 0xb59e0020; /* stq s3,32(sp) */
2607     *p++ = 0xb5be0028; /* stq s4,40(sp) */
2608     *p++ = 0xb5de0030; /* stq s5,48(sp) */
2609     *p++ = 0xb5fe0038; /* stq s6,56(sp) */
2610     *p++ = 0xb7be0058; /* stq gp,0x58(sp) */
2611    
2612     *p++ = 0xa4d00000 | ofs_pc; /* ldq t5,"pc"(a0) */
2613     *p++ = 0xa0f00000 | ofs_n; /* ldl t6,"bintrans_instructions_executed"(a0) */
2614     *p++ = 0xa5100000 | ofs_a0; /* ldq t7,"a0"(a0) */
2615     *p++ = 0xa6d00000 | ofs_a1; /* ldq t8,"a1"(a0) */
2616     *p++ = 0xa6f00000 | ofs_s0; /* ldq t9,"s0"(a0) */
2617     *p++ = 0xa1300000 | ofs_ds; /* ldl s0,"delay_slot"(a0) */
2618     *p++ = 0xa5500000 | ofs_ja; /* ldq s1,"delay_jmpaddr"(a0) */
2619     *p++ = 0xa5700000 | ofs_sp; /* ldq s2,"gpr[sp]"(a0) */
2620     *p++ = 0xa5900000 | ofs_ra; /* ldq s3,"gpr[ra]"(a0) */
2621     *p++ = 0xa5b00000 | ofs_t0; /* ldq s4,"gpr[t0]"(a0) */
2622     *p++ = 0xa5d00000 | ofs_t1; /* ldq s5,"gpr[t1]"(a0) */
2623     *p++ = 0xa5f00000 | ofs_t2; /* ldq s6,"gpr[t2]"(a0) */
2624     *p++ = 0xa7100000 | ofs_tbl0; /* ldq t10,table0(a0) */
2625     *p++ = 0xa7300000 | ofs_v0; /* ldq t11,"gpr[v0]"(a0) */
2626    
2627     *p++ = 0x6b514000; /* jsr ra,(a1),<back> */
2628    
2629     *p++ = 0xb4d00000 | ofs_pc; /* stq t5,"pc"(a0) */
2630     *p++ = 0xb0f00000 | ofs_n; /* stl t6,"bintrans_instructions_executed"(a0) */
2631     *p++ = 0xb5100000 | ofs_a0; /* stq t7,"a0"(a0) */
2632     *p++ = 0xb6d00000 | ofs_a1; /* stq t8,"a1"(a0) */
2633     *p++ = 0xb6f00000 | ofs_s0; /* stq t9,"s0"(a0) */
2634     *p++ = 0xb1300000 | ofs_ds; /* stl s0,"delay_slot"(a0) */
2635     *p++ = 0xb5500000 | ofs_ja; /* stq s1,"delay_jmpaddr"(a0) */
2636     *p++ = 0xb5700000 | ofs_sp; /* stq s2,"gpr[sp]"(a0) */
2637     *p++ = 0xb5900000 | ofs_ra; /* stq s3,"gpr[ra]"(a0) */
2638     *p++ = 0xb5b00000 | ofs_t0; /* stq s4,"gpr[t0]"(a0) */
2639     *p++ = 0xb5d00000 | ofs_t1; /* stq s5,"gpr[t1]"(a0) */
2640     *p++ = 0xb5f00000 | ofs_t2; /* stq s6,"gpr[t2]"(a0) */
2641     *p++ = 0xb7300000 | ofs_v0; /* stq t11,"gpr[v0]"(a0) */
2642    
2643     *p++ = 0xa75e0000; /* ldq ra,0(sp) */
2644     *p++ = 0xa53e0008; /* ldq s0,8(sp) */
2645     *p++ = 0xa55e0010; /* ldq s1,16(sp) */
2646     *p++ = 0xa57e0018; /* ldq s2,24(sp) */
2647     *p++ = 0xa59e0020; /* ldq s3,32(sp) */
2648     *p++ = 0xa5be0028; /* ldq s4,40(sp) */
2649     *p++ = 0xa5de0030; /* ldq s5,48(sp) */
2650     *p++ = 0xa5fe0038; /* ldq s6,56(sp) */
2651     *p++ = 0xa7be0058; /* ldq gp,0x58(sp) */
2652     *p++ = 0x23de0060; /* lda sp,0x60(sp) */
2653     *p++ = 0x6bfa8001; /* ret */
2654    
2655    
2656     /* "jump to 32bit pc": */
2657     size = 128; /* WARNING! Don't make this too small. */
2658     p = (uint32_t *)mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC,
2659     MAP_ANON | MAP_PRIVATE, -1, 0);
2660    
2661     /* If mmap() failed, try malloc(): */
2662     if (p == NULL) {
2663     p = malloc(size);
2664     if (p == NULL) {
2665     fprintf(stderr, "bintrans_backend_init(): out of memory\n");
2666     exit(1);
2667     }
2668     }
2669    
2670     bintrans_jump_to_32bit_pc = (void *)p;
2671    
2672     /* Don't execute too many instructions: */
2673     *p++ = 0x205f0000 | (N_SAFE_BINTRANS_LIMIT-1); /* lda t1,safe-1 */
2674    
2675     *p++ = 0x40e20da1; /* cmple t6,t1,t0 */
2676 dpavlin 4 q = p; /* *q is updated later */
2677     *p++ = 0xe4200001; /* beq ret (far below) */
2678 dpavlin 2
2679     *p++ = 0x40c01411; /* addq t5,0,a1 */
2680    
2681     /*
2682     * Special case for 32-bit addressing:
2683     *
2684     * t1 = 1023;
2685     * t2 = ((a1 >> 22) & t1) * sizeof(void *);
2686     * t3 = ((a1 >> 12) & t1) * sizeof(void *);
2687     * t1 = a1 & 0xffc;
2688     */
2689     *p++ = 0x205f1ff8; /* lda t1,1023 * 8 */
2690     *p++ = 0x4a227683; /* srl a1,19,t2 */
2691     *p++ = 0x4a213684; /* srl a1, 9,t3 */
2692     *p++ = 0x44620003; /* and t2,t1,t2 */
2693    
2694     /*
2695     * t10 is vaddr_to_hostaddr_table0
2696     *
2697     * a3 = tbl0[t2] (load entry from tbl0)
2698     */
2699     *p++ = 0x43030412; /* addq t10,t2,a2 */
2700     *p++ = 0x44820004; /* and t3,t1,t3 */
2701     *p++ = 0xa6720000; /* ldq a3,0(a2) */
2702     *p++ = 0x205f0ffc; /* lda t1,0xffc */
2703    
2704     /*
2705 dpavlin 4 * a3 = tbl1[t3] (load entry from tbl1 (which is a3))
2706 dpavlin 2 */
2707     *p++ = 0x42640413; /* addq a3,t3,a3 */
2708     *p++ = 0x46220002; /* and a1,t1,t1 */
2709    
2710     *p++ = 0xa6730000 | ofs_c0; /* ldq a3,chunks[0](a3) */
2711    
2712     /*
2713     * NULL? Then just return.
2714     */
2715     *p++ = 0xf6600001; /* bne a3,<ok> */
2716     *p++ = 0x6bfa8001; /* ret */
2717    
2718     *p++ = 0x40530402; /* addq t1,a3,t1 */
2719     *p++ = 0xa0220000; /* ldl t0,0(t1) */
2720    
2721     /* No translation? Then return. */
2722     *p++ = 0xe4200003; /* beq t0,<skip> */
2723    
2724     *p++ = 0xa4700000 | ofs_cb; /* ldq t2,chunk_base_address(a0) */
2725    
2726     *p++ = 0x40230401; /* addq t0,t2,t0 */
2727     *p++ = 0x6be10000; /* jmp (t0) */
2728    
2729 dpavlin 4 /* Now, update *q to point here: */
2730     *q = 0xe4200000 | (((size_t)p - (size_t)q)/4 - 1); /* beq ret */
2731    
2732 dpavlin 2 /* Return to the main translation loop. */
2733     *p++ = 0x6bfa8001; /* ret */
2734     }
2735    

  ViewVC Help
Powered by ViewVC 1.1.26