/[gxemul]/upstream/0.3.1/src/bintrans_alpha.c
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /upstream/0.3.1/src/bintrans_alpha.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3 - (hide annotations)
Mon Oct 8 16:17:52 2007 UTC (16 years, 8 months ago) by dpavlin
File MIME type: text/plain
File size: 83665 byte(s)
0.3.1
1 dpavlin 2 /*
2     * Copyright (C) 2004-2005 Anders Gavare. All rights reserved.
3     *
4     * Redistribution and use in source and binary forms, with or without
5     * modification, are permitted provided that the following conditions are met:
6     *
7     * 1. Redistributions of source code must retain the above copyright
8     * notice, this list of conditions and the following disclaimer.
9     * 2. Redistributions in binary form must reproduce the above copyright
10     * notice, this list of conditions and the following disclaimer in the
11     * documentation and/or other materials provided with the distribution.
12     * 3. The name of the author may not be used to endorse or promote products
13     * derived from this software without specific prior written permission.
14     *
15     * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16     * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17     * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18     * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19     * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20     * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21     * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22     * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23     * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24     * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25     * SUCH DAMAGE.
26     *
27     *
28     * $Id: bintrans_alpha.c,v 1.114 2005/03/22 09:12:04 debug Exp $
29     *
30     * Alpha specific code for dynamic binary translation.
31     *
32     * See bintrans.c for more information. Included from bintrans.c.
33     *
34     *
35     * Some Alpha registers that are reasonable to use:
36     *
37     * t5..t7 6..8 3
38     * s0..s6 9..15 7
39     * a1..a5 17..21 5
40     * t8..t11 22..25 4
41     *
42     * These can be "mapped" to MIPS registers in the translated code,
43     * except a0 which points to the cpu struct, and t0..t4 (or so)
44     * which are used by the translated code as temporaries.
45     *
46     * 3 + 7 + 5 + 4 = 19 available registers. Of course, all (except
47     * s0..s6) must be saved when calling external functions, such as
48     * when doing load/store.
49     *
50     * Which are the 19 most commonly used MIPS registers? (This will
51     * include the pc, and the "current number of executed translated
52     * instructions.)
53     *
54     * The current allocation is as follows:
55     *
56     * Alpha: MIPS:
57     * ------ -----
58     *
59     * t5 pc (64-bit)
60     * t6 bintrans_instructions_executed (32-bit int)
61     * t7 a0 (mips register 4) (64-bit)
62     * t8 a1 (mips register 5) (64-bit)
63     * t9 s0 (mips register 16) (64-bit)
64     * t10 table0 cached (for load/store)
65     * t11 v0 (mips register 2) (64-bit)
66     * s0 delay_slot (32-bit int)
67     * s1 delay_jmpaddr (64-bit)
68     * s2 sp (mips register 29) (64-bit)
69     * s3 ra (mips register 31) (64-bit)
70     * s4 t0 (mips register 8) (64-bit)
71     * s5 t1 (mips register 9) (64-bit)
72     * s6 t2 (mips register 10) (64-bit)
73     */
74    
75     #define MIPSREG_PC -3
76     #define MIPSREG_DELAY_SLOT -2
77     #define MIPSREG_DELAY_JMPADDR -1
78    
79     #define ALPHA_T0 1
80     #define ALPHA_T1 2
81     #define ALPHA_T2 3
82     #define ALPHA_T3 4
83     #define ALPHA_T4 5
84     #define ALPHA_T5 6
85     #define ALPHA_T6 7
86     #define ALPHA_T7 8
87     #define ALPHA_S0 9
88     #define ALPHA_S1 10
89     #define ALPHA_S2 11
90     #define ALPHA_S3 12
91     #define ALPHA_S4 13
92     #define ALPHA_S5 14
93     #define ALPHA_S6 15
94     #define ALPHA_A0 16
95     #define ALPHA_A1 17
96     #define ALPHA_A2 18
97     #define ALPHA_A3 19
98     #define ALPHA_A4 20
99     #define ALPHA_A5 21
100     #define ALPHA_T8 22
101     #define ALPHA_T9 23
102     #define ALPHA_T10 24
103     #define ALPHA_T11 25
104     #define ALPHA_ZERO 31
105    
106     static int map_MIPS_to_Alpha[32] = {
107     ALPHA_ZERO, -1, ALPHA_T11, -1, /* 0 .. 3 */
108     ALPHA_T7, ALPHA_T8, -1, -1, /* 4 .. 7 */
109     ALPHA_S4, ALPHA_S5, ALPHA_S6, -1, /* 8 .. 11 */
110     -1, -1, -1, -1, /* 12 .. 15 */
111     ALPHA_T9, -1, -1, -1, /* 16 .. 19 */
112     -1, -1, -1, -1, /* 20 .. 23 */
113     -1, -1, -1, -1, /* 24 .. 27 */
114     -1, ALPHA_S2, -1, ALPHA_S3, /* 28 .. 31 */
115     };
116    
117    
118     struct cpu dummy_cpu;
119     struct mips_coproc dummy_coproc;
120     struct vth32_table dummy_vth32_table;
121    
122     unsigned char bintrans_alpha_imb[32] = {
123     0x86, 0x00, 0x00, 0x00, /* imb */
124     0x01, 0x80, 0xfa, 0x6b, /* ret */
125     0x1f, 0x04, 0xff, 0x47, /* nop */
126     0x00, 0x00, 0xfe, 0x2e, /* unop */
127     0x1f, 0x04, 0xff, 0x47, /* nop */
128     0x00, 0x00, 0xfe, 0x2e, /* unop */
129     0x1f, 0x04, 0xff, 0x47, /* nop */
130     0x00, 0x00, 0xfe, 0x2e /* unop */
131     };
132    
133    
134     /*
135     * bintrans_host_cacheinvalidate()
136     *
137     * Invalidate the host's instruction cache. On Alpha, we do this by
138     * executing an imb instruction.
139     *
140     * NOTE: A simple asm("imb"); would be enough here, but not all
141     * compilers have such simple constructs, so an entire function has to
142     * be written as bintrans_alpha_imb[] above.
143     */
144     static void bintrans_host_cacheinvalidate(unsigned char *p, size_t len)
145     {
146     /* Long form of ``asm("imb");'' */
147    
148     void (*f)(void);
149     f = (void *)&bintrans_alpha_imb[0];
150     f();
151     }
152    
153    
154     /*
155     * lda sp,-128(sp) some margin
156     * stq ra,0(sp)
157     * stq s0,8(sp)
158     * stq s1,16(sp)
159     * stq s2,24(sp)
160     * stq s3,32(sp)
161     * stq s4,40(sp)
162     * stq s5,48(sp)
163     * stq s6,56(sp)
164     *
165     * jsr ra,(a1),<back>
166     * back:
167     *
168     * ldq ra,0(sp)
169     * ldq s0,8(sp)
170     * ldq s1,16(sp)
171     * ldq s2,24(sp)
172     * ldq s3,32(sp)
173     * ldq s4,40(sp)
174     * ldq s5,48(sp)
175     * ldq s6,56(sp)
176     * lda sp,128(sp)
177     * ret
178     */
179     /* note: offsetof (in stdarg.h) could possibly be used, but I'm not sure
180     if it will take care of the compiler problems... */
181     #define ofs_pc (((size_t)&dummy_cpu.pc) - ((size_t)&dummy_cpu))
182     #define ofs_pc_last (((size_t)&dummy_cpu.cd.mips.pc_last) - ((size_t)&dummy_cpu))
183     #define ofs_n (((size_t)&dummy_cpu.cd.mips.bintrans_instructions_executed) - ((size_t)&dummy_cpu))
184     #define ofs_ds (((size_t)&dummy_cpu.cd.mips.delay_slot) - ((size_t)&dummy_cpu))
185     #define ofs_ja (((size_t)&dummy_cpu.cd.mips.delay_jmpaddr) - ((size_t)&dummy_cpu))
186     #define ofs_sp (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_SP]) - ((size_t)&dummy_cpu))
187     #define ofs_ra (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_RA]) - ((size_t)&dummy_cpu))
188     #define ofs_a0 (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_A0]) - ((size_t)&dummy_cpu))
189     #define ofs_a1 (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_A1]) - ((size_t)&dummy_cpu))
190     #define ofs_t0 (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_T0]) - ((size_t)&dummy_cpu))
191     #define ofs_t1 (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_T1]) - ((size_t)&dummy_cpu))
192     #define ofs_t2 (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_T2]) - ((size_t)&dummy_cpu))
193     #define ofs_v0 (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_V0]) - ((size_t)&dummy_cpu))
194     #define ofs_s0 (((size_t)&dummy_cpu.cd.mips.gpr[MIPS_GPR_S0]) - ((size_t)&dummy_cpu))
195     #define ofs_tbl0 (((size_t)&dummy_cpu.cd.mips.vaddr_to_hostaddr_table0) - ((size_t)&dummy_cpu))
196     #define ofs_c0 ((size_t)&dummy_vth32_table.bintrans_chunks[0] - (size_t)&dummy_vth32_table)
197     #define ofs_cb (((size_t)&dummy_cpu.cd.mips.chunk_base_address) - (size_t)&dummy_cpu)
198    
199    
200     static uint32_t bintrans_alpha_loadstore_32bit[19] = {
201     /*
202     * t1 = 1023;
203     * t2 = ((a1 >> 22) & t1) * sizeof(void *);
204     * t3 = ((a1 >> 12) & t1) * sizeof(void *);
205     * t1 = a1 & 4095;
206     *
207     * f8 1f 5f 20 lda t1,1023 * 8
208     * 83 76 22 4a srl a1,19,t2
209     * 84 36 21 4a srl a1, 9,t3
210     * 03 00 62 44 and t2,t1,t2
211     */
212     0x205f1ff8,
213     0x4a227683,
214     0x4a213684,
215     0x44620003,
216    
217     /*
218     * t10 is vaddr_to_hostaddr_table0
219     *
220     * a3 = tbl0[t2] (load entry from tbl0)
221     * 12 04 03 43 addq t10,t2,a2
222     */
223     0x43030412,
224    
225     /* 04 00 82 44 and t3,t1,t3 */
226     0x44820004,
227    
228     /* 00 00 72 a6 ldq a3,0(a2) */
229     0xa6720000,
230    
231     /* ff 0f 5f 20 lda t1,4095 */
232     0x205f0fff,
233    
234     /*
235     * a3 = tbl1[t3] (load entry from tbl1 (which is a3))
236     * 13 04 64 42 addq a3,t3,a3
237     */
238     0x42640413,
239    
240     /* 02 00 22 46 and a1,t1,t1 */
241     0x46220002,
242    
243     /* 00 00 73 a6 ldq a3,0(a3) */
244     0xa6730000,
245    
246     /* NULL? Then return failure at once. */
247     /* bne a3, skip */
248     0xf6600003,
249    
250     0x243f0000 | (BINTRANS_DONT_RUN_NEXT >> 16), /* ldah t0,256 */
251     0x44270407, /* or t0,t6,t6 */
252     0x6bfa8001, /* ret */
253    
254     /* skip: */
255    
256     /* 01 30 60 46 and a3,0x1,t0 */
257     0x46603001,
258    
259     /* Get rid of the lowest bit: */
260     /* 33 05 61 42 subq a3,t0,a3 */
261     0x42610533,
262    
263     /* The rest of the load/store code was written with t3 as the address. */
264    
265     /* Add the offset within the page: */
266     /* 04 04 62 42 addq a3,t1,t3 */
267     0x42620404,
268    
269     0x6be50000 /* jmp (t4) */
270     };
271    
272     static void (*bintrans_runchunk)(struct cpu *, unsigned char *);
273    
274     static void (*bintrans_jump_to_32bit_pc)(struct cpu *);
275    
276     static void (*bintrans_loadstore_32bit)
277     (struct cpu *) = (void *)bintrans_alpha_loadstore_32bit;
278    
279    
280     /*
281     * bintrans_write_quickjump():
282     */
283     static void bintrans_write_quickjump(struct memory *mem,
284     unsigned char *quickjump_code, uint32_t chunkoffset)
285     {
286     int ofs;
287     uint64_t alpha_addr = chunkoffset +
288     (size_t)mem->translation_code_chunk_space;
289     uint32_t *a = (uint32_t *)quickjump_code;
290    
291     ofs = (alpha_addr - ((size_t)a+4)) / 4;
292    
293     /* printf("chunkoffset=%i, %016llx %016llx %i\n",
294     chunkoffset, (long long)alpha_addr, (long long)a, ofs); */
295    
296     if (ofs > -0xfffff && ofs < 0xfffff) {
297     *a++ = 0xc3e00000 | (ofs & 0x1fffff); /* br <chunk> */
298     }
299     }
300    
301    
302     /*
303     * bintrans_write_chunkreturn():
304     */
305     static void bintrans_write_chunkreturn(unsigned char **addrp)
306     {
307     uint32_t *a = (uint32_t *) *addrp;
308     *a++ = 0x6bfa8001; /* ret */
309     *addrp = (unsigned char *) a;
310     }
311    
312    
313     /*
314     * bintrans_write_chunkreturn_fail():
315     */
316     static void bintrans_write_chunkreturn_fail(unsigned char **addrp)
317     {
318     uint32_t *a = (uint32_t *) *addrp;
319     /* 00 01 3f 24 ldah t0,256 */
320     /* 07 04 27 44 or t0,t6,t6 */
321     *a++ = 0x243f0000 | (BINTRANS_DONT_RUN_NEXT >> 16);
322     *a++ = 0x44270407;
323     *a++ = 0x6bfa8001; /* ret */
324     *addrp = (unsigned char *) a;
325     }
326    
327    
328     /*
329     * bintrans_move_MIPS_reg_into_Alpha_reg():
330     */
331     static void bintrans_move_MIPS_reg_into_Alpha_reg(unsigned char **addrp, int mipsreg, int alphareg)
332     {
333     uint32_t *a = (uint32_t *) *addrp;
334     int ofs, alpha_mips_reg;
335    
336     switch (mipsreg) {
337     case MIPSREG_PC:
338     /* addq t5,0,alphareg */
339     *a++ = 0x40c01400 | alphareg;
340     break;
341     case MIPSREG_DELAY_SLOT:
342     /* addq s0,0,alphareg */
343     *a++ = 0x41201400 | alphareg;
344     break;
345     case MIPSREG_DELAY_JMPADDR:
346     /* addq s1,0,alphareg */
347     *a++ = 0x41401400 | alphareg;
348     break;
349     default:
350     alpha_mips_reg = map_MIPS_to_Alpha[mipsreg];
351     if (alpha_mips_reg < 0) {
352     ofs = ((size_t)&dummy_cpu.cd.mips.gpr[mipsreg]) - (size_t)&dummy_cpu;
353     /* ldq alphareg,gpr[mipsreg](a0) */
354     *a++ = 0xa4100000 | (alphareg << 21) | ofs;
355     } else {
356     /* addq alpha_mips_reg,0,alphareg */
357     *a++ = 0x40001400 | (alpha_mips_reg << 21) | alphareg;
358     }
359     }
360     *addrp = (unsigned char *) a;
361     }
362    
363    
364     /*
365     * bintrans_move_Alpha_reg_into_MIPS_reg():
366     */
367     static void bintrans_move_Alpha_reg_into_MIPS_reg(unsigned char **addrp, int alphareg, int mipsreg)
368     {
369     uint32_t *a = (uint32_t *) *addrp;
370     int ofs, alpha_mips_reg;
371    
372     switch (mipsreg) {
373     case MIPSREG_PC:
374     /* addq alphareg,0,t5 */
375     *a++ = 0x40001406 | (alphareg << 21);
376     break;
377     case MIPSREG_DELAY_SLOT:
378     /* addq alphareg,0,s0 */
379     *a++ = 0x40001409 | (alphareg << 21);
380     break;
381     case MIPSREG_DELAY_JMPADDR:
382     /* addq alphareg,0,s1 */
383     *a++ = 0x4000140a | (alphareg << 21);
384     break;
385     case 0: /* the zero register */
386     break;
387     default:
388     alpha_mips_reg = map_MIPS_to_Alpha[mipsreg];
389     if (alpha_mips_reg < 0) {
390     /* stq alphareg,gpr[mipsreg](a0) */
391     ofs = ((size_t)&dummy_cpu.cd.mips.gpr[mipsreg]) - (size_t)&dummy_cpu;
392     *a++ = 0xb4100000 | (alphareg << 21) | ofs;
393     } else {
394     /* addq alphareg,0,alpha_mips_reg */
395     *a++ = 0x40001400 | (alphareg << 21) | alpha_mips_reg;
396     }
397     }
398     *addrp = (unsigned char *) a;
399     }
400    
401    
402     /*
403     * bintrans_write_pc_inc():
404     */
405     static void bintrans_write_pc_inc(unsigned char **addrp)
406     {
407     uint32_t *a = (uint32_t *) *addrp;
408    
409     /* lda t6,1(t6) */
410     *a++ = 0x20e70001;
411    
412     /* lda t5,4(t5) */
413     *a++ = 0x20c60004;
414    
415     *addrp = (unsigned char *) a;
416     }
417    
418    
419     /*
420     * bintrans_write_instruction__addiu_etc():
421     */
422     static int bintrans_write_instruction__addiu_etc(unsigned char **addrp,
423     int rt, int rs, int imm, int instruction_type)
424     {
425     uint32_t *a;
426     unsigned int uimm;
427     int alpha_rs, alpha_rt;
428    
429     /* TODO: overflow detection for ADDI and DADDI */
430     switch (instruction_type) {
431     case HI6_ADDI:
432     case HI6_DADDI:
433     return 0;
434     }
435    
436     a = (uint32_t *) *addrp;
437    
438     if (rt == 0)
439     goto rt0;
440    
441     uimm = imm & 0xffff;
442    
443     alpha_rs = map_MIPS_to_Alpha[rs];
444     alpha_rt = map_MIPS_to_Alpha[rt];
445    
446     if (uimm == 0 && (instruction_type == HI6_ADDI ||
447     instruction_type == HI6_ADDIU || instruction_type == HI6_DADDI ||
448     instruction_type == HI6_DADDIU || instruction_type == HI6_ORI)) {
449     if (alpha_rs >= 0 && alpha_rt >= 0) {
450     /* addq rs,0,rt */
451     *a++ = 0x40001400 | (alpha_rs << 21) | alpha_rt;
452     } else {
453     *addrp = (unsigned char *) a;
454     bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rs, ALPHA_T0);
455     bintrans_move_Alpha_reg_into_MIPS_reg(addrp, ALPHA_T0, rt);
456     a = (uint32_t *) *addrp;
457     }
458     goto rt0;
459     }
460    
461     if (alpha_rs < 0) {
462     /* ldq t0,"rs"(a0) */
463     *addrp = (unsigned char *) a;
464     bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rs, ALPHA_T0);
465     a = (uint32_t *) *addrp;
466     alpha_rs = ALPHA_T0;
467     }
468    
469     if (alpha_rt < 0)
470     alpha_rt = ALPHA_T0;
471    
472     /* Place the result of the calculation in alpha_rt: */
473    
474     switch (instruction_type) {
475     case HI6_ADDIU:
476     case HI6_DADDIU:
477     case HI6_ADDI:
478     case HI6_DADDI:
479     if (uimm < 256) {
480     if (instruction_type == HI6_ADDI ||
481     instruction_type == HI6_ADDIU) {
482     /* addl rs,uimm,rt */
483     *a++ = 0x40001000 | (alpha_rs << 21)
484     | (uimm << 13) | alpha_rt;
485     } else {
486     /* addq rs,uimm,rt */
487     *a++ = 0x40001400 | (alpha_rs << 21)
488     | (uimm << 13) | alpha_rt;
489     }
490     } else {
491     /* lda rt,imm(rs) */
492     *a++ = 0x20000000 | (alpha_rt << 21) | (alpha_rs << 16) | uimm;
493     if (instruction_type == HI6_ADDI ||
494     instruction_type == HI6_ADDIU) {
495     /* sign extend, 32->64 bits: addl t0,zero,t0 */
496     *a++ = 0x40001000 | (alpha_rt << 21) | alpha_rt;
497     }
498     }
499     break;
500     case HI6_ANDI:
501     case HI6_ORI:
502     case HI6_XORI:
503     if (uimm >= 256) {
504     /* lda t1,4660 */
505     *a++ = 0x205f0000 | uimm;
506     if (uimm & 0x8000) {
507     /* 01 00 42 24 ldah t1,1(t1) <-- if negative only */
508     *a++ = 0x24420001;
509     }
510     }
511    
512     switch (instruction_type) {
513     case HI6_ANDI:
514     if (uimm < 256) {
515     /* and rs,uimm,rt */
516     *a++ = 0x44001000 | (alpha_rs << 21)
517     | (uimm << 13) | alpha_rt;
518     } else {
519     /* and rs,t1,rt */
520     *a++ = 0x44020000 | (alpha_rs << 21) | alpha_rt;
521     }
522     break;
523     case HI6_ORI:
524     if (uimm < 256) {
525     /* or rs,uimm,rt */
526     *a++ = 0x44001400 | (alpha_rs << 21)
527     | (uimm << 13) | alpha_rt;
528     } else {
529     /* or rs,t1,rt */
530     *a++ = 0x44020400 | (alpha_rs << 21) | alpha_rt;
531     }
532     break;
533     case HI6_XORI:
534     if (uimm < 256) {
535     /* xor rs,uimm,rt */
536     *a++ = 0x44001800 | (alpha_rs << 21)
537     | (uimm << 13) | alpha_rt;
538     } else {
539     /* xor rs,t1,rt */
540     *a++ = 0x44020800 | (alpha_rs << 21) | alpha_rt;
541     }
542     break;
543     }
544     break;
545     case HI6_SLTI:
546     case HI6_SLTIU:
547     /* lda t1,4660 */
548     *a++ = 0x205f0000 | uimm;
549    
550     switch (instruction_type) {
551     case HI6_SLTI:
552     /* cmplt rs,t1,rt */
553     *a++ = 0x400209a0 | (alpha_rs << 21) | alpha_rt;
554     break;
555     case HI6_SLTIU:
556     /* cmpult rs,t1,rt */
557     *a++ = 0x400203a0 | (alpha_rs << 21) | alpha_rt;
558     break;
559     }
560     break;
561     }
562    
563     if (alpha_rt == ALPHA_T0) {
564     *a++ = 0x5fff041f; /* fnop */
565     *addrp = (unsigned char *) a;
566     bintrans_move_Alpha_reg_into_MIPS_reg(addrp, ALPHA_T0, rt);
567     a = (uint32_t *) *addrp;
568     }
569    
570     rt0:
571     *addrp = (unsigned char *) a;
572     bintrans_write_pc_inc(addrp);
573     return 1;
574     }
575    
576    
577     /*
578     * bintrans_write_instruction__addu_etc():
579     */
580     static int bintrans_write_instruction__addu_etc(unsigned char **addrp,
581     int rd, int rs, int rt, int sa, int instruction_type)
582     {
583     unsigned char *a, *unmodified = NULL;
584     int load64 = 0, store = 1, ofs, alpha_rd = ALPHA_T0;
585    
586     alpha_rd = map_MIPS_to_Alpha[rd];
587     if (alpha_rd < 0)
588     alpha_rd = ALPHA_T0;
589    
590     switch (instruction_type) {
591     case SPECIAL_DIV:
592     case SPECIAL_DIVU:
593     return 0;
594     }
595    
596     switch (instruction_type) {
597     case SPECIAL_DADDU:
598     case SPECIAL_DSUBU:
599     case SPECIAL_OR:
600     case SPECIAL_AND:
601     case SPECIAL_NOR:
602     case SPECIAL_XOR:
603     case SPECIAL_DSLL:
604     case SPECIAL_DSRL:
605     case SPECIAL_DSRA:
606     case SPECIAL_DSLL32:
607     case SPECIAL_DSRL32:
608     case SPECIAL_DSRA32:
609     case SPECIAL_SLT:
610     case SPECIAL_SLTU:
611     case SPECIAL_MOVZ:
612     case SPECIAL_MOVN:
613     load64 = 1;
614     }
615    
616     switch (instruction_type) {
617     case SPECIAL_MULT:
618     case SPECIAL_MULTU:
619     if (rd != 0)
620     return 0;
621     store = 0;
622     break;
623     default:
624     if (rd == 0)
625     goto rd0;
626     }
627    
628     a = *addrp;
629    
630     if ((instruction_type == SPECIAL_ADDU || instruction_type == SPECIAL_DADDU
631     || instruction_type == SPECIAL_OR) && rt == 0) {
632     bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0);
633     if (!load64) {
634     *a++ = 0x01; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */
635     }
636     bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rd);
637     *addrp = a;
638     goto rd0;
639     }
640    
641     /* t0 = rs, t1 = rt */
642     if (load64) {
643     bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0);
644     bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T1);
645     } else {
646     bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0);
647     *a++ = 0x01; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */
648     bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T1);
649     *a++ = 0x02; *a++ = 0x10; *a++ = 0x40; *a++ = 0x40; /* addl t1,0,t1 */
650     }
651    
652     switch (instruction_type) {
653     case SPECIAL_ADDU:
654     *a++ = alpha_rd; *a++ = 0x00; *a++ = 0x22; *a++ = 0x40; /* addl t0,t1,rd */
655     break;
656     case SPECIAL_DADDU:
657     *a++ = alpha_rd; *a++ = 0x04; *a++ = 0x22; *a++ = 0x40; /* addq t0,t1,rd */
658     break;
659     case SPECIAL_SUBU:
660     *a++ = 0x20 + alpha_rd; *a++ = 0x01; *a++ = 0x22; *a++ = 0x40; /* subl t0,t1,t0 */
661     break;
662     case SPECIAL_DSUBU:
663     *a++ = 0x20 + alpha_rd; *a++ = 0x05; *a++ = 0x22; *a++ = 0x40; /* subq t0,t1,t0 */
664     break;
665     case SPECIAL_AND:
666     *a++ = alpha_rd; *a++ = 0x00; *a++ = 0x22; *a++ = 0x44; /* and t0,t1,t0 */
667     break;
668     case SPECIAL_OR:
669     *a++ = alpha_rd; *a++ = 0x04; *a++ = 0x22; *a++ = 0x44; /* or t0,t1,t0 */
670     break;
671     case SPECIAL_NOR:
672     *a++ = 0x01; *a++ = 0x04; *a++ = 0x22; *a++ = 0x44; /* or t0,t1,t0 */
673     *a++ = alpha_rd; *a++ = 0x05; *a++ = 0xe1; *a++ = 0x47; /* not t0,t0 */
674     break;
675     case SPECIAL_XOR:
676     *a++ = alpha_rd; *a++ = 0x08; *a++ = 0x22; *a++ = 0x44; /* xor t0,t1,t0 */
677     break;
678     case SPECIAL_SLL:
679     *a++ = 0x21; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sll t1,sa,t0 */
680     *a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */
681     break;
682     case SPECIAL_SLLV:
683     /* rd = rt << (rs&31) (logical) t0 = t1 << (t0&31) */
684     *a++ = 0x01; *a++ = 0xf0; *a++ = 0x23; *a++ = 0x44; /* and t0,31,t0 */
685     *a++ = 0x21; *a++ = 0x07; *a++ = 0x41; *a++ = 0x48; /* sll t1,t0,t0 */
686     *a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */
687     break;
688     case SPECIAL_DSLL:
689     *a++ = 0x20 + alpha_rd; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sll t1,sa,t0 */
690     break;
691     case SPECIAL_DSLL32:
692     sa += 32;
693     *a++ = 0x20 + alpha_rd; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sll t1,sa,t0 */
694     break;
695     case SPECIAL_SRA:
696     *a++ = 0x81; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sra t1,sa,t0 */
697     *a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */
698     break;
699     case SPECIAL_SRAV:
700     /* rd = rt >> (rs&31) (arithmetic) t0 = t1 >> (t0&31) */
701     *a++ = 0x01; *a++ = 0xf0; *a++ = 0x23; *a++ = 0x44; /* and t0,31,t0 */
702     *a++ = 0x81; *a++ = 0x07; *a++ = 0x41; *a++ = 0x48; /* sra t1,t0,t0 */
703     *a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */
704     break;
705     case SPECIAL_DSRA:
706     *a++ = 0x80 + alpha_rd; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sra t1,sa,t0 */
707     break;
708     case SPECIAL_DSRA32:
709     sa += 32;
710     *a++ = 0x80 + alpha_rd; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sra t1,sa,t0 */
711     break;
712     case SPECIAL_SRL:
713     *a++ = 0x22; *a++ = 0xf6; *a++ = 0x41; *a++ = 0x48; /* zapnot t1,0xf,t1 (use only lowest 32 bits) */
714     /* Note: bits of sa are distributed among two different bytes. */
715     *a++ = 0x81; *a++ = 0x16 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48;
716     *a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl */
717     break;
718     case SPECIAL_SRLV:
719     /* rd = rt >> (rs&31) (logical) t0 = t1 >> (t0&31) */
720     *a++ = 0x22; *a++ = 0xf6; *a++ = 0x41; *a++ = 0x48; /* zapnot t1,0xf,t1 (use only lowest 32 bits) */
721     *a++ = 0x01; *a++ = 0xf0; *a++ = 0x23; *a++ = 0x44; /* and t0,31,t0 */
722     *a++ = 0x81; *a++ = 0x06; *a++ = 0x41; *a++ = 0x48; /* srl t1,t0,t0 */
723     *a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */
724     break;
725     case SPECIAL_DSRL:
726     /* Note: bits of sa are distributed among two different bytes. */
727     *a++ = 0x80 + alpha_rd; *a++ = 0x16 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48;
728     break;
729     case SPECIAL_DSRL32:
730     /* Note: bits of sa are distributed among two different bytes. */
731     sa += 32;
732     *a++ = 0x80 + alpha_rd; *a++ = 0x16 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48;
733     break;
734     case SPECIAL_SLT:
735     *a++ = 0xa0 + alpha_rd; *a++ = 0x09; *a++ = 0x22; *a++ = 0x40; /* cmplt t0,t1,t0 */
736     break;
737     case SPECIAL_SLTU:
738     *a++ = 0xa0 + alpha_rd; *a++ = 0x03; *a++ = 0x22; *a++ = 0x40; /* cmpult t0,t1,t0 */
739     break;
740     case SPECIAL_MULT:
741     case SPECIAL_MULTU:
742     if (instruction_type == SPECIAL_MULTU) {
743     /* 21 f6 21 48 zapnot t0,0xf,t0 */
744     /* 22 f6 41 48 zapnot t1,0xf,t1 */
745     *a++ = 0x21; *a++ = 0xf6; *a++ = 0x21; *a++ = 0x48;
746     *a++ = 0x22; *a++ = 0xf6; *a++ = 0x41; *a++ = 0x48;
747     }
748    
749     /* 03 04 22 4c mulq t0,t1,t2 */
750     *a++ = 0x03; *a++ = 0x04; *a++ = 0x22; *a++ = 0x4c;
751    
752     /* 01 10 60 40 addl t2,0,t0 */
753     *a++ = 0x01; *a++ = 0x10; *a++ = 0x60; *a++ = 0x40;
754    
755     ofs = ((size_t)&dummy_cpu.cd.mips.lo) - (size_t)&dummy_cpu;
756     *a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x30; *a++ = 0xb4;
757    
758     /* 81 17 64 48 sra t2,0x20,t0 */
759     *a++ = 0x81; *a++ = 0x17; *a++ = 0x64; *a++ = 0x48;
760     *a++ = 0x01; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */
761     ofs = ((size_t)&dummy_cpu.cd.mips.hi) - (size_t)&dummy_cpu;
762     *a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x30; *a++ = 0xb4;
763     break;
764     case SPECIAL_MOVZ:
765     /* if rt=0 then rd=rs ==> if t1!=0 then t0=unmodified else t0=rd */
766     /* 00 00 40 f4 bne t1,unmodified */
767     unmodified = a;
768     *a++ = 0x00; *a++ = 0x00; *a++ = 0x40; *a++ = 0xf4;
769     alpha_rd = ALPHA_T0;
770     break;
771     case SPECIAL_MOVN:
772     /* if rt!=0 then rd=rs ==> if t1=0 then t0=unmodified else t0=rd */
773     /* 00 00 40 e4 beq t1,unmodified */
774     unmodified = a;
775     *a++ = 0x00; *a++ = 0x00; *a++ = 0x40; *a++ = 0xe4;
776     alpha_rd = ALPHA_T0;
777     break;
778     }
779    
780     if (store && alpha_rd == ALPHA_T0) {
781     bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rd);
782     }
783    
784     if (unmodified != NULL)
785     *unmodified = ((size_t)a - (size_t)unmodified - 4) / 4;
786    
787     *addrp = a;
788     rd0:
789     bintrans_write_pc_inc(addrp);
790     return 1;
791     }
792    
793    
794     /*
795     * bintrans_write_instruction__branch():
796     */
797     static int bintrans_write_instruction__branch(unsigned char **addrp,
798     int instruction_type, int regimm_type, int rt, int rs, int imm)
799     {
800     uint32_t *a, *b, *c = NULL;
801     int alpha_rs, alpha_rt, likely = 0, ofs;
802    
803     alpha_rs = map_MIPS_to_Alpha[rs];
804     alpha_rt = map_MIPS_to_Alpha[rt];
805    
806     switch (instruction_type) {
807     case HI6_BEQL:
808     case HI6_BNEL:
809     case HI6_BLEZL:
810     case HI6_BGTZL:
811     likely = 1;
812     }
813    
814     /*
815     * t0 = gpr[rt]; t1 = gpr[rs];
816     *
817     * 50 00 30 a4 ldq t0,80(a0)
818     * 58 00 50 a4 ldq t1,88(a0)
819     */
820    
821     switch (instruction_type) {
822     case HI6_BEQ:
823     case HI6_BNE:
824     case HI6_BEQL:
825     case HI6_BNEL:
826     if (alpha_rt < 0) {
827     bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rt, ALPHA_T0);
828     alpha_rt = ALPHA_T0;
829     }
830     }
831    
832     if (alpha_rs < 0) {
833     bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rs, ALPHA_T1);
834     alpha_rs = ALPHA_T1;
835     }
836    
837     a = (uint32_t *) *addrp;
838    
839     /*
840     * Compare alpha_rt (t0) and alpha_rs (t1) for equality (BEQ).
841     * If the result was false (equal to zero), then skip a lot
842     * of instructions:
843     *
844     * a1 05 22 40 cmpeq t0,t1,t0
845     * 01 00 20 e4 beq t0,14 <f+0x14>
846     */
847     b = NULL;
848     if ((instruction_type == HI6_BEQ ||
849     instruction_type == HI6_BEQL) && rt != rs) {
850     /* cmpeq rt,rs,t0 */
851     *a++ = 0x400005a1 | (alpha_rt << 21) | (alpha_rs << 16);
852     b = a;
853     *a++ = 0xe4200001; /* beq */
854     }
855     if (instruction_type == HI6_BNE || instruction_type == HI6_BNEL) {
856     /* cmpeq rt,rs,t0 */
857     *a++ = 0x400005a1 | (alpha_rt << 21) | (alpha_rs << 16);
858     b = a;
859     *a++ = 0xf4200001; /* bne */
860     }
861     if (instruction_type == HI6_BLEZ || instruction_type == HI6_BLEZL) {
862     /* cmple rs,0,t0 */
863     *a++ = 0x40001da1 | (alpha_rs << 21);
864     b = a;
865     *a++ = 0xe4200001; /* beq */
866     }
867     if (instruction_type == HI6_BGTZ || instruction_type == HI6_BGTZL) {
868     /* cmple rs,0,t0 */
869     *a++ = 0x40001da1 | (alpha_rs << 21);
870     b = a;
871     *a++ = 0xf4200001; /* bne */
872     }
873     if (instruction_type == HI6_REGIMM && regimm_type == REGIMM_BLTZ) {
874     /* cmplt rs,0,t0 */
875     *a++ = 0x400019a1 | (alpha_rs << 21);
876     b = a;
877     *a++ = 0xe4200001; /* beq */
878     }
879     if (instruction_type == HI6_REGIMM && regimm_type == REGIMM_BGEZ) {
880     *a++ = 0x207fffff; /* lda t2,-1 */
881     /* cmple rs,t2,t0 */
882     *a++ = 0x40030da1 | (alpha_rs << 21);
883     b = a;
884     *a++ = 0xf4200001; /* bne */
885     }
886    
887     /*
888     * Perform the jump by setting cpu->delay_slot = TO_BE_DELAYED
889     * and cpu->delay_jmpaddr = pc + 4 + (imm << 2).
890     *
891     * 04 00 26 20 lda t0,4(t5) add 4
892     * c8 01 5f 20 lda t1,456
893     * 4a 04 41 40 s4addq t1,t0,s1 s1 = (t1<<2) + t0
894     */
895    
896     *a++ = 0x20260004; /* lda t0,4(t5) */
897     *a++ = 0x205f0000 | (imm & 0xffff); /* lda */
898     *a++ = 0x4041044a; /* s4addq */
899    
900     /* 02 00 3f 21 lda s0,TO_BE_DELAYED */
901     *a++ = 0x213f0000 | TO_BE_DELAYED;
902    
903     /*
904     * Special case: "likely"-branches:
905     */
906     if (likely) {
907     c = a;
908     *a++ = 0xc3e00001; /* br delayed_ok */
909    
910     if (b != NULL)
911     *((unsigned char *)b) = ((size_t)a - (size_t)b - 4) / 4;
912    
913     /* cpu->cd.mips.nullify_next = 1; */
914     /* 01 00 3f 20 lda t0,1 */
915     *a++ = 0x203f0001;
916     ofs = (size_t)&dummy_cpu.cd.mips.nullify_next - (size_t)&dummy_cpu;
917     *a++ = 0xb0300000 | (ofs & 0xffff);
918    
919     /* fail, so that the next instruction is handled manually: */
920     *addrp = (unsigned char *) a;
921     bintrans_write_pc_inc(addrp);
922     bintrans_write_chunkreturn_fail(addrp);
923     a = (uint32_t *) *addrp;
924    
925     if (c != NULL)
926     *((unsigned char *)c) = ((size_t)a - (size_t)c - 4) / 4;
927     } else {
928     /* Normal (non-likely) exit: */
929     if (b != NULL)
930     *((unsigned char *)b) = ((size_t)a - (size_t)b - 4) / 4;
931     }
932    
933     *addrp = (unsigned char *) a;
934     bintrans_write_pc_inc(addrp);
935     return 1;
936     }
937    
938    
939     /*
940     * bintrans_write_instruction__jr():
941     */
942     static int bintrans_write_instruction__jr(unsigned char **addrp, int rs, int rd, int special)
943     {
944     uint32_t *a;
945     int alpha_rd;
946    
947     alpha_rd = map_MIPS_to_Alpha[rd];
948     if (alpha_rd < 0)
949     alpha_rd = ALPHA_T0;
950    
951     /*
952     * Perform the jump by setting cpu->delay_slot = TO_BE_DELAYED
953     * and cpu->delay_jmpaddr = gpr[rs].
954     */
955    
956     bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rs, ALPHA_S1);
957    
958     a = (uint32_t *) *addrp;
959     /* 02 00 3f 21 lda s0,TO_BE_DELAYED */
960     *a++ = 0x213f0000 | TO_BE_DELAYED;
961     *addrp = (unsigned char *) a;
962    
963     if (special == SPECIAL_JALR && rd != 0) {
964     /* gpr[rd] = retaddr (pc + 8) */
965     a = (uint32_t *) *addrp;
966     /* lda alpha_rd,8(t5) */
967     *a++ = 0x20060008 | (alpha_rd << 21);
968     *addrp = (unsigned char *) a;
969     if (alpha_rd == ALPHA_T0)
970     bintrans_move_Alpha_reg_into_MIPS_reg(addrp, ALPHA_T0, rd);
971     }
972    
973     bintrans_write_pc_inc(addrp);
974     return 1;
975     }
976    
977    
978     /*
979     * bintrans_write_instruction__jal():
980     */
981     static int bintrans_write_instruction__jal(unsigned char **addrp,
982     int imm, int link)
983     {
984     uint32_t *a;
985    
986     a = (uint32_t *) *addrp;
987    
988     /* gpr[31] = retaddr (NOTE: mips register 31 is in alpha reg s3) */
989     if (link) {
990     *a++ = 0x21860008; /* lda s3,8(t5) */
991     }
992    
993     /* Set the jmpaddr to top 4 bits of pc + lowest 28 bits of imm*4: */
994    
995     /*
996     * imm = 4*imm;
997     * t0 = ((pc + 4) & ~0x0fffffff) | imm;
998     *
999     * 04 00 26 20 lda t0,4(t5) <-- because the jump is from the delay slot
1000     * 23 01 5f 24 ldah t1,291
1001     * 67 45 42 20 lda t1,17767(t1)
1002     * 00 f0 7f 24 ldah t2,-4096
1003     * 04 00 23 44 and t0,t2,t3
1004     * 0a 04 44 44 or t1,t3,s1
1005     */
1006     imm *= 4;
1007     *a++ = 0x20260004;
1008     *a++ = 0x245f0000 | ((imm >> 16) + (imm & 0x8000? 1 : 0));
1009     *a++ = 0x20420000 | (imm & 0xffff);
1010     *a++ = 0x247ff000;
1011     *a++ = 0x44230004;
1012     *a++ = 0x4444040a;
1013    
1014     /* 02 00 3f 21 lda s0,TO_BE_DELAYED */
1015     *a++ = 0x213f0000 | TO_BE_DELAYED;
1016    
1017     /* If the machine continues executing here, it will return
1018     to the main loop, which is fine. */
1019    
1020     *addrp = (unsigned char *) a;
1021     bintrans_write_pc_inc(addrp);
1022     return 1;
1023     }
1024    
1025    
1026     /*
1027     * bintrans_write_instruction__delayedbranch():
1028     */
1029     static int bintrans_write_instruction__delayedbranch(
1030     struct memory *mem, unsigned char **addrp,
1031     uint32_t *potential_chunk_p, uint32_t *chunks,
1032     int only_care_about_chunk_p, int p, int forward)
1033     {
1034     unsigned char *a, *skip=NULL, *generic64bit;
1035     int ofs;
1036     uint64_t alpha_addr, subaddr;
1037    
1038     a = *addrp;
1039    
1040     if (!only_care_about_chunk_p) {
1041     /* Skip all of this if there is no branch: */
1042     skip = a;
1043     *a++ = 0; *a++ = 0; *a++ = 0x20; *a++ = 0xe5; /* beq s0,skip */
1044    
1045     /*
1046     * Perform the jump by setting cpu->delay_slot = 0
1047     * and pc = cpu->delay_jmpaddr.
1048     */
1049     /* 00 00 3f 21 lda s0,0 */
1050     *a++ = 0; *a++ = 0; *a++ = 0x3f; *a++ = 0x21;
1051    
1052     bintrans_move_MIPS_reg_into_Alpha_reg(&a, MIPSREG_DELAY_JMPADDR, ALPHA_T0);
1053     bintrans_move_MIPS_reg_into_Alpha_reg(&a, MIPSREG_PC, ALPHA_T3);
1054     bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, MIPSREG_PC);
1055     }
1056    
1057     if (potential_chunk_p == NULL) {
1058     if (mem->bintrans_32bit_only) {
1059     /* 34 12 70 a7 ldq t12,4660(a0) */
1060     ofs = (size_t)&dummy_cpu.cd.mips.bintrans_jump_to_32bit_pc - (size_t)&dummy_cpu;
1061     *a++ = ofs; *a++ = ofs >> 8; *a++ = 0x70; *a++ = 0xa7;
1062    
1063     /* 00 00 fb 6b jmp (t12) */
1064     *a++ = 0; *a++ = 0; *a++ = 0xfb; *a++ = 0x6b;
1065     } else {
1066     /*
1067     * If the highest 32 bits of the address are either
1068     * 0x00000000 or 0xffffffff, then the tables used for
1069     * 32-bit load/stores can be used.
1070     *
1071     * 81 16 24 4a srl a1,0x20,t0
1072     * 03 00 20 e4 beq t0,14 <ok1>
1073     * 01 30 20 40 addl t0,0x1,t0
1074     * 01 00 20 e4 beq t0,14 <ok1>
1075     * 01 00 e0 c3 br 18 <nook>
1076     */
1077     *a++ = 0x81; *a++ = 0x16; *a++ = 0x24; *a++ = 0x4a;
1078     *a++ = 0x03; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4;
1079     *a++ = 0x01; *a++ = 0x30; *a++ = 0x20; *a++ = 0x40;
1080     *a++ = 0x01; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4;
1081     generic64bit = a;
1082     *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1083    
1084     /* 34 12 70 a7 ldq t12,4660(a0) */
1085     ofs = (size_t)&dummy_cpu.cd.mips.bintrans_jump_to_32bit_pc - (size_t)&dummy_cpu;
1086     *a++ = ofs; *a++ = ofs >> 8; *a++ = 0x70; *a++ = 0xa7;
1087    
1088     /* 00 00 fb 6b jmp (t12) */
1089     *a++ = 0; *a++ = 0; *a++ = 0xfb; *a++ = 0x6b;
1090    
1091    
1092     if (generic64bit != NULL)
1093     *generic64bit = ((size_t)a - (size_t)generic64bit - 4) / 4;
1094    
1095     /* Not much we can do here if this wasn't to the same
1096     physical page... */
1097    
1098     *a++ = 0xfc; *a++ = 0xff; *a++ = 0x84; *a++ = 0x20; /* lda t3,-4(t3) */
1099    
1100     /*
1101     * Compare the old pc (t3) and the new pc (t0). If they are on the
1102     * same virtual page (which means that they are on the same physical
1103     * page), then we can check the right chunk pointer, and if it
1104     * is non-NULL, then we can jump there. Otherwise just return.
1105     *
1106     * 00 f0 5f 20 lda t1,-4096
1107     * 01 00 22 44 and t0,t1,t0
1108     * 04 00 82 44 and t3,t1,t3
1109     * a3 05 24 40 cmpeq t0,t3,t2
1110     * 01 00 60 f4 bne t2,7c <ok2>
1111     * 01 80 fa 6b ret
1112     */
1113     *a++ = 0x00; *a++ = 0xf0; *a++ = 0x5f; *a++ = 0x20; /* lda */
1114     *a++ = 0x01; *a++ = 0x00; *a++ = 0x22; *a++ = 0x44; /* and */
1115     *a++ = 0x04; *a++ = 0x00; *a++ = 0x82; *a++ = 0x44; /* and */
1116     *a++ = 0xa3; *a++ = 0x05; *a++ = 0x24; *a++ = 0x40; /* cmpeq */
1117     *a++ = 0x01; *a++ = 0x00; *a++ = 0x60; *a++ = 0xf4; /* bne */
1118     *a++ = 0x01; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */
1119    
1120     /* Don't execute too many instructions. (see comment below) */
1121     *a++ = (N_SAFE_BINTRANS_LIMIT-1)&255; *a++ = ((N_SAFE_BINTRANS_LIMIT-1) >> 8)&255;
1122     *a++ = 0x5f; *a++ = 0x20; /* lda t1,0x1fff */
1123     *a++ = 0xa1; *a++ = 0x0d; *a++ = 0xe2; *a++ = 0x40; /* cmple t6,t1,t0 */
1124     *a++ = 0x01; *a++ = 0x00; *a++ = 0x20; *a++ = 0xf4; /* bne */
1125     *a++ = 0x01; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */
1126    
1127     /* 15 bits at a time, which means max 60 bits, but
1128     that should be enough. the top 4 bits are probably
1129     not used by userland alpha code. (TODO: verify this) */
1130     alpha_addr = (size_t)chunks;
1131     subaddr = (alpha_addr >> 45) & 0x7fff;
1132    
1133     /*
1134     * 00 00 3f 20 lda t0,0
1135     * 21 f7 21 48 sll t0,0xf,t0
1136     * 34 12 21 20 lda t0,4660(t0)
1137     * 21 f7 21 48 sll t0,0xf,t0
1138     * 34 12 21 20 lda t0,4660(t0)
1139     * 21 f7 21 48 sll t0,0xf,t0
1140     * 34 12 21 20 lda t0,4660(t0)
1141     */
1142    
1143     /* Start with the topmost 15 bits: */
1144     *a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x3f; *a++ = 0x20;
1145     *a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */
1146    
1147     subaddr = (alpha_addr >> 30) & 0x7fff;
1148     *a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20;
1149     *a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */
1150    
1151     subaddr = (alpha_addr >> 15) & 0x7fff;
1152     *a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20;
1153     *a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */
1154    
1155     subaddr = alpha_addr & 0x7fff;
1156     *a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20;
1157    
1158     /*
1159     * t2 = pc
1160     * t1 = t2 & 0xfff
1161     * t0 += t1
1162     *
1163     * ff 0f 5f 20 lda t1,4095
1164     * 02 00 62 44 and t2,t1,t1
1165     * 01 04 22 40 addq t0,t1,t0
1166     */
1167     bintrans_move_MIPS_reg_into_Alpha_reg(&a, MIPSREG_PC, ALPHA_T2);
1168     *a++ = 0xff; *a++ = 0x0f; *a++ = 0x5f; *a++ = 0x20; /* lda */
1169     *a++ = 0x02; *a++ = 0x00; *a++ = 0x62; *a++ = 0x44; /* and */
1170     *a++ = 0x01; *a++ = 0x04; *a++ = 0x22; *a++ = 0x40; /* addq */
1171    
1172     /*
1173     * Load the chunk pointer (actually, a 32-bit offset) into t0.
1174     * If it is zero, then skip the following.
1175     * Add cpu->chunk_base_address to t0.
1176     * Jump to t0.
1177     */
1178    
1179     *a++ = 0x00; *a++ = 0x00; *a++ = 0x21; *a++ = 0xa0; /* ldl t0,0(t0) */
1180     *a++ = 0x03; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4; /* beq t0,<skip> */
1181    
1182     /* ldq t2,chunk_base_address(a0) */
1183     ofs = ((size_t)&dummy_cpu.cd.mips.chunk_base_address) - (size_t)&dummy_cpu;
1184     *a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x70; *a++ = 0xa4;
1185     /* addq t0,t2,t0 */
1186     *a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x40;
1187    
1188     /* 00 00 e1 6b jmp (t0) */
1189     *a++ = 0x00; *a++ = 0x00; *a++ = 0xe1; *a++ = 0x6b; /* jmp (t0) */
1190    
1191     /* Failure, then return to the main loop. */
1192     *a++ = 0x01; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */
1193     }
1194     } else {
1195     /*
1196     * Just to make sure that we don't become too unreliant
1197     * on the main program loop, we need to return every once
1198     * in a while (interrupts etc).
1199     *
1200     * Load the "nr of instructions executed" (which is an int)
1201     * and see if it is below a certain threshold. If so, then
1202     * we go on with the fast path (bintrans), otherwise we
1203     * abort by returning.
1204     *
1205     * f4 01 5f 20 lda t1,500 (some low number...)
1206     * a1 0d c2 40 cmple t6,t1,t0
1207     * 01 00 20 f4 bne t0,14 <f+0x14>
1208     */
1209     if (!only_care_about_chunk_p && !forward) {
1210     *a++ = (N_SAFE_BINTRANS_LIMIT-1)&255; *a++ = ((N_SAFE_BINTRANS_LIMIT-1) >> 8)&255;
1211     *a++ = 0x5f; *a++ = 0x20; /* lda t1,0x1fff */
1212     *a++ = 0xa1; *a++ = 0x0d; *a++ = 0xe2; *a++ = 0x40; /* cmple t6,t1,t0 */
1213     *a++ = 0x01; *a++ = 0x00; *a++ = 0x20; *a++ = 0xf4; /* bne */
1214     *a++ = 0x01; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */
1215     }
1216    
1217     /*
1218     * potential_chunk_p points to an "uint32_t".
1219     * If this value is non-NULL, then it is a piece of Alpha
1220     * machine language code corresponding to the address
1221     * we're jumping to. Otherwise, those instructions haven't
1222     * been translated yet, so we have to return to the main
1223     * loop. (Actually, we have to add cpu->chunk_base_address,
1224     * because the uint32_t is limited to 32-bit offsets.)
1225     *
1226     * Case 1: The value is non-NULL already at translation
1227     * time. Then we can make a direct (fast) native
1228     * Alpha jump to the code chunk.
1229     *
1230     * Case 2: The value was NULL at translation time, then we
1231     * have to check during runtime.
1232     */
1233    
1234     /* Case 1: */
1235     /* printf("%08x ", *potential_chunk_p); */
1236     alpha_addr = *potential_chunk_p + (size_t)mem->translation_code_chunk_space;
1237     ofs = (alpha_addr - ((size_t)a+4)) / 4;
1238     /* printf("%016llx %016llx %i\n", (long long)alpha_addr, (long long)a, ofs); */
1239    
1240     if ((*potential_chunk_p) != 0 && ofs > -0xfffff && ofs < 0xfffff) {
1241     *a++ = ofs & 255; *a++ = (ofs >> 8) & 255; *a++ = 0xe0 + ((ofs >> 16) & 0x1f); *a++ = 0xc3; /* br <chunk> */
1242     } else {
1243     /* Case 2: */
1244    
1245     bintrans_register_potential_quick_jump(mem, a, p);
1246    
1247     /* 15 bits at a time, which means max 60 bits, but
1248     that should be enough. the top 4 bits are probably
1249     not used by userland alpha code. (TODO: verify this) */
1250     alpha_addr = (size_t)potential_chunk_p;
1251     subaddr = (alpha_addr >> 45) & 0x7fff;
1252    
1253     /*
1254     * 00 00 3f 20 lda t0,0
1255     * 21 f7 21 48 sll t0,0xf,t0
1256     * 34 12 21 20 lda t0,4660(t0)
1257     * 21 f7 21 48 sll t0,0xf,t0
1258     * 34 12 21 20 lda t0,4660(t0)
1259     * 21 f7 21 48 sll t0,0xf,t0
1260     * 34 12 21 20 lda t0,4660(t0)
1261     */
1262    
1263     /* Start with the topmost 15 bits: */
1264     *a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x3f; *a++ = 0x20;
1265     *a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */
1266    
1267     subaddr = (alpha_addr >> 30) & 0x7fff;
1268     *a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20;
1269     *a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */
1270    
1271     subaddr = (alpha_addr >> 15) & 0x7fff;
1272     *a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20;
1273     *a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */
1274    
1275     subaddr = alpha_addr & 0x7fff;
1276     *a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20;
1277    
1278     /*
1279     * Load the chunk pointer into t0.
1280     * If it is NULL (zero), then skip the following jump.
1281     * Jump to t0.
1282     */
1283     *a++ = 0x00; *a++ = 0x00; *a++ = 0x21; *a++ = 0xa0; /* ldl t0,0(t0) */
1284     *a++ = 0x03; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4; /* beq t0,<skip> */
1285    
1286     /* ldq t2,chunk_base_address(a0) */
1287     ofs = ((size_t)&dummy_cpu.cd.mips.chunk_base_address) - (size_t)&dummy_cpu;
1288     *a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x70; *a++ = 0xa4;
1289     /* addq t0,t2,t0 */
1290     *a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x40;
1291    
1292     /* 00 00 e1 6b jmp (t0) */
1293     *a++ = 0x00; *a++ = 0x00; *a++ = 0xe1; *a++ = 0x6b; /* jmp (t0) */
1294    
1295     /* "Failure", then let's return to the main loop. */
1296     *a++ = 0x01; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */
1297     }
1298     }
1299    
1300     if (skip != NULL) {
1301     *skip = ((size_t)a - (size_t)skip - 4) / 4;
1302     skip ++;
1303     *skip = (((size_t)a - (size_t)skip - 4) / 4) >> 8;
1304     }
1305    
1306     *addrp = a;
1307     return 1;
1308     }
1309    
1310    
1311     /*
1312     * bintrans_write_instruction__loadstore():
1313     */
1314     static int bintrans_write_instruction__loadstore(
1315     struct memory *mem, unsigned char **addrp,
1316     int rt, int imm, int rs, int instruction_type, int bigendian)
1317     {
1318     unsigned char *a, *fail, *generic64bit = NULL, *generic64bitA = NULL;
1319     unsigned char *doloadstore = NULL,
1320     *ok_unaligned_load3, *ok_unaligned_load2, *ok_unaligned_load1;
1321     uint32_t *b;
1322     int ofs, alignment, load = 0, alpha_rs, alpha_rt, unaligned = 0;
1323    
1324     /* TODO: Not yet: */
1325     if (instruction_type == HI6_LQ_MDMX || instruction_type == HI6_SQ) {
1326     return 0;
1327     }
1328    
1329     switch (instruction_type) {
1330     case HI6_LQ_MDMX:
1331     case HI6_LD:
1332     case HI6_LDL:
1333     case HI6_LDR:
1334     case HI6_LWU:
1335     case HI6_LW:
1336     case HI6_LWL:
1337     case HI6_LWR:
1338     case HI6_LHU:
1339     case HI6_LH:
1340     case HI6_LBU:
1341     case HI6_LB:
1342     load = 1;
1343     if (rt == 0)
1344     return 0;
1345     }
1346    
1347     switch (instruction_type) {
1348     case HI6_LDL:
1349     case HI6_LDR:
1350     case HI6_LWL:
1351     case HI6_LWR:
1352     case HI6_SDL:
1353     case HI6_SDR:
1354     case HI6_SWL:
1355     case HI6_SWR:
1356     unaligned = 1;
1357     }
1358    
1359     a = *addrp;
1360    
1361     /*
1362     * a1 = gpr[rs] + imm;
1363     *
1364     * 88 08 30 a4 ldq t0,2184(a0)
1365     * 34 12 21 22 lda a1,4660(t0)
1366     */
1367    
1368     alpha_rs = map_MIPS_to_Alpha[rs];
1369     if (alpha_rs < 0) {
1370     bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0);
1371     alpha_rs = ALPHA_T0;
1372     }
1373     *a++ = imm; *a++ = (imm >> 8); *a++ = 0x20 + alpha_rs; *a++ = 0x22;
1374    
1375     alignment = 0;
1376     switch (instruction_type) {
1377     case HI6_LQ_MDMX:
1378     case HI6_SQ:
1379     alignment = 15;
1380     break;
1381     case HI6_LD:
1382     case HI6_LDL:
1383     case HI6_LDR:
1384     case HI6_SD:
1385     case HI6_SDL:
1386     case HI6_SDR:
1387     alignment = 7;
1388     break;
1389     case HI6_LW:
1390     case HI6_LWL:
1391     case HI6_LWR:
1392     case HI6_LWU:
1393     case HI6_SW:
1394     case HI6_SWL:
1395     case HI6_SWR:
1396     alignment = 3;
1397     break;
1398     case HI6_LH:
1399     case HI6_LHU:
1400     case HI6_SH:
1401     alignment = 1;
1402     break;
1403     }
1404    
1405     if (unaligned) {
1406     /*
1407     * Unaligned load/store: Perform the host load/store at
1408     * an aligned address, and then figure out which bytes to
1409     * actually load into the destination register.
1410     *
1411     * 02 30 20 46 and a1,alignment,t1
1412     * 31 05 22 42 subq a1,t1,a1
1413     */
1414     *a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46;
1415     *a++ = 0x31; *a++ = 0x05; *a++ = 0x22; *a++ = 0x42;
1416     } else if (alignment > 0) {
1417     /*
1418     * Check alignment:
1419     *
1420     * 02 30 20 46 and a1,0x1,t1
1421     * 02 70 20 46 and a1,0x3,t1 (one of these "and"s)
1422     * 02 f0 20 46 and a1,0x7,t1
1423     * 02 f0 21 46 and a1,0xf,t1
1424     * 01 00 40 e4 beq t1,<okalign>
1425     * 01 80 fa 6b ret
1426     */
1427     *a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46;
1428     fail = a;
1429     *a++ = 0x01; *a++ = 0x00; *a++ = 0x40; *a++ = 0xe4;
1430     *addrp = a;
1431     bintrans_write_chunkreturn_fail(addrp);
1432     a = *addrp;
1433     *fail = ((size_t)a - (size_t)fail - 4) / 4;
1434     }
1435    
1436     alpha_rt = map_MIPS_to_Alpha[rt];
1437    
1438     if (mem->bintrans_32bit_only) {
1439     /* Special case for 32-bit addressing: */
1440    
1441     ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_loadstore_32bit) - (size_t)&dummy_cpu;
1442     /* ldq t12,bintrans_loadstore_32bit(a0) */
1443     *a++ = ofs; *a++ = ofs >> 8; *a++ = 0x70; *a++ = 0xa7;
1444    
1445     /* jsr t4,(t12),<after> */
1446     *a++ = 0x00; *a++ = 0x40; *a++ = 0xbb; *a++ = 0x68;
1447    
1448     /*
1449     * Now:
1450     * a3 = host page
1451     * t0 = 0 for readonly pages, 1 for read/write pages
1452     * t3 = address of host load/store
1453     */
1454    
1455     /* If this is a store, then the lowest bit must be set: */
1456     if (!load) {
1457     /* 01 00 20 f4 bne t0,<okzzz> */
1458     fail = a;
1459     *a++ = 0x01; *a++ = 0x00; *a++ = 0x20; *a++ = 0xf4;
1460     bintrans_write_chunkreturn_fail(&a);
1461     *fail = ((size_t)a - (size_t)fail - 4) / 4;
1462     }
1463     } else {
1464     /*
1465     * If the highest 33 bits of the address are either all ones
1466     * or all zeroes, then the tables used for 32-bit load/stores
1467     * can be used.
1468     */
1469     *a++ = 0x81; *a++ = 0xf6; *a++ = 0x23; *a++ = 0x4a; /* srl a1,0x1f,t0 */
1470     *a++ = 0x01; *a++ = 0x30; *a++ = 0x20; *a++ = 0x44; /* and t0,0x1,t0 */
1471     *a++ = 0x04; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4; /* beq t0,<noll> */
1472     *a++ = 0x81; *a++ = 0x16; *a++ = 0x24; *a++ = 0x4a; /* srl a1,0x20,t0 */
1473     *a++ = 0x01; *a++ = 0x30; *a++ = 0x20; *a++ = 0x40; /* addl t0,0x1,t0 */
1474     *a++ = 0x04; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4; /* beq t0,<ok> */
1475     generic64bit = a;
1476     *a++ = 0x04; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; /* br <generic> */
1477     /* <noll>: */
1478     *a++ = 0x81; *a++ = 0x16; *a++ = 0x24; *a++ = 0x4a; /* srl a1,0x20,t0 */
1479     *a++ = 0x01; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4; /* beq t0,<ok> */
1480     generic64bitA = a;
1481     *a++ = 0x04; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; /* br <generic> */
1482    
1483     ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_loadstore_32bit) - (size_t)&dummy_cpu;
1484     /* ldq t12,bintrans_loadstore_32bit(a0) */
1485     *a++ = ofs; *a++ = ofs >> 8; *a++ = 0x70; *a++ = 0xa7;
1486    
1487     /* jsr t4,(t12),<after> */
1488     *a++ = 0x00; *a++ = 0x40; *a++ = 0xbb; *a++ = 0x68;
1489    
1490     /*
1491     * Now:
1492     * a3 = host page (or NULL if not found)
1493     * t0 = 0 for readonly pages, 1 for read/write pages
1494     * t3 = (potential) address of host load/store
1495     */
1496    
1497     /* If this is a store, then the lowest bit must be set: */
1498     if (!load) {
1499     /* 01 00 20 f4 bne t0,<okzzz> */
1500     fail = a;
1501     *a++ = 0x01; *a++ = 0x00; *a++ = 0x20; *a++ = 0xf4;
1502     bintrans_write_chunkreturn_fail(&a);
1503     *fail = ((size_t)a - (size_t)fail - 4) / 4;
1504     }
1505    
1506     doloadstore = a;
1507     *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1508    
1509    
1510     /*
1511     * Generic (64-bit) load/store:
1512     */
1513    
1514     if (generic64bit != NULL)
1515     *generic64bit = ((size_t)a - (size_t)generic64bit - 4) / 4;
1516     if (generic64bitA != NULL)
1517     *generic64bitA = ((size_t)a - (size_t)generic64bitA - 4) / 4;
1518    
1519     *addrp = a;
1520     b = (uint32_t *) *addrp;
1521    
1522     /* Save a0 and the old return address on the stack: */
1523     *b++ = 0x23deff80; /* lda sp,-128(sp) */
1524    
1525     *b++ = 0xb75e0000; /* stq ra,0(sp) */
1526     *b++ = 0xb61e0008; /* stq a0,8(sp) */
1527     *b++ = 0xb4de0010; /* stq t5,16(sp) */
1528     *b++ = 0xb0fe0018; /* stl t6,24(sp) */
1529     *b++ = 0xb71e0020; /* stq t10,32(sp) */
1530     *b++ = 0xb73e0028; /* stq t11,40(sp) */
1531     *b++ = 0xb51e0030; /* stq t7,48(sp) */
1532     *b++ = 0xb6de0038; /* stq t8,56(sp) */
1533     *b++ = 0xb6fe0040; /* stq t9,64(sp) */
1534    
1535     ofs = ((size_t)&dummy_cpu.cd.mips.fast_vaddr_to_hostaddr) - (size_t)&dummy_cpu;
1536    
1537     *b++ = 0xa7700000 | ofs; /* ldq t12,0(a0) */
1538    
1539     /* a1 is already vaddr. set a2 = writeflag */
1540     *b++ = 0x225f0000 | (load? 0 : 1);
1541    
1542     /* Call fast_vaddr_to_hostaddr: */
1543     *b++ = 0x6b5b4000; /* jsr ra,(t12),<after> */
1544    
1545     /* Restore the old return address and a0 from the stack: */
1546     *b++ = 0xa75e0000; /* ldq ra,0(sp) */
1547     *b++ = 0xa61e0008; /* ldq a0,8(sp) */
1548     *b++ = 0xa4de0010; /* ldq t5,16(sp) */
1549     *b++ = 0xa0fe0018; /* ldl t6,24(sp) */
1550     *b++ = 0xa71e0020; /* ldq t10,32(sp) */
1551     *b++ = 0xa73e0028; /* ldq t11,40(sp) */
1552     *b++ = 0xa51e0030; /* ldq t7,48(sp) */
1553     *b++ = 0xa6de0038; /* ldq t8,56(sp) */
1554     *b++ = 0xa6fe0040; /* ldq t9,64(sp) */
1555    
1556     *b++ = 0x23de0080; /* lda sp,128(sp) */
1557    
1558     *addrp = (unsigned char *) b;
1559     a = *addrp;
1560    
1561     /*
1562     * NULL? Then return failure.
1563     * 01 00 00 f4 bne v0,f8 <okzz>
1564     */
1565     fail = a;
1566     *a++ = 0x01; *a++ = 0x00; *a++ = 0x00; *a++ = 0xf4;
1567     bintrans_write_chunkreturn_fail(&a);
1568     *fail = ((size_t)a - (size_t)fail - 4) / 4;
1569    
1570     /* The rest of this code was written with t3 as the address. */
1571    
1572     /* 04 14 00 40 addq v0,0,t3 */
1573     *a++ = 0x04; *a++ = 0x14; *a++ = 0x00; *a++ = 0x40;
1574    
1575     if (doloadstore != NULL)
1576     *doloadstore = ((size_t)a - (size_t)doloadstore - 4) / 4;
1577     }
1578    
1579    
1580     switch (instruction_type) {
1581     case HI6_LQ_MDMX:
1582     /* TODO */
1583     break;
1584     case HI6_LD:
1585     *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xa4; /* ldq t0,0(t3) */
1586     if (bigendian) {
1587     /* remember original 8 bytes of t0: */
1588     *a++ = 0x05; *a++ = 0x04; *a++ = 0x3f; *a++ = 0x40; /* addq t0,zero,t4 */
1589    
1590     /* swap lowest 4 bytes: */
1591     *a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */
1592     *a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */
1593     *a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */
1594     *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1595     *a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */
1596     *a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */
1597     *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1598     *a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */
1599     *a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */
1600    
1601     /* save result in (top 4 bytes of) t1, then t4. get back top bits of t4: */
1602     *a++ = 0x22; *a++ = 0x17; *a++ = 0x24; *a++ = 0x48; /* sll t0,0x20,t1 */
1603     *a++ = 0x81; *a++ = 0x16; *a++ = 0xa4; *a++ = 0x48; /* srl t4,0x20,t0 */
1604     *a++ = 0x05; *a++ = 0x14; *a++ = 0x40; *a++ = 0x40; /* addq t1,0,t4 */
1605    
1606     /* swap highest 4 bytes: */
1607     *a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */
1608     *a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */
1609     *a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */
1610     *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1611     *a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */
1612     *a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */
1613     *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1614     *a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */
1615     *a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */
1616    
1617     /* or the results together: */
1618     *a++ = 0x01; *a++ = 0x04; *a++ = 0xa1; *a++ = 0x44; /* or t4,t0,t0 */
1619     }
1620     bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rt);
1621     break;
1622     case HI6_LW:
1623     case HI6_LWU:
1624     if (alpha_rt < 0 || bigendian || instruction_type == HI6_LWU)
1625     alpha_rt = ALPHA_T0;
1626     /* ldl rt,0(t3) */
1627     *a++ = 0x00; *a++ = 0x00; *a++ = 0x04 | ((alpha_rt & 7) << 5);
1628     *a++ = 0xa0 | ((alpha_rt >> 3) & 3);
1629     if (bigendian) {
1630     *a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */
1631     *a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */
1632     *a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */
1633     *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1634     *a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */
1635     *a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */
1636     *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1637     *a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */
1638     *a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */
1639     *a++ = 0x01; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,zero,t0 (sign extend) 32->64 */
1640     }
1641     if (instruction_type == HI6_LWU) {
1642     /* Use only lowest 32 bits: */
1643     *a++ = 0x21; *a++ = 0xf6; *a++ = 0x21; *a++ = 0x48; /* zapnot t0,0xf,t0 */
1644     }
1645     if (alpha_rt == ALPHA_T0)
1646     bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rt);
1647     break;
1648     case HI6_LHU:
1649     case HI6_LH:
1650     *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0x30; /* ldwu from memory */
1651     if (bigendian) {
1652     *a++ = 0x62; *a++ = 0x31; *a++ = 0x20; *a++ = 0x48; /* insbl t0,1,t1 */
1653     *a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */
1654     *a++ = 0x01; *a++ = 0x04; *a++ = 0x43; *a++ = 0x44; /* or t1,t2,t0 */
1655     }
1656     if (instruction_type == HI6_LH) {
1657     *a++ = 0x21; *a++ = 0x00; *a++ = 0xe1; *a++ = 0x73; /* sextw t0,t0 */
1658     }
1659     bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rt);
1660     break;
1661     case HI6_LBU:
1662     case HI6_LB:
1663     if (alpha_rt < 0)
1664     alpha_rt = ALPHA_T0;
1665     /* ldbu rt,0(t3) */
1666     *a++ = 0x00; *a++ = 0x00; *a++ = 0x04 | ((alpha_rt & 7) << 5);
1667     *a++ = 0x28 | ((alpha_rt >> 3) & 3);
1668     if (instruction_type == HI6_LB) {
1669     /* sextb rt,rt */
1670     *a++ = alpha_rt; *a++ = 0x00; *a++ = 0xe0 + alpha_rt; *a++ = 0x73;
1671     }
1672     if (alpha_rt == ALPHA_T0)
1673     bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rt);
1674     break;
1675    
1676     case HI6_LWL:
1677     /* a1 = 0..3 (or 0..7 for 64-bit loads): */
1678     alpha_rs = map_MIPS_to_Alpha[rs];
1679     if (alpha_rs < 0) {
1680     bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0);
1681     alpha_rs = ALPHA_T0;
1682     }
1683     *a++ = imm; *a++ = (imm >> 8); *a++ = 0x20 + alpha_rs; *a++ = 0x22;
1684     /* 02 30 20 46 and a1,alignment,t1 */
1685     *a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46;
1686    
1687     /* ldl t0,0(t3) */
1688     *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xa0;
1689    
1690     if (bigendian) {
1691     /* TODO */
1692     bintrans_write_chunkreturn_fail(&a);
1693     }
1694     /*
1695     * lwl: memory = 0x12 0x34 0x56 0x78
1696     * offset (a1): register rt becomes:
1697     * 0 0x12......
1698     * 1 0x3412....
1699     * 2 0x563412..
1700     * 3 0x78563412
1701     */
1702    
1703     bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T2);
1704    
1705     /*
1706     10: 03 00 9f 20 lda t3,3
1707     14: a5 05 82 40 cmpeq t3,t1,t4
1708     18: 01 00 a0 e4 beq t4,20 <skip>
1709     */
1710     *a++ = 0x03; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20;
1711     *a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40;
1712     *a++ = 0x02; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
1713    
1714     /* 03 14 20 40 addq t0,0,t2 */
1715     *a++ = 0x03; *a++ = 0x14; *a++ = 0x20; *a++ = 0x40;
1716    
1717     ok_unaligned_load3 = a;
1718     *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1719    
1720    
1721    
1722     *a++ = 0x02; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20;
1723     *a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40;
1724     *a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
1725     /*
1726     * 2 0x563412..
1727     2c: 21 17 21 48 sll t0,0x8,t0
1728     30: 01 10 20 40 addl t0,0,t0
1729     34: 03 f0 7f 44 and t2,0xff,t2
1730     38: 03 04 23 44 or t0,t2,t2
1731     */
1732     *a++ = 0x21; *a++ = 0x17; *a++ = 0x21; *a++ = 0x48;
1733     *a++ = 0x01; *a++ = 0x10; *a++ = 0x20; *a++ = 0x40;
1734     *a++ = 0x03; *a++ = 0xf0; *a++ = 0x7f; *a++ = 0x44;
1735     *a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
1736    
1737     ok_unaligned_load2 = a;
1738     *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1739    
1740    
1741    
1742     *a++ = 0x01; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20;
1743     *a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40;
1744     *a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
1745     /*
1746     * 1 0x3412....
1747     2c: 21 17 22 48 sll t0,0x10,t0
1748     30: 01 10 20 40 addl t0,0,t0
1749     34: 23 76 60 48 zapnot t2,0x3,t2
1750     38: 03 04 23 44 or t0,t2,t2
1751     */
1752     *a++ = 0x21; *a++ = 0x17; *a++ = 0x22; *a++ = 0x48;
1753     *a++ = 0x01; *a++ = 0x10; *a++ = 0x20; *a++ = 0x40;
1754     *a++ = 0x23; *a++ = 0x76; *a++ = 0x60; *a++ = 0x48;
1755     *a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
1756    
1757     ok_unaligned_load1 = a;
1758     *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1759    
1760    
1761    
1762    
1763     /*
1764     * 0 0x12......
1765     2c: 21 17 23 48 sll t0,0x18,t0
1766     30: 01 10 20 40 addl t0,0,t0
1767     34: 23 f6 60 48 zapnot t2,0x7,t2
1768     38: 03 04 23 44 or t0,t2,t2
1769     */
1770     *a++ = 0x21; *a++ = 0x17; *a++ = 0x23; *a++ = 0x48;
1771     *a++ = 0x01; *a++ = 0x10; *a++ = 0x20; *a++ = 0x40;
1772     *a++ = 0x23; *a++ = 0xf6; *a++ = 0x60; *a++ = 0x48;
1773     *a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
1774    
1775    
1776     *ok_unaligned_load3 = ((size_t)a - (size_t)ok_unaligned_load3 - 4) / 4;
1777     *ok_unaligned_load2 = ((size_t)a - (size_t)ok_unaligned_load2 - 4) / 4;
1778     *ok_unaligned_load1 = ((size_t)a - (size_t)ok_unaligned_load1 - 4) / 4;
1779    
1780     /* 03 10 60 40 addl t2,0,t2 */
1781     *a++ = 0x03; *a++ = 0x10; *a++ = 0x60; *a++ = 0x40;
1782    
1783     bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T2, rt);
1784     break;
1785    
1786     case HI6_LWR:
1787     /* a1 = 0..3 (or 0..7 for 64-bit loads): */
1788     alpha_rs = map_MIPS_to_Alpha[rs];
1789     if (alpha_rs < 0) {
1790     bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0);
1791     alpha_rs = ALPHA_T0;
1792     }
1793     *a++ = imm; *a++ = (imm >> 8); *a++ = 0x20 + alpha_rs; *a++ = 0x22;
1794     /* 02 30 20 46 and a1,alignment,t1 */
1795     *a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46;
1796    
1797     /* ldl t0,0(t3) */
1798     *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xa0;
1799    
1800     if (bigendian) {
1801     /* TODO */
1802     bintrans_write_chunkreturn_fail(&a);
1803     }
1804     /*
1805     * lwr: memory = 0x12 0x34 0x56 0x78
1806     * offset (a1): register rt becomes:
1807     * 0 0x78563412
1808     * 1 0x..785634
1809     * 2 0x....7856
1810     * 3 0x......78
1811     */
1812    
1813     bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T2);
1814    
1815     /*
1816     10: 03 00 9f 20 lda t3,3
1817     14: a5 05 82 40 cmpeq t3,t1,t4
1818     18: 01 00 a0 e4 beq t4,20 <skip>
1819     */
1820     *a++ = 0x03; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20;
1821     *a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40;
1822     *a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
1823    
1824     /*
1825     2c: 81 16 23 48 srl t0,0x18,t0
1826     b0: 21 36 20 48 zapnot t0,0x1,t0
1827     34: 23 d6 7f 48 zapnot t2,0xfe,t2
1828     38: 03 04 23 44 or t0,t2,t2
1829     */
1830     *a++ = 0x81; *a++ = 0x16; *a++ = 0x23; *a++ = 0x48;
1831     *a++ = 0x21; *a++ = 0x36; *a++ = 0x20; *a++ = 0x48;
1832     *a++ = 0x23; *a++ = 0xd6; *a++ = 0x7f; *a++ = 0x48;
1833     *a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
1834    
1835     ok_unaligned_load3 = a;
1836     *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1837    
1838    
1839    
1840     *a++ = 0x02; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20;
1841     *a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40;
1842     *a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
1843     /*
1844     2c: 81 16 22 48 srl t0,0x10,t0
1845     b4: 21 76 20 48 zapnot t0,0x3,t0
1846     34: 23 96 7f 48 zapnot t2,0xfc,t2
1847     38: 03 04 23 44 or t0,t2,t2
1848     */
1849     *a++ = 0x81; *a++ = 0x16; *a++ = 0x22; *a++ = 0x48;
1850     *a++ = 0x21; *a++ = 0x76; *a++ = 0x20; *a++ = 0x48;
1851     *a++ = 0x23; *a++ = 0x96; *a++ = 0x7f; *a++ = 0x48;
1852     *a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
1853    
1854     ok_unaligned_load2 = a;
1855     *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1856    
1857    
1858    
1859     *a++ = 0x01; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20;
1860     *a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40;
1861     *a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
1862     /*
1863     2c: 81 16 21 48 srl t0,0x8,t0
1864     b8: 21 f6 20 48 zapnot t0,0x7,t0
1865     3c: 23 16 7f 48 zapnot t2,0xf8,t2
1866     40: 03 04 23 44 or t0,t2,t2
1867     */
1868     *a++ = 0x81; *a++ = 0x16; *a++ = 0x21; *a++ = 0x48;
1869     *a++ = 0x21; *a++ = 0xf6; *a++ = 0x20; *a++ = 0x48;
1870     *a++ = 0x23; *a++ = 0x16; *a++ = 0x7f; *a++ = 0x48;
1871     *a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
1872    
1873     ok_unaligned_load1 = a;
1874     *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
1875    
1876    
1877    
1878    
1879     /*
1880     * 0 0x12......
1881     */
1882     /* 03 14 20 40 addq t0,0,t2 */
1883     *a++ = 0x03; *a++ = 0x14; *a++ = 0x20; *a++ = 0x40;
1884    
1885    
1886    
1887     *ok_unaligned_load3 = ((size_t)a - (size_t)ok_unaligned_load3 - 4) / 4;
1888     *ok_unaligned_load2 = ((size_t)a - (size_t)ok_unaligned_load2 - 4) / 4;
1889     *ok_unaligned_load1 = ((size_t)a - (size_t)ok_unaligned_load1 - 4) / 4;
1890    
1891     /* 03 10 60 40 addl t2,0,t2 */
1892     *a++ = 0x03; *a++ = 0x10; *a++ = 0x60; *a++ = 0x40;
1893    
1894     bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T2, rt);
1895     break;
1896    
1897     case HI6_SQ:
1898     /* TODO */
1899     break;
1900     case HI6_SD:
1901     bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T0);
1902     if (bigendian) {
1903     /* remember original 8 bytes of t0: */
1904     *a++ = 0x05; *a++ = 0x04; *a++ = 0x3f; *a++ = 0x40; /* addq t0,zero,t4 */
1905    
1906     /* swap lowest 4 bytes: */
1907     *a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */
1908     *a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */
1909     *a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */
1910     *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1911     *a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */
1912     *a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */
1913     *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1914     *a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */
1915     *a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */
1916    
1917     /* save result in (top 4 bytes of) t1, then t4. get back top bits of t4: */
1918     *a++ = 0x22; *a++ = 0x17; *a++ = 0x24; *a++ = 0x48; /* sll t0,0x20,t1 */
1919     *a++ = 0x81; *a++ = 0x16; *a++ = 0xa4; *a++ = 0x48; /* srl t4,0x20,t0 */
1920     *a++ = 0x05; *a++ = 0x14; *a++ = 0x40; *a++ = 0x40; /* addq t1,0,t4 */
1921    
1922     /* swap highest 4 bytes: */
1923     *a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */
1924     *a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */
1925     *a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */
1926     *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1927     *a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */
1928     *a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */
1929     *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1930     *a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */
1931     *a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */
1932    
1933     /* or the results together: */
1934     *a++ = 0x01; *a++ = 0x04; *a++ = 0xa1; *a++ = 0x44; /* or t4,t0,t0 */
1935     }
1936     *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xb4; /* stq to memory */
1937     break;
1938     case HI6_SW:
1939     if (alpha_rt < 0 || bigendian) {
1940     bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T0);
1941     alpha_rt = ALPHA_T0;
1942     }
1943     if (bigendian) {
1944     *a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */
1945     *a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */
1946     *a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */
1947     *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1948     *a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */
1949     *a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */
1950     *a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */
1951     *a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */
1952     *a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */
1953     }
1954     /* stl to memory: stl rt,0(t3) */
1955     *a++ = 0x00; *a++ = 0x00; *a++ = 0x04 | ((alpha_rt & 7) << 5);
1956     *a++ = 0xb0 | ((alpha_rt >> 3) & 3);
1957     break;
1958     case HI6_SH:
1959     bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T0);
1960     if (bigendian) {
1961     *a++ = 0x62; *a++ = 0x31; *a++ = 0x20; *a++ = 0x48; /* insbl t0,1,t1 */
1962     *a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */
1963     *a++ = 0x01; *a++ = 0x04; *a++ = 0x43; *a++ = 0x44; /* or t1,t2,t0 */
1964     }
1965     *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0x34; /* stw to memory */
1966     break;
1967     case HI6_SB:
1968     if (alpha_rt < 0) {
1969     bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T0);
1970     alpha_rt = ALPHA_T0;
1971     }
1972     /* stb to memory: stb rt,0(t3) */
1973     *a++ = 0x00; *a++ = 0x00; *a++ = 0x04 | ((alpha_rt & 7) << 5);
1974     *a++ = 0x38 | ((alpha_rt >> 3) & 3);
1975     break;
1976    
1977     case HI6_SWL:
1978     /* a1 = 0..3 (or 0..7 for 64-bit stores): */
1979     alpha_rs = map_MIPS_to_Alpha[rs];
1980     if (alpha_rs < 0) {
1981     bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0);
1982     alpha_rs = ALPHA_T0;
1983     }
1984     *a++ = imm; *a++ = (imm >> 8); *a++ = 0x20 + alpha_rs; *a++ = 0x22;
1985     /* 02 30 20 46 and a1,alignment,t1 */
1986     *a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46;
1987    
1988     /* ldl t0,0(t3) */
1989     *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xa0;
1990    
1991     if (bigendian) {
1992     /* TODO */
1993     bintrans_write_chunkreturn_fail(&a);
1994     }
1995    
1996     bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T2);
1997    
1998     /*
1999     * swl: memory = 0x12 0x34 0x56 0x78
2000     * register = 0x89abcdef
2001     * offset (a1): memory becomes:
2002     * 0 0x89 0x.. 0x.. 0x..
2003     * 1 0xab 0x89 0x.. 0x..
2004     * 2 0xcd 0xab 0x89 0x..
2005     * 3 0xef 0xcd 0xab 0x89
2006     */
2007    
2008     /*
2009     a5 75 40 40 cmpeq t1,0x03,t4
2010     01 00 a0 e4 beq t4,20 <skip>
2011     */
2012     *a++ = 0xa5; *a++ = 0x75; *a++ = 0x40; *a++ = 0x40;
2013     *a++ = 0x02; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
2014    
2015     /* 01 10 60 40 addl t2,0,t0 */
2016     *a++ = 0x01; *a++ = 0x10; *a++ = 0x60; *a++ = 0x40;
2017    
2018     ok_unaligned_load3 = a;
2019     *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
2020    
2021    
2022    
2023    
2024     *a++ = 0xa5; *a++ = 0x55; *a++ = 0x40; *a++ = 0x40;
2025     *a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
2026     /*
2027     2:
2028     e8: 83 16 61 48 srl t2,0x8,t2
2029     ec: 23 f6 60 48 zapnot t2,0x7,t2
2030     f0: 21 16 3f 48 zapnot t0,0xf8,t0
2031     f4: 01 04 23 44 or t0,t2,t0
2032     */
2033     *a++ = 0x83; *a++ = 0x16; *a++ = 0x61; *a++ = 0x48;
2034     *a++ = 0x23; *a++ = 0xf6; *a++ = 0x60; *a++ = 0x48;
2035     *a++ = 0x21; *a++ = 0x16; *a++ = 0x3f; *a++ = 0x48;
2036     *a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
2037    
2038     ok_unaligned_load2 = a;
2039     *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
2040    
2041    
2042    
2043     *a++ = 0xa5; *a++ = 0x35; *a++ = 0x40; *a++ = 0x40;
2044     *a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
2045     /*
2046     1:
2047     f8: 83 16 62 48 srl t2,0x10,t2
2048     fc: 23 76 60 48 zapnot t2,0x3,t2
2049     100: 21 96 3f 48 zapnot t0,0xfc,t0
2050     104: 01 04 23 44 or t0,t2,t0
2051     */
2052     *a++ = 0x83; *a++ = 0x16; *a++ = 0x62; *a++ = 0x48;
2053     *a++ = 0x23; *a++ = 0x76; *a++ = 0x60; *a++ = 0x48;
2054     *a++ = 0x21; *a++ = 0x96; *a++ = 0x3f; *a++ = 0x48;
2055     *a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
2056    
2057     ok_unaligned_load1 = a;
2058     *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
2059    
2060    
2061    
2062    
2063    
2064     /*
2065     0:
2066     108: 83 16 63 48 srl t2,0x18,t2
2067     10c: 23 36 60 48 zapnot t2,0x1,t2
2068     110: 21 d6 3f 48 zapnot t0,0xfe,t0
2069     114: 01 04 23 44 or t0,t2,t0
2070     */
2071     *a++ = 0x83; *a++ = 0x16; *a++ = 0x63; *a++ = 0x48;
2072     *a++ = 0x23; *a++ = 0x36; *a++ = 0x60; *a++ = 0x48;
2073     *a++ = 0x21; *a++ = 0xd6; *a++ = 0x3f; *a++ = 0x48;
2074     *a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
2075    
2076    
2077     *ok_unaligned_load3 = ((size_t)a - (size_t)ok_unaligned_load3 - 4) / 4;
2078     *ok_unaligned_load2 = ((size_t)a - (size_t)ok_unaligned_load2 - 4) / 4;
2079     *ok_unaligned_load1 = ((size_t)a - (size_t)ok_unaligned_load1 - 4) / 4;
2080    
2081     /* sdl t0,0(t3) */
2082     *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xb0;
2083     break;
2084    
2085     case HI6_SWR:
2086     /* a1 = 0..3 (or 0..7 for 64-bit stores): */
2087     alpha_rs = map_MIPS_to_Alpha[rs];
2088     if (alpha_rs < 0) {
2089     bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0);
2090     alpha_rs = ALPHA_T0;
2091     }
2092     *a++ = imm; *a++ = (imm >> 8); *a++ = 0x20 + alpha_rs; *a++ = 0x22;
2093     /* 02 30 20 46 and a1,alignment,t1 */
2094     *a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46;
2095    
2096     /* ldl t0,0(t3) */
2097     *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xa0;
2098    
2099     if (bigendian) {
2100     /* TODO */
2101     bintrans_write_chunkreturn_fail(&a);
2102     }
2103    
2104     bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T2);
2105    
2106     /*
2107     * swr: memory = 0x12 0x34 0x56 0x78
2108     * register = 0x89abcdef
2109     * offset (a1): memory becomes:
2110     * 0 0xef 0xcd 0xab 0x89
2111     * 1 0x.. 0xef 0xcd 0xab
2112     * 2 0x.. 0x.. 0xef 0xcd
2113     * 3 0x.. 0x.. 0x.. 0xef
2114     */
2115    
2116    
2117     /*
2118     a5 75 40 40 cmpeq t1,0x03,t4
2119     01 00 a0 e4 beq t4,20 <skip>
2120     */
2121     *a++ = 0xa5; *a++ = 0x75; *a++ = 0x40; *a++ = 0x40;
2122     *a++ = 0x04; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
2123    
2124     /*
2125     118: 23 17 63 48 sll t2,0x18,t2
2126     11c: 21 f6 20 48 zapnot t0,0x7,t0
2127     120: 01 04 23 44 or t0,t2,t0
2128     */
2129     *a++ = 0x23; *a++ = 0x17; *a++ = 0x63; *a++ = 0x48;
2130     *a++ = 0x21; *a++ = 0xf6; *a++ = 0x20; *a++ = 0x48;
2131     *a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
2132    
2133     ok_unaligned_load3 = a;
2134     *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
2135    
2136    
2137    
2138    
2139    
2140     *a++ = 0xa5; *a++ = 0x55; *a++ = 0x40; *a++ = 0x40;
2141     *a++ = 0x04; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
2142     /*
2143     2:
2144     124: 23 17 62 48 sll t2,0x10,t2
2145     128: 21 76 20 48 zapnot t0,0x3,t0
2146     12c: 01 04 23 44 or t0,t2,t0
2147     */
2148     *a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48;
2149     *a++ = 0x21; *a++ = 0x76; *a++ = 0x20; *a++ = 0x48;
2150     *a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
2151    
2152     ok_unaligned_load2 = a;
2153     *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
2154    
2155    
2156    
2157     *a++ = 0xa5; *a++ = 0x35; *a++ = 0x40; *a++ = 0x40;
2158     *a++ = 0x04; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4;
2159     /*
2160     1:
2161     130: 23 17 61 48 sll t2,0x8,t2
2162     134: 21 36 20 48 zapnot t0,0x1,t0
2163     138: 01 04 23 44 or t0,t2,t0
2164     */
2165     *a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48;
2166     *a++ = 0x21; *a++ = 0x36; *a++ = 0x20; *a++ = 0x48;
2167     *a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44;
2168    
2169     ok_unaligned_load1 = a;
2170     *a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3;
2171    
2172    
2173    
2174     /*
2175     0:
2176     13c: 01 10 60 40 addl t2,0,t0
2177     */
2178     *a++ = 0x01; *a++ = 0x10; *a++ = 0x60; *a++ = 0x40;
2179    
2180    
2181     *ok_unaligned_load3 = ((size_t)a - (size_t)ok_unaligned_load3 - 4) / 4;
2182     *ok_unaligned_load2 = ((size_t)a - (size_t)ok_unaligned_load2 - 4) / 4;
2183     *ok_unaligned_load1 = ((size_t)a - (size_t)ok_unaligned_load1 - 4) / 4;
2184    
2185     /* sdl t0,0(t3) */
2186     *a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xb0;
2187     break;
2188    
2189     default:
2190     ;
2191     }
2192    
2193     *addrp = a;
2194     bintrans_write_pc_inc(addrp);
2195     return 1;
2196     }
2197    
2198    
2199     /*
2200     * bintrans_write_instruction__lui():
2201     */
2202     static int bintrans_write_instruction__lui(unsigned char **addrp,
2203     int rt, int imm)
2204     {
2205     uint32_t *a;
2206    
2207     /*
2208     * dc fe 3f 24 ldah t0,-292
2209     * 1f 04 ff 5f fnop
2210     * 88 08 30 b4 stq t0,2184(a0)
2211     */
2212     if (rt != 0) {
2213     int alpha_rt = map_MIPS_to_Alpha[rt];
2214     if (alpha_rt < 0)
2215     alpha_rt = ALPHA_T0;
2216    
2217     a = (uint32_t *) *addrp;
2218     *a++ = 0x241f0000 | (alpha_rt << 21) | ((uint32_t)imm & 0xffff);
2219     *addrp = (unsigned char *) a;
2220    
2221     if (alpha_rt == ALPHA_T0) {
2222     *a++ = 0x5fff041f; /* fnop */
2223     bintrans_move_Alpha_reg_into_MIPS_reg(addrp, ALPHA_T0, rt);
2224     }
2225     }
2226    
2227     bintrans_write_pc_inc(addrp);
2228    
2229     return 1;
2230     }
2231    
2232    
2233     /*
2234     * bintrans_write_instruction__mfmthilo():
2235     */
2236     static int bintrans_write_instruction__mfmthilo(unsigned char **addrp,
2237     int rd, int from_flag, int hi_flag)
2238     {
2239     unsigned char *a;
2240     int ofs;
2241    
2242     a = *addrp;
2243    
2244     /*
2245     * 18 09 30 a4 ldq t0,hi(a0) (or lo)
2246     * 18 09 30 b4 stq t0,rd(a0)
2247     *
2248     * (or if from_flag is cleared then move the other way, it's
2249     * actually not rd then, but rs...)
2250     */
2251    
2252     if (from_flag) {
2253     if (rd != 0) {
2254     /* mfhi or mflo */
2255     if (hi_flag)
2256     ofs = ((size_t)&dummy_cpu.cd.mips.hi) - (size_t)&dummy_cpu;
2257     else
2258     ofs = ((size_t)&dummy_cpu.cd.mips.lo) - (size_t)&dummy_cpu;
2259     *a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x30; *a++ = 0xa4;
2260    
2261     bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rd);
2262     }
2263     } else {
2264     /* mthi or mtlo */
2265     bintrans_move_MIPS_reg_into_Alpha_reg(&a, rd, ALPHA_T0);
2266    
2267     if (hi_flag)
2268     ofs = ((size_t)&dummy_cpu.cd.mips.hi) - (size_t)&dummy_cpu;
2269     else
2270     ofs = ((size_t)&dummy_cpu.cd.mips.lo) - (size_t)&dummy_cpu;
2271     *a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x30; *a++ = 0xb4;
2272     }
2273    
2274     *addrp = a;
2275     bintrans_write_pc_inc(addrp);
2276     return 1;
2277     }
2278    
2279    
2280     /*
2281     * bintrans_write_instruction__mfc_mtc():
2282     */
2283     static int bintrans_write_instruction__mfc_mtc(struct memory *mem,
2284     unsigned char **addrp, int coproc_nr, int flag64bit, int rt,
2285     int rd, int mtcflag)
2286     {
2287     uint32_t *a, *jump;
2288     int ofs;
2289    
2290     /*
2291     * NOTE: Only a few registers are readable without side effects.
2292     */
2293     if (rt == 0 && !mtcflag)
2294     return 0;
2295    
2296     if (coproc_nr >= 1)
2297     return 0;
2298    
2299     if (rd == COP0_RANDOM || rd == COP0_COUNT)
2300     return 0;
2301    
2302    
2303     /*************************************************************
2304     *
2305     * TODO: Check for kernel mode, or Coproc X usability bit!
2306     *
2307     *************************************************************/
2308    
2309     a = (uint32_t *) *addrp;
2310    
2311     ofs = ((size_t)&dummy_cpu.cd.mips.coproc[0]) - (size_t)&dummy_cpu;
2312     *a++ = 0xa4300000 | (ofs & 0xffff); /* ldq t0,coproc[0](a0) */
2313    
2314     ofs = ((size_t)&dummy_coproc.reg[rd]) - (size_t)&dummy_coproc;
2315     *a++ = 0xa4410000 | (ofs & 0xffff); /* ldq t1,reg_rd(t0) */
2316    
2317     if (mtcflag) {
2318     /* mtc: */
2319     *addrp = (unsigned char *) a;
2320     bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rt, ALPHA_T0);
2321     a = (uint32_t *) *addrp;
2322    
2323     if (!flag64bit) {
2324     *a++ = 0x40201001; /* addl t0,0,t0 */
2325     *a++ = 0x40401002; /* addl t1,0,t1 */
2326     }
2327    
2328     /*
2329     * In the general case: Only allow mtc if it does NOT
2330     * change the register!!
2331     */
2332    
2333     switch (rd) {
2334     case COP0_INDEX:
2335     break;
2336    
2337     case COP0_EPC:
2338     break;
2339    
2340     /* TODO: Some bits are not writable */
2341     case COP0_ENTRYLO0:
2342     case COP0_ENTRYLO1:
2343     break;
2344    
2345     case COP0_ENTRYHI:
2346     /*
2347     * Entryhi is ok to write to, as long as the
2348     * ASID isn't changed. (That would require
2349     * cache invalidations etc. Instead of checking
2350     * for MMU3K vs others, we just assume that all the
2351     * lowest 12 bits must be the same.
2352     */
2353     /* ff 0f bf 20 lda t4,0x0fff */
2354     /* 03 00 25 44 and t0,t4,t2 */
2355     /* 04 00 45 44 and t1,t4,t3 */
2356     /* a3 05 64 40 cmpeq t2,t3,t2 */
2357     /* 01 00 60 f4 bne t2,<ok> */
2358     *a++ = 0x20bf0fff;
2359     *a++ = 0x44250003;
2360     *a++ = 0x44450004;
2361     *a++ = 0x406405a3;
2362     jump = a;
2363     *a++ = 0; /* later */
2364     *addrp = (unsigned char *) a;
2365     bintrans_write_chunkreturn_fail(addrp);
2366     a = (uint32_t *) *addrp;
2367     *jump = 0xf4600000 | (((size_t)a - (size_t)jump - 4) / 4);
2368     break;
2369    
2370     case COP0_STATUS:
2371     /* Only allow updates to the status register if
2372     the interrupt enable bits were changed, but no
2373     other bits! */
2374     if (mem->bintrans_32bit_only) {
2375     /* R3000 etc. */
2376     /* t4 = 0x0fe70000; */
2377     *a++ = 0x20bf0000;
2378     *a++ = 0x24a50fe7;
2379     } else {
2380     /* fe 00 bf 20 lda t4,0x00fe */
2381     /* ff ff a5 24 ldah t4,-1(t4) */
2382     *a++ = 0x20bf0000;
2383     *a++ = 0x24a5ffff;
2384     }
2385    
2386     /* 03 00 25 44 and t0,t4,t2 */
2387     /* 04 00 45 44 and t1,t4,t3 */
2388     /* a3 05 64 40 cmpeq t2,t3,t2 */
2389     /* 01 00 60 f4 bne t2,<ok> */
2390     *a++ = 0x44250003;
2391     *a++ = 0x44450004;
2392     *a++ = 0x406405a3;
2393     jump = a;
2394     *a++ = 0; /* later */
2395     *addrp = (unsigned char *) a;
2396     bintrans_write_chunkreturn_fail(addrp);
2397     a = (uint32_t *) *addrp;
2398     *jump = 0xf4600000 | (((size_t)a - (size_t)jump - 4) / 4);
2399    
2400     /* If enabling interrupt bits would cause an
2401     exception, then don't do it: */
2402     ofs = ((size_t)&dummy_cpu.cd.mips.coproc[0]) - (size_t)&dummy_cpu;
2403     *a++ = 0xa4900000 | (ofs & 0xffff); /* ldq t3,coproc[0](a0) */
2404     ofs = ((size_t)&dummy_coproc.reg[COP0_CAUSE]) - (size_t)&dummy_coproc;
2405     *a++ = 0xa4a40000 | (ofs & 0xffff); /* ldq t4,reg_rd(t3) */
2406    
2407     /* 02 00 a1 44 and t4,t0,t1 */
2408     /* 83 16 41 48 srl t1,0x8,t2 */
2409     /* 04 f0 7f 44 and t2,0xff,t3 */
2410     *a++ = 0x44a10002;
2411     *a++ = 0x48411683;
2412     *a++ = 0x447ff004;
2413     /* 01 00 80 e4 beq t3,<ok> */
2414     jump = a;
2415     *a++ = 0; /* later */
2416     *addrp = (unsigned char *) a;
2417     bintrans_write_chunkreturn_fail(addrp);
2418     a = (uint32_t *) *addrp;
2419     *jump = 0xe4800000 | (((size_t)a - (size_t)jump - 4) / 4);
2420     break;
2421    
2422     default:
2423     /* a3 05 22 40 cmpeq t0,t1,t2 */
2424     /* 01 00 60 f4 bne t2,<ok> */
2425     *a++ = 0x402205a3;
2426     jump = a;
2427     *a++ = 0; /* later */
2428     *addrp = (unsigned char *) a;
2429     bintrans_write_chunkreturn_fail(addrp);
2430     a = (uint32_t *) *addrp;
2431     *jump = 0xf4600000 | (((size_t)a - (size_t)jump - 4) / 4);
2432     }
2433    
2434     *a++ = 0x40201402; /* addq t0,0,t1 */
2435    
2436     ofs = ((size_t)&dummy_cpu.cd.mips.coproc[0]) - (size_t)&dummy_cpu;
2437     *a++ = 0xa4300000 | (ofs & 0xffff); /* ldq t0,coproc[0](a0) */
2438     ofs = ((size_t)&dummy_coproc.reg[rd]) - (size_t)&dummy_coproc;
2439     *a++ = 0xb4410000 | (ofs & 0xffff); /* stq t1,reg_rd(t0) */
2440     } else {
2441     /* mfc: */
2442     if (!flag64bit) {
2443     *a++ = 0x40401002; /* addl t1,0,t1 */
2444     }
2445    
2446     *addrp = (unsigned char *) a;
2447     bintrans_move_Alpha_reg_into_MIPS_reg(addrp, ALPHA_T1, rt);
2448     a = (uint32_t *) *addrp;
2449     }
2450    
2451     *addrp = (unsigned char *) a;
2452    
2453     bintrans_write_pc_inc(addrp);
2454     return 1;
2455     }
2456    
2457    
2458     /*
2459     * bintrans_write_instruction__tlb_rfe_etc():
2460     */
2461     static int bintrans_write_instruction__tlb_rfe_etc(unsigned char **addrp,
2462     int itype)
2463     {
2464     uint32_t *a;
2465     int ofs = 0;
2466    
2467     switch (itype) {
2468     case CALL_TLBWI:
2469     case CALL_TLBWR:
2470     case CALL_TLBP:
2471     case CALL_TLBR:
2472     case CALL_RFE:
2473     case CALL_ERET:
2474     case CALL_BREAK:
2475     case CALL_SYSCALL:
2476     break;
2477     default:
2478     return 0;
2479     }
2480    
2481     a = (uint32_t *) *addrp;
2482    
2483     /* a0 = pointer to the cpu struct */
2484    
2485     switch (itype) {
2486     case CALL_TLBWI:
2487     case CALL_TLBWR:
2488     /* a1 = 0 for indexed, 1 for random */
2489     *a++ = 0x223f0000 | (itype == CALL_TLBWR);
2490     break;
2491     case CALL_TLBP:
2492     case CALL_TLBR:
2493     /* a1 = 0 for probe, 1 for read */
2494     *a++ = 0x223f0000 | (itype == CALL_TLBR);
2495     break;
2496     case CALL_BREAK:
2497     case CALL_SYSCALL:
2498     *a++ = 0x223f0000 | (itype == CALL_BREAK? EXCEPTION_BP : EXCEPTION_SYS);
2499     break;
2500     }
2501    
2502     /* Put PC into the cpu struct (both pc and pc_last). */
2503     *a++ = 0xb4d00000 | ofs_pc; /* stq t5,"pc"(a0) */
2504     *a++ = 0xb4d00000 | ofs_pc_last;/* stq t5,"pc_last"(a0) */
2505    
2506     /* Save a0 and the old return address on the stack: */
2507     *a++ = 0x23deff80; /* lda sp,-128(sp) */
2508    
2509     *a++ = 0xb75e0000; /* stq ra,0(sp) */
2510     *a++ = 0xb61e0008; /* stq a0,8(sp) */
2511     *a++ = 0xb0fe0018; /* stl t6,24(sp) */
2512     *a++ = 0xb71e0020; /* stq t10,32(sp) */
2513     *a++ = 0xb73e0028; /* stq t11,40(sp) */
2514     *a++ = 0xb51e0030; /* stq t7,48(sp) */
2515     *a++ = 0xb6de0038; /* stq t8,56(sp) */
2516     *a++ = 0xb6fe0040; /* stq t9,64(sp) */
2517    
2518     switch (itype) {
2519     case CALL_TLBP:
2520     case CALL_TLBR:
2521     ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_fast_tlbpr) - (size_t)&dummy_cpu;
2522     break;
2523     case CALL_TLBWR:
2524     case CALL_TLBWI:
2525     ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_fast_tlbwri) - (size_t)&dummy_cpu;
2526     break;
2527     case CALL_RFE:
2528     ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_fast_rfe) - (size_t)&dummy_cpu;
2529     break;
2530     case CALL_ERET:
2531     ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_fast_eret) - (size_t)&dummy_cpu;
2532     break;
2533     case CALL_BREAK:
2534     case CALL_SYSCALL:
2535     ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_simple_exception) - (size_t)&dummy_cpu;
2536     break;
2537     }
2538    
2539     *a++ = 0xa7700000 | ofs; /* ldq t12,0(a0) */
2540    
2541     /* Call bintrans_fast_tlbwr: */
2542     *a++ = 0x6b5b4000; /* jsr ra,(t12),<after> */
2543    
2544     /* Restore the old return address and a0 from the stack: */
2545     *a++ = 0xa75e0000; /* ldq ra,0(sp) */
2546     *a++ = 0xa61e0008; /* ldq a0,8(sp) */
2547     *a++ = 0xa0fe0018; /* ldl t6,24(sp) */
2548     *a++ = 0xa71e0020; /* ldq t10,32(sp) */
2549     *a++ = 0xa73e0028; /* ldq t11,40(sp) */
2550     *a++ = 0xa51e0030; /* ldq t7,48(sp) */
2551     *a++ = 0xa6de0038; /* ldq t8,56(sp) */
2552     *a++ = 0xa6fe0040; /* ldq t9,64(sp) */
2553    
2554     *a++ = 0x23de0080; /* lda sp,128(sp) */
2555    
2556     /* Load PC from the cpu struct. */
2557     *a++ = 0xa4d00000 | ofs_pc; /* ldq t5,"pc"(a0) */
2558    
2559     *addrp = (unsigned char *) a;
2560    
2561     switch (itype) {
2562     case CALL_ERET:
2563     case CALL_BREAK:
2564     case CALL_SYSCALL:
2565     break;
2566     default:
2567     bintrans_write_pc_inc(addrp);
2568     }
2569    
2570     return 1;
2571     }
2572    
2573    
2574     /*
2575     * bintrans_backend_init():
2576     *
2577     * This is neccessary for broken 2.95.4 compilers on FreeBSD/Alpha 4.9,
2578     * and probably a few others. (For Compaq's CC, and for gcc 3.x, this
2579     * wouldn't be neccessary, and the old code would have worked.)
2580     */
2581     static void bintrans_backend_init(void)
2582     {
2583     int size;
2584     uint32_t *p;
2585    
2586    
2587     /* "runchunk": */
2588     size = 256; /* NOTE: This MUST be enough, or we fail */
2589     p = (uint32_t *)mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC,
2590     MAP_ANON | MAP_PRIVATE, -1, 0);
2591    
2592     /* If mmap() failed, try malloc(): */
2593     if (p == NULL) {
2594     p = malloc(size);
2595     if (p == NULL) {
2596     fprintf(stderr, "bintrans_backend_init(): out of memory\n");
2597     exit(1);
2598     }
2599     }
2600    
2601     bintrans_runchunk = (void *)p;
2602    
2603     *p++ = 0x23deffa0; /* lda sp,-0x60(sp) */
2604     *p++ = 0xb75e0000; /* stq ra,0(sp) */
2605     *p++ = 0xb53e0008; /* stq s0,8(sp) */
2606     *p++ = 0xb55e0010; /* stq s1,16(sp) */
2607     *p++ = 0xb57e0018; /* stq s2,24(sp) */
2608     *p++ = 0xb59e0020; /* stq s3,32(sp) */
2609     *p++ = 0xb5be0028; /* stq s4,40(sp) */
2610     *p++ = 0xb5de0030; /* stq s5,48(sp) */
2611     *p++ = 0xb5fe0038; /* stq s6,56(sp) */
2612     *p++ = 0xb7be0058; /* stq gp,0x58(sp) */
2613    
2614     *p++ = 0xa4d00000 | ofs_pc; /* ldq t5,"pc"(a0) */
2615     *p++ = 0xa0f00000 | ofs_n; /* ldl t6,"bintrans_instructions_executed"(a0) */
2616     *p++ = 0xa5100000 | ofs_a0; /* ldq t7,"a0"(a0) */
2617     *p++ = 0xa6d00000 | ofs_a1; /* ldq t8,"a1"(a0) */
2618     *p++ = 0xa6f00000 | ofs_s0; /* ldq t9,"s0"(a0) */
2619     *p++ = 0xa1300000 | ofs_ds; /* ldl s0,"delay_slot"(a0) */
2620     *p++ = 0xa5500000 | ofs_ja; /* ldq s1,"delay_jmpaddr"(a0) */
2621     *p++ = 0xa5700000 | ofs_sp; /* ldq s2,"gpr[sp]"(a0) */
2622     *p++ = 0xa5900000 | ofs_ra; /* ldq s3,"gpr[ra]"(a0) */
2623     *p++ = 0xa5b00000 | ofs_t0; /* ldq s4,"gpr[t0]"(a0) */
2624     *p++ = 0xa5d00000 | ofs_t1; /* ldq s5,"gpr[t1]"(a0) */
2625     *p++ = 0xa5f00000 | ofs_t2; /* ldq s6,"gpr[t2]"(a0) */
2626     *p++ = 0xa7100000 | ofs_tbl0; /* ldq t10,table0(a0) */
2627     *p++ = 0xa7300000 | ofs_v0; /* ldq t11,"gpr[v0]"(a0) */
2628    
2629     *p++ = 0x6b514000; /* jsr ra,(a1),<back> */
2630    
2631     *p++ = 0xb4d00000 | ofs_pc; /* stq t5,"pc"(a0) */
2632     *p++ = 0xb0f00000 | ofs_n; /* stl t6,"bintrans_instructions_executed"(a0) */
2633     *p++ = 0xb5100000 | ofs_a0; /* stq t7,"a0"(a0) */
2634     *p++ = 0xb6d00000 | ofs_a1; /* stq t8,"a1"(a0) */
2635     *p++ = 0xb6f00000 | ofs_s0; /* stq t9,"s0"(a0) */
2636     *p++ = 0xb1300000 | ofs_ds; /* stl s0,"delay_slot"(a0) */
2637     *p++ = 0xb5500000 | ofs_ja; /* stq s1,"delay_jmpaddr"(a0) */
2638     *p++ = 0xb5700000 | ofs_sp; /* stq s2,"gpr[sp]"(a0) */
2639     *p++ = 0xb5900000 | ofs_ra; /* stq s3,"gpr[ra]"(a0) */
2640     *p++ = 0xb5b00000 | ofs_t0; /* stq s4,"gpr[t0]"(a0) */
2641     *p++ = 0xb5d00000 | ofs_t1; /* stq s5,"gpr[t1]"(a0) */
2642     *p++ = 0xb5f00000 | ofs_t2; /* stq s6,"gpr[t2]"(a0) */
2643     *p++ = 0xb7300000 | ofs_v0; /* stq t11,"gpr[v0]"(a0) */
2644    
2645     *p++ = 0xa75e0000; /* ldq ra,0(sp) */
2646     *p++ = 0xa53e0008; /* ldq s0,8(sp) */
2647     *p++ = 0xa55e0010; /* ldq s1,16(sp) */
2648     *p++ = 0xa57e0018; /* ldq s2,24(sp) */
2649     *p++ = 0xa59e0020; /* ldq s3,32(sp) */
2650     *p++ = 0xa5be0028; /* ldq s4,40(sp) */
2651     *p++ = 0xa5de0030; /* ldq s5,48(sp) */
2652     *p++ = 0xa5fe0038; /* ldq s6,56(sp) */
2653     *p++ = 0xa7be0058; /* ldq gp,0x58(sp) */
2654     *p++ = 0x23de0060; /* lda sp,0x60(sp) */
2655     *p++ = 0x6bfa8001; /* ret */
2656    
2657    
2658     /* "jump to 32bit pc": */
2659     size = 128; /* WARNING! Don't make this too small. */
2660     p = (uint32_t *)mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC,
2661     MAP_ANON | MAP_PRIVATE, -1, 0);
2662    
2663     /* If mmap() failed, try malloc(): */
2664     if (p == NULL) {
2665     p = malloc(size);
2666     if (p == NULL) {
2667     fprintf(stderr, "bintrans_backend_init(): out of memory\n");
2668     exit(1);
2669     }
2670     }
2671    
2672     bintrans_jump_to_32bit_pc = (void *)p;
2673    
2674     /* Don't execute too many instructions: */
2675     *p++ = 0x205f0000 | (N_SAFE_BINTRANS_LIMIT-1); /* lda t1,safe-1 */
2676    
2677     *p++ = 0x40e20da1; /* cmple t6,t1,t0 */
2678     *p++ = 0xf4200001; /* bne */
2679     *p++ = 0x6bfa8001; /* ret */
2680    
2681     *p++ = 0x40c01411; /* addq t5,0,a1 */
2682    
2683     /*
2684     * Special case for 32-bit addressing:
2685     *
2686     * t1 = 1023;
2687     * t2 = ((a1 >> 22) & t1) * sizeof(void *);
2688     * t3 = ((a1 >> 12) & t1) * sizeof(void *);
2689     * t1 = a1 & 4095;
2690     */
2691     *p++ = 0x205f1ff8; /* lda t1,1023 * 8 */
2692     *p++ = 0x4a227683; /* srl a1,19,t2 */
2693     *p++ = 0x4a213684; /* srl a1, 9,t3 */
2694     *p++ = 0x44620003; /* and t2,t1,t2 */
2695    
2696     /*
2697     * t10 is vaddr_to_hostaddr_table0
2698     *
2699     * a3 = tbl0[t2] (load entry from tbl0)
2700     */
2701     *p++ = 0x43030412; /* addq t10,t2,a2 */
2702     *p++ = 0x44820004; /* and t3,t1,t3 */
2703     *p++ = 0xa6720000; /* ldq a3,0(a2) */
2704     *p++ = 0x205f0ffc; /* lda t1,0xffc */
2705    
2706     /*
2707     * a3 = tbl1[t3] (load entry from tbl1 (whic is a3))
2708     */
2709     *p++ = 0x42640413; /* addq a3,t3,a3 */
2710     *p++ = 0x46220002; /* and a1,t1,t1 */
2711    
2712     *p++ = 0xa6730000 | ofs_c0; /* ldq a3,chunks[0](a3) */
2713    
2714     /*
2715     * NULL? Then just return.
2716     */
2717     *p++ = 0xf6600001; /* bne a3,<ok> */
2718     *p++ = 0x6bfa8001; /* ret */
2719    
2720     *p++ = 0x40530402; /* addq t1,a3,t1 */
2721     *p++ = 0xa0220000; /* ldl t0,0(t1) */
2722    
2723     /* No translation? Then return. */
2724     *p++ = 0xe4200003; /* beq t0,<skip> */
2725    
2726     *p++ = 0xa4700000 | ofs_cb; /* ldq t2,chunk_base_address(a0) */
2727    
2728     *p++ = 0x40230401; /* addq t0,t2,t0 */
2729     *p++ = 0x6be10000; /* jmp (t0) */
2730    
2731     /* Return to the main translation loop. */
2732     *p++ = 0x6bfa8001; /* ret */
2733     }
2734    

  ViewVC Help
Powered by ViewVC 1.1.26