1 |
/* |
2 |
* Copyright (C) 2004-2005 Anders Gavare. All rights reserved. |
3 |
* |
4 |
* Redistribution and use in source and binary forms, with or without |
5 |
* modification, are permitted provided that the following conditions are met: |
6 |
* |
7 |
* 1. Redistributions of source code must retain the above copyright |
8 |
* notice, this list of conditions and the following disclaimer. |
9 |
* 2. Redistributions in binary form must reproduce the above copyright |
10 |
* notice, this list of conditions and the following disclaimer in the |
11 |
* documentation and/or other materials provided with the distribution. |
12 |
* 3. The name of the author may not be used to endorse or promote products |
13 |
* derived from this software without specific prior written permission. |
14 |
* |
15 |
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
16 |
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
17 |
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
18 |
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
19 |
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
20 |
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
21 |
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
22 |
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
23 |
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
24 |
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
25 |
* SUCH DAMAGE. |
26 |
* |
27 |
* |
28 |
* $Id: bintrans_alpha.c,v 1.125 2005/07/31 08:47:56 debug Exp $ |
29 |
* |
30 |
* Alpha specific code for dynamic binary translation. |
31 |
* |
32 |
* See bintrans.c for more information. Included from bintrans.c. |
33 |
* |
34 |
* |
35 |
* Some Alpha registers that are reasonable to use: |
36 |
* |
37 |
* t5..t7 6..8 3 |
38 |
* s0..s6 9..15 7 |
39 |
* a1..a5 17..21 5 |
40 |
* t8..t11 22..25 4 |
41 |
* |
42 |
* These can be "mapped" to MIPS registers in the translated code, except a0 |
43 |
* which points to the cpu struct, and t0..t4 (or so) which are used by the |
44 |
* translated code as temporaries. |
45 |
* |
46 |
* 3 + 7 + 5 + 4 = 19 available registers. Of course, all (except s0..s6) must |
47 |
* be saved when calling external functions, such as when calling tlbp and |
48 |
* other external functions. |
49 |
* |
50 |
* Which are the 19 most commonly used MIPS registers? (This will include the |
51 |
 *  pc, and the "current number of executed translated instructions".)
52 |
* |
53 |
* The current allocation is as follows: |
54 |
* |
55 |
* Alpha: MIPS: |
56 |
* ------ ----- |
57 |
* |
58 |
* t5 pc (64-bit) |
59 |
* t6 bintrans_instructions_executed (32-bit int) |
60 |
* t7 a0 (mips register 4) (64-bit) |
61 |
* t8 a1 (mips register 5) (64-bit) |
62 |
* t9 s0 (mips register 16) (64-bit) |
63 |
* t10 load table base cached |
64 |
* t11 v0 (mips register 2) (64-bit) |
65 |
* s0 delay_slot (32-bit int) |
66 |
* s1 delay_jmpaddr (64-bit) |
67 |
* s2 sp (mips register 29) (64-bit) |
68 |
* s3 ra (mips register 31) (64-bit) |
69 |
* s4 t0 (mips register 8) (64-bit) |
70 |
* s5 t1 (mips register 9) (64-bit) |
71 |
* s6 store table base cached |
72 |
*/ |
73 |
|
74 |
/*
 *  Pseudo register numbers. They are accepted by the register move helpers
 *  in place of a MIPS general purpose register number (0..31), and select
 *  the pc, the delay_slot flag, or the delay_jmpaddr value instead.
 */
#define	MIPSREG_PC		(-3)
#define	MIPSREG_DELAY_SLOT	(-2)
#define	MIPSREG_DELAY_JMPADDR	(-1)

/*
 *  Alpha integer register numbers, by software name:
 */

/*  Temporaries t0..t7:  */
#define	ALPHA_T0		1
#define	ALPHA_T1		2
#define	ALPHA_T2		3
#define	ALPHA_T3		4
#define	ALPHA_T4		5
#define	ALPHA_T5		6
#define	ALPHA_T6		7
#define	ALPHA_T7		8

/*  Saved registers s0..s6:  */
#define	ALPHA_S0		9
#define	ALPHA_S1		10
#define	ALPHA_S2		11
#define	ALPHA_S3		12
#define	ALPHA_S4		13
#define	ALPHA_S5		14
#define	ALPHA_S6		15

/*  Argument registers a0..a5:  */
#define	ALPHA_A0		16
#define	ALPHA_A1		17
#define	ALPHA_A2		18
#define	ALPHA_A3		19
#define	ALPHA_A4		20
#define	ALPHA_A5		21

/*  Temporaries t8..t11:  */
#define	ALPHA_T8		22
#define	ALPHA_T9		23
#define	ALPHA_T10		24
#define	ALPHA_T11		25

/*  The register which always reads as zero:  */
#define	ALPHA_ZERO		31
104 |
|
105 |
static int map_MIPS_to_Alpha[32] = { |
106 |
ALPHA_ZERO, -1, ALPHA_T11, -1, /* 0 .. 3 */ |
107 |
ALPHA_T7, ALPHA_T8, -1, -1, /* 4 .. 7 */ |
108 |
ALPHA_S4, ALPHA_S5, -1, -1, /* 8 .. 11 */ |
109 |
-1, -1, -1, -1, /* 12 .. 15 */ |
110 |
ALPHA_T9, -1, -1, -1, /* 16 .. 19 */ |
111 |
-1, -1, -1, -1, /* 20 .. 23 */ |
112 |
-1, -1, -1, -1, /* 24 .. 27 */ |
113 |
-1, ALPHA_S2, -1, ALPHA_S3, /* 28 .. 31 */ |
114 |
}; |
115 |
|
116 |
|
117 |
struct cpu dummy_cpu; |
118 |
struct mips_coproc dummy_coproc; |
119 |
struct vth32_table dummy_vth32_table; |
120 |
|
121 |
/*
 *  bintrans_alpha_imb[]:
 *
 *  A small hand-assembled Alpha routine, stored one byte at a time with the
 *  least significant byte of each 32-bit instruction word first. It consists
 *  of an imb, a ret, and three nop/unop pairs as padding.
 */
unsigned char bintrans_alpha_imb[32] = {
	/*  imb   */	0x86, 0x00, 0x00, 0x00,
	/*  ret   */	0x01, 0x80, 0xfa, 0x6b,

	/*  nop   */	0x1f, 0x04, 0xff, 0x47,
	/*  unop  */	0x00, 0x00, 0xfe, 0x2e,

	/*  nop   */	0x1f, 0x04, 0xff, 0x47,
	/*  unop  */	0x00, 0x00, 0xfe, 0x2e,

	/*  nop   */	0x1f, 0x04, 0xff, 0x47,
	/*  unop  */	0x00, 0x00, 0xfe, 0x2e
};
131 |
|
132 |
|
133 |
/* |
134 |
* bintrans_host_cacheinvalidate() |
135 |
* |
136 |
* Invalidate the host's instruction cache. On Alpha, we do this by |
137 |
* executing an imb instruction. |
138 |
* |
139 |
* NOTE: A simple asm("imb"); would be enough here, but not all |
140 |
* compilers have such simple constructs, so an entire function has to |
141 |
* be written as bintrans_alpha_imb[] above. |
142 |
*/ |
143 |
static void bintrans_host_cacheinvalidate(unsigned char *p, size_t len) |
144 |
{ |
145 |
/* Long form of ``asm("imb");'' */ |
146 |
|
147 |
void (*f)(void); |
148 |
f = (void *)&bintrans_alpha_imb[0]; |
149 |
f(); |
150 |
} |
151 |
|
152 |
|
153 |
/*
 *  Byte offsets of various members, measured from the start of the object
 *  that contains them. They are computed by subtracting the address of a
 *  dummy object from the address of one of its members.
 *
 *  Note: the standard offsetof() macro (declared in stddef.h) could possibly
 *  be used instead, but it is not certain that it would take care of the
 *  compiler problems which led to this construct.
 */
#define	BINTRANS_ALPHA_CPU_OFS(member)	\
	(((size_t)&dummy_cpu.member) - ((size_t)&dummy_cpu))

/*  Program counter and bookkeeping:  */
#define	ofs_pc		BINTRANS_ALPHA_CPU_OFS(pc)
#define	ofs_pc_last	BINTRANS_ALPHA_CPU_OFS(cd.mips.pc_last)
#define	ofs_n		BINTRANS_ALPHA_CPU_OFS(cd.mips.bintrans_instructions_executed)
#define	ofs_ds		BINTRANS_ALPHA_CPU_OFS(cd.mips.delay_slot)
#define	ofs_ja		BINTRANS_ALPHA_CPU_OFS(cd.mips.delay_jmpaddr)

/*  Individual MIPS general purpose registers:  */
#define	ofs_sp		BINTRANS_ALPHA_CPU_OFS(cd.mips.gpr[MIPS_GPR_SP])
#define	ofs_ra		BINTRANS_ALPHA_CPU_OFS(cd.mips.gpr[MIPS_GPR_RA])
#define	ofs_a0		BINTRANS_ALPHA_CPU_OFS(cd.mips.gpr[MIPS_GPR_A0])
#define	ofs_a1		BINTRANS_ALPHA_CPU_OFS(cd.mips.gpr[MIPS_GPR_A1])
#define	ofs_t0		BINTRANS_ALPHA_CPU_OFS(cd.mips.gpr[MIPS_GPR_T0])
#define	ofs_t1		BINTRANS_ALPHA_CPU_OFS(cd.mips.gpr[MIPS_GPR_T1])
#define	ofs_t2		BINTRANS_ALPHA_CPU_OFS(cd.mips.gpr[MIPS_GPR_T2])
#define	ofs_v0		BINTRANS_ALPHA_CPU_OFS(cd.mips.gpr[MIPS_GPR_V0])
#define	ofs_s0		BINTRANS_ALPHA_CPU_OFS(cd.mips.gpr[MIPS_GPR_S0])

/*  Address translation tables and translated code chunks:  */
#define	ofs_tbl0	BINTRANS_ALPHA_CPU_OFS(cd.mips.vaddr_to_hostaddr_table0)
#define	ofs_c0		\
	((size_t)&dummy_vth32_table.bintrans_chunks[0] - (size_t)&dummy_vth32_table)
#define	ofs_cb		BINTRANS_ALPHA_CPU_OFS(cd.mips.chunk_base_address)

/*  Host load and store lookup tables:  */
#define	ofs_h_l		BINTRANS_ALPHA_CPU_OFS(cd.mips.host_load)
#define	ofs_h_s		BINTRANS_ALPHA_CPU_OFS(cd.mips.host_store)
175 |
|
176 |
|
177 |
static uint32_t bintrans_alpha_load_32bit[18] = { |
178 |
0x4a21f622, /* zapnot a1,0xf,t1 */ |
179 |
0x209f0fff, /* lda t3,4095 */ |
180 |
0x48419682, /* srl t1,0xc,t1 t1 = addr >> 12 */ |
181 |
0x46240004, /* and a1,t3,t3 t3 = addr & 4095 */ |
182 |
0x40580642, /* s8addq t1,t10,t1 &host_load[t1] */ |
183 |
0xa6620000, /* ldq a3,0(t1) a3 = host_load[t1] */ |
184 |
|
185 |
/* NULL? Then return failure at once. */ |
186 |
0xe6600002, /* beq a3, return */ |
187 |
|
188 |
/* The rest of the load/store code was written with t3 as the address. */ |
189 |
/* Add the offset within the page: */ |
190 |
0x42640404, /* addq a3,t3,t3 */ |
191 |
0x6be58000, /* ret (t4) */ |
192 |
|
193 |
/* return: */ |
194 |
0x243f0000 | (BINTRANS_DONT_RUN_NEXT >> 16), /* ldah t0,256 */ |
195 |
0x44270407, /* or t0,t6,t6 */ |
196 |
0x6bfa8000 /* ret */ |
197 |
}; |
198 |
|
199 |
static uint32_t bintrans_alpha_store_32bit[18] = { |
200 |
0x4a21f622, /* zapnot a1,0xf,t1 */ |
201 |
0x209f0fff, /* lda t3,4095 */ |
202 |
0x48419682, /* srl t1,0xc,t1 t1 = addr >> 12 */ |
203 |
0x46240004, /* and a1,t3,t3 t3 = addr & 4095 */ |
204 |
0x404f0642, /* s8addq t1,s6,t1 &host_store[t1] */ |
205 |
0xa6620000, /* ldq a3,0(t1) a3 = host_store[t1] */ |
206 |
|
207 |
/* NULL? Then return failure at once. */ |
208 |
/* beq a3, return */ |
209 |
0xe6600002, |
210 |
|
211 |
/* The rest of the load/store code was written with t3 as the address. */ |
212 |
0x42640404, /* addq a3,t3,t3 */ |
213 |
0x6be58000, /* ret (t4) */ |
214 |
|
215 |
/* return: */ |
216 |
0x243f0000 | (BINTRANS_DONT_RUN_NEXT >> 16), /* ldah t0,256 */ |
217 |
0x44270407, /* or t0,t6,t6 */ |
218 |
0x6bfa8000 /* ret */ |
219 |
}; |
220 |
|
221 |
static void (*bintrans_runchunk)(struct cpu *, unsigned char *); |
222 |
|
223 |
static void (*bintrans_jump_to_32bit_pc)(struct cpu *); |
224 |
|
225 |
static void (*bintrans_load_32bit) |
226 |
(struct cpu *) = (void *)bintrans_alpha_load_32bit; |
227 |
|
228 |
static void (*bintrans_store_32bit) |
229 |
(struct cpu *) = (void *)bintrans_alpha_store_32bit; |
230 |
|
231 |
|
232 |
/* |
233 |
* bintrans_write_quickjump(): |
234 |
*/ |
235 |
static void bintrans_write_quickjump(struct memory *mem, |
236 |
unsigned char *quickjump_code, uint32_t chunkoffset) |
237 |
{ |
238 |
int ofs; |
239 |
uint64_t alpha_addr = chunkoffset + |
240 |
(size_t)mem->translation_code_chunk_space; |
241 |
uint32_t *a = (uint32_t *)quickjump_code; |
242 |
|
243 |
ofs = (alpha_addr - ((size_t)a+4)) / 4; |
244 |
|
245 |
/* printf("chunkoffset=%i, %016llx %016llx %i\n", |
246 |
chunkoffset, (long long)alpha_addr, (long long)a, ofs); */ |
247 |
|
248 |
if (ofs > -0xfffff && ofs < 0xfffff) { |
249 |
*a++ = 0xc3e00000 | (ofs & 0x1fffff); /* br <chunk> */ |
250 |
} |
251 |
} |
252 |
|
253 |
|
254 |
/*
 *  bintrans_write_chunkreturn():
 *
 *  Emits a single "ret" instruction at *addrp, and advances *addrp past it
 *  (by 4 bytes).
 */
static void bintrans_write_chunkreturn(unsigned char **addrp)
{
	uint32_t *code = (uint32_t *) *addrp;

	code[0] = 0x6bfa8000;		/*  ret  */

	*addrp = (unsigned char *) (code + 1);
}
263 |
|
264 |
|
265 |
/* |
266 |
* bintrans_write_chunkreturn_fail(): |
267 |
*/ |
268 |
static void bintrans_write_chunkreturn_fail(unsigned char **addrp) |
269 |
{ |
270 |
uint32_t *a = (uint32_t *) *addrp; |
271 |
/* 00 01 3f 24 ldah t0,256 */ |
272 |
/* 07 04 27 44 or t0,t6,t6 */ |
273 |
*a++ = 0x243f0000 | (BINTRANS_DONT_RUN_NEXT >> 16); |
274 |
*a++ = 0x44270407; |
275 |
*a++ = 0x6bfa8000; /* ret */ |
276 |
*addrp = (unsigned char *) a; |
277 |
} |
278 |
|
279 |
|
280 |
/* |
281 |
* bintrans_move_MIPS_reg_into_Alpha_reg(): |
282 |
*/ |
283 |
static void bintrans_move_MIPS_reg_into_Alpha_reg(unsigned char **addrp, int mipsreg, int alphareg) |
284 |
{ |
285 |
uint32_t *a = (uint32_t *) *addrp; |
286 |
int ofs, alpha_mips_reg; |
287 |
|
288 |
switch (mipsreg) { |
289 |
case MIPSREG_PC: |
290 |
/* addq t5,0,alphareg */ |
291 |
*a++ = 0x40c01400 | alphareg; |
292 |
break; |
293 |
case MIPSREG_DELAY_SLOT: |
294 |
/* addq s0,0,alphareg */ |
295 |
*a++ = 0x41201400 | alphareg; |
296 |
break; |
297 |
case MIPSREG_DELAY_JMPADDR: |
298 |
/* addq s1,0,alphareg */ |
299 |
*a++ = 0x41401400 | alphareg; |
300 |
break; |
301 |
default: |
302 |
alpha_mips_reg = map_MIPS_to_Alpha[mipsreg]; |
303 |
if (alpha_mips_reg < 0) { |
304 |
ofs = ((size_t)&dummy_cpu.cd.mips.gpr[mipsreg]) - (size_t)&dummy_cpu; |
305 |
/* ldq alphareg,gpr[mipsreg](a0) */ |
306 |
*a++ = 0xa4100000 | (alphareg << 21) | ofs; |
307 |
} else { |
308 |
/* addq alpha_mips_reg,0,alphareg */ |
309 |
*a++ = 0x40001400 | (alpha_mips_reg << 21) | alphareg; |
310 |
} |
311 |
} |
312 |
*addrp = (unsigned char *) a; |
313 |
} |
314 |
|
315 |
|
316 |
/* |
317 |
* bintrans_move_Alpha_reg_into_MIPS_reg(): |
318 |
*/ |
319 |
static void bintrans_move_Alpha_reg_into_MIPS_reg(unsigned char **addrp, int alphareg, int mipsreg) |
320 |
{ |
321 |
uint32_t *a = (uint32_t *) *addrp; |
322 |
int ofs, alpha_mips_reg; |
323 |
|
324 |
switch (mipsreg) { |
325 |
case MIPSREG_PC: |
326 |
/* addq alphareg,0,t5 */ |
327 |
*a++ = 0x40001406 | (alphareg << 21); |
328 |
break; |
329 |
case MIPSREG_DELAY_SLOT: |
330 |
/* addq alphareg,0,s0 */ |
331 |
*a++ = 0x40001409 | (alphareg << 21); |
332 |
break; |
333 |
case MIPSREG_DELAY_JMPADDR: |
334 |
/* addq alphareg,0,s1 */ |
335 |
*a++ = 0x4000140a | (alphareg << 21); |
336 |
break; |
337 |
case 0: /* the zero register */ |
338 |
break; |
339 |
default: |
340 |
alpha_mips_reg = map_MIPS_to_Alpha[mipsreg]; |
341 |
if (alpha_mips_reg < 0) { |
342 |
/* stq alphareg,gpr[mipsreg](a0) */ |
343 |
ofs = ((size_t)&dummy_cpu.cd.mips.gpr[mipsreg]) - (size_t)&dummy_cpu; |
344 |
*a++ = 0xb4100000 | (alphareg << 21) | ofs; |
345 |
} else { |
346 |
/* addq alphareg,0,alpha_mips_reg */ |
347 |
*a++ = 0x40001400 | (alphareg << 21) | alpha_mips_reg; |
348 |
} |
349 |
} |
350 |
*addrp = (unsigned char *) a; |
351 |
} |
352 |
|
353 |
|
354 |
/*
 *  bintrans_write_pc_inc():
 *
 *  Emits two instructions at *addrp: one which adds 1 to t6 (the number of
 *  executed translated instructions) and one which adds 4 to t5 (the pc).
 *  *addrp is advanced past them (by 8 bytes).
 */
static void bintrans_write_pc_inc(unsigned char **addrp)
{
	uint32_t *code = (uint32_t *) *addrp;

	code[0] = 0x20e70001;		/*  lda t6,1(t6)  */
	code[1] = 0x20c60004;		/*  lda t5,4(t5)  */

	*addrp = (unsigned char *) (code + 2);
}
369 |
|
370 |
|
371 |
/* |
372 |
* bintrans_write_instruction__addiu_etc(): |
373 |
*/ |
374 |
static int bintrans_write_instruction__addiu_etc( |
375 |
struct memory *mem, unsigned char **addrp, |
376 |
int rt, int rs, int imm, int instruction_type) |
377 |
{ |
378 |
uint32_t *a; |
379 |
unsigned int uimm; |
380 |
int alpha_rs, alpha_rt; |
381 |
|
382 |
/* TODO: overflow detection for ADDI and DADDI */ |
383 |
switch (instruction_type) { |
384 |
case HI6_ADDI: |
385 |
case HI6_DADDI: |
386 |
return 0; |
387 |
} |
388 |
|
389 |
a = (uint32_t *) *addrp; |
390 |
|
391 |
if (rt == 0) |
392 |
goto rt0; |
393 |
|
394 |
uimm = imm & 0xffff; |
395 |
|
396 |
alpha_rs = map_MIPS_to_Alpha[rs]; |
397 |
alpha_rt = map_MIPS_to_Alpha[rt]; |
398 |
|
399 |
if (uimm == 0 && (instruction_type == HI6_ADDI || |
400 |
instruction_type == HI6_ADDIU || instruction_type == HI6_DADDI || |
401 |
instruction_type == HI6_DADDIU || instruction_type == HI6_ORI)) { |
402 |
if (alpha_rs >= 0 && alpha_rt >= 0) { |
403 |
/* addq rs,0,rt */ |
404 |
*a++ = 0x40001400 | (alpha_rs << 21) | alpha_rt; |
405 |
} else { |
406 |
*addrp = (unsigned char *) a; |
407 |
bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rs, ALPHA_T0); |
408 |
bintrans_move_Alpha_reg_into_MIPS_reg(addrp, ALPHA_T0, rt); |
409 |
a = (uint32_t *) *addrp; |
410 |
} |
411 |
goto rt0; |
412 |
} |
413 |
|
414 |
if (alpha_rs < 0) { |
415 |
/* ldq t0,"rs"(a0) */ |
416 |
*addrp = (unsigned char *) a; |
417 |
bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rs, ALPHA_T0); |
418 |
a = (uint32_t *) *addrp; |
419 |
alpha_rs = ALPHA_T0; |
420 |
} |
421 |
|
422 |
if (alpha_rt < 0) |
423 |
alpha_rt = ALPHA_T0; |
424 |
|
425 |
/* Place the result of the calculation in alpha_rt: */ |
426 |
|
427 |
switch (instruction_type) { |
428 |
case HI6_ADDIU: |
429 |
case HI6_DADDIU: |
430 |
case HI6_ADDI: |
431 |
case HI6_DADDI: |
432 |
if (uimm < 256) { |
433 |
if (instruction_type == HI6_ADDI || |
434 |
instruction_type == HI6_ADDIU) { |
435 |
/* addl rs,uimm,rt */ |
436 |
*a++ = 0x40001000 | (alpha_rs << 21) |
437 |
| (uimm << 13) | alpha_rt; |
438 |
} else { |
439 |
/* addq rs,uimm,rt */ |
440 |
*a++ = 0x40001400 | (alpha_rs << 21) |
441 |
| (uimm << 13) | alpha_rt; |
442 |
} |
443 |
} else { |
444 |
/* lda rt,imm(rs) */ |
445 |
*a++ = 0x20000000 | (alpha_rt << 21) | (alpha_rs << 16) | uimm; |
446 |
if (instruction_type == HI6_ADDI || |
447 |
instruction_type == HI6_ADDIU) { |
448 |
/* sign extend, 32->64 bits: addl t0,zero,t0 */ |
449 |
*a++ = 0x40001000 | (alpha_rt << 21) | alpha_rt; |
450 |
} |
451 |
} |
452 |
break; |
453 |
case HI6_ANDI: |
454 |
case HI6_ORI: |
455 |
case HI6_XORI: |
456 |
if (uimm >= 256) { |
457 |
/* lda t1,4660 */ |
458 |
*a++ = 0x205f0000 | uimm; |
459 |
if (uimm & 0x8000) { |
460 |
/* 01 00 42 24 ldah t1,1(t1) <-- if negative only */ |
461 |
*a++ = 0x24420001; |
462 |
} |
463 |
} |
464 |
|
465 |
switch (instruction_type) { |
466 |
case HI6_ANDI: |
467 |
if (uimm < 256) { |
468 |
/* and rs,uimm,rt */ |
469 |
*a++ = 0x44001000 | (alpha_rs << 21) |
470 |
| (uimm << 13) | alpha_rt; |
471 |
} else { |
472 |
/* and rs,t1,rt */ |
473 |
*a++ = 0x44020000 | (alpha_rs << 21) | alpha_rt; |
474 |
} |
475 |
break; |
476 |
case HI6_ORI: |
477 |
if (uimm < 256) { |
478 |
/* or rs,uimm,rt */ |
479 |
*a++ = 0x44001400 | (alpha_rs << 21) |
480 |
| (uimm << 13) | alpha_rt; |
481 |
} else { |
482 |
/* or rs,t1,rt */ |
483 |
*a++ = 0x44020400 | (alpha_rs << 21) | alpha_rt; |
484 |
} |
485 |
break; |
486 |
case HI6_XORI: |
487 |
if (uimm < 256) { |
488 |
/* xor rs,uimm,rt */ |
489 |
*a++ = 0x44001800 | (alpha_rs << 21) |
490 |
| (uimm << 13) | alpha_rt; |
491 |
} else { |
492 |
/* xor rs,t1,rt */ |
493 |
*a++ = 0x44020800 | (alpha_rs << 21) | alpha_rt; |
494 |
} |
495 |
break; |
496 |
} |
497 |
break; |
498 |
case HI6_SLTI: |
499 |
case HI6_SLTIU: |
500 |
/* lda t1,4660 */ |
501 |
*a++ = 0x205f0000 | uimm; |
502 |
|
503 |
switch (instruction_type) { |
504 |
case HI6_SLTI: |
505 |
/* cmplt rs,t1,rt */ |
506 |
*a++ = 0x400209a0 | (alpha_rs << 21) | alpha_rt; |
507 |
break; |
508 |
case HI6_SLTIU: |
509 |
/* cmpult rs,t1,rt */ |
510 |
*a++ = 0x400203a0 | (alpha_rs << 21) | alpha_rt; |
511 |
break; |
512 |
} |
513 |
break; |
514 |
} |
515 |
|
516 |
if (alpha_rt == ALPHA_T0) { |
517 |
*a++ = 0x5fff041f; /* fnop */ |
518 |
*addrp = (unsigned char *) a; |
519 |
bintrans_move_Alpha_reg_into_MIPS_reg(addrp, ALPHA_T0, rt); |
520 |
a = (uint32_t *) *addrp; |
521 |
} |
522 |
|
523 |
rt0: |
524 |
*addrp = (unsigned char *) a; |
525 |
bintrans_write_pc_inc(addrp); |
526 |
return 1; |
527 |
} |
528 |
|
529 |
|
530 |
/* |
531 |
* bintrans_write_instruction__addu_etc(): |
532 |
*/ |
533 |
static int bintrans_write_instruction__addu_etc( |
534 |
struct memory *mem, unsigned char **addrp, |
535 |
int rd, int rs, int rt, int sa, int instruction_type) |
536 |
{ |
537 |
unsigned char *a, *unmodified = NULL; |
538 |
int load64 = 0, store = 1, ofs, alpha_rd = ALPHA_T0; |
539 |
|
540 |
alpha_rd = map_MIPS_to_Alpha[rd]; |
541 |
if (alpha_rd < 0) |
542 |
alpha_rd = ALPHA_T0; |
543 |
|
544 |
switch (instruction_type) { |
545 |
case SPECIAL_DIV: |
546 |
case SPECIAL_DIVU: |
547 |
return 0; |
548 |
} |
549 |
|
550 |
switch (instruction_type) { |
551 |
case SPECIAL_DADDU: |
552 |
case SPECIAL_DSUBU: |
553 |
case SPECIAL_OR: |
554 |
case SPECIAL_AND: |
555 |
case SPECIAL_NOR: |
556 |
case SPECIAL_XOR: |
557 |
case SPECIAL_DSLL: |
558 |
case SPECIAL_DSRL: |
559 |
case SPECIAL_DSRA: |
560 |
case SPECIAL_DSLL32: |
561 |
case SPECIAL_DSRL32: |
562 |
case SPECIAL_DSRA32: |
563 |
case SPECIAL_SLT: |
564 |
case SPECIAL_SLTU: |
565 |
case SPECIAL_MOVZ: |
566 |
case SPECIAL_MOVN: |
567 |
load64 = 1; |
568 |
} |
569 |
|
570 |
switch (instruction_type) { |
571 |
case SPECIAL_MULT: |
572 |
case SPECIAL_MULTU: |
573 |
if (rd != 0) |
574 |
return 0; |
575 |
store = 0; |
576 |
break; |
577 |
default: |
578 |
if (rd == 0) |
579 |
goto rd0; |
580 |
} |
581 |
|
582 |
a = *addrp; |
583 |
|
584 |
if ((instruction_type == SPECIAL_ADDU || instruction_type == SPECIAL_DADDU |
585 |
|| instruction_type == SPECIAL_OR) && rt == 0) { |
586 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0); |
587 |
if (!load64) { |
588 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */ |
589 |
} |
590 |
bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rd); |
591 |
*addrp = a; |
592 |
goto rd0; |
593 |
} |
594 |
|
595 |
/* t0 = rs, t1 = rt */ |
596 |
if (load64) { |
597 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0); |
598 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T1); |
599 |
} else { |
600 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0); |
601 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */ |
602 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T1); |
603 |
*a++ = 0x02; *a++ = 0x10; *a++ = 0x40; *a++ = 0x40; /* addl t1,0,t1 */ |
604 |
} |
605 |
|
606 |
switch (instruction_type) { |
607 |
case SPECIAL_ADDU: |
608 |
*a++ = alpha_rd; *a++ = 0x00; *a++ = 0x22; *a++ = 0x40; /* addl t0,t1,rd */ |
609 |
break; |
610 |
case SPECIAL_DADDU: |
611 |
*a++ = alpha_rd; *a++ = 0x04; *a++ = 0x22; *a++ = 0x40; /* addq t0,t1,rd */ |
612 |
break; |
613 |
case SPECIAL_SUBU: |
614 |
*a++ = 0x20 + alpha_rd; *a++ = 0x01; *a++ = 0x22; *a++ = 0x40; /* subl t0,t1,t0 */ |
615 |
break; |
616 |
case SPECIAL_DSUBU: |
617 |
*a++ = 0x20 + alpha_rd; *a++ = 0x05; *a++ = 0x22; *a++ = 0x40; /* subq t0,t1,t0 */ |
618 |
break; |
619 |
case SPECIAL_AND: |
620 |
*a++ = alpha_rd; *a++ = 0x00; *a++ = 0x22; *a++ = 0x44; /* and t0,t1,t0 */ |
621 |
break; |
622 |
case SPECIAL_OR: |
623 |
*a++ = alpha_rd; *a++ = 0x04; *a++ = 0x22; *a++ = 0x44; /* or t0,t1,t0 */ |
624 |
break; |
625 |
case SPECIAL_NOR: |
626 |
*a++ = 0x01; *a++ = 0x04; *a++ = 0x22; *a++ = 0x44; /* or t0,t1,t0 */ |
627 |
*a++ = alpha_rd; *a++ = 0x05; *a++ = 0xe1; *a++ = 0x47; /* not t0,t0 */ |
628 |
break; |
629 |
case SPECIAL_XOR: |
630 |
*a++ = alpha_rd; *a++ = 0x08; *a++ = 0x22; *a++ = 0x44; /* xor t0,t1,t0 */ |
631 |
break; |
632 |
case SPECIAL_SLL: |
633 |
*a++ = 0x21; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sll t1,sa,t0 */ |
634 |
*a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */ |
635 |
break; |
636 |
case SPECIAL_SLLV: |
637 |
/* rd = rt << (rs&31) (logical) t0 = t1 << (t0&31) */ |
638 |
*a++ = 0x01; *a++ = 0xf0; *a++ = 0x23; *a++ = 0x44; /* and t0,31,t0 */ |
639 |
*a++ = 0x21; *a++ = 0x07; *a++ = 0x41; *a++ = 0x48; /* sll t1,t0,t0 */ |
640 |
*a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */ |
641 |
break; |
642 |
case SPECIAL_DSLL: |
643 |
*a++ = 0x20 + alpha_rd; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sll t1,sa,t0 */ |
644 |
break; |
645 |
case SPECIAL_DSLL32: |
646 |
sa += 32; |
647 |
*a++ = 0x20 + alpha_rd; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sll t1,sa,t0 */ |
648 |
break; |
649 |
case SPECIAL_SRA: |
650 |
*a++ = 0x81; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sra t1,sa,t0 */ |
651 |
*a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */ |
652 |
break; |
653 |
case SPECIAL_SRAV: |
654 |
/* rd = rt >> (rs&31) (arithmetic) t0 = t1 >> (t0&31) */ |
655 |
*a++ = 0x01; *a++ = 0xf0; *a++ = 0x23; *a++ = 0x44; /* and t0,31,t0 */ |
656 |
*a++ = 0x81; *a++ = 0x07; *a++ = 0x41; *a++ = 0x48; /* sra t1,t0,t0 */ |
657 |
*a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */ |
658 |
break; |
659 |
case SPECIAL_DSRA: |
660 |
*a++ = 0x80 + alpha_rd; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sra t1,sa,t0 */ |
661 |
break; |
662 |
case SPECIAL_DSRA32: |
663 |
sa += 32; |
664 |
*a++ = 0x80 + alpha_rd; *a++ = 0x17 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; /* sra t1,sa,t0 */ |
665 |
break; |
666 |
case SPECIAL_SRL: |
667 |
*a++ = 0x22; *a++ = 0xf6; *a++ = 0x41; *a++ = 0x48; /* zapnot t1,0xf,t1 (use only lowest 32 bits) */ |
668 |
/* Note: bits of sa are distributed among two different bytes. */ |
669 |
*a++ = 0x81; *a++ = 0x16 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; |
670 |
*a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl */ |
671 |
break; |
672 |
case SPECIAL_SRLV: |
673 |
/* rd = rt >> (rs&31) (logical) t0 = t1 >> (t0&31) */ |
674 |
*a++ = 0x22; *a++ = 0xf6; *a++ = 0x41; *a++ = 0x48; /* zapnot t1,0xf,t1 (use only lowest 32 bits) */ |
675 |
*a++ = 0x01; *a++ = 0xf0; *a++ = 0x23; *a++ = 0x44; /* and t0,31,t0 */ |
676 |
*a++ = 0x81; *a++ = 0x06; *a++ = 0x41; *a++ = 0x48; /* srl t1,t0,t0 */ |
677 |
*a++ = alpha_rd; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */ |
678 |
break; |
679 |
case SPECIAL_DSRL: |
680 |
/* Note: bits of sa are distributed among two different bytes. */ |
681 |
*a++ = 0x80 + alpha_rd; *a++ = 0x16 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; |
682 |
break; |
683 |
case SPECIAL_DSRL32: |
684 |
/* Note: bits of sa are distributed among two different bytes. */ |
685 |
sa += 32; |
686 |
*a++ = 0x80 + alpha_rd; *a++ = 0x16 + ((sa & 7) << 5); *a++ = 0x40 + (sa >> 3); *a++ = 0x48; |
687 |
break; |
688 |
case SPECIAL_SLT: |
689 |
*a++ = 0xa0 + alpha_rd; *a++ = 0x09; *a++ = 0x22; *a++ = 0x40; /* cmplt t0,t1,t0 */ |
690 |
break; |
691 |
case SPECIAL_SLTU: |
692 |
*a++ = 0xa0 + alpha_rd; *a++ = 0x03; *a++ = 0x22; *a++ = 0x40; /* cmpult t0,t1,t0 */ |
693 |
break; |
694 |
case SPECIAL_MULT: |
695 |
case SPECIAL_MULTU: |
696 |
if (instruction_type == SPECIAL_MULTU) { |
697 |
/* 21 f6 21 48 zapnot t0,0xf,t0 */ |
698 |
/* 22 f6 41 48 zapnot t1,0xf,t1 */ |
699 |
*a++ = 0x21; *a++ = 0xf6; *a++ = 0x21; *a++ = 0x48; |
700 |
*a++ = 0x22; *a++ = 0xf6; *a++ = 0x41; *a++ = 0x48; |
701 |
} |
702 |
|
703 |
/* 03 04 22 4c mulq t0,t1,t2 */ |
704 |
*a++ = 0x03; *a++ = 0x04; *a++ = 0x22; *a++ = 0x4c; |
705 |
|
706 |
/* 01 10 60 40 addl t2,0,t0 */ |
707 |
*a++ = 0x01; *a++ = 0x10; *a++ = 0x60; *a++ = 0x40; |
708 |
|
709 |
ofs = ((size_t)&dummy_cpu.cd.mips.lo) - (size_t)&dummy_cpu; |
710 |
*a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x30; *a++ = 0xb4; |
711 |
|
712 |
/* 81 17 64 48 sra t2,0x20,t0 */ |
713 |
*a++ = 0x81; *a++ = 0x17; *a++ = 0x64; *a++ = 0x48; |
714 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,0,t0 */ |
715 |
ofs = ((size_t)&dummy_cpu.cd.mips.hi) - (size_t)&dummy_cpu; |
716 |
*a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x30; *a++ = 0xb4; |
717 |
break; |
718 |
case SPECIAL_MOVZ: |
719 |
/* if rt=0 then rd=rs ==> if t1!=0 then t0=unmodified else t0=rd */ |
720 |
/* 00 00 40 f4 bne t1,unmodified */ |
721 |
unmodified = a; |
722 |
*a++ = 0x00; *a++ = 0x00; *a++ = 0x40; *a++ = 0xf4; |
723 |
alpha_rd = ALPHA_T0; |
724 |
break; |
725 |
case SPECIAL_MOVN: |
726 |
/* if rt!=0 then rd=rs ==> if t1=0 then t0=unmodified else t0=rd */ |
727 |
/* 00 00 40 e4 beq t1,unmodified */ |
728 |
unmodified = a; |
729 |
*a++ = 0x00; *a++ = 0x00; *a++ = 0x40; *a++ = 0xe4; |
730 |
alpha_rd = ALPHA_T0; |
731 |
break; |
732 |
} |
733 |
|
734 |
if (store && alpha_rd == ALPHA_T0) { |
735 |
bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rd); |
736 |
} |
737 |
|
738 |
if (unmodified != NULL) |
739 |
*unmodified = ((size_t)a - (size_t)unmodified - 4) / 4; |
740 |
|
741 |
*addrp = a; |
742 |
rd0: |
743 |
bintrans_write_pc_inc(addrp); |
744 |
return 1; |
745 |
} |
746 |
|
747 |
|
748 |
/* |
749 |
* bintrans_write_instruction__branch(): |
750 |
*/ |
751 |
static int bintrans_write_instruction__branch(unsigned char **addrp, |
752 |
int instruction_type, int regimm_type, int rt, int rs, int imm) |
753 |
{ |
754 |
uint32_t *a, *b, *c = NULL; |
755 |
int alpha_rs, alpha_rt, likely = 0, ofs; |
756 |
|
757 |
alpha_rs = map_MIPS_to_Alpha[rs]; |
758 |
alpha_rt = map_MIPS_to_Alpha[rt]; |
759 |
|
760 |
switch (instruction_type) { |
761 |
case HI6_BEQL: |
762 |
case HI6_BNEL: |
763 |
case HI6_BLEZL: |
764 |
case HI6_BGTZL: |
765 |
likely = 1; |
766 |
} |
767 |
|
768 |
/* |
769 |
* t0 = gpr[rt]; t1 = gpr[rs]; |
770 |
* |
771 |
* 50 00 30 a4 ldq t0,80(a0) |
772 |
* 58 00 50 a4 ldq t1,88(a0) |
773 |
*/ |
774 |
|
775 |
switch (instruction_type) { |
776 |
case HI6_BEQ: |
777 |
case HI6_BNE: |
778 |
case HI6_BEQL: |
779 |
case HI6_BNEL: |
780 |
if (alpha_rt < 0) { |
781 |
bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rt, ALPHA_T0); |
782 |
alpha_rt = ALPHA_T0; |
783 |
} |
784 |
} |
785 |
|
786 |
if (alpha_rs < 0) { |
787 |
bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rs, ALPHA_T1); |
788 |
alpha_rs = ALPHA_T1; |
789 |
} |
790 |
|
791 |
a = (uint32_t *) *addrp; |
792 |
|
793 |
/* |
794 |
* Compare alpha_rt (t0) and alpha_rs (t1) for equality (BEQ). |
795 |
* If the result was false (equal to zero), then skip a lot |
796 |
* of instructions: |
797 |
* |
798 |
* a1 05 22 40 cmpeq t0,t1,t0 |
799 |
* 01 00 20 e4 beq t0,14 <f+0x14> |
800 |
*/ |
801 |
b = NULL; |
802 |
if ((instruction_type == HI6_BEQ || |
803 |
instruction_type == HI6_BEQL) && rt != rs) { |
804 |
/* cmpeq rt,rs,t0 */ |
805 |
*a++ = 0x400005a1 | (alpha_rt << 21) | (alpha_rs << 16); |
806 |
b = a; |
807 |
*a++ = 0xe4200001; /* beq */ |
808 |
} |
809 |
if (instruction_type == HI6_BNE || instruction_type == HI6_BNEL) { |
810 |
/* cmpeq rt,rs,t0 */ |
811 |
*a++ = 0x400005a1 | (alpha_rt << 21) | (alpha_rs << 16); |
812 |
b = a; |
813 |
*a++ = 0xf4200001; /* bne */ |
814 |
} |
815 |
if (instruction_type == HI6_BLEZ || instruction_type == HI6_BLEZL) { |
816 |
/* cmple rs,0,t0 */ |
817 |
*a++ = 0x40001da1 | (alpha_rs << 21); |
818 |
b = a; |
819 |
*a++ = 0xe4200001; /* beq */ |
820 |
} |
821 |
if (instruction_type == HI6_BGTZ || instruction_type == HI6_BGTZL) { |
822 |
/* cmple rs,0,t0 */ |
823 |
*a++ = 0x40001da1 | (alpha_rs << 21); |
824 |
b = a; |
825 |
*a++ = 0xf4200001; /* bne */ |
826 |
} |
827 |
if (instruction_type == HI6_REGIMM && regimm_type == REGIMM_BLTZ) { |
828 |
/* cmplt rs,0,t0 */ |
829 |
*a++ = 0x400019a1 | (alpha_rs << 21); |
830 |
b = a; |
831 |
*a++ = 0xe4200001; /* beq */ |
832 |
} |
833 |
if (instruction_type == HI6_REGIMM && regimm_type == REGIMM_BGEZ) { |
834 |
*a++ = 0x207fffff; /* lda t2,-1 */ |
835 |
/* cmple rs,t2,t0 */ |
836 |
*a++ = 0x40030da1 | (alpha_rs << 21); |
837 |
b = a; |
838 |
*a++ = 0xf4200001; /* bne */ |
839 |
} |
840 |
|
841 |
/* |
842 |
* Perform the jump by setting cpu->delay_slot = TO_BE_DELAYED |
843 |
* and cpu->delay_jmpaddr = pc + 4 + (imm << 2). |
844 |
* |
845 |
* 04 00 26 20 lda t0,4(t5) add 4 |
846 |
* c8 01 5f 20 lda t1,456 |
847 |
* 4a 04 41 40 s4addq t1,t0,s1 s1 = (t1<<2) + t0 |
848 |
*/ |
849 |
|
850 |
*a++ = 0x20260004; /* lda t0,4(t5) */ |
851 |
*a++ = 0x205f0000 | (imm & 0xffff); /* lda */ |
852 |
*a++ = 0x4041044a; /* s4addq */ |
853 |
|
854 |
/* 02 00 3f 21 lda s0,TO_BE_DELAYED */ |
855 |
*a++ = 0x213f0000 | TO_BE_DELAYED; |
856 |
|
857 |
/* |
858 |
* Special case: "likely"-branches: |
859 |
*/ |
860 |
if (likely) { |
861 |
c = a; |
862 |
*a++ = 0xc3e00001; /* br delayed_ok */ |
863 |
|
864 |
if (b != NULL) |
865 |
*((unsigned char *)b) = ((size_t)a - (size_t)b - 4) / 4; |
866 |
|
867 |
/* cpu->cd.mips.nullify_next = 1; */ |
868 |
/* 01 00 3f 20 lda t0,1 */ |
869 |
*a++ = 0x203f0001; |
870 |
ofs = (size_t)&dummy_cpu.cd.mips.nullify_next - (size_t)&dummy_cpu; |
871 |
*a++ = 0xb0300000 | (ofs & 0xffff); |
872 |
|
873 |
/* fail, so that the next instruction is handled manually: */ |
874 |
*addrp = (unsigned char *) a; |
875 |
bintrans_write_pc_inc(addrp); |
876 |
bintrans_write_chunkreturn_fail(addrp); |
877 |
a = (uint32_t *) *addrp; |
878 |
|
879 |
if (c != NULL) |
880 |
*((unsigned char *)c) = ((size_t)a - (size_t)c - 4) / 4; |
881 |
} else { |
882 |
/* Normal (non-likely) exit: */ |
883 |
if (b != NULL) |
884 |
*((unsigned char *)b) = ((size_t)a - (size_t)b - 4) / 4; |
885 |
} |
886 |
|
887 |
*addrp = (unsigned char *) a; |
888 |
bintrans_write_pc_inc(addrp); |
889 |
return 1; |
890 |
} |
891 |
|
892 |
|
893 |
/* |
894 |
* bintrans_write_instruction__jr(): |
895 |
*/ |
896 |
static int bintrans_write_instruction__jr(unsigned char **addrp, int rs, int rd, int special) |
897 |
{ |
898 |
uint32_t *a; |
899 |
int alpha_rd; |
900 |
|
901 |
alpha_rd = map_MIPS_to_Alpha[rd]; |
902 |
if (alpha_rd < 0) |
903 |
alpha_rd = ALPHA_T0; |
904 |
|
905 |
/* |
906 |
* Perform the jump by setting cpu->delay_slot = TO_BE_DELAYED |
907 |
* and cpu->delay_jmpaddr = gpr[rs]. |
908 |
*/ |
909 |
|
910 |
bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rs, ALPHA_S1); |
911 |
|
912 |
a = (uint32_t *) *addrp; |
913 |
/* 02 00 3f 21 lda s0,TO_BE_DELAYED */ |
914 |
*a++ = 0x213f0000 | TO_BE_DELAYED; |
915 |
*addrp = (unsigned char *) a; |
916 |
|
917 |
if (special == SPECIAL_JALR && rd != 0) { |
918 |
/* gpr[rd] = retaddr (pc + 8) */ |
919 |
a = (uint32_t *) *addrp; |
920 |
/* lda alpha_rd,8(t5) */ |
921 |
*a++ = 0x20060008 | (alpha_rd << 21); |
922 |
*addrp = (unsigned char *) a; |
923 |
if (alpha_rd == ALPHA_T0) |
924 |
bintrans_move_Alpha_reg_into_MIPS_reg(addrp, ALPHA_T0, rd); |
925 |
} |
926 |
|
927 |
bintrans_write_pc_inc(addrp); |
928 |
return 1; |
929 |
} |
930 |
|
931 |
|
932 |
/* |
933 |
* bintrans_write_instruction__jal(): |
934 |
*/ |
935 |
static int bintrans_write_instruction__jal(unsigned char **addrp, |
936 |
int imm, int link) |
937 |
{ |
938 |
uint32_t *a; |
939 |
|
940 |
a = (uint32_t *) *addrp; |
941 |
|
942 |
/* gpr[31] = retaddr (NOTE: mips register 31 is in alpha reg s3) */ |
943 |
if (link) { |
944 |
*a++ = 0x21860008; /* lda s3,8(t5) */ |
945 |
} |
946 |
|
947 |
/* Set the jmpaddr to top 4 bits of pc + lowest 28 bits of imm*4: */ |
948 |
|
949 |
/* |
950 |
* imm = 4*imm; |
951 |
* t0 = ((pc + 4) & ~0x0fffffff) | imm; |
952 |
* |
953 |
* 04 00 26 20 lda t0,4(t5) <-- because the jump is from the delay slot |
954 |
* 23 01 5f 24 ldah t1,291 |
955 |
* 67 45 42 20 lda t1,17767(t1) |
956 |
* 00 f0 7f 24 ldah t2,-4096 |
957 |
* 04 00 23 44 and t0,t2,t3 |
958 |
* 0a 04 44 44 or t1,t3,s1 |
959 |
*/ |
960 |
imm *= 4; |
961 |
*a++ = 0x20260004; |
962 |
*a++ = 0x245f0000 | ((imm >> 16) + (imm & 0x8000? 1 : 0)); |
963 |
*a++ = 0x20420000 | (imm & 0xffff); |
964 |
*a++ = 0x247ff000; |
965 |
*a++ = 0x44230004; |
966 |
*a++ = 0x4444040a; |
967 |
|
968 |
/* 02 00 3f 21 lda s0,TO_BE_DELAYED */ |
969 |
*a++ = 0x213f0000 | TO_BE_DELAYED; |
970 |
|
971 |
/* If the machine continues executing here, it will return |
972 |
to the main loop, which is fine. */ |
973 |
|
974 |
*addrp = (unsigned char *) a; |
975 |
bintrans_write_pc_inc(addrp); |
976 |
return 1; |
977 |
} |
978 |
|
979 |
|
980 |
/* |
981 |
* bintrans_write_instruction__delayedbranch(): |
982 |
*/ |
983 |
static int bintrans_write_instruction__delayedbranch( |
984 |
struct memory *mem, unsigned char **addrp, |
985 |
uint32_t *potential_chunk_p, uint32_t *chunks, |
986 |
int only_care_about_chunk_p, int p, int forward) |
987 |
{ |
988 |
unsigned char *a, *skip=NULL, *generic64bit; |
989 |
int ofs; |
990 |
uint64_t alpha_addr, subaddr; |
991 |
|
992 |
a = *addrp; |
993 |
|
994 |
if (!only_care_about_chunk_p) { |
995 |
/* Skip all of this if there is no branch: */ |
996 |
skip = a; |
997 |
*a++ = 0; *a++ = 0; *a++ = 0x20; *a++ = 0xe5; /* beq s0,skip */ |
998 |
|
999 |
/* |
1000 |
* Perform the jump by setting cpu->delay_slot = 0 |
1001 |
* and pc = cpu->delay_jmpaddr. |
1002 |
*/ |
1003 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, MIPSREG_PC, ALPHA_T3); |
1004 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, MIPSREG_DELAY_JMPADDR, ALPHA_T5); |
1005 |
|
1006 |
/* 00 00 3f 21 lda s0,0 */ |
1007 |
*a++ = 0; *a++ = 0; *a++ = 0x3f; *a++ = 0x21; |
1008 |
|
1009 |
} |
1010 |
|
1011 |
if (potential_chunk_p == NULL) { |
1012 |
if (mem->bintrans_32bit_only) { |
1013 |
/* 34 12 70 a7 ldq t12,4660(a0) */ |
1014 |
ofs = (size_t)&dummy_cpu.cd.mips.bintrans_jump_to_32bit_pc - (size_t)&dummy_cpu; |
1015 |
*a++ = ofs; *a++ = ofs >> 8; *a++ = 0x70; *a++ = 0xa7; |
1016 |
|
1017 |
/* 00 00 fb 6b jmp (t12) */ |
1018 |
*a++ = 0; *a++ = 0; *a++ = 0xfb; *a++ = 0x6b; |
1019 |
} else { |
1020 |
/* |
1021 |
* If the highest 32 bits of the address are either |
1022 |
* 0x00000000 or 0xffffffff, then the tables used for |
1023 |
* 32-bit load/stores can be used. |
1024 |
* |
1025 |
* 81 16 24 4a srl a1,0x20,t0 |
1026 |
* 03 00 20 e4 beq t0,14 <ok1> |
1027 |
* 01 30 20 40 addl t0,0x1,t0 |
1028 |
* 01 00 20 e4 beq t0,14 <ok1> |
1029 |
* 01 00 e0 c3 br 18 <nook> |
1030 |
*/ |
1031 |
*a++ = 0x81; *a++ = 0x16; *a++ = 0x24; *a++ = 0x4a; |
1032 |
*a++ = 0x03; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4; |
1033 |
*a++ = 0x01; *a++ = 0x30; *a++ = 0x20; *a++ = 0x40; |
1034 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4; |
1035 |
generic64bit = a; |
1036 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; |
1037 |
|
1038 |
/* 34 12 70 a7 ldq t12,4660(a0) */ |
1039 |
ofs = (size_t)&dummy_cpu.cd.mips.bintrans_jump_to_32bit_pc - (size_t)&dummy_cpu; |
1040 |
*a++ = ofs; *a++ = ofs >> 8; *a++ = 0x70; *a++ = 0xa7; |
1041 |
|
1042 |
/* 00 00 fb 6b jmp (t12) */ |
1043 |
*a++ = 0; *a++ = 0; *a++ = 0xfb; *a++ = 0x6b; |
1044 |
|
1045 |
|
1046 |
if (generic64bit != NULL) |
1047 |
*generic64bit = ((size_t)a - (size_t)generic64bit - 4) / 4; |
1048 |
|
1049 |
/* Not much we can do here if this wasn't to the same |
1050 |
physical page... */ |
1051 |
|
1052 |
*a++ = 0xfc; *a++ = 0xff; *a++ = 0x84; *a++ = 0x20; /* lda t3,-4(t3) */ |
1053 |
|
1054 |
/* |
1055 |
* Compare the old pc (t3) and the new pc (t0). If they are on the |
1056 |
* same virtual page (which means that they are on the same physical |
1057 |
* page), then we can check the right chunk pointer, and if it |
1058 |
* is non-NULL, then we can jump there. Otherwise just return. |
1059 |
* |
1060 |
* 00 f0 5f 20 lda t1,-4096 |
1061 |
* 01 00 22 44 and t0,t1,t0 |
1062 |
* 04 00 82 44 and t3,t1,t3 |
1063 |
* a3 05 24 40 cmpeq t0,t3,t2 |
1064 |
* 01 00 60 f4 bne t2,7c <ok2> |
1065 |
* 00 80 fa 6b ret |
1066 |
*/ |
1067 |
*a++ = 0x00; *a++ = 0xf0; *a++ = 0x5f; *a++ = 0x20; /* lda */ |
1068 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0x22; *a++ = 0x44; /* and */ |
1069 |
*a++ = 0x04; *a++ = 0x00; *a++ = 0x82; *a++ = 0x44; /* and */ |
1070 |
*a++ = 0xa3; *a++ = 0x05; *a++ = 0x24; *a++ = 0x40; /* cmpeq */ |
1071 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0x60; *a++ = 0xf4; /* bne */ |
1072 |
*a++ = 0x00; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */ |
1073 |
|
1074 |
/* Don't execute too many instructions. (see comment below) */ |
1075 |
*a++ = (N_SAFE_BINTRANS_LIMIT-1)&255; *a++ = ((N_SAFE_BINTRANS_LIMIT-1) >> 8)&255; |
1076 |
*a++ = 0x5f; *a++ = 0x20; /* lda t1,0x1fff */ |
1077 |
*a++ = 0xa1; *a++ = 0x0d; *a++ = 0xe2; *a++ = 0x40; /* cmple t6,t1,t0 */ |
1078 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0x20; *a++ = 0xf4; /* bne */ |
1079 |
*a++ = 0x00; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */ |
1080 |
|
1081 |
/* 15 bits at a time, which means max 60 bits, but |
1082 |
that should be enough. the top 4 bits are probably |
1083 |
not used by userland alpha code. (TODO: verify this) */ |
1084 |
alpha_addr = (size_t)chunks; |
1085 |
subaddr = (alpha_addr >> 45) & 0x7fff; |
1086 |
|
1087 |
/* |
1088 |
* 00 00 3f 20 lda t0,0 |
1089 |
* 21 f7 21 48 sll t0,0xf,t0 |
1090 |
* 34 12 21 20 lda t0,4660(t0) |
1091 |
* 21 f7 21 48 sll t0,0xf,t0 |
1092 |
* 34 12 21 20 lda t0,4660(t0) |
1093 |
* 21 f7 21 48 sll t0,0xf,t0 |
1094 |
* 34 12 21 20 lda t0,4660(t0) |
1095 |
*/ |
1096 |
|
1097 |
/* Start with the topmost 15 bits: */ |
1098 |
*a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x3f; *a++ = 0x20; |
1099 |
*a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */ |
1100 |
|
1101 |
subaddr = (alpha_addr >> 30) & 0x7fff; |
1102 |
*a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20; |
1103 |
*a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */ |
1104 |
|
1105 |
subaddr = (alpha_addr >> 15) & 0x7fff; |
1106 |
*a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20; |
1107 |
*a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */ |
1108 |
|
1109 |
subaddr = alpha_addr & 0x7fff; |
1110 |
*a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20; |
1111 |
|
1112 |
/* |
1113 |
* t2 = pc |
1114 |
* t1 = t2 & 0xfff |
1115 |
* t0 += t1 |
1116 |
* |
1117 |
* ff 0f 5f 20 lda t1,4095 |
1118 |
* 02 00 62 44 and t2,t1,t1 |
1119 |
* 01 04 22 40 addq t0,t1,t0 |
1120 |
*/ |
1121 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, MIPSREG_PC, ALPHA_T2); |
1122 |
*a++ = 0xff; *a++ = 0x0f; *a++ = 0x5f; *a++ = 0x20; /* lda */ |
1123 |
*a++ = 0x02; *a++ = 0x00; *a++ = 0x62; *a++ = 0x44; /* and */ |
1124 |
*a++ = 0x01; *a++ = 0x04; *a++ = 0x22; *a++ = 0x40; /* addq */ |
1125 |
|
1126 |
/* |
1127 |
* Load the chunk pointer (actually, a 32-bit offset) into t0. |
1128 |
* If it is zero, then skip the following. |
1129 |
* Add cpu->chunk_base_address to t0. |
1130 |
* Jump to t0. |
1131 |
*/ |
1132 |
|
1133 |
*a++ = 0x00; *a++ = 0x00; *a++ = 0x21; *a++ = 0xa0; /* ldl t0,0(t0) */ |
1134 |
*a++ = 0x03; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4; /* beq t0,<skip> */ |
1135 |
|
1136 |
/* ldq t2,chunk_base_address(a0) */ |
1137 |
ofs = ((size_t)&dummy_cpu.cd.mips.chunk_base_address) - (size_t)&dummy_cpu; |
1138 |
*a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x70; *a++ = 0xa4; |
1139 |
/* addq t0,t2,t0 */ |
1140 |
*a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x40; |
1141 |
|
1142 |
/* 00 00 e1 6b jmp (t0) */ |
1143 |
*a++ = 0x00; *a++ = 0x00; *a++ = 0xe1; *a++ = 0x6b; /* jmp (t0) */ |
1144 |
|
1145 |
/* Failure, then return to the main loop. */ |
1146 |
*a++ = 0x00; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */ |
1147 |
} |
1148 |
} else { |
1149 |
/* |
1150 |
* Just to make sure that we don't become too unreliant |
1151 |
* on the main program loop, we need to return every once |
1152 |
* in a while (interrupts etc). |
1153 |
* |
1154 |
* Load the "nr of instructions executed" (which is an int) |
1155 |
* and see if it is below a certain threshold. If so, then |
1156 |
* we go on with the fast path (bintrans), otherwise we |
1157 |
* abort by returning. |
1158 |
* |
1159 |
* f4 01 5f 20 lda t1,500 (some low number...) |
1160 |
* a1 0d c2 40 cmple t6,t1,t0 |
1161 |
* 01 00 20 f4 bne t0,14 <f+0x14> |
1162 |
*/ |
1163 |
if (!only_care_about_chunk_p && !forward) { |
1164 |
*a++ = (N_SAFE_BINTRANS_LIMIT-1)&255; *a++ = ((N_SAFE_BINTRANS_LIMIT-1) >> 8)&255; |
1165 |
*a++ = 0x5f; *a++ = 0x20; /* lda t1,0x1fff */ |
1166 |
*a++ = 0xa1; *a++ = 0x0d; *a++ = 0xe2; *a++ = 0x40; /* cmple t6,t1,t0 */ |
1167 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0x20; *a++ = 0xf4; /* bne */ |
1168 |
*a++ = 0x00; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */ |
1169 |
} |
1170 |
|
1171 |
/* |
1172 |
* potential_chunk_p points to an "uint32_t". |
1173 |
* If this value is non-NULL, then it is a piece of Alpha |
1174 |
* machine language code corresponding to the address |
1175 |
* we're jumping to. Otherwise, those instructions haven't |
1176 |
* been translated yet, so we have to return to the main |
1177 |
* loop. (Actually, we have to add cpu->chunk_base_address, |
1178 |
* because the uint32_t is limited to 32-bit offsets.) |
1179 |
* |
1180 |
* Case 1: The value is non-NULL already at translation |
1181 |
* time. Then we can make a direct (fast) native |
1182 |
* Alpha jump to the code chunk. |
1183 |
* |
1184 |
* Case 2: The value was NULL at translation time, then we |
1185 |
* have to check during runtime. |
1186 |
*/ |
1187 |
|
1188 |
/* Case 1: */ |
1189 |
/* printf("%08x ", *potential_chunk_p); */ |
1190 |
alpha_addr = *potential_chunk_p + (size_t)mem->translation_code_chunk_space; |
1191 |
ofs = (alpha_addr - ((size_t)a+4)) / 4; |
1192 |
/* printf("%016llx %016llx %i\n", (long long)alpha_addr, (long long)a, ofs); */ |
1193 |
|
1194 |
if ((*potential_chunk_p) != 0 && ofs > -0xfffff && ofs < 0xfffff) { |
1195 |
*a++ = ofs & 255; *a++ = (ofs >> 8) & 255; *a++ = 0xe0 + ((ofs >> 16) & 0x1f); *a++ = 0xc3; /* br <chunk> */ |
1196 |
} else { |
1197 |
/* Case 2: */ |
1198 |
|
1199 |
bintrans_register_potential_quick_jump(mem, a, p); |
1200 |
|
1201 |
/* 15 bits at a time, which means max 60 bits, but |
1202 |
that should be enough. the top 4 bits are probably |
1203 |
not used by userland alpha code. (TODO: verify this) */ |
1204 |
alpha_addr = (size_t)potential_chunk_p; |
1205 |
subaddr = (alpha_addr >> 45) & 0x7fff; |
1206 |
|
1207 |
/* |
1208 |
* 00 00 3f 20 lda t0,0 |
1209 |
* 21 f7 21 48 sll t0,0xf,t0 |
1210 |
* 34 12 21 20 lda t0,4660(t0) |
1211 |
* 21 f7 21 48 sll t0,0xf,t0 |
1212 |
* 34 12 21 20 lda t0,4660(t0) |
1213 |
* 21 f7 21 48 sll t0,0xf,t0 |
1214 |
* 34 12 21 20 lda t0,4660(t0) |
1215 |
*/ |
1216 |
|
1217 |
/* Start with the topmost 15 bits: */ |
1218 |
*a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x3f; *a++ = 0x20; |
1219 |
*a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */ |
1220 |
|
1221 |
subaddr = (alpha_addr >> 30) & 0x7fff; |
1222 |
*a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20; |
1223 |
*a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */ |
1224 |
|
1225 |
subaddr = (alpha_addr >> 15) & 0x7fff; |
1226 |
*a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20; |
1227 |
*a++ = 0x21; *a++ = 0xf7; *a++ = 0x21; *a++ = 0x48; /* sll */ |
1228 |
|
1229 |
subaddr = alpha_addr & 0x7fff; |
1230 |
*a++ = (subaddr & 255); *a++ = (subaddr >> 8); *a++ = 0x21; *a++ = 0x20; |
1231 |
|
1232 |
/* |
1233 |
* Load the chunk pointer into t0. |
1234 |
* If it is NULL (zero), then skip the following jump. |
1235 |
* Jump to t0. |
1236 |
*/ |
1237 |
*a++ = 0x00; *a++ = 0x00; *a++ = 0x21; *a++ = 0xa0; /* ldl t0,0(t0) */ |
1238 |
*a++ = 0x03; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4; /* beq t0,<skip> */ |
1239 |
|
1240 |
/* ldq t2,chunk_base_address(a0) */ |
1241 |
ofs = ((size_t)&dummy_cpu.cd.mips.chunk_base_address) - (size_t)&dummy_cpu; |
1242 |
*a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x70; *a++ = 0xa4; |
1243 |
/* addq t0,t2,t0 */ |
1244 |
*a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x40; |
1245 |
|
1246 |
/* 00 00 e1 6b jmp (t0) */ |
1247 |
*a++ = 0x00; *a++ = 0x00; *a++ = 0xe1; *a++ = 0x6b; /* jmp (t0) */ |
1248 |
|
1249 |
/* "Failure", then let's return to the main loop. */ |
1250 |
*a++ = 0x00; *a++ = 0x80; *a++ = 0xfa; *a++ = 0x6b; /* ret */ |
1251 |
} |
1252 |
} |
1253 |
|
1254 |
if (skip != NULL) { |
1255 |
*skip = ((size_t)a - (size_t)skip - 4) / 4; |
1256 |
skip ++; |
1257 |
*skip = (((size_t)a - (size_t)skip - 4) / 4) >> 8; |
1258 |
} |
1259 |
|
1260 |
*addrp = a; |
1261 |
return 1; |
1262 |
} |
1263 |
|
1264 |
|
1265 |
/* |
1266 |
* bintrans_write_instruction__loadstore(): |
1267 |
*/ |
1268 |
static int bintrans_write_instruction__loadstore( |
1269 |
struct memory *mem, unsigned char **addrp, |
1270 |
int rt, int imm, int rs, int instruction_type, int bigendian, |
1271 |
int do_alignment_check) |
1272 |
{ |
1273 |
unsigned char *a, *fail, *generic64bit = NULL, *generic64bitA = NULL; |
1274 |
unsigned char *doloadstore = NULL, |
1275 |
*ok_unaligned_load3, *ok_unaligned_load2, *ok_unaligned_load1; |
1276 |
uint32_t *b; |
1277 |
int need_imm_lda = 0; |
1278 |
int ofs, alignment, load = 0, alpha_rs, alpha_rt, unaligned = 0; |
1279 |
|
1280 |
/* TODO: Not yet: */ |
1281 |
if (instruction_type == HI6_LQ_MDMX || instruction_type == HI6_SQ) { |
1282 |
return 0; |
1283 |
} |
1284 |
|
1285 |
switch (instruction_type) { |
1286 |
case HI6_LQ_MDMX: |
1287 |
case HI6_LD: |
1288 |
case HI6_LDL: |
1289 |
case HI6_LDR: |
1290 |
case HI6_LWU: |
1291 |
case HI6_LW: |
1292 |
case HI6_LWL: |
1293 |
case HI6_LWR: |
1294 |
case HI6_LHU: |
1295 |
case HI6_LH: |
1296 |
case HI6_LBU: |
1297 |
case HI6_LB: |
1298 |
load = 1; |
1299 |
if (rt == 0) |
1300 |
return 0; |
1301 |
} |
1302 |
|
1303 |
switch (instruction_type) { |
1304 |
case HI6_LDL: |
1305 |
case HI6_LDR: |
1306 |
case HI6_LWL: |
1307 |
case HI6_LWR: |
1308 |
case HI6_SDL: |
1309 |
case HI6_SDR: |
1310 |
case HI6_SWL: |
1311 |
case HI6_SWR: |
1312 |
unaligned = 1; |
1313 |
} |
1314 |
|
1315 |
a = *addrp; |
1316 |
|
1317 |
/* |
1318 |
* a1 = gpr[rs] + imm; |
1319 |
* |
1320 |
* rs_ofs 30 a4 ldq a1,rs(a0) |
1321 |
* imm 21 22 lda a1,imm(a1) |
1322 |
*/ |
1323 |
|
1324 |
alpha_rs = map_MIPS_to_Alpha[rs]; |
1325 |
if (alpha_rs < 0) { |
1326 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_A1); |
1327 |
alpha_rs = ALPHA_A1; |
1328 |
} else |
1329 |
need_imm_lda = 1; |
1330 |
|
1331 |
if (imm != 0 || need_imm_lda) { |
1332 |
*a++ = imm; *a++ = (imm >> 8); |
1333 |
*a++ = 0x20 + alpha_rs; *a++ = 0x22; |
1334 |
} |
1335 |
|
1336 |
alignment = 0; |
1337 |
switch (instruction_type) { |
1338 |
case HI6_LQ_MDMX: |
1339 |
case HI6_SQ: |
1340 |
alignment = 15; |
1341 |
break; |
1342 |
case HI6_LD: |
1343 |
case HI6_LDL: |
1344 |
case HI6_LDR: |
1345 |
case HI6_SD: |
1346 |
case HI6_SDL: |
1347 |
case HI6_SDR: |
1348 |
alignment = 7; |
1349 |
break; |
1350 |
case HI6_LW: |
1351 |
case HI6_LWL: |
1352 |
case HI6_LWR: |
1353 |
case HI6_LWU: |
1354 |
case HI6_SW: |
1355 |
case HI6_SWL: |
1356 |
case HI6_SWR: |
1357 |
alignment = 3; |
1358 |
break; |
1359 |
case HI6_LH: |
1360 |
case HI6_LHU: |
1361 |
case HI6_SH: |
1362 |
alignment = 1; |
1363 |
break; |
1364 |
} |
1365 |
|
1366 |
if (unaligned) { |
1367 |
/* |
1368 |
* Unaligned load/store: Perform the host load/store at |
1369 |
* an aligned address, and then figure out which bytes to |
1370 |
* actually load into the destination register. |
1371 |
* |
1372 |
* 02 30 20 46 and a1,alignment,t1 |
1373 |
* 31 05 22 42 subq a1,t1,a1 |
1374 |
*/ |
1375 |
*a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46; |
1376 |
*a++ = 0x31; *a++ = 0x05; *a++ = 0x22; *a++ = 0x42; |
1377 |
} else if (alignment > 0 && do_alignment_check) { |
1378 |
/* |
1379 |
* Check alignment: |
1380 |
* |
1381 |
* 02 30 20 46 and a1,0x1,t1 |
1382 |
* 02 70 20 46 and a1,0x3,t1 (one of these "and"s) |
1383 |
* 02 f0 20 46 and a1,0x7,t1 |
1384 |
* 02 f0 21 46 and a1,0xf,t1 |
1385 |
* 01 00 40 e4 beq t1,<okalign> |
1386 |
* 00 80 fa 6b ret |
1387 |
*/ |
1388 |
*a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46; |
1389 |
fail = a; |
1390 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0x40; *a++ = 0xe4; |
1391 |
*addrp = a; |
1392 |
bintrans_write_chunkreturn_fail(addrp); |
1393 |
a = *addrp; |
1394 |
*fail = ((size_t)a - (size_t)fail - 4) / 4; |
1395 |
} |
1396 |
|
1397 |
alpha_rt = map_MIPS_to_Alpha[rt]; |
1398 |
|
1399 |
if (mem->bintrans_32bit_only) { |
1400 |
/* Special case for 32-bit addressing: */ |
1401 |
|
1402 |
if (load) |
1403 |
ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_load_32bit) - (size_t)&dummy_cpu; |
1404 |
else |
1405 |
ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_store_32bit) - (size_t)&dummy_cpu; |
1406 |
/* ldq t12,bintrans_loadstore_32bit(a0) */ |
1407 |
*a++ = ofs; *a++ = ofs >> 8; *a++ = 0x70; *a++ = 0xa7; |
1408 |
|
1409 |
/* jsr t4,(t12),<after> */ |
1410 |
*a++ = 0x00; *a++ = 0x40; *a++ = 0xbb; *a++ = 0x68; |
1411 |
|
1412 |
/* Now: a3 = host page, t3 = address of host load/store */ |
1413 |
} else { |
1414 |
/* |
1415 |
* If the highest 33 bits of the address are either all ones |
1416 |
* or all zeroes, then the tables used for 32-bit load/stores |
1417 |
* can be used. |
1418 |
*/ |
1419 |
*a++ = 0x81; *a++ = 0xf6; *a++ = 0x23; *a++ = 0x4a; /* srl a1,0x1f,t0 */ |
1420 |
*a++ = 0x01; *a++ = 0x30; *a++ = 0x20; *a++ = 0x44; /* and t0,0x1,t0 */ |
1421 |
*a++ = 0x04; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4; /* beq t0,<noll> */ |
1422 |
*a++ = 0x81; *a++ = 0x16; *a++ = 0x24; *a++ = 0x4a; /* srl a1,0x20,t0 */ |
1423 |
*a++ = 0x01; *a++ = 0x30; *a++ = 0x20; *a++ = 0x40; /* addl t0,0x1,t0 */ |
1424 |
*a++ = 0x04; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4; /* beq t0,<ok> */ |
1425 |
generic64bit = a; |
1426 |
*a++ = 0x04; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; /* br <generic> */ |
1427 |
/* <noll>: */ |
1428 |
*a++ = 0x81; *a++ = 0x16; *a++ = 0x24; *a++ = 0x4a; /* srl a1,0x20,t0 */ |
1429 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0x20; *a++ = 0xe4; /* beq t0,<ok> */ |
1430 |
generic64bitA = a; |
1431 |
*a++ = 0x04; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; /* br <generic> */ |
1432 |
|
1433 |
if (load) |
1434 |
ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_load_32bit) - (size_t)&dummy_cpu; |
1435 |
else |
1436 |
ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_store_32bit) - (size_t)&dummy_cpu; |
1437 |
/* ldq t12,bintrans_loadstore_32bit(a0) */ |
1438 |
*a++ = ofs; *a++ = ofs >> 8; *a++ = 0x70; *a++ = 0xa7; |
1439 |
|
1440 |
/* jsr t4,(t12),<after> */ |
1441 |
*a++ = 0x00; *a++ = 0x40; *a++ = 0xbb; *a++ = 0x68; |
1442 |
|
1443 |
/* |
1444 |
* Now: a3 = host page |
1445 |
* t3 = (potential) address of host load/store |
1446 |
*/ |
1447 |
|
1448 |
doloadstore = a; |
1449 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; |
1450 |
|
1451 |
|
1452 |
/* |
1453 |
* Generic (64-bit) load/store: |
1454 |
*/ |
1455 |
|
1456 |
if (generic64bit != NULL) |
1457 |
*generic64bit = ((size_t)a - (size_t)generic64bit - 4) / 4; |
1458 |
if (generic64bitA != NULL) |
1459 |
*generic64bitA = ((size_t)a - (size_t)generic64bitA - 4) / 4; |
1460 |
|
1461 |
*addrp = a; |
1462 |
b = (uint32_t *) *addrp; |
1463 |
|
1464 |
/* Save a0 and the old return address on the stack: */ |
1465 |
*b++ = 0x23deff80; /* lda sp,-128(sp) */ |
1466 |
|
1467 |
*b++ = 0xb75e0000; /* stq ra,0(sp) */ |
1468 |
*b++ = 0xb61e0008; /* stq a0,8(sp) */ |
1469 |
*b++ = 0xb4de0010; /* stq t5,16(sp) */ |
1470 |
*b++ = 0xb0fe0018; /* stl t6,24(sp) */ |
1471 |
*b++ = 0xb71e0020; /* stq t10,32(sp) */ |
1472 |
*b++ = 0xb73e0028; /* stq t11,40(sp) */ |
1473 |
*b++ = 0xb51e0030; /* stq t7,48(sp) */ |
1474 |
*b++ = 0xb6de0038; /* stq t8,56(sp) */ |
1475 |
*b++ = 0xb6fe0040; /* stq t9,64(sp) */ |
1476 |
|
1477 |
ofs = ((size_t)&dummy_cpu.cd.mips.fast_vaddr_to_hostaddr) - (size_t)&dummy_cpu; |
1478 |
|
1479 |
*b++ = 0xa7700000 | ofs; /* ldq t12,0(a0) */ |
1480 |
|
1481 |
/* a1 is already vaddr. set a2 = writeflag */ |
1482 |
*b++ = 0x225f0000 | (load? 0 : 1); |
1483 |
|
1484 |
/* Call fast_vaddr_to_hostaddr: */ |
1485 |
*b++ = 0x6b5b4000; /* jsr ra,(t12),<after> */ |
1486 |
|
1487 |
/* Restore the old return address and a0 from the stack: */ |
1488 |
*b++ = 0xa75e0000; /* ldq ra,0(sp) */ |
1489 |
*b++ = 0xa61e0008; /* ldq a0,8(sp) */ |
1490 |
*b++ = 0xa4de0010; /* ldq t5,16(sp) */ |
1491 |
*b++ = 0xa0fe0018; /* ldl t6,24(sp) */ |
1492 |
*b++ = 0xa71e0020; /* ldq t10,32(sp) */ |
1493 |
*b++ = 0xa73e0028; /* ldq t11,40(sp) */ |
1494 |
*b++ = 0xa51e0030; /* ldq t7,48(sp) */ |
1495 |
*b++ = 0xa6de0038; /* ldq t8,56(sp) */ |
1496 |
*b++ = 0xa6fe0040; /* ldq t9,64(sp) */ |
1497 |
|
1498 |
*b++ = 0x23de0080; /* lda sp,128(sp) */ |
1499 |
|
1500 |
*addrp = (unsigned char *) b; |
1501 |
a = *addrp; |
1502 |
|
1503 |
/* |
1504 |
* NULL? Then return failure. |
1505 |
* 01 00 00 f4 bne v0,f8 <okzz> |
1506 |
*/ |
1507 |
fail = a; |
1508 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0x00; *a++ = 0xf4; |
1509 |
bintrans_write_chunkreturn_fail(&a); |
1510 |
*fail = ((size_t)a - (size_t)fail - 4) / 4; |
1511 |
|
1512 |
/* The rest of this code was written with t3 as the address. */ |
1513 |
|
1514 |
/* 04 14 00 40 addq v0,0,t3 */ |
1515 |
*a++ = 0x04; *a++ = 0x14; *a++ = 0x00; *a++ = 0x40; |
1516 |
|
1517 |
if (doloadstore != NULL) |
1518 |
*doloadstore = ((size_t)a - (size_t)doloadstore - 4) / 4; |
1519 |
} |
1520 |
|
1521 |
|
1522 |
switch (instruction_type) { |
1523 |
case HI6_LQ_MDMX: |
1524 |
/* TODO */ |
1525 |
break; |
1526 |
case HI6_LD: |
1527 |
*a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xa4; /* ldq t0,0(t3) */ |
1528 |
if (bigendian) { |
1529 |
/* remember original 8 bytes of t0: */ |
1530 |
*a++ = 0x05; *a++ = 0x04; *a++ = 0x3f; *a++ = 0x40; /* addq t0,zero,t4 */ |
1531 |
|
1532 |
/* swap lowest 4 bytes: */ |
1533 |
*a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */ |
1534 |
*a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */ |
1535 |
*a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */ |
1536 |
*a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */ |
1537 |
*a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */ |
1538 |
*a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */ |
1539 |
*a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */ |
1540 |
*a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */ |
1541 |
*a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */ |
1542 |
|
1543 |
/* save result in (top 4 bytes of) t1, then t4. get back top bits of t4: */ |
1544 |
*a++ = 0x22; *a++ = 0x17; *a++ = 0x24; *a++ = 0x48; /* sll t0,0x20,t1 */ |
1545 |
*a++ = 0x81; *a++ = 0x16; *a++ = 0xa4; *a++ = 0x48; /* srl t4,0x20,t0 */ |
1546 |
*a++ = 0x05; *a++ = 0x14; *a++ = 0x40; *a++ = 0x40; /* addq t1,0,t4 */ |
1547 |
|
1548 |
/* swap highest 4 bytes: */ |
1549 |
*a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */ |
1550 |
*a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */ |
1551 |
*a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */ |
1552 |
*a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */ |
1553 |
*a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */ |
1554 |
*a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */ |
1555 |
*a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */ |
1556 |
*a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */ |
1557 |
*a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */ |
1558 |
|
1559 |
/* or the results together: */ |
1560 |
*a++ = 0x01; *a++ = 0x04; *a++ = 0xa1; *a++ = 0x44; /* or t4,t0,t0 */ |
1561 |
} |
1562 |
bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rt); |
1563 |
break; |
1564 |
case HI6_LW: |
1565 |
case HI6_LWU: |
1566 |
if (alpha_rt < 0 || bigendian || instruction_type == HI6_LWU) |
1567 |
alpha_rt = ALPHA_T0; |
1568 |
/* ldl rt,0(t3) */ |
1569 |
*a++ = 0x00; *a++ = 0x00; *a++ = 0x04 | ((alpha_rt & 7) << 5); |
1570 |
*a++ = 0xa0 | ((alpha_rt >> 3) & 3); |
1571 |
if (bigendian) { |
1572 |
*a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */ |
1573 |
*a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */ |
1574 |
*a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */ |
1575 |
*a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */ |
1576 |
*a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */ |
1577 |
*a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */ |
1578 |
*a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */ |
1579 |
*a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */ |
1580 |
*a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */ |
1581 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0x3f; *a++ = 0x40; /* addl t0,zero,t0 (sign extend) 32->64 */ |
1582 |
} |
1583 |
if (instruction_type == HI6_LWU) { |
1584 |
/* Use only lowest 32 bits: */ |
1585 |
*a++ = 0x21; *a++ = 0xf6; *a++ = 0x21; *a++ = 0x48; /* zapnot t0,0xf,t0 */ |
1586 |
} |
1587 |
if (alpha_rt == ALPHA_T0) |
1588 |
bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rt); |
1589 |
break; |
1590 |
case HI6_LHU: |
1591 |
case HI6_LH: |
1592 |
*a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0x30; /* ldwu from memory */ |
1593 |
if (bigendian) { |
1594 |
*a++ = 0x62; *a++ = 0x31; *a++ = 0x20; *a++ = 0x48; /* insbl t0,1,t1 */ |
1595 |
*a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */ |
1596 |
*a++ = 0x01; *a++ = 0x04; *a++ = 0x43; *a++ = 0x44; /* or t1,t2,t0 */ |
1597 |
} |
1598 |
if (instruction_type == HI6_LH) { |
1599 |
*a++ = 0x21; *a++ = 0x00; *a++ = 0xe1; *a++ = 0x73; /* sextw t0,t0 */ |
1600 |
} |
1601 |
bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rt); |
1602 |
break; |
1603 |
case HI6_LBU: |
1604 |
case HI6_LB: |
1605 |
if (alpha_rt < 0) |
1606 |
alpha_rt = ALPHA_T0; |
1607 |
/* ldbu rt,0(t3) */ |
1608 |
*a++ = 0x00; *a++ = 0x00; *a++ = 0x04 | ((alpha_rt & 7) << 5); |
1609 |
*a++ = 0x28 | ((alpha_rt >> 3) & 3); |
1610 |
if (instruction_type == HI6_LB) { |
1611 |
/* sextb rt,rt */ |
1612 |
*a++ = alpha_rt; *a++ = 0x00; *a++ = 0xe0 + alpha_rt; *a++ = 0x73; |
1613 |
} |
1614 |
if (alpha_rt == ALPHA_T0) |
1615 |
bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rt); |
1616 |
break; |
1617 |
|
1618 |
case HI6_LWL: |
1619 |
/* a1 = 0..3 (or 0..7 for 64-bit loads): */ |
1620 |
alpha_rs = map_MIPS_to_Alpha[rs]; |
1621 |
if (alpha_rs < 0) { |
1622 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0); |
1623 |
alpha_rs = ALPHA_T0; |
1624 |
} |
1625 |
*a++ = imm; *a++ = (imm >> 8); *a++ = 0x20 + alpha_rs; *a++ = 0x22; |
1626 |
/* 02 30 20 46 and a1,alignment,t1 */ |
1627 |
*a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46; |
1628 |
|
1629 |
/* ldl t0,0(t3) */ |
1630 |
*a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xa0; |
1631 |
|
1632 |
if (bigendian) { |
1633 |
/* TODO */ |
1634 |
bintrans_write_chunkreturn_fail(&a); |
1635 |
} |
1636 |
/* |
1637 |
* lwl: memory = 0x12 0x34 0x56 0x78 |
1638 |
* offset (a1): register rt becomes: |
1639 |
* 0 0x12...... |
1640 |
* 1 0x3412.... |
1641 |
* 2 0x563412.. |
1642 |
* 3 0x78563412 |
1643 |
*/ |
1644 |
|
1645 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T2); |
1646 |
|
1647 |
/* |
1648 |
10: 03 00 9f 20 lda t3,3 |
1649 |
14: a5 05 82 40 cmpeq t3,t1,t4 |
1650 |
18: 01 00 a0 e4 beq t4,20 <skip> |
1651 |
*/ |
1652 |
*a++ = 0x03; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20; |
1653 |
*a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40; |
1654 |
*a++ = 0x02; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4; |
1655 |
|
1656 |
/* 03 14 20 40 addq t0,0,t2 */ |
1657 |
*a++ = 0x03; *a++ = 0x14; *a++ = 0x20; *a++ = 0x40; |
1658 |
|
1659 |
ok_unaligned_load3 = a; |
1660 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; |
1661 |
|
1662 |
|
1663 |
|
1664 |
*a++ = 0x02; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20; |
1665 |
*a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40; |
1666 |
*a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4; |
1667 |
/* |
1668 |
* 2 0x563412.. |
1669 |
2c: 21 17 21 48 sll t0,0x8,t0 |
1670 |
30: 01 10 20 40 addl t0,0,t0 |
1671 |
34: 03 f0 7f 44 and t2,0xff,t2 |
1672 |
38: 03 04 23 44 or t0,t2,t2 |
1673 |
*/ |
1674 |
*a++ = 0x21; *a++ = 0x17; *a++ = 0x21; *a++ = 0x48; |
1675 |
*a++ = 0x01; *a++ = 0x10; *a++ = 0x20; *a++ = 0x40; |
1676 |
*a++ = 0x03; *a++ = 0xf0; *a++ = 0x7f; *a++ = 0x44; |
1677 |
*a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44; |
1678 |
|
1679 |
ok_unaligned_load2 = a; |
1680 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; |
1681 |
|
1682 |
|
1683 |
|
1684 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20; |
1685 |
*a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40; |
1686 |
*a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4; |
1687 |
/* |
1688 |
* 1 0x3412.... |
1689 |
2c: 21 17 22 48 sll t0,0x10,t0 |
1690 |
30: 01 10 20 40 addl t0,0,t0 |
1691 |
34: 23 76 60 48 zapnot t2,0x3,t2 |
1692 |
38: 03 04 23 44 or t0,t2,t2 |
1693 |
*/ |
1694 |
*a++ = 0x21; *a++ = 0x17; *a++ = 0x22; *a++ = 0x48; |
1695 |
*a++ = 0x01; *a++ = 0x10; *a++ = 0x20; *a++ = 0x40; |
1696 |
*a++ = 0x23; *a++ = 0x76; *a++ = 0x60; *a++ = 0x48; |
1697 |
*a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44; |
1698 |
|
1699 |
ok_unaligned_load1 = a; |
1700 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; |
1701 |
|
1702 |
|
1703 |
|
1704 |
|
1705 |
/* |
1706 |
* 0 0x12...... |
1707 |
2c: 21 17 23 48 sll t0,0x18,t0 |
1708 |
30: 01 10 20 40 addl t0,0,t0 |
1709 |
34: 23 f6 60 48 zapnot t2,0x7,t2 |
1710 |
38: 03 04 23 44 or t0,t2,t2 |
1711 |
*/ |
1712 |
*a++ = 0x21; *a++ = 0x17; *a++ = 0x23; *a++ = 0x48; |
1713 |
*a++ = 0x01; *a++ = 0x10; *a++ = 0x20; *a++ = 0x40; |
1714 |
*a++ = 0x23; *a++ = 0xf6; *a++ = 0x60; *a++ = 0x48; |
1715 |
*a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44; |
1716 |
|
1717 |
|
1718 |
*ok_unaligned_load3 = ((size_t)a - (size_t)ok_unaligned_load3 - 4) / 4; |
1719 |
*ok_unaligned_load2 = ((size_t)a - (size_t)ok_unaligned_load2 - 4) / 4; |
1720 |
*ok_unaligned_load1 = ((size_t)a - (size_t)ok_unaligned_load1 - 4) / 4; |
1721 |
|
1722 |
/* 03 10 60 40 addl t2,0,t2 */ |
1723 |
*a++ = 0x03; *a++ = 0x10; *a++ = 0x60; *a++ = 0x40; |
1724 |
|
1725 |
bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T2, rt); |
1726 |
break; |
1727 |
|
1728 |
case HI6_LWR: |
1729 |
/* a1 = 0..3 (or 0..7 for 64-bit loads): */ |
1730 |
alpha_rs = map_MIPS_to_Alpha[rs]; |
1731 |
if (alpha_rs < 0) { |
1732 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0); |
1733 |
alpha_rs = ALPHA_T0; |
1734 |
} |
1735 |
*a++ = imm; *a++ = (imm >> 8); *a++ = 0x20 + alpha_rs; *a++ = 0x22; |
1736 |
/* 02 30 20 46 and a1,alignment,t1 */ |
1737 |
*a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46; |
1738 |
|
1739 |
/* ldl t0,0(t3) */ |
1740 |
*a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xa0; |
1741 |
|
1742 |
if (bigendian) { |
1743 |
/* TODO */ |
1744 |
bintrans_write_chunkreturn_fail(&a); |
1745 |
} |
1746 |
/* |
1747 |
* lwr: memory = 0x12 0x34 0x56 0x78 |
1748 |
* offset (a1): register rt becomes: |
1749 |
* 0 0x78563412 |
1750 |
* 1 0x..785634 |
1751 |
* 2 0x....7856 |
1752 |
* 3 0x......78 |
1753 |
*/ |
1754 |
|
1755 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T2); |
1756 |
|
1757 |
/* |
1758 |
10: 03 00 9f 20 lda t3,3 |
1759 |
14: a5 05 82 40 cmpeq t3,t1,t4 |
1760 |
18: 01 00 a0 e4 beq t4,20 <skip> |
1761 |
*/ |
1762 |
*a++ = 0x03; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20; |
1763 |
*a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40; |
1764 |
*a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4; |
1765 |
|
1766 |
/* |
1767 |
2c: 81 16 23 48 srl t0,0x18,t0 |
1768 |
b0: 21 36 20 48 zapnot t0,0x1,t0 |
1769 |
34: 23 d6 7f 48 zapnot t2,0xfe,t2 |
1770 |
38: 03 04 23 44 or t0,t2,t2 |
1771 |
*/ |
1772 |
*a++ = 0x81; *a++ = 0x16; *a++ = 0x23; *a++ = 0x48; |
1773 |
*a++ = 0x21; *a++ = 0x36; *a++ = 0x20; *a++ = 0x48; |
1774 |
*a++ = 0x23; *a++ = 0xd6; *a++ = 0x7f; *a++ = 0x48; |
1775 |
*a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44; |
1776 |
|
1777 |
ok_unaligned_load3 = a; |
1778 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; |
1779 |
|
1780 |
|
1781 |
|
1782 |
*a++ = 0x02; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20; |
1783 |
*a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40; |
1784 |
*a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4; |
1785 |
/* |
1786 |
2c: 81 16 22 48 srl t0,0x10,t0 |
1787 |
b4: 21 76 20 48 zapnot t0,0x3,t0 |
1788 |
34: 23 96 7f 48 zapnot t2,0xfc,t2 |
1789 |
38: 03 04 23 44 or t0,t2,t2 |
1790 |
*/ |
1791 |
*a++ = 0x81; *a++ = 0x16; *a++ = 0x22; *a++ = 0x48; |
1792 |
*a++ = 0x21; *a++ = 0x76; *a++ = 0x20; *a++ = 0x48; |
1793 |
*a++ = 0x23; *a++ = 0x96; *a++ = 0x7f; *a++ = 0x48; |
1794 |
*a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44; |
1795 |
|
1796 |
ok_unaligned_load2 = a; |
1797 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; |
1798 |
|
1799 |
|
1800 |
|
1801 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0x9f; *a++ = 0x20; |
1802 |
*a++ = 0xa5; *a++ = 0x05; *a++ = 0x82; *a++ = 0x40; |
1803 |
*a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4; |
1804 |
/* |
1805 |
2c: 81 16 21 48 srl t0,0x8,t0 |
1806 |
b8: 21 f6 20 48 zapnot t0,0x7,t0 |
1807 |
3c: 23 16 7f 48 zapnot t2,0xf8,t2 |
1808 |
40: 03 04 23 44 or t0,t2,t2 |
1809 |
*/ |
1810 |
*a++ = 0x81; *a++ = 0x16; *a++ = 0x21; *a++ = 0x48; |
1811 |
*a++ = 0x21; *a++ = 0xf6; *a++ = 0x20; *a++ = 0x48; |
1812 |
*a++ = 0x23; *a++ = 0x16; *a++ = 0x7f; *a++ = 0x48; |
1813 |
*a++ = 0x03; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44; |
1814 |
|
1815 |
ok_unaligned_load1 = a; |
1816 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; |
1817 |
|
1818 |
|
1819 |
|
1820 |
|
1821 |
/* |
1822 |
* 0 0x12...... |
1823 |
*/ |
1824 |
/* 03 14 20 40 addq t0,0,t2 */ |
1825 |
*a++ = 0x03; *a++ = 0x14; *a++ = 0x20; *a++ = 0x40; |
1826 |
|
1827 |
|
1828 |
|
1829 |
*ok_unaligned_load3 = ((size_t)a - (size_t)ok_unaligned_load3 - 4) / 4; |
1830 |
*ok_unaligned_load2 = ((size_t)a - (size_t)ok_unaligned_load2 - 4) / 4; |
1831 |
*ok_unaligned_load1 = ((size_t)a - (size_t)ok_unaligned_load1 - 4) / 4; |
1832 |
|
1833 |
/* 03 10 60 40 addl t2,0,t2 */ |
1834 |
*a++ = 0x03; *a++ = 0x10; *a++ = 0x60; *a++ = 0x40; |
1835 |
|
1836 |
bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T2, rt); |
1837 |
break; |
1838 |
|
1839 |
case HI6_SQ: |
1840 |
/* TODO */ |
1841 |
break; |
1842 |
case HI6_SD: |
1843 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T0); |
1844 |
if (bigendian) { |
1845 |
/* remember original 8 bytes of t0: */ |
1846 |
*a++ = 0x05; *a++ = 0x04; *a++ = 0x3f; *a++ = 0x40; /* addq t0,zero,t4 */ |
1847 |
|
1848 |
/* swap lowest 4 bytes: */ |
1849 |
*a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */ |
1850 |
*a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */ |
1851 |
*a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */ |
1852 |
*a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */ |
1853 |
*a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */ |
1854 |
*a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */ |
1855 |
*a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */ |
1856 |
*a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */ |
1857 |
*a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */ |
1858 |
|
1859 |
/* save result in (top 4 bytes of) t1, then t4. get back top bits of t4: */ |
1860 |
*a++ = 0x22; *a++ = 0x17; *a++ = 0x24; *a++ = 0x48; /* sll t0,0x20,t1 */ |
1861 |
*a++ = 0x81; *a++ = 0x16; *a++ = 0xa4; *a++ = 0x48; /* srl t4,0x20,t0 */ |
1862 |
*a++ = 0x05; *a++ = 0x14; *a++ = 0x40; *a++ = 0x40; /* addq t1,0,t4 */ |
1863 |
|
1864 |
/* swap highest 4 bytes: */ |
1865 |
*a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */ |
1866 |
*a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */ |
1867 |
*a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */ |
1868 |
*a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */ |
1869 |
*a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */ |
1870 |
*a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */ |
1871 |
*a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */ |
1872 |
*a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */ |
1873 |
*a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */ |
1874 |
|
1875 |
/* or the results together: */ |
1876 |
*a++ = 0x01; *a++ = 0x04; *a++ = 0xa1; *a++ = 0x44; /* or t4,t0,t0 */ |
1877 |
} |
1878 |
*a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xb4; /* stq to memory */ |
1879 |
break; |
1880 |
case HI6_SW: |
1881 |
if (alpha_rt < 0 || bigendian) { |
1882 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T0); |
1883 |
alpha_rt = ALPHA_T0; |
1884 |
} |
1885 |
if (bigendian) { |
1886 |
*a++ = 0x62; *a++ = 0x71; *a++ = 0x20; *a++ = 0x48; /* insbl t0,3,t1 */ |
1887 |
*a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */ |
1888 |
*a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; /* sll t2,16,t2 */ |
1889 |
*a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */ |
1890 |
*a++ = 0xc3; *a++ = 0x50; *a++ = 0x20; *a++ = 0x48; /* extbl t0,2,t2 */ |
1891 |
*a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; /* sll t2,8,t2 */ |
1892 |
*a++ = 0x02; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t1 */ |
1893 |
*a++ = 0xc3; *a++ = 0x70; *a++ = 0x20; *a++ = 0x48; /* extbl t0,3,t2 */ |
1894 |
*a++ = 0x01; *a++ = 0x04; *a++ = 0x62; *a++ = 0x44; /* or t2,t1,t0 */ |
1895 |
} |
1896 |
/* stl to memory: stl rt,0(t3) */ |
1897 |
*a++ = 0x00; *a++ = 0x00; *a++ = 0x04 | ((alpha_rt & 7) << 5); |
1898 |
*a++ = 0xb0 | ((alpha_rt >> 3) & 3); |
1899 |
break; |
1900 |
case HI6_SH: |
1901 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T0); |
1902 |
if (bigendian) { |
1903 |
*a++ = 0x62; *a++ = 0x31; *a++ = 0x20; *a++ = 0x48; /* insbl t0,1,t1 */ |
1904 |
*a++ = 0xc3; *a++ = 0x30; *a++ = 0x20; *a++ = 0x48; /* extbl t0,1,t2 */ |
1905 |
*a++ = 0x01; *a++ = 0x04; *a++ = 0x43; *a++ = 0x44; /* or t1,t2,t0 */ |
1906 |
} |
1907 |
*a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0x34; /* stw to memory */ |
1908 |
break; |
1909 |
case HI6_SB: |
1910 |
if (alpha_rt < 0) { |
1911 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T0); |
1912 |
alpha_rt = ALPHA_T0; |
1913 |
} |
1914 |
/* stb to memory: stb rt,0(t3) */ |
1915 |
*a++ = 0x00; *a++ = 0x00; *a++ = 0x04 | ((alpha_rt & 7) << 5); |
1916 |
*a++ = 0x38 | ((alpha_rt >> 3) & 3); |
1917 |
break; |
1918 |
|
1919 |
case HI6_SWL: |
1920 |
/* a1 = 0..3 (or 0..7 for 64-bit stores): */ |
1921 |
alpha_rs = map_MIPS_to_Alpha[rs]; |
1922 |
if (alpha_rs < 0) { |
1923 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0); |
1924 |
alpha_rs = ALPHA_T0; |
1925 |
} |
1926 |
*a++ = imm; *a++ = (imm >> 8); *a++ = 0x20 + alpha_rs; *a++ = 0x22; |
1927 |
/* 02 30 20 46 and a1,alignment,t1 */ |
1928 |
*a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46; |
1929 |
|
1930 |
/* ldl t0,0(t3) */ |
1931 |
*a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xa0; |
1932 |
|
1933 |
if (bigendian) { |
1934 |
/* TODO */ |
1935 |
bintrans_write_chunkreturn_fail(&a); |
1936 |
} |
1937 |
|
1938 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T2); |
1939 |
|
1940 |
/* |
1941 |
* swl: memory = 0x12 0x34 0x56 0x78 |
1942 |
* register = 0x89abcdef |
1943 |
* offset (a1): memory becomes: |
1944 |
* 0 0x89 0x.. 0x.. 0x.. |
1945 |
* 1 0xab 0x89 0x.. 0x.. |
1946 |
* 2 0xcd 0xab 0x89 0x.. |
1947 |
* 3 0xef 0xcd 0xab 0x89 |
1948 |
*/ |
1949 |
|
1950 |
/* |
1951 |
a5 75 40 40 cmpeq t1,0x03,t4 |
1952 |
01 00 a0 e4 beq t4,20 <skip> |
1953 |
*/ |
1954 |
*a++ = 0xa5; *a++ = 0x75; *a++ = 0x40; *a++ = 0x40; |
1955 |
*a++ = 0x02; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4; |
1956 |
|
1957 |
/* 01 10 60 40 addl t2,0,t0 */ |
1958 |
*a++ = 0x01; *a++ = 0x10; *a++ = 0x60; *a++ = 0x40; |
1959 |
|
1960 |
ok_unaligned_load3 = a; |
1961 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; |
1962 |
|
1963 |
|
1964 |
|
1965 |
|
1966 |
*a++ = 0xa5; *a++ = 0x55; *a++ = 0x40; *a++ = 0x40; |
1967 |
*a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4; |
1968 |
/* |
1969 |
2: |
1970 |
e8: 83 16 61 48 srl t2,0x8,t2 |
1971 |
ec: 23 f6 60 48 zapnot t2,0x7,t2 |
1972 |
f0: 21 16 3f 48 zapnot t0,0xf8,t0 |
1973 |
f4: 01 04 23 44 or t0,t2,t0 |
1974 |
*/ |
1975 |
*a++ = 0x83; *a++ = 0x16; *a++ = 0x61; *a++ = 0x48; |
1976 |
*a++ = 0x23; *a++ = 0xf6; *a++ = 0x60; *a++ = 0x48; |
1977 |
*a++ = 0x21; *a++ = 0x16; *a++ = 0x3f; *a++ = 0x48; |
1978 |
*a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44; |
1979 |
|
1980 |
ok_unaligned_load2 = a; |
1981 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; |
1982 |
|
1983 |
|
1984 |
|
1985 |
*a++ = 0xa5; *a++ = 0x35; *a++ = 0x40; *a++ = 0x40; |
1986 |
*a++ = 0x05; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4; |
1987 |
/* |
1988 |
1: |
1989 |
f8: 83 16 62 48 srl t2,0x10,t2 |
1990 |
fc: 23 76 60 48 zapnot t2,0x3,t2 |
1991 |
100: 21 96 3f 48 zapnot t0,0xfc,t0 |
1992 |
104: 01 04 23 44 or t0,t2,t0 |
1993 |
*/ |
1994 |
*a++ = 0x83; *a++ = 0x16; *a++ = 0x62; *a++ = 0x48; |
1995 |
*a++ = 0x23; *a++ = 0x76; *a++ = 0x60; *a++ = 0x48; |
1996 |
*a++ = 0x21; *a++ = 0x96; *a++ = 0x3f; *a++ = 0x48; |
1997 |
*a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44; |
1998 |
|
1999 |
ok_unaligned_load1 = a; |
2000 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; |
2001 |
|
2002 |
|
2003 |
|
2004 |
|
2005 |
|
2006 |
/* |
2007 |
0: |
2008 |
108: 83 16 63 48 srl t2,0x18,t2 |
2009 |
10c: 23 36 60 48 zapnot t2,0x1,t2 |
2010 |
110: 21 d6 3f 48 zapnot t0,0xfe,t0 |
2011 |
114: 01 04 23 44 or t0,t2,t0 |
2012 |
*/ |
2013 |
*a++ = 0x83; *a++ = 0x16; *a++ = 0x63; *a++ = 0x48; |
2014 |
*a++ = 0x23; *a++ = 0x36; *a++ = 0x60; *a++ = 0x48; |
2015 |
*a++ = 0x21; *a++ = 0xd6; *a++ = 0x3f; *a++ = 0x48; |
2016 |
*a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44; |
2017 |
|
2018 |
|
2019 |
*ok_unaligned_load3 = ((size_t)a - (size_t)ok_unaligned_load3 - 4) / 4; |
2020 |
*ok_unaligned_load2 = ((size_t)a - (size_t)ok_unaligned_load2 - 4) / 4; |
2021 |
*ok_unaligned_load1 = ((size_t)a - (size_t)ok_unaligned_load1 - 4) / 4; |
2022 |
|
2023 |
/* sdl t0,0(t3) */ |
2024 |
*a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xb0; |
2025 |
break; |
2026 |
|
2027 |
case HI6_SWR: |
2028 |
/* a1 = 0..3 (or 0..7 for 64-bit stores): */ |
2029 |
alpha_rs = map_MIPS_to_Alpha[rs]; |
2030 |
if (alpha_rs < 0) { |
2031 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, rs, ALPHA_T0); |
2032 |
alpha_rs = ALPHA_T0; |
2033 |
} |
2034 |
*a++ = imm; *a++ = (imm >> 8); *a++ = 0x20 + alpha_rs; *a++ = 0x22; |
2035 |
/* 02 30 20 46 and a1,alignment,t1 */ |
2036 |
*a++ = 0x02; *a++ = 0x10 + alignment * 0x20; *a++ = 0x20 + (alignment >> 3); *a++ = 0x46; |
2037 |
|
2038 |
/* ldl t0,0(t3) */ |
2039 |
*a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xa0; |
2040 |
|
2041 |
if (bigendian) { |
2042 |
/* TODO */ |
2043 |
bintrans_write_chunkreturn_fail(&a); |
2044 |
} |
2045 |
|
2046 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, rt, ALPHA_T2); |
2047 |
|
2048 |
/* |
2049 |
* swr: memory = 0x12 0x34 0x56 0x78 |
2050 |
* register = 0x89abcdef |
2051 |
* offset (a1): memory becomes: |
2052 |
* 0 0xef 0xcd 0xab 0x89 |
2053 |
* 1 0x.. 0xef 0xcd 0xab |
2054 |
* 2 0x.. 0x.. 0xef 0xcd |
2055 |
* 3 0x.. 0x.. 0x.. 0xef |
2056 |
*/ |
2057 |
|
2058 |
|
2059 |
/* |
2060 |
a5 75 40 40 cmpeq t1,0x03,t4 |
2061 |
01 00 a0 e4 beq t4,20 <skip> |
2062 |
*/ |
2063 |
*a++ = 0xa5; *a++ = 0x75; *a++ = 0x40; *a++ = 0x40; |
2064 |
*a++ = 0x04; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4; |
2065 |
|
2066 |
/* |
2067 |
118: 23 17 63 48 sll t2,0x18,t2 |
2068 |
11c: 21 f6 20 48 zapnot t0,0x7,t0 |
2069 |
120: 01 04 23 44 or t0,t2,t0 |
2070 |
*/ |
2071 |
*a++ = 0x23; *a++ = 0x17; *a++ = 0x63; *a++ = 0x48; |
2072 |
*a++ = 0x21; *a++ = 0xf6; *a++ = 0x20; *a++ = 0x48; |
2073 |
*a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44; |
2074 |
|
2075 |
ok_unaligned_load3 = a; |
2076 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; |
2077 |
|
2078 |
|
2079 |
|
2080 |
|
2081 |
|
2082 |
*a++ = 0xa5; *a++ = 0x55; *a++ = 0x40; *a++ = 0x40; |
2083 |
*a++ = 0x04; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4; |
2084 |
/* |
2085 |
2: |
2086 |
124: 23 17 62 48 sll t2,0x10,t2 |
2087 |
128: 21 76 20 48 zapnot t0,0x3,t0 |
2088 |
12c: 01 04 23 44 or t0,t2,t0 |
2089 |
*/ |
2090 |
*a++ = 0x23; *a++ = 0x17; *a++ = 0x62; *a++ = 0x48; |
2091 |
*a++ = 0x21; *a++ = 0x76; *a++ = 0x20; *a++ = 0x48; |
2092 |
*a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44; |
2093 |
|
2094 |
ok_unaligned_load2 = a; |
2095 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; |
2096 |
|
2097 |
|
2098 |
|
2099 |
*a++ = 0xa5; *a++ = 0x35; *a++ = 0x40; *a++ = 0x40; |
2100 |
*a++ = 0x04; *a++ = 0x00; *a++ = 0xa0; *a++ = 0xe4; |
2101 |
/* |
2102 |
1: |
2103 |
130: 23 17 61 48 sll t2,0x8,t2 |
2104 |
134: 21 36 20 48 zapnot t0,0x1,t0 |
2105 |
138: 01 04 23 44 or t0,t2,t0 |
2106 |
*/ |
2107 |
*a++ = 0x23; *a++ = 0x17; *a++ = 0x61; *a++ = 0x48; |
2108 |
*a++ = 0x21; *a++ = 0x36; *a++ = 0x20; *a++ = 0x48; |
2109 |
*a++ = 0x01; *a++ = 0x04; *a++ = 0x23; *a++ = 0x44; |
2110 |
|
2111 |
ok_unaligned_load1 = a; |
2112 |
*a++ = 0x01; *a++ = 0x00; *a++ = 0xe0; *a++ = 0xc3; |
2113 |
|
2114 |
|
2115 |
|
2116 |
/* |
2117 |
0: |
2118 |
13c: 01 10 60 40 addl t2,0,t0 |
2119 |
*/ |
2120 |
*a++ = 0x01; *a++ = 0x10; *a++ = 0x60; *a++ = 0x40; |
2121 |
|
2122 |
|
2123 |
*ok_unaligned_load3 = ((size_t)a - (size_t)ok_unaligned_load3 - 4) / 4; |
2124 |
*ok_unaligned_load2 = ((size_t)a - (size_t)ok_unaligned_load2 - 4) / 4; |
2125 |
*ok_unaligned_load1 = ((size_t)a - (size_t)ok_unaligned_load1 - 4) / 4; |
2126 |
|
2127 |
/* sdl t0,0(t3) */ |
2128 |
*a++ = 0x00; *a++ = 0x00; *a++ = 0x24; *a++ = 0xb0; |
2129 |
break; |
2130 |
|
2131 |
default: |
2132 |
; |
2133 |
} |
2134 |
|
2135 |
*addrp = a; |
2136 |
bintrans_write_pc_inc(addrp); |
2137 |
return 1; |
2138 |
} |
2139 |
|
2140 |
|
2141 |
/* |
2142 |
* bintrans_write_instruction__lui(): |
2143 |
*/ |
2144 |
static int bintrans_write_instruction__lui(unsigned char **addrp, |
2145 |
int rt, int imm) |
2146 |
{ |
2147 |
uint32_t *a; |
2148 |
|
2149 |
/* |
2150 |
* dc fe 3f 24 ldah t0,-292 |
2151 |
* 1f 04 ff 5f fnop |
2152 |
* 88 08 30 b4 stq t0,2184(a0) |
2153 |
*/ |
2154 |
if (rt != 0) { |
2155 |
int alpha_rt = map_MIPS_to_Alpha[rt]; |
2156 |
if (alpha_rt < 0) |
2157 |
alpha_rt = ALPHA_T0; |
2158 |
|
2159 |
a = (uint32_t *) *addrp; |
2160 |
*a++ = 0x241f0000 | (alpha_rt << 21) | ((uint32_t)imm & 0xffff); |
2161 |
*addrp = (unsigned char *) a; |
2162 |
|
2163 |
if (alpha_rt == ALPHA_T0) { |
2164 |
*a++ = 0x5fff041f; /* fnop */ |
2165 |
bintrans_move_Alpha_reg_into_MIPS_reg(addrp, ALPHA_T0, rt); |
2166 |
} |
2167 |
} |
2168 |
|
2169 |
bintrans_write_pc_inc(addrp); |
2170 |
|
2171 |
return 1; |
2172 |
} |
2173 |
|
2174 |
|
2175 |
/* |
2176 |
* bintrans_write_instruction__mfmthilo(): |
2177 |
*/ |
2178 |
static int bintrans_write_instruction__mfmthilo(unsigned char **addrp, |
2179 |
int rd, int from_flag, int hi_flag) |
2180 |
{ |
2181 |
unsigned char *a; |
2182 |
int ofs; |
2183 |
|
2184 |
a = *addrp; |
2185 |
|
2186 |
/* |
2187 |
* 18 09 30 a4 ldq t0,hi(a0) (or lo) |
2188 |
* 18 09 30 b4 stq t0,rd(a0) |
2189 |
* |
2190 |
* (or if from_flag is cleared then move the other way, it's |
2191 |
* actually not rd then, but rs...) |
2192 |
*/ |
2193 |
|
2194 |
if (from_flag) { |
2195 |
if (rd != 0) { |
2196 |
/* mfhi or mflo */ |
2197 |
if (hi_flag) |
2198 |
ofs = ((size_t)&dummy_cpu.cd.mips.hi) - (size_t)&dummy_cpu; |
2199 |
else |
2200 |
ofs = ((size_t)&dummy_cpu.cd.mips.lo) - (size_t)&dummy_cpu; |
2201 |
*a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x30; *a++ = 0xa4; |
2202 |
|
2203 |
bintrans_move_Alpha_reg_into_MIPS_reg(&a, ALPHA_T0, rd); |
2204 |
} |
2205 |
} else { |
2206 |
/* mthi or mtlo */ |
2207 |
bintrans_move_MIPS_reg_into_Alpha_reg(&a, rd, ALPHA_T0); |
2208 |
|
2209 |
if (hi_flag) |
2210 |
ofs = ((size_t)&dummy_cpu.cd.mips.hi) - (size_t)&dummy_cpu; |
2211 |
else |
2212 |
ofs = ((size_t)&dummy_cpu.cd.mips.lo) - (size_t)&dummy_cpu; |
2213 |
*a++ = (ofs & 255); *a++ = (ofs >> 8); *a++ = 0x30; *a++ = 0xb4; |
2214 |
} |
2215 |
|
2216 |
*addrp = a; |
2217 |
bintrans_write_pc_inc(addrp); |
2218 |
return 1; |
2219 |
} |
2220 |
|
2221 |
|
2222 |
/* |
2223 |
* bintrans_write_instruction__mfc_mtc(): |
2224 |
*/ |
2225 |
static int bintrans_write_instruction__mfc_mtc(struct memory *mem, |
2226 |
unsigned char **addrp, int coproc_nr, int flag64bit, int rt, |
2227 |
int rd, int mtcflag) |
2228 |
{ |
2229 |
uint32_t *a, *jump; |
2230 |
int ofs; |
2231 |
|
2232 |
/* |
2233 |
* NOTE: Only a few registers are readable without side effects. |
2234 |
*/ |
2235 |
if (rt == 0 && !mtcflag) |
2236 |
return 0; |
2237 |
|
2238 |
if (coproc_nr >= 1) |
2239 |
return 0; |
2240 |
|
2241 |
if (rd == COP0_RANDOM || rd == COP0_COUNT) |
2242 |
return 0; |
2243 |
|
2244 |
|
2245 |
/************************************************************* |
2246 |
* |
2247 |
* TODO: Check for kernel mode, or Coproc X usability bit! |
2248 |
* |
2249 |
*************************************************************/ |
2250 |
|
2251 |
a = (uint32_t *) *addrp; |
2252 |
|
2253 |
ofs = ((size_t)&dummy_cpu.cd.mips.coproc[0]) - (size_t)&dummy_cpu; |
2254 |
*a++ = 0xa4300000 | (ofs & 0xffff); /* ldq t0,coproc[0](a0) */ |
2255 |
|
2256 |
ofs = ((size_t)&dummy_coproc.reg[rd]) - (size_t)&dummy_coproc; |
2257 |
*a++ = 0xa4410000 | (ofs & 0xffff); /* ldq t1,reg_rd(t0) */ |
2258 |
|
2259 |
if (mtcflag) { |
2260 |
/* mtc: */ |
2261 |
*addrp = (unsigned char *) a; |
2262 |
bintrans_move_MIPS_reg_into_Alpha_reg(addrp, rt, ALPHA_T0); |
2263 |
a = (uint32_t *) *addrp; |
2264 |
|
2265 |
if (!flag64bit) { |
2266 |
*a++ = 0x40201001; /* addl t0,0,t0 */ |
2267 |
*a++ = 0x40401002; /* addl t1,0,t1 */ |
2268 |
} |
2269 |
|
2270 |
/* |
2271 |
* In the general case: Only allow mtc if it does NOT |
2272 |
* change the register!! |
2273 |
*/ |
2274 |
|
2275 |
switch (rd) { |
2276 |
case COP0_INDEX: |
2277 |
break; |
2278 |
|
2279 |
case COP0_EPC: |
2280 |
break; |
2281 |
|
2282 |
/* TODO: Some bits are not writable */ |
2283 |
case COP0_ENTRYLO0: |
2284 |
case COP0_ENTRYLO1: |
2285 |
break; |
2286 |
|
2287 |
case COP0_ENTRYHI: |
2288 |
/* |
2289 |
* Entryhi is ok to write to, as long as the |
2290 |
* ASID isn't changed. (That would require |
2291 |
* cache invalidations etc. Instead of checking |
2292 |
* for MMU3K vs others, we just assume that all the |
2293 |
* lowest 12 bits must be the same. |
2294 |
*/ |
2295 |
/* ff 0f bf 20 lda t4,0x0fff */ |
2296 |
/* 03 00 25 44 and t0,t4,t2 */ |
2297 |
/* 04 00 45 44 and t1,t4,t3 */ |
2298 |
/* a3 05 64 40 cmpeq t2,t3,t2 */ |
2299 |
/* 01 00 60 f4 bne t2,<ok> */ |
2300 |
*a++ = 0x20bf0fff; |
2301 |
*a++ = 0x44250003; |
2302 |
*a++ = 0x44450004; |
2303 |
*a++ = 0x406405a3; |
2304 |
jump = a; |
2305 |
*a++ = 0; /* later */ |
2306 |
*addrp = (unsigned char *) a; |
2307 |
bintrans_write_chunkreturn_fail(addrp); |
2308 |
a = (uint32_t *) *addrp; |
2309 |
*jump = 0xf4600000 | (((size_t)a - (size_t)jump - 4) / 4); |
2310 |
break; |
2311 |
|
2312 |
case COP0_STATUS: |
2313 |
/* Only allow updates to the status register if |
2314 |
the interrupt enable bits were changed, but no |
2315 |
other bits! */ |
2316 |
if (mem->bintrans_32bit_only) { |
2317 |
/* R3000 etc. */ |
2318 |
/* t4 = 0x0fe70000; */ |
2319 |
*a++ = 0x20bf0000; |
2320 |
*a++ = 0x24a50fe7; |
2321 |
} else { |
2322 |
/* fe 00 bf 20 lda t4,0x00fe */ |
2323 |
/* ff ff a5 24 ldah t4,-1(t4) */ |
2324 |
*a++ = 0x20bf0000; |
2325 |
*a++ = 0x24a5ffff; |
2326 |
} |
2327 |
|
2328 |
/* 03 00 25 44 and t0,t4,t2 */ |
2329 |
/* 04 00 45 44 and t1,t4,t3 */ |
2330 |
/* a3 05 64 40 cmpeq t2,t3,t2 */ |
2331 |
/* 01 00 60 f4 bne t2,<ok> */ |
2332 |
*a++ = 0x44250003; |
2333 |
*a++ = 0x44450004; |
2334 |
*a++ = 0x406405a3; |
2335 |
jump = a; |
2336 |
*a++ = 0; /* later */ |
2337 |
*addrp = (unsigned char *) a; |
2338 |
bintrans_write_chunkreturn_fail(addrp); |
2339 |
a = (uint32_t *) *addrp; |
2340 |
*jump = 0xf4600000 | (((size_t)a - (size_t)jump - 4) / 4); |
2341 |
|
2342 |
/* If enabling interrupt bits would cause an |
2343 |
exception, then don't do it: */ |
2344 |
ofs = ((size_t)&dummy_cpu.cd.mips.coproc[0]) - (size_t)&dummy_cpu; |
2345 |
*a++ = 0xa4900000 | (ofs & 0xffff); /* ldq t3,coproc[0](a0) */ |
2346 |
ofs = ((size_t)&dummy_coproc.reg[COP0_CAUSE]) - (size_t)&dummy_coproc; |
2347 |
*a++ = 0xa4a40000 | (ofs & 0xffff); /* ldq t4,reg_rd(t3) */ |
2348 |
|
2349 |
/* 02 00 a1 44 and t4,t0,t1 */ |
2350 |
/* 83 16 41 48 srl t1,0x8,t2 */ |
2351 |
/* 04 f0 7f 44 and t2,0xff,t3 */ |
2352 |
*a++ = 0x44a10002; |
2353 |
*a++ = 0x48411683; |
2354 |
*a++ = 0x447ff004; |
2355 |
/* 01 00 80 e4 beq t3,<ok> */ |
2356 |
jump = a; |
2357 |
*a++ = 0; /* later */ |
2358 |
*addrp = (unsigned char *) a; |
2359 |
bintrans_write_chunkreturn_fail(addrp); |
2360 |
a = (uint32_t *) *addrp; |
2361 |
*jump = 0xe4800000 | (((size_t)a - (size_t)jump - 4) / 4); |
2362 |
break; |
2363 |
|
2364 |
default: |
2365 |
/* a3 05 22 40 cmpeq t0,t1,t2 */ |
2366 |
/* 01 00 60 f4 bne t2,<ok> */ |
2367 |
*a++ = 0x402205a3; |
2368 |
jump = a; |
2369 |
*a++ = 0; /* later */ |
2370 |
*addrp = (unsigned char *) a; |
2371 |
bintrans_write_chunkreturn_fail(addrp); |
2372 |
a = (uint32_t *) *addrp; |
2373 |
*jump = 0xf4600000 | (((size_t)a - (size_t)jump - 4) / 4); |
2374 |
} |
2375 |
|
2376 |
*a++ = 0x40201402; /* addq t0,0,t1 */ |
2377 |
|
2378 |
ofs = ((size_t)&dummy_cpu.cd.mips.coproc[0]) - (size_t)&dummy_cpu; |
2379 |
*a++ = 0xa4300000 | (ofs & 0xffff); /* ldq t0,coproc[0](a0) */ |
2380 |
ofs = ((size_t)&dummy_coproc.reg[rd]) - (size_t)&dummy_coproc; |
2381 |
*a++ = 0xb4410000 | (ofs & 0xffff); /* stq t1,reg_rd(t0) */ |
2382 |
} else { |
2383 |
/* mfc: */ |
2384 |
if (!flag64bit) { |
2385 |
*a++ = 0x40401002; /* addl t1,0,t1 */ |
2386 |
} |
2387 |
|
2388 |
*addrp = (unsigned char *) a; |
2389 |
bintrans_move_Alpha_reg_into_MIPS_reg(addrp, ALPHA_T1, rt); |
2390 |
a = (uint32_t *) *addrp; |
2391 |
} |
2392 |
|
2393 |
*addrp = (unsigned char *) a; |
2394 |
|
2395 |
bintrans_write_pc_inc(addrp); |
2396 |
return 1; |
2397 |
} |
2398 |
|
2399 |
|
2400 |
/* |
2401 |
* bintrans_write_instruction__tlb_rfe_etc(): |
2402 |
*/ |
2403 |
static int bintrans_write_instruction__tlb_rfe_etc(unsigned char **addrp, |
2404 |
int itype) |
2405 |
{ |
2406 |
uint32_t *a; |
2407 |
int ofs = 0; |
2408 |
|
2409 |
switch (itype) { |
2410 |
case CALL_TLBWI: |
2411 |
case CALL_TLBWR: |
2412 |
case CALL_TLBP: |
2413 |
case CALL_TLBR: |
2414 |
case CALL_RFE: |
2415 |
case CALL_ERET: |
2416 |
case CALL_BREAK: |
2417 |
case CALL_SYSCALL: |
2418 |
break; |
2419 |
default: |
2420 |
return 0; |
2421 |
} |
2422 |
|
2423 |
a = (uint32_t *) *addrp; |
2424 |
|
2425 |
/* a0 = pointer to the cpu struct */ |
2426 |
|
2427 |
switch (itype) { |
2428 |
case CALL_TLBWI: |
2429 |
case CALL_TLBWR: |
2430 |
/* a1 = 0 for indexed, 1 for random */ |
2431 |
*a++ = 0x223f0000 | (itype == CALL_TLBWR); |
2432 |
break; |
2433 |
case CALL_TLBP: |
2434 |
case CALL_TLBR: |
2435 |
/* a1 = 0 for probe, 1 for read */ |
2436 |
*a++ = 0x223f0000 | (itype == CALL_TLBR); |
2437 |
break; |
2438 |
case CALL_BREAK: |
2439 |
case CALL_SYSCALL: |
2440 |
*a++ = 0x223f0000 | (itype == CALL_BREAK? EXCEPTION_BP : EXCEPTION_SYS); |
2441 |
break; |
2442 |
} |
2443 |
|
2444 |
/* Put PC into the cpu struct (both pc and pc_last). */ |
2445 |
*a++ = 0xb4d00000 | ofs_pc; /* stq t5,"pc"(a0) */ |
2446 |
*a++ = 0xb4d00000 | ofs_pc_last;/* stq t5,"pc_last"(a0) */ |
2447 |
|
2448 |
/* Save a0 and the old return address on the stack: */ |
2449 |
*a++ = 0x23deff80; /* lda sp,-128(sp) */ |
2450 |
|
2451 |
*a++ = 0xb75e0000; /* stq ra,0(sp) */ |
2452 |
*a++ = 0xb61e0008; /* stq a0,8(sp) */ |
2453 |
*a++ = 0xb0fe0018; /* stl t6,24(sp) */ |
2454 |
*a++ = 0xb71e0020; /* stq t10,32(sp) */ |
2455 |
*a++ = 0xb73e0028; /* stq t11,40(sp) */ |
2456 |
*a++ = 0xb51e0030; /* stq t7,48(sp) */ |
2457 |
*a++ = 0xb6de0038; /* stq t8,56(sp) */ |
2458 |
*a++ = 0xb6fe0040; /* stq t9,64(sp) */ |
2459 |
|
2460 |
switch (itype) { |
2461 |
case CALL_TLBP: |
2462 |
case CALL_TLBR: |
2463 |
ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_fast_tlbpr) - (size_t)&dummy_cpu; |
2464 |
break; |
2465 |
case CALL_TLBWR: |
2466 |
case CALL_TLBWI: |
2467 |
ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_fast_tlbwri) - (size_t)&dummy_cpu; |
2468 |
break; |
2469 |
case CALL_RFE: |
2470 |
ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_fast_rfe) - (size_t)&dummy_cpu; |
2471 |
break; |
2472 |
case CALL_ERET: |
2473 |
ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_fast_eret) - (size_t)&dummy_cpu; |
2474 |
break; |
2475 |
case CALL_BREAK: |
2476 |
case CALL_SYSCALL: |
2477 |
ofs = ((size_t)&dummy_cpu.cd.mips.bintrans_simple_exception) - (size_t)&dummy_cpu; |
2478 |
break; |
2479 |
} |
2480 |
|
2481 |
*a++ = 0xa7700000 | ofs; /* ldq t12,0(a0) */ |
2482 |
|
2483 |
/* Call bintrans_fast_tlbwr: */ |
2484 |
*a++ = 0x6b5b4000; /* jsr ra,(t12),<after> */ |
2485 |
|
2486 |
/* Restore the old return address and a0 from the stack: */ |
2487 |
*a++ = 0xa75e0000; /* ldq ra,0(sp) */ |
2488 |
*a++ = 0xa61e0008; /* ldq a0,8(sp) */ |
2489 |
*a++ = 0xa0fe0018; /* ldl t6,24(sp) */ |
2490 |
*a++ = 0xa71e0020; /* ldq t10,32(sp) */ |
2491 |
*a++ = 0xa73e0028; /* ldq t11,40(sp) */ |
2492 |
*a++ = 0xa51e0030; /* ldq t7,48(sp) */ |
2493 |
*a++ = 0xa6de0038; /* ldq t8,56(sp) */ |
2494 |
*a++ = 0xa6fe0040; /* ldq t9,64(sp) */ |
2495 |
|
2496 |
*a++ = 0x23de0080; /* lda sp,128(sp) */ |
2497 |
|
2498 |
/* Load PC from the cpu struct. */ |
2499 |
*a++ = 0xa4d00000 | ofs_pc; /* ldq t5,"pc"(a0) */ |
2500 |
|
2501 |
*addrp = (unsigned char *) a; |
2502 |
|
2503 |
switch (itype) { |
2504 |
case CALL_ERET: |
2505 |
case CALL_BREAK: |
2506 |
case CALL_SYSCALL: |
2507 |
/* Increase the nr of instructions: */ |
2508 |
a = (uint32_t *) *addrp; |
2509 |
*a++ = 0x20e70001; /* lda t6,1(t6) */ |
2510 |
*addrp = (unsigned char *) a; |
2511 |
break; |
2512 |
default: |
2513 |
bintrans_write_pc_inc(addrp); |
2514 |
} |
2515 |
|
2516 |
return 1; |
2517 |
} |
2518 |
|
2519 |
|
2520 |
/* |
2521 |
* bintrans_backend_init(): |
2522 |
* |
2523 |
* This is necessary for broken 2.95.4 compilers on FreeBSD/Alpha 4.9, |
2524 |
* and probably a few others. (For Compaq's CC, and for gcc 3.x, this |
2525 |
* wouldn't be necessary, and the old code would have worked.) |
2526 |
*/ |
2527 |
static void bintrans_backend_init(void) |
2528 |
{ |
2529 |
int size; |
2530 |
uint32_t *p, *q; |
2531 |
|
2532 |
|
2533 |
/* "runchunk": */ |
2534 |
size = 256; /* NOTE: This MUST be enough, or we fail */ |
2535 |
p = (uint32_t *)mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, |
2536 |
MAP_ANON | MAP_PRIVATE, -1, 0); |
2537 |
|
2538 |
/* If mmap() failed, try malloc(): */ |
2539 |
if (p == NULL) { |
2540 |
p = malloc(size); |
2541 |
if (p == NULL) { |
2542 |
fprintf(stderr, "bintrans_backend_init(): out of memory\n"); |
2543 |
exit(1); |
2544 |
} |
2545 |
} |
2546 |
|
2547 |
bintrans_runchunk = (void *)p; |
2548 |
|
2549 |
*p++ = 0x23deffa0; /* lda sp,-0x60(sp) */ |
2550 |
*p++ = 0xb75e0000; /* stq ra,0(sp) */ |
2551 |
*p++ = 0xb53e0008; /* stq s0,8(sp) */ |
2552 |
*p++ = 0xb55e0010; /* stq s1,16(sp) */ |
2553 |
*p++ = 0xb57e0018; /* stq s2,24(sp) */ |
2554 |
*p++ = 0xb59e0020; /* stq s3,32(sp) */ |
2555 |
*p++ = 0xb5be0028; /* stq s4,40(sp) */ |
2556 |
*p++ = 0xb5de0030; /* stq s5,48(sp) */ |
2557 |
*p++ = 0xb5fe0038; /* stq s6,56(sp) */ |
2558 |
*p++ = 0xb7be0058; /* stq gp,0x58(sp) */ |
2559 |
|
2560 |
*p++ = 0xa4d00000 | ofs_pc; /* ldq t5,"pc"(a0) */ |
2561 |
*p++ = 0xa0f00000 | ofs_n; /* ldl t6,"bintrans_instructions_executed"(a0) */ |
2562 |
*p++ = 0xa5100000 | ofs_a0; /* ldq t7,"a0"(a0) */ |
2563 |
*p++ = 0xa6d00000 | ofs_a1; /* ldq t8,"a1"(a0) */ |
2564 |
*p++ = 0xa6f00000 | ofs_s0; /* ldq t9,"s0"(a0) */ |
2565 |
*p++ = 0xa1300000 | ofs_ds; /* ldl s0,"delay_slot"(a0) */ |
2566 |
*p++ = 0xa5500000 | ofs_ja; /* ldq s1,"delay_jmpaddr"(a0) */ |
2567 |
*p++ = 0xa5700000 | ofs_sp; /* ldq s2,"gpr[sp]"(a0) */ |
2568 |
*p++ = 0xa5900000 | ofs_ra; /* ldq s3,"gpr[ra]"(a0) */ |
2569 |
*p++ = 0xa5b00000 | ofs_t0; /* ldq s4,"gpr[t0]"(a0) */ |
2570 |
*p++ = 0xa5d00000 | ofs_t1; /* ldq s5,"gpr[t1]"(a0) */ |
2571 |
*p++ = 0xa5f00000 | ofs_h_s; /* ldq s6,host_store(a0) */ |
2572 |
*p++ = 0xa7100000 | ofs_h_l; /* ldq t10,host_load(a0) */ |
2573 |
*p++ = 0xa7300000 | ofs_v0; /* ldq t11,"gpr[v0]"(a0) */ |
2574 |
|
2575 |
*p++ = 0x6b514000; /* jsr ra,(a1),<back> */ |
2576 |
|
2577 |
*p++ = 0xb4d00000 | ofs_pc; /* stq t5,"pc"(a0) */ |
2578 |
*p++ = 0xb0f00000 | ofs_n; /* stl t6,"bintrans_instructions_executed"(a0) */ |
2579 |
*p++ = 0xb5100000 | ofs_a0; /* stq t7,"a0"(a0) */ |
2580 |
*p++ = 0xb6d00000 | ofs_a1; /* stq t8,"a1"(a0) */ |
2581 |
*p++ = 0xb6f00000 | ofs_s0; /* stq t9,"s0"(a0) */ |
2582 |
*p++ = 0xb1300000 | ofs_ds; /* stl s0,"delay_slot"(a0) */ |
2583 |
*p++ = 0xb5500000 | ofs_ja; /* stq s1,"delay_jmpaddr"(a0) */ |
2584 |
*p++ = 0xb5700000 | ofs_sp; /* stq s2,"gpr[sp]"(a0) */ |
2585 |
*p++ = 0xb5900000 | ofs_ra; /* stq s3,"gpr[ra]"(a0) */ |
2586 |
*p++ = 0xb5b00000 | ofs_t0; /* stq s4,"gpr[t0]"(a0) */ |
2587 |
*p++ = 0xb5d00000 | ofs_t1; /* stq s5,"gpr[t1]"(a0) */ |
2588 |
*p++ = 0xb7300000 | ofs_v0; /* stq t11,"gpr[v0]"(a0) */ |
2589 |
|
2590 |
*p++ = 0xa75e0000; /* ldq ra,0(sp) */ |
2591 |
*p++ = 0xa53e0008; /* ldq s0,8(sp) */ |
2592 |
*p++ = 0xa55e0010; /* ldq s1,16(sp) */ |
2593 |
*p++ = 0xa57e0018; /* ldq s2,24(sp) */ |
2594 |
*p++ = 0xa59e0020; /* ldq s3,32(sp) */ |
2595 |
*p++ = 0xa5be0028; /* ldq s4,40(sp) */ |
2596 |
*p++ = 0xa5de0030; /* ldq s5,48(sp) */ |
2597 |
*p++ = 0xa5fe0038; /* ldq s6,56(sp) */ |
2598 |
*p++ = 0xa7be0058; /* ldq gp,0x58(sp) */ |
2599 |
*p++ = 0x23de0060; /* lda sp,0x60(sp) */ |
2600 |
*p++ = 0x6bfa8000; /* ret */ |
2601 |
|
2602 |
|
2603 |
/* "jump to 32bit pc": */ |
2604 |
size = 128; /* WARNING! Don't make this too small. */ |
2605 |
p = (uint32_t *)mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, |
2606 |
MAP_ANON | MAP_PRIVATE, -1, 0); |
2607 |
|
2608 |
/* If mmap() failed, try malloc(): */ |
2609 |
if (p == NULL) { |
2610 |
p = malloc(size); |
2611 |
if (p == NULL) { |
2612 |
fprintf(stderr, "bintrans_backend_init(): out of memory\n"); |
2613 |
exit(1); |
2614 |
} |
2615 |
} |
2616 |
|
2617 |
bintrans_jump_to_32bit_pc = (void *)p; |
2618 |
|
2619 |
/* Don't execute too many instructions: */ |
2620 |
*p++ = 0x205f0000 | (N_SAFE_BINTRANS_LIMIT-1); /* lda t1,safe-1 */ |
2621 |
|
2622 |
*p++ = 0x40e20da1; /* cmple t6,t1,t0 */ |
2623 |
q = p; /* *q is updated later */ |
2624 |
*p++ = 0xe4200001; /* beq ret (far below) */ |
2625 |
|
2626 |
/* |
2627 |
* Special case for 32-bit addressing: |
2628 |
* |
2629 |
* t1 = 1023; |
2630 |
* t2 = ((pc >> 22) & t1) * sizeof(void *); |
2631 |
* t3 = ((pc >> 12) & t1) * sizeof(void *); |
2632 |
* t1 = pc & 4095; |
2633 |
*/ |
2634 |
*p++ = 0x205f1ff8; /* lda t1,1023 * 8 */ |
2635 |
*p++ = 0x48c27683; /* srl t5,19,t2 */ |
2636 |
*p++ = 0x48c13684; /* srl t5, 9,t3 */ |
2637 |
|
2638 |
*p++ = 0x44620003; /* and t2,t1,t2 */ |
2639 |
|
2640 |
/* ldq a2, vaddr_to_hostaddr_table0(a0) */ |
2641 |
*p++ = 0xa6500000 | ofs_tbl0; |
2642 |
|
2643 |
/* a3 = tbl0[t2] (load entry from tbl0) */ |
2644 |
*p++ = 0x40720412; /* addq t2,a2,a2 */ |
2645 |
*p++ = 0x44820004; /* and t3,t1,t3 */ |
2646 |
*p++ = 0xa6720000; /* ldq a3,0(a2) */ |
2647 |
*p++ = 0x205f0ffc; /* lda t1,0xffc */ |
2648 |
|
2649 |
/* |
2650 |
* a3 = tbl1[t3] (load entry from tbl1 (which is a3)) |
2651 |
*/ |
2652 |
*p++ = 0x42640413; /* addq a3,t3,a3 */ |
2653 |
|
2654 |
*p++ = 0xa6730000 | ofs_c0; /* ldq a3,chunks[0](a3) */ |
2655 |
|
2656 |
*p++ = 0x44c20002; /* and t5,t1,t1 */ |
2657 |
|
2658 |
/* |
2659 |
* NULL? Then just return. |
2660 |
*/ |
2661 |
*p++ = 0xf6600001; /* bne a3,<ok> */ |
2662 |
*p++ = 0x6bfa8001; /* ret */ |
2663 |
|
2664 |
*p++ = 0x40530402; /* addq t1,a3,t1 */ |
2665 |
*p++ = 0xa0220000; /* ldl t0,0(t1) */ |
2666 |
|
2667 |
*p++ = 0xa4700000 | ofs_cb; /* ldq t2,chunk_base_address(a0) */ |
2668 |
|
2669 |
/* No translation? Then return. */ |
2670 |
*p++ = 0xe4200002; /* beq t0,<skip> */ |
2671 |
|
2672 |
*p++ = 0x40230401; /* addq t0,t2,t0 */ |
2673 |
*p++ = 0x6be10000; /* jmp (t0) */ |
2674 |
|
2675 |
/* Now, update *q to point here: */ |
2676 |
*q = 0xe4200000 | (((size_t)p - (size_t)q)/4 - 1); /* beq ret */ |
2677 |
|
2678 |
/* Return to the main translation loop. */ |
2679 |
*p++ = 0x6bfa8001; /* ret */ |
2680 |
} |
2681 |
|