/[gxemul]/upstream/0.4.6/experiments/native_cc_ld_test.i
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /upstream/0.4.6/experiments/native_cc_ld_test.i

Parent Directory Parent Directory | Revision Log Revision Log


Revision 43 - (show annotations)
Mon Oct 8 16:22:43 2007 UTC (16 years, 7 months ago) by dpavlin
File size: 3600 byte(s)
0.4.6
1 /*
2 Idea 2007-06-06 on how to use a C compiler + linker as a native code generation backend.
3
4 gcc native_cc_ld_test.i -Wall -O3 -fomit-frame-pointer -fpeephole -fno-builtin -c
5 ld native_cc_ld_test.o -o native_cc_ld_test -e f -Ttext 0x1234560
6
7 objdump -d native_cc_ld_test
8
9 The text part of that binary should then be easy to just copy directly into the
10 translation cache. (The address 0x12340040 in the example is where I want the code
11 fragment to end up in the cache.) It might even be possible to only do the cc step,
12 and skip the ld step, if the code is position-independent.
13
14
15 A couple of tricks are used:
16
17 o) Note that the cpu and ic structs only contain just enough to mimic the cpu and ic
18 structs in the emulator itself. The dummy fillers are there to make sure that
19 the interesting fields (reg, next_ic, and ninstrs) end up at the correct offsets.
20
21 o) No #include directives are needed, if reasonable types are used (int, unsigned long long,
22 etc). These will have to be detected before running the compiler. Also, this makes it
23 possible to skip the preprocessor, i.e. output a .i file instead of a .c file.
24
25 o) Values in the cpu struct that are used are first loaded into local variables, used,
26 and then stored back before any kind of return path (e.g. in a generic load/store,
27 or at the end of the function, or on a non-samepage branch).
28
29 o) Delay slots are handled by setting a "condition", then executing the next instruction,
30 then branching. TODO: How about instructions in delay slots which may cause exceptions?
31
32 o) Samepage-branches can be implemented using C labels (goto).
33
34
35 Good:
36
37 o) Somewhat portable. The same mechanism could be used for amd64, Alpha, MIPS, and most likely
38 several other host architectures.
39
40 o) A good optimizing compiler will generate very good code, probably much better code than
41 I would be able to generate manually.
42
43
44 Bad:
45
46 o) Very high overhead. Calling cc + ld on my laptop takes 1/30th of a second, which is quite high.
47 On my older Alpha workstation, it takes about 1/10th of a second. This means that the
48 mechanism which decides whether or not to actually natively translate a block of code must
49 take into account how much the overhead is vs how much time will be saved etc.
50
51 */
52
/* Forward declaration: struct ic's handler signature refers to struct cpu. */
struct cpu;

/*
 * Minimal stand-in for the emulator's instruction-call struct: a handler
 * function pointer followed by three argument words. Only the layout is
 * meant to matter here; the field order must not be changed.
 */
struct ic {
	void (*f)(struct cpu *, struct ic *);	/* handler invoked as f(cpu, ic) */
	long arg[3];				/* per-instruction arguments */
};
59
/*
 * Cut-down mirror of the emulator's cpu struct. The dummy* arrays are pure
 * padding: they exist only so that reg, next_ic and ninstrs land at the
 * offsets this experiment expects. Do not reorder or resize any member.
 */
struct cpu {
	char dummy[800];		/* padding in front of reg[] */
	int reg[32];			/* guest general-purpose registers */
	char dummy2[80];		/* padding in front of next_ic */
	struct ic *next_ic;		/* instruction call to run next */
	char dummy3[120];		/* padding in front of ninstrs */
	int ninstrs;			/* executed-instruction counter */

	/*
	 * Per-page host pointer tables with 1 << 20 (= 1048576) entries each.
	 * f() indexes host_store with (address >> 12) and treats a null
	 * entry as "no direct host page".
	 */
	void *host_load[1 << 20];
	void *host_store[1 << 20];
};
70
71 void f(struct cpu *cpu, struct ic *ic)
72 {
73 int cond0;
74 void (*g0)(struct cpu *, struct ic *) = (void (*)(struct cpu *, struct ic *)) 0x123801234560ULL;
75
76 unsigned int r2 = cpu->reg[2];
77 unsigned int r3 = cpu->reg[3];
78 unsigned int r4 = cpu->reg[4];
79 unsigned int r9 = cpu->reg[9];
80
81 unsigned int addr0;
82 unsigned char *page0;
83
84 unsigned int ninstrs = cpu->ninstrs;
85
86 ninstrs --;
87
88 L0:
89
90 /* st.b r3,r0,r2 */
91 addr0 = r2;
92 page0 = (unsigned char *) cpu->host_store[addr0 >> 12];
93 if (page0 == (void *)0) {
94 cpu->reg[2] = r2;
95 cpu->reg[3] = r3;
96 cpu->reg[4] = r4;
97 cpu->reg[9] = r9;
98 cpu->ninstrs = ninstrs;
99 g0(cpu, ic + 0);
100 return;
101 }
102
103 page0[addr0 & 0xfff] = r3;
104
105 ninstrs ++;
106
107 /* addu r2,r2,1 */
108 r2 = r2 + 1;
109 ninstrs ++;
110
111 /* or r9,r0,r4 */
112 r9 = r4;
113 ninstrs ++;
114
115 /* bcnd.n gt0,r9,L0 */
116 /* subu r4, r4, 1 */
117 cond0 = (int)r9 > 0;
118 r4 = r4 - 1;
119 ninstrs += 2;
120 if (cond0)
121 goto L0;
122
123 cpu->reg[2] = r2;
124 cpu->reg[3] = r3;
125 cpu->reg[4] = r4;
126 cpu->reg[9] = r9;
127 cpu->ninstrs = ninstrs;
128
129 cpu->next_ic = ic + 5;
130 }

  ViewVC Help
Powered by ViewVC 1.1.26