/[gxemul]/upstream/0.4.6/experiments/native_cc_ld_test.i
This is a repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /upstream/0.4.6/experiments/native_cc_ld_test.i

Parent Directory Parent Directory | Revision Log Revision Log


Revision 43 - (hide annotations)
Mon Oct 8 16:22:43 2007 UTC (16 years, 7 months ago) by dpavlin
File size: 3600 byte(s)
0.4.6
1 dpavlin 42 /*
2     Idea 2007-06-06 on how to use a C compiler + linker as a native code generation backend.
3    
4     gcc native_cc_ld_test.i -Wall -O3 -fomit-frame-pointer -fpeephole -fno-builtin -c
5     ld native_cc_ld_test.o -o native_cc_ld_test -e f -Ttext 0x1234560
6    
7     objdump -d native_cc_ld_test
8    
9     The text part of that binary should then be easy to just copy directly into the
10     translation cache. (The address 0x12340040 in the example is where I want the code
11     fragment to end up in the cache.) It might even be possible to only do the cc step,
12     and skip the ld step, if the code is position-independent.
13    
14    
15     A couple of tricks are used:
16    
17     o) Note that the cpu and ic structs only contain just enough to mimic the cpu and ic
18     structs in the emulator itself. The dummy fillers are there to make sure that
19     the interesting fields (reg, next_ic, and ninstrs) end up at the correct offsets.
20    
21     o) No #include directives are needed, if reasonable types are used (int, unsigned long long,
22     etc). These will have to be detected before running the compiler. Also, this makes it
23     possible to skip the preprocessor, i.e. output a .i file instead of a .c file.
24    
25     o) Values in the cpu struct that are used are first loaded into local variables, used,
26     and then stored back before any kind of return path (e.g. in a generic load/store,
27     or at the end of the function, or on a non-samepage branch).
28    
29     o) Delay slots are handled by setting a "condition", then executing the next instruction,
30     then branching. TODO: How about instructions in delay slots which may cause exceptions?
31    
32     o) Samepage-branches can be implemented using C labels (goto).
33    
34    
35     Good:
36    
37     o) Somewhat portable. The same mechanism could be used for amd64, Alpha, MIPS, and most likely
38     several other host architectures.
39    
40     o) A good optimizing compiler will generate very good code, probably much better code than
41     I would be able to generate manually.
42    
43    
44     Bad:
45    
46     o) Very high overhead. Calling cc + ld on my laptop takes 1/30th of a second, which is quite high.
47     On my older Alpha workstation, it takes about 1/10th of a second. This means that the
48     mechanism which decides whether or not to actually natively translate a block of code must
49     take into account how much the overhead is vs how much time will be saved etc.
50    
51     */
52    
53     struct cpu; /* forward declaration: struct ic's handler pointer takes a struct cpu * before struct cpu is defined */
54     /* Cut-down stand-in for the emulator's ic struct (see the file header comment). f() does pointer arithmetic on it (ic + 0, ic + 5), which depends on this struct's size. */
55     struct ic {
56     void (*f)(struct cpu *, struct ic*); /* handler pointer with the same (cpu, ic) signature as f() below */
57     long arg[3]; /* three long slots; not referenced by f() in this file */
58     };
59    
60     struct cpu { /* cut-down stand-in for the emulator's cpu struct; per the file header comment, the dummy arrays only pad reg, next_ic and ninstrs to the intended offsets */
61     char dummy[800]; /* filler before reg */
62     int reg[32]; /* guest registers; f() caches reg[2], reg[3], reg[4] and reg[9] in locals */
63     char dummy2[80]; /* filler between reg and next_ic */
64     struct ic* next_ic; /* set to ic + 5 by f() when the translated loop finishes */
65     char dummy3[120]; /* filler between next_ic and ninstrs */
66     int ninstrs; /* instruction counter; f() reads it on entry and writes it back on both exit paths */
67     void *host_load[1048576]; /* 2^20 pointers; not referenced by f() in this file (presumably the load-side twin of host_store -- confirm) */
68     void *host_store[1048576]; /* 2^20 pointers, indexed by (address >> 12) in f(); a null entry makes f() call its fallback handler */
69     };
70    
71     void f(struct cpu *cpu, struct ic *ic) /* Example translated fragment: a five-instruction guest loop (st.b, addu, or, bcnd.n plus its delay-slot subu) written as plain C. */
72     {
73     int cond0; /* branch condition, latched before the delay-slot instruction runs */
74     void (*g0)(struct cpu *, struct ic *) = (void (*)(struct cpu *, struct ic *)) 0x123801234560ULL; /* slow-path handler at a fixed address (presumably the emulator's generic store -- confirm) */
75     /* Cache the guest registers used by the fragment in locals; both exit paths store them back. */
76     unsigned int r2 = cpu->reg[2];
77     unsigned int r3 = cpu->reg[3];
78     unsigned int r4 = cpu->reg[4];
79     unsigned int r9 = cpu->reg[9];
80     /* Scratch for the byte store: the guest address and the host page pointer looked up for it. */
81     unsigned int addr0;
82     unsigned char *page0;
83    
84     unsigned int ninstrs = cpu->ninstrs;
85     /* NOTE(review): the reason for this initial decrement is not visible here -- presumably the caller counts one instruction itself; confirm. */
86     ninstrs --;
87     /* L0 is the target of the bcnd.n below: a same-page branch expressed as a C label. */
88     L0:
89    
90     /* st.b r3,r0,r2 */
91     addr0 = r2;
92     page0 = (unsigned char *) cpu->host_store[addr0 >> 12]; /* table index is the address with its low 12 bits shifted out */
93     if (page0 == (void *)0) { /* null entry: no direct host pointer, take the slow path */
94     cpu->reg[2] = r2; /* write the cached registers and counter back before calling out */
95     cpu->reg[3] = r3;
96     cpu->reg[4] = r4;
97     cpu->reg[9] = r9;
98     cpu->ninstrs = ninstrs;
99     g0(cpu, ic + 0); /* hand the store instruction (ic + 0) to the fallback handler */
100     return; /* cpu->next_ic is left untouched on this path */
101     }
102     /* Fast path: store r3 (converted to unsigned char) at the low-12-bit offset within the page. */
103     page0[addr0 & 0xfff] = r3;
104    
105     ninstrs ++;
106    
107     /* addu r2,r2,1 */
108     r2 = r2 + 1;
109     ninstrs ++;
110    
111     /* or r9,r0,r4 */
112     r9 = r4;
113     ninstrs ++;
114    
115     /* bcnd.n gt0,r9,L0 */
116     /* subu r4, r4, 1 */
117     cond0 = (int)r9 > 0; /* evaluate the branch condition first, with r9 treated as signed */
118     r4 = r4 - 1; /* delay-slot instruction: executed whether or not the branch is taken */
119     ninstrs += 2; /* counts both the bcnd.n and the delay-slot subu */
120     if (cond0)
121     goto L0;
122     /* Loop finished: store the cached registers and the counter back into the cpu struct. */
123     cpu->reg[2] = r2;
124     cpu->reg[3] = r3;
125     cpu->reg[4] = r4;
126     cpu->reg[9] = r9;
127     cpu->ninstrs = ninstrs;
128     /* Continue after the five instructions covered by this fragment. */
129     cpu->next_ic = ic + 5;
130     }

  ViewVC Help
Powered by ViewVC 1.1.26