/[pearpc]/src/system/arch/x86/vaccel.S
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /src/system/arch/x86/vaccel.S

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1 - (hide annotations)
Wed Sep 5 17:11:21 2007 UTC (16 years, 7 months ago) by dpavlin
File size: 8518 byte(s)
import upstream CVS
1 dpavlin 1 /*
2     * PearPC
3     * vaccel.S
4     *
5     * Copyright (C) 2004-2006 Sebastian Biallas (sb@biallas.net)
6     *
7     * This program is free software; you can redistribute it and/or modify
8     * it under the terms of the GNU General Public License version 2 as
9     * published by the Free Software Foundation.
10     *
11     * This program is distributed in the hope that it will be useful,
12     * but WITHOUT ANY WARRANTY; without even the implied warranty of
13     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14     * GNU General Public License for more details.
15     *
16     * You should have received a copy of the GNU General Public License
17     * along with this program; if not, write to the Free Software
18     * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19     */
20    
/* Symbol prefix glued in front of every exported or external name; empty unless the build defines it. */
#if !defined(PREFIX)
#define PREFIX
#endif

/*
 * EXPORT(name) expands to ".globl <prefix>name; <prefix>name".
 * It is used as "EXPORT(name):", so the colon at the use site turns the
 * second copy of the name into the label.  The two helper levels make
 * sure PREFIX is macro-expanded before the tokens are pasted.
 */
#define EXPORT(name)		EXPORT2(PREFIX, name)
#define EXPORT2(pfx, name)	EXPORT3(pfx, name)
#define EXPORT3(pfx, name)	.globl pfx##name; pfx##name

/* EXTERN(name) expands to the bare prefixed name, <prefix>name. */
#define EXTERN(name)		EXTERN2(PREFIX, name)
#define EXTERN2(pfx, name)	EXTERN3(pfx, name)
#define EXTERN3(pfx, name)	pfx##name

	.intel_syntax

	.text
36    
37     .balign 16
38     d1: .long 0x00ff00ff
39     .long 0x00ff00ff
40     .long 0x00ff00ff
41     .long 0x00ff00ff
42     d2: .long 0xff00ff00
43     .long 0xff00ff00
44     .long 0xff00ff00
45     .long 0xff00ff00
46    
47     _2be555_mask_r: .long 0x7c007c00
48     .long 0x7c007c00
49     .long 0x7c007c00
50     .long 0x7c007c00
51     _2be555_mask_g: .long 0x03e003e0
52     .long 0x03e003e0
53     .long 0x03e003e0
54     .long 0x03e003e0
55     _2be555_mask_b: .long 0x001f001f
56     .long 0x001f001f
57     .long 0x001f001f
58     .long 0x001f001f
59    
60     .balign 16
61     #################################################################################
62     ##
63     ## IN: eax -- number of pixels to convert
64     ## %edx -- input
65     ## %ecx -- output
66    
67     EXPORT(x86_mmx_convert_2be555_to_2le555):
68     add %eax, 7
69     shr %eax, 3 # we can convert 8 pixels at a time
70     movq %mm5, [d1]
71     movq %mm6, [d2]
72     jz 2f
73     1:
74     movq %mm1, [%edx]
75     movq %mm3, [%edx+8]
76    
77     ## convert big to little endian
78     movq %mm2, %mm1
79     movq %mm4, %mm3
80     pand %mm1, %mm5
81     pand %mm2, %mm6
82     pand %mm3, %mm5
83     pand %mm4, %mm6
84     psllw %mm1, 8
85     psrlw %mm2, 8
86     psllw %mm3, 8
87     psrlw %mm4, 8
88     por %mm1, %mm2
89     por %mm3, %mm4
90    
91     movq [%ecx], %mm1
92     movq [%ecx+8], %mm3
93     add %edx, 16
94     add %ecx, 16
95     dec %eax
96     jnz 1b
97    
98     emms
99     2:
100     ret
101    
102     .balign 16
103     #################################################################################
104     ##
105     ## IN: %eax -- number of pixels to convert
106     ## %edx -- input
107     ## %ecx -- output
108    
109     EXPORT(x86_mmx_convert_2be555_to_2le565):
110     add %eax, 7
111     shr %eax, 3 # we can convert 8 pixels at a time
112     movq %mm0, [d1]
113     movq %mm7, [d2]
114     jz 2f
115     1:
116     movq %mm1, [%edx]
117     movq %mm3, [%edx+8]
118     ## convert big to little endian
119     movq %mm2, %mm1
120     movq %mm4, %mm3
121     pand %mm1, %mm0
122     pand %mm2, %mm7
123     pand %mm3, %mm0
124     pand %mm4, %mm7
125     psllw %mm1, 8
126     psrlw %mm2, 8
127     psllw %mm3, 8
128     psrlw %mm4, 8
129     por %mm1, %mm2
130     por %mm4, %mm3
131    
132     movq %mm2, %mm1
133     movq %mm3, %mm1
134     movq %mm5, %mm4
135     movq %mm6, %mm4
136     pand %mm1, [_2be555_mask_r]
137     pand %mm2, [_2be555_mask_g]
138     pand %mm3, [_2be555_mask_b]
139     pand %mm4, [_2be555_mask_r]
140     pand %mm5, [_2be555_mask_g]
141     pand %mm6, [_2be555_mask_b]
142     psllw %mm1, 1 # red
143     psllw %mm2, 1 # green
144     # psllw %mm3, 0 # blue
145     psllw %mm4, 1 # red
146     psllw %mm5, 1 # green
147     # psllw %mm6, 0 # blue
148     por %mm1, %mm2
149     por %mm4, %mm5
150     por %mm1, %mm3
151     por %mm4, %mm6
152     movq [%ecx], %mm1
153     movq [%ecx+8], %mm4
154     add %edx, 16
155     add %ecx, 16
156     dec %eax
157     jnz 1b
158    
159     emms
160     2:
161     ret
162    
163     .balign 16
164     #################################################################################
165     ##
166     ## IN: %eax -- number of pixels to convert
167     ## %edx -- input
168     ## %ecx -- output
169    
170     EXPORT(x86_mmx_convert_2be555_to_4le888):
171     add %eax, 3
172     shr %eax, 2 # we can convert 4 pixels at a time
173     movq %mm7, [d1]
174     jz 2f
175    
176     pxor %mm0, %mm0
177     1:
178     movq %mm1, [%edx]
179    
180     ## convert big to little endian
181     movq %mm3, %mm1
182     pand %mm1, %mm7
183     pand %mm3, [d2]
184     psllw %mm1, 8
185     psrlw %mm3, 8
186     por %mm1, %mm3
187    
188     movq %mm2, %mm1
189     movq %mm3, %mm1
190     pand %mm1, [_2be555_mask_r]
191     pand %mm2, [_2be555_mask_g]
192     pand %mm3, [_2be555_mask_b]
193     movq %mm4, %mm1
194     movq %mm5, %mm2
195     movq %mm6, %mm3
196     punpcklwd %mm1, %mm0
197     punpcklwd %mm2, %mm0
198     punpcklwd %mm3, %mm0
199     punpckhwd %mm4, %mm0
200     punpckhwd %mm5, %mm0
201     punpckhwd %mm6, %mm0
202     pslld %mm1, 16-10+3 # red
203     pslld %mm2, 8-5+3 # green
204     pslld %mm3, 0+3 # blue
205     pslld %mm4, 16-10+3 # red
206     pslld %mm5, 8-5+3 # green
207     pslld %mm6, 0+3 # blue
208     por %mm1, %mm2
209     por %mm1, %mm3
210     por %mm4, %mm5
211     por %mm4, %mm6
212     movq [%ecx], %mm1
213     movq [%ecx+8], %mm4
214     add %edx, 8
215     add %ecx, 16
216     dec %eax
217     jnz 1b
218    
219     2:
220     emms
221     ret
222    
223    
224     .balign 16
225     #################################################################################
226     ##
227     ## IN: %eax -- number of pixels to convert
228     ## %edx -- input
229     ## %ecx -- output
230    
231     EXPORT(x86_convert_4be888_to_4le888):
232     add %eax, 3
233     shr %eax, 2 # we can convert 4 pixels at a time
234     jz 2f
235    
236     push %ebx
237     push %ebp
238     push %esi
239     push %edi
240     1:
241     mov %ebx, [%edx]
242     mov %ebp, [%edx+4]
243     mov %esi, [%edx+8]
244     mov %edi, [%edx+12]
245     ## convert big to little endian
246     bswap %ebx
247     bswap %ebp
248     bswap %esi
249     bswap %edi
250     add %edx, 16
251     mov [%ecx], %ebx
252     mov [%ecx+4], %ebp
253     mov [%ecx+8], %esi
254     mov [%ecx+12], %edi
255     add %ecx, 16
256     dec %eax
257     jnz 1b
258    
259     pop %edi
260     pop %esi
261     pop %ebp
262     pop %ebx
263     2:
264     ret
265    
266     .balign 16
267     #################################################################################
268     ##
269     ## IN: eax -- number of pixels to convert
270     ## %edx -- input
271     ## %ecx -- output
272    
273     EXPORT(x86_sse2_convert_2be555_to_2le555):
274     add %eax, 15
275     shr %eax, 4 # we can convert 16 pixels at a time
276     movdqa %xmm5, [d1]
277     movdqa %xmm6, [d2]
278     jz 2f
279     1:
280     movdqa %xmm1, [%edx]
281     movdqa %xmm3, [%edx+16]
282    
283     ## convert big to little endian
284     movdqa %xmm2, %xmm1
285     movdqa %xmm4, %xmm3
286     pand %xmm1, %xmm5
287     pand %xmm2, %xmm6
288     pand %xmm3, %xmm5
289     pand %xmm4, %xmm6
290     psllw %xmm1, 8
291     psrlw %xmm2, 8
292     psllw %xmm3, 8
293     psrlw %xmm4, 8
294     por %xmm1, %xmm2
295     por %xmm3, %xmm4
296    
297     movdqa [%ecx], %xmm1
298     movdqa [%ecx+16], %xmm3
299     add %edx, 32
300     add %ecx, 32
301     dec %eax
302     jnz 1b
303    
304     2:
305     ret
306    
307     .balign 16
308     #################################################################################
309     ##
310     ## IN: %eax -- number of pixels to convert
311     ## %edx -- input
312     ## %ecx -- output
313    
314     EXPORT(x86_sse2_convert_2be555_to_2le565):
315     add %eax, 15
316     shr %eax, 4 # we can convert 16 pixels at a time
317     movdqa %xmm0, [d1]
318     movdqa %xmm7, [d2]
319     jz 2f
320     1:
321     movdqa %xmm1, [%edx]
322     movdqa %xmm3, [%edx+16]
323     ## convert big to little endian
324     movdqa %xmm2, %xmm1
325     movdqa %xmm4, %xmm3
326     pand %xmm1, %xmm0
327     pand %xmm2, %xmm7
328     pand %xmm3, %xmm0
329     pand %xmm4, %xmm7
330     psllw %xmm1, 8
331     psrlw %xmm2, 8
332     psllw %xmm3, 8
333     psrlw %xmm4, 8
334     por %xmm1, %xmm2
335     por %xmm4, %xmm3
336    
337     movdqa %xmm2, %xmm1
338     movdqa %xmm3, %xmm1
339     movdqa %xmm5, %xmm4
340     movdqa %xmm6, %xmm4
341     pand %xmm1, [_2be555_mask_r]
342     pand %xmm2, [_2be555_mask_g]
343     pand %xmm3, [_2be555_mask_b]
344     pand %xmm4, [_2be555_mask_r]
345     pand %xmm5, [_2be555_mask_g]
346     pand %xmm6, [_2be555_mask_b]
347     psllw %xmm1, 1 # red
348     psllw %xmm2, 1 # green
349     # psllw %xmm3, 0 # blue
350     psllw %xmm4, 1 # red
351     psllw %xmm5, 1 # green
352     # psllw %xmm6, 0 # blue
353     por %xmm1, %xmm2
354     por %xmm4, %xmm5
355     por %xmm1, %xmm3
356     por %xmm4, %xmm6
357     movdqa [%ecx], %xmm1
358     movdqa [%ecx+16], %xmm4
359     add %edx, 32
360     add %ecx, 32
361     dec %eax
362     jnz 1b
363     2:
364     ret
365    
366     .balign 16
367     #################################################################################
368     ##
369     ## IN: %eax -- number of pixels to convert
370     ## %edx -- input
371     ## %ecx -- output
372    
373     EXPORT(x86_sse2_convert_2be555_to_4le888):
374     add %eax, 3
375     shr %eax, 3 # we can convert 8 pixels at a time
376     movdqa %xmm7, [d1]
377     jz 2f
378    
379     pxor %xmm0, %xmm0
380     1:
381     movdqa %xmm1, [%edx]
382    
383     ## convert big to little endian
384     movdqa %xmm3, %xmm1
385     pand %xmm1, %xmm7
386     pand %xmm3, [d2]
387     psllw %xmm1, 8
388     psrlw %xmm3, 8
389     por %xmm1, %xmm3
390    
391     movdqa %xmm2, %xmm1
392     movdqa %xmm3, %xmm1
393     pand %xmm1, [_2be555_mask_r]
394     pand %xmm2, [_2be555_mask_g]
395     pand %xmm3, [_2be555_mask_b]
396     movdqa %xmm4, %xmm1
397     movdqa %xmm5, %xmm2
398     movdqa %xmm6, %xmm3
399     punpcklwd %xmm1, %xmm0
400     punpcklwd %xmm2, %xmm0
401     punpcklwd %xmm3, %xmm0
402     punpckhwd %xmm4, %xmm0
403     punpckhwd %xmm5, %xmm0
404     punpckhwd %xmm6, %xmm0
405     pslld %xmm1, 16-10+3 # red
406     pslld %xmm2, 8-5+3 # green
407     pslld %xmm3, 0+3 # blue
408     pslld %xmm4, 16-10+3 # red
409     pslld %xmm5, 8-5+3 # green
410     pslld %xmm6, 0+3 # blue
411     por %xmm1, %xmm2
412     por %xmm1, %xmm3
413     por %xmm4, %xmm5
414     por %xmm4, %xmm6
415     movdqa [%ecx], %xmm1
416     movdqa [%ecx+16], %xmm4
417     add %edx, 16
418     add %ecx, 32
419     dec %eax
420     jnz 1b
421     2:
422     ret
423    

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26