/[pearpc]/src/system/arch/x86/vaccel.S
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /src/system/arch/x86/vaccel.S

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1 - (show annotations)
Wed Sep 5 17:11:21 2007 UTC (16 years, 6 months ago) by dpavlin
File size: 8518 byte(s)
import upstream CVS
1 /*
2 * PearPC
3 * vaccel.S
4 *
5 * Copyright (C) 2004-2006 Sebastian Biallas (sb@biallas.net)
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */
20
/*
 * Symbol decoration.
 *
 * PREFIX is pasted in front of every symbol named through the two macros
 * below.  It defaults to nothing unless it was already defined before this
 * point (presumably by the build, to match the C symbol naming of the
 * target -- confirm against the build system).
 */
#ifndef PREFIX
#define PREFIX
#endif

/*
 * EXTERN(sym) expands to PREFIX pasted onto sym.
 *
 * EXPORT(sym) declares that decorated name global and then repeats it, so
 * a use site writes "EXPORT(name):" to open the label.
 *
 * The *2 level exists so that PREFIX is macro-expanded before the *3 level
 * pastes it onto the symbol.
 */
#define EXTERN3(p, sym)		p##sym
#define EXTERN2(p, sym)		EXTERN3(p, sym)
#define EXTERN(sym)		EXTERN2(PREFIX, sym)

#define EXPORT3(p, sym)		.globl p##sym; p##sym
#define EXPORT2(p, sym)		EXPORT3(p, sym)
#define EXPORT(sym)		EXPORT2(PREFIX, sym)
32
.intel_syntax

.text

## Constant tables shared by the pixel converters in this file.  Every table
## repeats one 32-bit pattern four times (16 bytes), which covers both the
## 8-byte MMX accesses and the 16-byte SSE2 accesses.  The tables follow each
## other without gaps after the .balign, so each of them starts on a 16-byte
## boundary.

.balign 16
## selects the low byte of every 16-bit word
d1:		.long	0x00ff00ff, 0x00ff00ff, 0x00ff00ff, 0x00ff00ff
## selects the high byte of every 16-bit word
d2:		.long	0xff00ff00, 0xff00ff00, 0xff00ff00, 0xff00ff00

## the three 5-bit colour fields of a 555 pixel, once per 16-bit word
_2be555_mask_r:	.long	0x7c007c00, 0x7c007c00, 0x7c007c00, 0x7c007c00	# bits 10-14
_2be555_mask_g:	.long	0x03e003e0, 0x03e003e0, 0x03e003e0, 0x03e003e0	# bits 5-9
_2be555_mask_b:	.long	0x001f001f, 0x001f001f, 0x001f001f, 0x001f001f	# bits 0-4
59
.balign 16
#################################################################################
##
##	Byte-swap 16-bit pixels (big-endian 555 to little-endian 555) with MMX.
##
##	IN:	%eax -- number of pixels to convert
##		%edx -- input
##		%ecx -- output
##
##	The pixel count is rounded up to a multiple of 8, so the last
##	iteration may read and write up to 7 pixels past the requested count.
##	Clobbers %eax, %ecx, %edx, %mm1-%mm6 and the flags.

EXPORT(x86_mmx_convert_2be555_to_2le555):
	add	%eax, 7
	shr	%eax, 3				# we can convert 8 pixels at a time
	movq	%mm5, [d1]			# low-byte mask (movq keeps the flags of shr)
	movq	%mm6, [d2]			# high-byte mask
	jz	2f				# zero iterations requested
1:
	movq	%mm1, [%edx]
	movq	%mm3, [%edx+8]

	## convert big to little endian
	movq	%mm2, %mm1
	movq	%mm4, %mm3
	pand	%mm1, %mm5
	pand	%mm2, %mm6
	pand	%mm3, %mm5
	pand	%mm4, %mm6
	psllw	%mm1, 8				# low byte moves up
	psrlw	%mm2, 8				# high byte moves down
	psllw	%mm3, 8
	psrlw	%mm4, 8
	por	%mm1, %mm2
	por	%mm3, %mm4

	movq	[%ecx], %mm1
	movq	[%ecx+8], %mm3
	add	%edx, 16
	add	%ecx, 16
	dec	%eax
	jnz	1b

2:
	## The mask loads above write MMX registers even when the loop is
	## skipped, so emms has to run on the early-exit path as well.
	emms
	ret
101
.balign 16
#################################################################################
##
##	Convert 16-bit big-endian 555 pixels to little-endian 565 with MMX.
##
##	IN:	%eax -- number of pixels to convert
##		%edx -- input
##		%ecx -- output
##
##	Every pixel is byte-swapped, then red and green are moved up by one
##	bit; the extra low green bit of the 565 layout is left zero.
##	The pixel count is rounded up to a multiple of 8, so the last
##	iteration may read and write up to 7 pixels past the requested count.
##	Clobbers %eax, %ecx, %edx, %mm0-%mm7 and the flags.

EXPORT(x86_mmx_convert_2be555_to_2le565):
	add	%eax, 7
	shr	%eax, 3				# we can convert 8 pixels at a time
	movq	%mm0, [d1]			# low-byte mask (movq keeps the flags of shr)
	movq	%mm7, [d2]			# high-byte mask
	jz	2f				# zero iterations requested
1:
	movq	%mm1, [%edx]
	movq	%mm3, [%edx+8]
	## convert big to little endian
	movq	%mm2, %mm1
	movq	%mm4, %mm3
	pand	%mm1, %mm0
	pand	%mm2, %mm7
	pand	%mm3, %mm0
	pand	%mm4, %mm7
	psllw	%mm1, 8
	psrlw	%mm2, 8
	psllw	%mm3, 8
	psrlw	%mm4, 8
	por	%mm1, %mm2			# pixels 0-3, swapped
	por	%mm4, %mm3			# pixels 4-7, swapped (result in %mm4)

	## split both halves into one register per colour channel
	movq	%mm2, %mm1
	movq	%mm3, %mm1
	movq	%mm5, %mm4
	movq	%mm6, %mm4
	pand	%mm1, [_2be555_mask_r]
	pand	%mm2, [_2be555_mask_g]
	pand	%mm3, [_2be555_mask_b]
	pand	%mm4, [_2be555_mask_r]
	pand	%mm5, [_2be555_mask_g]
	pand	%mm6, [_2be555_mask_b]
	psllw	%mm1, 1				# red
	psllw	%mm2, 1				# green
	## blue stays where it is
	psllw	%mm4, 1				# red
	psllw	%mm5, 1				# green
	por	%mm1, %mm2
	por	%mm4, %mm5
	por	%mm1, %mm3
	por	%mm4, %mm6
	movq	[%ecx], %mm1
	movq	[%ecx+8], %mm4
	add	%edx, 16
	add	%ecx, 16
	dec	%eax
	jnz	1b

2:
	## The mask loads above write MMX registers even when the loop is
	## skipped, so emms has to run on the early-exit path as well.
	emms
	ret
162
.balign 16
#################################################################################
##
##	Expand 16-bit big-endian 555 pixels to 32-bit little-endian 888 (MMX).
##
##	IN:	%eax -- number of pixels to convert
##		%edx -- input
##		%ecx -- output
##
##	Works on 4 pixels per iteration (8 bytes in, 16 bytes out); the count
##	is rounded up to a multiple of 4.  Each 5-bit channel ends up in the
##	top five bits of its output byte, the low three bits stay zero.
##	Clobbers %eax, %ecx, %edx, %mm0-%mm7 and the flags.

EXPORT(x86_mmx_convert_2be555_to_4le888):
	add	%eax, 3
	shr	%eax, 2				# we can convert 4 pixels at a time
	movq	%mm7, [d1]			# movq keeps the flags of shr
	jz	2f

	pxor	%mm0, %mm0			# zero words for the unpacks below
1:
	movq	%mm1, [%edx]

	## convert big to little endian
	movq	%mm3, %mm1
	pand	%mm3, [d2]
	pand	%mm1, %mm7
	psrlw	%mm3, 8
	psllw	%mm1, 8
	por	%mm1, %mm3

	## one copy of the swapped pixels per colour channel
	movq	%mm2, %mm1
	movq	%mm3, %mm1

	## red: widen words to dwords, then move the field to bits 19-23
	pand	%mm1, [_2be555_mask_r]
	movq	%mm4, %mm1
	punpcklwd %mm1, %mm0			# pixels 0-1
	punpckhwd %mm4, %mm0			# pixels 2-3
	pslld	%mm1, 16-10+3
	pslld	%mm4, 16-10+3

	## green: same, field goes to bits 11-15
	pand	%mm2, [_2be555_mask_g]
	movq	%mm5, %mm2
	punpcklwd %mm2, %mm0
	punpckhwd %mm5, %mm0
	pslld	%mm2, 8-5+3
	pslld	%mm5, 8-5+3

	## blue: same, field goes to bits 3-7
	pand	%mm3, [_2be555_mask_b]
	movq	%mm6, %mm3
	punpcklwd %mm3, %mm0
	punpckhwd %mm6, %mm0
	pslld	%mm3, 0+3
	pslld	%mm6, 0+3

	## merge the channels and store 4 output pixels
	por	%mm1, %mm2
	por	%mm4, %mm5
	por	%mm1, %mm3
	por	%mm4, %mm6
	movq	[%ecx], %mm1
	movq	[%ecx+8], %mm4

	add	%edx, 8
	add	%ecx, 16
	dec	%eax
	jnz	1b

2:
	emms					# also reached when the loop is skipped
	ret
222
223
.balign 16
#################################################################################
##
##	Byte-swap 32-bit pixels (big-endian 888 to little-endian 888) using
##	only integer registers.
##
##	IN:	%eax -- number of pixels to convert
##		%edx -- input
##		%ecx -- output
##
##	Works on 4 pixels (16 bytes) per iteration; the count is rounded up
##	to a multiple of 4.  %ebx, %ebp, %esi and %edi are saved and restored;
##	%eax, %ecx, %edx and the flags are clobbered.

EXPORT(x86_convert_4be888_to_4le888):
	add	%eax, 3
	shr	%eax, 2				# we can convert 4 pixels at a time
	jz	2f				# nothing pushed yet, plain return

	push	%ebx
	push	%ebp
	push	%esi
	push	%edi
1:
	## fetch and swap four pixels; every load happens before any store
	mov	%ebx, [%edx]
	bswap	%ebx
	mov	%ebp, [%edx+4]
	bswap	%ebp
	mov	%esi, [%edx+8]
	bswap	%esi
	mov	%edi, [%edx+12]
	bswap	%edi
	add	%edx, 16

	mov	[%ecx], %ebx
	mov	[%ecx+4], %ebp
	mov	[%ecx+8], %esi
	mov	[%ecx+12], %edi
	add	%ecx, 16

	dec	%eax
	jnz	1b

	pop	%edi
	pop	%esi
	pop	%ebp
	pop	%ebx
2:
	ret
265
.balign 16
#################################################################################
##
##	Byte-swap 16-bit pixels (big-endian 555 to little-endian 555) with SSE2.
##
##	IN:	%eax -- number of pixels to convert
##		%edx -- input
##		%ecx -- output
##
##	Works on 16 pixels (32 bytes) per iteration; the count is rounded up
##	to a multiple of 16.  All buffer accesses use movdqa, which faults
##	unless the address is 16-byte aligned.
##	Clobbers %eax, %ecx, %edx, %xmm1-%xmm6 and the flags.

EXPORT(x86_sse2_convert_2be555_to_2le555):
	add	%eax, 15
	shr	%eax, 4				# we can convert 16 pixels at a time
	movdqa	%xmm5, [d1]			# low-byte mask (flags of shr survive)
	movdqa	%xmm6, [d2]			# high-byte mask
	jz	2f
1:
	movdqa	%xmm1, [%edx]
	movdqa	%xmm3, [%edx+16]

	## convert big to little endian -- first 8 pixels
	movdqa	%xmm2, %xmm1
	pand	%xmm1, %xmm5
	pand	%xmm2, %xmm6
	psllw	%xmm1, 8
	psrlw	%xmm2, 8
	por	%xmm1, %xmm2

	## convert big to little endian -- second 8 pixels
	movdqa	%xmm4, %xmm3
	pand	%xmm3, %xmm5
	pand	%xmm4, %xmm6
	psllw	%xmm3, 8
	psrlw	%xmm4, 8
	por	%xmm3, %xmm4

	movdqa	[%ecx], %xmm1
	movdqa	[%ecx+16], %xmm3
	add	%edx, 32
	add	%ecx, 32
	dec	%eax
	jnz	1b

2:
	ret
306
.balign 16
#################################################################################
##
##	Convert 16-bit big-endian 555 pixels to little-endian 565 with SSE2.
##
##	IN:	%eax -- number of pixels to convert
##		%edx -- input
##		%ecx -- output
##
##	Every pixel is byte-swapped, then red and green are moved up by one
##	bit; the extra low green bit of the 565 layout is left zero.
##	Works on 16 pixels (32 bytes) per iteration; the count is rounded up
##	to a multiple of 16.  All buffer accesses use movdqa, which faults
##	unless the address is 16-byte aligned.
##	Clobbers %eax, %ecx, %edx, %xmm0-%xmm7 and the flags.

EXPORT(x86_sse2_convert_2be555_to_2le565):
	add	%eax, 15
	shr	%eax, 4				# we can convert 16 pixels at a time
	movdqa	%xmm0, [d1]			# low-byte mask (flags of shr survive)
	movdqa	%xmm7, [d2]			# high-byte mask
	jz	2f
1:
	movdqa	%xmm1, [%edx]
	movdqa	%xmm3, [%edx+16]

	## convert big to little endian -- first 8 pixels, result in %xmm1
	movdqa	%xmm2, %xmm1
	pand	%xmm1, %xmm0
	pand	%xmm2, %xmm7
	psllw	%xmm1, 8
	psrlw	%xmm2, 8
	por	%xmm1, %xmm2

	## convert big to little endian -- second 8 pixels, result in %xmm4
	movdqa	%xmm4, %xmm3
	pand	%xmm3, %xmm0
	pand	%xmm4, %xmm7
	psllw	%xmm3, 8
	psrlw	%xmm4, 8
	por	%xmm4, %xmm3

	## first 8 pixels: isolate the channels, shift red and green, merge
	movdqa	%xmm2, %xmm1
	movdqa	%xmm3, %xmm1
	pand	%xmm1, [_2be555_mask_r]
	pand	%xmm2, [_2be555_mask_g]
	pand	%xmm3, [_2be555_mask_b]
	psllw	%xmm1, 1			# red
	psllw	%xmm2, 1			# green
	por	%xmm1, %xmm2
	por	%xmm1, %xmm3			# blue is merged unshifted

	## second 8 pixels: same treatment
	movdqa	%xmm5, %xmm4
	movdqa	%xmm6, %xmm4
	pand	%xmm4, [_2be555_mask_r]
	pand	%xmm5, [_2be555_mask_g]
	pand	%xmm6, [_2be555_mask_b]
	psllw	%xmm4, 1			# red
	psllw	%xmm5, 1			# green
	por	%xmm4, %xmm5
	por	%xmm4, %xmm6			# blue is merged unshifted

	movdqa	[%ecx], %xmm1
	movdqa	[%ecx+16], %xmm4
	add	%edx, 32
	add	%ecx, 32
	dec	%eax
	jnz	1b
2:
	ret
365
.balign 16
#################################################################################
##
##	Expand 16-bit big-endian 555 pixels to 32-bit little-endian 888 (SSE2).
##
##	IN:	%eax -- number of pixels to convert
##		%edx -- input
##		%ecx -- output
##
##	Works on 8 pixels per iteration (16 bytes in, 32 bytes out); the count
##	is rounded up to a multiple of 8, so the last iteration may read and
##	write up to 7 pixels past the requested count.  Each 5-bit channel
##	ends up in the top five bits of its output byte, the low three bits
##	stay zero.  All buffer accesses use movdqa, which faults unless the
##	address is 16-byte aligned.
##	Clobbers %eax, %ecx, %edx, %xmm0-%xmm7 and the flags.

EXPORT(x86_sse2_convert_2be555_to_4le888):
	## Round up to whole groups of 8.  Adding less than 7 here would round
	## a remainder of 1-4 pixels down and leave those pixels unconverted.
	add	%eax, 7
	shr	%eax, 3				# we can convert 8 pixels at a time
	movdqa	%xmm7, [d1]			# low-byte mask (flags of shr survive)
	jz	2f

	pxor	%xmm0, %xmm0			# zero words for the unpacks below
1:
	movdqa	%xmm1, [%edx]

	## convert big to little endian
	movdqa	%xmm3, %xmm1
	pand	%xmm1, %xmm7
	pand	%xmm3, [d2]
	psllw	%xmm1, 8
	psrlw	%xmm3, 8
	por	%xmm1, %xmm3

	## isolate each colour channel in its own register
	movdqa	%xmm2, %xmm1
	movdqa	%xmm3, %xmm1
	pand	%xmm1, [_2be555_mask_r]
	pand	%xmm2, [_2be555_mask_g]
	pand	%xmm3, [_2be555_mask_b]

	## widen every 16-bit word to 32 bits: low four pixels stay in
	## %xmm1-%xmm3, high four pixels go to %xmm4-%xmm6
	movdqa	%xmm4, %xmm1
	movdqa	%xmm5, %xmm2
	movdqa	%xmm6, %xmm3
	punpcklwd %xmm1, %xmm0
	punpcklwd %xmm2, %xmm0
	punpcklwd %xmm3, %xmm0
	punpckhwd %xmm4, %xmm0
	punpckhwd %xmm5, %xmm0
	punpckhwd %xmm6, %xmm0

	## move each 5-bit field to the top of its output byte
	pslld	%xmm1, 16-10+3			# red
	pslld	%xmm2, 8-5+3			# green
	pslld	%xmm3, 0+3			# blue
	pslld	%xmm4, 16-10+3			# red
	pslld	%xmm5, 8-5+3			# green
	pslld	%xmm6, 0+3			# blue

	por	%xmm1, %xmm2
	por	%xmm1, %xmm3
	por	%xmm4, %xmm5
	por	%xmm4, %xmm6
	movdqa	[%ecx], %xmm1
	movdqa	[%ecx+16], %xmm4
	add	%edx, 16
	add	%ecx, 32
	dec	%eax
	jnz	1b
2:
	ret
423

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26