3 #include "../../SDL_internal.h"
/*
 * Fixed-point scale factor: coefficients are stored as integers multiplied
 * by 2^PRECISION (PRECISION is defined elsewhere in this file).
 */
#define PRECISION_FACTOR (1<<PRECISION)

/*
 * V(value) - convert a real coefficient to its int16_t fixed-point form,
 * rounding to nearest by adding 0.5 before the truncating cast.
 *
 * Both the argument and the whole expansion are parenthesised so that a
 * compound argument such as V(a + b) scales the entire expression, and so
 * that the result combines safely with surrounding operators (sizeof,
 * postfix operators, ...).
 *
 * The +0.5 rounding is only correct for non-negative inputs; negative
 * coefficients must be written as -V(x), which is how the coefficient
 * tables in this file spell them.
 */
#define V(value) ((int16_t)(((value)*PRECISION_FACTOR)+0.5))
46 { 0,
V(1.0),
V(1.402), -
V(0.3441), -
V(0.7141),
V(1.772)},
48 { 16,
V(1.1644),
V(1.596), -
V(0.3918), -
V(0.813),
V(2.0172)},
50 { 16,
V(1.1644),
V(1.7927), -
V(0.2132), -
V(0.5329),
V(2.1124)}
55 { 0, {{
V(0.299),
V(0.587),
V(0.114)}, {-
V(0.1687), -
V(0.3313),
V(0.5)}, {
V(0.5), -
V(0.4187), -
V(0.0813)}}},
57 { 16, {{
V(0.2568),
V(0.5041),
V(0.0979)}, {-
V(0.1482), -
V(0.291),
V(0.4392)}, {
V(0.4392), -
V(0.3678), -
V(0.0714)}}},
59 { 16, {{
V(0.1826),
V(0.6142),
V(0.062)}, {-
V(0.1006), -
V(0.3386),
V(0.4392)}, {
V(0.4392), -
V(0.3989), -
V(0.0403)}}}
/*
 * Identifiers for the supported YUV layouts. The YUV_FORMAT selector that
 * precedes each converter instantiation in this file is set to one of them.
 * NOTE(review): the names suggest 4:2:0, 4:2:2 and NV12 layouts; the code
 * that interprets them is not visible here - confirm against the template.
 */
63 #define YUV_FORMAT_420 1
64 #define YUV_FORMAT_422 2
65 #define YUV_FORMAT_NV12 3
/*
 * Identifiers for the supported RGB pixel layouts. The RGB_FORMAT selector
 * that precedes each converter instantiation is set to one of them.
 * These are kept as plain integer macros rather than an enum; presumably the
 * selectors are compared in #if directives - TODO confirm.
 */
68 #define RGB_FORMAT_RGB565 1
69 #define RGB_FORMAT_RGB24 2
70 #define RGB_FORMAT_RGBA 3
71 #define RGB_FORMAT_BGRA 4
72 #define RGB_FORMAT_ARGB 5
73 #define RGB_FORMAT_ABGR 6
80 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
81 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
82 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,
83 47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,
84 91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,
85 126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,
86 159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
87 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,
88 225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,
89 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
90 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
91 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
92 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255
/*
 * Instantiation parameters for the scalar ("_std") converters.
 *
 * Each triple sets STD_FUNCTION_NAME (the function name to generate),
 * YUV_FORMAT (one of the YUV_FORMAT_* identifiers) and RGB_FORMAT (one of the
 * RGB_FORMAT_* identifiers). All 3 x 6 combinations of YUV and RGB layout are
 * listed.
 *
 * NOTE(review): whatever consumes each triple (presumably a template header
 * included right after it, which would also have to #undef the three macros
 * before the next triple) is not visible in this chunk - confirm.
 */

/* YUV_FORMAT_420 sources */
98 #define STD_FUNCTION_NAME yuv420_rgb565_std
99 #define YUV_FORMAT YUV_FORMAT_420
100 #define RGB_FORMAT RGB_FORMAT_RGB565
103 #define STD_FUNCTION_NAME yuv420_rgb24_std
104 #define YUV_FORMAT YUV_FORMAT_420
105 #define RGB_FORMAT RGB_FORMAT_RGB24
108 #define STD_FUNCTION_NAME yuv420_rgba_std
109 #define YUV_FORMAT YUV_FORMAT_420
110 #define RGB_FORMAT RGB_FORMAT_RGBA
113 #define STD_FUNCTION_NAME yuv420_bgra_std
114 #define YUV_FORMAT YUV_FORMAT_420
115 #define RGB_FORMAT RGB_FORMAT_BGRA
118 #define STD_FUNCTION_NAME yuv420_argb_std
119 #define YUV_FORMAT YUV_FORMAT_420
120 #define RGB_FORMAT RGB_FORMAT_ARGB
123 #define STD_FUNCTION_NAME yuv420_abgr_std
124 #define YUV_FORMAT YUV_FORMAT_420
125 #define RGB_FORMAT RGB_FORMAT_ABGR
/* YUV_FORMAT_422 sources */
128 #define STD_FUNCTION_NAME yuv422_rgb565_std
129 #define YUV_FORMAT YUV_FORMAT_422
130 #define RGB_FORMAT RGB_FORMAT_RGB565
133 #define STD_FUNCTION_NAME yuv422_rgb24_std
134 #define YUV_FORMAT YUV_FORMAT_422
135 #define RGB_FORMAT RGB_FORMAT_RGB24
138 #define STD_FUNCTION_NAME yuv422_rgba_std
139 #define YUV_FORMAT YUV_FORMAT_422
140 #define RGB_FORMAT RGB_FORMAT_RGBA
143 #define STD_FUNCTION_NAME yuv422_bgra_std
144 #define YUV_FORMAT YUV_FORMAT_422
145 #define RGB_FORMAT RGB_FORMAT_BGRA
148 #define STD_FUNCTION_NAME yuv422_argb_std
149 #define YUV_FORMAT YUV_FORMAT_422
150 #define RGB_FORMAT RGB_FORMAT_ARGB
153 #define STD_FUNCTION_NAME yuv422_abgr_std
154 #define YUV_FORMAT YUV_FORMAT_422
155 #define RGB_FORMAT RGB_FORMAT_ABGR
/* YUV_FORMAT_NV12 sources */
158 #define STD_FUNCTION_NAME yuvnv12_rgb565_std
159 #define YUV_FORMAT YUV_FORMAT_NV12
160 #define RGB_FORMAT RGB_FORMAT_RGB565
163 #define STD_FUNCTION_NAME yuvnv12_rgb24_std
164 #define YUV_FORMAT YUV_FORMAT_NV12
165 #define RGB_FORMAT RGB_FORMAT_RGB24
168 #define STD_FUNCTION_NAME yuvnv12_rgba_std
169 #define YUV_FORMAT YUV_FORMAT_NV12
170 #define RGB_FORMAT RGB_FORMAT_RGBA
173 #define STD_FUNCTION_NAME yuvnv12_bgra_std
174 #define YUV_FORMAT YUV_FORMAT_NV12
175 #define RGB_FORMAT RGB_FORMAT_BGRA
178 #define STD_FUNCTION_NAME yuvnv12_argb_std
179 #define YUV_FORMAT YUV_FORMAT_NV12
180 #define RGB_FORMAT RGB_FORMAT_ARGB
183 #define STD_FUNCTION_NAME yuvnv12_abgr_std
184 #define YUV_FORMAT YUV_FORMAT_NV12
185 #define RGB_FORMAT RGB_FORMAT_ABGR
200 *rgb_ptr2=
RGB+(
y+1)*RGB_stride;
203 *y_ptr2=
Y+(
y+1)*Y_stride,
204 *u_ptr=U+(
y/2)*UV_stride,
205 *v_ptr=
V+(
y/2)*UV_stride;
212 y_tmp =
param->matrix[0][0]*rgb_ptr1[0] +
param->matrix[0][1]*rgb_ptr1[1] +
param->matrix[0][2]*rgb_ptr1[2];
213 u_tmp =
param->matrix[1][0]*rgb_ptr1[0] +
param->matrix[1][1]*rgb_ptr1[1] +
param->matrix[1][2]*rgb_ptr1[2];
214 v_tmp =
param->matrix[2][0]*rgb_ptr1[0] +
param->matrix[2][1]*rgb_ptr1[1] +
param->matrix[2][2]*rgb_ptr1[2];
217 y_tmp =
param->matrix[0][0]*rgb_ptr1[3] +
param->matrix[0][1]*rgb_ptr1[4] +
param->matrix[0][2]*rgb_ptr1[5];
218 u_tmp +=
param->matrix[1][0]*rgb_ptr1[3] +
param->matrix[1][1]*rgb_ptr1[4] +
param->matrix[1][2]*rgb_ptr1[5];
219 v_tmp +=
param->matrix[2][0]*rgb_ptr1[3] +
param->matrix[2][1]*rgb_ptr1[4] +
param->matrix[2][2]*rgb_ptr1[5];
222 y_tmp =
param->matrix[0][0]*rgb_ptr2[0] +
param->matrix[0][1]*rgb_ptr2[1] +
param->matrix[0][2]*rgb_ptr2[2];
223 u_tmp +=
param->matrix[1][0]*rgb_ptr2[0] +
param->matrix[1][1]*rgb_ptr2[1] +
param->matrix[1][2]*rgb_ptr2[2];
224 v_tmp +=
param->matrix[2][0]*rgb_ptr2[0] +
param->matrix[2][1]*rgb_ptr2[1] +
param->matrix[2][2]*rgb_ptr2[2];
227 y_tmp =
param->matrix[0][0]*rgb_ptr2[3] +
param->matrix[0][1]*rgb_ptr2[4] +
param->matrix[0][2]*rgb_ptr2[5];
228 u_tmp +=
param->matrix[1][0]*rgb_ptr2[3] +
param->matrix[1][1]*rgb_ptr2[4] +
param->matrix[1][2]*rgb_ptr2[5];
229 v_tmp +=
param->matrix[2][0]*rgb_ptr2[3] +
param->matrix[2][1]*rgb_ptr2[4] +
param->matrix[2][2]*rgb_ptr2[5];
/*
 * Instantiation parameters for the SSE converters.
 *
 * Each group of four macros sets SSE_FUNCTION_NAME (the function name to
 * generate), STD_FUNCTION_NAME (the name of the scalar converter for the same
 * format pair), YUV_FORMAT and RGB_FORMAT. Every YUV/RGB combination appears
 * twice: once with an "_sse" name and once with an "_sseu" name.
 *
 * NOTE(review): the code that consumes these macros, and how it uses
 * STD_FUNCTION_NAME, is not visible in this chunk. The _sse / _sseu split
 * presumably corresponds to aligned vs. unaligned memory access - confirm.
 */

/* YUV_FORMAT_420 sources */
247 #define SSE_FUNCTION_NAME yuv420_rgb565_sse
248 #define STD_FUNCTION_NAME yuv420_rgb565_std
249 #define YUV_FORMAT YUV_FORMAT_420
250 #define RGB_FORMAT RGB_FORMAT_RGB565
254 #define SSE_FUNCTION_NAME yuv420_rgb565_sseu
255 #define STD_FUNCTION_NAME yuv420_rgb565_std
256 #define YUV_FORMAT YUV_FORMAT_420
257 #define RGB_FORMAT RGB_FORMAT_RGB565
260 #define SSE_FUNCTION_NAME yuv420_rgb24_sse
261 #define STD_FUNCTION_NAME yuv420_rgb24_std
262 #define YUV_FORMAT YUV_FORMAT_420
263 #define RGB_FORMAT RGB_FORMAT_RGB24
267 #define SSE_FUNCTION_NAME yuv420_rgb24_sseu
268 #define STD_FUNCTION_NAME yuv420_rgb24_std
269 #define YUV_FORMAT YUV_FORMAT_420
270 #define RGB_FORMAT RGB_FORMAT_RGB24
273 #define SSE_FUNCTION_NAME yuv420_rgba_sse
274 #define STD_FUNCTION_NAME yuv420_rgba_std
275 #define YUV_FORMAT YUV_FORMAT_420
276 #define RGB_FORMAT RGB_FORMAT_RGBA
280 #define SSE_FUNCTION_NAME yuv420_rgba_sseu
281 #define STD_FUNCTION_NAME yuv420_rgba_std
282 #define YUV_FORMAT YUV_FORMAT_420
283 #define RGB_FORMAT RGB_FORMAT_RGBA
286 #define SSE_FUNCTION_NAME yuv420_bgra_sse
287 #define STD_FUNCTION_NAME yuv420_bgra_std
288 #define YUV_FORMAT YUV_FORMAT_420
289 #define RGB_FORMAT RGB_FORMAT_BGRA
293 #define SSE_FUNCTION_NAME yuv420_bgra_sseu
294 #define STD_FUNCTION_NAME yuv420_bgra_std
295 #define YUV_FORMAT YUV_FORMAT_420
296 #define RGB_FORMAT RGB_FORMAT_BGRA
299 #define SSE_FUNCTION_NAME yuv420_argb_sse
300 #define STD_FUNCTION_NAME yuv420_argb_std
301 #define YUV_FORMAT YUV_FORMAT_420
302 #define RGB_FORMAT RGB_FORMAT_ARGB
306 #define SSE_FUNCTION_NAME yuv420_argb_sseu
307 #define STD_FUNCTION_NAME yuv420_argb_std
308 #define YUV_FORMAT YUV_FORMAT_420
309 #define RGB_FORMAT RGB_FORMAT_ARGB
312 #define SSE_FUNCTION_NAME yuv420_abgr_sse
313 #define STD_FUNCTION_NAME yuv420_abgr_std
314 #define YUV_FORMAT YUV_FORMAT_420
315 #define RGB_FORMAT RGB_FORMAT_ABGR
319 #define SSE_FUNCTION_NAME yuv420_abgr_sseu
320 #define STD_FUNCTION_NAME yuv420_abgr_std
321 #define YUV_FORMAT YUV_FORMAT_420
322 #define RGB_FORMAT RGB_FORMAT_ABGR
/* YUV_FORMAT_422 sources */
325 #define SSE_FUNCTION_NAME yuv422_rgb565_sse
326 #define STD_FUNCTION_NAME yuv422_rgb565_std
327 #define YUV_FORMAT YUV_FORMAT_422
328 #define RGB_FORMAT RGB_FORMAT_RGB565
332 #define SSE_FUNCTION_NAME yuv422_rgb565_sseu
333 #define STD_FUNCTION_NAME yuv422_rgb565_std
334 #define YUV_FORMAT YUV_FORMAT_422
335 #define RGB_FORMAT RGB_FORMAT_RGB565
338 #define SSE_FUNCTION_NAME yuv422_rgb24_sse
339 #define STD_FUNCTION_NAME yuv422_rgb24_std
340 #define YUV_FORMAT YUV_FORMAT_422
341 #define RGB_FORMAT RGB_FORMAT_RGB24
345 #define SSE_FUNCTION_NAME yuv422_rgb24_sseu
346 #define STD_FUNCTION_NAME yuv422_rgb24_std
347 #define YUV_FORMAT YUV_FORMAT_422
348 #define RGB_FORMAT RGB_FORMAT_RGB24
351 #define SSE_FUNCTION_NAME yuv422_rgba_sse
352 #define STD_FUNCTION_NAME yuv422_rgba_std
353 #define YUV_FORMAT YUV_FORMAT_422
354 #define RGB_FORMAT RGB_FORMAT_RGBA
358 #define SSE_FUNCTION_NAME yuv422_rgba_sseu
359 #define STD_FUNCTION_NAME yuv422_rgba_std
360 #define YUV_FORMAT YUV_FORMAT_422
361 #define RGB_FORMAT RGB_FORMAT_RGBA
364 #define SSE_FUNCTION_NAME yuv422_bgra_sse
365 #define STD_FUNCTION_NAME yuv422_bgra_std
366 #define YUV_FORMAT YUV_FORMAT_422
367 #define RGB_FORMAT RGB_FORMAT_BGRA
371 #define SSE_FUNCTION_NAME yuv422_bgra_sseu
372 #define STD_FUNCTION_NAME yuv422_bgra_std
373 #define YUV_FORMAT YUV_FORMAT_422
374 #define RGB_FORMAT RGB_FORMAT_BGRA
377 #define SSE_FUNCTION_NAME yuv422_argb_sse
378 #define STD_FUNCTION_NAME yuv422_argb_std
379 #define YUV_FORMAT YUV_FORMAT_422
380 #define RGB_FORMAT RGB_FORMAT_ARGB
384 #define SSE_FUNCTION_NAME yuv422_argb_sseu
385 #define STD_FUNCTION_NAME yuv422_argb_std
386 #define YUV_FORMAT YUV_FORMAT_422
387 #define RGB_FORMAT RGB_FORMAT_ARGB
390 #define SSE_FUNCTION_NAME yuv422_abgr_sse
391 #define STD_FUNCTION_NAME yuv422_abgr_std
392 #define YUV_FORMAT YUV_FORMAT_422
393 #define RGB_FORMAT RGB_FORMAT_ABGR
397 #define SSE_FUNCTION_NAME yuv422_abgr_sseu
398 #define STD_FUNCTION_NAME yuv422_abgr_std
399 #define YUV_FORMAT YUV_FORMAT_422
400 #define RGB_FORMAT RGB_FORMAT_ABGR
/* YUV_FORMAT_NV12 sources */
403 #define SSE_FUNCTION_NAME yuvnv12_rgb565_sse
404 #define STD_FUNCTION_NAME yuvnv12_rgb565_std
405 #define YUV_FORMAT YUV_FORMAT_NV12
406 #define RGB_FORMAT RGB_FORMAT_RGB565
410 #define SSE_FUNCTION_NAME yuvnv12_rgb565_sseu
411 #define STD_FUNCTION_NAME yuvnv12_rgb565_std
412 #define YUV_FORMAT YUV_FORMAT_NV12
413 #define RGB_FORMAT RGB_FORMAT_RGB565
416 #define SSE_FUNCTION_NAME yuvnv12_rgb24_sse
417 #define STD_FUNCTION_NAME yuvnv12_rgb24_std
418 #define YUV_FORMAT YUV_FORMAT_NV12
419 #define RGB_FORMAT RGB_FORMAT_RGB24
423 #define SSE_FUNCTION_NAME yuvnv12_rgb24_sseu
424 #define STD_FUNCTION_NAME yuvnv12_rgb24_std
425 #define YUV_FORMAT YUV_FORMAT_NV12
426 #define RGB_FORMAT RGB_FORMAT_RGB24
429 #define SSE_FUNCTION_NAME yuvnv12_rgba_sse
430 #define STD_FUNCTION_NAME yuvnv12_rgba_std
431 #define YUV_FORMAT YUV_FORMAT_NV12
432 #define RGB_FORMAT RGB_FORMAT_RGBA
436 #define SSE_FUNCTION_NAME yuvnv12_rgba_sseu
437 #define STD_FUNCTION_NAME yuvnv12_rgba_std
438 #define YUV_FORMAT YUV_FORMAT_NV12
439 #define RGB_FORMAT RGB_FORMAT_RGBA
442 #define SSE_FUNCTION_NAME yuvnv12_bgra_sse
443 #define STD_FUNCTION_NAME yuvnv12_bgra_std
444 #define YUV_FORMAT YUV_FORMAT_NV12
445 #define RGB_FORMAT RGB_FORMAT_BGRA
449 #define SSE_FUNCTION_NAME yuvnv12_bgra_sseu
450 #define STD_FUNCTION_NAME yuvnv12_bgra_std
451 #define YUV_FORMAT YUV_FORMAT_NV12
452 #define RGB_FORMAT RGB_FORMAT_BGRA
455 #define SSE_FUNCTION_NAME yuvnv12_argb_sse
456 #define STD_FUNCTION_NAME yuvnv12_argb_std
457 #define YUV_FORMAT YUV_FORMAT_NV12
458 #define RGB_FORMAT RGB_FORMAT_ARGB
462 #define SSE_FUNCTION_NAME yuvnv12_argb_sseu
463 #define STD_FUNCTION_NAME yuvnv12_argb_std
464 #define YUV_FORMAT YUV_FORMAT_NV12
465 #define RGB_FORMAT RGB_FORMAT_ARGB
468 #define SSE_FUNCTION_NAME yuvnv12_abgr_sse
469 #define STD_FUNCTION_NAME yuvnv12_abgr_std
470 #define YUV_FORMAT YUV_FORMAT_NV12
471 #define RGB_FORMAT RGB_FORMAT_ABGR
475 #define SSE_FUNCTION_NAME yuvnv12_abgr_sseu
476 #define STD_FUNCTION_NAME yuvnv12_abgr_std
477 #define YUV_FORMAT YUV_FORMAT_NV12
478 #define RGB_FORMAT RGB_FORMAT_ABGR
/*
 * UNPACK_RGB24_32_STEP1 - one byte-interleave pass over six 128-bit registers.
 *
 * The inputs are paired as (IN1, IN4), (IN2, IN5) and (IN3, IN6). For each
 * pair the low eight bytes of both registers are interleaved into the first
 * output of the pair and the high eight bytes into the second. The inputs
 * are only read; the six outputs are overwritten.
 *
 * Expands to six statements, each already terminated by ';', so the macro is
 * invoked without a trailing semicolon and only where a statement sequence
 * is valid.
 */
#define UNPACK_RGB24_32_STEP1(IN1, IN2, IN3, IN4, IN5, IN6, OUT_A_LO, OUT_A_HI, OUT_B_LO, OUT_B_HI, OUT_C_LO, OUT_C_HI) \
OUT_A_LO = _mm_unpacklo_epi8(IN1, IN4); \
OUT_A_HI = _mm_unpackhi_epi8(IN1, IN4); \
OUT_B_LO = _mm_unpacklo_epi8(IN2, IN5); \
OUT_B_HI = _mm_unpackhi_epi8(IN2, IN5); \
OUT_C_LO = _mm_unpacklo_epi8(IN3, IN6); \
OUT_C_HI = _mm_unpackhi_epi8(IN3, IN6);
/*
 * UNPACK_RGB24_32_STEP2 - second byte-interleave pass, writing back into the
 * six RGBn registers from the six intermediate registers.
 *
 * The intermediates are paired as (R1, G2), (R2, B1) and (G1, B2). For each
 * pair the low eight bytes are interleaved into the first destination of the
 * pair and the high eight bytes into the second.
 *
 * Expands to six statements, each already terminated by ';'. The last body
 * line deliberately carries no line-continuation backslash: a trailing
 * backslash would splice whatever line follows this definition (including
 * another #define) into the macro body.
 */
#define UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
RGB1 = _mm_unpacklo_epi8(R1, G2); \
RGB2 = _mm_unpackhi_epi8(R1, G2); \
RGB3 = _mm_unpacklo_epi8(R2, B1); \
RGB4 = _mm_unpackhi_epi8(R2, B1); \
RGB5 = _mm_unpacklo_epi8(G1, B2); \
RGB6 = _mm_unpackhi_epi8(G1, B2);
/*
 * UNPACK_RGB24_32 - run the full interleave sequence on six 128-bit
 * registers: STEP1, STEP2, STEP1, STEP2, STEP1, all with the same argument
 * list. Because the sequence ends with STEP1, the final results are left in
 * R1, R2, G1, G2, B1, B2; RGB1..RGB6 are clobbered along the way.
 *
 * NOTE(review): the net effect appears to be splitting packed 24-bit RGB
 * data from two rows into per-channel registers (the call sites treat
 * R1/G1/B1 as belonging to the first row and R2/G2/B2 to the second) -
 * confirm.
 *
 * Expands to a sequence of complete statements, so it is invoked without a
 * trailing semicolon. The last body line deliberately carries no
 * line-continuation backslash: a trailing backslash would splice whatever
 * line follows this definition (including another #define) into the macro
 * body.
 */
#define UNPACK_RGB24_32(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2)
/*
 * RGB2YUV_16_ROW - helper for RGB2YUV_16: compute one output channel.
 *
 * For every 16-bit lane:
 *   OUT = (R*m[ROW][0] + G*m[ROW][1] + B*m[ROW][2] + (BIAS << PRECISION)) >> PRECISION
 * where m is param->matrix. Products and sums keep only the low 16 bits
 * (_mm_mullo_epi16 / _mm_add_epi16) and the final shift is arithmetic
 * (_mm_srai_epi16).
 */
#define RGB2YUV_16_ROW(R, G, B, OUT, ROW, BIAS) \
OUT = _mm_add_epi16(_mm_mullo_epi16(R, _mm_set1_epi16(param->matrix[ROW][0])), \
                    _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[ROW][1]))); \
OUT = _mm_add_epi16(OUT, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[ROW][2]))); \
OUT = _mm_add_epi16(OUT, _mm_set1_epi16((BIAS)<<PRECISION)); \
OUT = _mm_srai_epi16(OUT, PRECISION);

/*
 * RGB2YUV_16 - convert registers of 16-bit R, G, B lanes into Y, U and V
 * lanes using the fixed-point matrix in `param`.
 *
 * Y uses matrix row 0 with bias param->y_shift; U uses row 1 and V uses
 * row 2, both with bias 128. R, G and B are only read; Y, U and V are
 * overwritten.
 *
 * A pointer named `param` must be in scope at the point of use; it is not a
 * macro argument. Expands to complete statements, so the macro is invoked
 * without a trailing semicolon.
 */
#define RGB2YUV_16(R, G, B, Y, U, V) \
RGB2YUV_16_ROW(R, G, B, Y, 0, param->y_shift) \
RGB2YUV_16_ROW(R, G, B, U, 1, 128) \
RGB2YUV_16_ROW(R, G, B, V, 2, 128)
523 __m128i r1, r2, b1, b2, g1, g2; \
524 __m128i r_16, g_16, b_16; \
525 __m128i y1_16, y2_16, u1_16, u2_16, v1_16, v2_16, y, u1, u2, v1, v2, u1_tmp, u2_tmp, v1_tmp, v2_tmp; \
526 __m128i rgb1 = LOAD_SI128((const __m128i*)(rgb_ptr1)), \
527 rgb2 = LOAD_SI128((const __m128i*)(rgb_ptr1+16)), \
528 rgb3 = LOAD_SI128((const __m128i*)(rgb_ptr1+32)), \
529 rgb4 = LOAD_SI128((const __m128i*)(rgb_ptr2)), \
530 rgb5 = LOAD_SI128((const __m128i*)(rgb_ptr2+16)), \
531 rgb6 = LOAD_SI128((const __m128i*)(rgb_ptr2+32)); \
533 UNPACK_RGB24_32(rgb1, rgb2, rgb3, rgb4, rgb5, rgb6, r1, r2, g1, g2, b1, b2) \
535 r_16 = _mm_unpacklo_epi8(r1, _mm_setzero_si128()); \
536 g_16 = _mm_unpacklo_epi8(g1, _mm_setzero_si128()); \
537 b_16 = _mm_unpacklo_epi8(b1, _mm_setzero_si128()); \
538 RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \
539 r_16 = _mm_unpackhi_epi8(r1, _mm_setzero_si128()); \
540 g_16 = _mm_unpackhi_epi8(g1, _mm_setzero_si128()); \
541 b_16 = _mm_unpackhi_epi8(b1, _mm_setzero_si128()); \
542 RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \
543 y = _mm_packus_epi16(y1_16, y2_16); \
544 u1 = _mm_packus_epi16(u1_16, u2_16); \
545 v1 = _mm_packus_epi16(v1_16, v2_16); \
547 SAVE_SI128((__m128i*)(y_ptr1), y); \
549 r_16 = _mm_unpacklo_epi8(r2, _mm_setzero_si128()); \
550 g_16 = _mm_unpacklo_epi8(g2, _mm_setzero_si128()); \
551 b_16 = _mm_unpacklo_epi8(b2, _mm_setzero_si128()); \
552 RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \
553 r_16 = _mm_unpackhi_epi8(r2, _mm_setzero_si128()); \
554 g_16 = _mm_unpackhi_epi8(g2, _mm_setzero_si128()); \
555 b_16 = _mm_unpackhi_epi8(b2, _mm_setzero_si128()); \
556 RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \
557 y = _mm_packus_epi16(y1_16, y2_16); \
558 u2 = _mm_packus_epi16(u1_16, u2_16); \
559 v2 = _mm_packus_epi16(v1_16, v2_16); \
561 SAVE_SI128((__m128i*)(y_ptr2), y); \
563 u1_tmp = _mm_avg_epu8(u1, u2); \
564 v1_tmp = _mm_avg_epu8(v1, v2); \
566 rgb1 = LOAD_SI128((const __m128i*)(rgb_ptr1+48)); \
567 rgb2 = LOAD_SI128((const __m128i*)(rgb_ptr1+64)); \
568 rgb3 = LOAD_SI128((const __m128i*)(rgb_ptr1+80)); \
569 rgb4 = LOAD_SI128((const __m128i*)(rgb_ptr2+48)); \
570 rgb5 = LOAD_SI128((const __m128i*)(rgb_ptr2+64)); \
571 rgb6 = LOAD_SI128((const __m128i*)(rgb_ptr2+80)); \
573 UNPACK_RGB24_32(rgb1, rgb2, rgb3, rgb4, rgb5, rgb6, r1, r2, g1, g2, b1, b2) \
575 r_16 = _mm_unpacklo_epi8(r1, _mm_setzero_si128()); \
576 g_16 = _mm_unpacklo_epi8(g1, _mm_setzero_si128()); \
577 b_16 = _mm_unpacklo_epi8(b1, _mm_setzero_si128()); \
578 RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \
579 r_16 = _mm_unpackhi_epi8(r1, _mm_setzero_si128()); \
580 g_16 = _mm_unpackhi_epi8(g1, _mm_setzero_si128()); \
581 b_16 = _mm_unpackhi_epi8(b1, _mm_setzero_si128()); \
582 RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \
583 y = _mm_packus_epi16(y1_16, y2_16); \
584 u1 = _mm_packus_epi16(u1_16, u2_16); \
585 v1 = _mm_packus_epi16(v1_16, v2_16); \
587 SAVE_SI128((__m128i*)(y_ptr1+16), y); \
589 r_16 = _mm_unpacklo_epi8(r2, _mm_setzero_si128()); \
590 g_16 = _mm_unpacklo_epi8(g2, _mm_setzero_si128()); \
591 b_16 = _mm_unpacklo_epi8(b2, _mm_setzero_si128()); \
592 RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \
593 r_16 = _mm_unpackhi_epi8(r2, _mm_setzero_si128()); \
594 g_16 = _mm_unpackhi_epi8(g2, _mm_setzero_si128()); \
595 b_16 = _mm_unpackhi_epi8(b2, _mm_setzero_si128()); \
596 RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \
597 y = _mm_packus_epi16(y1_16, y2_16); \
598 u2 = _mm_packus_epi16(u1_16, u2_16); \
599 v2 = _mm_packus_epi16(v1_16, v2_16); \
601 SAVE_SI128((__m128i*)(y_ptr2+16), y); \
603 u2_tmp = _mm_avg_epu8(u1, u2); \
604 v2_tmp = _mm_avg_epu8(v1, v2); \
606 u1 = _mm_packus_epi16(_mm_srl_epi16(u1_tmp, _mm_cvtsi32_si128(8)), _mm_srl_epi16(u2_tmp, _mm_cvtsi32_si128(8))); \
607 v1 = _mm_packus_epi16(_mm_srl_epi16(v1_tmp, _mm_cvtsi32_si128(8)), _mm_srl_epi16(v2_tmp, _mm_cvtsi32_si128(8))); \
608 u2 = _mm_packus_epi16(_mm_and_si128(u1_tmp, _mm_set1_epi16(0xFF)), _mm_and_si128(u2_tmp, _mm_set1_epi16(0xFF))); \
609 v2 = _mm_packus_epi16(_mm_and_si128(v1_tmp, _mm_set1_epi16(0xFF)), _mm_and_si128(v2_tmp, _mm_set1_epi16(0xFF))); \
610 u1 = _mm_avg_epu8(u1, u2); \
611 v1 = _mm_avg_epu8(v1, v2); \
612 SAVE_SI128((__m128i*)(u_ptr), u1); \
613 SAVE_SI128((__m128i*)(v_ptr), v1);
/*
 * Memory-access bindings: LOAD_SI128 maps to _mm_load_si128 and SAVE_SI128
 * to _mm_stream_si128. Per the Intel intrinsic definitions both require
 * 16-byte-aligned addresses, and _mm_stream_si128 is a non-temporal store.
 * The macro code in this file that loads RGB data and stores Y/U/V results
 * goes through these two names.
 */
620 #define LOAD_SI128 _mm_load_si128
621 #define SAVE_SI128 _mm_stream_si128
625 for(ypos=0; ypos<(
height-1); ypos+=2)
628 *rgb_ptr2=
RGB+(ypos+1)*RGB_stride;
631 *y_ptr2=
Y+(ypos+1)*Y_stride,
632 *u_ptr=U+(ypos/2)*UV_stride,
633 *v_ptr=
V+(ypos/2)*UV_stride;
635 for(xpos=0; xpos<(
width-31); xpos+=32)
/*
 * Memory-access bindings: LOAD_SI128 maps to _mm_loadu_si128 and SAVE_SI128
 * to _mm_storeu_si128. Per the Intel intrinsic definitions neither has an
 * alignment requirement. The macro code in this file that loads RGB data and
 * stores Y/U/V results goes through these two names.
 */
656 #define LOAD_SI128 _mm_loadu_si128
657 #define SAVE_SI128 _mm_storeu_si128
661 for(ypos=0; ypos<(
height-1); ypos+=2)
664 *rgb_ptr2=
RGB+(ypos+1)*RGB_stride;
667 *y_ptr2=
Y+(ypos+1)*Y_stride,
668 *u_ptr=U+(ypos/2)*UV_stride,
669 *v_ptr=
V+(ypos/2)*UV_stride;
671 for(xpos=0; xpos<(
width-31); xpos+=32)