SDL 2.0
yuv_rgb_sse_func.h File Reference

Go to the source code of this file.

Macros

#define LOAD_SI128   _mm_loadu_si128
 
#define SAVE_SI128   _mm_storeu_si128
 
#define UV2RGB_16(U, V, R1, G1, B1, R2, G2, B2)
 
#define ADD_Y2RGB_16(Y1, Y2, R1, G1, B1, R2, G2, B2)
 
#define PACK_RGB565_32(R1, R2, G1, G2, B1, B2, RGB1, RGB2, RGB3, RGB4)
 
#define PACK_RGB24_32_STEP1(R1, R2, G1, G2, B1, B2, RGB1, RGB2, RGB3, RGB4, RGB5, RGB6)
 
#define PACK_RGB24_32_STEP2(R1, R2, G1, G2, B1, B2, RGB1, RGB2, RGB3, RGB4, RGB5, RGB6)
 
#define PACK_RGB24_32(R1, R2, G1, G2, B1, B2, RGB1, RGB2, RGB3, RGB4, RGB5, RGB6)
 
#define PACK_RGBA_32(R1, R2, G1, G2, B1, B2, A1, A2, RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, RGB7, RGB8)
 
#define PACK_PIXEL
 
#define SAVE_LINE1
 
#define SAVE_LINE2
 
#define READ_Y(y_ptr)   y = LOAD_SI128((const __m128i*)(y_ptr)); \
 
#define READ_UV
 
#define YUV2RGB_32
 

Functions

void SSE_FUNCTION_NAME (uint32_t width, uint32_t height, const uint8_t *Y, const uint8_t *U, const uint8_t *V, uint32_t Y_stride, uint32_t UV_stride, uint8_t *RGB, uint32_t RGB_stride, YCbCrType yuv_type)
 

Macro Definition Documentation

◆ ADD_Y2RGB_16

#define ADD_Y2RGB_16 (   Y1,
  Y2,
  R1,
  G1,
  B1,
  R2,
  G2,
  B2 
)
Value:
Y1 = _mm_mullo_epi16(_mm_sub_epi16(Y1, _mm_set1_epi16(param->y_shift)), _mm_set1_epi16(param->y_factor)); \
Y2 = _mm_mullo_epi16(_mm_sub_epi16(Y2, _mm_set1_epi16(param->y_shift)), _mm_set1_epi16(param->y_factor)); \
\
R1 = _mm_srai_epi16(_mm_add_epi16(R1, Y1), PRECISION); \
G1 = _mm_srai_epi16(_mm_add_epi16(G1, Y1), PRECISION); \
B1 = _mm_srai_epi16(_mm_add_epi16(B1, Y1), PRECISION); \
R2 = _mm_srai_epi16(_mm_add_epi16(R2, Y2), PRECISION); \
G2 = _mm_srai_epi16(_mm_add_epi16(G2, Y2), PRECISION); \
B2 = _mm_srai_epi16(_mm_add_epi16(B2, Y2), PRECISION); \

Definition at line 40 of file yuv_rgb_sse_func.h.

◆ LOAD_SI128

#define LOAD_SI128   _mm_loadu_si128

Definition at line 23 of file yuv_rgb_sse_func.h.

◆ PACK_PIXEL

#define PACK_PIXEL
Value:
__m128i rgb_1, rgb_2, rgb_3, rgb_4, rgb_5, rgb_6, rgb_7, rgb_8; \
PACK_RGB565_32(r_8_11, r_8_12, g_8_11, g_8_12, b_8_11, b_8_12, rgb_1, rgb_2, rgb_3, rgb_4) \
PACK_RGB565_32(r_8_21, r_8_22, g_8_21, g_8_22, b_8_21, b_8_22, rgb_5, rgb_6, rgb_7, rgb_8) \

Definition at line 126 of file yuv_rgb_sse_func.h.

◆ PACK_RGB24_32

#define PACK_RGB24_32 (   R1,
  R2,
  G1,
  G2,
  B1,
  B2,
  RGB1,
  RGB2,
  RGB3,
  RGB4,
  RGB5,
  RGB6 
)
Value:
PACK_RGB24_32_STEP1(R1, R2, G1, G2, B1, B2, RGB1, RGB2, RGB3, RGB4, RGB5, RGB6) \
PACK_RGB24_32_STEP2(R1, R2, G1, G2, B1, B2, RGB1, RGB2, RGB3, RGB4, RGB5, RGB6) \
PACK_RGB24_32_STEP1(R1, R2, G1, G2, B1, B2, RGB1, RGB2, RGB3, RGB4, RGB5, RGB6) \
PACK_RGB24_32_STEP2(R1, R2, G1, G2, B1, B2, RGB1, RGB2, RGB3, RGB4, RGB5, RGB6) \
PACK_RGB24_32_STEP1(R1, R2, G1, G2, B1, B2, RGB1, RGB2, RGB3, RGB4, RGB5, RGB6) \

Definition at line 94 of file yuv_rgb_sse_func.h.

◆ PACK_RGB24_32_STEP1

#define PACK_RGB24_32_STEP1 (   R1,
  R2,
  G1,
  G2,
  B1,
  B2,
  RGB1,
  RGB2,
  RGB3,
  RGB4,
  RGB5,
  RGB6 
)
Value:
RGB1 = _mm_packus_epi16(_mm_and_si128(R1,_mm_set1_epi16(0xFF)), _mm_and_si128(R2,_mm_set1_epi16(0xFF))); \
RGB2 = _mm_packus_epi16(_mm_and_si128(G1,_mm_set1_epi16(0xFF)), _mm_and_si128(G2,_mm_set1_epi16(0xFF))); \
RGB3 = _mm_packus_epi16(_mm_and_si128(B1,_mm_set1_epi16(0xFF)), _mm_and_si128(B2,_mm_set1_epi16(0xFF))); \
RGB4 = _mm_packus_epi16(_mm_srli_epi16(R1,8), _mm_srli_epi16(R2,8)); \
RGB5 = _mm_packus_epi16(_mm_srli_epi16(G1,8), _mm_srli_epi16(G2,8)); \
RGB6 = _mm_packus_epi16(_mm_srli_epi16(B1,8), _mm_srli_epi16(B2,8)); \

Definition at line 78 of file yuv_rgb_sse_func.h.

◆ PACK_RGB24_32_STEP2

#define PACK_RGB24_32_STEP2 (   R1,
  R2,
  G1,
  G2,
  B1,
  B2,
  RGB1,
  RGB2,
  RGB3,
  RGB4,
  RGB5,
  RGB6 
)
Value:
R1 = _mm_packus_epi16(_mm_and_si128(RGB1,_mm_set1_epi16(0xFF)), _mm_and_si128(RGB2,_mm_set1_epi16(0xFF))); \
R2 = _mm_packus_epi16(_mm_and_si128(RGB3,_mm_set1_epi16(0xFF)), _mm_and_si128(RGB4,_mm_set1_epi16(0xFF))); \
G1 = _mm_packus_epi16(_mm_and_si128(RGB5,_mm_set1_epi16(0xFF)), _mm_and_si128(RGB6,_mm_set1_epi16(0xFF))); \
G2 = _mm_packus_epi16(_mm_srli_epi16(RGB1,8), _mm_srli_epi16(RGB2,8)); \
B1 = _mm_packus_epi16(_mm_srli_epi16(RGB3,8), _mm_srli_epi16(RGB4,8)); \
B2 = _mm_packus_epi16(_mm_srli_epi16(RGB5,8), _mm_srli_epi16(RGB6,8)); \

Definition at line 86 of file yuv_rgb_sse_func.h.

◆ PACK_RGB565_32

#define PACK_RGB565_32 (   R1,
  R2,
  G1,
  G2,
  B1,
  B2,
  RGB1,
  RGB2,
  RGB3,
  RGB4 
)
Value:
{ \
__m128i red_mask, tmp1, tmp2, tmp3, tmp4; \
\
red_mask = _mm_set1_epi16((short)0xF800); \
RGB1 = _mm_and_si128(_mm_unpacklo_epi8(_mm_setzero_si128(), R1), red_mask); \
RGB2 = _mm_and_si128(_mm_unpackhi_epi8(_mm_setzero_si128(), R1), red_mask); \
RGB3 = _mm_and_si128(_mm_unpacklo_epi8(_mm_setzero_si128(), R2), red_mask); \
RGB4 = _mm_and_si128(_mm_unpackhi_epi8(_mm_setzero_si128(), R2), red_mask); \
tmp1 = _mm_slli_epi16(_mm_srli_epi16(_mm_unpacklo_epi8(G1, _mm_setzero_si128()), 2), 5); \
tmp2 = _mm_slli_epi16(_mm_srli_epi16(_mm_unpackhi_epi8(G1, _mm_setzero_si128()), 2), 5); \
tmp3 = _mm_slli_epi16(_mm_srli_epi16(_mm_unpacklo_epi8(G2, _mm_setzero_si128()), 2), 5); \
tmp4 = _mm_slli_epi16(_mm_srli_epi16(_mm_unpackhi_epi8(G2, _mm_setzero_si128()), 2), 5); \
RGB1 = _mm_or_si128(RGB1, tmp1); \
RGB2 = _mm_or_si128(RGB2, tmp2); \
RGB3 = _mm_or_si128(RGB3, tmp3); \
RGB4 = _mm_or_si128(RGB4, tmp4); \
tmp1 = _mm_srli_epi16(_mm_unpacklo_epi8(B1, _mm_setzero_si128()), 3); \
tmp2 = _mm_srli_epi16(_mm_unpackhi_epi8(B1, _mm_setzero_si128()), 3); \
tmp3 = _mm_srli_epi16(_mm_unpacklo_epi8(B2, _mm_setzero_si128()), 3); \
tmp4 = _mm_srli_epi16(_mm_unpackhi_epi8(B2, _mm_setzero_si128()), 3); \
RGB1 = _mm_or_si128(RGB1, tmp1); \
RGB2 = _mm_or_si128(RGB2, tmp2); \
RGB3 = _mm_or_si128(RGB3, tmp3); \
RGB4 = _mm_or_si128(RGB4, tmp4); \
}

Definition at line 51 of file yuv_rgb_sse_func.h.

◆ PACK_RGBA_32

#define PACK_RGBA_32 (   R1,
  R2,
  G1,
  G2,
  B1,
  B2,
  A1,
  A2,
  RGB1,
  RGB2,
  RGB3,
  RGB4,
  RGB5,
  RGB6,
  RGB7,
  RGB8 
)
Value:
{ \
__m128i lo_ab, hi_ab, lo_gr, hi_gr; \
\
lo_ab = _mm_unpacklo_epi8( A1, B1 ); \
hi_ab = _mm_unpackhi_epi8( A1, B1 ); \
lo_gr = _mm_unpacklo_epi8( G1, R1 ); \
hi_gr = _mm_unpackhi_epi8( G1, R1 ); \
RGB1 = _mm_unpacklo_epi16( lo_ab, lo_gr ); \
RGB2 = _mm_unpackhi_epi16( lo_ab, lo_gr ); \
RGB3 = _mm_unpacklo_epi16( hi_ab, hi_gr ); \
RGB4 = _mm_unpackhi_epi16( hi_ab, hi_gr ); \
\
lo_ab = _mm_unpacklo_epi8( A2, B2 ); \
hi_ab = _mm_unpackhi_epi8( A2, B2 ); \
lo_gr = _mm_unpacklo_epi8( G2, R2 ); \
hi_gr = _mm_unpackhi_epi8( G2, R2 ); \
RGB5 = _mm_unpacklo_epi16( lo_ab, lo_gr ); \
RGB6 = _mm_unpackhi_epi16( lo_ab, lo_gr ); \
RGB7 = _mm_unpacklo_epi16( hi_ab, hi_gr ); \
RGB8 = _mm_unpackhi_epi16( hi_ab, hi_gr ); \
}

Definition at line 101 of file yuv_rgb_sse_func.h.

◆ READ_UV

#define READ_UV
Value:
u = LOAD_SI128((const __m128i*)(u_ptr)); \
v = LOAD_SI128((const __m128i*)(v_ptr)); \

Definition at line 255 of file yuv_rgb_sse_func.h.

◆ READ_Y

#define READ_Y (   y_ptr)    y = LOAD_SI128((const __m128i*)(y_ptr)); \

Definition at line 252 of file yuv_rgb_sse_func.h.

◆ SAVE_LINE1

#define SAVE_LINE1
Value:
SAVE_SI128((__m128i*)(rgb_ptr1), rgb_1); \
SAVE_SI128((__m128i*)(rgb_ptr1+16), rgb_2); \
SAVE_SI128((__m128i*)(rgb_ptr1+32), rgb_3); \
SAVE_SI128((__m128i*)(rgb_ptr1+48), rgb_4); \

Definition at line 193 of file yuv_rgb_sse_func.h.

◆ SAVE_LINE2

#define SAVE_LINE2
Value:
SAVE_SI128((__m128i*)(rgb_ptr2), rgb_5); \
SAVE_SI128((__m128i*)(rgb_ptr2+16), rgb_6); \
SAVE_SI128((__m128i*)(rgb_ptr2+32), rgb_7); \
SAVE_SI128((__m128i*)(rgb_ptr2+48), rgb_8); \

Definition at line 199 of file yuv_rgb_sse_func.h.

◆ SAVE_SI128

#define SAVE_SI128   _mm_storeu_si128

Definition at line 24 of file yuv_rgb_sse_func.h.

◆ UV2RGB_16

#define UV2RGB_16 (   U,
  V,
  R1,
  G1,
  B1,
  R2,
  G2,
  B2 
)
Value:
r_tmp = _mm_mullo_epi16(V, _mm_set1_epi16(param->v_r_factor)); \
g_tmp = _mm_add_epi16( \
_mm_mullo_epi16(U, _mm_set1_epi16(param->u_g_factor)), \
_mm_mullo_epi16(V, _mm_set1_epi16(param->v_g_factor))); \
b_tmp = _mm_mullo_epi16(U, _mm_set1_epi16(param->u_b_factor)); \
R1 = _mm_unpacklo_epi16(r_tmp, r_tmp); \
G1 = _mm_unpacklo_epi16(g_tmp, g_tmp); \
B1 = _mm_unpacklo_epi16(b_tmp, b_tmp); \
R2 = _mm_unpackhi_epi16(r_tmp, r_tmp); \
G2 = _mm_unpackhi_epi16(g_tmp, g_tmp); \
B2 = _mm_unpackhi_epi16(b_tmp, b_tmp); \

Definition at line 27 of file yuv_rgb_sse_func.h.

◆ YUV2RGB_32

#define YUV2RGB_32

Definition at line 304 of file yuv_rgb_sse_func.h.

Function Documentation

◆ SSE_FUNCTION_NAME()

void SSE_FUNCTION_NAME ( uint32_t  width,
uint32_t  height,
const uint8_t *  Y,
const uint8_t *  U,
const uint8_t *  V,
uint32_t  Y_stride,
uint32_t  UV_stride,
uint8_t *  RGB,
uint32_t  RGB_stride,
YCbCrType  yuv_type 
)

Definition at line 385 of file yuv_rgb_sse_func.h.

389 {
390  const YUV2RGBParam *const param = &(YUV2RGB[yuv_type]);
391 #if YUV_FORMAT == YUV_FORMAT_420
392  const int y_pixel_stride = 1;
393  const int uv_pixel_stride = 1;
394  const int uv_x_sample_interval = 2;
395  const int uv_y_sample_interval = 2;
396 #elif YUV_FORMAT == YUV_FORMAT_422
397  const int y_pixel_stride = 2;
398  const int uv_pixel_stride = 4;
399  const int uv_x_sample_interval = 2;
400  const int uv_y_sample_interval = 1;
401 #elif YUV_FORMAT == YUV_FORMAT_NV12
402  const int y_pixel_stride = 1;
403  const int uv_pixel_stride = 2;
404  const int uv_x_sample_interval = 2;
405  const int uv_y_sample_interval = 2;
406 #endif
407 #if RGB_FORMAT == RGB_FORMAT_RGB565
408  const int rgb_pixel_stride = 2;
409 #elif RGB_FORMAT == RGB_FORMAT_RGB24
410  const int rgb_pixel_stride = 3;
411 #elif RGB_FORMAT == RGB_FORMAT_RGBA || RGB_FORMAT == RGB_FORMAT_BGRA || \
412  RGB_FORMAT == RGB_FORMAT_ARGB || RGB_FORMAT == RGB_FORMAT_ABGR
413  const int rgb_pixel_stride = 4;
414 #else
415 #error Unknown RGB pixel size
416 #endif
417 
418  if (width >= 32) {
419  uint32_t xpos, ypos;
420  for(ypos=0; ypos<(height-(uv_y_sample_interval-1)); ypos+=uv_y_sample_interval)
421  {
422  const uint8_t *y_ptr1=Y+ypos*Y_stride,
423  *y_ptr2=Y+(ypos+1)*Y_stride,
424  *u_ptr=U+(ypos/uv_y_sample_interval)*UV_stride,
425  *v_ptr=V+(ypos/uv_y_sample_interval)*UV_stride;
426 
427  uint8_t *rgb_ptr1=RGB+ypos*RGB_stride,
428  *rgb_ptr2=RGB+(ypos+1)*RGB_stride;
429 
430  for(xpos=0; xpos<(width-31); xpos+=32)
431  {
432  YUV2RGB_32
433  {
434  PACK_PIXEL
435  SAVE_LINE1
436  if (uv_y_sample_interval > 1)
437  {
438  SAVE_LINE2
439  }
440  }
441 
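442  y_ptr1+=32*y_pixel_stride;
443  y_ptr2+=32*y_pixel_stride;
444  u_ptr+=32*uv_pixel_stride/uv_x_sample_interval;
445  v_ptr+=32*uv_pixel_stride/uv_x_sample_interval;
446  rgb_ptr1+=32*rgb_pixel_stride;
447  rgb_ptr2+=32*rgb_pixel_stride;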
448  }
449  }
450 
451  /* Catch the last line, if needed */
452  if (uv_y_sample_interval == 2 && ypos == (height-1))
453  {
454  const uint8_t *y_ptr=Y+ypos*Y_stride,
455  *u_ptr=U+(ypos/uv_y_sample_interval)*UV_stride,
456  *v_ptr=V+(ypos/uv_y_sample_interval)*UV_stride;
457 
458  uint8_t *rgb_ptr=RGB+ypos*RGB_stride;
459 
460  STD_FUNCTION_NAME(width, 1, y_ptr, u_ptr, v_ptr, Y_stride, UV_stride, rgb_ptr, RGB_stride, yuv_type);
461  }
462  }
463 
464  /* Catch the right column, if needed */
465  {
466  int converted = (width & ~31);
467  if (converted != width)
468  {
469  const uint8_t *y_ptr=Y+converted*y_pixel_stride,
470  *u_ptr=U+converted*uv_pixel_stride/uv_x_sample_interval,
471  *v_ptr=V+converted*uv_pixel_stride/uv_x_sample_interval;
472 
473  uint8_t *rgb_ptr=RGB+converted*rgb_pixel_stride;
474 
475  STD_FUNCTION_NAME(width-converted, height, y_ptr, u_ptr, v_ptr, Y_stride, UV_stride, rgb_ptr, RGB_stride, yuv_type);
476  }
477  }
478 }

References PACK_PIXEL, RGB, SAVE_LINE1, SAVE_LINE2, STD_FUNCTION_NAME(), uv_pixel_stride, uv_x_sample_interval, uv_y_sample_interval, V, Y, y_pixel_stride, YUV2RGB, and YUV2RGB_32.

y_pixel_stride
#define y_pixel_stride
YUV2RGB
static const YUV2RGBParam YUV2RGB[3]
Definition: yuv_rgb.c:44
YUV2RGBParam
Definition: yuv_rgb.c:24
width
GLint GLint GLsizei width
Definition: SDL_opengl.h:1572
PACK_RGB24_32_STEP1
#define PACK_RGB24_32_STEP1(R1, R2, G1, G2, B1, B2, RGB1, RGB2, RGB3, RGB4, RGB5, RGB6)
Definition: yuv_rgb_sse_func.h:78
PACK_PIXEL
#define PACK_PIXEL
Definition: yuv_rgb_sse_func.h:126
PACK_RGB565_32
#define PACK_RGB565_32(R1, R2, G1, G2, B1, B2, RGB1, RGB2, RGB3, RGB4)
Definition: yuv_rgb_sse_func.h:51
RGB
@ RGB
Definition: edid.h:20
V
#define V(value)
Definition: yuv_rgb.c:37
SAVE_LINE2
#define SAVE_LINE2
Definition: yuv_rgb_sse_func.h:199
SAVE_SI128
#define SAVE_SI128
Definition: yuv_rgb_sse_func.h:24
height
GLint GLint GLsizei GLsizei height
Definition: SDL_opengl.h:1572
param
GLfloat param
Definition: SDL_opengl_glext.h:373
YUV2RGB_32
#define YUV2RGB_32
Definition: yuv_rgb_sse_func.h:304
PRECISION
#define PRECISION
Definition: yuv_rgb.c:12
uint32_t
unsigned int uint32_t
Definition: SDL_config_windows.h:63
uint8_t
unsigned char uint8_t
Definition: SDL_config_windows.h:59
STD_FUNCTION_NAME
void STD_FUNCTION_NAME(uint32_t width, uint32_t height, const uint8_t *Y, const uint8_t *U, const uint8_t *V, uint32_t Y_stride, uint32_t UV_stride, uint8_t *RGB, uint32_t RGB_stride, YCbCrType yuv_type)
Definition: yuv_rgb_std_func.h:72
uv_y_sample_interval
#define uv_y_sample_interval
Y
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp if else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else 
conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) 
&FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop DST_PRELOAD_BIAS endif preload_trailing SRC preload_trailing MASK DST endif add medium_case_inner_loop_and_trailing_pixels unaligned_mask endm macro medium_case_inner_loop_and_trailing_pixels DST endif subs bhs tst beq exit_label trailing_15bytes unaligned_mask 
endm macro narrow_case_inner_loop_and_trailing_pixels unaligned_mask tst conditional_process1 trailing_15bytes unaligned_mask endm macro switch_on_alignment exit_label if bne endif if bne endif action if endif if bne endif action if endif endif endm macro end_of_line last_one if SINGLE_SCANLINE ifc b endif else if vars_spilled word LINE_SAVED_REGS endif subs Y
Definition: pixman-arm-simd-asm.h:554
SAVE_LINE1
#define SAVE_LINE1
Definition: yuv_rgb_sse_func.h:193
uv_pixel_stride
#define uv_pixel_stride
uv_x_sample_interval
#define uv_x_sample_interval
LOAD_SI128
#define LOAD_SI128
Definition: yuv_rgb_sse_func.h:23