From 472a415c8fa0a77c26765ce0bee0ca2330f7d179 Mon Sep 17 00:00:00 2001 From: Anonymous Maarten Date: Fri, 19 Jan 2024 00:01:36 +0100 Subject: [PATCH] video: move standard c, sse and lsx implementations of yuv2rgb to its own source --- Makefile.os2 | 5 +- Makefile.w32 | 5 +- VisualC-GDK/SDL/SDL.vcxproj | 10 +- VisualC-WinRT/SDL-UWP.vcxproj | 12 +- VisualC/SDL/SDL.vcxproj | 10 +- src/video/yuv2rgb/yuv_rgb.h | 401 +---------------- src/video/yuv2rgb/yuv_rgb_common.h | 13 + src/video/yuv2rgb/yuv_rgb_internal.h | 74 ++++ src/video/yuv2rgb/yuv_rgb_lsx.c | 44 ++ src/video/yuv2rgb/yuv_rgb_lsx.h | 407 ++++++++++++++++++ .../yuv2rgb/{yuv_rgb.c => yuv_rgb_sse.c} | 396 +++-------------- src/video/yuv2rgb/yuv_rgb_sse.h | 266 ++++++++++++ src/video/yuv2rgb/yuv_rgb_std.c | 179 ++++++++ src/video/yuv2rgb/yuv_rgb_std.h | 131 ++++++ 14 files changed, 1222 insertions(+), 731 deletions(-) create mode 100644 src/video/yuv2rgb/yuv_rgb_common.h create mode 100644 src/video/yuv2rgb/yuv_rgb_internal.h create mode 100644 src/video/yuv2rgb/yuv_rgb_lsx.c create mode 100644 src/video/yuv2rgb/yuv_rgb_lsx.h rename src/video/yuv2rgb/{yuv_rgb.c => yuv_rgb_sse.c} (53%) create mode 100644 src/video/yuv2rgb/yuv_rgb_sse.h create mode 100644 src/video/yuv2rgb/yuv_rgb_std.c create mode 100644 src/video/yuv2rgb/yuv_rgb_std.h diff --git a/Makefile.os2 b/Makefile.os2 index a063d683e..c6ab898b0 100644 --- a/Makefile.os2 +++ b/Makefile.os2 @@ -81,7 +81,7 @@ SRCS+= SDL_events.c SDL_quit.c SDL_keyboard.c SDL_mouse.c SDL_windowevents.c & SDL_clipboardevents.c SDL_dropevents.c SDL_displayevents.c SDL_gesture.c & SDL_sensor.c SDL_touch.c SRCS+= SDL_haptic.c SDL_hidapi.c SDL_gamecontroller.c SDL_joystick.c controller_type.c -SRCS+= SDL_render.c yuv_rgb.c SDL_yuv.c SDL_yuv_sw.c SDL_blendfillrect.c & +SRCS+= SDL_render.c yuv_rgb_lsx.c yuv_rgb_sse.c yuv_rgb_std.c SDL_yuv.c SDL_yuv_sw.c SDL_blendfillrect.c & SDL_blendline.c SDL_blendpoint.c SDL_drawline.c SDL_drawpoint.c & SDL_render_sw.c SDL_rotate.c SDL_triangle.c 
SRCS+= SDL_blit.c SDL_blit_0.c SDL_blit_1.c SDL_blit_A.c SDL_blit_auto.c & @@ -158,6 +158,9 @@ SDL_hidapi.obj: SDL_hidapi.c wcc386 $(CFLAGS_DLL) -za99 -fo=$^@ $< !endif +yuv_rgb_sse.obj: yuv_rgb_sse.c + wcc386 $(CFLAGS_DLL) -wcd=202 -fo=$^@ $< + $(LIBICONV_LIB): "src/core/os2/iconv2.lbc" @echo * Creating: $@ wlib -q -b -n -c -pa -s -t -zld -ii -io $@ @$< diff --git a/Makefile.w32 b/Makefile.w32 index 24efceb02..9567f4abf 100644 --- a/Makefile.w32 +++ b/Makefile.w32 @@ -60,7 +60,7 @@ SRCS+= SDL_events.c SDL_quit.c SDL_keyboard.c SDL_mouse.c SDL_windowevents.c & SDL_clipboardevents.c SDL_dropevents.c SDL_displayevents.c SDL_gesture.c & SDL_sensor.c SDL_touch.c SRCS+= SDL_haptic.c SDL_hidapi.c SDL_gamecontroller.c SDL_joystick.c controller_type.c -SRCS+= SDL_render.c yuv_rgb.c SDL_yuv.c SDL_yuv_sw.c SDL_blendfillrect.c & +SRCS+= SDL_render.c yuv_rgb_lsx.c yuv_rgb_sse.c yuv_rgb_std.c SDL_yuv.c SDL_yuv_sw.c SDL_blendfillrect.c & SDL_blendline.c SDL_blendpoint.c SDL_drawline.c SDL_drawpoint.c & SDL_render_sw.c SDL_rotate.c SDL_triangle.c SRCS+= SDL_blit.c SDL_blit_0.c SDL_blit_1.c SDL_blit_A.c SDL_blit_auto.c & @@ -147,6 +147,9 @@ SDL_RLEaccel.obj: SDL_RLEaccel.c SDL_malloc.obj: SDL_malloc.c wcc386 $(CFLAGS_DLL) -wcd=201 -fo=$^@ $< +yuv_rgb_sse.obj: yuv_rgb_sse.c + wcc386 $(CFLAGS_DLL) -wcd=202 -fo=$^@ $< + # SDL2libm MSRCS= e_atan2.c e_exp.c e_fmod.c e_log10.c e_log.c e_pow.c e_rem_pio2.c e_sqrt.c & k_cos.c k_rem_pio2.c k_sin.c k_tan.c & diff --git a/VisualC-GDK/SDL/SDL.vcxproj b/VisualC-GDK/SDL/SDL.vcxproj index 6aee7ccb9..ed071aa9d 100644 --- a/VisualC-GDK/SDL/SDL.vcxproj +++ b/VisualC-GDK/SDL/SDL.vcxproj @@ -520,7 +520,13 @@ + + + + + + @@ -755,7 +761,9 @@ - + + + diff --git a/VisualC-WinRT/SDL-UWP.vcxproj b/VisualC-WinRT/SDL-UWP.vcxproj index 20d4e6368..696505f35 100644 --- a/VisualC-WinRT/SDL-UWP.vcxproj +++ b/VisualC-WinRT/SDL-UWP.vcxproj @@ -185,6 +185,14 @@ + + + + + + + + @@ -346,7 +354,9 @@ true - + + + {89e9b32e-a86a-47c3-a948-d2b1622925ce} diff --git 
a/VisualC/SDL/SDL.vcxproj b/VisualC/SDL/SDL.vcxproj index 24f332939..d52b5c058 100644 --- a/VisualC/SDL/SDL.vcxproj +++ b/VisualC/SDL/SDL.vcxproj @@ -439,7 +439,13 @@ + + + + + + @@ -624,7 +630,9 @@ - + + + diff --git a/src/video/yuv2rgb/yuv_rgb.h b/src/video/yuv2rgb/yuv_rgb.h index 5668c0fc4..c3593168f 100644 --- a/src/video/yuv2rgb/yuv_rgb.h +++ b/src/video/yuv2rgb/yuv_rgb.h @@ -1,412 +1,33 @@ +#ifndef YUV_RGB_H_ +#define YUV_RGB_H_ + // Copyright 2016 Adrien Descamps // Distributed under BSD 3-Clause License // Provide optimized functions to convert images from 8bits yuv420 to rgb24 format -// There are a few slightly different variations of the YCbCr color space with different parameters that +// There are a few slightly different variations of the YCbCr color space with different parameters that // change the conversion matrix. // The three most common YCbCr color space, defined by BT.601, BT.709 and JPEG standard are implemented here. // See the respective standards for details // The matrix values used are derived from http://www.equasys.de/colorconversion.html // YUV420 is stored as three separate channels, with U and V (Cb and Cr) subsampled by a 2 factor -// For conversion from yuv to rgb, no interpolation is done, and the same UV value are used for 4 rgb pixels. This +// For conversion from yuv to rgb, no interpolation is done, and the same UV value are used for 4 rgb pixels. This // is suboptimal for image quality, but by far the fastest method. // For all methods, width and height should be even, if not, the last row/column of the result image won't be affected. // For sse methods, if the width if not divisable by 32, the last (width%32) pixels of each line won't be affected. 
-#include "SDL_stdinc.h" /*#include */ -typedef enum -{ - YCBCR_JPEG, - YCBCR_601, - YCBCR_709 -} YCbCrType; - // yuv to rgb, standard c implementation -void yuv420_rgb565_std( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); +#include "yuv_rgb_std.h" -void yuv420_rgb24_std( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); +// yuv to rgb, sse2 implementation +#include "yuv_rgb_sse.h" -void yuv420_rgba_std( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); +// yuv to rgb, lsx implementation +#include "yuv_rgb_lsx.h" -void yuv420_bgra_std( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuv420_argb_std( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuv420_abgr_std( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuv422_rgb565_std( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuv422_rgb24_std( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - 
YCbCrType yuv_type); - -void yuv422_rgba_std( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuv422_bgra_std( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuv422_argb_std( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuv422_abgr_std( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuvnv12_rgb565_std( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuvnv12_rgb24_std( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuvnv12_rgba_std( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuvnv12_bgra_std( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuvnv12_argb_std( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuvnv12_abgr_std( - 
uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -// yuv to rgb, sse implementation -// pointers must be 16 byte aligned, and strides must be divisable by 16 -void yuv420_rgb565_sse( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuv420_rgb24_sse( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuv420_rgba_sse( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuv420_bgra_sse( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuv420_argb_sse( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuv420_abgr_sse( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuv422_rgb565_sse( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuv422_rgb24_sse( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, 
- YCbCrType yuv_type); - -void yuv422_rgba_sse( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuv422_bgra_sse( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuv422_argb_sse( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuv422_abgr_sse( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuvnv12_rgb565_sse( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuvnv12_rgb24_sse( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuvnv12_rgba_sse( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuvnv12_bgra_sse( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuvnv12_argb_sse( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuvnv12_abgr_sse( - 
uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -// yuv to rgb, sse implementation -// pointers do not need to be 16 byte aligned -void yuv420_rgb565_sseu( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuv420_rgb24_sseu( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuv420_rgba_sseu( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuv420_bgra_sseu( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuv420_argb_sseu( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuv420_abgr_sseu( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuv422_rgb565_sseu( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuv422_rgb24_sseu( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType 
yuv_type); - -void yuv422_rgba_sseu( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuv422_bgra_sseu( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuv422_argb_sseu( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuv422_abgr_sseu( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuvnv12_rgb565_sseu( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuvnv12_rgb24_sseu( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuvnv12_rgba_sseu( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuvnv12_bgra_sseu( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuvnv12_argb_sseu( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuvnv12_abgr_sseu( - 
uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - - -// rgb to yuv, standard c implementation -void rgb24_yuv420_std( - uint32_t width, uint32_t height, - const uint8_t *rgb, uint32_t rgb_stride, - uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - YCbCrType yuv_type); - -// rgb to yuv, sse implementation -// pointers must be 16 byte aligned, and strides must be divisible by 16 -void rgb24_yuv420_sse( - uint32_t width, uint32_t height, - const uint8_t *rgb, uint32_t rgb_stride, - uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - YCbCrType yuv_type); - -// rgb to yuv, sse implementation -// pointers do not need to be 16 byte aligned -void rgb24_yuv420_sseu( - uint32_t width, uint32_t height, - const uint8_t *rgb, uint32_t rgb_stride, - uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - YCbCrType yuv_type); - - -//yuv420 to bgra, lsx implementation -void yuv420_rgb24_lsx( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuv420_rgba_lsx( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *v, const uint8_t *u, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuv420_bgra_lsx( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *v, const uint8_t *u, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuv420_argb_lsx( - uint32_t width, uint32_t height, - const uint8_t *y, const uint8_t *v, const uint8_t *u, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); - -void yuv420_abgr_lsx( - uint32_t width, uint32_t height, - 
const uint8_t *y, const uint8_t *v, const uint8_t *u, uint32_t y_stride, uint32_t uv_stride, - uint8_t *rgb, uint32_t rgb_stride, - YCbCrType yuv_type); +#endif /* YUV_RGB_H_ */ diff --git a/src/video/yuv2rgb/yuv_rgb_common.h b/src/video/yuv2rgb/yuv_rgb_common.h new file mode 100644 index 000000000..ae787ed5f --- /dev/null +++ b/src/video/yuv2rgb/yuv_rgb_common.h @@ -0,0 +1,13 @@ +#ifndef YUV_RGB_COMMON_H_ +#define YUV_RGB_COMMON_H_ +// Copyright 2016 Adrien Descamps +// Distributed under BSD 3-Clause License + +typedef enum +{ + YCBCR_JPEG, + YCBCR_601, + YCBCR_709 +} YCbCrType; + +#endif /* YUV_RGB_COMMON_H_ */ diff --git a/src/video/yuv2rgb/yuv_rgb_internal.h b/src/video/yuv2rgb/yuv_rgb_internal.h new file mode 100644 index 000000000..cad978b5f --- /dev/null +++ b/src/video/yuv2rgb/yuv_rgb_internal.h @@ -0,0 +1,74 @@ +// Copyright 2016 Adrien Descamps +// Distributed under BSD 3-Clause License + +#define PRECISION 6 +#define PRECISION_FACTOR (1<[0-255]) +// for ITU-R BT.709-6 values are derived from equations in sections 3.2-3.4, assuming RGB is encoded using full range ([0-1]<->[0-255]) +// all values are rounded to the fourth decimal + +static const YUV2RGBParam YUV2RGB[3] = { + // ITU-T T.871 (JPEG) + {/*.y_shift=*/ 0, /*.y_factor=*/ V(1.0), /*.v_r_factor=*/ V(1.402), /*.u_g_factor=*/ -V(0.3441), /*.v_g_factor=*/ -V(0.7141), /*.u_b_factor=*/ V(1.772)}, + // ITU-R BT.601-7 + {/*.y_shift=*/ 16, /*.y_factor=*/ V(1.1644), /*.v_r_factor=*/ V(1.596), /*.u_g_factor=*/ -V(0.3918), /*.v_g_factor=*/ -V(0.813), /*.u_b_factor=*/ V(2.0172)}, + // ITU-R BT.709-6 + {/*.y_shift=*/ 16, /*.y_factor=*/ V(1.1644), /*.v_r_factor=*/ V(1.7927), /*.u_g_factor=*/ -V(0.2132), /*.v_g_factor=*/ -V(0.5329), /*.u_b_factor=*/ V(2.1124)} +}; + +static const RGB2YUVParam RGB2YUV[3] = { + // ITU-T T.871 (JPEG) + {/*.y_shift=*/ 0, /*.matrix=*/ {{V(0.299), V(0.587), V(0.114)}, {-V(0.1687), -V(0.3313), V(0.5)}, {V(0.5), -V(0.4187), -V(0.0813)}}}, + // ITU-R BT.601-7 + {/*.y_shift=*/ 16, 
/*.matrix=*/ {{V(0.2568), V(0.5041), V(0.0979)}, {-V(0.1482), -V(0.291), V(0.4392)}, {V(0.4392), -V(0.3678), -V(0.0714)}}}, + // ITU-R BT.709-6 + {/*.y_shift=*/ 16, /*.matrix=*/ {{V(0.1826), V(0.6142), V(0.062)}, {-V(0.1006), -V(0.3386), V(0.4392)}, {V(0.4392), -V(0.3989), -V(0.0403)}}} +}; + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +/* The various layouts of YUV data we support */ +#define YUV_FORMAT_420 1 +#define YUV_FORMAT_422 2 +#define YUV_FORMAT_NV12 3 + +/* The various formats of RGB pixel that we support */ +#define RGB_FORMAT_RGB565 1 +#define RGB_FORMAT_RGB24 2 +#define RGB_FORMAT_RGBA 3 +#define RGB_FORMAT_BGRA 4 +#define RGB_FORMAT_ARGB 5 +#define RGB_FORMAT_ABGR 6 diff --git a/src/video/yuv2rgb/yuv_rgb_lsx.c b/src/video/yuv2rgb/yuv_rgb_lsx.c new file mode 100644 index 000000000..8d84dd142 --- /dev/null +++ b/src/video/yuv2rgb/yuv_rgb_lsx.c @@ -0,0 +1,44 @@ +// Copyright 2016 Adrien Descamps +// Distributed under BSD 3-Clause License +#include "../../SDL_internal.h" + +#if SDL_HAVE_YUV +#include "yuv_rgb.h" + +#include "SDL_cpuinfo.h" + +#ifdef __loongarch_sx + +#define LSX_FUNCTION_NAME yuv420_rgb24_lsx +#define STD_FUNCTION_NAME yuv420_rgb24_std +#define YUV_FORMAT YUV_FORMAT_420 +#define RGB_FORMAT RGB_FORMAT_RGB24 +#include "yuv_rgb_lsx_func.h" + +#define LSX_FUNCTION_NAME yuv420_rgba_lsx +#define STD_FUNCTION_NAME yuv420_rgba_std +#define YUV_FORMAT YUV_FORMAT_420 +#define RGB_FORMAT RGB_FORMAT_RGBA +#include "yuv_rgb_lsx_func.h" + +#define LSX_FUNCTION_NAME yuv420_bgra_lsx +#define STD_FUNCTION_NAME yuv420_bgra_std +#define YUV_FORMAT YUV_FORMAT_420 +#define RGB_FORMAT RGB_FORMAT_BGRA +#include "yuv_rgb_lsx_func.h" + +#define LSX_FUNCTION_NAME yuv420_argb_lsx +#define STD_FUNCTION_NAME yuv420_argb_std +#define YUV_FORMAT YUV_FORMAT_420 +#define RGB_FORMAT RGB_FORMAT_ARGB +#include "yuv_rgb_lsx_func.h" + +#define LSX_FUNCTION_NAME yuv420_abgr_lsx +#define STD_FUNCTION_NAME yuv420_abgr_std +#define YUV_FORMAT YUV_FORMAT_420 +#define 
RGB_FORMAT RGB_FORMAT_ABGR +#include "yuv_rgb_lsx_func.h" + +#endif //__loongarch_sx + +#endif /* SDL_HAVE_YUV */ diff --git a/src/video/yuv2rgb/yuv_rgb_lsx.h b/src/video/yuv2rgb/yuv_rgb_lsx.h new file mode 100644 index 000000000..bcffd95c6 --- /dev/null +++ b/src/video/yuv2rgb/yuv_rgb_lsx.h @@ -0,0 +1,407 @@ +// Copyright 2016 Adrien Descamps +// Distributed under BSD 3-Clause License + +// Provide optimized functions to convert images from 8bits yuv420 to rgb24 format + +// There are a few slightly different variations of the YCbCr color space with different parameters that +// change the conversion matrix. +// The three most common YCbCr color space, defined by BT.601, BT.709 and JPEG standard are implemented here. +// See the respective standards for details +// The matrix values used are derived from http://www.equasys.de/colorconversion.html + +// YUV420 is stored as three separate channels, with U and V (Cb and Cr) subsampled by a 2 factor +// For conversion from yuv to rgb, no interpolation is done, and the same UV value are used for 4 rgb pixels. This +// is suboptimal for image quality, but by far the fastest method. + +// For all methods, width and height should be even, if not, the last row/column of the result image won't be affected. +// For sse methods, if the width if not divisable by 32, the last (width%32) pixels of each line won't be affected. 
+ +/*#include */ +#include "yuv_rgb_common.h" + +#include "SDL_stdinc.h" + +// yuv to rgb, standard c implementation +void yuv420_rgb565_std( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv420_rgb24_std( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv420_rgba_std( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv420_bgra_std( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv420_argb_std( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv420_abgr_std( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv422_rgb565_std( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv422_rgb24_std( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv422_rgba_std( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + 
uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv422_bgra_std( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv422_argb_std( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv422_abgr_std( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuvnv12_rgb565_std( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuvnv12_rgb24_std( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuvnv12_rgba_std( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuvnv12_bgra_std( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuvnv12_argb_std( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuvnv12_abgr_std( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType 
yuv_type); + +// yuv to rgb, sse implementation +// pointers must be 16 byte aligned, and strides must be divisable by 16 +void yuv420_rgb565_sse( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv420_rgb24_sse( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv420_rgba_sse( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv420_bgra_sse( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv420_argb_sse( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv420_abgr_sse( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv422_rgb565_sse( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv422_rgb24_sse( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv422_rgba_sse( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t 
uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv422_bgra_sse( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv422_argb_sse( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv422_abgr_sse( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuvnv12_rgb565_sse( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuvnv12_rgb24_sse( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuvnv12_rgba_sse( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuvnv12_bgra_sse( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuvnv12_argb_sse( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuvnv12_abgr_sse( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + 
YCbCrType yuv_type); + +// yuv to rgb, sse implementation +// pointers do not need to be 16 byte aligned +void yuv420_rgb565_sseu( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv420_rgb24_sseu( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv420_rgba_sseu( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv420_bgra_sseu( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv420_argb_sseu( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv420_abgr_sseu( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv422_rgb565_sseu( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv422_rgb24_sseu( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv422_rgba_sseu( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + 
uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv422_bgra_sseu( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv422_argb_sseu( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv422_abgr_sseu( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuvnv12_rgb565_sseu( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuvnv12_rgb24_sseu( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuvnv12_rgba_sseu( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuvnv12_bgra_sseu( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuvnv12_argb_sseu( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuvnv12_abgr_sseu( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType 
yuv_type); + + +// rgb to yuv, standard c implementation +void rgb24_yuv420_std( + uint32_t width, uint32_t height, + const uint8_t *rgb, uint32_t rgb_stride, + uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + YCbCrType yuv_type); + +// rgb to yuv, sse implementation +// pointers must be 16 byte aligned, and strides must be divisible by 16 +void rgb24_yuv420_sse( + uint32_t width, uint32_t height, + const uint8_t *rgb, uint32_t rgb_stride, + uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + YCbCrType yuv_type); + +// rgb to yuv, sse implementation +// pointers do not need to be 16 byte aligned +void rgb24_yuv420_sseu( + uint32_t width, uint32_t height, + const uint8_t *rgb, uint32_t rgb_stride, + uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + YCbCrType yuv_type); + + +//yuv420 to bgra, lsx implementation +void yuv420_rgb24_lsx( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv420_rgba_lsx( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *v, const uint8_t *u, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv420_bgra_lsx( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *v, const uint8_t *u, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv420_argb_lsx( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *v, const uint8_t *u, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv420_abgr_lsx( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *v, const uint8_t *u, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); diff --git 
a/src/video/yuv2rgb/yuv_rgb.c b/src/video/yuv2rgb/yuv_rgb_sse.c similarity index 53% rename from src/video/yuv2rgb/yuv_rgb.c rename to src/video/yuv2rgb/yuv_rgb_sse.c index 71bcde9c6..b22a89fa4 100644 --- a/src/video/yuv2rgb/yuv_rgb.c +++ b/src/video/yuv2rgb/yuv_rgb_sse.c @@ -3,254 +3,12 @@ #include "../../SDL_internal.h" #if SDL_HAVE_YUV - #include "yuv_rgb.h" +#include "yuv_rgb_internal.h" #include "SDL_cpuinfo.h" /*#include */ -#define PRECISION 6 -#define PRECISION_FACTOR (1<[0-255]) -// for ITU-R BT.709-6 values are derived from equations in sections 3.2-3.4, assuming RGB is encoded using full range ([0-1]<->[0-255]) -// all values are rounded to the fourth decimal - -static const YUV2RGBParam YUV2RGB[3] = { - // ITU-T T.871 (JPEG) - {/*.y_shift=*/ 0, /*.y_factor=*/ V(1.0), /*.v_r_factor=*/ V(1.402), /*.u_g_factor=*/ -V(0.3441), /*.v_g_factor=*/ -V(0.7141), /*.u_b_factor=*/ V(1.772)}, - // ITU-R BT.601-7 - {/*.y_shift=*/ 16, /*.y_factor=*/ V(1.1644), /*.v_r_factor=*/ V(1.596), /*.u_g_factor=*/ -V(0.3918), /*.v_g_factor=*/ -V(0.813), /*.u_b_factor=*/ V(2.0172)}, - // ITU-R BT.709-6 - {/*.y_shift=*/ 16, /*.y_factor=*/ V(1.1644), /*.v_r_factor=*/ V(1.7927), /*.u_g_factor=*/ -V(0.2132), /*.v_g_factor=*/ -V(0.5329), /*.u_b_factor=*/ V(2.1124)} -}; - -static const RGB2YUVParam RGB2YUV[3] = { - // ITU-T T.871 (JPEG) - {/*.y_shift=*/ 0, /*.matrix=*/ {{V(0.299), V(0.587), V(0.114)}, {-V(0.1687), -V(0.3313), V(0.5)}, {V(0.5), -V(0.4187), -V(0.0813)}}}, - // ITU-R BT.601-7 - {/*.y_shift=*/ 16, /*.matrix=*/ {{V(0.2568), V(0.5041), V(0.0979)}, {-V(0.1482), -V(0.291), V(0.4392)}, {V(0.4392), -V(0.3678), -V(0.0714)}}}, - // ITU-R BT.709-6 - {/*.y_shift=*/ 16, /*.matrix=*/ {{V(0.1826), V(0.6142), V(0.062)}, {-V(0.1006), -V(0.3386), V(0.4392)}, {V(0.4392), -V(0.3989), -V(0.0403)}}} -}; - -#ifdef _MSC_VER -#pragma warning(pop) -#endif - -/* The various layouts of YUV data we support */ -#define YUV_FORMAT_420 1 -#define YUV_FORMAT_422 2 -#define YUV_FORMAT_NV12 3 - -/* The 
various formats of RGB pixel that we support */ -#define RGB_FORMAT_RGB565 1 -#define RGB_FORMAT_RGB24 2 -#define RGB_FORMAT_RGBA 3 -#define RGB_FORMAT_BGRA 4 -#define RGB_FORMAT_ARGB 5 -#define RGB_FORMAT_ABGR 6 - -// divide by PRECISION_FACTOR and clamp to [0:255] interval -// input must be in the [-128*PRECISION_FACTOR:384*PRECISION_FACTOR] range -static uint8_t clampU8(int32_t v) -{ - static const uint8_t lut[512] = - {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46, - 47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90, - 91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125, - 126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158, - 159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, - 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224, - 225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255, - 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, - 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, - 
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, - 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255 - }; - return lut[((v+128*PRECISION_FACTOR)>>PRECISION)&511]; -} - - -#define STD_FUNCTION_NAME yuv420_rgb565_std -#define YUV_FORMAT YUV_FORMAT_420 -#define RGB_FORMAT RGB_FORMAT_RGB565 -#include "yuv_rgb_std_func.h" - -#define STD_FUNCTION_NAME yuv420_rgb24_std -#define YUV_FORMAT YUV_FORMAT_420 -#define RGB_FORMAT RGB_FORMAT_RGB24 -#include "yuv_rgb_std_func.h" - -#define STD_FUNCTION_NAME yuv420_rgba_std -#define YUV_FORMAT YUV_FORMAT_420 -#define RGB_FORMAT RGB_FORMAT_RGBA -#include "yuv_rgb_std_func.h" - -#define STD_FUNCTION_NAME yuv420_bgra_std -#define YUV_FORMAT YUV_FORMAT_420 -#define RGB_FORMAT RGB_FORMAT_BGRA -#include "yuv_rgb_std_func.h" - -#define STD_FUNCTION_NAME yuv420_argb_std -#define YUV_FORMAT YUV_FORMAT_420 -#define RGB_FORMAT RGB_FORMAT_ARGB -#include "yuv_rgb_std_func.h" - -#define STD_FUNCTION_NAME yuv420_abgr_std -#define YUV_FORMAT YUV_FORMAT_420 -#define RGB_FORMAT RGB_FORMAT_ABGR -#include "yuv_rgb_std_func.h" - -#define STD_FUNCTION_NAME yuv422_rgb565_std -#define YUV_FORMAT YUV_FORMAT_422 -#define RGB_FORMAT RGB_FORMAT_RGB565 -#include "yuv_rgb_std_func.h" - -#define STD_FUNCTION_NAME yuv422_rgb24_std -#define YUV_FORMAT YUV_FORMAT_422 -#define RGB_FORMAT RGB_FORMAT_RGB24 -#include "yuv_rgb_std_func.h" - -#define STD_FUNCTION_NAME yuv422_rgba_std -#define YUV_FORMAT YUV_FORMAT_422 -#define RGB_FORMAT RGB_FORMAT_RGBA -#include "yuv_rgb_std_func.h" - -#define STD_FUNCTION_NAME yuv422_bgra_std -#define YUV_FORMAT YUV_FORMAT_422 -#define RGB_FORMAT RGB_FORMAT_BGRA -#include "yuv_rgb_std_func.h" - -#define STD_FUNCTION_NAME yuv422_argb_std -#define YUV_FORMAT YUV_FORMAT_422 -#define RGB_FORMAT RGB_FORMAT_ARGB -#include "yuv_rgb_std_func.h" - -#define STD_FUNCTION_NAME yuv422_abgr_std 
-#define YUV_FORMAT YUV_FORMAT_422 -#define RGB_FORMAT RGB_FORMAT_ABGR -#include "yuv_rgb_std_func.h" - -#define STD_FUNCTION_NAME yuvnv12_rgb565_std -#define YUV_FORMAT YUV_FORMAT_NV12 -#define RGB_FORMAT RGB_FORMAT_RGB565 -#include "yuv_rgb_std_func.h" - -#define STD_FUNCTION_NAME yuvnv12_rgb24_std -#define YUV_FORMAT YUV_FORMAT_NV12 -#define RGB_FORMAT RGB_FORMAT_RGB24 -#include "yuv_rgb_std_func.h" - -#define STD_FUNCTION_NAME yuvnv12_rgba_std -#define YUV_FORMAT YUV_FORMAT_NV12 -#define RGB_FORMAT RGB_FORMAT_RGBA -#include "yuv_rgb_std_func.h" - -#define STD_FUNCTION_NAME yuvnv12_bgra_std -#define YUV_FORMAT YUV_FORMAT_NV12 -#define RGB_FORMAT RGB_FORMAT_BGRA -#include "yuv_rgb_std_func.h" - -#define STD_FUNCTION_NAME yuvnv12_argb_std -#define YUV_FORMAT YUV_FORMAT_NV12 -#define RGB_FORMAT RGB_FORMAT_ARGB -#include "yuv_rgb_std_func.h" - -#define STD_FUNCTION_NAME yuvnv12_abgr_std -#define YUV_FORMAT YUV_FORMAT_NV12 -#define RGB_FORMAT RGB_FORMAT_ABGR -#include "yuv_rgb_std_func.h" - -void rgb24_yuv420_std( - uint32_t width, uint32_t height, - const uint8_t *RGB, uint32_t RGB_stride, - uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride, - YCbCrType yuv_type) -{ - const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]); - - uint32_t x, y; - for(y=0; y<(height-1); y+=2) - { - const uint8_t *rgb_ptr1=RGB+y*RGB_stride, - *rgb_ptr2=RGB+(y+1)*RGB_stride; - - uint8_t *y_ptr1=Y+y*Y_stride, - *y_ptr2=Y+(y+1)*Y_stride, - *u_ptr=U+(y/2)*UV_stride, - *v_ptr=V+(y/2)*UV_stride; - - for(x=0; x<(width-1); x+=2) - { - // compute yuv for the four pixels, u and v values are summed - int32_t y_tmp, u_tmp, v_tmp; - - y_tmp = param->matrix[0][0]*rgb_ptr1[0] + param->matrix[0][1]*rgb_ptr1[1] + param->matrix[0][2]*rgb_ptr1[2]; - u_tmp = param->matrix[1][0]*rgb_ptr1[0] + param->matrix[1][1]*rgb_ptr1[1] + param->matrix[1][2]*rgb_ptr1[2]; - v_tmp = param->matrix[2][0]*rgb_ptr1[0] + param->matrix[2][1]*rgb_ptr1[1] + param->matrix[2][2]*rgb_ptr1[2]; - 
y_ptr1[0]=clampU8(y_tmp+((param->y_shift)<matrix[0][0]*rgb_ptr1[3] + param->matrix[0][1]*rgb_ptr1[4] + param->matrix[0][2]*rgb_ptr1[5]; - u_tmp += param->matrix[1][0]*rgb_ptr1[3] + param->matrix[1][1]*rgb_ptr1[4] + param->matrix[1][2]*rgb_ptr1[5]; - v_tmp += param->matrix[2][0]*rgb_ptr1[3] + param->matrix[2][1]*rgb_ptr1[4] + param->matrix[2][2]*rgb_ptr1[5]; - y_ptr1[1]=clampU8(y_tmp+((param->y_shift)<matrix[0][0]*rgb_ptr2[0] + param->matrix[0][1]*rgb_ptr2[1] + param->matrix[0][2]*rgb_ptr2[2]; - u_tmp += param->matrix[1][0]*rgb_ptr2[0] + param->matrix[1][1]*rgb_ptr2[1] + param->matrix[1][2]*rgb_ptr2[2]; - v_tmp += param->matrix[2][0]*rgb_ptr2[0] + param->matrix[2][1]*rgb_ptr2[1] + param->matrix[2][2]*rgb_ptr2[2]; - y_ptr2[0]=clampU8(y_tmp+((param->y_shift)<matrix[0][0]*rgb_ptr2[3] + param->matrix[0][1]*rgb_ptr2[4] + param->matrix[0][2]*rgb_ptr2[5]; - u_tmp += param->matrix[1][0]*rgb_ptr2[3] + param->matrix[1][1]*rgb_ptr2[4] + param->matrix[1][2]*rgb_ptr2[5]; - v_tmp += param->matrix[2][0]*rgb_ptr2[3] + param->matrix[2][1]*rgb_ptr2[4] + param->matrix[2][2]*rgb_ptr2[5]; - y_ptr2[1]=clampU8(y_tmp+((param->y_shift)<matrix[0][0])), \ - _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[0][1]))); \ + _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[0][1]))); \ Y = _mm_add_epi16(Y, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[0][2]))); \ Y = _mm_add_epi16(Y, _mm_set1_epi16((param->y_shift)<matrix[1][0])), \ - _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[1][1]))); \ + _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[1][1]))); \ U = _mm_add_epi16(U, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[1][2]))); \ U = _mm_add_epi16(U, _mm_set1_epi16(128<matrix[2][0])), \ - _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[2][1]))); \ + _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[2][1]))); \ V = _mm_add_epi16(V, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[2][2]))); \ V = _mm_add_epi16(V, _mm_set1_epi16(128<*/ +#include "yuv_rgb_common.h" + +#include "SDL_stdinc.h" + +// yuv 
to rgb, sse implementation +// pointers must be 16 byte aligned, and strides must be divisable by 16 +void yuv420_rgb565_sse( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv420_rgb24_sse( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv420_rgba_sse( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv420_bgra_sse( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv420_argb_sse( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv420_abgr_sse( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv422_rgb565_sse( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv422_rgb24_sse( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv422_rgba_sse( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, 
uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv422_bgra_sse( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv422_argb_sse( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv422_abgr_sse( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuvnv12_rgb565_sse( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuvnv12_rgb24_sse( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuvnv12_rgba_sse( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuvnv12_bgra_sse( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuvnv12_argb_sse( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuvnv12_abgr_sse( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +// yuv 
to rgb, sse implementation +// pointers do not need to be 16 byte aligned +void yuv420_rgb565_sseu( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv420_rgb24_sseu( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv420_rgba_sseu( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv420_bgra_sseu( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv420_argb_sseu( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv420_abgr_sseu( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv422_rgb565_sseu( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv422_rgb24_sseu( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv422_rgba_sseu( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, 
+ YCbCrType yuv_type); + +void yuv422_bgra_sseu( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv422_argb_sseu( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv422_abgr_sseu( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuvnv12_rgb565_sseu( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuvnv12_rgb24_sseu( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuvnv12_rgba_sseu( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuvnv12_bgra_sseu( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuvnv12_argb_sseu( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuvnv12_abgr_sseu( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + + +// rgb to yuv, 
standard c implementation +void rgb24_yuv420_std( + uint32_t width, uint32_t height, + const uint8_t *rgb, uint32_t rgb_stride, + uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + YCbCrType yuv_type); + +// rgb to yuv, sse implementation +// pointers must be 16 byte aligned, and strides must be divisible by 16 +void rgb24_yuv420_sse( + uint32_t width, uint32_t height, + const uint8_t *rgb, uint32_t rgb_stride, + uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + YCbCrType yuv_type); + +// rgb to yuv, sse implementation +// pointers do not need to be 16 byte aligned +void rgb24_yuv420_sseu( + uint32_t width, uint32_t height, + const uint8_t *rgb, uint32_t rgb_stride, + uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + YCbCrType yuv_type); diff --git a/src/video/yuv2rgb/yuv_rgb_std.c b/src/video/yuv2rgb/yuv_rgb_std.c new file mode 100644 index 000000000..a222a3abb --- /dev/null +++ b/src/video/yuv2rgb/yuv_rgb_std.c @@ -0,0 +1,179 @@ +// Copyright 2016 Adrien Descamps +// Distributed under BSD 3-Clause License +#include "../../SDL_internal.h" + +#if SDL_HAVE_YUV +#include "yuv_rgb.h" +#include "yuv_rgb_internal.h" + +// divide by PRECISION_FACTOR and clamp to [0:255] interval +// input must be in the [-128*PRECISION_FACTOR:384*PRECISION_FACTOR] range +static uint8_t clampU8(int32_t v) +{ + static const uint8_t lut[512] = + {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46, + 47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90, + 
91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125, + 126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158, + 159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, + 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224, + 225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255 + }; + return lut[((v+128*PRECISION_FACTOR)>>PRECISION)&511]; +} + + +#define STD_FUNCTION_NAME yuv420_rgb565_std +#define YUV_FORMAT YUV_FORMAT_420 +#define RGB_FORMAT RGB_FORMAT_RGB565 +#include "yuv_rgb_std_func.h" + +#define STD_FUNCTION_NAME yuv420_rgb24_std +#define YUV_FORMAT YUV_FORMAT_420 +#define RGB_FORMAT RGB_FORMAT_RGB24 +#include "yuv_rgb_std_func.h" + +#define STD_FUNCTION_NAME yuv420_rgba_std +#define YUV_FORMAT YUV_FORMAT_420 +#define RGB_FORMAT RGB_FORMAT_RGBA +#include "yuv_rgb_std_func.h" + +#define STD_FUNCTION_NAME yuv420_bgra_std +#define YUV_FORMAT YUV_FORMAT_420 +#define RGB_FORMAT RGB_FORMAT_BGRA +#include "yuv_rgb_std_func.h" + +#define STD_FUNCTION_NAME yuv420_argb_std +#define YUV_FORMAT YUV_FORMAT_420 +#define RGB_FORMAT RGB_FORMAT_ARGB +#include "yuv_rgb_std_func.h" + 
+#define STD_FUNCTION_NAME yuv420_abgr_std +#define YUV_FORMAT YUV_FORMAT_420 +#define RGB_FORMAT RGB_FORMAT_ABGR +#include "yuv_rgb_std_func.h" + +#define STD_FUNCTION_NAME yuv422_rgb565_std +#define YUV_FORMAT YUV_FORMAT_422 +#define RGB_FORMAT RGB_FORMAT_RGB565 +#include "yuv_rgb_std_func.h" + +#define STD_FUNCTION_NAME yuv422_rgb24_std +#define YUV_FORMAT YUV_FORMAT_422 +#define RGB_FORMAT RGB_FORMAT_RGB24 +#include "yuv_rgb_std_func.h" + +#define STD_FUNCTION_NAME yuv422_rgba_std +#define YUV_FORMAT YUV_FORMAT_422 +#define RGB_FORMAT RGB_FORMAT_RGBA +#include "yuv_rgb_std_func.h" + +#define STD_FUNCTION_NAME yuv422_bgra_std +#define YUV_FORMAT YUV_FORMAT_422 +#define RGB_FORMAT RGB_FORMAT_BGRA +#include "yuv_rgb_std_func.h" + +#define STD_FUNCTION_NAME yuv422_argb_std +#define YUV_FORMAT YUV_FORMAT_422 +#define RGB_FORMAT RGB_FORMAT_ARGB +#include "yuv_rgb_std_func.h" + +#define STD_FUNCTION_NAME yuv422_abgr_std +#define YUV_FORMAT YUV_FORMAT_422 +#define RGB_FORMAT RGB_FORMAT_ABGR +#include "yuv_rgb_std_func.h" + +#define STD_FUNCTION_NAME yuvnv12_rgb565_std +#define YUV_FORMAT YUV_FORMAT_NV12 +#define RGB_FORMAT RGB_FORMAT_RGB565 +#include "yuv_rgb_std_func.h" + +#define STD_FUNCTION_NAME yuvnv12_rgb24_std +#define YUV_FORMAT YUV_FORMAT_NV12 +#define RGB_FORMAT RGB_FORMAT_RGB24 +#include "yuv_rgb_std_func.h" + +#define STD_FUNCTION_NAME yuvnv12_rgba_std +#define YUV_FORMAT YUV_FORMAT_NV12 +#define RGB_FORMAT RGB_FORMAT_RGBA +#include "yuv_rgb_std_func.h" + +#define STD_FUNCTION_NAME yuvnv12_bgra_std +#define YUV_FORMAT YUV_FORMAT_NV12 +#define RGB_FORMAT RGB_FORMAT_BGRA +#include "yuv_rgb_std_func.h" + +#define STD_FUNCTION_NAME yuvnv12_argb_std +#define YUV_FORMAT YUV_FORMAT_NV12 +#define RGB_FORMAT RGB_FORMAT_ARGB +#include "yuv_rgb_std_func.h" + +#define STD_FUNCTION_NAME yuvnv12_abgr_std +#define YUV_FORMAT YUV_FORMAT_NV12 +#define RGB_FORMAT RGB_FORMAT_ABGR +#include "yuv_rgb_std_func.h" + +void rgb24_yuv420_std( + uint32_t width, uint32_t height, + 
 const uint8_t *RGB, uint32_t RGB_stride, + uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride, + YCbCrType yuv_type) +{ + const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]); + + uint32_t x, y; + for(y=0; y<(height-1); y+=2) + { + const uint8_t *rgb_ptr1=RGB+y*RGB_stride, + *rgb_ptr2=RGB+(y+1)*RGB_stride; + + uint8_t *y_ptr1=Y+y*Y_stride, + *y_ptr2=Y+(y+1)*Y_stride, + *u_ptr=U+(y/2)*UV_stride, + *v_ptr=V+(y/2)*UV_stride; + + for(x=0; x<(width-1); x+=2) + { + // compute yuv for the four pixels, u and v values are summed + int32_t y_tmp, u_tmp, v_tmp; + + y_tmp = param->matrix[0][0]*rgb_ptr1[0] + param->matrix[0][1]*rgb_ptr1[1] + param->matrix[0][2]*rgb_ptr1[2]; + u_tmp = param->matrix[1][0]*rgb_ptr1[0] + param->matrix[1][1]*rgb_ptr1[1] + param->matrix[1][2]*rgb_ptr1[2]; + v_tmp = param->matrix[2][0]*rgb_ptr1[0] + param->matrix[2][1]*rgb_ptr1[1] + param->matrix[2][2]*rgb_ptr1[2]; + y_ptr1[0]=clampU8(y_tmp+((param->y_shift)<<PRECISION)); + + y_tmp = param->matrix[0][0]*rgb_ptr1[3] + param->matrix[0][1]*rgb_ptr1[4] + param->matrix[0][2]*rgb_ptr1[5]; + u_tmp += param->matrix[1][0]*rgb_ptr1[3] + param->matrix[1][1]*rgb_ptr1[4] + param->matrix[1][2]*rgb_ptr1[5]; + v_tmp += param->matrix[2][0]*rgb_ptr1[3] + param->matrix[2][1]*rgb_ptr1[4] + param->matrix[2][2]*rgb_ptr1[5]; + y_ptr1[1]=clampU8(y_tmp+((param->y_shift)<<PRECISION)); + + y_tmp = param->matrix[0][0]*rgb_ptr2[0] + param->matrix[0][1]*rgb_ptr2[1] + param->matrix[0][2]*rgb_ptr2[2]; + u_tmp += param->matrix[1][0]*rgb_ptr2[0] + param->matrix[1][1]*rgb_ptr2[1] + param->matrix[1][2]*rgb_ptr2[2]; + v_tmp += param->matrix[2][0]*rgb_ptr2[0] + param->matrix[2][1]*rgb_ptr2[1] + param->matrix[2][2]*rgb_ptr2[2]; + y_ptr2[0]=clampU8(y_tmp+((param->y_shift)<<PRECISION)); + + y_tmp = param->matrix[0][0]*rgb_ptr2[3] + param->matrix[0][1]*rgb_ptr2[4] + param->matrix[0][2]*rgb_ptr2[5]; + u_tmp += param->matrix[1][0]*rgb_ptr2[3] + param->matrix[1][1]*rgb_ptr2[4] + param->matrix[1][2]*rgb_ptr2[5]; + v_tmp += param->matrix[2][0]*rgb_ptr2[3] + param->matrix[2][1]*rgb_ptr2[4] + 
param->matrix[2][2]*rgb_ptr2[5]; + y_ptr2[1]=clampU8(y_tmp+((param->y_shift)<*/ +#include "yuv_rgb_common.h" + +#include "SDL_stdinc.h" + +// yuv to rgb, standard c implementation +void yuv420_rgb565_std( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv420_rgb24_std( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv420_rgba_std( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv420_bgra_std( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv420_argb_std( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv420_abgr_std( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv422_rgb565_std( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv422_rgb24_std( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv422_rgba_std( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t 
*u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv422_bgra_std( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv422_argb_std( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuv422_abgr_std( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuvnv12_rgb565_std( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuvnv12_rgb24_std( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuvnv12_rgba_std( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuvnv12_bgra_std( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuvnv12_argb_std( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type); + +void yuvnv12_abgr_std( + uint32_t width, uint32_t height, + const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t 
uv_stride, + uint8_t *rgb, uint32_t rgb_stride, + YCbCrType yuv_type);