Implement accurate, performant 32-bit scalar blitter for ARGB dst case

This commit is contained in:
Isaac Aronson 2023-10-10 08:09:10 -05:00 committed by Sam Lantinga
parent e5bbe32641
commit 5cec91e27a
2 changed files with 58 additions and 16 deletions

View File

@ -493,6 +493,13 @@ extern SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface);
} \
}
/* Convert any 32-bit (4 bytes-per-pixel) pixel to a packed ARGB value.
 *
 * src    - pixel value to convert; passed on to RGBA_FROM_PIXEL.
 * srcfmt - format argument passed on to RGBA_FROM_PIXEL together with src.
 * dst    - lvalue that receives the result: alpha in bits 24-31, red in
 *          bits 16-23, green in bits 8-15 and blue in bits 0-7.
 *
 * The temporaries carry an argb_ prefix so that a caller-side `dst` named
 * a, r, g or b is not captured by the macro's own locals.
 */
#define PIXEL_TO_ARGB_PIXEL(src, srcfmt, dst)                                  \
    do {                                                                       \
        Uint8 argb_a_, argb_r_, argb_g_, argb_b_;                              \
        RGBA_FROM_PIXEL((src), (srcfmt), argb_r_, argb_g_, argb_b_, argb_a_);  \
        /* Widen before shifting: a Uint8 operand promotes to int, and */      \
        /* shifting an alpha >= 0x80 left by 24 overflows a 32-bit int. */     \
        (dst) = ((Uint32)argb_a_ << 24) | ((Uint32)argb_r_ << 16) |            \
                ((Uint32)argb_g_ << 8) | (Uint32)argb_b_;                      \
    } while (0)
/* Blend a single color channel or alpha value */
#define ALPHA_BLEND_CHANNEL(sC, dC, sA) \
do { \
@ -509,7 +516,28 @@ extern SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface);
ALPHA_BLEND_CHANNEL(sG, dG, A); \
ALPHA_BLEND_CHANNEL(sB, dB, A); \
} while (0)
/* Blend the ARGB values of two 32-bit pixels.
 *
 * src - source pixel, alpha in bits 24-31; read once and left unmodified,
 *       so any Uint32 expression may be passed.
 * dst - lvalue holding the destination pixel; read once, then overwritten
 *       with the blended pixel.
 *
 * Channels are processed two at a time as 16-bit lanes of a Uint32
 * (red/blue in one word, alpha/green shifted down by 8 in the other).
 * Each lane computes s * A + d * (255 - A), where A is the source alpha,
 * and then divides by 255 with an add-and-shift sequence. The source alpha
 * lane is forced to 0xFF beforehand, so the resulting alpha is 255 blended
 * over the destination alpha.
 */
#define ALPHA_BLEND_ARGB_PIXELS(src, dst)                                      \
    do {                                                                       \
        const Uint32 blend_in_ = (Uint32)(src);                                \
        const Uint32 blend_bg_ = (Uint32)(dst);                                \
        const Uint32 blend_lanes_ = 0x00FF00FFu;                               \
        const Uint32 blend_alpha_ = blend_in_ >> 24;                           \
        /* Work on a private copy so the caller's src is not altered. */       \
        const Uint32 blend_fg_ = blend_in_ | 0xFF000000u;                      \
        const Uint32 blend_fg_rb_ = blend_fg_ & blend_lanes_;                  \
        const Uint32 blend_bg_rb_ = blend_bg_ & blend_lanes_;                  \
        const Uint32 blend_fg_ga_ = (blend_fg_ >> 8) & blend_lanes_;           \
        const Uint32 blend_bg_ga_ = (blend_bg_ >> 8) & blend_lanes_;           \
        Uint32 blend_rb_;                                                      \
        Uint32 blend_ga_;                                                      \
                                                                               \
        /* (s - d) * A + d * 256 - d == s * A + d * (255 - A); unsigned */     \
        /* wrap-around makes the cross-lane borrows cancel out. */             \
        blend_rb_ = ((blend_fg_rb_ - blend_bg_rb_) * blend_alpha_) +           \
                    (blend_bg_rb_ << 8) - blend_bg_rb_;                        \
        blend_rb_ += 0x00010001u;                                              \
        blend_rb_ += (blend_rb_ >> 8) & blend_lanes_;                          \
        blend_rb_ = (blend_rb_ >> 8) & blend_lanes_;                           \
                                                                               \
        blend_ga_ = ((blend_fg_ga_ - blend_bg_ga_) * blend_alpha_) +           \
                    (blend_bg_ga_ << 8) - blend_bg_ga_;                        \
        blend_ga_ += 0x00010001u;                                              \
        blend_ga_ += (blend_ga_ >> 8) & blend_lanes_;                          \
        /* Alpha and green already sit in their final byte positions. */       \
        blend_ga_ &= 0xFF00FF00u;                                              \
                                                                               \
        (dst) = blend_rb_ | blend_ga_;                                         \
    } while (0)
/* Blend the RGBA values of two pixels */
#define ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA) \
do { \

View File

@ -1073,22 +1073,36 @@ static void BlitNtoNPixelAlpha(SDL_BlitInfo *info)
}
while (height--) {
/* *INDENT-OFF* */ /* clang-format off */
DUFFS_LOOP4(
{
DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
if (sA) {
DISEMBLE_RGBA(dst, dstbpp, dstfmt, Pixel, dR, dG, dB, dA);
ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA);
ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA);
if (srcbpp == 4 && dstbpp == 4 && dstfmt->Ashift == 24 && dstfmt->Rshift == 16 && dstfmt->Gshift == 8 &&
dstfmt->Bshift == 0) {
DUFFS_LOOP4(
{
PIXEL_TO_ARGB_PIXEL(*(Uint32 *) src, srcfmt, Pixel);
Uint32 blended = *(Uint32 *) dst;
ALPHA_BLEND_ARGB_PIXELS(Pixel, blended);
*(Uint32*)dst = blended;
src += srcbpp;
dst += dstbpp;
},
width);
} else {
/* *INDENT-OFF* */ /* clang-format off */
DUFFS_LOOP4(
{
DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
if (sA) {
DISEMBLE_RGBA(dst, dstbpp, dstfmt, Pixel, dR, dG, dB, dA);
ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA);
ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA);
}
src += srcbpp;
dst += dstbpp;
},
width);
/* *INDENT-ON* */ /* clang-format on */
src += srcskip;
dst += dstskip;
}
src += srcbpp;
dst += dstbpp;
},
width);
/* *INDENT-ON* */ /* clang-format on */
src += srcskip;
dst += dstskip;
}
if (freeFormat) {
SDL_DestroyPixelFormat(dstfmt);