a regression test now iterates over 8388625 pixels per pass.

* app/composite/gimp-composite-*test.c: each regression test now
	iterates over 8388625 pixels per pass (previously 1048593).

* app/composite/gimp-composite-mmx.c
* app/composite/gimp-composite-sse.c
* app/composite/gimp-composite-sse2.c:
	Ensured that every asm() statement that clobbers the condition
	code register lists it in its clobbered register list.
	This should FIX bug #147013.
This commit is contained in:
Helvetix Victorinox 2004-09-03 02:53:19 +00:00
parent 4fbc8764b4
commit bac9cfa083
15 changed files with 39 additions and 117 deletions

View File

@ -1,3 +1,15 @@
2004-09-02 Helvetix Victorinox <helvetix@gimp.org>
* app/composite/gimp-composite-*test.c: a regression test now
iterates over 8388625 pixels per pass.
* app/composite/gimp-composite-mmx.c
* app/composite/gimp-composite-sse.c
* app/composite/gimp-composite-sse2.c:
Ensured that a clobbered condition code register is reflected in
the clobbered register list for each asm() statement.
This should FIX bug #147013.
2004-09-03 Sven Neumann <sven@gimp.org>
* libgimpbase/Makefile.am

View File

@ -79,7 +79,7 @@ main (int argc, char *argv[])
putenv ("GIMP_COMPOSITE=0x1");
iterations = 10;
n_pixels = 1048593;
n_pixels = 8388625;
argv++, argc--;
while (argc >= 2)

View File

@ -79,7 +79,7 @@ main (int argc, char *argv[])
putenv ("GIMP_COMPOSITE=0x1");
iterations = 10;
n_pixels = 1048593;
n_pixels = 8388625;
argv++, argc--;
while (argc >= 2)

View File

@ -20,6 +20,7 @@ static struct install_table {
{ GIMP_COMPOSITE_MULTIPLY, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, gimp_composite_multiply_rgba8_rgba8_rgba8_mmx },
{ GIMP_COMPOSITE_SCREEN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, gimp_composite_screen_rgba8_rgba8_rgba8_mmx },
{ GIMP_COMPOSITE_DIFFERENCE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, gimp_composite_difference_rgba8_rgba8_rgba8_mmx },
{ GIMP_COMPOSITE_ADDITION, GIMP_PIXELFORMAT_VA8, GIMP_PIXELFORMAT_VA8, GIMP_PIXELFORMAT_VA8, gimp_composite_addition_va8_va8_va8_mmx },
{ GIMP_COMPOSITE_ADDITION, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, gimp_composite_addition_rgba8_rgba8_rgba8_mmx },
{ GIMP_COMPOSITE_SUBTRACT, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, gimp_composite_subtract_rgba8_rgba8_rgba8_mmx },
{ GIMP_COMPOSITE_DARKEN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, gimp_composite_darken_rgba8_rgba8_rgba8_mmx },

View File

@ -65,6 +65,17 @@ gimp_composite_mmx_test (int iterations, int n_pixels)
}
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_ADDITION, GIMP_PIXELFORMAT_VA8, GIMP_PIXELFORMAT_VA8, GIMP_PIXELFORMAT_VA8, GIMP_PIXELFORMAT_VA8, n_pixels, (unsigned char *) va8A, (unsigned char *) va8B, (unsigned char *) va8B, (unsigned char *) va8D2);
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_ADDITION, GIMP_PIXELFORMAT_VA8, GIMP_PIXELFORMAT_VA8, GIMP_PIXELFORMAT_VA8, GIMP_PIXELFORMAT_VA8, n_pixels, (unsigned char *) va8A, (unsigned char *) va8B, (unsigned char *) va8B, (unsigned char *) va8D1);
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_addition_va8_va8_va8_mmx, &special_ctx);
if (gimp_composite_regression_compare_contexts ("addition", &generic_ctx, &special_ctx))
{
printf("addition_va8_va8_va8 failed\n");
return (1);
}
gimp_composite_regression_timer_report ("addition_va8_va8_va8", ft0, ft1);
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_ADDITION, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_ADDITION, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
@ -211,7 +222,7 @@ main (int argc, char *argv[])
putenv ("GIMP_COMPOSITE=0x1");
iterations = 10;
n_pixels = 1048593;
n_pixels = 8388625;
argv++, argc--;
while (argc >= 2)

View File

@ -1314,8 +1314,11 @@ gimp_composite_addition_va8_va8_va8_mmx (GimpCompositeContext *_op)
"\t" pminub(mm3, mm2, mm4) "\n"
"\tpand %%mm0, %%mm2\n"
"\tpor %%mm2, %%mm1\n"
/*"\tmovq %%mm1, %0\n"*/
#if 0
"\tmovq %%mm1, %0\n"
#else
"\tmovntq %%mm1, %0\n"
#endif
: "=m" (*d)
: "m" (*a), "m" (*b)
: "%mm0", "%mm1", "%mm2", "%mm3", "%mm4");

View File

@ -15,7 +15,7 @@ extern gboolean gimp_composite_mmx_install (void);
* can actually be compiled like we expect it to. If all is okay,
* define the manifest constant COMPILE_IS_OKAY.
*/
#if !defined(__INTEL_COMPILER)
#if !defined(__INTEL_COMPILER) || defined(USE_INTEL_COMPILER_ANYWAY)
#if defined(USE_MMX)
#if defined(ARCH_X86)
#if __GNUC__ >= 3

View File

@ -211,7 +211,7 @@ main (int argc, char *argv[])
putenv ("GIMP_COMPOSITE=0x1");
iterations = 10;
n_pixels = 1048593;
n_pixels = 8388625;
argv++, argc--;
while (argc >= 2)

View File

@ -48,111 +48,6 @@
#define pminub(src,dst,tmp) "pminub " "%%" #src ", %%" #dst
#define pmaxub(src,dst,tmp) "pmaxub " "%%" #src ", %%" #dst
#if 0
/*
* Double-word divide. Adjusted for subsequent unsigned packing
* (high-order bit of each word is cleared)
* Clobbers eax, ecx, edx
*/
#define pdivwX(dividend,divisor,quotient) "movd %%" #dividend ",%%eax; " \
"movd %%" #divisor ",%%ecx; " \
"xorl %%edx,%%edx; " \
"divw %%cx; " \
"roll $16, %%eax; " \
"roll $16, %%ecx; " \
"xorl %%edx,%%edx; " \
"divw %%cx; " \
"btr $15, %%eax; " \
"roll $16, %%eax; " \
"btr $15, %%eax; " \
"movd %%eax,%%" #quotient ";"
#endif
#if 0
/*
* Quadword divide. No adjustment for subsequent unsigned packing
* (high-order bit of each word is left alone)
*/
#define pdivwqX(dividend,divisor,quotient) "movd %%" #dividend ",%%eax; " \
"movd %%" #divisor ",%%ecx; " \
"xorl %%edx,%%edx; " \
"divw %%cx; " \
"roll $16, %%eax; " \
"roll $16, %%ecx; " \
"xorl %%edx,%%edx; " \
"divw %%cx; " \
"roll $16, %%eax; " \
"movd %%eax,%%" #quotient "; " \
"psrlq $32,%%" #dividend ";" \
"psrlq $32,%%" #divisor ";" \
"movd %%" #dividend ",%%eax; " \
"movd %%" #divisor ",%%ecx; " \
"xorl %%edx,%%edx; " \
"divw %%cx; " \
"roll $16, %%eax; " \
"roll $16, %%ecx; " \
"xorl %%edx,%%edx; " \
"divw %%cx; " \
"roll $16, %%eax; " \
"movd %%eax,%%" #divisor ";" \
"psllq $32,%%" #divisor ";" \
"por %%" #divisor ",%%" #quotient ";"
/*
* Quadword divide. Adjusted for subsequent unsigned packing
* (high-order bit of each word is cleared)
*/
#define pdivwuqX(dividend,divisor,quotient) \
pdivwX(dividend,divisor,quotient) \
"psrlq $32,%%" #dividend ";" \
"psrlq $32,%%" #divisor ";" \
pdivwX(dividend,divisor,quotient) \
"movd %%eax,%%" #divisor ";" \
"psllq $32,%%" #divisor ";" \
"por %%" #divisor ",%%" #quotient ";"
/* equivalent to the INT_MULT() macro in gimp-composite-generic.c */
/*
* opr2 = INT_MULT(opr1, opr2, t)
*
* Operates across quad-words using x86 word (16-bit) values.
* Result is left in opr2
*
* opr1 = opr1 * opr2 + w128
* opr2 = opr1
* opr2 = ((opr2 >> 8) + opr1) >> 8
*/
#define pmulwX(opr1,opr2,w128) \
"\tpmullw %%"#opr2", %%"#opr1"; " \
"\tpaddw %%"#w128", %%"#opr1"; " \
"\tmovq %%"#opr1", %%"#opr2"; " \
"\tpsrlw $8, %%"#opr2"; " \
"\tpaddw %%"#opr1", %%"#opr2"; " \
"\tpsrlw $8, %%"#opr2"\n"
/* a = INT_MULT(a,b) */
#define mmx_int_mult(a,b,w128) \
"\tpmullw %%"#b", %%"#a"; " \
"\tpaddw %%"#w128", %%"#a"; " \
"\tmovq %%"#a", %%"#b"; " \
"\tpsrlw $8, %%"#b"; " \
"\tpaddw %%"#a", %%"#b"; " \
"\tpsrlw $8, %%"#b"\n"
static const guint32 rgba8_alpha_mask_64[2] = { 0xFF000000, 0xFF000000 };
static const guint32 rgba8_b1_64[2] = { 0x01010101, 0x01010101 };
static const guint32 rgba8_b255_64[2] = { 0xFFFFFFFF, 0xFFFFFFFF };
static const guint32 rgba8_w1_64[2] = { 0x00010001, 0x00010001 };
static const guint32 rgba8_w2_64[2] = { 0x00020002, 0x00020002 };
static const guint32 rgba8_w128_64[2] = { 0x00800080, 0x00800080 };
static const guint32 rgba8_w256_64[2] = { 0x01000100, 0x01000100 };
static const guint32 rgba8_w255_64[2] = { 0X00FF00FF, 0X00FF00FF };
static const guint32 va8_alpha_mask[2] = { 0xFF00FF00, 0xFF00FF00 };
static const guint32 va8_b255[2] = { 0xFFFFFFFF, 0xFFFFFFFF };
static const guint32 va8_w1[2] = { 0x00010001, 0x00010001 };
static const guint32 va8_w255[2] = { 0x00FF00FF, 0x00FF00FF };
#endif
/*
*
*/

View File

@ -9,7 +9,7 @@ extern gboolean gimp_composite_sse_init (void);
*/
extern gboolean gimp_composite_sse_install (void);
#if !defined(__INTEL_COMPILER)
#if !defined(__INTEL_COMPILER) || defined(USE_INTEL_COMPILER_ANYWAY)
#if defined(USE_SSE)
#if defined(ARCH_X86)
#if __GNUC__ >= 3

View File

@ -156,7 +156,7 @@ main (int argc, char *argv[])
putenv ("GIMP_COMPOSITE=0x1");
iterations = 10;
n_pixels = 1048593;
n_pixels = 8388625;
argv++, argc--;
while (argc >= 2)

View File

@ -9,7 +9,7 @@ extern gboolean gimp_composite_sse2_init (void);
*/
extern gboolean gimp_composite_sse2_install (void);
#if !defined(__INTEL_COMPILER)
#if !defined(__INTEL_COMPILER) || defined(USE_INTEL_COMPILER_ANYWAY)
#if defined(USE_SSE)
#if defined(ARCH_X86)
#if __GNUC__ >= 3

View File

@ -79,7 +79,7 @@ main (int argc, char *argv[])
putenv ("GIMP_COMPOSITE=0x1");
iterations = 10;
n_pixels = 1048593;
n_pixels = 8388625;
argv++, argc--;
while (argc >= 2)

View File

@ -109,7 +109,7 @@
"movd %%eax,%%" #divisor ";" \
"psllq $32,%%" #divisor ";" \
"por %%" #divisor ",%%" #quotient ";"
#define pdivwqX_clobber "%eax", "%ecx", "%edx"
#define pdivwqX_clobber "%eax", "%ecx", "%edx", "%cc"
/*
* Quadword divide. Adjusted for subsequent unsigned packing

View File

@ -473,7 +473,7 @@ op.add_option('-t', '--test', action='store_true', dest='test',
help='generate regression testing code')
op.add_option('-i', '--iterations', action='store', type='int', dest='iterations', default=10,
help='number of iterations in regression tests')
op.add_option('-n', '--n-pixels', action='store', type="int", dest='n_pixels', default=128*8192+16+1,
op.add_option('-n', '--n-pixels', action='store', type="int", dest='n_pixels', default=1024*8192+16+1,
help='number of pixels in each regression test iteration')
op.add_option('-r', '--requires', action='append', type='string', dest='requires', default=[],
help='cpp #if conditionals')