diff options
Diffstat (limited to 'jpeg/jidctint.c')
| -rw-r--r-- | jpeg/jidctint.c | 307 |
1 files changed, 184 insertions, 123 deletions
diff --git a/jpeg/jidctint.c b/jpeg/jidctint.c index 76fe5d9cf..6437079a3 100644 --- a/jpeg/jidctint.c +++ b/jpeg/jidctint.c @@ -2,7 +2,7 @@ * jidctint.c * * Copyright (C) 1991-1998, Thomas G. Lane. - * Modification developed 2002-2013 by Guido Vollbeding. + * Modification developed 2002-2016 by Guido Vollbeding. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * @@ -166,6 +166,7 @@ /* * Perform dequantization and inverse DCT on one block of coefficients. * + * Optimized algorithm with 12 multiplications in the 1-D kernel. * cK represents sqrt(2) * cos(K*pi/16). */ @@ -230,13 +231,6 @@ jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, * The rotator is c(-6). */ - z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); - z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); - - z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ - tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ - tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ - z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); z2 <<= CONST_BITS; @@ -247,6 +241,13 @@ jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, tmp0 = z2 + z3; tmp1 = z2 - z3; + z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + + z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ + tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ + tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ + tmp10 = tmp0 + tmp2; tmp13 = tmp0 - tmp2; tmp11 = tmp1 + tmp3; @@ -306,6 +307,12 @@ jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, wsptr = workspace; for (ctr = 0; ctr < DCTSIZE; ctr++) { outptr = output_buf[ctr] + output_col; + + /* Add range center and fudge factor for final descale and range-limit. */ + z2 = (INT32) wsptr[0] + + ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + + (ONE << (PASS1_BITS+2))); + /* Rows of zeroes can be exploited in the same way as we did with columns. * However, the column calculation has created many nonzero AC terms, so * the simplification applies less often (typically 5% to 10% of the time). @@ -318,7 +325,7 @@ jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 && wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) { /* AC terms all zero */ - JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3) + JSAMPLE dcval = range_limit[(int) RIGHT_SHIFT(z2, PASS1_BITS+3) & RANGE_MASK]; outptr[0] = dcval; @@ -339,6 +346,11 @@ jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, * The rotator is c(-6). */ + z3 = (INT32) wsptr[4]; + + tmp0 = (z2 + z3) << CONST_BITS; + tmp1 = (z2 - z3) << CONST_BITS; + z2 = (INT32) wsptr[2]; z3 = (INT32) wsptr[6]; @@ -346,13 +358,6 @@ jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ - /* Add fudge factor here for final descale. */ - z2 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); - z3 = (INT32) wsptr[4]; - - tmp0 = (z2 + z3) << CONST_BITS; - tmp1 = (z2 - z3) << CONST_BITS; - tmp10 = tmp0 + tmp2; tmp13 = tmp0 - tmp2; tmp11 = tmp1 + tmp3; @@ -424,7 +429,7 @@ jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, /* * Perform dequantization and inverse DCT on one block of coefficients, - * producing a 7x7 output block. + * producing a reduced-size 7x7 output block. * * Optimized algorithm with 12 multiplications in the 1-D kernel. * cK represents sqrt(2) * cos(K*pi/14). @@ -508,8 +513,10 @@ jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr, /* Even part */ - /* Add fudge factor here for final descale. */ - tmp13 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + /* Add range center and fudge factor for final descale and range-limit. */ + tmp13 = (INT32) wsptr[0] + + ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + + (ONE << (PASS1_BITS+2))); tmp13 <<= CONST_BITS; z1 = (INT32) wsptr[2]; @@ -644,8 +651,10 @@ jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr, /* Even part */ - /* Add fudge factor here for final descale. */ - tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + /* Add range center and fudge factor for final descale and range-limit. */ + tmp0 = (INT32) wsptr[0] + + ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + + (ONE << (PASS1_BITS+2))); tmp0 <<= CONST_BITS; tmp2 = (INT32) wsptr[4]; tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */ @@ -763,8 +772,10 @@ jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr, /* Even part */ - /* Add fudge factor here for final descale. */ - tmp12 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + /* Add range center and fudge factor for final descale and range-limit. */ + tmp12 = (INT32) wsptr[0] + + ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + + (ONE << (PASS1_BITS+2))); tmp12 <<= CONST_BITS; tmp0 = (INT32) wsptr[2]; tmp1 = (INT32) wsptr[4]; @@ -875,8 +886,10 @@ jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, /* Even part */ - /* Add fudge factor here for final descale. */ - tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + /* Add range center and fudge factor for final descale and range-limit. */ + tmp0 = (INT32) wsptr[0] + + ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + + (ONE << (PASS1_BITS+2))); tmp2 = (INT32) wsptr[2]; tmp10 = (tmp0 + tmp2) << CONST_BITS; @@ -972,8 +985,10 @@ jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr, /* Even part */ - /* Add fudge factor here for final descale. */ - tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + /* Add range center and fudge factor for final descale and range-limit. */ + tmp0 = (INT32) wsptr[0] + + ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + + (ONE << (PASS1_BITS+2))); tmp0 <<= CONST_BITS; tmp2 = (INT32) wsptr[2]; tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */ @@ -1014,11 +1029,11 @@ jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col) { - INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5; + DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5; ISLOW_MULT_TYPE * quantptr; JSAMPROW outptr; JSAMPLE *range_limit = IDCT_range_limit(cinfo); - SHIFT_TEMPS + ISHIFT_TEMPS /* Pass 1: process columns from input. */ @@ -1027,8 +1042,8 @@ jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, /* Column 0 */ tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]); tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]); - /* Add fudge factor here for final descale. */ - tmp4 += ONE << 2; + /* Add range center and fudge factor for final descale and range-limit. */ + tmp4 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2); tmp0 = tmp4 + tmp5; tmp2 = tmp4 - tmp5; @@ -1045,14 +1060,14 @@ jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, /* Row 0 */ outptr = output_buf[0] + output_col; - outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK]; - outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK]; + outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK]; + outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK]; /* Row 1 */ outptr = output_buf[1] + output_col; - outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp2 + tmp3, 3) & RANGE_MASK]; - outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2 - tmp3, 3) & RANGE_MASK]; + outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp2 + tmp3, 3) & RANGE_MASK]; + outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp2 - tmp3, 3) & RANGE_MASK]; } @@ -1069,17 +1084,21 @@ jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr, JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col) { - int dcval; + DCTELEM dcval; ISLOW_MULT_TYPE * quantptr; JSAMPLE *range_limit = IDCT_range_limit(cinfo); - SHIFT_TEMPS + ISHIFT_TEMPS /* 1x1 is trivial: just take the DC coefficient divided by 8. */ + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + dcval = DEQUANTIZE(coef_block[0], quantptr[0]); - dcval = (int) DESCALE((INT32) dcval, 3); + /* Add range center and fudge factor for descale and range-limit. */ + dcval += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2); - output_buf[0][output_col] = range_limit[dcval & RANGE_MASK]; + output_buf[0][output_col] = + range_limit[(int) IRIGHT_SHIFT(dcval, 3) & RANGE_MASK]; } @@ -1178,8 +1197,10 @@ jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info * compptr, /* Even part */ - /* Add fudge factor here for final descale. */ - tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + /* Add range center and fudge factor for final descale and range-limit. */ + tmp0 = (INT32) wsptr[0] + + ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + + (ONE << (PASS1_BITS+2))); tmp0 <<= CONST_BITS; z1 = (INT32) wsptr[2]; @@ -1361,8 +1382,10 @@ jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr, /* Even part */ - /* Add fudge factor here for final descale. */ - z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + /* Add range center and fudge factor for final descale and range-limit. */ + z3 = (INT32) wsptr[0] + + ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + + (ONE << (PASS1_BITS+2))); z3 <<= CONST_BITS; z4 = (INT32) wsptr[4]; z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */ @@ -1554,8 +1577,10 @@ jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info * compptr, /* Even part */ - /* Add fudge factor here for final descale. */ - tmp10 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + /* Add range center and fudge factor for final descale and range-limit. */ + tmp10 = (INT32) wsptr[0] + + ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + + (ONE << (PASS1_BITS+2))); tmp10 <<= CONST_BITS; z1 = (INT32) wsptr[2]; @@ -1758,8 +1783,10 @@ jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr, /* Even part */ - /* Add fudge factor here for final descale. */ - z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + /* Add range center and fudge factor for final descale and range-limit. */ + z3 = (INT32) wsptr[0] + + ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + + (ONE << (PASS1_BITS+2))); z3 <<= CONST_BITS; z4 = (INT32) wsptr[4]; @@ -1979,8 +2006,10 @@ jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info * compptr, /* Even part */ - /* Add fudge factor here for final descale. */ - z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + /* Add range center and fudge factor for final descale and range-limit. */ + z1 = (INT32) wsptr[0] + + ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + + (ONE << (PASS1_BITS+2))); z1 <<= CONST_BITS; z2 = (INT32) wsptr[2]; @@ -2206,8 +2235,10 @@ jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr, /* Even part */ - /* Add fudge factor here for final descale. */ - z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + /* Add range center and fudge factor for final descale and range-limit. */ + z1 = (INT32) wsptr[0] + + ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + + (ONE << (PASS1_BITS+2))); z1 <<= CONST_BITS; z4 = (INT32) wsptr[4]; z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */ @@ -2438,8 +2469,10 @@ jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info * compptr, /* Even part */ - /* Add fudge factor here for final descale. */ - z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + /* Add range center and fudge factor for final descale and range-limit. */ + z1 = (INT32) wsptr[0] + + ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + + (ONE << (PASS1_BITS+2))); z1 <<= CONST_BITS; z2 = (INT32) wsptr[2]; @@ -2591,7 +2624,7 @@ jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr, tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); tmp0 <<= CONST_BITS; /* Add fudge factor here for final descale. */ - tmp0 += 1 << (CONST_BITS-PASS1_BITS-1); + tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ @@ -2689,8 +2722,10 @@ jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr, /* Even part */ - /* Add fudge factor here for final descale. */ - tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + /* Add range center and fudge factor for final descale and range-limit. */ + tmp0 = (INT32) wsptr[0] + + ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + + (ONE << (PASS1_BITS+2))); tmp0 <<= CONST_BITS; z1 = (INT32) wsptr[4]; @@ -2886,13 +2921,6 @@ jpeg_idct_16x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr, * The rotator is c(-6). */ - z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); - z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); - - z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ - tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ - tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ - z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); z2 <<= CONST_BITS; @@ -2903,6 +2931,13 @@ jpeg_idct_16x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr, tmp0 = z2 + z3; tmp1 = z2 - z3; + z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + + z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ + tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ + tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ + tmp10 = tmp0 + tmp2; tmp13 = tmp0 - tmp2; tmp11 = tmp1 + tmp3; @@ -2964,8 +2999,10 @@ jpeg_idct_16x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr, /* Even part */ - /* Add fudge factor here for final descale. */ - tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + /* Add range center and fudge factor for final descale and range-limit. */ + tmp0 = (INT32) wsptr[0] + + ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + + (ONE << (PASS1_BITS+2))); tmp0 <<= CONST_BITS; z1 = (INT32) wsptr[4]; @@ -3182,8 +3219,10 @@ jpeg_idct_14x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr, /* Even part */ - /* Add fudge factor here for final descale. */ - z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + /* Add range center and fudge factor for final descale and range-limit. */ + z1 = (INT32) wsptr[0] + + ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + + (ONE << (PASS1_BITS+2))); z1 <<= CONST_BITS; z4 = (INT32) wsptr[4]; z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */ @@ -3366,8 +3405,10 @@ jpeg_idct_12x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr, /* Even part */ - /* Add fudge factor here for final descale. */ - z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + /* Add range center and fudge factor for final descale and range-limit. */ + z3 = (INT32) wsptr[0] + + ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + + (ONE << (PASS1_BITS+2))); z3 <<= CONST_BITS; z4 = (INT32) wsptr[4]; @@ -3542,8 +3583,10 @@ jpeg_idct_10x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr, /* Even part */ - /* Add fudge factor here for final descale. */ - z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + /* Add range center and fudge factor for final descale and range-limit. */ + z3 = (INT32) wsptr[0] + + ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + + (ONE << (PASS1_BITS+2))); z3 <<= CONST_BITS; z4 = (INT32) wsptr[4]; z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */ @@ -3707,6 +3750,15 @@ jpeg_idct_8x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, * The rotator is c(-6). */ + /* Add range center and fudge factor for final descale and range-limit. */ + z2 = (INT32) wsptr[0] + + ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + + (ONE << (PASS1_BITS+2))); + z3 = (INT32) wsptr[4]; + + tmp0 = (z2 + z3) << CONST_BITS; + tmp1 = (z2 - z3) << CONST_BITS; + z2 = (INT32) wsptr[2]; z3 = (INT32) wsptr[6]; @@ -3714,13 +3766,6 @@ jpeg_idct_8x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ - /* Add fudge factor here for final descale. */ - z2 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); - z3 = (INT32) wsptr[4]; - - tmp0 = (z2 + z3) << CONST_BITS; - tmp1 = (z2 - z3) << CONST_BITS; - tmp10 = tmp0 + tmp2; tmp13 = tmp0 - tmp2; tmp11 = tmp1 + tmp3; @@ -3852,8 +3897,10 @@ jpeg_idct_6x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr, /* Even part */ - /* Add fudge factor here for final descale. */ - tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + /* Add range center and fudge factor for final descale and range-limit. */ + tmp0 = (INT32) wsptr[0] + + ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + + (ONE << (PASS1_BITS+2))); tmp0 <<= CONST_BITS; tmp2 = (INT32) wsptr[4]; tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */ @@ -3954,8 +4001,8 @@ jpeg_idct_4x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, /* Even part */ - /* Add fudge factor here for final descale. */ - tmp0 = wsptr[0] + (ONE << 2); + /* Add range center and fudge factor for final descale and range-limit. */ + tmp0 = wsptr[0] + ((((INT32) RANGE_CENTER) << 3) + (ONE << 2)); tmp2 = wsptr[2]; tmp10 = (tmp0 + tmp2) << CONST_BITS; @@ -4003,11 +4050,11 @@ jpeg_idct_2x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr, JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col) { - INT32 tmp0, tmp1; + DCTELEM tmp0, tmp1; ISLOW_MULT_TYPE * quantptr; JSAMPROW outptr; JSAMPLE *range_limit = IDCT_range_limit(cinfo); - SHIFT_TEMPS + ISHIFT_TEMPS /* Pass 1: empty. */ @@ -4019,8 +4066,8 @@ jpeg_idct_2x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr, /* Even part */ tmp0 = DEQUANTIZE(coef_block[0], quantptr[0]); - /* Add fudge factor here for final descale. */ - tmp0 += ONE << 2; + /* Add range center and fudge factor for final descale and range-limit. */ + tmp0 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2); /* Odd part */ @@ -4028,8 +4075,8 @@ jpeg_idct_2x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr, /* Final output stage */ - outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK]; - outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK]; + outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK]; + outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK]; } @@ -4174,6 +4221,15 @@ jpeg_idct_8x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr, * The rotator is c(-6). */ + /* Add range center and fudge factor for final descale and range-limit. */ + z2 = (INT32) wsptr[0] + + ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + + (ONE << (PASS1_BITS+2))); + z3 = (INT32) wsptr[4]; + + tmp0 = (z2 + z3) << CONST_BITS; + tmp1 = (z2 - z3) << CONST_BITS; + z2 = (INT32) wsptr[2]; z3 = (INT32) wsptr[6]; @@ -4181,13 +4237,6 @@ jpeg_idct_8x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr, tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ - /* Add fudge factor here for final descale. */ - z2 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); - z3 = (INT32) wsptr[4]; - - tmp0 = (z2 + z3) << CONST_BITS; - tmp1 = (z2 - z3) << CONST_BITS; - tmp10 = tmp0 + tmp2; tmp13 = tmp0 - tmp2; tmp11 = tmp1 + tmp3; @@ -4377,8 +4426,10 @@ jpeg_idct_7x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr, /* Even part */ - /* Add fudge factor here for final descale. */ - tmp23 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + /* Add range center and fudge factor for final descale and range-limit. */ + tmp23 = (INT32) wsptr[0] + + ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + + (ONE << (PASS1_BITS+2))); tmp23 <<= CONST_BITS; z1 = (INT32) wsptr[2]; @@ -4558,8 +4609,10 @@ jpeg_idct_6x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr, /* Even part */ - /* Add fudge factor here for final descale. */ - tmp10 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + /* Add range center and fudge factor for final descale and range-limit. */ + tmp10 = (INT32) wsptr[0] + + ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + + (ONE << (PASS1_BITS+2))); tmp10 <<= CONST_BITS; tmp12 = (INT32) wsptr[4]; tmp20 = MULTIPLY(tmp12, FIX(0.707106781)); /* c4 */ @@ -4716,8 +4769,10 @@ jpeg_idct_5x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr, /* Even part */ - /* Add fudge factor here for final descale. */ - tmp12 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + /* Add range center and fudge factor for final descale and range-limit. */ + tmp12 = (INT32) wsptr[0] + + ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + + (ONE << (PASS1_BITS+2))); tmp12 <<= CONST_BITS; tmp13 = (INT32) wsptr[2]; tmp14 = (INT32) wsptr[4]; @@ -4829,13 +4884,6 @@ jpeg_idct_4x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr, * The rotator is c(-6). */ - z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); - z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); - - z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ - tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ - tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ - z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); z2 <<= CONST_BITS; @@ -4846,6 +4894,13 @@ jpeg_idct_4x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr, tmp0 = z2 + z3; tmp1 = z2 - z3; + z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + + z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ + tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ + tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ + tmp10 = tmp0 + tmp2; tmp13 = tmp0 - tmp2; tmp11 = tmp1 + tmp3; @@ -4908,8 +4963,10 @@ jpeg_idct_4x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr, /* Even part */ - /* Add fudge factor here for final descale. */ - tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + /* Add range center and fudge factor for final descale and range-limit. */ + tmp0 = (INT32) wsptr[0] + + ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + + (ONE << (PASS1_BITS+2))); tmp2 = (INT32) wsptr[2]; tmp10 = (tmp0 + tmp2) << CONST_BITS; @@ -5021,8 +5078,10 @@ jpeg_idct_3x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr, /* Even part */ - /* Add fudge factor here for final descale. */ - tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + /* Add range center and fudge factor for final descale and range-limit. */ + tmp0 = (INT32) wsptr[0] + + ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + + (ONE << (PASS1_BITS+2))); tmp0 <<= CONST_BITS; tmp2 = (INT32) wsptr[2]; tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */ @@ -5117,8 +5176,10 @@ jpeg_idct_2x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, /* Even part */ - /* Add fudge factor here for final descale. */ - tmp10 = wsptr[0] + (ONE << (CONST_BITS+2)); + /* Add range center and fudge factor for final descale and range-limit. */ + tmp10 = wsptr[0] + + ((((INT32) RANGE_CENTER) << (CONST_BITS+3)) + + (ONE << (CONST_BITS+2))); /* Odd part */ @@ -5148,20 +5209,20 @@ jpeg_idct_1x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col) { - INT32 tmp0, tmp1; + DCTELEM tmp0, tmp1; ISLOW_MULT_TYPE * quantptr; JSAMPLE *range_limit = IDCT_range_limit(cinfo); - SHIFT_TEMPS + ISHIFT_TEMPS /* Process 1 column from input, store into output array. */ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; /* Even part */ - + tmp0 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]); - /* Add fudge factor here for final descale. */ - tmp0 += ONE << 2; + /* Add range center and fudge factor for final descale and range-limit. */ + tmp0 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2); /* Odd part */ @@ -5169,10 +5230,10 @@ jpeg_idct_1x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, /* Final output stage */ - output_buf[0][output_col] = range_limit[(int) RIGHT_SHIFT(tmp0 + tmp1, 3) - & RANGE_MASK]; - output_buf[1][output_col] = range_limit[(int) RIGHT_SHIFT(tmp0 - tmp1, 3) - & RANGE_MASK]; + output_buf[0][output_col] = + range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK]; + output_buf[1][output_col] = + range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK]; } #endif /* IDCT_SCALING_SUPPORTED */ |
