1 files changed, 184 insertions, 123 deletions
diff --git a/jpeg/jidctint.c b/jpeg/jidctint.c
index 76fe5d9cf..6437079a3 100644
--- a/jpeg/jidctint.c
+++ b/jpeg/jidctint.c
@@ -2,7 +2,7 @@
  * jidctint.c
  *
  * Copyright (C) 1991-1998, Thomas G. Lane.
- * Modification developed 2002-2013 by Guido Vollbeding.
+ * Modification developed 2002-2016 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -166,6 +166,7 @@
 /*
  * Perform dequantization and inverse DCT on one block of coefficients.
  *
+ * Optimized algorithm with 12 multiplications in the 1-D kernel.
  * cK represents sqrt(2) * cos(K*pi/16).
  */
 
@@ -230,13 +231,6 @@ jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
      * The rotator is c(-6).
      */
 
-    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
-    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
-    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
-
     z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
     z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
     z2 <<= CONST_BITS;
@@ -247,6 +241,13 @@ jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
     tmp0 = z2 + z3;
     tmp1 = z2 - z3;
 
+    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
+    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
+    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
+
     tmp10 = tmp0 + tmp2;
     tmp13 = tmp0 - tmp2;
     tmp11 = tmp1 + tmp3;
@@ -306,6 +307,12 @@ jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
   wsptr = workspace;
   for (ctr = 0; ctr < DCTSIZE; ctr++) {
     outptr = output_buf[ctr] + output_col;
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    z2 = (INT32) wsptr[0] +
+	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	    (ONE << (PASS1_BITS+2)));
+
     /* Rows of zeroes can be exploited in the same way as we did with columns.
      * However, the column calculation has created many nonzero AC terms, so
      * the simplification applies less often (typically 5% to 10% of the time).
@@ -318,7 +325,7 @@ jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
     if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
 	wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
       /* AC terms all zero */
-      JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3)
+      JSAMPLE dcval = range_limit[(int) RIGHT_SHIFT(z2, PASS1_BITS+3)
 				  & RANGE_MASK];
 
       outptr[0] = dcval;
@@ -339,6 +346,11 @@ jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
      * The rotator is c(-6).
      */
 
+    z3 = (INT32) wsptr[4];
+
+    tmp0 = (z2 + z3) << CONST_BITS;
+    tmp1 = (z2 - z3) << CONST_BITS;
+
     z2 = (INT32) wsptr[2];
     z3 = (INT32) wsptr[6];
 
@@ -346,13 +358,6 @@ jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
     tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
     tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
 
-    /* Add fudge factor here for final descale. */
-    z2 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
-    z3 = (INT32) wsptr[4];
-
-    tmp0 = (z2 + z3) << CONST_BITS;
-    tmp1 = (z2 - z3) << CONST_BITS;
-
     tmp10 = tmp0 + tmp2;
     tmp13 = tmp0 - tmp2;
     tmp11 = tmp1 + tmp3;
@@ -424,7 +429,7 @@ jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
 
 /*
  * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 7x7 output block.
+ * producing a reduced-size 7x7 output block.
  *
  * Optimized algorithm with 12 multiplications in the 1-D kernel.
  * cK represents sqrt(2) * cos(K*pi/14).
@@ -508,8 +513,10 @@ jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
 
     /* Even part */
 
-    /* Add fudge factor here for final descale. */
-    tmp13 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp13 = (INT32) wsptr[0] +
+	      ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	       (ONE << (PASS1_BITS+2)));
     tmp13 <<= CONST_BITS;
 
     z1 = (INT32) wsptr[2];
@@ -644,8 +651,10 @@ jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
 
     /* Even part */
 
-    /* Add fudge factor here for final descale. */
-    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp0 = (INT32) wsptr[0] +
+	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	      (ONE << (PASS1_BITS+2)));
     tmp0 <<= CONST_BITS;
     tmp2 = (INT32) wsptr[4];
     tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
@@ -763,8 +772,10 @@ jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
 
     /* Even part */
 
-    /* Add fudge factor here for final descale. */
-    tmp12 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp12 = (INT32) wsptr[0] +
+	      ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	       (ONE << (PASS1_BITS+2)));
     tmp12 <<= CONST_BITS;
     tmp0 = (INT32) wsptr[2];
     tmp1 = (INT32) wsptr[4];
@@ -875,8 +886,10 @@ jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
 
     /* Even part */
 
-    /* Add fudge factor here for final descale. */
-    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp0 = (INT32) wsptr[0] +
+	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	      (ONE << (PASS1_BITS+2)));
     tmp2 = (INT32) wsptr[2];
 
     tmp10 = (tmp0 + tmp2) << CONST_BITS;
@@ -972,8 +985,10 @@ jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
 
     /* Even part */
 
-    /* Add fudge factor here for final descale. */
-    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp0 = (INT32) wsptr[0] +
+	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	      (ONE << (PASS1_BITS+2)));
     tmp0 <<= CONST_BITS;
     tmp2 = (INT32) wsptr[2];
     tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
@@ -1014,11 +1029,11 @@ jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
 	       JCOEFPTR coef_block,
 	       JSAMPARRAY output_buf, JDIMENSION output_col)
 {
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+  DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
   ISLOW_MULT_TYPE * quantptr;
   JSAMPROW outptr;
   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  SHIFT_TEMPS
+  ISHIFT_TEMPS
 
   /* Pass 1: process columns from input. */
 
@@ -1027,8 +1042,8 @@ jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
   /* Column 0 */
   tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
   tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
-  /* Add fudge factor here for final descale. */
-  tmp4 += ONE << 2;
+  /* Add range center and fudge factor for final descale and range-limit. */
+  tmp4 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
 
   tmp0 = tmp4 + tmp5;
   tmp2 = tmp4 - tmp5;
@@ -1045,14 +1060,14 @@ jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
   /* Row 0 */
   outptr = output_buf[0] + output_col;
 
-  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
-  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
+  outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
+  outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
 
   /* Row 1 */
   outptr = output_buf[1] + output_col;
 
-  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp2 + tmp3, 3) & RANGE_MASK];
-  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2 - tmp3, 3) & RANGE_MASK];
+  outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp2 + tmp3, 3) & RANGE_MASK];
+  outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp2 - tmp3, 3) & RANGE_MASK];
 }
 
 
@@ -1069,17 +1084,21 @@ jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
 	       JCOEFPTR coef_block,
 	       JSAMPARRAY output_buf, JDIMENSION output_col)
 {
-  int dcval;
+  DCTELEM dcval;
   ISLOW_MULT_TYPE * quantptr;
   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  SHIFT_TEMPS
+  ISHIFT_TEMPS
 
   /* 1x1 is trivial: just take the DC coefficient divided by 8. */
+
   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+
   dcval = DEQUANTIZE(coef_block[0], quantptr[0]);
-  dcval = (int) DESCALE((INT32) dcval, 3);
+  /* Add range center and fudge factor for descale and range-limit. */
+  dcval += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
 
-  output_buf[0][output_col] = range_limit[dcval & RANGE_MASK];
+  output_buf[0][output_col] =
+    range_limit[(int) IRIGHT_SHIFT(dcval, 3) & RANGE_MASK];
 }
 
 
@@ -1178,8 +1197,10 @@ jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
 
     /* Even part */
 
-    /* Add fudge factor here for final descale. */
-    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp0 = (INT32) wsptr[0] +
+	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	      (ONE << (PASS1_BITS+2)));
     tmp0 <<= CONST_BITS;
 
     z1 = (INT32) wsptr[2];
@@ -1361,8 +1382,10 @@ jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
 
     /* Even part */
 
-    /* Add fudge factor here for final descale. */
-    z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    /* Add range center and fudge factor for final descale and range-limit. */
+    z3 = (INT32) wsptr[0] +
+	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	    (ONE << (PASS1_BITS+2)));
     z3 <<= CONST_BITS;
     z4 = (INT32) wsptr[4];
     z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
@@ -1554,8 +1577,10 @@ jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
 
     /* Even part */
 
-    /* Add fudge factor here for final descale. */
-    tmp10 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp10 = (INT32) wsptr[0] +
+	      ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	       (ONE << (PASS1_BITS+2)));
     tmp10 <<= CONST_BITS;
 
     z1 = (INT32) wsptr[2];
@@ -1758,8 +1783,10 @@ jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
 
     /* Even part */
 
-    /* Add fudge factor here for final descale. */
-    z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    /* Add range center and fudge factor for final descale and range-limit. */
+    z3 = (INT32) wsptr[0] +
+	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	    (ONE << (PASS1_BITS+2)));
     z3 <<= CONST_BITS;
 
     z4 = (INT32) wsptr[4];
@@ -1979,8 +2006,10 @@ jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
 
     /* Even part */
 
-    /* Add fudge factor here for final descale. */
-    z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    /* Add range center and fudge factor for final descale and range-limit. */
+    z1 = (INT32) wsptr[0] +
+	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	    (ONE << (PASS1_BITS+2)));
     z1 <<= CONST_BITS;
 
     z2 = (INT32) wsptr[2];
@@ -2206,8 +2235,10 @@ jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
 
     /* Even part */
 
-    /* Add fudge factor here for final descale. */
-    z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    /* Add range center and fudge factor for final descale and range-limit. */
+    z1 = (INT32) wsptr[0] +
+	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	    (ONE << (PASS1_BITS+2)));
     z1 <<= CONST_BITS;
     z4 = (INT32) wsptr[4];
     z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
@@ -2438,8 +2469,10 @@ jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
 
     /* Even part */
 
-    /* Add fudge factor here for final descale. */
-    z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    /* Add range center and fudge factor for final descale and range-limit. */
+    z1 = (INT32) wsptr[0] +
+	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	    (ONE << (PASS1_BITS+2)));
     z1 <<= CONST_BITS;
 
     z2 = (INT32) wsptr[2];
@@ -2591,7 +2624,7 @@ jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
     tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
     tmp0 <<= CONST_BITS;
     /* Add fudge factor here for final descale. */
-    tmp0 += 1 << (CONST_BITS-PASS1_BITS-1);
+    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
 
     z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
     tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
@@ -2689,8 +2722,10 @@ jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
 
     /* Even part */
 
-    /* Add fudge factor here for final descale. */
-    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp0 = (INT32) wsptr[0] +
+	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	      (ONE << (PASS1_BITS+2)));
     tmp0 <<= CONST_BITS;
 
     z1 = (INT32) wsptr[4];
@@ -2886,13 +2921,6 @@ jpeg_idct_16x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
      * The rotator is c(-6).
      */
 
-    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
-    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
-    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
-
     z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
     z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
     z2 <<= CONST_BITS;
@@ -2903,6 +2931,13 @@ jpeg_idct_16x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
     tmp0 = z2 + z3;
     tmp1 = z2 - z3;
 
+    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
+    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
+    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
+
     tmp10 = tmp0 + tmp2;
     tmp13 = tmp0 - tmp2;
     tmp11 = tmp1 + tmp3;
@@ -2964,8 +2999,10 @@ jpeg_idct_16x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
 
     /* Even part */
 
-    /* Add fudge factor here for final descale. */
-    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp0 = (INT32) wsptr[0] +
+	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	      (ONE << (PASS1_BITS+2)));
     tmp0 <<= CONST_BITS;
 
     z1 = (INT32) wsptr[4];
@@ -3182,8 +3219,10 @@ jpeg_idct_14x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
 
     /* Even part */
 
-    /* Add fudge factor here for final descale. */
-    z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    /* Add range center and fudge factor for final descale and range-limit. */
+    z1 = (INT32) wsptr[0] +
+	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	    (ONE << (PASS1_BITS+2)));
     z1 <<= CONST_BITS;
     z4 = (INT32) wsptr[4];
     z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
@@ -3366,8 +3405,10 @@ jpeg_idct_12x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
 
     /* Even part */
 
-    /* Add fudge factor here for final descale. */
-    z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    /* Add range center and fudge factor for final descale and range-limit. */
+    z3 = (INT32) wsptr[0] +
+	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	    (ONE << (PASS1_BITS+2)));
     z3 <<= CONST_BITS;
 
     z4 = (INT32) wsptr[4];
@@ -3542,8 +3583,10 @@ jpeg_idct_10x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
 
     /* Even part */
 
-    /* Add fudge factor here for final descale. */
-    z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    /* Add range center and fudge factor for final descale and range-limit. */
+    z3 = (INT32) wsptr[0] +
+	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	    (ONE << (PASS1_BITS+2)));
     z3 <<= CONST_BITS;
     z4 = (INT32) wsptr[4];
     z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
@@ -3707,6 +3750,15 @@ jpeg_idct_8x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
      * The rotator is c(-6).
      */
 
+    /* Add range center and fudge factor for final descale and range-limit. */
+    z2 = (INT32) wsptr[0] +
+	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	    (ONE << (PASS1_BITS+2)));
+    z3 = (INT32) wsptr[4];
+
+    tmp0 = (z2 + z3) << CONST_BITS;
+    tmp1 = (z2 - z3) << CONST_BITS;
+
     z2 = (INT32) wsptr[2];
     z3 = (INT32) wsptr[6];
 
@@ -3714,13 +3766,6 @@ jpeg_idct_8x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
     tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
     tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
 
-    /* Add fudge factor here for final descale. */
-    z2 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
-    z3 = (INT32) wsptr[4];
-
-    tmp0 = (z2 + z3) << CONST_BITS;
-    tmp1 = (z2 - z3) << CONST_BITS;
-
     tmp10 = tmp0 + tmp2;
     tmp13 = tmp0 - tmp2;
     tmp11 = tmp1 + tmp3;
@@ -3852,8 +3897,10 @@ jpeg_idct_6x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
 
     /* Even part */
 
-    /* Add fudge factor here for final descale. */
-    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp0 = (INT32) wsptr[0] +
+	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	      (ONE << (PASS1_BITS+2)));
     tmp0 <<= CONST_BITS;
     tmp2 = (INT32) wsptr[4];
     tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
@@ -3954,8 +4001,8 @@ jpeg_idct_4x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
 
     /* Even part */
 
-    /* Add fudge factor here for final descale. */
-    tmp0 = wsptr[0] + (ONE << 2);
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp0 = wsptr[0] + ((((INT32) RANGE_CENTER) << 3) + (ONE << 2));
     tmp2 = wsptr[2];
 
     tmp10 = (tmp0 + tmp2) << CONST_BITS;
@@ -4003,11 +4050,11 @@ jpeg_idct_2x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
 	       JCOEFPTR coef_block,
 	       JSAMPARRAY output_buf, JDIMENSION output_col)
 {
-  INT32 tmp0, tmp1;
+  DCTELEM tmp0, tmp1;
   ISLOW_MULT_TYPE * quantptr;
   JSAMPROW outptr;
   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  SHIFT_TEMPS
+  ISHIFT_TEMPS
 
   /* Pass 1: empty. */
 
@@ -4019,8 +4066,8 @@ jpeg_idct_2x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
   /* Even part */
 
   tmp0 = DEQUANTIZE(coef_block[0], quantptr[0]);
-  /* Add fudge factor here for final descale. */
-  tmp0 += ONE << 2;
+  /* Add range center and fudge factor for final descale and range-limit. */
+  tmp0 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
 
   /* Odd part */
 
@@ -4028,8 +4075,8 @@ jpeg_idct_2x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
 
   /* Final output stage */
 
-  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
-  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
+  outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
+  outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
 }
 
 
@@ -4174,6 +4221,15 @@ jpeg_idct_8x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
      * The rotator is c(-6).
      */
 
+    /* Add range center and fudge factor for final descale and range-limit. */
+    z2 = (INT32) wsptr[0] +
+	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	    (ONE << (PASS1_BITS+2)));
+    z3 = (INT32) wsptr[4];
+
+    tmp0 = (z2 + z3) << CONST_BITS;
+    tmp1 = (z2 - z3) << CONST_BITS;
+
     z2 = (INT32) wsptr[2];
     z3 = (INT32) wsptr[6];
 
@@ -4181,13 +4237,6 @@ jpeg_idct_8x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
     tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
     tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
 
-    /* Add fudge factor here for final descale. */
-    z2 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
-    z3 = (INT32) wsptr[4];
-
-    tmp0 = (z2 + z3) << CONST_BITS;
-    tmp1 = (z2 - z3) << CONST_BITS;
-
     tmp10 = tmp0 + tmp2;
     tmp13 = tmp0 - tmp2;
     tmp11 = tmp1 + tmp3;
@@ -4377,8 +4426,10 @@ jpeg_idct_7x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
 
     /* Even part */
 
-    /* Add fudge factor here for final descale. */
-    tmp23 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp23 = (INT32) wsptr[0] +
+	      ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	       (ONE << (PASS1_BITS+2)));
     tmp23 <<= CONST_BITS;
 
     z1 = (INT32) wsptr[2];
@@ -4558,8 +4609,10 @@ jpeg_idct_6x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
 
     /* Even part */
 
-    /* Add fudge factor here for final descale. */
-    tmp10 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp10 = (INT32) wsptr[0] +
+	      ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	       (ONE << (PASS1_BITS+2)));
     tmp10 <<= CONST_BITS;
     tmp12 = (INT32) wsptr[4];
     tmp20 = MULTIPLY(tmp12, FIX(0.707106781));   /* c4 */
@@ -4716,8 +4769,10 @@ jpeg_idct_5x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
 
     /* Even part */
 
-    /* Add fudge factor here for final descale. */
-    tmp12 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp12 = (INT32) wsptr[0] +
+	      ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	       (ONE << (PASS1_BITS+2)));
     tmp12 <<= CONST_BITS;
     tmp13 = (INT32) wsptr[2];
     tmp14 = (INT32) wsptr[4];
@@ -4829,13 +4884,6 @@ jpeg_idct_4x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
      * The rotator is c(-6).
      */
 
-    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
-    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
-    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
-
     z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
     z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
     z2 <<= CONST_BITS;
@@ -4846,6 +4894,13 @@ jpeg_idct_4x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
     tmp0 = z2 + z3;
     tmp1 = z2 - z3;
 
+    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
+    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
+    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
+
     tmp10 = tmp0 + tmp2;
     tmp13 = tmp0 - tmp2;
     tmp11 = tmp1 + tmp3;
@@ -4908,8 +4963,10 @@ jpeg_idct_4x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
 
     /* Even part */
 
-    /* Add fudge factor here for final descale. */
-    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp0 = (INT32) wsptr[0] +
+	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	      (ONE << (PASS1_BITS+2)));
     tmp2 = (INT32) wsptr[2];
 
     tmp10 = (tmp0 + tmp2) << CONST_BITS;
@@ -5021,8 +5078,10 @@ jpeg_idct_3x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
 
     /* Even part */
 
-    /* Add fudge factor here for final descale. */
-    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp0 = (INT32) wsptr[0] +
+	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	      (ONE << (PASS1_BITS+2)));
     tmp0 <<= CONST_BITS;
     tmp2 = (INT32) wsptr[2];
     tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
@@ -5117,8 +5176,10 @@ jpeg_idct_2x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
 
     /* Even part */
 
-    /* Add fudge factor here for final descale. */
-    tmp10 = wsptr[0] + (ONE << (CONST_BITS+2));
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp10 = wsptr[0] +
+	      ((((INT32) RANGE_CENTER) << (CONST_BITS+3)) +
+	       (ONE << (CONST_BITS+2)));
 
     /* Odd part */
 
@@ -5148,20 +5209,20 @@ jpeg_idct_1x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
 	       JCOEFPTR coef_block,
 	       JSAMPARRAY output_buf, JDIMENSION output_col)
 {
-  INT32 tmp0, tmp1;
+  DCTELEM tmp0, tmp1;
   ISLOW_MULT_TYPE * quantptr;
   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  SHIFT_TEMPS
+  ISHIFT_TEMPS
 
   /* Process 1 column from input, store into output array. */
 
   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
 
   /* Even part */
-    
+
   tmp0 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
-  /* Add fudge factor here for final descale. */
-  tmp0 += ONE << 2;
+  /* Add range center and fudge factor for final descale and range-limit. */
+  tmp0 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
 
   /* Odd part */
 
@@ -5169,10 +5230,10 @@ jpeg_idct_1x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
 
   /* Final output stage */
 
-  output_buf[0][output_col] = range_limit[(int) RIGHT_SHIFT(tmp0 + tmp1, 3)
-					  & RANGE_MASK];
-  output_buf[1][output_col] = range_limit[(int) RIGHT_SHIFT(tmp0 - tmp1, 3)
-					  & RANGE_MASK];
+  output_buf[0][output_col] =
+    range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
+  output_buf[1][output_col] =
+    range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
 }
 
 #endif /* IDCT_SCALING_SUPPORTED */