X-Git-Url: https://rtime.felk.cvut.cz/gitweb/l4.git/blobdiff_plain/c06638d2e57a3230381daf1ccce3cf0dfd0eaa92..51f012d0ec4f0ad687159ed6685b73f90f1cfc71:/l4/pkg/libjpeg/lib/contrib/jfdctint.c diff --git a/l4/pkg/libjpeg/lib/contrib/jfdctint.c b/l4/pkg/libjpeg/lib/contrib/jfdctint.c index 1dde58c49..a2ef2031b 100644 --- a/l4/pkg/libjpeg/lib/contrib/jfdctint.c +++ b/l4/pkg/libjpeg/lib/contrib/jfdctint.c @@ -2,7 +2,7 @@ * jfdctint.c * * Copyright (C) 1991-1996, Thomas G. Lane. - * Modification developed 2003-2009 by Guido Vollbeding. + * Modification developed 2003-2013 by Guido Vollbeding. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * @@ -165,16 +165,18 @@ jpeg_fdct_islow (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) int ctr; SHIFT_TEMPS - /* Pass 1: process rows. */ - /* Note results are scaled up by sqrt(8) compared to a true DCT; */ - /* furthermore, we scale the results by 2**PASS1_BITS. */ + /* Pass 1: process rows. + * Note results are scaled up by sqrt(8) compared to a true DCT; + * furthermore, we scale the results by 2**PASS1_BITS. + * cK represents sqrt(2) * cos(K*pi/16). + */ dataptr = data; for (ctr = 0; ctr < DCTSIZE; ctr++) { elemptr = sample_data[ctr] + start_col; /* Even part per LL&M figure 1 --- note that published figure is faulty; - * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". + * rotator "c1" should be "c6". */ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]); @@ -196,47 +198,49 @@ jpeg_fdct_islow (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS); dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS); - z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); /* c6 */ /* Add fudge factor here for final descale. */ z1 += ONE << (CONST_BITS-PASS1_BITS-1); - dataptr[2] = (DCTELEM) RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), - CONST_BITS-PASS1_BITS); - dataptr[6] = (DCTELEM) RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), - CONST_BITS-PASS1_BITS); + + dataptr[2] = (DCTELEM) + RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */ + CONST_BITS-PASS1_BITS); + dataptr[6] = (DCTELEM) + RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */ + CONST_BITS-PASS1_BITS); /* Odd part per figure 8 --- note paper omits factor of sqrt(2). - * cK represents sqrt(2) * cos(K*pi/16). * i0..i3 in the paper are tmp0..tmp3 here. */ - tmp10 = tmp0 + tmp3; - tmp11 = tmp1 + tmp2; tmp12 = tmp0 + tmp2; tmp13 = tmp1 + tmp3; - z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ + + z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ /* Add fudge factor here for final descale. */ z1 += ONE << (CONST_BITS-PASS1_BITS-1); - tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ - tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ - tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ - tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ - tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */ - tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */ - tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */ - tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ - + tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* -c3+c5 */ + tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ tmp12 += z1; tmp13 += z1; - dataptr[1] = (DCTELEM) - RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS); - dataptr[3] = (DCTELEM) - RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS); - dataptr[5] = (DCTELEM) - RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS); - dataptr[7] = (DCTELEM) - RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS); + z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */ + tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ + tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ + tmp0 += z1 + tmp12; + tmp3 += z1 + tmp13; + + z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */ + tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ + tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ + tmp1 += z1 + tmp13; + tmp2 += z1 + tmp12; + + dataptr[1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS-PASS1_BITS); + dataptr[3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS-PASS1_BITS); + dataptr[5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS); + dataptr[7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS-PASS1_BITS); dataptr += DCTSIZE; /* advance pointer to next row */ } @@ -244,12 +248,13 @@ jpeg_fdct_islow (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) /* Pass 2: process columns. * We remove the PASS1_BITS scaling, but leave the results scaled up * by an overall factor of 8. + * cK represents sqrt(2) * cos(K*pi/16). */ dataptr = data; for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { /* Even part per LL&M figure 1 --- note that published figure is faulty; - * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". + * rotator "c1" should be "c6". */ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; @@ -271,47 +276,49 @@ jpeg_fdct_islow (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS1_BITS); dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS1_BITS); - z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); /* c6 */ /* Add fudge factor here for final descale. */ z1 += ONE << (CONST_BITS+PASS1_BITS-1); + dataptr[DCTSIZE*2] = (DCTELEM) - RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), CONST_BITS+PASS1_BITS); + RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */ + CONST_BITS+PASS1_BITS); dataptr[DCTSIZE*6] = (DCTELEM) - RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), CONST_BITS+PASS1_BITS); + RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */ + CONST_BITS+PASS1_BITS); /* Odd part per figure 8 --- note paper omits factor of sqrt(2). - * cK represents sqrt(2) * cos(K*pi/16). * i0..i3 in the paper are tmp0..tmp3 here. */ - tmp10 = tmp0 + tmp3; - tmp11 = tmp1 + tmp2; tmp12 = tmp0 + tmp2; tmp13 = tmp1 + tmp3; - z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ + + z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ /* Add fudge factor here for final descale. */ z1 += ONE << (CONST_BITS+PASS1_BITS-1); - tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ - tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ - tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ - tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ - tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */ - tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */ - tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */ - tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ - + tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* -c3+c5 */ + tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ tmp12 += z1; tmp13 += z1; - dataptr[DCTSIZE*1] = (DCTELEM) - RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS+PASS1_BITS); - dataptr[DCTSIZE*3] = (DCTELEM) - RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS+PASS1_BITS); - dataptr[DCTSIZE*5] = (DCTELEM) - RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS+PASS1_BITS); - dataptr[DCTSIZE*7] = (DCTELEM) - RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS+PASS1_BITS); + z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */ + tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ + tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ + tmp0 += z1 + tmp12; + tmp3 += z1 + tmp13; + + z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */ + tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ + tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ + tmp1 += z1 + tmp13; + tmp2 += z1 + tmp12; + + dataptr[DCTSIZE*1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS+PASS1_BITS); dataptr++; /* advance pointer to next column */ } @@ -338,10 +345,11 @@ jpeg_fdct_7x7 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); - /* Pass 1: process rows. */ - /* Note results are scaled up by sqrt(8) compared to a true DCT; */ - /* furthermore, we scale the results by 2**PASS1_BITS. */ - /* cK represents sqrt(2) * cos(K*pi/14). */ + /* Pass 1: process rows. + * Note results are scaled up by sqrt(8) compared to a true DCT; + * furthermore, we scale the results by 2**PASS1_BITS. + * cK represents sqrt(2) * cos(K*pi/14). + */ dataptr = data; for (ctr = 0; ctr < 7; ctr++) { @@ -472,10 +480,11 @@ jpeg_fdct_6x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); - /* Pass 1: process rows. */ - /* Note results are scaled up by sqrt(8) compared to a true DCT; */ - /* furthermore, we scale the results by 2**PASS1_BITS. */ - /* cK represents sqrt(2) * cos(K*pi/12). */ + /* Pass 1: process rows. + * Note results are scaled up by sqrt(8) compared to a true DCT; + * furthermore, we scale the results by 2**PASS1_BITS. + * cK represents sqrt(2) * cos(K*pi/12). + */ dataptr = data; for (ctr = 0; ctr < 6; ctr++) { @@ -585,12 +594,13 @@ jpeg_fdct_5x5 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); - /* Pass 1: process rows. */ - /* Note results are scaled up by sqrt(8) compared to a true DCT; */ - /* furthermore, we scale the results by 2**PASS1_BITS. */ - /* We scale the results further by 2 as part of output adaption */ - /* scaling for different DCT size. */ - /* cK represents sqrt(2) * cos(K*pi/10). */ + /* Pass 1: process rows. + * Note results are scaled up by sqrt(8) compared to a true DCT; + * furthermore, we scale the results by 2**PASS1_BITS. + * We scale the results further by 2 as part of output adaption + * scaling for different DCT size. + * cK represents sqrt(2) * cos(K*pi/10). + */ dataptr = data; for (ctr = 0; ctr < 5; ctr++) { @@ -695,11 +705,12 @@ jpeg_fdct_4x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); - /* Pass 1: process rows. */ - /* Note results are scaled up by sqrt(8) compared to a true DCT; */ - /* furthermore, we scale the results by 2**PASS1_BITS. */ - /* We must also scale the output by (8/4)**2 = 2**2, which we add here. */ - /* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. */ + /* Pass 1: process rows. + * Note results are scaled up by sqrt(8) compared to a true DCT; + * furthermore, we scale the results by 2**PASS1_BITS. + * We must also scale the output by (8/4)**2 = 2**2, which we add here. + * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. + */ dataptr = data; for (ctr = 0; ctr < 4; ctr++) { @@ -737,6 +748,7 @@ jpeg_fdct_4x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) /* Pass 2: process columns. * We remove the PASS1_BITS scaling, but leave the results scaled up * by an overall factor of 8. + * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. */ dataptr = data; @@ -787,12 +799,13 @@ jpeg_fdct_3x3 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); - /* Pass 1: process rows. */ - /* Note results are scaled up by sqrt(8) compared to a true DCT; */ - /* furthermore, we scale the results by 2**PASS1_BITS. */ - /* We scale the results further by 2**2 as part of output adaption */ - /* scaling for different DCT size. */ - /* cK represents sqrt(2) * cos(K*pi/6). */ + /* Pass 1: process rows. + * Note results are scaled up by sqrt(8) compared to a true DCT; + * furthermore, we scale the results by 2**PASS1_BITS. + * We scale the results further by 2**2 as part of output adaption + * scaling for different DCT size. + * cK represents sqrt(2) * cos(K*pi/6). + */ dataptr = data; for (ctr = 0; ctr < 3; ctr++) { @@ -869,8 +882,9 @@ jpeg_fdct_2x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); - /* Pass 1: process rows. */ - /* Note results are scaled up by sqrt(8) compared to a true DCT. */ + /* Pass 1: process rows. + * Note results are scaled up by sqrt(8) compared to a true DCT. + */ /* Row 0 */ elemptr = sample_data[0] + start_col; @@ -935,11 +949,12 @@ jpeg_fdct_9x9 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) int ctr; SHIFT_TEMPS - /* Pass 1: process rows. */ - /* Note results are scaled up by sqrt(8) compared to a true DCT; */ - /* we scale the results further by 2 as part of output adaption */ - /* scaling for different DCT size. */ - /* cK represents sqrt(2) * cos(K*pi/18). */ + /* Pass 1: process rows. + * Note results are scaled up by sqrt(8) compared to a true DCT; + * we scale the results further by 2 as part of output adaption + * scaling for different DCT size. + * cK represents sqrt(2) * cos(K*pi/18). + */ dataptr = data; ctr = 0; @@ -1084,11 +1099,12 @@ jpeg_fdct_10x10 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) int ctr; SHIFT_TEMPS - /* Pass 1: process rows. */ - /* Note results are scaled up by sqrt(8) compared to a true DCT; */ - /* we scale the results further by 2 as part of output adaption */ - /* scaling for different DCT size. */ - /* cK represents sqrt(2) * cos(K*pi/20). */ + /* Pass 1: process rows. + * Note results are scaled up by sqrt(8) compared to a true DCT; + * we scale the results further by 2 as part of output adaption + * scaling for different DCT size. + * cK represents sqrt(2) * cos(K*pi/20). + */ dataptr = data; ctr = 0; @@ -1248,11 +1264,12 @@ jpeg_fdct_11x11 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) int ctr; SHIFT_TEMPS - /* Pass 1: process rows. */ - /* Note results are scaled up by sqrt(8) compared to a true DCT; */ - /* we scale the results further by 2 as part of output adaption */ - /* scaling for different DCT size. */ - /* cK represents sqrt(2) * cos(K*pi/22). */ + /* Pass 1: process rows. + * Note results are scaled up by sqrt(8) compared to a true DCT; + * we scale the results further by 2 as part of output adaption + * scaling for different DCT size. + * cK represents sqrt(2) * cos(K*pi/22). + */ dataptr = data; ctr = 0; @@ -1430,9 +1447,10 @@ jpeg_fdct_12x12 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) int ctr; SHIFT_TEMPS - /* Pass 1: process rows. */ - /* Note results are scaled up by sqrt(8) compared to a true DCT. */ - /* cK represents sqrt(2) * cos(K*pi/24). */ + /* Pass 1: process rows. + * Note results are scaled up by sqrt(8) compared to a true DCT. + * cK represents sqrt(2) * cos(K*pi/24). + */ dataptr = data; ctr = 0; @@ -1596,9 +1614,10 @@ jpeg_fdct_13x13 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) int ctr; SHIFT_TEMPS - /* Pass 1: process rows. */ - /* Note results are scaled up by sqrt(8) compared to a true DCT. */ - /* cK represents sqrt(2) * cos(K*pi/26). */ + /* Pass 1: process rows. + * Note results are scaled up by sqrt(8) compared to a true DCT. + * cK represents sqrt(2) * cos(K*pi/26). + */ dataptr = data; ctr = 0; @@ -1794,9 +1813,10 @@ jpeg_fdct_14x14 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) int ctr; SHIFT_TEMPS - /* Pass 1: process rows. */ - /* Note results are scaled up by sqrt(8) compared to a true DCT. */ - /* cK represents sqrt(2) * cos(K*pi/28). */ + /* Pass 1: process rows. + * Note results are scaled up by sqrt(8) compared to a true DCT. + * cK represents sqrt(2) * cos(K*pi/28). + */ dataptr = data; ctr = 0; @@ -1995,9 +2015,10 @@ jpeg_fdct_15x15 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) int ctr; SHIFT_TEMPS - /* Pass 1: process rows. */ - /* Note results are scaled up by sqrt(8) compared to a true DCT. */ - /* cK represents sqrt(2) * cos(K*pi/30). */ + /* Pass 1: process rows. + * Note results are scaled up by sqrt(8) compared to a true DCT. + * cK represents sqrt(2) * cos(K*pi/30). + */ dataptr = data; ctr = 0; @@ -2173,10 +2194,11 @@ jpeg_fdct_16x16 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) int ctr; SHIFT_TEMPS - /* Pass 1: process rows. */ - /* Note results are scaled up by sqrt(8) compared to a true DCT; */ - /* furthermore, we scale the results by 2**PASS1_BITS. */ - /* cK represents sqrt(2) * cos(K*pi/32). */ + /* Pass 1: process rows. + * Note results are scaled up by sqrt(8) compared to a true DCT; + * furthermore, we scale the results by 2**PASS1_BITS. + * cK represents sqrt(2) * cos(K*pi/32). + */ dataptr = data; ctr = 0; @@ -2275,6 +2297,7 @@ jpeg_fdct_16x16 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) * We remove the PASS1_BITS scaling, but leave the results scaled up * by an overall factor of 8. * We must also scale the output by (8/16)**2 = 1/2**2. + * cK represents sqrt(2) * cos(K*pi/32). */ dataptr = data; @@ -2380,10 +2403,11 @@ jpeg_fdct_16x8 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) int ctr; SHIFT_TEMPS - /* Pass 1: process rows. */ - /* Note results are scaled up by sqrt(8) compared to a true DCT; */ - /* furthermore, we scale the results by 2**PASS1_BITS. */ - /* 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32). */ + /* Pass 1: process rows. + * Note results are scaled up by sqrt(8) compared to a true DCT; + * furthermore, we scale the results by 2**PASS1_BITS. + * 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32). + */ dataptr = data; ctr = 0; @@ -2475,12 +2499,13 @@ jpeg_fdct_16x8 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) * We remove the PASS1_BITS scaling, but leave the results scaled up * by an overall factor of 8. * We must also scale the output by 8/16 = 1/2. + * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). */ dataptr = data; for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { /* Even part per LL&M figure 1 --- note that published figure is faulty; - * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". + * rotator "c1" should be "c6". */ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; @@ -2501,43 +2526,43 @@ jpeg_fdct_16x8 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS+1); dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS+1); - z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); - dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865), - CONST_BITS+PASS1_BITS+1); - dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065), - CONST_BITS+PASS1_BITS+1); + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); /* c6 */ + dataptr[DCTSIZE*2] = (DCTELEM) + DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */ + CONST_BITS+PASS1_BITS+1); + dataptr[DCTSIZE*6] = (DCTELEM) + DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */ + CONST_BITS+PASS1_BITS+1); /* Odd part per figure 8 --- note paper omits factor of sqrt(2). - * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). * i0..i3 in the paper are tmp0..tmp3 here. */ - tmp10 = tmp0 + tmp3; - tmp11 = tmp1 + tmp2; tmp12 = tmp0 + tmp2; tmp13 = tmp1 + tmp3; - z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ - - tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ - tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ - tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ - tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ - tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */ - tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */ - tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */ - tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ + z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ + tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* -c3+c5 */ + tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ tmp12 += z1; tmp13 += z1; - dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0 + tmp10 + tmp12, - CONST_BITS+PASS1_BITS+1); - dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1 + tmp11 + tmp13, - CONST_BITS+PASS1_BITS+1); - dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2 + tmp11 + tmp12, - CONST_BITS+PASS1_BITS+1); - dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3 + tmp10 + tmp13, - CONST_BITS+PASS1_BITS+1); + z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */ + tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ + tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ + tmp0 += z1 + tmp12; + tmp3 += z1 + tmp13; + + z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */ + tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ + tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ + tmp1 += z1 + tmp13; + tmp2 += z1 + tmp12; + + dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+PASS1_BITS+1); + dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS1_BITS+1); + dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS1_BITS+1); + dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+PASS1_BITS+1); dataptr++; /* advance pointer to next column */ } @@ -2564,10 +2589,11 @@ jpeg_fdct_14x7 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) /* Zero bottom row of output coefficient block. */ MEMZERO(&data[DCTSIZE*7], SIZEOF(DCTELEM) * DCTSIZE); - /* Pass 1: process rows. */ - /* Note results are scaled up by sqrt(8) compared to a true DCT; */ - /* furthermore, we scale the results by 2**PASS1_BITS. */ - /* 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28). */ + /* Pass 1: process rows. + * Note results are scaled up by sqrt(8) compared to a true DCT; + * furthermore, we scale the results by 2**PASS1_BITS. + * 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28). + */ dataptr = data; for (ctr = 0; ctr < 7; ctr++) { @@ -2727,10 +2753,11 @@ jpeg_fdct_12x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) /* Zero 2 bottom rows of output coefficient block. */ MEMZERO(&data[DCTSIZE*6], SIZEOF(DCTELEM) * DCTSIZE * 2); - /* Pass 1: process rows. */ - /* Note results are scaled up by sqrt(8) compared to a true DCT; */ - /* furthermore, we scale the results by 2**PASS1_BITS. */ - /* 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24). */ + /* Pass 1: process rows. + * Note results are scaled up by sqrt(8) compared to a true DCT; + * furthermore, we scale the results by 2**PASS1_BITS. + * 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24). + */ dataptr = data; for (ctr = 0; ctr < 6; ctr++) { @@ -2866,10 +2893,11 @@ jpeg_fdct_10x5 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) /* Zero 3 bottom rows of output coefficient block. */ MEMZERO(&data[DCTSIZE*5], SIZEOF(DCTELEM) * DCTSIZE * 3); - /* Pass 1: process rows. */ - /* Note results are scaled up by sqrt(8) compared to a true DCT; */ - /* furthermore, we scale the results by 2**PASS1_BITS. */ - /* 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20). */ + /* Pass 1: process rows. + * Note results are scaled up by sqrt(8) compared to a true DCT; + * furthermore, we scale the results by 2**PASS1_BITS. + * 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20). + */ dataptr = data; for (ctr = 0; ctr < 5; ctr++) { @@ -2999,17 +3027,19 @@ jpeg_fdct_8x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) /* Zero 4 bottom rows of output coefficient block. */ MEMZERO(&data[DCTSIZE*4], SIZEOF(DCTELEM) * DCTSIZE * 4); - /* Pass 1: process rows. */ - /* Note results are scaled up by sqrt(8) compared to a true DCT; */ - /* furthermore, we scale the results by 2**PASS1_BITS. */ - /* We must also scale the output by 8/4 = 2, which we add here. */ + /* Pass 1: process rows. + * Note results are scaled up by sqrt(8) compared to a true DCT; + * furthermore, we scale the results by 2**PASS1_BITS. + * We must also scale the output by 8/4 = 2, which we add here. + * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). + */ dataptr = data; for (ctr = 0; ctr < 4; ctr++) { elemptr = sample_data[ctr] + start_col; /* Even part per LL&M figure 1 --- note that published figure is faulty; - * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". + * rotator "c1" should be "c6". */ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]); @@ -3032,47 +3062,49 @@ jpeg_fdct_8x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << (PASS1_BITS+1)); dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << (PASS1_BITS+1)); - z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); /* c6 */ /* Add fudge factor here for final descale. */ z1 += ONE << (CONST_BITS-PASS1_BITS-2); - dataptr[2] = (DCTELEM) RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), - CONST_BITS-PASS1_BITS-1); - dataptr[6] = (DCTELEM) RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), - CONST_BITS-PASS1_BITS-1); + + dataptr[2] = (DCTELEM) + RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */ + CONST_BITS-PASS1_BITS-1); + dataptr[6] = (DCTELEM) + RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */ + CONST_BITS-PASS1_BITS-1); /* Odd part per figure 8 --- note paper omits factor of sqrt(2). - * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). * i0..i3 in the paper are tmp0..tmp3 here. */ - tmp10 = tmp0 + tmp3; - tmp11 = tmp1 + tmp2; tmp12 = tmp0 + tmp2; tmp13 = tmp1 + tmp3; - z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ + + z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ /* Add fudge factor here for final descale. */ z1 += ONE << (CONST_BITS-PASS1_BITS-2); - tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ - tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ - tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ - tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ - tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */ - tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */ - tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */ - tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ - + tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* -c3+c5 */ + tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ tmp12 += z1; tmp13 += z1; - dataptr[1] = (DCTELEM) - RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS-1); - dataptr[3] = (DCTELEM) - RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS-1); - dataptr[5] = (DCTELEM) - RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS-1); - dataptr[7] = (DCTELEM) - RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS-1); + z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */ + tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ + tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ + tmp0 += z1 + tmp12; + tmp3 += z1 + tmp13; + + z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */ + tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ + tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ + tmp1 += z1 + tmp13; + tmp2 += z1 + tmp12; + + dataptr[1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS-PASS1_BITS-1); + dataptr[3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS-PASS1_BITS-1); + dataptr[5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS-1); + dataptr[7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS-PASS1_BITS-1); dataptr += DCTSIZE; /* advance pointer to next row */ } @@ -3080,7 +3112,8 @@ jpeg_fdct_8x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) /* Pass 2: process columns. * We remove the PASS1_BITS scaling, but leave the results scaled up * by an overall factor of 8. - * 4-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). + * 4-point FDCT kernel, + * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. */ dataptr = data; @@ -3099,7 +3132,7 @@ jpeg_fdct_8x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) /* Odd part */ - tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */ + tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */ /* Add fudge factor here for final descale. */ tmp0 += ONE << (CONST_BITS+PASS1_BITS-1); @@ -3134,12 +3167,13 @@ jpeg_fdct_6x3 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); - /* Pass 1: process rows. */ - /* Note results are scaled up by sqrt(8) compared to a true DCT; */ - /* furthermore, we scale the results by 2**PASS1_BITS. */ - /* We scale the results further by 2 as part of output adaption */ - /* scaling for different DCT size. */ - /* 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12). */ + /* Pass 1: process rows. + * Note results are scaled up by sqrt(8) compared to a true DCT; + * furthermore, we scale the results by 2**PASS1_BITS. + * We scale the results further by 2 as part of output adaption + * scaling for different DCT size. + * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12). + */ dataptr = data; for (ctr = 0; ctr < 3; ctr++) { @@ -3234,12 +3268,13 @@ jpeg_fdct_4x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); - /* Pass 1: process rows. */ - /* Note results are scaled up by sqrt(8) compared to a true DCT; */ - /* furthermore, we scale the results by 2**PASS1_BITS. */ - /* We must also scale the output by (8/4)*(8/2) = 2**3, which we add here. */ - /* 4-point FDCT kernel, */ - /* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. */ + /* Pass 1: process rows. + * Note results are scaled up by sqrt(8) compared to a true DCT; + * furthermore, we scale the results by 2**PASS1_BITS. + * We must also scale the output by (8/4)*(8/2) = 2**3, which we add here. + * 4-point FDCT kernel, + * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. + */ dataptr = data; for (ctr = 0; ctr < 2; ctr++) { @@ -3323,10 +3358,12 @@ jpeg_fdct_2x1 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) */ /* Even part */ + /* Apply unsigned->signed conversion */ data[0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5); /* Odd part */ + data[1] = (DCTELEM) ((tmp0 - tmp1) << 5); } @@ -3350,9 +3387,11 @@ jpeg_fdct_8x16 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) int ctr; SHIFT_TEMPS - /* Pass 1: process rows. */ - /* Note results are scaled up by sqrt(8) compared to a true DCT; */ - /* furthermore, we scale the results by 2**PASS1_BITS. */ + /* Pass 1: process rows. + * Note results are scaled up by sqrt(8) compared to a true DCT; + * furthermore, we scale the results by 2**PASS1_BITS. + * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). + */ dataptr = data; ctr = 0; @@ -3360,7 +3399,7 @@ jpeg_fdct_8x16 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) elemptr = sample_data[ctr] + start_col; /* Even part per LL&M figure 1 --- note that published figure is faulty; - * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". + * rotator "c1" should be "c6". */ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]); @@ -3382,39 +3421,43 @@ jpeg_fdct_8x16 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS); dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS); - z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); - dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865), - CONST_BITS-PASS1_BITS); - dataptr[6] = (DCTELEM) DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065), - CONST_BITS-PASS1_BITS); + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); /* c6 */ + dataptr[2] = (DCTELEM) + DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */ + CONST_BITS-PASS1_BITS); + dataptr[6] = (DCTELEM) + DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */ + CONST_BITS-PASS1_BITS); /* Odd part per figure 8 --- note paper omits factor of sqrt(2). - * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). * i0..i3 in the paper are tmp0..tmp3 here. */ - tmp10 = tmp0 + tmp3; - tmp11 = tmp1 + tmp2; tmp12 = tmp0 + tmp2; tmp13 = tmp1 + tmp3; - z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ - - tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ - tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ - tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ - tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ - tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */ - tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */ - tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */ - tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ + z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ + tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* -c3+c5 */ + tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ tmp12 += z1; tmp13 += z1; - dataptr[1] = (DCTELEM) DESCALE(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS); - dataptr[3] = (DCTELEM) DESCALE(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS); - dataptr[5] = (DCTELEM) DESCALE(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS); - dataptr[7] = (DCTELEM) DESCALE(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS); + z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */ + tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ + tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ + tmp0 += z1 + tmp12; + tmp3 += z1 + tmp13; + + z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */ + tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ + tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ + tmp1 += z1 + tmp13; + tmp2 += z1 + tmp12; + + dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS); + dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS); + dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_BITS); + dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS-PASS1_BITS); ctr++; @@ -3541,10 +3584,11 @@ jpeg_fdct_7x14 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); - /* Pass 1: process rows. */ - /* Note results are scaled up by sqrt(8) compared to a true DCT; */ - /* furthermore, we scale the results by 2**PASS1_BITS. */ - /* 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14). */ + /* Pass 1: process rows. + * Note results are scaled up by sqrt(8) compared to a true DCT; + * furthermore, we scale the results by 2**PASS1_BITS. + * 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14). + */ dataptr = data; ctr = 0; @@ -3721,10 +3765,11 @@ jpeg_fdct_6x12 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); - /* Pass 1: process rows. */ - /* Note results are scaled up by sqrt(8) compared to a true DCT; */ - /* furthermore, we scale the results by 2**PASS1_BITS. */ - /* 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12). */ + /* Pass 1: process rows. + * Note results are scaled up by sqrt(8) compared to a true DCT; + * furthermore, we scale the results by 2**PASS1_BITS. + * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12). + */ dataptr = data; ctr = 0; @@ -3870,10 +3915,11 @@ jpeg_fdct_5x10 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); - /* Pass 1: process rows. */ - /* Note results are scaled up by sqrt(8) compared to a true DCT; */ - /* furthermore, we scale the results by 2**PASS1_BITS. */ - /* 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10). */ + /* Pass 1: process rows. + * Note results are scaled up by sqrt(8) compared to a true DCT; + * furthermore, we scale the results by 2**PASS1_BITS. + * 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10). + */ dataptr = data; ctr = 0; @@ -4015,11 +4061,13 @@ jpeg_fdct_4x8 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); - /* Pass 1: process rows. */ - /* Note results are scaled up by sqrt(8) compared to a true DCT; */ - /* furthermore, we scale the results by 2**PASS1_BITS. */ - /* We must also scale the output by 8/4 = 2, which we add here. */ - /* 4-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). */ + /* Pass 1: process rows. + * Note results are scaled up by sqrt(8) compared to a true DCT; + * furthermore, we scale the results by 2**PASS1_BITS. + * We must also scale the output by 8/4 = 2, which we add here. + * 4-point FDCT kernel, + * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. + */ dataptr = data; for (ctr = 0; ctr < DCTSIZE; ctr++) { @@ -4057,12 +4105,13 @@ jpeg_fdct_4x8 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) /* Pass 2: process columns. * We remove the PASS1_BITS scaling, but leave the results scaled up * by an overall factor of 8. + * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). */ dataptr = data; for (ctr = 0; ctr < 4; ctr++) { /* Even part per LL&M figure 1 --- note that published figure is faulty; - * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". + * rotator "c1" should be "c6". */ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; @@ -4084,47 +4133,49 @@ jpeg_fdct_4x8 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS1_BITS); dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS1_BITS); - z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); /* c6 */ /* Add fudge factor here for final descale. */ z1 += ONE << (CONST_BITS+PASS1_BITS-1); + dataptr[DCTSIZE*2] = (DCTELEM) - RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), CONST_BITS+PASS1_BITS); + RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */ + CONST_BITS+PASS1_BITS); dataptr[DCTSIZE*6] = (DCTELEM) - RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), CONST_BITS+PASS1_BITS); + RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */ + CONST_BITS+PASS1_BITS); /* Odd part per figure 8 --- note paper omits factor of sqrt(2). - * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). * i0..i3 in the paper are tmp0..tmp3 here. */ - tmp10 = tmp0 + tmp3; - tmp11 = tmp1 + tmp2; tmp12 = tmp0 + tmp2; tmp13 = tmp1 + tmp3; - z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ + + z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ /* Add fudge factor here for final descale. */ z1 += ONE << (CONST_BITS+PASS1_BITS-1); - tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ - tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ - tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ - tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ - tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */ - tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */ - tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */ - tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ - + tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* -c3+c5 */ + tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ tmp12 += z1; tmp13 += z1; - dataptr[DCTSIZE*1] = (DCTELEM) - RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS+PASS1_BITS); - dataptr[DCTSIZE*3] = (DCTELEM) - RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS+PASS1_BITS); - dataptr[DCTSIZE*5] = (DCTELEM) - RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS+PASS1_BITS); - dataptr[DCTSIZE*7] = (DCTELEM) - RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS+PASS1_BITS); + z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */ + tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ + tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ + tmp0 += z1 + tmp12; + tmp3 += z1 + tmp13; + + z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */ + tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ + tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ + tmp1 += z1 + tmp13; + tmp2 += z1 + tmp12; + + dataptr[DCTSIZE*1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS+PASS1_BITS); dataptr++; /* advance pointer to next column */ } @@ -4150,12 +4201,13 @@ jpeg_fdct_3x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); - /* Pass 1: process rows. */ - /* Note results are scaled up by sqrt(8) compared to a true DCT; */ - /* furthermore, we scale the results by 2**PASS1_BITS. */ - /* We scale the results further by 2 as part of output adaption */ - /* scaling for different DCT size. */ - /* 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6). */ + /* Pass 1: process rows. + * Note results are scaled up by sqrt(8) compared to a true DCT; + * furthermore, we scale the results by 2**PASS1_BITS. + * We scale the results further by 2 as part of output adaption + * scaling for different DCT size. + * 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6). + */ dataptr = data; for (ctr = 0; ctr < 6; ctr++) { @@ -4255,9 +4307,10 @@ jpeg_fdct_2x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); - /* Pass 1: process rows. */ - /* Note results are scaled up by sqrt(8) compared to a true DCT. */ - /* We must also scale the output by (8/2)*(8/4) = 2**3, which we add here. */ + /* Pass 1: process rows. + * Note results are scaled up by sqrt(8) compared to a true DCT. + * We must also scale the output by (8/2)*(8/4) = 2**3, which we add here. + */ dataptr = data; for (ctr = 0; ctr < 4; ctr++) { @@ -4329,18 +4382,23 @@ jpeg_fdct_1x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); - tmp0 = GETJSAMPLE(sample_data[0][start_col]); - tmp1 = GETJSAMPLE(sample_data[1][start_col]); + /* Pass 1: empty. */ - /* We leave the results scaled up by an overall factor of 8. + /* Pass 2: process columns. + * We leave the results scaled up by an overall factor of 8. * We must also scale the output by (8/1)*(8/2) = 2**5. */ /* Even part */ + + tmp0 = GETJSAMPLE(sample_data[0][start_col]); + tmp1 = GETJSAMPLE(sample_data[1][start_col]); + /* Apply unsigned->signed conversion */ data[DCTSIZE*0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5); /* Odd part */ + data[DCTSIZE*1] = (DCTELEM) ((tmp0 - tmp1) << 5); }