]> rtime.felk.cvut.cz Git - frescor/ffmpeg.git/blob - libpostproc/postprocess.c
a81cf20db18d3b1fa09de6f64b2c24c67ea45313
[frescor/ffmpeg.git] / libpostproc / postprocess.c
1 /*
2  * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3  *
4  * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22
23 /**
24  * @file postprocess.c
25  * postprocessing.
26  */
27
28 /*
29                         C       MMX     MMX2    3DNow   AltiVec
30 isVertDC                Ec      Ec                      Ec
31 isVertMinMaxOk          Ec      Ec                      Ec
32 doVertLowPass           E               e       e       Ec
33 doVertDefFilter         Ec      Ec      e       e       Ec
34 isHorizDC               Ec      Ec                      Ec
35 isHorizMinMaxOk         a       E                       Ec
36 doHorizLowPass          E               e       e       Ec
37 doHorizDefFilter        Ec      Ec      e       e       Ec
38 do_a_deblock            Ec      E       Ec      E
39 deRing                  E               e       e*      Ecp
40 Vertical RKAlgo1        E               a       a
41 Horizontal RKAlgo1                      a       a
42 Vertical X1#            a               E       E
43 Horizontal X1#          a               E       E
44 LinIpolDeinterlace      e               E       E*
45 CubicIpolDeinterlace    a               e       e*
46 LinBlendDeinterlace     e               E       E*
47 MedianDeinterlace#      E       Ec      Ec
48 TempDeNoiser#           E               e       e       Ec
49
* I don't have a 3DNow CPU -> it's untested, but no one said it doesn't work, so it seems to work
# more or less self-invented filters, so the exactness isn't too meaningful
E = Exact implementation
e = almost exact implementation (slightly different rounding,...)
54 a = alternative / approximate impl
55 c = checked against the other implementations (-vo md5)
56 p = partially optimized, still some work to do
57 */
58
59 /*
60 TODO:
61 reduce the time wasted on the mem transfer
62 unroll stuff if instructions depend too much on the prior one
63 move YScale thing to the end instead of fixing QP
64 write a faster and higher quality deblocking filter :)
65 make the mainloop more flexible (variable number of blocks at once
66         (the if/else stuff per block is slowing things down)
67 compare the quality & speed of all filters
68 split this huge file
69 optimize c versions
70 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
71 ...
72 */
73
74 //Changelog: use the Subversion log
75
76 #include "config.h"
77 #include "avutil.h"
78 #include <inttypes.h>
79 #include <stdio.h>
80 #include <stdlib.h>
81 #include <string.h>
82 #ifdef HAVE_MALLOC_H
83 #include <malloc.h>
84 #endif
85 //#undef HAVE_MMX2
86 //#define HAVE_3DNOW
87 //#undef HAVE_MMX
88 //#undef ARCH_X86
89 //#define DEBUG_BRIGHTNESS
90 #ifdef USE_FASTMEMCPY
91 #include "libvo/fastmemcpy.h"
92 #endif
93 #include "postprocess.h"
94 #include "postprocess_internal.h"
95
96 #include "mangle.h" //FIXME should be supressed
97
98 #ifdef HAVE_ALTIVEC_H
99 #include <altivec.h>
100 #endif
101
// Size of the on-stack buffer that receives a copy of the mode string
// in pp_get_mode_by_name_and_quality()
#define GET_MODE_BUFFER_SIZE 500
// Capacity of the per-filter options[] array in pp_get_mode_by_name_and_quality()
#define OPTIONS_ARRAY_SIZE 10
// Edge length of a block; the C filters in this file iterate over BLOCK_SIZE rows/columns
#define BLOCK_SIZE 8
// NOTE(review): not referenced in the visible part of this file; presumably used by the templates
#define TEMP_STRIDE 8
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet

// 8-byte aligned 64-bit constants for the x86 code paths.
// wNN repeats the value in each of the four 16-bit words, bNN repeats it in each of the eight bytes.
// They are not referenced from C in the visible part of this file; presumably the inline asm in
// postprocess_template.c uses them, which would be why they carry attribute_used - TODO confirm.
#if defined(ARCH_X86) || defined(ARCH_X86_64)
static uint64_t __attribute__((aligned(8))) attribute_used w05= 0x0005000500050005LL;
static uint64_t __attribute__((aligned(8))) attribute_used w04= 0x0004000400040004LL;
static uint64_t __attribute__((aligned(8))) attribute_used w20= 0x0020002000200020LL;
static uint64_t __attribute__((aligned(8))) attribute_used b00= 0x0000000000000000LL;
static uint64_t __attribute__((aligned(8))) attribute_used b01= 0x0101010101010101LL;
static uint64_t __attribute__((aligned(8))) attribute_used b02= 0x0202020202020202LL;
static uint64_t __attribute__((aligned(8))) attribute_used b08= 0x0808080808080808LL;
static uint64_t __attribute__((aligned(8))) attribute_used b80= 0x8080808080808080LL;
#endif

// 768-entry byte table; clip_tab points 256 entries into it, so clip_tab[-256..511] is addressable.
// NOTE(review): the table is not filled in the visible part of this file - presumably initialised elsewhere.
static uint8_t clip_table[3*256];
static uint8_t * const clip_tab= clip_table + 256;

// NOTE(review): not referenced in the visible part of this file; presumably used by the dering code in the templates
static const int attribute_used deringThreshold= 20;
123
124
// Table of the known filters, terminated by an all-NULL/zero entry.
// Each row starts with a two-letter short name and a long name, followed by three integers
// and a filter constant. The field meanings are declared with struct PPFilter, which is not
// visible here (see postprocess_internal.h) - TODO confirm before relying on the column order.
static struct PPFilter filters[]=
{
        {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
        {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
/*      {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
        {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
        {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
        {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
        {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
        {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
        {"dr", "dering",                1, 5, 6, DERING},
        {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
        {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
        {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
        {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
        {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
        {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
        {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
        {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
        {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
        {NULL, NULL,0,0,0,0} //End Marker
};
147
// Alias table laid out as consecutive (name, expansion) string pairs and terminated by a
// single NULL. Every alias appears in a long and a short spelling ("default"/"de",
// "fast"/"fa"), except "ac".
// NOTE(review): the code that consumes this table is outside the visible part of the file.
static const char *replaceTable[]=
{
        "default",      "hdeblock:a,vdeblock:a,dering:a",
        "de",           "hdeblock:a,vdeblock:a,dering:a",
        "fast",         "x1hdeblock:a,x1vdeblock:a,dering:a",
        "fa",           "x1hdeblock:a,x1vdeblock:a,dering:a",
        "ac",           "ha:a:128:7,va:a,dering:a",
        NULL //End Marker
};
157
158
#if defined(ARCH_X86) || defined(ARCH_X86_64)
/*
 * The four prefetch helpers differ only in the instruction they issue, so they
 * are stamped out from one local macro. Each generated function is
 *     static inline void <insn>(void *p)
 * and executes "<insn> (p)" as a volatile asm statement with p in a register.
 */
#define PP_DEFINE_PREFETCH(insn)                        \
static inline void insn(void *p)                        \
{                                                       \
        asm volatile(   #insn " (%0)\n\t"               \
                : : "r" (p)                             \
        );                                              \
}

PP_DEFINE_PREFETCH(prefetchnta)
PP_DEFINE_PREFETCH(prefetcht0)
PP_DEFINE_PREFETCH(prefetcht1)
PP_DEFINE_PREFETCH(prefetcht2)

#undef PP_DEFINE_PREFETCH
#endif
188
// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
190
191 /**
192  * Check if the given 8x8 Block is mostly "flat"
193  */
194 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
195 {
196         int numEq= 0;
197         int y;
198         const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
199         const int dcThreshold= dcOffset*2 + 1;
200
201         for(y=0; y<BLOCK_SIZE; y++)
202         {
203                 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
204                 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
205                 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
206                 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
207                 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
208                 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
209                 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
210                 src+= stride;
211         }
212         return numEq > c->ppMode.flatnessThreshold;
213 }
214
215 /**
216  * Check if the middle 8x8 Block in the given 8x16 block is flat
217  */
218 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
219         int numEq= 0;
220         int y;
221         const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
222         const int dcThreshold= dcOffset*2 + 1;
223
224         src+= stride*4; // src points to begin of the 8x8 Block
225         for(y=0; y<BLOCK_SIZE-1; y++)
226         {
227                 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
228                 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
229                 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
230                 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
231                 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
232                 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
233                 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
234                 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
235                 src+= stride;
236         }
237         return numEq > c->ppMode.flatnessThreshold;
238 }
239
/**
 * Sparse range check over 8 rows of an 8 pixel wide block.
 *
 * One pair of columns is probed per row, cycling through (0,5), (2,7), (4,1),
 * (6,3) every 4 rows. The check fails as soon as one probed pair differs by
 * more than 2*QP in either direction.
 *
 * @param src    first pixel of the first row
 * @param stride distance in bytes between two rows
 * @param QP     quantizer used to scale the allowed difference
 * @return 1 if every probed pair is within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
        /* column pair probed on row (n & 3) */
        static const uint8_t probe[4][2]= { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
        int row;

        for(row=0; row<8; row++)
        {
                const uint8_t *cols= probe[row & 3];

                // the unsigned cast turns "diff < -2*QP || diff > 2*QP" into a single compare
                if((unsigned)(src[cols[0]] - src[cols[1]] + 2*QP) > 4*QP) return 0;
                src += stride;
        }
        return 1;
}
262
263 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
264 {
265 #if 1
266 #if 1
267         int x;
268         src+= stride*4;
269         for(x=0; x<BLOCK_SIZE; x+=4)
270         {
271                 if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
272                 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
273                 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
274                 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
275         }
276 #else
277         int x;
278         src+= stride*3;
279         for(x=0; x<BLOCK_SIZE; x++)
280         {
281                 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
282         }
283 #endif
284         return 1;
285 #else
286         int x;
287         src+= stride*4;
288         for(x=0; x<BLOCK_SIZE; x++)
289         {
290                 int min=255;
291                 int max=0;
292                 int y;
293                 for(y=0; y<8; y++){
294                         int v= src[x + y*stride];
295                         if(v>max) max=v;
296                         if(v<min) min=v;
297                 }
298                 if(max-min > 2*QP) return 0;
299         }
300         return 1;
301 #endif
302 }
303
304 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
305         if( isHorizDC_C(src, stride, c) ){
306                 if( isHorizMinMaxOk_C(src, stride, c->QP) )
307                         return 1;
308                 else
309                         return 0;
310         }else{
311                 return 2;
312         }
313 }
314
315 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
316         if( isVertDC_C(src, stride, c) ){
317                 if( isVertMinMaxOk_C(src, stride, c->QP) )
318                         return 1;
319                 else
320                         return 0;
321         }else{
322                 return 2;
323         }
324 }
325
326 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
327 {
328         int y;
329         for(y=0; y<BLOCK_SIZE; y++)
330         {
331                 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
332
333                 if(FFABS(middleEnergy) < 8*c->QP)
334                 {
335                         const int q=(dst[3] - dst[4])/2;
336                         const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
337                         const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
338
339                         int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
340                         d= FFMAX(d, 0);
341
342                         d= (5*d + 32) >> 6;
343                         d*= FFSIGN(-middleEnergy);
344
345                         if(q>0)
346                         {
347                                 d= d<0 ? 0 : d;
348                                 d= d>q ? q : d;
349                         }
350                         else
351                         {
352                                 d= d>0 ? 0 : d;
353                                 d= d<q ? q : d;
354                         }
355
356                         dst[3]-= d;
357                         dst[4]+= d;
358                 }
359                 dst+= stride;
360         }
361 }
362
363 /**
364  * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
365  * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
366  */
367 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
368 {
369         int y;
370         for(y=0; y<BLOCK_SIZE; y++)
371         {
372                 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
373                 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
374
375                 int sums[10];
376                 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
377                 sums[1] = sums[0] - first  + dst[3];
378                 sums[2] = sums[1] - first  + dst[4];
379                 sums[3] = sums[2] - first  + dst[5];
380                 sums[4] = sums[3] - first  + dst[6];
381                 sums[5] = sums[4] - dst[0] + dst[7];
382                 sums[6] = sums[5] - dst[1] + last;
383                 sums[7] = sums[6] - dst[2] + last;
384                 sums[8] = sums[7] - dst[3] + last;
385                 sums[9] = sums[8] - dst[4] + last;
386
387                 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
388                 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
389                 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
390                 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
391                 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
392                 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
393                 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
394                 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
395
396                 dst+= stride;
397         }
398 }
399
400 /**
401  * Experimental Filter 1 (Horizontal)
402  * will not damage linear gradients
403  * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
404  * can only smooth blocks at the expected locations (it cant smooth them if they did move)
405  * MMX2 version does correct clipping C version doesnt
406  * not identical with the vertical one
407  */
408 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
409 {
410         int y;
411         static uint64_t *lut= NULL;
412         if(lut==NULL)
413         {
414                 int i;
415                 lut = av_malloc(256*8);
416                 for(i=0; i<256; i++)
417                 {
418                         int v= i < 128 ? 2*i : 2*(i-256);
419 /*
420 //Simulate 112242211 9-Tap filter
421                         uint64_t a= (v/16) & 0xFF;
422                         uint64_t b= (v/8) & 0xFF;
423                         uint64_t c= (v/4) & 0xFF;
424                         uint64_t d= (3*v/8) & 0xFF;
425 */
426 //Simulate piecewise linear interpolation
427                         uint64_t a= (v/16) & 0xFF;
428                         uint64_t b= (v*3/16) & 0xFF;
429                         uint64_t c= (v*5/16) & 0xFF;
430                         uint64_t d= (7*v/16) & 0xFF;
431                         uint64_t A= (0x100 - a)&0xFF;
432                         uint64_t B= (0x100 - b)&0xFF;
433                         uint64_t C= (0x100 - c)&0xFF;
434                         uint64_t D= (0x100 - c)&0xFF;
435
436                         lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
437                                 (D<<24) | (C<<16) | (B<<8) | (A);
438                         //lut[i] = (v<<32) | (v<<24);
439                 }
440         }
441
442         for(y=0; y<BLOCK_SIZE; y++)
443         {
444                 int a= src[1] - src[2];
445                 int b= src[3] - src[4];
446                 int c= src[5] - src[6];
447
448                 int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
449
450                 if(d < QP)
451                 {
452                         int v = d * FFSIGN(-b);
453
454                         src[1] +=v/8;
455                         src[2] +=v/4;
456                         src[3] +=3*v/8;
457                         src[4] -=3*v/8;
458                         src[5] -=v/4;
459                         src[6] -=v/8;
460
461                 }
462                 src+=stride;
463         }
464 }
465
466 /**
467  * accurate deblock filter
468  */
469 static always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
470         int y;
471         const int QP= c->QP;
472         const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
473         const int dcThreshold= dcOffset*2 + 1;
474 //START_TIMER
475         src+= step*4; // src points to begin of the 8x8 Block
476         for(y=0; y<8; y++){
477                 int numEq= 0;
478
479                 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
480                 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
481                 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
482                 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
483                 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
484                 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
485                 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
486                 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
487                 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
488                 if(numEq > c->ppMode.flatnessThreshold){
489                         int min, max, x;
490
491                         if(src[0] > src[step]){
492                             max= src[0];
493                             min= src[step];
494                         }else{
495                             max= src[step];
496                             min= src[0];
497                         }
498                         for(x=2; x<8; x+=2){
499                                 if(src[x*step] > src[(x+1)*step]){
500                                         if(src[x    *step] > max) max= src[ x   *step];
501                                         if(src[(x+1)*step] < min) min= src[(x+1)*step];
502                                 }else{
503                                         if(src[(x+1)*step] > max) max= src[(x+1)*step];
504                                         if(src[ x   *step] < min) min= src[ x   *step];
505                                 }
506                         }
507                         if(max-min < 2*QP){
508                                 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
509                                 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
510
511                                 int sums[10];
512                                 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
513                                 sums[1] = sums[0] - first       + src[3*step];
514                                 sums[2] = sums[1] - first       + src[4*step];
515                                 sums[3] = sums[2] - first       + src[5*step];
516                                 sums[4] = sums[3] - first       + src[6*step];
517                                 sums[5] = sums[4] - src[0*step] + src[7*step];
518                                 sums[6] = sums[5] - src[1*step] + last;
519                                 sums[7] = sums[6] - src[2*step] + last;
520                                 sums[8] = sums[7] - src[3*step] + last;
521                                 sums[9] = sums[8] - src[4*step] + last;
522
523                                 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
524                                 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
525                                 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
526                                 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
527                                 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
528                                 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
529                                 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
530                                 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
531                         }
532                 }else{
533                         const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
534
535                         if(FFABS(middleEnergy) < 8*QP)
536                         {
537                                 const int q=(src[3*step] - src[4*step])/2;
538                                 const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
539                                 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
540
541                                 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
542                                 d= FFMAX(d, 0);
543
544                                 d= (5*d + 32) >> 6;
545                                 d*= FFSIGN(-middleEnergy);
546
547                                 if(q>0)
548                                 {
549                                         d= d<0 ? 0 : d;
550                                         d= d>q ? q : d;
551                                 }
552                                 else
553                                 {
554                                         d= d>0 ? 0 : d;
555                                         d= d<q ? q : d;
556                                 }
557
558                                 src[3*step]-= d;
559                                 src[4*step]+= d;
560                         }
561                 }
562
563                 src += stride;
564         }
565 /*if(step==16){
566     STOP_TIMER("step16")
567 }else{
568     STOP_TIMER("stepX")
569 }*/
570 }
571
//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one

// Step 1: decide which variants of the template get compiled (COMPILE_xyz).
// With RUNTIME_CPUDETECT every variant available for the architecture is built;
// otherwise the choice follows the HAVE_xyz macros from the build configuration.

//Plain C versions
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
#define COMPILE_C
#endif

#ifdef ARCH_POWERPC
#ifdef HAVE_ALTIVEC
#define COMPILE_ALTIVEC
#endif //HAVE_ALTIVEC
#endif //ARCH_POWERPC

#if defined(ARCH_X86) || defined(ARCH_X86_64)

#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX
#endif

#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX2
#endif

#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_3DNOW
#endif
#endif /* defined(ARCH_X86) || defined(ARCH_X86_64) */

// Step 2: the HAVE_xyz macros are cleared here and then set again per variant,
// immediately before each inclusion of the template.
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef HAVE_ALTIVEC

// Step 3: include the template once per selected variant. RENAME() appends a
// variant suffix (_C, _altivec, _MMX, _MMX2, _3DNow); postProcess() below calls
// the resulting postProcess_<suffix> functions.
// NOTE(review): presumably postprocess_template.c wraps all of its function names in RENAME() - confirm there.
#ifdef COMPILE_C
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _C
#include "postprocess_template.c"
#endif

#ifdef ARCH_POWERPC
#ifdef COMPILE_ALTIVEC
#undef RENAME
#define HAVE_ALTIVEC
#define RENAME(a) a ## _altivec
#include "postprocess_altivec_template.c"
#include "postprocess_template.c"
#endif
#endif //ARCH_POWERPC

//MMX versions
#ifdef COMPILE_MMX
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX
#include "postprocess_template.c"
#endif

//MMX2 versions
#ifdef COMPILE_MMX2
#undef RENAME
#define HAVE_MMX
#define HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX2
#include "postprocess_template.c"
#endif

//3DNOW versions
#ifdef COMPILE_3DNOW
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#define HAVE_3DNOW
#define RENAME(a) a ## _3DNow
#include "postprocess_template.c"
#endif

// minor note: the HAVE_xyz macros no longer reflect the build configuration after
// this point (they hold whatever the last compiled variant set), so don't rely on them
653
654 static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
655         QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
656 {
657         PPContext *c= (PPContext *)vc;
658         PPMode *ppMode= (PPMode *)vm;
659         c->ppMode= *ppMode; //FIXME
660
661         // useing ifs here as they are faster than function pointers allthough the
662         // difference wouldnt be messureable here but its much better because
663         // someone might exchange the cpu whithout restarting mplayer ;)
664 #ifdef RUNTIME_CPUDETECT
665 #if defined(ARCH_X86) || defined(ARCH_X86_64)
666         // ordered per speed fasterst first
667         if(c->cpuCaps & PP_CPU_CAPS_MMX2)
668                 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
669         else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
670                 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
671         else if(c->cpuCaps & PP_CPU_CAPS_MMX)
672                 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
673         else
674                 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
675 #else
676 #ifdef ARCH_POWERPC
677 #ifdef HAVE_ALTIVEC
678         if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
679                 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
680         else
681 #endif
682 #endif
683                 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
684 #endif
685 #else //RUNTIME_CPUDETECT
686 #ifdef HAVE_MMX2
687                 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
688 #elif defined (HAVE_3DNOW)
689                 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
690 #elif defined (HAVE_MMX)
691                 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
692 #elif defined (HAVE_ALTIVEC)
693                 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
694 #else
695                 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
696 #endif
697 #endif //!RUNTIME_CPUDETECT
698 }
699
700 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
701 //        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
702
/* -pp Command line Help
 *
 * The short and long filter names printed here must be spelled exactly like the
 * entries of filters[] in this file: the accurate deblockers are registered as
 * "ahdeblock" / "avdeblock" and the quantizer override as "forcequant".
 */
char *pp_help=
"Available postprocessing filters:\n"
"Filters                        Options\n"
"short  long name       short   long option     Description\n"
"*      *               a       autoq           CPU power dependent enabler\n"
"                       c       chrom           chrominance filtering enabled\n"
"                       y       nochrom         chrominance filtering disabled\n"
"                       n       noluma          luma filtering disabled\n"
"hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
"       1. difference factor: default=32, higher -> more deblocking\n"
"       2. flatness threshold: default=39, lower -> more deblocking\n"
"                       the h & v deblocking filters share these\n"
"                       so you can't set different thresholds for h / v\n"
"vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
"ha     ahdeblock       (2 threshold)           horizontal deblocking filter\n"
"va     avdeblock       (2 threshold)           vertical deblocking filter\n"
"h1     x1hdeblock                              experimental h deblock filter 1\n"
"v1     x1vdeblock                              experimental v deblock filter 1\n"
"dr     dering                                  deringing filter\n"
"al     autolevels                              automatic brightness / contrast\n"
"                       f        fullyrange     stretch luminance to (0..255)\n"
"lb     linblenddeint                           linear blend deinterlacer\n"
"li     linipoldeint                            linear interpolating deinterlace\n"
"ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
"md     mediandeint                             median deinterlacer\n"
"fd     ffmpegdeint                             ffmpeg deinterlacer\n"
"l5     lowpass5                                FIR lowpass deinterlacer\n"
"de     default                                 hb:a,vb:a,dr:a\n"
"fa     fast                                    h1:a,v1:a,dr:a\n"
"ac                                             ha:a:128:7,va:a,dr:a\n"
"tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
"                     1. <= 2. <= 3.            larger -> stronger filtering\n"
"fq     forcequant      <quantizer>             force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb                                   de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
748
749 pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
750 {
751         char temp[GET_MODE_BUFFER_SIZE];
752         char *p= temp;
753         const char *filterDelimiters= ",/";
754         const char *optionDelimiters= ":";
755         struct PPMode *ppMode;
756         char *filterToken;
757
758         ppMode= av_malloc(sizeof(PPMode));
759
760         ppMode->lumMode= 0;
761         ppMode->chromMode= 0;
762         ppMode->maxTmpNoise[0]= 700;
763         ppMode->maxTmpNoise[1]= 1500;
764         ppMode->maxTmpNoise[2]= 3000;
765         ppMode->maxAllowedY= 234;
766         ppMode->minAllowedY= 16;
767         ppMode->baseDcDiff= 256/8;
768         ppMode->flatnessThreshold= 56-16-1;
769         ppMode->maxClippedThreshold= 0.01;
770         ppMode->error=0;
771
772         strncpy(temp, name, GET_MODE_BUFFER_SIZE);
773
774         av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
775
776         for(;;){
777                 char *filterName;
778                 int q= 1000000; //PP_QUALITY_MAX;
779                 int chrom=-1;
780                 int luma=-1;
781                 char *option;
782                 char *options[OPTIONS_ARRAY_SIZE];
783                 int i;
784                 int filterNameOk=0;
785                 int numOfUnknownOptions=0;
786                 int enable=1; //does the user want us to enabled or disabled the filter
787
788                 filterToken= strtok(p, filterDelimiters);
789                 if(filterToken == NULL) break;
790                 p+= strlen(filterToken) + 1; // p points to next filterToken
791                 filterName= strtok(filterToken, optionDelimiters);
792                 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
793
794                 if(*filterName == '-')
795                 {
796                         enable=0;
797                         filterName++;
798                 }
799
800                 for(;;){ //for all options
801                         option= strtok(NULL, optionDelimiters);
802                         if(option == NULL) break;
803
804                         av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
805                         if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
806                         else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
807                         else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
808                         else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
809                         else
810                         {
811                                 options[numOfUnknownOptions] = option;
812                                 numOfUnknownOptions++;
813                         }
814                         if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
815                 }
816                 options[numOfUnknownOptions] = NULL;
817
818                 /* replace stuff from the replace Table */
819                 for(i=0; replaceTable[2*i]!=NULL; i++)
820                 {
821                         if(!strcmp(replaceTable[2*i], filterName))
822                         {
823                                 int newlen= strlen(replaceTable[2*i + 1]);
824                                 int plen;
825                                 int spaceLeft;
826
827                                 if(p==NULL) p= temp, *p=0;      //last filter
828                                 else p--, *p=',';               //not last filter
829
830                                 plen= strlen(p);
831                                 spaceLeft= p - temp + plen;
832                                 if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE)
833                                 {
834                                         ppMode->error++;
835                                         break;
836                                 }
837                                 memmove(p + newlen, p, plen+1);
838                                 memcpy(p, replaceTable[2*i + 1], newlen);
839                                 filterNameOk=1;
840                         }
841                 }
842
843                 for(i=0; filters[i].shortName!=NULL; i++)
844                 {
845                         if(   !strcmp(filters[i].longName, filterName)
846                            || !strcmp(filters[i].shortName, filterName))
847                         {
848                                 ppMode->lumMode &= ~filters[i].mask;
849                                 ppMode->chromMode &= ~filters[i].mask;
850
851                                 filterNameOk=1;
852                                 if(!enable) break; // user wants to disable it
853
854                                 if(q >= filters[i].minLumQuality && luma)
855                                         ppMode->lumMode|= filters[i].mask;
856                                 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
857                                         if(q >= filters[i].minChromQuality)
858                                                 ppMode->chromMode|= filters[i].mask;
859
860                                 if(filters[i].mask == LEVEL_FIX)
861                                 {
862                                         int o;
863                                         ppMode->minAllowedY= 16;
864                                         ppMode->maxAllowedY= 234;
865                                         for(o=0; options[o]!=NULL; o++)
866                                         {
867                                                 if(  !strcmp(options[o],"fullyrange")
868                                                    ||!strcmp(options[o],"f"))
869                                                 {
870                                                         ppMode->minAllowedY= 0;
871                                                         ppMode->maxAllowedY= 255;
872                                                         numOfUnknownOptions--;
873                                                 }
874                                         }
875                                 }
876                                 else if(filters[i].mask == TEMP_NOISE_FILTER)
877                                 {
878                                         int o;
879                                         int numOfNoises=0;
880
881                                         for(o=0; options[o]!=NULL; o++)
882                                         {
883                                                 char *tail;
884                                                 ppMode->maxTmpNoise[numOfNoises]=
885                                                         strtol(options[o], &tail, 0);
886                                                 if(tail!=options[o])
887                                                 {
888                                                         numOfNoises++;
889                                                         numOfUnknownOptions--;
890                                                         if(numOfNoises >= 3) break;
891                                                 }
892                                         }
893                                 }
894                                 else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
895                                      || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
896                                 {
897                                         int o;
898
899                                         for(o=0; options[o]!=NULL && o<2; o++)
900                                         {
901                                                 char *tail;
902                                                 int val= strtol(options[o], &tail, 0);
903                                                 if(tail==options[o]) break;
904
905                                                 numOfUnknownOptions--;
906                                                 if(o==0) ppMode->baseDcDiff= val;
907                                                 else ppMode->flatnessThreshold= val;
908                                         }
909                                 }
910                                 else if(filters[i].mask == FORCE_QUANT)
911                                 {
912                                         int o;
913                                         ppMode->forcedQuant= 15;
914
915                                         for(o=0; options[o]!=NULL && o<1; o++)
916                                         {
917                                                 char *tail;
918                                                 int val= strtol(options[o], &tail, 0);
919                                                 if(tail==options[o]) break;
920
921                                                 numOfUnknownOptions--;
922                                                 ppMode->forcedQuant= val;
923                                         }
924                                 }
925                         }
926                 }
927                 if(!filterNameOk) ppMode->error++;
928                 ppMode->error += numOfUnknownOptions;
929         }
930
931         av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
932         if(ppMode->error)
933         {
934                 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
935                 av_free(ppMode);
936                 return NULL;
937         }
938         return ppMode;
939 }
940
941 void pp_free_mode(pp_mode_t *mode){
942     av_free(mode);
943 }
944
/**
 * Replaces the buffer stored in *p with a fresh zero-filled one.
 * The old contents are not preserved: the previous buffer is freed first.
 *
 * @param p         address of the pointer to replace
 * @param alignment not used by this implementation
 * @param size      size in bytes passed to av_mallocz()
 */
static void reallocAlign(void **p, int alignment, int size)
{
        void *fresh;

        av_free(*p);
        fresh= av_mallocz(size);
        *p= fresh;
}
949
950 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
951         int mbWidth = (width+15)>>4;
952         int mbHeight= (height+15)>>4;
953         int i;
954
955         c->stride= stride;
956         c->qpStride= qpStride;
957
958         reallocAlign((void **)&c->tempDst, 8, stride*24);
959         reallocAlign((void **)&c->tempSrc, 8, stride*24);
960         reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
961         reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
962         for(i=0; i<256; i++)
963                 c->yHistogram[i]= width*height/64*15/256;
964
965         for(i=0; i<3; i++)
966         {
967                 //Note:the +17*1024 is just there so i dont have to worry about r/w over te end
968                 reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
969                 reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
970         }
971
972         reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
973         reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
974         reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
975         reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
976 }
977
978 static void global_init(void){
979         int i;
980         memset(clip_table, 0, 256);
981         for(i=256; i<512; i++)
982                 clip_table[i]= i;
983         memset(clip_table+512, 0, 256);
984 }
985
/**
 * Name callback stored in av_codec_context_class.
 *
 * @param ptr ignored
 * @return the constant string "postproc"
 */
static const char * context_to_name(void * ptr)
{
        static const char contextName[]= "postproc";

        return contextName;
}
989
990 static AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
991
992 pp_context_t *pp_get_context(int width, int height, int cpuCaps){
993         PPContext *c= av_malloc(sizeof(PPContext));
994         int stride= (width+15)&(~15);    //assumed / will realloc if needed
995         int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
996
997         global_init();
998
999         memset(c, 0, sizeof(PPContext));
1000         c->av_class = &av_codec_context_class;
1001         c->cpuCaps= cpuCaps;
1002         if(cpuCaps&PP_FORMAT){
1003                 c->hChromaSubSample= cpuCaps&0x3;
1004                 c->vChromaSubSample= (cpuCaps>>4)&0x3;
1005         }else{
1006                 c->hChromaSubSample= 1;
1007                 c->vChromaSubSample= 1;
1008         }
1009
1010         reallocBuffers(c, width, height, stride, qpStride);
1011
1012         c->frameNum=-1;
1013
1014         return c;
1015 }
1016
1017 void pp_free_context(void *vc){
1018         PPContext *c = (PPContext*)vc;
1019         int i;
1020
1021         for(i=0; i<3; i++) av_free(c->tempBlured[i]);
1022         for(i=0; i<3; i++) av_free(c->tempBluredPast[i]);
1023
1024         av_free(c->tempBlocks);
1025         av_free(c->yHistogram);
1026         av_free(c->tempDst);
1027         av_free(c->tempSrc);
1028         av_free(c->deintTemp);
1029         av_free(c->stdQPTable);
1030         av_free(c->nonBQPTable);
1031         av_free(c->forcedQPTable);
1032
1033         memset(c, 0, sizeof(PPContext));
1034
1035         av_free(c);
1036 }
1037
1038 void  pp_postprocess(uint8_t * src[3], int srcStride[3],
1039                  uint8_t * dst[3], int dstStride[3],
1040                  int width, int height,
1041                  QP_STORE_T *QP_store,  int QPStride,
1042                  pp_mode_t *vm,  void *vc, int pict_type)
1043 {
1044         int mbWidth = (width+15)>>4;
1045         int mbHeight= (height+15)>>4;
1046         PPMode *mode = (PPMode*)vm;
1047         PPContext *c = (PPContext*)vc;
1048         int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
1049         int absQPStride = FFABS(QPStride);
1050
1051         // c->stride and c->QPStride are always positive
1052         if(c->stride < minStride || c->qpStride < absQPStride)
1053                 reallocBuffers(c, width, height,
1054                                 FFMAX(minStride, c->stride),
1055                                 FFMAX(c->qpStride, absQPStride));
1056
1057         if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
1058         {
1059                 int i;
1060                 QP_store= c->forcedQPTable;
1061                 absQPStride = QPStride = 0;
1062                 if(mode->lumMode & FORCE_QUANT)
1063                         for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
1064                 else
1065                         for(i=0; i<mbWidth; i++) QP_store[i]= 1;
1066         }
1067
1068         if(pict_type & PP_PICT_TYPE_QP2){
1069                 int i;
1070                 const int count= mbHeight * absQPStride;
1071                 for(i=0; i<(count>>2); i++){
1072                         ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1073                 }
1074                 for(i<<=2; i<count; i++){
1075                         c->stdQPTable[i] = QP_store[i]>>1;
1076                 }
1077                 QP_store= c->stdQPTable;
1078                 QPStride= absQPStride;
1079         }
1080
1081 if(0){
1082 int x,y;
1083 for(y=0; y<mbHeight; y++){
1084         for(x=0; x<mbWidth; x++){
1085                 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1086         }
1087         av_log(c, AV_LOG_INFO, "\n");
1088 }
1089         av_log(c, AV_LOG_INFO, "\n");
1090 }
1091
1092         if((pict_type&7)!=3)
1093         {
1094                 if (QPStride >= 0) {
1095                         int i;
1096                         const int count= mbHeight * QPStride;
1097                         for(i=0; i<(count>>2); i++){
1098                                 ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1099                         }
1100                         for(i<<=2; i<count; i++){
1101                                 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1102                         }
1103                 } else {
1104                         int i,j;
1105                         for(i=0; i<mbHeight; i++) {
1106                                     for(j=0; j<absQPStride; j++) {
1107                                         c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1108                                 }
1109                         }
1110                 }
1111         }
1112
1113         av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1114                mode->lumMode, mode->chromMode);
1115
1116         postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1117                 width, height, QP_store, QPStride, 0, mode, c);
1118
1119         width  = (width )>>c->hChromaSubSample;
1120         height = (height)>>c->vChromaSubSample;
1121
1122         if(mode->chromMode)
1123         {
1124                 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1125                         width, height, QP_store, QPStride, 1, mode, c);
1126                 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1127                         width, height, QP_store, QPStride, 2, mode, c);
1128         }
1129         else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
1130         {
1131                 linecpy(dst[1], src[1], height, srcStride[1]);
1132                 linecpy(dst[2], src[2], height, srcStride[2]);
1133         }
1134         else
1135         {
1136                 int y;
1137                 for(y=0; y<height; y++)
1138                 {
1139                         memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1140                         memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1141                 }
1142         }
1143 }
1144