]> rtime.felk.cvut.cz Git - frescor/ffmpeg.git/blob - libpostproc/postprocess.c
Remove mangle.h, it is just a duplicate of parts of libavutil/internal.h.
[frescor/ffmpeg.git] / libpostproc / postprocess.c
1 /*
2  * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3  *
4  * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22
23 /**
24  * @file postprocess.c
25  * postprocessing.
26  */
27
28 /*
29                         C       MMX     MMX2    3DNow   AltiVec
30 isVertDC                Ec      Ec                      Ec
31 isVertMinMaxOk          Ec      Ec                      Ec
32 doVertLowPass           E               e       e       Ec
33 doVertDefFilter         Ec      Ec      e       e       Ec
34 isHorizDC               Ec      Ec                      Ec
35 isHorizMinMaxOk         a       E                       Ec
36 doHorizLowPass          E               e       e       Ec
37 doHorizDefFilter        Ec      Ec      e       e       Ec
38 do_a_deblock            Ec      E       Ec      E
39 deRing                  E               e       e*      Ecp
40 Vertical RKAlgo1        E               a       a
41 Horizontal RKAlgo1                      a       a
42 Vertical X1#            a               E       E
43 Horizontal X1#          a               E       E
44 LinIpolDeinterlace      e               E       E*
45 CubicIpolDeinterlace    a               e       e*
46 LinBlendDeinterlace     e               E       E*
47 MedianDeinterlace#      E       Ec      Ec
48 TempDeNoiser#           E               e       e       Ec
49
50 * i do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
51 # more or less selfinvented filters so the exactness is not too meaningful
52 E = Exact implementation
e = almost exact implementation (slightly different rounding,...)
54 a = alternative / approximate impl
55 c = checked against the other implementations (-vo md5)
56 p = partially optimized, still some work to do
57 */
58
59 /*
60 TODO:
61 reduce the time wasted on the mem transfer
62 unroll stuff if instructions depend too much on the prior one
63 move YScale thing to the end instead of fixing QP
64 write a faster and higher quality deblocking filter :)
65 make the mainloop more flexible (variable number of blocks at once
66         (the if/else stuff per block is slowing things down)
67 compare the quality & speed of all filters
68 split this huge file
69 optimize c versions
70 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
71 ...
72 */
73
74 //Changelog: use the Subversion log
75
76 #include "config.h"
77 #include "avutil.h"
78 #include <inttypes.h>
79 #include <stdio.h>
80 #include <stdlib.h>
81 #include <string.h>
82 #ifdef HAVE_MALLOC_H
83 #include <malloc.h>
84 #endif
85 //#undef HAVE_MMX2
86 //#define HAVE_3DNOW
87 //#undef HAVE_MMX
88 //#undef ARCH_X86
89 //#define DEBUG_BRIGHTNESS
90 #include "postprocess.h"
91 #include "postprocess_internal.h"
92
93 #ifdef HAVE_ALTIVEC_H
94 #include <altivec.h>
95 #endif
96
97 #define GET_MODE_BUFFER_SIZE 500
98 #define OPTIONS_ARRAY_SIZE 10
99 #define BLOCK_SIZE 8
100 #define TEMP_STRIDE 8
101 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet
102
#if defined(ARCH_X86)
/*
 * 64-bit packed constants, declared with 8-byte alignment (x86 builds only).
 *   wNN: the 16-bit value 0x00NN repeated in each of the four words
 *   bNN: the byte value 0xNN repeated in each of the eight bytes
 * None of them is referenced in the C code of this part of the file.
 * NOTE(review): presumably they are operands of the inline asm in the
 * included templates, which would be why they carry attribute_used -- confirm.
 */
static DECLARE_ALIGNED(8, uint64_t attribute_used, w05)= 0x0005000500050005LL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, w04)= 0x0004000400040004LL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, w20)= 0x0020002000200020LL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, b00)= 0x0000000000000000LL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, b01)= 0x0101010101010101LL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, b02)= 0x0202020202020202LL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, b08)= 0x0808080808080808LL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, b80)= 0x8080808080808080LL;
#endif
113
/* Backing storage of 3*256 bytes; it is not filled in this part of the file. */
static uint8_t clip_table[3*256];
/* Points 256 entries into clip_table, so indices -256..511 stay inside the array. */
static uint8_t * const clip_tab= clip_table + 256;

/* Not referenced in this part of the file.
 * NOTE(review): presumably the threshold used by the dering filter in the templates -- confirm. */
static const int attribute_used deringThreshold= 20;
118
119
/*
 * Table of the known filters.
 * Every entry starts with the short and the long filter name. The four values
 * that follow are members of struct PPFilter, which is declared outside this
 * part of the file (NOTE(review): look up their meaning in
 * postprocess_internal.h before editing them).
 * The two "rk" entries are disabled by the surrounding comment.
 * The table ends with an all-NULL/zero entry.
 */
static struct PPFilter filters[]=
{
        {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
        {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
/*      {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
        {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
        {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
        {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
        {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
        {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
        {"dr", "dering",                1, 5, 6, DERING},
        {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
        {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
        {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
        {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
        {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
        {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
        {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
        {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
        {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
        {NULL, NULL,0,0,0,0} //End Marker
};
142
/*
 * Shorthand names and the filter strings they stand for.
 * The array is laid out as consecutive (name, replacement) string pairs and
 * ends with a single NULL. "default"/"de" and "fast"/"fa" map to the same
 * replacement text.
 * NOTE(review): the code that performs the substitution is not in this part
 * of the file.
 */
static const char *replaceTable[]=
{
        "default",      "hdeblock:a,vdeblock:a,dering:a",
        "de",           "hdeblock:a,vdeblock:a,dering:a",
        "fast",         "x1hdeblock:a,x1vdeblock:a,dering:a",
        "fa",           "x1hdeblock:a,x1vdeblock:a,dering:a",
        "ac",           "ha:a:128:7,va:a,dering:a",
        NULL //End Marker
};
152
153
154 #if defined(ARCH_X86)
/**
 * Executes the x86 "prefetchnta" instruction on the address p.
 * The asm statement has no outputs and takes p in a register.
 * Only compiled when ARCH_X86 is defined.
 */
static inline void prefetchnta(void *p)
{
        asm volatile(   "prefetchnta (%0)\n\t"
                : : "r" (p)
        );
}
161
/**
 * Executes the x86 "prefetcht0" instruction on the address p.
 * The asm statement has no outputs and takes p in a register.
 * Only compiled when ARCH_X86 is defined.
 */
static inline void prefetcht0(void *p)
{
        asm volatile(   "prefetcht0 (%0)\n\t"
                : : "r" (p)
        );
}
168
/**
 * Executes the x86 "prefetcht1" instruction on the address p.
 * The asm statement has no outputs and takes p in a register.
 * Only compiled when ARCH_X86 is defined.
 */
static inline void prefetcht1(void *p)
{
        asm volatile(   "prefetcht1 (%0)\n\t"
                : : "r" (p)
        );
}
175
/**
 * Executes the x86 "prefetcht2" instruction on the address p.
 * The asm statement has no outputs and takes p in a register.
 * Only compiled when ARCH_X86 is defined.
 */
static inline void prefetcht2(void *p)
{
        asm volatile(   "prefetcht2 (%0)\n\t"
                : : "r" (p)
        );
}
182 #endif
183
// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
185
186 /**
187  * Check if the given 8x8 Block is mostly "flat"
188  */
189 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
190 {
191         int numEq= 0;
192         int y;
193         const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
194         const int dcThreshold= dcOffset*2 + 1;
195
196         for(y=0; y<BLOCK_SIZE; y++)
197         {
198                 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
199                 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
200                 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
201                 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
202                 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
203                 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
204                 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
205                 src+= stride;
206         }
207         return numEq > c->ppMode.flatnessThreshold;
208 }
209
210 /**
211  * Check if the middle 8x8 Block in the given 8x16 block is flat
212  */
213 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
214         int numEq= 0;
215         int y;
216         const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
217         const int dcThreshold= dcOffset*2 + 1;
218
219         src+= stride*4; // src points to begin of the 8x8 Block
220         for(y=0; y<BLOCK_SIZE-1; y++)
221         {
222                 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
223                 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
224                 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
225                 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
226                 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
227                 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
228                 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
229                 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
230                 src+= stride;
231         }
232         return numEq > c->ppMode.flatnessThreshold;
233 }
234
/**
 * Sparse horizontal range check of an 8x8 block.
 *
 * One pair of samples is probed per row: columns (0,5), (2,7), (4,1), (6,3)
 * on rows 0..3 and the same pattern again on rows 4..7. The check fails as
 * soon as a probed difference lies outside -2*QP .. 2*QP.
 *
 * @param src    first sample of the block
 * @param stride distance between two rows
 * @param QP     quantizer the allowed range is derived from
 * @return 1 if every probed pair is within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
        static const uint8_t minuend[4]=    {0, 2, 4, 6};
        static const uint8_t subtrahend[4]= {5, 7, 1, 3};
        int row;

        for(row=0; row<8; row++)
        {
                const int k= row & 3;
                // unsigned compare: negative sums wrap to huge values and fail too
                if((unsigned)(src[minuend[k]] - src[subtrahend[k]] + 2*QP) > 4*QP) return 0;
                src += stride;
        }
        return 1;
}
257
258 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
259 {
260 #if 1
261 #if 1
262         int x;
263         src+= stride*4;
264         for(x=0; x<BLOCK_SIZE; x+=4)
265         {
266                 if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
267                 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
268                 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
269                 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
270         }
271 #else
272         int x;
273         src+= stride*3;
274         for(x=0; x<BLOCK_SIZE; x++)
275         {
276                 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
277         }
278 #endif
279         return 1;
280 #else
281         int x;
282         src+= stride*4;
283         for(x=0; x<BLOCK_SIZE; x++)
284         {
285                 int min=255;
286                 int max=0;
287                 int y;
288                 for(y=0; y<8; y++){
289                         int v= src[x + y*stride];
290                         if(v>max) max=v;
291                         if(v<min) min=v;
292                 }
293                 if(max-min > 2*QP) return 0;
294         }
295         return 1;
296 #endif
297 }
298
299 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
300         if( isHorizDC_C(src, stride, c) ){
301                 if( isHorizMinMaxOk_C(src, stride, c->QP) )
302                         return 1;
303                 else
304                         return 0;
305         }else{
306                 return 2;
307         }
308 }
309
310 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
311         if( isVertDC_C(src, stride, c) ){
312                 if( isVertMinMaxOk_C(src, stride, c->QP) )
313                         return 1;
314                 else
315                         return 0;
316         }else{
317                 return 2;
318         }
319 }
320
321 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
322 {
323         int y;
324         for(y=0; y<BLOCK_SIZE; y++)
325         {
326                 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
327
328                 if(FFABS(middleEnergy) < 8*c->QP)
329                 {
330                         const int q=(dst[3] - dst[4])/2;
331                         const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
332                         const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
333
334                         int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
335                         d= FFMAX(d, 0);
336
337                         d= (5*d + 32) >> 6;
338                         d*= FFSIGN(-middleEnergy);
339
340                         if(q>0)
341                         {
342                                 d= d<0 ? 0 : d;
343                                 d= d>q ? q : d;
344                         }
345                         else
346                         {
347                                 d= d>0 ? 0 : d;
348                                 d= d<q ? q : d;
349                         }
350
351                         dst[3]-= d;
352                         dst[4]+= d;
353                 }
354                 dst+= stride;
355         }
356 }
357
358 /**
359  * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
360  * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
361  */
362 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
363 {
364         int y;
365         for(y=0; y<BLOCK_SIZE; y++)
366         {
367                 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
368                 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
369
370                 int sums[10];
371                 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
372                 sums[1] = sums[0] - first  + dst[3];
373                 sums[2] = sums[1] - first  + dst[4];
374                 sums[3] = sums[2] - first  + dst[5];
375                 sums[4] = sums[3] - first  + dst[6];
376                 sums[5] = sums[4] - dst[0] + dst[7];
377                 sums[6] = sums[5] - dst[1] + last;
378                 sums[7] = sums[6] - dst[2] + last;
379                 sums[8] = sums[7] - dst[3] + last;
380                 sums[9] = sums[8] - dst[4] + last;
381
382                 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
383                 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
384                 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
385                 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
386                 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
387                 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
388                 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
389                 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
390
391                 dst+= stride;
392         }
393 }
394
395 /**
396  * Experimental Filter 1 (Horizontal)
397  * will not damage linear gradients
398  * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
399  * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
400  * MMX2 version does correct clipping C version does not
401  * not identical with the vertical one
402  */
403 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
404 {
405         int y;
406         static uint64_t *lut= NULL;
407         if(lut==NULL)
408         {
409                 int i;
410                 lut = av_malloc(256*8);
411                 for(i=0; i<256; i++)
412                 {
413                         int v= i < 128 ? 2*i : 2*(i-256);
414 /*
415 //Simulate 112242211 9-Tap filter
416                         uint64_t a= (v/16) & 0xFF;
417                         uint64_t b= (v/8) & 0xFF;
418                         uint64_t c= (v/4) & 0xFF;
419                         uint64_t d= (3*v/8) & 0xFF;
420 */
421 //Simulate piecewise linear interpolation
422                         uint64_t a= (v/16) & 0xFF;
423                         uint64_t b= (v*3/16) & 0xFF;
424                         uint64_t c= (v*5/16) & 0xFF;
425                         uint64_t d= (7*v/16) & 0xFF;
426                         uint64_t A= (0x100 - a)&0xFF;
427                         uint64_t B= (0x100 - b)&0xFF;
428                         uint64_t C= (0x100 - c)&0xFF;
429                         uint64_t D= (0x100 - c)&0xFF;
430
431                         lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
432                                 (D<<24) | (C<<16) | (B<<8) | (A);
433                         //lut[i] = (v<<32) | (v<<24);
434                 }
435         }
436
437         for(y=0; y<BLOCK_SIZE; y++)
438         {
439                 int a= src[1] - src[2];
440                 int b= src[3] - src[4];
441                 int c= src[5] - src[6];
442
443                 int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
444
445                 if(d < QP)
446                 {
447                         int v = d * FFSIGN(-b);
448
449                         src[1] +=v/8;
450                         src[2] +=v/4;
451                         src[3] +=3*v/8;
452                         src[4] -=3*v/8;
453                         src[5] -=v/4;
454                         src[6] -=v/8;
455
456                 }
457                 src+=stride;
458         }
459 }
460
461 /**
462  * accurate deblock filter
463  */
464 static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
465         int y;
466         const int QP= c->QP;
467         const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
468         const int dcThreshold= dcOffset*2 + 1;
469 //START_TIMER
470         src+= step*4; // src points to begin of the 8x8 Block
471         for(y=0; y<8; y++){
472                 int numEq= 0;
473
474                 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
475                 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
476                 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
477                 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
478                 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
479                 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
480                 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
481                 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
482                 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
483                 if(numEq > c->ppMode.flatnessThreshold){
484                         int min, max, x;
485
486                         if(src[0] > src[step]){
487                             max= src[0];
488                             min= src[step];
489                         }else{
490                             max= src[step];
491                             min= src[0];
492                         }
493                         for(x=2; x<8; x+=2){
494                                 if(src[x*step] > src[(x+1)*step]){
495                                         if(src[x    *step] > max) max= src[ x   *step];
496                                         if(src[(x+1)*step] < min) min= src[(x+1)*step];
497                                 }else{
498                                         if(src[(x+1)*step] > max) max= src[(x+1)*step];
499                                         if(src[ x   *step] < min) min= src[ x   *step];
500                                 }
501                         }
502                         if(max-min < 2*QP){
503                                 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
504                                 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
505
506                                 int sums[10];
507                                 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
508                                 sums[1] = sums[0] - first       + src[3*step];
509                                 sums[2] = sums[1] - first       + src[4*step];
510                                 sums[3] = sums[2] - first       + src[5*step];
511                                 sums[4] = sums[3] - first       + src[6*step];
512                                 sums[5] = sums[4] - src[0*step] + src[7*step];
513                                 sums[6] = sums[5] - src[1*step] + last;
514                                 sums[7] = sums[6] - src[2*step] + last;
515                                 sums[8] = sums[7] - src[3*step] + last;
516                                 sums[9] = sums[8] - src[4*step] + last;
517
518                                 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
519                                 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
520                                 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
521                                 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
522                                 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
523                                 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
524                                 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
525                                 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
526                         }
527                 }else{
528                         const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
529
530                         if(FFABS(middleEnergy) < 8*QP)
531                         {
532                                 const int q=(src[3*step] - src[4*step])/2;
533                                 const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
534                                 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
535
536                                 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
537                                 d= FFMAX(d, 0);
538
539                                 d= (5*d + 32) >> 6;
540                                 d*= FFSIGN(-middleEnergy);
541
542                                 if(q>0)
543                                 {
544                                         d= d<0 ? 0 : d;
545                                         d= d>q ? q : d;
546                                 }
547                                 else
548                                 {
549                                         d= d>0 ? 0 : d;
550                                         d= d<q ? q : d;
551                                 }
552
553                                 src[3*step]-= d;
554                                 src[4*step]+= d;
555                         }
556                 }
557
558                 src += stride;
559         }
560 /*if(step==16){
561     STOP_TIMER("step16")
562 }else{
563     STOP_TIMER("stepX")
564 }*/
565 }
566
/*
 * Template instantiation.
 * Step 1 decides from the build configuration which variants get compiled
 * (COMPILE_C, COMPILE_ALTIVEC, COMPILE_MMX, COMPILE_MMX2, COMPILE_3DNOW).
 * Step 2 clears the HAVE_* macros, then includes postprocess_template.c once
 * per selected variant with a fresh set of HAVE_* macros and a RENAME() macro
 * that appends the variant suffix (_C, _altivec, _MMX, _MMX2, _3DNow).
 * With RUNTIME_CPUDETECT the C variant and, on x86, all three x86 variants
 * are compiled.
 */
//Note: there are C, MMX, MMX2 and 3DNOW versions; there is no combined 3DNOW+MMX2 one
//Plain C versions
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
#define COMPILE_C
#endif

#ifdef ARCH_POWERPC
#ifdef HAVE_ALTIVEC
#define COMPILE_ALTIVEC
#endif //HAVE_ALTIVEC
#endif //ARCH_POWERPC

#if defined(ARCH_X86)

// without runtime detection only the most capable configured x86 variant is built
#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX
#endif

#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX2
#endif

#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_3DNOW
#endif
#endif /* defined(ARCH_X86) */

// from here on the HAVE_* macros describe the variant being compiled, not the build configuration
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef HAVE_ALTIVEC

#ifdef COMPILE_C
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _C
#include "postprocess_template.c"
#endif

#ifdef ARCH_POWERPC
#ifdef COMPILE_ALTIVEC
#undef RENAME
#define HAVE_ALTIVEC
#define RENAME(a) a ## _altivec
#include "postprocess_altivec_template.c"
#include "postprocess_template.c"
#endif
#endif //ARCH_POWERPC

//MMX versions
#ifdef COMPILE_MMX
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX
#include "postprocess_template.c"
#endif

//MMX2 versions
#ifdef COMPILE_MMX2
#undef RENAME
#define HAVE_MMX
#define HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX2
#include "postprocess_template.c"
#endif

//3DNOW versions
#ifdef COMPILE_3DNOW
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#define HAVE_3DNOW
#define RENAME(a) a ## _3DNow
#include "postprocess_template.c"
#endif
646
647 // minor note: the HAVE_xyz is messed up after that line so do not use it.
648
/**
 * Copies the mode *vm into the context vc and forwards all other arguments
 * to one of the postProcess_<variant>() functions.
 *
 * With RUNTIME_CPUDETECT the variant is picked from c->cpuCaps on every call
 * (x86: MMX2, then 3DNow, then MMX, then C; PowerPC: AltiVec if compiled in
 * and flagged in cpuCaps, else C; other architectures: C). Without
 * RUNTIME_CPUDETECT the variant is fixed at compile time through the HAVE_*
 * macros, which at this point hold whatever the last template instantiation
 * above left defined.
 *
 * @param vm mode, cast to PPMode* and copied into the context
 * @param vc context, cast to PPContext*
 */
static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
        QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
{
        PPContext *c= (PPContext *)vc;
        PPMode *ppMode= (PPMode *)vm;
        c->ppMode= *ppMode; //FIXME

        // Using ifs here as they are faster than function pointers although the
        // difference would not be measurable here but it is much better because
        // someone might exchange the CPU without restarting MPlayer ;)
#ifdef RUNTIME_CPUDETECT
#if defined(ARCH_X86)
        // ordered by speed, fastest first
        if(c->cpuCaps & PP_CPU_CAPS_MMX2)
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
        else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
        else if(c->cpuCaps & PP_CPU_CAPS_MMX)
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
        else
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#else
#ifdef ARCH_POWERPC
#ifdef HAVE_ALTIVEC
        if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
                postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
        else
#endif
#endif
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#endif
#else //RUNTIME_CPUDETECT
#ifdef HAVE_MMX2
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#elif defined (HAVE_3DNOW)
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#elif defined (HAVE_MMX)
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#elif defined (HAVE_ALTIVEC)
                postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#else
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#endif
#endif //!RUNTIME_CPUDETECT
}
694
695 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
696 //        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
697
/**
 * Help text for the -pp command line option.
 *
 * The filter names listed here are spelled exactly as in the filters[] table
 * of this file ("ahdeblock", "avdeblock", "forcequant"), so that every long
 * name shown to the user is one the table actually contains.
 */
char *pp_help=
"Available postprocessing filters:\n"
"Filters                        Options\n"
"short  long name       short   long option     Description\n"
"*      *               a       autoq           CPU power dependent enabler\n"
"                       c       chrom           chrominance filtering enabled\n"
"                       y       nochrom         chrominance filtering disabled\n"
"                       n       noluma          luma filtering disabled\n"
"hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
"       1. difference factor: default=32, higher -> more deblocking\n"
"       2. flatness threshold: default=39, lower -> more deblocking\n"
"                       the h & v deblocking filters share these\n"
"                       so you can't set different thresholds for h / v\n"
"vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
"ha     ahdeblock       (2 threshold)           horizontal deblocking filter\n"
"va     avdeblock       (2 threshold)           vertical deblocking filter\n"
"h1     x1hdeblock                              experimental h deblock filter 1\n"
"v1     x1vdeblock                              experimental v deblock filter 1\n"
"dr     dering                                  deringing filter\n"
"al     autolevels                              automatic brightness / contrast\n"
"                       f        fullyrange     stretch luminance to (0..255)\n"
"lb     linblenddeint                           linear blend deinterlacer\n"
"li     linipoldeint                            linear interpolating deinterlace\n"
"ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
"md     mediandeint                             median deinterlacer\n"
"fd     ffmpegdeint                             ffmpeg deinterlacer\n"
"l5     lowpass5                                FIR lowpass deinterlacer\n"
"de     default                                 hb:a,vb:a,dr:a\n"
"fa     fast                                    h1:a,v1:a,dr:a\n"
"ac                                             ha:a:128:7,va:a,dr:a\n"
"tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
"                     1. <= 2. <= 3.            larger -> stronger filtering\n"
"fq     forcequant      <quantizer>             force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb                                   de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
743
744 pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
745 {
746         char temp[GET_MODE_BUFFER_SIZE];
747         char *p= temp;
748         const char *filterDelimiters= ",/";
749         const char *optionDelimiters= ":";
750         struct PPMode *ppMode;
751         char *filterToken;
752
753         ppMode= av_malloc(sizeof(PPMode));
754
755         ppMode->lumMode= 0;
756         ppMode->chromMode= 0;
757         ppMode->maxTmpNoise[0]= 700;
758         ppMode->maxTmpNoise[1]= 1500;
759         ppMode->maxTmpNoise[2]= 3000;
760         ppMode->maxAllowedY= 234;
761         ppMode->minAllowedY= 16;
762         ppMode->baseDcDiff= 256/8;
763         ppMode->flatnessThreshold= 56-16-1;
764         ppMode->maxClippedThreshold= 0.01;
765         ppMode->error=0;
766
767         strncpy(temp, name, GET_MODE_BUFFER_SIZE);
768
769         av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
770
771         for(;;){
772                 char *filterName;
773                 int q= 1000000; //PP_QUALITY_MAX;
774                 int chrom=-1;
775                 int luma=-1;
776                 char *option;
777                 char *options[OPTIONS_ARRAY_SIZE];
778                 int i;
779                 int filterNameOk=0;
780                 int numOfUnknownOptions=0;
781                 int enable=1; //does the user want us to enabled or disabled the filter
782
783                 filterToken= strtok(p, filterDelimiters);
784                 if(filterToken == NULL) break;
785                 p+= strlen(filterToken) + 1; // p points to next filterToken
786                 filterName= strtok(filterToken, optionDelimiters);
787                 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
788
789                 if(*filterName == '-')
790                 {
791                         enable=0;
792                         filterName++;
793                 }
794
795                 for(;;){ //for all options
796                         option= strtok(NULL, optionDelimiters);
797                         if(option == NULL) break;
798
799                         av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
800                         if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
801                         else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
802                         else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
803                         else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
804                         else
805                         {
806                                 options[numOfUnknownOptions] = option;
807                                 numOfUnknownOptions++;
808                         }
809                         if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
810                 }
811                 options[numOfUnknownOptions] = NULL;
812
813                 /* replace stuff from the replace Table */
814                 for(i=0; replaceTable[2*i]!=NULL; i++)
815                 {
816                         if(!strcmp(replaceTable[2*i], filterName))
817                         {
818                                 int newlen= strlen(replaceTable[2*i + 1]);
819                                 int plen;
820                                 int spaceLeft;
821
822                                 if(p==NULL) p= temp, *p=0;      //last filter
823                                 else p--, *p=',';               //not last filter
824
825                                 plen= strlen(p);
826                                 spaceLeft= p - temp + plen;
827                                 if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE)
828                                 {
829                                         ppMode->error++;
830                                         break;
831                                 }
832                                 memmove(p + newlen, p, plen+1);
833                                 memcpy(p, replaceTable[2*i + 1], newlen);
834                                 filterNameOk=1;
835                         }
836                 }
837
838                 for(i=0; filters[i].shortName!=NULL; i++)
839                 {
840                         if(   !strcmp(filters[i].longName, filterName)
841                            || !strcmp(filters[i].shortName, filterName))
842                         {
843                                 ppMode->lumMode &= ~filters[i].mask;
844                                 ppMode->chromMode &= ~filters[i].mask;
845
846                                 filterNameOk=1;
847                                 if(!enable) break; // user wants to disable it
848
849                                 if(q >= filters[i].minLumQuality && luma)
850                                         ppMode->lumMode|= filters[i].mask;
851                                 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
852                                         if(q >= filters[i].minChromQuality)
853                                                 ppMode->chromMode|= filters[i].mask;
854
855                                 if(filters[i].mask == LEVEL_FIX)
856                                 {
857                                         int o;
858                                         ppMode->minAllowedY= 16;
859                                         ppMode->maxAllowedY= 234;
860                                         for(o=0; options[o]!=NULL; o++)
861                                         {
862                                                 if(  !strcmp(options[o],"fullyrange")
863                                                    ||!strcmp(options[o],"f"))
864                                                 {
865                                                         ppMode->minAllowedY= 0;
866                                                         ppMode->maxAllowedY= 255;
867                                                         numOfUnknownOptions--;
868                                                 }
869                                         }
870                                 }
871                                 else if(filters[i].mask == TEMP_NOISE_FILTER)
872                                 {
873                                         int o;
874                                         int numOfNoises=0;
875
876                                         for(o=0; options[o]!=NULL; o++)
877                                         {
878                                                 char *tail;
879                                                 ppMode->maxTmpNoise[numOfNoises]=
880                                                         strtol(options[o], &tail, 0);
881                                                 if(tail!=options[o])
882                                                 {
883                                                         numOfNoises++;
884                                                         numOfUnknownOptions--;
885                                                         if(numOfNoises >= 3) break;
886                                                 }
887                                         }
888                                 }
889                                 else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
890                                      || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
891                                 {
892                                         int o;
893
894                                         for(o=0; options[o]!=NULL && o<2; o++)
895                                         {
896                                                 char *tail;
897                                                 int val= strtol(options[o], &tail, 0);
898                                                 if(tail==options[o]) break;
899
900                                                 numOfUnknownOptions--;
901                                                 if(o==0) ppMode->baseDcDiff= val;
902                                                 else ppMode->flatnessThreshold= val;
903                                         }
904                                 }
905                                 else if(filters[i].mask == FORCE_QUANT)
906                                 {
907                                         int o;
908                                         ppMode->forcedQuant= 15;
909
910                                         for(o=0; options[o]!=NULL && o<1; o++)
911                                         {
912                                                 char *tail;
913                                                 int val= strtol(options[o], &tail, 0);
914                                                 if(tail==options[o]) break;
915
916                                                 numOfUnknownOptions--;
917                                                 ppMode->forcedQuant= val;
918                                         }
919                                 }
920                         }
921                 }
922                 if(!filterNameOk) ppMode->error++;
923                 ppMode->error += numOfUnknownOptions;
924         }
925
926         av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
927         if(ppMode->error)
928         {
929                 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
930                 av_free(ppMode);
931                 return NULL;
932         }
933         return ppMode;
934 }
935
936 void pp_free_mode(pp_mode_t *mode){
937     av_free(mode);
938 }
939
/**
 * Replaces the buffer behind *p with a fresh zero-initialized one.
 * The old contents are not preserved.
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment not used by this implementation
 * @param size      size in bytes passed to av_mallocz()
 */
static void reallocAlign(void **p, int alignment, int size)
{
        void *fresh;

        av_free(*p);
        fresh= av_mallocz(size);
        *p= fresh;
}
944
945 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
946         int mbWidth = (width+15)>>4;
947         int mbHeight= (height+15)>>4;
948         int i;
949
950         c->stride= stride;
951         c->qpStride= qpStride;
952
953         reallocAlign((void **)&c->tempDst, 8, stride*24);
954         reallocAlign((void **)&c->tempSrc, 8, stride*24);
955         reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
956         reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
957         for(i=0; i<256; i++)
958                 c->yHistogram[i]= width*height/64*15/256;
959
960         for(i=0; i<3; i++)
961         {
962                 //Note: The +17*1024 is just there so i do not have to worry about r/w over the end.
963                 reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
964                 reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
965         }
966
967         reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
968         reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
969         reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
970         reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
971 }
972
973 static void global_init(void){
974         int i;
975         memset(clip_table, 0, 256);
976         for(i=256; i<512; i++)
977                 clip_table[i]= i;
978         memset(clip_table+512, 0, 256);
979 }
980
/**
 * Name callback referenced by av_codec_context_class.
 *
 * @param ptr ignored
 * @return the constant string "postproc"
 */
static const char * context_to_name(void * ptr)
{
    static const char label[] = "postproc";

    return label;
}
984
985 static AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
986
987 pp_context_t *pp_get_context(int width, int height, int cpuCaps){
988         PPContext *c= av_malloc(sizeof(PPContext));
989         int stride= (width+15)&(~15);    //assumed / will realloc if needed
990         int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
991
992         global_init();
993
994         memset(c, 0, sizeof(PPContext));
995         c->av_class = &av_codec_context_class;
996         c->cpuCaps= cpuCaps;
997         if(cpuCaps&PP_FORMAT){
998                 c->hChromaSubSample= cpuCaps&0x3;
999                 c->vChromaSubSample= (cpuCaps>>4)&0x3;
1000         }else{
1001                 c->hChromaSubSample= 1;
1002                 c->vChromaSubSample= 1;
1003         }
1004
1005         reallocBuffers(c, width, height, stride, qpStride);
1006
1007         c->frameNum=-1;
1008
1009         return c;
1010 }
1011
1012 void pp_free_context(void *vc){
1013         PPContext *c = (PPContext*)vc;
1014         int i;
1015
1016         for(i=0; i<3; i++) av_free(c->tempBlured[i]);
1017         for(i=0; i<3; i++) av_free(c->tempBluredPast[i]);
1018
1019         av_free(c->tempBlocks);
1020         av_free(c->yHistogram);
1021         av_free(c->tempDst);
1022         av_free(c->tempSrc);
1023         av_free(c->deintTemp);
1024         av_free(c->stdQPTable);
1025         av_free(c->nonBQPTable);
1026         av_free(c->forcedQPTable);
1027
1028         memset(c, 0, sizeof(PPContext));
1029
1030         av_free(c);
1031 }
1032
1033 void  pp_postprocess(uint8_t * src[3], int srcStride[3],
1034                  uint8_t * dst[3], int dstStride[3],
1035                  int width, int height,
1036                  QP_STORE_T *QP_store,  int QPStride,
1037                  pp_mode_t *vm,  void *vc, int pict_type)
1038 {
1039         int mbWidth = (width+15)>>4;
1040         int mbHeight= (height+15)>>4;
1041         PPMode *mode = (PPMode*)vm;
1042         PPContext *c = (PPContext*)vc;
1043         int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
1044         int absQPStride = FFABS(QPStride);
1045
1046         // c->stride and c->QPStride are always positive
1047         if(c->stride < minStride || c->qpStride < absQPStride)
1048                 reallocBuffers(c, width, height,
1049                                 FFMAX(minStride, c->stride),
1050                                 FFMAX(c->qpStride, absQPStride));
1051
1052         if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
1053         {
1054                 int i;
1055                 QP_store= c->forcedQPTable;
1056                 absQPStride = QPStride = 0;
1057                 if(mode->lumMode & FORCE_QUANT)
1058                         for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
1059                 else
1060                         for(i=0; i<mbWidth; i++) QP_store[i]= 1;
1061         }
1062
1063         if(pict_type & PP_PICT_TYPE_QP2){
1064                 int i;
1065                 const int count= mbHeight * absQPStride;
1066                 for(i=0; i<(count>>2); i++){
1067                         ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1068                 }
1069                 for(i<<=2; i<count; i++){
1070                         c->stdQPTable[i] = QP_store[i]>>1;
1071                 }
1072                 QP_store= c->stdQPTable;
1073                 QPStride= absQPStride;
1074         }
1075
1076 if(0){
1077 int x,y;
1078 for(y=0; y<mbHeight; y++){
1079         for(x=0; x<mbWidth; x++){
1080                 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1081         }
1082         av_log(c, AV_LOG_INFO, "\n");
1083 }
1084         av_log(c, AV_LOG_INFO, "\n");
1085 }
1086
1087         if((pict_type&7)!=3)
1088         {
1089                 if (QPStride >= 0) {
1090                         int i;
1091                         const int count= mbHeight * QPStride;
1092                         for(i=0; i<(count>>2); i++){
1093                                 ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1094                         }
1095                         for(i<<=2; i<count; i++){
1096                                 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1097                         }
1098                 } else {
1099                         int i,j;
1100                         for(i=0; i<mbHeight; i++) {
1101                                     for(j=0; j<absQPStride; j++) {
1102                                         c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1103                                 }
1104                         }
1105                 }
1106         }
1107
1108         av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1109                mode->lumMode, mode->chromMode);
1110
1111         postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1112                 width, height, QP_store, QPStride, 0, mode, c);
1113
1114         width  = (width )>>c->hChromaSubSample;
1115         height = (height)>>c->vChromaSubSample;
1116
1117         if(mode->chromMode)
1118         {
1119                 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1120                         width, height, QP_store, QPStride, 1, mode, c);
1121                 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1122                         width, height, QP_store, QPStride, 2, mode, c);
1123         }
1124         else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
1125         {
1126                 linecpy(dst[1], src[1], height, srcStride[1]);
1127                 linecpy(dst[2], src[2], height, srcStride[2]);
1128         }
1129         else
1130         {
1131                 int y;
1132                 for(y=0; y<height; y++)
1133                 {
1134                         memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1135                         memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1136                 }
1137         }
1138 }
1139