libpostproc/postprocess.c

   1 /*
   2  * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
   3  *
   4  * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
   5  *
   6  * This file is part of FFmpeg.
   7  *
   8  * FFmpeg is free software; you can redistribute it and/or modify
   9  * it under the terms of the GNU General Public License as published by
  10  * the Free Software Foundation; either version 2 of the License, or
  11  * (at your option) any later version.
  12  *
  13  * FFmpeg is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  * GNU General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * along with FFmpeg; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 /**
  24  * @file postprocess.c
  25  * postprocessing.
  26  */
  27
  28 /*
  29                         C       MMX     MMX2    3DNow   AltiVec
  30 isVertDC                Ec      Ec                      Ec
  31 isVertMinMaxOk          Ec      Ec                      Ec
  32 doVertLowPass           E               e       e       Ec
  33 doVertDefFilter         Ec      Ec      e       e       Ec
  34 isHorizDC               Ec      Ec                      Ec
  35 isHorizMinMaxOk         a       E                       Ec
  36 doHorizLowPass          E               e       e       Ec
  37 doHorizDefFilter        Ec      Ec      e       e       Ec
  38 do_a_deblock            Ec      E       Ec      E
  39 deRing                  E               e       e*      Ecp
  40 Vertical RKAlgo1        E               a       a
  41 Horizontal RKAlgo1                      a       a
  42 Vertical X1#            a               E       E
  43 Horizontal X1#          a               E       E
  44 LinIpolDeinterlace      e               E       E*
  45 CubicIpolDeinterlace    a               e       e*
  46 LinBlendDeinterlace     e               E       E*
  47 MedianDeinterlace#      E       Ec      Ec
  48 TempDeNoiser#           E               e       e       Ec
  49
  50 * i do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
  51 # more or less selfinvented filters so the exactness is not too meaningful
  52 E = Exact implementation
   53 e = almost exact implementation (slightly different rounding,...)
  54 a = alternative / approximate impl
  55 c = checked against the other implementations (-vo md5)
  56 p = partially optimized, still some work to do
  57 */
  58
  59 /*
  60 TODO:
  61 reduce the time wasted on the mem transfer
  62 unroll stuff if instructions depend too much on the prior one
  63 move YScale thing to the end instead of fixing QP
  64 write a faster and higher quality deblocking filter :)
  65 make the mainloop more flexible (variable number of blocks at once
  66         (the if/else stuff per block is slowing things down)
  67 compare the quality & speed of all filters
  68 split this huge file
  69 optimize c versions
  70 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
  71 ...
  72 */
  73
  74 //Changelog: use the Subversion log
  75
  76 #include "config.h"
  77 #include "avutil.h"
  78 #include <inttypes.h>
  79 #include <stdio.h>
  80 #include <stdlib.h>
  81 #include <string.h>
  82 #ifdef HAVE_MALLOC_H
  83 #include <malloc.h>
  84 #endif
  85 //#undef HAVE_MMX2
  86 //#define HAVE_3DNOW
  87 //#undef HAVE_MMX
  88 //#undef ARCH_X86
  89 //#define DEBUG_BRIGHTNESS
  90 #include "postprocess.h"
  91 #include "postprocess_internal.h"
  92
  93 #ifdef HAVE_ALTIVEC_H
  94 #include <altivec.h>
  95 #endif
  96
  97 #define GET_MODE_BUFFER_SIZE 500
  98 #define OPTIONS_ARRAY_SIZE 10
  99 #define BLOCK_SIZE 8
 100 #define TEMP_STRIDE 8
 101 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet
 102
#if defined(ARCH_X86)
/*
 * 64-bit constants, compiled only when ARCH_X86 is defined and declared
 * through DECLARE_ALIGNED(8, ...).
 *   wNN: the 16-bit value 0x00NN repeated in all four words.
 *   bNN: the byte 0xNN repeated in all eight bytes.
 * No C code in the visible part of this file reads them; attribute_used
 * presumably keeps them alive for inline asm in the included templates
 * -- TODO confirm against postprocess_template.c.
 */
static DECLARE_ALIGNED(8, uint64_t attribute_used, w05)= 0x0005000500050005LL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, w04)= 0x0004000400040004LL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, w20)= 0x0020002000200020LL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, b00)= 0x0000000000000000LL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, b01)= 0x0101010101010101LL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, b02)= 0x0202020202020202LL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, b08)= 0x0808080808080808LL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, b80)= 0x8080808080808080LL;
#endif
 113
/*
 * 768-byte table plus a pointer 256 entries past its start, so clip_tab may be
 * indexed from -256 to 511 without leaving clip_table.
 * The table is not filled in this part of the file; presumably it is set up
 * elsewhere as a clamp-to-0..255 lookup -- TODO confirm.
 */
static uint8_t clip_table[3*256];
static uint8_t * const clip_tab= clip_table + 256;

/* Constant named for the dering filter; it is not read in this part of the file. */
static const int attribute_used deringThreshold= 20;
 118
 119
/*
 * Table of the known filters, closed by an all-NULL/zero end marker.
 * Each entry holds a two-letter short name, a long name, three small integers
 * and a filter constant; the meaning of the integer fields is defined by
 * struct PPFilter, which is declared outside this file section.
 * The two RK1 entries are commented out and therefore not selectable.
 */
static struct PPFilter filters[]=
{
        {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
        {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
/*      {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
        {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
        {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
        {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
        {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
        {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
        {"dr", "dering",                1, 5, 6, DERING},
        {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
        {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
        {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
        {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
        {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
        {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
        {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
        {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
        {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
        {NULL, NULL,0,0,0,0} //End Marker
};
 142
/*
 * Flat list of string pairs closed by a single NULL: an alias followed by the
 * filter string it stands for. Both the long and the two-letter alias of each
 * preset map to the same expansion.
 * Presumably consumed by the mode-string parser -- TODO confirm at the use site.
 */
static const char *replaceTable[]=
{
        "default",      "hdeblock:a,vdeblock:a,dering:a",
        "de",           "hdeblock:a,vdeblock:a,dering:a",
        "fast",         "x1hdeblock:a,x1vdeblock:a,dering:a",
        "fa",           "x1hdeblock:a,x1vdeblock:a,dering:a",
        "ac",           "ha:a:128:7,va:a,dering:a",
        NULL //End Marker
};
 152
 153
 154 #if defined(ARCH_X86)
/**
 * Emit one x86 "prefetchnta" instruction with p as its address operand.
 * The asm statement declares no outputs and no clobbers, so from the
 * compiler's point of view nothing is read or written through p.
 */
static inline void prefetchnta(void *p)
{
        asm volatile(   "prefetchnta (%0)\n\t"
                : : "r" (p)
        );
}
 161
/**
 * Emit one x86 "prefetcht0" instruction with p as its address operand.
 * The asm statement declares no outputs and no clobbers.
 */
static inline void prefetcht0(void *p)
{
        asm volatile(   "prefetcht0 (%0)\n\t"
                : : "r" (p)
        );
}
 168
/**
 * Emit one x86 "prefetcht1" instruction with p as its address operand.
 * The asm statement declares no outputs and no clobbers.
 */
static inline void prefetcht1(void *p)
{
        asm volatile(   "prefetcht1 (%0)\n\t"
                : : "r" (p)
        );
}
 175
/**
 * Emit one x86 "prefetcht2" instruction with p as its address operand.
 * The asm statement declares no outputs and no clobbers.
 */
static inline void prefetcht2(void *p)
{
        asm volatile(   "prefetcht2 (%0)\n\t"
                : : "r" (p)
        );
}
 182 #endif
 183
  184 // The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
 185
 186 /**
 187  * Check if the given 8x8 Block is mostly "flat"
 188  */
 189 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
 190 {
 191         int numEq= 0;
 192         int y;
 193         const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
 194         const int dcThreshold= dcOffset*2 + 1;
 195
 196         for(y=0; y<BLOCK_SIZE; y++)
 197         {
 198                 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
 199                 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
 200                 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
 201                 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
 202                 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
 203                 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
 204                 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
 205                 src+= stride;
 206         }
 207         return numEq > c->ppMode.flatnessThreshold;
 208 }
 209
 210 /**
 211  * Check if the middle 8x8 Block in the given 8x16 block is flat
 212  */
 213 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
 214         int numEq= 0;
 215         int y;
 216         const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
 217         const int dcThreshold= dcOffset*2 + 1;
 218
 219         src+= stride*4; // src points to begin of the 8x8 Block
 220         for(y=0; y<BLOCK_SIZE-1; y++)
 221         {
 222                 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
 223                 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
 224                 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
 225                 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
 226                 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
 227                 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
 228                 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
 229                 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
 230                 src+= stride;
 231         }
 232         return numEq > c->ppMode.flatnessThreshold;
 233 }
 234
 235 static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
 236 {
 237         int i;
 238 #if 1
 239         for(i=0; i<2; i++){
 240                 if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0;
 241                 src += stride;
 242                 if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0;
 243                 src += stride;
 244                 if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0;
 245                 src += stride;
 246                 if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0;
 247                 src += stride;
 248         }
 249 #else
 250         for(i=0; i<8; i++){
 251                 if((unsigned)(src[0] - src[7] + 2*QP) > 4*QP) return 0;
 252                 src += stride;
 253         }
 254 #endif
 255         return 1;
 256 }
 257
 258 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
 259 {
 260 #if 1
 261 #if 1
 262         int x;
 263         src+= stride*4;
 264         for(x=0; x<BLOCK_SIZE; x+=4)
 265         {
 266                 if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
 267                 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
 268                 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
 269                 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
 270         }
 271 #else
 272         int x;
 273         src+= stride*3;
 274         for(x=0; x<BLOCK_SIZE; x++)
 275         {
 276                 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
 277         }
 278 #endif
 279         return 1;
 280 #else
 281         int x;
 282         src+= stride*4;
 283         for(x=0; x<BLOCK_SIZE; x++)
 284         {
 285                 int min=255;
 286                 int max=0;
 287                 int y;
 288                 for(y=0; y<8; y++){
 289                         int v= src[x + y*stride];
 290                         if(v>max) max=v;
 291                         if(v<min) min=v;
 292                 }
 293                 if(max-min > 2*QP) return 0;
 294         }
 295         return 1;
 296 #endif
 297 }
 298
 299 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
 300         if( isHorizDC_C(src, stride, c) ){
 301                 if( isHorizMinMaxOk_C(src, stride, c->QP) )
 302                         return 1;
 303                 else
 304                         return 0;
 305         }else{
 306                 return 2;
 307         }
 308 }
 309
 310 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
 311         if( isVertDC_C(src, stride, c) ){
 312                 if( isVertMinMaxOk_C(src, stride, c->QP) )
 313                         return 1;
 314                 else
 315                         return 0;
 316         }else{
 317                 return 2;
 318         }
 319 }
 320
 321 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
 322 {
 323         int y;
 324         for(y=0; y<BLOCK_SIZE; y++)
 325         {
 326                 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
 327
 328                 if(FFABS(middleEnergy) < 8*c->QP)
 329                 {
 330                         const int q=(dst[3] - dst[4])/2;
 331                         const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
 332                         const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
 333
 334                         int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
 335                         d= FFMAX(d, 0);
 336
 337                         d= (5*d + 32) >> 6;
 338                         d*= FFSIGN(-middleEnergy);
 339
 340                         if(q>0)
 341                         {
 342                                 d= d<0 ? 0 : d;
 343                                 d= d>q ? q : d;
 344                         }
 345                         else
 346                         {
 347                                 d= d>0 ? 0 : d;
 348                                 d= d<q ? q : d;
 349                         }
 350
 351                         dst[3]-= d;
 352                         dst[4]+= d;
 353                 }
 354                 dst+= stride;
 355         }
 356 }
 357
 358 /**
 359  * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
 360  * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
 361  */
 362 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
 363 {
 364         int y;
 365         for(y=0; y<BLOCK_SIZE; y++)
 366         {
 367                 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
 368                 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
 369
 370                 int sums[10];
 371                 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
 372                 sums[1] = sums[0] - first  + dst[3];
 373                 sums[2] = sums[1] - first  + dst[4];
 374                 sums[3] = sums[2] - first  + dst[5];
 375                 sums[4] = sums[3] - first  + dst[6];
 376                 sums[5] = sums[4] - dst[0] + dst[7];
 377                 sums[6] = sums[5] - dst[1] + last;
 378                 sums[7] = sums[6] - dst[2] + last;
 379                 sums[8] = sums[7] - dst[3] + last;
 380                 sums[9] = sums[8] - dst[4] + last;
 381
 382                 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
 383                 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
 384                 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
 385                 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
 386                 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
 387                 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
 388                 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
 389                 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
 390
 391                 dst+= stride;
 392         }
 393 }
 394
 395 /**
 396  * Experimental Filter 1 (Horizontal)
 397  * will not damage linear gradients
 398  * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
 399  * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
 400  * MMX2 version does correct clipping C version does not
 401  * not identical with the vertical one
 402  */
 403 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
 404 {
 405         int y;
 406         static uint64_t *lut= NULL;
 407         if(lut==NULL)
 408         {
 409                 int i;
 410                 lut = av_malloc(256*8);
 411                 for(i=0; i<256; i++)
 412                 {
 413                         int v= i < 128 ? 2*i : 2*(i-256);
 414 /*
 415 //Simulate 112242211 9-Tap filter
 416                         uint64_t a= (v/16) & 0xFF;
 417                         uint64_t b= (v/8) & 0xFF;
 418                         uint64_t c= (v/4) & 0xFF;
 419                         uint64_t d= (3*v/8) & 0xFF;
 420 */
 421 //Simulate piecewise linear interpolation
 422                         uint64_t a= (v/16) & 0xFF;
 423                         uint64_t b= (v*3/16) & 0xFF;
 424                         uint64_t c= (v*5/16) & 0xFF;
 425                         uint64_t d= (7*v/16) & 0xFF;
 426                         uint64_t A= (0x100 - a)&0xFF;
 427                         uint64_t B= (0x100 - b)&0xFF;
 428                         uint64_t C= (0x100 - c)&0xFF;
 429                         uint64_t D= (0x100 - c)&0xFF;
 430
 431                         lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
 432                                 (D<<24) | (C<<16) | (B<<8) | (A);
 433                         //lut[i] = (v<<32) | (v<<24);
 434                 }
 435         }
 436
 437         for(y=0; y<BLOCK_SIZE; y++)
 438         {
 439                 int a= src[1] - src[2];
 440                 int b= src[3] - src[4];
 441                 int c= src[5] - src[6];
 442
 443                 int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
 444
 445                 if(d < QP)
 446                 {
 447                         int v = d * FFSIGN(-b);
 448
 449                         src[1] +=v/8;
 450                         src[2] +=v/4;
 451                         src[3] +=3*v/8;
 452                         src[4] -=3*v/8;
 453                         src[5] -=v/4;
 454                         src[6] -=v/8;
 455
 456                 }
 457                 src+=stride;
 458         }
 459 }
 460
 461 /**
 462  * accurate deblock filter
 463  */
 464 static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
 465         int y;
 466         const int QP= c->QP;
 467         const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
 468         const int dcThreshold= dcOffset*2 + 1;
 469 //START_TIMER
 470         src+= step*4; // src points to begin of the 8x8 Block
 471         for(y=0; y<8; y++){
 472                 int numEq= 0;
 473
 474                 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
 475                 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
 476                 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
 477                 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
 478                 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
 479                 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
 480                 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
 481                 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
 482                 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
 483                 if(numEq > c->ppMode.flatnessThreshold){
 484                         int min, max, x;
 485
 486                         if(src[0] > src[step]){
 487                             max= src[0];
 488                             min= src[step];
 489                         }else{
 490                             max= src[step];
 491                             min= src[0];
 492                         }
 493                         for(x=2; x<8; x+=2){
 494                                 if(src[x*step] > src[(x+1)*step]){
 495                                         if(src[x    *step] > max) max= src[ x   *step];
 496                                         if(src[(x+1)*step] < min) min= src[(x+1)*step];
 497                                 }else{
 498                                         if(src[(x+1)*step] > max) max= src[(x+1)*step];
 499                                         if(src[ x   *step] < min) min= src[ x   *step];
 500                                 }
 501                         }
 502                         if(max-min < 2*QP){
 503                                 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
 504                                 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
 505
 506                                 int sums[10];
 507                                 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
 508                                 sums[1] = sums[0] - first       + src[3*step];
 509                                 sums[2] = sums[1] - first       + src[4*step];
 510                                 sums[3] = sums[2] - first       + src[5*step];
 511                                 sums[4] = sums[3] - first       + src[6*step];
 512                                 sums[5] = sums[4] - src[0*step] + src[7*step];
 513                                 sums[6] = sums[5] - src[1*step] + last;
 514                                 sums[7] = sums[6] - src[2*step] + last;
 515                                 sums[8] = sums[7] - src[3*step] + last;
 516                                 sums[9] = sums[8] - src[4*step] + last;
 517
 518                                 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
 519                                 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
 520                                 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
 521                                 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
 522                                 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
 523                                 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
 524                                 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
 525                                 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
 526                         }
 527                 }else{
 528                         const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
 529
 530                         if(FFABS(middleEnergy) < 8*QP)
 531                         {
 532                                 const int q=(src[3*step] - src[4*step])/2;
 533                                 const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
 534                                 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
 535
 536                                 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
 537                                 d= FFMAX(d, 0);
 538
 539                                 d= (5*d + 32) >> 6;
 540                                 d*= FFSIGN(-middleEnergy);
 541
 542                                 if(q>0)
 543                                 {
 544                                         d= d<0 ? 0 : d;
 545                                         d= d>q ? q : d;
 546                                 }
 547                                 else
 548                                 {
 549                                         d= d>0 ? 0 : d;
 550                                         d= d<q ? q : d;
 551                                 }
 552
 553                                 src[3*step]-= d;
 554                                 src[4*step]+= d;
 555                         }
 556                 }
 557
 558                 src += stride;
 559         }
 560 /*if(step==16){
 561     STOP_TIMER("step16")
 562 }else{
 563     STOP_TIMER("stepX")
 564 }*/
 565 }
 566
//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one

/*
 * Step 1: decide which variants of the template get compiled.
 * Every COMPILE_xxx macro defined here triggers one inclusion of
 * postprocess_template.c in step 2. With RUNTIME_CPUDETECT the C variant is
 * always selected and, on ARCH_X86, the MMX, MMX2 and 3DNow variants as well.
 */
//Plain C versions
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
#define COMPILE_C
#endif

#ifdef ARCH_POWERPC
#ifdef HAVE_ALTIVEC
#define COMPILE_ALTIVEC
#endif //HAVE_ALTIVEC
#endif //ARCH_POWERPC

#if defined(ARCH_X86)

#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX
#endif

#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX2
#endif

#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_3DNOW
#endif
#endif /* defined(ARCH_X86) */

/*
 * Step 2: instantiate the template once per selected variant.
 * The configured HAVE_xxx macros are dropped first; each section below then
 * sets exactly the HAVE_xxx combination it wants the template to see and
 * redefines RENAME() so the generated functions get a distinct suffix
 * (_C, _altivec, _MMX, _MMX2, _3DNow).
 */
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef HAVE_ALTIVEC

#ifdef COMPILE_C
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _C
#include "postprocess_template.c"
#endif

#ifdef ARCH_POWERPC
#ifdef COMPILE_ALTIVEC
#undef RENAME
#define HAVE_ALTIVEC
#define RENAME(a) a ## _altivec
#include "postprocess_altivec_template.c"
#include "postprocess_template.c"
#endif
#endif //ARCH_POWERPC

//MMX versions
#ifdef COMPILE_MMX
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX
#include "postprocess_template.c"
#endif

//MMX2 versions
#ifdef COMPILE_MMX2
#undef RENAME
#define HAVE_MMX
#define HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX2
#include "postprocess_template.c"
#endif

//3DNOW versions
#ifdef COMPILE_3DNOW
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#define HAVE_3DNOW
#define RENAME(a) a ## _3DNow
#include "postprocess_template.c"
#endif

// Note: from here on the HAVE_xyz macros no longer reflect the build
// configuration; they hold whatever the last included section above left behind.
 646
 647 // minor note: the HAVE_xyz is messed up after that line so do not use it.
 648
/**
 * Forward the arguments to exactly one CPU-specific postProcess_* variant.
 *
 * The mode *vm is first copied into the context's ppMode field. With
 * RUNTIME_CPUDETECT the variant is chosen from c->cpuCaps at run time;
 * otherwise it is chosen at compile time from the HAVE_xxx macros as they
 * stand at this point in the file (i.e. as left by the template inclusions
 * above, not as originally configured).
 *
 * @param vm opaque mode handle, cast to PPMode *
 * @param vc opaque context handle, cast to PPContext *
 * All other parameters are passed through unchanged.
 */
static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
        QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
{
        PPContext *c= (PPContext *)vc;
        PPMode *ppMode= (PPMode *)vm;
        c->ppMode= *ppMode; //FIXME

        // Using ifs here as they are faster than function pointers although the
        // difference would not be measurable here but it is much better because
        // someone might exchange the CPU without restarting MPlayer ;)
#ifdef RUNTIME_CPUDETECT
#if defined(ARCH_X86)
        // ordered by speed, fastest first
        if(c->cpuCaps & PP_CPU_CAPS_MMX2)
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
        else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
        else if(c->cpuCaps & PP_CPU_CAPS_MMX)
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
        else
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#else
#ifdef ARCH_POWERPC
#ifdef HAVE_ALTIVEC
        if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
                postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
        else
#endif
#endif
                // reached directly, or as the else branch of the AltiVec test above
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#endif
#else //RUNTIME_CPUDETECT
        // compile-time selection: exactly one of the calls below survives preprocessing
#ifdef HAVE_MMX2
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#elif defined (HAVE_3DNOW)
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#elif defined (HAVE_MMX)
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#elif defined (HAVE_ALTIVEC)
                postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#else
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#endif
#endif //!RUNTIME_CPUDETECT
}
 694
 695 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
 696 //        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
 697
 698 /* -pp Command line Help
 699 */
 700 char *pp_help=
 701 "Available postprocessing filters:\n"
 702 "Filters                        Options\n"
 703 "short  long name       short   long option     Description\n"
 704 "*      *               a       autoq           CPU power dependent enabler\n"
 705 "                       c       chrom           chrominance filtering enabled\n"
 706 "                       y       nochrom         chrominance filtering disabled\n"
 707 "                       n       noluma          luma filtering disabled\n"
 708 "hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
 709 "       1. difference factor: default=32, higher -> more deblocking\n"
 710 "       2. flatness threshold: default=39, lower -> more deblocking\n"
 711 "                       the h & v deblocking filters share these\n"
 712 "                       so you can't set different thresholds for h / v\n"
 713 "vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
 714 "ha     hadeblock       (2 threshold)           horizontal deblocking filter\n"
 715 "va     vadeblock       (2 threshold)           vertical deblocking filter\n"
 716 "h1     x1hdeblock                              experimental h deblock filter 1\n"
 717 "v1     x1vdeblock                              experimental v deblock filter 1\n"
 718 "dr     dering                                  deringing filter\n"
 719 "al     autolevels                              automatic brightness / contrast\n"
 720 "                       f        fullyrange     stretch luminance to (0..255)\n"
 721 "lb     linblenddeint                           linear blend deinterlacer\n"
 722 "li     linipoldeint                            linear interpolating deinterlace\n"
 723 "ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
 724 "md     mediandeint                             median deinterlacer\n"
 725 "fd     ffmpegdeint                             ffmpeg deinterlacer\n"
 726 "l5     lowpass5                                FIR lowpass deinterlacer\n"
 727 "de     default                                 hb:a,vb:a,dr:a\n"
 728 "fa     fast                                    h1:a,v1:a,dr:a\n"
 729 "ac                                             ha:a:128:7,va:a,dr:a\n"
 730 "tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
 731 "                     1. <= 2. <= 3.            larger -> stronger filtering\n"
 732 "fq     forceQuant      <quantizer>             force quantizer\n"
 733 "Usage:\n"
 734 "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
 735 "long form example:\n"
 736 "vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
 737 "short form example:\n"
 738 "vb:a/hb:a/lb                                   de,-vb\n"
 739 "more examples:\n"
 740 "tn:64:128:256\n"
 741 "\n"
 742 ;
 743
 744 pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
 745 {
 746         char temp[GET_MODE_BUFFER_SIZE];
 747         char *p= temp;
 748         const char *filterDelimiters= ",/";
 749         const char *optionDelimiters= ":";
 750         struct PPMode *ppMode;
 751         char *filterToken;
 752
 753         ppMode= av_malloc(sizeof(PPMode));
 754
 755         ppMode->lumMode= 0;
 756         ppMode->chromMode= 0;
 757         ppMode->maxTmpNoise[0]= 700;
 758         ppMode->maxTmpNoise[1]= 1500;
 759         ppMode->maxTmpNoise[2]= 3000;
 760         ppMode->maxAllowedY= 234;
 761         ppMode->minAllowedY= 16;
 762         ppMode->baseDcDiff= 256/8;
 763         ppMode->flatnessThreshold= 56-16-1;
 764         ppMode->maxClippedThreshold= 0.01;
 765         ppMode->error=0;
 766
 767         strncpy(temp, name, GET_MODE_BUFFER_SIZE);
 768
 769         av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
 770
 771         for(;;){
 772                 char *filterName;
 773                 int q= 1000000; //PP_QUALITY_MAX;
 774                 int chrom=-1;
 775                 int luma=-1;
 776                 char *option;
 777                 char *options[OPTIONS_ARRAY_SIZE];
 778                 int i;
 779                 int filterNameOk=0;
 780                 int numOfUnknownOptions=0;
 781                 int enable=1; //does the user want us to enabled or disabled the filter
 782
 783                 filterToken= strtok(p, filterDelimiters);
 784                 if(filterToken == NULL) break;
 785                 p+= strlen(filterToken) + 1; // p points to next filterToken
 786                 filterName= strtok(filterToken, optionDelimiters);
 787                 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
 788
 789                 if(*filterName == '-')
 790                 {
 791                         enable=0;
 792                         filterName++;
 793                 }
 794
 795                 for(;;){ //for all options
 796                         option= strtok(NULL, optionDelimiters);
 797                         if(option == NULL) break;
 798
 799                         av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
 800                         if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
 801                         else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
 802                         else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
 803                         else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
 804                         else
 805                         {
 806                                 options[numOfUnknownOptions] = option;
 807                                 numOfUnknownOptions++;
 808                         }
 809                         if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
 810                 }
 811                 options[numOfUnknownOptions] = NULL;
 812
 813                 /* replace stuff from the replace Table */
 814                 for(i=0; replaceTable[2*i]!=NULL; i++)
 815                 {
 816                         if(!strcmp(replaceTable[2*i], filterName))
 817                         {
 818                                 int newlen= strlen(replaceTable[2*i + 1]);
 819                                 int plen;
 820                                 int spaceLeft;
 821
 822                                 if(p==NULL) p= temp, *p=0;      //last filter
 823                                 else p--, *p=',';               //not last filter
 824
 825                                 plen= strlen(p);
 826                                 spaceLeft= p - temp + plen;
 827                                 if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE)
 828                                 {
 829                                         ppMode->error++;
 830                                         break;
 831                                 }
 832                                 memmove(p + newlen, p, plen+1);
 833                                 memcpy(p, replaceTable[2*i + 1], newlen);
 834                                 filterNameOk=1;
 835                         }
 836                 }
 837
 838                 for(i=0; filters[i].shortName!=NULL; i++)
 839                 {
 840                         if(   !strcmp(filters[i].longName, filterName)
 841                            || !strcmp(filters[i].shortName, filterName))
 842                         {
 843                                 ppMode->lumMode &= ~filters[i].mask;
 844                                 ppMode->chromMode &= ~filters[i].mask;
 845
 846                                 filterNameOk=1;
 847                                 if(!enable) break; // user wants to disable it
 848
 849                                 if(q >= filters[i].minLumQuality && luma)
 850                                         ppMode->lumMode|= filters[i].mask;
 851                                 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
 852                                         if(q >= filters[i].minChromQuality)
 853                                                 ppMode->chromMode|= filters[i].mask;
 854
 855                                 if(filters[i].mask == LEVEL_FIX)
 856                                 {
 857                                         int o;
 858                                         ppMode->minAllowedY= 16;
 859                                         ppMode->maxAllowedY= 234;
 860                                         for(o=0; options[o]!=NULL; o++)
 861                                         {
 862                                                 if(  !strcmp(options[o],"fullyrange")
 863                                                    ||!strcmp(options[o],"f"))
 864                                                 {
 865                                                         ppMode->minAllowedY= 0;
 866                                                         ppMode->maxAllowedY= 255;
 867                                                         numOfUnknownOptions--;
 868                                                 }
 869                                         }
 870                                 }
 871                                 else if(filters[i].mask == TEMP_NOISE_FILTER)
 872                                 {
 873                                         int o;
 874                                         int numOfNoises=0;
 875
 876                                         for(o=0; options[o]!=NULL; o++)
 877                                         {
 878                                                 char *tail;
 879                                                 ppMode->maxTmpNoise[numOfNoises]=
 880                                                         strtol(options[o], &tail, 0);
 881                                                 if(tail!=options[o])
 882                                                 {
 883                                                         numOfNoises++;
 884                                                         numOfUnknownOptions--;
 885                                                         if(numOfNoises >= 3) break;
 886                                                 }
 887                                         }
 888                                 }
 889                                 else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
 890                                      || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
 891                                 {
 892                                         int o;
 893
 894                                         for(o=0; options[o]!=NULL && o<2; o++)
 895                                         {
 896                                                 char *tail;
 897                                                 int val= strtol(options[o], &tail, 0);
 898                                                 if(tail==options[o]) break;
 899
 900                                                 numOfUnknownOptions--;
 901                                                 if(o==0) ppMode->baseDcDiff= val;
 902                                                 else ppMode->flatnessThreshold= val;
 903                                         }
 904                                 }
 905                                 else if(filters[i].mask == FORCE_QUANT)
 906                                 {
 907                                         int o;
 908                                         ppMode->forcedQuant= 15;
 909
 910                                         for(o=0; options[o]!=NULL && o<1; o++)
 911                                         {
 912                                                 char *tail;
 913                                                 int val= strtol(options[o], &tail, 0);
 914                                                 if(tail==options[o]) break;
 915
 916                                                 numOfUnknownOptions--;
 917                                                 ppMode->forcedQuant= val;
 918                                         }
 919                                 }
 920                         }
 921                 }
 922                 if(!filterNameOk) ppMode->error++;
 923                 ppMode->error += numOfUnknownOptions;
 924         }
 925
 926         av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
 927         if(ppMode->error)
 928         {
 929                 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
 930                 av_free(ppMode);
 931                 return NULL;
 932         }
 933         return ppMode;
 934 }
 935
 936 void pp_free_mode(pp_mode_t *mode){
 937     av_free(mode);
 938 }
 939
 940 static void reallocAlign(void **p, int alignment, int size){
 941         av_free(*p);
 942         *p= av_mallocz(size);
 943 }
 944
 945 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
 946         int mbWidth = (width+15)>>4;
 947         int mbHeight= (height+15)>>4;
 948         int i;
 949
 950         c->stride= stride;
 951         c->qpStride= qpStride;
 952
 953         reallocAlign((void **)&c->tempDst, 8, stride*24);
 954         reallocAlign((void **)&c->tempSrc, 8, stride*24);
 955         reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
 956         reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
 957         for(i=0; i<256; i++)
 958                 c->yHistogram[i]= width*height/64*15/256;
 959
 960         for(i=0; i<3; i++)
 961         {
 962                 //Note: The +17*1024 is just there so i do not have to worry about r/w over the end.
 963                 reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
 964                 reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
 965         }
 966
 967         reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
 968         reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
 969         reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
 970         reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
 971 }
 972
 973 static void global_init(void){
 974         int i;
 975         memset(clip_table, 0, 256);
 976         for(i=256; i<512; i++)
 977                 clip_table[i]= i;
 978         memset(clip_table+512, 0, 256);
 979 }
 980
 981 static const char * context_to_name(void * ptr) {
 982     return "postproc";
 983 }
 984
 985 static AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
 986
 987 pp_context_t *pp_get_context(int width, int height, int cpuCaps){
 988         PPContext *c= av_malloc(sizeof(PPContext));
 989         int stride= (width+15)&(~15);    //assumed / will realloc if needed
 990         int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
 991
 992         global_init();
 993
 994         memset(c, 0, sizeof(PPContext));
 995         c->av_class = &av_codec_context_class;
 996         c->cpuCaps= cpuCaps;
 997         if(cpuCaps&PP_FORMAT){
 998                 c->hChromaSubSample= cpuCaps&0x3;
 999                 c->vChromaSubSample= (cpuCaps>>4)&0x3;
1000         }else{
1001                 c->hChromaSubSample= 1;
1002                 c->vChromaSubSample= 1;
1003         }
1004
1005         reallocBuffers(c, width, height, stride, qpStride);
1006
1007         c->frameNum=-1;
1008
1009         return c;
1010 }
1011
1012 void pp_free_context(void *vc){
1013         PPContext *c = (PPContext*)vc;
1014         int i;
1015
1016         for(i=0; i<3; i++) av_free(c->tempBlured[i]);
1017         for(i=0; i<3; i++) av_free(c->tempBluredPast[i]);
1018
1019         av_free(c->tempBlocks);
1020         av_free(c->yHistogram);
1021         av_free(c->tempDst);
1022         av_free(c->tempSrc);
1023         av_free(c->deintTemp);
1024         av_free(c->stdQPTable);
1025         av_free(c->nonBQPTable);
1026         av_free(c->forcedQPTable);
1027
1028         memset(c, 0, sizeof(PPContext));
1029
1030         av_free(c);
1031 }
1032
1033 void  pp_postprocess(uint8_t * src[3], int srcStride[3],
1034                  uint8_t * dst[3], int dstStride[3],
1035                  int width, int height,
1036                  QP_STORE_T *QP_store,  int QPStride,
1037                  pp_mode_t *vm,  void *vc, int pict_type)
1038 {
1039         int mbWidth = (width+15)>>4;
1040         int mbHeight= (height+15)>>4;
1041         PPMode *mode = (PPMode*)vm;
1042         PPContext *c = (PPContext*)vc;
1043         int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
1044         int absQPStride = FFABS(QPStride);
1045
1046         // c->stride and c->QPStride are always positive
1047         if(c->stride < minStride || c->qpStride < absQPStride)
1048                 reallocBuffers(c, width, height,
1049                                 FFMAX(minStride, c->stride),
1050                                 FFMAX(c->qpStride, absQPStride));
1051
1052         if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
1053         {
1054                 int i;
1055                 QP_store= c->forcedQPTable;
1056                 absQPStride = QPStride = 0;
1057                 if(mode->lumMode & FORCE_QUANT)
1058                         for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
1059                 else
1060                         for(i=0; i<mbWidth; i++) QP_store[i]= 1;
1061         }
1062
1063         if(pict_type & PP_PICT_TYPE_QP2){
1064                 int i;
1065                 const int count= mbHeight * absQPStride;
1066                 for(i=0; i<(count>>2); i++){
1067                         ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1068                 }
1069                 for(i<<=2; i<count; i++){
1070                         c->stdQPTable[i] = QP_store[i]>>1;
1071                 }
1072                 QP_store= c->stdQPTable;
1073                 QPStride= absQPStride;
1074         }
1075
1076 if(0){
1077 int x,y;
1078 for(y=0; y<mbHeight; y++){
1079         for(x=0; x<mbWidth; x++){
1080                 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1081         }
1082         av_log(c, AV_LOG_INFO, "\n");
1083 }
1084         av_log(c, AV_LOG_INFO, "\n");
1085 }
1086
1087         if((pict_type&7)!=3)
1088         {
1089                 if (QPStride >= 0) {
1090                         int i;
1091                         const int count= mbHeight * QPStride;
1092                         for(i=0; i<(count>>2); i++){
1093                                 ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1094                         }
1095                         for(i<<=2; i<count; i++){
1096                                 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1097                         }
1098                 } else {
1099                         int i,j;
1100                         for(i=0; i<mbHeight; i++) {
1101                                     for(j=0; j<absQPStride; j++) {
1102                                         c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1103                                 }
1104                         }
1105                 }
1106         }
1107
1108         av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1109                mode->lumMode, mode->chromMode);
1110
1111         postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1112                 width, height, QP_store, QPStride, 0, mode, c);
1113
1114         width  = (width )>>c->hChromaSubSample;
1115         height = (height)>>c->vChromaSubSample;
1116
1117         if(mode->chromMode)
1118         {
1119                 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1120                         width, height, QP_store, QPStride, 1, mode, c);
1121                 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1122                         width, height, QP_store, QPStride, 2, mode, c);
1123         }
1124         else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
1125         {
1126                 linecpy(dst[1], src[1], height, srcStride[1]);
1127                 linecpy(dst[2], src[2], height, srcStride[2]);
1128         }
1129         else
1130         {
1131                 int y;
1132                 for(y=0; y<height; y++)
1133                 {
1134                         memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1135                         memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1136                 }
1137         }
1138 }
1139