libavcodec/h264.c

   1 /*
   2  * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
   3  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file h264.c
  24  * H.264 / AVC / MPEG4 part10 codec.
  25  * @author Michael Niedermayer <michaelni@gmx.at>
  26  */
  27
  28 #include "dsputil.h"
  29 #include "avcodec.h"
  30 #include "mpegvideo.h"
  31 #include "h264.h"
  32 #include "h264data.h"
  33 #include "h264_parser.h"
  34 #include "golomb.h"
  35 #include "rectangle.h"
  36 #include "vdpau_internal.h"
  37
  38 #include "cabac.h"
  39 #ifdef ARCH_X86
  40 #include "x86/h264_i386.h"
  41 #endif
  42
  43 //#undef NDEBUG
  44 #include <assert.h>
  45
  46 /**
  47  * Value of Picture.reference when Picture is not a reference picture, but
  48  * is held for delayed output.
  49  */
  50 #define DELAYED_PIC_REF 4
  51
  52 static VLC coeff_token_vlc[4];
  53 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
  54 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
  55
  56 static VLC chroma_dc_coeff_token_vlc;
  57 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
  58 static const int chroma_dc_coeff_token_vlc_table_size = 256;
  59
  60 static VLC total_zeros_vlc[15];
  61 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
  62 static const int total_zeros_vlc_tables_size = 512;
  63
  64 static VLC chroma_dc_total_zeros_vlc[3];
  65 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
  66 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
  67
  68 static VLC run_vlc[6];
  69 static VLC_TYPE run_vlc_tables[6][8][2];
  70 static const int run_vlc_tables_size = 8;
  71
  72 static VLC run7_vlc;
  73 static VLC_TYPE run7_vlc_table[96][2];
  74 static const int run7_vlc_table_size = 96;
  75
  76 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
  77 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
  78 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
  79 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
  80 static Picture * remove_long(H264Context *h, int i, int ref_mask);
  81
  82 static av_always_inline uint32_t pack16to32(int a, int b){
  83 #ifdef WORDS_BIGENDIAN
  84    return (b&0xFFFF) + (a<<16);
  85 #else
  86    return (a&0xFFFF) + (b<<16);
  87 #endif
  88 }
  89
  90 static const uint8_t rem6[52]={
  91 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
  92 };
  93
  94 static const uint8_t div6[52]={
  95 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
  96 };
  97
  98 static const int left_block_options[4][8]={
  99     {0,1,2,3,7,10,8,11},
 100     {2,2,3,3,8,11,8,11},
 101     {0,0,1,1,7,10,7,10},
 102     {0,2,0,2,7,10,7,10}
 103 };
 104
 105 #define LEVEL_TAB_BITS 8
 106 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
 107
 108 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
         /*
          * Loads neighbour data for the macroblock at h->mb_xy into the cache
          * arrays of the H264Context.
          *
          * Steps, in order:
          *  - compute the indices of the top, top-left, top-right and the two
          *    left neighbours (with extra adjustments when FRAME_MBAFF is set)
          *    and store top/left in h->top_mb_xy and h->left_mb_xy[];
          *  - determine the neighbour types;
          *  - only when for_deblock is 0 and mb_type is intra: set the
          *    *_samples_available masks and, for intra4x4, the top/left entries
          *    of intra4x4_pred_mode_cache;
          *  - fill the top/left entries of non_zero_count_cache and, with
          *    h->pps.cabac, h->top_cbp and h->left_cbp;
          *  - for inter or direct mb_type: fill mv_cache/ref_cache from the top
          *    and left neighbours and, unless skipped by one of the "continue"
          *    statements, also top-left/top-right, mvd_cache and direct_cache,
          *    then convert neighbour values between field and frame units in
          *    MBAFF frames;
          *  - set h->neighbor_transform_size.
          *
          * mb_type:     type flags of the current macroblock.
          * for_deblock: when nonzero, the function returns at once (before
          *              writing anything) if FRAME_MBAFF is not set and either
          *              h->slice_num == 1 or the top neighbour has the same
          *              slice_table entry as the current macroblock. Otherwise
          *              neighbour types are accepted from any macroblock whose
          *              slice_table entry is below 0xFFFF instead of only from
          *              the current slice, the top-left/top-right types are
          *              forced to 0 and the intra section is skipped.
          */
 109     MpegEncContext * const s = &h->s;
 110     const int mb_xy= h->mb_xy;
 111     int topleft_xy, top_xy, topright_xy, left_xy[2];
 112     int topleft_type, top_type, topright_type, left_type[2];
 113     const int * left_block;
 114     int topleft_partition= -1;
 115     int i;
 116
         /* default top neighbour: mb_stride << FIELD_PICTURE entries back */
 117     top_xy     = mb_xy  - (s->mb_stride << FIELD_PICTURE);
 118
 119     //FIXME deblocking could skip the intra and nnz parts.
 120     if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
 121         return;
 122
 123     /* Wow, what a mess, why didn't they simplify the interlacing & intra
 124      * stuff, I can't imagine that these complex rules are worth it. */
 125
         /* defaults: diagonal neighbours next to the top one, both left
          * entries point at the macroblock directly to the left, and the
          * first row of left_block_options is used */
 126     topleft_xy = top_xy - 1;
 127     topright_xy= top_xy + 1;
 128     left_xy[1] = left_xy[0] = mb_xy-1;
 129     left_block = left_block_options[0];
 130     if(FRAME_MBAFF){
             /* Neighbours are located relative to the macroblock pair (rows
              * mb_y & ~1 and the one below); which row of a neighbouring pair
              * is used depends on the field flags of the current macroblock
              * and of that pair, and on whether the current macroblock is the
              * bottom one of its pair. */
 131         const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
 132         const int top_pair_xy      = pair_xy     - s->mb_stride;
 133         const int topleft_pair_xy  = top_pair_xy - 1;
 134         const int topright_pair_xy = top_pair_xy + 1;
 135         const int topleft_mb_field_flag  = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
 136         const int top_mb_field_flag      = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
 137         const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
 138         const int left_mb_field_flag     = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
 139         const int curr_mb_field_flag     = IS_INTERLACED(mb_type);
 140         const int bottom = (s->mb_y & 1);
 141         tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);
 142
 143         if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
 144             top_xy -= s->mb_stride;
 145         }
 146         if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
 147             topleft_xy -= s->mb_stride;
 148         } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
 149             topleft_xy += s->mb_stride;
 150             // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
 151             topleft_partition = 0;
 152         }
 153         if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
 154             topright_xy -= s->mb_stride;
 155         }
 156         if (left_mb_field_flag != curr_mb_field_flag) {
                 /* field/frame mismatch with the left pair: start from the top
                  * macroblock of that pair and pick another row of
                  * left_block_options */
 157             left_xy[1] = left_xy[0] = pair_xy - 1;
 158             if (curr_mb_field_flag) {
 159                 left_xy[1] += s->mb_stride;
 160                 left_block = left_block_options[3];
 161             } else {
 162                 left_block= left_block_options[2 - bottom];
 163             }
 164         }
 165     }
 166
         /* publish the neighbour indices in the context */
 167     h->top_mb_xy = top_xy;
 168     h->left_mb_xy[0] = left_xy[0];
 169     h->left_mb_xy[1] = left_xy[1];
 170     if(for_deblock){
             /* deblocking: a neighbour counts if its slice_table entry is below
              * 0xFFFF, whatever slice it belongs to; diagonals are not used */
 171         topleft_type = 0;
 172         topright_type = 0;
 173         top_type     = h->slice_table[top_xy     ] < 0xFFFF ? s->current_picture.mb_type[top_xy]     : 0;
 174         left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
 175         left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
 176
 177         if(MB_MBAFF && !IS_INTRA(mb_type)){
 178             int list;
 179             for(list=0; list<h->list_count; list++){
 180                 //These values where changed for ease of performing MC, we need to change them back
 181                 //FIXME maybe we can make MC and loop filter use the same values or prevent
 182                 //the MC code from changing ref_cache and rather use a temporary array.
 183                 if(USES_LIST(mb_type,list)){
                         /* reload the current macroblock's ref_cache entries
                          * from current_picture.ref_index, writing four cache
                          * bytes per 32-bit store */
 184                     int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
 185                     *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
 186                     *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
 187                     ref += h->b8_stride;
 188                     *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
 189                     *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
 190                 }
 191             }
 192         }
 193     }else{
             /* decoding: only neighbours in the current slice are available */
 194         topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
 195         top_type     = h->slice_table[top_xy     ] == h->slice_num ? s->current_picture.mb_type[top_xy]     : 0;
 196         topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
 197         left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
 198         left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
 199
 200     if(IS_INTRA(mb_type)){
             /* A neighbour supplies samples only if (type & type_mask) is
              * nonzero. Without constrained_intra_pred any nonzero type passes;
              * with it the mask is IS_INTRA(-1), presumably the intra type
              * bits only (TODO confirm against the macro definition). */
 201         int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
             /* start with the "everything available" masks, then clear bits
              * for each neighbour that fails the test */
 202         h->topleft_samples_available=
 203         h->top_samples_available=
 204         h->left_samples_available= 0xFFFF;
 205         h->topright_samples_available= 0xEEEA;
 206
 207         if(!(top_type & type_mask)){
 208             h->topleft_samples_available= 0xB3FF;
 209             h->top_samples_available= 0x33FF;
 210             h->topright_samples_available= 0x26EA;
 211         }
 212         if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
 213             if(IS_INTERLACED(mb_type)){
                     /* the two left entries are tested separately and clear
                      * different bits of the masks */
 214                 if(!(left_type[0] & type_mask)){
 215                     h->topleft_samples_available&= 0xDFFF;
 216                     h->left_samples_available&= 0x5FFF;
 217                 }
 218                 if(!(left_type[1] & type_mask)){
 219                     h->topleft_samples_available&= 0xFF5F;
 220                     h->left_samples_available&= 0xFF5F;
 221                 }
 222             }else{
                     /* both the left macroblock and the one an mb_stride below
                      * it must pass the test */
 223                 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
 224                                 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
 225                 assert(left_xy[0] == left_xy[1]);
 226                 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
 227                     h->topleft_samples_available&= 0xDF5F;
 228                     h->left_samples_available&= 0x5F5F;
 229                 }
 230             }
 231         }else{
 232             if(!(left_type[0] & type_mask)){
 233                 h->topleft_samples_available&= 0xDF5F;
 234                 h->left_samples_available&= 0x5F5F;
 235             }
 236         }
 237
 238         if(!(topleft_type & type_mask))
 239             h->topleft_samples_available&= 0x7FFF;
 240
 241         if(!(topright_type & type_mask))
 242             h->topright_samples_available&= 0xFBFF;
 243
 244         if(IS_INTRA4x4(mb_type)){
                 /* Neighbour prediction modes: copied from an intra4x4
                  * neighbour; otherwise 2 if the neighbour passes the
                  * availability test and -1 if it does not. */
 245             if(IS_INTRA4x4(top_type)){
 246                 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
 247                 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
 248                 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
 249                 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
 250             }else{
 251                 int pred;
 252                 if(!(top_type & type_mask))
 253                     pred= -1;
 254                 else{
 255                     pred= 2;
 256                 }
 257                 h->intra4x4_pred_mode_cache[4+8*0]=
 258                 h->intra4x4_pred_mode_cache[5+8*0]=
 259                 h->intra4x4_pred_mode_cache[6+8*0]=
 260                 h->intra4x4_pred_mode_cache[7+8*0]= pred;
 261             }
 262             for(i=0; i<2; i++){
 263                 if(IS_INTRA4x4(left_type[i])){
 264                     h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
 265                     h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
 266                 }else{
 267                     int pred;
 268                     if(!(left_type[i] & type_mask))
 269                         pred= -1;
 270                     else{
 271                         pred= 2;
 272                     }
 273                     h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
 274                     h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
 275                 }
 276             }
 277         }
 278     }
 279     }
 280
 281
 282 /*
 283 0 . T T. T T T T
 284 1 L . .L . . . .
 285 2 L . .L . . . .
 286 3 . T TL . . . .
 287 4 L . .L . . . .
 288 5 L . .. . . . .
 289 */
 290 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
         /* Non-zero counts of the top neighbour. A missing neighbour yields 64,
          * or 0 when CABAC is in use and the current macroblock is not intra. */
 291     if(top_type){
 292         h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
 293         h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
 294         h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
 295         h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
 296
 297         h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
 298         h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
 299
 300         h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
 301         h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
 302
 303     }else{
 304         h->non_zero_count_cache[4+8*0]=
 305         h->non_zero_count_cache[5+8*0]=
 306         h->non_zero_count_cache[6+8*0]=
 307         h->non_zero_count_cache[7+8*0]=
 308
 309         h->non_zero_count_cache[1+8*0]=
 310         h->non_zero_count_cache[2+8*0]=
 311
 312         h->non_zero_count_cache[1+8*3]=
 313         h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
 314
 315     }
 316
         /* Same for the two left entries; left_block selects which stored
          * entries of the left macroblock are read. */
 317     for (i=0; i<2; i++) {
 318         if(left_type[i]){
 319             h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
 320             h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
 321             h->non_zero_count_cache[0+8*1 +   8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
 322             h->non_zero_count_cache[0+8*4 +   8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
 323         }else{
 324             h->non_zero_count_cache[3+8*1 + 2*8*i]=
 325             h->non_zero_count_cache[3+8*2 + 2*8*i]=
 326             h->non_zero_count_cache[0+8*1 +   8*i]=
 327             h->non_zero_count_cache[0+8*4 +   8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
 328         }
 329     }
 330
 331     if( h->pps.cabac ) {
             /* Neighbour cbp values. A missing neighbour gives 0x1C0 for an
              * intra current macroblock and 0 otherwise. */
 332         // top_cbp
 333         if(top_type) {
 334             h->top_cbp = h->cbp_table[top_xy];
 335         } else if(IS_INTRA(mb_type)) {
 336             h->top_cbp = 0x1C0;
 337         } else {
 338             h->top_cbp = 0;
 339         }
 340         // left_cbp
 341         if (left_type[0]) {
 342             h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
 343         } else if(IS_INTRA(mb_type)) {
 344             h->left_cbp = 0x1C0;
 345         } else {
 346             h->left_cbp = 0;
 347         }
             /* bits 1 and 3 of left_cbp each take one bit of the respective
              * left macroblock's cbp, chosen through left_block */
 348         if (left_type[0]) {
 349             h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
 350         }
 351         if (left_type[1]) {
 352             h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
 353         }
 354     }
 355
 356 #if 1
 357     if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
 358         int list;
 359         for(list=0; list<h->list_count; list++){
                 /* skip a list the macroblock does not use, unless it is a
                  * direct macroblock or h->deblocking_filter is set */
 360             if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
 361                 /*if(!h->mv_cache_clean[list]){
 362                     memset(h->mv_cache [list],  0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
 363                     memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
 364                     h->mv_cache_clean[list]= 1;
 365                 }*/
 366                 continue;
 367             }
 368             h->mv_cache_clean[list]= 0;
 369
                 /* Top neighbour: copy four motion vectors (one 32-bit copy
                  * each) and two reference indices. If the neighbour does not
                  * use this list, the vectors are zeroed and the references are
                  * LIST_NOT_USED (neighbour exists) or PART_NOT_AVAILABLE. */
 370             if(USES_LIST(top_type, list)){
 371                 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
 372                 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
 373                 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
 374                 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
 375                 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
 376                 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
 377                 h->ref_cache[list][scan8[0] + 0 - 1*8]=
 378                 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
 379                 h->ref_cache[list][scan8[0] + 2 - 1*8]=
 380                 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
 381             }else{
 382                 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
 383                 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
 384                 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
 385                 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
 386                 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
 387             }
 388
                 /* Left neighbours: each of the two entries fills two cache
                  * rows, with source rows selected through left_block. */
 389             for(i=0; i<2; i++){
 390                 int cache_idx = scan8[0] - 1 + i*2*8;
 391                 if(USES_LIST(left_type[i], list)){
 392                     const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
 393                     const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
 394                     *(uint32_t*)h->mv_cache[list][cache_idx  ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
 395                     *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
 396                     h->ref_cache[list][cache_idx  ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
 397                     h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
 398                 }else{
 399                     *(uint32_t*)h->mv_cache [list][cache_idx  ]=
 400                     *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
 401                     h->ref_cache[list][cache_idx  ]=
 402                     h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
 403                 }
 404             }
 405
                 /* Nothing further is filled for this list when called for
                  * deblocking, or, outside MBAFF frames, for a direct
                  * macroblock with direct_spatial_mv_pred equal to 0. */
 406             if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
 407                 continue;
 408
                 /* Top-left neighbour. topleft_partition is -1 (all bits set)
                  * by default, so the "&" terms add a further 2*b_stride /
                  * b8_stride; when it is 0 they add nothing. */
 409             if(USES_LIST(topleft_type, list)){
 410                 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
 411                 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
 412                 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
 413                 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
 414             }else{
 415                 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
 416                 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
 417             }
 418
                 /* Top-right neighbour. */
 419             if(USES_LIST(topright_type, list)){
 420                 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
 421                 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
 422                 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
 423                 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
 424             }else{
 425                 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
 426                 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
 427             }
 428
                 /* Skip and direct macroblocks outside MBAFF frames need none
                  * of the remaining entries. */
 429             if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
 430                 continue;
 431
                 /* Mark a fixed set of cache positions as not available and
                  * zero their motion vectors. */
 432             h->ref_cache[list][scan8[5 ]+1] =
 433             h->ref_cache[list][scan8[7 ]+1] =
 434             h->ref_cache[list][scan8[13]+1] =  //FIXME remove past 3 (init somewhere else)
 435             h->ref_cache[list][scan8[4 ]] =
 436             h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
 437             *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
 438             *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
 439             *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
 440             *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
 441             *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
 442
 443             if( h->pps.cabac ) {
 444                 /* XXX beurk, Load mvd */
                     /* mvd_cache is filled from h->mvd_table at the same
                      * positions as mv_cache above; neighbours not using the
                      * list give 0 */
 445                 if(USES_LIST(top_type, list)){
 446                     const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
 447                     *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
 448                     *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
 449                     *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
 450                     *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
 451                 }else{
 452                     *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
 453                     *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
 454                     *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
 455                     *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
 456                 }
 457                 if(USES_LIST(left_type[0], list)){
 458                     const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
 459                     *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
 460                     *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
 461                 }else{
 462                     *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
 463                     *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
 464                 }
 465                 if(USES_LIST(left_type[1], list)){
 466                     const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
 467                     *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
 468                     *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
 469                 }else{
 470                     *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
 471                     *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
 472                 }
 473                 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
 474                 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
 475                 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
 476                 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
 477                 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
 478
 479                 if(h->slice_type_nos == FF_B_TYPE){
                         /* direct_cache: zero the 4x4 area of the current
                          * macroblock, then set the top/left entries to 1 for
                          * a direct neighbour, to the stored direct_table
                          * values for an 8x8 neighbour, and to 0 otherwise */
 480                     fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
 481
 482                     if(IS_DIRECT(top_type)){
 483                         *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
 484                     }else if(IS_8X8(top_type)){
 485                         int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
 486                         h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
 487                         h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
 488                     }else{
 489                         *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
 490                     }
 491
 492                     if(IS_DIRECT(left_type[0]))
 493                         h->direct_cache[scan8[0] - 1 + 0*8]= 1;
 494                     else if(IS_8X8(left_type[0]))
 495                         h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
 496                     else
 497                         h->direct_cache[scan8[0] - 1 + 0*8]= 0;
 498
 499                     if(IS_DIRECT(left_type[1]))
 500                         h->direct_cache[scan8[0] - 1 + 2*8]= 1;
 501                     else if(IS_8X8(left_type[1]))
 502                         h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
 503                     else
 504                         h->direct_cache[scan8[0] - 1 + 2*8]= 0;
 505                 }
 506             }
 507
                 /* MBAFF: bring neighbour entries whose field/frame coding
                  * differs from the current macroblock into its units. For a
                  * field macroblock (MB_FIELD) a non-interlaced neighbour gets
                  * its reference index doubled and the vertical mv/mvd halved;
                  * otherwise an interlaced neighbour gets the index halved and
                  * the vertical mv/mvd doubled. Entries with a negative
                  * reference index are left alone. */
 508             if(FRAME_MBAFF){
 509 #define MAP_MVS\
 510                     MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
 511                     MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
 512                     MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
 513                     MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
 514                     MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
 515                     MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
 516                     MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
 517                     MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
 518                     MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
 519                     MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
 520                 if(MB_FIELD){
 521 #define MAP_F2F(idx, mb_type)\
 522                     if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
 523                         h->ref_cache[list][idx] <<= 1;\
 524                         h->mv_cache[list][idx][1] /= 2;\
 525                         h->mvd_cache[list][idx][1] /= 2;\
 526                     }
 527                     MAP_MVS
 528 #undef MAP_F2F
 529                 }else{
 530 #define MAP_F2F(idx, mb_type)\
 531                     if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
 532                         h->ref_cache[list][idx] >>= 1;\
 533                         h->mv_cache[list][idx][1] <<= 1;\
 534                         h->mvd_cache[list][idx][1] <<= 1;\
 535                     }
 536                     MAP_MVS
 537 #undef MAP_F2F
 538                 }
 539             }
 540         }
 541     }
 542 #endif
 543
         /* 0, 1 or 2: how many of the top and first left neighbour use the 8x8 transform */
 544     h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
 545 }
 546
 547 static inline void write_back_intra_pred_mode(H264Context *h){
 548     const int mb_xy= h->mb_xy;
 549
 550     h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
 551     h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
 552     h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
 553     h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
 554     h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
 555     h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
 556     h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
 557 }
 558
 559 /**
 560  * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
 561  */
 562 static inline int check_intra4x4_pred_mode(H264Context *h){
 563     MpegEncContext * const s = &h->s;
 564     static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
 565     static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
 566     int i;
 567
 568     if(!(h->top_samples_available&0x8000)){
 569         for(i=0; i<4; i++){
 570             int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
 571             if(status<0){
 572                 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
 573                 return -1;
 574             } else if(status){
 575                 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
 576             }
 577         }
 578     }
 579
 580     if((h->left_samples_available&0x8888)!=0x8888){
 581         static const int mask[4]={0x8000,0x2000,0x80,0x20};
 582         for(i=0; i<4; i++){
 583             if(!(h->left_samples_available&mask[i])){
 584                 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
 585                 if(status<0){
 586                     av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
 587                     return -1;
 588                 } else if(status){
 589                     h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
 590                 }
 591             }
 592         }
 593     }
 594
 595     return 0;
 596 } //FIXME cleanup like next
 597
 598 /**
 599  * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
 600  */
 601 static inline int check_intra_pred_mode(H264Context *h, int mode){
 602     MpegEncContext * const s = &h->s;
 603     static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
 604     static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
 605
 606     if(mode > 6U) {
 607         av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
 608         return -1;
 609     }
 610
 611     if(!(h->top_samples_available&0x8000)){
 612         mode= top[ mode ];
 613         if(mode<0){
 614             av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
 615             return -1;
 616         }
 617     }
 618
 619     if((h->left_samples_available&0x8080) != 0x8080){
 620         mode= left[ mode ];
 621         if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
 622             mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
 623         }
 624         if(mode<0){
 625             av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
 626             return -1;
 627         }
 628     }
 629
 630     return mode;
 631 }
 632
 633 /**
 634  * gets the predicted intra4x4 prediction mode.
 635  */
 636 static inline int pred_intra_mode(H264Context *h, int n){
 637     const int index8= scan8[n];
 638     const int left= h->intra4x4_pred_mode_cache[index8 - 1];
 639     const int top = h->intra4x4_pred_mode_cache[index8 - 8];
 640     const int min= FFMIN(left, top);
 641
 642     tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
 643
 644     if(min<0) return DC_PRED;
 645     else      return min;
 646 }
 647
 648 static inline void write_back_non_zero_count(H264Context *h){
 649     const int mb_xy= h->mb_xy;
 650
 651     h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
 652     h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
 653     h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
 654     h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
 655     h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
 656     h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
 657     h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
 658
 659     h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
 660     h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
 661     h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
 662
 663     h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
 664     h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
 665     h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
 666 }
 667
 668 /**
 669  * gets the predicted number of non-zero coefficients.
 670  * @param n block index
 671  */
 672 static inline int pred_non_zero_count(H264Context *h, int n){
 673     const int index8= scan8[n];
 674     const int left= h->non_zero_count_cache[index8 - 1];
 675     const int top = h->non_zero_count_cache[index8 - 8];
 676     int i= left + top;
 677
 678     if(i<64) i= (i+1)>>1;
 679
 680     tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
 681
 682     return i&31;
 683 }
 684
 685 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
 686     const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
 687     MpegEncContext *s = &h->s;
 688
 689     /* there is no consistent mapping of mvs to neighboring locations that will
 690      * make mbaff happy, so we can't move all this logic to fill_caches */
 691     if(FRAME_MBAFF){
 692         const uint32_t *mb_types = s->current_picture_ptr->mb_type;
 693         const int16_t *mv;
 694         *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
 695         *C = h->mv_cache[list][scan8[0]-2];
 696
 697         if(!MB_FIELD
 698            && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
 699             int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
 700             if(IS_INTERLACED(mb_types[topright_xy])){
 701 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
 702                 const int x4 = X4, y4 = Y4;\
 703                 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
 704                 if(!USES_LIST(mb_type,list))\
 705                     return LIST_NOT_USED;\
 706                 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
 707                 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
 708                 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
 709                 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
 710
 711                 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
 712             }
 713         }
 714         if(topright_ref == PART_NOT_AVAILABLE
 715            && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
 716            && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
 717             if(!MB_FIELD
 718                && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
 719                 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
 720             }
 721             if(MB_FIELD
 722                && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
 723                && i >= scan8[0]+8){
 724                 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
 725                 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
 726             }
 727         }
 728 #undef SET_DIAG_MV
 729     }
 730
 731     if(topright_ref != PART_NOT_AVAILABLE){
 732         *C= h->mv_cache[list][ i - 8 + part_width ];
 733         return topright_ref;
 734     }else{
 735         tprintf(s->avctx, "topright MV not available\n");
 736
 737         *C= h->mv_cache[list][ i - 8 - 1 ];
 738         return h->ref_cache[list][ i - 8 - 1 ];
 739     }
 740 }
 741
 742 /**
 743  * gets the predicted MV.
 744  * @param n the block index
 745  * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
 746  * @param mx the x component of the predicted motion vector
 747  * @param my the y component of the predicted motion vector
 748  */
 749 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
 750     const int index8= scan8[n];
 751     const int top_ref=      h->ref_cache[list][ index8 - 8 ];
 752     const int left_ref=     h->ref_cache[list][ index8 - 1 ];
 753     const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
 754     const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
 755     const int16_t * C;
 756     int diagonal_ref, match_count;
 757
 758     assert(part_width==1 || part_width==2 || part_width==4);
 759
 760 /* mv_cache
 761   B . . A T T T T
 762   U . . L . . , .
 763   U . . L . . . .
 764   U . . L . . , .
 765   . . . L . . . .
 766 */
 767
 768     diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
 769     match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
 770     tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
 771     if(match_count > 1){ //most common
 772         *mx= mid_pred(A[0], B[0], C[0]);
 773         *my= mid_pred(A[1], B[1], C[1]);
 774     }else if(match_count==1){
 775         if(left_ref==ref){
 776             *mx= A[0];
 777             *my= A[1];
 778         }else if(top_ref==ref){
 779             *mx= B[0];
 780             *my= B[1];
 781         }else{
 782             *mx= C[0];
 783             *my= C[1];
 784         }
 785     }else{
 786         if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
 787             *mx= A[0];
 788             *my= A[1];
 789         }else{
 790             *mx= mid_pred(A[0], B[0], C[0]);
 791             *my= mid_pred(A[1], B[1], C[1]);
 792         }
 793     }
 794
 795     tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1],                    diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
 796 }
 797
 798 /**
 799  * gets the directionally predicted 16x8 MV.
 800  * @param n the block index
 801  * @param mx the x component of the predicted motion vector
 802  * @param my the y component of the predicted motion vector
 803  */
 804 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
 805     if(n==0){
 806         const int top_ref=      h->ref_cache[list][ scan8[0] - 8 ];
 807         const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
 808
 809         tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
 810
 811         if(top_ref == ref){
 812             *mx= B[0];
 813             *my= B[1];
 814             return;
 815         }
 816     }else{
 817         const int left_ref=     h->ref_cache[list][ scan8[8] - 1 ];
 818         const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
 819
 820         tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
 821
 822         if(left_ref == ref){
 823             *mx= A[0];
 824             *my= A[1];
 825             return;
 826         }
 827     }
 828
 829     //RARE
 830     pred_motion(h, n, 4, list, ref, mx, my);
 831 }
 832
 833 /**
 834  * gets the directionally predicted 8x16 MV.
 835  * @param n the block index
 836  * @param mx the x component of the predicted motion vector
 837  * @param my the y component of the predicted motion vector
 838  */
 839 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
 840     if(n==0){
 841         const int left_ref=      h->ref_cache[list][ scan8[0] - 1 ];
 842         const int16_t * const A=  h->mv_cache[list][ scan8[0] - 1 ];
 843
 844         tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
 845
 846         if(left_ref == ref){
 847             *mx= A[0];
 848             *my= A[1];
 849             return;
 850         }
 851     }else{
 852         const int16_t * C;
 853         int diagonal_ref;
 854
 855         diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
 856
 857         tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
 858
 859         if(diagonal_ref == ref){
 860             *mx= C[0];
 861             *my= C[1];
 862             return;
 863         }
 864     }
 865
 866     //RARE
 867     pred_motion(h, n, 2, list, ref, mx, my);
 868 }
 869
 870 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
 871     const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
 872     const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
 873
 874     tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
 875
 876     if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
 877        || !( top_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ])
 878        || !(left_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ])){
 879
 880         *mx = *my = 0;
 881         return;
 882     }
 883
 884     pred_motion(h, 0, 4, 0, 0, mx, my);
 885
 886     return;
 887 }
 888
 889 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
 890     int poc0 = h->ref_list[0][i].poc;
 891     int td = av_clip(poc1 - poc0, -128, 127);
 892     if(td == 0 || h->ref_list[0][i].long_ref){
 893         return 256;
 894     }else{
 895         int tb = av_clip(poc - poc0, -128, 127);
 896         int tx = (16384 + (FFABS(td) >> 1)) / td;
 897         return av_clip((tb*tx + 32) >> 6, -1024, 1023);
 898     }
 899 }
 900
 901 static inline void direct_dist_scale_factor(H264Context * const h){
 902     MpegEncContext * const s = &h->s;
 903     const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
 904     const int poc1 = h->ref_list[1][0].poc;
 905     int i, field;
 906     for(field=0; field<2; field++){
 907         const int poc  = h->s.current_picture_ptr->field_poc[field];
 908         const int poc1 = h->ref_list[1][0].field_poc[field];
 909         for(i=0; i < 2*h->ref_count[0]; i++)
 910             h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
 911     }
 912
 913     for(i=0; i<h->ref_count[0]; i++){
 914         h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
 915     }
 916 }
 917
 918 static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
 919     MpegEncContext * const s = &h->s;
 920     Picture * const ref1 = &h->ref_list[1][0];
 921     int j, old_ref, rfield;
 922     int start= mbafi ? 16                      : 0;
 923     int end  = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
 924     int interl= mbafi || s->picture_structure != PICT_FRAME;
 925
 926     /* bogus; fills in for missing frames */
 927     memset(map[list], 0, sizeof(map[list]));
 928
 929     for(rfield=0; rfield<2; rfield++){
 930         for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
 931             int poc = ref1->ref_poc[colfield][list][old_ref];
 932
 933             if     (!interl)
 934                 poc |= 3;
 935             else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
 936                 poc= (poc&~3) + rfield + 1;
 937
 938             for(j=start; j<end; j++){
 939                 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
 940                     int cur_ref= mbafi ? (j-16)^field : j;
 941                     map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
 942                     if(rfield == field)
 943                         map[list][old_ref] = cur_ref;
 944                     break;
 945                 }
 946             }
 947         }
 948     }
 949 }
 950
 951 static inline void direct_ref_list_init(H264Context * const h){
 952     MpegEncContext * const s = &h->s;
 953     Picture * const ref1 = &h->ref_list[1][0];
 954     Picture * const cur = s->current_picture_ptr;
 955     int list, j, field;
 956     int sidx= (s->picture_structure&1)^1;
 957     int ref1sidx= (ref1->reference&1)^1;
 958
 959     for(list=0; list<2; list++){
 960         cur->ref_count[sidx][list] = h->ref_count[list];
 961         for(j=0; j<h->ref_count[list]; j++)
 962             cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
 963     }
 964
 965     if(s->picture_structure == PICT_FRAME){
 966         memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
 967         memcpy(cur->ref_poc  [1], cur->ref_poc  [0], sizeof(cur->ref_poc  [0]));
 968     }
 969
 970     cur->mbaff= FRAME_MBAFF;
 971
 972     if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
 973         return;
 974
 975     for(list=0; list<2; list++){
 976         fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
 977         for(field=0; field<2; field++)
 978             fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
 979     }
 980 }
 981
 982 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
 983     MpegEncContext * const s = &h->s;
 984     int b8_stride = h->b8_stride;
 985     int b4_stride = h->b_stride;
 986     int mb_xy = h->mb_xy;
 987     int mb_type_col[2];
 988     const int16_t (*l1mv0)[2], (*l1mv1)[2];
 989     const int8_t *l1ref0, *l1ref1;
 990     const int is_b8x8 = IS_8X8(*mb_type);
 991     unsigned int sub_mb_type;
 992     int i8, i4;
 993
 994 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
 995
 996     if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
 997         if(!IS_INTERLACED(*mb_type)){                    //     AFR/FR    -> AFL/FL
 998             int cur_poc = s->current_picture_ptr->poc;
 999             int *col_poc = h->ref_list[1]->field_poc;
1000             int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1001             mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1002             b8_stride = 0;
1003         }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
1004             int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1005             mb_xy += s->mb_stride*fieldoff;
1006         }
1007         goto single_col;
1008     }else{                                               // AFL/AFR/FR/FL -> AFR/FR
1009         if(IS_INTERLACED(*mb_type)){                     // AFL       /FL -> AFR/FR
1010             mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1011             mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1012             mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1013             b8_stride *= 3;
1014             b4_stride *= 6;
1015             //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1016             if(    (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1017                 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1018                 && !is_b8x8){
1019                 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1020                 *mb_type   |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1021             }else{
1022                 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1023                 *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
1024             }
1025         }else{                                           //     AFR/FR    -> AFR/FR
1026 single_col:
1027             mb_type_col[0] =
1028             mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
1029             if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1030                 /* FIXME save sub mb types from previous frames (or derive from MVs)
1031                 * so we know exactly what block size to use */
1032                 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1033                 *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
1034             }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1035                 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1036                 *mb_type   |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1037             }else{
1038                 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1039                 *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
1040             }
1041         }
1042     }
1043
1044     l1mv0  = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1045     l1mv1  = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1046     l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1047     l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
1048     if(!b8_stride){
1049         if(s->mb_y&1){
1050             l1ref0 += h->b8_stride;
1051             l1ref1 += h->b8_stride;
1052             l1mv0  +=  2*b4_stride;
1053             l1mv1  +=  2*b4_stride;
1054         }
1055     }
1056
1057     if(h->direct_spatial_mv_pred){
1058         int ref[2];
1059         int mv[2][2];
1060         int list;
1061
1062         /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1063
1064         /* ref = min(neighbors) */
1065         for(list=0; list<2; list++){
1066             int refa = h->ref_cache[list][scan8[0] - 1];
1067             int refb = h->ref_cache[list][scan8[0] - 8];
1068             int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1069             if(refc == PART_NOT_AVAILABLE)
1070                 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1071             ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
1072             if(ref[list] < 0)
1073                 ref[list] = -1;
1074         }
1075
1076         if(ref[0] < 0 && ref[1] < 0){
1077             ref[0] = ref[1] = 0;
1078             mv[0][0] = mv[0][1] =
1079             mv[1][0] = mv[1][1] = 0;
1080         }else{
1081             for(list=0; list<2; list++){
1082                 if(ref[list] >= 0)
1083                     pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1084                 else
1085                     mv[list][0] = mv[list][1] = 0;
1086             }
1087         }
1088
1089         if(ref[1] < 0){
1090             if(!is_b8x8)
1091                 *mb_type &= ~MB_TYPE_L1;
1092             sub_mb_type &= ~MB_TYPE_L1;
1093         }else if(ref[0] < 0){
1094             if(!is_b8x8)
1095                 *mb_type &= ~MB_TYPE_L0;
1096             sub_mb_type &= ~MB_TYPE_L0;
1097         }
1098
1099         if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1100             for(i8=0; i8<4; i8++){
1101                 int x8 = i8&1;
1102                 int y8 = i8>>1;
1103                 int xy8 = x8+y8*b8_stride;
1104                 int xy4 = 3*x8+y8*b4_stride;
1105                 int a=0, b=0;
1106
1107                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1108                     continue;
1109                 h->sub_mb_type[i8] = sub_mb_type;
1110
1111                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1112                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1113                 if(!IS_INTRA(mb_type_col[y8])
1114                    && (   (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1115                        || (l1ref0[xy8]  < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1116                     if(ref[0] > 0)
1117                         a= pack16to32(mv[0][0],mv[0][1]);
1118                     if(ref[1] > 0)
1119                         b= pack16to32(mv[1][0],mv[1][1]);
1120                 }else{
1121                     a= pack16to32(mv[0][0],mv[0][1]);
1122                     b= pack16to32(mv[1][0],mv[1][1]);
1123                 }
1124                 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1125                 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1126             }
1127         }else if(IS_16X16(*mb_type)){
1128             int a=0, b=0;
1129
1130             fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1131             fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1132             if(!IS_INTRA(mb_type_col[0])
1133                && (   (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1134                    || (l1ref0[0]  < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1135                        && (h->x264_build>33 || !h->x264_build)))){
1136                 if(ref[0] > 0)
1137                     a= pack16to32(mv[0][0],mv[0][1]);
1138                 if(ref[1] > 0)
1139                     b= pack16to32(mv[1][0],mv[1][1]);
1140             }else{
1141                 a= pack16to32(mv[0][0],mv[0][1]);
1142                 b= pack16to32(mv[1][0],mv[1][1]);
1143             }
1144             fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1145             fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
1146         }else{
1147             for(i8=0; i8<4; i8++){
1148                 const int x8 = i8&1;
1149                 const int y8 = i8>>1;
1150
1151                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1152                     continue;
1153                 h->sub_mb_type[i8] = sub_mb_type;
1154
1155                 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1156                 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1157                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1158                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1159
1160                 /* col_zero_flag */
1161                 if(!IS_INTRA(mb_type_col[0]) && (   l1ref0[x8 + y8*b8_stride] == 0
1162                                               || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1163                                                   && (h->x264_build>33 || !h->x264_build)))){
1164                     const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1165                     if(IS_SUB_8X8(sub_mb_type)){
1166                         const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1167                         if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1168                             if(ref[0] == 0)
1169                                 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1170                             if(ref[1] == 0)
1171                                 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1172                         }
1173                     }else
1174                     for(i4=0; i4<4; i4++){
1175                         const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1176                         if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1177                             if(ref[0] == 0)
1178                                 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1179                             if(ref[1] == 0)
1180                                 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1181                         }
1182                     }
1183                 }
1184             }
1185         }
1186     }else{ /* direct temporal mv pred */
1187         const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1188         const int *dist_scale_factor = h->dist_scale_factor;
1189         int ref_offset= 0;
1190
1191         if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
1192             map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1193             map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1194             dist_scale_factor   =h->dist_scale_factor_field[s->mb_y&1];
1195         }
1196         if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
1197             ref_offset += 16;
1198
1199         if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1200             /* FIXME assumes direct_8x8_inference == 1 */
1201             int y_shift  = 2*!IS_INTERLACED(*mb_type);
1202
1203             for(i8=0; i8<4; i8++){
1204                 const int x8 = i8&1;
1205                 const int y8 = i8>>1;
1206                 int ref0, scale;
1207                 const int16_t (*l1mv)[2]= l1mv0;
1208
1209                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1210                     continue;
1211                 h->sub_mb_type[i8] = sub_mb_type;
1212
1213                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1214                 if(IS_INTRA(mb_type_col[y8])){
1215                     fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1216                     fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1217                     fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1218                     continue;
1219                 }
1220
1221                 ref0 = l1ref0[x8 + y8*b8_stride];
1222                 if(ref0 >= 0)
1223                     ref0 = map_col_to_list0[0][ref0 + ref_offset];
1224                 else{
1225                     ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1226                     l1mv= l1mv1;
1227                 }
1228                 scale = dist_scale_factor[ref0];
1229                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1230
1231                 {
1232                     const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1233                     int my_col = (mv_col[1]<<y_shift)/2;
1234                     int mx = (scale * mv_col[0] + 128) >> 8;
1235                     int my = (scale * my_col + 128) >> 8;
1236                     fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1237                     fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1238                 }
1239             }
1240             return;
1241         }
1242
1243         /* one-to-one mv scaling */
1244
1245         if(IS_16X16(*mb_type)){
1246             int ref, mv0, mv1;
1247
1248             fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1249             if(IS_INTRA(mb_type_col[0])){
1250                 ref=mv0=mv1=0;
1251             }else{
1252                 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1253                                                 : map_col_to_list0[1][l1ref1[0] + ref_offset];
1254                 const int scale = dist_scale_factor[ref0];
1255                 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1256                 int mv_l0[2];
1257                 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1258                 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1259                 ref= ref0;
1260                 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1261                 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1262             }
1263             fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1264             fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1265             fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1266         }else{
1267             for(i8=0; i8<4; i8++){
1268                 const int x8 = i8&1;
1269                 const int y8 = i8>>1;
1270                 int ref0, scale;
1271                 const int16_t (*l1mv)[2]= l1mv0;
1272
1273                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1274                     continue;
1275                 h->sub_mb_type[i8] = sub_mb_type;
1276                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1277                 if(IS_INTRA(mb_type_col[0])){
1278                     fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1279                     fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1280                     fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1281                     continue;
1282                 }
1283
1284                 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
1285                 if(ref0 >= 0)
1286                     ref0 = map_col_to_list0[0][ref0];
1287                 else{
1288                     ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1289                     l1mv= l1mv1;
1290                 }
1291                 scale = dist_scale_factor[ref0];
1292
1293                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1294                 if(IS_SUB_8X8(sub_mb_type)){
1295                     const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1296                     int mx = (scale * mv_col[0] + 128) >> 8;
1297                     int my = (scale * mv_col[1] + 128) >> 8;
1298                     fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1299                     fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1300                 }else
1301                 for(i4=0; i4<4; i4++){
1302                     const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1303                     int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1304                     mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1305                     mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1306                     *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1307                         pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1308                 }
1309             }
1310         }
1311     }
1312 }
1313
1314 static inline void write_back_motion(H264Context *h, int mb_type){
1315     MpegEncContext * const s = &h->s;
1316     const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1317     const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1318     int list;
1319
1320     if(!USES_LIST(mb_type, 0))
1321         fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1322
1323     for(list=0; list<h->list_count; list++){
1324         int y;
1325         if(!USES_LIST(mb_type, list))
1326             continue;
1327
1328         for(y=0; y<4; y++){
1329             *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1330             *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1331         }
1332         if( h->pps.cabac ) {
1333             if(IS_SKIP(mb_type))
1334                 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1335             else
1336             for(y=0; y<4; y++){
1337                 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1338                 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1339             }
1340         }
1341
1342         {
1343             int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1344             ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1345             ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1346             ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1347             ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
1348         }
1349     }
1350
1351     if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1352         if(IS_8X8(mb_type)){
1353             uint8_t *direct_table = &h->direct_table[b8_xy];
1354             direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1355             direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1356             direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1357         }
1358     }
1359 }
1360
1361 /**
1362  * Decodes a network abstraction layer unit.
1363  * @param consumed is the number of bytes used as input
1364  * @param length is the length of the array
1365  * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1366  * @returns decoded bytes, might be src+1 if no escapes
1367  */
1368 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1369     int i, si, di;
1370     uint8_t *dst;
1371     int bufidx;
1372
1373 //    src[0]&0x80;                //forbidden bit
1374     h->nal_ref_idc= src[0]>>5;
1375     h->nal_unit_type= src[0]&0x1F;
1376
1377     src++; length--;
1378 #if 0
1379     for(i=0; i<length; i++)
1380         printf("%2X ", src[i]);
1381 #endif
1382
1383 #ifdef HAVE_FAST_UNALIGNED
1384 # ifdef HAVE_FAST_64BIT
1385 #   define RS 7
1386     for(i=0; i+1<length; i+=9){
1387         if(!((~*(uint64_t*)(src+i) & (*(uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
1388 # else
1389 #   define RS 3
1390     for(i=0; i+1<length; i+=5){
1391         if(!((~*(uint32_t*)(src+i) & (*(uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
1392 # endif
1393             continue;
1394         if(i>0 && !src[i]) i--;
1395         while(src[i]) i++;
1396 #else
1397 #   define RS 0
1398     for(i=0; i+1<length; i+=2){
1399         if(src[i]) continue;
1400         if(i>0 && src[i-1]==0) i--;
1401 #endif
1402         if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1403             if(src[i+2]!=3){
1404                 /* startcode, so we must be past the end */
1405                 length=i;
1406             }
1407             break;
1408         }
1409         i-= RS;
1410     }
1411
1412     if(i>=length-1){ //no escaped 0
1413         *dst_length= length;
1414         *consumed= length+1; //+1 for the header
1415         return src;
1416     }
1417
1418     bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1419     h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
1420     dst= h->rbsp_buffer[bufidx];
1421
1422     if (dst == NULL){
1423         return NULL;
1424     }
1425
1426 //printf("decoding esc\n");
1427     memcpy(dst, src, i);
1428     si=di=i;
1429     while(si+2<length){
1430         //remove escapes (very rare 1:2^22)
1431         if(src[si+2]>3){
1432             dst[di++]= src[si++];
1433             dst[di++]= src[si++];
1434         }else if(src[si]==0 && src[si+1]==0){
1435             if(src[si+2]==3){ //escape
1436                 dst[di++]= 0;
1437                 dst[di++]= 0;
1438                 si+=3;
1439                 continue;
1440             }else //next start code
1441                 goto nsc;
1442         }
1443
1444         dst[di++]= src[si++];
1445     }
1446     while(si<length)
1447         dst[di++]= src[si++];
1448 nsc:
1449
1450     memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
1451
1452     *dst_length= di;
1453     *consumed= si + 1;//+1 for the header
1454 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1455     return dst;
1456 }
1457
1458 /**
1459  * identifies the exact end of the bitstream
1460  * @return the length of the trailing, or 0 if damaged
1461  */
1462 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1463     int v= *src;
1464     int r;
1465
1466     tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1467
1468     for(r=1; r<9; r++){
1469         if(v&1) return r;
1470         v>>=1;
1471     }
1472     return 0;
1473 }
1474
1475 /**
1476  * IDCT transforms the 16 dc values and dequantizes them.
1477  * @param qp quantization parameter
1478  */
1479 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1480 #define stride 16
1481     int i;
1482     int temp[16]; //FIXME check if this is a good idea
1483     static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
1484     static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1485
1486 //memset(block, 64, 2*256);
1487 //return;
1488     for(i=0; i<4; i++){
1489         const int offset= y_offset[i];
1490         const int z0= block[offset+stride*0] + block[offset+stride*4];
1491         const int z1= block[offset+stride*0] - block[offset+stride*4];
1492         const int z2= block[offset+stride*1] - block[offset+stride*5];
1493         const int z3= block[offset+stride*1] + block[offset+stride*5];
1494
1495         temp[4*i+0]= z0+z3;
1496         temp[4*i+1]= z1+z2;
1497         temp[4*i+2]= z1-z2;
1498         temp[4*i+3]= z0-z3;
1499     }
1500
1501     for(i=0; i<4; i++){
1502         const int offset= x_offset[i];
1503         const int z0= temp[4*0+i] + temp[4*2+i];
1504         const int z1= temp[4*0+i] - temp[4*2+i];
1505         const int z2= temp[4*1+i] - temp[4*3+i];
1506         const int z3= temp[4*1+i] + temp[4*3+i];
1507
1508         block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1509         block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1510         block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1511         block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1512     }
1513 }
1514
#if 0
/**
 * DCT transforms the 16 dc values.
 * Currently compiled out; relies on the stride macro defined above.
 * @param qp quantization parameter ??? FIXME
 */
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
//    const int qmul= dequant_coeff[qp][0];
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
    int temp[16]; //FIXME check if this is a good idea
    int i;

    /* first pass: block -> temp */
    for(i=0; i<4; i++){
        const DCTELEM *in= block + y_offset[i];
        int *row= temp + 4*i;
        const int sum04 = in[stride*0] + in[stride*4];
        const int diff04= in[stride*0] - in[stride*4];
        const int diff15= in[stride*1] - in[stride*5];
        const int sum15 = in[stride*1] + in[stride*5];

        row[0]= sum04  + sum15;
        row[1]= diff04 + diff15;
        row[2]= diff04 - diff15;
        row[3]= sum04  - sum15;
    }

    /* second pass: temp -> block, halving every result */
    for(i=0; i<4; i++){
        DCTELEM *out= block + x_offset[i];
        const int sum02 = temp[4*0+i] + temp[4*2+i];
        const int diff02= temp[4*0+i] - temp[4*2+i];
        const int diff13= temp[4*1+i] - temp[4*3+i];
        const int sum13 = temp[4*1+i] + temp[4*3+i];

        out[stride*0 ]= (sum02  + sum13 )>>1;
        out[stride*2 ]= (diff02 + diff13)>>1;
        out[stride*8 ]= (diff02 - diff13)>>1;
        out[stride*10]= (sum02  - sum13 )>>1;
    }
}
#endif
1554
1555 #undef xStride
1556 #undef stride
1557
1558 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1559     const int stride= 16*2;
1560     const int xStride= 16;
1561     int a,b,c,d,e;
1562
1563     a= block[stride*0 + xStride*0];
1564     b= block[stride*0 + xStride*1];
1565     c= block[stride*1 + xStride*0];
1566     d= block[stride*1 + xStride*1];
1567
1568     e= a-b;
1569     a= a+b;
1570     b= c-d;
1571     c= c+d;
1572
1573     block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1574     block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1575     block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1576     block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
1577 }
1578
#if 0
/**
 * Transforms the 2x2 chroma dc values in place (no scaling).
 * Currently compiled out.
 */
static void chroma_dc_dct_c(DCTELEM *block){
    const int row_step= 16*2;
    const int col_step= 16;
    DCTELEM * const p00= &block[row_step*0 + col_step*0];
    DCTELEM * const p01= &block[row_step*0 + col_step*1];
    DCTELEM * const p10= &block[row_step*1 + col_step*0];
    DCTELEM * const p11= &block[row_step*1 + col_step*1];
    const int top_sum = *p00 + *p01;
    const int top_diff= *p00 - *p01;
    const int bot_sum = *p10 + *p11;
    const int bot_diff= *p10 - *p11;

    *p00= (top_sum  + bot_sum );
    *p01= (top_diff + bot_diff);
    *p10= (top_sum  - bot_sum );
    *p11= (top_diff - bot_diff);
}
#endif
1601
1602 /**
1603  * gets the chroma qp.
1604  */
1605 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1606     return h->pps.chroma_qp_table[t][qscale];
1607 }
1608
1609 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1610                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1611                            int src_x_offset, int src_y_offset,
1612                            qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1613     MpegEncContext * const s = &h->s;
1614     const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1615     int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1616     const int luma_xy= (mx&3) + ((my&3)<<2);
1617     uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1618     uint8_t * src_cb, * src_cr;
1619     int extra_width= h->emu_edge_width;
1620     int extra_height= h->emu_edge_height;
1621     int emu=0;
1622     const int full_mx= mx>>2;
1623     const int full_my= my>>2;
1624     const int pic_width  = 16*s->mb_width;
1625     const int pic_height = 16*s->mb_height >> MB_FIELD;
1626
1627     if(mx&7) extra_width -= 3;
1628     if(my&7) extra_height -= 3;
1629
1630     if(   full_mx < 0-extra_width
1631        || full_my < 0-extra_height
1632        || full_mx + 16/*FIXME*/ > pic_width + extra_width
1633        || full_my + 16/*FIXME*/ > pic_height + extra_height){
1634         ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1635             src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1636         emu=1;
1637     }
1638
1639     qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1640     if(!square){
1641         qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1642     }
1643
1644     if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1645
1646     if(MB_FIELD){
1647         // chroma offset when predicting from a field of opposite parity
1648         my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1649         emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1650     }
1651     src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1652     src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1653
1654     if(emu){
1655         ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1656             src_cb= s->edge_emu_buffer;
1657     }
1658     chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1659
1660     if(emu){
1661         ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1662             src_cr= s->edge_emu_buffer;
1663     }
1664     chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1665 }
1666
1667 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1668                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1669                            int x_offset, int y_offset,
1670                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1671                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1672                            int list0, int list1){
1673     MpegEncContext * const s = &h->s;
1674     qpel_mc_func *qpix_op=  qpix_put;
1675     h264_chroma_mc_func chroma_op= chroma_put;
1676
1677     dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
1678     dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
1679     dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
1680     x_offset += 8*s->mb_x;
1681     y_offset += 8*(s->mb_y >> MB_FIELD);
1682
1683     if(list0){
1684         Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1685         mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1686                            dest_y, dest_cb, dest_cr, x_offset, y_offset,
1687                            qpix_op, chroma_op);
1688
1689         qpix_op=  qpix_avg;
1690         chroma_op= chroma_avg;
1691     }
1692
1693     if(list1){
1694         Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1695         mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1696                            dest_y, dest_cb, dest_cr, x_offset, y_offset,
1697                            qpix_op, chroma_op);
1698     }
1699 }
1700
1701 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1702                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1703                            int x_offset, int y_offset,
1704                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1705                            h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1706                            h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1707                            int list0, int list1){
1708     MpegEncContext * const s = &h->s;
1709
1710     dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
1711     dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
1712     dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
1713     x_offset += 8*s->mb_x;
1714     y_offset += 8*(s->mb_y >> MB_FIELD);
1715
1716     if(list0 && list1){
1717         /* don't optimize for luma-only case, since B-frames usually
1718          * use implicit weights => chroma too. */
1719         uint8_t *tmp_cb = s->obmc_scratchpad;
1720         uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1721         uint8_t *tmp_y  = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1722         int refn0 = h->ref_cache[0][ scan8[n] ];
1723         int refn1 = h->ref_cache[1][ scan8[n] ];
1724
1725         mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1726                     dest_y, dest_cb, dest_cr,
1727                     x_offset, y_offset, qpix_put, chroma_put);
1728         mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1729                     tmp_y, tmp_cb, tmp_cr,
1730                     x_offset, y_offset, qpix_put, chroma_put);
1731
1732         if(h->use_weight == 2){
1733             int weight0 = h->implicit_weight[refn0][refn1];
1734             int weight1 = 64 - weight0;
1735             luma_weight_avg(  dest_y,  tmp_y,  h->  mb_linesize, 5, weight0, weight1, 0);
1736             chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1737             chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
1738         }else{
1739             luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1740                             h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1741                             h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1742             chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1743                             h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1744                             h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1745             chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1746                             h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1747                             h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
1748         }
1749     }else{
1750         int list = list1 ? 1 : 0;
1751         int refn = h->ref_cache[list][ scan8[n] ];
1752         Picture *ref= &h->ref_list[list][refn];
1753         mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1754                     dest_y, dest_cb, dest_cr, x_offset, y_offset,
1755                     qpix_put, chroma_put);
1756
1757         luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1758                        h->luma_weight[list][refn], h->luma_offset[list][refn]);
1759         if(h->use_weight_chroma){
1760             chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1761                              h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1762             chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1763                              h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
1764         }
1765     }
1766 }
1767
1768 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1769                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1770                            int x_offset, int y_offset,
1771                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1772                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1773                            h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1774                            int list0, int list1){
1775     if((h->use_weight==2 && list0 && list1
1776         && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1777        || h->use_weight==1)
1778         mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1779                          x_offset, y_offset, qpix_put, chroma_put,
1780                          weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1781     else
1782         mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1783                     x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1784 }
1785
1786 static inline void prefetch_motion(H264Context *h, int list){
1787     /* fetch pixels for estimated mv 4 macroblocks ahead
1788      * optimized for 64byte cache lines */
1789     MpegEncContext * const s = &h->s;
1790     const int refn = h->ref_cache[list][scan8[0]];
1791     if(refn >= 0){
1792         const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1793         const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1794         uint8_t **src= h->ref_list[list][refn].data;
1795         int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1796         s->dsp.prefetch(src[0]+off, s->linesize, 4);
1797         off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1798         s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1799     }
1800 }
1801
1802 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1803                       qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1804                       qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1805                       h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1806     MpegEncContext * const s = &h->s;
1807     const int mb_xy= h->mb_xy;
1808     const int mb_type= s->current_picture.mb_type[mb_xy];
1809
1810     assert(IS_INTER(mb_type));
1811
1812     prefetch_motion(h, 0);
1813
1814     if(IS_16X16(mb_type)){
1815         mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1816                 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1817                 &weight_op[0], &weight_avg[0],
1818                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1819     }else if(IS_16X8(mb_type)){
1820         mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1821                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1822                 &weight_op[1], &weight_avg[1],
1823                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1824         mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1825                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1826                 &weight_op[1], &weight_avg[1],
1827                 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1828     }else if(IS_8X16(mb_type)){
1829         mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1830                 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1831                 &weight_op[2], &weight_avg[2],
1832                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1833         mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1834                 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1835                 &weight_op[2], &weight_avg[2],
1836                 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1837     }else{
1838         int i;
1839
1840         assert(IS_8X8(mb_type));
1841
1842         for(i=0; i<4; i++){
1843             const int sub_mb_type= h->sub_mb_type[i];
1844             const int n= 4*i;
1845             int x_offset= (i&1)<<2;
1846             int y_offset= (i&2)<<1;
1847
1848             if(IS_SUB_8X8(sub_mb_type)){
1849                 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1850                     qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1851                     &weight_op[3], &weight_avg[3],
1852                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1853             }else if(IS_SUB_8X4(sub_mb_type)){
1854                 mc_part(h, n  , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1855                     qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1856                     &weight_op[4], &weight_avg[4],
1857                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1858                 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1859                     qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1860                     &weight_op[4], &weight_avg[4],
1861                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1862             }else if(IS_SUB_4X8(sub_mb_type)){
1863                 mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1864                     qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1865                     &weight_op[5], &weight_avg[5],
1866                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1867                 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1868                     qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1869                     &weight_op[5], &weight_avg[5],
1870                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1871             }else{
1872                 int j;
1873                 assert(IS_SUB_4X4(sub_mb_type));
1874                 for(j=0; j<4; j++){
1875                     int sub_x_offset= x_offset + 2*(j&1);
1876                     int sub_y_offset= y_offset +   (j&2);
1877                     mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1878                         qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1879                         &weight_op[6], &weight_avg[6],
1880                         IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1881                 }
1882             }
1883         }
1884     }
1885
1886     prefetch_motion(h, 1);
1887 }
1888
1889 static av_cold void init_cavlc_level_tab(void){
1890     int suffix_length, mask;
1891     unsigned int i;
1892
1893     for(suffix_length=0; suffix_length<7; suffix_length++){
1894         for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
1895             int prefix= LEVEL_TAB_BITS - av_log2(2*i);
1896             int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
1897
1898             mask= -(level_code&1);
1899             level_code= (((2+level_code)>>1) ^ mask) - mask;
1900             if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
1901                 cavlc_level_tab[suffix_length][i][0]= level_code;
1902                 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
1903             }else if(prefix + 1 <= LEVEL_TAB_BITS){
1904                 cavlc_level_tab[suffix_length][i][0]= prefix+100;
1905                 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
1906             }else{
1907                 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
1908                 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
1909             }
1910         }
1911     }
1912 }
1913
1914 static av_cold void decode_init_vlc(void){
1915     static int done = 0;
1916
1917     if (!done) {
1918         int i;
1919         int offset;
1920         done = 1;
1921
1922         chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1923         chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1924         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1925                  &chroma_dc_coeff_token_len [0], 1, 1,
1926                  &chroma_dc_coeff_token_bits[0], 1, 1,
1927                  INIT_VLC_USE_NEW_STATIC);
1928
1929         offset = 0;
1930         for(i=0; i<4; i++){
1931             coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1932             coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1933             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1934                      &coeff_token_len [i][0], 1, 1,
1935                      &coeff_token_bits[i][0], 1, 1,
1936                      INIT_VLC_USE_NEW_STATIC);
1937             offset += coeff_token_vlc_tables_size[i];
1938         }
1939         /*
1940          * This is a one time safety check to make sure that
1941          * the packed static coeff_token_vlc table sizes
1942          * were initialized correctly.
1943          */
1944         assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
1945
1946         for(i=0; i<3; i++){
1947             chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1948             chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1949             init_vlc(&chroma_dc_total_zeros_vlc[i],
1950                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1951                      &chroma_dc_total_zeros_len [i][0], 1, 1,
1952                      &chroma_dc_total_zeros_bits[i][0], 1, 1,
1953                      INIT_VLC_USE_NEW_STATIC);
1954         }
1955         for(i=0; i<15; i++){
1956             total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1957             total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1958             init_vlc(&total_zeros_vlc[i],
1959                      TOTAL_ZEROS_VLC_BITS, 16,
1960                      &total_zeros_len [i][0], 1, 1,
1961                      &total_zeros_bits[i][0], 1, 1,
1962                      INIT_VLC_USE_NEW_STATIC);
1963         }
1964
1965         for(i=0; i<6; i++){
1966             run_vlc[i].table = run_vlc_tables[i];
1967             run_vlc[i].table_allocated = run_vlc_tables_size;
1968             init_vlc(&run_vlc[i],
1969                      RUN_VLC_BITS, 7,
1970                      &run_len [i][0], 1, 1,
1971                      &run_bits[i][0], 1, 1,
1972                      INIT_VLC_USE_NEW_STATIC);
1973         }
1974         run7_vlc.table = run7_vlc_table,
1975         run7_vlc.table_allocated = run7_vlc_table_size;
1976         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1977                  &run_len [6][0], 1, 1,
1978                  &run_bits[6][0], 1, 1,
1979                  INIT_VLC_USE_NEW_STATIC);
1980
1981         init_cavlc_level_tab();
1982     }
1983 }
1984
1985 static void free_tables(H264Context *h){
1986     int i;
1987     H264Context *hx;
1988     av_freep(&h->intra4x4_pred_mode);
1989     av_freep(&h->chroma_pred_mode_table);
1990     av_freep(&h->cbp_table);
1991     av_freep(&h->mvd_table[0]);
1992     av_freep(&h->mvd_table[1]);
1993     av_freep(&h->direct_table);
1994     av_freep(&h->non_zero_count);
1995     av_freep(&h->slice_table_base);
1996     h->slice_table= NULL;
1997
1998     av_freep(&h->mb2b_xy);
1999     av_freep(&h->mb2b8_xy);
2000
2001     for(i = 0; i < h->s.avctx->thread_count; i++) {
2002         hx = h->thread_context[i];
2003         if(!hx) continue;
2004         av_freep(&hx->top_borders[1]);
2005         av_freep(&hx->top_borders[0]);
2006         av_freep(&hx->s.obmc_scratchpad);
2007     }
2008 }
2009
2010 static void init_dequant8_coeff_table(H264Context *h){
2011     int i,q,x;
2012     const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2013     h->dequant8_coeff[0] = h->dequant8_buffer[0];
2014     h->dequant8_coeff[1] = h->dequant8_buffer[1];
2015
2016     for(i=0; i<2; i++ ){
2017         if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2018             h->dequant8_coeff[1] = h->dequant8_buffer[0];
2019             break;
2020         }
2021
2022         for(q=0; q<52; q++){
2023             int shift = div6[q];
2024             int idx = rem6[q];
2025             for(x=0; x<64; x++)
2026                 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2027                     ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2028                     h->pps.scaling_matrix8[i][x]) << shift;
2029         }
2030     }
2031 }
2032
2033 static void init_dequant4_coeff_table(H264Context *h){
2034     int i,j,q,x;
2035     const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2036     for(i=0; i<6; i++ ){
2037         h->dequant4_coeff[i] = h->dequant4_buffer[i];
2038         for(j=0; j<i; j++){
2039             if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2040                 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2041                 break;
2042             }
2043         }
2044         if(j<i)
2045             continue;
2046
2047         for(q=0; q<52; q++){
2048             int shift = div6[q] + 2;
2049             int idx = rem6[q];
2050             for(x=0; x<16; x++)
2051                 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2052                     ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2053                     h->pps.scaling_matrix4[i][x]) << shift;
2054         }
2055     }
2056 }
2057
2058 static void init_dequant_tables(H264Context *h){
2059     int i,x;
2060     init_dequant4_coeff_table(h);
2061     if(h->pps.transform_8x8_mode)
2062         init_dequant8_coeff_table(h);
2063     if(h->sps.transform_bypass){
2064         for(i=0; i<6; i++)
2065             for(x=0; x<16; x++)
2066                 h->dequant4_coeff[i][0][x] = 1<<6;
2067         if(h->pps.transform_8x8_mode)
2068             for(i=0; i<2; i++)
2069                 for(x=0; x<64; x++)
2070                     h->dequant8_coeff[i][0][x] = 1<<6;
2071     }
2072 }
2073
2074
2075 /**
2076  * allocates tables.
2077  * needs width/height
2078  */
2079 static int alloc_tables(H264Context *h){
2080     MpegEncContext * const s = &h->s;
2081     const int big_mb_num= s->mb_stride * (s->mb_height+1);
2082     int x,y;
2083
2084     CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t))
2085
2086     CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
2087     CHECKED_ALLOCZ(h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
2088     CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2089
2090     CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2091     CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2092     CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2093     CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
2094
2095     memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride)  * sizeof(*h->slice_table_base));
2096     h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2097
2098     CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint32_t));
2099     CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2100     for(y=0; y<s->mb_height; y++){
2101         for(x=0; x<s->mb_width; x++){
2102             const int mb_xy= x + y*s->mb_stride;
2103             const int b_xy = 4*x + 4*y*h->b_stride;
2104             const int b8_xy= 2*x + 2*y*h->b8_stride;
2105
2106             h->mb2b_xy [mb_xy]= b_xy;
2107             h->mb2b8_xy[mb_xy]= b8_xy;
2108         }
2109     }
2110
2111     s->obmc_scratchpad = NULL;
2112
2113     if(!h->dequant4_coeff[0])
2114         init_dequant_tables(h);
2115
2116     return 0;
2117 fail:
2118     free_tables(h);
2119     return -1;
2120 }
2121
2122 /**
2123  * Mimic alloc_tables(), but for every context thread.
2124  */
2125 static void clone_tables(H264Context *dst, H264Context *src){
2126     dst->intra4x4_pred_mode       = src->intra4x4_pred_mode;
2127     dst->non_zero_count           = src->non_zero_count;
2128     dst->slice_table              = src->slice_table;
2129     dst->cbp_table                = src->cbp_table;
2130     dst->mb2b_xy                  = src->mb2b_xy;
2131     dst->mb2b8_xy                 = src->mb2b8_xy;
2132     dst->chroma_pred_mode_table   = src->chroma_pred_mode_table;
2133     dst->mvd_table[0]             = src->mvd_table[0];
2134     dst->mvd_table[1]             = src->mvd_table[1];
2135     dst->direct_table             = src->direct_table;
2136
2137     dst->s.obmc_scratchpad = NULL;
2138     ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2139 }
2140
2141 /**
2142  * Init context
2143  * Allocate buffers which are not shared amongst multiple threads.
2144  */
2145 static int context_init(H264Context *h){
2146     CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2147     CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2148
2149     return 0;
2150 fail:
2151     return -1; // free_tables will clean up for us
2152 }
2153
2154 static av_cold void common_init(H264Context *h){
2155     MpegEncContext * const s = &h->s;
2156
2157     s->width = s->avctx->width;
2158     s->height = s->avctx->height;
2159     s->codec_id= s->avctx->codec->id;
2160
2161     ff_h264_pred_init(&h->hpc, s->codec_id);
2162
2163     h->dequant_coeff_pps= -1;
2164     s->unrestricted_mv=1;
2165     s->decode=1; //FIXME
2166
2167     dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
2168
2169     memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2170     memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
2171 }
2172
2173 static av_cold int decode_init(AVCodecContext *avctx){
2174     H264Context *h= avctx->priv_data;
2175     MpegEncContext * const s = &h->s;
2176
2177     MPV_decode_defaults(s);
2178
2179     s->avctx = avctx;
2180     common_init(h);
2181
2182     s->out_format = FMT_H264;
2183     s->workaround_bugs= avctx->workaround_bugs;
2184
2185     // set defaults
2186 //    s->decode_mb= ff_h263_decode_mb;
2187     s->quarter_sample = 1;
2188     s->low_delay= 1;
2189
2190     if(avctx->codec_id == CODEC_ID_SVQ3)
2191         avctx->pix_fmt= PIX_FMT_YUVJ420P;
2192     else if(avctx->codec_id == CODEC_ID_H264_VDPAU)
2193         avctx->pix_fmt= PIX_FMT_VDPAU_H264;
2194     else
2195         avctx->pix_fmt= PIX_FMT_YUV420P;
2196
2197     decode_init_vlc();
2198
2199     if(avctx->extradata_size > 0 && avctx->extradata &&
2200        *(char *)avctx->extradata == 1){
2201         h->is_avc = 1;
2202         h->got_avcC = 0;
2203     } else {
2204         h->is_avc = 0;
2205     }
2206
2207     h->thread_context[0] = h;
2208     h->outputed_poc = INT_MIN;
2209     h->prev_poc_msb= 1<<16;
2210     return 0;
2211 }
2212
2213 static int frame_start(H264Context *h){
2214     MpegEncContext * const s = &h->s;
2215     int i;
2216
2217     if(MPV_frame_start(s, s->avctx) < 0)
2218         return -1;
2219     ff_er_frame_start(s);
2220     /*
2221      * MPV_frame_start uses pict_type to derive key_frame.
2222      * This is incorrect for H.264; IDR markings must be used.
2223      * Zero here; IDR markings per slice in frame or fields are ORed in later.
2224      * See decode_nal_units().
2225      */
2226     s->current_picture_ptr->key_frame= 0;
2227
2228     assert(s->linesize && s->uvlinesize);
2229
2230     for(i=0; i<16; i++){
2231         h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2232         h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2233     }
2234     for(i=0; i<4; i++){
2235         h->block_offset[16+i]=
2236         h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2237         h->block_offset[24+16+i]=
2238         h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2239     }
2240
2241     /* can't be in alloc_tables because linesize isn't known there.
2242      * FIXME: redo bipred weight to not require extra buffer? */
2243     for(i = 0; i < s->avctx->thread_count; i++)
2244         if(!h->thread_context[i]->s.obmc_scratchpad)
2245             h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2246
2247     /* some macroblocks will be accessed before they're available */
2248     if(FRAME_MBAFF || s->avctx->thread_count > 1)
2249         memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
2250
2251 //    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2252
2253     // We mark the current picture as non-reference after allocating it, so
2254     // that if we break out due to an error it can be released automatically
2255     // in the next MPV_frame_start().
2256     // SVQ3 as well as most other codecs have only last/next/current and thus
2257     // get released even with set reference, besides SVQ3 and others do not
2258     // mark frames as reference later "naturally".
2259     if(s->codec_id != CODEC_ID_SVQ3)
2260         s->current_picture_ptr->reference= 0;
2261
2262     s->current_picture_ptr->field_poc[0]=
2263     s->current_picture_ptr->field_poc[1]= INT_MAX;
2264     assert(s->current_picture_ptr->long_ref==0);
2265
2266     return 0;
2267 }
2268
/**
 * Saves border samples of the macroblock at s->mb_x into h->left_border
 * and h->top_borders.
 * The samples stored are column 15 of the luma block and column 7 of each
 * chroma block (into left_border) and the last luma / chroma rows (into
 * top_borders[top_idx][mb_x]: luma at bytes 0..15, cb at 16..23, cr at
 * 24..31). Chroma is skipped when gray-only decoding is active.
 *
 * @param src_y,src_cb,src_cr pointers to the first sample of each plane
 *                            of the macroblock
 * @param linesize,uvlinesize line strides used to step through the planes
 * @param simple              nonzero disables the FRAME_MBAFF handling
 *
 * NOTE(review): the 8-byte copies go through uint64_t casts of uint8_t
 * pointers; this presumably relies on suitably aligned buffers - confirm.
 */
static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
    MpegEncContext * const s = &h->s;
    int i;
    int step    = 1;
    int offset  = 1;
    int uvoffset= 1;
    int top_idx = 1;
    int skiplast= 0;

    // step back one line, so that row i of the macroblock is at (i+1)*linesize
    src_y  -=   linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

    if(!simple && FRAME_MBAFF){
        if(s->mb_y&1){
            // odd macroblock row
            offset  = MB_MBAFF ? 1 : 17;
            uvoffset= MB_MBAFF ? 1 : 9;
            if(!MB_MBAFF){
                // also keep the second to last luma / chroma rows in top_borders[0]
                *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y +  15*linesize);
                *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
                if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
                    *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
                }
            }
        }else{
            // even macroblock row
            if(!MB_MBAFF){
                h->left_border[0]= h->top_borders[0][s->mb_x][15];
                if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    h->left_border[34   ]= h->top_borders[0][s->mb_x][16+7  ];
                    h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
                }
                // the loops below then stop one row early
                skiplast= 1;
            }
            offset  =
            uvoffset=
            top_idx = MB_MBAFF ? 0 : 1;
        }
        step= MB_MBAFF ? 2 : 1;
    }

    // There are two lines saved, the line above the top macroblock of a pair,
    // and the line above the bottom macroblock
    // The previous top border sample is read before top_borders is overwritten below.
    h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
    for(i=1; i<17 - skiplast; i++){
        h->left_border[offset+i*step]= src_y[15+i*  linesize];
    }

    // last luma row of the macroblock
    *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y +  16*linesize);
    *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);

    if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        // same for chroma: cb starts at left_border[34], cr at left_border[34+18]
        h->left_border[uvoffset+34   ]= h->top_borders[top_idx][s->mb_x][16+7];
        h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
        for(i=1; i<9 - skiplast; i++){
            h->left_border[uvoffset+34   +i*step]= src_cb[7+i*uvlinesize];
            h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
        }
        *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
        *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
    }
}
2331
/**
 * Exchanges the samples left of and above the macroblock in the picture
 * with the copies kept in h->left_border / h->top_borders.
 * hl_decode_mb_internal() calls it with xchg=1 before intra prediction and
 * with xchg=0 afterwards, when the deblocking filter is enabled.
 *
 * @param src_y,src_cb,src_cr pointers to the first sample of each plane
 *                            of the macroblock
 * @param linesize,uvlinesize line strides used to step through the planes
 * @param xchg                nonzero: full swap; zero: only the picture
 *                            side is written for the entries that pass xchg
 * @param simple              nonzero disables the FRAME_MBAFF handling
 */
static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
    MpegEncContext * const s = &h->s;
    int temp8, i;
    uint64_t temp64;
    int deblock_left;
    int deblock_top;
    int mb_xy;
    int step    = 1;
    int offset  = 1;
    int uvoffset= 1;
    int top_idx = 1;

    // same left_border / top_borders addressing as in backup_mb_border()
    if(!simple && FRAME_MBAFF){
        if(s->mb_y&1){
            offset  = MB_MBAFF ? 1 : 17;
            uvoffset= MB_MBAFF ? 1 : 9;
        }else{
            offset  =
            uvoffset=
            top_idx = MB_MBAFF ? 0 : 1;
        }
        step= MB_MBAFF ? 2 : 1;
    }

    if(h->deblocking_filter == 2) {
        // mode 2: a neighbour only counts if it has the same slice_table entry
        mb_xy = h->mb_xy;
        deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
        deblock_top  = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
    } else {
        // otherwise a neighbour counts whenever one exists at that position
        deblock_left = (s->mb_x > 0);
        deblock_top =  (s->mb_y > !!MB_FIELD);
    }

    // move to the sample above and left of the macroblock
    src_y  -=   linesize + 1;
    src_cb -= uvlinesize + 1;
    src_cr -= uvlinesize + 1;

// b always receives the old value of a; a receives b only if xchg is set
#define XCHG(a,b,t,xchg)\
t= a;\
if(xchg)\
    a= b;\
b= t;

    if(deblock_left){
        // left luma column; row 0 (the corner) is skipped without a top neighbour
        for(i = !deblock_top; i<16; i++){
            XCHG(h->left_border[offset+i*step], src_y [i*  linesize], temp8, xchg);
        }
        // i == 16 here: the last entry is always fully swapped
        XCHG(h->left_border[offset+i*step], src_y [i*  linesize], temp8, 1);
    }

    if(deblock_top){
        XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
        XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
        // first 8 bytes of the border belonging to the next macroblock column
        if(s->mb_x+1 < s->mb_width){
            XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
        }
    }

    if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        // chroma: cb starts at left_border[34], cr at left_border[34+18]
        if(deblock_left){
            for(i = !deblock_top; i<8; i++){
                XCHG(h->left_border[uvoffset+34   +i*step], src_cb[i*uvlinesize], temp8, xchg);
                XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
            }
            XCHG(h->left_border[uvoffset+34   +i*step], src_cb[i*uvlinesize], temp8, 1);
            XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
        }
        if(deblock_top){
            XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
            XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
        }
    }
}
2405
/**
 * Reconstructs the current macroblock (s->mb_x, s->mb_y) into
 * s->current_picture: prediction (intra or hl_motion()), addition of the
 * residual held in h->mb, clearing of h->mb and, if h->deblocking_filter
 * is set, border backup and loop filtering.
 *
 * @param simple nonzero selects the reduced path: every "!simple &&"
 *               condition below is false then, i.e. no MB_FIELD / MBAFF
 *               handling, no intra PCM, no transform bypass, no gray-only
 *               check, and the codec is taken to be H.264 (is_h264 = 1).
 *               Callers pass a literal so the checks can fold away.
 */
static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
    MpegEncContext * const s = &h->s;
    const int mb_x= s->mb_x;
    const int mb_y= s->mb_y;
    const int mb_xy= h->mb_xy;
    const int mb_type= s->current_picture.mb_type[mb_xy];
    uint8_t  *dest_y, *dest_cb, *dest_cr;
    int linesize, uvlinesize /*dct_offset*/;
    int i;
    int *block_offset = &h->block_offset[0];
    const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
    const int is_h264 = simple || s->codec_id == CODEC_ID_H264;
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
    void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);

    // first sample of the macroblock in each plane (16x16 luma, 8x8 chroma)
    dest_y  = s->current_picture.data[0] + (mb_x + mb_y * s->linesize  ) * 16;
    dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
    dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;

    s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
    s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);

    if (!simple && MB_FIELD) {
        // field macroblock: doubled line strides and the second half of block_offset
        linesize   = h->mb_linesize   = s->linesize * 2;
        uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
        block_offset = &h->block_offset[24];
        if(mb_y&1){ //FIXME move out of this function?
            dest_y -= s->linesize*15;
            dest_cb-= s->uvlinesize*7;
            dest_cr-= s->uvlinesize*7;
        }
        if(FRAME_MBAFF) {
            int list;
            // rewrite the cached reference indices as (16+ref)^(mb_y&1)
            for(list=0; list<h->list_count; list++){
                if(!USES_LIST(mb_type, list))
                    continue;
                if(IS_16X16(mb_type)){
                    int8_t *ref = &h->ref_cache[list][scan8[0]];
                    fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
                }else{
                    for(i=0; i<16; i+=4){
                        int ref = h->ref_cache[list][scan8[i]];
                        if(ref >= 0)
                            fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
                    }
                }
            }
        }
    } else {
        linesize   = h->mb_linesize   = s->linesize;
        uvlinesize = h->mb_uvlinesize = s->uvlinesize;
//        dct_offset = s->linesize * 16;
    }

    if (!simple && IS_INTRA_PCM(mb_type)) {
        // PCM: the samples are copied from h->mb without prediction or transform
        for (i=0; i<16; i++) {
            memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
        }
        for (i=0; i<8; i++) {
            memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4,  8);
            memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4,  8);
        }
    } else {
        if(IS_INTRA(mb_type)){
            // swap in the saved border samples for the duration of intra prediction
            if(h->deblocking_filter)
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);

            if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
                h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
            }

            if(IS_INTRA4x4(mb_type)){
                // prediction and residual are interleaved block by block here
                if(simple || !s->encoding){
                    if(IS_8x8DCT(mb_type)){
                        if(transform_bypass){
                            idct_dc_add =
                            idct_add    = s->dsp.add_pixels8;
                        }else{
                            idct_dc_add = s->dsp.h264_idct8_dc_add;
                            idct_add    = s->dsp.h264_idct8_add;
                        }
                        for(i=0; i<16; i+=4){
                            uint8_t * const ptr= dest_y + block_offset[i];
                            const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
                            if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
                                h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
                            }else{
                                const int nnz = h->non_zero_count_cache[ scan8[i] ];
                                h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
                                                            (h->topright_samples_available<<i)&0x4000, linesize);
                                if(nnz){
                                    if(nnz == 1 && h->mb[i*16])
                                        idct_dc_add(ptr, h->mb + i*16, linesize);
                                    else
                                        idct_add   (ptr, h->mb + i*16, linesize);
                                }
                            }
                        }
                    }else{
                        if(transform_bypass){
                            idct_dc_add =
                            idct_add    = s->dsp.add_pixels4;
                        }else{
                            idct_dc_add = s->dsp.h264_idct_dc_add;
                            idct_add    = s->dsp.h264_idct_add;
                        }
                        for(i=0; i<16; i++){
                            uint8_t * const ptr= dest_y + block_offset[i];
                            const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];

                            if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
                                h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
                            }else{
                                uint8_t *topright;
                                int nnz, tr;
                                if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
                                    const int topright_avail= (h->topright_samples_available<<i)&0x8000;
                                    assert(mb_y || linesize <= block_offset[i]);
                                    if(!topright_avail){
                                        // no top right block: replicate the last sample of the row above
                                        tr= ptr[3 - linesize]*0x01010101;
                                        topright= (uint8_t*) &tr;
                                    }else
                                        topright= ptr + 4 - linesize;
                                }else
                                    topright= NULL;

                                h->hpc.pred4x4[ dir ](ptr, topright, linesize);
                                nnz = h->non_zero_count_cache[ scan8[i] ];
                                if(nnz){
                                    if(is_h264){
                                        if(nnz == 1 && h->mb[i*16])
                                            idct_dc_add(ptr, h->mb + i*16, linesize);
                                        else
                                            idct_add   (ptr, h->mb + i*16, linesize);
                                    }else
                                        svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
                                }
                            }
                        }
                    }
                }
            }else{
                // intra 16x16: predict the whole luma block, then dequantise the luma DC
                h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
                if(is_h264){
                    if(!transform_bypass)
                        h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
                }else
                    svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
            }
            // undo the border swap done before prediction
            if(h->deblocking_filter)
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
        }else if(is_h264){
            // inter macroblock: motion compensated prediction
            hl_motion(h, dest_y, dest_cb, dest_cr,
                      s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                      s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                      s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
        }


        // luma residual for everything except intra 4x4 (already added above)
        if(!IS_INTRA4x4(mb_type)){
            if(is_h264){
                if(IS_INTRA16x16(mb_type)){
                    if(transform_bypass){
                        if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
                            h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
                        }else{
                            for(i=0; i<16; i++){
                                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
                                    s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
                            }
                        }
                    }else{
                         s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
                    }
                }else if(h->cbp&15){
                    if(transform_bypass){
                        const int di = IS_8x8DCT(mb_type) ? 4 : 1;
                        idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
                        for(i=0; i<16; i+=di){
                            if(h->non_zero_count_cache[ scan8[i] ]){
                                idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
                            }
                        }
                    }else{
                        if(IS_8x8DCT(mb_type)){
                            s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
                        }else{
                            s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
                        }
                    }
                }
            }else{
                for(i=0; i<16; i++){
                    if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
                        uint8_t * const ptr= dest_y + block_offset[i];
                        svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
                    }
                }
            }
        }

        // chroma residual: blocks 16..19 go to dest[0] (cb), 20..23 to dest[1] (cr)
        if((simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
            uint8_t *dest[2] = {dest_cb, dest_cr};
            if(transform_bypass){
                if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
                }else{
                    idct_add = s->dsp.add_pixels4;
                    for(i=16; i<16+8; i++){
                        if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
                            idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
                    }
                }
            }else{
                chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
                chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
                if(is_h264){
                    idct_add = s->dsp.h264_idct_add;
                    idct_dc_add = s->dsp.h264_idct_dc_add;
                    for(i=16; i<16+8; i++){
                        if(h->non_zero_count_cache[ scan8[i] ])
                            idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
                        else if(h->mb[i*16])
                            idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
                    }
                }else{
                    for(i=16; i<16+8; i++){
                        if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
                            uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
                            svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
                        }
                    }
                }
            }
        }
    }
    // the coefficient buffer is cleared for the next macroblock
    if(h->cbp || IS_INTRA(mb_type))
        s->dsp.clear_blocks(h->mb);

    if(h->deblocking_filter) {
        // save the unfiltered border samples first, then filter the macroblock
        backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
        fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
        h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
        h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
        if (!simple && FRAME_MBAFF) {
            filter_mb     (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
        } else {
            filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
        }
    }
}
2659
2660 /**
2661  * Process a macroblock; this case avoids checks for expensive uncommon cases.
2662  */
2663 static void hl_decode_mb_simple(H264Context *h){
2664     hl_decode_mb_internal(h, 1);
2665 }
2666
2667 /**
2668  * Process a macroblock; this handles edge cases, such as interlacing.
2669  */
2670 static void av_noinline hl_decode_mb_complex(H264Context *h){
2671     hl_decode_mb_internal(h, 0);
2672 }
2673
2674 static void hl_decode_mb(H264Context *h){
2675     MpegEncContext * const s = &h->s;
2676     const int mb_xy= h->mb_xy;
2677     const int mb_type= s->current_picture.mb_type[mb_xy];
2678     int is_complex = ENABLE_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
2679
2680     if(ENABLE_H264_ENCODER && !s->decode)
2681         return;
2682
2683     if (is_complex)
2684         hl_decode_mb_complex(h);
2685     else hl_decode_mb_simple(h);
2686 }
2687
2688 static void pic_as_field(Picture *pic, const int parity){
2689     int i;
2690     for (i = 0; i < 4; ++i) {
2691         if (parity == PICT_BOTTOM_FIELD)
2692             pic->data[i] += pic->linesize[i];
2693         pic->reference = parity;
2694         pic->linesize[i] *= 2;
2695     }
2696     pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
2697 }
2698
2699 static int split_field_copy(Picture *dest, Picture *src,
2700                             int parity, int id_add){
2701     int match = !!(src->reference & parity);
2702
2703     if (match) {
2704         *dest = *src;
2705         if(parity != PICT_FRAME){
2706             pic_as_field(dest, parity);
2707             dest->pic_id *= 2;
2708             dest->pic_id += id_add;
2709         }
2710     }
2711
2712     return match;
2713 }
2714
2715 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2716     int i[2]={0};
2717     int index=0;
2718
2719     while(i[0]<len || i[1]<len){
2720         while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2721             i[0]++;
2722         while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2723             i[1]++;
2724         if(i[0] < len){
2725             in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2726             split_field_copy(&def[index++], in[ i[0]++ ], sel  , 1);
2727         }
2728         if(i[1] < len){
2729             in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2730             split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
2731         }
2732     }
2733
2734     return index;
2735 }
2736
2737 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2738     int i, best_poc;
2739     int out_i= 0;
2740
2741     for(;;){
2742         best_poc= dir ? INT_MIN : INT_MAX;
2743
2744         for(i=0; i<len; i++){
2745             const int poc= src[i]->poc;
2746             if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2747                 best_poc= poc;
2748                 sorted[out_i]= src[i];
2749             }
2750         }
2751         if(best_poc == (dir ? INT_MIN : INT_MAX))
2752             break;
2753         limit= sorted[out_i++]->poc - dir;
2754     }
2755     return out_i;
2756 }
2757
2758 /**
2759  * fills the default_ref_list.
2760  */
2761 static int fill_default_ref_list(H264Context *h){
2762     MpegEncContext * const s = &h->s;
2763     int i, len;
2764
2765     if(h->slice_type_nos==FF_B_TYPE){
2766         Picture *sorted[32];
2767         int cur_poc, list;
2768         int lens[2];
2769
2770         if(FIELD_PICTURE)
2771             cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2772         else
2773             cur_poc= s->current_picture_ptr->poc;
2774
2775         for(list= 0; list<2; list++){
2776             len= add_sorted(sorted    , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2777             len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2778             assert(len<=32);
2779             len= build_def_list(h->default_ref_list[list]    , sorted     , len, 0, s->picture_structure);
2780             len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2781             assert(len<=32);
2782
2783             if(len < h->ref_count[list])
2784                 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
2785             lens[list]= len;
2786         }
2787
2788         if(lens[0] == lens[1] && lens[1] > 1){
2789             for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2790             if(i == lens[0])
2791                 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2792         }
2793     }else{
2794         len = build_def_list(h->default_ref_list[0]    , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2795         len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16                , 1, s->picture_structure);
2796         assert(len <= 32);
2797         if(len < h->ref_count[0])
2798             memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
2799     }
2800 #ifdef TRACE
2801     for (i=0; i<h->ref_count[0]; i++) {
2802         tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2803     }
2804     if(h->slice_type_nos==FF_B_TYPE){
2805         for (i=0; i<h->ref_count[1]; i++) {
2806             tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2807         }
2808     }
2809 #endif
2810     return 0;
2811 }
2812
2813 static void print_short_term(H264Context *h);
2814 static void print_long_term(H264Context *h);
2815
2816 /**
2817  * Extract structure information about the picture described by pic_num in
2818  * the current decoding context (frame or field). Note that pic_num is
2819  * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2820  * @param pic_num picture number for which to extract structure information
2821  * @param structure one of PICT_XXX describing structure of picture
2822  *                      with pic_num
2823  * @return frame number (short term) or long term index of picture
2824  *         described by pic_num
2825  */
2826 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2827     MpegEncContext * const s = &h->s;
2828
2829     *structure = s->picture_structure;
2830     if(FIELD_PICTURE){
2831         if (!(pic_num & 1))
2832             /* opposite field */
2833             *structure ^= PICT_FRAME;
2834         pic_num >>= 1;
2835     }
2836
2837     return pic_num;
2838 }
2839
2840 static int decode_ref_pic_list_reordering(H264Context *h){
2841     MpegEncContext * const s = &h->s;
2842     int list, index, pic_structure;
2843
2844     print_short_term(h);
2845     print_long_term(h);
2846
2847     for(list=0; list<h->list_count; list++){
2848         memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
2849
2850         if(get_bits1(&s->gb)){
2851             int pred= h->curr_pic_num;
2852
2853             for(index=0; ; index++){
2854                 unsigned int reordering_of_pic_nums_idc= get_ue_golomb_31(&s->gb);
2855                 unsigned int pic_id;
2856                 int i;
2857                 Picture *ref = NULL;
2858
2859                 if(reordering_of_pic_nums_idc==3)
2860                     break;
2861
2862                 if(index >= h->ref_count[list]){
2863                     av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2864                     return -1;
2865                 }
2866
2867                 if(reordering_of_pic_nums_idc<3){
2868                     if(reordering_of_pic_nums_idc<2){
2869                         const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2870                         int frame_num;
2871
2872                         if(abs_diff_pic_num > h->max_pic_num){
2873                             av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2874                             return -1;
2875                         }
2876
2877                         if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2878                         else                                pred+= abs_diff_pic_num;
2879                         pred &= h->max_pic_num - 1;
2880
2881                         frame_num = pic_num_extract(h, pred, &pic_structure);
2882
2883                         for(i= h->short_ref_count-1; i>=0; i--){
2884                             ref = h->short_ref[i];
2885                             assert(ref->reference);
2886                             assert(!ref->long_ref);
2887                             if(
2888                                    ref->frame_num == frame_num &&
2889                                    (ref->reference & pic_structure)
2890                               )
2891                                 break;
2892                         }
2893                         if(i>=0)
2894                             ref->pic_id= pred;
2895                     }else{
2896                         int long_idx;
2897                         pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2898
2899                         long_idx= pic_num_extract(h, pic_id, &pic_structure);
2900
2901                         if(long_idx>31){
2902                             av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2903                             return -1;
2904                         }
2905                         ref = h->long_ref[long_idx];
2906                         assert(!(ref && !ref->reference));
2907                         if(ref && (ref->reference & pic_structure)){
2908                             ref->pic_id= pic_id;
2909                             assert(ref->long_ref);
2910                             i=0;
2911                         }else{
2912                             i=-1;
2913                         }
2914                     }
2915
2916                     if (i < 0) {
2917                         av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2918                         memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
2919                     } else {
2920                         for(i=index; i+1<h->ref_count[list]; i++){
2921                             if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2922                                 break;
2923                         }
2924                         for(; i > index; i--){
2925                             h->ref_list[list][i]= h->ref_list[list][i-1];
2926                         }
2927                         h->ref_list[list][index]= *ref;
2928                         if (FIELD_PICTURE){
2929                             pic_as_field(&h->ref_list[list][index], pic_structure);
2930                         }
2931                     }
2932                 }else{
2933                     av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
2934                     return -1;
2935                 }
2936             }
2937         }
2938     }
2939     for(list=0; list<h->list_count; list++){
2940         for(index= 0; index < h->ref_count[list]; index++){
2941             if(!h->ref_list[list][index].data[0]){
2942                 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2943                 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
2944             }
2945         }
2946     }
2947
2948     return 0;
2949 }
2950
2951 static void fill_mbaff_ref_list(H264Context *h){
2952     int list, i, j;
2953     for(list=0; list<2; list++){ //FIXME try list_count
2954         for(i=0; i<h->ref_count[list]; i++){
2955             Picture *frame = &h->ref_list[list][i];
2956             Picture *field = &h->ref_list[list][16+2*i];
2957             field[0] = *frame;
2958             for(j=0; j<3; j++)
2959                 field[0].linesize[j] <<= 1;
2960             field[0].reference = PICT_TOP_FIELD;
2961             field[0].poc= field[0].field_poc[0];
2962             field[1] = field[0];
2963             for(j=0; j<3; j++)
2964                 field[1].data[j] += frame->linesize[j];
2965             field[1].reference = PICT_BOTTOM_FIELD;
2966             field[1].poc= field[1].field_poc[1];
2967
2968             h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2969             h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2970             for(j=0; j<2; j++){
2971                 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2972                 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
2973             }
2974         }
2975     }
2976     for(j=0; j<h->ref_count[1]; j++){
2977         for(i=0; i<h->ref_count[0]; i++)
2978             h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2979         memcpy(h->implicit_weight[16+2*j],   h->implicit_weight[j], sizeof(*h->implicit_weight));
2980         memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
2981     }
2982 }
2983
2984 static int pred_weight_table(H264Context *h){
2985     MpegEncContext * const s = &h->s;
2986     int list, i;
2987     int luma_def, chroma_def;
2988
2989     h->use_weight= 0;
2990     h->use_weight_chroma= 0;
2991     h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2992     h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
2993     luma_def = 1<<h->luma_log2_weight_denom;
2994     chroma_def = 1<<h->chroma_log2_weight_denom;
2995
2996     for(list=0; list<2; list++){
2997         for(i=0; i<h->ref_count[list]; i++){
2998             int luma_weight_flag, chroma_weight_flag;
2999
3000             luma_weight_flag= get_bits1(&s->gb);
3001             if(luma_weight_flag){
3002                 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3003                 h->luma_offset[list][i]= get_se_golomb(&s->gb);
3004                 if(   h->luma_weight[list][i] != luma_def
3005                    || h->luma_offset[list][i] != 0)
3006                     h->use_weight= 1;
3007             }else{
3008                 h->luma_weight[list][i]= luma_def;
3009                 h->luma_offset[list][i]= 0;
3010             }
3011
3012             if(CHROMA){
3013                 chroma_weight_flag= get_bits1(&s->gb);
3014                 if(chroma_weight_flag){
3015                     int j;
3016                     for(j=0; j<2; j++){
3017                         h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3018                         h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3019                         if(   h->chroma_weight[list][i][j] != chroma_def
3020                         || h->chroma_offset[list][i][j] != 0)
3021                             h->use_weight_chroma= 1;
3022                     }
3023                 }else{
3024                     int j;
3025                     for(j=0; j<2; j++){
3026                         h->chroma_weight[list][i][j]= chroma_def;
3027                         h->chroma_offset[list][i][j]= 0;
3028                     }
3029                 }
3030             }
3031         }
3032         if(h->slice_type_nos != FF_B_TYPE) break;
3033     }
3034     h->use_weight= h->use_weight || h->use_weight_chroma;
3035     return 0;
3036 }
3037
3038 static void implicit_weight_table(H264Context *h){
3039     MpegEncContext * const s = &h->s;
3040     int ref0, ref1;
3041     int cur_poc = s->current_picture_ptr->poc;
3042
3043     if(   h->ref_count[0] == 1 && h->ref_count[1] == 1
3044        && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3045         h->use_weight= 0;
3046         h->use_weight_chroma= 0;
3047         return;
3048     }
3049
3050     h->use_weight= 2;
3051     h->use_weight_chroma= 2;
3052     h->luma_log2_weight_denom= 5;
3053     h->chroma_log2_weight_denom= 5;
3054
3055     for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3056         int poc0 = h->ref_list[0][ref0].poc;
3057         for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3058             int poc1 = h->ref_list[1][ref1].poc;
3059             int td = av_clip(poc1 - poc0, -128, 127);
3060             if(td){
3061                 int tb = av_clip(cur_poc - poc0, -128, 127);
3062                 int tx = (16384 + (FFABS(td) >> 1)) / td;
3063                 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3064                 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3065                     h->implicit_weight[ref0][ref1] = 32;
3066                 else
3067                     h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3068             }else
3069                 h->implicit_weight[ref0][ref1] = 32;
3070         }
3071     }
3072 }
3073
3074 /**
3075  * Mark a picture as no longer needed for reference. The refmask
3076  * argument allows unreferencing of individual fields or the whole frame.
3077  * If the picture becomes entirely unreferenced, but is being held for
3078  * display purposes, it is marked as such.
3079  * @param refmask mask of fields to unreference; the mask is bitwise
3080  *                anded with the reference marking of pic
3081  * @return non-zero if pic becomes entirely unreferenced (except possibly
3082  *         for display purposes) zero if one of the fields remains in
3083  *         reference
3084  */
3085 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
3086     int i;
3087     if (pic->reference &= refmask) {
3088         return 0;
3089     } else {
3090         for(i = 0; h->delayed_pic[i]; i++)
3091             if(pic == h->delayed_pic[i]){
3092                 pic->reference=DELAYED_PIC_REF;
3093                 break;
3094             }
3095         return 1;
3096     }
3097 }
3098
3099 /**
3100  * instantaneous decoder refresh.
3101  */
3102 static void idr(H264Context *h){
3103     int i;
3104
3105     for(i=0; i<16; i++){
3106         remove_long(h, i, 0);
3107     }
3108     assert(h->long_ref_count==0);
3109
3110     for(i=0; i<h->short_ref_count; i++){
3111         unreference_pic(h, h->short_ref[i], 0);
3112         h->short_ref[i]= NULL;
3113     }
3114     h->short_ref_count=0;
3115     h->prev_frame_num= 0;
3116     h->prev_frame_num_offset= 0;
3117     h->prev_poc_msb=
3118     h->prev_poc_lsb= 0;
3119 }
3120
3121 /* forget old pics after a seek */
3122 static void flush_dpb(AVCodecContext *avctx){
3123     H264Context *h= avctx->priv_data;
3124     int i;
3125     for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3126         if(h->delayed_pic[i])
3127             h->delayed_pic[i]->reference= 0;
3128         h->delayed_pic[i]= NULL;
3129     }
3130     h->outputed_poc= INT_MIN;
3131     idr(h);
3132     if(h->s.current_picture_ptr)
3133         h->s.current_picture_ptr->reference= 0;
3134     h->s.first_field= 0;
3135     ff_mpeg_flush(avctx);
3136 }
3137
3138 /**
3139  * Find a Picture in the short term reference list by frame number.
3140  * @param frame_num frame number to search for
3141  * @param idx the index into h->short_ref where returned picture is found
3142  *            undefined if no picture found.
3143  * @return pointer to the found picture, or NULL if no pic with the provided
3144  *                 frame number is found
3145  */
3146 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3147     MpegEncContext * const s = &h->s;
3148     int i;
3149
3150     for(i=0; i<h->short_ref_count; i++){
3151         Picture *pic= h->short_ref[i];
3152         if(s->avctx->debug&FF_DEBUG_MMCO)
3153             av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3154         if(pic->frame_num == frame_num) {
3155             *idx = i;
3156             return pic;
3157         }
3158     }
3159     return NULL;
3160 }
3161
3162 /**
3163  * Remove a picture from the short term reference list by its index in
3164  * that list.  This does no checking on the provided index; it is assumed
3165  * to be valid. Other list entries are shifted down.
3166  * @param i index into h->short_ref of picture to remove.
3167  */
3168 static void remove_short_at_index(H264Context *h, int i){
3169     assert(i >= 0 && i < h->short_ref_count);
3170     h->short_ref[i]= NULL;
3171     if (--h->short_ref_count)
3172         memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3173 }
3174
3175 /**
3176  *
3177  * @return the removed picture or NULL if an error occurs
3178  */
3179 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3180     MpegEncContext * const s = &h->s;
3181     Picture *pic;
3182     int i;
3183
3184     if(s->avctx->debug&FF_DEBUG_MMCO)
3185         av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3186
3187     pic = find_short(h, frame_num, &i);
3188     if (pic){
3189         if(unreference_pic(h, pic, ref_mask))
3190         remove_short_at_index(h, i);
3191     }
3192
3193     return pic;
3194 }
3195
3196 /**
3197  * Remove a picture from the long term reference list by its index in
3198  * that list.
3199  * @return the removed picture or NULL if an error occurs
3200  */
3201 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3202     Picture *pic;
3203
3204     pic= h->long_ref[i];
3205     if (pic){
3206         if(unreference_pic(h, pic, ref_mask)){
3207             assert(h->long_ref[i]->long_ref == 1);
3208             h->long_ref[i]->long_ref= 0;
3209             h->long_ref[i]= NULL;
3210             h->long_ref_count--;
3211         }
3212     }
3213
3214     return pic;
3215 }
3216
3217 /**
3218  * print short term list
3219  */
3220 static void print_short_term(H264Context *h) {
3221     uint32_t i;
3222     if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3223         av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3224         for(i=0; i<h->short_ref_count; i++){
3225             Picture *pic= h->short_ref[i];
3226             av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3227         }
3228     }
3229 }
3230
3231 /**
3232  * print long term list
3233  */
3234 static void print_long_term(H264Context *h) {
3235     uint32_t i;
3236     if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3237         av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3238         for(i = 0; i < 16; i++){
3239             Picture *pic= h->long_ref[i];
3240             if (pic) {
3241                 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3242             }
3243         }
3244     }
3245 }
3246
3247 /**
3248  * Executes the reference picture marking (memory management control operations).
3249  */
3250 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3251     MpegEncContext * const s = &h->s;
3252     int i, j;
3253     int current_ref_assigned=0;
3254     Picture *pic;
3255
3256     if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3257         av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3258
3259     for(i=0; i<mmco_count; i++){
3260         int structure, frame_num;
3261         if(s->avctx->debug&FF_DEBUG_MMCO)
3262             av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
3263
3264         if(   mmco[i].opcode == MMCO_SHORT2UNUSED
3265            || mmco[i].opcode == MMCO_SHORT2LONG){
3266             frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3267             pic = find_short(h, frame_num, &j);
3268             if(!pic){
3269                 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3270                    || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3271                 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3272                 continue;
3273             }
3274         }
3275
3276         switch(mmco[i].opcode){
3277         case MMCO_SHORT2UNUSED:
3278             if(s->avctx->debug&FF_DEBUG_MMCO)
3279                 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3280             remove_short(h, frame_num, structure ^ PICT_FRAME);
3281             break;
3282         case MMCO_SHORT2LONG:
3283                 if (h->long_ref[mmco[i].long_arg] != pic)
3284                     remove_long(h, mmco[i].long_arg, 0);
3285
3286                 remove_short_at_index(h, j);
3287                 h->long_ref[ mmco[i].long_arg ]= pic;
3288                 if (h->long_ref[ mmco[i].long_arg ]){
3289                     h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3290                     h->long_ref_count++;
3291                 }
3292             break;
3293         case MMCO_LONG2UNUSED:
3294             j = pic_num_extract(h, mmco[i].long_arg, &structure);
3295             pic = h->long_ref[j];
3296             if (pic) {
3297                 remove_long(h, j, structure ^ PICT_FRAME);
3298             } else if(s->avctx->debug&FF_DEBUG_MMCO)
3299                 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3300             break;
3301         case MMCO_LONG:
3302                     // Comment below left from previous code as it is an interresting note.
3303                     /* First field in pair is in short term list or
3304                      * at a different long term index.
3305                      * This is not allowed; see 7.4.3.3, notes 2 and 3.
3306                      * Report the problem and keep the pair where it is,
3307                      * and mark this field valid.
3308                      */
3309
3310             if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3311                 remove_long(h, mmco[i].long_arg, 0);
3312
3313                 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3314                 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3315                 h->long_ref_count++;
3316             }
3317
3318             s->current_picture_ptr->reference |= s->picture_structure;
3319             current_ref_assigned=1;
3320             break;
3321         case MMCO_SET_MAX_LONG:
3322             assert(mmco[i].long_arg <= 16);
3323             // just remove the long term which index is greater than new max
3324             for(j = mmco[i].long_arg; j<16; j++){
3325                 remove_long(h, j, 0);
3326             }
3327             break;
3328         case MMCO_RESET:
3329             while(h->short_ref_count){
3330                 remove_short(h, h->short_ref[0]->frame_num, 0);
3331             }
3332             for(j = 0; j < 16; j++) {
3333                 remove_long(h, j, 0);
3334             }
3335             s->current_picture_ptr->poc=
3336             s->current_picture_ptr->field_poc[0]=
3337             s->current_picture_ptr->field_poc[1]=
3338             h->poc_lsb=
3339             h->poc_msb=
3340             h->frame_num=
3341             s->current_picture_ptr->frame_num= 0;
3342             break;
3343         default: assert(0);
3344         }
3345     }
3346
3347     if (!current_ref_assigned) {
3348         /* Second field of complementary field pair; the first field of
3349          * which is already referenced. If short referenced, it
3350          * should be first entry in short_ref. If not, it must exist
3351          * in long_ref; trying to put it on the short list here is an
3352          * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3353          */
3354         if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3355             /* Just mark the second field valid */
3356             s->current_picture_ptr->reference = PICT_FRAME;
3357         } else if (s->current_picture_ptr->long_ref) {
3358             av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3359                                              "assignment for second field "
3360                                              "in complementary field pair "
3361                                              "(first field is long term)\n");
3362         } else {
3363             pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3364             if(pic){
3365                 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3366             }
3367
3368             if(h->short_ref_count)
3369                 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3370
3371             h->short_ref[0]= s->current_picture_ptr;
3372             h->short_ref_count++;
3373             s->current_picture_ptr->reference |= s->picture_structure;
3374         }
3375     }
3376
3377     if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3378
3379         /* We have too many reference frames, probably due to corrupted
3380          * stream. Need to discard one frame. Prevents overrun of the
3381          * short_ref and long_ref buffers.
3382          */
3383         av_log(h->s.avctx, AV_LOG_ERROR,
3384                "number of reference frames exceeds max (probably "
3385                "corrupt input), discarding one\n");
3386
3387         if (h->long_ref_count && !h->short_ref_count) {
3388             for (i = 0; i < 16; ++i)
3389                 if (h->long_ref[i])
3390                     break;
3391
3392             assert(i < 16);
3393             remove_long(h, i, 0);
3394         } else {
3395             pic = h->short_ref[h->short_ref_count - 1];
3396             remove_short(h, pic->frame_num, 0);
3397         }
3398     }
3399
3400     print_short_term(h);
3401     print_long_term(h);
3402     return 0;
3403 }
3404
3405 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3406     MpegEncContext * const s = &h->s;
3407     int i;
3408
3409     h->mmco_index= 0;
3410     if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
3411         s->broken_link= get_bits1(gb) -1;
3412         if(get_bits1(gb)){
3413             h->mmco[0].opcode= MMCO_LONG;
3414             h->mmco[0].long_arg= 0;
3415             h->mmco_index= 1;
3416         }
3417     }else{
3418         if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3419             for(i= 0; i<MAX_MMCO_COUNT; i++) {
3420                 MMCOOpcode opcode= get_ue_golomb_31(gb);
3421
3422                 h->mmco[i].opcode= opcode;
3423                 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
3424                     h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3425 /*                    if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3426                         av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3427                         return -1;
3428                     }*/
3429                 }
3430                 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3431                     unsigned int long_arg= get_ue_golomb_31(gb);
3432                     if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3433                         av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3434                         return -1;
3435                     }
3436                     h->mmco[i].long_arg= long_arg;
3437                 }
3438
3439                 if(opcode > (unsigned)MMCO_LONG){
3440                     av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3441                     return -1;
3442                 }
3443                 if(opcode == MMCO_END)
3444                     break;
3445             }
3446             h->mmco_index= i;
3447         }else{
3448             assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3449
3450             if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3451                     !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3452                 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3453                 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3454                 h->mmco_index= 1;
3455                 if (FIELD_PICTURE) {
3456                     h->mmco[0].short_pic_num *= 2;
3457                     h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3458                     h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
3459                     h->mmco_index= 2;
3460                 }
3461             }
3462         }
3463     }
3464
3465     return 0;
3466 }
3467
3468 static int init_poc(H264Context *h){
3469     MpegEncContext * const s = &h->s;
3470     const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3471     int field_poc[2];
3472     Picture *cur = s->current_picture_ptr;
3473
3474     h->frame_num_offset= h->prev_frame_num_offset;
3475     if(h->frame_num < h->prev_frame_num)
3476         h->frame_num_offset += max_frame_num;
3477
3478     if(h->sps.poc_type==0){
3479         const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
3480
3481         if     (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3482             h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3483         else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3484             h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3485         else
3486             h->poc_msb = h->prev_poc_msb;
3487 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3488         field_poc[0] =
3489         field_poc[1] = h->poc_msb + h->poc_lsb;
3490         if(s->picture_structure == PICT_FRAME)
3491             field_poc[1] += h->delta_poc_bottom;
3492     }else if(h->sps.poc_type==1){
3493         int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3494         int i;
3495
3496         if(h->sps.poc_cycle_length != 0)
3497             abs_frame_num = h->frame_num_offset + h->frame_num;
3498         else
3499             abs_frame_num = 0;
3500
3501         if(h->nal_ref_idc==0 && abs_frame_num > 0)
3502             abs_frame_num--;
3503
3504         expected_delta_per_poc_cycle = 0;
3505         for(i=0; i < h->sps.poc_cycle_length; i++)
3506             expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3507
3508         if(abs_frame_num > 0){
3509             int poc_cycle_cnt          = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3510             int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3511
3512             expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3513             for(i = 0; i <= frame_num_in_poc_cycle; i++)
3514                 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3515         } else
3516             expectedpoc = 0;
3517
3518         if(h->nal_ref_idc == 0)
3519             expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3520
3521         field_poc[0] = expectedpoc + h->delta_poc[0];
3522         field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3523
3524         if(s->picture_structure == PICT_FRAME)
3525             field_poc[1] += h->delta_poc[1];
3526     }else{
3527         int poc= 2*(h->frame_num_offset + h->frame_num);
3528
3529         if(!h->nal_ref_idc)
3530             poc--;
3531
3532         field_poc[0]= poc;
3533         field_poc[1]= poc;
3534     }
3535
3536     if(s->picture_structure != PICT_BOTTOM_FIELD)
3537         s->current_picture_ptr->field_poc[0]= field_poc[0];
3538     if(s->picture_structure != PICT_TOP_FIELD)
3539         s->current_picture_ptr->field_poc[1]= field_poc[1];
3540     cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3541
3542     return 0;
3543 }
3544
3545
3546 /**
3547  * initialize scan tables
3548  */
3549 static void init_scan_tables(H264Context *h){
3550     MpegEncContext * const s = &h->s;
3551     int i;
3552     if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3553         memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3554         memcpy(h-> field_scan,  field_scan, 16*sizeof(uint8_t));
3555     }else{
3556         for(i=0; i<16; i++){
3557 #define T(x) (x>>2) | ((x<<2) & 0xF)
3558             h->zigzag_scan[i] = T(zigzag_scan[i]);
3559             h-> field_scan[i] = T( field_scan[i]);
3560 #undef T
3561         }
3562     }
3563     if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3564         memcpy(h->zigzag_scan8x8,       zigzag_scan8x8,       64*sizeof(uint8_t));
3565         memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3566         memcpy(h->field_scan8x8,        field_scan8x8,        64*sizeof(uint8_t));
3567         memcpy(h->field_scan8x8_cavlc,  field_scan8x8_cavlc,  64*sizeof(uint8_t));
3568     }else{
3569         for(i=0; i<64; i++){
3570 #define T(x) (x>>3) | ((x&7)<<3)
3571             h->zigzag_scan8x8[i]       = T(zigzag_scan8x8[i]);
3572             h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3573             h->field_scan8x8[i]        = T(field_scan8x8[i]);
3574             h->field_scan8x8_cavlc[i]  = T(field_scan8x8_cavlc[i]);
3575 #undef T
3576         }
3577     }
3578     if(h->sps.transform_bypass){ //FIXME same ugly
3579         h->zigzag_scan_q0          = zigzag_scan;
3580         h->zigzag_scan8x8_q0       = zigzag_scan8x8;
3581         h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3582         h->field_scan_q0           = field_scan;
3583         h->field_scan8x8_q0        = field_scan8x8;
3584         h->field_scan8x8_cavlc_q0  = field_scan8x8_cavlc;
3585     }else{
3586         h->zigzag_scan_q0          = h->zigzag_scan;
3587         h->zigzag_scan8x8_q0       = h->zigzag_scan8x8;
3588         h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3589         h->field_scan_q0           = h->field_scan;
3590         h->field_scan8x8_q0        = h->field_scan8x8;
3591         h->field_scan8x8_cavlc_q0  = h->field_scan8x8_cavlc;
3592     }
3593 }
3594
3595 /**
3596  * Replicates H264 "master" context to thread contexts.
3597  */
3598 static void clone_slice(H264Context *dst, H264Context *src)
3599 {
3600     memcpy(dst->block_offset,     src->block_offset, sizeof(dst->block_offset));
3601     dst->s.current_picture_ptr  = src->s.current_picture_ptr;
3602     dst->s.current_picture      = src->s.current_picture;
3603     dst->s.linesize             = src->s.linesize;
3604     dst->s.uvlinesize           = src->s.uvlinesize;
3605     dst->s.first_field          = src->s.first_field;
3606
3607     dst->prev_poc_msb           = src->prev_poc_msb;
3608     dst->prev_poc_lsb           = src->prev_poc_lsb;
3609     dst->prev_frame_num_offset  = src->prev_frame_num_offset;
3610     dst->prev_frame_num         = src->prev_frame_num;
3611     dst->short_ref_count        = src->short_ref_count;
3612
3613     memcpy(dst->short_ref,        src->short_ref,        sizeof(dst->short_ref));
3614     memcpy(dst->long_ref,         src->long_ref,         sizeof(dst->long_ref));
3615     memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3616     memcpy(dst->ref_list,         src->ref_list,         sizeof(dst->ref_list));
3617
3618     memcpy(dst->dequant4_coeff,   src->dequant4_coeff,   sizeof(src->dequant4_coeff));
3619     memcpy(dst->dequant8_coeff,   src->dequant8_coeff,   sizeof(src->dequant8_coeff));
3620 }
3621
3622 /**
3623  * decodes a slice header.
3624  * This will also call MPV_common_init() and frame_start() as needed.
3625  *
3626  * @param h h264context
3627  * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3628  *
3629  * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3630  */
3631 static int decode_slice_header(H264Context *h, H264Context *h0){
3632     MpegEncContext * const s = &h->s;
3633     MpegEncContext * const s0 = &h0->s;
3634     unsigned int first_mb_in_slice;
3635     unsigned int pps_id;
3636     int num_ref_idx_active_override_flag;
3637     unsigned int slice_type, tmp, i, j;
3638     int default_ref_list_done = 0;
3639     int last_pic_structure;
3640
3641     s->dropable= h->nal_ref_idc == 0;
3642
3643     if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3644         s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3645         s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3646     }else{
3647         s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3648         s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3649     }
3650
3651     first_mb_in_slice= get_ue_golomb(&s->gb);
3652
3653     if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3654         h0->current_slice = 0;
3655         if (!s0->first_field)
3656             s->current_picture_ptr= NULL;
3657     }
3658
3659     slice_type= get_ue_golomb_31(&s->gb);
3660     if(slice_type > 9){
3661         av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3662         return -1;
3663     }
3664     if(slice_type > 4){
3665         slice_type -= 5;
3666         h->slice_type_fixed=1;
3667     }else
3668         h->slice_type_fixed=0;
3669
3670     slice_type= golomb_to_pict_type[ slice_type ];
3671     if (slice_type == FF_I_TYPE
3672         || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3673         default_ref_list_done = 1;
3674     }
3675     h->slice_type= slice_type;
3676     h->slice_type_nos= slice_type & 3;
3677
3678     s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3679     if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3680         av_log(h->s.avctx, AV_LOG_ERROR,
3681                "B picture before any references, skipping\n");
3682         return -1;
3683     }
3684
3685     pps_id= get_ue_golomb(&s->gb);
3686     if(pps_id>=MAX_PPS_COUNT){
3687         av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3688         return -1;
3689     }
3690     if(!h0->pps_buffers[pps_id]) {
3691         av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3692         return -1;
3693     }
3694     h->pps= *h0->pps_buffers[pps_id];
3695
3696     if(!h0->sps_buffers[h->pps.sps_id]) {
3697         av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3698         return -1;
3699     }
3700     h->sps = *h0->sps_buffers[h->pps.sps_id];
3701
3702     if(h == h0 && h->dequant_coeff_pps != pps_id){
3703         h->dequant_coeff_pps = pps_id;
3704         init_dequant_tables(h);
3705     }
3706
3707     s->mb_width= h->sps.mb_width;
3708     s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3709
3710     h->b_stride=  s->mb_width*4;
3711     h->b8_stride= s->mb_width*2;
3712
3713     s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3714     if(h->sps.frame_mbs_only_flag)
3715         s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3716     else
3717         s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3718
3719     if (s->context_initialized
3720         && (   s->width != s->avctx->width || s->height != s->avctx->height)) {
3721         if(h != h0)
3722             return -1;   // width / height changed during parallelized decoding
3723         free_tables(h);
3724         flush_dpb(s->avctx);
3725         MPV_common_end(s);
3726     }
3727     if (!s->context_initialized) {
3728         if(h != h0)
3729             return -1;  // we cant (re-)initialize context during parallel decoding
3730         if (MPV_common_init(s) < 0)
3731             return -1;
3732         s->first_field = 0;
3733
3734         init_scan_tables(h);
3735         alloc_tables(h);
3736
3737         for(i = 1; i < s->avctx->thread_count; i++) {
3738             H264Context *c;
3739             c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3740             memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3741             memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3742             c->sps = h->sps;
3743             c->pps = h->pps;
3744             init_scan_tables(c);
3745             clone_tables(c, h);
3746         }
3747
3748         for(i = 0; i < s->avctx->thread_count; i++)
3749             if(context_init(h->thread_context[i]) < 0)
3750                 return -1;
3751
3752         s->avctx->width = s->width;
3753         s->avctx->height = s->height;
3754         s->avctx->sample_aspect_ratio= h->sps.sar;
3755         if(!s->avctx->sample_aspect_ratio.den)
3756             s->avctx->sample_aspect_ratio.den = 1;
3757
3758         if(h->sps.timing_info_present_flag){
3759             s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
3760             if(h->x264_build > 0 && h->x264_build < 44)
3761                 s->avctx->time_base.den *= 2;
3762             av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3763                       s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3764         }
3765     }
3766
3767     h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
3768
3769     h->mb_mbaff = 0;
3770     h->mb_aff_frame = 0;
3771     last_pic_structure = s0->picture_structure;
3772     if(h->sps.frame_mbs_only_flag){
3773         s->picture_structure= PICT_FRAME;
3774     }else{
3775         if(get_bits1(&s->gb)) { //field_pic_flag
3776             s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3777         } else {
3778             s->picture_structure= PICT_FRAME;
3779             h->mb_aff_frame = h->sps.mb_aff;
3780         }
3781     }
3782     h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3783
3784     if(h0->current_slice == 0){
3785         while(h->frame_num !=  h->prev_frame_num &&
3786               h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3787             av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3788             frame_start(h);
3789             h->prev_frame_num++;
3790             h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3791             s->current_picture_ptr->frame_num= h->prev_frame_num;
3792             execute_ref_pic_marking(h, NULL, 0);
3793         }
3794
3795         /* See if we have a decoded first field looking for a pair... */
3796         if (s0->first_field) {
3797             assert(s0->current_picture_ptr);
3798             assert(s0->current_picture_ptr->data[0]);
3799             assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3800
3801             /* figure out if we have a complementary field pair */
3802             if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3803                 /*
3804                  * Previous field is unmatched. Don't display it, but let it
3805                  * remain for reference if marked as such.
3806                  */
3807                 s0->current_picture_ptr = NULL;
3808                 s0->first_field = FIELD_PICTURE;
3809
3810             } else {
3811                 if (h->nal_ref_idc &&
3812                         s0->current_picture_ptr->reference &&
3813                         s0->current_picture_ptr->frame_num != h->frame_num) {
3814                     /*
3815                      * This and previous field were reference, but had
3816                      * different frame_nums. Consider this field first in
3817                      * pair. Throw away previous field except for reference
3818                      * purposes.
3819                      */
3820                     s0->first_field = 1;
3821                     s0->current_picture_ptr = NULL;
3822
3823                 } else {
3824                     /* Second field in complementary pair */
3825                     s0->first_field = 0;
3826                 }
3827             }
3828
3829         } else {
3830             /* Frame or first field in a potentially complementary pair */
3831             assert(!s0->current_picture_ptr);
3832             s0->first_field = FIELD_PICTURE;
3833         }
3834
3835         if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3836             s0->first_field = 0;
3837             return -1;
3838         }
3839     }
3840     if(h != h0)
3841         clone_slice(h, h0);
3842
3843     s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3844
3845     assert(s->mb_num == s->mb_width * s->mb_height);
3846     if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3847        first_mb_in_slice                    >= s->mb_num){
3848         av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3849         return -1;
3850     }
3851     s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3852     s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3853     if (s->picture_structure == PICT_BOTTOM_FIELD)
3854         s->resync_mb_y = s->mb_y = s->mb_y + 1;
3855     assert(s->mb_y < s->mb_height);
3856
3857     if(s->picture_structure==PICT_FRAME){
3858         h->curr_pic_num=   h->frame_num;
3859         h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3860     }else{
3861         h->curr_pic_num= 2*h->frame_num + 1;
3862         h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3863     }
3864
3865     if(h->nal_unit_type == NAL_IDR_SLICE){
3866         get_ue_golomb(&s->gb); /* idr_pic_id */
3867     }
3868
3869     if(h->sps.poc_type==0){
3870         h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3871
3872         if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3873             h->delta_poc_bottom= get_se_golomb(&s->gb);
3874         }
3875     }
3876
3877     if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3878         h->delta_poc[0]= get_se_golomb(&s->gb);
3879
3880         if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3881             h->delta_poc[1]= get_se_golomb(&s->gb);
3882     }
3883
3884     init_poc(h);
3885
3886     if(h->pps.redundant_pic_cnt_present){
3887         h->redundant_pic_count= get_ue_golomb(&s->gb);
3888     }
3889
3890     //set defaults, might be overridden a few lines later
3891     h->ref_count[0]= h->pps.ref_count[0];
3892     h->ref_count[1]= h->pps.ref_count[1];
3893
3894     if(h->slice_type_nos != FF_I_TYPE){
3895         if(h->slice_type_nos == FF_B_TYPE){
3896             h->direct_spatial_mv_pred= get_bits1(&s->gb);
3897         }
3898         num_ref_idx_active_override_flag= get_bits1(&s->gb);
3899
3900         if(num_ref_idx_active_override_flag){
3901             h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3902             if(h->slice_type_nos==FF_B_TYPE)
3903                 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
3904
3905             if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3906                 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3907                 h->ref_count[0]= h->ref_count[1]= 1;
3908                 return -1;
3909             }
3910         }
3911         if(h->slice_type_nos == FF_B_TYPE)
3912             h->list_count= 2;
3913         else
3914             h->list_count= 1;
3915     }else
3916         h->list_count= 0;
3917
3918     if(!default_ref_list_done){
3919         fill_default_ref_list(h);
3920     }
3921
3922     if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
3923         return -1;
3924
3925     if(h->slice_type_nos!=FF_I_TYPE){
3926         s->last_picture_ptr= &h->ref_list[0][0];
3927         ff_copy_picture(&s->last_picture, s->last_picture_ptr);
3928     }
3929     if(h->slice_type_nos==FF_B_TYPE){
3930         s->next_picture_ptr= &h->ref_list[1][0];
3931         ff_copy_picture(&s->next_picture, s->next_picture_ptr);
3932     }
3933
3934     if(   (h->pps.weighted_pred          && h->slice_type_nos == FF_P_TYPE )
3935        ||  (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3936         pred_weight_table(h);
3937     else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3938         implicit_weight_table(h);
3939     else
3940         h->use_weight = 0;
3941
3942     if(h->nal_ref_idc)
3943         decode_ref_pic_marking(h0, &s->gb);
3944
3945     if(FRAME_MBAFF)
3946         fill_mbaff_ref_list(h);
3947
3948     if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3949         direct_dist_scale_factor(h);
3950     direct_ref_list_init(h);
3951
3952     if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3953         tmp = get_ue_golomb_31(&s->gb);
3954         if(tmp > 2){
3955             av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3956             return -1;
3957         }
3958         h->cabac_init_idc= tmp;
3959     }
3960
3961     h->last_qscale_diff = 0;
3962     tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3963     if(tmp>51){
3964         av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3965         return -1;
3966     }
3967     s->qscale= tmp;
3968     h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3969     h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3970     //FIXME qscale / qp ... stuff
3971     if(h->slice_type == FF_SP_TYPE){
3972         get_bits1(&s->gb); /* sp_for_switch_flag */
3973     }
3974     if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
3975         get_se_golomb(&s->gb); /* slice_qs_delta */
3976     }
3977
3978     h->deblocking_filter = 1;
3979     h->slice_alpha_c0_offset = 0;
3980     h->slice_beta_offset = 0;
3981     if( h->pps.deblocking_filter_parameters_present ) {
3982         tmp= get_ue_golomb_31(&s->gb);
3983         if(tmp > 2){
3984             av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
3985             return -1;
3986         }
3987         h->deblocking_filter= tmp;
3988         if(h->deblocking_filter < 2)
3989             h->deblocking_filter^= 1; // 1<->0
3990
3991         if( h->deblocking_filter ) {
3992             h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
3993             h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
3994         }
3995     }
3996
3997     if(   s->avctx->skip_loop_filter >= AVDISCARD_ALL
3998        ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
3999        ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR  && h->slice_type_nos == FF_B_TYPE)
4000        ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4001         h->deblocking_filter= 0;
4002
4003     if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4004         if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4005             /* Cheat slightly for speed:
4006                Do not bother to deblock across slices. */
4007             h->deblocking_filter = 2;
4008         } else {
4009             h0->max_contexts = 1;
4010             if(!h0->single_decode_warning) {
4011                 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4012                 h0->single_decode_warning = 1;
4013             }
4014             if(h != h0)
4015                 return 1; // deblocking switched inside frame
4016         }
4017     }
4018
4019 #if 0 //FMO
4020     if( h->pps.num_slice_groups > 1  && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4021         slice_group_change_cycle= get_bits(&s->gb, ?);
4022 #endif
4023
4024     h0->last_slice_type = slice_type;
4025     h->slice_num = ++h0->current_slice;
4026     if(h->slice_num >= MAX_SLICES){
4027         av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
4028     }
4029
4030     for(j=0; j<2; j++){
4031         int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
4032         ref2frm[0]=
4033         ref2frm[1]= -1;
4034         for(i=0; i<16; i++)
4035             ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
4036                           +(h->ref_list[j][i].reference&3);
4037         ref2frm[18+0]=
4038         ref2frm[18+1]= -1;
4039         for(i=16; i<48; i++)
4040             ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
4041                           +(h->ref_list[j][i].reference&3);
4042     }
4043
4044     h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4045     h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4046
4047     s->avctx->refs= h->sps.ref_frame_count;
4048
4049     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4050         av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4051                h->slice_num,
4052                (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4053                first_mb_in_slice,
4054                av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
4055                pps_id, h->frame_num,
4056                s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4057                h->ref_count[0], h->ref_count[1],
4058                s->qscale,
4059                h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4060                h->use_weight,
4061                h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4062                h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
4063                );
4064     }
4065
4066     return 0;
4067 }
4068
4069 /**
4070  *
4071  */
4072 static inline int get_level_prefix(GetBitContext *gb){
4073     unsigned int buf;
4074     int log;
4075
4076     OPEN_READER(re, gb);
4077     UPDATE_CACHE(re, gb);
4078     buf=GET_CACHE(re, gb);
4079
4080     log= 32 - av_log2(buf);
4081 #ifdef TRACE
4082     print_bin(buf>>(32-log), log);
4083     av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4084 #endif
4085
4086     LAST_SKIP_BITS(re, gb, log);
4087     CLOSE_READER(re, gb);
4088
4089     return log-1;
4090 }
4091
4092 static inline int get_dct8x8_allowed(H264Context *h){
4093     if(h->sps.direct_8x8_inference_flag)
4094         return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8                )*0x0001000100010001ULL));
4095     else
4096         return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
4097 }
4098
4099 /**
4100  * decodes a residual block.
4101  * @param n block index
4102  * @param scantable scantable
4103  * @param max_coeff number of coefficients in the block
4104  * @return <0 if an error occurred
4105  */
4106 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4107     MpegEncContext * const s = &h->s;
4108     static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4109     int level[16];
4110     int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4111
4112     //FIXME put trailing_onex into the context
4113
4114     if(n == CHROMA_DC_BLOCK_INDEX){
4115         coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4116         total_coeff= coeff_token>>2;
4117     }else{
4118         if(n == LUMA_DC_BLOCK_INDEX){
4119             total_coeff= pred_non_zero_count(h, 0);
4120             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4121             total_coeff= coeff_token>>2;
4122         }else{
4123             total_coeff= pred_non_zero_count(h, n);
4124             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4125             total_coeff= coeff_token>>2;
4126             h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4127         }
4128     }
4129
4130     //FIXME set last_non_zero?
4131
4132     if(total_coeff==0)
4133         return 0;
4134     if(total_coeff > (unsigned)max_coeff) {
4135         av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4136         return -1;
4137     }
4138
4139     trailing_ones= coeff_token&3;
4140     tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4141     assert(total_coeff<=16);
4142
4143     i = show_bits(gb, 3);
4144     skip_bits(gb, trailing_ones);
4145     level[0] = 1-((i&4)>>1);
4146     level[1] = 1-((i&2)   );
4147     level[2] = 1-((i&1)<<1);
4148
4149     if(trailing_ones<total_coeff) {
4150         int mask, prefix;
4151         int suffix_length = total_coeff > 10 && trailing_ones < 3;
4152         int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4153         int level_code= cavlc_level_tab[suffix_length][bitsi][0];
4154
4155         skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
4156         if(level_code >= 100){
4157             prefix= level_code - 100;
4158             if(prefix == LEVEL_TAB_BITS)
4159                 prefix += get_level_prefix(gb);
4160
4161             //first coefficient has suffix_length equal to 0 or 1
4162             if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4163                 if(suffix_length)
4164                     level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4165                 else
4166                     level_code= (prefix<<suffix_length); //part
4167             }else if(prefix==14){
4168                 if(suffix_length)
4169                     level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4170                 else
4171                     level_code= prefix + get_bits(gb, 4); //part
4172             }else{
4173                 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4174                 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4175                 if(prefix>=16)
4176                     level_code += (1<<(prefix-3))-4096;
4177             }
4178
4179             if(trailing_ones < 3) level_code += 2;
4180
4181             suffix_length = 2;
4182             mask= -(level_code&1);
4183             level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
4184         }else{
4185             if(trailing_ones < 3) level_code += (level_code>>31)|1;
4186
4187             suffix_length = 1;
4188             if(level_code + 3U > 6U)
4189                 suffix_length++;
4190             level[trailing_ones]= level_code;
4191         }
4192
4193         //remaining coefficients have suffix_length > 0
4194         for(i=trailing_ones+1;i<total_coeff;i++) {
4195             static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
4196             int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4197             level_code= cavlc_level_tab[suffix_length][bitsi][0];
4198
4199             skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
4200             if(level_code >= 100){
4201                 prefix= level_code - 100;
4202                 if(prefix == LEVEL_TAB_BITS){
4203                     prefix += get_level_prefix(gb);
4204                 }
4205                 if(prefix<15){
4206                     level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4207                 }else{
4208                     level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4209                     if(prefix>=16)
4210                         level_code += (1<<(prefix-3))-4096;
4211                 }
4212                 mask= -(level_code&1);
4213                 level_code= (((2+level_code)>>1) ^ mask) - mask;
4214             }
4215             level[i]= level_code;
4216
4217             if(suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length])
4218                 suffix_length++;
4219         }
4220     }
4221
4222     if(total_coeff == max_coeff)
4223         zeros_left=0;
4224     else{
4225         if(n == CHROMA_DC_BLOCK_INDEX)
4226             zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4227         else
4228             zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
4229     }
4230
4231     coeff_num = zeros_left + total_coeff - 1;
4232     j = scantable[coeff_num];
4233     if(n > 24){
4234         block[j] = level[0];
4235         for(i=1;i<total_coeff;i++) {
4236             if(zeros_left <= 0)
4237                 run_before = 0;
4238             else if(zeros_left < 7){
4239                 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4240             }else{
4241                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4242             }
4243             zeros_left -= run_before;
4244             coeff_num -= 1 + run_before;
4245             j= scantable[ coeff_num ];
4246
4247             block[j]= level[i];
4248         }
4249     }else{
4250         block[j] = (level[0] * qmul[j] + 32)>>6;
4251         for(i=1;i<total_coeff;i++) {
4252             if(zeros_left <= 0)
4253                 run_before = 0;
4254             else if(zeros_left < 7){
4255                 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4256             }else{
4257                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4258             }
4259             zeros_left -= run_before;
4260             coeff_num -= 1 + run_before;
4261             j= scantable[ coeff_num ];
4262
4263             block[j]= (level[i] * qmul[j] + 32)>>6;
4264         }
4265     }
4266
4267     if(zeros_left<0){
4268         av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
4269         return -1;
4270     }
4271
4272     return 0;
4273 }
4274
4275 static void predict_field_decoding_flag(H264Context *h){
4276     MpegEncContext * const s = &h->s;
4277     const int mb_xy= h->mb_xy;
4278     int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4279                 ? s->current_picture.mb_type[mb_xy-1]
4280                 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4281                 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4282                 : 0;
4283     h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4284 }
4285
4286 /**
4287  * decodes a P_SKIP or B_SKIP macroblock
4288  */
4289 static void decode_mb_skip(H264Context *h){
4290     MpegEncContext * const s = &h->s;
4291     const int mb_xy= h->mb_xy;
4292     int mb_type=0;
4293
4294     memset(h->non_zero_count[mb_xy], 0, 16);
4295     memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4296
4297     if(MB_FIELD)
4298         mb_type|= MB_TYPE_INTERLACED;
4299
4300     if( h->slice_type_nos == FF_B_TYPE )
4301     {
4302         // just for fill_caches. pred_direct_motion will set the real mb_type
4303         mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4304
4305         fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4306         pred_direct_motion(h, &mb_type);
4307         mb_type|= MB_TYPE_SKIP;
4308     }
4309     else
4310     {
4311         int mx, my;
4312         mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4313
4314         fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4315         pred_pskip_motion(h, &mx, &my);
4316         fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4317         fill_rectangle(  h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4318     }
4319
4320     write_back_motion(h, mb_type);
4321     s->current_picture.mb_type[mb_xy]= mb_type;
4322     s->current_picture.qscale_table[mb_xy]= s->qscale;
4323     h->slice_table[ mb_xy ]= h->slice_num;
4324     h->prev_mb_skipped= 1;
4325 }
4326
4327 /**
4328  * decodes a macroblock
4329  * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
4330  */
4331 static int decode_mb_cavlc(H264Context *h){
4332     MpegEncContext * const s = &h->s;
4333     int mb_xy;
4334     int partition_count;
4335     unsigned int mb_type, cbp;
4336     int dct8x8_allowed= h->pps.transform_8x8_mode;
4337
4338     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4339
4340     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4341     cbp = 0; /* avoid warning. FIXME: find a solution without slowing
4342                 down the code */
4343     if(h->slice_type_nos != FF_I_TYPE){
4344         if(s->mb_skip_run==-1)
4345             s->mb_skip_run= get_ue_golomb(&s->gb);
4346
4347         if (s->mb_skip_run--) {
4348             if(FRAME_MBAFF && (s->mb_y&1) == 0){
4349                 if(s->mb_skip_run==0)
4350                     h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4351                 else
4352                     predict_field_decoding_flag(h);
4353             }
4354             decode_mb_skip(h);
4355             return 0;
4356         }
4357     }
4358     if(FRAME_MBAFF){
4359         if( (s->mb_y&1) == 0 )
4360             h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4361     }
4362
4363     h->prev_mb_skipped= 0;
4364
4365     mb_type= get_ue_golomb(&s->gb);
4366     if(h->slice_type_nos == FF_B_TYPE){
4367         if(mb_type < 23){
4368             partition_count= b_mb_type_info[mb_type].partition_count;
4369             mb_type=         b_mb_type_info[mb_type].type;
4370         }else{
4371             mb_type -= 23;
4372             goto decode_intra_mb;
4373         }
4374     }else if(h->slice_type_nos == FF_P_TYPE){
4375         if(mb_type < 5){
4376             partition_count= p_mb_type_info[mb_type].partition_count;
4377             mb_type=         p_mb_type_info[mb_type].type;
4378         }else{
4379             mb_type -= 5;
4380             goto decode_intra_mb;
4381         }
4382     }else{
4383        assert(h->slice_type_nos == FF_I_TYPE);
4384         if(h->slice_type == FF_SI_TYPE && mb_type)
4385             mb_type--;
4386 decode_intra_mb:
4387         if(mb_type > 25){
4388             av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4389             return -1;
4390         }
4391         partition_count=0;
4392         cbp= i_mb_type_info[mb_type].cbp;
4393         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4394         mb_type= i_mb_type_info[mb_type].type;
4395     }
4396
4397     if(MB_FIELD)
4398         mb_type |= MB_TYPE_INTERLACED;
4399
4400     h->slice_table[ mb_xy ]= h->slice_num;
4401
4402     if(IS_INTRA_PCM(mb_type)){
4403         unsigned int x;
4404
4405         // We assume these blocks are very rare so we do not optimize it.
4406         align_get_bits(&s->gb);
4407
4408         // The pixels are stored in the same order as levels in h->mb array.
4409         for(x=0; x < (CHROMA ? 384 : 256); x++){
4410             ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4411         }
4412
4413         // In deblocking, the quantizer is 0
4414         s->current_picture.qscale_table[mb_xy]= 0;
4415         // All coeffs are present
4416         memset(h->non_zero_count[mb_xy], 16, 16);
4417
4418         s->current_picture.mb_type[mb_xy]= mb_type;
4419         return 0;
4420     }
4421
4422     if(MB_MBAFF){
4423         h->ref_count[0] <<= 1;
4424         h->ref_count[1] <<= 1;
4425     }
4426
4427     fill_caches(h, mb_type, 0);
4428
4429     //mb_pred
4430     if(IS_INTRA(mb_type)){
4431         int pred_mode;
4432 //            init_top_left_availability(h);
4433         if(IS_INTRA4x4(mb_type)){
4434             int i;
4435             int di = 1;
4436             if(dct8x8_allowed && get_bits1(&s->gb)){
4437                 mb_type |= MB_TYPE_8x8DCT;
4438                 di = 4;
4439             }
4440
4441 //                fill_intra4x4_pred_table(h);
4442             for(i=0; i<16; i+=di){
4443                 int mode= pred_intra_mode(h, i);
4444
4445                 if(!get_bits1(&s->gb)){
4446                     const int rem_mode= get_bits(&s->gb, 3);
4447                     mode = rem_mode + (rem_mode >= mode);
4448                 }
4449
4450                 if(di==4)
4451                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4452                 else
4453                     h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4454             }
4455             write_back_intra_pred_mode(h);
4456             if( check_intra4x4_pred_mode(h) < 0)
4457                 return -1;
4458         }else{
4459             h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4460             if(h->intra16x16_pred_mode < 0)
4461                 return -1;
4462         }
4463         if(CHROMA){
4464             pred_mode= check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
4465             if(pred_mode < 0)
4466                 return -1;
4467             h->chroma_pred_mode= pred_mode;
4468         }
4469     }else if(partition_count==4){
4470         int i, j, sub_partition_count[4], list, ref[2][4];
4471
4472         if(h->slice_type_nos == FF_B_TYPE){
4473             for(i=0; i<4; i++){
4474                 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4475                 if(h->sub_mb_type[i] >=13){
4476                     av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4477                     return -1;
4478                 }
4479                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4480                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4481             }
4482             if(   IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4483                || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4484                 pred_direct_motion(h, &mb_type);
4485                 h->ref_cache[0][scan8[4]] =
4486                 h->ref_cache[1][scan8[4]] =
4487                 h->ref_cache[0][scan8[12]] =
4488                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4489             }
4490         }else{
4491             assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4492             for(i=0; i<4; i++){
4493                 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4494                 if(h->sub_mb_type[i] >=4){
4495                     av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4496                     return -1;
4497                 }
4498                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4499                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4500             }
4501         }
4502
4503         for(list=0; list<h->list_count; list++){
4504             int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4505             for(i=0; i<4; i++){
4506                 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4507                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4508                     unsigned int tmp;
4509                     if(ref_count == 1){
4510                         tmp= 0;
4511                     }else if(ref_count == 2){
4512                         tmp= get_bits1(&s->gb)^1;
4513                     }else{
4514                         tmp= get_ue_golomb_31(&s->gb);
4515                         if(tmp>=ref_count){
4516                             av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4517                             return -1;
4518                         }
4519                     }
4520                     ref[list][i]= tmp;
4521                 }else{
4522                  //FIXME
4523                     ref[list][i] = -1;
4524                 }
4525             }
4526         }
4527
4528         if(dct8x8_allowed)
4529             dct8x8_allowed = get_dct8x8_allowed(h);
4530
4531         for(list=0; list<h->list_count; list++){
4532             for(i=0; i<4; i++){
4533                 if(IS_DIRECT(h->sub_mb_type[i])) {
4534                     h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4535                     continue;
4536                 }
4537                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4538                 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4539
4540                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4541                     const int sub_mb_type= h->sub_mb_type[i];
4542                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4543                     for(j=0; j<sub_partition_count[i]; j++){
4544                         int mx, my;
4545                         const int index= 4*i + block_width*j;
4546                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4547                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4548                         mx += get_se_golomb(&s->gb);
4549                         my += get_se_golomb(&s->gb);
4550                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4551
4552                         if(IS_SUB_8X8(sub_mb_type)){
4553                             mv_cache[ 1 ][0]=
4554                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4555                             mv_cache[ 1 ][1]=
4556                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4557                         }else if(IS_SUB_8X4(sub_mb_type)){
4558                             mv_cache[ 1 ][0]= mx;
4559                             mv_cache[ 1 ][1]= my;
4560                         }else if(IS_SUB_4X8(sub_mb_type)){
4561                             mv_cache[ 8 ][0]= mx;
4562                             mv_cache[ 8 ][1]= my;
4563                         }
4564                         mv_cache[ 0 ][0]= mx;
4565                         mv_cache[ 0 ][1]= my;
4566                     }
4567                 }else{
4568                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4569                     p[0] = p[1]=
4570                     p[8] = p[9]= 0;
4571                 }
4572             }
4573         }
4574     }else if(IS_DIRECT(mb_type)){
4575         pred_direct_motion(h, &mb_type);
4576         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
4577     }else{
4578         int list, mx, my, i;
4579          //FIXME we should set ref_idx_l? to 0 if we use that later ...
4580         if(IS_16X16(mb_type)){
4581             for(list=0; list<h->list_count; list++){
4582                     unsigned int val;
4583                     if(IS_DIR(mb_type, 0, list)){
4584                         if(h->ref_count[list]==1){
4585                             val= 0;
4586                         }else if(h->ref_count[list]==2){
4587                             val= get_bits1(&s->gb)^1;
4588                         }else{
4589                             val= get_ue_golomb_31(&s->gb);
4590                             if(val >= h->ref_count[list]){
4591                                 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4592                                 return -1;
4593                             }
4594                         }
4595                     }else
4596                         val= LIST_NOT_USED&0xFF;
4597                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4598             }
4599             for(list=0; list<h->list_count; list++){
4600                 unsigned int val;
4601                 if(IS_DIR(mb_type, 0, list)){
4602                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4603                     mx += get_se_golomb(&s->gb);
4604                     my += get_se_golomb(&s->gb);
4605                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4606
4607                     val= pack16to32(mx,my);
4608                 }else
4609                     val=0;
4610                 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4611             }
4612         }
4613         else if(IS_16X8(mb_type)){
4614             for(list=0; list<h->list_count; list++){
4615                     for(i=0; i<2; i++){
4616                         unsigned int val;
4617                         if(IS_DIR(mb_type, i, list)){
4618                             if(h->ref_count[list] == 1){
4619                                 val= 0;
4620                             }else if(h->ref_count[list] == 2){
4621                                 val= get_bits1(&s->gb)^1;
4622                             }else{
4623                                 val= get_ue_golomb_31(&s->gb);
4624                                 if(val >= h->ref_count[list]){
4625                                     av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4626                                     return -1;
4627                                 }
4628                             }
4629                         }else
4630                             val= LIST_NOT_USED&0xFF;
4631                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4632                     }
4633             }
4634             for(list=0; list<h->list_count; list++){
4635                 for(i=0; i<2; i++){
4636                     unsigned int val;
4637                     if(IS_DIR(mb_type, i, list)){
4638                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4639                         mx += get_se_golomb(&s->gb);
4640                         my += get_se_golomb(&s->gb);
4641                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4642
4643                         val= pack16to32(mx,my);
4644                     }else
4645                         val=0;
4646                     fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4647                 }
4648             }
4649         }else{
4650             assert(IS_8X16(mb_type));
4651             for(list=0; list<h->list_count; list++){
4652                     for(i=0; i<2; i++){
4653                         unsigned int val;
4654                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4655                             if(h->ref_count[list]==1){
4656                                 val= 0;
4657                             }else if(h->ref_count[list]==2){
4658                                 val= get_bits1(&s->gb)^1;
4659                             }else{
4660                                 val= get_ue_golomb_31(&s->gb);
4661                                 if(val >= h->ref_count[list]){
4662                                     av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4663                                     return -1;
4664                                 }
4665                             }
4666                         }else
4667                             val= LIST_NOT_USED&0xFF;
4668                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4669                     }
4670             }
4671             for(list=0; list<h->list_count; list++){
4672                 for(i=0; i<2; i++){
4673                     unsigned int val;
4674                     if(IS_DIR(mb_type, i, list)){
4675                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4676                         mx += get_se_golomb(&s->gb);
4677                         my += get_se_golomb(&s->gb);
4678                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4679
4680                         val= pack16to32(mx,my);
4681                     }else
4682                         val=0;
4683                     fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4684                 }
4685             }
4686         }
4687     }
4688
4689     if(IS_INTER(mb_type))
4690         write_back_motion(h, mb_type);
4691
4692     if(!IS_INTRA16x16(mb_type)){
4693         cbp= get_ue_golomb(&s->gb);
4694         if(cbp > 47){
4695             av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4696             return -1;
4697         }
4698
4699         if(CHROMA){
4700             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4701             else                     cbp= golomb_to_inter_cbp   [cbp];
4702         }else{
4703             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4704             else                     cbp= golomb_to_inter_cbp_gray[cbp];
4705         }
4706     }
4707     h->cbp = cbp;
4708
4709     if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4710         if(get_bits1(&s->gb)){
4711             mb_type |= MB_TYPE_8x8DCT;
4712             h->cbp_table[mb_xy]= cbp;
4713         }
4714     }
4715     s->current_picture.mb_type[mb_xy]= mb_type;
4716
4717     if(cbp || IS_INTRA16x16(mb_type)){
4718         int i8x8, i4x4, chroma_idx;
4719         int dquant;
4720         GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4721         const uint8_t *scan, *scan8x8, *dc_scan;
4722
4723 //        fill_non_zero_count_cache(h);
4724
4725         if(IS_INTERLACED(mb_type)){
4726             scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4727             scan= s->qscale ? h->field_scan : h->field_scan_q0;
4728             dc_scan= luma_dc_field_scan;
4729         }else{
4730             scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4731             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4732             dc_scan= luma_dc_zigzag_scan;
4733         }
4734
4735         dquant= get_se_golomb(&s->gb);
4736
4737         if( dquant > 25 || dquant < -26 ){
4738             av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
4739             return -1;
4740         }
4741
4742         s->qscale += dquant;
4743         if(((unsigned)s->qscale) > 51){
4744             if(s->qscale<0) s->qscale+= 52;
4745             else            s->qscale-= 52;
4746         }
4747
4748         h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4749         h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4750         if(IS_INTRA16x16(mb_type)){
4751             if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4752                 return -1; //FIXME continue if partitioned and other return -1 too
4753             }
4754
4755             assert((cbp&15) == 0 || (cbp&15) == 15);
4756
4757             if(cbp&15){
4758                 for(i8x8=0; i8x8<4; i8x8++){
4759                     for(i4x4=0; i4x4<4; i4x4++){
4760                         const int index= i4x4 + 4*i8x8;
4761                         if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4762                             return -1;
4763                         }
4764                     }
4765                 }
4766             }else{
4767                 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4768             }
4769         }else{
4770             for(i8x8=0; i8x8<4; i8x8++){
4771                 if(cbp & (1<<i8x8)){
4772                     if(IS_8x8DCT(mb_type)){
4773                         DCTELEM *buf = &h->mb[64*i8x8];
4774                         uint8_t *nnz;
4775                         for(i4x4=0; i4x4<4; i4x4++){
4776                             if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4777                                                 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4778                                 return -1;
4779                         }
4780                         nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4781                         nnz[0] += nnz[1] + nnz[8] + nnz[9];
4782                     }else{
4783                         for(i4x4=0; i4x4<4; i4x4++){
4784                             const int index= i4x4 + 4*i8x8;
4785
4786                             if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4787                                 return -1;
4788                             }
4789                         }
4790                     }
4791                 }else{
4792                     uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4793                     nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
4794                 }
4795             }
4796         }
4797
4798         if(cbp&0x30){
4799             for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4800                 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4801                     return -1;
4802                 }
4803         }
4804
4805         if(cbp&0x20){
4806             for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4807                 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4808                 for(i4x4=0; i4x4<4; i4x4++){
4809                     const int index= 16 + 4*chroma_idx + i4x4;
4810                     if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4811                         return -1;
4812                     }
4813                 }
4814             }
4815         }else{
4816             uint8_t * const nnz= &h->non_zero_count_cache[0];
4817             nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4818             nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4819         }
4820     }else{
4821         uint8_t * const nnz= &h->non_zero_count_cache[0];
4822         fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4823         nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4824         nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4825     }
4826     s->current_picture.qscale_table[mb_xy]= s->qscale;
4827     write_back_non_zero_count(h);
4828
4829     if(MB_MBAFF){
4830         h->ref_count[0] >>= 1;
4831         h->ref_count[1] >>= 1;
4832     }
4833
4834     return 0;
4835 }
4836
4837 static int decode_cabac_field_decoding_flag(H264Context *h) {
4838     MpegEncContext * const s = &h->s;
4839     const int mb_x = s->mb_x;
4840     const int mb_y = s->mb_y & ~1;
4841     const int mba_xy = mb_x - 1 +  mb_y   *s->mb_stride;
4842     const int mbb_xy = mb_x     + (mb_y-2)*s->mb_stride;
4843
4844     unsigned int ctx = 0;
4845
4846     if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4847         ctx += 1;
4848     }
4849     if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4850         ctx += 1;
4851     }
4852
4853     return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
4854 }
4855
4856 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4857     uint8_t *state= &h->cabac_state[ctx_base];
4858     int mb_type;
4859
4860     if(intra_slice){
4861         MpegEncContext * const s = &h->s;
4862         const int mba_xy = h->left_mb_xy[0];
4863         const int mbb_xy = h->top_mb_xy;
4864         int ctx=0;
4865         if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4866             ctx++;
4867         if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4868             ctx++;
4869         if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4870             return 0;   /* I4x4 */
4871         state += 2;
4872     }else{
4873         if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4874             return 0;   /* I4x4 */
4875     }
4876
4877     if( get_cabac_terminate( &h->cabac ) )
4878         return 25;  /* PCM */
4879
4880     mb_type = 1; /* I16x16 */
4881     mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4882     if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4883         mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4884     mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4885     mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
4886     return mb_type;
4887 }
4888
4889 static int decode_cabac_mb_type_b( H264Context *h ) {
4890     MpegEncContext * const s = &h->s;
4891
4892         const int mba_xy = h->left_mb_xy[0];
4893         const int mbb_xy = h->top_mb_xy;
4894         int ctx = 0;
4895         int bits;
4896         assert(h->slice_type_nos == FF_B_TYPE);
4897
4898         if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4899             ctx++;
4900         if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4901             ctx++;
4902
4903         if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4904             return 0; /* B_Direct_16x16 */
4905
4906         if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4907             return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
4908         }
4909
4910         bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4911         bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4912         bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4913         bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4914         if( bits < 8 )
4915             return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4916         else if( bits == 13 ) {
4917             return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4918         } else if( bits == 14 )
4919             return 11; /* B_L1_L0_8x16 */
4920         else if( bits == 15 )
4921             return 22; /* B_8x8 */
4922
4923         bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4924         return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
4925 }
4926
4927 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4928     MpegEncContext * const s = &h->s;
4929     int mba_xy, mbb_xy;
4930     int ctx = 0;
4931
4932     if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4933         int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
4934         mba_xy = mb_xy - 1;
4935         if( (mb_y&1)
4936             && h->slice_table[mba_xy] == h->slice_num
4937             && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
4938             mba_xy += s->mb_stride;
4939         if( MB_FIELD ){
4940             mbb_xy = mb_xy - s->mb_stride;
4941             if( !(mb_y&1)
4942                 && h->slice_table[mbb_xy] == h->slice_num
4943                 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4944                 mbb_xy -= s->mb_stride;
4945         }else
4946             mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
4947     }else{
4948         int mb_xy = h->mb_xy;
4949         mba_xy = mb_xy - 1;
4950         mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4951     }
4952
4953     if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4954         ctx++;
4955     if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
4956         ctx++;
4957
4958     if( h->slice_type_nos == FF_B_TYPE )
4959         ctx += 13;
4960     return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
4961 }
4962
4963 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4964     int mode = 0;
4965
4966     if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
4967         return pred_mode;
4968
4969     mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4970     mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4971     mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
4972
4973     if( mode >= pred_mode )
4974         return mode + 1;
4975     else
4976         return mode;
4977 }
4978
4979 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
4980     const int mba_xy = h->left_mb_xy[0];
4981     const int mbb_xy = h->top_mb_xy;
4982
4983     int ctx = 0;
4984
4985     /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
4986     if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
4987         ctx++;
4988
4989     if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
4990         ctx++;
4991
4992     if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
4993         return 0;
4994
4995     if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4996         return 1;
4997     if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4998         return 2;
4999     else
5000         return 3;
5001 }
5002
5003 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5004     int cbp_b, cbp_a, ctx, cbp = 0;
5005
5006     cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5007     cbp_b = h->slice_table[h->top_mb_xy]     == h->slice_num ? h->top_cbp  : -1;
5008
5009     ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5010     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
5011     ctx = !(cbp   & 0x01) + 2 * !(cbp_b & 0x08);
5012     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
5013     ctx = !(cbp_a & 0x08) + 2 * !(cbp   & 0x01);
5014     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
5015     ctx = !(cbp   & 0x04) + 2 * !(cbp   & 0x02);
5016     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
5017     return cbp;
5018 }
5019 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5020     int ctx;
5021     int cbp_a, cbp_b;
5022
5023     cbp_a = (h->left_cbp>>4)&0x03;
5024     cbp_b = (h-> top_cbp>>4)&0x03;
5025
5026     ctx = 0;
5027     if( cbp_a > 0 ) ctx++;
5028     if( cbp_b > 0 ) ctx += 2;
5029     if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5030         return 0;
5031
5032     ctx = 4;
5033     if( cbp_a == 2 ) ctx++;
5034     if( cbp_b == 2 ) ctx += 2;
5035     return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
5036 }
5037 static int decode_cabac_mb_dqp( H264Context *h) {
5038     int   ctx= h->last_qscale_diff != 0;
5039     int   val = 0;
5040
5041     while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5042         ctx= 2+(ctx>>1);
5043         val++;
5044         if(val > 102) //prevent infinite loop
5045             return INT_MIN;
5046     }
5047
5048     if( val&0x01 )
5049         return   (val + 1)>>1 ;
5050     else
5051         return -((val + 1)>>1);
5052 }
5053 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5054     if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5055         return 0;   /* 8x8 */
5056     if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5057         return 1;   /* 8x4 */
5058     if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
5059         return 2;   /* 4x8 */
5060     return 3;       /* 4x4 */
5061 }
5062 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5063     int type;
5064     if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5065         return 0;   /* B_Direct_8x8 */
5066     if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5067         return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5068     type = 3;
5069     if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5070         if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5071             return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5072         type += 4;
5073     }
5074     type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5075     type +=   get_cabac( &h->cabac, &h->cabac_state[39] );
5076     return type;
5077 }
5078
5079 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5080     return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
5081 }
5082
5083 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5084     int refa = h->ref_cache[list][scan8[n] - 1];
5085     int refb = h->ref_cache[list][scan8[n] - 8];
5086     int ref  = 0;
5087     int ctx  = 0;
5088
5089     if( h->slice_type_nos == FF_B_TYPE) {
5090         if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5091             ctx++;
5092         if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5093             ctx += 2;
5094     } else {
5095         if( refa > 0 )
5096             ctx++;
5097         if( refb > 0 )
5098             ctx += 2;
5099     }
5100
5101     while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5102         ref++;
5103         ctx = (ctx>>2)+4;
5104         if(ref >= 32 /*h->ref_list[list]*/){
5105             return -1;
5106         }
5107     }
5108     return ref;
5109 }
5110
5111 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5112     int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5113                abs( h->mvd_cache[list][scan8[n] - 8][l] );
5114     int ctxbase = (l == 0) ? 40 : 47;
5115     int mvd;
5116     int ctx = (amvd>2) + (amvd>32);
5117
5118     if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5119         return 0;
5120
5121     mvd= 1;
5122     ctx= 3;
5123     while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5124         mvd++;
5125         if( ctx < 6 )
5126             ctx++;
5127     }
5128
5129     if( mvd >= 9 ) {
5130         int k = 3;
5131         while( get_cabac_bypass( &h->cabac ) ) {
5132             mvd += 1 << k;
5133             k++;
5134             if(k>24){
5135                 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5136                 return INT_MIN;
5137             }
5138         }
5139         while( k-- ) {
5140             if( get_cabac_bypass( &h->cabac ) )
5141                 mvd += 1 << k;
5142         }
5143     }
5144     return get_cabac_bypass_sign( &h->cabac, -mvd );
5145 }
5146
5147 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
5148     int nza, nzb;
5149     int ctx = 0;
5150
5151     if( is_dc ) {
5152         if( cat == 0 ) {
5153             nza = h->left_cbp&0x100;
5154             nzb = h-> top_cbp&0x100;
5155         } else {
5156             nza = (h->left_cbp>>(6+idx))&0x01;
5157             nzb = (h-> top_cbp>>(6+idx))&0x01;
5158         }
5159     } else {
5160         assert(cat == 1 || cat == 2 || cat == 4);
5161         nza = h->non_zero_count_cache[scan8[idx] - 1];
5162         nzb = h->non_zero_count_cache[scan8[idx] - 8];
5163     }
5164
5165     if( nza > 0 )
5166         ctx++;
5167
5168     if( nzb > 0 )
5169         ctx += 2;
5170
5171     return ctx + 4 * cat;
5172 }
5173
/* Context offset of the "last significant coefficient" flag for 8x8 blocks,
 * indexed by coefficient position 0..62. The C significance decoder reads it
 * as last_coeff_flag_offset_8x8[last] and adds it to the last-flag context
 * base of the block.
 * NOTE(review): declared through DECLARE_ASM_CONST, presumably so that the
 * x86 assembly significance decoder can reference it as well - confirm. */
DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
    5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
};
5180
5181 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
5182     static const int significant_coeff_flag_offset[2][6] = {
5183       { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5184       { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5185     };
5186     static const int last_coeff_flag_offset[2][6] = {
5187       { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5188       { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5189     };
5190     static const int coeff_abs_level_m1_offset[6] = {
5191         227+0, 227+10, 227+20, 227+30, 227+39, 426
5192     };
5193     static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5194       { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5195         4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5196         7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5197        12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5198       { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5199         6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5200         9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5201         9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5202     };
5203     /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5204      * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5205      * map node ctx => cabac ctx for level=1 */
5206     static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5207     /* map node ctx => cabac ctx for level>1 */
5208     static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5209     static const uint8_t coeff_abs_level_transition[2][8] = {
5210     /* update node ctx after decoding a level=1 */
5211         { 1, 2, 3, 3, 4, 5, 6, 7 },
5212     /* update node ctx after decoding a level>1 */
5213         { 4, 4, 4, 4, 5, 6, 7, 7 }
5214     };
5215
5216     int index[64];
5217
5218     int av_unused last;
5219     int coeff_count = 0;
5220     int node_ctx = 0;
5221
5222     uint8_t *significant_coeff_ctx_base;
5223     uint8_t *last_coeff_ctx_base;
5224     uint8_t *abs_level_m1_ctx_base;
5225
5226 #ifndef ARCH_X86
5227 #define CABAC_ON_STACK
5228 #endif
5229 #ifdef CABAC_ON_STACK
5230 #define CC &cc
5231     CABACContext cc;
5232     cc.range     = h->cabac.range;
5233     cc.low       = h->cabac.low;
5234     cc.bytestream= h->cabac.bytestream;
5235 #else
5236 #define CC &h->cabac
5237 #endif
5238
5239
5240     /* cat: 0-> DC 16x16  n = 0
5241      *      1-> AC 16x16  n = luma4x4idx
5242      *      2-> Luma4x4   n = luma4x4idx
5243      *      3-> DC Chroma n = iCbCr
5244      *      4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
5245      *      5-> Luma8x8   n = 4 * luma8x8idx
5246      */
5247
5248     /* read coded block flag */
5249     if( is_dc || cat != 5 ) {
5250         if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
5251             if( !is_dc )
5252                 h->non_zero_count_cache[scan8[n]] = 0;
5253
5254 #ifdef CABAC_ON_STACK
5255             h->cabac.range     = cc.range     ;
5256             h->cabac.low       = cc.low       ;
5257             h->cabac.bytestream= cc.bytestream;
5258 #endif
5259             return;
5260         }
5261     }
5262
5263     significant_coeff_ctx_base = h->cabac_state
5264         + significant_coeff_flag_offset[MB_FIELD][cat];
5265     last_coeff_ctx_base = h->cabac_state
5266         + last_coeff_flag_offset[MB_FIELD][cat];
5267     abs_level_m1_ctx_base = h->cabac_state
5268         + coeff_abs_level_m1_offset[cat];
5269
5270     if( !is_dc && cat == 5 ) {
5271 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5272         for(last= 0; last < coefs; last++) { \
5273             uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5274             if( get_cabac( CC, sig_ctx )) { \
5275                 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5276                 index[coeff_count++] = last; \
5277                 if( get_cabac( CC, last_ctx ) ) { \
5278                     last= max_coeff; \
5279                     break; \
5280                 } \
5281             } \
5282         }\
5283         if( last == max_coeff -1 ) {\
5284             index[coeff_count++] = last;\
5285         }
5286         const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5287 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5288         coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5289     } else {
5290         coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5291 #else
5292         DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5293     } else {
5294         DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5295 #endif
5296     }
5297     assert(coeff_count > 0);
5298
5299     if( is_dc ) {
5300         if( cat == 0 )
5301             h->cbp_table[h->mb_xy] |= 0x100;
5302         else
5303             h->cbp_table[h->mb_xy] |= 0x40 << n;
5304     } else {
5305         if( cat == 5 )
5306             fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5307         else {
5308             assert( cat == 1 || cat == 2 || cat == 4 );
5309             h->non_zero_count_cache[scan8[n]] = coeff_count;
5310         }
5311     }
5312
5313     do {
5314         uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5315
5316         int j= scantable[index[--coeff_count]];
5317
5318         if( get_cabac( CC, ctx ) == 0 ) {
5319             node_ctx = coeff_abs_level_transition[0][node_ctx];
5320             if( is_dc ) {
5321                 block[j] = get_cabac_bypass_sign( CC, -1);
5322             }else{
5323                 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5324             }
5325         } else {
5326             int coeff_abs = 2;
5327             ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5328             node_ctx = coeff_abs_level_transition[1][node_ctx];
5329
5330             while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5331                 coeff_abs++;
5332             }
5333
5334             if( coeff_abs >= 15 ) {
5335                 int j = 0;
5336                 while( get_cabac_bypass( CC ) ) {
5337                     j++;
5338                 }
5339
5340                 coeff_abs=1;
5341                 while( j-- ) {
5342                     coeff_abs += coeff_abs + get_cabac_bypass( CC );
5343                 }
5344                 coeff_abs+= 14;
5345             }
5346
5347             if( is_dc ) {
5348                 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5349             }else{
5350                 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5351             }
5352         }
5353     } while( coeff_count );
5354 #ifdef CABAC_ON_STACK
5355             h->cabac.range     = cc.range     ;
5356             h->cabac.low       = cc.low       ;
5357             h->cabac.bytestream= cc.bytestream;
5358 #endif
5359
5360 }
5361
#ifndef CONFIG_SMALL
/* DC variant of the residual decoder: forwards all arguments to
 * decode_cabac_residual_internal() with is_dc fixed to 1.
 * Only built when CONFIG_SMALL is not defined; presumably this gives the
 * always-inline body one out-of-line copy per is_dc value. */
static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
    decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
}

/* Non-DC variant of the residual decoder: forwards all arguments to
 * decode_cabac_residual_internal() with is_dc fixed to 0. */
static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
    decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
}
#endif
5371
5372 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5373 #ifdef CONFIG_SMALL
5374     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5375 #else
5376     if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5377     else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
5378 #endif
5379 }
5380
5381 static inline void compute_mb_neighbors(H264Context *h)
5382 {
5383     MpegEncContext * const s = &h->s;
5384     const int mb_xy  = h->mb_xy;
5385     h->top_mb_xy     = mb_xy - s->mb_stride;
5386     h->left_mb_xy[0] = mb_xy - 1;
5387     if(FRAME_MBAFF){
5388         const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
5389         const int top_pair_xy      = pair_xy     - s->mb_stride;
5390         const int top_mb_field_flag  = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5391         const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5392         const int curr_mb_field_flag = MB_FIELD;
5393         const int bottom = (s->mb_y & 1);
5394
5395         if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
5396             h->top_mb_xy -= s->mb_stride;
5397         }
5398         if (!left_mb_field_flag == curr_mb_field_flag) {
5399             h->left_mb_xy[0] = pair_xy - 1;
5400         }
5401     } else if (FIELD_PICTURE) {
5402         h->top_mb_xy -= s->mb_stride;
5403     }
5404     return;
5405 }
5406
5407 /**
5408  * decodes a macroblock
5409  * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5410  */
5411 static int decode_mb_cabac(H264Context *h) {
5412     MpegEncContext * const s = &h->s;
5413     int mb_xy;
5414     int mb_type, partition_count, cbp = 0;
5415     int dct8x8_allowed= h->pps.transform_8x8_mode;
5416
5417     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5418
5419     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5420     if( h->slice_type_nos != FF_I_TYPE ) {
5421         int skip;
5422         /* a skipped mb needs the aff flag from the following mb */
5423         if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5424             predict_field_decoding_flag(h);
5425         if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5426             skip = h->next_mb_skipped;
5427         else
5428             skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5429         /* read skip flags */
5430         if( skip ) {
5431             if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5432                 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5433                 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5434                 if(!h->next_mb_skipped)
5435                     h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5436             }
5437
5438             decode_mb_skip(h);
5439
5440             h->cbp_table[mb_xy] = 0;
5441             h->chroma_pred_mode_table[mb_xy] = 0;
5442             h->last_qscale_diff = 0;
5443
5444             return 0;
5445
5446         }
5447     }
5448     if(FRAME_MBAFF){
5449         if( (s->mb_y&1) == 0 )
5450             h->mb_mbaff =
5451             h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5452     }
5453
5454     h->prev_mb_skipped = 0;
5455
5456     compute_mb_neighbors(h);
5457
5458     if( h->slice_type_nos == FF_B_TYPE ) {
5459         mb_type = decode_cabac_mb_type_b( h );
5460         if( mb_type < 23 ){
5461             partition_count= b_mb_type_info[mb_type].partition_count;
5462             mb_type=         b_mb_type_info[mb_type].type;
5463         }else{
5464             mb_type -= 23;
5465             goto decode_intra_mb;
5466         }
5467     } else if( h->slice_type_nos == FF_P_TYPE ) {
5468         if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5469             /* P-type */
5470             if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5471                 /* P_L0_D16x16, P_8x8 */
5472                 mb_type= 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5473             } else {
5474                 /* P_L0_D8x16, P_L0_D16x8 */
5475                 mb_type= 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
5476             }
5477             partition_count= p_mb_type_info[mb_type].partition_count;
5478             mb_type=         p_mb_type_info[mb_type].type;
5479         } else {
5480             mb_type= decode_cabac_intra_mb_type(h, 17, 0);
5481             goto decode_intra_mb;
5482         }
5483     } else {
5484         mb_type= decode_cabac_intra_mb_type(h, 3, 1);
5485         if(h->slice_type == FF_SI_TYPE && mb_type)
5486             mb_type--;
5487         assert(h->slice_type_nos == FF_I_TYPE);
5488 decode_intra_mb:
5489         partition_count = 0;
5490         cbp= i_mb_type_info[mb_type].cbp;
5491         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5492         mb_type= i_mb_type_info[mb_type].type;
5493     }
5494     if(MB_FIELD)
5495         mb_type |= MB_TYPE_INTERLACED;
5496
5497     h->slice_table[ mb_xy ]= h->slice_num;
5498
5499     if(IS_INTRA_PCM(mb_type)) {
5500         const uint8_t *ptr;
5501
5502         // We assume these blocks are very rare so we do not optimize it.
5503         // FIXME The two following lines get the bitstream position in the cabac
5504         // decode, I think it should be done by a function in cabac.h (or cabac.c).
5505         ptr= h->cabac.bytestream;
5506         if(h->cabac.low&0x1) ptr--;
5507         if(CABAC_BITS==16){
5508             if(h->cabac.low&0x1FF) ptr--;
5509         }
5510
5511         // The pixels are stored in the same order as levels in h->mb array.
5512         memcpy(h->mb, ptr, 256); ptr+=256;
5513         if(CHROMA){
5514             memcpy(h->mb+128, ptr, 128); ptr+=128;
5515         }
5516
5517         ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5518
5519         // All blocks are present
5520         h->cbp_table[mb_xy] = 0x1ef;
5521         h->chroma_pred_mode_table[mb_xy] = 0;
5522         // In deblocking, the quantizer is 0
5523         s->current_picture.qscale_table[mb_xy]= 0;
5524         // All coeffs are present
5525         memset(h->non_zero_count[mb_xy], 16, 16);
5526         s->current_picture.mb_type[mb_xy]= mb_type;
5527         h->last_qscale_diff = 0;
5528         return 0;
5529     }
5530
5531     if(MB_MBAFF){
5532         h->ref_count[0] <<= 1;
5533         h->ref_count[1] <<= 1;
5534     }
5535
5536     fill_caches(h, mb_type, 0);
5537
5538     if( IS_INTRA( mb_type ) ) {
5539         int i, pred_mode;
5540         if( IS_INTRA4x4( mb_type ) ) {
5541             if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5542                 mb_type |= MB_TYPE_8x8DCT;
5543                 for( i = 0; i < 16; i+=4 ) {
5544                     int pred = pred_intra_mode( h, i );
5545                     int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5546                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5547                 }
5548             } else {
5549                 for( i = 0; i < 16; i++ ) {
5550                     int pred = pred_intra_mode( h, i );
5551                     h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5552
5553                 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5554                 }
5555             }
5556             write_back_intra_pred_mode(h);
5557             if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5558         } else {
5559             h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5560             if( h->intra16x16_pred_mode < 0 ) return -1;
5561         }
5562         if(CHROMA){
5563             h->chroma_pred_mode_table[mb_xy] =
5564             pred_mode                        = decode_cabac_mb_chroma_pre_mode( h );
5565
5566             pred_mode= check_intra_pred_mode( h, pred_mode );
5567             if( pred_mode < 0 ) return -1;
5568             h->chroma_pred_mode= pred_mode;
5569         }
5570     } else if( partition_count == 4 ) {
5571         int i, j, sub_partition_count[4], list, ref[2][4];
5572
5573         if( h->slice_type_nos == FF_B_TYPE ) {
5574             for( i = 0; i < 4; i++ ) {
5575                 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5576                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5577                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5578             }
5579             if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5580                           h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5581                 pred_direct_motion(h, &mb_type);
5582                 h->ref_cache[0][scan8[4]] =
5583                 h->ref_cache[1][scan8[4]] =
5584                 h->ref_cache[0][scan8[12]] =
5585                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5586                 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5587                     for( i = 0; i < 4; i++ )
5588                         if( IS_DIRECT(h->sub_mb_type[i]) )
5589                             fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5590                 }
5591             }
5592         } else {
5593             for( i = 0; i < 4; i++ ) {
5594                 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5595                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5596                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5597             }
5598         }
5599
5600         for( list = 0; list < h->list_count; list++ ) {
5601                 for( i = 0; i < 4; i++ ) {
5602                     if(IS_DIRECT(h->sub_mb_type[i])) continue;
5603                     if(IS_DIR(h->sub_mb_type[i], 0, list)){
5604                         if( h->ref_count[list] > 1 ){
5605                             ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5606                             if(ref[list][i] >= (unsigned)h->ref_count[list]){
5607                                 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
5608                                 return -1;
5609                             }
5610                         }else
5611                             ref[list][i] = 0;
5612                     } else {
5613                         ref[list][i] = -1;
5614                     }
5615                                                        h->ref_cache[list][ scan8[4*i]+1 ]=
5616                     h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5617                 }
5618         }
5619
5620         if(dct8x8_allowed)
5621             dct8x8_allowed = get_dct8x8_allowed(h);
5622
5623         for(list=0; list<h->list_count; list++){
5624             for(i=0; i<4; i++){
5625                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ];
5626                 if(IS_DIRECT(h->sub_mb_type[i])){
5627                     fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5628                     continue;
5629                 }
5630
5631                 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5632                     const int sub_mb_type= h->sub_mb_type[i];
5633                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5634                     for(j=0; j<sub_partition_count[i]; j++){
5635                         int mpx, mpy;
5636                         int mx, my;
5637                         const int index= 4*i + block_width*j;
5638                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5639                         int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5640                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5641
5642                         mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5643                         my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5644                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5645
5646                         if(IS_SUB_8X8(sub_mb_type)){
5647                             mv_cache[ 1 ][0]=
5648                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5649                             mv_cache[ 1 ][1]=
5650                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5651
5652                             mvd_cache[ 1 ][0]=
5653                             mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5654                             mvd_cache[ 1 ][1]=
5655                             mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5656                         }else if(IS_SUB_8X4(sub_mb_type)){
5657                             mv_cache[ 1 ][0]= mx;
5658                             mv_cache[ 1 ][1]= my;
5659
5660                             mvd_cache[ 1 ][0]= mx - mpx;
5661                             mvd_cache[ 1 ][1]= my - mpy;
5662                         }else if(IS_SUB_4X8(sub_mb_type)){
5663                             mv_cache[ 8 ][0]= mx;
5664                             mv_cache[ 8 ][1]= my;
5665
5666                             mvd_cache[ 8 ][0]= mx - mpx;
5667                             mvd_cache[ 8 ][1]= my - mpy;
5668                         }
5669                         mv_cache[ 0 ][0]= mx;
5670                         mv_cache[ 0 ][1]= my;
5671
5672                         mvd_cache[ 0 ][0]= mx - mpx;
5673                         mvd_cache[ 0 ][1]= my - mpy;
5674                     }
5675                 }else{
5676                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5677                     uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5678                     p[0] = p[1] = p[8] = p[9] = 0;
5679                     pd[0]= pd[1]= pd[8]= pd[9]= 0;
5680                 }
5681             }
5682         }
5683     } else if( IS_DIRECT(mb_type) ) {
5684         pred_direct_motion(h, &mb_type);
5685         fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5686         fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5687         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5688     } else {
5689         int list, mx, my, i, mpx, mpy;
5690         if(IS_16X16(mb_type)){
5691             for(list=0; list<h->list_count; list++){
5692                 if(IS_DIR(mb_type, 0, list)){
5693                     int ref;
5694                     if(h->ref_count[list] > 1){
5695                         ref= decode_cabac_mb_ref(h, list, 0);
5696                         if(ref >= (unsigned)h->ref_count[list]){
5697                             av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5698                             return -1;
5699                         }
5700                     }else
5701                         ref=0;
5702                         fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5703                 }else
5704                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5705             }
5706             for(list=0; list<h->list_count; list++){
5707                 if(IS_DIR(mb_type, 0, list)){
5708                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5709
5710                     mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5711                     my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5712                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5713
5714                     fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5715                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5716                 }else
5717                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5718             }
5719         }
5720         else if(IS_16X8(mb_type)){
5721             for(list=0; list<h->list_count; list++){
5722                     for(i=0; i<2; i++){
5723                         if(IS_DIR(mb_type, i, list)){
5724                             int ref;
5725                             if(h->ref_count[list] > 1){
5726                                 ref= decode_cabac_mb_ref( h, list, 8*i );
5727                                 if(ref >= (unsigned)h->ref_count[list]){
5728                                     av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5729                                     return -1;
5730                                 }
5731                             }else
5732                                 ref=0;
5733                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5734                         }else
5735                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5736                     }
5737             }
5738             for(list=0; list<h->list_count; list++){
5739                 for(i=0; i<2; i++){
5740                     if(IS_DIR(mb_type, i, list)){
5741                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5742                         mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5743                         my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5744                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5745
5746                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5747                         fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5748                     }else{
5749                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5750                         fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5751                     }
5752                 }
5753             }
5754         }else{
5755             assert(IS_8X16(mb_type));
5756             for(list=0; list<h->list_count; list++){
5757                     for(i=0; i<2; i++){
5758                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5759                             int ref;
5760                             if(h->ref_count[list] > 1){
5761                                 ref= decode_cabac_mb_ref( h, list, 4*i );
5762                                 if(ref >= (unsigned)h->ref_count[list]){
5763                                     av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5764                                     return -1;
5765                                 }
5766                             }else
5767                                 ref=0;
5768                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5769                         }else
5770                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5771                     }
5772             }
5773             for(list=0; list<h->list_count; list++){
5774                 for(i=0; i<2; i++){
5775                     if(IS_DIR(mb_type, i, list)){
5776                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5777                         mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5778                         my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5779
5780                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5781                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5782                         fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5783                     }else{
5784                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5785                         fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5786                     }
5787                 }
5788             }
5789         }
5790     }
5791
5792    if( IS_INTER( mb_type ) ) {
5793         h->chroma_pred_mode_table[mb_xy] = 0;
5794         write_back_motion( h, mb_type );
5795    }
5796
5797     if( !IS_INTRA16x16( mb_type ) ) {
5798         cbp  = decode_cabac_mb_cbp_luma( h );
5799         if(CHROMA)
5800             cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5801     }
5802
5803     h->cbp_table[mb_xy] = h->cbp = cbp;
5804
5805     if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5806         if( decode_cabac_mb_transform_size( h ) )
5807             mb_type |= MB_TYPE_8x8DCT;
5808     }
5809     s->current_picture.mb_type[mb_xy]= mb_type;
5810
5811     if( cbp || IS_INTRA16x16( mb_type ) ) {
5812         const uint8_t *scan, *scan8x8, *dc_scan;
5813         const uint32_t *qmul;
5814         int dqp;
5815
5816         if(IS_INTERLACED(mb_type)){
5817             scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5818             scan= s->qscale ? h->field_scan : h->field_scan_q0;
5819             dc_scan= luma_dc_field_scan;
5820         }else{
5821             scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5822             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5823             dc_scan= luma_dc_zigzag_scan;
5824         }
5825
5826         h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5827         if( dqp == INT_MIN ){
5828             av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
5829             return -1;
5830         }
5831         s->qscale += dqp;
5832         if(((unsigned)s->qscale) > 51){
5833             if(s->qscale<0) s->qscale+= 52;
5834             else            s->qscale-= 52;
5835         }
5836         h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5837         h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5838
5839         if( IS_INTRA16x16( mb_type ) ) {
5840             int i;
5841             //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5842             decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5843
5844             if( cbp&15 ) {
5845                 qmul = h->dequant4_coeff[0][s->qscale];
5846                 for( i = 0; i < 16; i++ ) {
5847                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5848                     decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5849                 }
5850             } else {
5851                 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5852             }
5853         } else {
5854             int i8x8, i4x4;
5855             for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5856                 if( cbp & (1<<i8x8) ) {
5857                     if( IS_8x8DCT(mb_type) ) {
5858                         decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5859                             scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5860                     } else {
5861                         qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5862                         for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5863                             const int index = 4*i8x8 + i4x4;
5864                             //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5865 //START_TIMER
5866                             decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5867 //STOP_TIMER("decode_residual")
5868                         }
5869                     }
5870                 } else {
5871                     uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5872                     nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5873                 }
5874             }
5875         }
5876
5877         if( cbp&0x30 ){
5878             int c;
5879             for( c = 0; c < 2; c++ ) {
5880                 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5881                 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5882             }
5883         }
5884
5885         if( cbp&0x20 ) {
5886             int c, i;
5887             for( c = 0; c < 2; c++ ) {
5888                 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5889                 for( i = 0; i < 4; i++ ) {
5890                     const int index = 16 + 4 * c + i;
5891                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5892                     decode_cabac_residual(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
5893                 }
5894             }
5895         } else {
5896             uint8_t * const nnz= &h->non_zero_count_cache[0];
5897             nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5898             nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5899         }
5900     } else {
5901         uint8_t * const nnz= &h->non_zero_count_cache[0];
5902         fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5903         nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5904         nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5905         h->last_qscale_diff = 0;
5906     }
5907
5908     s->current_picture.qscale_table[mb_xy]= s->qscale;
5909     write_back_non_zero_count(h);
5910
5911     if(MB_MBAFF){
5912         h->ref_count[0] >>= 1;
5913         h->ref_count[1] >>= 1;
5914     }
5915
5916     return 0;
5917 }
5918
5919
5920 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5921     const int index_a = qp + h->slice_alpha_c0_offset;
5922     const int alpha = (alpha_table+52)[index_a];
5923     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
5924
5925     if( bS[0] < 4 ) {
5926         int8_t tc[4];
5927         tc[0] = (tc0_table+52)[index_a][bS[0]];
5928         tc[1] = (tc0_table+52)[index_a][bS[1]];
5929         tc[2] = (tc0_table+52)[index_a][bS[2]];
5930         tc[3] = (tc0_table+52)[index_a][bS[3]];
5931         h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5932     } else {
5933         h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
5934     }
5935 }
5936 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5937     const int index_a = qp + h->slice_alpha_c0_offset;
5938     const int alpha = (alpha_table+52)[index_a];
5939     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
5940
5941     if( bS[0] < 4 ) {
5942         int8_t tc[4];
5943         tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
5944         tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
5945         tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
5946         tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
5947         h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5948     } else {
5949         h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
5950     }
5951 }
5952
5953 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5954     int i;
5955     for( i = 0; i < 16; i++, pix += stride) {
5956         int index_a;
5957         int alpha;
5958         int beta;
5959
5960         int qp_index;
5961         int bS_index = (i >> 1);
5962         if (!MB_FIELD) {
5963             bS_index &= ~1;
5964             bS_index |= (i & 1);
5965         }
5966
5967         if( bS[bS_index] == 0 ) {
5968             continue;
5969         }
5970
5971         qp_index = MB_FIELD ? (i >> 3) : (i & 1);
5972         index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5973         alpha = (alpha_table+52)[index_a];
5974         beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
5975
5976         if( bS[bS_index] < 4 ) {
5977             const int tc0 = (tc0_table+52)[index_a][bS[bS_index]];
5978             const int p0 = pix[-1];
5979             const int p1 = pix[-2];
5980             const int p2 = pix[-3];
5981             const int q0 = pix[0];
5982             const int q1 = pix[1];
5983             const int q2 = pix[2];
5984
5985             if( FFABS( p0 - q0 ) < alpha &&
5986                 FFABS( p1 - p0 ) < beta &&
5987                 FFABS( q1 - q0 ) < beta ) {
5988                 int tc = tc0;
5989                 int i_delta;
5990
5991                 if( FFABS( p2 - p0 ) < beta ) {
5992                     pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
5993                     tc++;
5994                 }
5995                 if( FFABS( q2 - q0 ) < beta ) {
5996                     pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
5997                     tc++;
5998                 }
5999
6000                 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6001                 pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
6002                 pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
6003                 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6004             }
6005         }else{
6006             const int p0 = pix[-1];
6007             const int p1 = pix[-2];
6008             const int p2 = pix[-3];
6009
6010             const int q0 = pix[0];
6011             const int q1 = pix[1];
6012             const int q2 = pix[2];
6013
6014             if( FFABS( p0 - q0 ) < alpha &&
6015                 FFABS( p1 - p0 ) < beta &&
6016                 FFABS( q1 - q0 ) < beta ) {
6017
6018                 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6019                     if( FFABS( p2 - p0 ) < beta)
6020                     {
6021                         const int p3 = pix[-4];
6022                         /* p0', p1', p2' */
6023                         pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6024                         pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6025                         pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6026                     } else {
6027                         /* p0' */
6028                         pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6029                     }
6030                     if( FFABS( q2 - q0 ) < beta)
6031                     {
6032                         const int q3 = pix[3];
6033                         /* q0', q1', q2' */
6034                         pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6035                         pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6036                         pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6037                     } else {
6038                         /* q0' */
6039                         pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6040                     }
6041                 }else{
6042                     /* p0', q0' */
6043                     pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6044                     pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6045                 }
6046                 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6047             }
6048         }
6049     }
6050 }
6051 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6052     int i;
6053     for( i = 0; i < 8; i++, pix += stride) {
6054         int index_a;
6055         int alpha;
6056         int beta;
6057
6058         int qp_index;
6059         int bS_index = i;
6060
6061         if( bS[bS_index] == 0 ) {
6062             continue;
6063         }
6064
6065         qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6066         index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6067         alpha = (alpha_table+52)[index_a];
6068         beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6069
6070         if( bS[bS_index] < 4 ) {
6071             const int tc = (tc0_table+52)[index_a][bS[bS_index]] + 1;
6072             const int p0 = pix[-1];
6073             const int p1 = pix[-2];
6074             const int q0 = pix[0];
6075             const int q1 = pix[1];
6076
6077             if( FFABS( p0 - q0 ) < alpha &&
6078                 FFABS( p1 - p0 ) < beta &&
6079                 FFABS( q1 - q0 ) < beta ) {
6080                 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6081
6082                 pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
6083                 pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
6084                 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6085             }
6086         }else{
6087             const int p0 = pix[-1];
6088             const int p1 = pix[-2];
6089             const int q0 = pix[0];
6090             const int q1 = pix[1];
6091
6092             if( FFABS( p0 - q0 ) < alpha &&
6093                 FFABS( p1 - p0 ) < beta &&
6094                 FFABS( q1 - q0 ) < beta ) {
6095
6096                 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
6097                 pix[0]  = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
6098                 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6099             }
6100         }
6101     }
6102 }
6103
6104 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6105     const int index_a = qp + h->slice_alpha_c0_offset;
6106     const int alpha = (alpha_table+52)[index_a];
6107     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
6108
6109     if( bS[0] < 4 ) {
6110         int8_t tc[4];
6111         tc[0] = (tc0_table+52)[index_a][bS[0]];
6112         tc[1] = (tc0_table+52)[index_a][bS[1]];
6113         tc[2] = (tc0_table+52)[index_a][bS[2]];
6114         tc[3] = (tc0_table+52)[index_a][bS[3]];
6115         h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6116     } else {
6117         h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
6118     }
6119 }
6120
6121 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6122     const int index_a = qp + h->slice_alpha_c0_offset;
6123     const int alpha = (alpha_table+52)[index_a];
6124     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
6125
6126     if( bS[0] < 4 ) {
6127         int8_t tc[4];
6128         tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
6129         tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
6130         tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
6131         tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
6132         h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6133     } else {
6134         h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
6135     }
6136 }
6137
6138 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6139     MpegEncContext * const s = &h->s;
6140     int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6141     int mb_xy, mb_type;
6142     int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
6143
6144     mb_xy = h->mb_xy;
6145
6146     if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6147         !(s->flags2 & CODEC_FLAG2_FAST) || //FIXME filter_mb_fast is broken, thus hasto be, but should not under CODEC_FLAG2_FAST
6148        (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6149                                       h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6150         filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6151         return;
6152     }
6153     assert(!FRAME_MBAFF);
6154
6155     mb_type = s->current_picture.mb_type[mb_xy];
6156     qp = s->current_picture.qscale_table[mb_xy];
6157     qp0 = s->current_picture.qscale_table[mb_xy-1];
6158     qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6159     qpc = get_chroma_qp( h, 0, qp );
6160     qpc0 = get_chroma_qp( h, 0, qp0 );
6161     qpc1 = get_chroma_qp( h, 0, qp1 );
6162     qp0 = (qp + qp0 + 1) >> 1;
6163     qp1 = (qp + qp1 + 1) >> 1;
6164     qpc0 = (qpc + qpc0 + 1) >> 1;
6165     qpc1 = (qpc + qpc1 + 1) >> 1;
6166     qp_thresh = 15 - h->slice_alpha_c0_offset;
6167     if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6168        qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
6169         return;
6170
6171     if( IS_INTRA(mb_type) ) {
6172         int16_t bS4[4] = {4,4,4,4};
6173         int16_t bS3[4] = {3,3,3,3};
6174         int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6175         if( IS_8x8DCT(mb_type) ) {
6176             filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6177             filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6178             filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6179             filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6180         } else {
6181             filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6182             filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6183             filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6184             filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6185             filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6186             filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6187             filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6188             filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6189         }
6190         filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6191         filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6192         filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6193         filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6194         filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6195         filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6196         filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6197         filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
6198         return;
6199     } else {
6200         DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6201         uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6202         int edges;
6203         if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6204             edges = 4;
6205             bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6206         } else {
6207             int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6208                              (mb_type & MB_TYPE_16x8) ? 1 : 0;
6209             int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6210                              && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6211                              ? 3 : 0;
6212             int step = IS_8x8DCT(mb_type) ? 2 : 1;
6213             edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6214             s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6215                                               (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
6216         }
6217         if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6218             bSv[0][0] = 0x0004000400040004ULL;
6219         if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6220             bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
6221
6222 #define FILTER(hv,dir,edge)\
6223         if(bSv[dir][edge]) {\
6224             filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6225             if(!(edge&1)) {\
6226                 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6227                 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6228             }\
6229         }
6230         if( edges == 1 ) {
6231             FILTER(v,0,0);
6232             FILTER(h,1,0);
6233         } else if( IS_8x8DCT(mb_type) ) {
6234             FILTER(v,0,0);
6235             FILTER(v,0,2);
6236             FILTER(h,1,0);
6237             FILTER(h,1,2);
6238         } else {
6239             FILTER(v,0,0);
6240             FILTER(v,0,1);
6241             FILTER(v,0,2);
6242             FILTER(v,0,3);
6243             FILTER(h,1,0);
6244             FILTER(h,1,1);
6245             FILTER(h,1,2);
6246             FILTER(h,1,3);
6247         }
6248 #undef FILTER
6249     }
6250 }
6251
6252
6253 static void av_always_inline filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
6254     MpegEncContext * const s = &h->s;
6255     int edge;
6256     const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6257     const int mbm_type = s->current_picture.mb_type[mbm_xy];
6258     int (*ref2frm) [64] = h->ref2frm[ h->slice_num          &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6259     int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6260     int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
6261
6262     const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6263                               == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6264     // how often to recheck mv-based bS when iterating between edges
6265     const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6266                           (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6267     // how often to recheck mv-based bS when iterating along each edge
6268     const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6269
6270     if (first_vertical_edge_done) {
6271         start = 1;
6272     }
6273
6274     if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6275         start = 1;
6276
6277     if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6278         && !IS_INTERLACED(mb_type)
6279         && IS_INTERLACED(mbm_type)
6280         ) {
6281         // This is a special case in the norm where the filtering must
6282         // be done twice (one each of the field) even if we are in a
6283         // frame macroblock.
6284         //
6285         static const int nnz_idx[4] = {4,5,6,3};
6286         unsigned int tmp_linesize   = 2 *   linesize;
6287         unsigned int tmp_uvlinesize = 2 * uvlinesize;
6288         int mbn_xy = mb_xy - 2 * s->mb_stride;
6289         int qp;
6290         int i, j;
6291         int16_t bS[4];
6292
6293         for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6294             if( IS_INTRA(mb_type) ||
6295                 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6296                 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6297             } else {
6298                 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6299                 for( i = 0; i < 4; i++ ) {
6300                     if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6301                         mbn_nnz[nnz_idx[i]] != 0 )
6302                         bS[i] = 2;
6303                     else
6304                         bS[i] = 1;
6305                 }
6306             }
6307             // Do not use s->qscale as luma quantizer because it has not the same
6308             // value in IPCM macroblocks.
6309             qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6310             tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6311             { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6312             filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6313             filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6314                               ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6315             filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6316                               ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6317         }
6318
6319         start = 1;
6320     }
6321
6322     /* Calculate bS */
6323     for( edge = start; edge < edges; edge++ ) {
6324         /* mbn_xy: neighbor macroblock */
6325         const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6326         const int mbn_type = s->current_picture.mb_type[mbn_xy];
6327         int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
6328         int16_t bS[4];
6329         int qp;
6330
6331         if( (edge&1) && IS_8x8DCT(mb_type) )
6332             continue;
6333
6334         if( IS_INTRA(mb_type) ||
6335             IS_INTRA(mbn_type) ) {
6336             int value;
6337             if (edge == 0) {
6338                 if (   (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6339                     || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6340                 ) {
6341                     value = 4;
6342                 } else {
6343                     value = 3;
6344                 }
6345             } else {
6346                 value = 3;
6347             }
6348             bS[0] = bS[1] = bS[2] = bS[3] = value;
6349         } else {
6350             int i, l;
6351             int mv_done;
6352
6353             if( edge & mask_edge ) {
6354                 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6355                 mv_done = 1;
6356             }
6357             else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6358                 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6359                 mv_done = 1;
6360             }
6361             else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6362                 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6363                 int bn_idx= b_idx - (dir ? 8:1);
6364                 int v = 0;
6365
6366                 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6367                     v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6368                          FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6369                          FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6370                 }
6371
6372                 if(h->slice_type_nos == FF_B_TYPE && v){
6373                     v=0;
6374                     for( l = 0; !v && l < 2; l++ ) {
6375                         int ln= 1-l;
6376                         v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6377                             FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6378                             FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6379                     }
6380                 }
6381
6382                 bS[0] = bS[1] = bS[2] = bS[3] = v;
6383                 mv_done = 1;
6384             }
6385             else
6386                 mv_done = 0;
6387
6388             for( i = 0; i < 4; i++ ) {
6389                 int x = dir == 0 ? edge : i;
6390                 int y = dir == 0 ? i    : edge;
6391                 int b_idx= 8 + 4 + x + 8*y;
6392                 int bn_idx= b_idx - (dir ? 8:1);
6393
6394                 if( h->non_zero_count_cache[b_idx] |
6395                     h->non_zero_count_cache[bn_idx] ) {
6396                     bS[i] = 2;
6397                 }
6398                 else if(!mv_done)
6399                 {
6400                     bS[i] = 0;
6401                     for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6402                         if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6403                             FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6404                             FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6405                             bS[i] = 1;
6406                             break;
6407                         }
6408                     }
6409
6410                     if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6411                         bS[i] = 0;
6412                         for( l = 0; l < 2; l++ ) {
6413                             int ln= 1-l;
6414                             if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6415                                 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6416                                 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
6417                                 bS[i] = 1;
6418                                 break;
6419                             }
6420                         }
6421                     }
6422                 }
6423             }
6424
6425             if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6426                 continue;
6427         }
6428
6429         /* Filter edge */
6430         // Do not use s->qscale as luma quantizer because it has not the same
6431         // value in IPCM macroblocks.
6432         qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6433         //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6434         tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6435         { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6436         if( dir == 0 ) {
6437             filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6438             if( (edge&1) == 0 ) {
6439                 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6440                                   ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6441                 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6442                                   ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6443             }
6444         } else {
6445             filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6446             if( (edge&1) == 0 ) {
6447                 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6448                                   ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6449                 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6450                                   ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6451             }
6452         }
6453     }
6454 }
6455
6456 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6457     MpegEncContext * const s = &h->s;
6458     const int mb_xy= mb_x + mb_y*s->mb_stride;
6459     const int mb_type = s->current_picture.mb_type[mb_xy];
6460     const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6461     int first_vertical_edge_done = 0;
6462     int dir;
6463
6464     //for sufficiently low qp, filtering wouldn't do anything
6465     //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6466     if(!FRAME_MBAFF){
6467         int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6468         int qp = s->current_picture.qscale_table[mb_xy];
6469         if(qp <= qp_thresh
6470            && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6471            && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6472             return;
6473         }
6474     }
6475
6476     // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6477     if(!h->pps.cabac && h->pps.transform_8x8_mode){
6478         int top_type, left_type[2];
6479         top_type     = s->current_picture.mb_type[h->top_mb_xy]    ;
6480         left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6481         left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
6482
6483         if(IS_8x8DCT(top_type)){
6484             h->non_zero_count_cache[4+8*0]=
6485             h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6486             h->non_zero_count_cache[6+8*0]=
6487             h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6488         }
6489         if(IS_8x8DCT(left_type[0])){
6490             h->non_zero_count_cache[3+8*1]=
6491             h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6492         }
6493         if(IS_8x8DCT(left_type[1])){
6494             h->non_zero_count_cache[3+8*3]=
6495             h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
6496         }
6497
6498         if(IS_8x8DCT(mb_type)){
6499             h->non_zero_count_cache[scan8[0   ]]= h->non_zero_count_cache[scan8[1   ]]=
6500             h->non_zero_count_cache[scan8[2   ]]= h->non_zero_count_cache[scan8[3   ]]= h->cbp & 1;
6501
6502             h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6503             h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
6504
6505             h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6506             h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
6507
6508             h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6509             h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
6510         }
6511     }
6512
6513     if (FRAME_MBAFF
6514             // left mb is in picture
6515             && h->slice_table[mb_xy-1] != 0xFFFF
6516             // and current and left pair do not have the same interlaced type
6517             && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6518             // and left mb is in the same slice if deblocking_filter == 2
6519             && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6520         /* First vertical edge is different in MBAFF frames
6521          * There are 8 different bS to compute and 2 different Qp
6522          */
6523         const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6524         const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6525         int16_t bS[8];
6526         int qp[2];
6527         int bqp[2];
6528         int rqp[2];
6529         int mb_qp, mbn0_qp, mbn1_qp;
6530         int i;
6531         first_vertical_edge_done = 1;
6532
6533         if( IS_INTRA(mb_type) )
6534             bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6535         else {
6536             for( i = 0; i < 8; i++ ) {
6537                 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6538
6539                 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6540                     bS[i] = 4;
6541                 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6542                          ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
6543                             (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
6544                                                                        :
6545                             h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
6546                     bS[i] = 2;
6547                 else
6548                     bS[i] = 1;
6549             }
6550         }
6551
6552         mb_qp = s->current_picture.qscale_table[mb_xy];
6553         mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6554         mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6555         qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6556         bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6557                    get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6558         rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6559                    get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6560         qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6561         bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6562                    get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6563         rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6564                    get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6565
6566         /* Filter edge */
6567         tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6568         { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6569         filter_mb_mbaff_edgev ( h, &img_y [0], linesize,   bS, qp );
6570         filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6571         filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6572     }
6573
6574 #ifdef CONFIG_SMALL
6575     for( dir = 0; dir < 2; dir++ )
6576         filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
6577 #else
6578     filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
6579     filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
6580 #endif
6581 }
6582
6583 static int decode_slice(struct AVCodecContext *avctx, void *arg){
6584     H264Context *h = *(void**)arg;
6585     MpegEncContext * const s = &h->s;
6586     const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6587
6588     s->mb_skip_run= -1;
6589
6590     h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
6591                     (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding);
6592
6593     if( h->pps.cabac ) {
6594         int i;
6595
6596         /* realign */
6597         align_get_bits( &s->gb );
6598
6599         /* init cabac */
6600         ff_init_cabac_states( &h->cabac);
6601         ff_init_cabac_decoder( &h->cabac,
6602                                s->gb.buffer + get_bits_count(&s->gb)/8,
6603                                ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6604         /* calculate pre-state */
6605         for( i= 0; i < 460; i++ ) {
6606             int pre;
6607             if( h->slice_type_nos == FF_I_TYPE )
6608                 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6609             else
6610                 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6611
6612             if( pre <= 63 )
6613                 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6614             else
6615                 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6616         }
6617
6618         for(;;){
6619 //START_TIMER
6620             int ret = decode_mb_cabac(h);
6621             int eos;
6622 //STOP_TIMER("decode_mb_cabac")
6623
6624             if(ret>=0) hl_decode_mb(h);
6625
6626             if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6627                 s->mb_y++;
6628
6629                 ret = decode_mb_cabac(h);
6630
6631                 if(ret>=0) hl_decode_mb(h);
6632                 s->mb_y--;
6633             }
6634             eos = get_cabac_terminate( &h->cabac );
6635
6636             if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6637                 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6638                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6639                 return -1;
6640             }
6641
6642             if( ++s->mb_x >= s->mb_width ) {
6643                 s->mb_x = 0;
6644                 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6645                 ++s->mb_y;
6646                 if(FIELD_OR_MBAFF_PICTURE) {
6647                     ++s->mb_y;
6648                 }
6649             }
6650
6651             if( eos || s->mb_y >= s->mb_height ) {
6652                 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6653                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6654                 return 0;
6655             }
6656         }
6657
6658     } else {
6659         for(;;){
6660             int ret = decode_mb_cavlc(h);
6661
6662             if(ret>=0) hl_decode_mb(h);
6663
6664             if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6665                 s->mb_y++;
6666                 ret = decode_mb_cavlc(h);
6667
6668                 if(ret>=0) hl_decode_mb(h);
6669                 s->mb_y--;
6670             }
6671
6672             if(ret<0){
6673                 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6674                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6675
6676                 return -1;
6677             }
6678
6679             if(++s->mb_x >= s->mb_width){
6680                 s->mb_x=0;
6681                 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6682                 ++s->mb_y;
6683                 if(FIELD_OR_MBAFF_PICTURE) {
6684                     ++s->mb_y;
6685                 }
6686                 if(s->mb_y >= s->mb_height){
6687                     tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6688
6689                     if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6690                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6691
6692                         return 0;
6693                     }else{
6694                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6695
6696                         return -1;
6697                     }
6698                 }
6699             }
6700
6701             if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6702                 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6703                 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6704                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6705
6706                     return 0;
6707                 }else{
6708                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6709
6710                     return -1;
6711                 }
6712             }
6713         }
6714     }
6715
6716 #if 0
6717     for(;s->mb_y < s->mb_height; s->mb_y++){
6718         for(;s->mb_x < s->mb_width; s->mb_x++){
6719             int ret= decode_mb(h);
6720
6721             hl_decode_mb(h);
6722
6723             if(ret<0){
6724                 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6725                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6726
6727                 return -1;
6728             }
6729
6730             if(++s->mb_x >= s->mb_width){
6731                 s->mb_x=0;
6732                 if(++s->mb_y >= s->mb_height){
6733                     if(get_bits_count(s->gb) == s->gb.size_in_bits){
6734                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6735
6736                         return 0;
6737                     }else{
6738                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6739
6740                         return -1;
6741                     }
6742                 }
6743             }
6744
6745             if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
6746                 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6747                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6748
6749                     return 0;
6750                 }else{
6751                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6752
6753                     return -1;
6754                 }
6755             }
6756         }
6757         s->mb_x=0;
6758         ff_draw_horiz_band(s, 16*s->mb_y, 16);
6759     }
6760 #endif
6761     return -1; //not reached
6762 }
6763
6764 static int decode_picture_timing(H264Context *h){
6765     MpegEncContext * const s = &h->s;
6766     if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
6767         skip_bits(&s->gb, h->sps.cpb_removal_delay_length); /* cpb_removal_delay */
6768         skip_bits(&s->gb, h->sps.dpb_output_delay_length);  /* dpb_output_delay */
6769     }
6770     if(h->sps.pic_struct_present_flag){
6771         unsigned int i, num_clock_ts;
6772         h->sei_pic_struct = get_bits(&s->gb, 4);
6773
6774         if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
6775             return -1;
6776
6777         num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];
6778
6779         for (i = 0 ; i < num_clock_ts ; i++){
6780             if(get_bits(&s->gb, 1)){                  /* clock_timestamp_flag */
6781                 unsigned int full_timestamp_flag;
6782                 skip_bits(&s->gb, 2);                 /* ct_type */
6783                 skip_bits(&s->gb, 1);                 /* nuit_field_based_flag */
6784                 skip_bits(&s->gb, 5);                 /* counting_type */
6785                 full_timestamp_flag = get_bits(&s->gb, 1);
6786                 skip_bits(&s->gb, 1);                 /* discontinuity_flag */
6787                 skip_bits(&s->gb, 1);                 /* cnt_dropped_flag */
6788                 skip_bits(&s->gb, 8);                 /* n_frames */
6789                 if(full_timestamp_flag){
6790                     skip_bits(&s->gb, 6);             /* seconds_value 0..59 */
6791                     skip_bits(&s->gb, 6);             /* minutes_value 0..59 */
6792                     skip_bits(&s->gb, 5);             /* hours_value 0..23 */
6793                 }else{
6794                     if(get_bits(&s->gb, 1)){          /* seconds_flag */
6795                         skip_bits(&s->gb, 6);         /* seconds_value range 0..59 */
6796                         if(get_bits(&s->gb, 1)){      /* minutes_flag */
6797                             skip_bits(&s->gb, 6);     /* minutes_value 0..59 */
6798                             if(get_bits(&s->gb, 1))   /* hours_flag */
6799                                 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6800                         }
6801                     }
6802                 }
6803                 if(h->sps.time_offset_length > 0)
6804                     skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
6805             }
6806         }
6807     }
6808     return 0;
6809 }
6810
6811 static int decode_unregistered_user_data(H264Context *h, int size){
6812     MpegEncContext * const s = &h->s;
6813     uint8_t user_data[16+256];
6814     int e, build, i;
6815
6816     if(size<16)
6817         return -1;
6818
6819     for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6820         user_data[i]= get_bits(&s->gb, 8);
6821     }
6822
6823     user_data[i]= 0;
6824     e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6825     if(e==1 && build>=0)
6826         h->x264_build= build;
6827
6828     if(s->avctx->debug & FF_DEBUG_BUGS)
6829         av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
6830
6831     for(; i<size; i++)
6832         skip_bits(&s->gb, 8);
6833
6834     return 0;
6835 }
6836
6837 static int decode_sei(H264Context *h){
6838     MpegEncContext * const s = &h->s;
6839
6840     while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
6841         int size, type;
6842
6843         type=0;
6844         do{
6845             type+= show_bits(&s->gb, 8);
6846         }while(get_bits(&s->gb, 8) == 255);
6847
6848         size=0;
6849         do{
6850             size+= show_bits(&s->gb, 8);
6851         }while(get_bits(&s->gb, 8) == 255);
6852
6853         switch(type){
6854         case 1: // Picture timing SEI
6855             if(decode_picture_timing(h) < 0)
6856                 return -1;
6857             break;
6858         case 5:
6859             if(decode_unregistered_user_data(h, size) < 0)
6860                 return -1;
6861             break;
6862         default:
6863             skip_bits(&s->gb, 8*size);
6864         }
6865
6866         //FIXME check bits here
6867         align_get_bits(&s->gb);
6868     }
6869
6870     return 0;
6871 }
6872
6873 static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
6874     MpegEncContext * const s = &h->s;
6875     int cpb_count, i;
6876     cpb_count = get_ue_golomb_31(&s->gb) + 1;
6877
6878     if(cpb_count > 32U){
6879         av_log(h->s.avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count);
6880         return -1;
6881     }
6882
6883     get_bits(&s->gb, 4); /* bit_rate_scale */
6884     get_bits(&s->gb, 4); /* cpb_size_scale */
6885     for(i=0; i<cpb_count; i++){
6886         get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6887         get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6888         get_bits1(&s->gb);     /* cbr_flag */
6889     }
6890     get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
6891     sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
6892     sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1;
6893     sps->time_offset_length = get_bits(&s->gb, 5);
6894     return 0;
6895 }
6896
6897 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6898     MpegEncContext * const s = &h->s;
6899     int aspect_ratio_info_present_flag;
6900     unsigned int aspect_ratio_idc;
6901
6902     aspect_ratio_info_present_flag= get_bits1(&s->gb);
6903
6904     if( aspect_ratio_info_present_flag ) {
6905         aspect_ratio_idc= get_bits(&s->gb, 8);
6906         if( aspect_ratio_idc == EXTENDED_SAR ) {
6907             sps->sar.num= get_bits(&s->gb, 16);
6908             sps->sar.den= get_bits(&s->gb, 16);
6909         }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
6910             sps->sar=  pixel_aspect[aspect_ratio_idc];
6911         }else{
6912             av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6913             return -1;
6914         }
6915     }else{
6916         sps->sar.num=
6917         sps->sar.den= 0;
6918     }
6919 //            s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6920
6921     if(get_bits1(&s->gb)){      /* overscan_info_present_flag */
6922         get_bits1(&s->gb);      /* overscan_appropriate_flag */
6923     }
6924
6925     if(get_bits1(&s->gb)){      /* video_signal_type_present_flag */
6926         get_bits(&s->gb, 3);    /* video_format */
6927         get_bits1(&s->gb);      /* video_full_range_flag */
6928         if(get_bits1(&s->gb)){  /* colour_description_present_flag */
6929             get_bits(&s->gb, 8); /* colour_primaries */
6930             get_bits(&s->gb, 8); /* transfer_characteristics */
6931             get_bits(&s->gb, 8); /* matrix_coefficients */
6932         }
6933     }
6934
6935     if(get_bits1(&s->gb)){      /* chroma_location_info_present_flag */
6936         get_ue_golomb(&s->gb);  /* chroma_sample_location_type_top_field */
6937         get_ue_golomb(&s->gb);  /* chroma_sample_location_type_bottom_field */
6938     }
6939
6940     sps->timing_info_present_flag = get_bits1(&s->gb);
6941     if(sps->timing_info_present_flag){
6942         sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6943         sps->time_scale = get_bits_long(&s->gb, 32);
6944         sps->fixed_frame_rate_flag = get_bits1(&s->gb);
6945     }
6946
6947     sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6948     if(sps->nal_hrd_parameters_present_flag)
6949         if(decode_hrd_parameters(h, sps) < 0)
6950             return -1;
6951     sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6952     if(sps->vcl_hrd_parameters_present_flag)
6953         if(decode_hrd_parameters(h, sps) < 0)
6954             return -1;
6955     if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
6956         get_bits1(&s->gb);     /* low_delay_hrd_flag */
6957     sps->pic_struct_present_flag = get_bits1(&s->gb);
6958
6959     sps->bitstream_restriction_flag = get_bits1(&s->gb);
6960     if(sps->bitstream_restriction_flag){
6961         get_bits1(&s->gb);     /* motion_vectors_over_pic_boundaries_flag */
6962         get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6963         get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6964         get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
6965         get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
6966         sps->num_reorder_frames= get_ue_golomb(&s->gb);
6967         get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
6968
6969         if(sps->num_reorder_frames > 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
6970             av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", sps->num_reorder_frames);
6971             return -1;
6972         }
6973     }
6974
6975     return 0;
6976 }
6977
6978 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
6979                                 const uint8_t *jvt_list, const uint8_t *fallback_list){
6980     MpegEncContext * const s = &h->s;
6981     int i, last = 8, next = 8;
6982     const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
6983     if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
6984         memcpy(factors, fallback_list, size*sizeof(uint8_t));
6985     else
6986     for(i=0;i<size;i++){
6987         if(next)
6988             next = (last + get_se_golomb(&s->gb)) & 0xff;
6989         if(!i && !next){ /* matrix not written, we use the preset one */
6990             memcpy(factors, jvt_list, size*sizeof(uint8_t));
6991             break;
6992         }
6993         last = factors[scan[i]] = next ? next : last;
6994     }
6995 }
6996
6997 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
6998                                    uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
6999     MpegEncContext * const s = &h->s;
7000     int fallback_sps = !is_sps && sps->scaling_matrix_present;
7001     const uint8_t *fallback[4] = {
7002         fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7003         fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7004         fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7005         fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7006     };
7007     if(get_bits1(&s->gb)){
7008         sps->scaling_matrix_present |= is_sps;
7009         decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7010         decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7011         decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7012         decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7013         decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7014         decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
7015         if(is_sps || pps->transform_8x8_mode){
7016             decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]);  // Intra, Y
7017             decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]);  // Inter, Y
7018         }
7019     }
7020 }
7021
7022 static inline int decode_seq_parameter_set(H264Context *h){
7023     MpegEncContext * const s = &h->s;
7024     int profile_idc, level_idc;
7025     unsigned int sps_id;
7026     int i;
7027     SPS *sps;
7028
7029     profile_idc= get_bits(&s->gb, 8);
7030     get_bits1(&s->gb);   //constraint_set0_flag
7031     get_bits1(&s->gb);   //constraint_set1_flag
7032     get_bits1(&s->gb);   //constraint_set2_flag
7033     get_bits1(&s->gb);   //constraint_set3_flag
7034     get_bits(&s->gb, 4); // reserved
7035     level_idc= get_bits(&s->gb, 8);
7036     sps_id= get_ue_golomb_31(&s->gb);
7037
7038     if(sps_id >= MAX_SPS_COUNT) {
7039         av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
7040         return -1;
7041     }
7042     sps= av_mallocz(sizeof(SPS));
7043     if(sps == NULL)
7044         return -1;
7045
7046     sps->profile_idc= profile_idc;
7047     sps->level_idc= level_idc;
7048
7049     memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
7050     memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
7051     sps->scaling_matrix_present = 0;
7052
7053     if(sps->profile_idc >= 100){ //high profile
7054         sps->chroma_format_idc= get_ue_golomb_31(&s->gb);
7055         if(sps->chroma_format_idc == 3)
7056             get_bits1(&s->gb);  //residual_color_transform_flag
7057         get_ue_golomb(&s->gb);  //bit_depth_luma_minus8
7058         get_ue_golomb(&s->gb);  //bit_depth_chroma_minus8
7059         sps->transform_bypass = get_bits1(&s->gb);
7060         decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7061     }else{
7062         sps->chroma_format_idc= 1;
7063     }
7064
7065     sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7066     sps->poc_type= get_ue_golomb_31(&s->gb);
7067
7068     if(sps->poc_type == 0){ //FIXME #define
7069         sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7070     } else if(sps->poc_type == 1){//FIXME #define
7071         sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7072         sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7073         sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7074         sps->poc_cycle_length                = get_ue_golomb(&s->gb);
7075
7076         if((unsigned)sps->poc_cycle_length >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
7077             av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", sps->poc_cycle_length);
7078             goto fail;
7079         }
7080
7081         for(i=0; i<sps->poc_cycle_length; i++)
7082             sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7083     }else if(sps->poc_type != 2){
7084         av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7085         goto fail;
7086     }
7087
7088     sps->ref_frame_count= get_ue_golomb_31(&s->gb);
7089     if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){
7090         av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7091         goto fail;
7092     }
7093     sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7094     sps->mb_width = get_ue_golomb(&s->gb) + 1;
7095     sps->mb_height= get_ue_golomb(&s->gb) + 1;
7096     if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7097        avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)){
7098         av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7099         goto fail;
7100     }
7101
7102     sps->frame_mbs_only_flag= get_bits1(&s->gb);
7103     if(!sps->frame_mbs_only_flag)
7104         sps->mb_aff= get_bits1(&s->gb);
7105     else
7106         sps->mb_aff= 0;
7107
7108     sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7109
7110 #ifndef ALLOW_INTERLACE
7111     if(sps->mb_aff)
7112         av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7113 #endif
7114     sps->crop= get_bits1(&s->gb);
7115     if(sps->crop){
7116         sps->crop_left  = get_ue_golomb(&s->gb);
7117         sps->crop_right = get_ue_golomb(&s->gb);
7118         sps->crop_top   = get_ue_golomb(&s->gb);
7119         sps->crop_bottom= get_ue_golomb(&s->gb);
7120         if(sps->crop_left || sps->crop_top){
7121             av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7122         }
7123         if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7124             av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7125         }
7126     }else{
7127         sps->crop_left  =
7128         sps->crop_right =
7129         sps->crop_top   =
7130         sps->crop_bottom= 0;
7131     }
7132
7133     sps->vui_parameters_present_flag= get_bits1(&s->gb);
7134     if( sps->vui_parameters_present_flag )
7135         decode_vui_parameters(h, sps);
7136
7137     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7138         av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7139                sps_id, sps->profile_idc, sps->level_idc,
7140                sps->poc_type,
7141                sps->ref_frame_count,
7142                sps->mb_width, sps->mb_height,
7143                sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7144                sps->direct_8x8_inference_flag ? "8B8" : "",
7145                sps->crop_left, sps->crop_right,
7146                sps->crop_top, sps->crop_bottom,
7147                sps->vui_parameters_present_flag ? "VUI" : "",
7148                ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
7149                );
7150     }
7151     av_free(h->sps_buffers[sps_id]);
7152     h->sps_buffers[sps_id]= sps;
7153     return 0;
7154 fail:
7155     av_free(sps);
7156     return -1;
7157 }
7158
7159 static void
7160 build_qp_table(PPS *pps, int t, int index)
7161 {
7162     int i;
7163     for(i = 0; i < 52; i++)
7164         pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
7165 }
7166
7167 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7168     MpegEncContext * const s = &h->s;
7169     unsigned int pps_id= get_ue_golomb(&s->gb);
7170     PPS *pps;
7171
7172     if(pps_id >= MAX_PPS_COUNT) {
7173         av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
7174         return -1;
7175     }
7176
7177     pps= av_mallocz(sizeof(PPS));
7178     if(pps == NULL)
7179         return -1;
7180     pps->sps_id= get_ue_golomb_31(&s->gb);
7181     if((unsigned)pps->sps_id>=MAX_SPS_COUNT || h->sps_buffers[pps->sps_id] == NULL){
7182         av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7183         goto fail;
7184     }
7185
7186     pps->cabac= get_bits1(&s->gb);
7187     pps->pic_order_present= get_bits1(&s->gb);
7188     pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7189     if(pps->slice_group_count > 1 ){
7190         pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7191         av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7192         switch(pps->mb_slice_group_map_type){
7193         case 0:
7194 #if 0
7195 |   for( i = 0; i <= num_slice_groups_minus1; i++ ) |   |        |
7196 |    run_length[ i ]                                |1  |ue(v)   |
7197 #endif
7198             break;
7199         case 2:
7200 #if 0
7201 |   for( i = 0; i < num_slice_groups_minus1; i++ )  |   |        |
7202 |{                                                  |   |        |
7203 |    top_left_mb[ i ]                               |1  |ue(v)   |
7204 |    bottom_right_mb[ i ]                           |1  |ue(v)   |
7205 |   }                                               |   |        |
7206 #endif
7207             break;
7208         case 3:
7209         case 4:
7210         case 5:
7211 #if 0
7212 |   slice_group_change_direction_flag               |1  |u(1)    |
7213 |   slice_group_change_rate_minus1                  |1  |ue(v)   |
7214 #endif
7215             break;
7216         case 6:
7217 #if 0
7218 |   slice_group_id_cnt_minus1                       |1  |ue(v)   |
7219 |   for( i = 0; i <= slice_group_id_cnt_minus1; i++ |   |        |
7220 |)                                                  |   |        |
7221 |    slice_group_id[ i ]                            |1  |u(v)    |
7222 #endif
7223             break;
7224         }
7225     }
7226     pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7227     pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7228     if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7229         av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7230         goto fail;
7231     }
7232
7233     pps->weighted_pred= get_bits1(&s->gb);
7234     pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7235     pps->init_qp= get_se_golomb(&s->gb) + 26;
7236     pps->init_qs= get_se_golomb(&s->gb) + 26;
7237     pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7238     pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7239     pps->constrained_intra_pred= get_bits1(&s->gb);
7240     pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7241
7242     pps->transform_8x8_mode= 0;
7243     h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7244     memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
7245     memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
7246
7247     if(get_bits_count(&s->gb) < bit_length){
7248         pps->transform_8x8_mode= get_bits1(&s->gb);
7249         decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7250         pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7251     } else {
7252         pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7253     }
7254
7255     build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7256     build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7257     if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7258         h->pps.chroma_qp_diff= 1;
7259
7260     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7261         av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7262                pps_id, pps->sps_id,
7263                pps->cabac ? "CABAC" : "CAVLC",
7264                pps->slice_group_count,
7265                pps->ref_count[0], pps->ref_count[1],
7266                pps->weighted_pred ? "weighted" : "",
7267                pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7268                pps->deblocking_filter_parameters_present ? "LPAR" : "",
7269                pps->constrained_intra_pred ? "CONSTR" : "",
7270                pps->redundant_pic_cnt_present ? "REDU" : "",
7271                pps->transform_8x8_mode ? "8x8DCT" : ""
7272                );
7273     }
7274
7275     av_free(h->pps_buffers[pps_id]);
7276     h->pps_buffers[pps_id]= pps;
7277     return 0;
7278 fail:
7279     av_free(pps);
7280     return -1;
7281 }
7282
7283 /**
7284  * Call decode_slice() for each context.
7285  *
7286  * @param h h264 master context
7287  * @param context_count number of contexts to execute
7288  */
7289 static void execute_decode_slices(H264Context *h, int context_count){
7290     MpegEncContext * const s = &h->s;
7291     AVCodecContext * const avctx= s->avctx;
7292     H264Context *hx;
7293     int i;
7294
7295     if(avctx->codec_id == CODEC_ID_H264_VDPAU)
7296         return;
7297     if(context_count == 1) {
7298         decode_slice(avctx, &h);
7299     } else {
7300         for(i = 1; i < context_count; i++) {
7301             hx = h->thread_context[i];
7302             hx->s.error_recognition = avctx->error_recognition;
7303             hx->s.error_count = 0;
7304         }
7305
7306         avctx->execute(avctx, (void *)decode_slice,
7307                        (void **)h->thread_context, NULL, context_count, sizeof(void*));
7308
7309         /* pull back stuff from slices to master context */
7310         hx = h->thread_context[context_count - 1];
7311         s->mb_x = hx->s.mb_x;
7312         s->mb_y = hx->s.mb_y;
7313         s->dropable = hx->s.dropable;
7314         s->picture_structure = hx->s.picture_structure;
7315         for(i = 1; i < context_count; i++)
7316             h->s.error_count += h->thread_context[i]->s.error_count;
7317     }
7318 }
7319
7320
7321 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7322     MpegEncContext * const s = &h->s;
7323     AVCodecContext * const avctx= s->avctx;
7324     int buf_index=0;
7325     H264Context *hx; ///< thread context
7326     int context_count = 0;
7327
7328     h->max_contexts = avctx->thread_count;
7329 #if 0
7330     int i;
7331     for(i=0; i<50; i++){
7332         av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
7333     }
7334 #endif
7335     if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7336         h->current_slice = 0;
7337         if (!s->first_field)
7338             s->current_picture_ptr= NULL;
7339     }
7340
7341     for(;;){
7342         int consumed;
7343         int dst_length;
7344         int bit_length;
7345         const uint8_t *ptr;
7346         int i, nalsize = 0;
7347         int err;
7348
7349         if(h->is_avc) {
7350             if(buf_index >= buf_size) break;
7351             nalsize = 0;
7352             for(i = 0; i < h->nal_length_size; i++)
7353                 nalsize = (nalsize << 8) | buf[buf_index++];
7354             if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7355                 if(nalsize == 1){
7356                     buf_index++;
7357                     continue;
7358                 }else{
7359                     av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7360                     break;
7361                 }
7362             }
7363         } else {
7364             // start code prefix search
7365             for(; buf_index + 3 < buf_size; buf_index++){
7366                 // This should always succeed in the first iteration.
7367                 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7368                     break;
7369             }
7370
7371             if(buf_index+3 >= buf_size) break;
7372
7373             buf_index+=3;
7374         }
7375
7376         hx = h->thread_context[context_count];
7377
7378         ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7379         if (ptr==NULL || dst_length < 0){
7380             return -1;
7381         }
7382         while(ptr[dst_length - 1] == 0 && dst_length > 0)
7383             dst_length--;
7384         bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7385
7386         if(s->avctx->debug&FF_DEBUG_STARTCODE){
7387             av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7388         }
7389
7390         if (h->is_avc && (nalsize != consumed)){
7391             av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7392             consumed= nalsize;
7393         }
7394
7395         buf_index += consumed;
7396
7397         if(  (s->hurry_up == 1 && h->nal_ref_idc  == 0) //FIXME do not discard SEI id
7398            ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc  == 0))
7399             continue;
7400
7401       again:
7402         err = 0;
7403         switch(hx->nal_unit_type){
7404         case NAL_IDR_SLICE:
7405             if (h->nal_unit_type != NAL_IDR_SLICE) {
7406                 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7407                 return -1;
7408             }
7409             idr(h); //FIXME ensure we don't loose some frames if there is reordering
7410         case NAL_SLICE:
7411             init_get_bits(&hx->s.gb, ptr, bit_length);
7412             hx->intra_gb_ptr=
7413             hx->inter_gb_ptr= &hx->s.gb;
7414             hx->s.data_partitioning = 0;
7415
7416             if((err = decode_slice_header(hx, h)))
7417                break;
7418
7419             s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
7420             if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7421                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7422                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=FF_B_TYPE)
7423                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7424                && avctx->skip_frame < AVDISCARD_ALL){
7425                 if(ENABLE_H264_VDPAU_DECODER && avctx->codec_id == CODEC_ID_H264_VDPAU){
7426                     static const uint8_t start_code[] = {0x00, 0x00, 0x01};
7427                     ff_VDPAU_h264_add_data_chunk(h, start_code, sizeof(start_code));
7428                     ff_VDPAU_h264_add_data_chunk(h, &buf[buf_index - consumed], consumed );
7429                 }else
7430                     context_count++;
7431             }
7432             break;
7433         case NAL_DPA:
7434             init_get_bits(&hx->s.gb, ptr, bit_length);
7435             hx->intra_gb_ptr=
7436             hx->inter_gb_ptr= NULL;
7437             hx->s.data_partitioning = 1;
7438
7439             err = decode_slice_header(hx, h);
7440             break;
7441         case NAL_DPB:
7442             init_get_bits(&hx->intra_gb, ptr, bit_length);
7443             hx->intra_gb_ptr= &hx->intra_gb;
7444             break;
7445         case NAL_DPC:
7446             init_get_bits(&hx->inter_gb, ptr, bit_length);
7447             hx->inter_gb_ptr= &hx->inter_gb;
7448
7449             if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7450                && s->context_initialized
7451                && s->hurry_up < 5
7452                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7453                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=FF_B_TYPE)
7454                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7455                && avctx->skip_frame < AVDISCARD_ALL)
7456                 context_count++;
7457             break;
7458         case NAL_SEI:
7459             init_get_bits(&s->gb, ptr, bit_length);
7460             decode_sei(h);
7461             break;
7462         case NAL_SPS:
7463             init_get_bits(&s->gb, ptr, bit_length);
7464             decode_seq_parameter_set(h);
7465
7466             if(s->flags& CODEC_FLAG_LOW_DELAY)
7467                 s->low_delay=1;
7468
7469             if(avctx->has_b_frames < 2)
7470                 avctx->has_b_frames= !s->low_delay;
7471             break;
7472         case NAL_PPS:
7473             init_get_bits(&s->gb, ptr, bit_length);
7474
7475             decode_picture_parameter_set(h, bit_length);
7476
7477             break;
7478         case NAL_AUD:
7479         case NAL_END_SEQUENCE:
7480         case NAL_END_STREAM:
7481         case NAL_FILLER_DATA:
7482         case NAL_SPS_EXT:
7483         case NAL_AUXILIARY_SLICE:
7484             break;
7485         default:
7486             av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
7487         }
7488
7489         if(context_count == h->max_contexts) {
7490             execute_decode_slices(h, context_count);
7491             context_count = 0;
7492         }
7493
7494         if (err < 0)
7495             av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7496         else if(err == 1) {
7497             /* Slice could not be decoded in parallel mode, copy down
7498              * NAL unit stuff to context 0 and restart. Note that
7499              * rbsp_buffer is not transferred, but since we no longer
7500              * run in parallel mode this should not be an issue. */
7501             h->nal_unit_type = hx->nal_unit_type;
7502             h->nal_ref_idc   = hx->nal_ref_idc;
7503             hx = h;
7504             goto again;
7505         }
7506     }
7507     if(context_count)
7508         execute_decode_slices(h, context_count);
7509     return buf_index;
7510 }
7511
7512 /**
7513  * returns the number of bytes consumed for building the current frame
7514  */
7515 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7516         if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7517         if(pos+10>buf_size) pos=buf_size; // oops ;)
7518
7519         return pos;
7520 }
7521
7522 static int decode_frame(AVCodecContext *avctx,
7523                              void *data, int *data_size,
7524                              const uint8_t *buf, int buf_size)
7525 {
7526     H264Context *h = avctx->priv_data;
7527     MpegEncContext *s = &h->s;
7528     AVFrame *pict = data;
7529     int buf_index;
7530
7531     s->flags= avctx->flags;
7532     s->flags2= avctx->flags2;
7533
7534    /* end of stream, output what is still in the buffers */
7535     if (buf_size == 0) {
7536         Picture *out;
7537         int i, out_idx;
7538
7539 //FIXME factorize this with the output code below
7540         out = h->delayed_pic[0];
7541         out_idx = 0;
7542         for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7543             if(h->delayed_pic[i]->poc < out->poc){
7544                 out = h->delayed_pic[i];
7545                 out_idx = i;
7546             }
7547
7548         for(i=out_idx; h->delayed_pic[i]; i++)
7549             h->delayed_pic[i] = h->delayed_pic[i+1];
7550
7551         if(out){
7552             *data_size = sizeof(AVFrame);
7553             *pict= *(AVFrame*)out;
7554         }
7555
7556         return 0;
7557     }
7558
7559     if(h->is_avc && !h->got_avcC) {
7560         int i, cnt, nalsize;
7561         unsigned char *p = avctx->extradata;
7562         if(avctx->extradata_size < 7) {
7563             av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7564             return -1;
7565         }
7566         if(*p != 1) {
7567             av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7568             return -1;
7569         }
7570         /* sps and pps in the avcC always have length coded with 2 bytes,
7571            so put a fake nal_length_size = 2 while parsing them */
7572         h->nal_length_size = 2;
7573         // Decode sps from avcC
7574         cnt = *(p+5) & 0x1f; // Number of sps
7575         p += 6;
7576         for (i = 0; i < cnt; i++) {
7577             nalsize = AV_RB16(p) + 2;
7578             if(decode_nal_units(h, p, nalsize) < 0) {
7579                 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7580                 return -1;
7581             }
7582             p += nalsize;
7583         }
7584         // Decode pps from avcC
7585         cnt = *(p++); // Number of pps
7586         for (i = 0; i < cnt; i++) {
7587             nalsize = AV_RB16(p) + 2;
7588             if(decode_nal_units(h, p, nalsize)  != nalsize) {
7589                 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7590                 return -1;
7591             }
7592             p += nalsize;
7593         }
7594         // Now store right nal length size, that will be use to parse all other nals
7595         h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7596         // Do not reparse avcC
7597         h->got_avcC = 1;
7598     }
7599
7600     if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7601         if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7602             return -1;
7603         h->got_avcC = 1;
7604     }
7605
7606     buf_index=decode_nal_units(h, buf, buf_size);
7607     if(buf_index < 0)
7608         return -1;
7609
7610     if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7611         if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7612         av_log(avctx, AV_LOG_ERROR, "no frame!\n");
7613         return -1;
7614     }
7615
7616     if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7617         Picture *out = s->current_picture_ptr;
7618         Picture *cur = s->current_picture_ptr;
7619         int i, pics, cross_idr, out_of_order, out_idx;
7620
7621         s->mb_y= 0;
7622
7623         s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7624         s->current_picture_ptr->pict_type= s->pict_type;
7625
7626         if(!s->dropable) {
7627             execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7628             h->prev_poc_msb= h->poc_msb;
7629             h->prev_poc_lsb= h->poc_lsb;
7630         }
7631         h->prev_frame_num_offset= h->frame_num_offset;
7632         h->prev_frame_num= h->frame_num;
7633
7634         if (ENABLE_H264_VDPAU_DECODER && avctx->codec_id == CODEC_ID_H264_VDPAU)
7635             ff_VDPAU_h264_picture_complete(h);
7636
7637         /*
7638          * FIXME: Error handling code does not seem to support interlaced
7639          * when slices span multiple rows
7640          * The ff_er_add_slice calls don't work right for bottom
7641          * fields; they cause massive erroneous error concealing
7642          * Error marking covers both fields (top and bottom).
7643          * This causes a mismatched s->error_count
7644          * and a bad error table. Further, the error count goes to
7645          * INT_MAX when called for bottom field, because mb_y is
7646          * past end by one (callers fault) and resync_mb_y != 0
7647          * causes problems for the first MB line, too.
7648          */
7649         if (avctx->codec_id != CODEC_ID_H264_VDPAU && !FIELD_PICTURE)
7650             ff_er_frame_end(s);
7651
7652         MPV_frame_end(s);
7653
7654         if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7655             /* Wait for second field. */
7656             *data_size = 0;
7657
7658         } else {
7659             cur->repeat_pict = 0;
7660
7661             /* Signal interlacing information externally. */
7662             /* Prioritize picture timing SEI information over used decoding process if it exists. */
7663             if(h->sps.pic_struct_present_flag){
7664                 switch (h->sei_pic_struct)
7665                 {
7666                 case SEI_PIC_STRUCT_FRAME:
7667                     cur->interlaced_frame = 0;
7668                     break;
7669                 case SEI_PIC_STRUCT_TOP_FIELD:
7670                 case SEI_PIC_STRUCT_BOTTOM_FIELD:
7671                 case SEI_PIC_STRUCT_TOP_BOTTOM:
7672                 case SEI_PIC_STRUCT_BOTTOM_TOP:
7673                     cur->interlaced_frame = 1;
7674                     break;
7675                 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
7676                 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
7677                     // Signal the possibility of telecined film externally (pic_struct 5,6)
7678                     // From these hints, let the applications decide if they apply deinterlacing.
7679                     cur->repeat_pict = 1;
7680                     cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7681                     break;
7682                 case SEI_PIC_STRUCT_FRAME_DOUBLING:
7683                     // Force progressive here, as doubling interlaced frame is a bad idea.
7684                     cur->interlaced_frame = 0;
7685                     cur->repeat_pict = 2;
7686                     break;
7687                 case SEI_PIC_STRUCT_FRAME_TRIPLING:
7688                     cur->interlaced_frame = 0;
7689                     cur->repeat_pict = 4;
7690                     break;
7691                 }
7692             }else{
7693                 /* Derive interlacing flag from used decoding process. */
7694                 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7695             }
7696
7697             if (cur->field_poc[0] != cur->field_poc[1]){
7698                 /* Derive top_field_first from field pocs. */
7699                 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7700             }else{
7701                 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
7702                     /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
7703                     if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
7704                       || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
7705                         cur->top_field_first = 1;
7706                     else
7707                         cur->top_field_first = 0;
7708                 }else{
7709                     /* Most likely progressive */
7710                     cur->top_field_first = 0;
7711                 }
7712             }
7713
7714         //FIXME do something with unavailable reference frames
7715
7716             /* Sort B-frames into display order */
7717
7718             if(h->sps.bitstream_restriction_flag
7719                && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7720                 s->avctx->has_b_frames = h->sps.num_reorder_frames;
7721                 s->low_delay = 0;
7722             }
7723
7724             if(   s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7725                && !h->sps.bitstream_restriction_flag){
7726                 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
7727                 s->low_delay= 0;
7728             }
7729
7730             pics = 0;
7731             while(h->delayed_pic[pics]) pics++;
7732
7733             assert(pics <= MAX_DELAYED_PIC_COUNT);
7734
7735             h->delayed_pic[pics++] = cur;
7736             if(cur->reference == 0)
7737                 cur->reference = DELAYED_PIC_REF;
7738
7739             out = h->delayed_pic[0];
7740             out_idx = 0;
7741             for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7742                 if(h->delayed_pic[i]->poc < out->poc){
7743                     out = h->delayed_pic[i];
7744                     out_idx = i;
7745                 }
7746             cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;
7747
7748             out_of_order = !cross_idr && out->poc < h->outputed_poc;
7749
7750             if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7751                 { }
7752             else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7753                || (s->low_delay &&
7754                 ((!cross_idr && out->poc > h->outputed_poc + 2)
7755                  || cur->pict_type == FF_B_TYPE)))
7756             {
7757                 s->low_delay = 0;
7758                 s->avctx->has_b_frames++;
7759             }
7760
7761             if(out_of_order || pics > s->avctx->has_b_frames){
7762                 out->reference &= ~DELAYED_PIC_REF;
7763                 for(i=out_idx; h->delayed_pic[i]; i++)
7764                     h->delayed_pic[i] = h->delayed_pic[i+1];
7765             }
7766             if(!out_of_order && pics > s->avctx->has_b_frames){
7767                 *data_size = sizeof(AVFrame);
7768
7769                 h->outputed_poc = out->poc;
7770                 *pict= *(AVFrame*)out;
7771             }else{
7772                 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7773             }
7774         }
7775     }
7776
7777     assert(pict->data[0] || !*data_size);
7778     ff_print_debug_info(s, pict);
7779 //printf("out %d\n", (int)pict->data[0]);
7780 #if 0 //?
7781
7782     /* Return the Picture timestamp as the frame number */
7783     /* we subtract 1 because it is added on utils.c     */
7784     avctx->frame_number = s->picture_number - 1;
7785 #endif
7786     return get_consumed_bytes(s, buf_index, buf_size);
7787 }
#if 0
/**
 * Compute the neighbour availability flags of the current macroblock
 * (disabled code).
 *
 * A neighbour counts as available when it lies inside the picture and its
 * h->slice_table entry equals h->slice_num. Entries 0..2 describe the
 * top-left, top and top-right neighbours, entry 3 the left neighbour;
 * entries 4 and 5 are the constants 1 and 0.
 */
static inline void fill_mb_avail(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;

    if(!s->mb_y){
        /* first macroblock row: nothing above */
        h->mb_avail[0]= 0;
        h->mb_avail[1]= 0;
        h->mb_avail[2]= 0;
    }else{
        const int top_xy= mb_xy - s->mb_stride;

        h->mb_avail[0]= s->mb_x                 && h->slice_table[top_xy - 1] == h->slice_num;
        h->mb_avail[1]=                            h->slice_table[top_xy    ] == h->slice_num;
        h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[top_xy + 1] == h->slice_num;
    }
    h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
    h->mb_avail[4]= 1; //FIXME move out
    h->mb_avail[5]= 0; //FIXME move out
}
#endif
7807
#ifdef TEST
#undef printf
#undef random
#define COUNT 8000
#define SIZE (COUNT*40)
/**
 * Stand-alone self test, built only when TEST is defined.
 *
 * Active checks:
 *  - writes the unsigned exp-golomb codes for 0..COUNT-1 with set_ue_golomb()
 *    and reads them back with get_ue_golomb();
 *  - writes the signed exp-golomb codes for -COUNT/2..COUNT/2-1 with
 *    set_se_golomb() and reads them back with get_se_golomb().
 * Every value that does not round-trip is reported on stdout together with
 * the 24 bits that were at the read position; a mismatch neither aborts the
 * run nor changes the exit status. The coder calls are timed with
 * START_TIMER/STOP_TIMER.
 *
 * The (I)DCT, quantizer and NAL checks further down are compiled out with
 * "#if 0", so "Testing RBSP" is printed without any check following it.
 *
 * @return 0
 */
int main(void){
    int i;
    uint8_t temp[SIZE];
    PutBitContext pb;
    GetBitContext gb;
//    int int_temp[10000];
    DSPContext dsp;
    // Zero-filled because it is handed to dsputil_init() before any field is
    // set. NOTE(review): confirm which fields dsputil_init() actually reads.
    AVCodecContext avctx = {0};

    dsputil_init(&dsp, &avctx);

    init_put_bits(&pb, temp, SIZE);
    printf("testing unsigned exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_ue_golomb(&pb, i);
        STOP_TIMER("set_ue_golomb");
    }
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j;
        unsigned int s; // printed with %X, which takes an unsigned argument

        // peek at the upcoming bits so a failure can show what was parsed
        s= show_bits(&gb, 24);

        START_TIMER
        j= get_ue_golomb(&gb);
        if(j != i){
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
//            return -1;
        }
        STOP_TIMER("get_ue_golomb");
    }


    init_put_bits(&pb, temp, SIZE);
    printf("testing signed exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_se_golomb(&pb, i - COUNT/2);
        STOP_TIMER("set_se_golomb");
    }
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j;
        unsigned int s; // printed with %X, which takes an unsigned argument

        s= show_bits(&gb, 24);

        START_TIMER
        j= get_se_golomb(&gb);
        if(j != i - COUNT/2){
            // report the value that was actually written, not the loop index
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i - COUNT/2, s);
//            return -1;
        }
        STOP_TIMER("get_se_golomb");
    }

    // Disabled checks, kept verbatim. They use identifiers (s as a pointer,
    // qp, src1_block, src2_block) that are not declared in this function.
#if 0
    printf("testing 4x4 (I)DCT\n");

    DCTELEM block[16];
    uint8_t src[16], ref[16];
    uint64_t error= 0, max_error=0;

    for(i=0; i<COUNT; i++){
        int j;
//        printf("%d %d %d\n", r1, r2, (r2-r1)*16);
        for(j=0; j<16; j++){
            ref[j]= random()%255;
            src[j]= random()%255;
        }

        h264_diff_dct_c(block, src, ref, 4);

        //normalize
        for(j=0; j<16; j++){
//            printf("%d ", block[j]);
            block[j]= block[j]*4;
            if(j&1) block[j]= (block[j]*4 + 2)/5;
            if(j&4) block[j]= (block[j]*4 + 2)/5;
        }
//        printf("\n");

        s->dsp.h264_idct_add(ref, block, 4);
/*        for(j=0; j<16; j++){
            printf("%d ", ref[j]);
        }
        printf("\n");*/

        for(j=0; j<16; j++){
            int diff= FFABS(src[j] - ref[j]);

            error+= diff*diff;
            max_error= FFMAX(max_error, diff);
        }
    }
    printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
    printf("testing quantizer\n");
    for(qp=0; qp<52; qp++){
        for(i=0; i<16; i++)
            src1_block[i]= src2_block[i]= random()%255;

    }
    printf("Testing NAL layer\n");

    uint8_t bitstream[COUNT];
    uint8_t nal[COUNT*2];
    H264Context h;
    memset(&h, 0, sizeof(H264Context));

    for(i=0; i<COUNT; i++){
        int zeros= i;
        int nal_length;
        int consumed;
        int out_length;
        uint8_t *out;
        int j;

        for(j=0; j<COUNT; j++){
            bitstream[j]= (random() % 255) + 1;
        }

        for(j=0; j<zeros; j++){
            int pos= random() % COUNT;
            while(bitstream[pos] == 0){
                pos++;
                pos %= COUNT;
            }
            bitstream[pos]=0;
        }

        START_TIMER

        nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
        if(nal_length<0){
            printf("encoding failed\n");
            return -1;
        }

        out= decode_nal(&h, nal, &out_length, &consumed, nal_length);

        STOP_TIMER("NAL")

        if(out_length != COUNT){
            printf("incorrect length %d %d\n", out_length, COUNT);
            return -1;
        }

        if(consumed != nal_length){
            printf("incorrect consumed length %d %d\n", nal_length, consumed);
            return -1;
        }

        if(memcmp(bitstream, out, COUNT)){
            printf("mismatch\n");
            return -1;
        }
    }
#endif

    printf("Testing RBSP\n");


    return 0;
}
#endif /* TEST */
7982
7983
7984 static av_cold int decode_end(AVCodecContext *avctx)
7985 {
7986     H264Context *h = avctx->priv_data;
7987     MpegEncContext *s = &h->s;
7988     int i;
7989
7990     av_freep(&h->rbsp_buffer[0]);
7991     av_freep(&h->rbsp_buffer[1]);
7992     free_tables(h); //FIXME cleanup init stuff perhaps
7993
7994     for(i = 0; i < MAX_SPS_COUNT; i++)
7995         av_freep(h->sps_buffers + i);
7996
7997     for(i = 0; i < MAX_PPS_COUNT; i++)
7998         av_freep(h->pps_buffers + i);
7999
8000     MPV_common_end(s);
8001
8002 //    memset(h, 0, sizeof(H264Context));
8003
8004     return 0;
8005 }
8006
8007
8008 AVCodec h264_decoder = {
8009     "h264",
8010     CODEC_TYPE_VIDEO,
8011     CODEC_ID_H264,
8012     sizeof(H264Context),
8013     decode_init,
8014     NULL,
8015     decode_end,
8016     decode_frame,
8017     /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
8018     .flush= flush_dpb,
8019     .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
8020 };
8021
#ifdef CONFIG_H264_VDPAU_DECODER
/**
 * Codec table entry for the "h264_vdpau" decoder, present only when
 * CONFIG_H264_VDPAU_DECODER is defined.
 *
 * It uses the same decode_init/decode_end/decode_frame/flush_dpb functions
 * as h264_decoder, with its own codec id and CODEC_CAP_HWACCEL_VDPAU added
 * to the capabilities. The leading members are positional, so their order
 * must follow the AVCodec declaration.
 */
AVCodec h264_vdpau_decoder = {
    "h264_vdpau", CODEC_TYPE_VIDEO, CODEC_ID_H264_VDPAU,
    sizeof(H264Context),
    // callback slots in declaration order; the second one is left NULL
    decode_init, NULL, decode_end, decode_frame,
    CODEC_CAP_HWACCEL_VDPAU | CODEC_CAP_DELAY | CODEC_CAP_DR1,
    .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
    .flush     = flush_dpb,
};
#endif
8037
8038 #include "svq3.c"