libavcodec/h264.c

   1 /*
   2  * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
   3  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file h264.c
  24  * H.264 / AVC / MPEG4 part10 codec.
  25  * @author Michael Niedermayer <michaelni@gmx.at>
  26  */
  27
  28 #include "dsputil.h"
  29 #include "avcodec.h"
  30 #include "mpegvideo.h"
  31 #include "h264.h"
  32 #include "h264data.h"
  33 #include "h264_parser.h"
  34 #include "golomb.h"
  35 #include "rectangle.h"
  36
  37 #include "cabac.h"
  38 #ifdef ARCH_X86
  39 #include "x86/h264_i386.h"
  40 #endif
  41
  42 //#undef NDEBUG
  43 #include <assert.h>
  44
  45 /**
  46  * Value of Picture.reference when Picture is not a reference picture, but
  47  * is held for delayed output.
  48  */
  49 #define DELAYED_PIC_REF 4
  50
  51 static VLC coeff_token_vlc[4];
     /* One shared backing array for the four coeff_token VLCs: its first
      * dimension is the sum of the four per-table sizes listed in
      * coeff_token_vlc_tables_size. How it is partitioned is decided by the
      * init code, which is not part of this section. */
  52 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
  53 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
  54
     /* Each VLC below is paired with a statically sized backing table and a
      * constant that repeats that table's first dimension. */
  55 static VLC chroma_dc_coeff_token_vlc;
  56 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
  57 static const int chroma_dc_coeff_token_vlc_table_size = 256;
  58
  59 static VLC total_zeros_vlc[15];
  60 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
  61 static const int total_zeros_vlc_tables_size = 512;
  62
  63 static VLC chroma_dc_total_zeros_vlc[3];
  64 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
  65 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
  66
  67 static VLC run_vlc[6];
  68 static VLC_TYPE run_vlc_tables[6][8][2];
  69 static const int run_vlc_tables_size = 8;
  70
  71 static VLC run7_vlc;
  72 static VLC_TYPE run7_vlc_table[96][2];
  73 static const int run7_vlc_table_size = 96;
  74
  75 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
     /* Forward declarations of file-local functions, so they can be
      * referenced before the point where they are defined. */
  76 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
  77 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
  78 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
  79 static Picture * remove_long(H264Context *h, int i, int ref_mask);
  80
  /**
   * Packs two 16-bit values into one 32-bit word.
   *
   * The operand order depends on WORDS_BIGENDIAN so that, when the result is
   * stored to memory, the two bytes of a come before the two bytes of b on
   * either byte order.
   *
   * Only the low 16 bits of each argument contribute to the result. The shift
   * is done on an unsigned value because left-shifting a negative int is
   * undefined behaviour in C, and callers in this file pass reference indices
   * that may be negative.
   *
   * @param a value placed first in memory (low 16 bits used)
   * @param b value placed second in memory (low 16 bits used)
   * @return the packed 32-bit word
   */
  static av_always_inline uint32_t pack16to32(int a, int b){
  #ifdef WORDS_BIGENDIAN
     return (b&0xFFFF) + ((uint32_t)a<<16);
  #else
     return (a&0xFFFF) + ((uint32_t)b<<16);
  #endif
  }
  88
  89 static const uint8_t rem6[52]={ /* rem6[q] == q % 6 for q = 0..51 */
  90 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
  91 };
  92
  93 static const uint8_t div6[52]={ /* div6[q] == q / 6 for q = 0..51 */
  94 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
  95 };
  96
  97 static const int left_block_options[4][8]={
         /*
          * Index tables used by fill_caches() to pick which entries of the
          * left neighbour(s) feed the left column of the caches.
          *
          * Row selection (see fill_caches()):
          *   [0] default (left pair and current MB agree on frame/field,
          *       or no MBAFF)
          *   [1] current MB is a frame MB, bottom of its pair, left pair differs
          *   [2] current MB is a frame MB, top of its pair, left pair differs
          *   [3] current MB is a field MB, left pair differs
          *
          * Columns 0..3 index intra4x4_pred_mode / non_zero_count of the left
          * MB and are also used as a row multiplier for the motion tables;
          * columns 4..7 index non_zero_count for cache column 0.
          */
  98     {0,1,2,3,7,10,8,11},
  99     {2,2,3,3,8,11,8,11},
 100     {0,0,1,1,7,10,7,10},
 101     {0,2,0,2,7,10,7,10}
 102 };
 103
 104 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
         /*
          * Loads the per-macroblock neighbour caches of h for the macroblock at
          * h->mb_xy from its top, left, top-left and top-right neighbours:
          * sample availability masks and intra4x4 prediction modes (intra MBs,
          * decoding only), non-zero counts, CABAC top/left cbp, and for inter /
          * direct MBs the motion vectors, reference indices, mvd and direct
          * flags.
          *
          * mb_type     type of the current macroblock
          * for_deblock non-zero when called for deblocking: neighbours count as
          *             present when their slice_table entry is < 0xFFFF
          *             (instead of having to equal h->slice_num), the intra
          *             section is skipped, and only the top/left mv and ref
          *             entries are loaded.
          *
          * Also stores the chosen neighbour addresses in h->top_mb_xy and
          * h->left_mb_xy[] and sets h->neighbor_transform_size.
          */
 105     MpegEncContext * const s = &h->s;
 106     const int mb_xy= h->mb_xy;
 107     int topleft_xy, top_xy, topright_xy, left_xy[2];
 108     int topleft_type, top_type, topright_type, left_type[2];
 109     const int * left_block;
 110     int topleft_partition= -1;
 111     int i;
 112
         // one macroblock row up; the stride is shifted left by FIELD_PICTURE
 113     top_xy     = mb_xy  - (s->mb_stride << FIELD_PICTURE);
 114
 115     //FIXME deblocking could skip the intra and nnz parts.
         // NOTE: this early return happens before h->top_mb_xy / h->left_mb_xy
         // are updated and before any cache is written.
 116     if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
 117         return;
 118
 119     /* Wow, what a mess, why didn't they simplify the interlacing & intra
 120      * stuff, I can't imagine that these complex rules are worth it. */
 121
 122     topleft_xy = top_xy - 1;
 123     topright_xy= top_xy + 1;
 124     left_xy[1] = left_xy[0] = mb_xy-1;
 125     left_block = left_block_options[0];
         // MBAFF: neighbour addresses depend on whether the current MB and the
         // neighbouring MB pairs are frame or field coded.
 126     if(FRAME_MBAFF){
 127         const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
 128         const int top_pair_xy      = pair_xy     - s->mb_stride;
 129         const int topleft_pair_xy  = top_pair_xy - 1;
 130         const int topright_pair_xy = top_pair_xy + 1;
 131         const int topleft_mb_frame_flag  = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
 132         const int top_mb_frame_flag      = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
 133         const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
 134         const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
 135         const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
 136         const int bottom = (s->mb_y & 1);
 137         tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
 138
 139         if (!curr_mb_frame_flag && (bottom || !top_mb_frame_flag)){
 140             top_xy -= s->mb_stride;
 141         }
 142         if (!curr_mb_frame_flag && (bottom || !topleft_mb_frame_flag)){
 143             topleft_xy -= s->mb_stride;
 144         } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
 145             topleft_xy += s->mb_stride;
 146             // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
 147             topleft_partition = 0;
 148         }
 149         if (!curr_mb_frame_flag && (bottom || !topright_mb_frame_flag)){
 150             topright_xy -= s->mb_stride;
 151         }
             // left pair coded differently from the current MB: both left
             // entries start at the top MB of the left pair and the
             // left_block table is switched accordingly
 152         if (left_mb_frame_flag != curr_mb_frame_flag) {
 153             left_xy[1] = left_xy[0] = pair_xy - 1;
 154             if (curr_mb_frame_flag) {
 155                 if (bottom) {
 156                     left_block = left_block_options[1];
 157                 } else {
 158                     left_block= left_block_options[2];
 159                 }
 160             } else {
 161                 left_xy[1] += s->mb_stride;
 162                 left_block = left_block_options[3];
 163             }
 164         }
 165     }
 166
 167     h->top_mb_xy = top_xy;
 168     h->left_mb_xy[0] = left_xy[0];
 169     h->left_mb_xy[1] = left_xy[1];
 170     if(for_deblock){
             // deblocking: diagonal neighbours are not used; top/left count as
             // present whenever their slice_table entry is below 0xFFFF
 171         topleft_type = 0;
 172         topright_type = 0;
 173         top_type     = h->slice_table[top_xy     ] < 0xFFFF ? s->current_picture.mb_type[top_xy]     : 0;
 174         left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
 175         left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
 176
 177         if(MB_MBAFF && !IS_INTRA(mb_type)){
 178             int list;
 179             for(list=0; list<h->list_count; list++){
 180                 //These values were changed for ease of performing MC, we need to change them back
 181                 //FIXME maybe we can make MC and loop filter use the same values or prevent
 182                 //the MC code from changing ref_cache and rather use a temporary array.
 183                 if(USES_LIST(mb_type,list)){
 184                     int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
                         // the mask and multiply replicate each of the two
                         // stored reference indices into two adjacent cache
                         // bytes; the same 32-bit word is written to two rows
 185                     *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
 186                     *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
 187                     ref += h->b8_stride;
 188                     *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
 189                     *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
 190                 }
 191             }
 192         }
 193     }else{
             // decoding: a neighbour counts only if it belongs to the current slice
 194         topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
 195         top_type     = h->slice_table[top_xy     ] == h->slice_num ? s->current_picture.mb_type[top_xy]     : 0;
 196         topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
 197         left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
 198         left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
 199
         // NOTE: this if() is still inside the else branch above (decoding only)
 200     if(IS_INTRA(mb_type)){
             // with constrained_intra_pred the neighbour types are masked with
             // IS_INTRA(-1) before being tested; otherwise any present
             // neighbour passes
 201         int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
             // start from the default masks, then clear bits for every
             // neighbour that is missing (the bit layout of these masks is
             // defined by their consumers, not in this function)
 202         h->topleft_samples_available=
 203         h->top_samples_available=
 204         h->left_samples_available= 0xFFFF;
 205         h->topright_samples_available= 0xEEEA;
 206
 207         if(!(top_type & type_mask)){
 208             h->topleft_samples_available= 0xB3FF;
 209             h->top_samples_available= 0x33FF;
 210             h->topright_samples_available= 0x26EA;
 211         }
 212         if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
 213             if(IS_INTERLACED(mb_type)){
 214                 if(!(left_type[0] & type_mask)){
 215                     h->topleft_samples_available&= 0xDFFF;
 216                     h->left_samples_available&= 0x5FFF;
 217                 }
 218                 if(!(left_type[1] & type_mask)){
 219                     h->topleft_samples_available&= 0xFF5F;
 220                     h->left_samples_available&= 0xFF5F;
 221                 }
 222             }else{
                     // frame MB next to a field pair: both MBs of the left pair
                     // must pass the type_mask test
 223                 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
 224                                 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
 225                 assert(left_xy[0] == left_xy[1]);
 226                 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
 227                     h->topleft_samples_available&= 0xDF5F;
 228                     h->left_samples_available&= 0x5F5F;
 229                 }
 230             }
 231         }else{
 232             if(!(left_type[0] & type_mask)){
 233                 h->topleft_samples_available&= 0xDF5F;
 234                 h->left_samples_available&= 0x5F5F;
 235             }
 236         }
 237
 238         if(!(topleft_type & type_mask))
 239             h->topleft_samples_available&= 0x7FFF;
 240
 241         if(!(topright_type & type_mask))
 242             h->topright_samples_available&= 0xFBFF;
 243
 244         if(IS_INTRA4x4(mb_type)){
                 // top row of the mode cache: entries 4,5,6,3 of the top MB are
                 // the ones write_back_intra_pred_mode() fills from the
                 // bottom row of that MB's cache
 245             if(IS_INTRA4x4(top_type)){
 246                 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
 247                 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
 248                 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
 249                 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
 250             }else{
                     // -1 when the top MB fails the type_mask test, 2 when it
                     // passes but is not intra4x4 (2 is presumably DC_PRED;
                     // confirm against the mode enum)
 251                 int pred;
 252                 if(!(top_type & type_mask))
 253                     pred= -1;
 254                 else{
 255                     pred= 2;
 256                 }
 257                 h->intra4x4_pred_mode_cache[4+8*0]=
 258                 h->intra4x4_pred_mode_cache[5+8*0]=
 259                 h->intra4x4_pred_mode_cache[6+8*0]=
 260                 h->intra4x4_pred_mode_cache[7+8*0]= pred;
 261             }
                 // left column of the mode cache, two rows per left neighbour
 262             for(i=0; i<2; i++){
 263                 if(IS_INTRA4x4(left_type[i])){
 264                     h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
 265                     h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
 266                 }else{
 267                     int pred;
 268                     if(!(left_type[i] & type_mask))
 269                         pred= -1;
 270                     else{
 271                         pred= 2;
 272                     }
 273                     h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
 274                     h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
 275                 }
 276             }
 277         }
 278     }
 279     }
 280
 281
 282 /*
 283 0 . T T. T T T T
 284 1 L . .L . . . .
 285 2 L . .L . . . .
 286 3 . T TL . . . .
 287 4 L . .L . . . .
 288 5 L . .. . . . .
 289 */
 290 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
         // non-zero counts of the top neighbour; when it is missing the entries
         // are set to 0 (CABAC and current MB not intra) or 64 (otherwise)
 291     if(top_type){
 292         h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
 293         h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
 294         h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
 295         h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
 296
 297         h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
 298         h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
 299
 300         h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
 301         h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
 302
 303     }else{
 304         h->non_zero_count_cache[4+8*0]=
 305         h->non_zero_count_cache[5+8*0]=
 306         h->non_zero_count_cache[6+8*0]=
 307         h->non_zero_count_cache[7+8*0]=
 308
 309         h->non_zero_count_cache[1+8*0]=
 310         h->non_zero_count_cache[2+8*0]=
 311
 312         h->non_zero_count_cache[1+8*3]=
 313         h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
 314
 315     }
 316
         // non-zero counts of the left neighbour(s), same fallback values
 317     for (i=0; i<2; i++) {
 318         if(left_type[i]){
 319             h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
 320             h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
 321             h->non_zero_count_cache[0+8*1 +   8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
 322             h->non_zero_count_cache[0+8*4 +   8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
 323         }else{
 324             h->non_zero_count_cache[3+8*1 + 2*8*i]=
 325             h->non_zero_count_cache[3+8*2 + 2*8*i]=
 326             h->non_zero_count_cache[0+8*1 +   8*i]=
 327             h->non_zero_count_cache[0+8*4 +   8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
 328         }
 329     }
 330
         // CABAC only: coded block pattern of the top and left neighbours; a
         // missing neighbour yields 0x1C0 for intra MBs and 0 otherwise
 331     if( h->pps.cabac ) {
 332         // top_cbp
 333         if(top_type) {
 334             h->top_cbp = h->cbp_table[top_xy];
 335         } else if(IS_INTRA(mb_type)) {
 336             h->top_cbp = 0x1C0;
 337         } else {
 338             h->top_cbp = 0;
 339         }
 340         // left_cbp
 341         if (left_type[0]) {
 342             h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
 343         } else if(IS_INTRA(mb_type)) {
 344             h->left_cbp = 0x1C0;
 345         } else {
 346             h->left_cbp = 0;
 347         }
             // bits 1 and 3 of left_cbp are taken from the cbp bit selected by
             // left_block[0] / left_block[2] of the respective left MB
 348         if (left_type[0]) {
 349             h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
 350         }
 351         if (left_type[1]) {
 352             h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
 353         }
 354     }
 355
 356 #if 1
         // motion data of the neighbours, per reference list
 357     if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
 358         int list;
 359         for(list=0; list<h->list_count; list++){
                 // a list the MB does not use is skipped, unless the MB is
                 // direct or the deblocking filter is enabled
 360             if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
 361                 /*if(!h->mv_cache_clean[list]){
 362                     memset(h->mv_cache [list],  0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
 363                     memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
 364                     h->mv_cache_clean[list]= 1;
 365                 }*/
 366                 continue;
 367             }
 368             h->mv_cache_clean[list]= 0;
 369
                 // top row: the four mvs of the bottom 4x4 row of the top MB
                 // and the two reference indices of its bottom 8x8 row
 370             if(USES_LIST(top_type, list)){
 371                 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
 372                 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
 373                 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
 374                 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
 375                 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
 376                 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
 377                 h->ref_cache[list][scan8[0] + 0 - 1*8]=
 378                 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
 379                 h->ref_cache[list][scan8[0] + 2 - 1*8]=
 380                 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
 381             }else{
                     // zero mvs; ref is LIST_NOT_USED if the MB exists but does
                     // not use this list, PART_NOT_AVAILABLE if it is missing
 382                 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
 383                 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
 384                 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
 385                 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
 386                 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
 387             }
 388
                 // left column: two cache rows per left neighbour, source rows
                 // chosen through left_block
 389             for(i=0; i<2; i++){
 390                 int cache_idx = scan8[0] - 1 + i*2*8;
 391                 if(USES_LIST(left_type[i], list)){
 392                     const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
 393                     const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
 394                     *(uint32_t*)h->mv_cache[list][cache_idx  ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
 395                     *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
 396                     h->ref_cache[list][cache_idx  ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
 397                     h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
 398                 }else{
 399                     *(uint32_t*)h->mv_cache [list][cache_idx  ]=
 400                     *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
 401                     h->ref_cache[list][cache_idx  ]=
 402                     h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
 403                 }
 404             }
 405
                 // deblocking stops after top/left; so do direct MBs without
                 // spatial mv prediction when not in MBAFF
 406             if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
 407                 continue;
 408
                 // top-left corner; topleft_partition is -1 (all bits set, so
                 // the stride offset is added) or 0 (offset dropped)
 409             if(USES_LIST(topleft_type, list)){
 410                 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
 411                 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
 412                 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
 413                 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
 414             }else{
 415                 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
 416                 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
 417             }
 418
                 // top-right corner
 419             if(USES_LIST(topright_type, list)){
 420                 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
 421                 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
 422                 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
 423                 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
 424             }else{
 425                 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
 426                 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
 427             }
 428
                 // skip and direct MBs outside MBAFF do not need what follows
 429             if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
 430                 continue;
 431
                 // mark the cache slots right of blocks 5, 7, 13 and at blocks
                 // 4 and 12 as not available, with zero mvs
 432             h->ref_cache[list][scan8[5 ]+1] =
 433             h->ref_cache[list][scan8[7 ]+1] =
 434             h->ref_cache[list][scan8[13]+1] =  //FIXME remove past 3 (init somewhere else)
 435             h->ref_cache[list][scan8[4 ]] =
 436             h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
 437             *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
 438             *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
 439             *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
 440             *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
 441             *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
 442
 443             if( h->pps.cabac ) {
 444                 /* XXX beurk, Load mvd */
                     // same top/left layout as the mv cache, read from mvd_table
 445                 if(USES_LIST(top_type, list)){
 446                     const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
 447                     *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
 448                     *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
 449                     *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
 450                     *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
 451                 }else{
 452                     *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
 453                     *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
 454                     *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
 455                     *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
 456                 }
 457                 if(USES_LIST(left_type[0], list)){
 458                     const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
 459                     *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
 460                     *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
 461                 }else{
 462                     *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
 463                     *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
 464                 }
 465                 if(USES_LIST(left_type[1], list)){
 466                     const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
 467                     *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
 468                     *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
 469                 }else{
 470                     *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
 471                     *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
 472                 }
 473                 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
 474                 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
 475                 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
 476                 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
 477                 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
 478
                     // B slices: direct flags of the neighbours; the 4x4 area
                     // of the current MB is cleared first
 479                 if(h->slice_type_nos == FF_B_TYPE){
 480                     fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
 481
 482                     if(IS_DIRECT(top_type)){
 483                         *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
 484                     }else if(IS_8X8(top_type)){
 485                         int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
 486                         h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
 487                         h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
 488                     }else{
 489                         *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
 490                     }
 491
 492                     if(IS_DIRECT(left_type[0]))
 493                         h->direct_cache[scan8[0] - 1 + 0*8]= 1;
 494                     else if(IS_8X8(left_type[0]))
 495                         h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
 496                     else
 497                         h->direct_cache[scan8[0] - 1 + 0*8]= 0;
 498
 499                     if(IS_DIRECT(left_type[1]))
 500                         h->direct_cache[scan8[0] - 1 + 2*8]= 1;
 501                     else if(IS_8X8(left_type[1]))
 502                         h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
 503                     else
 504                         h->direct_cache[scan8[0] - 1 + 2*8]= 0;
 505                 }
 506             }
 507
                 // MBAFF: rescale neighbour entries whose frame/field coding
                 // differs from the current MB. MAP_MVS applies MAP_F2F to
                 // every neighbour slot loaded above. For a field MB, frame
                 // neighbours get their ref index doubled and vertical mv/mvd
                 // halved; for a frame MB, field neighbours get the inverse.
 508             if(FRAME_MBAFF){
 509 #define MAP_MVS\
 510                     MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
 511                     MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
 512                     MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
 513                     MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
 514                     MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
 515                     MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
 516                     MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
 517                     MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
 518                     MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
 519                     MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
 520                 if(MB_FIELD){
 521 #define MAP_F2F(idx, mb_type)\
 522                     if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
 523                         h->ref_cache[list][idx] <<= 1;\
 524                         h->mv_cache[list][idx][1] /= 2;\
 525                         h->mvd_cache[list][idx][1] /= 2;\
 526                     }
 527                     MAP_MVS
 528 #undef MAP_F2F
 529                 }else{
 530 #define MAP_F2F(idx, mb_type)\
 531                     if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
 532                         h->ref_cache[list][idx] >>= 1;\
 533                         h->mv_cache[list][idx][1] <<= 1;\
 534                         h->mvd_cache[list][idx][1] <<= 1;\
 535                     }
 536                     MAP_MVS
 537 #undef MAP_F2F
 538                 }
 539             }
 540         }
 541     }
 542 #endif
 543
         // number (0..2) of top / first-left neighbours that use the 8x8 transform
 544     h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
 545 }
 546
 547 static inline void write_back_intra_pred_mode(H264Context *h){
 548     const int mb_xy= h->mb_xy;
 549
 550     h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
 551     h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
 552     h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
 553     h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
 554     h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
 555     h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
 556     h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
 557 }
 558
 559 /**
 560  * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
 561  */
 562 static inline int check_intra4x4_pred_mode(H264Context *h){
 563     MpegEncContext * const s = &h->s;
 564     static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
 565     static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
 566     int i;
 567
 568     if(!(h->top_samples_available&0x8000)){
 569         for(i=0; i<4; i++){
 570             int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
 571             if(status<0){
 572                 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
 573                 return -1;
 574             } else if(status){
 575                 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
 576             }
 577         }
 578     }
 579
 580     if((h->left_samples_available&0x8888)!=0x8888){
 581         static const int mask[4]={0x8000,0x2000,0x80,0x20};
 582         for(i=0; i<4; i++){
 583             if(!(h->left_samples_available&mask[i])){
 584             int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
 585             if(status<0){
 586                 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
 587                 return -1;
 588             } else if(status){
 589                 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
 590             }
 591             }
 592         }
 593     }
 594
 595     return 0;
 596 } //FIXME cleanup like next
 597
 598 /**
 599  * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
 600  */
 601 static inline int check_intra_pred_mode(H264Context *h, int mode){
 602     MpegEncContext * const s = &h->s;
 603     static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
 604     static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
 605
 606     if(mode > 6U) {
 607         av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
 608         return -1;
 609     }
 610
 611     if(!(h->top_samples_available&0x8000)){
 612         mode= top[ mode ];
 613         if(mode<0){
 614             av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
 615             return -1;
 616         }
 617     }
 618
 619     if((h->left_samples_available&0x8080) != 0x8080){
 620         mode= left[ mode ];
 621         if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
 622             mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
 623         }
 624         if(mode<0){
 625             av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
 626             return -1;
 627         }
 628     }
 629
 630     return mode;
 631 }
 632
 633 /**
 634  * gets the predicted intra4x4 prediction mode.
 635  */
 636 static inline int pred_intra_mode(H264Context *h, int n){
 637     const int index8= scan8[n];
 638     const int left= h->intra4x4_pred_mode_cache[index8 - 1];
 639     const int top = h->intra4x4_pred_mode_cache[index8 - 8];
 640     const int min= FFMIN(left, top);
 641
 642     tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
 643
 644     if(min<0) return DC_PRED;
 645     else      return min;
 646 }
 647
 648 static inline void write_back_non_zero_count(H264Context *h){
 649     const int mb_xy= h->mb_xy;
 650
 651     h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
 652     h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
 653     h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
 654     h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
 655     h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
 656     h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
 657     h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
 658
 659     h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
 660     h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
 661     h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
 662
 663     h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
 664     h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
 665     h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
 666 }
 667
 668 /**
 669  * gets the predicted number of non-zero coefficients.
 670  * @param n block index
 671  */
 672 static inline int pred_non_zero_count(H264Context *h, int n){
 673     const int index8= scan8[n];
 674     const int left= h->non_zero_count_cache[index8 - 1];
 675     const int top = h->non_zero_count_cache[index8 - 8];
 676     int i= left + top;
 677
 678     if(i<64) i= (i+1)>>1;
 679
 680     tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
 681
 682     return i&31;
 683 }
 684
/**
 * Fetches the diagonal ("C") neighbour motion vector used for MV prediction.
 * Normally this is the cache entry at i - 8 + part_width; when its reference
 * is PART_NOT_AVAILABLE the entry at i - 8 - 1 is used instead. With
 * FRAME_MBAFF some cases read the vector straight from the current
 * picture's motion_val[] and rescale its vertical component and reference.
 * @param C          receives a pointer to the selected motion vector
 * @param i          position in the ref_cache/mv_cache (a scan8[] value)
 * @param list       reference list
 * @param part_width partition width in cache columns
 * @return the reference index belonging to *C (may be LIST_NOT_USED or
 *         PART_NOT_AVAILABLE)
 */
static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
    const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
    MpegEncContext *s = &h->s;

    /* there is no consistent mapping of mvs to neighboring locations that will
     * make mbaff happy, so we can't move all this logic to fill_caches */
    if(FRAME_MBAFF){
        const uint32_t *mb_types = s->current_picture_ptr->mb_type;
        const int16_t *mv;
        /* scratch entry of the cache: zeroed here and filled by SET_DIAG_MV */
        *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
        *C = h->mv_cache[list][scan8[0]-2];

        /* frame MB in an odd MB row, block in the first cache row, top-right cached */
        if(!MB_FIELD
           && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
            int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
            if(IS_INTERLACED(mb_types[topright_xy])){
/* SET_DIAG_MV always returns from fetch_diagonal_mv(): LIST_NOT_USED when the
 * macroblock covering (X4,Y4) does not use this list, otherwise the reference
 * index at that position with REF_OP applied, after copying the vector into
 * the scratch cache entry with MV_OP applied to its vertical component.
 * It declares locals, so it may be used only once per block scope. */
#define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
                const int x4 = X4, y4 = Y4;\
                const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
                if(!USES_LIST(mb_type,list))\
                    return LIST_NOT_USED;\
                mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
                h->mv_cache[list][scan8[0]-2][0] = mv[0];\
                h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
                return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;

                SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
            }
        }
        /* top-right not cached, block in cache column 4, left neighbour cached:
         * the vector may have to come from the left macroblock */
        if(topright_ref == PART_NOT_AVAILABLE
           && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
           && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
            if(!MB_FIELD
               && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
                SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
            }
            if(MB_FIELD
               && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
               && i >= scan8[0]+8){
                // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
                SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
            }
        }
#undef SET_DIAG_MV
    }

    /* regular case: top-right from the cache, else fall back to top-left */
    if(topright_ref != PART_NOT_AVAILABLE){
        *C= h->mv_cache[list][ i - 8 + part_width ];
        return topright_ref;
    }else{
        tprintf(s->avctx, "topright MV not available\n");

        *C= h->mv_cache[list][ i - 8 - 1 ];
        return h->ref_cache[list][ i - 8 - 1 ];
    }
}
 741
 742 /**
 743  * gets the predicted MV.
 744  * @param n the block index
 745  * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
 746  * @param mx the x component of the predicted motion vector
 747  * @param my the y component of the predicted motion vector
 748  */
 749 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
 750     const int index8= scan8[n];
 751     const int top_ref=      h->ref_cache[list][ index8 - 8 ];
 752     const int left_ref=     h->ref_cache[list][ index8 - 1 ];
 753     const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
 754     const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
 755     const int16_t * C;
 756     int diagonal_ref, match_count;
 757
 758     assert(part_width==1 || part_width==2 || part_width==4);
 759
 760 /* mv_cache
 761   B . . A T T T T
 762   U . . L . . , .
 763   U . . L . . . .
 764   U . . L . . , .
 765   . . . L . . . .
 766 */
 767
 768     diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
 769     match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
 770     tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
 771     if(match_count > 1){ //most common
 772         *mx= mid_pred(A[0], B[0], C[0]);
 773         *my= mid_pred(A[1], B[1], C[1]);
 774     }else if(match_count==1){
 775         if(left_ref==ref){
 776             *mx= A[0];
 777             *my= A[1];
 778         }else if(top_ref==ref){
 779             *mx= B[0];
 780             *my= B[1];
 781         }else{
 782             *mx= C[0];
 783             *my= C[1];
 784         }
 785     }else{
 786         if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
 787             *mx= A[0];
 788             *my= A[1];
 789         }else{
 790             *mx= mid_pred(A[0], B[0], C[0]);
 791             *my= mid_pred(A[1], B[1], C[1]);
 792         }
 793     }
 794
 795     tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1],                    diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
 796 }
 797
 798 /**
 799  * gets the directionally predicted 16x8 MV.
 800  * @param n the block index
 801  * @param mx the x component of the predicted motion vector
 802  * @param my the y component of the predicted motion vector
 803  */
 804 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
 805     if(n==0){
 806         const int top_ref=      h->ref_cache[list][ scan8[0] - 8 ];
 807         const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
 808
 809         tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
 810
 811         if(top_ref == ref){
 812             *mx= B[0];
 813             *my= B[1];
 814             return;
 815         }
 816     }else{
 817         const int left_ref=     h->ref_cache[list][ scan8[8] - 1 ];
 818         const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
 819
 820         tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
 821
 822         if(left_ref == ref){
 823             *mx= A[0];
 824             *my= A[1];
 825             return;
 826         }
 827     }
 828
 829     //RARE
 830     pred_motion(h, n, 4, list, ref, mx, my);
 831 }
 832
 833 /**
 834  * gets the directionally predicted 8x16 MV.
 835  * @param n the block index
 836  * @param mx the x component of the predicted motion vector
 837  * @param my the y component of the predicted motion vector
 838  */
 839 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
 840     if(n==0){
 841         const int left_ref=      h->ref_cache[list][ scan8[0] - 1 ];
 842         const int16_t * const A=  h->mv_cache[list][ scan8[0] - 1 ];
 843
 844         tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
 845
 846         if(left_ref == ref){
 847             *mx= A[0];
 848             *my= A[1];
 849             return;
 850         }
 851     }else{
 852         const int16_t * C;
 853         int diagonal_ref;
 854
 855         diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
 856
 857         tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
 858
 859         if(diagonal_ref == ref){
 860             *mx= C[0];
 861             *my= C[1];
 862             return;
 863         }
 864     }
 865
 866     //RARE
 867     pred_motion(h, n, 2, list, ref, mx, my);
 868 }
 869
 870 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
 871     const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
 872     const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
 873
 874     tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
 875
 876     if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
 877        || (top_ref == 0  && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
 878        || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
 879
 880         *mx = *my = 0;
 881         return;
 882     }
 883
 884     pred_motion(h, 0, 4, 0, 0, mx, my);
 885
 886     return;
 887 }
 888
 889 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
 890     int poc0 = h->ref_list[0][i].poc;
 891     int td = av_clip(poc1 - poc0, -128, 127);
 892     if(td == 0 || h->ref_list[0][i].long_ref){
 893         return 256;
 894     }else{
 895         int tb = av_clip(poc - poc0, -128, 127);
 896         int tx = (16384 + (FFABS(td) >> 1)) / td;
 897         return av_clip((tb*tx + 32) >> 6, -1024, 1023);
 898     }
 899 }
 900
 901 static inline void direct_dist_scale_factor(H264Context * const h){
 902     MpegEncContext * const s = &h->s;
 903     const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
 904     const int poc1 = h->ref_list[1][0].poc;
 905     int i, field;
 906     for(field=0; field<2; field++){
 907         const int poc  = h->s.current_picture_ptr->field_poc[field];
 908         const int poc1 = h->ref_list[1][0].field_poc[field];
 909         for(i=0; i < 2*h->ref_count[0]; i++)
 910             h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
 911     }
 912
 913     for(i=0; i<h->ref_count[0]; i++){
 914         h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
 915     }
 916 }
 917
 918 static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
 919     MpegEncContext * const s = &h->s;
 920     Picture * const ref1 = &h->ref_list[1][0];
 921     int j, old_ref, rfield;
 922     int start= mbafi ? 16                      : 0;
 923     int end  = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
 924     int interl= mbafi || s->picture_structure != PICT_FRAME;
 925
 926     /* bogus; fills in for missing frames */
 927     memset(map[list], 0, sizeof(map[list]));
 928
 929     for(rfield=0; rfield<2; rfield++){
 930         for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
 931             int poc = ref1->ref_poc[colfield][list][old_ref];
 932
 933             if     (!interl)
 934                 poc |= 3;
 935             else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
 936                 poc= (poc&~3) + rfield + 1;
 937
 938             for(j=start; j<end; j++){
 939                 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
 940                     int cur_ref= mbafi ? (j-16)^field : j;
 941                     map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
 942                     if(rfield == field)
 943                         map[list][old_ref] = cur_ref;
 944                     break;
 945                 }
 946             }
 947         }
 948     }
 949 }
 950
 951 static inline void direct_ref_list_init(H264Context * const h){
 952     MpegEncContext * const s = &h->s;
 953     Picture * const ref1 = &h->ref_list[1][0];
 954     Picture * const cur = s->current_picture_ptr;
 955     int list, j, field;
 956     int sidx= (s->picture_structure&1)^1;
 957     int ref1sidx= (ref1->reference&1)^1;
 958
 959     for(list=0; list<2; list++){
 960         cur->ref_count[sidx][list] = h->ref_count[list];
 961         for(j=0; j<h->ref_count[list]; j++)
 962             cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
 963     }
 964
 965     if(s->picture_structure == PICT_FRAME){
 966         memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
 967         memcpy(cur->ref_poc  [1], cur->ref_poc  [0], sizeof(cur->ref_poc  [0]));
 968     }
 969
 970     cur->mbaff= FRAME_MBAFF;
 971
 972     if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
 973         return;
 974
 975     for(list=0; list<2; list++){
 976         fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
 977         for(field=0; field<2; field++)
 978             fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
 979     }
 980 }
 981
 982 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
 983     MpegEncContext * const s = &h->s;
 984     int b8_stride = h->b8_stride;
 985     int b4_stride = h->b_stride;
 986     int mb_xy = h->mb_xy;
 987     int mb_type_col[2];
 988     const int16_t (*l1mv0)[2], (*l1mv1)[2];
 989     const int8_t *l1ref0, *l1ref1;
 990     const int is_b8x8 = IS_8X8(*mb_type);
 991     unsigned int sub_mb_type;
 992     int i8, i4;
 993
 994 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
 995
 996     if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
 997         if(!IS_INTERLACED(*mb_type)){                    //     AFR/FR    -> AFL/FL
 998             int cur_poc = s->current_picture_ptr->poc;
 999             int *col_poc = h->ref_list[1]->field_poc;
1000             int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1001             mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1002             b8_stride = 0;
1003         }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
1004             int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1005             mb_xy += s->mb_stride*fieldoff;
1006         }
1007         goto single_col;
1008     }else{                                               // AFL/AFR/FR/FL -> AFR/FR
1009         if(IS_INTERLACED(*mb_type)){                     // AFL       /FL -> AFR/FR
1010             mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1011             mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1012             mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1013             b8_stride *= 3;
1014             b4_stride *= 6;
1015             //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1016             if(    (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1017                 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1018                 && !is_b8x8){
1019                 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1020                 *mb_type   |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1021             }else{
1022                 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1023                 *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
1024             }
1025         }else{                                           //     AFR/FR    -> AFR/FR
1026 single_col:
1027             mb_type_col[0] =
1028             mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
1029             if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1030                 /* FIXME save sub mb types from previous frames (or derive from MVs)
1031                 * so we know exactly what block size to use */
1032                 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1033                 *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
1034             }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1035                 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1036                 *mb_type   |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1037             }else{
1038                 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1039                 *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
1040             }
1041         }
1042     }
1043
1044     l1mv0  = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1045     l1mv1  = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1046     l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1047     l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
1048     if(!b8_stride){
1049         if(s->mb_y&1){
1050             l1ref0 += h->b8_stride;
1051             l1ref1 += h->b8_stride;
1052             l1mv0  +=  2*b4_stride;
1053             l1mv1  +=  2*b4_stride;
1054         }
1055     }
1056
1057     if(h->direct_spatial_mv_pred){
1058         int ref[2];
1059         int mv[2][2];
1060         int list;
1061
1062         /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1063
1064         /* ref = min(neighbors) */
1065         for(list=0; list<2; list++){
1066             int refa = h->ref_cache[list][scan8[0] - 1];
1067             int refb = h->ref_cache[list][scan8[0] - 8];
1068             int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1069             if(refc == PART_NOT_AVAILABLE)
1070                 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1071             ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
1072             if(ref[list] < 0)
1073                 ref[list] = -1;
1074         }
1075
1076         if(ref[0] < 0 && ref[1] < 0){
1077             ref[0] = ref[1] = 0;
1078             mv[0][0] = mv[0][1] =
1079             mv[1][0] = mv[1][1] = 0;
1080         }else{
1081             for(list=0; list<2; list++){
1082                 if(ref[list] >= 0)
1083                     pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1084                 else
1085                     mv[list][0] = mv[list][1] = 0;
1086             }
1087         }
1088
1089         if(ref[1] < 0){
1090             if(!is_b8x8)
1091                 *mb_type &= ~MB_TYPE_L1;
1092             sub_mb_type &= ~MB_TYPE_L1;
1093         }else if(ref[0] < 0){
1094             if(!is_b8x8)
1095                 *mb_type &= ~MB_TYPE_L0;
1096             sub_mb_type &= ~MB_TYPE_L0;
1097         }
1098
1099         if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1100             for(i8=0; i8<4; i8++){
1101                 int x8 = i8&1;
1102                 int y8 = i8>>1;
1103                 int xy8 = x8+y8*b8_stride;
1104                 int xy4 = 3*x8+y8*b4_stride;
1105                 int a=0, b=0;
1106
1107                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1108                     continue;
1109                 h->sub_mb_type[i8] = sub_mb_type;
1110
1111                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1112                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1113                 if(!IS_INTRA(mb_type_col[y8])
1114                    && (   (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1115                        || (l1ref0[xy8]  < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1116                     if(ref[0] > 0)
1117                         a= pack16to32(mv[0][0],mv[0][1]);
1118                     if(ref[1] > 0)
1119                         b= pack16to32(mv[1][0],mv[1][1]);
1120                 }else{
1121                     a= pack16to32(mv[0][0],mv[0][1]);
1122                     b= pack16to32(mv[1][0],mv[1][1]);
1123                 }
1124                 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1125                 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1126             }
1127         }else if(IS_16X16(*mb_type)){
1128             int a=0, b=0;
1129
1130             fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1131             fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1132             if(!IS_INTRA(mb_type_col[0])
1133                && (   (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1134                    || (l1ref0[0]  < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1135                        && (h->x264_build>33 || !h->x264_build)))){
1136                 if(ref[0] > 0)
1137                     a= pack16to32(mv[0][0],mv[0][1]);
1138                 if(ref[1] > 0)
1139                     b= pack16to32(mv[1][0],mv[1][1]);
1140             }else{
1141                 a= pack16to32(mv[0][0],mv[0][1]);
1142                 b= pack16to32(mv[1][0],mv[1][1]);
1143             }
1144             fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1145             fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
1146         }else{
1147             for(i8=0; i8<4; i8++){
1148                 const int x8 = i8&1;
1149                 const int y8 = i8>>1;
1150
1151                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1152                     continue;
1153                 h->sub_mb_type[i8] = sub_mb_type;
1154
1155                 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1156                 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1157                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1158                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1159
1160                 /* col_zero_flag */
1161                 if(!IS_INTRA(mb_type_col[0]) && (   l1ref0[x8 + y8*b8_stride] == 0
1162                                               || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1163                                                   && (h->x264_build>33 || !h->x264_build)))){
1164                     const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1165                     if(IS_SUB_8X8(sub_mb_type)){
1166                         const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1167                         if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1168                             if(ref[0] == 0)
1169                                 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1170                             if(ref[1] == 0)
1171                                 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1172                         }
1173                     }else
1174                     for(i4=0; i4<4; i4++){
1175                         const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1176                         if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1177                             if(ref[0] == 0)
1178                                 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1179                             if(ref[1] == 0)
1180                                 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1181                         }
1182                     }
1183                 }
1184             }
1185         }
1186     }else{ /* direct temporal mv pred */
1187         const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1188         const int *dist_scale_factor = h->dist_scale_factor;
1189         int ref_offset= 0;
1190
1191         if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
1192             map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1193             map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1194             dist_scale_factor   =h->dist_scale_factor_field[s->mb_y&1];
1195         }
1196         if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
1197             ref_offset += 16;
1198
1199         if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1200             /* FIXME assumes direct_8x8_inference == 1 */
1201             int y_shift  = 2*!IS_INTERLACED(*mb_type);
1202
1203             for(i8=0; i8<4; i8++){
1204                 const int x8 = i8&1;
1205                 const int y8 = i8>>1;
1206                 int ref0, scale;
1207                 const int16_t (*l1mv)[2]= l1mv0;
1208
1209                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1210                     continue;
1211                 h->sub_mb_type[i8] = sub_mb_type;
1212
1213                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1214                 if(IS_INTRA(mb_type_col[y8])){
1215                     fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1216                     fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1217                     fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1218                     continue;
1219                 }
1220
1221                 ref0 = l1ref0[x8 + y8*b8_stride];
1222                 if(ref0 >= 0)
1223                     ref0 = map_col_to_list0[0][ref0 + ref_offset];
1224                 else{
1225                     ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1226                     l1mv= l1mv1;
1227                 }
1228                 scale = dist_scale_factor[ref0];
1229                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1230
1231                 {
1232                     const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1233                     int my_col = (mv_col[1]<<y_shift)/2;
1234                     int mx = (scale * mv_col[0] + 128) >> 8;
1235                     int my = (scale * my_col + 128) >> 8;
1236                     fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1237                     fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1238                 }
1239             }
1240             return;
1241         }
1242
1243         /* one-to-one mv scaling */
1244
1245         if(IS_16X16(*mb_type)){
1246             int ref, mv0, mv1;
1247
1248             fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1249             if(IS_INTRA(mb_type_col[0])){
1250                 ref=mv0=mv1=0;
1251             }else{
1252                 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1253                                                 : map_col_to_list0[1][l1ref1[0] + ref_offset];
1254                 const int scale = dist_scale_factor[ref0];
1255                 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1256                 int mv_l0[2];
1257                 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1258                 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1259                 ref= ref0;
1260                 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1261                 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1262             }
1263             fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1264             fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1265             fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1266         }else{
1267             for(i8=0; i8<4; i8++){
1268                 const int x8 = i8&1;
1269                 const int y8 = i8>>1;
1270                 int ref0, scale;
1271                 const int16_t (*l1mv)[2]= l1mv0;
1272
1273                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1274                     continue;
1275                 h->sub_mb_type[i8] = sub_mb_type;
1276                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1277                 if(IS_INTRA(mb_type_col[0])){
1278                     fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1279                     fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1280                     fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1281                     continue;
1282                 }
1283
1284                 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
1285                 if(ref0 >= 0)
1286                     ref0 = map_col_to_list0[0][ref0];
1287                 else{
1288                     ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1289                     l1mv= l1mv1;
1290                 }
1291                 scale = dist_scale_factor[ref0];
1292
1293                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1294                 if(IS_SUB_8X8(sub_mb_type)){
1295                     const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1296                     int mx = (scale * mv_col[0] + 128) >> 8;
1297                     int my = (scale * mv_col[1] + 128) >> 8;
1298                     fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1299                     fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1300                 }else
1301                 for(i4=0; i4<4; i4++){
1302                     const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1303                     int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1304                     mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1305                     mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1306                     *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1307                         pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1308                 }
1309             }
1310         }
1311     }
1312 }
1313
1314 static inline void write_back_motion(H264Context *h, int mb_type){
1315     MpegEncContext * const s = &h->s;
1316     const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1317     const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1318     int list;
1319
1320     if(!USES_LIST(mb_type, 0))
1321         fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1322
1323     for(list=0; list<h->list_count; list++){
1324         int y;
1325         if(!USES_LIST(mb_type, list))
1326             continue;
1327
1328         for(y=0; y<4; y++){
1329             *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1330             *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1331         }
1332         if( h->pps.cabac ) {
1333             if(IS_SKIP(mb_type))
1334                 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1335             else
1336             for(y=0; y<4; y++){
1337                 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1338                 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1339             }
1340         }
1341
1342         {
1343             int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1344             ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1345             ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1346             ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1347             ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
1348         }
1349     }
1350
1351     if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1352         if(IS_8X8(mb_type)){
1353             uint8_t *direct_table = &h->direct_table[b8_xy];
1354             direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1355             direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1356             direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1357         }
1358     }
1359 }
1360
1361 /**
1362  * Decodes a network abstraction layer unit.
1363  * @param consumed is the number of bytes used as input
1364  * @param length is the length of the array
1365  * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1366  * @returns decoded bytes, might be src+1 if no escapes
1367  */
1368 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1369     int i, si, di;
1370     uint8_t *dst;
1371     int bufidx;
1372
1373 //    src[0]&0x80;                //forbidden bit
1374     h->nal_ref_idc= src[0]>>5;
1375     h->nal_unit_type= src[0]&0x1F;
1376
1377     src++; length--;
1378 #if 0
1379     for(i=0; i<length; i++)
1380         printf("%2X ", src[i]);
1381 #endif
1382     for(i=0; i+1<length; i+=2){
1383         if(src[i]) continue;
1384         if(i>0 && src[i-1]==0) i--;
1385         if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1386             if(src[i+2]!=3){
1387                 /* startcode, so we must be past the end */
1388                 length=i;
1389             }
1390             break;
1391         }
1392     }
1393
1394     if(i>=length-1){ //no escaped 0
1395         *dst_length= length;
1396         *consumed= length+1; //+1 for the header
1397         return src;
1398     }
1399
1400     bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1401     h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
1402     dst= h->rbsp_buffer[bufidx];
1403
1404     if (dst == NULL){
1405         return NULL;
1406     }
1407
1408 //printf("decoding esc\n");
1409     si=di=0;
1410     while(si<length){
1411         //remove escapes (very rare 1:2^22)
1412         if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1413             if(src[si+2]==3){ //escape
1414                 dst[di++]= 0;
1415                 dst[di++]= 0;
1416                 si+=3;
1417                 continue;
1418             }else //next start code
1419                 break;
1420         }
1421
1422         dst[di++]= src[si++];
1423     }
1424
1425     memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
1426
1427     *dst_length= di;
1428     *consumed= si + 1;//+1 for the header
1429 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1430     return dst;
1431 }
1432
1433 /**
1434  * identifies the exact end of the bitstream
1435  * @return the length of the trailing, or 0 if damaged
1436  */
1437 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1438     int v= *src;
1439     int r;
1440
1441     tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1442
1443     for(r=1; r<9; r++){
1444         if(v&1) return r;
1445         v>>=1;
1446     }
1447     return 0;
1448 }
1449
1450 /**
1451  * IDCT transforms the 16 dc values and dequantizes them.
1452  * @param qp quantization parameter
1453  */
1454 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1455 #define stride 16
1456     int i;
1457     int temp[16]; //FIXME check if this is a good idea
1458     static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
1459     static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1460
1461 //memset(block, 64, 2*256);
1462 //return;
1463     for(i=0; i<4; i++){
1464         const int offset= y_offset[i];
1465         const int z0= block[offset+stride*0] + block[offset+stride*4];
1466         const int z1= block[offset+stride*0] - block[offset+stride*4];
1467         const int z2= block[offset+stride*1] - block[offset+stride*5];
1468         const int z3= block[offset+stride*1] + block[offset+stride*5];
1469
1470         temp[4*i+0]= z0+z3;
1471         temp[4*i+1]= z1+z2;
1472         temp[4*i+2]= z1-z2;
1473         temp[4*i+3]= z0-z3;
1474     }
1475
1476     for(i=0; i<4; i++){
1477         const int offset= x_offset[i];
1478         const int z0= temp[4*0+i] + temp[4*2+i];
1479         const int z1= temp[4*0+i] - temp[4*2+i];
1480         const int z2= temp[4*1+i] - temp[4*3+i];
1481         const int z3= temp[4*1+i] + temp[4*3+i];
1482
1483         block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1484         block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1485         block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1486         block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1487     }
1488 }
1489
#if 0
/**
 * DCT transforms the 16 dc values.
 *
 * Disabled code. It uses the stride macro, which therefore has to be defined
 * at this point if the function is ever enabled.
 *
 * @param block coefficient array holding the dc values, updated in place
 * @param qp quantization parameter ??? FIXME (commented out of the signature)
 */
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
//    const int qmul= dequant_coeff[qp][0];
    int i;
    int temp[16]; //FIXME check if this is a good idea
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

    /* first pass: one butterfly per y_offset, results stored as rows of temp */
    for(i=0; i<4; i++){
        const DCTELEM *in= block + y_offset[i];
        int *row= &temp[4*i];
        const int sum04= in[stride*0] + in[stride*4];
        const int dif04= in[stride*0] - in[stride*4];
        const int dif15= in[stride*1] - in[stride*5];
        const int sum15= in[stride*1] + in[stride*5];

        row[0]= sum04 + sum15;
        row[1]= dif04 + dif15;
        row[2]= dif04 - dif15;
        row[3]= sum04 - sum15;
    }

    /* second pass: butterfly over the columns of temp, results halved */
    for(i=0; i<4; i++){
        DCTELEM *out= block + x_offset[i];
        const int *col= &temp[i];
        const int sum02= col[4*0] + col[4*2];
        const int dif02= col[4*0] - col[4*2];
        const int dif13= col[4*1] - col[4*3];
        const int sum13= col[4*1] + col[4*3];

        out[stride*0 ]= (sum02 + sum13)>>1;
        out[stride*2 ]= (dif02 + dif13)>>1;
        out[stride*8 ]= (dif02 - dif13)>>1;
        out[stride*10]= (sum02 - sum13)>>1;
    }
}
#endif
1529
1530 #undef xStride
1531 #undef stride
1532
1533 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1534     const int stride= 16*2;
1535     const int xStride= 16;
1536     int a,b,c,d,e;
1537
1538     a= block[stride*0 + xStride*0];
1539     b= block[stride*0 + xStride*1];
1540     c= block[stride*1 + xStride*0];
1541     d= block[stride*1 + xStride*1];
1542
1543     e= a-b;
1544     a= a+b;
1545     b= c-d;
1546     c= c+d;
1547
1548     block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1549     block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1550     block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1551     block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
1552 }
1553
#if 0
/**
 * Transforms the 2x2 chroma dc values in place (disabled code).
 *
 * The four values sit at offsets 0, 16, 32 and 48 of block.
 */
static void chroma_dc_dct_c(DCTELEM *block){
    const int row_step= 16*2;
    const int col_step= 16;
    DCTELEM * const top= block;
    DCTELEM * const bottom= block + row_step;
    const int top_sum    = top[0]    + top[col_step];
    const int top_diff   = top[0]    - top[col_step];
    const int bottom_sum = bottom[0] + bottom[col_step];
    const int bottom_diff= bottom[0] - bottom[col_step];

    top[0]          = (top_sum  + bottom_sum );
    top[col_step]   = (top_diff + bottom_diff);
    bottom[0]       = (top_sum  - bottom_sum );
    bottom[col_step]= (top_diff - bottom_diff);
}
#endif
1576
1577 /**
1578  * gets the chroma qp.
1579  */
1580 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1581     return h->pps.chroma_qp_table[t][qscale];
1582 }
1583
1584 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1585                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1586                            int src_x_offset, int src_y_offset,
1587                            qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1588     MpegEncContext * const s = &h->s;
1589     const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1590     int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1591     const int luma_xy= (mx&3) + ((my&3)<<2);
1592     uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1593     uint8_t * src_cb, * src_cr;
1594     int extra_width= h->emu_edge_width;
1595     int extra_height= h->emu_edge_height;
1596     int emu=0;
1597     const int full_mx= mx>>2;
1598     const int full_my= my>>2;
1599     const int pic_width  = 16*s->mb_width;
1600     const int pic_height = 16*s->mb_height >> MB_FIELD;
1601
1602     if(mx&7) extra_width -= 3;
1603     if(my&7) extra_height -= 3;
1604
1605     if(   full_mx < 0-extra_width
1606        || full_my < 0-extra_height
1607        || full_mx + 16/*FIXME*/ > pic_width + extra_width
1608        || full_my + 16/*FIXME*/ > pic_height + extra_height){
1609         ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1610             src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1611         emu=1;
1612     }
1613
1614     qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1615     if(!square){
1616         qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1617     }
1618
1619     if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1620
1621     if(MB_FIELD){
1622         // chroma offset when predicting from a field of opposite parity
1623         my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1624         emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1625     }
1626     src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1627     src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1628
1629     if(emu){
1630         ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1631             src_cb= s->edge_emu_buffer;
1632     }
1633     chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1634
1635     if(emu){
1636         ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1637             src_cr= s->edge_emu_buffer;
1638     }
1639     chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1640 }
1641
1642 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1643                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1644                            int x_offset, int y_offset,
1645                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1646                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1647                            int list0, int list1){
1648     MpegEncContext * const s = &h->s;
1649     qpel_mc_func *qpix_op=  qpix_put;
1650     h264_chroma_mc_func chroma_op= chroma_put;
1651
1652     dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
1653     dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
1654     dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
1655     x_offset += 8*s->mb_x;
1656     y_offset += 8*(s->mb_y >> MB_FIELD);
1657
1658     if(list0){
1659         Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1660         mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1661                            dest_y, dest_cb, dest_cr, x_offset, y_offset,
1662                            qpix_op, chroma_op);
1663
1664         qpix_op=  qpix_avg;
1665         chroma_op= chroma_avg;
1666     }
1667
1668     if(list1){
1669         Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1670         mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1671                            dest_y, dest_cb, dest_cr, x_offset, y_offset,
1672                            qpix_op, chroma_op);
1673     }
1674 }
1675
1676 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1677                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1678                            int x_offset, int y_offset,
1679                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1680                            h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1681                            h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1682                            int list0, int list1){
1683     MpegEncContext * const s = &h->s;
1684
1685     dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
1686     dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
1687     dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
1688     x_offset += 8*s->mb_x;
1689     y_offset += 8*(s->mb_y >> MB_FIELD);
1690
1691     if(list0 && list1){
1692         /* don't optimize for luma-only case, since B-frames usually
1693          * use implicit weights => chroma too. */
1694         uint8_t *tmp_cb = s->obmc_scratchpad;
1695         uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1696         uint8_t *tmp_y  = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1697         int refn0 = h->ref_cache[0][ scan8[n] ];
1698         int refn1 = h->ref_cache[1][ scan8[n] ];
1699
1700         mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1701                     dest_y, dest_cb, dest_cr,
1702                     x_offset, y_offset, qpix_put, chroma_put);
1703         mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1704                     tmp_y, tmp_cb, tmp_cr,
1705                     x_offset, y_offset, qpix_put, chroma_put);
1706
1707         if(h->use_weight == 2){
1708             int weight0 = h->implicit_weight[refn0][refn1];
1709             int weight1 = 64 - weight0;
1710             luma_weight_avg(  dest_y,  tmp_y,  h->  mb_linesize, 5, weight0, weight1, 0);
1711             chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1712             chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
1713         }else{
1714             luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1715                             h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1716                             h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1717             chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1718                             h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1719                             h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1720             chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1721                             h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1722                             h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
1723         }
1724     }else{
1725         int list = list1 ? 1 : 0;
1726         int refn = h->ref_cache[list][ scan8[n] ];
1727         Picture *ref= &h->ref_list[list][refn];
1728         mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1729                     dest_y, dest_cb, dest_cr, x_offset, y_offset,
1730                     qpix_put, chroma_put);
1731
1732         luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1733                        h->luma_weight[list][refn], h->luma_offset[list][refn]);
1734         if(h->use_weight_chroma){
1735             chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1736                              h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1737             chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1738                              h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
1739         }
1740     }
1741 }
1742
1743 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1744                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1745                            int x_offset, int y_offset,
1746                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1747                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1748                            h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1749                            int list0, int list1){
1750     if((h->use_weight==2 && list0 && list1
1751         && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1752        || h->use_weight==1)
1753         mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1754                          x_offset, y_offset, qpix_put, chroma_put,
1755                          weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1756     else
1757         mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1758                     x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1759 }
1760
1761 static inline void prefetch_motion(H264Context *h, int list){
1762     /* fetch pixels for estimated mv 4 macroblocks ahead
1763      * optimized for 64byte cache lines */
1764     MpegEncContext * const s = &h->s;
1765     const int refn = h->ref_cache[list][scan8[0]];
1766     if(refn >= 0){
1767         const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1768         const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1769         uint8_t **src= h->ref_list[list][refn].data;
1770         int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1771         s->dsp.prefetch(src[0]+off, s->linesize, 4);
1772         off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1773         s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1774     }
1775 }
1776
1777 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1778                       qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1779                       qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1780                       h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1781     MpegEncContext * const s = &h->s;
1782     const int mb_xy= h->mb_xy;
1783     const int mb_type= s->current_picture.mb_type[mb_xy];
1784
1785     assert(IS_INTER(mb_type));
1786
1787     prefetch_motion(h, 0);
1788
1789     if(IS_16X16(mb_type)){
1790         mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1791                 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1792                 &weight_op[0], &weight_avg[0],
1793                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1794     }else if(IS_16X8(mb_type)){
1795         mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1796                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1797                 &weight_op[1], &weight_avg[1],
1798                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1799         mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1800                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1801                 &weight_op[1], &weight_avg[1],
1802                 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1803     }else if(IS_8X16(mb_type)){
1804         mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1805                 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1806                 &weight_op[2], &weight_avg[2],
1807                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1808         mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1809                 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1810                 &weight_op[2], &weight_avg[2],
1811                 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1812     }else{
1813         int i;
1814
1815         assert(IS_8X8(mb_type));
1816
1817         for(i=0; i<4; i++){
1818             const int sub_mb_type= h->sub_mb_type[i];
1819             const int n= 4*i;
1820             int x_offset= (i&1)<<2;
1821             int y_offset= (i&2)<<1;
1822
1823             if(IS_SUB_8X8(sub_mb_type)){
1824                 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1825                     qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1826                     &weight_op[3], &weight_avg[3],
1827                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1828             }else if(IS_SUB_8X4(sub_mb_type)){
1829                 mc_part(h, n  , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1830                     qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1831                     &weight_op[4], &weight_avg[4],
1832                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1833                 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1834                     qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1835                     &weight_op[4], &weight_avg[4],
1836                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1837             }else if(IS_SUB_4X8(sub_mb_type)){
1838                 mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1839                     qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1840                     &weight_op[5], &weight_avg[5],
1841                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1842                 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1843                     qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1844                     &weight_op[5], &weight_avg[5],
1845                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1846             }else{
1847                 int j;
1848                 assert(IS_SUB_4X4(sub_mb_type));
1849                 for(j=0; j<4; j++){
1850                     int sub_x_offset= x_offset + 2*(j&1);
1851                     int sub_y_offset= y_offset +   (j&2);
1852                     mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1853                         qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1854                         &weight_op[6], &weight_avg[6],
1855                         IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1856                 }
1857             }
1858         }
1859     }
1860
1861     prefetch_motion(h, 1);
1862 }
1863
1864 static av_cold void decode_init_vlc(void){
1865     static int done = 0;
1866
1867     if (!done) {
1868         int i;
1869         int offset;
1870         done = 1;
1871
1872         chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1873         chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1874         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1875                  &chroma_dc_coeff_token_len [0], 1, 1,
1876                  &chroma_dc_coeff_token_bits[0], 1, 1,
1877                  INIT_VLC_USE_NEW_STATIC);
1878
1879         offset = 0;
1880         for(i=0; i<4; i++){
1881             coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1882             coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1883             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1884                      &coeff_token_len [i][0], 1, 1,
1885                      &coeff_token_bits[i][0], 1, 1,
1886                      INIT_VLC_USE_NEW_STATIC);
1887             offset += coeff_token_vlc_tables_size[i];
1888         }
1889         /*
1890          * This is a one time safety check to make sure that
1891          * the packed static coeff_token_vlc table sizes
1892          * were initialized correctly.
1893          */
1894         assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
1895
1896         for(i=0; i<3; i++){
1897             chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1898             chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1899             init_vlc(&chroma_dc_total_zeros_vlc[i],
1900                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1901                      &chroma_dc_total_zeros_len [i][0], 1, 1,
1902                      &chroma_dc_total_zeros_bits[i][0], 1, 1,
1903                      INIT_VLC_USE_NEW_STATIC);
1904         }
1905         for(i=0; i<15; i++){
1906             total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1907             total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1908             init_vlc(&total_zeros_vlc[i],
1909                      TOTAL_ZEROS_VLC_BITS, 16,
1910                      &total_zeros_len [i][0], 1, 1,
1911                      &total_zeros_bits[i][0], 1, 1,
1912                      INIT_VLC_USE_NEW_STATIC);
1913         }
1914
1915         for(i=0; i<6; i++){
1916             run_vlc[i].table = run_vlc_tables[i];
1917             run_vlc[i].table_allocated = run_vlc_tables_size;
1918             init_vlc(&run_vlc[i],
1919                      RUN_VLC_BITS, 7,
1920                      &run_len [i][0], 1, 1,
1921                      &run_bits[i][0], 1, 1,
1922                      INIT_VLC_USE_NEW_STATIC);
1923         }
1924         run7_vlc.table = run7_vlc_table,
1925         run7_vlc.table_allocated = run7_vlc_table_size;
1926         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1927                  &run_len [6][0], 1, 1,
1928                  &run_bits[6][0], 1, 1,
1929                  INIT_VLC_USE_NEW_STATIC);
1930     }
1931 }
1932
1933 static void free_tables(H264Context *h){
1934     int i;
1935     H264Context *hx;
1936     av_freep(&h->intra4x4_pred_mode);
1937     av_freep(&h->chroma_pred_mode_table);
1938     av_freep(&h->cbp_table);
1939     av_freep(&h->mvd_table[0]);
1940     av_freep(&h->mvd_table[1]);
1941     av_freep(&h->direct_table);
1942     av_freep(&h->non_zero_count);
1943     av_freep(&h->slice_table_base);
1944     h->slice_table= NULL;
1945
1946     av_freep(&h->mb2b_xy);
1947     av_freep(&h->mb2b8_xy);
1948
1949     for(i = 0; i < h->s.avctx->thread_count; i++) {
1950         hx = h->thread_context[i];
1951         if(!hx) continue;
1952         av_freep(&hx->top_borders[1]);
1953         av_freep(&hx->top_borders[0]);
1954         av_freep(&hx->s.obmc_scratchpad);
1955     }
1956 }
1957
1958 static void init_dequant8_coeff_table(H264Context *h){
1959     int i,q,x;
1960     const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
1961     h->dequant8_coeff[0] = h->dequant8_buffer[0];
1962     h->dequant8_coeff[1] = h->dequant8_buffer[1];
1963
1964     for(i=0; i<2; i++ ){
1965         if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
1966             h->dequant8_coeff[1] = h->dequant8_buffer[0];
1967             break;
1968         }
1969
1970         for(q=0; q<52; q++){
1971             int shift = div6[q];
1972             int idx = rem6[q];
1973             for(x=0; x<64; x++)
1974                 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
1975                     ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
1976                     h->pps.scaling_matrix8[i][x]) << shift;
1977         }
1978     }
1979 }
1980
1981 static void init_dequant4_coeff_table(H264Context *h){
1982     int i,j,q,x;
1983     const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
1984     for(i=0; i<6; i++ ){
1985         h->dequant4_coeff[i] = h->dequant4_buffer[i];
1986         for(j=0; j<i; j++){
1987             if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
1988                 h->dequant4_coeff[i] = h->dequant4_buffer[j];
1989                 break;
1990             }
1991         }
1992         if(j<i)
1993             continue;
1994
1995         for(q=0; q<52; q++){
1996             int shift = div6[q] + 2;
1997             int idx = rem6[q];
1998             for(x=0; x<16; x++)
1999                 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2000                     ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2001                     h->pps.scaling_matrix4[i][x]) << shift;
2002         }
2003     }
2004 }
2005
2006 static void init_dequant_tables(H264Context *h){
2007     int i,x;
2008     init_dequant4_coeff_table(h);
2009     if(h->pps.transform_8x8_mode)
2010         init_dequant8_coeff_table(h);
2011     if(h->sps.transform_bypass){
2012         for(i=0; i<6; i++)
2013             for(x=0; x<16; x++)
2014                 h->dequant4_coeff[i][0][x] = 1<<6;
2015         if(h->pps.transform_8x8_mode)
2016             for(i=0; i<2; i++)
2017                 for(x=0; x<64; x++)
2018                     h->dequant8_coeff[i][0][x] = 1<<6;
2019     }
2020 }
2021
2022
2023 /**
2024  * allocates tables.
2025  * needs width/height
2026  */
2027 static int alloc_tables(H264Context *h){
2028     MpegEncContext * const s = &h->s;
2029     const int big_mb_num= s->mb_stride * (s->mb_height+1);
2030     int x,y;
2031
2032     CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t))
2033
2034     CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
2035     CHECKED_ALLOCZ(h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
2036     CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2037
2038     CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2039     CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2040     CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2041     CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
2042
2043     memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride)  * sizeof(*h->slice_table_base));
2044     h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2045
2046     CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint32_t));
2047     CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2048     for(y=0; y<s->mb_height; y++){
2049         for(x=0; x<s->mb_width; x++){
2050             const int mb_xy= x + y*s->mb_stride;
2051             const int b_xy = 4*x + 4*y*h->b_stride;
2052             const int b8_xy= 2*x + 2*y*h->b8_stride;
2053
2054             h->mb2b_xy [mb_xy]= b_xy;
2055             h->mb2b8_xy[mb_xy]= b8_xy;
2056         }
2057     }
2058
2059     s->obmc_scratchpad = NULL;
2060
2061     if(!h->dequant4_coeff[0])
2062         init_dequant_tables(h);
2063
2064     return 0;
2065 fail:
2066     free_tables(h);
2067     return -1;
2068 }
2069
2070 /**
2071  * Mimic alloc_tables(), but for every context thread.
2072  */
2073 static void clone_tables(H264Context *dst, H264Context *src){
2074     dst->intra4x4_pred_mode       = src->intra4x4_pred_mode;
2075     dst->non_zero_count           = src->non_zero_count;
2076     dst->slice_table              = src->slice_table;
2077     dst->cbp_table                = src->cbp_table;
2078     dst->mb2b_xy                  = src->mb2b_xy;
2079     dst->mb2b8_xy                 = src->mb2b8_xy;
2080     dst->chroma_pred_mode_table   = src->chroma_pred_mode_table;
2081     dst->mvd_table[0]             = src->mvd_table[0];
2082     dst->mvd_table[1]             = src->mvd_table[1];
2083     dst->direct_table             = src->direct_table;
2084
2085     dst->s.obmc_scratchpad = NULL;
2086     ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2087 }
2088
2089 /**
2090  * Init context
2091  * Allocate buffers which are not shared amongst multiple threads.
2092  */
2093 static int context_init(H264Context *h){
2094     CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2095     CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2096
2097     return 0;
2098 fail:
2099     return -1; // free_tables will clean up for us
2100 }
2101
2102 static av_cold void common_init(H264Context *h){
2103     MpegEncContext * const s = &h->s;
2104
2105     s->width = s->avctx->width;
2106     s->height = s->avctx->height;
2107     s->codec_id= s->avctx->codec->id;
2108
2109     ff_h264_pred_init(&h->hpc, s->codec_id);
2110
2111     h->dequant_coeff_pps= -1;
2112     s->unrestricted_mv=1;
2113     s->decode=1; //FIXME
2114
2115     dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
2116
2117     memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2118     memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
2119 }
2120
2121 static av_cold int decode_init(AVCodecContext *avctx){
2122     H264Context *h= avctx->priv_data;
2123     MpegEncContext * const s = &h->s;
2124
2125     MPV_decode_defaults(s);
2126
2127     s->avctx = avctx;
2128     common_init(h);
2129
2130     s->out_format = FMT_H264;
2131     s->workaround_bugs= avctx->workaround_bugs;
2132
2133     // set defaults
2134 //    s->decode_mb= ff_h263_decode_mb;
2135     s->quarter_sample = 1;
2136     s->low_delay= 1;
2137
2138     if(avctx->codec_id == CODEC_ID_SVQ3)
2139         avctx->pix_fmt= PIX_FMT_YUVJ420P;
2140     else
2141         avctx->pix_fmt= PIX_FMT_YUV420P;
2142
2143     decode_init_vlc();
2144
2145     if(avctx->extradata_size > 0 && avctx->extradata &&
2146        *(char *)avctx->extradata == 1){
2147         h->is_avc = 1;
2148         h->got_avcC = 0;
2149     } else {
2150         h->is_avc = 0;
2151     }
2152
2153     h->thread_context[0] = h;
2154     h->outputed_poc = INT_MIN;
2155     h->prev_poc_msb= 1<<16;
2156     return 0;
2157 }
2158
2159 static int frame_start(H264Context *h){
2160     MpegEncContext * const s = &h->s;
2161     int i;
2162
2163     if(MPV_frame_start(s, s->avctx) < 0)
2164         return -1;
2165     ff_er_frame_start(s);
2166     /*
2167      * MPV_frame_start uses pict_type to derive key_frame.
2168      * This is incorrect for H.264; IDR markings must be used.
2169      * Zero here; IDR markings per slice in frame or fields are ORed in later.
2170      * See decode_nal_units().
2171      */
2172     s->current_picture_ptr->key_frame= 0;
2173
2174     assert(s->linesize && s->uvlinesize);
2175
2176     for(i=0; i<16; i++){
2177         h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2178         h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2179     }
2180     for(i=0; i<4; i++){
2181         h->block_offset[16+i]=
2182         h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2183         h->block_offset[24+16+i]=
2184         h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2185     }
2186
2187     /* can't be in alloc_tables because linesize isn't known there.
2188      * FIXME: redo bipred weight to not require extra buffer? */
2189     for(i = 0; i < s->avctx->thread_count; i++)
2190         if(!h->thread_context[i]->s.obmc_scratchpad)
2191             h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2192
2193     /* some macroblocks will be accessed before they're available */
2194     if(FRAME_MBAFF || s->avctx->thread_count > 1)
2195         memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
2196
2197 //    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2198
2199     // We mark the current picture as non-reference after allocating it, so
2200     // that if we break out due to an error it can be released automatically
2201     // in the next MPV_frame_start().
2202     // SVQ3 as well as most other codecs have only last/next/current and thus
2203     // get released even with set reference, besides SVQ3 and others do not
2204     // mark frames as reference later "naturally".
2205     if(s->codec_id != CODEC_ID_SVQ3)
2206         s->current_picture_ptr->reference= 0;
2207
2208     s->current_picture_ptr->field_poc[0]=
2209     s->current_picture_ptr->field_poc[1]= INT_MAX;
2210     assert(s->current_picture_ptr->long_ref==0);
2211
2212     return 0;
2213 }
2214
/**
 * Saves the rightmost column and the bottom row of the current macroblock.
 *
 * The column goes to h->left_border (luma first, Cb from index 34 on, Cr
 * from index 34+18 on) and the bottom row to h->top_borders[..][mb_x]
 * (16 luma bytes, then 8 Cb and 8 Cr bytes). hl_decode_mb_internal() calls
 * this right before the macroblock is deblocked, so the samples stored are
 * the ones that have not yet been filtered for this macroblock.
 *
 * @param src_y,src_cb,src_cr top-left sample of the macroblock in each plane
 * @param linesize   luma line stride as used for this macroblock
 * @param uvlinesize chroma line stride as used for this macroblock
 * @param simple     when non-zero the MBAFF handling and the gray-only
 *                   check are bypassed
 */
static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
    MpegEncContext * const s = &h->s;
    int i;
    int step    = 1;
    int offset  = 1;
    int uvoffset= 1;
    int top_idx = 1;
    int skiplast= 0;

    // step back one line so that row i of the macroblock is at i+1 lines
    src_y  -=   linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

    if(!simple && FRAME_MBAFF){
        if(s->mb_y&1){
            // odd macroblock row
            offset  = MB_MBAFF ? 1 : 17;
            uvoffset= MB_MBAFF ? 1 : 9;
            if(!MB_MBAFF){
                // also store the second to last line in top_borders[0]
                *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y +  15*linesize);
                *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
                if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
                    *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
                }
            }
        }else{
            // even macroblock row
            if(!MB_MBAFF){
                h->left_border[0]= h->top_borders[0][s->mb_x][15];
                if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    h->left_border[34   ]= h->top_borders[0][s->mb_x][16+7  ];
                    h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
                }
                // the loops below then stop one row early
                skiplast= 1;
            }
            offset  =
            uvoffset=
            top_idx = MB_MBAFF ? 0 : 1;
        }
        // MB_MBAFF macroblocks write every second left_border entry
        step= MB_MBAFF ? 2 : 1;
    }

    // There are two lines saved, the line above the top macroblock of a pair,
    // and the line above the bottom macroblock
    // (the old top border must be read here, before it is overwritten below)
    h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
    for(i=1; i<17 - skiplast; i++){
        h->left_border[offset+i*step]= src_y[15+i*  linesize];
    }

    // bottom luma row of the macroblock becomes the new top border
    *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y +  16*linesize);
    *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);

    if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        // same for both chroma planes
        h->left_border[uvoffset+34   ]= h->top_borders[top_idx][s->mb_x][16+7];
        h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
        for(i=1; i<9 - skiplast; i++){
            h->left_border[uvoffset+34   +i*step]= src_cb[7+i*uvlinesize];
            h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
        }
        *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
        *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
    }
}
2277
/**
 * Exchanges the saved border samples (h->left_border, h->top_borders) with
 * the samples left of and above the current macroblock in the picture.
 *
 * hl_decode_mb_internal() calls this with xchg=1 before intra prediction and
 * with xchg=0 afterwards. For the entries that use the xchg argument, xchg=1
 * swaps buffer and picture, while xchg=0 only copies the buffer content back
 * into the picture. A few entries are always swapped (literal 1).
 *
 * @param src_y,src_cb,src_cr top-left sample of the macroblock in each plane
 * @param linesize   luma line stride as used for this macroblock
 * @param uvlinesize chroma line stride as used for this macroblock
 * @param xchg       see above
 * @param simple     when non-zero the MBAFF handling and the gray-only
 *                   check are bypassed
 */
static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
    MpegEncContext * const s = &h->s;
    int temp8, i;
    uint64_t temp64;
    int deblock_left;
    int deblock_top;
    int mb_xy;
    int step    = 1;
    int offset  = 1;
    int uvoffset= 1;
    int top_idx = 1;

    // same left_border / top_borders indexing as in backup_mb_border()
    if(!simple && FRAME_MBAFF){
        if(s->mb_y&1){
            offset  = MB_MBAFF ? 1 : 17;
            uvoffset= MB_MBAFF ? 1 : 9;
        }else{
            offset  =
            uvoffset=
            top_idx = MB_MBAFF ? 0 : 1;
        }
        step= MB_MBAFF ? 2 : 1;
    }

    if(h->deblocking_filter == 2) {
        // only touch a border whose neighbour has the same slice_table entry
        mb_xy = h->mb_xy;
        deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
        deblock_top  = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
    } else {
        // otherwise only the picture position decides
        deblock_left = (s->mb_x > 0);
        deblock_top =  (s->mb_y > !!MB_FIELD);
    }

    // move to the sample one line above and one column left of the macroblock
    src_y  -=   linesize + 1;
    src_cb -= uvlinesize + 1;
    src_cr -= uvlinesize + 1;

// b always receives the old value of a; a receives b only if xchg is set
#define XCHG(a,b,t,xchg)\
t= a;\
if(xchg)\
    a= b;\
b= t;

    if(deblock_left){
        // left luma column; the row above the macroblock is skipped without a top neighbour
        for(i = !deblock_top; i<16; i++){
            XCHG(h->left_border[offset+i*step], src_y [i*  linesize], temp8, xchg);
        }
        XCHG(h->left_border[offset+i*step], src_y [i*  linesize], temp8, 1);
    }

    if(deblock_top){
        // luma row above the macroblock, plus 8 samples of the next column if there is one
        XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
        XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
        if(s->mb_x+1 < s->mb_width){
            XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
        }
    }

    if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        // the same for both chroma planes
        if(deblock_left){
            for(i = !deblock_top; i<8; i++){
                XCHG(h->left_border[uvoffset+34   +i*step], src_cb[i*uvlinesize], temp8, xchg);
                XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
            }
            XCHG(h->left_border[uvoffset+34   +i*step], src_cb[i*uvlinesize], temp8, 1);
            XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
        }
        if(deblock_top){
            XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
            XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
        }
    }
}
2351
/**
 * Reconstructs the current macroblock (s->mb_x, s->mb_y) into the current
 * picture: intra prediction or motion compensation, addition of the
 * residual held in h->mb, and deblocking when h->deblocking_filter is set.
 *
 * @param simple when non-zero, the branches for field/MBAFF macroblocks,
 *               intra PCM, transform bypass, the gray-only flag and the
 *               non-H.264 (SVQ3) transforms are all compiled out of the
 *               conditions below; the callers pass the constants 1 and 0.
 */
static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
    MpegEncContext * const s = &h->s;
    const int mb_x= s->mb_x;
    const int mb_y= s->mb_y;
    const int mb_xy= h->mb_xy;
    const int mb_type= s->current_picture.mb_type[mb_xy];
    uint8_t  *dest_y, *dest_cb, *dest_cr;
    int linesize, uvlinesize /*dct_offset*/;
    int i;
    int *block_offset = &h->block_offset[0];
    const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
    const int is_h264 = simple || s->codec_id == CODEC_ID_H264;
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
    void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);

    // top-left sample of this macroblock in each plane (16x16 luma, 8x8 chroma)
    dest_y  = s->current_picture.data[0] + (mb_x + mb_y * s->linesize  ) * 16;
    dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
    dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;

    s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
    s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);

    if (!simple && MB_FIELD) {
        // field macroblock: doubled line stride and the second block_offset set
        linesize   = h->mb_linesize   = s->linesize * 2;
        uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
        block_offset = &h->block_offset[24];
        if(mb_y&1){ //FIXME move out of this function?
            dest_y -= s->linesize*15;
            dest_cb-= s->uvlinesize*7;
            dest_cr-= s->uvlinesize*7;
        }
        if(FRAME_MBAFF) {
            // rewrite the cached reference indices of the lists in use
            int list;
            for(list=0; list<h->list_count; list++){
                if(!USES_LIST(mb_type, list))
                    continue;
                if(IS_16X16(mb_type)){
                    int8_t *ref = &h->ref_cache[list][scan8[0]];
                    fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
                }else{
                    for(i=0; i<16; i+=4){
                        int ref = h->ref_cache[list][scan8[i]];
                        if(ref >= 0)
                            fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
                    }
                }
            }
        }
    } else {
        linesize   = h->mb_linesize   = s->linesize;
        uvlinesize = h->mb_uvlinesize = s->uvlinesize;
//        dct_offset = s->linesize * 16;
    }

    if (!simple && IS_INTRA_PCM(mb_type)) {
        // PCM: the samples are copied straight out of h->mb
        for (i=0; i<16; i++) {
            memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
        }
        for (i=0; i<8; i++) {
            memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4,  8);
            memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4,  8);
        }
    } else {
        if(IS_INTRA(mb_type)){
            // put the saved border samples into the picture for the prediction
            if(h->deblocking_filter)
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);

            if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
                h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
            }

            if(IS_INTRA4x4(mb_type)){
                // predict each luma block and add its residual right away
                if(simple || !s->encoding){
                    if(IS_8x8DCT(mb_type)){
                        if(transform_bypass){
                            idct_dc_add =
                            idct_add    = s->dsp.add_pixels8;
                        }else{
                            idct_dc_add = s->dsp.h264_idct8_dc_add;
                            idct_add    = s->dsp.h264_idct8_add;
                        }
                        for(i=0; i<16; i+=4){
                            uint8_t * const ptr= dest_y + block_offset[i];
                            const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
                            if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
                                h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
                            }else{
                                const int nnz = h->non_zero_count_cache[ scan8[i] ];
                                h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
                                                            (h->topright_samples_available<<i)&0x4000, linesize);
                                if(nnz){
                                    if(nnz == 1 && h->mb[i*16])
                                        idct_dc_add(ptr, h->mb + i*16, linesize);
                                    else
                                        idct_add   (ptr, h->mb + i*16, linesize);
                                }
                            }
                        }
                    }else{
                        if(transform_bypass){
                            idct_dc_add =
                            idct_add    = s->dsp.add_pixels4;
                        }else{
                            idct_dc_add = s->dsp.h264_idct_dc_add;
                            idct_add    = s->dsp.h264_idct_add;
                        }
                        for(i=0; i<16; i++){
                            uint8_t * const ptr= dest_y + block_offset[i];
                            const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];

                            if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
                                h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
                            }else{
                                uint8_t *topright;
                                int nnz, tr;
                                if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
                                    const int topright_avail= (h->topright_samples_available<<i)&0x8000;
                                    assert(mb_y || linesize <= block_offset[i]);
                                    if(!topright_avail){
                                        // no top-right samples: replicate the last sample of the row above
                                        tr= ptr[3 - linesize]*0x01010101;
                                        topright= (uint8_t*) &tr;
                                    }else
                                        topright= ptr + 4 - linesize;
                                }else
                                    topright= NULL;

                                h->hpc.pred4x4[ dir ](ptr, topright, linesize);
                                nnz = h->non_zero_count_cache[ scan8[i] ];
                                if(nnz){
                                    if(is_h264){
                                        if(nnz == 1 && h->mb[i*16])
                                            idct_dc_add(ptr, h->mb + i*16, linesize);
                                        else
                                            idct_add   (ptr, h->mb + i*16, linesize);
                                    }else
                                        svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
                                }
                            }
                        }
                    }
                }
            }else{
                // any other intra type: one 16x16 prediction, then the luma DC transform
                h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
                if(is_h264){
                    if(!transform_bypass)
                        h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
                }else
                    svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
            }
            // second call with xchg=0, pairing with the xchg=1 call above
            if(h->deblocking_filter)
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
        }else if(is_h264){
            // inter macroblock: motion compensation
            hl_motion(h, dest_y, dest_cb, dest_cr,
                      s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                      s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                      s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
        }


        // luma residual for everything except intra 4x4 (already added above)
        if(!IS_INTRA4x4(mb_type)){
            if(is_h264){
                if(IS_INTRA16x16(mb_type)){
                    if(transform_bypass){
                        if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
                            h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
                        }else{
                            for(i=0; i<16; i++){
                                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
                                    s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
                            }
                        }
                    }else{
                         s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
                    }
                }else if(h->cbp&15){
                    if(transform_bypass){
                        const int di = IS_8x8DCT(mb_type) ? 4 : 1;
                        idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
                        for(i=0; i<16; i+=di){
                            if(h->non_zero_count_cache[ scan8[i] ]){
                                idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
                            }
                        }
                    }else{
                        if(IS_8x8DCT(mb_type)){
                            s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
                        }else{
                            s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
                        }
                    }
                }
            }else{
                for(i=0; i<16; i++){
                    if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
                        uint8_t * const ptr= dest_y + block_offset[i];
                        svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
                    }
                }
            }
        }

        // chroma residual: blocks 16..19 go to Cb, 20..23 to Cr
        if((simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
            uint8_t *dest[2] = {dest_cb, dest_cr};
            if(transform_bypass){
                if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
                }else{
                    idct_add = s->dsp.add_pixels4;
                    for(i=16; i<16+8; i++){
                        if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
                            idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
                    }
                }
            }else{
                chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
                chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
                if(is_h264){
                    idct_add = s->dsp.h264_idct_add;
                    idct_dc_add = s->dsp.h264_idct_dc_add;
                    for(i=16; i<16+8; i++){
                        if(h->non_zero_count_cache[ scan8[i] ])
                            idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
                        else if(h->mb[i*16])
                            idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
                    }
                }else{
                    for(i=16; i<16+8; i++){
                        if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
                            uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
                            svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
                        }
                    }
                }
            }
        }
    }
    // clear the coefficient buffer whenever it may have been written to
    if(h->cbp || IS_INTRA(mb_type))
        s->dsp.clear_blocks(h->mb);

    if(h->deblocking_filter) {
        // save the border samples first, then filter this macroblock
        backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
        fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
        h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
        h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
        if (!simple && FRAME_MBAFF) {
            filter_mb     (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
        } else {
            filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
        }
    }
}
2605
2606 /**
2607  * Process a macroblock; this case avoids checks for expensive uncommon cases.
2608  */
2609 static void hl_decode_mb_simple(H264Context *h){
2610     hl_decode_mb_internal(h, 1);
2611 }
2612
2613 /**
2614  * Process a macroblock; this handles edge cases, such as interlacing.
2615  */
2616 static void av_noinline hl_decode_mb_complex(H264Context *h){
2617     hl_decode_mb_internal(h, 0);
2618 }
2619
2620 static void hl_decode_mb(H264Context *h){
2621     MpegEncContext * const s = &h->s;
2622     const int mb_xy= h->mb_xy;
2623     const int mb_type= s->current_picture.mb_type[mb_xy];
2624     int is_complex = ENABLE_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
2625
2626     if(ENABLE_H264_ENCODER && !s->decode)
2627         return;
2628
2629     if (is_complex)
2630         hl_decode_mb_complex(h);
2631     else hl_decode_mb_simple(h);
2632 }
2633
2634 static void pic_as_field(Picture *pic, const int parity){
2635     int i;
2636     for (i = 0; i < 4; ++i) {
2637         if (parity == PICT_BOTTOM_FIELD)
2638             pic->data[i] += pic->linesize[i];
2639         pic->reference = parity;
2640         pic->linesize[i] *= 2;
2641     }
2642     pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
2643 }
2644
2645 static int split_field_copy(Picture *dest, Picture *src,
2646                             int parity, int id_add){
2647     int match = !!(src->reference & parity);
2648
2649     if (match) {
2650         *dest = *src;
2651         if(parity != PICT_FRAME){
2652             pic_as_field(dest, parity);
2653             dest->pic_id *= 2;
2654             dest->pic_id += id_add;
2655         }
2656     }
2657
2658     return match;
2659 }
2660
2661 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2662     int i[2]={0};
2663     int index=0;
2664
2665     while(i[0]<len || i[1]<len){
2666         while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2667             i[0]++;
2668         while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2669             i[1]++;
2670         if(i[0] < len){
2671             in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2672             split_field_copy(&def[index++], in[ i[0]++ ], sel  , 1);
2673         }
2674         if(i[1] < len){
2675             in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2676             split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
2677         }
2678     }
2679
2680     return index;
2681 }
2682
2683 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2684     int i, best_poc;
2685     int out_i= 0;
2686
2687     for(;;){
2688         best_poc= dir ? INT_MIN : INT_MAX;
2689
2690         for(i=0; i<len; i++){
2691             const int poc= src[i]->poc;
2692             if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2693                 best_poc= poc;
2694                 sorted[out_i]= src[i];
2695             }
2696         }
2697         if(best_poc == (dir ? INT_MIN : INT_MAX))
2698             break;
2699         limit= sorted[out_i++]->poc - dir;
2700     }
2701     return out_i;
2702 }
2703
2704 /**
2705  * fills the default_ref_list.
2706  */
2707 static int fill_default_ref_list(H264Context *h){
2708     MpegEncContext * const s = &h->s;
2709     int i, len;
2710
2711     if(h->slice_type_nos==FF_B_TYPE){
2712         Picture *sorted[32];
2713         int cur_poc, list;
2714         int lens[2];
2715
2716         if(FIELD_PICTURE)
2717             cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2718         else
2719             cur_poc= s->current_picture_ptr->poc;
2720
2721         for(list= 0; list<2; list++){
2722             len= add_sorted(sorted    , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2723             len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2724             assert(len<=32);
2725             len= build_def_list(h->default_ref_list[list]    , sorted     , len, 0, s->picture_structure);
2726             len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2727             assert(len<=32);
2728
2729             if(len < h->ref_count[list])
2730                 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
2731             lens[list]= len;
2732         }
2733
2734         if(lens[0] == lens[1] && lens[1] > 1){
2735             for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2736             if(i == lens[0])
2737                 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2738         }
2739     }else{
2740         len = build_def_list(h->default_ref_list[0]    , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2741         len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16                , 1, s->picture_structure);
2742         assert(len <= 32);
2743         if(len < h->ref_count[0])
2744             memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
2745     }
2746 #ifdef TRACE
2747     for (i=0; i<h->ref_count[0]; i++) {
2748         tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2749     }
2750     if(h->slice_type_nos==FF_B_TYPE){
2751         for (i=0; i<h->ref_count[1]; i++) {
2752             tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2753         }
2754     }
2755 #endif
2756     return 0;
2757 }
2758
2759 static void print_short_term(H264Context *h);
2760 static void print_long_term(H264Context *h);
2761
2762 /**
2763  * Extract structure information about the picture described by pic_num in
2764  * the current decoding context (frame or field). Note that pic_num is
2765  * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2766  * @param pic_num picture number for which to extract structure information
2767  * @param structure one of PICT_XXX describing structure of picture
2768  *                      with pic_num
2769  * @return frame number (short term) or long term index of picture
2770  *         described by pic_num
2771  */
2772 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2773     MpegEncContext * const s = &h->s;
2774
2775     *structure = s->picture_structure;
2776     if(FIELD_PICTURE){
2777         if (!(pic_num & 1))
2778             /* opposite field */
2779             *structure ^= PICT_FRAME;
2780         pic_num >>= 1;
2781     }
2782
2783     return pic_num;
2784 }
2785
2786 static int decode_ref_pic_list_reordering(H264Context *h){
2787     MpegEncContext * const s = &h->s;
2788     int list, index, pic_structure;
2789
2790     print_short_term(h);
2791     print_long_term(h);
2792
2793     for(list=0; list<h->list_count; list++){
2794         memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
2795
2796         if(get_bits1(&s->gb)){
2797             int pred= h->curr_pic_num;
2798
2799             for(index=0; ; index++){
2800                 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
2801                 unsigned int pic_id;
2802                 int i;
2803                 Picture *ref = NULL;
2804
2805                 if(reordering_of_pic_nums_idc==3)
2806                     break;
2807
2808                 if(index >= h->ref_count[list]){
2809                     av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2810                     return -1;
2811                 }
2812
2813                 if(reordering_of_pic_nums_idc<3){
2814                     if(reordering_of_pic_nums_idc<2){
2815                         const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2816                         int frame_num;
2817
2818                         if(abs_diff_pic_num > h->max_pic_num){
2819                             av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2820                             return -1;
2821                         }
2822
2823                         if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2824                         else                                pred+= abs_diff_pic_num;
2825                         pred &= h->max_pic_num - 1;
2826
2827                         frame_num = pic_num_extract(h, pred, &pic_structure);
2828
2829                         for(i= h->short_ref_count-1; i>=0; i--){
2830                             ref = h->short_ref[i];
2831                             assert(ref->reference);
2832                             assert(!ref->long_ref);
2833                             if(
2834                                    ref->frame_num == frame_num &&
2835                                    (ref->reference & pic_structure)
2836                               )
2837                                 break;
2838                         }
2839                         if(i>=0)
2840                             ref->pic_id= pred;
2841                     }else{
2842                         int long_idx;
2843                         pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2844
2845                         long_idx= pic_num_extract(h, pic_id, &pic_structure);
2846
2847                         if(long_idx>31){
2848                             av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2849                             return -1;
2850                         }
2851                         ref = h->long_ref[long_idx];
2852                         assert(!(ref && !ref->reference));
2853                         if(ref && (ref->reference & pic_structure)){
2854                             ref->pic_id= pic_id;
2855                             assert(ref->long_ref);
2856                             i=0;
2857                         }else{
2858                             i=-1;
2859                         }
2860                     }
2861
2862                     if (i < 0) {
2863                         av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2864                         memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
2865                     } else {
2866                         for(i=index; i+1<h->ref_count[list]; i++){
2867                             if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2868                                 break;
2869                         }
2870                         for(; i > index; i--){
2871                             h->ref_list[list][i]= h->ref_list[list][i-1];
2872                         }
2873                         h->ref_list[list][index]= *ref;
2874                         if (FIELD_PICTURE){
2875                             pic_as_field(&h->ref_list[list][index], pic_structure);
2876                         }
2877                     }
2878                 }else{
2879                     av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
2880                     return -1;
2881                 }
2882             }
2883         }
2884     }
2885     for(list=0; list<h->list_count; list++){
2886         for(index= 0; index < h->ref_count[list]; index++){
2887             if(!h->ref_list[list][index].data[0]){
2888                 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2889                 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
2890             }
2891         }
2892     }
2893
2894     return 0;
2895 }
2896
2897 static void fill_mbaff_ref_list(H264Context *h){
2898     int list, i, j;
2899     for(list=0; list<2; list++){ //FIXME try list_count
2900         for(i=0; i<h->ref_count[list]; i++){
2901             Picture *frame = &h->ref_list[list][i];
2902             Picture *field = &h->ref_list[list][16+2*i];
2903             field[0] = *frame;
2904             for(j=0; j<3; j++)
2905                 field[0].linesize[j] <<= 1;
2906             field[0].reference = PICT_TOP_FIELD;
2907             field[0].poc= field[0].field_poc[0];
2908             field[1] = field[0];
2909             for(j=0; j<3; j++)
2910                 field[1].data[j] += frame->linesize[j];
2911             field[1].reference = PICT_BOTTOM_FIELD;
2912             field[1].poc= field[1].field_poc[1];
2913
2914             h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2915             h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2916             for(j=0; j<2; j++){
2917                 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2918                 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
2919             }
2920         }
2921     }
2922     for(j=0; j<h->ref_count[1]; j++){
2923         for(i=0; i<h->ref_count[0]; i++)
2924             h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2925         memcpy(h->implicit_weight[16+2*j],   h->implicit_weight[j], sizeof(*h->implicit_weight));
2926         memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
2927     }
2928 }
2929
2930 static int pred_weight_table(H264Context *h){
2931     MpegEncContext * const s = &h->s;
2932     int list, i;
2933     int luma_def, chroma_def;
2934
2935     h->use_weight= 0;
2936     h->use_weight_chroma= 0;
2937     h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2938     h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
2939     luma_def = 1<<h->luma_log2_weight_denom;
2940     chroma_def = 1<<h->chroma_log2_weight_denom;
2941
2942     for(list=0; list<2; list++){
2943         for(i=0; i<h->ref_count[list]; i++){
2944             int luma_weight_flag, chroma_weight_flag;
2945
2946             luma_weight_flag= get_bits1(&s->gb);
2947             if(luma_weight_flag){
2948                 h->luma_weight[list][i]= get_se_golomb(&s->gb);
2949                 h->luma_offset[list][i]= get_se_golomb(&s->gb);
2950                 if(   h->luma_weight[list][i] != luma_def
2951                    || h->luma_offset[list][i] != 0)
2952                     h->use_weight= 1;
2953             }else{
2954                 h->luma_weight[list][i]= luma_def;
2955                 h->luma_offset[list][i]= 0;
2956             }
2957
2958             if(CHROMA){
2959                 chroma_weight_flag= get_bits1(&s->gb);
2960                 if(chroma_weight_flag){
2961                     int j;
2962                     for(j=0; j<2; j++){
2963                         h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
2964                         h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
2965                         if(   h->chroma_weight[list][i][j] != chroma_def
2966                         || h->chroma_offset[list][i][j] != 0)
2967                             h->use_weight_chroma= 1;
2968                     }
2969                 }else{
2970                     int j;
2971                     for(j=0; j<2; j++){
2972                         h->chroma_weight[list][i][j]= chroma_def;
2973                         h->chroma_offset[list][i][j]= 0;
2974                     }
2975                 }
2976             }
2977         }
2978         if(h->slice_type_nos != FF_B_TYPE) break;
2979     }
2980     h->use_weight= h->use_weight || h->use_weight_chroma;
2981     return 0;
2982 }
2983
2984 static void implicit_weight_table(H264Context *h){
2985     MpegEncContext * const s = &h->s;
2986     int ref0, ref1;
2987     int cur_poc = s->current_picture_ptr->poc;
2988
2989     if(   h->ref_count[0] == 1 && h->ref_count[1] == 1
2990        && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
2991         h->use_weight= 0;
2992         h->use_weight_chroma= 0;
2993         return;
2994     }
2995
2996     h->use_weight= 2;
2997     h->use_weight_chroma= 2;
2998     h->luma_log2_weight_denom= 5;
2999     h->chroma_log2_weight_denom= 5;
3000
3001     for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3002         int poc0 = h->ref_list[0][ref0].poc;
3003         for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3004             int poc1 = h->ref_list[1][ref1].poc;
3005             int td = av_clip(poc1 - poc0, -128, 127);
3006             if(td){
3007                 int tb = av_clip(cur_poc - poc0, -128, 127);
3008                 int tx = (16384 + (FFABS(td) >> 1)) / td;
3009                 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3010                 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3011                     h->implicit_weight[ref0][ref1] = 32;
3012                 else
3013                     h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3014             }else
3015                 h->implicit_weight[ref0][ref1] = 32;
3016         }
3017     }
3018 }
3019
3020 /**
3021  * Mark a picture as no longer needed for reference. The refmask
3022  * argument allows unreferencing of individual fields or the whole frame.
3023  * If the picture becomes entirely unreferenced, but is being held for
3024  * display purposes, it is marked as such.
3025  * @param refmask mask of fields to unreference; the mask is bitwise
3026  *                anded with the reference marking of pic
3027  * @return non-zero if pic becomes entirely unreferenced (except possibly
3028  *         for display purposes) zero if one of the fields remains in
3029  *         reference
3030  */
3031 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
3032     int i;
3033     if (pic->reference &= refmask) {
3034         return 0;
3035     } else {
3036         for(i = 0; h->delayed_pic[i]; i++)
3037             if(pic == h->delayed_pic[i]){
3038                 pic->reference=DELAYED_PIC_REF;
3039                 break;
3040             }
3041         return 1;
3042     }
3043 }
3044
3045 /**
3046  * instantaneous decoder refresh.
3047  */
3048 static void idr(H264Context *h){
3049     int i;
3050
3051     for(i=0; i<16; i++){
3052         remove_long(h, i, 0);
3053     }
3054     assert(h->long_ref_count==0);
3055
3056     for(i=0; i<h->short_ref_count; i++){
3057         unreference_pic(h, h->short_ref[i], 0);
3058         h->short_ref[i]= NULL;
3059     }
3060     h->short_ref_count=0;
3061     h->prev_frame_num= 0;
3062     h->prev_frame_num_offset= 0;
3063     h->prev_poc_msb=
3064     h->prev_poc_lsb= 0;
3065 }
3066
3067 /* forget old pics after a seek */
3068 static void flush_dpb(AVCodecContext *avctx){
3069     H264Context *h= avctx->priv_data;
3070     int i;
3071     for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3072         if(h->delayed_pic[i])
3073             h->delayed_pic[i]->reference= 0;
3074         h->delayed_pic[i]= NULL;
3075     }
3076     h->outputed_poc= INT_MIN;
3077     idr(h);
3078     if(h->s.current_picture_ptr)
3079         h->s.current_picture_ptr->reference= 0;
3080     h->s.first_field= 0;
3081     ff_mpeg_flush(avctx);
3082 }
3083
3084 /**
3085  * Find a Picture in the short term reference list by frame number.
3086  * @param frame_num frame number to search for
3087  * @param idx the index into h->short_ref where returned picture is found
3088  *            undefined if no picture found.
3089  * @return pointer to the found picture, or NULL if no pic with the provided
3090  *                 frame number is found
3091  */
3092 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3093     MpegEncContext * const s = &h->s;
3094     int i;
3095
3096     for(i=0; i<h->short_ref_count; i++){
3097         Picture *pic= h->short_ref[i];
3098         if(s->avctx->debug&FF_DEBUG_MMCO)
3099             av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3100         if(pic->frame_num == frame_num) {
3101             *idx = i;
3102             return pic;
3103         }
3104     }
3105     return NULL;
3106 }
3107
3108 /**
3109  * Remove a picture from the short term reference list by its index in
3110  * that list.  This does no checking on the provided index; it is assumed
3111  * to be valid. Other list entries are shifted down.
3112  * @param i index into h->short_ref of picture to remove.
3113  */
3114 static void remove_short_at_index(H264Context *h, int i){
3115     assert(i >= 0 && i < h->short_ref_count);
3116     h->short_ref[i]= NULL;
3117     if (--h->short_ref_count)
3118         memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3119 }
3120
3121 /**
3122  *
3123  * @return the removed picture or NULL if an error occurs
3124  */
3125 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3126     MpegEncContext * const s = &h->s;
3127     Picture *pic;
3128     int i;
3129
3130     if(s->avctx->debug&FF_DEBUG_MMCO)
3131         av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3132
3133     pic = find_short(h, frame_num, &i);
3134     if (pic){
3135         if(unreference_pic(h, pic, ref_mask))
3136         remove_short_at_index(h, i);
3137     }
3138
3139     return pic;
3140 }
3141
3142 /**
3143  * Remove a picture from the long term reference list by its index in
3144  * that list.
3145  * @return the removed picture or NULL if an error occurs
3146  */
3147 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3148     Picture *pic;
3149
3150     pic= h->long_ref[i];
3151     if (pic){
3152         if(unreference_pic(h, pic, ref_mask)){
3153             assert(h->long_ref[i]->long_ref == 1);
3154             h->long_ref[i]->long_ref= 0;
3155             h->long_ref[i]= NULL;
3156             h->long_ref_count--;
3157         }
3158     }
3159
3160     return pic;
3161 }
3162
3163 /**
3164  * print short term list
3165  */
3166 static void print_short_term(H264Context *h) {
3167     uint32_t i;
3168     if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3169         av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3170         for(i=0; i<h->short_ref_count; i++){
3171             Picture *pic= h->short_ref[i];
3172             av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3173         }
3174     }
3175 }
3176
3177 /**
3178  * print long term list
3179  */
3180 static void print_long_term(H264Context *h) {
3181     uint32_t i;
3182     if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3183         av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3184         for(i = 0; i < 16; i++){
3185             Picture *pic= h->long_ref[i];
3186             if (pic) {
3187                 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3188             }
3189         }
3190     }
3191 }
3192
3193 /**
3194  * Executes the reference picture marking (memory management control operations).
3195  */
3196 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3197     MpegEncContext * const s = &h->s;
3198     int i, j;
3199     int current_ref_assigned=0;
3200     Picture *pic;
3201
3202     if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3203         av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3204
3205     for(i=0; i<mmco_count; i++){
3206         int structure, frame_num;
3207         if(s->avctx->debug&FF_DEBUG_MMCO)
3208             av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
3209
3210         if(   mmco[i].opcode == MMCO_SHORT2UNUSED
3211            || mmco[i].opcode == MMCO_SHORT2LONG){
3212             frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3213             pic = find_short(h, frame_num, &j);
3214             if(!pic){
3215                 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3216                    || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3217                 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3218                 continue;
3219             }
3220         }
3221
3222         switch(mmco[i].opcode){
3223         case MMCO_SHORT2UNUSED:
3224             if(s->avctx->debug&FF_DEBUG_MMCO)
3225                 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3226             remove_short(h, frame_num, structure ^ PICT_FRAME);
3227             break;
3228         case MMCO_SHORT2LONG:
3229                 if (h->long_ref[mmco[i].long_arg] != pic)
3230                     remove_long(h, mmco[i].long_arg, 0);
3231
3232                 remove_short_at_index(h, j);
3233                 h->long_ref[ mmco[i].long_arg ]= pic;
3234                 if (h->long_ref[ mmco[i].long_arg ]){
3235                     h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3236                     h->long_ref_count++;
3237                 }
3238             break;
3239         case MMCO_LONG2UNUSED:
3240             j = pic_num_extract(h, mmco[i].long_arg, &structure);
3241             pic = h->long_ref[j];
3242             if (pic) {
3243                 remove_long(h, j, structure ^ PICT_FRAME);
3244             } else if(s->avctx->debug&FF_DEBUG_MMCO)
3245                 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3246             break;
3247         case MMCO_LONG:
3248                     // Comment below left from previous code as it is an interresting note.
3249                     /* First field in pair is in short term list or
3250                      * at a different long term index.
3251                      * This is not allowed; see 7.4.3.3, notes 2 and 3.
3252                      * Report the problem and keep the pair where it is,
3253                      * and mark this field valid.
3254                      */
3255
3256             if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3257                 remove_long(h, mmco[i].long_arg, 0);
3258
3259                 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3260                 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3261                 h->long_ref_count++;
3262             }
3263
3264             s->current_picture_ptr->reference |= s->picture_structure;
3265             current_ref_assigned=1;
3266             break;
3267         case MMCO_SET_MAX_LONG:
3268             assert(mmco[i].long_arg <= 16);
3269             // just remove the long term which index is greater than new max
3270             for(j = mmco[i].long_arg; j<16; j++){
3271                 remove_long(h, j, 0);
3272             }
3273             break;
3274         case MMCO_RESET:
3275             while(h->short_ref_count){
3276                 remove_short(h, h->short_ref[0]->frame_num, 0);
3277             }
3278             for(j = 0; j < 16; j++) {
3279                 remove_long(h, j, 0);
3280             }
3281             s->current_picture_ptr->poc=
3282             s->current_picture_ptr->field_poc[0]=
3283             s->current_picture_ptr->field_poc[1]=
3284             h->poc_lsb=
3285             h->poc_msb=
3286             h->frame_num=
3287             s->current_picture_ptr->frame_num= 0;
3288             break;
3289         default: assert(0);
3290         }
3291     }
3292
3293     if (!current_ref_assigned) {
3294         /* Second field of complementary field pair; the first field of
3295          * which is already referenced. If short referenced, it
3296          * should be first entry in short_ref. If not, it must exist
3297          * in long_ref; trying to put it on the short list here is an
3298          * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3299          */
3300         if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3301             /* Just mark the second field valid */
3302             s->current_picture_ptr->reference = PICT_FRAME;
3303         } else if (s->current_picture_ptr->long_ref) {
3304             av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3305                                              "assignment for second field "
3306                                              "in complementary field pair "
3307                                              "(first field is long term)\n");
3308         } else {
3309             pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3310             if(pic){
3311                 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3312             }
3313
3314             if(h->short_ref_count)
3315                 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3316
3317             h->short_ref[0]= s->current_picture_ptr;
3318             h->short_ref_count++;
3319             s->current_picture_ptr->reference |= s->picture_structure;
3320         }
3321     }
3322
3323     if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3324
3325         /* We have too many reference frames, probably due to corrupted
3326          * stream. Need to discard one frame. Prevents overrun of the
3327          * short_ref and long_ref buffers.
3328          */
3329         av_log(h->s.avctx, AV_LOG_ERROR,
3330                "number of reference frames exceeds max (probably "
3331                "corrupt input), discarding one\n");
3332
3333         if (h->long_ref_count && !h->short_ref_count) {
3334             for (i = 0; i < 16; ++i)
3335                 if (h->long_ref[i])
3336                     break;
3337
3338             assert(i < 16);
3339             remove_long(h, i, 0);
3340         } else {
3341             pic = h->short_ref[h->short_ref_count - 1];
3342             remove_short(h, pic->frame_num, 0);
3343         }
3344     }
3345
3346     print_short_term(h);
3347     print_long_term(h);
3348     return 0;
3349 }
3350
3351 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3352     MpegEncContext * const s = &h->s;
3353     int i;
3354
3355     h->mmco_index= 0;
3356     if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
3357         s->broken_link= get_bits1(gb) -1;
3358         if(get_bits1(gb)){
3359             h->mmco[0].opcode= MMCO_LONG;
3360             h->mmco[0].long_arg= 0;
3361             h->mmco_index= 1;
3362         }
3363     }else{
3364         if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3365             for(i= 0; i<MAX_MMCO_COUNT; i++) {
3366                 MMCOOpcode opcode= get_ue_golomb(gb);
3367
3368                 h->mmco[i].opcode= opcode;
3369                 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
3370                     h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3371 /*                    if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3372                         av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3373                         return -1;
3374                     }*/
3375                 }
3376                 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3377                     unsigned int long_arg= get_ue_golomb(gb);
3378                     if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3379                         av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3380                         return -1;
3381                     }
3382                     h->mmco[i].long_arg= long_arg;
3383                 }
3384
3385                 if(opcode > (unsigned)MMCO_LONG){
3386                     av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3387                     return -1;
3388                 }
3389                 if(opcode == MMCO_END)
3390                     break;
3391             }
3392             h->mmco_index= i;
3393         }else{
3394             assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3395
3396             if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3397                     !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3398                 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3399                 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3400                 h->mmco_index= 1;
3401                 if (FIELD_PICTURE) {
3402                     h->mmco[0].short_pic_num *= 2;
3403                     h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3404                     h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
3405                     h->mmco_index= 2;
3406                 }
3407             }
3408         }
3409     }
3410
3411     return 0;
3412 }
3413
3414 static int init_poc(H264Context *h){
3415     MpegEncContext * const s = &h->s;
3416     const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3417     int field_poc[2];
3418     Picture *cur = s->current_picture_ptr;
3419
3420     h->frame_num_offset= h->prev_frame_num_offset;
3421     if(h->frame_num < h->prev_frame_num)
3422         h->frame_num_offset += max_frame_num;
3423
3424     if(h->sps.poc_type==0){
3425         const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
3426
3427         if     (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3428             h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3429         else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3430             h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3431         else
3432             h->poc_msb = h->prev_poc_msb;
3433 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3434         field_poc[0] =
3435         field_poc[1] = h->poc_msb + h->poc_lsb;
3436         if(s->picture_structure == PICT_FRAME)
3437             field_poc[1] += h->delta_poc_bottom;
3438     }else if(h->sps.poc_type==1){
3439         int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3440         int i;
3441
3442         if(h->sps.poc_cycle_length != 0)
3443             abs_frame_num = h->frame_num_offset + h->frame_num;
3444         else
3445             abs_frame_num = 0;
3446
3447         if(h->nal_ref_idc==0 && abs_frame_num > 0)
3448             abs_frame_num--;
3449
3450         expected_delta_per_poc_cycle = 0;
3451         for(i=0; i < h->sps.poc_cycle_length; i++)
3452             expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3453
3454         if(abs_frame_num > 0){
3455             int poc_cycle_cnt          = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3456             int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3457
3458             expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3459             for(i = 0; i <= frame_num_in_poc_cycle; i++)
3460                 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3461         } else
3462             expectedpoc = 0;
3463
3464         if(h->nal_ref_idc == 0)
3465             expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3466
3467         field_poc[0] = expectedpoc + h->delta_poc[0];
3468         field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3469
3470         if(s->picture_structure == PICT_FRAME)
3471             field_poc[1] += h->delta_poc[1];
3472     }else{
3473         int poc= 2*(h->frame_num_offset + h->frame_num);
3474
3475         if(!h->nal_ref_idc)
3476             poc--;
3477
3478         field_poc[0]= poc;
3479         field_poc[1]= poc;
3480     }
3481
3482     if(s->picture_structure != PICT_BOTTOM_FIELD)
3483         s->current_picture_ptr->field_poc[0]= field_poc[0];
3484     if(s->picture_structure != PICT_TOP_FIELD)
3485         s->current_picture_ptr->field_poc[1]= field_poc[1];
3486     cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3487
3488     return 0;
3489 }
3490
3491
3492 /**
3493  * initialize scan tables
3494  */
3495 static void init_scan_tables(H264Context *h){
3496     MpegEncContext * const s = &h->s;
3497     int i;
3498     if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3499         memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3500         memcpy(h-> field_scan,  field_scan, 16*sizeof(uint8_t));
3501     }else{
3502         for(i=0; i<16; i++){
3503 #define T(x) (x>>2) | ((x<<2) & 0xF)
3504             h->zigzag_scan[i] = T(zigzag_scan[i]);
3505             h-> field_scan[i] = T( field_scan[i]);
3506 #undef T
3507         }
3508     }
3509     if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3510         memcpy(h->zigzag_scan8x8,       zigzag_scan8x8,       64*sizeof(uint8_t));
3511         memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3512         memcpy(h->field_scan8x8,        field_scan8x8,        64*sizeof(uint8_t));
3513         memcpy(h->field_scan8x8_cavlc,  field_scan8x8_cavlc,  64*sizeof(uint8_t));
3514     }else{
3515         for(i=0; i<64; i++){
3516 #define T(x) (x>>3) | ((x&7)<<3)
3517             h->zigzag_scan8x8[i]       = T(zigzag_scan8x8[i]);
3518             h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3519             h->field_scan8x8[i]        = T(field_scan8x8[i]);
3520             h->field_scan8x8_cavlc[i]  = T(field_scan8x8_cavlc[i]);
3521 #undef T
3522         }
3523     }
3524     if(h->sps.transform_bypass){ //FIXME same ugly
3525         h->zigzag_scan_q0          = zigzag_scan;
3526         h->zigzag_scan8x8_q0       = zigzag_scan8x8;
3527         h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3528         h->field_scan_q0           = field_scan;
3529         h->field_scan8x8_q0        = field_scan8x8;
3530         h->field_scan8x8_cavlc_q0  = field_scan8x8_cavlc;
3531     }else{
3532         h->zigzag_scan_q0          = h->zigzag_scan;
3533         h->zigzag_scan8x8_q0       = h->zigzag_scan8x8;
3534         h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3535         h->field_scan_q0           = h->field_scan;
3536         h->field_scan8x8_q0        = h->field_scan8x8;
3537         h->field_scan8x8_cavlc_q0  = h->field_scan8x8_cavlc;
3538     }
3539 }
3540
3541 /**
3542  * Replicates H264 "master" context to thread contexts.
3543  */
3544 static void clone_slice(H264Context *dst, H264Context *src)
3545 {
3546     memcpy(dst->block_offset,     src->block_offset, sizeof(dst->block_offset));
3547     dst->s.current_picture_ptr  = src->s.current_picture_ptr;
3548     dst->s.current_picture      = src->s.current_picture;
3549     dst->s.linesize             = src->s.linesize;
3550     dst->s.uvlinesize           = src->s.uvlinesize;
3551     dst->s.first_field          = src->s.first_field;
3552
3553     dst->prev_poc_msb           = src->prev_poc_msb;
3554     dst->prev_poc_lsb           = src->prev_poc_lsb;
3555     dst->prev_frame_num_offset  = src->prev_frame_num_offset;
3556     dst->prev_frame_num         = src->prev_frame_num;
3557     dst->short_ref_count        = src->short_ref_count;
3558
3559     memcpy(dst->short_ref,        src->short_ref,        sizeof(dst->short_ref));
3560     memcpy(dst->long_ref,         src->long_ref,         sizeof(dst->long_ref));
3561     memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3562     memcpy(dst->ref_list,         src->ref_list,         sizeof(dst->ref_list));
3563
3564     memcpy(dst->dequant4_coeff,   src->dequant4_coeff,   sizeof(src->dequant4_coeff));
3565     memcpy(dst->dequant8_coeff,   src->dequant8_coeff,   sizeof(src->dequant8_coeff));
3566 }
3567
3568 /**
3569  * decodes a slice header.
3570  * This will also call MPV_common_init() and frame_start() as needed.
3571  *
3572  * @param h h264context
3573  * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3574  *
3575  * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3576  */
3577 static int decode_slice_header(H264Context *h, H264Context *h0){
3578     MpegEncContext * const s = &h->s;
3579     MpegEncContext * const s0 = &h0->s;
3580     unsigned int first_mb_in_slice;
3581     unsigned int pps_id;
3582     int num_ref_idx_active_override_flag;
3583     unsigned int slice_type, tmp, i, j;
3584     int default_ref_list_done = 0;
3585     int last_pic_structure;
3586
3587     s->dropable= h->nal_ref_idc == 0;
3588
3589     if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3590         s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3591         s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3592     }else{
3593         s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3594         s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3595     }
3596
3597     first_mb_in_slice= get_ue_golomb(&s->gb);
3598
3599     if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3600         h0->current_slice = 0;
3601         if (!s0->first_field)
3602             s->current_picture_ptr= NULL;
3603     }
3604
3605     slice_type= get_ue_golomb(&s->gb);
3606     if(slice_type > 9){
3607         av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3608         return -1;
3609     }
3610     if(slice_type > 4){
3611         slice_type -= 5;
3612         h->slice_type_fixed=1;
3613     }else
3614         h->slice_type_fixed=0;
3615
3616     slice_type= golomb_to_pict_type[ slice_type ];
3617     if (slice_type == FF_I_TYPE
3618         || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3619         default_ref_list_done = 1;
3620     }
3621     h->slice_type= slice_type;
3622     h->slice_type_nos= slice_type & 3;
3623
3624     s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3625     if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3626         av_log(h->s.avctx, AV_LOG_ERROR,
3627                "B picture before any references, skipping\n");
3628         return -1;
3629     }
3630
3631     pps_id= get_ue_golomb(&s->gb);
3632     if(pps_id>=MAX_PPS_COUNT){
3633         av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3634         return -1;
3635     }
3636     if(!h0->pps_buffers[pps_id]) {
3637         av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3638         return -1;
3639     }
3640     h->pps= *h0->pps_buffers[pps_id];
3641
3642     if(!h0->sps_buffers[h->pps.sps_id]) {
3643         av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3644         return -1;
3645     }
3646     h->sps = *h0->sps_buffers[h->pps.sps_id];
3647
3648     if(h == h0 && h->dequant_coeff_pps != pps_id){
3649         h->dequant_coeff_pps = pps_id;
3650         init_dequant_tables(h);
3651     }
3652
3653     s->mb_width= h->sps.mb_width;
3654     s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3655
3656     h->b_stride=  s->mb_width*4;
3657     h->b8_stride= s->mb_width*2;
3658
3659     s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3660     if(h->sps.frame_mbs_only_flag)
3661         s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3662     else
3663         s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3664
3665     if (s->context_initialized
3666         && (   s->width != s->avctx->width || s->height != s->avctx->height)) {
3667         if(h != h0)
3668             return -1;   // width / height changed during parallelized decoding
3669         free_tables(h);
3670         flush_dpb(s->avctx);
3671         MPV_common_end(s);
3672     }
3673     if (!s->context_initialized) {
3674         if(h != h0)
3675             return -1;  // we cant (re-)initialize context during parallel decoding
3676         if (MPV_common_init(s) < 0)
3677             return -1;
3678         s->first_field = 0;
3679
3680         init_scan_tables(h);
3681         alloc_tables(h);
3682
3683         for(i = 1; i < s->avctx->thread_count; i++) {
3684             H264Context *c;
3685             c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3686             memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3687             memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3688             c->sps = h->sps;
3689             c->pps = h->pps;
3690             init_scan_tables(c);
3691             clone_tables(c, h);
3692         }
3693
3694         for(i = 0; i < s->avctx->thread_count; i++)
3695             if(context_init(h->thread_context[i]) < 0)
3696                 return -1;
3697
3698         s->avctx->width = s->width;
3699         s->avctx->height = s->height;
3700         s->avctx->sample_aspect_ratio= h->sps.sar;
3701         if(!s->avctx->sample_aspect_ratio.den)
3702             s->avctx->sample_aspect_ratio.den = 1;
3703
3704         if(h->sps.timing_info_present_flag){
3705             s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
3706             if(h->x264_build > 0 && h->x264_build < 44)
3707                 s->avctx->time_base.den *= 2;
3708             av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3709                       s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3710         }
3711     }
3712
3713     h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
3714
3715     h->mb_mbaff = 0;
3716     h->mb_aff_frame = 0;
3717     last_pic_structure = s0->picture_structure;
3718     if(h->sps.frame_mbs_only_flag){
3719         s->picture_structure= PICT_FRAME;
3720     }else{
3721         if(get_bits1(&s->gb)) { //field_pic_flag
3722             s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3723         } else {
3724             s->picture_structure= PICT_FRAME;
3725             h->mb_aff_frame = h->sps.mb_aff;
3726         }
3727     }
3728     h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3729
3730     if(h0->current_slice == 0){
3731         while(h->frame_num !=  h->prev_frame_num &&
3732               h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3733             av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3734             frame_start(h);
3735             h->prev_frame_num++;
3736             h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3737             s->current_picture_ptr->frame_num= h->prev_frame_num;
3738             execute_ref_pic_marking(h, NULL, 0);
3739         }
3740
3741         /* See if we have a decoded first field looking for a pair... */
3742         if (s0->first_field) {
3743             assert(s0->current_picture_ptr);
3744             assert(s0->current_picture_ptr->data[0]);
3745             assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3746
3747             /* figure out if we have a complementary field pair */
3748             if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3749                 /*
3750                  * Previous field is unmatched. Don't display it, but let it
3751                  * remain for reference if marked as such.
3752                  */
3753                 s0->current_picture_ptr = NULL;
3754                 s0->first_field = FIELD_PICTURE;
3755
3756             } else {
3757                 if (h->nal_ref_idc &&
3758                         s0->current_picture_ptr->reference &&
3759                         s0->current_picture_ptr->frame_num != h->frame_num) {
3760                     /*
3761                      * This and previous field were reference, but had
3762                      * different frame_nums. Consider this field first in
3763                      * pair. Throw away previous field except for reference
3764                      * purposes.
3765                      */
3766                     s0->first_field = 1;
3767                     s0->current_picture_ptr = NULL;
3768
3769                 } else {
3770                     /* Second field in complementary pair */
3771                     s0->first_field = 0;
3772                 }
3773             }
3774
3775         } else {
3776             /* Frame or first field in a potentially complementary pair */
3777             assert(!s0->current_picture_ptr);
3778             s0->first_field = FIELD_PICTURE;
3779         }
3780
3781         if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3782             s0->first_field = 0;
3783             return -1;
3784         }
3785     }
3786     if(h != h0)
3787         clone_slice(h, h0);
3788
3789     s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3790
3791     assert(s->mb_num == s->mb_width * s->mb_height);
3792     if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3793        first_mb_in_slice                    >= s->mb_num){
3794         av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3795         return -1;
3796     }
3797     s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3798     s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3799     if (s->picture_structure == PICT_BOTTOM_FIELD)
3800         s->resync_mb_y = s->mb_y = s->mb_y + 1;
3801     assert(s->mb_y < s->mb_height);
3802
3803     if(s->picture_structure==PICT_FRAME){
3804         h->curr_pic_num=   h->frame_num;
3805         h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3806     }else{
3807         h->curr_pic_num= 2*h->frame_num + 1;
3808         h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3809     }
3810
3811     if(h->nal_unit_type == NAL_IDR_SLICE){
3812         get_ue_golomb(&s->gb); /* idr_pic_id */
3813     }
3814
3815     if(h->sps.poc_type==0){
3816         h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3817
3818         if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3819             h->delta_poc_bottom= get_se_golomb(&s->gb);
3820         }
3821     }
3822
3823     if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3824         h->delta_poc[0]= get_se_golomb(&s->gb);
3825
3826         if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3827             h->delta_poc[1]= get_se_golomb(&s->gb);
3828     }
3829
3830     init_poc(h);
3831
3832     if(h->pps.redundant_pic_cnt_present){
3833         h->redundant_pic_count= get_ue_golomb(&s->gb);
3834     }
3835
3836     //set defaults, might be overridden a few lines later
3837     h->ref_count[0]= h->pps.ref_count[0];
3838     h->ref_count[1]= h->pps.ref_count[1];
3839
3840     if(h->slice_type_nos != FF_I_TYPE){
3841         if(h->slice_type_nos == FF_B_TYPE){
3842             h->direct_spatial_mv_pred= get_bits1(&s->gb);
3843         }
3844         num_ref_idx_active_override_flag= get_bits1(&s->gb);
3845
3846         if(num_ref_idx_active_override_flag){
3847             h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3848             if(h->slice_type_nos==FF_B_TYPE)
3849                 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
3850
3851             if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3852                 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3853                 h->ref_count[0]= h->ref_count[1]= 1;
3854                 return -1;
3855             }
3856         }
3857         if(h->slice_type_nos == FF_B_TYPE)
3858             h->list_count= 2;
3859         else
3860             h->list_count= 1;
3861     }else
3862         h->list_count= 0;
3863
3864     if(!default_ref_list_done){
3865         fill_default_ref_list(h);
3866     }
3867
3868     if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
3869         return -1;
3870
3871     if(h->slice_type_nos!=FF_I_TYPE){
3872         s->last_picture_ptr= &h->ref_list[0][0];
3873         ff_copy_picture(&s->last_picture, s->last_picture_ptr);
3874     }
3875     if(h->slice_type_nos==FF_B_TYPE){
3876         s->next_picture_ptr= &h->ref_list[1][0];
3877         ff_copy_picture(&s->next_picture, s->next_picture_ptr);
3878     }
3879
3880     if(   (h->pps.weighted_pred          && h->slice_type_nos == FF_P_TYPE )
3881        ||  (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3882         pred_weight_table(h);
3883     else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3884         implicit_weight_table(h);
3885     else
3886         h->use_weight = 0;
3887
3888     if(h->nal_ref_idc)
3889         decode_ref_pic_marking(h0, &s->gb);
3890
3891     if(FRAME_MBAFF)
3892         fill_mbaff_ref_list(h);
3893
3894     if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3895         direct_dist_scale_factor(h);
3896     direct_ref_list_init(h);
3897
3898     if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3899         tmp = get_ue_golomb(&s->gb);
3900         if(tmp > 2){
3901             av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3902             return -1;
3903         }
3904         h->cabac_init_idc= tmp;
3905     }
3906
3907     h->last_qscale_diff = 0;
3908     tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3909     if(tmp>51){
3910         av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3911         return -1;
3912     }
3913     s->qscale= tmp;
3914     h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3915     h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3916     //FIXME qscale / qp ... stuff
3917     if(h->slice_type == FF_SP_TYPE){
3918         get_bits1(&s->gb); /* sp_for_switch_flag */
3919     }
3920     if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
3921         get_se_golomb(&s->gb); /* slice_qs_delta */
3922     }
3923
3924     h->deblocking_filter = 1;
3925     h->slice_alpha_c0_offset = 0;
3926     h->slice_beta_offset = 0;
3927     if( h->pps.deblocking_filter_parameters_present ) {
3928         tmp= get_ue_golomb(&s->gb);
3929         if(tmp > 2){
3930             av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
3931             return -1;
3932         }
3933         h->deblocking_filter= tmp;
3934         if(h->deblocking_filter < 2)
3935             h->deblocking_filter^= 1; // 1<->0
3936
3937         if( h->deblocking_filter ) {
3938             h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
3939             h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
3940         }
3941     }
3942
3943     if(   s->avctx->skip_loop_filter >= AVDISCARD_ALL
3944        ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
3945        ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR  && h->slice_type_nos == FF_B_TYPE)
3946        ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
3947         h->deblocking_filter= 0;
3948
3949     if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
3950         if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
3951             /* Cheat slightly for speed:
3952                Do not bother to deblock across slices. */
3953             h->deblocking_filter = 2;
3954         } else {
3955             h0->max_contexts = 1;
3956             if(!h0->single_decode_warning) {
3957                 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
3958                 h0->single_decode_warning = 1;
3959             }
3960             if(h != h0)
3961                 return 1; // deblocking switched inside frame
3962         }
3963     }
3964
3965 #if 0 //FMO
3966     if( h->pps.num_slice_groups > 1  && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
3967         slice_group_change_cycle= get_bits(&s->gb, ?);
3968 #endif
3969
3970     h0->last_slice_type = slice_type;
3971     h->slice_num = ++h0->current_slice;
3972     if(h->slice_num >= MAX_SLICES){
3973         av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
3974     }
3975
3976     for(j=0; j<2; j++){
3977         int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
3978         ref2frm[0]=
3979         ref2frm[1]= -1;
3980         for(i=0; i<16; i++)
3981             ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
3982                           +(h->ref_list[j][i].reference&3);
3983         ref2frm[18+0]=
3984         ref2frm[18+1]= -1;
3985         for(i=16; i<48; i++)
3986             ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
3987                           +(h->ref_list[j][i].reference&3);
3988     }
3989
3990     h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
3991     h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
3992
3993     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
3994         av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
3995                h->slice_num,
3996                (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
3997                first_mb_in_slice,
3998                av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
3999                pps_id, h->frame_num,
4000                s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4001                h->ref_count[0], h->ref_count[1],
4002                s->qscale,
4003                h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4004                h->use_weight,
4005                h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4006                h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
4007                );
4008     }
4009
4010     return 0;
4011 }
4012
4013 /**
4014  *
4015  */
4016 static inline int get_level_prefix(GetBitContext *gb){
4017     unsigned int buf;
4018     int log;
4019
4020     OPEN_READER(re, gb);
4021     UPDATE_CACHE(re, gb);
4022     buf=GET_CACHE(re, gb);
4023
4024     log= 32 - av_log2(buf);
4025 #ifdef TRACE
4026     print_bin(buf>>(32-log), log);
4027     av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4028 #endif
4029
4030     LAST_SKIP_BITS(re, gb, log);
4031     CLOSE_READER(re, gb);
4032
4033     return log-1;
4034 }
4035
4036 static inline int get_dct8x8_allowed(H264Context *h){
4037     if(h->sps.direct_8x8_inference_flag)
4038         return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8                )*0x0001000100010001ULL));
4039     else
4040         return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
4041 }
4042
4043 /**
4044  * decodes a residual block.
4045  * @param n block index
4046  * @param scantable scantable
4047  * @param max_coeff number of coefficients in the block
4048  * @return <0 if an error occurred
4049  */
4050 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4051     MpegEncContext * const s = &h->s;
4052     static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4053     int level[16];
4054     int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4055
4056     //FIXME put trailing_onex into the context
4057
4058     if(n == CHROMA_DC_BLOCK_INDEX){
4059         coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4060         total_coeff= coeff_token>>2;
4061     }else{
4062         if(n == LUMA_DC_BLOCK_INDEX){
4063             total_coeff= pred_non_zero_count(h, 0);
4064             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4065             total_coeff= coeff_token>>2;
4066         }else{
4067             total_coeff= pred_non_zero_count(h, n);
4068             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4069             total_coeff= coeff_token>>2;
4070             h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4071         }
4072     }
4073
4074     //FIXME set last_non_zero?
4075
4076     if(total_coeff==0)
4077         return 0;
4078     if(total_coeff > (unsigned)max_coeff) {
4079         av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4080         return -1;
4081     }
4082
4083     trailing_ones= coeff_token&3;
4084     tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4085     assert(total_coeff<=16);
4086
4087     i = show_bits(gb, 3);
4088     skip_bits(gb, trailing_ones);
4089     level[0] = 1-((i&4)>>1);
4090     level[1] = 1-((i&2)   );
4091     level[2] = 1-((i&1)<<1);
4092
4093     if(trailing_ones<total_coeff) {
4094         int level_code, mask;
4095         int suffix_length = total_coeff > 10 && trailing_ones < 3;
4096         int prefix= get_level_prefix(gb);
4097
4098         //first coefficient has suffix_length equal to 0 or 1
4099         if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4100             if(suffix_length)
4101                 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4102             else
4103                 level_code= (prefix<<suffix_length); //part
4104         }else if(prefix==14){
4105             if(suffix_length)
4106                 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4107             else
4108                 level_code= prefix + get_bits(gb, 4); //part
4109         }else{
4110             level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4111             if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4112             if(prefix>=16)
4113                 level_code += (1<<(prefix-3))-4096;
4114         }
4115
4116         if(trailing_ones < 3) level_code += 2;
4117
4118         suffix_length = 1;
4119         if(level_code > 5)
4120             suffix_length++;
4121         mask= -(level_code&1);
4122         level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
4123
4124         //remaining coefficients have suffix_length > 0
4125         for(i=trailing_ones+1;i<total_coeff;i++) {
4126             static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4127             prefix = get_level_prefix(gb);
4128             if(prefix<15){
4129                 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4130             }else{
4131                 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4132                 if(prefix>=16)
4133                     level_code += (1<<(prefix-3))-4096;
4134             }
4135             mask= -(level_code&1);
4136             level[i]= (((2+level_code)>>1) ^ mask) - mask;
4137             if(level_code > suffix_limit[suffix_length])
4138                 suffix_length++;
4139         }
4140     }
4141
4142     if(total_coeff == max_coeff)
4143         zeros_left=0;
4144     else{
4145         if(n == CHROMA_DC_BLOCK_INDEX)
4146             zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4147         else
4148             zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
4149     }
4150
4151     coeff_num = zeros_left + total_coeff - 1;
4152     j = scantable[coeff_num];
4153     if(n > 24){
4154         block[j] = level[0];
4155         for(i=1;i<total_coeff;i++) {
4156             if(zeros_left <= 0)
4157                 run_before = 0;
4158             else if(zeros_left < 7){
4159                 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4160             }else{
4161                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4162             }
4163             zeros_left -= run_before;
4164             coeff_num -= 1 + run_before;
4165             j= scantable[ coeff_num ];
4166
4167             block[j]= level[i];
4168         }
4169     }else{
4170         block[j] = (level[0] * qmul[j] + 32)>>6;
4171         for(i=1;i<total_coeff;i++) {
4172             if(zeros_left <= 0)
4173                 run_before = 0;
4174             else if(zeros_left < 7){
4175                 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4176             }else{
4177                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4178             }
4179             zeros_left -= run_before;
4180             coeff_num -= 1 + run_before;
4181             j= scantable[ coeff_num ];
4182
4183             block[j]= (level[i] * qmul[j] + 32)>>6;
4184         }
4185     }
4186
4187     if(zeros_left<0){
4188         av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
4189         return -1;
4190     }
4191
4192     return 0;
4193 }
4194
4195 static void predict_field_decoding_flag(H264Context *h){
4196     MpegEncContext * const s = &h->s;
4197     const int mb_xy= h->mb_xy;
4198     int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4199                 ? s->current_picture.mb_type[mb_xy-1]
4200                 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4201                 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4202                 : 0;
4203     h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4204 }
4205
4206 /**
4207  * decodes a P_SKIP or B_SKIP macroblock
4208  */
4209 static void decode_mb_skip(H264Context *h){
4210     MpegEncContext * const s = &h->s;
4211     const int mb_xy= h->mb_xy;
4212     int mb_type=0;
4213
4214     memset(h->non_zero_count[mb_xy], 0, 16);
4215     memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4216
4217     if(MB_FIELD)
4218         mb_type|= MB_TYPE_INTERLACED;
4219
4220     if( h->slice_type_nos == FF_B_TYPE )
4221     {
4222         // just for fill_caches. pred_direct_motion will set the real mb_type
4223         mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4224
4225         fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4226         pred_direct_motion(h, &mb_type);
4227         mb_type|= MB_TYPE_SKIP;
4228     }
4229     else
4230     {
4231         int mx, my;
4232         mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4233
4234         fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4235         pred_pskip_motion(h, &mx, &my);
4236         fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4237         fill_rectangle(  h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4238     }
4239
4240     write_back_motion(h, mb_type);
4241     s->current_picture.mb_type[mb_xy]= mb_type;
4242     s->current_picture.qscale_table[mb_xy]= s->qscale;
4243     h->slice_table[ mb_xy ]= h->slice_num;
4244     h->prev_mb_skipped= 1;
4245 }
4246
4247 /**
4248  * Decodes one macroblock of a CAVLC coded slice from the bitstream in s->gb.
      *
      * In order: handles the skip run of non-intra slices, maps the coded mb_type
      * to the internal MB_TYPE_* flags, reads intra prediction modes or reference
      * indices and motion vectors, then the coded block pattern, the quantizer
      * delta and the residual coefficients. Per-macroblock results are written to
      * the tables indexed by mb_xy (mb_type, qscale_table, slice_table, ...).
      *
      * @param h decoder context; h->s.mb_x / h->s.mb_y select the macroblock
4249  * @returns 0 if OK (also for skipped and PCM macroblocks), -1 as soon as an
      *          out-of-range syntax element or a failing helper is noticed
4250  */
4251 static int decode_mb_cavlc(H264Context *h){
4252     MpegEncContext * const s = &h->s;
4253     int mb_xy;
4254     int partition_count;
4255     unsigned int mb_type, cbp;
4256     int dct8x8_allowed= h->pps.transform_8x8_mode;
4257
         // linear index of the current macroblock in the per-picture tables
4258     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4259
4260     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4261     cbp = 0; /* avoid warning. FIXME: find a solution without slowing
4262                 down the code */
         // Non-intra slices code runs of skipped macroblocks.
4263     if(h->slice_type_nos != FF_I_TYPE){
             // -1 marks "no run pending": read the length of the next run
4264         if(s->mb_skip_run==-1)
4265             s->mb_skip_run= get_ue_golomb(&s->gb);
4266
             // A non-zero counter means this macroblock is skipped. The post-decrement
             // turns an exhausted run (0) back into the -1 marker.
4267         if (s->mb_skip_run--) {
                 // top macroblock (even mb_y) of an MBAFF pair: the field flag is read
                 // only when this was the last skipped macroblock of the run,
                 // otherwise it is predicted
4268             if(FRAME_MBAFF && (s->mb_y&1) == 0){
4269                 if(s->mb_skip_run==0)
4270                     h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4271                 else
4272                     predict_field_decoding_flag(h);
4273             }
4274             decode_mb_skip(h);
4275             return 0;
4276         }
4277     }
         // non-skipped top macroblock of an MBAFF pair: read the field decoding flag
4278     if(FRAME_MBAFF){
4279         if( (s->mb_y&1) == 0 )
4280             h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4281     }
4282
4283     h->prev_mb_skipped= 0;
4284
         // Coded mb_type: the inter types of the slice come first (23 for B, 5 for P),
         // larger values are intra types offset by that count.
4285     mb_type= get_ue_golomb(&s->gb);
4286     if(h->slice_type_nos == FF_B_TYPE){
4287         if(mb_type < 23){
4288             partition_count= b_mb_type_info[mb_type].partition_count;
4289             mb_type=         b_mb_type_info[mb_type].type;
4290         }else{
4291             mb_type -= 23;
4292             goto decode_intra_mb;
4293         }
4294     }else if(h->slice_type_nos == FF_P_TYPE){
4295         if(mb_type < 5){
4296             partition_count= p_mb_type_info[mb_type].partition_count;
4297             mb_type=         p_mb_type_info[mb_type].type;
4298         }else{
4299             mb_type -= 5;
4300             goto decode_intra_mb;
4301         }
4302     }else{
4303        assert(h->slice_type_nos == FF_I_TYPE);
             // in SI slices a non-zero coded type is shifted down by one before the
             // intra table lookup
4304         if(h->slice_type == FF_SI_TYPE && mb_type)
4305             mb_type--;
4306 decode_intra_mb:
             // i_mb_type_info is only indexed with 0..25
4307         if(mb_type > 25){
4308             av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4309             return -1;
4310         }
4311         partition_count=0;
             // for intra types the table supplies cbp and the 16x16 prediction mode;
             // cbp is re-read from the bitstream below unless the type is Intra16x16
4312         cbp= i_mb_type_info[mb_type].cbp;
4313         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4314         mb_type= i_mb_type_info[mb_type].type;
4315     }
4316
4317     if(MB_FIELD)
4318         mb_type |= MB_TYPE_INTERLACED;
4319
4320     h->slice_table[ mb_xy ]= h->slice_num;
4321
4322     if(IS_INTRA_PCM(mb_type)){
4323         unsigned int x;
4324
4325         // We assume these blocks are very rare so we do not optimize it.
4326         align_get_bits(&s->gb);
4327
4328         // The pixels are stored in the same order as levels in h->mb array.
             // 256 bytes are read, or 384 when CHROMA is set
4329         for(x=0; x < (CHROMA ? 384 : 256); x++){
4330             ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4331         }
4332
4333         // In deblocking, the quantizer is 0
4334         s->current_picture.qscale_table[mb_xy]= 0;
4335         // All coeffs are present
4336         memset(h->non_zero_count[mb_xy], 16, 16);
4337
4338         s->current_picture.mb_type[mb_xy]= mb_type;
4339         return 0;
4340     }
4341
         // With MB_MBAFF set, the reference counts are doubled while this macroblock
         // is parsed and halved again at the end of the function.
         // NOTE(review): the "return -1" paths in between skip that restore, leaving
         // h->ref_count doubled after an error -- confirm the caller resets it.
4342     if(MB_MBAFF){
4343         h->ref_count[0] <<= 1;
4344         h->ref_count[1] <<= 1;
4345     }
4346
4347     fill_caches(h, mb_type, 0);
4348
4349     //mb_pred
4350     if(IS_INTRA(mb_type)){
4351         int pred_mode;
4352 //            init_top_left_availability(h);
4353         if(IS_INTRA4x4(mb_type)){
4354             int i;
4355             int di = 1;
                 // with the 8x8 transform one mode is coded per 8x8 block, so the loop
                 // below steps over four 4x4 indices at a time
4356             if(dct8x8_allowed && get_bits1(&s->gb)){
4357                 mb_type |= MB_TYPE_8x8DCT;
4358                 di = 4;
4359             }
4360
4361 //                fill_intra4x4_pred_table(h);
4362             for(i=0; i<16; i+=di){
4363                 int mode= pred_intra_mode(h, i);
4364
                     // flag bit 1: keep the predicted mode. Otherwise a 3 bit value
                     // selects one of the remaining modes (the predicted value is
                     // skipped over).
4365                 if(!get_bits1(&s->gb)){
4366                     const int rem_mode= get_bits(&s->gb, 3);
4367                     mode = rem_mode + (rem_mode >= mode);
4368                 }
4369
4370                 if(di==4)
4371                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4372                 else
4373                     h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4374             }
4375             write_back_intra_pred_mode(h);
4376             if( check_intra4x4_pred_mode(h) < 0)
4377                 return -1;
4378         }else{
4379             h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4380             if(h->intra16x16_pred_mode < 0)
4381                 return -1;
4382         }
4383         if(CHROMA){
4384             pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4385             if(pred_mode < 0)
4386                 return -1;
4387             h->chroma_pred_mode= pred_mode;
4388         }
4389     }else if(partition_count==4){
             // 8x8 partitioning: every 8x8 block carries its own sub macroblock type
4390         int i, j, sub_partition_count[4], list, ref[2][4];
4391
4392         if(h->slice_type_nos == FF_B_TYPE){
4393             for(i=0; i<4; i++){
4394                 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4395                 if(h->sub_mb_type[i] >=13){
4396                     av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4397                     return -1;
4398                 }
4399                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4400                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4401             }
                 // at least one direct sub block: run direct prediction once for the
                 // whole macroblock
4402             if(   IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4403                || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4404                 pred_direct_motion(h, &mb_type);
4405                 h->ref_cache[0][scan8[4]] =
4406                 h->ref_cache[1][scan8[4]] =
4407                 h->ref_cache[0][scan8[12]] =
4408                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4409             }
4410         }else{
4411             assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4412             for(i=0; i<4; i++){
4413                 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4414                 if(h->sub_mb_type[i] >=4){
4415                     av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4416                     return -1;
4417                 }
4418                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4419                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4420             }
4421         }
4422
             // First pass: one reference index per list and 8x8 block (-1 where the
             // sub block does not use the list); direct sub blocks are left alone.
4423         for(list=0; list<h->list_count; list++){
                 // macroblock types flagged REF0 are limited to a single reference
4424             int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4425             for(i=0; i<4; i++){
4426                 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4427                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4428                     unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4429                     if(tmp>=ref_count){
4430                         av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4431                         return -1;
4432                     }
4433                     ref[list][i]= tmp;
4434                 }else{
4435                  //FIXME
4436                     ref[list][i] = -1;
4437                 }
4438             }
4439         }
4440
4441         if(dct8x8_allowed)
4442             dct8x8_allowed = get_dct8x8_allowed(h);
4443
             // Second pass: store the references in the cache and read the motion
             // vectors of every sub partition.
4444         for(list=0; list<h->list_count; list++){
4445             for(i=0; i<4; i++){
4446                 if(IS_DIRECT(h->sub_mb_type[i])) {
4447                     h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4448                     continue;
4449                 }
                     // the 8x8 block covers a 2x2 group of cache entries (row stride 8)
4450                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4451                 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4452
4453                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4454                     const int sub_mb_type= h->sub_mb_type[i];
                         // sub types carrying the 16x16 or 16x8 flag step two 4x4 indices per
                         // partition, the others one
4455                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4456                     for(j=0; j<sub_partition_count[i]; j++){
4457                         int mx, my;
4458                         const int index= 4*i + block_width*j;
4459                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
                             // vector = prediction + signed difference from the bitstream
4460                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4461                         mx += get_se_golomb(&s->gb);
4462                         my += get_se_golomb(&s->gb);
4463                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4464
                             // replicate the vector over the cache entries the partition
                             // covers: [1] right, [8] below, [9] below right
4465                         if(IS_SUB_8X8(sub_mb_type)){
4466                             mv_cache[ 1 ][0]=
4467                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4468                             mv_cache[ 1 ][1]=
4469                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4470                         }else if(IS_SUB_8X4(sub_mb_type)){
4471                             mv_cache[ 1 ][0]= mx;
4472                             mv_cache[ 1 ][1]= my;
4473                         }else if(IS_SUB_4X8(sub_mb_type)){
4474                             mv_cache[ 8 ][0]= mx;
4475                             mv_cache[ 8 ][1]= my;
4476                         }
4477                         mv_cache[ 0 ][0]= mx;
4478                         mv_cache[ 0 ][1]= my;
4479                     }
4480                 }else{
                         // list unused by this sub block: clear its four vectors, each
                         // written as one 32 bit word
4481                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4482                     p[0] = p[1]=
4483                     p[8] = p[9]= 0;
4484                 }
4485             }
4486         }
4487     }else if(IS_DIRECT(mb_type)){
4488         pred_direct_motion(h, &mb_type);
4489         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
4490     }else{
             // 16x16, 16x8 and 8x16 partitions. In each case all reference indices of
             // all lists are read first, then all motion vector differences.
4491         int list, mx, my, i;
4492          //FIXME we should set ref_idx_l? to 0 if we use that later ...
4493         if(IS_16X16(mb_type)){
4494             for(list=0; list<h->list_count; list++){
4495                     unsigned int val;
4496                     if(IS_DIR(mb_type, 0, list)){
4497                         val= get_te0_golomb(&s->gb, h->ref_count[list]);
4498                         if(val >= h->ref_count[list]){
4499                             av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4500                             return -1;
4501                         }
4502                     }else
4503                         val= LIST_NOT_USED&0xFF;
4504                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4505             }
4506             for(list=0; list<h->list_count; list++){
4507                 unsigned int val;
4508                 if(IS_DIR(mb_type, 0, list)){
4509                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4510                     mx += get_se_golomb(&s->gb);
4511                     my += get_se_golomb(&s->gb);
4512                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4513
4514                     val= pack16to32(mx,my);
4515                 }else
4516                     val=0;
4517                 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4518             }
4519         }
4520         else if(IS_16X8(mb_type)){
                 // two partitions, the second one starting two cache rows (16 entries)
                 // further down
4521             for(list=0; list<h->list_count; list++){
4522                     for(i=0; i<2; i++){
4523                         unsigned int val;
4524                         if(IS_DIR(mb_type, i, list)){
4525                             val= get_te0_golomb(&s->gb, h->ref_count[list]);
4526                             if(val >= h->ref_count[list]){
4527                                 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4528                                 return -1;
4529                             }
4530                         }else
4531                             val= LIST_NOT_USED&0xFF;
4532                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4533                     }
4534             }
4535             for(list=0; list<h->list_count; list++){
4536                 for(i=0; i<2; i++){
4537                     unsigned int val;
4538                     if(IS_DIR(mb_type, i, list)){
4539                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4540                         mx += get_se_golomb(&s->gb);
4541                         my += get_se_golomb(&s->gb);
4542                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4543
4544                         val= pack16to32(mx,my);
4545                     }else
4546                         val=0;
4547                     fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4548                 }
4549             }
4550         }else{
                 // two partitions, the second one starting two cache columns to the right
4551             assert(IS_8X16(mb_type));
4552             for(list=0; list<h->list_count; list++){
4553                     for(i=0; i<2; i++){
4554                         unsigned int val;
4555                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4556                             val= get_te0_golomb(&s->gb, h->ref_count[list]);
4557                             if(val >= h->ref_count[list]){
4558                                 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4559                                 return -1;
4560                             }
4561                         }else
4562                             val= LIST_NOT_USED&0xFF;
4563                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4564                     }
4565             }
4566             for(list=0; list<h->list_count; list++){
4567                 for(i=0; i<2; i++){
4568                     unsigned int val;
4569                     if(IS_DIR(mb_type, i, list)){
4570                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4571                         mx += get_se_golomb(&s->gb);
4572                         my += get_se_golomb(&s->gb);
4573                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4574
4575                         val= pack16to32(mx,my);
4576                     }else
4577                         val=0;
4578                     fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4579                 }
4580             }
4581         }
4582     }
4583
4584     if(IS_INTER(mb_type))
4585         write_back_motion(h, mb_type);
4586
         // Intra16x16 already took its cbp from i_mb_type_info. Every other type codes
         // an index 0..47 that is mapped through an intra/inter (and gray) table.
4587     if(!IS_INTRA16x16(mb_type)){
4588         cbp= get_ue_golomb(&s->gb);
4589         if(cbp > 47){
4590             av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4591             return -1;
4592         }
4593
4594         if(CHROMA){
4595             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4596             else                     cbp= golomb_to_inter_cbp   [cbp];
4597         }else{
4598             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4599             else                     cbp= golomb_to_inter_cbp_gray[cbp];
4600         }
4601     }
4602     h->cbp = cbp;
4603
         // non-intra macroblocks with coded luma read the 8x8 transform flag here
         // (Intra4x4 read it before the prediction modes above)
4604     if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4605         if(get_bits1(&s->gb)){
4606             mb_type |= MB_TYPE_8x8DCT;
4607             h->cbp_table[mb_xy]= cbp;
4608         }
4609     }
4610     s->current_picture.mb_type[mb_xy]= mb_type;
4611
         // Residual data (and the quantizer delta) is present only when something is
         // coded or the macroblock is Intra16x16.
4612     if(cbp || IS_INTRA16x16(mb_type)){
4613         int i8x8, i4x4, chroma_idx;
4614         int dquant;
             // intra and inter residuals may come from different bit readers
4615         GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4616         const uint8_t *scan, *scan8x8, *dc_scan;
4617
4618 //        fill_non_zero_count_cache(h);
4619
             // Pick field or zigzag scan tables. The *_q0 variants are chosen while
             // s->qscale is 0; note this tests qscale before dquant is applied below.
4620         if(IS_INTERLACED(mb_type)){
4621             scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4622             scan= s->qscale ? h->field_scan : h->field_scan_q0;
4623             dc_scan= luma_dc_field_scan;
4624         }else{
4625             scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4626             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4627             dc_scan= luma_dc_zigzag_scan;
4628         }
4629
4630         dquant= get_se_golomb(&s->gb);
4631
4632         if( dquant > 25 || dquant < -26 ){
4633             av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
4634             return -1;
4635         }
4636
             // apply the delta and wrap the result back into 0..51
4637         s->qscale += dquant;
4638         if(((unsigned)s->qscale) > 51){
4639             if(s->qscale<0) s->qscale+= 52;
4640             else            s->qscale-= 52;
4641         }
4642
4643         h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4644         h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4645         if(IS_INTRA16x16(mb_type)){
                 // luma DC block first (16 coefficients), then per 4x4 block the
                 // remaining 15 coefficients, hence scan + 1
4646             if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4647                 return -1; //FIXME continue if partitioned and other return -1 too
4648             }
4649
4650             assert((cbp&15) == 0 || (cbp&15) == 15);
4651
4652             if(cbp&15){
4653                 for(i8x8=0; i8x8<4; i8x8++){
4654                     for(i4x4=0; i4x4<4; i4x4++){
4655                         const int index= i4x4 + 4*i8x8;
4656                         if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4657                             return -1;
4658                         }
4659                     }
4660                 }
4661             }else{
4662                 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4663             }
4664         }else{
                 // one cbp bit per 8x8 luma block
4665             for(i8x8=0; i8x8<4; i8x8++){
4666                 if(cbp & (1<<i8x8)){
4667                     if(IS_8x8DCT(mb_type)){
                             // an 8x8 block is coded as four passes of 16 coefficients into
                             // the same buffer, each pass with its own part of scan8x8
4668                         DCTELEM *buf = &h->mb[64*i8x8];
4669                         uint8_t *nnz;
4670                         for(i4x4=0; i4x4<4; i4x4++){
4671                             if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4672                                                 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4673                                 return -1;
4674                         }
                             // add the other three counts of the 2x2 cache group to its first entry
4675                         nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4676                         nnz[0] += nnz[1] + nnz[8] + nnz[9];
4677                     }else{
4678                         for(i4x4=0; i4x4<4; i4x4++){
4679                             const int index= i4x4 + 4*i8x8;
4680
4681                             if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4682                                 return -1;
4683                             }
4684                         }
4685                     }
4686                 }else{
                         // 8x8 block not coded: zero the counts of its four 4x4 blocks
4687                     uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4688                     nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
4689                 }
4690             }
4691         }
4692
             // cbp bits 4 and 5 (mask 0x30) hold the chroma part: any bit set means
             // chroma DC is coded, bit 5 (0x20) means chroma AC is coded as well
4693         if(cbp&0x30){
4694             for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4695                 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4696                     return -1;
4697                 }
4698         }
4699
4700         if(cbp&0x20){
4701             for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4702                 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4703                 for(i4x4=0; i4x4<4; i4x4++){
4704                     const int index= 16 + 4*chroma_idx + i4x4;
4705                     if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4706                         return -1;
4707                     }
4708                 }
4709             }
4710         }else{
                 // no chroma AC: zero the counts of the eight chroma blocks
4711             uint8_t * const nnz= &h->non_zero_count_cache[0];
4712             nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4713             nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4714         }
4715     }else{
             // nothing coded: zero all luma and chroma non-zero counts
4716         uint8_t * const nnz= &h->non_zero_count_cache[0];
4717         fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4718         nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4719         nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4720     }
4721     s->current_picture.qscale_table[mb_xy]= s->qscale;
4722     write_back_non_zero_count(h);
4723
         // undo the doubling applied before fill_caches()
4724     if(MB_MBAFF){
4725         h->ref_count[0] >>= 1;
4726         h->ref_count[1] >>= 1;
4727     }
4728
4729     return 0;
4730 }
4731
     /**
      * Decodes the field decoding flag of the current macroblock pair with CABAC.
      *
      * The context is cabac_state[70 + ctx], where ctx counts how many of the
      * left and the above macroblock pairs belong to the current slice and are
      * interlaced.
      *
      * @param h decoder context; the position is taken from h->s.mb_x / mb_y
      * @return the decoded bin as returned by get_cabac_noinline()
      */
4732 static int decode_cabac_field_decoding_flag(H264Context *h) {
4733     MpegEncContext * const s = &h->s;
4734     const int mb_x = s->mb_x;
         // clear the lowest bit: row of the top macroblock of the current pair
4735     const int mb_y = s->mb_y & ~1;
         // top macroblock of the pair to the left / of the pair above (two rows up)
4736     const int mba_xy = mb_x - 1 +  mb_y   *s->mb_stride;
4737     const int mbb_xy = mb_x     + (mb_y-2)*s->mb_stride;
4738
4739     unsigned int ctx = 0;
4740
4741     if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4742         ctx += 1;
4743     }
4744     if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4745         ctx += 1;
4746     }
4747
4748     return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
4749 }
4750
     /**
      * Decodes an intra macroblock type with CABAC.
      *
      * @param h           decoder context
      * @param ctx_base    index in h->cabac_state of the first context to use
      * @param intra_slice 1 when called for an I slice (the first bin then picks
      *                    its context from the left/top neighbours and the later
      *                    bins use shifted contexts), 0 otherwise; the value is
      *                    used directly in context index arithmetic
      * @return 0 for I4x4, 25 for PCM, otherwise an I16x16 type in 1..24
      */
4751 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4752     uint8_t *state= &h->cabac_state[ctx_base];
4753     int mb_type;
4754
4755     if(intra_slice){
4756         MpegEncContext * const s = &h->s;
4757         const int mba_xy = h->left_mb_xy[0];
4758         const int mbb_xy = h->top_mb_xy;
4759         int ctx=0;
             // count the neighbours in the current slice that are not I4x4
4760         if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4761             ctx++;
4762         if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4763             ctx++;
4764         if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4765             return 0;   /* I4x4 */
             // skip the two extra first-bin contexts so the indices below line up
4766         state += 2;
4767     }else{
4768         if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4769             return 0;   /* I4x4 */
4770     }
4771
4772     if( get_cabac_terminate( &h->cabac ) )
4773         return 25;  /* PCM */
4774
         // I16x16: the type index is built from bins weighted 12 (luma cbp),
         // 4 or 8 (chroma cbp) and finally 2 and 1
         // (the last two presumably select the prediction mode -- confirm
         // against i_mb_type_info)
4775     mb_type = 1; /* I16x16 */
4776     mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4777     if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4778         mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4779     mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4780     mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
4781     return mb_type;
4782 }
4783
     /**
      * Decodes the macroblock type of the current macroblock with CABAC.
      *
      * The result uses a numbering that depends on the slice type:
      * - I slices: the intra type from decode_cabac_intra_mb_type()
      * - P slices: 0..3 for the inter types, intra types offset by 5
      * - B slices: 0..22 for the inter types, intra types offset by 23
      *
      * @param h decoder context
      * @return the macroblock type index
      */
4784 static int decode_cabac_mb_type( H264Context *h ) {
4785     MpegEncContext * const s = &h->s;
4786
4787     if( h->slice_type_nos == FF_I_TYPE ) {
4788         return decode_cabac_intra_mb_type(h, 3, 1);
4789     } else if( h->slice_type_nos == FF_P_TYPE ) {
4790         if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
4791             /* P-type */
4792             if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
4793                 /* P_L0_D16x16, P_8x8 */
4794                 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
4795             } else {
4796                 /* P_L0_D8x16, P_L0_D16x8 */
4797                 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
4798             }
4799         } else {
4800             return decode_cabac_intra_mb_type(h, 17, 0) + 5;
4801         }
4802     } else {
4803         const int mba_xy = h->left_mb_xy[0];
4804         const int mbb_xy = h->top_mb_xy;
4805         int ctx = 0;
4806         int bits;
4807         assert(h->slice_type_nos == FF_B_TYPE);
4808
             // first bin context: number of neighbours in the current slice that are
             // not direct
4809         if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4810             ctx++;
4811         if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4812             ctx++;
4813
4814         if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4815             return 0; /* B_Direct_16x16 */
4816
4817         if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4818             return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
4819         }
4820
             // four more bins, most significant first
4821         bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4822         bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4823         bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4824         bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4825         if( bits < 8 )
4826             return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4827         else if( bits == 13 ) {
4828             return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4829         } else if( bits == 14 )
4830             return 11; /* B_L1_L0_8x16 */
4831         else if( bits == 15 )
4832             return 22; /* B_8x8 */
4833
             // bits is 8..12 here: one further bin extends it to 16..25, giving 12..21
4834         bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4835         return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
4836     }
4837 }
4838
     /**
      * Decodes the skip flag of a macroblock with CABAC.
      *
      * The context is cabac_state[11 + ctx]: ctx counts the left and top
      * neighbours that belong to the current slice and are not skipped, plus 13
      * in B slices.
      *
      * @param h    decoder context
      * @param mb_x macroblock column; used only in the FRAME_MBAFF case
      * @param mb_y macroblock row; used only in the FRAME_MBAFF case (otherwise
      *             the position comes from h->mb_xy)
      * @return the decoded bin as returned by get_cabac_noinline()
      */
4839 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4840     MpegEncContext * const s = &h->s;
4841     int mba_xy, mbb_xy;
4842     int ctx = 0;
4843
4844     if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
             // start from the top macroblock of the current pair and of the left pair
4845         int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
4846         mba_xy = mb_xy - 1;
             // bottom macroblock whose left pair is in this slice and has the same
             // field/frame mode: use the bottom macroblock of the left pair
4847         if( (mb_y&1)
4848             && h->slice_table[mba_xy] == h->slice_num
4849             && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
4850             mba_xy += s->mb_stride;
4851         if( MB_FIELD ){
                 // field macroblock: start at the bottom macroblock of the pair above
                 // and move to its top macroblock if the current one is a top macroblock
                 // and the pair above is an interlaced pair of this slice
4852             mbb_xy = mb_xy - s->mb_stride;
4853             if( !(mb_y&1)
4854                 && h->slice_table[mbb_xy] == h->slice_num
4855                 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4856                 mbb_xy -= s->mb_stride;
4857         }else
                 // frame macroblock: the macroblock directly above
4858             mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
4859     }else{
4860         int mb_xy = h->mb_xy;
4861         mba_xy = mb_xy - 1;
             // the distance to the row above is doubled when FIELD_PICTURE is 1
4862         mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4863     }
4864
4865     if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4866         ctx++;
4867     if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
4868         ctx++;
4869
4870     if( h->slice_type_nos == FF_B_TYPE )
4871         ctx += 13;
4872     return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
4873 }
4874
     /**
      * Decodes one intra 4x4 prediction mode with CABAC.
      *
      * @param h         decoder context
      * @param pred_mode the predicted mode for this block
      * @return pred_mode if the first bin is set; otherwise a value built from
      *         three more bins (least significant first), incremented by one if
      *         it is not below pred_mode so that pred_mode itself is skipped
      */
4875 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4876     int mode = 0;
4877
4878     if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
4879         return pred_mode;
4880
4881     mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4882     mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4883     mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
4884
4885     if( mode >= pred_mode )
4886         return mode + 1;
4887     else
4888         return mode;
4889 }
4890
     /**
      * Decodes the intra chroma prediction mode with CABAC.
      *
      * The first bin uses cabac_state[64 + ctx], where ctx counts the left and
      * top neighbours in the current slice whose chroma_pred_mode_table entry is
      * non-zero. Up to two further bins, both using context 64+3, extend the
      * value.
      *
      * @param h decoder context
      * @return the mode, 0..3 (number of leading 1 bins, at most 3)
      */
4891 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
4892     const int mba_xy = h->left_mb_xy[0];
4893     const int mbb_xy = h->top_mb_xy;
4894
4895     int ctx = 0;
4896
4897     /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
4898     if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
4899         ctx++;
4900
4901     if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
4902         ctx++;
4903
4904     if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
4905         return 0;
4906
4907     if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4908         return 1;
4909     if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4910         return 2;
4911     else
4912         return 3;
4913 }
4914
     /**
      * Decodes the luma part of the coded block pattern with CABAC.
      *
      * One bin is decoded per 8x8 luma block. The context of each bin is
      * cabac_state[73 + ctx], where ctx gets +1 if the cbp bit of the block to
      * the left is clear and +2 if the cbp bit of the block above is clear. Those
      * bits come from the neighbouring macroblock's cbp or from the bits already
      * decoded here.
      *
      * @param h decoder context
      * @return the 4 bit luma cbp (bit i set = 8x8 block i is coded)
      */
4915 static int decode_cabac_mb_cbp_luma( H264Context *h) {
4916     int cbp_b, cbp_a, ctx, cbp = 0;
4917
         // a neighbour outside the current slice gets -1, i.e. all of its bits read
         // as set
4918     cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
4919     cbp_b = h->slice_table[h->top_mb_xy]     == h->slice_num ? h->top_cbp  : -1;
4920
         // block 0: left is block 1 of the left MB, above is block 2 of the top MB
4921     ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
4922     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
         // block 1: left is block 0, above is block 3 of the top MB
4923     ctx = !(cbp   & 0x01) + 2 * !(cbp_b & 0x08);
4924     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
         // block 2: left is block 3 of the left MB, above is block 0
4925     ctx = !(cbp_a & 0x08) + 2 * !(cbp   & 0x01);
4926     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
         // block 3: left is block 2, above is block 1
4927     ctx = !(cbp   & 0x04) + 2 * !(cbp   & 0x02);
4928     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
4929     return cbp;
4930 }
4931 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
         /*
          * Decodes the chroma part of the coded block pattern with CABAC.
          * Returns 0, 1 or 2. Both bins choose their context from the chroma cbp
          * of the left and top neighbours: the first bin tests for a non-zero
          * value, the second for the value 2.
          */
4932     int ctx;
4933     int cbp_a, cbp_b;
4934
         // the chroma cbp is kept in bits 4..5 of left_cbp / top_cbp
4935     cbp_a = (h->left_cbp>>4)&0x03;
4936     cbp_b = (h-> top_cbp>>4)&0x03;
4937
4938     ctx = 0;
4939     if( cbp_a > 0 ) ctx++;
4940     if( cbp_b > 0 ) ctx += 2;
4941     if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
4942         return 0;
4943
4944     ctx = 4;
4945     if( cbp_a == 2 ) ctx++;
4946     if( cbp_b == 2 ) ctx += 2;
4947     return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
4948 }
4949 static int decode_cabac_mb_dqp( H264Context *h) {
         /*
          * Decodes the quantizer delta with CABAC.
          * The number of leading 1 bins (val) is counted and mapped to a signed
          * value: odd counts give +(val+1)/2, even counts give -val/2.
          * Returns INT_MIN if more than 102 one-bins are read.
          */
         // first bin context: 0 or 1 depending on whether the previous delta was zero
4950     int   ctx= h->last_qscale_diff != 0;
4951     int   val = 0;
4952
4953     while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
             // second bin uses context 2, all later bins context 3
4954         ctx= 2+(ctx>>1);
4955         val++;
4956         if(val > 102) //prevent infinite loop
4957             return INT_MIN;
4958     }
4959
4960     if( val&0x01 )
4961         return   (val + 1)>>1 ;
4962     else
4963         return -((val + 1)>>1);
4964 }
4965 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
         /*
          * Decodes the sub macroblock type of one 8x8 block in a P macroblock with
          * CABAC, using contexts 21..23. Returns 0..3 as labelled below.
          */
4966     if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
4967         return 0;   /* 8x8 */
4968     if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
4969         return 1;   /* 8x4 */
4970     if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
4971         return 2;   /* 4x8 */
4972     return 3;       /* 4x4 */
4973 }
4974 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
         /*
          * Decodes the sub macroblock type of one 8x8 block in a B macroblock with
          * CABAC, using contexts 36..39. Returns a value in 0..12.
          */
4975     int type;
4976     if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
4977         return 0;   /* B_Direct_8x8 */
4978     if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
4979         return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
         // remaining types: 3..6 if the next bin is 0, otherwise 11..12 or 7..10
4980     type = 3;
4981     if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
4982         if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
4983             return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
4984         type += 4;
4985     }
         // two more bins select the type within the group of four
4986     type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
4987     type +=   get_cabac( &h->cabac, &h->cabac_state[39] );
4988     return type;
4989 }
4990
     /**
      * Decodes the transform size flag with CABAC, using the context
      * cabac_state[399 + h->neighbor_transform_size].
      *
      * @param h decoder context
      * @return the decoded bin as returned by get_cabac_noinline()
      */
4991 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
4992     return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
4993 }
4994
     /**
      * Decodes a reference index with CABAC.
      *
      * The value is the number of 1 bins read before the first 0 bin. The first
      * bin uses cabac_state[54 + ctx], with ctx derived from the cached reference
      * indices of the left (+1) and top (+2) neighbouring blocks; the second bin
      * uses context 54+4 and all later bins 54+5.
      *
      * @param h    decoder context
      * @param list reference list whose ref_cache is consulted
      * @param n    block index, mapped to a cache position through scan8[]
      * @return the reference index, or -1 if it would reach 32
      */
4995 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
4996     int refa = h->ref_cache[list][scan8[n] - 1];
4997     int refb = h->ref_cache[list][scan8[n] - 8];
4998     int ref  = 0;
4999     int ctx  = 0;
5000
         // a neighbour counts when its reference index is above 0; in B slices it
         // must additionally not be flagged in direct_cache
5001     if( h->slice_type_nos == FF_B_TYPE) {
5002         if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5003             ctx++;
5004         if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5005             ctx += 2;
5006     } else {
5007         if( refa > 0 )
5008             ctx++;
5009         if( refb > 0 )
5010             ctx += 2;
5011     }
5012
5013     while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5014         ref++;
5015         if( ctx < 4 )
5016             ctx = 4;
5017         else
5018             ctx = 5;
             // fixed upper bound that also stops the loop on damaged input
5019         if(ref >= 32 /*h->ref_list[list]*/){
5020             return -1;
5021         }
5022     }
5023     return ref;
5024 }
5025
5026 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5027     int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5028                abs( h->mvd_cache[list][scan8[n] - 8][l] );
5029     int ctxbase = (l == 0) ? 40 : 47;
5030     int mvd;
5031     int ctx = (amvd>2) + (amvd>32);
5032
5033     if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5034         return 0;
5035
5036     mvd= 1;
5037     ctx= 3;
5038     while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5039         mvd++;
5040         if( ctx < 6 )
5041             ctx++;
5042     }
5043
5044     if( mvd >= 9 ) {
5045         int k = 3;
5046         while( get_cabac_bypass( &h->cabac ) ) {
5047             mvd += 1 << k;
5048             k++;
5049             if(k>24){
5050                 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5051                 return INT_MIN;
5052             }
5053         }
5054         while( k-- ) {
5055             if( get_cabac_bypass( &h->cabac ) )
5056                 mvd += 1 << k;
5057         }
5058     }
5059     return get_cabac_bypass_sign( &h->cabac, -mvd );
5060 }
5061
5062 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
5063     int nza, nzb;
5064     int ctx = 0;
5065
5066     if( is_dc ) {
5067         if( cat == 0 ) {
5068             nza = h->left_cbp&0x100;
5069             nzb = h-> top_cbp&0x100;
5070         } else {
5071             nza = (h->left_cbp>>(6+idx))&0x01;
5072             nzb = (h-> top_cbp>>(6+idx))&0x01;
5073         }
5074     } else {
5075         assert(cat == 1 || cat == 2 || cat == 4);
5076         nza = h->non_zero_count_cache[scan8[idx] - 1];
5077         nzb = h->non_zero_count_cache[scan8[idx] - 8];
5078     }
5079
5080     if( nza > 0 )
5081         ctx++;
5082
5083     if( nzb > 0 )
5084         ctx += 2;
5085
5086     return ctx + 4 * cat;
5087 }
5088
/**
 * Context offsets for the "last significant coefficient" flag of 8x8 blocks,
 * indexed by scan position (0..62). decode_cabac_residual_internal() adds the
 * entry for the current position to its last-coefficient context base.
 * NOTE(review): declared through DECLARE_ASM_CONST, presumably so that the
 * x86 assembly significance decoder can reference it -- confirm.
 */
5089 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5090     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5091     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5092     3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5093     5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5094 };
5095
5096 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
5097     static const int significant_coeff_flag_offset[2][6] = {
5098       { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5099       { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5100     };
5101     static const int last_coeff_flag_offset[2][6] = {
5102       { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5103       { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5104     };
5105     static const int coeff_abs_level_m1_offset[6] = {
5106         227+0, 227+10, 227+20, 227+30, 227+39, 426
5107     };
5108     static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5109       { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5110         4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5111         7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5112        12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5113       { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5114         6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5115         9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5116         9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5117     };
5118     /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5119      * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5120      * map node ctx => cabac ctx for level=1 */
5121     static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5122     /* map node ctx => cabac ctx for level>1 */
5123     static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5124     static const uint8_t coeff_abs_level_transition[2][8] = {
5125     /* update node ctx after decoding a level=1 */
5126         { 1, 2, 3, 3, 4, 5, 6, 7 },
5127     /* update node ctx after decoding a level>1 */
5128         { 4, 4, 4, 4, 5, 6, 7, 7 }
5129     };
5130
5131     int index[64];
5132
5133     int av_unused last;
5134     int coeff_count = 0;
5135     int node_ctx = 0;
5136
5137     uint8_t *significant_coeff_ctx_base;
5138     uint8_t *last_coeff_ctx_base;
5139     uint8_t *abs_level_m1_ctx_base;
5140
5141 #ifndef ARCH_X86
5142 #define CABAC_ON_STACK
5143 #endif
5144 #ifdef CABAC_ON_STACK
5145 #define CC &cc
5146     CABACContext cc;
5147     cc.range     = h->cabac.range;
5148     cc.low       = h->cabac.low;
5149     cc.bytestream= h->cabac.bytestream;
5150 #else
5151 #define CC &h->cabac
5152 #endif
5153
5154
5155     /* cat: 0-> DC 16x16  n = 0
5156      *      1-> AC 16x16  n = luma4x4idx
5157      *      2-> Luma4x4   n = luma4x4idx
5158      *      3-> DC Chroma n = iCbCr
5159      *      4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
5160      *      5-> Luma8x8   n = 4 * luma8x8idx
5161      */
5162
5163     /* read coded block flag */
5164     if( is_dc || cat != 5 ) {
5165         if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
5166             if( !is_dc )
5167                 h->non_zero_count_cache[scan8[n]] = 0;
5168
5169 #ifdef CABAC_ON_STACK
5170             h->cabac.range     = cc.range     ;
5171             h->cabac.low       = cc.low       ;
5172             h->cabac.bytestream= cc.bytestream;
5173 #endif
5174             return;
5175         }
5176     }
5177
5178     significant_coeff_ctx_base = h->cabac_state
5179         + significant_coeff_flag_offset[MB_FIELD][cat];
5180     last_coeff_ctx_base = h->cabac_state
5181         + last_coeff_flag_offset[MB_FIELD][cat];
5182     abs_level_m1_ctx_base = h->cabac_state
5183         + coeff_abs_level_m1_offset[cat];
5184
5185     if( !is_dc && cat == 5 ) {
5186 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5187         for(last= 0; last < coefs; last++) { \
5188             uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5189             if( get_cabac( CC, sig_ctx )) { \
5190                 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5191                 index[coeff_count++] = last; \
5192                 if( get_cabac( CC, last_ctx ) ) { \
5193                     last= max_coeff; \
5194                     break; \
5195                 } \
5196             } \
5197         }\
5198         if( last == max_coeff -1 ) {\
5199             index[coeff_count++] = last;\
5200         }
5201         const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5202 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5203         coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5204     } else {
5205         coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5206 #else
5207         DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5208     } else {
5209         DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5210 #endif
5211     }
5212     assert(coeff_count > 0);
5213
5214     if( is_dc ) {
5215         if( cat == 0 )
5216             h->cbp_table[h->mb_xy] |= 0x100;
5217         else
5218             h->cbp_table[h->mb_xy] |= 0x40 << n;
5219     } else {
5220         if( cat == 5 )
5221             fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5222         else {
5223             assert( cat == 1 || cat == 2 || cat == 4 );
5224             h->non_zero_count_cache[scan8[n]] = coeff_count;
5225         }
5226     }
5227
5228     do {
5229         uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5230
5231         int j= scantable[index[--coeff_count]];
5232
5233         if( get_cabac( CC, ctx ) == 0 ) {
5234             node_ctx = coeff_abs_level_transition[0][node_ctx];
5235             if( is_dc ) {
5236                 block[j] = get_cabac_bypass_sign( CC, -1);
5237             }else{
5238                 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5239             }
5240         } else {
5241             int coeff_abs = 2;
5242             ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5243             node_ctx = coeff_abs_level_transition[1][node_ctx];
5244
5245             while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5246                 coeff_abs++;
5247             }
5248
5249             if( coeff_abs >= 15 ) {
5250                 int j = 0;
5251                 while( get_cabac_bypass( CC ) ) {
5252                     j++;
5253                 }
5254
5255                 coeff_abs=1;
5256                 while( j-- ) {
5257                     coeff_abs += coeff_abs + get_cabac_bypass( CC );
5258                 }
5259                 coeff_abs+= 14;
5260             }
5261
5262             if( is_dc ) {
5263                 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5264             }else{
5265                 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5266             }
5267         }
5268     } while( coeff_count );
5269 #ifdef CABAC_ON_STACK
5270             h->cabac.range     = cc.range     ;
5271             h->cabac.low       = cc.low       ;
5272             h->cabac.bytestream= cc.bytestream;
5273 #endif
5274
5275 }
5276
5277 #ifndef CONFIG_SMALL
5278 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5279     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
5280 }
5281
5282 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5283     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
5284 }
5285 #endif
5286
5287 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5288 #ifdef CONFIG_SMALL
5289     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5290 #else
5291     if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5292     else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
5293 #endif
5294 }
5295
5296 static inline void compute_mb_neighbors(H264Context *h)
5297 {
5298     MpegEncContext * const s = &h->s;
5299     const int mb_xy  = h->mb_xy;
5300     h->top_mb_xy     = mb_xy - s->mb_stride;
5301     h->left_mb_xy[0] = mb_xy - 1;
5302     if(FRAME_MBAFF){
5303         const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
5304         const int top_pair_xy      = pair_xy     - s->mb_stride;
5305         const int top_mb_frame_flag      = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5306         const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5307         const int curr_mb_frame_flag = !MB_FIELD;
5308         const int bottom = (s->mb_y & 1);
5309
5310         if (!curr_mb_frame_flag && (bottom || !top_mb_frame_flag)){
5311             h->top_mb_xy -= s->mb_stride;
5312         }
5313         if (left_mb_frame_flag != curr_mb_frame_flag) {
5314             h->left_mb_xy[0] = pair_xy - 1;
5315         }
5316     } else if (FIELD_PICTURE) {
5317         h->top_mb_xy -= s->mb_stride;
5318     }
5319     return;
5320 }
5321
5322 /**
5323  * decodes a macroblock
5324  * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5325  */
5326 static int decode_mb_cabac(H264Context *h) {
5327     MpegEncContext * const s = &h->s;
5328     int mb_xy;
5329     int mb_type, partition_count, cbp = 0;
5330     int dct8x8_allowed= h->pps.transform_8x8_mode;
5331
5332     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5333
5334     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5335     if( h->slice_type_nos != FF_I_TYPE ) {
5336         int skip;
5337         /* a skipped mb needs the aff flag from the following mb */
5338         if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5339             predict_field_decoding_flag(h);
5340         if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5341             skip = h->next_mb_skipped;
5342         else
5343             skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5344         /* read skip flags */
5345         if( skip ) {
5346             if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5347                 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5348                 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5349                 if(!h->next_mb_skipped)
5350                     h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5351             }
5352
5353             decode_mb_skip(h);
5354
5355             h->cbp_table[mb_xy] = 0;
5356             h->chroma_pred_mode_table[mb_xy] = 0;
5357             h->last_qscale_diff = 0;
5358
5359             return 0;
5360
5361         }
5362     }
5363     if(FRAME_MBAFF){
5364         if( (s->mb_y&1) == 0 )
5365             h->mb_mbaff =
5366             h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5367     }
5368
5369     h->prev_mb_skipped = 0;
5370
5371     compute_mb_neighbors(h);
5372     mb_type = decode_cabac_mb_type( h );
5373     assert(mb_type >= 0);
5374
5375     if( h->slice_type_nos == FF_B_TYPE ) {
5376         if( mb_type < 23 ){
5377             partition_count= b_mb_type_info[mb_type].partition_count;
5378             mb_type=         b_mb_type_info[mb_type].type;
5379         }else{
5380             mb_type -= 23;
5381             goto decode_intra_mb;
5382         }
5383     } else if( h->slice_type_nos == FF_P_TYPE ) {
5384         if( mb_type < 5) {
5385             partition_count= p_mb_type_info[mb_type].partition_count;
5386             mb_type=         p_mb_type_info[mb_type].type;
5387         } else {
5388             mb_type -= 5;
5389             goto decode_intra_mb;
5390         }
5391     } else {
5392         if(h->slice_type == FF_SI_TYPE && mb_type)
5393             mb_type--;
5394         assert(h->slice_type_nos == FF_I_TYPE);
5395 decode_intra_mb:
5396         partition_count = 0;
5397         cbp= i_mb_type_info[mb_type].cbp;
5398         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5399         mb_type= i_mb_type_info[mb_type].type;
5400     }
5401     if(MB_FIELD)
5402         mb_type |= MB_TYPE_INTERLACED;
5403
5404     h->slice_table[ mb_xy ]= h->slice_num;
5405
5406     if(IS_INTRA_PCM(mb_type)) {
5407         const uint8_t *ptr;
5408
5409         // We assume these blocks are very rare so we do not optimize it.
5410         // FIXME The two following lines get the bitstream position in the cabac
5411         // decode, I think it should be done by a function in cabac.h (or cabac.c).
5412         ptr= h->cabac.bytestream;
5413         if(h->cabac.low&0x1) ptr--;
5414         if(CABAC_BITS==16){
5415             if(h->cabac.low&0x1FF) ptr--;
5416         }
5417
5418         // The pixels are stored in the same order as levels in h->mb array.
5419         memcpy(h->mb, ptr, 256); ptr+=256;
5420         if(CHROMA){
5421             memcpy(h->mb+128, ptr, 128); ptr+=128;
5422         }
5423
5424         ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5425
5426         // All blocks are present
5427         h->cbp_table[mb_xy] = 0x1ef;
5428         h->chroma_pred_mode_table[mb_xy] = 0;
5429         // In deblocking, the quantizer is 0
5430         s->current_picture.qscale_table[mb_xy]= 0;
5431         // All coeffs are present
5432         memset(h->non_zero_count[mb_xy], 16, 16);
5433         s->current_picture.mb_type[mb_xy]= mb_type;
5434         h->last_qscale_diff = 0;
5435         return 0;
5436     }
5437
5438     if(MB_MBAFF){
5439         h->ref_count[0] <<= 1;
5440         h->ref_count[1] <<= 1;
5441     }
5442
5443     fill_caches(h, mb_type, 0);
5444
5445     if( IS_INTRA( mb_type ) ) {
5446         int i, pred_mode;
5447         if( IS_INTRA4x4( mb_type ) ) {
5448             if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5449                 mb_type |= MB_TYPE_8x8DCT;
5450                 for( i = 0; i < 16; i+=4 ) {
5451                     int pred = pred_intra_mode( h, i );
5452                     int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5453                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5454                 }
5455             } else {
5456                 for( i = 0; i < 16; i++ ) {
5457                     int pred = pred_intra_mode( h, i );
5458                     h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5459
5460                 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5461                 }
5462             }
5463             write_back_intra_pred_mode(h);
5464             if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5465         } else {
5466             h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5467             if( h->intra16x16_pred_mode < 0 ) return -1;
5468         }
5469         if(CHROMA){
5470             h->chroma_pred_mode_table[mb_xy] =
5471             pred_mode                        = decode_cabac_mb_chroma_pre_mode( h );
5472
5473             pred_mode= check_intra_pred_mode( h, pred_mode );
5474             if( pred_mode < 0 ) return -1;
5475             h->chroma_pred_mode= pred_mode;
5476         }
5477     } else if( partition_count == 4 ) {
5478         int i, j, sub_partition_count[4], list, ref[2][4];
5479
5480         if( h->slice_type_nos == FF_B_TYPE ) {
5481             for( i = 0; i < 4; i++ ) {
5482                 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5483                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5484                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5485             }
5486             if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5487                           h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5488                 pred_direct_motion(h, &mb_type);
5489                 h->ref_cache[0][scan8[4]] =
5490                 h->ref_cache[1][scan8[4]] =
5491                 h->ref_cache[0][scan8[12]] =
5492                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5493                 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5494                     for( i = 0; i < 4; i++ )
5495                         if( IS_DIRECT(h->sub_mb_type[i]) )
5496                             fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5497                 }
5498             }
5499         } else {
5500             for( i = 0; i < 4; i++ ) {
5501                 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5502                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5503                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5504             }
5505         }
5506
5507         for( list = 0; list < h->list_count; list++ ) {
5508                 for( i = 0; i < 4; i++ ) {
5509                     if(IS_DIRECT(h->sub_mb_type[i])) continue;
5510                     if(IS_DIR(h->sub_mb_type[i], 0, list)){
5511                         if( h->ref_count[list] > 1 ){
5512                             ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5513                             if(ref[list][i] >= (unsigned)h->ref_count[list]){
5514                                 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
5515                                 return -1;
5516                             }
5517                         }else
5518                             ref[list][i] = 0;
5519                     } else {
5520                         ref[list][i] = -1;
5521                     }
5522                                                        h->ref_cache[list][ scan8[4*i]+1 ]=
5523                     h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5524                 }
5525         }
5526
5527         if(dct8x8_allowed)
5528             dct8x8_allowed = get_dct8x8_allowed(h);
5529
5530         for(list=0; list<h->list_count; list++){
5531             for(i=0; i<4; i++){
5532                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ];
5533                 if(IS_DIRECT(h->sub_mb_type[i])){
5534                     fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5535                     continue;
5536                 }
5537
5538                 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5539                     const int sub_mb_type= h->sub_mb_type[i];
5540                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5541                     for(j=0; j<sub_partition_count[i]; j++){
5542                         int mpx, mpy;
5543                         int mx, my;
5544                         const int index= 4*i + block_width*j;
5545                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5546                         int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5547                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5548
5549                         mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5550                         my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5551                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5552
5553                         if(IS_SUB_8X8(sub_mb_type)){
5554                             mv_cache[ 1 ][0]=
5555                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5556                             mv_cache[ 1 ][1]=
5557                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5558
5559                             mvd_cache[ 1 ][0]=
5560                             mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5561                             mvd_cache[ 1 ][1]=
5562                             mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5563                         }else if(IS_SUB_8X4(sub_mb_type)){
5564                             mv_cache[ 1 ][0]= mx;
5565                             mv_cache[ 1 ][1]= my;
5566
5567                             mvd_cache[ 1 ][0]= mx - mpx;
5568                             mvd_cache[ 1 ][1]= my - mpy;
5569                         }else if(IS_SUB_4X8(sub_mb_type)){
5570                             mv_cache[ 8 ][0]= mx;
5571                             mv_cache[ 8 ][1]= my;
5572
5573                             mvd_cache[ 8 ][0]= mx - mpx;
5574                             mvd_cache[ 8 ][1]= my - mpy;
5575                         }
5576                         mv_cache[ 0 ][0]= mx;
5577                         mv_cache[ 0 ][1]= my;
5578
5579                         mvd_cache[ 0 ][0]= mx - mpx;
5580                         mvd_cache[ 0 ][1]= my - mpy;
5581                     }
5582                 }else{
5583                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5584                     uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5585                     p[0] = p[1] = p[8] = p[9] = 0;
5586                     pd[0]= pd[1]= pd[8]= pd[9]= 0;
5587                 }
5588             }
5589         }
5590     } else if( IS_DIRECT(mb_type) ) {
5591         pred_direct_motion(h, &mb_type);
5592         fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5593         fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5594         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5595     } else {
5596         int list, mx, my, i, mpx, mpy;
5597         if(IS_16X16(mb_type)){
5598             for(list=0; list<h->list_count; list++){
5599                 if(IS_DIR(mb_type, 0, list)){
5600                     int ref;
5601                     if(h->ref_count[list] > 1){
5602                         ref= decode_cabac_mb_ref(h, list, 0);
5603                         if(ref >= (unsigned)h->ref_count[list]){
5604                             av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5605                             return -1;
5606                         }
5607                     }else
5608                         ref=0;
5609                         fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5610                 }else
5611                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5612             }
5613             for(list=0; list<h->list_count; list++){
5614                 if(IS_DIR(mb_type, 0, list)){
5615                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5616
5617                     mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5618                     my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5619                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5620
5621                     fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5622                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5623                 }else
5624                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5625             }
5626         }
5627         else if(IS_16X8(mb_type)){
5628             for(list=0; list<h->list_count; list++){
5629                     for(i=0; i<2; i++){
5630                         if(IS_DIR(mb_type, i, list)){
5631                             int ref;
5632                             if(h->ref_count[list] > 1){
5633                                 ref= decode_cabac_mb_ref( h, list, 8*i );
5634                                 if(ref >= (unsigned)h->ref_count[list]){
5635                                     av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5636                                     return -1;
5637                                 }
5638                             }else
5639                                 ref=0;
5640                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5641                         }else
5642                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5643                     }
5644             }
5645             for(list=0; list<h->list_count; list++){
5646                 for(i=0; i<2; i++){
5647                     if(IS_DIR(mb_type, i, list)){
5648                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5649                         mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5650                         my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5651                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5652
5653                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5654                         fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5655                     }else{
5656                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5657                         fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5658                     }
5659                 }
5660             }
5661         }else{
5662             assert(IS_8X16(mb_type));
5663             for(list=0; list<h->list_count; list++){
5664                     for(i=0; i<2; i++){
5665                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5666                             int ref;
5667                             if(h->ref_count[list] > 1){
5668                                 ref= decode_cabac_mb_ref( h, list, 4*i );
5669                                 if(ref >= (unsigned)h->ref_count[list]){
5670                                     av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5671                                     return -1;
5672                                 }
5673                             }else
5674                                 ref=0;
5675                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5676                         }else
5677                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5678                     }
5679             }
5680             for(list=0; list<h->list_count; list++){
5681                 for(i=0; i<2; i++){
5682                     if(IS_DIR(mb_type, i, list)){
5683                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5684                         mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5685                         my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5686
5687                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5688                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5689                         fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5690                     }else{
5691                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5692                         fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5693                     }
5694                 }
5695             }
5696         }
5697     }
5698
5699    if( IS_INTER( mb_type ) ) {
5700         h->chroma_pred_mode_table[mb_xy] = 0;
5701         write_back_motion( h, mb_type );
5702    }
5703
5704     if( !IS_INTRA16x16( mb_type ) ) {
5705         cbp  = decode_cabac_mb_cbp_luma( h );
5706         if(CHROMA)
5707             cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5708     }
5709
5710     h->cbp_table[mb_xy] = h->cbp = cbp;
5711
5712     if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5713         if( decode_cabac_mb_transform_size( h ) )
5714             mb_type |= MB_TYPE_8x8DCT;
5715     }
5716     s->current_picture.mb_type[mb_xy]= mb_type;
5717
5718     if( cbp || IS_INTRA16x16( mb_type ) ) {
5719         const uint8_t *scan, *scan8x8, *dc_scan;
5720         const uint32_t *qmul;
5721         int dqp;
5722
5723         if(IS_INTERLACED(mb_type)){
5724             scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5725             scan= s->qscale ? h->field_scan : h->field_scan_q0;
5726             dc_scan= luma_dc_field_scan;
5727         }else{
5728             scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5729             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5730             dc_scan= luma_dc_zigzag_scan;
5731         }
5732
5733         h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5734         if( dqp == INT_MIN ){
5735             av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
5736             return -1;
5737         }
5738         s->qscale += dqp;
5739         if(((unsigned)s->qscale) > 51){
5740             if(s->qscale<0) s->qscale+= 52;
5741             else            s->qscale-= 52;
5742         }
5743         h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5744         h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5745
5746         if( IS_INTRA16x16( mb_type ) ) {
5747             int i;
5748             //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5749             decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5750
5751             if( cbp&15 ) {
5752                 qmul = h->dequant4_coeff[0][s->qscale];
5753                 for( i = 0; i < 16; i++ ) {
5754                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5755                     decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5756                 }
5757             } else {
5758                 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5759             }
5760         } else {
5761             int i8x8, i4x4;
5762             for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5763                 if( cbp & (1<<i8x8) ) {
5764                     if( IS_8x8DCT(mb_type) ) {
5765                         decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5766                             scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5767                     } else {
5768                         qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5769                         for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5770                             const int index = 4*i8x8 + i4x4;
5771                             //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5772 //START_TIMER
5773                             decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5774 //STOP_TIMER("decode_residual")
5775                         }
5776                     }
5777                 } else {
5778                     uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5779                     nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5780                 }
5781             }
5782         }
5783
5784         if( cbp&0x30 ){
5785             int c;
5786             for( c = 0; c < 2; c++ ) {
5787                 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5788                 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5789             }
5790         }
5791
5792         if( cbp&0x20 ) {
5793             int c, i;
5794             for( c = 0; c < 2; c++ ) {
5795                 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5796                 for( i = 0; i < 4; i++ ) {
5797                     const int index = 16 + 4 * c + i;
5798                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5799                     decode_cabac_residual(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
5800                 }
5801             }
5802         } else {
5803             uint8_t * const nnz= &h->non_zero_count_cache[0];
5804             nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5805             nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5806         }
5807     } else {
5808         uint8_t * const nnz= &h->non_zero_count_cache[0];
5809         fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5810         nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5811         nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5812         h->last_qscale_diff = 0;
5813     }
5814
5815     s->current_picture.qscale_table[mb_xy]= s->qscale;
5816     write_back_non_zero_count(h);
5817
5818     if(MB_MBAFF){
5819         h->ref_count[0] >>= 1;
5820         h->ref_count[1] >>= 1;
5821     }
5822
5823     return 0;
5824 }
5825
5826
5827 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5828     const int index_a = qp + h->slice_alpha_c0_offset;
5829     const int alpha = (alpha_table+52)[index_a];
5830     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
5831
5832     if( bS[0] < 4 ) {
5833         int8_t tc[4];
5834         tc[0] = (tc0_table+52)[index_a][bS[0]];
5835         tc[1] = (tc0_table+52)[index_a][bS[1]];
5836         tc[2] = (tc0_table+52)[index_a][bS[2]];
5837         tc[3] = (tc0_table+52)[index_a][bS[3]];
5838         h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5839     } else {
5840         h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
5841     }
5842 }
5843 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5844     const int index_a = qp + h->slice_alpha_c0_offset;
5845     const int alpha = (alpha_table+52)[index_a];
5846     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
5847
5848     if( bS[0] < 4 ) {
5849         int8_t tc[4];
5850         tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
5851         tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
5852         tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
5853         tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
5854         h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5855     } else {
5856         h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
5857     }
5858 }
5859
5860 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5861     int i;
5862     for( i = 0; i < 16; i++, pix += stride) {
5863         int index_a;
5864         int alpha;
5865         int beta;
5866
5867         int qp_index;
5868         int bS_index = (i >> 1);
5869         if (!MB_FIELD) {
5870             bS_index &= ~1;
5871             bS_index |= (i & 1);
5872         }
5873
5874         if( bS[bS_index] == 0 ) {
5875             continue;
5876         }
5877
5878         qp_index = MB_FIELD ? (i >> 3) : (i & 1);
5879         index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5880         alpha = (alpha_table+52)[index_a];
5881         beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
5882
5883         if( bS[bS_index] < 4 ) {
5884             const int tc0 = (tc0_table+52)[index_a][bS[bS_index]];
5885             const int p0 = pix[-1];
5886             const int p1 = pix[-2];
5887             const int p2 = pix[-3];
5888             const int q0 = pix[0];
5889             const int q1 = pix[1];
5890             const int q2 = pix[2];
5891
5892             if( FFABS( p0 - q0 ) < alpha &&
5893                 FFABS( p1 - p0 ) < beta &&
5894                 FFABS( q1 - q0 ) < beta ) {
5895                 int tc = tc0;
5896                 int i_delta;
5897
5898                 if( FFABS( p2 - p0 ) < beta ) {
5899                     pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
5900                     tc++;
5901                 }
5902                 if( FFABS( q2 - q0 ) < beta ) {
5903                     pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
5904                     tc++;
5905                 }
5906
5907                 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
5908                 pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
5909                 pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
5910                 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
5911             }
5912         }else{
5913             const int p0 = pix[-1];
5914             const int p1 = pix[-2];
5915             const int p2 = pix[-3];
5916
5917             const int q0 = pix[0];
5918             const int q1 = pix[1];
5919             const int q2 = pix[2];
5920
5921             if( FFABS( p0 - q0 ) < alpha &&
5922                 FFABS( p1 - p0 ) < beta &&
5923                 FFABS( q1 - q0 ) < beta ) {
5924
5925                 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5926                     if( FFABS( p2 - p0 ) < beta)
5927                     {
5928                         const int p3 = pix[-4];
5929                         /* p0', p1', p2' */
5930                         pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5931                         pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5932                         pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
5933                     } else {
5934                         /* p0' */
5935                         pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5936                     }
5937                     if( FFABS( q2 - q0 ) < beta)
5938                     {
5939                         const int q3 = pix[3];
5940                         /* q0', q1', q2' */
5941                         pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
5942                         pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
5943                         pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
5944                     } else {
5945                         /* q0' */
5946                         pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5947                     }
5948                 }else{
5949                     /* p0', q0' */
5950                     pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5951                     pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5952                 }
5953                 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
5954             }
5955         }
5956     }
5957 }
5958 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5959     int i;
5960     for( i = 0; i < 8; i++, pix += stride) {
5961         int index_a;
5962         int alpha;
5963         int beta;
5964
5965         int qp_index;
5966         int bS_index = i;
5967
5968         if( bS[bS_index] == 0 ) {
5969             continue;
5970         }
5971
5972         qp_index = MB_FIELD ? (i >> 2) : (i & 1);
5973         index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5974         alpha = (alpha_table+52)[index_a];
5975         beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
5976
5977         if( bS[bS_index] < 4 ) {
5978             const int tc = (tc0_table+52)[index_a][bS[bS_index]] + 1;
5979             const int p0 = pix[-1];
5980             const int p1 = pix[-2];
5981             const int q0 = pix[0];
5982             const int q1 = pix[1];
5983
5984             if( FFABS( p0 - q0 ) < alpha &&
5985                 FFABS( p1 - p0 ) < beta &&
5986                 FFABS( q1 - q0 ) < beta ) {
5987                 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
5988
5989                 pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
5990                 pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
5991                 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
5992             }
5993         }else{
5994             const int p0 = pix[-1];
5995             const int p1 = pix[-2];
5996             const int q0 = pix[0];
5997             const int q1 = pix[1];
5998
5999             if( FFABS( p0 - q0 ) < alpha &&
6000                 FFABS( p1 - p0 ) < beta &&
6001                 FFABS( q1 - q0 ) < beta ) {
6002
6003                 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
6004                 pix[0]  = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
6005                 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6006             }
6007         }
6008     }
6009 }
6010
6011 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6012     const int index_a = qp + h->slice_alpha_c0_offset;
6013     const int alpha = (alpha_table+52)[index_a];
6014     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
6015
6016     if( bS[0] < 4 ) {
6017         int8_t tc[4];
6018         tc[0] = (tc0_table+52)[index_a][bS[0]];
6019         tc[1] = (tc0_table+52)[index_a][bS[1]];
6020         tc[2] = (tc0_table+52)[index_a][bS[2]];
6021         tc[3] = (tc0_table+52)[index_a][bS[3]];
6022         h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6023     } else {
6024         h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
6025     }
6026 }
6027
6028 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6029     const int index_a = qp + h->slice_alpha_c0_offset;
6030     const int alpha = (alpha_table+52)[index_a];
6031     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
6032
6033     if( bS[0] < 4 ) {
6034         int8_t tc[4];
6035         tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
6036         tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
6037         tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
6038         tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
6039         h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6040     } else {
6041         h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
6042     }
6043 }
6044
6045 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6046     MpegEncContext * const s = &h->s;
6047     int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6048     int mb_xy, mb_type;
6049     int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
6050
6051     mb_xy = h->mb_xy;
6052
6053     if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6054         !(s->flags2 & CODEC_FLAG2_FAST) || //FIXME filter_mb_fast is broken, thus hasto be, but should not under CODEC_FLAG2_FAST
6055        (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6056                                       h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6057         filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6058         return;
6059     }
6060     assert(!FRAME_MBAFF);
6061
6062     mb_type = s->current_picture.mb_type[mb_xy];
6063     qp = s->current_picture.qscale_table[mb_xy];
6064     qp0 = s->current_picture.qscale_table[mb_xy-1];
6065     qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6066     qpc = get_chroma_qp( h, 0, qp );
6067     qpc0 = get_chroma_qp( h, 0, qp0 );
6068     qpc1 = get_chroma_qp( h, 0, qp1 );
6069     qp0 = (qp + qp0 + 1) >> 1;
6070     qp1 = (qp + qp1 + 1) >> 1;
6071     qpc0 = (qpc + qpc0 + 1) >> 1;
6072     qpc1 = (qpc + qpc1 + 1) >> 1;
6073     qp_thresh = 15 - h->slice_alpha_c0_offset;
6074     if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6075        qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
6076         return;
6077
6078     if( IS_INTRA(mb_type) ) {
6079         int16_t bS4[4] = {4,4,4,4};
6080         int16_t bS3[4] = {3,3,3,3};
6081         int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6082         if( IS_8x8DCT(mb_type) ) {
6083             filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6084             filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6085             filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6086             filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6087         } else {
6088             filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6089             filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6090             filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6091             filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6092             filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6093             filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6094             filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6095             filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6096         }
6097         filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6098         filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6099         filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6100         filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6101         filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6102         filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6103         filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6104         filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
6105         return;
6106     } else {
6107         DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6108         uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6109         int edges;
6110         if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6111             edges = 4;
6112             bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6113         } else {
6114             int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6115                              (mb_type & MB_TYPE_16x8) ? 1 : 0;
6116             int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6117                              && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6118                              ? 3 : 0;
6119             int step = IS_8x8DCT(mb_type) ? 2 : 1;
6120             edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6121             s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6122                                               (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
6123         }
6124         if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6125             bSv[0][0] = 0x0004000400040004ULL;
6126         if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6127             bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
6128
6129 #define FILTER(hv,dir,edge)\
6130         if(bSv[dir][edge]) {\
6131             filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6132             if(!(edge&1)) {\
6133                 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6134                 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6135             }\
6136         }
6137         if( edges == 1 ) {
6138             FILTER(v,0,0);
6139             FILTER(h,1,0);
6140         } else if( IS_8x8DCT(mb_type) ) {
6141             FILTER(v,0,0);
6142             FILTER(v,0,2);
6143             FILTER(h,1,0);
6144             FILTER(h,1,2);
6145         } else {
6146             FILTER(v,0,0);
6147             FILTER(v,0,1);
6148             FILTER(v,0,2);
6149             FILTER(v,0,3);
6150             FILTER(h,1,0);
6151             FILTER(h,1,1);
6152             FILTER(h,1,2);
6153             FILTER(h,1,3);
6154         }
6155 #undef FILTER
6156     }
6157 }
6158
6159
6160 static void av_always_inline filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
6161     MpegEncContext * const s = &h->s;
6162     int edge;
6163     const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6164     const int mbm_type = s->current_picture.mb_type[mbm_xy];
6165     int (*ref2frm) [64] = h->ref2frm[ h->slice_num          &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6166     int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6167     int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
6168
6169     const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6170                               == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6171     // how often to recheck mv-based bS when iterating between edges
6172     const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6173                           (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6174     // how often to recheck mv-based bS when iterating along each edge
6175     const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6176
6177     if (first_vertical_edge_done) {
6178         start = 1;
6179     }
6180
6181     if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6182         start = 1;
6183
6184     if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6185         && !IS_INTERLACED(mb_type)
6186         && IS_INTERLACED(mbm_type)
6187         ) {
6188         // This is a special case in the norm where the filtering must
6189         // be done twice (one each of the field) even if we are in a
6190         // frame macroblock.
6191         //
6192         static const int nnz_idx[4] = {4,5,6,3};
6193         unsigned int tmp_linesize   = 2 *   linesize;
6194         unsigned int tmp_uvlinesize = 2 * uvlinesize;
6195         int mbn_xy = mb_xy - 2 * s->mb_stride;
6196         int qp;
6197         int i, j;
6198         int16_t bS[4];
6199
6200         for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6201             if( IS_INTRA(mb_type) ||
6202                 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6203                 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6204             } else {
6205                 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6206                 for( i = 0; i < 4; i++ ) {
6207                     if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6208                         mbn_nnz[nnz_idx[i]] != 0 )
6209                         bS[i] = 2;
6210                     else
6211                         bS[i] = 1;
6212                 }
6213             }
6214             // Do not use s->qscale as luma quantizer because it has not the same
6215             // value in IPCM macroblocks.
6216             qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6217             tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6218             { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6219             filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6220             filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6221                               ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6222             filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6223                               ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6224         }
6225
6226         start = 1;
6227     }
6228
6229     /* Calculate bS */
6230     for( edge = start; edge < edges; edge++ ) {
6231         /* mbn_xy: neighbor macroblock */
6232         const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6233         const int mbn_type = s->current_picture.mb_type[mbn_xy];
6234         int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
6235         int16_t bS[4];
6236         int qp;
6237
6238         if( (edge&1) && IS_8x8DCT(mb_type) )
6239             continue;
6240
6241         if( IS_INTRA(mb_type) ||
6242             IS_INTRA(mbn_type) ) {
6243             int value;
6244             if (edge == 0) {
6245                 if (   (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6246                     || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6247                 ) {
6248                     value = 4;
6249                 } else {
6250                     value = 3;
6251                 }
6252             } else {
6253                 value = 3;
6254             }
6255             bS[0] = bS[1] = bS[2] = bS[3] = value;
6256         } else {
6257             int i, l;
6258             int mv_done;
6259
6260             if( edge & mask_edge ) {
6261                 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6262                 mv_done = 1;
6263             }
6264             else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6265                 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6266                 mv_done = 1;
6267             }
6268             else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6269                 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6270                 int bn_idx= b_idx - (dir ? 8:1);
6271                 int v = 0;
6272
6273                 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6274                     v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6275                          FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6276                          FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6277                 }
6278
6279                 if(h->slice_type_nos == FF_B_TYPE && v){
6280                     v=0;
6281                     for( l = 0; !v && l < 2; l++ ) {
6282                         int ln= 1-l;
6283                         v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6284                             FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6285                             FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6286                     }
6287                 }
6288
6289                 bS[0] = bS[1] = bS[2] = bS[3] = v;
6290                 mv_done = 1;
6291             }
6292             else
6293                 mv_done = 0;
6294
6295             for( i = 0; i < 4; i++ ) {
6296                 int x = dir == 0 ? edge : i;
6297                 int y = dir == 0 ? i    : edge;
6298                 int b_idx= 8 + 4 + x + 8*y;
6299                 int bn_idx= b_idx - (dir ? 8:1);
6300
6301                 if( h->non_zero_count_cache[b_idx] |
6302                     h->non_zero_count_cache[bn_idx] ) {
6303                     bS[i] = 2;
6304                 }
6305                 else if(!mv_done)
6306                 {
6307                     bS[i] = 0;
6308                     for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6309                         if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6310                             FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6311                             FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6312                             bS[i] = 1;
6313                             break;
6314                         }
6315                     }
6316
6317                     if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6318                         bS[i] = 0;
6319                         for( l = 0; l < 2; l++ ) {
6320                             int ln= 1-l;
6321                             if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6322                                 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6323                                 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
6324                                 bS[i] = 1;
6325                                 break;
6326                             }
6327                         }
6328                     }
6329                 }
6330             }
6331
6332             if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6333                 continue;
6334         }
6335
6336         /* Filter edge */
6337         // Do not use s->qscale as luma quantizer because it has not the same
6338         // value in IPCM macroblocks.
6339         qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6340         //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6341         tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6342         { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6343         if( dir == 0 ) {
6344             filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6345             if( (edge&1) == 0 ) {
6346                 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6347                                   ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6348                 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6349                                   ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6350             }
6351         } else {
6352             filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6353             if( (edge&1) == 0 ) {
6354                 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6355                                   ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6356                 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6357                                   ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6358             }
6359         }
6360     }
6361 }
6362
6363 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6364     MpegEncContext * const s = &h->s;
6365     const int mb_xy= mb_x + mb_y*s->mb_stride;
6366     const int mb_type = s->current_picture.mb_type[mb_xy];
6367     const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6368     int first_vertical_edge_done = 0;
6369     int dir;
6370
6371     //for sufficiently low qp, filtering wouldn't do anything
6372     //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6373     if(!FRAME_MBAFF){
6374         int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6375         int qp = s->current_picture.qscale_table[mb_xy];
6376         if(qp <= qp_thresh
6377            && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6378            && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6379             return;
6380         }
6381     }
6382
6383     // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6384     if(!h->pps.cabac && h->pps.transform_8x8_mode){
6385         int top_type, left_type[2];
6386         top_type     = s->current_picture.mb_type[h->top_mb_xy]    ;
6387         left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6388         left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
6389
6390         if(IS_8x8DCT(top_type)){
6391             h->non_zero_count_cache[4+8*0]=
6392             h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6393             h->non_zero_count_cache[6+8*0]=
6394             h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6395         }
6396         if(IS_8x8DCT(left_type[0])){
6397             h->non_zero_count_cache[3+8*1]=
6398             h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6399         }
6400         if(IS_8x8DCT(left_type[1])){
6401             h->non_zero_count_cache[3+8*3]=
6402             h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
6403         }
6404
6405         if(IS_8x8DCT(mb_type)){
6406             h->non_zero_count_cache[scan8[0   ]]= h->non_zero_count_cache[scan8[1   ]]=
6407             h->non_zero_count_cache[scan8[2   ]]= h->non_zero_count_cache[scan8[3   ]]= h->cbp & 1;
6408
6409             h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6410             h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
6411
6412             h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6413             h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
6414
6415             h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6416             h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
6417         }
6418     }
6419
6420     if (FRAME_MBAFF
6421             // left mb is in picture
6422             && h->slice_table[mb_xy-1] != 0xFFFF
6423             // and current and left pair do not have the same interlaced type
6424             && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6425             // and left mb is in the same slice if deblocking_filter == 2
6426             && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6427         /* First vertical edge is different in MBAFF frames
6428          * There are 8 different bS to compute and 2 different Qp
6429          */
6430         const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6431         const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6432         int16_t bS[8];
6433         int qp[2];
6434         int bqp[2];
6435         int rqp[2];
6436         int mb_qp, mbn0_qp, mbn1_qp;
6437         int i;
6438         first_vertical_edge_done = 1;
6439
6440         if( IS_INTRA(mb_type) )
6441             bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6442         else {
6443             for( i = 0; i < 8; i++ ) {
6444                 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6445
6446                 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6447                     bS[i] = 4;
6448                 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6449                          ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
6450                             (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
6451                                                                        :
6452                             h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
6453                     bS[i] = 2;
6454                 else
6455                     bS[i] = 1;
6456             }
6457         }
6458
6459         mb_qp = s->current_picture.qscale_table[mb_xy];
6460         mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6461         mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6462         qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6463         bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6464                    get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6465         rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6466                    get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6467         qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6468         bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6469                    get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6470         rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6471                    get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6472
6473         /* Filter edge */
6474         tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6475         { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6476         filter_mb_mbaff_edgev ( h, &img_y [0], linesize,   bS, qp );
6477         filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6478         filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6479     }
6480
6481 #ifdef CONFIG_SMALL
6482     for( dir = 0; dir < 2; dir++ )
6483         filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
6484 #else
6485     filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
6486     filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
6487 #endif
6488 }
6489
6490 static int decode_slice(struct AVCodecContext *avctx, void *arg){
6491     H264Context *h = *(void**)arg;
6492     MpegEncContext * const s = &h->s;
6493     const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6494
6495     s->mb_skip_run= -1;
6496
6497     h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
6498                     (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding);
6499
6500     if( h->pps.cabac ) {
6501         int i;
6502
6503         /* realign */
6504         align_get_bits( &s->gb );
6505
6506         /* init cabac */
6507         ff_init_cabac_states( &h->cabac);
6508         ff_init_cabac_decoder( &h->cabac,
6509                                s->gb.buffer + get_bits_count(&s->gb)/8,
6510                                ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6511         /* calculate pre-state */
6512         for( i= 0; i < 460; i++ ) {
6513             int pre;
6514             if( h->slice_type_nos == FF_I_TYPE )
6515                 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6516             else
6517                 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6518
6519             if( pre <= 63 )
6520                 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6521             else
6522                 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6523         }
6524
6525         for(;;){
6526 //START_TIMER
6527             int ret = decode_mb_cabac(h);
6528             int eos;
6529 //STOP_TIMER("decode_mb_cabac")
6530
6531             if(ret>=0) hl_decode_mb(h);
6532
6533             if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6534                 s->mb_y++;
6535
6536                 ret = decode_mb_cabac(h);
6537
6538                 if(ret>=0) hl_decode_mb(h);
6539                 s->mb_y--;
6540             }
6541             eos = get_cabac_terminate( &h->cabac );
6542
6543             if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6544                 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6545                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6546                 return -1;
6547             }
6548
6549             if( ++s->mb_x >= s->mb_width ) {
6550                 s->mb_x = 0;
6551                 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6552                 ++s->mb_y;
6553                 if(FIELD_OR_MBAFF_PICTURE) {
6554                     ++s->mb_y;
6555                 }
6556             }
6557
6558             if( eos || s->mb_y >= s->mb_height ) {
6559                 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6560                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6561                 return 0;
6562             }
6563         }
6564
6565     } else {
6566         for(;;){
6567             int ret = decode_mb_cavlc(h);
6568
6569             if(ret>=0) hl_decode_mb(h);
6570
6571             if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6572                 s->mb_y++;
6573                 ret = decode_mb_cavlc(h);
6574
6575                 if(ret>=0) hl_decode_mb(h);
6576                 s->mb_y--;
6577             }
6578
6579             if(ret<0){
6580                 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6581                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6582
6583                 return -1;
6584             }
6585
6586             if(++s->mb_x >= s->mb_width){
6587                 s->mb_x=0;
6588                 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6589                 ++s->mb_y;
6590                 if(FIELD_OR_MBAFF_PICTURE) {
6591                     ++s->mb_y;
6592                 }
6593                 if(s->mb_y >= s->mb_height){
6594                     tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6595
6596                     if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6597                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6598
6599                         return 0;
6600                     }else{
6601                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6602
6603                         return -1;
6604                     }
6605                 }
6606             }
6607
6608             if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6609                 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6610                 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6611                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6612
6613                     return 0;
6614                 }else{
6615                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6616
6617                     return -1;
6618                 }
6619             }
6620         }
6621     }
6622
6623 #if 0
6624     for(;s->mb_y < s->mb_height; s->mb_y++){
6625         for(;s->mb_x < s->mb_width; s->mb_x++){
6626             int ret= decode_mb(h);
6627
6628             hl_decode_mb(h);
6629
6630             if(ret<0){
6631                 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6632                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6633
6634                 return -1;
6635             }
6636
6637             if(++s->mb_x >= s->mb_width){
6638                 s->mb_x=0;
6639                 if(++s->mb_y >= s->mb_height){
6640                     if(get_bits_count(s->gb) == s->gb.size_in_bits){
6641                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6642
6643                         return 0;
6644                     }else{
6645                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6646
6647                         return -1;
6648                     }
6649                 }
6650             }
6651
6652             if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
6653                 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6654                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6655
6656                     return 0;
6657                 }else{
6658                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6659
6660                     return -1;
6661                 }
6662             }
6663         }
6664         s->mb_x=0;
6665         ff_draw_horiz_band(s, 16*s->mb_y, 16);
6666     }
6667 #endif
6668     return -1; //not reached
6669 }
6670
6671 static int decode_picture_timing(H264Context *h){
6672     MpegEncContext * const s = &h->s;
6673     if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
6674         skip_bits(&s->gb, h->sps.cpb_removal_delay_length); /* cpb_removal_delay */
6675         skip_bits(&s->gb, h->sps.dpb_output_delay_length);  /* dpb_output_delay */
6676     }
6677     if(h->sps.pic_struct_present_flag){
6678         unsigned int i, num_clock_ts;
6679         h->sei_pic_struct = get_bits(&s->gb, 4);
6680
6681         if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
6682             return -1;
6683
6684         num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];
6685
6686         for (i = 0 ; i < num_clock_ts ; i++){
6687             if(get_bits(&s->gb, 1)){                  /* clock_timestamp_flag */
6688                 unsigned int full_timestamp_flag;
6689                 skip_bits(&s->gb, 2);                 /* ct_type */
6690                 skip_bits(&s->gb, 1);                 /* nuit_field_based_flag */
6691                 skip_bits(&s->gb, 5);                 /* counting_type */
6692                 full_timestamp_flag = get_bits(&s->gb, 1);
6693                 skip_bits(&s->gb, 1);                 /* discontinuity_flag */
6694                 skip_bits(&s->gb, 1);                 /* cnt_dropped_flag */
6695                 skip_bits(&s->gb, 8);                 /* n_frames */
6696                 if(full_timestamp_flag){
6697                     skip_bits(&s->gb, 6);             /* seconds_value 0..59 */
6698                     skip_bits(&s->gb, 6);             /* minutes_value 0..59 */
6699                     skip_bits(&s->gb, 5);             /* hours_value 0..23 */
6700                 }else{
6701                     if(get_bits(&s->gb, 1)){          /* seconds_flag */
6702                         skip_bits(&s->gb, 6);         /* seconds_value range 0..59 */
6703                         if(get_bits(&s->gb, 1)){      /* minutes_flag */
6704                             skip_bits(&s->gb, 6);     /* minutes_value 0..59 */
6705                             if(get_bits(&s->gb, 1))   /* hours_flag */
6706                                 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6707                         }
6708                     }
6709                 }
6710                 if(h->sps.time_offset_length > 0)
6711                     skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
6712             }
6713         }
6714     }
6715     return 0;
6716 }
6717
6718 static int decode_unregistered_user_data(H264Context *h, int size){
6719     MpegEncContext * const s = &h->s;
6720     uint8_t user_data[16+256];
6721     int e, build, i;
6722
6723     if(size<16)
6724         return -1;
6725
6726     for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6727         user_data[i]= get_bits(&s->gb, 8);
6728     }
6729
6730     user_data[i]= 0;
6731     e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6732     if(e==1 && build>=0)
6733         h->x264_build= build;
6734
6735     if(s->avctx->debug & FF_DEBUG_BUGS)
6736         av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
6737
6738     for(; i<size; i++)
6739         skip_bits(&s->gb, 8);
6740
6741     return 0;
6742 }
6743
6744 static int decode_sei(H264Context *h){
6745     MpegEncContext * const s = &h->s;
6746
6747     while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
6748         int size, type;
6749
6750         type=0;
6751         do{
6752             type+= show_bits(&s->gb, 8);
6753         }while(get_bits(&s->gb, 8) == 255);
6754
6755         size=0;
6756         do{
6757             size+= show_bits(&s->gb, 8);
6758         }while(get_bits(&s->gb, 8) == 255);
6759
6760         switch(type){
6761         case 1: // Picture timing SEI
6762             if(decode_picture_timing(h) < 0)
6763                 return -1;
6764             break;
6765         case 5:
6766             if(decode_unregistered_user_data(h, size) < 0)
6767                 return -1;
6768             break;
6769         default:
6770             skip_bits(&s->gb, 8*size);
6771         }
6772
6773         //FIXME check bits here
6774         align_get_bits(&s->gb);
6775     }
6776
6777     return 0;
6778 }
6779
6780 static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
6781     MpegEncContext * const s = &h->s;
6782     int cpb_count, i;
6783     cpb_count = get_ue_golomb(&s->gb) + 1;
6784
6785     if(cpb_count > 32U){
6786         av_log(h->s.avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count);
6787         return -1;
6788     }
6789
6790     get_bits(&s->gb, 4); /* bit_rate_scale */
6791     get_bits(&s->gb, 4); /* cpb_size_scale */
6792     for(i=0; i<cpb_count; i++){
6793         get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6794         get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6795         get_bits1(&s->gb);     /* cbr_flag */
6796     }
6797     get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
6798     sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
6799     sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1;
6800     sps->time_offset_length = get_bits(&s->gb, 5);
6801     return 0;
6802 }
6803
6804 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6805     MpegEncContext * const s = &h->s;
6806     int aspect_ratio_info_present_flag;
6807     unsigned int aspect_ratio_idc;
6808
6809     aspect_ratio_info_present_flag= get_bits1(&s->gb);
6810
6811     if( aspect_ratio_info_present_flag ) {
6812         aspect_ratio_idc= get_bits(&s->gb, 8);
6813         if( aspect_ratio_idc == EXTENDED_SAR ) {
6814             sps->sar.num= get_bits(&s->gb, 16);
6815             sps->sar.den= get_bits(&s->gb, 16);
6816         }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
6817             sps->sar=  pixel_aspect[aspect_ratio_idc];
6818         }else{
6819             av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6820             return -1;
6821         }
6822     }else{
6823         sps->sar.num=
6824         sps->sar.den= 0;
6825     }
6826 //            s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6827
6828     if(get_bits1(&s->gb)){      /* overscan_info_present_flag */
6829         get_bits1(&s->gb);      /* overscan_appropriate_flag */
6830     }
6831
6832     if(get_bits1(&s->gb)){      /* video_signal_type_present_flag */
6833         get_bits(&s->gb, 3);    /* video_format */
6834         get_bits1(&s->gb);      /* video_full_range_flag */
6835         if(get_bits1(&s->gb)){  /* colour_description_present_flag */
6836             get_bits(&s->gb, 8); /* colour_primaries */
6837             get_bits(&s->gb, 8); /* transfer_characteristics */
6838             get_bits(&s->gb, 8); /* matrix_coefficients */
6839         }
6840     }
6841
6842     if(get_bits1(&s->gb)){      /* chroma_location_info_present_flag */
6843         get_ue_golomb(&s->gb);  /* chroma_sample_location_type_top_field */
6844         get_ue_golomb(&s->gb);  /* chroma_sample_location_type_bottom_field */
6845     }
6846
6847     sps->timing_info_present_flag = get_bits1(&s->gb);
6848     if(sps->timing_info_present_flag){
6849         sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6850         sps->time_scale = get_bits_long(&s->gb, 32);
6851         sps->fixed_frame_rate_flag = get_bits1(&s->gb);
6852     }
6853
6854     sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6855     if(sps->nal_hrd_parameters_present_flag)
6856         if(decode_hrd_parameters(h, sps) < 0)
6857             return -1;
6858     sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6859     if(sps->vcl_hrd_parameters_present_flag)
6860         if(decode_hrd_parameters(h, sps) < 0)
6861             return -1;
6862     if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
6863         get_bits1(&s->gb);     /* low_delay_hrd_flag */
6864     sps->pic_struct_present_flag = get_bits1(&s->gb);
6865
6866     sps->bitstream_restriction_flag = get_bits1(&s->gb);
6867     if(sps->bitstream_restriction_flag){
6868         get_bits1(&s->gb);     /* motion_vectors_over_pic_boundaries_flag */
6869         get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6870         get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6871         get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
6872         get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
6873         sps->num_reorder_frames= get_ue_golomb(&s->gb);
6874         get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
6875
6876         if(sps->num_reorder_frames > 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
6877             av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", sps->num_reorder_frames);
6878             return -1;
6879         }
6880     }
6881
6882     return 0;
6883 }
6884
6885 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
6886                                 const uint8_t *jvt_list, const uint8_t *fallback_list){
6887     MpegEncContext * const s = &h->s;
6888     int i, last = 8, next = 8;
6889     const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
6890     if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
6891         memcpy(factors, fallback_list, size*sizeof(uint8_t));
6892     else
6893     for(i=0;i<size;i++){
6894         if(next)
6895             next = (last + get_se_golomb(&s->gb)) & 0xff;
6896         if(!i && !next){ /* matrix not written, we use the preset one */
6897             memcpy(factors, jvt_list, size*sizeof(uint8_t));
6898             break;
6899         }
6900         last = factors[scan[i]] = next ? next : last;
6901     }
6902 }
6903
6904 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
6905                                    uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
6906     MpegEncContext * const s = &h->s;
6907     int fallback_sps = !is_sps && sps->scaling_matrix_present;
6908     const uint8_t *fallback[4] = {
6909         fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
6910         fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
6911         fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
6912         fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
6913     };
6914     if(get_bits1(&s->gb)){
6915         sps->scaling_matrix_present |= is_sps;
6916         decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
6917         decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
6918         decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
6919         decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
6920         decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
6921         decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
6922         if(is_sps || pps->transform_8x8_mode){
6923             decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]);  // Intra, Y
6924             decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]);  // Inter, Y
6925         }
6926     }
6927 }
6928
6929 static inline int decode_seq_parameter_set(H264Context *h){
6930     MpegEncContext * const s = &h->s;
6931     int profile_idc, level_idc;
6932     unsigned int sps_id;
6933     int i;
6934     SPS *sps;
6935
6936     profile_idc= get_bits(&s->gb, 8);
6937     get_bits1(&s->gb);   //constraint_set0_flag
6938     get_bits1(&s->gb);   //constraint_set1_flag
6939     get_bits1(&s->gb);   //constraint_set2_flag
6940     get_bits1(&s->gb);   //constraint_set3_flag
6941     get_bits(&s->gb, 4); // reserved
6942     level_idc= get_bits(&s->gb, 8);
6943     sps_id= get_ue_golomb(&s->gb);
6944
6945     if(sps_id >= MAX_SPS_COUNT) {
6946         av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
6947         return -1;
6948     }
6949     sps= av_mallocz(sizeof(SPS));
6950     if(sps == NULL)
6951         return -1;
6952
6953     sps->profile_idc= profile_idc;
6954     sps->level_idc= level_idc;
6955
6956     memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
6957     memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
6958     sps->scaling_matrix_present = 0;
6959
6960     if(sps->profile_idc >= 100){ //high profile
6961         sps->chroma_format_idc= get_ue_golomb(&s->gb);
6962         if(sps->chroma_format_idc == 3)
6963             get_bits1(&s->gb);  //residual_color_transform_flag
6964         get_ue_golomb(&s->gb);  //bit_depth_luma_minus8
6965         get_ue_golomb(&s->gb);  //bit_depth_chroma_minus8
6966         sps->transform_bypass = get_bits1(&s->gb);
6967         decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
6968     }else{
6969         sps->chroma_format_idc= 1;
6970     }
6971
6972     sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
6973     sps->poc_type= get_ue_golomb(&s->gb);
6974
6975     if(sps->poc_type == 0){ //FIXME #define
6976         sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
6977     } else if(sps->poc_type == 1){//FIXME #define
6978         sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
6979         sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
6980         sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
6981         sps->poc_cycle_length                = get_ue_golomb(&s->gb);
6982
6983         if((unsigned)sps->poc_cycle_length >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
6984             av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", sps->poc_cycle_length);
6985             goto fail;
6986         }
6987
6988         for(i=0; i<sps->poc_cycle_length; i++)
6989             sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
6990     }else if(sps->poc_type != 2){
6991         av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
6992         goto fail;
6993     }
6994
6995     sps->ref_frame_count= get_ue_golomb(&s->gb);
6996     if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){
6997         av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
6998         goto fail;
6999     }
7000     sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7001     sps->mb_width = get_ue_golomb(&s->gb) + 1;
7002     sps->mb_height= get_ue_golomb(&s->gb) + 1;
7003     if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7004        avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)){
7005         av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7006         goto fail;
7007     }
7008
7009     sps->frame_mbs_only_flag= get_bits1(&s->gb);
7010     if(!sps->frame_mbs_only_flag)
7011         sps->mb_aff= get_bits1(&s->gb);
7012     else
7013         sps->mb_aff= 0;
7014
7015     sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7016
7017 #ifndef ALLOW_INTERLACE
7018     if(sps->mb_aff)
7019         av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7020 #endif
7021     sps->crop= get_bits1(&s->gb);
7022     if(sps->crop){
7023         sps->crop_left  = get_ue_golomb(&s->gb);
7024         sps->crop_right = get_ue_golomb(&s->gb);
7025         sps->crop_top   = get_ue_golomb(&s->gb);
7026         sps->crop_bottom= get_ue_golomb(&s->gb);
7027         if(sps->crop_left || sps->crop_top){
7028             av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7029         }
7030         if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7031             av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7032         }
7033     }else{
7034         sps->crop_left  =
7035         sps->crop_right =
7036         sps->crop_top   =
7037         sps->crop_bottom= 0;
7038     }
7039
7040     sps->vui_parameters_present_flag= get_bits1(&s->gb);
7041     if( sps->vui_parameters_present_flag )
7042         decode_vui_parameters(h, sps);
7043
7044     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7045         av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7046                sps_id, sps->profile_idc, sps->level_idc,
7047                sps->poc_type,
7048                sps->ref_frame_count,
7049                sps->mb_width, sps->mb_height,
7050                sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7051                sps->direct_8x8_inference_flag ? "8B8" : "",
7052                sps->crop_left, sps->crop_right,
7053                sps->crop_top, sps->crop_bottom,
7054                sps->vui_parameters_present_flag ? "VUI" : "",
7055                ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
7056                );
7057     }
7058     av_free(h->sps_buffers[sps_id]);
7059     h->sps_buffers[sps_id]= sps;
7060     return 0;
7061 fail:
7062     av_free(sps);
7063     return -1;
7064 }
7065
7066 static void
7067 build_qp_table(PPS *pps, int t, int index)
7068 {
7069     int i;
7070     for(i = 0; i < 52; i++)
7071         pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
7072 }
7073
7074 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7075     MpegEncContext * const s = &h->s;
7076     unsigned int pps_id= get_ue_golomb(&s->gb);
7077     PPS *pps;
7078
7079     if(pps_id >= MAX_PPS_COUNT) {
7080         av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
7081         return -1;
7082     }
7083
7084     pps= av_mallocz(sizeof(PPS));
7085     if(pps == NULL)
7086         return -1;
7087     pps->sps_id= get_ue_golomb(&s->gb);
7088     if((unsigned)pps->sps_id>=MAX_SPS_COUNT || h->sps_buffers[pps->sps_id] == NULL){
7089         av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7090         goto fail;
7091     }
7092
7093     pps->cabac= get_bits1(&s->gb);
7094     pps->pic_order_present= get_bits1(&s->gb);
7095     pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7096     if(pps->slice_group_count > 1 ){
7097         pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7098         av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7099         switch(pps->mb_slice_group_map_type){
7100         case 0:
7101 #if 0
7102 |   for( i = 0; i <= num_slice_groups_minus1; i++ ) |   |        |
7103 |    run_length[ i ]                                |1  |ue(v)   |
7104 #endif
7105             break;
7106         case 2:
7107 #if 0
7108 |   for( i = 0; i < num_slice_groups_minus1; i++ )  |   |        |
7109 |{                                                  |   |        |
7110 |    top_left_mb[ i ]                               |1  |ue(v)   |
7111 |    bottom_right_mb[ i ]                           |1  |ue(v)   |
7112 |   }                                               |   |        |
7113 #endif
7114             break;
7115         case 3:
7116         case 4:
7117         case 5:
7118 #if 0
7119 |   slice_group_change_direction_flag               |1  |u(1)    |
7120 |   slice_group_change_rate_minus1                  |1  |ue(v)   |
7121 #endif
7122             break;
7123         case 6:
7124 #if 0
7125 |   slice_group_id_cnt_minus1                       |1  |ue(v)   |
7126 |   for( i = 0; i <= slice_group_id_cnt_minus1; i++ |   |        |
7127 |)                                                  |   |        |
7128 |    slice_group_id[ i ]                            |1  |u(v)    |
7129 #endif
7130             break;
7131         }
7132     }
7133     pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7134     pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7135     if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7136         av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7137         goto fail;
7138     }
7139
7140     pps->weighted_pred= get_bits1(&s->gb);
7141     pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7142     pps->init_qp= get_se_golomb(&s->gb) + 26;
7143     pps->init_qs= get_se_golomb(&s->gb) + 26;
7144     pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7145     pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7146     pps->constrained_intra_pred= get_bits1(&s->gb);
7147     pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7148
7149     pps->transform_8x8_mode= 0;
7150     h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7151     memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
7152     memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
7153
7154     if(get_bits_count(&s->gb) < bit_length){
7155         pps->transform_8x8_mode= get_bits1(&s->gb);
7156         decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7157         pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7158     } else {
7159         pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7160     }
7161
7162     build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7163     build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7164     if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7165         h->pps.chroma_qp_diff= 1;
7166
7167     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7168         av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7169                pps_id, pps->sps_id,
7170                pps->cabac ? "CABAC" : "CAVLC",
7171                pps->slice_group_count,
7172                pps->ref_count[0], pps->ref_count[1],
7173                pps->weighted_pred ? "weighted" : "",
7174                pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7175                pps->deblocking_filter_parameters_present ? "LPAR" : "",
7176                pps->constrained_intra_pred ? "CONSTR" : "",
7177                pps->redundant_pic_cnt_present ? "REDU" : "",
7178                pps->transform_8x8_mode ? "8x8DCT" : ""
7179                );
7180     }
7181
7182     av_free(h->pps_buffers[pps_id]);
7183     h->pps_buffers[pps_id]= pps;
7184     return 0;
7185 fail:
7186     av_free(pps);
7187     return -1;
7188 }
7189
7190 /**
7191  * Call decode_slice() for each context.
7192  *
7193  * @param h h264 master context
7194  * @param context_count number of contexts to execute
7195  */
7196 static void execute_decode_slices(H264Context *h, int context_count){
7197     MpegEncContext * const s = &h->s;
7198     AVCodecContext * const avctx= s->avctx;
7199     H264Context *hx;
7200     int i;
7201
7202     if(context_count == 1) {
7203         decode_slice(avctx, &h);
7204     } else {
7205         for(i = 1; i < context_count; i++) {
7206             hx = h->thread_context[i];
7207             hx->s.error_recognition = avctx->error_recognition;
7208             hx->s.error_count = 0;
7209         }
7210
7211         avctx->execute(avctx, (void *)decode_slice,
7212                        (void **)h->thread_context, NULL, context_count, sizeof(void*));
7213
7214         /* pull back stuff from slices to master context */
7215         hx = h->thread_context[context_count - 1];
7216         s->mb_x = hx->s.mb_x;
7217         s->mb_y = hx->s.mb_y;
7218         s->dropable = hx->s.dropable;
7219         s->picture_structure = hx->s.picture_structure;
7220         for(i = 1; i < context_count; i++)
7221             h->s.error_count += h->thread_context[i]->s.error_count;
7222     }
7223 }
7224
7225
7226 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7227     MpegEncContext * const s = &h->s;
7228     AVCodecContext * const avctx= s->avctx;
7229     int buf_index=0;
7230     H264Context *hx; ///< thread context
7231     int context_count = 0;
7232
7233     h->max_contexts = avctx->thread_count;
7234 #if 0
7235     int i;
7236     for(i=0; i<50; i++){
7237         av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
7238     }
7239 #endif
7240     if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7241         h->current_slice = 0;
7242         if (!s->first_field)
7243             s->current_picture_ptr= NULL;
7244     }
7245
7246     for(;;){
7247         int consumed;
7248         int dst_length;
7249         int bit_length;
7250         const uint8_t *ptr;
7251         int i, nalsize = 0;
7252         int err;
7253
7254         if(h->is_avc) {
7255             if(buf_index >= buf_size) break;
7256             nalsize = 0;
7257             for(i = 0; i < h->nal_length_size; i++)
7258                 nalsize = (nalsize << 8) | buf[buf_index++];
7259             if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7260                 if(nalsize == 1){
7261                     buf_index++;
7262                     continue;
7263                 }else{
7264                     av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7265                     break;
7266                 }
7267             }
7268         } else {
7269             // start code prefix search
7270             for(; buf_index + 3 < buf_size; buf_index++){
7271                 // This should always succeed in the first iteration.
7272                 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7273                     break;
7274             }
7275
7276             if(buf_index+3 >= buf_size) break;
7277
7278             buf_index+=3;
7279         }
7280
7281         hx = h->thread_context[context_count];
7282
7283         ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7284         if (ptr==NULL || dst_length < 0){
7285             return -1;
7286         }
7287         while(ptr[dst_length - 1] == 0 && dst_length > 0)
7288             dst_length--;
7289         bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7290
7291         if(s->avctx->debug&FF_DEBUG_STARTCODE){
7292             av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7293         }
7294
7295         if (h->is_avc && (nalsize != consumed)){
7296             av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7297             consumed= nalsize;
7298         }
7299
7300         buf_index += consumed;
7301
7302         if(  (s->hurry_up == 1 && h->nal_ref_idc  == 0) //FIXME do not discard SEI id
7303            ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc  == 0))
7304             continue;
7305
7306       again:
7307         err = 0;
7308         switch(hx->nal_unit_type){
7309         case NAL_IDR_SLICE:
7310             if (h->nal_unit_type != NAL_IDR_SLICE) {
7311                 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7312                 return -1;
7313             }
7314             idr(h); //FIXME ensure we don't loose some frames if there is reordering
7315         case NAL_SLICE:
7316             init_get_bits(&hx->s.gb, ptr, bit_length);
7317             hx->intra_gb_ptr=
7318             hx->inter_gb_ptr= &hx->s.gb;
7319             hx->s.data_partitioning = 0;
7320
7321             if((err = decode_slice_header(hx, h)))
7322                break;
7323
7324             s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
7325             if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7326                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7327                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=FF_B_TYPE)
7328                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7329                && avctx->skip_frame < AVDISCARD_ALL)
7330                 context_count++;
7331             break;
7332         case NAL_DPA:
7333             init_get_bits(&hx->s.gb, ptr, bit_length);
7334             hx->intra_gb_ptr=
7335             hx->inter_gb_ptr= NULL;
7336             hx->s.data_partitioning = 1;
7337
7338             err = decode_slice_header(hx, h);
7339             break;
7340         case NAL_DPB:
7341             init_get_bits(&hx->intra_gb, ptr, bit_length);
7342             hx->intra_gb_ptr= &hx->intra_gb;
7343             break;
7344         case NAL_DPC:
7345             init_get_bits(&hx->inter_gb, ptr, bit_length);
7346             hx->inter_gb_ptr= &hx->inter_gb;
7347
7348             if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7349                && s->context_initialized
7350                && s->hurry_up < 5
7351                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7352                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=FF_B_TYPE)
7353                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7354                && avctx->skip_frame < AVDISCARD_ALL)
7355                 context_count++;
7356             break;
7357         case NAL_SEI:
7358             init_get_bits(&s->gb, ptr, bit_length);
7359             decode_sei(h);
7360             break;
7361         case NAL_SPS:
7362             init_get_bits(&s->gb, ptr, bit_length);
7363             decode_seq_parameter_set(h);
7364
7365             if(s->flags& CODEC_FLAG_LOW_DELAY)
7366                 s->low_delay=1;
7367
7368             if(avctx->has_b_frames < 2)
7369                 avctx->has_b_frames= !s->low_delay;
7370             break;
7371         case NAL_PPS:
7372             init_get_bits(&s->gb, ptr, bit_length);
7373
7374             decode_picture_parameter_set(h, bit_length);
7375
7376             break;
7377         case NAL_AUD:
7378         case NAL_END_SEQUENCE:
7379         case NAL_END_STREAM:
7380         case NAL_FILLER_DATA:
7381         case NAL_SPS_EXT:
7382         case NAL_AUXILIARY_SLICE:
7383             break;
7384         default:
7385             av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
7386         }
7387
7388         if(context_count == h->max_contexts) {
7389             execute_decode_slices(h, context_count);
7390             context_count = 0;
7391         }
7392
7393         if (err < 0)
7394             av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7395         else if(err == 1) {
7396             /* Slice could not be decoded in parallel mode, copy down
7397              * NAL unit stuff to context 0 and restart. Note that
7398              * rbsp_buffer is not transferred, but since we no longer
7399              * run in parallel mode this should not be an issue. */
7400             h->nal_unit_type = hx->nal_unit_type;
7401             h->nal_ref_idc   = hx->nal_ref_idc;
7402             hx = h;
7403             goto again;
7404         }
7405     }
7406     if(context_count)
7407         execute_decode_slices(h, context_count);
7408     return buf_index;
7409 }
7410
7411 /**
7412  * returns the number of bytes consumed for building the current frame
7413  */
7414 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7415         if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7416         if(pos+10>buf_size) pos=buf_size; // oops ;)
7417
7418         return pos;
7419 }
7420
7421 static int decode_frame(AVCodecContext *avctx,
7422                              void *data, int *data_size,
7423                              const uint8_t *buf, int buf_size)
7424 {
7425     H264Context *h = avctx->priv_data;
7426     MpegEncContext *s = &h->s;
7427     AVFrame *pict = data;
7428     int buf_index;
7429
7430     s->flags= avctx->flags;
7431     s->flags2= avctx->flags2;
7432
7433    /* end of stream, output what is still in the buffers */
7434     if (buf_size == 0) {
7435         Picture *out;
7436         int i, out_idx;
7437
7438 //FIXME factorize this with the output code below
7439         out = h->delayed_pic[0];
7440         out_idx = 0;
7441         for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7442             if(h->delayed_pic[i]->poc < out->poc){
7443                 out = h->delayed_pic[i];
7444                 out_idx = i;
7445             }
7446
7447         for(i=out_idx; h->delayed_pic[i]; i++)
7448             h->delayed_pic[i] = h->delayed_pic[i+1];
7449
7450         if(out){
7451             *data_size = sizeof(AVFrame);
7452             *pict= *(AVFrame*)out;
7453         }
7454
7455         return 0;
7456     }
7457
7458     if(h->is_avc && !h->got_avcC) {
7459         int i, cnt, nalsize;
7460         unsigned char *p = avctx->extradata;
7461         if(avctx->extradata_size < 7) {
7462             av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7463             return -1;
7464         }
7465         if(*p != 1) {
7466             av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7467             return -1;
7468         }
7469         /* sps and pps in the avcC always have length coded with 2 bytes,
7470            so put a fake nal_length_size = 2 while parsing them */
7471         h->nal_length_size = 2;
7472         // Decode sps from avcC
7473         cnt = *(p+5) & 0x1f; // Number of sps
7474         p += 6;
7475         for (i = 0; i < cnt; i++) {
7476             nalsize = AV_RB16(p) + 2;
7477             if(decode_nal_units(h, p, nalsize) < 0) {
7478                 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7479                 return -1;
7480             }
7481             p += nalsize;
7482         }
7483         // Decode pps from avcC
7484         cnt = *(p++); // Number of pps
7485         for (i = 0; i < cnt; i++) {
7486             nalsize = AV_RB16(p) + 2;
7487             if(decode_nal_units(h, p, nalsize)  != nalsize) {
7488                 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7489                 return -1;
7490             }
7491             p += nalsize;
7492         }
7493         // Now store right nal length size, that will be use to parse all other nals
7494         h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7495         // Do not reparse avcC
7496         h->got_avcC = 1;
7497     }
7498
7499     if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7500         if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7501             return -1;
7502         h->got_avcC = 1;
7503     }
7504
7505     buf_index=decode_nal_units(h, buf, buf_size);
7506     if(buf_index < 0)
7507         return -1;
7508
7509     if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7510         if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7511         av_log(avctx, AV_LOG_ERROR, "no frame!\n");
7512         return -1;
7513     }
7514
7515     if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7516         Picture *out = s->current_picture_ptr;
7517         Picture *cur = s->current_picture_ptr;
7518         int i, pics, cross_idr, out_of_order, out_idx;
7519
7520         s->mb_y= 0;
7521
7522         s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7523         s->current_picture_ptr->pict_type= s->pict_type;
7524
7525         if(!s->dropable) {
7526             execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7527             h->prev_poc_msb= h->poc_msb;
7528             h->prev_poc_lsb= h->poc_lsb;
7529         }
7530         h->prev_frame_num_offset= h->frame_num_offset;
7531         h->prev_frame_num= h->frame_num;
7532
7533         /*
7534          * FIXME: Error handling code does not seem to support interlaced
7535          * when slices span multiple rows
7536          * The ff_er_add_slice calls don't work right for bottom
7537          * fields; they cause massive erroneous error concealing
7538          * Error marking covers both fields (top and bottom).
7539          * This causes a mismatched s->error_count
7540          * and a bad error table. Further, the error count goes to
7541          * INT_MAX when called for bottom field, because mb_y is
7542          * past end by one (callers fault) and resync_mb_y != 0
7543          * causes problems for the first MB line, too.
7544          */
7545         if (!FIELD_PICTURE)
7546             ff_er_frame_end(s);
7547
7548         MPV_frame_end(s);
7549
7550         if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7551             /* Wait for second field. */
7552             *data_size = 0;
7553
7554         } else {
7555             cur->repeat_pict = 0;
7556
7557             /* Signal interlacing information externally. */
7558             /* Prioritize picture timing SEI information over used decoding process if it exists. */
7559             if(h->sps.pic_struct_present_flag){
7560                 switch (h->sei_pic_struct)
7561                 {
7562                 case SEI_PIC_STRUCT_FRAME:
7563                     cur->interlaced_frame = 0;
7564                     break;
7565                 case SEI_PIC_STRUCT_TOP_FIELD:
7566                 case SEI_PIC_STRUCT_BOTTOM_FIELD:
7567                 case SEI_PIC_STRUCT_TOP_BOTTOM:
7568                 case SEI_PIC_STRUCT_BOTTOM_TOP:
7569                     cur->interlaced_frame = 1;
7570                     break;
7571                 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
7572                 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
7573                     // Signal the possibility of telecined film externally (pic_struct 5,6)
7574                     // From these hints, let the applications decide if they apply deinterlacing.
7575                     cur->repeat_pict = 1;
7576                     cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7577                     break;
7578                 case SEI_PIC_STRUCT_FRAME_DOUBLING:
7579                     // Force progressive here, as doubling interlaced frame is a bad idea.
7580                     cur->interlaced_frame = 0;
7581                     cur->repeat_pict = 2;
7582                     break;
7583                 case SEI_PIC_STRUCT_FRAME_TRIPLING:
7584                     cur->interlaced_frame = 0;
7585                     cur->repeat_pict = 4;
7586                     break;
7587                 }
7588             }else{
7589                 /* Derive interlacing flag from used decoding process. */
7590                 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7591             }
7592
7593             if (cur->field_poc[0] != cur->field_poc[1]){
7594                 /* Derive top_field_first from field pocs. */
7595                 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7596             }else{
7597                 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
7598                     /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
7599                     if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
7600                       || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
7601                         cur->top_field_first = 1;
7602                     else
7603                         cur->top_field_first = 0;
7604                 }else{
7605                     /* Most likely progressive */
7606                     cur->top_field_first = 0;
7607                 }
7608             }
7609
7610         //FIXME do something with unavailable reference frames
7611
7612             /* Sort B-frames into display order */
7613
7614             if(h->sps.bitstream_restriction_flag
7615                && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7616                 s->avctx->has_b_frames = h->sps.num_reorder_frames;
7617                 s->low_delay = 0;
7618             }
7619
7620             if(   s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7621                && !h->sps.bitstream_restriction_flag){
7622                 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
7623                 s->low_delay= 0;
7624             }
7625
7626             pics = 0;
7627             while(h->delayed_pic[pics]) pics++;
7628
7629             assert(pics <= MAX_DELAYED_PIC_COUNT);
7630
7631             h->delayed_pic[pics++] = cur;
7632             if(cur->reference == 0)
7633                 cur->reference = DELAYED_PIC_REF;
7634
7635             out = h->delayed_pic[0];
7636             out_idx = 0;
7637             for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7638                 if(h->delayed_pic[i]->poc < out->poc){
7639                     out = h->delayed_pic[i];
7640                     out_idx = i;
7641                 }
7642             cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;
7643
7644             out_of_order = !cross_idr && out->poc < h->outputed_poc;
7645
7646             if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7647                 { }
7648             else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7649                || (s->low_delay &&
7650                 ((!cross_idr && out->poc > h->outputed_poc + 2)
7651                  || cur->pict_type == FF_B_TYPE)))
7652             {
7653                 s->low_delay = 0;
7654                 s->avctx->has_b_frames++;
7655             }
7656
7657             if(out_of_order || pics > s->avctx->has_b_frames){
7658                 out->reference &= ~DELAYED_PIC_REF;
7659                 for(i=out_idx; h->delayed_pic[i]; i++)
7660                     h->delayed_pic[i] = h->delayed_pic[i+1];
7661             }
7662             if(!out_of_order && pics > s->avctx->has_b_frames){
7663                 *data_size = sizeof(AVFrame);
7664
7665                 h->outputed_poc = out->poc;
7666                 *pict= *(AVFrame*)out;
7667             }else{
7668                 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7669             }
7670         }
7671     }
7672
7673     assert(pict->data[0] || !*data_size);
7674     ff_print_debug_info(s, pict);
7675 //printf("out %d\n", (int)pict->data[0]);
7676 #if 0 //?
7677
7678     /* Return the Picture timestamp as the frame number */
7679     /* we subtract 1 because it is added on utils.c     */
7680     avctx->frame_number = s->picture_number - 1;
7681 #endif
7682     return get_consumed_bytes(s, buf_index, buf_size);
7683 }
#if 0
/**
 * Fills h->mb_avail[] for the macroblock at (s->mb_x, s->mb_y).
 * Entries 0..2 describe the three macroblocks of the row above (left,
 * centre, right) and entry 3 the macroblock to the left; such an entry is
 * set when that macroblock exists and h->slice_table[] holds h->slice_num
 * for it. Entries 4 and 5 are constant.
 */
static inline void fill_mb_avail(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
    const int top_xy= mb_xy - s->mb_stride;

    if(!s->mb_y){
        /* first row: nothing above */
        h->mb_avail[0]= h->mb_avail[1]= h->mb_avail[2]= 0;
    }else{
        h->mb_avail[0]= s->mb_x                 && h->slice_table[top_xy - 1] == h->slice_num;
        h->mb_avail[1]=                            h->slice_table[top_xy    ] == h->slice_num;
        h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[top_xy + 1] == h->slice_num;
    }

    h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
    h->mb_avail[4]= 1; //FIXME move out
    h->mb_avail[5]= 0; //FIXME move out
}
#endif
7703
#ifdef TEST
#undef printf
#undef random
#define COUNT 8000
#define SIZE (COUNT*40)
/**
 * Self test, only built when TEST is defined.
 * Writes COUNT unsigned and then COUNT signed exp golomb codes into a
 * buffer, reads them back and reports every value that does not round trip.
 * The 4x4 (I)DCT, quantizer and NAL layer tests are disabled (#if 0).
 *
 * @return 0
 */
int main(void){
    int i;
    uint8_t temp[SIZE];
    PutBitContext pb;
    GetBitContext gb;
//    int int_temp[10000];
    DSPContext dsp;
    AVCodecContext avctx;

    dsputil_init(&dsp, &avctx);

    init_put_bits(&pb, temp, SIZE);
    printf("testing unsigned exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_ue_golomb(&pb, i);
        STOP_TIMER("set_ue_golomb");
    }
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s;

        /* remember the upcoming bits for the mismatch report */
        s= show_bits(&gb, 24);

        START_TIMER
        j= get_ue_golomb(&gb);
        if(j != i){
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
//            return -1;
        }
        STOP_TIMER("get_ue_golomb");
    }


    init_put_bits(&pb, temp, SIZE);
    printf("testing signed exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_se_golomb(&pb, i - COUNT/2);
        STOP_TIMER("set_se_golomb");
    }
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s;

        s= show_bits(&gb, 24);

        START_TIMER
        j= get_se_golomb(&gb);
        if(j != i - COUNT/2){
            /* the value written was i - COUNT/2, so report that as expected */
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i - COUNT/2, s);
//            return -1;
        }
        STOP_TIMER("get_se_golomb");
    }

#if 0
    printf("testing 4x4 (I)DCT\n");

    DCTELEM block[16];
    uint8_t src[16], ref[16];
    uint64_t error= 0, max_error=0;

    for(i=0; i<COUNT; i++){
        int j;
//        printf("%d %d %d\n", r1, r2, (r2-r1)*16);
        for(j=0; j<16; j++){
            ref[j]= random()%255;
            src[j]= random()%255;
        }

        h264_diff_dct_c(block, src, ref, 4);

        //normalize
        for(j=0; j<16; j++){
//            printf("%d ", block[j]);
            block[j]= block[j]*4;
            if(j&1) block[j]= (block[j]*4 + 2)/5;
            if(j&4) block[j]= (block[j]*4 + 2)/5;
        }
//        printf("\n");

        s->dsp.h264_idct_add(ref, block, 4);
/*        for(j=0; j<16; j++){
            printf("%d ", ref[j]);
        }
        printf("\n");*/

        for(j=0; j<16; j++){
            int diff= FFABS(src[j] - ref[j]);

            error+= diff*diff;
            max_error= FFMAX(max_error, diff);
        }
    }
    printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
    printf("testing quantizer\n");
    for(qp=0; qp<52; qp++){
        for(i=0; i<16; i++)
            src1_block[i]= src2_block[i]= random()%255;

    }
    printf("Testing NAL layer\n");

    uint8_t bitstream[COUNT];
    uint8_t nal[COUNT*2];
    H264Context h;
    memset(&h, 0, sizeof(H264Context));

    for(i=0; i<COUNT; i++){
        int zeros= i;
        int nal_length;
        int consumed;
        int out_length;
        uint8_t *out;
        int j;

        for(j=0; j<COUNT; j++){
            bitstream[j]= (random() % 255) + 1;
        }

        for(j=0; j<zeros; j++){
            int pos= random() % COUNT;
            while(bitstream[pos] == 0){
                pos++;
                pos %= COUNT;
            }
            bitstream[pos]=0;
        }

        START_TIMER

        nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
        if(nal_length<0){
            printf("encoding failed\n");
            return -1;
        }

        out= decode_nal(&h, nal, &out_length, &consumed, nal_length);

        STOP_TIMER("NAL")

        if(out_length != COUNT){
            printf("incorrect length %d %d\n", out_length, COUNT);
            return -1;
        }

        if(consumed != nal_length){
            printf("incorrect consumed length %d %d\n", nal_length, consumed);
            return -1;
        }

        if(memcmp(bitstream, out, COUNT)){
            printf("mismatch\n");
            return -1;
        }
    }
#endif

    printf("Testing RBSP\n");


    return 0;
}
#endif /* TEST */
7878
7879
7880 static av_cold int decode_end(AVCodecContext *avctx)
7881 {
7882     H264Context *h = avctx->priv_data;
7883     MpegEncContext *s = &h->s;
7884     int i;
7885
7886     av_freep(&h->rbsp_buffer[0]);
7887     av_freep(&h->rbsp_buffer[1]);
7888     free_tables(h); //FIXME cleanup init stuff perhaps
7889
7890     for(i = 0; i < MAX_SPS_COUNT; i++)
7891         av_freep(h->sps_buffers + i);
7892
7893     for(i = 0; i < MAX_PPS_COUNT; i++)
7894         av_freep(h->pps_buffers + i);
7895
7896     MPV_common_end(s);
7897
7898 //    memset(h, 0, sizeof(H264Context));
7899
7900     return 0;
7901 }
7902
7903
7904 AVCodec h264_decoder = {
7905     "h264",
7906     CODEC_TYPE_VIDEO,
7907     CODEC_ID_H264,
7908     sizeof(H264Context),
7909     decode_init,
7910     NULL,
7911     decode_end,
7912     decode_frame,
7913     /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
7914     .flush= flush_dpb,
7915     .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
7916 };
7917
7918 #include "svq3.c"