/*
 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file libavcodec/h264.c
 * H.264 / AVC / MPEG4 part10 codec.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */
31 #include "mpegvideo.h"
34 #include "h264_parser.h"
37 #include "rectangle.h"
38 #include "vdpau_internal.h"
42 #include "x86/h264_i386.h"
/**
 * Value of Picture.reference when Picture is not a reference picture, but
 * is held for delayed output.
 */
#define DELAYED_PIC_REF 4
54 static VLC coeff_token_vlc[4];
55 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
56 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
58 static VLC chroma_dc_coeff_token_vlc;
59 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
60 static const int chroma_dc_coeff_token_vlc_table_size = 256;
62 static VLC total_zeros_vlc[15];
63 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
64 static const int total_zeros_vlc_tables_size = 512;
66 static VLC chroma_dc_total_zeros_vlc[3];
67 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
68 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
70 static VLC run_vlc[6];
71 static VLC_TYPE run_vlc_tables[6][8][2];
72 static const int run_vlc_tables_size = 8;
75 static VLC_TYPE run7_vlc_table[96][2];
76 static const int run7_vlc_table_size = 96;
/* SVQ3 variants of the dequant/idct, shared with the SVQ3 decoder. */
static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
/* In-loop deblocking filter for one macroblock (exact and fast variants). */
static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
/* Presumably drops the long-term reference at index i — defined later in this file. */
static Picture * remove_long(H264Context *h, int i, int ref_mask);
84 static av_always_inline uint32_t pack16to32(int a, int b){
85 #ifdef WORDS_BIGENDIAN
86 return (b&0xFFFF) + (a<<16);
88 return (a&0xFFFF) + (b<<16);
/* qp % 6 for 0 <= qp < 52; a table lookup avoids a division in dequant setup.
 * (Restored: the closing brace of the initializer was lost in this copy.) */
static const uint8_t rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};
/* qp / 6 for 0 <= qp < 52; companion table to rem6[].
 * (Restored: the closing brace of the initializer was lost in this copy.) */
static const uint8_t div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};
100 static const uint8_t left_block_options[4][8]={
/* Number of bits consumed per lookup when decoding CAVLC level codes. */
#define LEVEL_TAB_BITS 8
/* [suffix_length][bit pattern] -> {level, bits consumed}; built at init time. */
static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
/**
 * Fills the per-macroblock neighbour caches (intra4x4 prediction modes,
 * non-zero coefficient counts, cbp, motion vectors / reference indices /
 * mvd, direct flags) from the already decoded top / left / topleft /
 * topright neighbours, taking MBAFF field/frame pairing into account.
 *
 * NOTE(review): this copy of the function appears to have lost many lines
 * in extraction (closing braces, else branches, several statements); the
 * code below is preserved as-is and will not compile until the missing
 * lines are restored from the original source.
 *
 * @param mb_type     macroblock type of the current MB
 * @param for_deblock nonzero when caches are filled for the loop filter only
 */
static void fill_caches(H264Context *h, int mb_type, int for_deblock){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    int topleft_xy, top_xy, topright_xy, left_xy[2];
    int topleft_type, top_type, topright_type, left_type[2];
    const uint8_t * left_block;
    int topleft_partition= -1;

    top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);

    //FIXME deblocking could skip the intra and nnz parts.
    // NOTE(review): the body of this early-out (a return) seems to be missing.
    if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)

    /* Wow, what a mess, why didn't they simplify the interlacing & intra
     * stuff, I can't imagine that these complex rules are worth it. */

    // Plain (non-MBAFF) neighbour derivation: simple raster neighbours.
    topleft_xy = top_xy - 1;
    topright_xy= top_xy + 1;
    left_xy[1] = left_xy[0] = mb_xy-1;
    left_block = left_block_options[0];
    // MBAFF neighbour derivation: neighbours depend on the field/frame
    // coding of the current MB pair and of each neighbouring pair.
    const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
    const int top_pair_xy = pair_xy - s->mb_stride;
    const int topleft_pair_xy = top_pair_xy - 1;
    const int topright_pair_xy = top_pair_xy + 1;
    const int topleft_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
    const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
    const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
    const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
    const int curr_mb_field_flag = IS_INTERLACED(mb_type);
    const int bottom = (s->mb_y & 1);
    tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);
    if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
        top_xy -= s->mb_stride;
    if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
        topleft_xy -= s->mb_stride;
    } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
        topleft_xy += s->mb_stride;
        // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
        topleft_partition = 0;
    if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
        topright_xy -= s->mb_stride;
    if (left_mb_field_flag != curr_mb_field_flag) {
        left_xy[1] = left_xy[0] = pair_xy - 1;
        if (curr_mb_field_flag) {
            left_xy[1] += s->mb_stride;
            left_block = left_block_options[3];
            left_block= left_block_options[2 - bottom];

    h->top_mb_xy = top_xy;
    h->left_mb_xy[0] = left_xy[0];
    h->left_mb_xy[1] = left_xy[1];
    // Deblock-only path: a neighbour is usable unless slice_table holds 0xFFFF.
    top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
    left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
    left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;

    if(MB_MBAFF && !IS_INTRA(mb_type)){
        for(list=0; list<h->list_count; list++){
            //These values where changed for ease of performing MC, we need to change them back
            //FIXME maybe we can make MC and loop filter use the same values or prevent
            //the MC code from changing ref_cache and rather use a temporary array.
            if(USES_LIST(mb_type,list)){
                int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
                *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
                *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
                *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
                *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;

    // Decode path: a neighbour is usable only if it belongs to the same slice.
    topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
    top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
    topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
    left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
    left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;

    if(IS_INTRA(mb_type)){
        // Sample-availability bitmasks for intra prediction fetches.
        int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
        h->topleft_samples_available=
        h->top_samples_available=
        h->left_samples_available= 0xFFFF;
        h->topright_samples_available= 0xEEEA;

        if(!(top_type & type_mask)){
            h->topleft_samples_available= 0xB3FF;
            h->top_samples_available= 0x33FF;
            h->topright_samples_available= 0x26EA;
        if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
            if(IS_INTERLACED(mb_type)){
                if(!(left_type[0] & type_mask)){
                    h->topleft_samples_available&= 0xDFFF;
                    h->left_samples_available&= 0x5FFF;
                if(!(left_type[1] & type_mask)){
                    h->topleft_samples_available&= 0xFF5F;
                    h->left_samples_available&= 0xFF5F;
                int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
                ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
                assert(left_xy[0] == left_xy[1]);
                if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
                    h->topleft_samples_available&= 0xDF5F;
                    h->left_samples_available&= 0x5F5F;
            if(!(left_type[0] & type_mask)){
                h->topleft_samples_available&= 0xDF5F;
                h->left_samples_available&= 0x5F5F;

        if(!(topleft_type & type_mask))
            h->topleft_samples_available&= 0x7FFF;

        if(!(topright_type & type_mask))
            h->topright_samples_available&= 0xFBFF;

        if(IS_INTRA4x4(mb_type)){
            // Cache the 4x4 intra prediction modes of the top / left neighbours.
            if(IS_INTRA4x4(top_type)){
                h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
                h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
                h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
                h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
                if(!(top_type & type_mask))
                h->intra4x4_pred_mode_cache[4+8*0]=
                h->intra4x4_pred_mode_cache[5+8*0]=
                h->intra4x4_pred_mode_cache[6+8*0]=
                h->intra4x4_pred_mode_cache[7+8*0]= pred;
                if(IS_INTRA4x4(left_type[i])){
                    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
                    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
                    if(!(left_type[i] & type_mask))
                    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
                    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;

    //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
    // Non-zero coefficient counts from the top neighbour (luma row + chroma).
    h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
    h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
    h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
    h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];

    h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
    h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];

    h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
    h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];

    // Neighbour missing: CABAC predicts 0, CAVLC uses the sentinel 64.
    h->non_zero_count_cache[4+8*0]=
    h->non_zero_count_cache[5+8*0]=
    h->non_zero_count_cache[6+8*0]=
    h->non_zero_count_cache[7+8*0]=

    h->non_zero_count_cache[1+8*0]=
    h->non_zero_count_cache[2+8*0]=

    h->non_zero_count_cache[1+8*3]=
    h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;

    for (i=0; i<2; i++) {
        h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
        h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
        h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
        h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
        h->non_zero_count_cache[3+8*1 + 2*8*i]=
        h->non_zero_count_cache[3+8*2 + 2*8*i]=
        h->non_zero_count_cache[0+8*1 + 8*i]=
        h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;

    // CBP of the top / left neighbours (CABAC context derivation).
    h->top_cbp = h->cbp_table[top_xy];
    } else if(IS_INTRA(mb_type)) {
    h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
    } else if(IS_INTRA(mb_type)) {
    h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
    h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;

    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
        for(list=0; list<h->list_count; list++){
            if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
                /*if(!h->mv_cache_clean[list]){
                memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
                memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
                h->mv_cache_clean[list]= 1;
                }*/
            h->mv_cache_clean[list]= 0;

            // Motion vectors / refs from the top neighbour row.
            if(USES_LIST(top_type, list)){
                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
                h->ref_cache[list][scan8[0] + 0 - 1*8]=
                h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
                h->ref_cache[list][scan8[0] + 2 - 1*8]=
                h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
                *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
                *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;

                // Motion vectors / refs from the two left neighbour halves.
                int cache_idx = scan8[0] - 1 + i*2*8;
                if(USES_LIST(left_type[i], list)){
                    const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
                    const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
                    *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
                    *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
                    h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
                    h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
                    *(uint32_t*)h->mv_cache [list][cache_idx ]=
                    *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
                    h->ref_cache[list][cache_idx ]=
                    h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;

            // topleft/topright are only needed for full MV prediction.
            if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))

            if(USES_LIST(topleft_type, list)){
                const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
                const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
                h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;

            if(USES_LIST(topright_type, list)){
                const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
                *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
                h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;

            if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)

            h->ref_cache[list][scan8[5 ]+1] =
            h->ref_cache[list][scan8[7 ]+1] =
            h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
            h->ref_cache[list][scan8[4 ]] =
            h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
            *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
            *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
            *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;

            /* XXX beurk, Load mvd */
            if(USES_LIST(top_type, list)){
                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
                *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
                *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
                *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
                *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
                *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
                *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
                *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
            if(USES_LIST(left_type[0], list)){
                const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
                *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
                *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
                *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
                *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
            if(USES_LIST(left_type[1], list)){
                const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
                *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
                *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
                *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
                *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
            *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
            *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
            *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
            *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
            *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;

            // Direct-mode flags for B slices.
            if(h->slice_type_nos == FF_B_TYPE){
                fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);

                if(IS_DIRECT(top_type)){
                    *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
                }else if(IS_8X8(top_type)){
                    int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
                    h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
                    h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
                    *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;

                if(IS_DIRECT(left_type[0]))
                    h->direct_cache[scan8[0] - 1 + 0*8]= 1;
                else if(IS_8X8(left_type[0]))
                    h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
                    h->direct_cache[scan8[0] - 1 + 0*8]= 0;

                if(IS_DIRECT(left_type[1]))
                    h->direct_cache[scan8[0] - 1 + 2*8]= 1;
                else if(IS_8X8(left_type[1]))
                    h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
                    h->direct_cache[scan8[0] - 1 + 2*8]= 0;

            // MBAFF frame<->field remapping of cached refs/MVs.
            MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
            MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
            MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
            MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
            MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
            MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
            MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
            MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
            MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
            MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])

#define MAP_F2F(idx, mb_type)\
    if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
        h->ref_cache[list][idx] <<= 1;\
        h->mv_cache[list][idx][1] /= 2;\
        h->mvd_cache[list][idx][1] /= 2;\
#define MAP_F2F(idx, mb_type)\
    if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
        h->ref_cache[list][idx] >>= 1;\
        h->mv_cache[list][idx][1] <<= 1;\
        h->mvd_cache[list][idx][1] <<= 1;\

    // 8x8 DCT usage of the neighbours (transform-size context).
    h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
    // NOTE(review): the function's closing brace appears to be missing in this copy.
549 static inline void write_back_intra_pred_mode(H264Context *h){
550 const int mb_xy= h->mb_xy;
552 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
553 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
554 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
555 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
556 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
557 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
558 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
562 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
564 static inline int check_intra4x4_pred_mode(H264Context *h){
565 MpegEncContext * const s = &h->s;
566 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
567 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
570 if(!(h->top_samples_available&0x8000)){
572 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
574 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
577 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
582 if((h->left_samples_available&0x8888)!=0x8888){
583 static const int mask[4]={0x8000,0x2000,0x80,0x20};
585 if(!(h->left_samples_available&mask[i])){
586 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
588 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
591 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
598 } //FIXME cleanup like next
601 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
603 static inline int check_intra_pred_mode(H264Context *h, int mode){
604 MpegEncContext * const s = &h->s;
605 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
606 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
609 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
613 if(!(h->top_samples_available&0x8000)){
616 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
621 if((h->left_samples_available&0x8080) != 0x8080){
623 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
624 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
627 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
636 * gets the predicted intra4x4 prediction mode.
638 static inline int pred_intra_mode(H264Context *h, int n){
639 const int index8= scan8[n];
640 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
641 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
642 const int min= FFMIN(left, top);
644 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
646 if(min<0) return DC_PRED;
650 static inline void write_back_non_zero_count(H264Context *h){
651 const int mb_xy= h->mb_xy;
653 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
654 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
655 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
656 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
657 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
658 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
659 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
661 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
662 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
663 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
665 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
666 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
667 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
671 * gets the predicted number of non-zero coefficients.
672 * @param n block index
674 static inline int pred_non_zero_count(H264Context *h, int n){
675 const int index8= scan8[n];
676 const int left= h->non_zero_count_cache[index8 - 1];
677 const int top = h->non_zero_count_cache[index8 - 8];
680 if(i<64) i= (i+1)>>1;
682 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
687 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
688 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
689 MpegEncContext *s = &h->s;
691 /* there is no consistent mapping of mvs to neighboring locations that will
692 * make mbaff happy, so we can't move all this logic to fill_caches */
694 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
696 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
697 *C = h->mv_cache[list][scan8[0]-2];
700 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
701 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
702 if(IS_INTERLACED(mb_types[topright_xy])){
703 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
704 const int x4 = X4, y4 = Y4;\
705 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
706 if(!USES_LIST(mb_type,list))\
707 return LIST_NOT_USED;\
708 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
709 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
710 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
711 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
713 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
716 if(topright_ref == PART_NOT_AVAILABLE
717 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
718 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
720 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
721 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
724 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
726 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
727 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
733 if(topright_ref != PART_NOT_AVAILABLE){
734 *C= h->mv_cache[list][ i - 8 + part_width ];
737 tprintf(s->avctx, "topright MV not available\n");
739 *C= h->mv_cache[list][ i - 8 - 1 ];
740 return h->ref_cache[list][ i - 8 - 1 ];
745 * gets the predicted MV.
746 * @param n the block index
747 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
748 * @param mx the x component of the predicted motion vector
749 * @param my the y component of the predicted motion vector
751 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
752 const int index8= scan8[n];
753 const int top_ref= h->ref_cache[list][ index8 - 8 ];
754 const int left_ref= h->ref_cache[list][ index8 - 1 ];
755 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
756 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
758 int diagonal_ref, match_count;
760 assert(part_width==1 || part_width==2 || part_width==4);
770 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
771 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
772 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
773 if(match_count > 1){ //most common
774 *mx= mid_pred(A[0], B[0], C[0]);
775 *my= mid_pred(A[1], B[1], C[1]);
776 }else if(match_count==1){
780 }else if(top_ref==ref){
788 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
792 *mx= mid_pred(A[0], B[0], C[0]);
793 *my= mid_pred(A[1], B[1], C[1]);
797 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
801 * gets the directionally predicted 16x8 MV.
802 * @param n the block index
803 * @param mx the x component of the predicted motion vector
804 * @param my the y component of the predicted motion vector
806 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
808 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
809 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
811 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
819 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
820 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
822 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
832 pred_motion(h, n, 4, list, ref, mx, my);
836 * gets the directionally predicted 8x16 MV.
837 * @param n the block index
838 * @param mx the x component of the predicted motion vector
839 * @param my the y component of the predicted motion vector
841 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
843 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
844 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
846 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
857 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
859 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
861 if(diagonal_ref == ref){
869 pred_motion(h, n, 2, list, ref, mx, my);
872 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
873 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
874 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
876 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
878 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
879 || !( top_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ])
880 || !(left_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ])){
886 pred_motion(h, 0, 4, 0, 0, mx, my);
891 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
892 int poc0 = h->ref_list[0][i].poc;
893 int td = av_clip(poc1 - poc0, -128, 127);
894 if(td == 0 || h->ref_list[0][i].long_ref){
897 int tb = av_clip(poc - poc0, -128, 127);
898 int tx = (16384 + (FFABS(td) >> 1)) / td;
899 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
903 static inline void direct_dist_scale_factor(H264Context * const h){
904 MpegEncContext * const s = &h->s;
905 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
906 const int poc1 = h->ref_list[1][0].poc;
908 for(field=0; field<2; field++){
909 const int poc = h->s.current_picture_ptr->field_poc[field];
910 const int poc1 = h->ref_list[1][0].field_poc[field];
911 for(i=0; i < 2*h->ref_count[0]; i++)
912 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
915 for(i=0; i<h->ref_count[0]; i++){
916 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
/**
 * Builds map[list][]: for each reference of the colocated picture
 * (h->ref_list[1][0]) find the index of the same picture in the current
 * slice's reference list. Entries for MBAFF field references are stored
 * at offset 16. References are matched by a packed POC-like key:
 * 4*frame_num + (reference&3), so field parity is encoded in the low bits.
 * NOTE(review): sampled view — some lines (e.g. the first branch of the
 * poc fixup and closing braces) are elided.
 */
920 static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
921 MpegEncContext * const s = &h->s;
922 Picture * const ref1 = &h->ref_list[1][0];
923 int j, old_ref, rfield;
/* MBAFF field refs are searched in the +16 range of the list */
924 int start= mbafi ? 16 : 0;
925 int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
926 int interl= mbafi || s->picture_structure != PICT_FRAME;
928 /* bogus; fills in for missing frames */
929 memset(map[list], 0, sizeof(map[list]));
/* try both field parities of each colocated reference */
931 for(rfield=0; rfield<2; rfield++){
932 for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
933 int poc = ref1->ref_poc[colfield][list][old_ref];
937 else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
/* force the field parity being searched in this rfield pass */
938 poc= (poc&~3) + rfield + 1;
940 for(j=start; j<end; j++){
/* same packed key as stored by direct_ref_list_init() */
941 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
942 int cur_ref= mbafi ? (j-16)^field : j;
943 map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
945 map[list][old_ref] = cur_ref;
/**
 * Records the current picture's reference lists (counts and packed
 * 4*frame_num + (reference&3) keys) so it can later serve as a colocated
 * picture, then builds the col->list0 mapping tables for B-frame temporal
 * direct prediction via fill_colmap().
 * NOTE(review): sampled view — variable declarations, some braces and an
 * early return are elided between the visible lines.
 */
953 static inline void direct_ref_list_init(H264Context * const h){
954 MpegEncContext * const s = &h->s;
955 Picture * const ref1 = &h->ref_list[1][0];
956 Picture * const cur = s->current_picture_ptr;
/* side index: 0/1 selects which field slot of the Picture to fill */
958 int sidx= (s->picture_structure&1)^1;
959 int ref1sidx= (ref1->reference&1)^1;
961 for(list=0; list<2; list++){
962 cur->ref_count[sidx][list] = h->ref_count[list];
963 for(j=0; j<h->ref_count[list]; j++)
/* packed key; low 2 bits carry the reference/field parity */
964 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
/* frames: both field slots describe the same lists */
967 if(s->picture_structure == PICT_FRAME){
968 memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
969 memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
972 cur->mbaff= FRAME_MBAFF;
/* only temporal direct B-slices need the colmap tables */
974 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
977 for(list=0; list<2; list++){
978 fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
979 for(field=0; field<2; field++)
980 fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
/**
 * Derives motion vectors and reference indices for a B-slice direct-mode
 * macroblock (or its direct 8x8 partitions when is_b8x8), filling
 * h->mv_cache / h->ref_cache / h->sub_mb_type and updating *mb_type.
 * Handles both spatial (h->direct_spatial_mv_pred) and temporal direct
 * prediction, including the PAFF/MBAFF frame<->field colocated cases.
 * NOTE(review): this sampled view elides many original lines (declarations,
 * else-arms, braces); comments below describe only the visible code.
 */
984 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
985 MpegEncContext * const s = &h->s;
986 int b8_stride = h->b8_stride;
987 int b4_stride = h->b_stride;
988 int mb_xy = h->mb_xy;
990 const int16_t (*l1mv0)[2], (*l1mv1)[2];
991 const int8_t *l1ref0, *l1ref1;
992 const int is_b8x8 = IS_8X8(*mb_type);
993 unsigned int sub_mb_type;
996 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
/* Locate the colocated macroblock in list1[0], converting between frame and
 * field macroblock addressing as needed. */
998 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
999 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
1000 int cur_poc = s->current_picture_ptr->poc;
1001 int *col_poc = h->ref_list[1]->field_poc;
/* pick the colocated field temporally closest to the current picture */
1002 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1003 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1005 }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
1006 int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1007 mb_xy += s->mb_stride*fieldoff;
1010 }else{ // AFL/AFR/FR/FL -> AFR/FR
1011 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
/* one field MB maps onto a vertical pair of frame MBs */
1012 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1013 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1014 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1017 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1018 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1019 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1021 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1022 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1024 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1025 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1027 }else{ // AFR/FR -> AFR/FR
1030 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
/* without 8x8 inference, direct blocks inherit 4x4 granularity */
1031 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1032 /* FIXME save sub mb types from previous frames (or derive from MVs)
1033 * so we know exactly what block size to use */
1034 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1035 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1036 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1037 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1038 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1040 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1041 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
/* pointers into the colocated picture's motion vectors / ref indices */
1046 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1047 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1048 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1049 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
/* advance to the bottom-field rows of the colocated data */
1052 l1ref0 += h->b8_stride;
1053 l1ref1 += h->b8_stride;
1054 l1mv0 += 2*b4_stride;
1055 l1mv1 += 2*b4_stride;
/* ---- spatial direct prediction ---- */
1059 if(h->direct_spatial_mv_pred){
1064 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1066 /* ref = min(neighbors) */
1067 for(list=0; list<2; list++){
1068 int refa = h->ref_cache[list][scan8[0] - 1];
1069 int refb = h->ref_cache[list][scan8[0] - 8];
1070 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1071 if(refc == PART_NOT_AVAILABLE)
1072 refc = h->ref_cache[list][scan8[0] - 8 - 1];
/* unsigned min so negative (unavailable) values compare largest */
1073 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
/* no usable neighbor in either list: ref 0, zero MVs */
1078 if(ref[0] < 0 && ref[1] < 0){
1079 ref[0] = ref[1] = 0;
1080 mv[0][0] = mv[0][1] =
1081 mv[1][0] = mv[1][1] = 0;
1083 for(list=0; list<2; list++){
1085 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1087 mv[list][0] = mv[list][1] = 0;
/* a list without a valid ref is dropped from the prediction */
1093 *mb_type &= ~MB_TYPE_L1;
1094 sub_mb_type &= ~MB_TYPE_L1;
1095 }else if(ref[0] < 0){
1097 *mb_type &= ~MB_TYPE_L0;
1098 sub_mb_type &= ~MB_TYPE_L0;
/* current/colocated have different frame/field coding: per-8x8 handling */
1101 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1102 for(i8=0; i8<4; i8++){
1105 int xy8 = x8+y8*b8_stride;
1106 int xy4 = 3*x8+y8*b4_stride;
1109 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1111 h->sub_mb_type[i8] = sub_mb_type;
1113 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1114 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
/* colocated is "moving slowly" (|mv|<=1, ref 0): zero the direct MVs */
1115 if(!IS_INTRA(mb_type_col[y8])
1116 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1117 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1119 a= pack16to32(mv[0][0],mv[0][1]);
1121 b= pack16to32(mv[1][0],mv[1][1]);
1123 a= pack16to32(mv[0][0],mv[0][1]);
1124 b= pack16to32(mv[1][0],mv[1][1]);
1126 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1127 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1129 }else if(IS_16X16(*mb_type)){
1132 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1133 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
/* whole-MB fast path; x264_build check works around an old x264 bug */
1134 if(!IS_INTRA(mb_type_col[0])
1135 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1136 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1137 && (h->x264_build>33 || !h->x264_build)))){
1139 a= pack16to32(mv[0][0],mv[0][1]);
1141 b= pack16to32(mv[1][0],mv[1][1]);
1143 a= pack16to32(mv[0][0],mv[0][1]);
1144 b= pack16to32(mv[1][0],mv[1][1]);
1146 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1147 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
/* generic per-8x8 spatial direct */
1149 for(i8=0; i8<4; i8++){
1150 const int x8 = i8&1;
1151 const int y8 = i8>>1;
1153 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1155 h->sub_mb_type[i8] = sub_mb_type;
1157 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1158 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1159 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1160 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
/* colocated ref 0 with small motion: zero out matching sub-blocks */
1163 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
1164 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1165 && (h->x264_build>33 || !h->x264_build)))){
1166 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1167 if(IS_SUB_8X8(sub_mb_type)){
1168 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1169 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1171 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1173 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1176 for(i4=0; i4<4; i4++){
1177 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1178 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1180 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1182 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1188 }else{ /* direct temporal mv pred */
1189 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1190 const int *dist_scale_factor = h->dist_scale_factor;
/* MBAFF field MBs use the per-field maps and scale factors */
1193 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
1194 map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1195 map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1196 dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1];
1198 if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
/* frame/field mismatch: scale MVs vertically while copying */
1201 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1202 /* FIXME assumes direct_8x8_inference == 1 */
1203 int y_shift = 2*!IS_INTERLACED(*mb_type);
1205 for(i8=0; i8<4; i8++){
1206 const int x8 = i8&1;
1207 const int y8 = i8>>1;
1209 const int16_t (*l1mv)[2]= l1mv0;
1211 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1213 h->sub_mb_type[i8] = sub_mb_type;
1215 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
/* intra colocated block: direct MVs are zero with ref 0 */
1216 if(IS_INTRA(mb_type_col[y8])){
1217 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1218 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1219 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1223 ref0 = l1ref0[x8 + y8*b8_stride];
1225 ref0 = map_col_to_list0[0][ref0 + ref_offset];
1227 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1230 scale = dist_scale_factor[ref0];
1231 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1234 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
/* convert vertical MV between frame and field units */
1235 int my_col = (mv_col[1]<<y_shift)/2;
/* L0 MV = colocated MV scaled by DistScaleFactor; L1 = L0 - col */
1236 int mx = (scale * mv_col[0] + 128) >> 8;
1237 int my = (scale * my_col + 128) >> 8;
1238 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1239 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1245 /* one-to-one mv scaling */
1247 if(IS_16X16(*mb_type)){
1250 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1251 if(IS_INTRA(mb_type_col[0])){
/* pick L0 colocated data when valid, else fall back to L1 */
1254 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1255 : map_col_to_list0[1][l1ref1[0] + ref_offset];
1256 const int scale = dist_scale_factor[ref0];
1257 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1259 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1260 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1262 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1263 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1265 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1266 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1267 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
/* generic per-8x8 temporal direct */
1269 for(i8=0; i8<4; i8++){
1270 const int x8 = i8&1;
1271 const int y8 = i8>>1;
1273 const int16_t (*l1mv)[2]= l1mv0;
1275 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1277 h->sub_mb_type[i8] = sub_mb_type;
1278 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1279 if(IS_INTRA(mb_type_col[0])){
1280 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1281 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1282 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1286 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
1288 ref0 = map_col_to_list0[0][ref0];
1290 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1293 scale = dist_scale_factor[ref0];
1295 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1296 if(IS_SUB_8X8(sub_mb_type)){
1297 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1298 int mx = (scale * mv_col[0] + 128) >> 8;
1299 int my = (scale * mv_col[1] + 128) >> 8;
1300 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1301 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
/* 4x4 granularity: scale each colocated sub-block MV individually */
1303 for(i4=0; i4<4; i4++){
1304 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1305 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1306 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1307 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1308 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1309 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the per-macroblock caches (mv_cache, ref_cache and, for CABAC,
 * mvd_cache) back into the frame-wide arrays of the current picture
 * (motion_val, ref_index, mvd_table), and records the direct flags of the
 * 8x8 partitions for B-slices.
 * NOTE(review): sampled view — loop headers, braces and some else-arms
 * are elided between the visible lines.
 */
1316 static inline void write_back_motion(H264Context *h, int mb_type){
1317 MpegEncContext * const s = &h->s;
/* 4x4-block and 8x8-block coordinates of this MB in the frame arrays */
1318 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1319 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1322 if(!USES_LIST(mb_type, 0))
1323 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1325 for(list=0; list<h->list_count; list++){
1327 if(!USES_LIST(mb_type, list))
/* copy two 4x4 MVs (64 bits) at a time per row of the cache */
1331 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1332 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
/* CABAC also needs the motion vector differences for context modelling */
1334 if( h->pps.cabac ) {
1335 if(IS_SKIP(mb_type))
1336 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1339 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1340 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
/* one reference index per 8x8 block */
1345 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1346 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1347 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1348 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1349 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
/* direct flags feed the CABAC context of neighbouring B MBs */
1353 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1354 if(IS_8X8(mb_type)){
1355 uint8_t *direct_table = &h->direct_table[b8_xy];
1356 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1357 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1358 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
/**
 * Parses the NAL header and unescapes one NAL unit: removes the H.264
 * emulation-prevention bytes (0x03 after 0x00 0x00) so the payload can be
 * bit-read directly. Returns a pointer to the unescaped RBSP (either the
 * input itself when no escapes exist, or an internal rbsp_buffer), and
 * sets *dst_length (payload size) and *consumed (input bytes used).
 * NOTE(review): sampled view — several lines (declarations, braces, some
 * loop bodies) are elided between the visible lines.
 */
1363 const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1368 // src[0]&0x80; //forbidden bit
/* NAL header byte: 2-bit nal_ref_idc, 5-bit nal_unit_type */
1369 h->nal_ref_idc= src[0]>>5;
1370 h->nal_unit_type= src[0]&0x1F;
1374 for(i=0; i<length; i++)
1375 printf("%2X ", src[i]);
/* Fast scan for a 0x00 byte using word-at-a-time "has-zero" bit tricks */
1378 #if HAVE_FAST_UNALIGNED
1379 # if HAVE_FAST_64BIT
1381 for(i=0; i+1<length; i+=9){
1382 if(!((~*(const uint64_t*)(src+i) & (*(const uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
1385 for(i=0; i+1<length; i+=5){
1386 if(!((~*(const uint32_t*)(src+i) & (*(const uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
1389 if(i>0 && !src[i]) i--;
/* Portable fallback: step by 2, backing up when landing past a zero */
1393 for(i=0; i+1<length; i+=2){
1394 if(src[i]) continue;
1395 if(i>0 && src[i-1]==0) i--;
1397 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1399 /* startcode, so we must be past the end */
1407 if(i>=length-1){ //no escaped 0
1408 *dst_length= length;
1409 *consumed= length+1; //+1 for the header
1413 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1414 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
1415 dst= h->rbsp_buffer[bufidx];
1421 //printf("decoding esc\n");
/* bulk-copy the escape-free prefix found above */
1422 memcpy(dst, src, i);
1425 //remove escapes (very rare 1:2^22)
1427 dst[di++]= src[si++];
1428 dst[di++]= src[si++];
1429 }else if(src[si]==0 && src[si+1]==0){
1430 if(src[si+2]==3){ //escape
1435 }else //next start code
1439 dst[di++]= src[si++];
1442 dst[di++]= src[si++];
/* zero padding so the bit reader can safely over-read */
1445 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
1448 *consumed= si + 1;//+1 for the header
1449 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
/**
 * Inspects the rbsp_trailing_bits at the end of a NAL unit.
 * NOTE(review): the body between the signature and the trace below is
 * elided in this sampled view, so the exact return semantics cannot be
 * confirmed here — presumably the count/validity of trailing bits.
 */
1453 int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1457 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1467 * IDCT transforms the 16 dc values and dequantizes them.
1468 * @param qp quantization parameter
/**
 * Inverse 4x4 Hadamard-style transform of the 16 luma DC coefficients
 * followed by dequantization: out = (x*qmul + 128) >> 8. The DC values
 * are stored sparsely inside the 16x16 block array (stride 16), at the
 * positions given by x_offset/y_offset.
 * NOTE(review): sampled view — the loop headers, the temp[] stores of the
 * first pass and closing braces are elided between the visible lines.
 */
1470 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1473 int temp[16]; //FIXME check if this is a good idea
1474 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1475 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1477 //memset(block, 64, 2*256);
/* first pass: butterflies along one dimension into temp[] */
1480 const int offset= y_offset[i];
1481 const int z0= block[offset+stride*0] + block[offset+stride*4];
1482 const int z1= block[offset+stride*0] - block[offset+stride*4];
1483 const int z2= block[offset+stride*1] - block[offset+stride*5];
1484 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* second pass: butterflies along the other dimension, then dequant */
1493 const int offset= x_offset[i];
1494 const int z0= temp[4*0+i] + temp[4*2+i];
1495 const int z1= temp[4*0+i] - temp[4*2+i];
1496 const int z2= temp[4*1+i] - temp[4*3+i];
1497 const int z3= temp[4*1+i] + temp[4*3+i];
1499 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1500 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1501 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1502 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1508 * DCT transforms the 16 dc values.
1509 * @param qp quantization parameter (currently unused — the function's qp argument is commented out; FIXME decide whether to drop it)
/**
 * Forward 4x4 Hadamard-style transform of the 16 luma DC values (encoder
 * counterpart of h264_luma_dc_dequant_idct_c); the output is halved (>>1)
 * instead of being quantized here.
 * NOTE(review): sampled view — loop headers, temp[] stores and closing
 * braces are elided between the visible lines.
 */
1511 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1512 // const int qmul= dequant_coeff[qp][0];
1514 int temp[16]; //FIXME check if this is a good idea
1515 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1516 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* first pass: butterflies along one dimension into temp[] */
1519 const int offset= y_offset[i];
1520 const int z0= block[offset+stride*0] + block[offset+stride*4];
1521 const int z1= block[offset+stride*0] - block[offset+stride*4];
1522 const int z2= block[offset+stride*1] - block[offset+stride*5];
1523 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* second pass: butterflies along the other dimension, halved output */
1532 const int offset= x_offset[i];
1533 const int z0= temp[4*0+i] + temp[4*2+i];
1534 const int z1= temp[4*0+i] - temp[4*2+i];
1535 const int z2= temp[4*1+i] - temp[4*3+i];
1536 const int z3= temp[4*1+i] + temp[4*3+i];
1538 block[stride*0 +offset]= (z0 + z3)>>1;
1539 block[stride*2 +offset]= (z1 + z2)>>1;
1540 block[stride*8 +offset]= (z1 - z2)>>1;
1541 block[stride*10+offset]= (z0 - z3)>>1;
/**
 * 2x2 inverse transform + dequantization of the 4 chroma DC coefficients,
 * stored at stride-32 / xStride-16 positions inside the block array.
 * NOTE(review): sampled view — the intermediate butterfly computations
 * (defining e and rewriting a/b/c) between the loads and the stores are
 * elided, so the stores below use values set by elided lines.
 */
1549 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1550 const int stride= 16*2;
1551 const int xStride= 16;
/* load the four DC values */
1554 a= block[stride*0 + xStride*0];
1555 b= block[stride*0 + xStride*1];
1556 c= block[stride*1 + xStride*0];
1557 d= block[stride*1 + xStride*1];
/* store transformed and dequantized values: (x*qmul) >> 7 */
1564 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1565 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1566 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1567 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/**
 * 2x2 forward transform of the 4 chroma DC values (encoder counterpart of
 * chroma_dc_dequant_idct_c); no quantization is applied here.
 * NOTE(review): sampled view — the intermediate butterfly computations
 * between the loads and the stores are elided.
 */
1571 static void chroma_dc_dct_c(DCTELEM *block){
1572 const int stride= 16*2;
1573 const int xStride= 16;
/* load the four DC values */
1576 a= block[stride*0 + xStride*0];
1577 b= block[stride*0 + xStride*1];
1578 c= block[stride*1 + xStride*0];
1579 d= block[stride*1 + xStride*1];
/* store the transformed values */
1586 block[stride*0 + xStride*0]= (a+c);
1587 block[stride*0 + xStride*1]= (e+b);
1588 block[stride*1 + xStride*0]= (a-c);
1589 block[stride*1 + xStride*1]= (e-b);
1594 * gets the chroma qp.
/**
 * Looks up the chroma QP for a luma qscale in the PPS-derived table;
 * t selects between the two chroma QP tables (Cb/Cr index offsets).
 */
1596 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1597 return h->pps.chroma_qp_table[t][qscale];
/**
 * Motion compensation of one partition in one prediction direction (list):
 * quarter-pel luma via qpix_op and eighth-pel chroma via chroma_op, with
 * edge emulation when the motion vector points (partly) outside the
 * reference picture.
 * NOTE(review): sampled view — some lines (emu flag setup, braces,
 * else-arms) are elided between the visible lines.
 */
1600 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1601 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1602 int src_x_offset, int src_y_offset,
1603 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1604 MpegEncContext * const s = &h->s;
/* absolute position in 1/4-pel units: cached MV + block offset */
1605 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1606 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
/* fractional part selects one of the 16 qpel interpolation functions */
1607 const int luma_xy= (mx&3) + ((my&3)<<2);
1608 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1609 uint8_t * src_cb, * src_cr;
1610 int extra_width= h->emu_edge_width;
1611 int extra_height= h->emu_edge_height;
1613 const int full_mx= mx>>2;
1614 const int full_my= my>>2;
1615 const int pic_width = 16*s->mb_width;
1616 const int pic_height = 16*s->mb_height >> MB_FIELD;
/* sub-pel interpolation reads 3 extra border pixels */
1618 if(mx&7) extra_width -= 3;
1619 if(my&7) extra_height -= 3;
/* out-of-picture reads: build a padded copy in the edge buffer */
1621 if( full_mx < 0-extra_width
1622 || full_my < 0-extra_height
1623 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1624 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1625 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1626 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1630 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
/* non-square partition: second call covers the other half */
1632 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1635 if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1638 // chroma offset when predicting from a field of opposite parity
1639 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1640 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1642 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1643 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1646 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1647 src_cb= s->edge_emu_buffer;
1649 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1652 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1653 src_cr= s->edge_emu_buffer;
1655 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/**
 * Unweighted motion compensation of one partition: the first predicted
 * list uses the "put" functions, and when both lists predict, the second
 * uses the "avg" functions so the results are averaged in place.
 * NOTE(review): sampled view — the if(list0)/if(list1) headers, the
 * qpix_op reassignment and braces are elided between the visible lines.
 */
1658 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1659 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1660 int x_offset, int y_offset,
1661 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1662 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1663 int list0, int list1){
1664 MpegEncContext * const s = &h->s;
1665 qpel_mc_func *qpix_op= qpix_put;
1666 h264_chroma_mc_func chroma_op= chroma_put;
/* move destinations to this partition; offsets are in chroma pixels */
1668 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1669 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1670 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
/* convert to absolute position inside the picture */
1671 x_offset += 8*s->mb_x;
1672 y_offset += 8*(s->mb_y >> MB_FIELD);
1675 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1676 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1677 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1678 qpix_op, chroma_op);
/* second list averages onto the first list's prediction */
1681 chroma_op= chroma_avg;
1685 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1686 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1687 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1688 qpix_op, chroma_op);
/**
 * Weighted-prediction motion compensation of one partition. For
 * bi-prediction, both references are interpolated (the second into a
 * scratchpad) and then blended with implicit (use_weight==2) or explicit
 * weights; for uni-prediction, the single prediction is scaled/offset
 * with the explicit weights.
 * NOTE(review): sampled view — the if(list0 && list1) header, else-arms
 * and braces are elided between the visible lines.
 */
1692 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1693 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1694 int x_offset, int y_offset,
1695 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1696 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1697 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1698 int list0, int list1){
1699 MpegEncContext * const s = &h->s;
/* move destinations to this partition; offsets are in chroma pixels */
1701 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1702 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1703 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1704 x_offset += 8*s->mb_x;
1705 y_offset += 8*(s->mb_y >> MB_FIELD);
1708 /* don't optimize for luma-only case, since B-frames usually
1709 * use implicit weights => chroma too. */
1710 uint8_t *tmp_cb = s->obmc_scratchpad;
1711 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1712 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1713 int refn0 = h->ref_cache[0][ scan8[n] ];
1714 int refn1 = h->ref_cache[1][ scan8[n] ];
/* list0 prediction to the destination, list1 to the scratchpad */
1716 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1717 dest_y, dest_cb, dest_cr,
1718 x_offset, y_offset, qpix_put, chroma_put);
1719 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1720 tmp_y, tmp_cb, tmp_cr,
1721 x_offset, y_offset, qpix_put, chroma_put);
/* implicit weights: w0 + w1 == 64, log2 denom 5 */
1723 if(h->use_weight == 2){
1724 int weight0 = h->implicit_weight[refn0][refn1];
1725 int weight1 = 64 - weight0;
1726 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1727 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1728 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
/* explicit bi-prediction weights from the slice header */
1730 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1731 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1732 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1733 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1734 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1735 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1736 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1737 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1738 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
/* uni-directional: predict normally, then apply the explicit weight */
1741 int list = list1 ? 1 : 0;
1742 int refn = h->ref_cache[list][ scan8[n] ];
1743 Picture *ref= &h->ref_list[list][refn];
1744 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1745 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1746 qpix_put, chroma_put);
1748 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1749 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1750 if(h->use_weight_chroma){
1751 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1752 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1753 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1754 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/**
 * Dispatches one partition's motion compensation to the weighted or the
 * standard path. Implicit weighting with equal weights (32/32) is just an
 * average, so it takes the cheaper standard path.
 */
1759 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1760 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1761 int x_offset, int y_offset,
1762 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1763 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1764 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1765 int list0, int list1){
/* weighted path: implicit bi-pred with non-trivial weights, or explicit */
1766 if((h->use_weight==2 && list0 && list1
1767 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1768 || h->use_weight==1)
1769 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1770 x_offset, y_offset, qpix_put, chroma_put,
1771 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1773 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1774 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/**
 * Issues cache prefetches into the reference picture at the position the
 * current MB's MV suggests will be needed a few macroblocks ahead.
 * NOTE(review): sampled view — the guard around refn (e.g. refn >= 0)
 * is elided between the visible lines.
 */
1777 static inline void prefetch_motion(H264Context *h, int list){
1778 /* fetch pixels for estimated mv 4 macroblocks ahead
1779 * optimized for 64byte cache lines */
1780 MpegEncContext * const s = &h->s;
1781 const int refn = h->ref_cache[list][scan8[0]];
/* integer-pel position, shifted 4 MBs (64 luma pixels) to the right */
1783 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1784 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1785 uint8_t **src= h->ref_list[list][refn].data;
1786 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1787 s->dsp.prefetch(src[0]+off, s->linesize, 4);
/* chroma planes are contiguous: src[2]-src[1] used as the stride */
1788 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1789 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/**
 * Inter-prediction driver for one macroblock: walks the partition tree
 * (16x16 / 16x8 / 8x16 / 8x8 with 8x8, 8x4, 4x8, 4x4 sub-partitions) and
 * calls mc_part() with the matching qpel/chroma function sizes, weight
 * function slots and per-partition prediction-direction flags.
 * NOTE(review): sampled view — the 8x8 loop header, the n = 4*i setup
 * and several braces are elided between the visible lines.
 */
1793 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1794 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1795 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1796 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1797 MpegEncContext * const s = &h->s;
1798 const int mb_xy= h->mb_xy;
1799 const int mb_type= s->current_picture.mb_type[mb_xy];
1801 assert(IS_INTER(mb_type));
/* prefetch list0 reference before doing any work */
1803 prefetch_motion(h, 0);
1805 if(IS_16X16(mb_type)){
1806 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1807 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1808 &weight_op[0], &weight_avg[0],
1809 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1810 }else if(IS_16X8(mb_type)){
/* two 16x8 halves; delta 8 continues the non-square block */
1811 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1812 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1813 &weight_op[1], &weight_avg[1],
1814 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1815 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1816 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1817 &weight_op[1], &weight_avg[1],
1818 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1819 }else if(IS_8X16(mb_type)){
/* two 8x16 halves; delta is a row offset for the lower part */
1820 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1821 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1822 &weight_op[2], &weight_avg[2],
1823 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1824 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1825 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1826 &weight_op[2], &weight_avg[2],
1827 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1831 assert(IS_8X8(mb_type));
/* 8x8 partitions each carry their own sub_mb_type */
1834 const int sub_mb_type= h->sub_mb_type[i];
1836 int x_offset= (i&1)<<2;
1837 int y_offset= (i&2)<<1;
1839 if(IS_SUB_8X8(sub_mb_type)){
1840 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1841 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1842 &weight_op[3], &weight_avg[3],
1843 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1844 }else if(IS_SUB_8X4(sub_mb_type)){
1845 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1846 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1847 &weight_op[4], &weight_avg[4],
1848 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1849 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1850 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1851 &weight_op[4], &weight_avg[4],
1852 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1853 }else if(IS_SUB_4X8(sub_mb_type)){
1854 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1855 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1856 &weight_op[5], &weight_avg[5],
1857 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1858 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1859 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1860 &weight_op[5], &weight_avg[5],
1861 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1864 assert(IS_SUB_4X4(sub_mb_type));
1866 int sub_x_offset= x_offset + 2*(j&1);
1867 int sub_y_offset= y_offset + (j&2);
1868 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1869 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1870 &weight_op[6], &weight_avg[6],
1871 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
/* prefetch list1 reference for the upcoming work */
1877 prefetch_motion(h, 1);
/**
 * Precomputes the CAVLC level decoding table: for each suffix length and
 * each LEVEL_TAB_BITS-bit bitstream prefix, store the decoded level and
 * the number of bits consumed. Entries whose codeword does not fit in
 * LEVEL_TAB_BITS get a ">=100" marker so the decoder falls back to the
 * slow path with the already-known prefix.
 * NOTE(review): sampled view — the declaration of i and some braces are
 * elided between the visible lines.
 */
1880 static av_cold void init_cavlc_level_tab(void){
1881 int suffix_length, mask;
1884 for(suffix_length=0; suffix_length<7; suffix_length++){
1885 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
/* number of leading zeros before the marker bit of the exp-golomb-ish code */
1886 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
1887 int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
/* map unsigned code to signed level: +1,-1,+2,-2,... */
1889 mask= -(level_code&1);
1890 level_code= (((2+level_code)>>1) ^ mask) - mask;
1891 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
1892 cavlc_level_tab[suffix_length][i][0]= level_code;
1893 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
/* codeword longer than the table index: store escape markers */
1894 }else if(prefix + 1 <= LEVEL_TAB_BITS){
1895 cavlc_level_tab[suffix_length][i][0]= prefix+100;
1896 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
1898 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
1899 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
/**
 * One-time initialization of all static CAVLC VLC tables
 * (coeff_token, total_zeros, run and their chroma-DC variants).
 * Each VLC is pointed at its preallocated static backing array and built
 * with INIT_VLC_USE_NEW_STATIC so no dynamic allocation happens.
 */
1905 static av_cold void decode_init_vlc(void){
1906     static int done = 0;
1913         chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1914         chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1915         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1916                  &chroma_dc_coeff_token_len [0], 1, 1,
1917                  &chroma_dc_coeff_token_bits[0], 1, 1,
1918                  INIT_VLC_USE_NEW_STATIC);
         /* the four coeff_token VLCs are packed back-to-back into one array */
1922             coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1923             coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1924             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1925                      &coeff_token_len [i][0], 1, 1,
1926                      &coeff_token_bits[i][0], 1, 1,
1927                      INIT_VLC_USE_NEW_STATIC);
1928             offset += coeff_token_vlc_tables_size[i];
1931          * This is a one time safety check to make sure that
1932          * the packed static coeff_token_vlc table sizes
1933          * were initialized correctly.
1935         assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
1938             chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1939             chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1940             init_vlc(&chroma_dc_total_zeros_vlc[i],
1941                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1942                      &chroma_dc_total_zeros_len [i][0], 1, 1,
1943                      &chroma_dc_total_zeros_bits[i][0], 1, 1,
1944                      INIT_VLC_USE_NEW_STATIC);
1946         for(i=0; i<15; i++){
1947             total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1948             total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1949             init_vlc(&total_zeros_vlc[i],
1950                      TOTAL_ZEROS_VLC_BITS, 16,
1951                      &total_zeros_len [i][0], 1, 1,
1952                      &total_zeros_bits[i][0], 1, 1,
1953                      INIT_VLC_USE_NEW_STATIC);
1957             run_vlc[i].table = run_vlc_tables[i];
1958             run_vlc[i].table_allocated = run_vlc_tables_size;
1959             init_vlc(&run_vlc[i],
1961                      &run_len [i][0], 1, 1,
1962                      &run_bits[i][0], 1, 1,
1963                      INIT_VLC_USE_NEW_STATIC);
         /* run_before with zeros_left > 6 uses a separate, larger table */
1965         run7_vlc.table = run7_vlc_table,
1966         run7_vlc.table_allocated = run7_vlc_table_size;
1967         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1968                  &run_len [6][0], 1, 1,
1969                  &run_bits[6][0], 1, 1,
1970                  INIT_VLC_USE_NEW_STATIC);
1972         init_cavlc_level_tab();
/**
 * Free every per-context table allocated by alloc_tables()/context_init(),
 * including the per-thread top-border and scratchpad buffers.
 * Safe to call on a partially-allocated context (av_freep tolerates NULL).
 */
1976 static void free_tables(H264Context *h){
1979     av_freep(&h->intra4x4_pred_mode);
1980     av_freep(&h->chroma_pred_mode_table);
1981     av_freep(&h->cbp_table);
1982     av_freep(&h->mvd_table[0]);
1983     av_freep(&h->mvd_table[1]);
1984     av_freep(&h->direct_table);
1985     av_freep(&h->non_zero_count);
1986     av_freep(&h->slice_table_base);
    /* slice_table points into slice_table_base; clear it to avoid dangling */
1987     h->slice_table= NULL;
1989     av_freep(&h->mb2b_xy);
1990     av_freep(&h->mb2b8_xy);
    /* per-thread buffers (thread contexts share the main tables above) */
1992     for(i = 0; i < h->s.avctx->thread_count; i++) {
1993         hx = h->thread_context[i];
1995         av_freep(&hx->top_borders[1]);
1996         av_freep(&hx->top_borders[0]);
1997         av_freep(&hx->s.obmc_scratchpad);
/**
 * Build the 8x8 dequantization tables for all 52 QP values from the PPS
 * scaling matrices.  If the intra and inter matrices are identical, table
 * [1] aliases table [0] instead of being recomputed.  'transpose' adapts
 * the coefficient layout when a non-C IDCT (different permutation) is used.
 */
2001 static void init_dequant8_coeff_table(H264Context *h){
2003     const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2004     h->dequant8_coeff[0] = h->dequant8_buffer[0];
2005     h->dequant8_coeff[1] = h->dequant8_buffer[1];
2007     for(i=0; i<2; i++ ){
2008         if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2009             h->dequant8_coeff[1] = h->dequant8_buffer[0];
2013         for(q=0; q<52; q++){
2014             int shift = div6[q];
2017                 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2018                     ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2019                     h->pps.scaling_matrix8[i][x]) << shift;
/**
 * Build the 4x4 dequantization tables for all 52 QP values from the PPS
 * scaling matrices.  Tables with identical scaling matrices are shared by
 * pointer aliasing rather than recomputed.  'transpose' adapts the layout
 * for non-C IDCT permutations.
 */
2024 static void init_dequant4_coeff_table(H264Context *h){
2026     const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2027     for(i=0; i<6; i++ ){
2028         h->dequant4_coeff[i] = h->dequant4_buffer[i];
        /* reuse an earlier table if its scaling matrix is identical */
2030             if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2031                 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2038         for(q=0; q<52; q++){
2039             int shift = div6[q] + 2;
2042                 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2043                     ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2044                     h->pps.scaling_matrix4[i][x]) << shift;
/**
 * (Re)build all dequantization tables for the current PPS/SPS.
 * 8x8 tables are built only when the PPS enables 8x8 transforms.
 * With lossless transform bypass, QP 0 entries are forced to the flat
 * value 1<<6 (i.e. no scaling).
 */
2049 static void init_dequant_tables(H264Context *h){
2051     init_dequant4_coeff_table(h);
2052     if(h->pps.transform_8x8_mode)
2053         init_dequant8_coeff_table(h);
2054     if(h->sps.transform_bypass){
2057                 h->dequant4_coeff[i][0][x] = 1<<6;
2058         if(h->pps.transform_8x8_mode)
2061                 h->dequant8_coeff[i][0][x] = 1<<6;
2068 * needs width/height
/**
 * Allocate all per-macroblock tables of the context; requires
 * s->mb_width/mb_height/mb_stride to be known.
 * Also fills the mb_xy -> b_xy / b8_xy index maps and triggers dequant
 * table initialization on first use.
 * @return 0 on success; on allocation failure CHECKED_ALLOCZ jumps to the
 *         (not visible here) fail path.
 */
2070 static int alloc_tables(H264Context *h){
2071     MpegEncContext * const s = &h->s;
    /* one extra MB row so edge macroblocks have addressable neighbors */
2072     const int big_mb_num= s->mb_stride * (s->mb_height+1);
2075     CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t))
2077     CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
2078     CHECKED_ALLOCZ(h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
2079     CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2081     CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2082     CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2083     CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2084     CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
    /* -1 marks "no slice"; slice_table is offset so index 0 is inside the pad */
2086     memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
2087     h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2089     CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint32_t));
2090     CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2091     for(y=0; y<s->mb_height; y++){
2092         for(x=0; x<s->mb_width; x++){
2093             const int mb_xy= x + y*s->mb_stride;
2094             const int b_xy = 4*x + 4*y*h->b_stride;
2095             const int b8_xy= 2*x + 2*y*h->b8_stride;
2097             h->mb2b_xy [mb_xy]= b_xy;
2098             h->mb2b8_xy[mb_xy]= b8_xy;
    /* allocated lazily in frame_start() once linesize is known */
2102     s->obmc_scratchpad = NULL;
2104     if(!h->dequant4_coeff[0])
2105         init_dequant_tables(h);
2114 * Mimic alloc_tables(), but for every context thread.
/**
 * Mirror of alloc_tables() for slice-thread contexts: instead of
 * allocating, share the big per-MB tables of 'src' by pointer.  Only
 * per-thread state (scratchpad, prediction function pointers) is set up
 * fresh for 'dst'.
 */
2116 static void clone_tables(H264Context *dst, H264Context *src){
2117     dst->intra4x4_pred_mode       = src->intra4x4_pred_mode;
2118     dst->non_zero_count           = src->non_zero_count;
2119     dst->slice_table              = src->slice_table;
2120     dst->cbp_table                = src->cbp_table;
2121     dst->mb2b_xy                  = src->mb2b_xy;
2122     dst->mb2b8_xy                 = src->mb2b8_xy;
2123     dst->chroma_pred_mode_table   = src->chroma_pred_mode_table;
2124     dst->mvd_table[0]             = src->mvd_table[0];
2125     dst->mvd_table[1]             = src->mvd_table[1];
2126     dst->direct_table             = src->direct_table;
    /* per-thread scratchpad allocated lazily in frame_start() */
2128     dst->s.obmc_scratchpad = NULL;
2129     ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2134 * Allocate buffers which are not shared amongst multiple threads.
/**
 * Allocate the buffers that are private to one (thread) context:
 * the two top-border rows (16 luma + 8 + 8 chroma bytes per MB column).
 * @return 0 on success, -1 on allocation failure (caller runs free_tables()).
 */
2136 static int context_init(H264Context *h){
2137     CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2138     CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2142     return -1; // free_tables will clean up for us
/**
 * Initialization shared by all users of H264Context: copy dimensions and
 * codec id from the AVCodecContext, set up intra prediction and DSP
 * function pointers, and default the scaling matrices to flat (16).
 */
2145 static av_cold void common_init(H264Context *h){
2146     MpegEncContext * const s = &h->s;
2148     s->width = s->avctx->width;
2149     s->height = s->avctx->height;
2150     s->codec_id= s->avctx->codec->id;
2152     ff_h264_pred_init(&h->hpc, s->codec_id);
    /* -1 = no PPS-based dequant tables computed yet */
2154     h->dequant_coeff_pps= -1;
2155     s->unrestricted_mv=1;
2156     s->decode=1; //FIXME
    /* needed so that the IDCT permutation is known early */
2158     dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
2160     memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2161     memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/**
 * AVCodec init callback: set up MpegEncContext defaults, pick the output
 * pixel format (SVQ3 / VDPAU / plain YUV420P), detect avcC-style
 * extradata, and reset the SEI/POC bookkeeping fields.
 */
2164 static av_cold int decode_init(AVCodecContext *avctx){
2165     H264Context *h= avctx->priv_data;
2166     MpegEncContext * const s = &h->s;
2168     MPV_decode_defaults(s);
2173     s->out_format = FMT_H264;
2174     s->workaround_bugs= avctx->workaround_bugs;
2177 //    s->decode_mb= ff_h263_decode_mb;
2178     s->quarter_sample = 1;
2181     if(avctx->codec_id == CODEC_ID_SVQ3)
2182         avctx->pix_fmt= PIX_FMT_YUVJ420P;
2183     else if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
2184         avctx->pix_fmt= PIX_FMT_VDPAU_H264;
2186         avctx->pix_fmt= PIX_FMT_YUV420P;
    /* first extradata byte == 1 identifies avcC (MP4-style) extradata */
2190     if(avctx->extradata_size > 0 && avctx->extradata &&
2191        *(char *)avctx->extradata == 1){
2198     h->thread_context[0] = h;
2199     h->outputed_poc = INT_MIN;
2200     h->prev_poc_msb= 1<<16;
2201     h->sei_recovery_frame_cnt = -1;
2202     h->sei_dpb_output_delay = 0;
2203     h->sei_cpb_removal_delay = -1;
2204     h->sei_buffering_period_present = 0;
/**
 * Per-frame setup: start the MPV frame and error resilience, compute the
 * block_offset[] tables for frame and field rendering, lazily allocate the
 * per-thread scratchpad (linesize is only known here), and reset slice
 * table / reference / POC state for the new picture.
 */
2208 static int frame_start(H264Context *h){
2209     MpegEncContext * const s = &h->s;
2212     if(MPV_frame_start(s, s->avctx) < 0)
2214     ff_er_frame_start(s);
2216      * MPV_frame_start uses pict_type to derive key_frame.
2217      * This is incorrect for H.264; IDR markings must be used.
2218      * Zero here; IDR markings per slice in frame or fields are ORed in later.
2219      * See decode_nal_units().
2221     s->current_picture_ptr->key_frame= 0;
2223     assert(s->linesize && s->uvlinesize);
    /* block_offset[0..23]: frame rendering; [24..47]: field (doubled stride) */
2225     for(i=0; i<16; i++){
2226         h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2227         h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2230         h->block_offset[16+i]=
2231         h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2232         h->block_offset[24+16+i]=
2233         h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2236     /* can't be in alloc_tables because linesize isn't known there.
2237      * FIXME: redo bipred weight to not require extra buffer? */
2238     for(i = 0; i < s->avctx->thread_count; i++)
2239         if(!h->thread_context[i]->s.obmc_scratchpad)
2240             h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2242     /* some macroblocks will be accessed before they're available */
2243     if(FRAME_MBAFF || s->avctx->thread_count > 1)
2244         memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
2246 //    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2248     // We mark the current picture as non-reference after allocating it, so
2249     // that if we break out due to an error it can be released automatically
2250     // in the next MPV_frame_start().
2251     // SVQ3 as well as most other codecs have only last/next/current and thus
2252     // get released even with set reference, besides SVQ3 and others do not
2253     // mark frames as reference later "naturally".
2254     if(s->codec_id != CODEC_ID_SVQ3)
2255         s->current_picture_ptr->reference= 0;
    /* POCs are filled in per-field later; INT_MAX = not yet decoded */
2257     s->current_picture_ptr->field_poc[0]=
2258     s->current_picture_ptr->field_poc[1]= INT_MAX;
2259     assert(s->current_picture_ptr->long_ref==0);
/**
 * Save the bottom row (into top_borders[]) and right column (into
 * left_border[]) of the just-reconstructed macroblock, so neighbor MBs can
 * deblock/predict against pre-filter pixels.  MBAFF keeps two saved lines
 * per MB pair and interleaves the left border with 'step'.
 */
2264 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2265     MpegEncContext * const s = &h->s;
2274     src_cb -= uvlinesize;
2275     src_cr -= uvlinesize;
2277     if(!simple && FRAME_MBAFF){
2279         offset  = MB_MBAFF ? 1 : 17;
2280         uvoffset= MB_MBAFF ? 1 : 9;
2282             *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y +  15*linesize);
2283             *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2284             if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2285                 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2286                 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2291         h->left_border[0]= h->top_borders[0][s->mb_x][15];
2292         if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2293             h->left_border[34   ]= h->top_borders[0][s->mb_x][16+7  ];
2294             h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2300         top_idx = MB_MBAFF ? 0 : 1;
2302     step= MB_MBAFF ? 2 : 1;
2305     // There are two lines saved, the line above the top macroblock of a pair,
2306     // and the line above the bottom macroblock
2307     h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2308     for(i=1; i<17 - skiplast; i++){
2309         h->left_border[offset+i*step]= src_y[15+i*  linesize];
2312     *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y +  16*linesize);
2313     *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2315     if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2316         h->left_border[uvoffset+34   ]= h->top_borders[top_idx][s->mb_x][16+7];
2317         h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2318         for(i=1; i<9 - skiplast; i++){
2319             h->left_border[uvoffset+34   +i*step]= src_cb[7+i*uvlinesize];
2320             h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2322         *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2323         *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/**
 * Swap (xchg=1) or restore (xchg=0) the saved MB borders with the picture
 * edge pixels, so intra prediction sees pre-deblocking neighbor samples.
 * Borders belonging to other slices are only exchanged when the deblocking
 * filter crosses slice boundaries (deblocking_filter != 2).
 */
2327 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2328     MpegEncContext * const s = &h->s;
2339     if(!simple && FRAME_MBAFF){
2341         offset  = MB_MBAFF ? 1 : 17;
2342         uvoffset= MB_MBAFF ? 1 : 9;
2346             top_idx = MB_MBAFF ? 0 : 1;
2348     step= MB_MBAFF ? 2 : 1;
2351     if(h->deblocking_filter == 2) {
    /* filter does not cross slice boundaries: only exchange same-slice edges */
2353         deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2354         deblock_top  = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2356         deblock_left = (s->mb_x > 0);
2357         deblock_top =  (s->mb_y > !!MB_FIELD);
    /* step back one row/column so the saved border lines up with src */
2360     src_y  -=   linesize + 1;
2361     src_cb -= uvlinesize + 1;
2362     src_cr -= uvlinesize + 1;
2364 #define XCHG(a,b,t,xchg)\
2371         for(i = !deblock_top; i<16; i++){
2372             XCHG(h->left_border[offset+i*step], src_y [i*  linesize], temp8, xchg);
2374         XCHG(h->left_border[offset+i*step], src_y [i*  linesize], temp8, 1);
2378         XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2379         XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2380         if(s->mb_x+1 < s->mb_width){
2381             XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2385     if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2387             for(i = !deblock_top; i<8; i++){
2388                 XCHG(h->left_border[uvoffset+34   +i*step], src_cb[i*uvlinesize], temp8, xchg);
2389                 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2391             XCHG(h->left_border[uvoffset+34   +i*step], src_cb[i*uvlinesize], temp8, 1);
2392             XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2395             XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2396             XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/**
 * Reconstruct one macroblock: intra prediction or motion compensation,
 * residual IDCT/add for luma and chroma, then deblocking.
 * 'simple' is a compile-time flag (function is always_inline) that strips
 * the MBAFF/field/PCM/gray/SVQ3 paths for the common progressive case.
 */
2401 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2402     MpegEncContext * const s = &h->s;
2403     const int mb_x= s->mb_x;
2404     const int mb_y= s->mb_y;
2405     const int mb_xy= h->mb_xy;
2406     const int mb_type= s->current_picture.mb_type[mb_xy];
2407     uint8_t  *dest_y, *dest_cb, *dest_cr;
2408     int linesize, uvlinesize /*dct_offset*/;
2410     int *block_offset = &h->block_offset[0];
2411     const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
2412     /* is_h264 should always be true if SVQ3 is disabled. */
2413     const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
2414     void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2415     void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2417     dest_y  = s->current_picture.data[0] + (mb_x + mb_y * s->linesize  ) * 16;
2418     dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2419     dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2421     s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2422     s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
    /* field macroblock: double the strides and use field block offsets */
2424     if (!simple && MB_FIELD) {
2425         linesize   = h->mb_linesize = s->linesize * 2;
2426         uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2427         block_offset = &h->block_offset[24];
2428         if(mb_y&1){ //FIXME move out of this function?
2429             dest_y -= s->linesize*15;
2430             dest_cb-= s->uvlinesize*7;
2431             dest_cr-= s->uvlinesize*7;
        /* rewrite ref cache so field references encode their parity */
2435             for(list=0; list<h->list_count; list++){
2436                 if(!USES_LIST(mb_type, list))
2438                 if(IS_16X16(mb_type)){
2439                     int8_t *ref = &h->ref_cache[list][scan8[0]];
2440                     fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2442                     for(i=0; i<16; i+=4){
2443                         int ref = h->ref_cache[list][scan8[i]];
2445                             fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2451         linesize   = h->mb_linesize = s->linesize;
2452         uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2453 //        dct_offset = s->linesize * 16;
    /* I_PCM: raw samples were stored in h->mb; just copy them out */
2456     if (!simple && IS_INTRA_PCM(mb_type)) {
2457         for (i=0; i<16; i++) {
2458             memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
2460         for (i=0; i<8; i++) {
2461             memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2462             memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
2465         if(IS_INTRA(mb_type)){
            /* expose pre-deblocking neighbor pixels for intra prediction */
2466             if(h->deblocking_filter)
2467                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2469             if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2470                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2471                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2474             if(IS_INTRA4x4(mb_type)){
2475                 if(simple || !s->encoding){
2476                     if(IS_8x8DCT(mb_type)){
2477                         if(transform_bypass){
2479                             idct_add = s->dsp.add_pixels8;
2481                             idct_dc_add = s->dsp.h264_idct8_dc_add;
2482                             idct_add    = s->dsp.h264_idct8_add;
2484                         for(i=0; i<16; i+=4){
2485                             uint8_t * const ptr= dest_y + block_offset[i];
2486                             const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
                            /* profile 244 bypass: prediction fused with residual add */
2487                             if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2488                                 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2490                                 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2491                                 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2492                                                        (h->topright_samples_available<<i)&0x4000, linesize);
2494                                     if(nnz == 1 && h->mb[i*16])
2495                                         idct_dc_add(ptr, h->mb + i*16, linesize);
2497                                         idct_add   (ptr, h->mb + i*16, linesize);
2502                     if(transform_bypass){
2504                         idct_add = s->dsp.add_pixels4;
2506                         idct_dc_add = s->dsp.h264_idct_dc_add;
2507                         idct_add    = s->dsp.h264_idct_add;
2509                     for(i=0; i<16; i++){
2510                         uint8_t * const ptr= dest_y + block_offset[i];
2511                         const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2513                         if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2514                             h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
                            /* these modes need top-right samples; synthesize them
                             * from the top-row edge pixel when unavailable */
2518                             if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2519                                 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2520                                 assert(mb_y || linesize <= block_offset[i]);
2521                                 if(!topright_avail){
2522                                     tr= ptr[3 - linesize]*0x01010101;
2523                                     topright= (uint8_t*) &tr;
2525                                     topright= ptr + 4 - linesize;
2529                             h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2530                             nnz = h->non_zero_count_cache[ scan8[i] ];
2533                                     if(nnz == 1 && h->mb[i*16])
2534                                         idct_dc_add(ptr, h->mb + i*16, linesize);
2536                                         idct_add   (ptr, h->mb + i*16, linesize);
2538                                     svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
            /* intra 16x16: full-MB prediction then DC dequant/transform */
2545                 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2547                     if(!transform_bypass)
2548                         h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2550                     svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2552             if(h->deblocking_filter)
2553                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
        /* inter macroblock: motion compensation */
2555             hl_motion(h, dest_y, dest_cb, dest_cr,
2556                       s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2557                       s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2558                       s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
        /* add the luma residual */
2562         if(!IS_INTRA4x4(mb_type)){
2564                 if(IS_INTRA16x16(mb_type)){
2565                     if(transform_bypass){
2566                         if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
2567                             h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2569                             for(i=0; i<16; i++){
2570                                 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2571                                     s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
2575                          s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2577                 }else if(h->cbp&15){
2578                     if(transform_bypass){
2579                         const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2580                         idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2581                         for(i=0; i<16; i+=di){
2582                             if(h->non_zero_count_cache[ scan8[i] ]){
2583                                 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2587                         if(IS_8x8DCT(mb_type)){
2588                             s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2590                             s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2595                 for(i=0; i<16; i++){
2596                     if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2597                         uint8_t * const ptr= dest_y + block_offset[i];
2598                         svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
        /* add the chroma residual (cbp bits 4-5 = chroma coded) */
2604         if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
2605             uint8_t *dest[2] = {dest_cb, dest_cr};
2606             if(transform_bypass){
2607                 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2608                     h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2609                     h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2611                     idct_add = s->dsp.add_pixels4;
2612                     for(i=16; i<16+8; i++){
2613                         if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2614                             idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2618                     chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2619                     chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2621                     idct_add = s->dsp.h264_idct_add;
2622                     idct_dc_add = s->dsp.h264_idct_dc_add;
2623                     for(i=16; i<16+8; i++){
2624                         if(h->non_zero_count_cache[ scan8[i] ])
2625                             idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2626                         else if(h->mb[i*16])
2627                             idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2630                 for(i=16; i<16+8; i++){
2631                     if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2632                         uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2633                         svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2640     if(h->cbp || IS_INTRA(mb_type))
2641         s->dsp.clear_blocks(h->mb);
2643     if(h->deblocking_filter) {
2644         backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2645         fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2646         h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2647         h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2648         if (!simple && FRAME_MBAFF) {
2649             filter_mb     (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2651             filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2657 * Process a macroblock; this case avoids checks for expensive uncommon cases.
/* Fast path: hl_decode_mb_internal with simple=1 so the inliner removes
 * the MBAFF/PCM/gray/SVQ3 branches at compile time. */
2659 static void hl_decode_mb_simple(H264Context *h){
2660     hl_decode_mb_internal(h, 1);
2664 * Process a macroblock; this handles edge cases, such as interlacing.
/* Slow path: keeps all edge-case handling; av_noinline so the big
 * internal body is emitted only once for this variant. */
2666 static void av_noinline hl_decode_mb_complex(H264Context *h){
2667     hl_decode_mb_internal(h, 0);
/**
 * Dispatch macroblock reconstruction to the simple or complex variant
 * depending on the current macroblock's features (PCM, lossless QP 0,
 * or a context flagged as complex; CONFIG_SMALL always takes the single
 * complex path to save code size).
 */
2670 static void hl_decode_mb(H264Context *h){
2671     MpegEncContext * const s = &h->s;
2672     const int mb_xy= h->mb_xy;
2673     const int mb_type= s->current_picture.mb_type[mb_xy];
2674     int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
2677         hl_decode_mb_complex(h);
2678     else hl_decode_mb_simple(h);
/**
 * Turn a frame Picture into a single-field view in place: double the
 * linesizes, offset the data pointers for the bottom field, set the
 * reference parity and pick the matching field POC.
 */
2681 static void pic_as_field(Picture *pic, const int parity){
2683     for (i = 0; i < 4; ++i) {
2684         if (parity == PICT_BOTTOM_FIELD)
2685             pic->data[i] += pic->linesize[i];
2686         pic->reference = parity;
2687         pic->linesize[i] *= 2;
2689     pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
/**
 * Copy 'src' into 'dest' if its reference flags match 'parity',
 * converting to a field picture when parity is a single field and
 * adjusting pic_id by 'id_add'.
 * @return nonzero if the copy was made (src matched the parity).
 */
2692 static int split_field_copy(Picture *dest, Picture *src,
2693                             int parity, int id_add){
2694     int match = !!(src->reference & parity);
2698         if(parity != PICT_FRAME){
2699             pic_as_field(dest, parity);
2701             dest->pic_id += id_add;
/**
 * Build part of a default reference list from 'in' (up to 'len' entries),
 * alternating between pictures matching field parity 'sel' and the
 * opposite parity (sel^3), as the spec requires for field reference lists.
 * pic_id is the long-term index when is_long, else frame_num.
 * @return number of entries written to 'def' (returned past visible lines).
 */
2708 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2712     while(i[0]<len || i[1]<len){
        /* advance each cursor to the next picture referencing its parity */
2713         while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2715         while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2718             in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2719             split_field_copy(&def[index++], in[ i[0]++ ], sel  , 1);
2722             in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2723             split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
/**
 * Append pointers from 'src' to 'sorted', ordered by POC: descending POCs
 * below 'limit' when dir==0, ascending POCs above 'limit' when dir==1
 * (selection-sort, one pass per output element).
 * @return number of entries appended (returned past visible lines).
 */
2730 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2735     best_poc= dir ? INT_MIN : INT_MAX;
2737         for(i=0; i<len; i++){
2738             const int poc= src[i]->poc;
2739             if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2741                 sorted[out_i]= src[i];
        /* no candidate found on this pass: list exhausted */
2744         if(best_poc == (dir ? INT_MIN : INT_MAX))
2746         limit= sorted[out_i++]->poc - dir;
2752 * fills the default_ref_list.
/**
 * Fill default_ref_list[] per the spec's initialization process:
 * B slices sort short-term refs by POC around the current picture
 * (list0: before then after; list1: after then before) and swap the
 * first two list1 entries if both lists came out identical; P slices
 * take short-term refs in marking order.  Long-term refs follow in both
 * cases, and unused tail entries are zeroed.
 */
2754 static int fill_default_ref_list(H264Context *h){
2755     MpegEncContext * const s = &h->s;
2758     if(h->slice_type_nos==FF_B_TYPE){
2759         Picture *sorted[32];
2764             cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2766             cur_poc= s->current_picture_ptr->poc;
2768         for(list= 0; list<2; list++){
2769             len= add_sorted(sorted    , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2770             len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2772             len= build_def_list(h->default_ref_list[list]    , sorted     , len, 0, s->picture_structure);
2773             len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2776             if(len < h->ref_count[list])
2777                 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
        /* if both lists are identical, the spec swaps list1's first two */
2781         if(lens[0] == lens[1] && lens[1] > 1){
2782             for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2784                 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2787         len = build_def_list(h->default_ref_list[0]    , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2788         len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16                , 1, s->picture_structure);
2790         if(len < h->ref_count[0])
2791             memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
2794     for (i=0; i<h->ref_count[0]; i++) {
2795         tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2797     if(h->slice_type_nos==FF_B_TYPE){
2798         for (i=0; i<h->ref_count[1]; i++) {
2799             tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2806 static void print_short_term(H264Context *h);
2807 static void print_long_term(H264Context *h);
2810 * Extract structure information about the picture described by pic_num in
2811 * the current decoding context (frame or field). Note that pic_num is
2812 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2813 * @param pic_num picture number for which to extract structure information
2814 * @param structure one of PICT_XXX describing structure of picture
2816 * @return frame number (short term) or long term index of picture
2817 * described by pic_num
/* See the doxygen comment above: splits pic_num into the picture's
 * frame number / long-term index (return value, past the visible lines)
 * and its field/frame structure (*structure). */
2819 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2820     MpegEncContext * const s = &h->s;
2822     *structure = s->picture_structure;
2825             /* opposite field */
2826             *structure ^= PICT_FRAME;
/**
 * Parse ref_pic_list_reordering() from the slice header and apply it:
 * start from the default lists, then for each reordering command locate
 * the referenced short-term (by frame_num delta) or long-term (by index)
 * picture and move it to the current position, shifting the rest down.
 * Finally verify every list entry is a valid picture.
 * @return 0 on success, -1 on bitstream errors.
 */
2833 static int decode_ref_pic_list_reordering(H264Context *h){
2834     MpegEncContext * const s = &h->s;
2835     int list, index, pic_structure;
2837     print_short_term(h);
2840     for(list=0; list<h->list_count; list++){
2841         memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
        /* ref_pic_list_reordering_flag */
2843         if(get_bits1(&s->gb)){
2844             int pred= h->curr_pic_num;
2846             for(index=0; ; index++){
2847                 unsigned int reordering_of_pic_nums_idc= get_ue_golomb_31(&s->gb);
2848                 unsigned int pic_id;
2850                 Picture *ref = NULL;
                /* idc 3 terminates the reordering loop */
2852                 if(reordering_of_pic_nums_idc==3)
2855                 if(index >= h->ref_count[list]){
2856                     av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2860                 if(reordering_of_pic_nums_idc<3){
2861                     if(reordering_of_pic_nums_idc<2){
                        /* idc 0/1: short-term ref, addressed relative to pred */
2862                         const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2865                         if(abs_diff_pic_num > h->max_pic_num){
2866                             av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2870                         if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2871                         else                                pred+= abs_diff_pic_num;
2872                         pred &= h->max_pic_num - 1;
2874                         frame_num = pic_num_extract(h, pred, &pic_structure);
2876                         for(i= h->short_ref_count-1; i>=0; i--){
2877                             ref = h->short_ref[i];
2878                             assert(ref->reference);
2879                             assert(!ref->long_ref);
2881                                 ref->frame_num == frame_num &&
2882                                 (ref->reference & pic_structure)
                        /* idc 2: long-term ref, addressed by index */
2890                         pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2892                         long_idx= pic_num_extract(h, pic_id, &pic_structure);
2895                             av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2898                         ref = h->long_ref[long_idx];
2899                         assert(!(ref && !ref->reference));
2900                         if(ref && (ref->reference & pic_structure)){
2901                             ref->pic_id= pic_id;
2902                             assert(ref->long_ref);
2910                         av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2911                         memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
                        /* shift existing entries down and insert the ref here */
2913                         for(i=index; i+1<h->ref_count[list]; i++){
2914                             if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2917                         for(; i > index; i--){
2918                             h->ref_list[list][i]= h->ref_list[list][i-1];
2920                         h->ref_list[list][index]= *ref;
2922                             pic_as_field(&h->ref_list[list][index], pic_structure);
2926                     av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
    /* final sanity pass: no list slot may be an empty picture */
2932     for(list=0; list<h->list_count; list++){
2933         for(index= 0; index < h->ref_count[list]; index++){
2934             if(!h->ref_list[list][index].data[0]){
2935                 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2936                 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
/**
 * For MBAFF: derive per-field reference entries (top at [16+2*i], bottom
 * at [16+2*i+1]) from each frame reference, and replicate the explicit
 * and implicit weighted-prediction parameters for those field entries.
 */
2944 static void fill_mbaff_ref_list(H264Context *h){
2946     for(list=0; list<2; list++){ //FIXME try list_count
2947         for(i=0; i<h->ref_count[list]; i++){
2948             Picture *frame = &h->ref_list[list][i];
2949             Picture *field = &h->ref_list[list][16+2*i];
2952                 field[0].linesize[j] <<= 1;
2953             field[0].reference = PICT_TOP_FIELD;
2954             field[0].poc= field[0].field_poc[0];
2955             field[1] = field[0];
            /* bottom field starts one (frame) line below the top field */
2957                 field[1].data[j] += frame->linesize[j];
2958             field[1].reference = PICT_BOTTOM_FIELD;
2959             field[1].poc= field[1].field_poc[1];
2961             h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2962             h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2964                 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2965                 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
2969     for(j=0; j<h->ref_count[1]; j++){
2970         for(i=0; i<h->ref_count[0]; i++)
2971             h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2972         memcpy(h->implicit_weight[16+2*j],   h->implicit_weight[j], sizeof(*h->implicit_weight));
2973         memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/**
 * Parse pred_weight_table() from the slice header: the luma/chroma weight
 * denominators, then per reference and per list an optional explicit
 * weight/offset pair (defaults: weight = 1<<denom, offset = 0).
 * Sets use_weight/use_weight_chroma when any non-default value appears.
 * List1 is only parsed for B slices.
 */
2977 static int pred_weight_table(H264Context *h){
2978     MpegEncContext * const s = &h->s;
2980     int luma_def, chroma_def;
2983     h->use_weight_chroma= 0;
2984     h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2985     h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
2986     luma_def = 1<<h->luma_log2_weight_denom;
2987     chroma_def = 1<<h->chroma_log2_weight_denom;
2989     for(list=0; list<2; list++){
2990         h->luma_weight_flag[list]   = 0;
2991         h->chroma_weight_flag[list] = 0;
2992         for(i=0; i<h->ref_count[list]; i++){
2993             int luma_weight_flag, chroma_weight_flag;
2995             luma_weight_flag= get_bits1(&s->gb);
2996             if(luma_weight_flag){
2997                 h->luma_weight[list][i]= get_se_golomb(&s->gb);
2998                 h->luma_offset[list][i]= get_se_golomb(&s->gb);
2999                 if(   h->luma_weight[list][i] != luma_def
3000                    || h->luma_offset[list][i] != 0) {
3002                     h->luma_weight_flag[list]= 1;
3005                 h->luma_weight[list][i]= luma_def;
3006                 h->luma_offset[list][i]= 0;
3010                 chroma_weight_flag= get_bits1(&s->gb);
3011                 if(chroma_weight_flag){
3014                         h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3015                         h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3016                         if(   h->chroma_weight[list][i][j] != chroma_def
3017                            || h->chroma_offset[list][i][j] != 0) {
3018                             h->use_weight_chroma= 1;
3019                             h->chroma_weight_flag[list]= 1;
3025                         h->chroma_weight[list][i][j]= chroma_def;
3026                         h->chroma_offset[list][i][j]= 0;
3031         if(h->slice_type_nos != FF_B_TYPE) break;
3033     h->use_weight= h->use_weight || h->use_weight_chroma;
3037 static void implicit_weight_table(H264Context *h){
3038 MpegEncContext * const s = &h->s;
3040 int cur_poc = s->current_picture_ptr->poc;
3042 for (i = 0; i < 2; i++) {
3043 h->luma_weight_flag[i] = 0;
3044 h->chroma_weight_flag[i] = 0;
3047 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3048 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3050 h->use_weight_chroma= 0;
3055 h->use_weight_chroma= 2;
3056 h->luma_log2_weight_denom= 5;
3057 h->chroma_log2_weight_denom= 5;
3059 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3060 int poc0 = h->ref_list[0][ref0].poc;
3061 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3062 int poc1 = h->ref_list[1][ref1].poc;
3063 int td = av_clip(poc1 - poc0, -128, 127);
3065 int tb = av_clip(cur_poc - poc0, -128, 127);
3066 int tx = (16384 + (FFABS(td) >> 1)) / td;
3067 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3068 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3069 h->implicit_weight[ref0][ref1] = 32;
3071 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3073 h->implicit_weight[ref0][ref1] = 32;
3079 * Mark a picture as no longer needed for reference. The refmask
3080 * argument allows unreferencing of individual fields or the whole frame.
3081 * If the picture becomes entirely unreferenced, but is being held for
3082 * display purposes, it is marked as such.
3083 * @param refmask mask of fields to unreference; the mask is bitwise
3084 * anded with the reference marking of pic
3085 * @return non-zero if pic becomes entirely unreferenced (except possibly
3086 * for display purposes) zero if one of the fields remains in
3089 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
3091 if (pic->reference &= refmask) {
3094 for(i = 0; h->delayed_pic[i]; i++)
3095 if(pic == h->delayed_pic[i]){
3096 pic->reference=DELAYED_PIC_REF;
3104 * instantaneous decoder refresh.
3106 static void idr(H264Context *h){
3109 for(i=0; i<16; i++){
3110 remove_long(h, i, 0);
3112 assert(h->long_ref_count==0);
3114 for(i=0; i<h->short_ref_count; i++){
3115 unreference_pic(h, h->short_ref[i], 0);
3116 h->short_ref[i]= NULL;
3118 h->short_ref_count=0;
3119 h->prev_frame_num= 0;
3120 h->prev_frame_num_offset= 0;
3125 /* forget old pics after a seek */
3126 static void flush_dpb(AVCodecContext *avctx){
3127 H264Context *h= avctx->priv_data;
3129 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3130 if(h->delayed_pic[i])
3131 h->delayed_pic[i]->reference= 0;
3132 h->delayed_pic[i]= NULL;
3134 h->outputed_poc= INT_MIN;
3136 if(h->s.current_picture_ptr)
3137 h->s.current_picture_ptr->reference= 0;
3138 h->s.first_field= 0;
3139 h->sei_recovery_frame_cnt = -1;
3140 h->sei_dpb_output_delay = 0;
3141 h->sei_cpb_removal_delay = -1;
3142 h->sei_buffering_period_present = 0;
3143 ff_mpeg_flush(avctx);
3147 * Find a Picture in the short term reference list by frame number.
3148 * @param frame_num frame number to search for
3149 * @param idx the index into h->short_ref where returned picture is found
3150 * undefined if no picture found.
3151 * @return pointer to the found picture, or NULL if no pic with the provided
3152 * frame number is found
3154 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3155 MpegEncContext * const s = &h->s;
3158 for(i=0; i<h->short_ref_count; i++){
3159 Picture *pic= h->short_ref[i];
3160 if(s->avctx->debug&FF_DEBUG_MMCO)
3161 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3162 if(pic->frame_num == frame_num) {
3171 * Remove a picture from the short term reference list by its index in
3172 * that list. This does no checking on the provided index; it is assumed
3173 * to be valid. Other list entries are shifted down.
3174 * @param i index into h->short_ref of picture to remove.
3176 static void remove_short_at_index(H264Context *h, int i){
3177 assert(i >= 0 && i < h->short_ref_count);
3178 h->short_ref[i]= NULL;
3179 if (--h->short_ref_count)
3180 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3185 * @return the removed picture or NULL if an error occurs
3187 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3188 MpegEncContext * const s = &h->s;
3192 if(s->avctx->debug&FF_DEBUG_MMCO)
3193 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3195 pic = find_short(h, frame_num, &i);
3197 if(unreference_pic(h, pic, ref_mask))
3198 remove_short_at_index(h, i);
3205 * Remove a picture from the long term reference list by its index in
3207 * @return the removed picture or NULL if an error occurs
3209 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3212 pic= h->long_ref[i];
3214 if(unreference_pic(h, pic, ref_mask)){
3215 assert(h->long_ref[i]->long_ref == 1);
3216 h->long_ref[i]->long_ref= 0;
3217 h->long_ref[i]= NULL;
3218 h->long_ref_count--;
3226 * print short term list
3228 static void print_short_term(H264Context *h) {
3230 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3231 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3232 for(i=0; i<h->short_ref_count; i++){
3233 Picture *pic= h->short_ref[i];
3234 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3240 * print long term list
3242 static void print_long_term(H264Context *h) {
3244 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3245 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3246 for(i = 0; i < 16; i++){
3247 Picture *pic= h->long_ref[i];
3249 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3256 * Executes the reference picture marking (memory management control operations).
3258 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3259 MpegEncContext * const s = &h->s;
3261 int current_ref_assigned=0;
3264 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3265 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3267 for(i=0; i<mmco_count; i++){
3268 int structure, frame_num;
3269 if(s->avctx->debug&FF_DEBUG_MMCO)
3270 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
3272 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3273 || mmco[i].opcode == MMCO_SHORT2LONG){
3274 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3275 pic = find_short(h, frame_num, &j);
3277 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3278 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3279 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3284 switch(mmco[i].opcode){
3285 case MMCO_SHORT2UNUSED:
3286 if(s->avctx->debug&FF_DEBUG_MMCO)
3287 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3288 remove_short(h, frame_num, structure ^ PICT_FRAME);
3290 case MMCO_SHORT2LONG:
3291 if (h->long_ref[mmco[i].long_arg] != pic)
3292 remove_long(h, mmco[i].long_arg, 0);
3294 remove_short_at_index(h, j);
3295 h->long_ref[ mmco[i].long_arg ]= pic;
3296 if (h->long_ref[ mmco[i].long_arg ]){
3297 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3298 h->long_ref_count++;
3301 case MMCO_LONG2UNUSED:
3302 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3303 pic = h->long_ref[j];
3305 remove_long(h, j, structure ^ PICT_FRAME);
3306 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3307 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3310 // Comment below left from previous code as it is an interresting note.
3311 /* First field in pair is in short term list or
3312 * at a different long term index.
3313 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3314 * Report the problem and keep the pair where it is,
3315 * and mark this field valid.
3318 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3319 remove_long(h, mmco[i].long_arg, 0);
3321 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3322 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3323 h->long_ref_count++;
3326 s->current_picture_ptr->reference |= s->picture_structure;
3327 current_ref_assigned=1;
3329 case MMCO_SET_MAX_LONG:
3330 assert(mmco[i].long_arg <= 16);
3331 // just remove the long term which index is greater than new max
3332 for(j = mmco[i].long_arg; j<16; j++){
3333 remove_long(h, j, 0);
3337 while(h->short_ref_count){
3338 remove_short(h, h->short_ref[0]->frame_num, 0);
3340 for(j = 0; j < 16; j++) {
3341 remove_long(h, j, 0);
3343 s->current_picture_ptr->poc=
3344 s->current_picture_ptr->field_poc[0]=
3345 s->current_picture_ptr->field_poc[1]=
3349 s->current_picture_ptr->frame_num= 0;
3355 if (!current_ref_assigned) {
3356 /* Second field of complementary field pair; the first field of
3357 * which is already referenced. If short referenced, it
3358 * should be first entry in short_ref. If not, it must exist
3359 * in long_ref; trying to put it on the short list here is an
3360 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3362 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3363 /* Just mark the second field valid */
3364 s->current_picture_ptr->reference = PICT_FRAME;
3365 } else if (s->current_picture_ptr->long_ref) {
3366 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3367 "assignment for second field "
3368 "in complementary field pair "
3369 "(first field is long term)\n");
3371 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3373 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3376 if(h->short_ref_count)
3377 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3379 h->short_ref[0]= s->current_picture_ptr;
3380 h->short_ref_count++;
3381 s->current_picture_ptr->reference |= s->picture_structure;
3385 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3387 /* We have too many reference frames, probably due to corrupted
3388 * stream. Need to discard one frame. Prevents overrun of the
3389 * short_ref and long_ref buffers.
3391 av_log(h->s.avctx, AV_LOG_ERROR,
3392 "number of reference frames exceeds max (probably "
3393 "corrupt input), discarding one\n");
3395 if (h->long_ref_count && !h->short_ref_count) {
3396 for (i = 0; i < 16; ++i)
3401 remove_long(h, i, 0);
3403 pic = h->short_ref[h->short_ref_count - 1];
3404 remove_short(h, pic->frame_num, 0);
3408 print_short_term(h);
3413 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3414 MpegEncContext * const s = &h->s;
3418 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
3419 s->broken_link= get_bits1(gb) -1;
3421 h->mmco[0].opcode= MMCO_LONG;
3422 h->mmco[0].long_arg= 0;
3426 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3427 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3428 MMCOOpcode opcode= get_ue_golomb_31(gb);
3430 h->mmco[i].opcode= opcode;
3431 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
3432 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3433 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3434 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3438 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3439 unsigned int long_arg= get_ue_golomb_31(gb);
3440 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3441 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3444 h->mmco[i].long_arg= long_arg;
3447 if(opcode > (unsigned)MMCO_LONG){
3448 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3451 if(opcode == MMCO_END)
3456 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3458 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3459 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3460 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3461 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3463 if (FIELD_PICTURE) {
3464 h->mmco[0].short_pic_num *= 2;
3465 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3466 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
3476 static int init_poc(H264Context *h){
3477 MpegEncContext * const s = &h->s;
3478 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3480 Picture *cur = s->current_picture_ptr;
3482 h->frame_num_offset= h->prev_frame_num_offset;
3483 if(h->frame_num < h->prev_frame_num)
3484 h->frame_num_offset += max_frame_num;
3486 if(h->sps.poc_type==0){
3487 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
3489 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3490 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3491 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3492 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3494 h->poc_msb = h->prev_poc_msb;
3495 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3497 field_poc[1] = h->poc_msb + h->poc_lsb;
3498 if(s->picture_structure == PICT_FRAME)
3499 field_poc[1] += h->delta_poc_bottom;
3500 }else if(h->sps.poc_type==1){
3501 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3504 if(h->sps.poc_cycle_length != 0)
3505 abs_frame_num = h->frame_num_offset + h->frame_num;
3509 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3512 expected_delta_per_poc_cycle = 0;
3513 for(i=0; i < h->sps.poc_cycle_length; i++)
3514 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3516 if(abs_frame_num > 0){
3517 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3518 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3520 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3521 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3522 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3526 if(h->nal_ref_idc == 0)
3527 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3529 field_poc[0] = expectedpoc + h->delta_poc[0];
3530 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3532 if(s->picture_structure == PICT_FRAME)
3533 field_poc[1] += h->delta_poc[1];
3535 int poc= 2*(h->frame_num_offset + h->frame_num);
3544 if(s->picture_structure != PICT_BOTTOM_FIELD)
3545 s->current_picture_ptr->field_poc[0]= field_poc[0];
3546 if(s->picture_structure != PICT_TOP_FIELD)
3547 s->current_picture_ptr->field_poc[1]= field_poc[1];
3548 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3555 * initialize scan tables
3557 static void init_scan_tables(H264Context *h){
3558 MpegEncContext * const s = &h->s;
3560 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3561 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3562 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3564 for(i=0; i<16; i++){
3565 #define T(x) (x>>2) | ((x<<2) & 0xF)
3566 h->zigzag_scan[i] = T(zigzag_scan[i]);
3567 h-> field_scan[i] = T( field_scan[i]);
3571 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3572 memcpy(h->zigzag_scan8x8, ff_zigzag_direct, 64*sizeof(uint8_t));
3573 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3574 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3575 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3577 for(i=0; i<64; i++){
3578 #define T(x) (x>>3) | ((x&7)<<3)
3579 h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]);
3580 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3581 h->field_scan8x8[i] = T(field_scan8x8[i]);
3582 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
3586 if(h->sps.transform_bypass){ //FIXME same ugly
3587 h->zigzag_scan_q0 = zigzag_scan;
3588 h->zigzag_scan8x8_q0 = ff_zigzag_direct;
3589 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3590 h->field_scan_q0 = field_scan;
3591 h->field_scan8x8_q0 = field_scan8x8;
3592 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3594 h->zigzag_scan_q0 = h->zigzag_scan;
3595 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3596 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3597 h->field_scan_q0 = h->field_scan;
3598 h->field_scan8x8_q0 = h->field_scan8x8;
3599 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3604 * Replicates H264 "master" context to thread contexts.
3606 static void clone_slice(H264Context *dst, H264Context *src)
3608 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3609 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3610 dst->s.current_picture = src->s.current_picture;
3611 dst->s.linesize = src->s.linesize;
3612 dst->s.uvlinesize = src->s.uvlinesize;
3613 dst->s.first_field = src->s.first_field;
3615 dst->prev_poc_msb = src->prev_poc_msb;
3616 dst->prev_poc_lsb = src->prev_poc_lsb;
3617 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3618 dst->prev_frame_num = src->prev_frame_num;
3619 dst->short_ref_count = src->short_ref_count;
3621 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3622 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3623 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3624 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3626 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3627 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3631 * decodes a slice header.
3632 * This will also call MPV_common_init() and frame_start() as needed.
3634 * @param h h264context
3635 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3637 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3639 static int decode_slice_header(H264Context *h, H264Context *h0){
3640 MpegEncContext * const s = &h->s;
3641 MpegEncContext * const s0 = &h0->s;
3642 unsigned int first_mb_in_slice;
3643 unsigned int pps_id;
3644 int num_ref_idx_active_override_flag;
3645 unsigned int slice_type, tmp, i, j;
3646 int default_ref_list_done = 0;
3647 int last_pic_structure;
3649 s->dropable= h->nal_ref_idc == 0;
3651 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3652 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3653 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3655 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3656 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3659 first_mb_in_slice= get_ue_golomb(&s->gb);
3661 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3662 h0->current_slice = 0;
3663 if (!s0->first_field)
3664 s->current_picture_ptr= NULL;
3667 slice_type= get_ue_golomb_31(&s->gb);
3669 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3674 h->slice_type_fixed=1;
3676 h->slice_type_fixed=0;
3678 slice_type= golomb_to_pict_type[ slice_type ];
3679 if (slice_type == FF_I_TYPE
3680 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3681 default_ref_list_done = 1;
3683 h->slice_type= slice_type;
3684 h->slice_type_nos= slice_type & 3;
3686 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3687 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3688 av_log(h->s.avctx, AV_LOG_ERROR,
3689 "B picture before any references, skipping\n");
3693 pps_id= get_ue_golomb(&s->gb);
3694 if(pps_id>=MAX_PPS_COUNT){
3695 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3698 if(!h0->pps_buffers[pps_id]) {
3699 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3702 h->pps= *h0->pps_buffers[pps_id];
3704 if(!h0->sps_buffers[h->pps.sps_id]) {
3705 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3708 h->sps = *h0->sps_buffers[h->pps.sps_id];
3710 if(h == h0 && h->dequant_coeff_pps != pps_id){
3711 h->dequant_coeff_pps = pps_id;
3712 init_dequant_tables(h);
3715 s->mb_width= h->sps.mb_width;
3716 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3718 h->b_stride= s->mb_width*4;
3719 h->b8_stride= s->mb_width*2;
3721 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3722 if(h->sps.frame_mbs_only_flag)
3723 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3725 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3727 if (s->context_initialized
3728 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3730 return -1; // width / height changed during parallelized decoding
3732 flush_dpb(s->avctx);
3735 if (!s->context_initialized) {
3737 return -1; // we cant (re-)initialize context during parallel decoding
3738 if (MPV_common_init(s) < 0)
3742 init_scan_tables(h);
3745 for(i = 1; i < s->avctx->thread_count; i++) {
3747 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3748 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3749 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3752 init_scan_tables(c);
3756 for(i = 0; i < s->avctx->thread_count; i++)
3757 if(context_init(h->thread_context[i]) < 0)
3760 s->avctx->width = s->width;
3761 s->avctx->height = s->height;
3762 s->avctx->sample_aspect_ratio= h->sps.sar;
3763 if(!s->avctx->sample_aspect_ratio.den)
3764 s->avctx->sample_aspect_ratio.den = 1;
3766 if(h->sps.timing_info_present_flag){
3767 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
3768 if(h->x264_build > 0 && h->x264_build < 44)
3769 s->avctx->time_base.den *= 2;
3770 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3771 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3775 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
3778 h->mb_aff_frame = 0;
3779 last_pic_structure = s0->picture_structure;
3780 if(h->sps.frame_mbs_only_flag){
3781 s->picture_structure= PICT_FRAME;
3783 if(get_bits1(&s->gb)) { //field_pic_flag
3784 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3786 s->picture_structure= PICT_FRAME;
3787 h->mb_aff_frame = h->sps.mb_aff;
3790 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3792 if(h0->current_slice == 0){
3793 while(h->frame_num != h->prev_frame_num &&
3794 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3795 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3796 if (frame_start(h) < 0)
3798 h->prev_frame_num++;
3799 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3800 s->current_picture_ptr->frame_num= h->prev_frame_num;
3801 execute_ref_pic_marking(h, NULL, 0);
3804 /* See if we have a decoded first field looking for a pair... */
3805 if (s0->first_field) {
3806 assert(s0->current_picture_ptr);
3807 assert(s0->current_picture_ptr->data[0]);
3808 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3810 /* figure out if we have a complementary field pair */
3811 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3813 * Previous field is unmatched. Don't display it, but let it
3814 * remain for reference if marked as such.
3816 s0->current_picture_ptr = NULL;
3817 s0->first_field = FIELD_PICTURE;
3820 if (h->nal_ref_idc &&
3821 s0->current_picture_ptr->reference &&
3822 s0->current_picture_ptr->frame_num != h->frame_num) {
3824 * This and previous field were reference, but had
3825 * different frame_nums. Consider this field first in
3826 * pair. Throw away previous field except for reference
3829 s0->first_field = 1;
3830 s0->current_picture_ptr = NULL;
3833 /* Second field in complementary pair */
3834 s0->first_field = 0;
3839 /* Frame or first field in a potentially complementary pair */
3840 assert(!s0->current_picture_ptr);
3841 s0->first_field = FIELD_PICTURE;
3844 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3845 s0->first_field = 0;
3852 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3854 assert(s->mb_num == s->mb_width * s->mb_height);
3855 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3856 first_mb_in_slice >= s->mb_num){
3857 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3860 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3861 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3862 if (s->picture_structure == PICT_BOTTOM_FIELD)
3863 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3864 assert(s->mb_y < s->mb_height);
3866 if(s->picture_structure==PICT_FRAME){
3867 h->curr_pic_num= h->frame_num;
3868 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3870 h->curr_pic_num= 2*h->frame_num + 1;
3871 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3874 if(h->nal_unit_type == NAL_IDR_SLICE){
3875 get_ue_golomb(&s->gb); /* idr_pic_id */
3878 if(h->sps.poc_type==0){
3879 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3881 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3882 h->delta_poc_bottom= get_se_golomb(&s->gb);
3886 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3887 h->delta_poc[0]= get_se_golomb(&s->gb);
3889 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3890 h->delta_poc[1]= get_se_golomb(&s->gb);
3895 if(h->pps.redundant_pic_cnt_present){
3896 h->redundant_pic_count= get_ue_golomb(&s->gb);
3899 //set defaults, might be overridden a few lines later
3900 h->ref_count[0]= h->pps.ref_count[0];
3901 h->ref_count[1]= h->pps.ref_count[1];
3903 if(h->slice_type_nos != FF_I_TYPE){
3904 if(h->slice_type_nos == FF_B_TYPE){
3905 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3907 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3909 if(num_ref_idx_active_override_flag){
3910 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3911 if(h->slice_type_nos==FF_B_TYPE)
3912 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
3914 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3915 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3916 h->ref_count[0]= h->ref_count[1]= 1;
3920 if(h->slice_type_nos == FF_B_TYPE)
3927 if(!default_ref_list_done){
3928 fill_default_ref_list(h);
3931 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
3934 if(h->slice_type_nos!=FF_I_TYPE){
3935 s->last_picture_ptr= &h->ref_list[0][0];
3936 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
3938 if(h->slice_type_nos==FF_B_TYPE){
3939 s->next_picture_ptr= &h->ref_list[1][0];
3940 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
3943 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
3944 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3945 pred_weight_table(h);
3946 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3947 implicit_weight_table(h);
3950 for (i = 0; i < 2; i++) {
3951 h->luma_weight_flag[i] = 0;
3952 h->chroma_weight_flag[i] = 0;
3957 decode_ref_pic_marking(h0, &s->gb);
3960 fill_mbaff_ref_list(h);
3962 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3963 direct_dist_scale_factor(h);
3964 direct_ref_list_init(h);
3966 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3967 tmp = get_ue_golomb_31(&s->gb);
3969 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3972 h->cabac_init_idc= tmp;
3975 h->last_qscale_diff = 0;
3976 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3978 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3982 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3983 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3984 //FIXME qscale / qp ... stuff
3985 if(h->slice_type == FF_SP_TYPE){
3986 get_bits1(&s->gb); /* sp_for_switch_flag */
3988 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
3989 get_se_golomb(&s->gb); /* slice_qs_delta */
3992 h->deblocking_filter = 1;
3993 h->slice_alpha_c0_offset = 0;
3994 h->slice_beta_offset = 0;
3995 if( h->pps.deblocking_filter_parameters_present ) {
3996 tmp= get_ue_golomb_31(&s->gb);
3998 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
4001 h->deblocking_filter= tmp;
4002 if(h->deblocking_filter < 2)
4003 h->deblocking_filter^= 1; // 1<->0
4005 if( h->deblocking_filter ) {
4006 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4007 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4011 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4012 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
4013 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
4014 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4015 h->deblocking_filter= 0;
4017 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4018 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4019 /* Cheat slightly for speed:
4020 Do not bother to deblock across slices. */
4021 h->deblocking_filter = 2;
4023 h0->max_contexts = 1;
4024 if(!h0->single_decode_warning) {
4025 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4026 h0->single_decode_warning = 1;
4029 return 1; // deblocking switched inside frame
4034 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4035 slice_group_change_cycle= get_bits(&s->gb, ?);
4038 h0->last_slice_type = slice_type;
4039 h->slice_num = ++h0->current_slice;
4040 if(h->slice_num >= MAX_SLICES){
4041 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
4045 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
4049 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
4050 +(h->ref_list[j][i].reference&3);
4053 for(i=16; i<48; i++)
4054 ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
4055 +(h->ref_list[j][i].reference&3);
4058 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4059 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4061 s->avctx->refs= h->sps.ref_frame_count;
4063 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4064 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4066 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4068 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
4069 pps_id, h->frame_num,
4070 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4071 h->ref_count[0], h->ref_count[1],
4073 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4075 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4076 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
/**
 * Reads the CAVLC level_prefix: the count of leading zero bits before the
 * first 1-bit, via the cached bitstream-reader macros.
 * NOTE(review): gaps in the embedded line numbers (4086->4090, 4092->4094,
 * 4101->end) show that the declarations of buf/log, the TRACE guard around
 * the debug logging, and the final return statement are missing from this
 * view — presumably it returns log-1 (the prefix length); confirm against
 * the full source.
 */
4086 static inline int get_level_prefix(GetBitContext *gb){
4090 OPEN_READER(re, gb);
4091 UPDATE_CACHE(re, gb);
4092 buf=GET_CACHE(re, gb);
// av_log2 gives the index of the highest set bit, so 32-av_log2(buf) is
// the number of bits up to and including the first 1 (prefix length + 1).
4094 log= 32 - av_log2(buf);
4096 print_bin(buf>>(32-log), log);
4097 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
// Consume the prefix (zeros + terminating 1) and write the reader
// position back to the GetBitContext.
4100 LAST_SKIP_BITS(re, gb, log);
4101 CLOSE_READER(re, gb);
/**
 * Returns nonzero iff the 8x8 DCT may be used for this macroblock, i.e.
 * none of the four sub_mb_types forbids it. The four 16-bit sub_mb_type
 * entries are tested in one 64-bit operation by replicating the forbidden
 * flags with the 0x0001000100010001ULL multiplier.
 * When direct_8x8_inference_flag is set, DIRECT sub-blocks are allowed
 * (inference guarantees 8x8 granularity), so MB_TYPE_DIRECT2 is excluded
 * from the forbidden mask in that branch.
 * NOTE(review): the embedded line numbers jump 4108->4110 and the closing
 * brace is not visible — an `else` line is presumably missing here.
 * NOTE(review): the uint64_t cast over h->sub_mb_type relies on suitable
 * alignment/aliasing of that array — pre-existing in this codebase.
 */
4106 static inline int get_dct8x8_allowed(H264Context *h){
4107 if(h->sps.direct_8x8_inference_flag)
4108 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL));
4110 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
4114 * decodes a residual block.
4115 * @param n block index
4116 * @param scantable scantable
4117 * @param max_coeff number of coefficients in the block
4118 * @return <0 if an error occurred
/*
 * CAVLC residual decoding (H.264 spec section 9.2): reads coeff_token,
 * trailing-one signs, level codes, total_zeros and run_before, then
 * writes the (optionally dequantized via qmul) levels into block[] in
 * scantable order.
 * NOTE(review): the embedded line numbers are discontinuous throughout
 * (e.g. 4122->4124, 4130->4132, 4140->4144) — else-branches, early
 * returns, the level[] declaration and several closing braces are not
 * visible in this extraction; comments below hedge accordingly.
 */
4120 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4121 MpegEncContext * const s = &h->s;
// Maps total_coeff of the left/top predictor (0..16) to one of the four
// coeff_token VLC tables.
4122 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4124 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4126 //FIXME put trailing_onex into the context
// --- coeff_token: chroma DC / luma DC / regular blocks each use their
// own VLC; coeff_token packs total_coeff (>>2) and trailing_ones (&3).
4128 if(n == CHROMA_DC_BLOCK_INDEX){
4129 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4130 total_coeff= coeff_token>>2;
4132 if(n == LUMA_DC_BLOCK_INDEX){
// The VLC table choice is context-adaptive: predicted from the
// neighbours' non-zero counts.
4133 total_coeff= pred_non_zero_count(h, 0);
4134 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4135 total_coeff= coeff_token>>2;
4137 total_coeff= pred_non_zero_count(h, n);
4138 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4139 total_coeff= coeff_token>>2;
4140 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4144 //FIXME set last_non_zero?
// Sanity check against corrupted streams; unsigned compare also catches
// any negative value.
4148 if(total_coeff > (unsigned)max_coeff) {
4149 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
// --- trailing ones: up to 3 coefficients of magnitude 1; only their
// sign bits are coded. All 3 sign bits are peeked at once, but only
// trailing_ones of them are consumed; level[k] becomes +/-1.
4153 trailing_ones= coeff_token&3;
4154 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4155 assert(total_coeff<=16);
4157 i = show_bits(gb, 3);
4158 skip_bits(gb, trailing_ones);
4159 level[0] = 1-((i&4)>>1);
4160 level[1] = 1-((i&2) );
4161 level[2] = 1-((i&1)<<1);
// --- first non-trailing-one level: decoded through the cavlc_level_tab
// fast path; codes >= 100 mean "prefix too long for the table", fall
// back to reading level_prefix explicitly.
4163 if(trailing_ones<total_coeff) {
4165 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4166 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4167 int level_code= cavlc_level_tab[suffix_length][bitsi][0];
4169 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
4170 if(level_code >= 100){
4171 prefix= level_code - 100;
4172 if(prefix == LEVEL_TAB_BITS)
4173 prefix += get_level_prefix(gb);
4175 //first coefficient has suffix_length equal to 0 or 1
4176 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4178 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4180 level_code= (prefix<<suffix_length); //part
4181 }else if(prefix==14){
4183 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
// prefix==14 with suffix_length==0: a 4-bit escape suffix.
4185 level_code= prefix + get_bits(gb, 4); //part
// prefix >= 15: long escape; suffix is prefix-3 bits wide.
4187 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4188 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4190 level_code += (1<<(prefix-3))-4096;
// When fewer than 3 trailing ones, |level| 1 is impossible for this
// coefficient, so codes are shifted by 2.
4193 if(trailing_ones < 3) level_code += 2;
// Branchless unzigzag of the level code into a signed level
// (even->positive, odd->negative).
4196 mask= -(level_code&1);
4197 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
4199 if(trailing_ones < 3) level_code += (level_code>>31)|1;
// suffix_length adaptation threshold for the next coefficient.
4202 if(level_code + 3U > 6U)
4204 level[trailing_ones]= level_code;
4207 //remaining coefficients have suffix_length > 0
4208 for(i=trailing_ones+1;i<total_coeff;i++) {
// suffix_limit[k]: once |level| exceeds this, suffix_length grows.
4209 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
4210 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4211 level_code= cavlc_level_tab[suffix_length][bitsi][0];
4213 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
4214 if(level_code >= 100){
4215 prefix= level_code - 100;
4216 if(prefix == LEVEL_TAB_BITS){
4217 prefix += get_level_prefix(gb);
4220 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4222 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4224 level_code += (1<<(prefix-3))-4096;
4226 mask= -(level_code&1);
4227 level_code= (((2+level_code)>>1) ^ mask) - mask;
4229 level[i]= level_code;
4231 if(suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length])
// --- total_zeros: number of zeros interleaved before the last
// coefficient; skipped when the block is already full.
4236 if(total_coeff == max_coeff)
4239 if(n == CHROMA_DC_BLOCK_INDEX)
4240 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4242 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
// --- output: two near-identical loops, split on qmul==NULL (raw levels,
// e.g. chroma DC) vs dequantized ((level*qmul[j]+32)>>6). run_before
// VLCs depend on zeros_left, with a shared table for >= 7.
4245 coeff_num = zeros_left + total_coeff - 1;
4246 j = scantable[coeff_num];
4248 block[j] = level[0];
4249 for(i=1;i<total_coeff;i++) {
4252 else if(zeros_left < 7){
4253 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4255 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4257 zeros_left -= run_before;
4258 coeff_num -= 1 + run_before;
4259 j= scantable[ coeff_num ];
4264 block[j] = (level[0] * qmul[j] + 32)>>6;
4265 for(i=1;i<total_coeff;i++) {
4268 else if(zeros_left < 7){
4269 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4271 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4273 zeros_left -= run_before;
4274 coeff_num -= 1 + run_before;
4275 j= scantable[ coeff_num ];
4277 block[j]= (level[i] * qmul[j] + 32)>>6;
// zeros_left going negative indicates a corrupt stream (error return not
// visible in this extraction).
4282 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/**
 * For MBAFF skipped macroblock pairs: infer the field/frame decoding flag
 * from the left neighbour if it is in the same slice, otherwise from the
 * top neighbour. The fallback when neither neighbour is available is not
 * visible here (line numbers jump 4295->4297; presumably 0).
 */
4289 static void predict_field_decoding_flag(H264Context *h){
4290 MpegEncContext * const s = &h->s;
4291 const int mb_xy= h->mb_xy;
4292 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4293 ? s->current_picture.mb_type[mb_xy-1]
4294 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4295 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4297 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4301 * decodes a P_SKIP or B_SKIP macroblock
4303 static void decode_mb_skip(H264Context *h){
4304 MpegEncContext * const s = &h->s;
4305 const int mb_xy= h->mb_xy;
// Skipped MBs have no residual: clear non-zero counts for the MB and
// the cache.
4308 memset(h->non_zero_count[mb_xy], 0, 16);
4309 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
// NOTE(review): the mb_type initialisation and the MB_FIELD condition
// guarding this are not visible (line numbers jump 4309->4312).
4312 mb_type|= MB_TYPE_INTERLACED;
// B_SKIP: motion is fully derived by direct prediction.
4314 if( h->slice_type_nos == FF_B_TYPE )
4316 // just for fill_caches. pred_direct_motion will set the real mb_type
4317 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4319 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4320 pred_direct_motion(h, &mb_type);
4321 mb_type|= MB_TYPE_SKIP;
// P_SKIP: 16x16, ref 0 in list 0, MV from the standard P-skip predictor.
4326 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4328 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4329 pred_pskip_motion(h, &mx, &my);
4330 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4331 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
// Commit motion/type/qscale for this MB and remember the skip for
// subsequent context decisions.
4334 write_back_motion(h, mb_type);
4335 s->current_picture.mb_type[mb_xy]= mb_type;
4336 s->current_picture.qscale_table[mb_xy]= s->qscale;
4337 h->slice_table[ mb_xy ]= h->slice_num;
4338 h->prev_mb_skipped= 1;
4342 * decodes a macroblock
4343 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/*
 * Decodes one macroblock with CAVLC entropy coding: skip-run handling,
 * mb_type, intra prediction modes or inter motion info, CBP, and the
 * residual blocks. Returns 0 on success, negative on error.
 * NOTE(review): this extraction has many line gaps (e.g. 4346->4348,
 * 4364->4366, 4497->4498) — declarations (mb_xy, i, di, x, dquant, nnz,
 * ref/tmp/val), else-branches, loop headers and error returns are missing
 * from view; comments below mark the logical sections rather than assert
 * completeness.
 */
4345 static int decode_mb_cavlc(H264Context *h){
4346 MpegEncContext * const s = &h->s;
4348 int partition_count;
4349 unsigned int mb_type, cbp;
4350 int dct8x8_allowed= h->pps.transform_8x8_mode;
4352 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4354 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4355 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
// --- mb_skip_run: in P/B slices a run of skipped MBs is coded once;
// while the run lasts, decode the MB as a skip and return.
4357 if(h->slice_type_nos != FF_I_TYPE){
4358 if(s->mb_skip_run==-1)
4359 s->mb_skip_run= get_ue_golomb(&s->gb);
4361 if (s->mb_skip_run--) {
4362 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4363 if(s->mb_skip_run==0)
4364 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4366 predict_field_decoding_flag(h);
// MBAFF: field flag is coded on the top MB of each pair.
4373 if( (s->mb_y&1) == 0 )
4374 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4377 h->prev_mb_skipped= 0;
// --- mb_type: slice-type dependent remap through the *_mb_type_info
// tables; large values fall through (goto) to the intra tables.
4379 mb_type= get_ue_golomb(&s->gb);
4380 if(h->slice_type_nos == FF_B_TYPE){
4382 partition_count= b_mb_type_info[mb_type].partition_count;
4383 mb_type= b_mb_type_info[mb_type].type;
4386 goto decode_intra_mb;
4388 }else if(h->slice_type_nos == FF_P_TYPE){
4390 partition_count= p_mb_type_info[mb_type].partition_count;
4391 mb_type= p_mb_type_info[mb_type].type;
4394 goto decode_intra_mb;
4397 assert(h->slice_type_nos == FF_I_TYPE);
4398 if(h->slice_type == FF_SI_TYPE && mb_type)
4402 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
// Intra MB: the info table also fixes cbp and the 16x16 pred mode.
4406 cbp= i_mb_type_info[mb_type].cbp;
4407 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4408 mb_type= i_mb_type_info[mb_type].type;
4412 mb_type |= MB_TYPE_INTERLACED;
4414 h->slice_table[ mb_xy ]= h->slice_num;
// --- I_PCM: raw byte-aligned samples, no prediction/residual.
4416 if(IS_INTRA_PCM(mb_type)){
4419 // We assume these blocks are very rare so we do not optimize it.
4420 align_get_bits(&s->gb);
4422 // The pixels are stored in the same order as levels in h->mb array.
4423 for(x=0; x < (CHROMA ? 384 : 256); x++){
4424 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4427 // In deblocking, the quantizer is 0
4428 s->current_picture.qscale_table[mb_xy]= 0;
4429 // All coeffs are present
4430 memset(h->non_zero_count[mb_xy], 16, 16);
4432 s->current_picture.mb_type[mb_xy]= mb_type;
// MBAFF field MB: ref counts are doubled while decoding (restored at
// the end of the function).
4437 h->ref_count[0] <<= 1;
4438 h->ref_count[1] <<= 1;
4441 fill_caches(h, mb_type, 0);
// --- intra prediction modes ---
4444 if(IS_INTRA(mb_type)){
4446 // init_top_left_availability(h);
4447 if(IS_INTRA4x4(mb_type)){
4450 if(dct8x8_allowed && get_bits1(&s->gb)){
4451 mb_type |= MB_TYPE_8x8DCT;
4455 // fill_intra4x4_pred_table(h);
// Per 4x4 (or 8x8, via di) block: 1 bit "use predicted mode", else a
// 3-bit remainder, bumped past the predicted mode.
4456 for(i=0; i<16; i+=di){
4457 int mode= pred_intra_mode(h, i);
4459 if(!get_bits1(&s->gb)){
4460 const int rem_mode= get_bits(&s->gb, 3);
4461 mode = rem_mode + (rem_mode >= mode);
4465 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4467 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4469 write_back_intra_pred_mode(h);
4470 if( check_intra4x4_pred_mode(h) < 0)
4473 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4474 if(h->intra16x16_pred_mode < 0)
4478 pred_mode= check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
4481 h->chroma_pred_mode= pred_mode;
// --- 8x8 partitions: per-block sub_mb_type, refs, then MVs ---
4483 }else if(partition_count==4){
4484 int i, j, sub_partition_count[4], list, ref[2][4];
4486 if(h->slice_type_nos == FF_B_TYPE){
4488 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4489 if(h->sub_mb_type[i] >=13){
4490 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4493 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4494 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4496 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4497 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4498 pred_direct_motion(h, &mb_type);
// Direct sub-blocks must not be used as MV predictors for the
// others: mark the inner positions unavailable.
4499 h->ref_cache[0][scan8[4]] =
4500 h->ref_cache[1][scan8[4]] =
4501 h->ref_cache[0][scan8[12]] =
4502 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4505 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4507 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4508 if(h->sub_mb_type[i] >=4){
4509 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4512 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4513 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// Reference indices per list and 8x8 block: 1 ref -> implicit 0,
// 2 refs -> single inverted bit, else a ue(v) code with range check.
4517 for(list=0; list<h->list_count; list++){
4518 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4520 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4521 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4525 }else if(ref_count == 2){
4526 tmp= get_bits1(&s->gb)^1;
4528 tmp= get_ue_golomb_31(&s->gb);
4530 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4543 dct8x8_allowed = get_dct8x8_allowed(h);
// MV decoding per list/8x8 block/sub-partition; the predictor comes
// from pred_motion and the residual is two se(v) codes.
4545 for(list=0; list<h->list_count; list++){
4547 if(IS_DIRECT(h->sub_mb_type[i])) {
4548 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4551 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4552 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4554 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4555 const int sub_mb_type= h->sub_mb_type[i];
4556 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4557 for(j=0; j<sub_partition_count[i]; j++){
4559 const int index= 4*i + block_width*j;
4560 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4561 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4562 mx += get_se_golomb(&s->gb);
4563 my += get_se_golomb(&s->gb);
4564 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
// Replicate the MV over the cache cells the sub-partition
// covers (8x8 -> 4 cells, 8x4/4x8 -> 2, 4x4 -> 1).
4566 if(IS_SUB_8X8(sub_mb_type)){
4568 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4570 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4571 }else if(IS_SUB_8X4(sub_mb_type)){
4572 mv_cache[ 1 ][0]= mx;
4573 mv_cache[ 1 ][1]= my;
4574 }else if(IS_SUB_4X8(sub_mb_type)){
4575 mv_cache[ 8 ][0]= mx;
4576 mv_cache[ 8 ][1]= my;
4578 mv_cache[ 0 ][0]= mx;
4579 mv_cache[ 0 ][1]= my;
4582 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
// --- whole-MB direct prediction ---
4588 }else if(IS_DIRECT(mb_type)){
4589 pred_direct_motion(h, &mb_type);
4590 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
// --- 16x16 / 16x8 / 8x16 partitions: refs then MVs, same ref-index
// coding scheme as above, fill_rectangle geometry per partition shape.
4592 int list, mx, my, i;
4593 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4594 if(IS_16X16(mb_type)){
4595 for(list=0; list<h->list_count; list++){
4597 if(IS_DIR(mb_type, 0, list)){
4598 if(h->ref_count[list]==1){
4600 }else if(h->ref_count[list]==2){
4601 val= get_bits1(&s->gb)^1;
4603 val= get_ue_golomb_31(&s->gb);
4604 if(val >= h->ref_count[list]){
4605 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4610 val= LIST_NOT_USED&0xFF;
4611 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4613 for(list=0; list<h->list_count; list++){
4615 if(IS_DIR(mb_type, 0, list)){
4616 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4617 mx += get_se_golomb(&s->gb);
4618 my += get_se_golomb(&s->gb);
4619 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4621 val= pack16to32(mx,my);
4624 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4627 else if(IS_16X8(mb_type)){
4628 for(list=0; list<h->list_count; list++){
4631 if(IS_DIR(mb_type, i, list)){
4632 if(h->ref_count[list] == 1){
4634 }else if(h->ref_count[list] == 2){
4635 val= get_bits1(&s->gb)^1;
4637 val= get_ue_golomb_31(&s->gb);
4638 if(val >= h->ref_count[list]){
4639 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4644 val= LIST_NOT_USED&0xFF;
4645 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4648 for(list=0; list<h->list_count; list++){
4651 if(IS_DIR(mb_type, i, list)){
4652 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4653 mx += get_se_golomb(&s->gb);
4654 my += get_se_golomb(&s->gb);
4655 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4657 val= pack16to32(mx,my);
4660 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4664 assert(IS_8X16(mb_type));
4665 for(list=0; list<h->list_count; list++){
4668 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4669 if(h->ref_count[list]==1){
4671 }else if(h->ref_count[list]==2){
4672 val= get_bits1(&s->gb)^1;
4674 val= get_ue_golomb_31(&s->gb);
4675 if(val >= h->ref_count[list]){
4676 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4681 val= LIST_NOT_USED&0xFF;
4682 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4685 for(list=0; list<h->list_count; list++){
4688 if(IS_DIR(mb_type, i, list)){
4689 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4690 mx += get_se_golomb(&s->gb);
4691 my += get_se_golomb(&s->gb);
4692 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4694 val= pack16to32(mx,my);
4697 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4703 if(IS_INTER(mb_type))
4704 write_back_motion(h, mb_type);
// --- CBP: ue(v) index remapped through the slice-type specific table;
// the _gray tables are the grayscale-only decoding variant.
4706 if(!IS_INTRA16x16(mb_type)){
4707 cbp= get_ue_golomb(&s->gb);
4709 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4714 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4715 else cbp= golomb_to_inter_cbp [cbp];
4717 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4718 else cbp= golomb_to_inter_cbp_gray[cbp];
// transform_size_8x8_flag for inter MBs with luma residual.
4723 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4724 if(get_bits1(&s->gb)){
4725 mb_type |= MB_TYPE_8x8DCT;
4726 h->cbp_table[mb_xy]= cbp;
4729 s->current_picture.mb_type[mb_xy]= mb_type;
// --- residuals: scan table selection (field vs frame, qscale 0 special
// case), mb_qp_delta, then luma and chroma blocks via decode_residual.
4731 if(cbp || IS_INTRA16x16(mb_type)){
4732 int i8x8, i4x4, chroma_idx;
4734 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4735 const uint8_t *scan, *scan8x8, *dc_scan;
4737 // fill_non_zero_count_cache(h);
4739 if(IS_INTERLACED(mb_type)){
4740 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4741 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4742 dc_scan= luma_dc_field_scan;
4744 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4745 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4746 dc_scan= luma_dc_zigzag_scan;
4749 dquant= get_se_golomb(&s->gb);
4751 if( dquant > 25 || dquant < -26 ){
4752 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
// QP wraps modulo 52 per the spec.
4756 s->qscale += dquant;
4757 if(((unsigned)s->qscale) > 51){
4758 if(s->qscale<0) s->qscale+= 52;
4759 else s->qscale-= 52;
4762 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4763 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
// Intra16x16: separate DC block (full 16 coeffs) then 15-coeff AC
// blocks when cbp says so.
4764 if(IS_INTRA16x16(mb_type)){
4765 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4766 return -1; //FIXME continue if partitioned and other return -1 too
4769 assert((cbp&15) == 0 || (cbp&15) == 15);
4772 for(i8x8=0; i8x8<4; i8x8++){
4773 for(i4x4=0; i4x4<4; i4x4++){
4774 const int index= i4x4 + 4*i8x8;
4775 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4781 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
// Non-intra16x16 luma: per 8x8 group, either four 4x4 sub-blocks
// decoded into the interleaved 8x8 scan (8x8 DCT) or four plain
// 4x4 blocks.
4784 for(i8x8=0; i8x8<4; i8x8++){
4785 if(cbp & (1<<i8x8)){
4786 if(IS_8x8DCT(mb_type)){
4787 DCTELEM *buf = &h->mb[64*i8x8];
4789 for(i4x4=0; i4x4<4; i4x4++){
4790 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4791 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4794 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4795 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4797 for(i4x4=0; i4x4<4; i4x4++){
4798 const int index= i4x4 + 4*i8x8;
4800 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4806 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4807 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// Chroma: 2x2 DC blocks (no dequant table -> NULL), then per-channel
// 15-coeff AC blocks with the channel's chroma QP.
4813 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4814 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4820 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4821 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4822 for(i4x4=0; i4x4<4; i4x4++){
4823 const int index= 16 + 4*chroma_idx + i4x4;
4824 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4830 uint8_t * const nnz= &h->non_zero_count_cache[0];
4831 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4832 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4835 uint8_t * const nnz= &h->non_zero_count_cache[0];
4836 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4837 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4838 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4840 s->current_picture.qscale_table[mb_xy]= s->qscale;
4841 write_back_non_zero_count(h);
// Undo the MBAFF ref-count doubling from above.
4844 h->ref_count[0] >>= 1;
4845 h->ref_count[1] >>= 1;
/**
 * CABAC mb_field_decoding_flag: context is the number of neighbouring
 * MB pairs (left, above) that are field-coded and in the same slice.
 * Uses cabac_state[70..72]. Increment statements for ctx are not visible
 * (line numbers jump 4860->4863).
 */
4851 static int decode_cabac_field_decoding_flag(H264Context *h) {
4852 MpegEncContext * const s = &h->s;
4853 const int mb_x = s->mb_x;
// Top MB of the pair: mask off the bottom-of-pair bit.
4854 const int mb_y = s->mb_y & ~1;
4855 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4856 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4858 unsigned int ctx = 0;
4860 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4863 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4867 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/**
 * Decodes the intra part of a CABAC mb_type: returns 0 for I_4x4,
 * 25 for I_PCM, or 1..24 for an I_16x16 variant encoding cbp_luma,
 * cbp_chroma and the prediction mode. In intra slices the first bin's
 * context derives from the neighbours; in inter slices a fixed context
 * is used (the intra_slice branching lines are not all visible — line
 * numbers jump 4871->4875 and 4884->4887).
 */
4870 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4871 uint8_t *state= &h->cabac_state[ctx_base];
4875 MpegEncContext * const s = &h->s;
4876 const int mba_xy = h->left_mb_xy[0];
4877 const int mbb_xy = h->top_mb_xy;
// ctx increments (not visible) when a same-slice neighbour is intra
// but not I_4x4.
4879 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4881 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4883 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4884 return 0; /* I4x4 */
4887 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4888 return 0; /* I4x4 */
// I_PCM is signalled by the CABAC terminate symbol.
4891 if( get_cabac_terminate( &h->cabac ) )
4892 return 25; /* PCM */
// I_16x16: mb_type = 1 + pred_mode + 4*cbp_chroma + 12*(cbp_luma!=0).
4894 mb_type = 1; /* I16x16 */
4895 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4896 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4897 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4898 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4899 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/**
 * Decodes mb_type for a B slice with CABAC (contexts 27..31).
 * Returns the B mb_type index: 0 = B_Direct_16x16, 1/2 = B_L0/L1_16x16,
 * 3..10 and 12..21 = bi/mixed partitions, 11 = B_L1_L0_8x16,
 * 22 = B_8x8, 23+ = intra (delegated to decode_cabac_intra_mb_type).
 * First-bin context = number of same-slice non-direct neighbours.
 */
4903 static int decode_cabac_mb_type_b( H264Context *h ) {
4904 MpegEncContext * const s = &h->s;
4906 const int mba_xy = h->left_mb_xy[0];
4907 const int mbb_xy = h->top_mb_xy;
4910 assert(h->slice_type_nos == FF_B_TYPE);
// ctx increments (lines not visible) for each non-direct neighbour.
4912 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4914 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4917 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4918 return 0; /* B_Direct_16x16 */
4920 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4921 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
// 4-bit suffix selects among the remaining types; values 13..15 are
// escapes for intra / B_L1_L0_8x16 / B_8x8.
4924 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4925 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4926 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4927 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4929 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4930 else if( bits == 13 ) {
4931 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4932 } else if( bits == 14 )
4933 return 11; /* B_L1_L0_8x16 */
4934 else if( bits == 15 )
4935 return 22; /* B_8x8 */
// bits 8..12: one extra bin refines the bi-prediction sub-case.
4937 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4938 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
/**
 * CABAC mb_skip_flag (contexts 11..13 for P, offset by +13 via ctx for
 * B — the `ctx += 13` line is among those not visible). Context counts
 * same-slice non-skipped left/top neighbours; for MBAFF the neighbour
 * pair member is chosen to match the current field/frame mode.
 */
4941 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4942 MpegEncContext * const s = &h->s;
4946 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4947 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
// Pick the bottom MB of the left pair when its field mode matches.
4950 && h->slice_table[mba_xy] == h->slice_num
4951 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] )
4952 mba_xy += s->mb_stride;
4954 mbb_xy = mb_xy - s->mb_stride;
4956 && h->slice_table[mbb_xy] == h->slice_num
4957 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4958 mbb_xy -= s->mb_stride;
4960 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
4962 int mb_xy = h->mb_xy;
// Non-MBAFF: in field pictures the row above is 2 strides away.
4964 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4967 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4969 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
4972 if( h->slice_type_nos == FF_B_TYPE )
4974 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/**
 * CABAC intra4x4 pred mode: one bin (ctx 68) for "use predicted mode";
 * otherwise 3 bins (ctx 69) form rem_intra4x4_pred_mode, bumped past
 * the predicted mode (the early return and final return lines are not
 * visible in this extraction).
 */
4977 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4980 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
// 3-bit remainder, LSB first.
4983 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4984 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4985 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
4987 if( mode >= pred_mode )
/**
 * CABAC intra_chroma_pred_mode (contexts 64..67): truncated unary,
 * first-bin context from neighbours with non-zero chroma pred mode.
 * Returns 0..3 (the return-value lines between the bins are not visible).
 */
4993 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
4994 const int mba_xy = h->left_mb_xy[0];
4995 const int mbb_xy = h->top_mb_xy;
4999 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5000 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5003 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5006 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5009 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5011 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/**
 * CABAC coded_block_pattern, luma part (contexts 73..76): one bin per
 * 8x8 block; each bin's context depends on whether the left/top
 * neighbouring 8x8 block (inside or outside the MB) had coefficients.
 * -1 for out-of-slice neighbours makes the !(...) tests evaluate as
 * "coded". The final `return cbp;` line is not visible here.
 */
5017 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5018 int cbp_b, cbp_a, ctx, cbp = 0;
5020 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5021 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
5023 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5024 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
5025 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
5026 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
5027 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
5028 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
5029 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
5030 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
5033 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5037 cbp_a = (h->left_cbp>>4)&0x03;
5038 cbp_b = (h-> top_cbp>>4)&0x03;
5041 if( cbp_a > 0 ) ctx++;
5042 if( cbp_b > 0 ) ctx += 2;
5043 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5047 if( cbp_a == 2 ) ctx++;
5048 if( cbp_b == 2 ) ctx += 2;
5049 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/**
 * CABAC mb_qp_delta (contexts 60..63): unary-coded magnitude, then
 * unfolded to a signed delta — even val -> positive, odd -> negative.
 * First-bin context is whether the previous MB had a non-zero delta.
 * The val/ctx update lines inside the loop and the error return for
 * val > 102 are not visible in this extraction.
 */
5051 static int decode_cabac_mb_dqp( H264Context *h) {
5052 int ctx= h->last_qscale_diff != 0;
5055 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5058 if(val > 102) //prevent infinite loop
// Unfold: 0,1,2,3,... -> 0,-1,+1,-2,... (sign handled by the two
// returns below; the branch between them is not visible).
5063 return (val + 1)>>1 ;
5065 return -((val + 1)>>1);
5067 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5068 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5070 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5072 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
5076 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5078 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5079 return 0; /* B_Direct_8x8 */
5080 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5081 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5083 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5084 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5085 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5088 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5089 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/**
 * CABAC transform_size_8x8_flag: single bin at context 399 plus the
 * count of neighbouring MBs already using the 8x8 transform
 * (h->neighbor_transform_size, 0..2).
 */
5093 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5094 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/**
 * CABAC ref_idx (contexts 54..59): unary code; initial context derives
 * from whether the left/top cached ref indices are non-zero (in B
 * slices, direct neighbours are treated as zero). The ctx computation
 * from refa/refb, the loop body updating ref/ctx, and the final return
 * are not visible in this extraction; the 32-iteration cap guards
 * against corrupt streams.
 */
5097 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5098 int refa = h->ref_cache[list][scan8[n] - 1];
5099 int refb = h->ref_cache[list][scan8[n] - 8];
5103 if( h->slice_type_nos == FF_B_TYPE) {
5104 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5106 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5115 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5118 if(ref >= 32 /*h->ref_list[list]*/){
/**
 * CABAC motion vector difference, one component (l=0 horizontal ->
 * contexts 40..46, l=1 vertical -> 47..53): UEG3 binarization —
 * context-coded unary prefix (up to 9 bins), then for large values an
 * exp-Golomb suffix in bypass bins, then a bypass sign bit.
 * Initial context from the summed |mvd| of left/top neighbours.
 * Several mvd/ctx update lines and intermediate returns are not visible
 * in this extraction.
 */
5125 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5126 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5127 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5128 int ctxbase = (l == 0) ? 40 : 47;
5130 int ctx = (amvd>2) + (amvd>32);
5132 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5137 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
// Exp-Golomb escape: count bypass bins for the exponent, with an
// overflow guard against corrupt streams.
5145 while( get_cabac_bypass( &h->cabac ) ) {
5149 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5154 if( get_cabac_bypass( &h->cabac ) )
// Bypass-coded sign applied to the magnitude.
5158 return get_cabac_bypass_sign( &h->cabac, -mvd );
/**
 * Computes the coded_block_flag context: ctx (0..3, from whether the
 * left/top neighbouring blocks of the same category have coefficients)
 * plus 4*cat. DC flags live in bit 8 of left_cbp/top_cbp, luma-DC-ish
 * per-index flags in bits 6+idx, and AC flags come from the
 * non_zero_count cache. The branch structure between the is_dc/cat
 * cases and the nza/nzb -> ctx combination lines are not visible in
 * this extraction.
 */
5161 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
5167 nza = h->left_cbp&0x100;
5168 nzb = h-> top_cbp&0x100;
5170 nza = (h->left_cbp>>(6+idx))&0x01;
5171 nzb = (h-> top_cbp>>(6+idx))&0x01;
5174 assert(cat == 1 || cat == 2 || cat == 4);
5175 nza = h->non_zero_count_cache[scan8[idx] - 1];
5176 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5185 return ctx + 4 * cat;
/* Maps an 8x8 scan position (0..62) to the context offset for the CABAC
 * last_significant_coeff_flag; byte-aligned (align 1) so asm residual
 * decoders can address it directly. */
5188 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5189 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5190 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5191 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5192 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5195 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
5196 static const int significant_coeff_flag_offset[2][6] = {
5197 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5198 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5200 static const int last_coeff_flag_offset[2][6] = {
5201 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5202 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5204 static const int coeff_abs_level_m1_offset[6] = {
5205 227+0, 227+10, 227+20, 227+30, 227+39, 426
5207 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5208 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5209 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5210 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5211 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5212 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5213 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5214 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5215 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5217 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5218 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5219 * map node ctx => cabac ctx for level=1 */
5220 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5221 /* map node ctx => cabac ctx for level>1 */
5222 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5223 static const uint8_t coeff_abs_level_transition[2][8] = {
5224 /* update node ctx after decoding a level=1 */
5225 { 1, 2, 3, 3, 4, 5, 6, 7 },
5226 /* update node ctx after decoding a level>1 */
5227 { 4, 4, 4, 4, 5, 6, 7, 7 }
/* NOTE(review): body of decode_cabac_residual_internal(); the signature and a
 * number of interior lines (braces, #else/#endif branches) are elided in this
 * excerpt. Decodes one residual block: coded-block flag, significance map,
 * then coefficient magnitudes/signs in reverse scan order. */
5233 int coeff_count = 0;
5236 uint8_t *significant_coeff_ctx_base;
5237 uint8_t *last_coeff_ctx_base;
5238 uint8_t *abs_level_m1_ctx_base;
/* With CABAC_ON_STACK, the CABAC state is copied to a local (cc) so the hot
 * loop works on stack/register state, and copied back before returning. */
5241 #define CABAC_ON_STACK
5243 #ifdef CABAC_ON_STACK
5246 cc.range = h->cabac.range;
5247 cc.low = h->cabac.low;
5248 cc.bytestream= h->cabac.bytestream;
5250 #define CC &h->cabac
5254 /* cat: 0-> DC 16x16 n = 0
5255 * 1-> AC 16x16 n = luma4x4idx
5256 * 2-> Luma4x4 n = luma4x4idx
5257 * 3-> DC Chroma n = iCbCr
5258 * 4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
5259 * 5-> Luma8x8 n = 4 * luma8x8idx
5262 /* read coded block flag */
/* 8x8 luma (cat 5) has no coded_block_flag of its own; skip it there. */
5263 if( is_dc || cat != 5 ) {
5264 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
/* Block is all-zero: record zero nnz and restore CABAC state. */
5266 h->non_zero_count_cache[scan8[n]] = 0;
5268 #ifdef CABAC_ON_STACK
5269 h->cabac.range = cc.range ;
5270 h->cabac.low = cc.low ;
5271 h->cabac.bytestream= cc.bytestream;
/* Context bases for this block category (and frame/field mode). */
5277 significant_coeff_ctx_base = h->cabac_state
5278 + significant_coeff_flag_offset[MB_FIELD][cat];
5279 last_coeff_ctx_base = h->cabac_state
5280 + last_coeff_flag_offset[MB_FIELD][cat];
5281 abs_level_m1_ctx_base = h->cabac_state
5282 + coeff_abs_level_m1_offset[cat];
5284 if( !is_dc && cat == 5 ) {
/* Significance map: record positions of nonzero coefficients in index[]. */
5285 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5286 for(last= 0; last < coefs; last++) { \
5287 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5288 if( get_cabac( CC, sig_ctx )) { \
5289 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5290 index[coeff_count++] = last; \
5291 if( get_cabac( CC, last_ctx ) ) { \
5297 if( last == max_coeff -1 ) {\
5298 index[coeff_count++] = last;\
5300 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
/* x86 asm fast paths; the C fallback uses the DECODE_SIGNIFICANCE macro. */
5301 #if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS)
5302 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5304 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5306 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5308 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5311 assert(coeff_count > 0);
/* Update per-MB cbp/nnz bookkeeping (branching by cat; some lines elided). */
5315 h->cbp_table[h->mb_xy] |= 0x100;
5317 h->cbp_table[h->mb_xy] |= 0x40 << n;
5320 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5322 assert( cat == 1 || cat == 2 || cat == 4 );
5323 h->non_zero_count_cache[scan8[n]] = coeff_count;
/* Decode magnitudes and signs, walking index[] from the last coefficient
 * backwards; node_ctx tracks the abs-level context state machine. */
5328 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5330 int j= scantable[index[--coeff_count]];
5332 if( get_cabac( CC, ctx ) == 0 ) {
5333 node_ctx = coeff_abs_level_transition[0][node_ctx];
/* Magnitude 1: store +/-1 (DC) or dequantized value via qmul. */
5335 block[j] = get_cabac_bypass_sign( CC, -1);
5337 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5341 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5342 node_ctx = coeff_abs_level_transition[1][node_ctx];
5344 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
/* Magnitudes >= 15 continue with exp-Golomb-style bypass coding. */
5348 if( coeff_abs >= 15 ) {
5350 while( get_cabac_bypass( CC ) ) {
5356 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5362 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5364 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5367 } while( coeff_count );
/* Write the local CABAC state back into the context. */
5368 #ifdef CABAC_ON_STACK
5369 h->cabac.range = cc.range ;
5370 h->cabac.low = cc.low ;
5371 h->cabac.bytestream= cc.bytestream;
/* Thin wrapper: decode a DC residual block (forces is_dc = 1 so the
 * compiler can specialize decode_cabac_residual_internal). */
5377 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5378 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
/* Thin wrapper: decode a non-DC residual block (forces is_dc = 0, the
 * counterpart of decode_cabac_residual_dc). */
5381 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5382 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
/* Dispatch by category: cat 0 (luma DC) and cat 3 (chroma DC) take the DC
 * path, everything else the non-DC path.
 * NOTE(review): the direct-internal call and the dc/nondc dispatch below are
 * presumably alternative #if branches — the preprocessor lines are elided in
 * this excerpt; confirm against the full file. */
5386 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5388 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5390 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5391 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
/* Computes the top/left neighbour MB indices (h->top_mb_xy, h->left_mb_xy[0])
 * for the current macroblock, adjusting for MBAFF frame/field pairing and for
 * field pictures. Some brace/branch lines are elided in this excerpt. */
5395 static inline void compute_mb_neighbors(H264Context *h)
5397 MpegEncContext * const s = &h->s;
5398 const int mb_xy = h->mb_xy;
/* Default raster-order neighbours. */
5399 h->top_mb_xy = mb_xy - s->mb_stride;
5400 h->left_mb_xy[0] = mb_xy - 1;
/* MBAFF: neighbours are defined on MB pairs; pair_xy is the top MB of the
 * current pair. */
5402 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5403 const int top_pair_xy = pair_xy - s->mb_stride;
5404 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5405 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5406 const int curr_mb_field_flag = MB_FIELD;
5407 const int bottom = (s->mb_y & 1);
5409 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
5410 h->top_mb_xy -= s->mb_stride;
5412 if (!left_mb_field_flag == curr_mb_field_flag) {
5413 h->left_mb_xy[0] = pair_xy - 1;
/* Field pictures: rows are interleaved, so the vertical neighbour is two
 * MB rows away. */
5415 } else if (FIELD_PICTURE) {
5416 h->top_mb_xy -= s->mb_stride;
5422 * decodes a macroblock
5423 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/* Decodes one macroblock with the CABAC entropy coder: skip flags, MB type,
 * prediction modes / motion vectors, CBP, qscale delta and residuals.
 * Returns 0 on success, -1 on a decode error.
 * NOTE(review): many interior lines (braces, #else branches, declarations)
 * are elided in this excerpt; comments below only describe what is visible. */
5425 static int decode_mb_cabac(H264Context *h) {
5426 MpegEncContext * const s = &h->s;
5428 int mb_type, partition_count, cbp = 0;
5429 int dct8x8_allowed= h->pps.transform_8x8_mode;
5431 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5433 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* --- skip flag handling (P/B slices only) --- */
5434 if( h->slice_type_nos != FF_I_TYPE ) {
5436 /* a skipped mb needs the aff flag from the following mb */
5437 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5438 predict_field_decoding_flag(h);
5439 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5440 skip = h->next_mb_skipped;
5442 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5443 /* read skip flags */
5445 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5446 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5447 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5448 if(!h->next_mb_skipped)
5449 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
/* Skipped MB: reset per-MB tables and the qscale-delta predictor. */
5454 h->cbp_table[mb_xy] = 0;
5455 h->chroma_pred_mode_table[mb_xy] = 0;
5456 h->last_qscale_diff = 0;
5463 if( (s->mb_y&1) == 0 )
5465 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5468 h->prev_mb_skipped = 0;
5470 compute_mb_neighbors(h);
/* --- MB type decoding, by slice type --- */
5472 if( h->slice_type_nos == FF_B_TYPE ) {
5473 mb_type = decode_cabac_mb_type_b( h );
5475 partition_count= b_mb_type_info[mb_type].partition_count;
5476 mb_type= b_mb_type_info[mb_type].type;
5479 goto decode_intra_mb;
5481 } else if( h->slice_type_nos == FF_P_TYPE ) {
5482 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5484 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5485 /* P_L0_D16x16, P_8x8 */
5486 mb_type= 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5488 /* P_L0_D8x16, P_L0_D16x8 */
5489 mb_type= 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
5491 partition_count= p_mb_type_info[mb_type].partition_count;
5492 mb_type= p_mb_type_info[mb_type].type;
5494 mb_type= decode_cabac_intra_mb_type(h, 17, 0);
5495 goto decode_intra_mb;
5498 mb_type= decode_cabac_intra_mb_type(h, 3, 1);
5499 if(h->slice_type == FF_SI_TYPE && mb_type)
5501 assert(h->slice_type_nos == FF_I_TYPE);
/* decode_intra_mb target: translate the table index into cbp / pred mode. */
5503 partition_count = 0;
5504 cbp= i_mb_type_info[mb_type].cbp;
5505 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5506 mb_type= i_mb_type_info[mb_type].type;
5509 mb_type |= MB_TYPE_INTERLACED;
5511 h->slice_table[ mb_xy ]= h->slice_num;
/* --- I_PCM: raw samples follow in the bitstream --- */
5513 if(IS_INTRA_PCM(mb_type)) {
5516 // We assume these blocks are very rare so we do not optimize it.
5517 // FIXME The two following lines get the bitstream position in the cabac
5518 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5519 ptr= h->cabac.bytestream;
5520 if(h->cabac.low&0x1) ptr--;
5522 if(h->cabac.low&0x1FF) ptr--;
5525 // The pixels are stored in the same order as levels in h->mb array.
5526 memcpy(h->mb, ptr, 256); ptr+=256;
5528 memcpy(h->mb+128, ptr, 128); ptr+=128;
/* Restart the CABAC decoder after the raw PCM payload. */
5531 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5533 // All blocks are present
5534 h->cbp_table[mb_xy] = 0x1ef;
5535 h->chroma_pred_mode_table[mb_xy] = 0;
5536 // In deblocking, the quantizer is 0
5537 s->current_picture.qscale_table[mb_xy]= 0;
5538 // All coeffs are present
5539 memset(h->non_zero_count[mb_xy], 16, 16);
5540 s->current_picture.mb_type[mb_xy]= mb_type;
5541 h->last_qscale_diff = 0;
/* MBAFF field MBs use doubled reference counts (one per field). */
5546 h->ref_count[0] <<= 1;
5547 h->ref_count[1] <<= 1;
5550 fill_caches(h, mb_type, 0);
/* --- intra prediction modes --- */
5552 if( IS_INTRA( mb_type ) ) {
5554 if( IS_INTRA4x4( mb_type ) ) {
5555 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5556 mb_type |= MB_TYPE_8x8DCT;
5557 for( i = 0; i < 16; i+=4 ) {
5558 int pred = pred_intra_mode( h, i );
5559 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5560 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5563 for( i = 0; i < 16; i++ ) {
5564 int pred = pred_intra_mode( h, i );
5565 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5567 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5570 write_back_intra_pred_mode(h);
5571 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5573 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5574 if( h->intra16x16_pred_mode < 0 ) return -1;
5577 h->chroma_pred_mode_table[mb_xy] =
5578 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5580 pred_mode= check_intra_pred_mode( h, pred_mode );
5581 if( pred_mode < 0 ) return -1;
5582 h->chroma_pred_mode= pred_mode;
/* --- inter, 8x8 partitions: sub-MB types, refs, then MVDs --- */
5584 } else if( partition_count == 4 ) {
5585 int i, j, sub_partition_count[4], list, ref[2][4];
5587 if( h->slice_type_nos == FF_B_TYPE ) {
5588 for( i = 0; i < 4; i++ ) {
5589 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5590 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5591 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5593 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5594 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5595 pred_direct_motion(h, &mb_type);
5596 h->ref_cache[0][scan8[4]] =
5597 h->ref_cache[1][scan8[4]] =
5598 h->ref_cache[0][scan8[12]] =
5599 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5600 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5601 for( i = 0; i < 4; i++ )
5602 if( IS_DIRECT(h->sub_mb_type[i]) )
5603 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5607 for( i = 0; i < 4; i++ ) {
5608 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5609 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5610 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* Reference indices per 8x8 block and list; bounds-checked against
 * ref_count to catch corrupt streams. */
5614 for( list = 0; list < h->list_count; list++ ) {
5615 for( i = 0; i < 4; i++ ) {
5616 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5617 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5618 if( h->ref_count[list] > 1 ){
5619 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5620 if(ref[list][i] >= (unsigned)h->ref_count[list]){
5621 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
5629 h->ref_cache[list][ scan8[4*i]+1 ]=
5630 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5635 dct8x8_allowed = get_dct8x8_allowed(h);
/* Motion vector differences per sub-partition; mv/mvd caches are filled
 * according to the sub-partition shape (8x8 / 8x4 / 4x8 / 4x4). */
5637 for(list=0; list<h->list_count; list++){
5639 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5640 if(IS_DIRECT(h->sub_mb_type[i])){
5641 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5645 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5646 const int sub_mb_type= h->sub_mb_type[i];
5647 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5648 for(j=0; j<sub_partition_count[i]; j++){
5651 const int index= 4*i + block_width*j;
5652 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5653 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5654 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5656 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5657 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5658 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5660 if(IS_SUB_8X8(sub_mb_type)){
5662 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5664 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5667 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5669 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5670 }else if(IS_SUB_8X4(sub_mb_type)){
5671 mv_cache[ 1 ][0]= mx;
5672 mv_cache[ 1 ][1]= my;
5674 mvd_cache[ 1 ][0]= mx - mpx;
5675 mvd_cache[ 1 ][1]= my - mpy;
5676 }else if(IS_SUB_4X8(sub_mb_type)){
5677 mv_cache[ 8 ][0]= mx;
5678 mv_cache[ 8 ][1]= my;
5680 mvd_cache[ 8 ][0]= mx - mpx;
5681 mvd_cache[ 8 ][1]= my - mpy;
5683 mv_cache[ 0 ][0]= mx;
5684 mv_cache[ 0 ][1]= my;
5686 mvd_cache[ 0 ][0]= mx - mpx;
5687 mvd_cache[ 0 ][1]= my - mpy;
/* Unused-list blocks: clear mv and mvd caches in one go via 32-bit stores. */
5690 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5691 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5692 p[0] = p[1] = p[8] = p[9] = 0;
5693 pd[0]= pd[1]= pd[8]= pd[9]= 0;
/* --- B-direct MB: motion comes entirely from prediction --- */
5697 } else if( IS_DIRECT(mb_type) ) {
5698 pred_direct_motion(h, &mb_type);
5699 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5700 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5701 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* --- inter, 16x16 / 16x8 / 8x16 partitions --- */
5703 int list, mx, my, i, mpx, mpy;
5704 if(IS_16X16(mb_type)){
5705 for(list=0; list<h->list_count; list++){
5706 if(IS_DIR(mb_type, 0, list)){
5708 if(h->ref_count[list] > 1){
5709 ref= decode_cabac_mb_ref(h, list, 0);
5710 if(ref >= (unsigned)h->ref_count[list]){
5711 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5716 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5718 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5720 for(list=0; list<h->list_count; list++){
5721 if(IS_DIR(mb_type, 0, list)){
5722 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5724 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5725 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5726 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5728 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5729 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5731 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5734 else if(IS_16X8(mb_type)){
5735 for(list=0; list<h->list_count; list++){
5737 if(IS_DIR(mb_type, i, list)){
5739 if(h->ref_count[list] > 1){
5740 ref= decode_cabac_mb_ref( h, list, 8*i );
5741 if(ref >= (unsigned)h->ref_count[list]){
5742 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5747 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5749 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5752 for(list=0; list<h->list_count; list++){
5754 if(IS_DIR(mb_type, i, list)){
5755 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5756 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5757 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5758 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5760 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5761 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5763 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5764 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5769 assert(IS_8X16(mb_type));
5770 for(list=0; list<h->list_count; list++){
5772 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5774 if(h->ref_count[list] > 1){
5775 ref= decode_cabac_mb_ref( h, list, 4*i );
5776 if(ref >= (unsigned)h->ref_count[list]){
5777 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5782 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5784 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5787 for(list=0; list<h->list_count; list++){
5789 if(IS_DIR(mb_type, i, list)){
5790 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5791 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5792 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5794 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5795 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5796 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5798 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5799 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5806 if( IS_INTER( mb_type ) ) {
5807 h->chroma_pred_mode_table[mb_xy] = 0;
5808 write_back_motion( h, mb_type );
/* --- coded block pattern (not coded for Intra16x16: its cbp comes from
 * the MB-type table above) --- */
5811 if( !IS_INTRA16x16( mb_type ) ) {
5812 cbp = decode_cabac_mb_cbp_luma( h );
5814 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5817 h->cbp_table[mb_xy] = h->cbp = cbp;
5819 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5820 if( decode_cabac_mb_transform_size( h ) )
5821 mb_type |= MB_TYPE_8x8DCT;
5823 s->current_picture.mb_type[mb_xy]= mb_type;
/* --- residuals: qscale delta, then luma and chroma coefficient blocks --- */
5825 if( cbp || IS_INTRA16x16( mb_type ) ) {
5826 const uint8_t *scan, *scan8x8, *dc_scan;
5827 const uint32_t *qmul;
5830 if(IS_INTERLACED(mb_type)){
5831 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5832 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5833 dc_scan= luma_dc_field_scan;
5835 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5836 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5837 dc_scan= luma_dc_zigzag_scan;
5840 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5841 if( dqp == INT_MIN ){
5842 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
/* Wrap qscale back into the legal 0..51 range after applying dqp. */
5846 if(((unsigned)s->qscale) > 51){
5847 if(s->qscale<0) s->qscale+= 52;
5848 else s->qscale-= 52;
5850 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5851 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5853 if( IS_INTRA16x16( mb_type ) ) {
5855 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5856 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5859 qmul = h->dequant4_coeff[0][s->qscale];
5860 for( i = 0; i < 16; i++ ) {
5861 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5862 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5865 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5869 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5870 if( cbp & (1<<i8x8) ) {
5871 if( IS_8x8DCT(mb_type) ) {
5872 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5873 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5875 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5876 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5877 const int index = 4*i8x8 + i4x4;
5878 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5880 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5881 //STOP_TIMER("decode_residual")
5885 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5886 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5893 for( c = 0; c < 2; c++ ) {
5894 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5895 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5901 for( c = 0; c < 2; c++ ) {
5902 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5903 for( i = 0; i < 4; i++ ) {
5904 const int index = 16 + 4 * c + i;
5905 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5906 decode_cabac_residual(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
5910 uint8_t * const nnz= &h->non_zero_count_cache[0];
5911 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5912 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* No residuals at all: clear the whole nnz cache and the dqp predictor. */
5915 uint8_t * const nnz= &h->non_zero_count_cache[0];
5916 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5917 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5918 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5919 h->last_qscale_diff = 0;
5922 s->current_picture.qscale_table[mb_xy]= s->qscale;
5923 write_back_non_zero_count(h);
/* Undo the MBAFF ref_count doubling done above. */
5926 h->ref_count[0] >>= 1;
5927 h->ref_count[1] >>= 1;
/* Deblocks one vertical luma edge: derives alpha/beta thresholds from qp and
 * the slice offsets, builds the per-4-line tc0 clip values from bS, and
 * dispatches to the DSP horizontal-loop-filter routines (the normal-vs-intra
 * branch lines are elided in this excerpt). */
5934 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5935 const int index_a = qp + h->slice_alpha_c0_offset;
5936 const int alpha = (alpha_table+52)[index_a];
5937 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5941 tc[0] = (tc0_table+52)[index_a][bS[0]];
5942 tc[1] = (tc0_table+52)[index_a][bS[1]];
5943 tc[2] = (tc0_table+52)[index_a][bS[2]];
5944 tc[3] = (tc0_table+52)[index_a][bS[3]];
5945 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5947 h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
/* Deblocks one vertical chroma edge; same threshold derivation as the luma
 * variant but with tc0+1 clip values, as the chroma filter expects. */
5950 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5951 const int index_a = qp + h->slice_alpha_c0_offset;
5952 const int alpha = (alpha_table+52)[index_a];
5953 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5957 tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
5958 tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
5959 tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
5960 tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
5961 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5963 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Scalar deblocking of the left vertical luma edge of an MBAFF macroblock:
 * processes the 16 rows one at a time because bS and qp can differ per row
 * (8 bS values, 2 qp values). Weak filter (bS < 4) clips with tc0/tc; strong
 * filter (bS == 4) uses the full-smoothing formulas. Several brace/decl lines
 * are elided in this excerpt. */
5967 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5969 for( i = 0; i < 16; i++, pix += stride) {
5975 int bS_index = (i >> 1);
5978 bS_index |= (i & 1);
5981 if( bS[bS_index] == 0 ) {
/* qp selection differs between field (per half-MB) and frame (per parity). */
5985 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
5986 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5987 alpha = (alpha_table+52)[index_a];
5988 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
5990 if( bS[bS_index] < 4 ) {
5991 const int tc0 = (tc0_table+52)[index_a][bS[bS_index]];
5992 const int p0 = pix[-1];
5993 const int p1 = pix[-2];
5994 const int p2 = pix[-3];
5995 const int q0 = pix[0];
5996 const int q1 = pix[1];
5997 const int q2 = pix[2];
5999 if( FFABS( p0 - q0 ) < alpha &&
6000 FFABS( p1 - p0 ) < beta &&
6001 FFABS( q1 - q0 ) < beta ) {
6005 if( FFABS( p2 - p0 ) < beta ) {
6006 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6009 if( FFABS( q2 - q0 ) < beta ) {
6010 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6014 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6015 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6016 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6017 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4 (intra edge): strong filtering path. */
6020 const int p0 = pix[-1];
6021 const int p1 = pix[-2];
6022 const int p2 = pix[-3];
6024 const int q0 = pix[0];
6025 const int q1 = pix[1];
6026 const int q2 = pix[2];
6028 if( FFABS( p0 - q0 ) < alpha &&
6029 FFABS( p1 - p0 ) < beta &&
6030 FFABS( q1 - q0 ) < beta ) {
6032 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6033 if( FFABS( p2 - p0 ) < beta)
6035 const int p3 = pix[-4];
6037 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6038 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6039 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6042 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6044 if( FFABS( q2 - q0 ) < beta)
6046 const int q3 = pix[3];
6048 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6049 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6050 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6053 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6057 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6058 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6060 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Chroma counterpart of filter_mb_mbaff_edgev: 8 rows, per-row bS/qp.
 * Weak path (bS < 4) uses tc0+1 clipping; strong path (bS == 4) applies the
 * two-tap chroma smoothing. Several brace/decl lines are elided here. */
6065 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6067 for( i = 0; i < 8; i++, pix += stride) {
6075 if( bS[bS_index] == 0 ) {
6079 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6080 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6081 alpha = (alpha_table+52)[index_a];
6082 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6084 if( bS[bS_index] < 4 ) {
6085 const int tc = (tc0_table+52)[index_a][bS[bS_index]] + 1;
6086 const int p0 = pix[-1];
6087 const int p1 = pix[-2];
6088 const int q0 = pix[0];
6089 const int q1 = pix[1];
6091 if( FFABS( p0 - q0 ) < alpha &&
6092 FFABS( p1 - p0 ) < beta &&
6093 FFABS( q1 - q0 ) < beta ) {
6094 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6096 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6097 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6098 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6101 const int p0 = pix[-1];
6102 const int p1 = pix[-2];
6103 const int q0 = pix[0];
6104 const int q1 = pix[1];
6106 if( FFABS( p0 - q0 ) < alpha &&
6107 FFABS( p1 - p0 ) < beta &&
6108 FFABS( q1 - q0 ) < beta ) {
6110 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6111 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6112 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblocks one horizontal luma edge; mirror of filter_mb_edgev but calls the
 * DSP vertical-loop-filter routines. */
6118 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6119 const int index_a = qp + h->slice_alpha_c0_offset;
6120 const int alpha = (alpha_table+52)[index_a];
6121 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6125 tc[0] = (tc0_table+52)[index_a][bS[0]];
6126 tc[1] = (tc0_table+52)[index_a][bS[1]];
6127 tc[2] = (tc0_table+52)[index_a][bS[2]];
6128 tc[3] = (tc0_table+52)[index_a][bS[3]];
6129 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6131 h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
/* Deblocks one horizontal chroma edge; mirror of filter_mb_edgecv with the
 * same tc0+1 clip convention. */
6135 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6136 const int index_a = qp + h->slice_alpha_c0_offset;
6137 const int alpha = (alpha_table+52)[index_a];
6138 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6142 tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
6143 tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
6144 tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
6145 tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
6146 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6148 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast deblocking path for one macroblock. Falls back to the full filter_mb()
 * for border MBs and other unsupported cases, skips filtering entirely when
 * all averaged QPs are below the alpha/beta threshold, uses fixed bS for
 * intra MBs, and otherwise computes bS via the DSP strength helper.
 * NOTE(review): the tail of this function (after the FILTER macro uses) is
 * elided in this excerpt. */
6152 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6153 MpegEncContext * const s = &h->s;
6154 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6156 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
/* Cases the fast path cannot handle -> full filter. */
6160 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6161 !(s->flags2 & CODEC_FLAG2_FAST) || //FIXME filter_mb_fast is broken, thus hasto be, but should not under CODEC_FLAG2_FAST
6162 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6163 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6164 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6167 assert(!FRAME_MBAFF);
/* Average QPs with left/top neighbours (as the edge filters expect). */
6169 mb_type = s->current_picture.mb_type[mb_xy];
6170 qp = s->current_picture.qscale_table[mb_xy];
6171 qp0 = s->current_picture.qscale_table[mb_xy-1];
6172 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6173 qpc = get_chroma_qp( h, 0, qp );
6174 qpc0 = get_chroma_qp( h, 0, qp0 );
6175 qpc1 = get_chroma_qp( h, 0, qp1 );
6176 qp0 = (qp + qp0 + 1) >> 1;
6177 qp1 = (qp + qp1 + 1) >> 1;
6178 qpc0 = (qpc + qpc0 + 1) >> 1;
6179 qpc1 = (qpc + qpc1 + 1) >> 1;
6180 qp_thresh = 15 - h->slice_alpha_c0_offset;
/* All QPs so low that alpha/beta would be zero -> nothing to filter. */
6181 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6182 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* Intra MBs: constant bS (4 on MB edges, 3 inside; 3 on horizontal MB edges
 * in field pictures). */
6185 if( IS_INTRA(mb_type) ) {
6186 int16_t bS4[4] = {4,4,4,4};
6187 int16_t bS3[4] = {3,3,3,3};
6188 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6189 if( IS_8x8DCT(mb_type) ) {
6190 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6191 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6192 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6193 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6195 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6196 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6197 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6198 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6199 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6200 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6201 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6202 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6204 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6205 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6206 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6207 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6208 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6209 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6210 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6211 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* Inter MBs: compute bS per edge, with 64-bit aliasing for bulk stores. */
6214 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6215 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6217 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6219 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6221 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6222 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6223 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6224 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6226 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6227 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6228 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6229 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
6231 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6232 bSv[0][0] = 0x0004000400040004ULL;
6233 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6234 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
/* Apply one edge in direction dir (0 = vertical, 1 = horizontal) iff any
 * bS on it is nonzero; MB-boundary edges use the averaged neighbour qp. */
6236 #define FILTER(hv,dir,edge)\
6237 if(bSv[dir][edge]) {\
6238 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6240 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6241 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6247 } else if( IS_8x8DCT(mb_type) ) {
/*
 * Deblock one direction of a macroblock: dir==0 filters vertical edges,
 * dir==1 filters horizontal edges. Computes a boundary strength bS for each
 * 4-sample edge segment (intra => 3/4, coded residual => 2, mv/ref
 * difference => 1, else 0) and applies the loop filter to luma and both
 * chroma planes.
 * NOTE(review): this listing is missing many original source lines (the
 * embedded line numbers skip), so several braces/statements of this
 * function are not visible here; code left byte-identical.
 */
6267 static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
6268     MpegEncContext * const s = &h->s;
/* mbm_xy: the neighbouring macroblock on the other side of the first edge
 * (left neighbour for vertical edges, top neighbour for horizontal ones). */
6270     const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6271     const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* ref2frm maps reference indices to frame numbers so references can be
 * compared across slices; offset differs for MBAFF. */
6272     int (*ref2frm) [64] = h->ref2frm[ h->slice_num          &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6273     int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
/* start==1 skips edge 0 when the neighbour is outside the picture/slice. */
6274     int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
/* A skipped 16x16 MB can only have a non-trivial bS on its first edge. */
6276     const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6277                               == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6278     // how often to recheck mv-based bS when iterating between edges
6279     const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6280                           (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6281     // how often to recheck mv-based bS when iterating along each edge
6282     const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6284     if (first_vertical_edge_done) {
/* deblocking_filter==2: do not filter across slice boundaries. */
6288     if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6291     if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6292         && !IS_INTERLACED(mb_type)
6293         && IS_INTERLACED(mbm_type)
6295         // This is a special case in the norm where the filtering must
6296         // be done twice (one each of the field) even if we are in a
6297         // frame macroblock.
/* nnz_idx: indices of the bottom row of non_zero_count in the neighbour. */
6299         static const int nnz_idx[4] = {4,5,6,3};
6300         unsigned int tmp_linesize   = 2 *   linesize;
6301         unsigned int tmp_uvlinesize = 2 * uvlinesize;
6302         int mbn_xy = mb_xy - 2 * s->mb_stride;
6307         for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6308             if( IS_INTRA(mb_type) ||
6309                 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6310                 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6312                 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6313                 for( i = 0; i < 4; i++ ) {
6314                     if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6315                         mbn_nnz[nnz_idx[i]] != 0 )
6321             // Do not use s->qscale as luma quantizer because it has not the same
6322             // value in IPCM macroblocks.
6323             qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6324             tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6325             { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6326             filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6327             filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6328                               ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6329             filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6330                               ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Main per-edge loop: edge 0 borders the neighbour MB, edges 1..3 are
 * internal to the current MB. */
6337     for( edge = start; edge < edges; edge++ ) {
6338         /* mbn_xy: neighbor macroblock */
6339         const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6340         const int mbn_type = s->current_picture.mb_type[mbn_xy];
6341         int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
/* 8x8 transform: odd internal edges are not filtered. */
6345         if( (edge&1) && IS_8x8DCT(mb_type) )
6348         if( IS_INTRA(mb_type) ||
6349             IS_INTRA(mbn_type) ) {
6352             if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6353                  || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6362             bS[0] = bS[1] = bS[2] = bS[3] = value;
6367             if( edge & mask_edge ) {
6368                 bS[0] = bS[1] = bS[2] = bS[3] = 0;
/* Mixed frame/field MB pair in MBAFF: bS is forced to 1 per the spec. */
6371             else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6372                 bS[0] = bS[1] = bS[2] = bS[3] = 1;
/* Fast path: whole-edge mv/ref comparison when partitions allow it. */
6375             else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6376                 int b_idx=  8 + 4 + edge * (dir ? 8:1);
6377                 int bn_idx= b_idx - (dir ? 8:1);
6380                 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6381                     v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6382                          FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6383                          FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
/* B slices: also compare against the cross-list references. */
6386                 if(h->slice_type_nos == FF_B_TYPE && v){
6388                     for( l = 0; !v && l < 2; l++ ) {
6390                         v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6391                             FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6392                             FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6396                 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* Slow path: per-4x4-segment bS computation. */
6402             for( i = 0; i < 4; i++ ) {
6403                 int x = dir == 0 ? edge : i;
6404                 int y = dir == 0 ? i    : edge;
6405                 int b_idx= 8 + 4 + x + 8*y;
6406                 int bn_idx= b_idx - (dir ? 8:1);
6408                 if( h->non_zero_count_cache[b_idx] |
6409                     h->non_zero_count_cache[bn_idx] ) {
6415                     for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6416                         if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6417                             FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6418                             FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6424                     if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6426                         for( l = 0; l < 2; l++ ) {
6428                             if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6429                                 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6430                                 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
/* All segments zero => nothing to filter on this edge. */
6439             if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6444         // Do not use s->qscale as luma quantizer because it has not the same
6445         // value in IPCM macroblocks.
6446         qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6447         //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6448         tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6449         { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
/* Chroma is subsampled 2:1, so only even edges touch chroma samples. */
6451             filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6452             if( (edge&1) == 0 ) {
6453                 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6454                                   ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6455                 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6456                                   ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6459             filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6460             if( (edge&1) == 0 ) {
6461                 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6462                                   ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6463                 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6464                                   ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/*
 * Full (non-fast-path) deblocking of one macroblock: handles the qp
 * early-out, CAVLC 8x8dct NNZ fixup, the special MBAFF first vertical
 * edge, then delegates per-direction work to filter_mb_dir().
 * NOTE(review): lines are missing from this listing (embedded numbering
 * skips); code left byte-identical, comments only.
 */
6470 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6471     MpegEncContext * const s = &h->s;
6472     const int mb_xy= mb_x + mb_y*s->mb_stride;
6473     const int mb_type = s->current_picture.mb_type[mb_xy];
/* Field MBs use a halved vertical mv threshold (units are half-pel rows). */
6474     const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6475     int first_vertical_edge_done = 0;
6478     //for sufficiently low qp, filtering wouldn't do anything
6479     //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6481         int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6482         int qp = s->current_picture.qscale_table[mb_xy];
6484            && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6485            && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6490     // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6491     if(!h->pps.cabac && h->pps.transform_8x8_mode){
6492         int top_type, left_type[2];
6493         top_type     = s->current_picture.mb_type[h->top_mb_xy]    ;
6494         left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6495         left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
/* Rebuild the NNZ cache edges from the neighbours' cbp bits. */
6497         if(IS_8x8DCT(top_type)){
6498             h->non_zero_count_cache[4+8*0]=
6499             h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6500             h->non_zero_count_cache[6+8*0]=
6501             h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6503         if(IS_8x8DCT(left_type[0])){
6504             h->non_zero_count_cache[3+8*1]=
6505             h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6507         if(IS_8x8DCT(left_type[1])){
6508             h->non_zero_count_cache[3+8*3]=
6509             h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
6512         if(IS_8x8DCT(mb_type)){
6513             h->non_zero_count_cache[scan8[0   ]]= h->non_zero_count_cache[scan8[1   ]]=
6514             h->non_zero_count_cache[scan8[2   ]]= h->non_zero_count_cache[scan8[3   ]]= h->cbp & 1;
6516             h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6517             h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
6519             h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6520             h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
6522             h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6523             h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
6528         // left mb is in picture
6529         && h->slice_table[mb_xy-1] != 0xFFFF
6530         // and current and left pair do not have the same interlaced type
6531         && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6532         // and left mb is in the same slice if deblocking_filter == 2
6533         && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6534         /* First vertical edge is different in MBAFF frames
6535          * There are 8 different bS to compute and 2 different Qp
6537         const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6538         const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6543         int mb_qp, mbn0_qp, mbn1_qp;
6545         first_vertical_edge_done = 1;
6547         if( IS_INTRA(mb_type) )
6548             bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6550             for( i = 0; i < 8; i++ ) {
6551                 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6553                 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6555                 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6556                          ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
6557                             (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
6559                             h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
/* Average the qp of the current MB with each of the two left-pair MBs. */
6566         mb_qp = s->current_picture.qscale_table[mb_xy];
6567         mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6568         mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6569         qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6570         bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6571                    get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6572         rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6573                    get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6574         qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6575         bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6576                    get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6577         rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6578                    get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6581         tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6582         { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6583         filter_mb_mbaff_edgev ( h, &img_y [0], linesize,   bS, qp );
6584         filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6585         filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
/* Finally run both filter directions (0 = vertical, 1 = horizontal). */
6589         for( dir = 0; dir < 2; dir++ )
6590             filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
6592         filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
6593         filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
/*
 * Slice decoding worker: iterates macroblocks of one slice using either the
 * CABAC or the CAVLC entropy decoder, reports decoded regions to the error
 * concealment via ff_er_add_slice(), and draws completed rows.
 * Fix(review): line 6759 contained corrupted tokens `s->?gb` / `s->gb?.`;
 * restored to `get_bits_count(s->gb) >= s->gb.size_in_bits`, matching the
 * sibling checks at lines 6747/6760 in the same (apparently disabled) path.
 * NOTE(review): this listing is missing many original lines (embedded
 * numbering skips), so some braces/branches are not visible here.
 */
6597 static int decode_slice(struct AVCodecContext *avctx, void *arg){
6598     H264Context *h = *(void**)arg;
6599     MpegEncContext * const s = &h->s;
/* Partitioned frames only own the AC part of the error mask. */
6600     const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6604     h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
6605                     (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
6607     if( h->pps.cabac ) {
/* CABAC payload is byte-aligned after the slice header. */
6611         align_get_bits( &s->gb );
6614         ff_init_cabac_states( &h->cabac);
6615         ff_init_cabac_decoder( &h->cabac,
6616                                s->gb.buffer + get_bits_count(&s->gb)/8,
6617                                ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6618         /* calculate pre-state */
6619         for( i= 0; i < 460; i++ ) {
6621             if( h->slice_type_nos == FF_I_TYPE )
6622                 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6624                 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6627                 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6629                 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
/* CABAC macroblock loop. */
6634             int ret = decode_mb_cabac(h);
6636             //STOP_TIMER("decode_mb_cabac")
6638             if(ret>=0) hl_decode_mb(h);
6640             if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6643                 ret = decode_mb_cabac(h);
6645                 if(ret>=0) hl_decode_mb(h);
6648             eos = get_cabac_terminate( &h->cabac );
/* Overread of more than 2 bytes means a corrupt stream. */
6650             if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6651                 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6652                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6656             if( ++s->mb_x >= s->mb_width ) {
6658                 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6660                 if(FIELD_OR_MBAFF_PICTURE) {
6665             if( eos || s->mb_y >= s->mb_height ) {
6666                 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6667                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC macroblock loop. */
6674             int ret = decode_mb_cavlc(h);
6676             if(ret>=0) hl_decode_mb(h);
6678             if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6680                 ret = decode_mb_cavlc(h);
6682                 if(ret>=0) hl_decode_mb(h);
6687                 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6688                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6693             if(++s->mb_x >= s->mb_width){
6695                 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6697                 if(FIELD_OR_MBAFF_PICTURE) {
6700                     if(s->mb_y >= s->mb_height){
6701                         tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* Exactly at the end => clean slice end; otherwise mark as error. */
6703                         if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6704                             ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6708                             ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6715             if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6716                 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6717                 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6718                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6722                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* NOTE(review): the block below appears to belong to an old, likely
 * preprocessor-disabled code path (its original guard is among the
 * missing lines); it consistently passes s->gb without '&'. */
6731     for(;s->mb_y < s->mb_height; s->mb_y++){
6732         for(;s->mb_x < s->mb_width; s->mb_x++){
6733             int ret= decode_mb(h);
6738                 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6739                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6744             if(++s->mb_x >= s->mb_width){
6746                 if(++s->mb_y >= s->mb_height){
6747                     if(get_bits_count(s->gb) == s->gb.size_in_bits){
6748                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6752                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6759         if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6760             if(get_bits_count(s->gb) == s->gb.size_in_bits){
6761                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6765                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6772         ff_draw_horiz_band(s, 16*s->mb_y, 16);
6775     return -1; //not reached
/*
 * Parse a picture-timing SEI message: CPB/DPB delays (when HRD parameters
 * are present in the active SPS) and pic_struct plus the per-timestamp
 * clock fields.
 * NOTE(review): several lines (return paths, closing braces) are missing
 * from this listing; code left byte-identical.
 */
6778 static int decode_picture_timing(H264Context *h){
6779     MpegEncContext * const s = &h->s;
6780     if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
6781         h->sei_cpb_removal_delay = get_bits(&s->gb, h->sps.cpb_removal_delay_length);
6782         h->sei_dpb_output_delay = get_bits(&s->gb, h->sps.dpb_output_delay_length);
6784     if(h->sps.pic_struct_present_flag){
6785         unsigned int i, num_clock_ts;
6786         h->sei_pic_struct = get_bits(&s->gb, 4);
6788         if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
6791         num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];
6793         for (i = 0 ; i < num_clock_ts ; i++){
6794             if(get_bits(&s->gb, 1)){                  /* clock_timestamp_flag */
6795                 unsigned int full_timestamp_flag;
6796                 skip_bits(&s->gb, 2);                 /* ct_type */
6797                 skip_bits(&s->gb, 1);                 /* nuit_field_based_flag */
6798                 skip_bits(&s->gb, 5);                 /* counting_type */
6799                 full_timestamp_flag = get_bits(&s->gb, 1);
6800                 skip_bits(&s->gb, 1);                 /* discontinuity_flag */
6801                 skip_bits(&s->gb, 1);                 /* cnt_dropped_flag */
6802                 skip_bits(&s->gb, 8);                 /* n_frames */
6803                 if(full_timestamp_flag){
6804                     skip_bits(&s->gb, 6);             /* seconds_value 0..59 */
6805                     skip_bits(&s->gb, 6);             /* minutes_value 0..59 */
6806                     skip_bits(&s->gb, 5);             /* hours_value 0..23 */
/* Without a full timestamp, each field is individually flagged. */
6808                     if(get_bits(&s->gb, 1)){          /* seconds_flag */
6809                         skip_bits(&s->gb, 6);         /* seconds_value range 0..59 */
6810                         if(get_bits(&s->gb, 1)){      /* minutes_flag */
6811                             skip_bits(&s->gb, 6);     /* minutes_value 0..59 */
6812                             if(get_bits(&s->gb, 1))   /* hours_flag */
6813                                 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6817                 if(h->sps.time_offset_length > 0)
6818                     skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
/*
 * Parse an unregistered user-data SEI payload of 'size' bytes. The first
 * 16 bytes are the UUID; the remainder is scanned for an x264 version
 * banner so bug workarounds can key off h->x264_build.
 */
6825 static int decode_unregistered_user_data(H264Context *h, int size){
6826     MpegEncContext * const s = &h->s;
/* 16-byte UUID + up to 256 payload bytes kept for inspection. */
6827     uint8_t user_data[16+256];
6833     for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6834         user_data[i]= get_bits(&s->gb, 8);
6838     e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6839     if(e==1 && build>=0)
6840         h->x264_build= build;
6842     if(s->avctx->debug & FF_DEBUG_BUGS)
6843         av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* Skip any payload bytes beyond the local buffer. */
6846         skip_bits(&s->gb, 8);
/*
 * Parse a recovery-point SEI message: stores recovery_frame_cnt and skips
 * the remaining flags. (Return statement is among the lines missing from
 * this listing.)
 */
6851 static int decode_recovery_point(H264Context *h){
6852     MpegEncContext * const s = &h->s;
6854     h->sei_recovery_frame_cnt = get_ue_golomb(&s->gb);
6855     skip_bits(&s->gb, 4);       /* 1b exact_match_flag, 1b broken_link_flag, 2b changing_slice_group_idc */
/*
 * Parse a buffering-period SEI message: resolves the referenced SPS and
 * reads initial CPB removal delays for each schedule selection index of
 * the NAL and/or VCL HRD.
 */
6860 static int decode_buffering_period(H264Context *h){
6861     MpegEncContext * const s = &h->s;
6862     unsigned int sps_id;
6866     sps_id = get_ue_golomb_31(&s->gb);
6867     if(sps_id > 31 || !h->sps_buffers[sps_id]) {
6868         av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %d referenced in buffering period\n", sps_id);
6871     sps = h->sps_buffers[sps_id];
6873     // NOTE: This is really so duplicated in the standard... See H.264, D.1.1
6874     if (sps->nal_hrd_parameters_present_flag) {
6875         for (sched_sel_idx = 0; sched_sel_idx < sps->cpb_cnt; sched_sel_idx++) {
6876             h->initial_cpb_removal_delay[sched_sel_idx] = get_bits(&s->gb, sps->initial_cpb_removal_delay_length);
6877             skip_bits(&s->gb, sps->initial_cpb_removal_delay_length); // initial_cpb_removal_delay_offset
6880     if (sps->vcl_hrd_parameters_present_flag) {
6881         for (sched_sel_idx = 0; sched_sel_idx < sps->cpb_cnt; sched_sel_idx++) {
6882             h->initial_cpb_removal_delay[sched_sel_idx] = get_bits(&s->gb, sps->initial_cpb_removal_delay_length);
6883             skip_bits(&s->gb, sps->initial_cpb_removal_delay_length); // initial_cpb_removal_delay_offset
6887     h->sei_buffering_period_present = 1;
/*
 * Top-level SEI NAL parser: reads (type, size) pairs — each encoded as a
 * sequence of 0xFF bytes plus a terminator byte — and dispatches to the
 * per-type decoders, skipping unknown payloads.
 */
6891 int ff_h264_decode_sei(H264Context *h){
6892     MpegEncContext * const s = &h->s;
6894     while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* type/size use the 0xFF-extension coding from H.264 section 7.3.2.3.1. */
6899             type+= show_bits(&s->gb, 8);
6900         }while(get_bits(&s->gb, 8) == 255);
6904             size+= show_bits(&s->gb, 8);
6905         }while(get_bits(&s->gb, 8) == 255);
6908         case SEI_TYPE_PIC_TIMING: // Picture timing SEI
6909             if(decode_picture_timing(h) < 0)
6912         case SEI_TYPE_USER_DATA_UNREGISTERED:
6913             if(decode_unregistered_user_data(h, size) < 0)
6916         case SEI_TYPE_RECOVERY_POINT:
6917             if(decode_recovery_point(h) < 0)
6920         case SEI_BUFFERING_PERIOD:
6921             if(decode_buffering_period(h) < 0)
/* Unknown payload types are skipped wholesale. */
6925             skip_bits(&s->gb, 8*size);
6928         //FIXME check bits here
6929         align_get_bits(&s->gb);
/*
 * Parse HRD (hypothetical reference decoder) parameters from the VUI:
 * consumes per-CPB rate/size/cbr fields and stores the delay field
 * lengths and CPB count in the SPS.
 */
6935 static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
6936     MpegEncContext * const s = &h->s;
6938     cpb_count = get_ue_golomb_31(&s->gb) + 1;
6940     if(cpb_count > 32U){
6941         av_log(h->s.avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count);
6945     get_bits(&s->gb, 4); /* bit_rate_scale */
6946     get_bits(&s->gb, 4); /* cpb_size_scale */
6947     for(i=0; i<cpb_count; i++){
6948         get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6949         get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6950         get_bits1(&s->gb);     /* cbr_flag */
/* Field lengths are stored as (coded value + 1) bits. */
6952     sps->initial_cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
6953     sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
6954     sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1;
6955     sps->time_offset_length = get_bits(&s->gb, 5);
6956     sps->cpb_cnt = cpb_count;
/*
 * Parse VUI (video usability information) from an SPS: sample aspect
 * ratio, video signal description, timing info, HRD parameters and
 * bitstream restrictions. Fields the decoder does not need are read and
 * discarded to keep the bitstream position correct.
 */
6960 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6961     MpegEncContext * const s = &h->s;
6962     int aspect_ratio_info_present_flag;
6963     unsigned int aspect_ratio_idc;
6965     aspect_ratio_info_present_flag= get_bits1(&s->gb);
6967     if( aspect_ratio_info_present_flag ) {
6968         aspect_ratio_idc= get_bits(&s->gb, 8);
6969         if( aspect_ratio_idc == EXTENDED_SAR ) {
6970             sps->sar.num= get_bits(&s->gb, 16);
6971             sps->sar.den= get_bits(&s->gb, 16);
6972         }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
6973             sps->sar=  pixel_aspect[aspect_ratio_idc];
6975             av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6982 //            s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6984     if(get_bits1(&s->gb)){      /* overscan_info_present_flag */
6985         get_bits1(&s->gb);      /* overscan_appropriate_flag */
6988     if(get_bits1(&s->gb)){      /* video_signal_type_present_flag */
6989         get_bits(&s->gb, 3);    /* video_format */
6990         get_bits1(&s->gb);      /* video_full_range_flag */
6991         if(get_bits1(&s->gb)){  /* colour_description_present_flag */
6992             get_bits(&s->gb, 8); /* colour_primaries */
6993             get_bits(&s->gb, 8); /* transfer_characteristics */
6994             get_bits(&s->gb, 8); /* matrix_coefficients */
6998     if(get_bits1(&s->gb)){      /* chroma_location_info_present_flag */
6999         get_ue_golomb(&s->gb);  /* chroma_sample_location_type_top_field */
7000         get_ue_golomb(&s->gb);  /* chroma_sample_location_type_bottom_field */
7003     sps->timing_info_present_flag = get_bits1(&s->gb);
7004     if(sps->timing_info_present_flag){
7005         sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7006         sps->time_scale = get_bits_long(&s->gb, 32);
7007         sps->fixed_frame_rate_flag = get_bits1(&s->gb);
7010     sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7011     if(sps->nal_hrd_parameters_present_flag)
7012         if(decode_hrd_parameters(h, sps) < 0)
7014     sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7015     if(sps->vcl_hrd_parameters_present_flag)
7016         if(decode_hrd_parameters(h, sps) < 0)
7018     if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
7019         get_bits1(&s->gb);     /* low_delay_hrd_flag */
7020     sps->pic_struct_present_flag = get_bits1(&s->gb);
7022     sps->bitstream_restriction_flag = get_bits1(&s->gb);
7023     if(sps->bitstream_restriction_flag){
7024         get_bits1(&s->gb);     /* motion_vectors_over_pic_boundaries_flag */
7025         get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7026         get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7027         get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7028         get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7029         sps->num_reorder_frames= get_ue_golomb(&s->gb);
7030         get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
/* 16 is the DPB limit; larger values would overflow delayed_pic. */
7032         if(sps->num_reorder_frames > 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7033             av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", sps->num_reorder_frames);
/*
 * Parse one scaling list of 'size' (16 or 64) entries in zigzag order.
 * If the list is absent the fallback list is used; an initial delta that
 * zeroes the first coefficient selects the JVT default list. Deltas are
 * signed Exp-Golomb, accumulated modulo 256; a zero value repeats the
 * previous coefficient.
 */
7041 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7042                                 const uint8_t *jvt_list, const uint8_t *fallback_list){
7043     MpegEncContext * const s = &h->s;
7044     int i, last = 8, next = 8;
7045     const uint8_t *scan = size == 16 ? zigzag_scan : ff_zigzag_direct;
7046     if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7047         memcpy(factors, fallback_list, size*sizeof(uint8_t));
7049         for(i=0;i<size;i++){
7051                 next = (last + get_se_golomb(&s->gb)) & 0xff;
7052             if(!i && !next){ /* matrix not written, we use the preset one */
7053                 memcpy(factors, jvt_list, size*sizeof(uint8_t));
7056             last = factors[scan[i]] = next ? next : last;
/*
 * Parse the full set of scaling matrices for an SPS or PPS. Fallback
 * chains follow the spec: each list falls back to the previous list of
 * the same class, the first lists fall back to the SPS matrices (for a
 * PPS) or the JVT defaults (for an SPS).
 */
7060 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7061                                     uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7062     MpegEncContext * const s = &h->s;
/* A PPS may inherit from already-parsed SPS matrices. */
7063     int fallback_sps = !is_sps && sps->scaling_matrix_present;
7064     const uint8_t *fallback[4] = {
7065         fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7066         fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7067         fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7068         fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7070     if(get_bits1(&s->gb)){
7071         sps->scaling_matrix_present |= is_sps;
7072         decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7073         decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7074         decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7075         decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7076         decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7077         decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
7078         if(is_sps || pps->transform_8x8_mode){
7079             decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]);  // Intra, Y
7080             decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]);  // Inter, Y
/*
 * Parse a sequence parameter set NAL and store it in h->sps_buffers[sps_id].
 * Validates ranges of key fields (sps_id, poc_cycle_length, reference
 * count, picture dimensions) before accepting the SPS.
 * NOTE(review): error-return lines and closing braces are among the lines
 * missing from this listing; code left byte-identical.
 */
7085 int ff_h264_decode_seq_parameter_set(H264Context *h){
7086     MpegEncContext * const s = &h->s;
7087     int profile_idc, level_idc;
7088     unsigned int sps_id;
7092     profile_idc= get_bits(&s->gb, 8);
7093     get_bits1(&s->gb);   //constraint_set0_flag
7094     get_bits1(&s->gb);   //constraint_set1_flag
7095     get_bits1(&s->gb);   //constraint_set2_flag
7096     get_bits1(&s->gb);   //constraint_set3_flag
7097     get_bits(&s->gb, 4); // reserved
7098     level_idc= get_bits(&s->gb, 8);
7099     sps_id= get_ue_golomb_31(&s->gb);
7101     if(sps_id >= MAX_SPS_COUNT) {
7102         av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
7105     sps= av_mallocz(sizeof(SPS));
7109     sps->profile_idc= profile_idc;
7110     sps->level_idc= level_idc;
/* Flat default matrices (all 16) until/unless the stream overrides them. */
7112     memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
7113     memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
7114     sps->scaling_matrix_present = 0;
7116     if(sps->profile_idc >= 100){ //high profile
7117         sps->chroma_format_idc= get_ue_golomb_31(&s->gb);
7118         if(sps->chroma_format_idc == 3)
7119             sps->residual_color_transform_flag = get_bits1(&s->gb);
7120         sps->bit_depth_luma   = get_ue_golomb(&s->gb) + 8;
7121         sps->bit_depth_chroma = get_ue_golomb(&s->gb) + 8;
7122         sps->transform_bypass = get_bits1(&s->gb);
7123         decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7125         sps->chroma_format_idc= 1;
7128     sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7129     sps->poc_type= get_ue_golomb_31(&s->gb);
7131     if(sps->poc_type == 0){ //FIXME #define
7132         sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7133     } else if(sps->poc_type == 1){//FIXME #define
7134         sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7135         sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7136         sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7137         sps->poc_cycle_length                = get_ue_golomb(&s->gb);
7139         if((unsigned)sps->poc_cycle_length >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
7140             av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", sps->poc_cycle_length);
7144         for(i=0; i<sps->poc_cycle_length; i++)
7145             sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7146     }else if(sps->poc_type != 2){
7147         av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7151     sps->ref_frame_count= get_ue_golomb_31(&s->gb);
7152     if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){
7153         av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7156     sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7157     sps->mb_width = get_ue_golomb(&s->gb) + 1;
7158     sps->mb_height= get_ue_golomb(&s->gb) + 1;
7159     if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7160        avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)){
7161         av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7165     sps->frame_mbs_only_flag= get_bits1(&s->gb);
7166     if(!sps->frame_mbs_only_flag)
7167         sps->mb_aff= get_bits1(&s->gb);
7171     sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7173 #ifndef ALLOW_INTERLACE
7175         av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7177     sps->crop= get_bits1(&s->gb);
7179         sps->crop_left  = get_ue_golomb(&s->gb);
7180         sps->crop_right = get_ue_golomb(&s->gb);
7181         sps->crop_top   = get_ue_golomb(&s->gb);
7182         sps->crop_bottom= get_ue_golomb(&s->gb);
7183         if(sps->crop_left || sps->crop_top){
7184             av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7186         if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7187             av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7193         sps->crop_bottom= 0;
7196     sps->vui_parameters_present_flag= get_bits1(&s->gb);
7197     if( sps->vui_parameters_present_flag )
7198         decode_vui_parameters(h, sps);
7200     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7201         av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7202                sps_id, sps->profile_idc, sps->level_idc,
7204                sps->ref_frame_count,
7205                sps->mb_width, sps->mb_height,
7206                sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7207                sps->direct_8x8_inference_flag ? "8B8" : "",
7208                sps->crop_left, sps->crop_right,
7209                sps->crop_top, sps->crop_bottom,
7210                sps->vui_parameters_present_flag ? "VUI" : "",
7211                ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
/* Replace any previously stored SPS with the same id. */
7215     av_free(h->sps_buffers[sps_id]);
7216     h->sps_buffers[sps_id]= sps;
/*
 * Fill pps->chroma_qp_table[t] with the chroma QP for every luma QP 0..51,
 * applying the chroma_qp_index_offset 'index' (clamped to the valid QP
 * range). The declaration-specifier line of this function is among the
 * lines missing from this listing.
 */
7225 build_qp_table(PPS *pps, int t, int index)
7228     for(i = 0; i < 52; i++)
7229         pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
/* Decode a picture parameter set (PPS) NAL unit from s->gb.
 * bit_length is the RBSP payload size in bits, used to detect the optional
 * trailing fields (transform_8x8_mode, scaling matrices, second chroma QP
 * offset). On success the new PPS replaces h->pps_buffers[pps_id].
 * NOTE(review): this listing is a damaged extraction — the leading numbers
 * are stale original line numbers and several lines (error returns, `fail:`
 * label, closing braces, the /\* *\/ delimiters of the FMO syntax table)
 * are missing; code is left byte-identical. */
7232 int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length){
7233     MpegEncContext * const s = &h->s;
7234     unsigned int pps_id= get_ue_golomb(&s->gb);
// Reject out-of-range ids before allocating (error return dropped in extraction).
7237     if(pps_id >= MAX_PPS_COUNT) {
7238         av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
// Zero-filled allocation; `pps` is declared on a dropped line above.
7242     pps= av_mallocz(sizeof(PPS));
// The referenced SPS must already have been decoded.
7245     pps->sps_id= get_ue_golomb_31(&s->gb);
7246     if((unsigned)pps->sps_id>=MAX_SPS_COUNT || h->sps_buffers[pps->sps_id] == NULL){
7247         av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7251     pps->cabac= get_bits1(&s->gb);
7252     pps->pic_order_present= get_bits1(&s->gb);
7253     pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
// FMO (flexible macroblock ordering): map type is parsed but FMO itself is
// not implemented; the lines below were the interior of a spec-syntax-table
// comment whose delimiters were lost in extraction.
7254     if(pps->slice_group_count > 1 ){
7255         pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7256         av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7257         switch(pps->mb_slice_group_map_type){
7260 |   for( i = 0; i <= num_slice_groups_minus1; i++ ) |   |      |
7261 |    run_length[ i ]                                |1  |ue(v) |
7266 |   for( i = 0; i < num_slice_groups_minus1; i++ )  |   |      |
7268 |    top_left_mb[ i ]                               |1  |ue(v) |
7269 |    bottom_right_mb[ i ]                           |1  |ue(v) |
7277 |   slice_group_change_direction_flag               |1  |u(1)  |
7278 |   slice_group_change_rate_minus1                  |1  |ue(v) |
7283 |   slice_group_id_cnt_minus1                       |1  |ue(v) |
7284 |   for( i = 0; i <= slice_group_id_cnt_minus1; i++ |   |      |
7286 |    slice_group_id[ i ]                            |1  |u(v)  |
7291     pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7292     pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
// Sanity-cap the default reference list sizes (32 each).
7293     if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7294         av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7298     pps->weighted_pred= get_bits1(&s->gb);
7299     pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7300     pps->init_qp= get_se_golomb(&s->gb) + 26;
7301     pps->init_qs= get_se_golomb(&s->gb) + 26;
7302     pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7303     pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7304     pps->constrained_intra_pred= get_bits1(&s->gb);
7305     pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7307     pps->transform_8x8_mode= 0;
7308     h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
// Start from the SPS scaling lists; PPS-level lists may override below.
7309     memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
7310     memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
// More bits than consumed so far => the optional trailing PPS fields are present.
7312     if(get_bits_count(&s->gb) < bit_length){
7313         pps->transform_8x8_mode= get_bits1(&s->gb);
7314         decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7315         pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
// Otherwise Cr reuses the Cb offset (the `else` branch line was dropped).
7317         pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7320     build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7321     build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7322     if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7323         h->pps.chroma_qp_diff= 1;
7325     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7326         av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7327                pps_id, pps->sps_id,
7328                pps->cabac ? "CABAC" : "CAVLC",
7329                pps->slice_group_count,
7330                pps->ref_count[0], pps->ref_count[1],
7331                pps->weighted_pred ? "weighted" : "",
7332                pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7333                pps->deblocking_filter_parameters_present ? "LPAR" : "",
7334                pps->constrained_intra_pred ? "CONSTR" : "",
7335                pps->redundant_pic_cnt_present ? "REDU" : "",
7336                pps->transform_8x8_mode ? "8x8DCT" : ""
// Publish: free any previous PPS with this id and store the new one.
7340     av_free(h->pps_buffers[pps_id]);
7341     h->pps_buffers[pps_id]= pps;
7349  * Call decode_slice() for each context.
7351  * @param h h264 master context
7352  * @param context_count number of contexts to execute
// NOTE(review): damaged extraction — stale line numbers are fused into each
// line and several lines (variable declarations for i/hx, early returns for
// the hwaccel/VDPAU paths, braces) are missing; code kept byte-identical.
7354 static void execute_decode_slices(H264Context *h, int context_count){
7355     MpegEncContext * const s = &h->s;
7356     AVCodecContext * const avctx= s->avctx;
// Hardware-accelerated paths do not run the software slice decoder
// (the bodies of these two conditionals were dropped in extraction).
7360     if (s->avctx->hwaccel)
7362     if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
// Single context: decode directly on the master context.
7364     if(context_count == 1) {
7365         decode_slice(avctx, &h);
// Multiple contexts: seed per-thread error state, then run them via
// avctx->execute and fold results back into the master context.
7367         for(i = 1; i < context_count; i++) {
7368             hx = h->thread_context[i];
7369             hx->s.error_recognition = avctx->error_recognition;
7370             hx->s.error_count = 0;
7373         avctx->execute(avctx, (void *)decode_slice,
7374                        (void **)h->thread_context, NULL, context_count, sizeof(void*));
7376         /* pull back stuff from slices to master context */
7377         hx = h->thread_context[context_count - 1];
7378         s->mb_x = hx->s.mb_x;
7379         s->mb_y = hx->s.mb_y;
7380         s->dropable = hx->s.dropable;
7381         s->picture_structure = hx->s.picture_structure;
7382         for(i = 1; i < context_count; i++)
7383             h->s.error_count += h->thread_context[i]->s.error_count;
/* Split the input buffer into NAL units and dispatch each by type
 * (slice / DPA-DPB-DPC partitions / SEI / SPS / PPS / ...), batching slice
 * contexts and flushing them through execute_decode_slices().
 * Handles both AVC length-prefixed (h->is_avc) and Annex-B start-code input.
 * NOTE(review): damaged extraction — stale line numbers fused into the text;
 * the main per-NAL loop header, many case labels, error returns and braces
 * were dropped. Code kept byte-identical. */
7388 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7389     MpegEncContext * const s = &h->s;
7390     AVCodecContext * const avctx= s->avctx;
7392     H264Context *hx; ///< thread context
7393     int context_count = 0;
7395     h->max_contexts = avctx->thread_count;
// Debug hex dump of the first 50 input bytes (enclosing debug guard dropped).
7398     for(i=0; i<50; i++){
7399         av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
// Unless decoding in chunks, start a fresh access unit.
7402     if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7403         h->current_slice = 0;
7404         if (!s->first_field)
7405             s->current_picture_ptr= NULL;
// --- per-NAL loop body (loop header lost in extraction) ---
// AVC mode: read h->nal_length_size big-endian length bytes and validate.
7417             if(buf_index >= buf_size) break;
7419             for(i = 0; i < h->nal_length_size; i++)
7420                 nalsize = (nalsize << 8) | buf[buf_index++];
7421             if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7426                 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7431             // start code prefix search
7432             for(; buf_index + 3 < buf_size; buf_index++){
7433                 // This should always succeed in the first iteration.
7434                 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7438             if(buf_index+3 >= buf_size) break;
// Unescape the NAL (removes emulation-prevention bytes) into ptr/dst_length.
7443         hx = h->thread_context[context_count];
7445         ptr= ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7446         if (ptr==NULL || dst_length < 0){
// Strip trailing zero bytes, then compute the exact RBSP bit length.
// NOTE(review): `dst_length > 0` is checked after the dereference here —
// this is the original's order, preserved byte-identically.
7449         while(ptr[dst_length - 1] == 0 && dst_length > 0)
7451         bit_length= !dst_length ? 0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1));
7453         if(s->avctx->debug&FF_DEBUG_STARTCODE){
7454             av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
// In AVC mode a size/consumed mismatch is only an error if the slack
// bytes are non-zero (zero padding is demoted to a debug message).
7457         if (h->is_avc && (nalsize != consumed)){
7458             int i, debug_level = AV_LOG_DEBUG;
7459             for (i = consumed; i < nalsize; i++)
7460                 if (buf[buf_index+i])
7461                     debug_level = AV_LOG_ERROR;
7462             av_log(h->s.avctx, debug_level, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7466         buf_index += consumed;
// Skip non-reference NALs when the caller asked to discard them.
7468         if(  (s->hurry_up == 1 && h->nal_ref_idc  == 0) //FIXME do not discard SEI id
7469            ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc  == 0))
// Dispatch by NAL type (most case labels were dropped in extraction;
// the first visible branch is the NAL_IDR_SLICE / NAL_SLICE handling).
7474         switch(hx->nal_unit_type){
7476             if (h->nal_unit_type != NAL_IDR_SLICE) {
7477                 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7480             idr(h); //FIXME ensure we don't loose some frames if there is reordering
7482             init_get_bits(&hx->s.gb, ptr, bit_length);
7484             hx->inter_gb_ptr= &hx->s.gb;
7485             hx->s.data_partitioning = 0;
7487             if((err = decode_slice_header(hx, h)))
// An IDR slice or a recovery-point SEI marks the picture as a key frame.
7490             s->current_picture_ptr->key_frame |=
7491                     (hx->nal_unit_type == NAL_IDR_SLICE) ||
7492                     (h->sei_recovery_frame_cnt >= 0);
7493             if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7494                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7495                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=FF_B_TYPE)
7496                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7497                && avctx->skip_frame < AVDISCARD_ALL){
// VDPAU path: forward the raw NAL (re-prefixed with an Annex-B start code).
7498                 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
7499                     static const uint8_t start_code[] = {0x00, 0x00, 0x01};
7500                     ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code));
7501                     ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed );
// Data-partitioned slices: DPA carries the header, DPB/DPC carry the
// intra/inter residual bitstreams (case labels dropped in extraction).
7507             init_get_bits(&hx->s.gb, ptr, bit_length);
7509             hx->inter_gb_ptr= NULL;
7510             hx->s.data_partitioning = 1;
7512             err = decode_slice_header(hx, h);
7515             init_get_bits(&hx->intra_gb, ptr, bit_length);
7516             hx->intra_gb_ptr= &hx->intra_gb;
7519             init_get_bits(&hx->inter_gb, ptr, bit_length);
7520             hx->inter_gb_ptr= &hx->inter_gb;
// Only count the context once all required partitions are available.
7522             if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7523                && s->context_initialized
7525                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7526                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=FF_B_TYPE)
7527                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7528                && avctx->skip_frame < AVDISCARD_ALL)
// Parameter-set and SEI NALs are parsed on the master context.
7532             init_get_bits(&s->gb, ptr, bit_length);
7533             ff_h264_decode_sei(h);
7536             init_get_bits(&s->gb, ptr, bit_length);
7537             ff_h264_decode_seq_parameter_set(h);
7539             if(s->flags& CODEC_FLAG_LOW_DELAY)
7542             if(avctx->has_b_frames < 2)
7543                 avctx->has_b_frames= !s->low_delay;
7546             init_get_bits(&s->gb, ptr, bit_length);
7548             ff_h264_decode_picture_parameter_set(h, bit_length);
7552         case NAL_END_SEQUENCE:
7553         case NAL_END_STREAM:
7554         case NAL_FILLER_DATA:
7556         case NAL_AUXILIARY_SLICE:
7559             av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
// Flush a full batch of slice contexts.
7562         if(context_count == h->max_contexts) {
7563             execute_decode_slices(h, context_count);
7568             av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7570             /* Slice could not be decoded in parallel mode, copy down
7571              * NAL unit stuff to context 0 and restart. Note that
7572              * rbsp_buffer is not transferred, but since we no longer
7573              * run in parallel mode this should not be an issue. */
7574             h->nal_unit_type = hx->nal_unit_type;
7575             h->nal_ref_idc   = hx->nal_ref_idc;
// Flush any remaining batched contexts after the loop.
7581         execute_decode_slices(h, context_count);
7586  * returns the number of bytes consumed for building the current frame
// Clamps pos into (0, buf_size]: a zero position is bumped to 1 to guarantee
// forward progress, and a position within 10 bytes of the end is rounded up
// to the whole buffer. NOTE(review): the `return pos;` line was lost in
// extraction (stale-number gap 7590 -> end); code kept byte-identical.
7588 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7589     if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7590     if(pos+10>buf_size) pos=buf_size; // oops ;)
/* Top-level AVCodec decode callback: parse avcC extradata on first call,
 * decode the NAL units of one packet, finish the current picture, derive
 * interlacing/top-field-first flags, reorder delayed pictures into display
 * order, and emit at most one AVFrame through *data/*data_size.
 * Returns the number of input bytes consumed (via get_consumed_bytes).
 * NOTE(review): damaged extraction — stale line numbers are fused into each
 * line and many lines (declarations, returns, braces, else branches) are
 * missing; code kept byte-identical throughout. */
7595 static int decode_frame(AVCodecContext *avctx,
7596                         void *data, int *data_size,
7597                         const uint8_t *buf, int buf_size)
7599     H264Context *h = avctx->priv_data;
7600     MpegEncContext *s = &h->s;
7601     AVFrame *pict = data;
7604     s->flags= avctx->flags;
7605     s->flags2= avctx->flags2;
7607    /* end of stream, output what is still in the buffers */
// Flush path: pick the delayed picture with the smallest POC and return it.
7608     if (buf_size == 0) {
7612         //FIXME factorize this with the output code below
7613         out = h->delayed_pic[0];
7615         for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7616             if(h->delayed_pic[i]->poc < out->poc){
7617                 out = h->delayed_pic[i];
7621         for(i=out_idx; h->delayed_pic[i]; i++)
7622             h->delayed_pic[i] = h->delayed_pic[i+1];
7625             *data_size = sizeof(AVFrame);
7626             *pict= *(AVFrame*)out;
// One-time avcC extradata parsing for length-prefixed (MP4-style) streams.
7632     if(h->is_avc && !h->got_avcC) {
7633         int i, cnt, nalsize;
7634         unsigned char *p = avctx->extradata;
7635         if(avctx->extradata_size < 7) {
7636             av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7640             av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7643         /* sps and pps in the avcC always have length coded with 2 bytes,
7644            so put a fake nal_length_size = 2 while parsing them */
7645         h->nal_length_size = 2;
7646         // Decode sps from avcC
7647         cnt = *(p+5) & 0x1f; // Number of sps
7649         for (i = 0; i < cnt; i++) {
7650             nalsize = AV_RB16(p) + 2;
7651             if(decode_nal_units(h, p, nalsize) < 0) {
7652                 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7657         // Decode pps from avcC
7658         cnt = *(p++); // Number of pps
7659         for (i = 0; i < cnt; i++) {
7660             nalsize = AV_RB16(p) + 2;
7661             if(decode_nal_units(h, p, nalsize)  != nalsize) {
7662                 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7667         // Now store right nal length size, that will be use to parse all other nals
7668         h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7669         // Do not reparse avcC
// Annex-B streams: decode any raw extradata NALs once before the first packet.
7673     if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7674         if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7679     buf_index=decode_nal_units(h, buf, buf_size);
// Not in chunk mode and no picture was started: error unless frames are
// being deliberately skipped.
7683     if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7684         if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7685         av_log(avctx, AV_LOG_ERROR, "no frame!\n");
// Picture is complete (or chunk mode reached the last MB row): finish it.
7689     if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7690         Picture *out = s->current_picture_ptr;
7691         Picture *cur = s->current_picture_ptr;
7692         int i, pics, cross_idr, out_of_order, out_idx;
7696         s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7697         s->current_picture_ptr->pict_type= s->pict_type;
7699         if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
7700             ff_vdpau_h264_set_reference_frames(s);
// Apply memory-management control operations and roll POC state forward.
7703             execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7704             h->prev_poc_msb= h->poc_msb;
7705             h->prev_poc_lsb= h->poc_lsb;
7707         h->prev_frame_num_offset= h->frame_num_offset;
7708         h->prev_frame_num= h->frame_num;
7710         if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
7711             ff_vdpau_h264_picture_complete(s);
7714          * FIXME: Error handling code does not seem to support interlaced
7715          * when slices span multiple rows
7716          * The ff_er_add_slice calls don't work right for bottom
7717          * fields; they cause massive erroneous error concealing
7718          * Error marking covers both fields (top and bottom).
7719          * This causes a mismatched s->error_count
7720          * and a bad error table. Further, the error count goes to
7721          * INT_MAX when called for bottom field, because mb_y is
7722          * past end by one (callers fault) and resync_mb_y != 0
7723          * causes problems for the first MB line, too.
// Reset per-access-unit SEI state.
7729         h->sei_recovery_frame_cnt = -1;
7730         h->sei_dpb_output_delay = 0;
7731         h->sei_cpb_removal_delay = -1;
7732         h->sei_buffering_period_present = 0;
// A field with an unset POC means its pair has not arrived yet.
7734         if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7735             /* Wait for second field. */
7739         cur->repeat_pict = 0;
7741         /* Signal interlacing information externally. */
7742         /* Prioritize picture timing SEI information over used decoding process if it exists. */
7743         if(h->sps.pic_struct_present_flag){
7744             switch (h->sei_pic_struct)
7746             case SEI_PIC_STRUCT_FRAME:
7747                 cur->interlaced_frame = 0;
7749             case SEI_PIC_STRUCT_TOP_FIELD:
7750             case SEI_PIC_STRUCT_BOTTOM_FIELD:
7751             case SEI_PIC_STRUCT_TOP_BOTTOM:
7752             case SEI_PIC_STRUCT_BOTTOM_TOP:
7753                 cur->interlaced_frame = 1;
7755             case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
7756             case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
7757                 // Signal the possibility of telecined film externally (pic_struct 5,6)
7758                 // From these hints, let the applications decide if they apply deinterlacing.
7759                 cur->repeat_pict = 1;
7760                 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7762             case SEI_PIC_STRUCT_FRAME_DOUBLING:
7763                 // Force progressive here, as doubling interlaced frame is a bad idea.
7764                 cur->interlaced_frame = 0;
7765                 cur->repeat_pict = 2;
7767             case SEI_PIC_STRUCT_FRAME_TRIPLING:
7768                 cur->interlaced_frame = 0;
7769                 cur->repeat_pict = 4;
7773             /* Derive interlacing flag from used decoding process. */
7774             cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7777         if (cur->field_poc[0] != cur->field_poc[1]){
7778             /* Derive top_field_first from field pocs. */
7779             cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7781             if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
7782                 /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
7783                 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
7784                   || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
7785                     cur->top_field_first = 1;
7787                     cur->top_field_first = 0;
7789                 /* Most likely progressive */
7790                 cur->top_field_first = 0;
7794     //FIXME do something with unavailable reference frames
7796         /* Sort B-frames into display order */
// Grow the reorder delay to what the stream declares (or the worst case
// when no bitstream restriction info is available under strict compliance).
7798         if(h->sps.bitstream_restriction_flag
7799            && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7800             s->avctx->has_b_frames = h->sps.num_reorder_frames;
7804         if(   s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7805            && !h->sps.bitstream_restriction_flag){
7806             s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
// Append the finished picture to the delayed-output queue; keep a reference
// on it so it survives until it is actually output.
7811         while(h->delayed_pic[pics]) pics++;
7813         assert(pics <= MAX_DELAYED_PIC_COUNT);
7815         h->delayed_pic[pics++] = cur;
7816         if(cur->reference == 0)
7817             cur->reference = DELAYED_PIC_REF;
// Select the lowest-POC delayed picture as the output candidate.
7819         out = h->delayed_pic[0];
7821         for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7822             if(h->delayed_pic[i]->poc < out->poc){
7823                 out = h->delayed_pic[i];
7826         cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;
7828         out_of_order = !cross_idr && out->poc < h->outputed_poc;
// Adapt has_b_frames upward when out-of-order output is detected.
7830         if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7832         else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7834                 ((!cross_idr && out->poc > h->outputed_poc + 2)
7835                  || cur->pict_type == FF_B_TYPE)))
7838             s->avctx->has_b_frames++;
// Pop the candidate from the queue once the delay is satisfied (or exceeded).
7841         if(out_of_order || pics > s->avctx->has_b_frames){
7842             out->reference &= ~DELAYED_PIC_REF;
7843             for(i=out_idx; h->delayed_pic[i]; i++)
7844                 h->delayed_pic[i] = h->delayed_pic[i+1];
7846         if(!out_of_order && pics > s->avctx->has_b_frames){
7847             *data_size = sizeof(AVFrame);
7849             h->outputed_poc = out->poc;
7850             *pict= *(AVFrame*)out;
7852             av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7857     assert(pict->data[0] || !*data_size);
7858     ff_print_debug_info(s, pict);
7859 //printf("out %d\n", (int)pict->data[0]);
7862     /* Return the Picture timestamp as the frame number */
7863     /* we subtract 1 because it is added on utils.c */
7864     avctx->frame_number = s->picture_number - 1;
7866     return get_consumed_bytes(s, buf_index, buf_size);
/* Fill h->mb_avail[] with neighbour-availability flags for the current
 * macroblock: a neighbour is available only if it lies inside the picture
 * AND belongs to the same slice (same h->slice_num in slice_table).
 * Indices 0/1/2 = top-left/top/top-right, 3 = left, 4/5 appear hard-coded
 * (see FIXMEs). NOTE(review): damaged extraction — the guard that the code
 * presumably applies to the top-row entries (stale-number gap 7871->7874,
 * an s->mb_y check would be expected; TODO confirm against the full source)
 * is among the dropped lines. Code kept byte-identical. */
7869 static inline void fill_mb_avail(H264Context *h){
7870     MpegEncContext * const s = &h->s;
7871     const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7874         h->mb_avail[0]= s->mb_x                  && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7875         h->mb_avail[1]=                             h->slice_table[mb_xy - s->mb_stride    ] == h->slice_num;
7876         h->mb_avail[2]= s->mb_x+1 < s->mb_width  && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7882     h->mb_avail[3]= s->mb_x                  && h->slice_table[mb_xy - 1] == h->slice_num;
7883     h->mb_avail[4]= 1; //FIXME move out
7884     h->mb_avail[5]= 0; //FIXME move out
// Self-test harness fragment (originally guarded by #ifdef TEST): exercises
// Exp-Golomb read/write round-trips, the 4x4 (I)DCT, the quantizer, and the
// NAL escape/unescape layer.
// NOTE(review): damaged extraction — the `int main(void){` line, the COUNT
// definition, all local declarations (pb, gb, temp, dsp, block, h, ...) and
// most braces were lost; only a sampling of statements remains, kept
// byte-identical below.
7892 #define SIZE (COUNT*40)
7898 //    int int_temp[10000];
7900     AVCodecContext avctx;
7902     dsputil_init(&dsp, &avctx);
// Round-trip test: write COUNT unsigned Exp-Golomb codes, read them back.
7904     init_put_bits(&pb, temp, SIZE);
7905     printf("testing unsigned exp golomb\n");
7906     for(i=0; i<COUNT; i++){
7908         set_ue_golomb(&pb, i);
7909         STOP_TIMER("set_ue_golomb");
7911     flush_put_bits(&pb);
7913     init_get_bits(&gb, temp, 8*SIZE);
7914     for(i=0; i<COUNT; i++){
7917         s= show_bits(&gb, 24);
7920         j= get_ue_golomb(&gb);
7922             printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7925         STOP_TIMER("get_ue_golomb");
// Same round-trip for signed Exp-Golomb, centered around zero.
7929     init_put_bits(&pb, temp, SIZE);
7930     printf("testing signed exp golomb\n");
7931     for(i=0; i<COUNT; i++){
7933         set_se_golomb(&pb, i - COUNT/2);
7934         STOP_TIMER("set_se_golomb");
7936     flush_put_bits(&pb);
7938     init_get_bits(&gb, temp, 8*SIZE);
7939     for(i=0; i<COUNT; i++){
7942         s= show_bits(&gb, 24);
7945         j= get_se_golomb(&gb);
7946         if(j != i - COUNT/2){
7947             printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7950         STOP_TIMER("get_se_golomb");
// Forward-diff DCT followed by IDCT-add on random data; accumulate error.
7954     printf("testing 4x4 (I)DCT\n");
7957     uint8_t src[16], ref[16];
7958     uint64_t error= 0, max_error=0;
7960     for(i=0; i<COUNT; i++){
7962 //        printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7963         for(j=0; j<16; j++){
7964             ref[j]= random()%255;
7965             src[j]= random()%255;
7968         h264_diff_dct_c(block, src, ref, 4);
// Scale coefficients (approximate norm correction per row/column parity).
7971         for(j=0; j<16; j++){
7972 //            printf("%d ", block[j]);
7973             block[j]= block[j]*4;
7974             if(j&1) block[j]= (block[j]*4 + 2)/5;
7975             if(j&4) block[j]= (block[j]*4 + 2)/5;
7979         s->dsp.h264_idct_add(ref, block, 4);
7980 /*        for(j=0; j<16; j++){
7981             printf("%d ", ref[j]);
7985         for(j=0; j<16; j++){
7986             int diff= FFABS(src[j] - ref[j]);
7989             max_error= FFMAX(max_error, diff);
7992     printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
7993     printf("testing quantizer\n");
7994     for(qp=0; qp<52; qp++){
7996             src1_block[i]= src2_block[i]= random()%255;
// NAL layer: encode random non-zero bitstreams with injected zero runs,
// decode them back, and compare length, consumed bytes, and content.
7999     printf("Testing NAL layer\n");
8001     uint8_t bitstream[COUNT];
8002     uint8_t nal[COUNT*2];
8004     memset(&h, 0, sizeof(H264Context));
8006     for(i=0; i<COUNT; i++){
8014         for(j=0; j<COUNT; j++){
8015             bitstream[j]= (random() % 255) + 1;
8018         for(j=0; j<zeros; j++){
8019             int pos= random() % COUNT;
8020             while(bitstream[pos] == 0){
8029         nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8031             printf("encoding failed\n");
8035         out= ff_h264_decode_nal(&h, nal, &out_length, &consumed, nal_length);
8039         if(out_length != COUNT){
8040             printf("incorrect length %d %d\n", out_length, COUNT);
8044         if(consumed != nal_length){
8045             printf("incorrect consumed length %d %d\n", nal_length, consumed);
8049         if(memcmp(bitstream, out, COUNT)){
8050             printf("mismatch\n");
8056     printf("Testing RBSP\n");
/* AVCodec close callback: free the per-context RBSP unescape buffers, the
 * per-picture tables, and every cached SPS/PPS. av_freep NULLs the slots so
 * the arrays are left in a safe state.
 * NOTE(review): damaged extraction — the opening brace, the declaration of
 * i, the MPV_common_end()/return lines are among the dropped lines; code
 * kept byte-identical. */
8064 static av_cold int decode_end(AVCodecContext *avctx)
8066     H264Context *h = avctx->priv_data;
8067     MpegEncContext *s = &h->s;
8070     av_freep(&h->rbsp_buffer[0]);
8071     av_freep(&h->rbsp_buffer[1]);
8072     free_tables(h); //FIXME cleanup init stuff perhaps
8074     for(i = 0; i < MAX_SPS_COUNT; i++)
8075         av_freep(h->sps_buffers + i);
8077     for(i = 0; i < MAX_PPS_COUNT; i++)
8078         av_freep(h->pps_buffers + i);
8082 //    memset(h, 0, sizeof(H264Context));
// Registration entry for the software H.264 decoder.
// NOTE(review): damaged extraction — the name/type/id fields, the
// init/close/decode callbacks, the flush field and the closing `};` were
// dropped (stale-number gaps 8088->8092->8097->8099); fields kept
// byte-identical.
8088 AVCodec h264_decoder = {
8092     sizeof(H264Context),
8097     /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
8099     .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
8102 #if CONFIG_H264_VDPAU_DECODER
// Registration entry for the VDPAU-accelerated variant (adds
// CODEC_CAP_HWACCEL_VDPAU to the capabilities).
// NOTE(review): damaged extraction — name/type/id fields, callbacks and the
// closing `};` were dropped; fields kept byte-identical.
8103 AVCodec h264_vdpau_decoder = {
8107     sizeof(H264Context),
8112     CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
8114     .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
8118 #if CONFIG_SVQ3_DECODER