2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 * @file libavcodec/h264.c
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
31 #include "mpegvideo.h"
34 #include "h264_parser.h"
37 #include "rectangle.h"
38 #include "vdpau_internal.h"
42 #include "x86/h264_i386.h"
49 * Value of Picture.reference when Picture is not a reference picture, but
50 * is held for delayed output.
52 #define DELAYED_PIC_REF 4
// Static CAVLC VLC decode tables.  The VLC structs are initialised once
// (elsewhere in this file) and point into the statically allocated
// *_tables arrays below, so no per-context allocation is needed.
// coeff_token has 4 context classes; the table sizes per class follow.
54 static VLC coeff_token_vlc[4];
55 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
56 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
58 static VLC chroma_dc_coeff_token_vlc;
59 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
60 static const int chroma_dc_coeff_token_vlc_table_size = 256;
// total_zeros has one VLC per possible total-coefficient count (15).
62 static VLC total_zeros_vlc[15];
63 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
64 static const int total_zeros_vlc_tables_size = 512;
66 static VLC chroma_dc_total_zeros_vlc[3];
67 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
68 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
// run_before: one VLC per zeros-left class (1..6), plus the shared
// run7 table below for the >6 case.
70 static VLC run_vlc[6];
71 static VLC_TYPE run_vlc_tables[6][8][2];
72 static const int run_vlc_tables_size = 8;
75 static VLC_TYPE run7_vlc_table[96][2];
76 static const int run7_vlc_table_size = 96;
// Forward declarations: SVQ3 shares this decoder's infrastructure, and
// the in-loop deblocking filter variants are defined later in the file.
78 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
79 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
80 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
81 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
82 static Picture * remove_long(H264Context *h, int i, int ref_mask);
// Pack two 16-bit values into one 32-bit word, with the byte order
// chosen so that the packed value matches in-memory layout on both
// endiannesses (big-endian path puts 'a' in the high half).
// NOTE(review): (a<<16) on a negative int is formally UB in C — relied
// upon here as the usual two's-complement behavior; confirm before
// porting to exotic targets.
84 static av_always_inline uint32_t pack16to32(int a, int b){
85 #ifdef WORDS_BIGENDIAN
86 return (b&0xFFFF) + (a<<16);
88 return (a&0xFFFF) + (b<<16);
// rem6[q] == q % 6 and div6[q] == q / 6 for the valid luma QP range
// 0..51; table lookups avoid a division in the dequant hot path.
92 static const uint8_t rem6[52]={
93 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
96 static const uint8_t div6[52]={
97 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
// Per-MBAFF-configuration remapping of left-neighbour 4x4 block indices;
// selected in fill_caches() (indices 0..3 — see its left_block logic).
100 static const uint8_t left_block_options[4][8]={
// Lookup table for fast CAVLC level decoding, indexed by prefix class
// and LEVEL_TAB_BITS bits of the bitstream; [0]=level, [1]=bits consumed
// (presumably — initialisation is elsewhere in the file, verify there).
107 #define LEVEL_TAB_BITS 8
108 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
// Fill the per-macroblock neighbour caches — intra4x4 prediction modes,
// non_zero_count, cbp, mv/ref caches, CABAC mvd caches and B-direct
// flags — from the left/top/topleft/topright neighbour macroblocks,
// including the MBAFF field/frame special cases.
// @param mb_type     type of the current macroblock
// @param for_deblock nonzero when caching only what the loop filter needs
110 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
111 MpegEncContext * const s = &h->s;
112 const int mb_xy= h->mb_xy;
113 int topleft_xy, top_xy, topright_xy, left_xy[2];
114 int topleft_type, top_type, topright_type, left_type[2];
115 const uint8_t * left_block;
116 int topleft_partition= -1;
// Derive neighbour macroblock addresses; in field pictures the vertical
// stride is doubled (mb_stride << FIELD_PICTURE).
119 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
121 //FIXME deblocking could skip the intra and nnz parts.
122 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
125 /* Wow, what a mess, why didn't they simplify the interlacing & intra
126 * stuff, I can't imagine that these complex rules are worth it. */
128 topleft_xy = top_xy - 1;
129 topright_xy= top_xy + 1;
130 left_xy[1] = left_xy[0] = mb_xy-1;
131 left_block = left_block_options[0];
// MBAFF: neighbours are addressed via the macroblock *pair*; whether the
// top/bottom member of each pair is the true neighbour depends on the
// field/frame coding of the current MB and of each neighbour pair.
133 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
134 const int top_pair_xy = pair_xy - s->mb_stride;
135 const int topleft_pair_xy = top_pair_xy - 1;
136 const int topright_pair_xy = top_pair_xy + 1;
137 const int topleft_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
138 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
139 const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
140 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
141 const int curr_mb_field_flag = IS_INTERLACED(mb_type);
142 const int bottom = (s->mb_y & 1);
143 tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);
145 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
146 top_xy -= s->mb_stride;
148 if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
149 topleft_xy -= s->mb_stride;
150 } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
151 topleft_xy += s->mb_stride;
152 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
153 topleft_partition = 0;
155 if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
156 topright_xy -= s->mb_stride;
158 if (left_mb_field_flag != curr_mb_field_flag) {
159 left_xy[1] = left_xy[0] = pair_xy - 1;
160 if (curr_mb_field_flag) {
161 left_xy[1] += s->mb_stride;
162 left_block = left_block_options[3];
164 left_block= left_block_options[2 - bottom];
// Publish the resolved neighbour addresses for later use (e.g. by the
// deblocking filter and fetch_diagonal_mv()).
169 h->top_mb_xy = top_xy;
170 h->left_mb_xy[0] = left_xy[0];
171 h->left_mb_xy[1] = left_xy[1];
// Deblock path: a neighbour counts as available if its slice_table entry
// is a valid slice number (< 0xFFFF), regardless of which slice.
175 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
176 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
177 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
179 if(MB_MBAFF && !IS_INTRA(mb_type)){
181 for(list=0; list<h->list_count; list++){
182 //These values where changed for ease of performing MC, we need to change them back
183 //FIXME maybe we can make MC and loop filter use the same values or prevent
184 //the MC code from changing ref_cache and rather use a temporary array.
185 if(USES_LIST(mb_type,list)){
186 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
// Broadcast each 8-bit ref index into all four cache bytes of the
// corresponding 8x8 block (the *0x0101 replicates each byte once).
187 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
188 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
190 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
191 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
// Decode path: neighbours are usable only if they belong to the same
// slice (slice-boundary intra/MV prediction is forbidden).
196 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
197 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
198 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
199 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
200 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
// Intra prediction sample-availability bitmasks.  With constrained intra
// prediction only intra neighbours count as available (type_mask keeps
// just the INTRA type bits), otherwise any coded neighbour does.
202 if(IS_INTRA(mb_type)){
203 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
204 h->topleft_samples_available=
205 h->top_samples_available=
206 h->left_samples_available= 0xFFFF;
207 h->topright_samples_available= 0xEEEA;
209 if(!(top_type & type_mask)){
210 h->topleft_samples_available= 0xB3FF;
211 h->top_samples_available= 0x33FF;
212 h->topright_samples_available= 0x26EA;
// Left availability: MBAFF mixed field/frame needs per-half checks.
214 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
215 if(IS_INTERLACED(mb_type)){
216 if(!(left_type[0] & type_mask)){
217 h->topleft_samples_available&= 0xDFFF;
218 h->left_samples_available&= 0x5FFF;
220 if(!(left_type[1] & type_mask)){
221 h->topleft_samples_available&= 0xFF5F;
222 h->left_samples_available&= 0xFF5F;
225 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
226 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
227 assert(left_xy[0] == left_xy[1]);
228 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
229 h->topleft_samples_available&= 0xDF5F;
230 h->left_samples_available&= 0x5F5F;
234 if(!(left_type[0] & type_mask)){
235 h->topleft_samples_available&= 0xDF5F;
236 h->left_samples_available&= 0x5F5F;
240 if(!(topleft_type & type_mask))
241 h->topleft_samples_available&= 0x7FFF;
243 if(!(topright_type & type_mask))
244 h->topright_samples_available&= 0xFBFF;
// Intra4x4 prediction-mode cache: import the bottom row of the top
// neighbour and the relevant column of the left neighbour(s); missing
// or non-intra4x4 neighbours get a default predictor instead.
246 if(IS_INTRA4x4(mb_type)){
247 if(IS_INTRA4x4(top_type)){
248 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
249 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
250 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
251 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
254 if(!(top_type & type_mask))
259 h->intra4x4_pred_mode_cache[4+8*0]=
260 h->intra4x4_pred_mode_cache[5+8*0]=
261 h->intra4x4_pred_mode_cache[6+8*0]=
262 h->intra4x4_pred_mode_cache[7+8*0]= pred;
265 if(IS_INTRA4x4(left_type[i])){
266 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
267 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
270 if(!(left_type[i] & type_mask))
275 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
276 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
292 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
// non_zero_count cache: bottom row of the top neighbour (luma + the two
// chroma planes) and left column(s), with 0/64 defaults for missing
// neighbours (64 marks "unknown", 0 is correct only for CABAC inter).
294 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
295 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
296 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
297 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
299 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
300 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
302 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
303 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
306 h->non_zero_count_cache[4+8*0]=
307 h->non_zero_count_cache[5+8*0]=
308 h->non_zero_count_cache[6+8*0]=
309 h->non_zero_count_cache[7+8*0]=
311 h->non_zero_count_cache[1+8*0]=
312 h->non_zero_count_cache[2+8*0]=
314 h->non_zero_count_cache[1+8*3]=
315 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
319 for (i=0; i<2; i++) {
321 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
322 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
323 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
324 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
326 h->non_zero_count_cache[3+8*1 + 2*8*i]=
327 h->non_zero_count_cache[3+8*2 + 2*8*i]=
328 h->non_zero_count_cache[0+8*1 + 8*i]=
329 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
// Coded-block-pattern of the neighbours (used by CABAC context modelling
// and the loop filter).
336 h->top_cbp = h->cbp_table[top_xy];
337 } else if(IS_INTRA(mb_type)) {
344 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
345 } else if(IS_INTRA(mb_type)) {
// Pull the per-8x8 luma cbp bits for the specific left blocks selected
// by left_block (MBAFF-aware).
351 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
354 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
// Motion vector / reference index caches for inter prediction: load the
// bottom row of the top neighbour, the left column(s), and single
// entries from topleft/topright; unavailable neighbours get zero MVs and
// LIST_NOT_USED / PART_NOT_AVAILABLE ref markers.
359 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
361 for(list=0; list<h->list_count; list++){
362 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
363 /*if(!h->mv_cache_clean[list]){
364 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
365 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
366 h->mv_cache_clean[list]= 1;
370 h->mv_cache_clean[list]= 0;
372 if(USES_LIST(top_type, list)){
373 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
374 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
375 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
376 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
377 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
378 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
379 h->ref_cache[list][scan8[0] + 0 - 1*8]=
380 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
381 h->ref_cache[list][scan8[0] + 2 - 1*8]=
382 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
384 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
385 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
386 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
387 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
388 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
392 int cache_idx = scan8[0] - 1 + i*2*8;
393 if(USES_LIST(left_type[i], list)){
394 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
395 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
396 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
397 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
398 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
399 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
401 *(uint32_t*)h->mv_cache [list][cache_idx ]=
402 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
403 h->ref_cache[list][cache_idx ]=
404 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
// Only some modes need the diagonal neighbours; skip the rest.
408 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
411 if(USES_LIST(topleft_type, list)){
// topleft_partition==0 (MBAFF special case above) shifts the sampled
// partition from the bottom-right to the middle of the neighbour MB.
412 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
413 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
414 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
415 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
417 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
418 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
421 if(USES_LIST(topright_type, list)){
422 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
423 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
424 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
425 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
427 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
428 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
431 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
// Sentinel entries at the cache fringe so prediction never reads
// uninitialised slots.
434 h->ref_cache[list][scan8[5 ]+1] =
435 h->ref_cache[list][scan8[7 ]+1] =
436 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
437 h->ref_cache[list][scan8[4 ]] =
438 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
439 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
440 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
441 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
442 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
443 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
// CABAC motion-vector-difference cache, mirrored from mvd_table.
446 /* XXX beurk, Load mvd */
447 if(USES_LIST(top_type, list)){
448 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
449 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
450 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
451 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
452 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
454 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
455 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
456 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
457 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
459 if(USES_LIST(left_type[0], list)){
460 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
461 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
462 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
464 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
465 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
467 if(USES_LIST(left_type[1], list)){
468 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
469 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
470 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
472 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
473 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
475 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
476 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
477 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
478 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
479 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
// B slices: cache per-8x8 direct-mode flags of the neighbours for CABAC
// context selection.
481 if(h->slice_type_nos == FF_B_TYPE){
482 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
484 if(IS_DIRECT(top_type)){
485 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
486 }else if(IS_8X8(top_type)){
487 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
488 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
489 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
491 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
494 if(IS_DIRECT(left_type[0]))
495 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
496 else if(IS_8X8(left_type[0]))
497 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
499 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
501 if(IS_DIRECT(left_type[1]))
502 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
503 else if(IS_8X8(left_type[1]))
504 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
506 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
// MBAFF field/frame conversion of the cached neighbour MVs/refs: the
// MAP_F2F macro is defined differently for the field->frame and
// frame->field directions (ref index and vertical MV component scale
// by a factor of 2 in opposite senses) and applied to every cached
// neighbour position listed below.
512 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
513 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
514 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
515 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
516 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
517 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
518 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
519 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
520 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
521 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
523 #define MAP_F2F(idx, mb_type)\
524 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
525 h->ref_cache[list][idx] <<= 1;\
526 h->mv_cache[list][idx][1] /= 2;\
527 h->mvd_cache[list][idx][1] /= 2;\
532 #define MAP_F2F(idx, mb_type)\
533 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
534 h->ref_cache[list][idx] >>= 1;\
535 h->mv_cache[list][idx][1] <<= 1;\
536 h->mvd_cache[list][idx][1] <<= 1;\
// Neighbour 8x8 DCT usage count, used for the transform-size CABAC ctx.
546 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
// Write the intra4x4 prediction modes needed by future neighbour MBs
// from the prediction-mode cache back into the per-MB
// intra4x4_pred_mode array (bottom row into slots [0..3]+[4..6] layout
// matching the reads in fill_caches()).
549 static inline void write_back_intra_pred_mode(H264Context *h){
550 const int mb_xy= h->mb_xy;
552 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
553 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
554 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
555 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
556 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
557 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
558 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
562 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
// Returns 0 on success; the error path (av_log + return of an error
// value) fires when a cached mode maps to -1, i.e. the mode requires a
// neighbour that is unavailable.
564 static inline int check_intra4x4_pred_mode(H264Context *h){
565 MpegEncContext * const s = &h->s;
// Remap tables indexed by prediction mode: -1 = mode needs that
// neighbour (bitstream error if missing), otherwise the replacement
// mode (e.g. DC_PRED -> LEFT_DC_PRED when the top row is missing).
566 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
567 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
// Bit 0x8000 of top_samples_available == top row of the first 4x4 col.
570 if(!(h->top_samples_available&0x8000)){
572 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
574 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
577 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
// One availability bit per 4x4 row on the left edge.
582 if((h->left_samples_available&0x8888)!=0x8888){
583 static const int mask[4]={0x8000,0x2000,0x80,0x20};
585 if(!(h->left_samples_available&mask[i])){
586 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
588 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
591 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
598 } //FIXME cleanup like next
601 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
// 16x16-luma / chroma variant of the check above: validates 'mode'
// against neighbour availability and returns the (possibly remapped)
// mode, or an error value when the required neighbour is missing.
603 static inline int check_intra_pred_mode(H264Context *h, int mode){
604 MpegEncContext * const s = &h->s;
// -1 entries mark modes that cannot be remapped (need that neighbour).
605 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
606 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
609 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
613 if(!(h->top_samples_available&0x8000)){
616 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
// 0x8080 = availability of both left halves (MBAFF-aware).
621 if((h->left_samples_available&0x8080) != 0x8080){
623 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
624 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
627 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
636 * gets the predicted intra4x4 prediction mode.
// Prediction is the minimum of the cached left and top neighbour modes
// (per the H.264 spec); DC_PRED when either neighbour is unavailable
// (unavailable slots hold negative values, hence min<0).
638 static inline int pred_intra_mode(H264Context *h, int n){
639 const int index8= scan8[n];
640 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
641 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
642 const int min= FFMIN(left, top);
644 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
646 if(min<0) return DC_PRED;
// Write the non-zero coefficient counts needed by future neighbour MBs
// from the cache back into the per-MB non_zero_count array (inverse of
// the loads done in fill_caches(); note the deliberately swapped
// indices, e.g. [7] <- cache[2+8*1]).
650 static inline void write_back_non_zero_count(H264Context *h){
651 const int mb_xy= h->mb_xy;
653 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
654 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
655 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
656 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
657 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
658 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
659 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
// chroma U plane
661 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
662 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
663 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
// chroma V plane
665 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
666 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
667 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
671 * gets the predicted number of non-zero coefficients.
672 * @param n block index
// Combines the cached left and top neighbour counts into i and rounds
// (i+1)>>1 when both were real counts (values >= 64 flag "unavailable"
// markers, handled by the elided branches); the low 5 bits are the
// usable prediction, as the trace output shows.
674 static inline int pred_non_zero_count(H264Context *h, int n){
675 const int index8= scan8[n];
676 const int left= h->non_zero_count_cache[index8 - 1];
677 const int top = h->non_zero_count_cache[index8 - 8];
680 if(i<64) i= (i+1)>>1;
682 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
// Fetch neighbour block C (top-right, falling back to top-left) for MV
// prediction: sets *C to its motion vector and returns its reference
// index.  Contains the MBAFF cross-field cases, where the neighbour's
// vertical MV component and ref index must be rescaled between
// field and frame units (see SET_DIAG_MV).
687 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
688 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
689 MpegEncContext *s = &h->s;
691 /* there is no consistent mapping of mvs to neighboring locations that will
692 * make mbaff happy, so we can't move all this logic to fill_caches */
694 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
// Scratch slot scan8[0]-2 holds the synthesised C vector.
696 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
697 *C = h->mv_cache[list][scan8[0]-2];
700 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
701 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
702 if(IS_INTERLACED(mb_types[topright_xy])){
// SET_DIAG_MV: read mv/ref at 4x4 position (x4,y4) of the picture,
// scale the vertical component with MV_OP and the ref with REF_OP,
// store into the scratch C slot and return the scaled ref.
703 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
704 const int x4 = X4, y4 = Y4;\
705 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
706 if(!USES_LIST(mb_type,list))\
707 return LIST_NOT_USED;\
708 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
709 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
710 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
711 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
// frame MB reading a field neighbour: mv_y*2, ref>>1.
713 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
716 if(topright_ref == PART_NOT_AVAILABLE
717 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
718 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
720 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
721 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
724 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
726 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
727 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
// Non-MBAFF fast path: top-right if available, else top-left.
733 if(topright_ref != PART_NOT_AVAILABLE){
734 *C= h->mv_cache[list][ i - 8 + part_width ];
737 tprintf(s->avctx, "topright MV not available\n");
739 *C= h->mv_cache[list][ i - 8 - 1 ];
740 return h->ref_cache[list][ i - 8 - 1 ];
745 * gets the predicted MV.
746 * @param n the block index
747 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
748 * @param mx the x component of the predicted motion vector
749 * @param my the y component of the predicted motion vector
// Standard H.264 median motion-vector prediction: component-wise median
// of neighbours A (left), B (top) and C (diagonal, via
// fetch_diagonal_mv), with the single-match and edge special cases.
751 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
752 const int index8= scan8[n];
753 const int top_ref= h->ref_cache[list][ index8 - 8 ];
754 const int left_ref= h->ref_cache[list][ index8 - 1 ];
755 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
756 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
758 int diagonal_ref, match_count;
760 assert(part_width==1 || part_width==2 || part_width==4);
770 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
// How many neighbours use the same reference picture as this block.
771 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
772 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
773 if(match_count > 1){ //most common
774 *mx= mid_pred(A[0], B[0], C[0]);
775 *my= mid_pred(A[1], B[1], C[1]);
776 }else if(match_count==1){
// Exactly one matching neighbour: use its MV directly (branches below).
780 }else if(top_ref==ref){
// No match: only the left neighbour exists -> take A, else median.
788 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
792 *mx= mid_pred(A[0], B[0], C[0]);
793 *my= mid_pred(A[1], B[1], C[1]);
797 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
801 * gets the directionally predicted 16x8 MV.
802 * @param n the block index
803 * @param mx the x component of the predicted motion vector
804 * @param my the y component of the predicted motion vector
// 16x8 partitions first try a single directional predictor (top
// neighbour for the upper partition, left neighbour for the lower one);
// when the directional neighbour's ref does not match, fall back to the
// generic median predictor below.
806 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
// upper partition: top neighbour B
808 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
809 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
811 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
// lower partition: left neighbour A
819 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
820 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
822 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
// fallback: generic median prediction
832 pred_motion(h, n, 4, list, ref, mx, my);
836 * gets the directionally predicted 8x16 MV.
837 * @param n the block index
838 * @param mx the x component of the predicted motion vector
839 * @param my the y component of the predicted motion vector
// 8x16 partitions: left neighbour for the left partition, diagonal
// neighbour C for the right partition; fall back to the median
// predictor when the directional neighbour's ref does not match.
841 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
// left partition: left neighbour A
843 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
844 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
846 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
// right partition: diagonal neighbour of the 8x8 block at scan8[4]
857 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
859 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
861 if(diagonal_ref == ref){
// fallback: generic median prediction
869 pred_motion(h, n, 2, list, ref, mx, my);
// P-Skip motion prediction: the skip MV is zero when either neighbour
// is unavailable, or when a neighbour uses ref 0 with a zero MV
// (the !(ref | packed_mv) test); otherwise the regular median
// predictor for a 16x16 partition with ref 0 is used.
872 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
873 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
874 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
876 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
878 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
879 || !( top_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ])
880 || !(left_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ])){
886 pred_motion(h, 0, 4, 0, 0, mx, my);
// Temporal-direct distance scale factor for list-0 reference i:
// dist_scale_factor = clip(((tb*tx + 32) >> 6), -1024, 1023) with
// tx = (16384 + |td|/2) / td, td/tb being the clipped POC distances
// (H.264 8.4.1.2.3).  Long-term references and td==0 take the elided
// early-return path.
891 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
892 int poc0 = h->ref_list[0][i].poc;
893 int td = av_clip(poc1 - poc0, -128, 127);
894 if(td == 0 || h->ref_list[0][i].long_ref){
897 int tb = av_clip(poc - poc0, -128, 127);
898 int tx = (16384 + (FFABS(td) >> 1)) / td;
899 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
// Precompute the temporal-direct dist_scale_factor tables for the whole
// slice: one entry per list-0 reference, plus per-field tables (with
// doubled ref count and the i+16 field-reference offset) for MBAFF.
903 static inline void direct_dist_scale_factor(H264Context * const h){
904 MpegEncContext * const s = &h->s;
905 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
906 const int poc1 = h->ref_list[1][0].poc;
908 for(field=0; field<2; field++){
909 const int poc = h->s.current_picture_ptr->field_poc[field];
910 const int poc1 = h->ref_list[1][0].field_poc[field];
911 for(i=0; i < 2*h->ref_count[0]; i++)
912 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
915 for(i=0; i<h->ref_count[0]; i++){
916 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
// Build the map from the co-located picture's (list-1 ref) reference
// indices to the current slice's list-0 indices, matching references by
// their encoded POC (4*frame_num + reference field bits).  With mbafi
// set, the field variants (entries 16..16+2*ref_count) are filled.
920 static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
921 MpegEncContext * const s = &h->s;
922 Picture * const ref1 = &h->ref_list[1][0];
923 int j, old_ref, rfield;
924 int start= mbafi ? 16 : 0;
925 int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
926 int interl= mbafi || s->picture_structure != PICT_FRAME;
928 /* bogus; fills in for missing frames */
929 memset(map[list], 0, sizeof(map[list]));
931 for(rfield=0; rfield<2; rfield++){
932 for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
933 int poc = ref1->ref_poc[colfield][list][old_ref];
// Adjust frame-POCs to the requested field parity when interlaced.
937 else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
938 poc= (poc&~3) + rfield + 1;
940 for(j=start; j<end; j++){
941 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
942 int cur_ref= mbafi ? (j-16)^field : j;
943 map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
945 map[list][old_ref] = cur_ref;
/**
 * Records the current picture's reference lists (counts and packed POCs)
 * into the Picture struct so future B-frames can use them as a colocated
 * picture, then builds the col->list0 maps for direct prediction.
 */
953 static inline void direct_ref_list_init(H264Context * const h){
954     MpegEncContext * const s = &h->s;
955     Picture * const ref1 = &h->ref_list[1][0];
956     Picture * const cur = s->current_picture_ptr;
    // side index: 0/1 selects which field's ref info to write/read
958     int sidx= (s->picture_structure&1)^1;
959     int ref1sidx= (ref1->reference&1)^1;
961     for(list=0; list<2; list++){
962         cur->ref_count[sidx][list] = h->ref_count[list];
963         for(j=0; j<h->ref_count[list]; j++)
            // pack POC as 4*frame_num + reference-parity bits (matches fill_colmap)
964             cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
967     if(s->picture_structure == PICT_FRAME){
        // frame picture: both field slots share the same lists
968         memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
969         memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
972     cur->mbaff= FRAME_MBAFF;
    // maps are only needed for temporal direct in B-frames
974     if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
977     for(list=0; list<2; list++){
978         fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
979         for(field=0; field<2; field++)
980             fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
/**
 * Derives motion vectors and reference indices for a B_DIRECT macroblock
 * (or the direct 8x8 partitions of a B_8x8 MB), filling mv_cache/ref_cache.
 * Handles both spatial and temporal direct modes, including the
 * frame/field (MBAFF, PAFF) colocated-position adjustments.
 * H.264 spec 8.4.1.2.
 */
984 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
985     MpegEncContext * const s = &h->s;
986     int b8_stride = h->b8_stride;
987     int b4_stride = h->b_stride;
988     int mb_xy = h->mb_xy;
990     const int16_t (*l1mv0)[2], (*l1mv1)[2]; // colocated picture's list0/list1 MVs
991     const int8_t *l1ref0, *l1ref1;          // colocated picture's ref indices
992     const int is_b8x8 = IS_8X8(*mb_type);
993     unsigned int sub_mb_type;
996 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
    // First: locate the colocated macroblock and decide the partition sizes,
    // depending on the frame/field coding of current MB vs colocated MB.
998     if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
999         if(!IS_INTERLACED(*mb_type)){                    //     AFR/FR    -> AFL/FL
            // frame MB, field colocated: pick the colocated field closest in POC
1000             int cur_poc = s->current_picture_ptr->poc;
1001             int *col_poc = h->ref_list[1]->field_poc;
1002             int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1003             mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1005         }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
            // opposite-parity field: step to the other field's MB row
1006             int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1007             mb_xy += s->mb_stride*fieldoff;
1010     }else{ // AFL/AFR/FR/FL -> AFR/FR
1011         if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
            // field MB, frame colocated: one colocated MB per field half
1012             mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1013             mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1014             mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1017             //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1018             if(    (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1019                 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1021                 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1022                 *mb_type   |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1024                 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1025                 *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
1027     }else{ // AFR/FR -> AFR/FR
1030         mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
1031         if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1032             /* FIXME save sub mb types from previous frames (or derive from MVs)
1033             * so we know exactly what block size to use */
1034             sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1035             *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
1036         }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1037             sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1038             *mb_type   |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1040             sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1041             *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
    // Pointers into the colocated picture's motion/reference planes at mb_xy
1046     l1mv0  = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1047     l1mv1  = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1048     l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1049     l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
    // step to the bottom field half of the colocated data (context not visible)
1052             l1ref0 += h->b8_stride;
1053             l1ref1 += h->b8_stride;
1054             l1mv0  +=  2*b4_stride;
1055             l1mv1  +=  2*b4_stride;
1059     if(h->direct_spatial_mv_pred){
1064         /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1066         /* ref = min(neighbors) */
1067         for(list=0; list<2; list++){
1068             int refa = h->ref_cache[list][scan8[0] - 1];
1069             int refb = h->ref_cache[list][scan8[0] - 8];
1070             int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1071             if(refc == PART_NOT_AVAILABLE)
1072                 refc = h->ref_cache[list][scan8[0] - 8 - 1];
            // unsigned compare makes negative (unavailable) refs sort last
1073             ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
1078         if(ref[0] < 0 && ref[1] < 0){
            // no valid neighbor refs: spec says use ref 0 with zero MVs
1079             ref[0] = ref[1] = 0;
1080             mv[0][0] = mv[0][1] =
1081             mv[1][0] = mv[1][1] = 0;
1083             for(list=0; list<2; list++){
1085                     pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1087                     mv[list][0] = mv[list][1] = 0;
        // drop the unused list from the MB/sub-MB types
1093             *mb_type &= ~MB_TYPE_L1;
1094             sub_mb_type &= ~MB_TYPE_L1;
1095         }else if(ref[0] < 0){
1097             *mb_type &= ~MB_TYPE_L0;
1098             sub_mb_type &= ~MB_TYPE_L0;
        // spatial direct with frame/field mismatch: per-8x8 colocated lookup
1101         if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1102             for(i8=0; i8<4; i8++){
1105                 int xy8 = x8+y8*b8_stride;
1106                 int xy4 = 3*x8+y8*b4_stride;
1109                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1111                 h->sub_mb_type[i8] = sub_mb_type;
1113                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1114                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
                // colocated is a "moving block"? if not, MVs may be zeroed per spec
1115                 if(!IS_INTRA(mb_type_col[y8])
1116                     && (   (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1117                         || (l1ref0[xy8]  < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1119                         a= pack16to32(mv[0][0],mv[0][1]);
1121                         b= pack16to32(mv[1][0],mv[1][1]);
1123                     a= pack16to32(mv[0][0],mv[0][1]);
1124                     b= pack16to32(mv[1][0],mv[1][1]);
1126                 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1127                 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1129         }else if(IS_16X16(*mb_type)){
        // whole-MB spatial direct
1132             fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1133             fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1134             if(!IS_INTRA(mb_type_col[0])
1135                 && (   (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1136                     || (l1ref0[0]  < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
                       // x264 build <= 33 workaround (old encoder bug), see x264_build checks below
1137                        && (h->x264_build>33 || !h->x264_build)))){
1139                     a= pack16to32(mv[0][0],mv[0][1]);
1141                     b= pack16to32(mv[1][0],mv[1][1]);
1143                 a= pack16to32(mv[0][0],mv[0][1]);
1144                 b= pack16to32(mv[1][0],mv[1][1]);
1146             fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1147             fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
1149             for(i8=0; i8<4; i8++){
1150                 const int x8 = i8&1;
1151                 const int y8 = i8>>1;
1153                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1155                 h->sub_mb_type[i8] = sub_mb_type;
1157                 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1158                 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1159                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1160                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
                /* col_zero_flag: zero MVs where the colocated block is static */
1163                 if(!IS_INTRA(mb_type_col[0]) && (   l1ref0[x8 + y8*b8_stride] == 0
1164                                                 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1165                                                     && (h->x264_build>33 || !h->x264_build)))){
1166                     const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1167                     if(IS_SUB_8X8(sub_mb_type)){
1168                         const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1169                         if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1171                                 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1173                                 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1176                     for(i4=0; i4<4; i4++){
1177                         const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1178                         if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1180                                 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1182                                 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1188     }else{ /* direct temporal mv pred */
1189         const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1190         const int *dist_scale_factor = h->dist_scale_factor;
        // field MB in MBAFF frame: use the per-field maps/scale factors
1193         if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
1194             map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1195             map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1196             dist_scale_factor   =h->dist_scale_factor_field[s->mb_y&1];
1198         if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
        // frame/field mismatch: MVs must additionally be scaled vertically
1201         if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1202             /* FIXME assumes direct_8x8_inference == 1 */
1203             int y_shift  = 2*!IS_INTERLACED(*mb_type);
1205             for(i8=0; i8<4; i8++){
1206                 const int x8 = i8&1;
1207                 const int y8 = i8>>1;
1209                 const int16_t (*l1mv)[2]= l1mv0;
1211                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1213                 h->sub_mb_type[i8] = sub_mb_type;
1215                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1216                 if(IS_INTRA(mb_type_col[y8])){
                    // intra colocated: ref 0, zero MVs for both lists
1217                     fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1218                     fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1219                     fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1223                 ref0 = l1ref0[x8 + y8*b8_stride];
1225                     ref0 = map_col_to_list0[0][ref0 + ref_offset];
1227                     ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1230                 scale = dist_scale_factor[ref0];
1231                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1234                     const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1235                     int my_col = (mv_col[1]<<y_shift)/2; // vertical frame<->field rescale
1236                     int mx = (scale * mv_col[0] + 128) >> 8;
1237                     int my = (scale * my_col + 128) >> 8;
1238                     fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
                    // list1 MV = list0 MV - colocated MV (temporal direct identity)
1239                     fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1245         /* one-to-one mv scaling */
1247         if(IS_16X16(*mb_type)){
1250             fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1251             if(IS_INTRA(mb_type_col[0])){
1254                 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1255                                                 : map_col_to_list0[1][l1ref1[0] + ref_offset];
1256                 const int scale = dist_scale_factor[ref0];
1257                 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1259                 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1260                 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1262                 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1263                 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1265             fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1266             fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1267             fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1269             for(i8=0; i8<4; i8++){
1270                 const int x8 = i8&1;
1271                 const int y8 = i8>>1;
1273                 const int16_t (*l1mv)[2]= l1mv0;
1275                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1277                 h->sub_mb_type[i8] = sub_mb_type;
1278                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1279                 if(IS_INTRA(mb_type_col[0])){
1280                     fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1281                     fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1282                     fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1286                 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
1288                     ref0 = map_col_to_list0[0][ref0];
1290                     ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1293                 scale = dist_scale_factor[ref0];
1295                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1296                 if(IS_SUB_8X8(sub_mb_type)){
1297                     const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1298                     int mx = (scale * mv_col[0] + 128) >> 8;
1299                     int my = (scale * mv_col[1] + 128) >> 8;
1300                     fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1301                     fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1303                     for(i4=0; i4<4; i4++){
1304                         const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1305                         int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1306                         mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1307                         mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1308                         *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1309                             pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the per-MB motion info from the decode caches (mv_cache/ref_cache/
 * mvd_cache) back into the frame-wide arrays of current_picture, plus the
 * CABAC mvd and direct tables.
 */
1316 static inline void write_back_motion(H264Context *h, int mb_type){
1317     MpegEncContext * const s = &h->s;
1318     const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;  // top-left 4x4 block index
1319     const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride; // top-left 8x8 block index
1322         if(!USES_LIST(mb_type, 0))
1323             fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1325     for(list=0; list<h->list_count; list++){
1327         if(!USES_LIST(mb_type, list))
            // copy a 4x4 MV grid, two MVs (one uint64) at a time
1331             *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1332             *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1334         if( h->pps.cabac ) {
1335             if(IS_SKIP(mb_type))
1336                 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1339                 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1340                 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
            // one ref index per 8x8 partition
1345             int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1346             ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1347             ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1348             ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1349             ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
1353     if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
        // direct_table is read as a CABAC context by neighboring MBs
1354         if(IS_8X8(mb_type)){
1355             uint8_t *direct_table = &h->direct_table[b8_xy];
1356             direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1357             direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1358             direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
/**
 * Unescapes a NAL unit: strips the header byte into nal_ref_idc /
 * nal_unit_type and removes 0x000003 emulation-prevention sequences,
 * returning a pointer to the RBSP payload (either the input itself when
 * no escapes are present, or an internal rbsp_buffer).
 *
 * @param dst_length length of the unescaped payload, in bytes
 * @param consumed   number of input bytes consumed (including the header)
 */
1363 const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1368 //    src[0]&0x80;              //forbidden bit
1369     h->nal_ref_idc= src[0]>>5;
1370     h->nal_unit_type= src[0]&0x1F;
1374     for(i=0; i<length; i++)
1375         printf("%2X ", src[i]);
    // fast scan for a zero byte (candidate escape/startcode), word at a time
1378 #if HAVE_FAST_UNALIGNED
1379 # if HAVE_FAST_64BIT
1381     for(i=0; i+1<length; i+=9){
1382         if(!((~*(const uint64_t*)(src+i) & (*(const uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
1385     for(i=0; i+1<length; i+=5){
1386         if(!((~*(const uint32_t*)(src+i) & (*(const uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
1389     if(i>0 && !src[i]) i--; // back up onto the zero byte we overshot
    // scalar fallback: look for 00 00 0x with x<=3
1393     for(i=0; i+1<length; i+=2){
1394         if(src[i]) continue;
1395         if(i>0 && src[i-1]==0) i--;
1397         if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1399                 /* startcode, so we must be past the end */
1407     if(i>=length-1){ //no escaped 0
1408         *dst_length= length;
1409         *consumed= length+1; //+1 for the header
1413     bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1414     h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
1415     dst= h->rbsp_buffer[bufidx];
1421 //printf("decoding esc\n");
1422     memcpy(dst, src, i); // everything before the first escape is verbatim
1425     //remove escapes (very rare 1:2^22)
1427             dst[di++]= src[si++];
1428             dst[di++]= src[si++];
1429         }else if(src[si]==0 && src[si+1]==0){
1430             if(src[si+2]==3){ //escape: 00 00 03 -> 00 00, skip the 03 byte
1435             }else //next start code
1439         dst[di++]= src[si++];
1442         dst[di++]= src[si++];
    // padding so bitstream readers can safely over-read
1445     memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
1448     *consumed= si + 1;//+1 for the header
1449 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
/**
 * Locates the rbsp_stop_one_bit in the last byte of the RBSP.
 * (body largely not visible in this chunk — presumably returns the number
 * of trailing bytes/bits to drop; TODO confirm against full source)
 */
1453 int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1457     tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1467  * IDCT transforms the 16 dc values and dequantizes them.
1468  * @param qp quantization parameter
1470 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1473     int temp[16]; //FIXME check if this is a good idea
    // offsets of the 4 luma DC rows/columns inside the interleaved block layout
1474     static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1475     static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1477 //memset(block, 64, 2*256);
    // first pass: 4-point Hadamard butterflies along one axis, into temp[]
1480         const int offset= y_offset[i];
1481         const int z0= block[offset+stride*0] + block[offset+stride*4];
1482         const int z1= block[offset+stride*0] - block[offset+stride*4];
1483         const int z2= block[offset+stride*1] - block[offset+stride*5];
1484         const int z3= block[offset+stride*1] + block[offset+stride*5];
    // second pass: butterflies along the other axis, then dequantize
1493         const int offset= x_offset[i];
1494         const int z0= temp[4*0+i] + temp[4*2+i];
1495         const int z1= temp[4*0+i] - temp[4*2+i];
1496         const int z2= temp[4*1+i] - temp[4*3+i];
1497         const int z3= temp[4*1+i] + temp[4*3+i];
1499         block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1500         block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1501         block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1502         block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1508  * DCT transforms the 16 dc values.
1509  * @param qp quantization parameter ??? FIXME
1511 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1512 //    const int qmul= dequant_coeff[qp][0];
1514     int temp[16]; //FIXME check if this is a good idea
    // same block layout offsets as the inverse transform above
1515     static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1516     static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
    // first pass: 4-point Hadamard butterflies into temp[]
1519         const int offset= y_offset[i];
1520         const int z0= block[offset+stride*0] + block[offset+stride*4];
1521         const int z1= block[offset+stride*0] - block[offset+stride*4];
1522         const int z2= block[offset+stride*1] - block[offset+stride*5];
1523         const int z3= block[offset+stride*1] + block[offset+stride*5];
    // second pass: butterflies, then >>1 normalization (forward transform)
1532         const int offset= x_offset[i];
1533         const int z0= temp[4*0+i] + temp[4*2+i];
1534         const int z1= temp[4*0+i] - temp[4*2+i];
1535         const int z2= temp[4*1+i] - temp[4*3+i];
1536         const int z3= temp[4*1+i] + temp[4*3+i];
1538         block[stride*0 +offset]= (z0 + z3)>>1;
1539         block[stride*2 +offset]= (z1 + z2)>>1;
1540         block[stride*8 +offset]= (z1 - z2)>>1;
1541         block[stride*10+offset]= (z0 - z3)>>1;
/**
 * 2x2 inverse Hadamard transform + dequantization of the chroma DC
 * coefficients (stored at xStride/stride spacing inside the block).
 */
1549 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1550     const int stride= 16*2;
1551     const int xStride= 16;
1554     a= block[stride*0 + xStride*0];
1555     b= block[stride*0 + xStride*1];
1556     c= block[stride*1 + xStride*0];
1557     d= block[stride*1 + xStride*1];
    // (intermediate butterflies not visible in this chunk; e is derived
    //  from a/b/d there — TODO confirm against full source)
1564     block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1565     block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1566     block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1567     block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/**
 * 2x2 forward Hadamard transform of the chroma DC coefficients
 * (encoder-side counterpart of chroma_dc_dequant_idct_c, no scaling).
 */
1571 static void chroma_dc_dct_c(DCTELEM *block){
1572     const int stride= 16*2;
1573     const int xStride= 16;
1576     a= block[stride*0 + xStride*0];
1577     b= block[stride*0 + xStride*1];
1578     c= block[stride*1 + xStride*0];
1579     d= block[stride*1 + xStride*1];
    // (intermediate butterflies not visible in this chunk)
1586     block[stride*0 + xStride*0]= (a+c);
1587     block[stride*0 + xStride*1]= (e+b);
1588     block[stride*1 + xStride*0]= (a-c);
1589     block[stride*1 + xStride*1]= (e-b);
1594  * gets the chroma qp.
    // t selects Cb vs Cr (separate QP offset tables in the PPS)
1596 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1597     return h->pps.chroma_qp_table[t][qscale];
/**
 * Motion compensation for one partition in one direction (one list):
 * fetches the quarter-pel luma and eighth-pel chroma prediction from the
 * given reference picture, using edge emulation when the MV points
 * outside the picture.
 *
 * @param n        index of the first 4x4 block of the partition (scan8 order)
 * @param square   nonzero if a single square qpel op covers the partition
 * @param delta    byte offset to the second half when !square
 */
1600 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1601                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1602                            int src_x_offset, int src_y_offset,
1603                            qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1604     MpegEncContext * const s = &h->s;
    // MV in quarter-pel units, offset to this partition's position
1605     const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1606     int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1607     const int luma_xy= (mx&3) + ((my&3)<<2); // qpel phase selects the filter
1608     uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1609     uint8_t * src_cb, * src_cr;
1610     int extra_width= h->emu_edge_width;
1611     int extra_height= h->emu_edge_height;
1613     const int full_mx= mx>>2;
1614     const int full_my= my>>2;
1615     const int pic_width  = 16*s->mb_width;
1616     const int pic_height = 16*s->mb_height >> MB_FIELD;
    // subpel interpolation reads 3 extra border pixels, shrink the safe area
1618     if(mx&7) extra_width -= 3;
1619     if(my&7) extra_height -= 3;
1621     if(   full_mx < 0-extra_width
1622        || full_my < 0-extra_height
1623        || full_mx + 16/*FIXME*/ > pic_width + extra_width
1624        || full_my + 16/*FIXME*/ > pic_height + extra_height){
        // out of the padded picture: build an edge-extended copy first
1625         ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1626             src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1630     qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1632         qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1635     if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1638         // chroma offset when predicting from a field of opposite parity
1639         my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1640         emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1642     src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1643     src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1646         ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1647             src_cb= s->edge_emu_buffer;
1649     chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1652         ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1653             src_cr= s->edge_emu_buffer;
1655     chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/**
 * Standard (non-weighted) motion compensation for one partition:
 * list0 prediction with the "put" ops, then optional list1 prediction
 * averaged on top via the "avg" ops (bi-prediction).
 */
1658 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1659                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1660                            int x_offset, int y_offset,
1661                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1662                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1663                            int list0, int list1){
1664     MpegEncContext * const s = &h->s;
1665     qpel_mc_func *qpix_op=  qpix_put;
1666     h264_chroma_mc_func chroma_op= chroma_put;
    // move dest pointers to this partition; offsets are in chroma-sample units
1668     dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
1669     dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
1670     dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
1671     x_offset += 8*s->mb_x;
1672     y_offset += 8*(s->mb_y >> MB_FIELD);
1675         Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1676         mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1677                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
1678                    qpix_op, chroma_op);
        // after list0, switch to averaging ops so list1 blends in
1681         chroma_op= chroma_avg;
1685         Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1686         mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1687                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
1688                    qpix_op, chroma_op);
/**
 * Weighted-prediction motion compensation for one partition.
 * Bi-directional case predicts each list into separate buffers and
 * combines them with implicit (use_weight==2) or explicit biweights;
 * uni-directional case predicts then applies per-list weight/offset.
 */
1692 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1693                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1694                            int x_offset, int y_offset,
1695                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1696                            h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1697                            h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1698                            int list0, int list1){
1699     MpegEncContext * const s = &h->s;
1701     dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
1702     dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
1703     dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
1704     x_offset += 8*s->mb_x;
1705     y_offset += 8*(s->mb_y >> MB_FIELD);
1708         /* don't optimize for luma-only case, since B-frames usually
1709          * use implicit weights => chroma too. */
        // list1 prediction goes into scratch space, blended below
1710         uint8_t *tmp_cb = s->obmc_scratchpad;
1711         uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1712         uint8_t *tmp_y  = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1713         int refn0 = h->ref_cache[0][ scan8[n] ];
1714         int refn1 = h->ref_cache[1][ scan8[n] ];
1716         mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1717                     dest_y, dest_cb, dest_cr,
1718                     x_offset, y_offset, qpix_put, chroma_put);
1719         mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1720                     tmp_y, tmp_cb, tmp_cr,
1721                     x_offset, y_offset, qpix_put, chroma_put);
1723         if(h->use_weight == 2){
            // implicit weighting: weights from POC distances, sum to 64
1724             int weight0 = h->implicit_weight[refn0][refn1];
1725             int weight1 = 64 - weight0;
1726             luma_weight_avg(  dest_y, tmp_y, h->  mb_linesize, 5, weight0, weight1, 0);
1727             chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1728             chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
            // explicit weighting: per-list per-ref weights and offsets
1730             luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1731                             h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1732                             h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1733             chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1734                             h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1735                             h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1736             chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1737                             h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1738                             h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
        // uni-directional: predict, then scale in place
1741         int list = list1 ? 1 : 0;
1742         int refn = h->ref_cache[list][ scan8[n] ];
1743         Picture *ref= &h->ref_list[list][refn];
1744         mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1745                     dest_y, dest_cb, dest_cr, x_offset, y_offset,
1746                     qpix_put, chroma_put);
1748         luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1749                        h->luma_weight[list][refn], h->luma_offset[list][refn]);
1750         if(h->use_weight_chroma){
1751             chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1752                              h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1753             chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1754                              h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/**
 * Dispatches one partition's motion compensation to the weighted or
 * standard path. The implicit-weight 32/32 case is equivalent to a plain
 * average, so it takes the cheaper standard path.
 */
1759 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1760                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1761                            int x_offset, int y_offset,
1762                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1763                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1764                            h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1765                            int list0, int list1){
1766     if((h->use_weight==2 && list0 && list1
1767         && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1768        || h->use_weight==1)
1769         mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1770                          x_offset, y_offset, qpix_put, chroma_put,
1771                          weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1773         mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1774                     x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1777 static inline void prefetch_motion(H264Context *h, int list){
1778     /* fetch pixels for estimated mv 4 macroblocks ahead
1779      * optimized for 64byte cache lines */
1780     MpegEncContext * const s = &h->s;
1781     const int refn = h->ref_cache[list][scan8[0]];
    // full-pel position of the current MB's MV, shifted right for lookahead
1783         const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1784         const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1785         uint8_t **src= h->ref_list[list][refn].data;
1786         int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1787         s->dsp.prefetch(src[0]+off, s->linesize, 4);
1788         off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
        // one prefetch covers both chroma planes (they are contiguous)
1789         s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/**
 * Performs motion compensation for a whole inter macroblock, dispatching
 * per-partition (16x16 / 16x8 / 8x16 / 8x8 with sub-partitions) to
 * mc_part() with the matching qpel/chroma function sizes and weight ops.
 */
1793 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1794                       qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1795                       qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1796                       h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1797     MpegEncContext * const s = &h->s;
1798     const int mb_xy= h->mb_xy;
1799     const int mb_type= s->current_picture.mb_type[mb_xy];
1801     assert(IS_INTER(mb_type));
    // prime the cache with the list0 reference before doing the work
1803     prefetch_motion(h, 0);
1805     if(IS_16X16(mb_type)){
1806         mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1807                 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1808                 &weight_op[0], &weight_avg[0],
1809                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1810     }else if(IS_16X8(mb_type)){
1811         mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1812                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1813                 &weight_op[1], &weight_avg[1],
1814                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1815         mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1816                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1817                 &weight_op[1], &weight_avg[1],
1818                 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1819     }else if(IS_8X16(mb_type)){
1820         mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1821                 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1822                 &weight_op[2], &weight_avg[2],
1823                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1824         mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1825                 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1826                 &weight_op[2], &weight_avg[2],
1827                 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
    // 8x8: each quadrant has its own sub_mb_type and partitioning
1831         assert(IS_8X8(mb_type));
1834             const int sub_mb_type= h->sub_mb_type[i];
1836             int x_offset= (i&1)<<2;
1837             int y_offset= (i&2)<<1;
1839             if(IS_SUB_8X8(sub_mb_type)){
1840                 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1841                     qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1842                     &weight_op[3], &weight_avg[3],
1843                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1844             }else if(IS_SUB_8X4(sub_mb_type)){
1845                 mc_part(h, n  , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1846                     qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1847                     &weight_op[4], &weight_avg[4],
1848                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1849                 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1850                     qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1851                     &weight_op[4], &weight_avg[4],
1852                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1853             }else if(IS_SUB_4X8(sub_mb_type)){
1854                 mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1855                     qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1856                     &weight_op[5], &weight_avg[5],
1857                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1858                 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1859                     qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1860                     &weight_op[5], &weight_avg[5],
1861                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1864                 assert(IS_SUB_4X4(sub_mb_type));
1866                     int sub_x_offset= x_offset + 2*(j&1);
1867                     int sub_y_offset= y_offset +   (j&2);
1868                     mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1869                         qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1870                         &weight_op[6], &weight_avg[6],
1871                         IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
    // prefetch the list1 reference for the next MB
1877     prefetch_motion(h, 1);
/**
 * Precomputes the CAVLC level decoding LUT: for every suffix length and
 * every LEVEL_TAB_BITS-bit bitstream prefix, stores the decoded signed
 * level and the number of bits consumed. Entries that need more bits than
 * the table width store an escape marker (value >= 100) instead.
 */
1880 static av_cold void init_cavlc_level_tab(void){
1881     int suffix_length, mask;
1884     for(suffix_length=0; suffix_length<7; suffix_length++){
1885         for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
1886             int prefix= LEVEL_TAB_BITS - av_log2(2*i); // leading zeros before the marker bit
1887             int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
            // map unsigned level_code to signed level: 0,1,2,3.. -> 1,-1,2,-2..
1889             mask= -(level_code&1);
1890             level_code= (((2+level_code)>>1) ^ mask) - mask;
1891             if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
1892                 cavlc_level_tab[suffix_length][i][0]= level_code;
1893                 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
1894             }else if(prefix + 1 <= LEVEL_TAB_BITS){
                // suffix continues past the table: caller must read it itself
1895                 cavlc_level_tab[suffix_length][i][0]= prefix+100;
1896                 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
                // even the prefix doesn't fit: full escape
1898                 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
1899                 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
/**
 * One-time initialization of all static CAVLC VLC tables
 * (coeff_token, total_zeros, run_before and their chroma-DC variants).
 * Each VLC is backed by a statically allocated table (see the declarations
 * at the top of the file) and built with INIT_VLC_USE_NEW_STATIC so no
 * heap allocation happens.  Guarded by the 'done' flag so repeated decoder
 * instances reuse the same tables.
 */
1905 static av_cold void decode_init_vlc(void){
1906     static int done = 0;
// chroma DC coeff_token: 4*5 = (total_coeff, trailing_ones) combinations
1913         chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1914         chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1915         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1916                  &chroma_dc_coeff_token_len [0], 1, 1,
1917                  &chroma_dc_coeff_token_bits[0], 1, 1,
1918                  INIT_VLC_USE_NEW_STATIC);
// luma coeff_token: four context-dependent tables packed into one static array,
// 'offset' walks through the packed storage
1922             coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1923             coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1924             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1925                      &coeff_token_len [i][0], 1, 1,
1926                      &coeff_token_bits[i][0], 1, 1,
1927                      INIT_VLC_USE_NEW_STATIC);
1928             offset += coeff_token_vlc_tables_size[i];
1931          * This is a one time safety check to make sure that
1932          * the packed static coeff_token_vlc table sizes
1933          * were initialized correctly.
1935         assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
// chroma DC total_zeros: one table per possible total_coeff value (1..3)
1938             chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1939             chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1940             init_vlc(&chroma_dc_total_zeros_vlc[i],
1941                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1942                      &chroma_dc_total_zeros_len [i][0], 1, 1,
1943                      &chroma_dc_total_zeros_bits[i][0], 1, 1,
1944                      INIT_VLC_USE_NEW_STATIC);
// luma total_zeros: one table per possible total_coeff value (1..15)
1946         for(i=0; i<15; i++){
1947             total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1948             total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1949             init_vlc(&total_zeros_vlc[i],
1950                      TOTAL_ZEROS_VLC_BITS, 16,
1951                      &total_zeros_len [i][0], 1, 1,
1952                      &total_zeros_bits[i][0], 1, 1,
1953                      INIT_VLC_USE_NEW_STATIC);
// run_before tables for zeros_left = 1..6
1957             run_vlc[i].table = run_vlc_tables[i];
1958             run_vlc[i].table_allocated = run_vlc_tables_size;
1959             init_vlc(&run_vlc[i],
1961                      &run_len [i][0], 1, 1,
1962                      &run_bits[i][0], 1, 1,
1963                      INIT_VLC_USE_NEW_STATIC);
// separate wider table for zeros_left > 6
1965         run7_vlc.table = run7_vlc_table,
1966         run7_vlc.table_allocated = run7_vlc_table_size;
1967         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1968                  &run_len [6][0], 1, 1,
1969                  &run_bits[6][0], 1, 1,
1970                  INIT_VLC_USE_NEW_STATIC);
1972         init_cavlc_level_tab();
/**
 * Free all per-context tables allocated by alloc_tables()/context_init().
 * Safe to call on a partially initialized context: av_freep() tolerates
 * NULL and resets each pointer, so subsequent frees are no-ops.
 * Also releases the per-thread buffers of every slice-thread context.
 */
1976 static void free_tables(H264Context *h){
1979     av_freep(&h->intra4x4_pred_mode);
1980     av_freep(&h->chroma_pred_mode_table);
1981     av_freep(&h->cbp_table);
1982     av_freep(&h->mvd_table[0]);
1983     av_freep(&h->mvd_table[1]);
1984     av_freep(&h->direct_table);
1985     av_freep(&h->non_zero_count);
1986     av_freep(&h->slice_table_base);
// slice_table points into slice_table_base with an offset; clear the alias too
1987     h->slice_table= NULL;
1989     av_freep(&h->mb2b_xy);
1990     av_freep(&h->mb2b8_xy);
// per-thread buffers are owned by each thread context, not shared
1992     for(i = 0; i < h->s.avctx->thread_count; i++) {
1993         hx = h->thread_context[i];
1995         av_freep(&hx->top_borders[1]);
1996         av_freep(&hx->top_borders[0]);
1997         av_freep(&hx->s.obmc_scratchpad);
/**
 * Build the 8x8 dequantization tables (one per QP 0..51) for intra (i=0)
 * and inter (i=1) scaling matrices from the PPS.  If both scaling matrices
 * are identical, the inter table aliases the intra one to save work.
 * Coefficients are stored transposed when a non-C IDCT with a different
 * scan permutation is in use.
 */
2001 static void init_dequant8_coeff_table(H264Context *h){
2003     const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2004     h->dequant8_coeff[0] = h->dequant8_buffer[0];
2005     h->dequant8_coeff[1] = h->dequant8_buffer[1];
2007     for(i=0; i<2; i++ ){
// identical intra/inter matrices: share the table, skip recomputation
2008         if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2009             h->dequant8_coeff[1] = h->dequant8_buffer[0];
2013         for(q=0; q<52; q++){
// dequant scale = base coeff (by q%6) << (q/6), per the H.264 dequant rule
2014             int shift = div6[q];
2017                 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2018                     ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2019                     h->pps.scaling_matrix8[i][x]) << shift;
/**
 * Build the six 4x4 dequantization tables (Intra/Inter x Y/Cb/Cr),
 * one entry per QP 0..51, from the PPS scaling matrices.  Tables whose
 * scaling matrices match an earlier one are aliased instead of rebuilt.
 * Stored transposed when a non-C IDCT is in use.
 */
2024 static void init_dequant4_coeff_table(H264Context *h){
2026     const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2027     for(i=0; i<6; i++ ){
2028         h->dequant4_coeff[i] = h->dequant4_buffer[i];
// reuse an already-computed table when the scaling matrices are identical
2030             if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2031                 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2038         for(q=0; q<52; q++){
// +2 compensates the 4x4 transform's fixed-point scaling
2039             int shift = div6[q] + 2;
2042                 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2043                     ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2044                     h->pps.scaling_matrix4[i][x]) << shift;
/**
 * (Re)build all dequantization tables for the current PPS/SPS:
 * always the 4x4 tables, the 8x8 tables only when the PPS enables
 * 8x8 transforms.  With lossless transform_bypass, QP 0 entries are
 * overwritten with the identity scale (1<<6) so bypass blocks pass
 * through the dequant stage unchanged.
 */
2049 static void init_dequant_tables(H264Context *h){
2051     init_dequant4_coeff_table(h);
2052     if(h->pps.transform_8x8_mode)
2053         init_dequant8_coeff_table(h);
2054     if(h->sps.transform_bypass){
// identity scaling for lossless bypass at QP 0
2057                 h->dequant4_coeff[i][0][x] = 1<<6;
2058         if(h->pps.transform_8x8_mode)
2061                 h->dequant8_coeff[i][0][x] = 1<<6;
2068  * needs width/height
2070 static int alloc_tables(H264Context *h){
2071     MpegEncContext * const s = &h->s;
// +1 row of macroblocks so edge macroblocks can read "above" entries safely
2072     const int big_mb_num= s->mb_stride * (s->mb_height+1);
2075     CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2077     CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
2078     CHECKED_ALLOCZ(h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
2079     CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2081     CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2082     CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2083     CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2084     CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
// -1 marks "no slice"; slice_table is offset into the base so index -1 rows exist
2086     memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
2087     h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
// macroblock-xy -> 4x4-block-xy / 8x8-block-xy index maps
2089     CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint32_t));
2090     CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2091     for(y=0; y<s->mb_height; y++){
2092         for(x=0; x<s->mb_width; x++){
2093             const int mb_xy= x + y*s->mb_stride;
2094             const int b_xy = 4*x + 4*y*h->b_stride;
2095             const int b8_xy= 2*x + 2*y*h->b8_stride;
2097             h->mb2b_xy [mb_xy]= b_xy;
2098             h->mb2b8_xy[mb_xy]= b8_xy;
// scratchpad is allocated lazily in frame_start() once linesize is known
2102     s->obmc_scratchpad = NULL;
2104     if(!h->dequant4_coeff[0])
2105         init_dequant_tables(h);
2114  * Mimic alloc_tables(), but for every context thread.
// Shares the per-picture tables of 'src' with a slice-thread context 'dst';
// no allocation happens here — dst must not free these pointers itself
// (free_tables() on the master frees them once).
2116 static void clone_tables(H264Context *dst, H264Context *src){
2117     dst->intra4x4_pred_mode       = src->intra4x4_pred_mode;
2118     dst->non_zero_count           = src->non_zero_count;
2119     dst->slice_table              = src->slice_table;
2120     dst->cbp_table                = src->cbp_table;
2121     dst->mb2b_xy                  = src->mb2b_xy;
2122     dst->mb2b8_xy                 = src->mb2b8_xy;
2123     dst->chroma_pred_mode_table   = src->chroma_pred_mode_table;
2124     dst->mvd_table[0]             = src->mvd_table[0];
2125     dst->mvd_table[1]             = src->mvd_table[1];
2126     dst->direct_table             = src->direct_table;
// scratchpad is per-thread, allocated lazily in frame_start()
2128     dst->s.obmc_scratchpad = NULL;
2129     ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2134  * Allocate buffers which are not shared amongst multiple threads.
// Per-thread top-border buffers: 16 luma + 8+8 chroma bytes per macroblock column.
// Returns 0 on success, -1 on allocation failure (CHECKED_ALLOCZ jumps to fail:).
2136 static int context_init(H264Context *h){
2137     CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2138     CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2142     return -1; // free_tables will clean up for us
/**
 * Initialization shared by all entry points (decoder, and SVQ3 which
 * reuses this context): basic MpegEncContext fields, prediction function
 * pointers, and flat (all-16) default scaling matrices.
 */
2145 static av_cold void common_init(H264Context *h){
2146     MpegEncContext * const s = &h->s;
2148     s->width = s->avctx->width;
2149     s->height = s->avctx->height;
2150     s->codec_id= s->avctx->codec->id;
2152     ff_h264_pred_init(&h->hpc, s->codec_id);
// -1 = "no PPS seen yet"; forces dequant table init on first PPS
2154     h->dequant_coeff_pps= -1;
2155     s->unrestricted_mv=1;
2156     s->decode=1; //FIXME
2158     dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
// default (flat) scaling matrices: every weight 16 == unity scaling
2160     memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2161     memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/**
 * AVCodec.init callback for the H.264 decoder: sets up the MpegEncContext,
 * picks the output pixel format (VDPAU or negotiated via get_format),
 * detects AVC-style ("mp4"/avcC) extradata, and resets POC/SEI state.
 */
2164 static av_cold int decode_init(AVCodecContext *avctx){
2165     H264Context *h= avctx->priv_data;
2166     MpegEncContext * const s = &h->s;
2168     MPV_decode_defaults(s);
2173     s->out_format = FMT_H264;
2174     s->workaround_bugs= avctx->workaround_bugs;
2177 //    s->decode_mb= ff_h263_decode_mb;
2178     s->quarter_sample = 1;
2181     if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
2182         avctx->pix_fmt= PIX_FMT_VDPAU_H264;
2184         avctx->pix_fmt= avctx->get_format(avctx, avctx->codec->pix_fmts);
2185     avctx->hwaccel = ff_find_hwaccel(avctx->codec->id, avctx->pix_fmt);
// avcC extradata starts with configurationVersion == 1 (vs. Annex-B start codes)
2189     if(avctx->extradata_size > 0 && avctx->extradata &&
2190        *(char *)avctx->extradata == 1){
2197     h->thread_context[0] = h;
// sentinel values: nothing output yet, no valid previous POC MSB
2198     h->outputed_poc = INT_MIN;
2199     h->prev_poc_msb= 1<<16;
2200     h->sei_recovery_frame_cnt = -1;
2201     h->sei_dpb_output_delay = 0;
2202     h->sei_cpb_removal_delay = -1;
2203     h->sei_buffering_period_present = 0;
/**
 * Per-frame setup: starts the MPV frame, initializes error resilience,
 * precomputes per-block destination offsets for frame and field decoding,
 * lazily allocates the per-thread bipred scratchpad (linesize is only
 * known here), and resets key_frame/reference/POC state on the new picture.
 * Returns 0 on success, negative on MPV_frame_start failure.
 */
2207 static int frame_start(H264Context *h){
2208     MpegEncContext * const s = &h->s;
2211     if(MPV_frame_start(s, s->avctx) < 0)
2213     ff_er_frame_start(s);
2215      * MPV_frame_start uses pict_type to derive key_frame.
2216      * This is incorrect for H.264; IDR markings must be used.
2217      * Zero here; IDR markings per slice in frame or fields are ORed in later.
2218      * See decode_nal_units().
2220     s->current_picture_ptr->key_frame= 0;
2222     assert(s->linesize && s->uvlinesize);
// block_offset[0..23]: frame-coded offsets; [24..47]: field-coded (doubled stride)
2224     for(i=0; i<16; i++){
2225         h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2226         h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2229         h->block_offset[16+i]=
2230         h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2231         h->block_offset[24+16+i]=
2232         h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2235     /* can't be in alloc_tables because linesize isn't known there.
2236      * FIXME: redo bipred weight to not require extra buffer? */
2237     for(i = 0; i < s->avctx->thread_count; i++)
2238         if(!h->thread_context[i]->s.obmc_scratchpad)
2239             h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2241     /* some macroblocks will be accessed before they're available */
2242     if(FRAME_MBAFF || s->avctx->thread_count > 1)
2243         memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
2245 //    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2247     // We mark the current picture as non-reference after allocating it, so
2248     // that if we break out due to an error it can be released automatically
2249     // in the next MPV_frame_start().
2250     // SVQ3 as well as most other codecs have only last/next/current and thus
2251     // get released even with set reference, besides SVQ3 and others do not
2252     // mark frames as reference later "naturally".
2253     if(s->codec_id != CODEC_ID_SVQ3)
2254         s->current_picture_ptr->reference= 0;
// POCs are filled in per field later; INT_MAX marks them as not yet set
2256     s->current_picture_ptr->field_poc[0]=
2257     s->current_picture_ptr->field_poc[1]= INT_MAX;
2258     assert(s->current_picture_ptr->long_ref==0);
/**
 * Save the bottom row and right-edge columns of the just-decoded macroblock
 * into top_borders[]/left_border[] so the next macroblock row/column can use
 * the unfiltered samples for intra prediction (the deblocking filter will
 * overwrite the picture itself).  MBAFF needs two saved lines (top and
 * bottom macroblock of a pair) selected via top_idx/offset/step.
 */
2263 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2264     MpegEncContext * const s = &h->s;
2273         src_cb -= uvlinesize;
2274         src_cr -= uvlinesize;
2276     if(!simple && FRAME_MBAFF){
2278             offset = MB_MBAFF ? 1 : 17;
2279             uvoffset= MB_MBAFF ? 1 : 9;
// save the last line of the pair's top MB (15 = last luma row, 7 = last chroma row)
2281             *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y +  15*linesize);
2282             *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2283             if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2284                 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2285                 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2290             h->left_border[0]= h->top_borders[0][s->mb_x][15];
2291             if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2292                 h->left_border[34   ]= h->top_borders[0][s->mb_x][16+7  ];
2293                 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2299         top_idx = MB_MBAFF ? 0 : 1;
2301     step= MB_MBAFF ? 2 : 1;
2304     // There are two lines saved, the line above the the top macroblock of a pair,
2305     // and the line above the bottom macroblock
2306     h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
// right-edge column of this MB becomes the left border of the next one
2307     for(i=1; i<17 - skiplast; i++){
2308         h->left_border[offset+i*step]= src_y[15+i*  linesize];
2311     *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y +  16*linesize);
2312     *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2314     if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2315         h->left_border[uvoffset+34   ]= h->top_borders[top_idx][s->mb_x][16+7];
2316         h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2317         for(i=1; i<9 - skiplast; i++){
2318             h->left_border[uvoffset+34   +i*step]= src_cb[7+i*uvlinesize];
2319             h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2321         *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2322         *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/**
 * Swap (xchg=1) or restore (xchg=0) the macroblock's top/left border samples
 * with the saved unfiltered copies, so intra prediction of this macroblock
 * reads pre-deblocking neighbour samples.  Called in pairs around intra
 * prediction when the in-loop filter is enabled.  deblock_left/deblock_top
 * decide which edges actually need the swap (slice-boundary dependent when
 * deblocking_filter==2).
 */
2326 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2327     MpegEncContext * const s = &h->s;
2338     if(!simple && FRAME_MBAFF){
2340             offset = MB_MBAFF ? 1 : 17;
2341             uvoffset= MB_MBAFF ? 1 : 9;
2345         top_idx = MB_MBAFF ? 0 : 1;
2347     step= MB_MBAFF ? 2 : 1;
2350     if(h->deblocking_filter == 2) {
// filter crosses only same-slice edges in this mode
2352         deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2353         deblock_top  = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2355         deblock_left = (s->mb_x > 0);
2356         deblock_top =  (s->mb_y > !!MB_FIELD);
2359     src_y  -=   linesize + 1;
2360     src_cb -= uvlinesize + 1;
2361     src_cr -= uvlinesize + 1;
2363 #define XCHG(a,b,t,xchg)\
2370         for(i = !deblock_top; i<16; i++){
2371             XCHG(h->left_border[offset+i*step], src_y [i*  linesize], temp8, xchg);
2373         XCHG(h->left_border[offset+i*step], src_y [i*  linesize], temp8, 1);
2377         XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2378         XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
// top-right neighbour samples of the next MB are needed for diagonal prediction
2379         if(s->mb_x+1 < s->mb_width){
2380             XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2384     if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2386             for(i = !deblock_top; i<8; i++){
2387                 XCHG(h->left_border[uvoffset+34   +i*step], src_cb[i*uvlinesize], temp8, xchg);
2388                 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2390             XCHG(h->left_border[uvoffset+34   +i*step], src_cb[i*uvlinesize], temp8, 1);
2391             XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2394             XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2395             XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/**
 * Reconstruct one macroblock: intra prediction or motion compensation,
 * inverse transform + residual add for luma and chroma, then deblocking.
 * @param simple  compile-time flag: 1 = fast path without MBAFF/GRAY/
 *                lossless/SVQ3 handling (the compiler removes those
 *                branches), 0 = full path.
 * Always inlined so each specialization is constant-folded.
 */
2400 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2401     MpegEncContext * const s = &h->s;
2402     const int mb_x= s->mb_x;
2403     const int mb_y= s->mb_y;
2404     const int mb_xy= h->mb_xy;
2405     const int mb_type= s->current_picture.mb_type[mb_xy];
2406     uint8_t  *dest_y, *dest_cb, *dest_cr;
2407     int linesize, uvlinesize /*dct_offset*/;
2409     int *block_offset = &h->block_offset[0];
2410     const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
2411     /* is_h264 should always be true if SVQ3 is disabled. */
2412     const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
2413     void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2414     void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2416     dest_y  = s->current_picture.data[0] + (mb_x + mb_y * s->linesize  ) * 16;
2417     dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2418     dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2420     s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2421     s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
// field macroblock: double the stride and, for the bottom field, shift the
// destination up so interleaved field lines are written correctly
2423     if (!simple && MB_FIELD) {
2424         linesize   = h->mb_linesize   = s->linesize * 2;
2425         uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2426         block_offset = &h->block_offset[24];
2427         if(mb_y&1){ //FIXME move out of this function?
2428             dest_y -= s->linesize*15;
2429             dest_cb-= s->uvlinesize*7;
2430             dest_cr-= s->uvlinesize*7;
// MBAFF: re-map reference indices so field references of the opposite parity
// are distinguished in the ref_cache
2434         for(list=0; list<h->list_count; list++){
2435             if(!USES_LIST(mb_type, list))
2437             if(IS_16X16(mb_type)){
2438                 int8_t *ref = &h->ref_cache[list][scan8[0]];
2439                 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2441                 for(i=0; i<16; i+=4){
2442                     int ref = h->ref_cache[list][scan8[i]];
2444                         fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2450         linesize   = h->mb_linesize   = s->linesize;
2451         uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2452 //        dct_offset = s->linesize * 16;
// I_PCM: raw samples were stored in h->mb by the parser; copy them out verbatim
2455     if (!simple && IS_INTRA_PCM(mb_type)) {
2456         for (i=0; i<16; i++) {
2457             memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
2459         for (i=0; i<8; i++) {
2460             memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2461             memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
2464         if(IS_INTRA(mb_type)){
// swap in unfiltered borders so intra prediction ignores deblocked neighbours
2465             if(h->deblocking_filter)
2466                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2468             if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2469                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2470                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2473             if(IS_INTRA4x4(mb_type)){
2474                 if(simple || !s->encoding){
2475                     if(IS_8x8DCT(mb_type)){
2476                         if(transform_bypass){
2478                             idct_add = s->dsp.add_pixels8;
2480                             idct_dc_add = s->dsp.h264_idct8_dc_add;
2481                             idct_add    = s->dsp.h264_idct8_add;
2483                         for(i=0; i<16; i+=4){
2484                             uint8_t * const ptr= dest_y + block_offset[i];
2485                             const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
// profile 244 (High 4:4:4) lossless: prediction and residual add are fused
2486                             if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2487                                 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2489                                 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2490                                 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2491                                                             (h->topright_samples_available<<i)&0x4000, linesize);
// DC-only block: cheaper dedicated add
2493                                     if(nnz == 1 && h->mb[i*16])
2494                                         idct_dc_add(ptr, h->mb + i*16, linesize);
2496                                         idct_add   (ptr, h->mb + i*16, linesize);
2501                     if(transform_bypass){
2503                         idct_add = s->dsp.add_pixels4;
2505                         idct_dc_add = s->dsp.h264_idct_dc_add;
2506                         idct_add    = s->dsp.h264_idct_add;
2508                     for(i=0; i<16; i++){
2509                         uint8_t * const ptr= dest_y + block_offset[i];
2510                         const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2512                         if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2513                             h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
// diagonal modes read "top-right" samples; replicate the edge when unavailable
2517                             if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2518                                 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2519                                 assert(mb_y || linesize <= block_offset[i]);
2520                                 if(!topright_avail){
2521                                     tr= ptr[3 - linesize]*0x01010101;
2522                                     topright= (uint8_t*) &tr;
2524                                     topright= ptr + 4 - linesize;
2528                             h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2529                             nnz = h->non_zero_count_cache[ scan8[i] ];
2532                                     if(nnz == 1 && h->mb[i*16])
2533                                         idct_dc_add(ptr, h->mb + i*16, linesize);
2535                                         idct_add   (ptr, h->mb + i*16, linesize);
2537                                     svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
// intra16x16: whole-MB prediction plus separate luma-DC transform
2544                 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2546                     if(!transform_bypass)
2547                         h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2549                     svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2551             if(h->deblocking_filter)
2552                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
// inter macroblock: motion compensation fills the destination planes
2554             hl_motion(h, dest_y, dest_cb, dest_cr,
2555                       s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2556                       s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2557                       s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
// add the luma residual (skip for intra4x4 which already added it above)
2561     if(!IS_INTRA4x4(mb_type)){
2563             if(IS_INTRA16x16(mb_type)){
2564                 if(transform_bypass){
2565                     if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
2566                         h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2568                         for(i=0; i<16; i++){
2569                             if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2570                                 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
2574                     s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2576             }else if(h->cbp&15){
2577                 if(transform_bypass){
2578                     const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2579                     idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2580                     for(i=0; i<16; i+=di){
2581                         if(h->non_zero_count_cache[ scan8[i] ]){
2582                             idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2586                     if(IS_8x8DCT(mb_type)){
2587                         s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2589                         s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
// SVQ3 residual path
2594             for(i=0; i<16; i++){
2595                 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2596                     uint8_t * const ptr= dest_y + block_offset[i];
2597                     svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
// chroma residual (cbp bits 4-5 signal coded chroma)
2603     if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
2604         uint8_t *dest[2] = {dest_cb, dest_cr};
2605         if(transform_bypass){
2606             if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2607                 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2608                 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2610                 idct_add = s->dsp.add_pixels4;
2611                 for(i=16; i<16+8; i++){
2612                     if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2613                         idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
// separate 2x2 chroma DC transform, then per-block AC add
2617                 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2618                 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2620                     idct_add = s->dsp.h264_idct_add;
2621                     idct_dc_add = s->dsp.h264_idct_dc_add;
2622                     for(i=16; i<16+8; i++){
2623                         if(h->non_zero_count_cache[ scan8[i] ])
2624                             idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2625                         else if(h->mb[i*16])
2626                             idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2629                 for(i=16; i<16+8; i++){
2630                     if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2631                         uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2632                         svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2639     if(h->cbp || IS_INTRA(mb_type))
2640         s->dsp.clear_blocks(h->mb);
// in-loop deblocking: save unfiltered borders first, then filter
2642     if(h->deblocking_filter) {
2643         backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2644         fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2645         h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2646         h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2647         if (!simple && FRAME_MBAFF) {
2648             filter_mb     (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2650             filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2656  * Process a macroblock; this case avoids checks for expensive uncommon cases.
// simple=1 specialization of hl_decode_mb_internal (MBAFF/GRAY/lossless
// branches compiled out).
2658 static void hl_decode_mb_simple(H264Context *h){
2659     hl_decode_mb_internal(h, 1);
2663  * Process a macroblock; this handles edge cases, such as interlacing.
// simple=0 specialization; av_noinline keeps the rarely-taken slow path
// out of the hot caller.
2665 static void av_noinline hl_decode_mb_complex(H264Context *h){
2666     hl_decode_mb_internal(h, 0);
/**
 * Dispatch macroblock reconstruction to the simple or complex path.
 * The complex path is required for MBAFF/interlace, I_PCM, lossless
 * (qscale==0) macroblocks, or when building for size (CONFIG_SMALL
 * compiles only one variant).
 */
2669 static void hl_decode_mb(H264Context *h){
2670     MpegEncContext * const s = &h->s;
2671     const int mb_xy= h->mb_xy;
2672     const int mb_type= s->current_picture.mb_type[mb_xy];
2673     int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
2676         hl_decode_mb_complex(h);
2677     else hl_decode_mb_simple(h);
/**
 * Convert a frame Picture in-place into a single-field view:
 * doubles each plane's linesize, offsets the data pointers by one line
 * for the bottom field, restricts 'reference' to the given parity, and
 * selects the matching field POC.
 * @param parity PICT_TOP_FIELD or PICT_BOTTOM_FIELD
 */
2680 static void pic_as_field(Picture *pic, const int parity){
2682     for (i = 0; i < 4; ++i) {
2683         if (parity == PICT_BOTTOM_FIELD)
2684             pic->data[i] += pic->linesize[i];
2685         pic->reference = parity;
2686         pic->linesize[i] *= 2;
2688     pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
/**
 * Copy 'src' into 'dest' if its reference flags match 'parity'; when the
 * target is a field (not PICT_FRAME) the copy is converted with
 * pic_as_field() and id_add is added to its pic_id.
 * @return 1 if the picture matched and was copied, 0 otherwise
 * NOTE(review): the lines performing the struct copy are elided in this
 * excerpt — only the match test and field conversion are visible.
 */
2691 static int split_field_copy(Picture *dest, Picture *src,
2692                             int parity, int id_add){
2693     int match = !!(src->reference & parity);
2697         if(parity != PICT_FRAME){
2698             pic_as_field(dest, parity);
2700             dest->pic_id += id_add;
/**
 * Build a default reference list for field decoding by interleaving
 * same-parity (sel) and opposite-parity (sel^3) references from 'in',
 * assigning pic_id = frame_num for short-term or the index for long-term.
 * @return number of entries written to 'def'
 */
2707 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
// i[0]/i[1] scan independently for same- and opposite-parity candidates
2711     while(i[0]<len || i[1]<len){
2712         while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2714         while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2717             in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2718             split_field_copy(&def[index++], in[ i[0]++ ], sel  , 1);
2721             in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2722             split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
/**
 * Append pointers from 'src' to 'sorted', ordered by POC relative to
 * 'limit': dir=0 selects POCs below the limit in descending order,
 * dir=1 selects POCs above it in ascending order (selection-sort style,
 * one best candidate per outer pass).
 * @return number of entries appended
 */
2729 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2734         best_poc= dir ? INT_MIN : INT_MAX;
2736         for(i=0; i<len; i++){
2737             const int poc= src[i]->poc;
// XOR with dir flips both comparisons for the ascending case
2738             if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2740                 sorted[out_i]= src[i];
// sentinel unchanged => no candidate found this pass => done
2743         if(best_poc == (dir ? INT_MIN : INT_MAX))
2745         limit= sorted[out_i++]->poc - dir;
2751  * fills the default_ref_list.
// For B slices: short-term refs sorted by POC around the current picture
// (L0 = past-then-future, L1 = future-then-past), then long-term refs;
// L1's first two entries are swapped if it would equal L0.
// For P slices: short-term refs in decoding order, then long-term refs.
2753 static int fill_default_ref_list(H264Context *h){
2754     MpegEncContext * const s = &h->s;
2757     if(h->slice_type_nos==FF_B_TYPE){
2758         Picture *sorted[32];
2763             cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2765             cur_poc= s->current_picture_ptr->poc;
2767         for(list= 0; list<2; list++){
// 1^list / 0^list: L0 takes lower POCs first, L1 takes higher POCs first
2768             len= add_sorted(sorted    , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2769             len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2771             len= build_def_list(h->default_ref_list[list]    , sorted     , len, 0, s->picture_structure);
2772             len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2775             if(len < h->ref_count[list])
2776                 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
// spec requirement: L1 must differ from L0 — swap its first two entries if equal
2780         if(lens[0] == lens[1] && lens[1] > 1){
2781             for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2783                 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2786         len = build_def_list(h->default_ref_list[0]    , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2787         len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16                , 1, s->picture_structure);
2789         if(len < h->ref_count[0])
2790             memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
2793     for (i=0; i<h->ref_count[0]; i++) {
2794         tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2796     if(h->slice_type_nos==FF_B_TYPE){
2797         for (i=0; i<h->ref_count[1]; i++) {
2798             tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
// Forward declarations for the debug dump helpers used below.
2805 static void print_short_term(H264Context *h);
2806 static void print_long_term(H264Context *h);
2809  * Extract structure information about the picture described by pic_num in
2810  * the current decoding context (frame or field). Note that pic_num is
2811  * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2812  * @param pic_num picture number for which to extract structure information
2813  * @param structure one of PICT_XXX describing structure of picture
2815  * @return frame number (short term) or long term index of picture
2816  *         described by pic_num
2818 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2819     MpegEncContext * const s = &h->s;
2821     *structure = s->picture_structure;
// in field decoding, an even/odd pic_num distinguishes same/opposite parity;
// the parity toggle and the >>1 of pic_num are partially elided here
2824             /* opposite field */
2825             *structure ^= PICT_FRAME;
/**
 * Parse ref_pic_list_reordering() from the slice header and apply it:
 * starts from the default lists, then for each reordering command moves
 * the named short-term (idc 0/1, by abs_diff_pic_num) or long-term
 * (idc 2, by long_term_pic_idx) picture to the current index, shifting
 * the rest down.  idc 3 terminates the loop.
 * @return 0 on success, -1 on bitstream errors
 */
2832 static int decode_ref_pic_list_reordering(H264Context *h){
2833     MpegEncContext * const s = &h->s;
2834     int list, index, pic_structure;
2836     print_short_term(h);
2839     for(list=0; list<h->list_count; list++){
2840         memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
// ref_pic_list_reordering_flag_l0/l1
2842         if(get_bits1(&s->gb)){
2843             int pred= h->curr_pic_num;
2845             for(index=0; ; index++){
2846                 unsigned int reordering_of_pic_nums_idc= get_ue_golomb_31(&s->gb);
2847                 unsigned int pic_id;
2849                 Picture *ref = NULL;
2851                 if(reordering_of_pic_nums_idc==3)
2854                 if(index >= h->ref_count[list]){
2855                     av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2859                 if(reordering_of_pic_nums_idc<3){
2860                     if(reordering_of_pic_nums_idc<2){
// short-term: pic num predicted from the previous one, modulo max_pic_num
2861                         const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2864                         if(abs_diff_pic_num > h->max_pic_num){
2865                             av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2869                         if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2870                         else                                pred+= abs_diff_pic_num;
2871                         pred &= h->max_pic_num - 1;
2873                         frame_num = pic_num_extract(h, pred, &pic_structure);
// newest-first scan of the short-term list for a matching frame_num/parity
2875                         for(i= h->short_ref_count-1; i>=0; i--){
2876                             ref = h->short_ref[i];
2877                             assert(ref->reference);
2878                             assert(!ref->long_ref);
2880                                 ref->frame_num == frame_num &&
2881                                 (ref->reference & pic_structure)
// long-term reference, addressed directly by index
2889                         pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2891                         long_idx= pic_num_extract(h, pic_id, &pic_structure);
2894                             av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2897                         ref = h->long_ref[long_idx];
2898                         assert(!(ref && !ref->reference));
2899                         if(ref && (ref->reference & pic_structure)){
2900                             ref->pic_id= pic_id;
2901                             assert(ref->long_ref);
2909                         av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2910                         memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
// insert 'ref' at 'index': find its old slot (or list end), shift entries down
2912                         for(i=index; i+1<h->ref_count[list]; i++){
2913                             if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2916                         for(; i > index; i--){
2917                             h->ref_list[list][i]= h->ref_list[list][i-1];
2919                         h->ref_list[list][index]= *ref;
2921                             pic_as_field(&h->ref_list[list][index], pic_structure);
2925                     av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
// final sanity pass: plug any holes with the current picture (not spec-correct)
2931     for(list=0; list<h->list_count; list++){
2932         for(index= 0; index < h->ref_count[list]; index++){
2933             if(!h->ref_list[list][index].data[0]){
2934                 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2935                 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
/**
 * For MBAFF decoding, derive per-field reference entries from each frame
 * reference: ref_list[list][16+2*i] = top field, [16+2*i+1] = bottom field
 * of frame reference i, with field POCs and doubled linesizes.  Also
 * duplicates the explicit and implicit weights/offsets for the field pairs.
 */
2943 static void fill_mbaff_ref_list(H264Context *h){
2945     for(list=0; list<2; list++){ //FIXME try list_count
2946         for(i=0; i<h->ref_count[list]; i++){
2947             Picture *frame = &h->ref_list[list][i];
2948             Picture *field = &h->ref_list[list][16+2*i];
2951                 field[0].linesize[j] <<= 1;
2952             field[0].reference = PICT_TOP_FIELD;
2953             field[0].poc= field[0].field_poc[0];
// bottom field: same as top but data shifted one (frame) line down
2954             field[1] = field[0];
2956                 field[1].data[j] += frame->linesize[j];
2957             field[1].reference = PICT_BOTTOM_FIELD;
2958             field[1].poc= field[1].field_poc[1];
2960             h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2961             h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2963                 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2964                 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
2968     for(j=0; j<h->ref_count[1]; j++){
2969         for(i=0; i<h->ref_count[0]; i++)
2970             h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2971         memcpy(h->implicit_weight[16+2*j],   h->implicit_weight[j], sizeof(*h->implicit_weight));
2972         memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/**
 * Parse pred_weight_table() from the slice header: log2 weight denominators
 * followed by optional per-reference luma and chroma weights/offsets for
 * L0 (and L1 for B slices).  References without an explicit weight get the
 * default weight (1<<denom) and offset 0.  Sets h->use_weight /
 * h->use_weight_chroma accordingly.
 */
2976 static int pred_weight_table(H264Context *h){
2977     MpegEncContext * const s = &h->s;
2979     int luma_def, chroma_def;
2982     h->use_weight_chroma= 0;
2983     h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2984     h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
2985     luma_def = 1<<h->luma_log2_weight_denom;
2986     chroma_def = 1<<h->chroma_log2_weight_denom;
2988     for(list=0; list<2; list++){
2989         h->luma_weight_flag[list]   = 0;
2990         h->chroma_weight_flag[list] = 0;
2991         for(i=0; i<h->ref_count[list]; i++){
2992             int luma_weight_flag, chroma_weight_flag;
2994             luma_weight_flag= get_bits1(&s->gb);
2995             if(luma_weight_flag){
2996                 h->luma_weight[list][i]= get_se_golomb(&s->gb);
2997                 h->luma_offset[list][i]= get_se_golomb(&s->gb);
// only non-default weights require the weighted-prediction code path
2998                 if(   h->luma_weight[list][i] != luma_def
2999                    || h->luma_offset[list][i] != 0) {
3001                     h->luma_weight_flag[list]= 1;
3004                 h->luma_weight[list][i]= luma_def;
3005                 h->luma_offset[list][i]= 0;
3009                 chroma_weight_flag= get_bits1(&s->gb);
3010                 if(chroma_weight_flag){
// one weight/offset pair per chroma plane (Cb, Cr)
3013                         h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3014                         h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3015                         if(   h->chroma_weight[list][i][j] != chroma_def
3016                            || h->chroma_offset[list][i][j] != 0) {
3017                             h->use_weight_chroma= 1;
3018                             h->chroma_weight_flag[list]= 1;
3024                         h->chroma_weight[list][i][j]= chroma_def;
3025                         h->chroma_offset[list][i][j]= 0;
// L1 weights only exist for B slices
3030         if(h->slice_type_nos != FF_B_TYPE) break;
3032     h->use_weight= h->use_weight || h->use_weight_chroma;
/**
 * Derive the implicit bi-prediction weight table for B slices from the
 * POC distances between the current picture and each (ref0, ref1) pair,
 * clamping intermediate values as required by the spec. Pairs whose
 * distance-scale factor falls outside [-64, 128] use the equal weight 32.
 */
3036 static void implicit_weight_table(H264Context *h){
3037 MpegEncContext * const s = &h->s;
3039 int cur_poc = s->current_picture_ptr->poc;
3041 for (i = 0; i < 2; i++) {
3042 h->luma_weight_flag[i] = 0;
3043 h->chroma_weight_flag[i] = 0;
/* single symmetric reference pair: weighting cannot change the result,
 * so disable it entirely */
3046 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3047 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3049 h->use_weight_chroma= 0;
3054 h->use_weight_chroma= 2;
3055 h->luma_log2_weight_denom= 5;
3056 h->chroma_log2_weight_denom= 5;
3058 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3059 int poc0 = h->ref_list[0][ref0].poc;
3060 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3061 int poc1 = h->ref_list[1][ref1].poc;
/* td/tb/tx per H.264 8.4.2.3.2; NOTE(review): the guard against td==0
 * appears to be on an elided line — the division below assumes td != 0 */
3062 int td = av_clip(poc1 - poc0, -128, 127);
3064 int tb = av_clip(cur_poc - poc0, -128, 127);
3065 int tx = (16384 + (FFABS(td) >> 1)) / td;
3066 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3067 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3068 h->implicit_weight[ref0][ref1] = 32;
3070 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3072 h->implicit_weight[ref0][ref1] = 32;
3078 * Mark a picture as no longer needed for reference. The refmask
3079 * argument allows unreferencing of individual fields or the whole frame.
3080 * If the picture becomes entirely unreferenced, but is being held for
3081 * display purposes, it is marked as such.
3082 * @param refmask mask of fields to unreference; the mask is bitwise
3083 * anded with the reference marking of pic
3084 * @return non-zero if pic becomes entirely unreferenced (except possibly
3085 * for display purposes) zero if one of the fields remains in
3088 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
/* a nonzero masked value means at least one field is still referenced */
3090 if (pic->reference &= refmask) {
/* picture is fully unreferenced; if it is still queued for output,
 * keep it alive with the special DELAYED_PIC_REF marking */
3093 for(i = 0; h->delayed_pic[i]; i++)
3094 if(pic == h->delayed_pic[i]){
3095 pic->reference=DELAYED_PIC_REF;
3103 * instantaneous decoder refresh.
/* Clears both reference lists and resets frame-number tracking, as required
 * when an IDR slice is decoded. */
3105 static void idr(H264Context *h){
/* drop all long-term references */
3108 for(i=0; i<16; i++){
3109 remove_long(h, i, 0);
3111 assert(h->long_ref_count==0);
/* drop all short-term references */
3113 for(i=0; i<h->short_ref_count; i++){
3114 unreference_pic(h, h->short_ref[i], 0);
3115 h->short_ref[i]= NULL;
3117 h->short_ref_count=0;
3118 h->prev_frame_num= 0;
3119 h->prev_frame_num_offset= 0;
3124 /* forget old pics after a seek */
/* AVCodecContext.flush callback: drops delayed-output pictures, resets
 * field-pairing and SEI state, then defers to ff_mpeg_flush(). */
3125 static void flush_dpb(AVCodecContext *avctx){
3126 H264Context *h= avctx->priv_data;
3128 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3129 if(h->delayed_pic[i])
3130 h->delayed_pic[i]->reference= 0;
3131 h->delayed_pic[i]= NULL;
/* INT_MIN so any real POC compares as newer than "nothing output yet" */
3133 h->outputed_poc= INT_MIN;
3135 if(h->s.current_picture_ptr)
3136 h->s.current_picture_ptr->reference= 0;
3137 h->s.first_field= 0;
/* reset SEI-derived state; -1 means "not seen" */
3138 h->sei_recovery_frame_cnt = -1;
3139 h->sei_dpb_output_delay = 0;
3140 h->sei_cpb_removal_delay = -1;
3141 h->sei_buffering_period_present = 0;
3142 ff_mpeg_flush(avctx);
3146 * Find a Picture in the short term reference list by frame number.
3147 * @param frame_num frame number to search for
3148 * @param idx the index into h->short_ref where returned picture is found
3149 * undefined if no picture found.
3150 * @return pointer to the found picture, or NULL if no pic with the provided
3151 * frame number is found
3153 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3154 MpegEncContext * const s = &h->s;
/* linear scan; short_ref_count is bounded by the DPB size so this is cheap */
3157 for(i=0; i<h->short_ref_count; i++){
3158 Picture *pic= h->short_ref[i];
3159 if(s->avctx->debug&FF_DEBUG_MMCO)
3160 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3161 if(pic->frame_num == frame_num) {
3170 * Remove a picture from the short term reference list by its index in
3171 * that list. This does no checking on the provided index; it is assumed
3172 * to be valid. Other list entries are shifted down.
3173 * @param i index into h->short_ref of picture to remove.
3175 static void remove_short_at_index(H264Context *h, int i){
3176 assert(i >= 0 && i < h->short_ref_count);
3177 h->short_ref[i]= NULL;
/* close the gap left by the removed entry (list stays densely packed) */
3178 if (--h->short_ref_count)
3179 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
/* Remove a picture from the short-term reference list by frame number,
 * unreferencing the fields selected by ref_mask. */
3184 * @return the removed picture or NULL if an error occurs
3186 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3187 MpegEncContext * const s = &h->s;
3191 if(s->avctx->debug&FF_DEBUG_MMCO)
3192 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3194 pic = find_short(h, frame_num, &i);
/* only drop the list entry if the picture became fully unreferenced */
3196 if(unreference_pic(h, pic, ref_mask))
3197 remove_short_at_index(h, i);
3204 * Remove a picture from the long term reference list by its index in
3206 * @return the removed picture or NULL if an error occurs
3208 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3211 pic= h->long_ref[i];
/* only clear the slot when the picture became fully unreferenced */
3213 if(unreference_pic(h, pic, ref_mask)){
3214 assert(h->long_ref[i]->long_ref == 1);
3215 h->long_ref[i]->long_ref= 0;
3216 h->long_ref[i]= NULL;
3217 h->long_ref_count--;
3225 * print short term list
/* Debug helper: dumps the short-term reference list when FF_DEBUG_MMCO is
 * enabled; no effect otherwise. */
3227 static void print_short_term(H264Context *h) {
3229 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3230 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3231 for(i=0; i<h->short_ref_count; i++){
3232 Picture *pic= h->short_ref[i];
3233 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3239 * print long term list
/* Debug helper: dumps all 16 long-term reference slots when FF_DEBUG_MMCO
 * is enabled; no effect otherwise. */
3241 static void print_long_term(H264Context *h) {
3243 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3244 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3245 for(i = 0; i < 16; i++){
3246 Picture *pic= h->long_ref[i];
3248 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3255 * Executes the reference picture marking (memory management control operations).
/* Applies the parsed MMCO opcodes (H.264 section 8.2.5) to the short/long
 * reference lists, then handles default sliding-window marking and
 * second-field bookkeeping, and finally enforces the SPS reference-frame
 * limit on corrupt streams. */
3257 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3258 MpegEncContext * const s = &h->s;
3260 int current_ref_assigned=0;
3261 Picture *av_uninit(pic);
3263 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3264 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3266 for(i=0; i<mmco_count; i++){
3267 int structure, av_uninit(frame_num);
3268 if(s->avctx->debug&FF_DEBUG_MMCO)
3269 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
/* opcodes addressing a short-term picture: resolve pic_num to a frame
 * number and look it up before dispatching */
3271 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3272 || mmco[i].opcode == MMCO_SHORT2LONG){
3273 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3274 pic = find_short(h, frame_num, &j);
3276 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3277 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3278 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3283 switch(mmco[i].opcode){
3284 case MMCO_SHORT2UNUSED:
3285 if(s->avctx->debug&FF_DEBUG_MMCO)
3286 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3287 remove_short(h, frame_num, structure ^ PICT_FRAME);
/* move a short-term picture into a long-term slot, evicting any
 * different picture currently held there */
3289 case MMCO_SHORT2LONG:
3290 if (h->long_ref[mmco[i].long_arg] != pic)
3291 remove_long(h, mmco[i].long_arg, 0);
3293 remove_short_at_index(h, j);
3294 h->long_ref[ mmco[i].long_arg ]= pic;
3295 if (h->long_ref[ mmco[i].long_arg ]){
3296 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3297 h->long_ref_count++;
3300 case MMCO_LONG2UNUSED:
3301 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3302 pic = h->long_ref[j];
3304 remove_long(h, j, structure ^ PICT_FRAME);
3305 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3306 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3309 // Comment below left from previous code as it is an interresting note.
3310 /* First field in pair is in short term list or
3311 * at a different long term index.
3312 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3313 * Report the problem and keep the pair where it is,
3314 * and mark this field valid.
/* MMCO_LONG: mark the current picture itself as long-term */
3317 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3318 remove_long(h, mmco[i].long_arg, 0);
3320 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3321 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3322 h->long_ref_count++;
3325 s->current_picture_ptr->reference |= s->picture_structure;
3326 current_ref_assigned=1;
3328 case MMCO_SET_MAX_LONG:
3329 assert(mmco[i].long_arg <= 16);
3330 // just remove the long term which index is greater than new max
3331 for(j = mmco[i].long_arg; j<16; j++){
3332 remove_long(h, j, 0);
/* MMCO_RESET (elided case label): empty both lists and reset POC/frame_num */
3336 while(h->short_ref_count){
3337 remove_short(h, h->short_ref[0]->frame_num, 0);
3339 for(j = 0; j < 16; j++) {
3340 remove_long(h, j, 0);
3342 s->current_picture_ptr->poc=
3343 s->current_picture_ptr->field_poc[0]=
3344 s->current_picture_ptr->field_poc[1]=
3348 s->current_picture_ptr->frame_num= 0;
3354 if (!current_ref_assigned) {
3355 /* Second field of complementary field pair; the first field of
3356 * which is already referenced. If short referenced, it
3357 * should be first entry in short_ref. If not, it must exist
3358 * in long_ref; trying to put it on the short list here is an
3359 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3361 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3362 /* Just mark the second field valid */
3363 s->current_picture_ptr->reference = PICT_FRAME;
3364 } else if (s->current_picture_ptr->long_ref) {
3365 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3366 "assignment for second field "
3367 "in complementary field pair "
3368 "(first field is long term)\n");
/* default sliding-window: push the current picture onto the head of
 * the short-term list */
3370 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3372 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3375 if(h->short_ref_count)
3376 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3378 h->short_ref[0]= s->current_picture_ptr;
3379 h->short_ref_count++;
3380 s->current_picture_ptr->reference |= s->picture_structure;
3384 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3386 /* We have too many reference frames, probably due to corrupted
3387 * stream. Need to discard one frame. Prevents overrun of the
3388 * short_ref and long_ref buffers.
3390 av_log(h->s.avctx, AV_LOG_ERROR,
3391 "number of reference frames exceeds max (probably "
3392 "corrupt input), discarding one\n");
3394 if (h->long_ref_count && !h->short_ref_count) {
3395 for (i = 0; i < 16; ++i)
3400 remove_long(h, i, 0);
/* otherwise drop the oldest short-term reference */
3402 pic = h->short_ref[h->short_ref_count - 1];
3403 remove_short(h, pic->frame_num, 0);
3407 print_short_term(h);
/**
 * Parse dec_ref_pic_marking() from the slice header (H.264 section 7.3.3.3)
 * into h->mmco[]. For IDR slices this synthesizes the implied marking; for
 * non-IDR slices it either reads explicit adaptive MMCO opcodes or sets up
 * default sliding-window removal of the oldest short-term reference.
 */
3412 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3413 MpegEncContext * const s = &h->s;
3417 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
/* no_output_of_prior_pics_flag; stored as 0/-1 */
3418 s->broken_link= get_bits1(gb) -1;
3420 h->mmco[0].opcode= MMCO_LONG;
3421 h->mmco[0].long_arg= 0;
3425 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3426 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3427 MMCOOpcode opcode= get_ue_golomb_31(gb);
3429 h->mmco[i].opcode= opcode;
3430 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
/* difference_of_pic_nums_minus1 -> absolute pic num, modulo max_pic_num */
3431 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3432 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3433 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3437 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3438 unsigned int long_arg= get_ue_golomb_31(gb);
/* field pic_nums for LONG2UNUSED may legally reach 2*16-1, hence the
 * relaxed bound in field-picture mode */
3439 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3440 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3443 h->mmco[i].long_arg= long_arg;
3446 if(opcode > (unsigned)MMCO_LONG){
3447 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3450 if(opcode == MMCO_END)
/* sliding-window mode: when the DPB is full, synthesize an unref of the
 * oldest short-term picture (both fields in field-picture mode) */
3455 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3457 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3458 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3459 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3460 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3462 if (FIELD_PICTURE) {
3463 h->mmco[0].short_pic_num *= 2;
3464 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3465 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
/**
 * Compute the picture order count (POC) of the current picture for all
 * three SPS POC types (H.264 section 8.2.1) and store the per-field and
 * combined values on the current Picture.
 */
3475 static int init_poc(H264Context *h){
3476 MpegEncContext * const s = &h->s;
3477 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3479 Picture *cur = s->current_picture_ptr;
/* frame_num wrapped since the previous picture -> advance the offset */
3481 h->frame_num_offset= h->prev_frame_num_offset;
3482 if(h->frame_num < h->prev_frame_num)
3483 h->frame_num_offset += max_frame_num;
3485 if(h->sps.poc_type==0){
/* type 0: explicit poc_lsb with inferred msb wrap detection */
3486 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
3488 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3489 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3490 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3491 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3493 h->poc_msb = h->prev_poc_msb;
3494 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3496 field_poc[1] = h->poc_msb + h->poc_lsb;
3497 if(s->picture_structure == PICT_FRAME)
3498 field_poc[1] += h->delta_poc_bottom;
3499 }else if(h->sps.poc_type==1){
/* type 1: POC derived from frame_num and the SPS reference-offset cycle */
3500 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3503 if(h->sps.poc_cycle_length != 0)
3504 abs_frame_num = h->frame_num_offset + h->frame_num;
3508 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3511 expected_delta_per_poc_cycle = 0;
3512 for(i=0; i < h->sps.poc_cycle_length; i++)
3513 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3515 if(abs_frame_num > 0){
3516 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3517 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3519 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3520 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3521 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3525 if(h->nal_ref_idc == 0)
3526 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3528 field_poc[0] = expectedpoc + h->delta_poc[0];
3529 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3531 if(s->picture_structure == PICT_FRAME)
3532 field_poc[1] += h->delta_poc[1];
/* type 2: POC is simply 2*frame-counter (minus 1 for non-references,
 * handled on elided lines) */
3534 int poc= 2*(h->frame_num_offset + h->frame_num);
/* only store the field POCs the current picture structure actually covers */
3543 if(s->picture_structure != PICT_BOTTOM_FIELD)
3544 s->current_picture_ptr->field_poc[0]= field_poc[0];
3545 if(s->picture_structure != PICT_TOP_FIELD)
3546 s->current_picture_ptr->field_poc[1]= field_poc[1];
3547 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3554 * initialize scan tables
/* Builds the 4x4 and 8x8 zigzag/field scan tables, permuting coefficient
 * order when a non-reference IDCT implementation (i.e. a SIMD variant with
 * a transposed layout) is in use, and selects the q0 (transform-bypass)
 * table pointers. */
3556 static void init_scan_tables(H264Context *h){
3557 MpegEncContext * const s = &h->s;
/* plain C IDCT uses the spec coefficient order directly */
3559 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3560 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3561 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3563 for(i=0; i<16; i++){
/* swap the 2-bit row/column halves of each 4x4 scan index */
3564 #define T(x) (x>>2) | ((x<<2) & 0xF)
3565 h->zigzag_scan[i] = T(zigzag_scan[i]);
3566 h-> field_scan[i] = T( field_scan[i]);
3570 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3571 memcpy(h->zigzag_scan8x8, ff_zigzag_direct, 64*sizeof(uint8_t));
3572 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3573 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3574 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3576 for(i=0; i<64; i++){
/* swap the 3-bit row/column halves of each 8x8 scan index */
3577 #define T(x) (x>>3) | ((x&7)<<3)
3578 h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]);
3579 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3580 h->field_scan8x8[i] = T(field_scan8x8[i]);
3581 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
/* transform bypass always uses the unpermuted spec tables */
3585 if(h->sps.transform_bypass){ //FIXME same ugly
3586 h->zigzag_scan_q0 = zigzag_scan;
3587 h->zigzag_scan8x8_q0 = ff_zigzag_direct;
3588 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3589 h->field_scan_q0 = field_scan;
3590 h->field_scan8x8_q0 = field_scan8x8;
3591 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3593 h->zigzag_scan_q0 = h->zigzag_scan;
3594 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3595 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3596 h->field_scan_q0 = h->field_scan;
3597 h->field_scan8x8_q0 = h->field_scan8x8;
3598 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3603 * Replicates H264 "master" context to thread contexts.
3605 static void clone_slice(H264Context *dst, H264Context *src)
3607 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3608 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3609 dst->s.current_picture = src->s.current_picture;
3610 dst->s.linesize = src->s.linesize;
3611 dst->s.uvlinesize = src->s.uvlinesize;
3612 dst->s.first_field = src->s.first_field;
3614 dst->prev_poc_msb = src->prev_poc_msb;
3615 dst->prev_poc_lsb = src->prev_poc_lsb;
3616 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3617 dst->prev_frame_num = src->prev_frame_num;
3618 dst->short_ref_count = src->short_ref_count;
3620 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3621 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3622 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3623 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3625 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3626 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3630 * decodes a slice header.
3631 * This will also call MPV_common_init() and frame_start() as needed.
3633 * @param h h264context
3634 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3636 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3638 static int decode_slice_header(H264Context *h, H264Context *h0){
3639 MpegEncContext * const s = &h->s;
3640 MpegEncContext * const s0 = &h0->s;
3641 unsigned int first_mb_in_slice;
3642 unsigned int pps_id;
3643 int num_ref_idx_active_override_flag;
3644 unsigned int slice_type, tmp, i, j;
3645 int default_ref_list_done = 0;
3646 int last_pic_structure;
/* non-reference pictures can be dropped without harming later decode */
3648 s->dropable= h->nal_ref_idc == 0;
/* FAST flag: use cheap 2-tap qpel on droppable pictures */
3650 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3651 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3652 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3654 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3655 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3658 first_mb_in_slice= get_ue_golomb(&s->gb);
/* first slice of a new picture resets the per-picture slice state */
3660 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3661 h0->current_slice = 0;
3662 if (!s0->first_field)
3663 s->current_picture_ptr= NULL;
3666 slice_type= get_ue_golomb_31(&s->gb);
3668 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
/* slice_type >= 5 means "fixed for the whole picture" */
3673 h->slice_type_fixed=1;
3675 h->slice_type_fixed=0;
3677 slice_type= golomb_to_pict_type[ slice_type ];
/* the default ref list only needs rebuilding when the slice type changed */
3678 if (slice_type == FF_I_TYPE
3679 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3680 default_ref_list_done = 1;
3682 h->slice_type= slice_type;
3683 h->slice_type_nos= slice_type & 3;
3685 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3686 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3687 av_log(h->s.avctx, AV_LOG_ERROR,
3688 "B picture before any references, skipping\n");
/* resolve and activate the PPS/SPS this slice references */
3692 pps_id= get_ue_golomb(&s->gb);
3693 if(pps_id>=MAX_PPS_COUNT){
3694 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3697 if(!h0->pps_buffers[pps_id]) {
3698 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3701 h->pps= *h0->pps_buffers[pps_id];
3703 if(!h0->sps_buffers[h->pps.sps_id]) {
3704 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3707 h->sps = *h0->sps_buffers[h->pps.sps_id];
/* dequant tables depend on the PPS; rebuild only on the master context */
3709 if(h == h0 && h->dequant_coeff_pps != pps_id){
3710 h->dequant_coeff_pps = pps_id;
3711 init_dequant_tables(h);
/* derive picture dimensions from the SPS, applying cropping */
3714 s->mb_width= h->sps.mb_width;
3715 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3717 h->b_stride= s->mb_width*4;
3718 h->b8_stride= s->mb_width*2;
3720 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3721 if(h->sps.frame_mbs_only_flag)
3722 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3724 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3726 if (s->context_initialized
3727 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3729 return -1; // width / height changed during parallelized decoding
3731 flush_dpb(s->avctx);
3734 if (!s->context_initialized) {
3736 return -1; // we cant (re-)initialize context during parallel decoding
3737 if (MPV_common_init(s) < 0)
3741 init_scan_tables(h);
/* set up one H264Context per slice thread, sharing the MpegEncContext
 * thread contexts allocated by MPV_common_init() */
3744 for(i = 1; i < s->avctx->thread_count; i++) {
3746 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3747 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3748 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3751 init_scan_tables(c);
3755 for(i = 0; i < s->avctx->thread_count; i++)
3756 if(context_init(h->thread_context[i]) < 0)
3759 s->avctx->width = s->width;
3760 s->avctx->height = s->height;
3761 s->avctx->sample_aspect_ratio= h->sps.sar;
3762 if(!s->avctx->sample_aspect_ratio.den)
3763 s->avctx->sample_aspect_ratio.den = 1;
3765 if(h->sps.timing_info_present_flag){
3766 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick, h->sps.time_scale};
/* work around old x264 builds that wrote half the correct time_scale */
3767 if(h->x264_build > 0 && h->x264_build < 44)
3768 s->avctx->time_base.den *= 2;
3769 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3770 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3774 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
/* determine frame/field/MBAFF structure for this slice */
3777 h->mb_aff_frame = 0;
3778 last_pic_structure = s0->picture_structure;
3779 if(h->sps.frame_mbs_only_flag){
3780 s->picture_structure= PICT_FRAME;
3782 if(get_bits1(&s->gb)) { //field_pic_flag
3783 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3785 s->picture_structure= PICT_FRAME;
3786 h->mb_aff_frame = h->sps.mb_aff;
3789 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3791 if(h0->current_slice == 0){
/* conceal frame_num gaps by synthesizing skipped reference frames */
3792 while(h->frame_num != h->prev_frame_num &&
3793 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3794 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3795 if (frame_start(h) < 0)
3797 h->prev_frame_num++;
3798 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3799 s->current_picture_ptr->frame_num= h->prev_frame_num;
3800 execute_ref_pic_marking(h, NULL, 0);
3803 /* See if we have a decoded first field looking for a pair... */
3804 if (s0->first_field) {
3805 assert(s0->current_picture_ptr);
3806 assert(s0->current_picture_ptr->data[0]);
3807 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3809 /* figure out if we have a complementary field pair */
3810 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3812 * Previous field is unmatched. Don't display it, but let it
3813 * remain for reference if marked as such.
3815 s0->current_picture_ptr = NULL;
3816 s0->first_field = FIELD_PICTURE;
3819 if (h->nal_ref_idc &&
3820 s0->current_picture_ptr->reference &&
3821 s0->current_picture_ptr->frame_num != h->frame_num) {
3823 * This and previous field were reference, but had
3824 * different frame_nums. Consider this field first in
3825 * pair. Throw away previous field except for reference
3828 s0->first_field = 1;
3829 s0->current_picture_ptr = NULL;
3832 /* Second field in complementary pair */
3833 s0->first_field = 0;
3838 /* Frame or first field in a potentially complementary pair */
3839 assert(!s0->current_picture_ptr);
3840 s0->first_field = FIELD_PICTURE;
3843 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3844 s0->first_field = 0;
3851 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3853 assert(s->mb_num == s->mb_width * s->mb_height);
3854 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3855 first_mb_in_slice >= s->mb_num){
3856 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3859 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3860 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3861 if (s->picture_structure == PICT_BOTTOM_FIELD)
3862 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3863 assert(s->mb_y < s->mb_height);
/* pic_num space doubles in field mode (fields numbered individually) */
3865 if(s->picture_structure==PICT_FRAME){
3866 h->curr_pic_num= h->frame_num;
3867 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3869 h->curr_pic_num= 2*h->frame_num + 1;
3870 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3873 if(h->nal_unit_type == NAL_IDR_SLICE){
3874 get_ue_golomb(&s->gb); /* idr_pic_id */
/* parse POC syntax elements for the active poc_type */
3877 if(h->sps.poc_type==0){
3878 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3880 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3881 h->delta_poc_bottom= get_se_golomb(&s->gb);
3885 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3886 h->delta_poc[0]= get_se_golomb(&s->gb);
3888 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3889 h->delta_poc[1]= get_se_golomb(&s->gb);
3894 if(h->pps.redundant_pic_cnt_present){
3895 h->redundant_pic_count= get_ue_golomb(&s->gb);
3898 //set defaults, might be overridden a few lines later
3899 h->ref_count[0]= h->pps.ref_count[0];
3900 h->ref_count[1]= h->pps.ref_count[1];
3902 if(h->slice_type_nos != FF_I_TYPE){
3903 if(h->slice_type_nos == FF_B_TYPE){
3904 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3906 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3908 if(num_ref_idx_active_override_flag){
3909 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3910 if(h->slice_type_nos==FF_B_TYPE)
3911 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
/* unsigned wrap makes this also reject a parsed count of 0 */
3913 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3914 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3915 h->ref_count[0]= h->ref_count[1]= 1;
3919 if(h->slice_type_nos == FF_B_TYPE)
/* build reference lists, then apply any explicit reordering */
3926 if(!default_ref_list_done){
3927 fill_default_ref_list(h);
3930 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
3933 if(h->slice_type_nos!=FF_I_TYPE){
3934 s->last_picture_ptr= &h->ref_list[0][0];
3935 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
3937 if(h->slice_type_nos==FF_B_TYPE){
3938 s->next_picture_ptr= &h->ref_list[1][0];
3939 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
/* weighted prediction: explicit table, implicit derivation, or none */
3942 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
3943 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3944 pred_weight_table(h);
3945 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3946 implicit_weight_table(h);
3949 for (i = 0; i < 2; i++) {
3950 h->luma_weight_flag[i] = 0;
3951 h->chroma_weight_flag[i] = 0;
3956 decode_ref_pic_marking(h0, &s->gb);
3959 fill_mbaff_ref_list(h);
3961 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3962 direct_dist_scale_factor(h);
3963 direct_ref_list_init(h);
3965 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3966 tmp = get_ue_golomb_31(&s->gb);
3968 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3971 h->cabac_init_idc= tmp;
3974 h->last_qscale_diff = 0;
3975 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3977 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3981 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3982 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3983 //FIXME qscale / qp ... stuff
3984 if(h->slice_type == FF_SP_TYPE){
3985 get_bits1(&s->gb); /* sp_for_switch_flag */
3987 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
3988 get_se_golomb(&s->gb); /* slice_qs_delta */
/* deblocking filter control; stored value 0=off 1=on 2=slice-local */
3991 h->deblocking_filter = 1;
3992 h->slice_alpha_c0_offset = 0;
3993 h->slice_beta_offset = 0;
3994 if( h->pps.deblocking_filter_parameters_present ) {
3995 tmp= get_ue_golomb_31(&s->gb);
3997 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
4000 h->deblocking_filter= tmp;
4001 if(h->deblocking_filter < 2)
4002 h->deblocking_filter^= 1; // 1<->0
4004 if( h->deblocking_filter ) {
4005 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4006 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
/* honor the caller's skip_loop_filter policy */
4010 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4011 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
4012 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
4013 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4014 h->deblocking_filter= 0;
/* cross-slice deblocking cannot run in parallel; either cheat (FAST) or
 * fall back to sequential decoding */
4016 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4017 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4018 /* Cheat slightly for speed:
4019 Do not bother to deblock across slices. */
4020 h->deblocking_filter = 2;
4022 h0->max_contexts = 1;
4023 if(!h0->single_decode_warning) {
4024 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4025 h0->single_decode_warning = 1;
4028 return 1; // deblocking switched inside frame
/* NOTE(review): the `?` bit count below sits in what appears to be an
 * unimplemented/disabled path in the original (slice groups are not
 * supported); left untouched — confirm against the full source */
4033 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4034 slice_group_change_cycle= get_bits(&s->gb, ?);
4037 h0->last_slice_type = slice_type;
4038 h->slice_num = ++h0->current_slice;
4039 if(h->slice_num >= MAX_SLICES){
4040 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
/* precompute ref_list index -> (4*frame_num | reference) lookup used by
 * the loop filter; entries 16.. cover the MBAFF field references */
4044 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
4048 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
4049 +(h->ref_list[j][i].reference&3);
4052 for(i=16; i<48; i++)
4053 ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
4054 +(h->ref_list[j][i].reference&3);
4057 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4058 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4060 s->avctx->refs= h->sps.ref_frame_count;
4062 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4063 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4065 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4067 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
4068 pps_id, h->frame_num,
4069 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4070 h->ref_count[0], h->ref_count[1],
4072 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4074 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4075 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
/**
 * Read a CAVLC level_prefix: counts leading zero bits before the first
 * one-bit using the raw bit-reader cache.
 * NOTE(review): the return statement is on an elided line; presumably
 * log-1 is returned — confirm against the full source.
 */
4085 static inline int get_level_prefix(GetBitContext *gb){
4089 OPEN_READER(re, gb);
4090 UPDATE_CACHE(re, gb);
4091 buf=GET_CACHE(re, gb);
/* position of the terminating one-bit within the 32-bit cache */
4093 log= 32 - av_log2(buf);
4095 print_bin(buf>>(32-log), log);
4096 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
/* consume the zeros plus the terminating one-bit */
4099 LAST_SKIP_BITS(re, gb, log);
4100 CLOSE_READER(re, gb);
/**
 * Returns nonzero if the 8x8 DCT may be used for the current macroblock,
 * i.e. none of the four sub_mb_types uses a partition smaller than 8x8.
 * Tests all four 16-bit sub_mb_type entries at once by type-punning the
 * array as one uint64_t (0x0001000100010001ULL replicates the mask into
 * each lane). NOTE(review): this cast relies on compiler-tolerated
 * aliasing; an `else` line between the two returns is elided in this
 * excerpt.
 */
4105 static inline int get_dct8x8_allowed(H264Context *h){
// With direct_8x8_inference, DIRECT sub-blocks are known to be >= 8x8,
// so MB_TYPE_DIRECT2 need not be excluded.
4106 if(h->sps.direct_8x8_inference_flag)
4107 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL));
4109 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
4113 * decodes a residual block.
4114 * @param n block index
4115 * @param scantable scantable
4116 * @param max_coeff number of coefficients in the block
4117 * @return <0 if an error occurred
// CAVLC residual decoding per H.264 spec 9.2: coeff_token, trailing ones'
// signs, levels, total_zeros, then run_before for coefficient placement.
// NOTE(review): several lines (else branches, error returns, closing
// braces) are elided in this excerpt; statement order is bitstream-
// critical, so the code is left untouched.
4119 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4120 MpegEncContext * const s = &h->s;
// Maps predicted nC (0..16) to one of the four coeff_token VLC tables.
4121 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4123 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4125 //FIXME put trailing_onex into the context
// --- coeff_token: chroma DC uses its own table; luma selects a table
// from the predicted non-zero count of neighboring blocks. ---
4127 if(n == CHROMA_DC_BLOCK_INDEX){
4128 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4129 total_coeff= coeff_token>>2;
4131 if(n == LUMA_DC_BLOCK_INDEX){
4132 total_coeff= pred_non_zero_count(h, 0);
4133 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4134 total_coeff= coeff_token>>2;
4136 total_coeff= pred_non_zero_count(h, n);
4137 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4138 total_coeff= coeff_token>>2;
4139 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4143 //FIXME set last_non_zero?
// Guard against corrupted streams claiming more coefficients than fit.
4147 if(total_coeff > (unsigned)max_coeff) {
4148 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4152 trailing_ones= coeff_token&3;
4153 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4154 assert(total_coeff<=16);
// --- trailing ones: read up to 3 sign bits in one peek; only
// trailing_ones bits are actually consumed. level[k] becomes +/-1. ---
4156 i = show_bits(gb, 3);
4157 skip_bits(gb, trailing_ones);
4158 level[0] = 1-((i&4)>>1);
4159 level[1] = 1-((i&2) );
4160 level[2] = 1-((i&1)<<1);
// --- remaining levels: table-accelerated prefix/suffix decode. ---
4162 if(trailing_ones<total_coeff) {
// Initial suffix_length is 1 only for blocks with many coefficients.
4164 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4165 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4166 int level_code= cavlc_level_tab[suffix_length][bitsi][0];
4168 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
// level_code >= 100 encodes "escape": value is prefix + 100.
4169 if(level_code >= 100){
4170 prefix= level_code - 100;
4171 if(prefix == LEVEL_TAB_BITS)
4172 prefix += get_level_prefix(gb);
4174 //first coefficient has suffix_length equal to 0 or 1
4175 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4177 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4179 level_code= (prefix<<suffix_length); //part
4180 }else if(prefix==14){
4182 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4184 level_code= prefix + get_bits(gb, 4); //part
// prefix >= 15: long escape with (prefix-3)-bit suffix.
4186 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4187 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4189 level_code += (1<<(prefix-3))-4096;
// If fewer than 3 trailing ones, levels 0/1 are impossible, shift by 2.
4192 if(trailing_ones < 3) level_code += 2;
// Unzigzag: even level_code -> positive, odd -> negative.
4195 mask= -(level_code&1);
4196 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
4198 if(trailing_ones < 3) level_code += (level_code>>31)|1;
// Bump suffix_length to 2 once |level| exceeds the threshold.
4201 if(level_code + 3U > 6U)
4203 level[trailing_ones]= level_code;
4206 //remaining coefficients have suffix_length > 0
4207 for(i=trailing_ones+1;i<total_coeff;i++) {
// Thresholds at which suffix_length is incremented (spec table).
4208 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
4209 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4210 level_code= cavlc_level_tab[suffix_length][bitsi][0];
4212 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
4213 if(level_code >= 100){
4214 prefix= level_code - 100;
4215 if(prefix == LEVEL_TAB_BITS){
4216 prefix += get_level_prefix(gb);
4219 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4221 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4223 level_code += (1<<(prefix-3))-4096;
4225 mask= -(level_code&1);
4226 level_code= (((2+level_code)>>1) ^ mask) - mask;
4228 level[i]= level_code;
4230 if(suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length])
// --- total_zeros: skipped entirely when the block is full. ---
4235 if(total_coeff == max_coeff)
4238 if(n == CHROMA_DC_BLOCK_INDEX)
4239 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4241 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
// --- placement: walk from the highest scan position downward, reading
// run_before between coefficients. Two copies of the loop: the first
// (qmul==NULL path, presumably) stores raw levels, the second
// dequantizes with (level*qmul[j]+32)>>6. ---
4244 coeff_num = zeros_left + total_coeff - 1;
4245 j = scantable[coeff_num];
4247 block[j] = level[0];
4248 for(i=1;i<total_coeff;i++) {
4251 else if(zeros_left < 7){
4252 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4254 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4256 zeros_left -= run_before;
4257 coeff_num -= 1 + run_before;
4258 j= scantable[ coeff_num ];
4263 block[j] = (level[0] * qmul[j] + 32)>>6;
4264 for(i=1;i<total_coeff;i++) {
4267 else if(zeros_left < 7){
4268 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4270 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4272 zeros_left -= run_before;
4273 coeff_num -= 1 + run_before;
4274 j= scantable[ coeff_num ];
4276 block[j]= (level[i] * qmul[j] + 32)>>6;
// A negative zeros_left means the runs over-consumed: corrupt stream.
4281 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/**
 * Predicts mb_field_decoding_flag for a skipped MBAFF macroblock pair:
 * inherit the field/frame decision from the left neighbor if it belongs
 * to the same slice, else from the top neighbor, else (elided fallback
 * line, presumably 0 — frame mode). Spec 7.4.4 inference rule.
 */
4288 static void predict_field_decoding_flag(H264Context *h){
4289 MpegEncContext * const s = &h->s;
4290 const int mb_xy= h->mb_xy;
4291 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4292 ? s->current_picture.mb_type[mb_xy-1]
4293 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4294 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4296 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4300 * decodes a P_SKIP or B_SKIP macroblock
// Skipped MBs carry no residual or motion data in the bitstream: motion
// is inferred (direct prediction for B, pskip prediction for P) and all
// non-zero-count bookkeeping is cleared.
4302 static void decode_mb_skip(H264Context *h){
4303 MpegEncContext * const s = &h->s;
4304 const int mb_xy= h->mb_xy;
// No coded coefficients anywhere in a skipped MB.
4307 memset(h->non_zero_count[mb_xy], 0, 16);
4308 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4311 mb_type|= MB_TYPE_INTERLACED;
4313 if( h->slice_type_nos == FF_B_TYPE )
4315 // just for fill_caches. pred_direct_motion will set the real mb_type
4316 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4318 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4319 pred_direct_motion(h, &mb_type);
4320 mb_type|= MB_TYPE_SKIP;
// P_SKIP path (else branch; opening line elided in this excerpt).
4325 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4327 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4328 pred_pskip_motion(h, &mx, &my);
// P_SKIP always references picture 0 of list 0 with the predicted MV.
4329 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4330 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4333 write_back_motion(h, mb_type);
// Commit per-MB state for deblocking and neighbor prediction.
4334 s->current_picture.mb_type[mb_xy]= mb_type;
4335 s->current_picture.qscale_table[mb_xy]= s->qscale;
4336 h->slice_table[ mb_xy ]= h->slice_num;
4337 h->prev_mb_skipped= 1;
4341 * decodes a macroblock
4342 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
// Full CAVLC macroblock decode: skip-run handling, mb_type, intra
// prediction modes or inter motion/refs, coded_block_pattern, qp delta,
// and residual blocks. NOTE(review): numerous lines (else branches,
// error returns, closing braces) are elided in this excerpt; the code is
// left byte-identical.
4344 static int decode_mb_cavlc(H264Context *h){
4345 MpegEncContext * const s = &h->s;
4347 int partition_count;
4348 unsigned int mb_type, cbp;
4349 int dct8x8_allowed= h->pps.transform_8x8_mode;
4351 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4353 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4354 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
// --- mb_skip_run: P/B slices encode a run of skipped MBs before each
// coded one; a pending run is counted down across calls. ---
4356 if(h->slice_type_nos != FF_I_TYPE){
4357 if(s->mb_skip_run==-1)
4358 s->mb_skip_run= get_ue_golomb(&s->gb);
4360 if (s->mb_skip_run--) {
// In MBAFF, the field flag for a skipped pair must still be decided:
// read it if this is the last skip of the run, else predict it.
4361 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4362 if(s->mb_skip_run==0)
4363 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4365 predict_field_decoding_flag(h);
4372 if( (s->mb_y&1) == 0 )
4373 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4376 h->prev_mb_skipped= 0;
// --- mb_type: slice-type-specific tables; large values fall through to
// the shared intra range (decode_intra_mb label, elided here). ---
4378 mb_type= get_ue_golomb(&s->gb);
4379 if(h->slice_type_nos == FF_B_TYPE){
4381 partition_count= b_mb_type_info[mb_type].partition_count;
4382 mb_type= b_mb_type_info[mb_type].type;
4385 goto decode_intra_mb;
4387 }else if(h->slice_type_nos == FF_P_TYPE){
4389 partition_count= p_mb_type_info[mb_type].partition_count;
4390 mb_type= p_mb_type_info[mb_type].type;
4393 goto decode_intra_mb;
4396 assert(h->slice_type_nos == FF_I_TYPE);
// SI slices offset mb_type by one (SI macroblock occupies index 0).
4397 if(h->slice_type == FF_SI_TYPE && mb_type)
4401 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4405 cbp= i_mb_type_info[mb_type].cbp;
4406 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4407 mb_type= i_mb_type_info[mb_type].type;
4411 mb_type |= MB_TYPE_INTERLACED;
4413 h->slice_table[ mb_xy ]= h->slice_num;
// --- I_PCM: raw samples, bitstream-aligned, no prediction/residual. ---
4415 if(IS_INTRA_PCM(mb_type)){
4418 // We assume these blocks are very rare so we do not optimize it.
4419 align_get_bits(&s->gb);
4421 // The pixels are stored in the same order as levels in h->mb array.
4422 for(x=0; x < (CHROMA ? 384 : 256); x++){
4423 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4426 // In deblocking, the quantizer is 0
4427 s->current_picture.qscale_table[mb_xy]= 0;
4428 // All coeffs are present
4429 memset(h->non_zero_count[mb_xy], 16, 16);
4431 s->current_picture.mb_type[mb_xy]= mb_type;
// MBAFF field MBs see doubled reference lists (top/bottom fields);
// restored at the end of the function (>>= 1 below).
4436 h->ref_count[0] <<= 1;
4437 h->ref_count[1] <<= 1;
4440 fill_caches(h, mb_type, 0);
// --- intra prediction mode decoding ---
4443 if(IS_INTRA(mb_type)){
4445 // init_top_left_availability(h);
4446 if(IS_INTRA4x4(mb_type)){
// transform_size_8x8_flag: promotes I4x4 to I8x8 prediction.
4449 if(dct8x8_allowed && get_bits1(&s->gb)){
4450 mb_type |= MB_TYPE_8x8DCT;
4454 // fill_intra4x4_pred_table(h);
4455 for(i=0; i<16; i+=di){
4456 int mode= pred_intra_mode(h, i);
// prev_intra4x4_pred_mode_flag == 0: read rem mode (3 bits),
// adjusting for the removed predicted value.
4458 if(!get_bits1(&s->gb)){
4459 const int rem_mode= get_bits(&s->gb, 3);
4460 mode = rem_mode + (rem_mode >= mode);
4464 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4466 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4468 write_back_intra_pred_mode(h);
4469 if( check_intra4x4_pred_mode(h) < 0)
4472 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4473 if(h->intra16x16_pred_mode < 0)
4477 pred_mode= check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
4480 h->chroma_pred_mode= pred_mode;
// --- inter, 8x8 partitions: sub_mb_type, refs, then MVs per list. ---
4482 }else if(partition_count==4){
4483 int i, j, sub_partition_count[4], list, ref[2][4];
4485 if(h->slice_type_nos == FF_B_TYPE){
4487 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4488 if(h->sub_mb_type[i] >=13){
4489 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4492 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4493 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4495 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4496 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4497 pred_direct_motion(h, &mb_type);
// Mark center cache entries unavailable so MV prediction of the
// explicit sub-blocks does not read direct-predicted values.
4498 h->ref_cache[0][scan8[4]] =
4499 h->ref_cache[1][scan8[4]] =
4500 h->ref_cache[0][scan8[12]] =
4501 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4504 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4506 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4507 if(h->sub_mb_type[i] >=4){
4508 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4511 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4512 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// Reference indices per 8x8 block (1 bit when only 2 refs).
4516 for(list=0; list<h->list_count; list++){
4517 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4519 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4520 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4524 }else if(ref_count == 2){
4525 tmp= get_bits1(&s->gb)^1;
4527 tmp= get_ue_golomb_31(&s->gb);
4529 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4542 dct8x8_allowed = get_dct8x8_allowed(h);
4544 for(list=0; list<h->list_count; list++){
4546 if(IS_DIRECT(h->sub_mb_type[i])) {
4547 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4550 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4551 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4553 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4554 const int sub_mb_type= h->sub_mb_type[i];
4555 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4556 for(j=0; j<sub_partition_count[i]; j++){
4558 const int index= 4*i + block_width*j;
4559 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4560 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4561 mx += get_se_golomb(&s->gb);
4562 my += get_se_golomb(&s->gb);
4563 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
// Replicate the MV into the cache cells the partition covers.
4565 if(IS_SUB_8X8(sub_mb_type)){
4567 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4569 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4570 }else if(IS_SUB_8X4(sub_mb_type)){
4571 mv_cache[ 1 ][0]= mx;
4572 mv_cache[ 1 ][1]= my;
4573 }else if(IS_SUB_4X8(sub_mb_type)){
4574 mv_cache[ 8 ][0]= mx;
4575 mv_cache[ 8 ][1]= my;
4577 mv_cache[ 0 ][0]= mx;
4578 mv_cache[ 0 ][1]= my;
// List not used for this sub-block: zero the MV cache (elided fill).
4581 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4587 }else if(IS_DIRECT(mb_type)){
4588 pred_direct_motion(h, &mb_type);
4589 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
// --- inter, 16x16 / 16x8 / 8x16: refs then MVs, per partition. ---
4591 int list, mx, my, i;
4592 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4593 if(IS_16X16(mb_type)){
4594 for(list=0; list<h->list_count; list++){
4596 if(IS_DIR(mb_type, 0, list)){
4597 if(h->ref_count[list]==1){
4599 }else if(h->ref_count[list]==2){
4600 val= get_bits1(&s->gb)^1;
4602 val= get_ue_golomb_31(&s->gb);
4603 if(val >= h->ref_count[list]){
4604 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4609 val= LIST_NOT_USED&0xFF;
4610 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4612 for(list=0; list<h->list_count; list++){
4614 if(IS_DIR(mb_type, 0, list)){
4615 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4616 mx += get_se_golomb(&s->gb);
4617 my += get_se_golomb(&s->gb);
4618 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4620 val= pack16to32(mx,my);
4623 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4626 else if(IS_16X8(mb_type)){
4627 for(list=0; list<h->list_count; list++){
4630 if(IS_DIR(mb_type, i, list)){
4631 if(h->ref_count[list] == 1){
4633 }else if(h->ref_count[list] == 2){
4634 val= get_bits1(&s->gb)^1;
4636 val= get_ue_golomb_31(&s->gb);
4637 if(val >= h->ref_count[list]){
4638 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4643 val= LIST_NOT_USED&0xFF;
4644 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4647 for(list=0; list<h->list_count; list++){
4650 if(IS_DIR(mb_type, i, list)){
4651 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4652 mx += get_se_golomb(&s->gb);
4653 my += get_se_golomb(&s->gb);
4654 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4656 val= pack16to32(mx,my);
4659 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4663 assert(IS_8X16(mb_type));
4664 for(list=0; list<h->list_count; list++){
4667 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4668 if(h->ref_count[list]==1){
4670 }else if(h->ref_count[list]==2){
4671 val= get_bits1(&s->gb)^1;
4673 val= get_ue_golomb_31(&s->gb);
4674 if(val >= h->ref_count[list]){
4675 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4680 val= LIST_NOT_USED&0xFF;
4681 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4684 for(list=0; list<h->list_count; list++){
4687 if(IS_DIR(mb_type, i, list)){
4688 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4689 mx += get_se_golomb(&s->gb);
4690 my += get_se_golomb(&s->gb);
4691 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4693 val= pack16to32(mx,my);
4696 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4702 if(IS_INTER(mb_type))
4703 write_back_motion(h, mb_type);
// --- coded_block_pattern (Exp-Golomb-mapped; I16x16 carries it in
// mb_type instead). Gray tables handle CHROMA==0 builds. ---
4705 if(!IS_INTRA16x16(mb_type)){
4706 cbp= get_ue_golomb(&s->gb);
4708 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4713 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4714 else cbp= golomb_to_inter_cbp [cbp];
4716 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4717 else cbp= golomb_to_inter_cbp_gray[cbp];
// Inter transform_size_8x8_flag: only when some luma block is coded.
4722 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4723 if(get_bits1(&s->gb)){
4724 mb_type |= MB_TYPE_8x8DCT;
4725 h->cbp_table[mb_xy]= cbp;
4728 s->current_picture.mb_type[mb_xy]= mb_type;
// --- residuals: qp delta, scan selection, then per-block decode. ---
4730 if(cbp || IS_INTRA16x16(mb_type)){
4731 int i8x8, i4x4, chroma_idx;
4733 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4734 const uint8_t *scan, *scan8x8, *dc_scan;
4736 // fill_non_zero_count_cache(h);
4738 if(IS_INTERLACED(mb_type)){
// qscale==0 needs special scan tables (lossless-related _q0 variants).
4739 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4740 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4741 dc_scan= luma_dc_field_scan;
4743 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4744 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4745 dc_scan= luma_dc_zigzag_scan;
4748 dquant= get_se_golomb(&s->gb);
4750 if( dquant > 25 || dquant < -26 ){
4751 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
// QP wraps modulo 52 per the spec.
4755 s->qscale += dquant;
4756 if(((unsigned)s->qscale) > 51){
4757 if(s->qscale<0) s->qscale+= 52;
4758 else s->qscale-= 52;
4761 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4762 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4763 if(IS_INTRA16x16(mb_type)){
// I16x16: separate luma DC block, then 15-coefficient AC blocks.
4764 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4765 return -1; //FIXME continue if partitioned and other return -1 too
4768 assert((cbp&15) == 0 || (cbp&15) == 15);
4771 for(i8x8=0; i8x8<4; i8x8++){
4772 for(i4x4=0; i4x4<4; i4x4++){
4773 const int index= i4x4 + 4*i8x8;
4774 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4780 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4783 for(i8x8=0; i8x8<4; i8x8++){
4784 if(cbp & (1<<i8x8)){
4785 if(IS_8x8DCT(mb_type)){
// 8x8 transform: four interleaved 4x4 CAVLC scans share one buffer.
4786 DCTELEM *buf = &h->mb[64*i8x8];
4788 for(i4x4=0; i4x4<4; i4x4++){
4789 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4790 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4793 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4794 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4796 for(i4x4=0; i4x4<4; i4x4++){
4797 const int index= i4x4 + 4*i8x8;
4799 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4805 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4806 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// Chroma DC (cbp & 0x30), then chroma AC only if cbp & 0x20.
4812 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4813 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4819 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4820 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4821 for(i4x4=0; i4x4<4; i4x4++){
4822 const int index= 16 + 4*chroma_idx + i4x4;
4823 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4829 uint8_t * const nnz= &h->non_zero_count_cache[0];
4830 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4831 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4834 uint8_t * const nnz= &h->non_zero_count_cache[0];
4835 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4836 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4837 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4839 s->current_picture.qscale_table[mb_xy]= s->qscale;
4840 write_back_non_zero_count(h);
// Undo the MBAFF field ref-count doubling applied above.
4843 h->ref_count[0] >>= 1;
4844 h->ref_count[1] >>= 1;
/**
 * CABAC decode of mb_field_decoding_flag for an MBAFF pair.
 * Context (ctx 70..72) counts how many of the left/top neighboring MB
 * pairs in the same slice are field-coded; ctx increments (elided lines)
 * sit inside the two if bodies.
 */
4850 static int decode_cabac_field_decoding_flag(H264Context *h) {
4851 MpegEncContext * const s = &h->s;
4852 const int mb_x = s->mb_x;
// Address the top MB of the current pair (mb_y rounded down to even).
4853 const int mb_y = s->mb_y & ~1;
4854 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4855 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4857 unsigned int ctx = 0;
4859 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4862 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4866 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/**
 * CABAC decode of an intra mb_type.
 * @param ctx_base  base index into cabac_state (3 for I slices, 32 when
 *                  called for the intra range of P/B slices)
 * @param intra_slice nonzero in I slices: the first bin is then context-
 *                  modelled on whether the neighbors are non-I4x4
 * @return 0 for I_4x4, 1..24 for the I_16x16 variants, 25 for I_PCM
 */
4869 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4870 uint8_t *state= &h->cabac_state[ctx_base];
4874 MpegEncContext * const s = &h->s;
4875 const int mba_xy = h->left_mb_xy[0];
4876 const int mbb_xy = h->top_mb_xy;
// ctx increments (elided) follow each neighbor test.
4878 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4880 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4882 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4883 return 0; /* I4x4 */
// Non-intra-slice path (else branch): fixed context 0 for the first bin.
4886 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4887 return 0; /* I4x4 */
4890 if( get_cabac_terminate( &h->cabac ) )
4891 return 25; /* PCM */
// Assemble I_16x16 type: +12 per cbp_luma, +4/+8 for cbp_chroma,
// +2/+1 for the prediction mode bits.
4893 mb_type = 1; /* I16x16 */
4894 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4895 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4896 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4897 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4898 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/**
 * CABAC decode of mb_type in a B slice (contexts 27..31).
 * First bin (neighbor-conditioned) selects B_Direct_16x16; otherwise a
 * short prefix tree yields the 16x16/16x8/8x16/8x8 variants, with the
 * bits==13 branch escaping into the shared intra mb_type range.
 */
4902 static int decode_cabac_mb_type_b( H264Context *h ) {
4903 MpegEncContext * const s = &h->s;
4905 const int mba_xy = h->left_mb_xy[0];
4906 const int mbb_xy = h->top_mb_xy;
4909 assert(h->slice_type_nos == FF_B_TYPE);
// ctx 0..2: count same-slice neighbors that are not B_Direct (the
// increments are on elided lines).
4911 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4913 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4916 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4917 return 0; /* B_Direct_16x16 */
4919 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4920 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
// 4-bit suffix selects among the remaining two-list/partitioned types.
4923 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4924 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4925 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4926 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4928 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4929 else if( bits == 13 ) {
// Escape to intra types; 23 offsets into the B-slice intra range.
4930 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4931 } else if( bits == 14 )
4932 return 11; /* B_L1_L0_8x16 */
4933 else if( bits == 15 )
4934 return 22; /* B_8x8 */
// bits in {8..12}: one more bin disambiguates the Bi-prediction pairs.
4936 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4937 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
/**
 * CABAC decode of mb_skip_flag. Context (11..13 for P, 24..26 for B via
 * the +13 offset) counts non-skipped same-slice left/top neighbors; the
 * MBAFF branch adjusts neighbor addresses to compare like-parity MBs.
 */
4940 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4941 MpegEncContext * const s = &h->s;
4945 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4946 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
// If the left pair is field-coded differently, step to its bottom MB.
4949 && h->slice_table[mba_xy] == h->slice_num
4950 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] )
4951 mba_xy += s->mb_stride;
4953 mbb_xy = mb_xy - s->mb_stride;
4955 && h->slice_table[mbb_xy] == h->slice_num
4956 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4957 mbb_xy -= s->mb_stride;
4959 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
// Non-MBAFF: simple left / above (stride doubled for field pictures).
4961 int mb_xy = h->mb_xy;
4963 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
// ctx increments for non-skipped neighbors are on elided lines.
4966 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4968 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
4971 if( h->slice_type_nos == FF_B_TYPE )
4973 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/**
 * CABAC decode of a 4x4 intra prediction mode.
 * First bin (ctx 68): use the predicted mode as-is. Otherwise read a
 * 3-bit rem mode (ctx 69, bypass-like fixed context) and skip over the
 * predicted value so all 9 modes remain addressable.
 */
4976 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4979 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
4982 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4983 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4984 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
// Adjust for the excluded predicted mode (rem_mode >= pred_mode rule).
4986 if( mode >= pred_mode )
/**
 * CABAC decode of intra_chroma_pred_mode (contexts 64..66 for the first
 * bin, 67-equivalent via 64+3 for the truncated-unary suffix).
 * @return 0..3 chroma prediction mode
 */
4992 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
4993 const int mba_xy = h->left_mb_xy[0];
4994 const int mbb_xy = h->top_mb_xy;
4998 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
// ctx counts same-slice neighbors using a non-DC chroma mode
// (increments on elided lines).
4999 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5002 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5005 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
// Truncated unary, max 3: stop after each zero bin (returns elided).
5008 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5010 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/**
 * CABAC decode of the 4-bit luma coded_block_pattern (contexts 73..76).
 * Each 8x8 block's bin is conditioned on the coded state of its left and
 * top 8x8 neighbors — from the neighboring MBs' cbp for the border
 * blocks, from bits already decoded here for the inner ones. -1 (no
 * same-slice neighbor) makes the !(...) tests evaluate as "coded".
 */
5016 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5017 int cbp_b, cbp_a, ctx, cbp = 0;
5019 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5020 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
5022 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5023 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
5024 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
5025 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
5026 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
5027 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
5028 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
5029 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
/**
 * CABAC decode of the chroma coded_block_pattern (contexts 77..80).
 * Two bins: "any chroma coded?" then "AC coded too?", each conditioned
 * on the neighbors' chroma cbp (bits 4-5 of their stored cbp).
 * @return 0 = none, 1 = DC only, 2 = DC+AC
 */
5032 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5036 cbp_a = (h->left_cbp>>4)&0x03;
5037 cbp_b = (h-> top_cbp>>4)&0x03;
5040 if( cbp_a > 0 ) ctx++;
5041 if( cbp_b > 0 ) ctx += 2;
5042 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
// Second bin: neighbors with full (DC+AC) chroma raise the context.
5046 if( cbp_a == 2 ) ctx++;
5047 if( cbp_b == 2 ) ctx += 2;
5048 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/**
 * CABAC decode of mb_qp_delta (contexts 60..63): unary-coded magnitude,
 * first bin conditioned on whether the previous MB changed QP.
 * The decoded unary value maps to signed delta via (val+1)>>1 with
 * alternating sign. Capped at 102 iterations to stop corrupt streams.
 */
5050 static int decode_cabac_mb_dqp( H264Context *h) {
5051 int ctx= h->last_qscale_diff != 0;
5054 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5057 if(val > 102) //prevent infinite loop
// Even unary count -> positive delta, odd -> negative.
5062 return (val + 1)>>1 ;
5064 return -((val + 1)>>1);
/**
 * CABAC decode of sub_mb_type in a P slice (contexts 21..23).
 * Small binary tree over the four P sub-types (8x8, 8x4, 4x8, 4x4);
 * the leaf return statements sit on elided lines.
 */
5066 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5067 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5069 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5071 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/**
 * CABAC decode of sub_mb_type in a B slice (contexts 36..39).
 * Prefix tree: direct, then single-list 8x8, then the partitioned and
 * bi-predicted variants assembled bit by bit into `type`.
 * @return 0..12 index into b_sub_mb_type_info
 */
5075 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5077 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5078 return 0; /* B_Direct_8x8 */
5079 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5080 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5082 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5083 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5084 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
// Remaining types: two suffix bins added onto the branch base
// (base assignments are on elided lines).
5087 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5088 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/**
 * CABAC decode of transform_size_8x8_flag (contexts 399..401),
 * conditioned on how many neighbors already use the 8x8 transform
 * (h->neighbor_transform_size, 0..2).
 */
5092 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5093 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/**
 * CABAC decode of ref_idx for list `list`, block `n` (contexts 54+).
 * Context derives from the left/top cached ref indices; in B slices,
 * direct-predicted neighbors are treated as ref 0 for context purposes.
 * Unary-coded; capped at 32 to bail out on corrupt streams (error
 * handling lines elided).
 */
5096 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5097 int refa = h->ref_cache[list][scan8[n] - 1];
5098 int refb = h->ref_cache[list][scan8[n] - 8];
5102 if( h->slice_type_nos == FF_B_TYPE) {
5103 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5105 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5114 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5117 if(ref >= 32 /*h->ref_list[list]*/){
/**
 * CABAC decode of one motion vector difference component.
 * @param l 0 = horizontal (ctx base 40), 1 = vertical (ctx base 47)
 * Context from the summed |mvd| of left+top neighbors; magnitude is
 * truncated unary (9 bins max) followed by 3rd-order Exp-Golomb bypass
 * bits, then a bypass-coded sign.
 */
5124 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5125 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5126 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5127 int ctxbase = (l == 0) ? 40 : 47;
5129 int ctx = (amvd>2) + (amvd>32);
5131 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
// Unary part: contexts advance with each decoded bin (elided lines).
5136 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
// Exp-Golomb suffix in bypass mode, with overflow protection.
5144 while( get_cabac_bypass( &h->cabac ) ) {
5148 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5153 if( get_cabac_bypass( &h->cabac ) )
5157 return get_cabac_bypass_sign( &h->cabac, -mvd );
/**
 * Computes the coded_block_flag context increment for block category
 * `cat` and block index `idx`: ctx = nza_coded + 2*nzb_coded + 4*cat,
 * added to base 85 by the caller. DC flags (is_dc) live in bit 8 of the
 * neighbors' cbp; chroma DC uses bits 6+idx; AC/luma blocks read the
 * non_zero_count_cache directly.
 */
5160 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
5166 nza = h->left_cbp&0x100;
5167 nzb = h-> top_cbp&0x100;
5169 nza = (h->left_cbp>>(6+idx))&0x01;
5170 nzb = (h-> top_cbp>>(6+idx))&0x01;
5173 assert(cat == 1 || cat == 2 || cat == 4);
5174 nza = h->non_zero_count_cache[scan8[idx] - 1];
5175 nzb = h->non_zero_count_cache[scan8[idx] - 8];
// ctx assembly from nza/nzb happens on elided lines before this return.
5184 return ctx + 4 * cat;
/* Maps an 8x8 scan position (0..62) to its last_significant_coeff_flag
 * context offset (H.264 spec table for ctxBlockCat 5). Declared via
 * DECLARE_ASM_CONST so inline-asm significance decoders can address it. */
5187 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5188 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5189 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5190 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5191 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5194 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
5195 static const int significant_coeff_flag_offset[2][6] = {
5196 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5197 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5199 static const int last_coeff_flag_offset[2][6] = {
5200 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5201 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5203 static const int coeff_abs_level_m1_offset[6] = {
5204 227+0, 227+10, 227+20, 227+30, 227+39, 426
5206 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5207 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5208 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5209 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5210 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5211 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5212 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5213 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5214 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5216 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5217 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5218 * map node ctx => cabac ctx for level=1 */
5219 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5220 /* map node ctx => cabac ctx for level>1 */
5221 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5222 static const uint8_t coeff_abs_level_transition[2][8] = {
5223 /* update node ctx after decoding a level=1 */
5224 { 1, 2, 3, 3, 4, 5, 6, 7 },
5225 /* update node ctx after decoding a level>1 */
5226 { 4, 4, 4, 4, 5, 6, 7, 7 }
/* Fragment of decode_cabac_residual_internal(): CABAC decoding of one
 * residual block — coded_block_flag, significance map, then coefficient
 * levels and signs (last significant coefficient first).
 * NOTE(review): this listing is sampled; the function signature and several
 * interior lines (braces, #else/#endif, declarations) are absent here. */
5232 int coeff_count = 0;
5235 uint8_t *significant_coeff_ctx_base;
5236 uint8_t *last_coeff_ctx_base;
5237 uint8_t *abs_level_m1_ctx_base;
/* Work on a local copy of the CABAC state (cc) so the hot loops avoid
 * repeatedly dereferencing h->cabac; CC abstracts over both variants. */
5240 #define CABAC_ON_STACK
5242 #ifdef CABAC_ON_STACK
5245 cc.range = h->cabac.range;
5246 cc.low = h->cabac.low;
5247 cc.bytestream= h->cabac.bytestream;
5249 #define CC &h->cabac
5253 /* cat: 0-> DC 16x16 n = 0
5254 * 1-> AC 16x16 n = luma4x4idx
5255 * 2-> Luma4x4 n = luma4x4idx
5256 * 3-> DC Chroma n = iCbCr
5257 * 4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
5258 * 5-> Luma8x8 n = 4 * luma8x8idx
5261 /* read coded block flag */
5262 if( is_dc || cat != 5 ) {
5263 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
/* coded_block_flag == 0: block is empty, write the CABAC state back out. */
5265 h->non_zero_count_cache[scan8[n]] = 0;
5267 #ifdef CABAC_ON_STACK
5268 h->cabac.range = cc.range ;
5269 h->cabac.low = cc.low ;
5270 h->cabac.bytestream= cc.bytestream;
/* Pick the context sets for this block category and frame/field mode. */
5276 significant_coeff_ctx_base = h->cabac_state
5277 + significant_coeff_flag_offset[MB_FIELD][cat];
5278 last_coeff_ctx_base = h->cabac_state
5279 + last_coeff_flag_offset[MB_FIELD][cat];
5280 abs_level_m1_ctx_base = h->cabac_state
5281 + coeff_abs_level_m1_offset[cat];
5283 if( !is_dc && cat == 5 ) {
/* Significance map: record each significant position in index[]; a set
 * last_significant_coeff_flag terminates the scan early. */
5284 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5285 for(last= 0; last < coefs; last++) { \
5286 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5287 if( get_cabac( CC, sig_ctx )) { \
5288 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5289 index[coeff_count++] = last; \
5290 if( get_cabac( CC, last_ctx ) ) { \
5296 if( last == max_coeff -1 ) {\
5297 index[coeff_count++] = last;\
/* 8x8 blocks remap scan positions to contexts via the table above. */
5299 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5300 #if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS)
5301 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5303 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5305 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5307 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5310 assert(coeff_count > 0);
/* Mark this block as coded in the cbp table / non-zero-count cache. */
5314 h->cbp_table[h->mb_xy] |= 0x100;
5316 h->cbp_table[h->mb_xy] |= 0x40 << n;
5319 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5321 assert( cat == 1 || cat == 2 || cat == 4 );
5322 h->non_zero_count_cache[scan8[n]] = coeff_count;
/* Decode magnitudes/signs in reverse scan order; qmul (when non-NULL)
 * dequantizes with rounding via the (x + 32) >> 6 step. */
5327 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5329 int j= scantable[index[--coeff_count]];
5331 if( get_cabac( CC, ctx ) == 0 ) {
/* |level| == 1 */
5332 node_ctx = coeff_abs_level_transition[0][node_ctx];
5334 block[j] = get_cabac_bypass_sign( CC, -1);
5336 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
/* |level| > 1: unary prefix up to 14, then bypass-coded Exp-Golomb tail. */
5340 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5341 node_ctx = coeff_abs_level_transition[1][node_ctx];
5343 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5347 if( coeff_abs >= 15 ) {
5349 while( get_cabac_bypass( CC ) ) {
5355 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5361 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5363 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5366 } while( coeff_count );
/* Flush the local CABAC state back into the context. */
5367 #ifdef CABAC_ON_STACK
5368 h->cabac.range = cc.range ;
5369 h->cabac.low = cc.low ;
5370 h->cabac.bytestream= cc.bytestream;
5376 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5377 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
5380 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5381 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
5385 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5387 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5389 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5390 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
5394 static inline void compute_mb_neighbors(H264Context *h)
5396 MpegEncContext * const s = &h->s;
5397 const int mb_xy = h->mb_xy;
5398 h->top_mb_xy = mb_xy - s->mb_stride;
5399 h->left_mb_xy[0] = mb_xy - 1;
5401 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5402 const int top_pair_xy = pair_xy - s->mb_stride;
5403 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5404 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5405 const int curr_mb_field_flag = MB_FIELD;
5406 const int bottom = (s->mb_y & 1);
5408 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
5409 h->top_mb_xy -= s->mb_stride;
5411 if (!left_mb_field_flag == curr_mb_field_flag) {
5412 h->left_mb_xy[0] = pair_xy - 1;
5414 } else if (FIELD_PICTURE) {
5415 h->top_mb_xy -= s->mb_stride;
/**
5421 * decodes a macroblock
5422 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
 */
/* NOTE(review): sampled listing — numerous original lines (declarations,
 * braces, else-branches, error returns) are absent from this fragment. */
5424 static int decode_mb_cabac(H264Context *h) {
5425 MpegEncContext * const s = &h->s;
5427 int mb_type, partition_count, cbp = 0;
5428 int dct8x8_allowed= h->pps.transform_8x8_mode;
5430 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5432 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* --- skip flags (P/B slices only); MBAFF needs the skip flag of the
 * bottom MB of the pair before the field/frame decision can be made. --- */
5433 if( h->slice_type_nos != FF_I_TYPE ) {
5435 /* a skipped mb needs the aff flag from the following mb */
5436 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5437 predict_field_decoding_flag(h);
5438 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5439 skip = h->next_mb_skipped;
5441 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5442 /* read skip flags */
5444 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5445 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5446 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5447 if(!h->next_mb_skipped)
5448 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
/* Skipped MB: reset per-MB side info and bail out of residual decoding. */
5453 h->cbp_table[mb_xy] = 0;
5454 h->chroma_pred_mode_table[mb_xy] = 0;
5455 h->last_qscale_diff = 0;
5462 if( (s->mb_y&1) == 0 )
5464 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5467 h->prev_mb_skipped = 0;
5469 compute_mb_neighbors(h);
/* --- macroblock type, per slice type --- */
5471 if( h->slice_type_nos == FF_B_TYPE ) {
5472 mb_type = decode_cabac_mb_type_b( h );
5474 partition_count= b_mb_type_info[mb_type].partition_count;
5475 mb_type= b_mb_type_info[mb_type].type;
5478 goto decode_intra_mb;
5480 } else if( h->slice_type_nos == FF_P_TYPE ) {
5481 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5483 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5484 /* P_L0_D16x16, P_8x8 */
5485 mb_type= 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5487 /* P_L0_D8x16, P_L0_D16x8 */
5488 mb_type= 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
5490 partition_count= p_mb_type_info[mb_type].partition_count;
5491 mb_type= p_mb_type_info[mb_type].type;
5493 mb_type= decode_cabac_intra_mb_type(h, 17, 0);
5494 goto decode_intra_mb;
5497 mb_type= decode_cabac_intra_mb_type(h, 3, 1);
5498 if(h->slice_type == FF_SI_TYPE && mb_type)
5500 assert(h->slice_type_nos == FF_I_TYPE);
5502 partition_count = 0;
5503 cbp= i_mb_type_info[mb_type].cbp;
5504 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5505 mb_type= i_mb_type_info[mb_type].type;
5508 mb_type |= MB_TYPE_INTERLACED;
5510 h->slice_table[ mb_xy ]= h->slice_num;
/* --- IPCM: raw samples follow; re-sync the CABAC decoder afterwards. --- */
5512 if(IS_INTRA_PCM(mb_type)) {
5515 // We assume these blocks are very rare so we do not optimize it.
5516 // FIXME The two following lines get the bitstream position in the cabac
5517 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5518 ptr= h->cabac.bytestream;
5519 if(h->cabac.low&0x1) ptr--;
5521 if(h->cabac.low&0x1FF) ptr--;
5524 // The pixels are stored in the same order as levels in h->mb array.
5525 memcpy(h->mb, ptr, 256); ptr+=256;
5527 memcpy(h->mb+128, ptr, 128); ptr+=128;
5530 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5532 // All blocks are present
5533 h->cbp_table[mb_xy] = 0x1ef;
5534 h->chroma_pred_mode_table[mb_xy] = 0;
5535 // In deblocking, the quantizer is 0
5536 s->current_picture.qscale_table[mb_xy]= 0;
5537 // All coeffs are present
5538 memset(h->non_zero_count[mb_xy], 16, 16);
5539 s->current_picture.mb_type[mb_xy]= mb_type;
5540 h->last_qscale_diff = 0;
/* MBAFF pairs share reference lists; temporarily double the counts. */
5545 h->ref_count[0] <<= 1;
5546 h->ref_count[1] <<= 1;
5549 fill_caches(h, mb_type, 0);
/* --- prediction info: intra modes, or sub-MB/inter motion --- */
5551 if( IS_INTRA( mb_type ) ) {
5553 if( IS_INTRA4x4( mb_type ) ) {
5554 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5555 mb_type |= MB_TYPE_8x8DCT;
5556 for( i = 0; i < 16; i+=4 ) {
5557 int pred = pred_intra_mode( h, i );
5558 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5559 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5562 for( i = 0; i < 16; i++ ) {
5563 int pred = pred_intra_mode( h, i );
5564 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5566 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5569 write_back_intra_pred_mode(h);
5570 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5572 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5573 if( h->intra16x16_pred_mode < 0 ) return -1;
5576 h->chroma_pred_mode_table[mb_xy] =
5577 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5579 pred_mode= check_intra_pred_mode( h, pred_mode );
5580 if( pred_mode < 0 ) return -1;
5581 h->chroma_pred_mode= pred_mode;
/* 8x8 partitioned inter MB: sub-MB types, refs, then MVs per partition. */
5583 } else if( partition_count == 4 ) {
5584 int i, j, sub_partition_count[4], list, ref[2][4];
5586 if( h->slice_type_nos == FF_B_TYPE ) {
5587 for( i = 0; i < 4; i++ ) {
5588 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5589 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5590 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5592 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5593 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5594 pred_direct_motion(h, &mb_type);
5595 h->ref_cache[0][scan8[4]] =
5596 h->ref_cache[1][scan8[4]] =
5597 h->ref_cache[0][scan8[12]] =
5598 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5599 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5600 for( i = 0; i < 4; i++ )
5601 if( IS_DIRECT(h->sub_mb_type[i]) )
5602 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5606 for( i = 0; i < 4; i++ ) {
5607 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5608 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5609 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* Reference indices for each 8x8 partition (validated against ref_count). */
5613 for( list = 0; list < h->list_count; list++ ) {
5614 for( i = 0; i < 4; i++ ) {
5615 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5616 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5617 if( h->ref_count[list] > 1 ){
5618 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5619 if(ref[list][i] >= (unsigned)h->ref_count[list]){
5620 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
5628 h->ref_cache[list][ scan8[4*i]+1 ]=
5629 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5634 dct8x8_allowed = get_dct8x8_allowed(h);
/* Motion vectors (and MV deltas for the loop filter) per sub-partition. */
5636 for(list=0; list<h->list_count; list++){
5638 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5639 if(IS_DIRECT(h->sub_mb_type[i])){
5640 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5644 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5645 const int sub_mb_type= h->sub_mb_type[i];
5646 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5647 for(j=0; j<sub_partition_count[i]; j++){
5650 const int index= 4*i + block_width*j;
5651 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5652 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5653 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5655 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5656 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5657 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5659 if(IS_SUB_8X8(sub_mb_type)){
5661 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5663 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5666 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5668 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5669 }else if(IS_SUB_8X4(sub_mb_type)){
5670 mv_cache[ 1 ][0]= mx;
5671 mv_cache[ 1 ][1]= my;
5673 mvd_cache[ 1 ][0]= mx - mpx;
5674 mvd_cache[ 1 ][1]= my - mpy;
5675 }else if(IS_SUB_4X8(sub_mb_type)){
5676 mv_cache[ 8 ][0]= mx;
5677 mv_cache[ 8 ][1]= my;
5679 mvd_cache[ 8 ][0]= mx - mpx;
5680 mvd_cache[ 8 ][1]= my - mpy;
5682 mv_cache[ 0 ][0]= mx;
5683 mv_cache[ 0 ][1]= my;
5685 mvd_cache[ 0 ][0]= mx - mpx;
5686 mvd_cache[ 0 ][1]= my - mpy;
5689 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5690 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5691 p[0] = p[1] = p[8] = p[9] = 0;
5692 pd[0]= pd[1]= pd[8]= pd[9]= 0;
/* B-direct 16x16: motion comes from co-located/ spatial prediction. */
5696 } else if( IS_DIRECT(mb_type) ) {
5697 pred_direct_motion(h, &mb_type);
5698 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5699 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5700 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* 16x16 / 16x8 / 8x16 inter partitions: refs then MVs per partition. */
5702 int list, mx, my, i, mpx, mpy;
5703 if(IS_16X16(mb_type)){
5704 for(list=0; list<h->list_count; list++){
5705 if(IS_DIR(mb_type, 0, list)){
5707 if(h->ref_count[list] > 1){
5708 ref= decode_cabac_mb_ref(h, list, 0);
5709 if(ref >= (unsigned)h->ref_count[list]){
5710 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5715 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5717 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5719 for(list=0; list<h->list_count; list++){
5720 if(IS_DIR(mb_type, 0, list)){
5721 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5723 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5724 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5725 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5727 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5728 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5730 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5733 else if(IS_16X8(mb_type)){
5734 for(list=0; list<h->list_count; list++){
5736 if(IS_DIR(mb_type, i, list)){
5738 if(h->ref_count[list] > 1){
5739 ref= decode_cabac_mb_ref( h, list, 8*i );
5740 if(ref >= (unsigned)h->ref_count[list]){
5741 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5746 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5748 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5751 for(list=0; list<h->list_count; list++){
5753 if(IS_DIR(mb_type, i, list)){
5754 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5755 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5756 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5757 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5759 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5760 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5762 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5763 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5768 assert(IS_8X16(mb_type));
5769 for(list=0; list<h->list_count; list++){
5771 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5773 if(h->ref_count[list] > 1){
5774 ref= decode_cabac_mb_ref( h, list, 4*i );
5775 if(ref >= (unsigned)h->ref_count[list]){
5776 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5781 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5783 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5786 for(list=0; list<h->list_count; list++){
5788 if(IS_DIR(mb_type, i, list)){
5789 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5790 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5791 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5793 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5794 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5795 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5797 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5798 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5805 if( IS_INTER( mb_type ) ) {
5806 h->chroma_pred_mode_table[mb_xy] = 0;
5807 write_back_motion( h, mb_type );
/* --- coded block pattern, transform size, then residuals --- */
5810 if( !IS_INTRA16x16( mb_type ) ) {
5811 cbp = decode_cabac_mb_cbp_luma( h );
5813 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5816 h->cbp_table[mb_xy] = h->cbp = cbp;
5818 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5819 if( decode_cabac_mb_transform_size( h ) )
5820 mb_type |= MB_TYPE_8x8DCT;
5822 s->current_picture.mb_type[mb_xy]= mb_type;
5824 if( cbp || IS_INTRA16x16( mb_type ) ) {
5825 const uint8_t *scan, *scan8x8, *dc_scan;
5826 const uint32_t *qmul;
5829 if(IS_INTERLACED(mb_type)){
5830 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5831 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5832 dc_scan= luma_dc_field_scan;
5834 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5835 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5836 dc_scan= luma_dc_zigzag_scan;
/* Quantizer delta, with wraparound into the legal 0..51 range. */
5839 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5840 if( dqp == INT_MIN ){
5841 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
5845 if(((unsigned)s->qscale) > 51){
5846 if(s->qscale<0) s->qscale+= 52;
5847 else s->qscale-= 52;
5849 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5850 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
/* Luma residuals: Intra16x16 DC+AC, or per-8x8 (8x8DCT or 4x4 blocks). */
5852 if( IS_INTRA16x16( mb_type ) ) {
5854 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5855 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5858 qmul = h->dequant4_coeff[0][s->qscale];
5859 for( i = 0; i < 16; i++ ) {
5860 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5861 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5864 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5868 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5869 if( cbp & (1<<i8x8) ) {
5870 if( IS_8x8DCT(mb_type) ) {
5871 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5872 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5874 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5875 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5876 const int index = 4*i8x8 + i4x4;
5877 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5879 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5880 //STOP_TIMER("decode_residual")
5884 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5885 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* Chroma residuals: DC (always when chroma cbp set) then AC blocks. */
5892 for( c = 0; c < 2; c++ ) {
5893 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5894 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5900 for( c = 0; c < 2; c++ ) {
5901 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5902 for( i = 0; i < 4; i++ ) {
5903 const int index = 16 + 4 * c + i;
5904 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5905 decode_cabac_residual(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
5909 uint8_t * const nnz= &h->non_zero_count_cache[0];
5910 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5911 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* No residuals at all: clear the non-zero-count cache. */
5914 uint8_t * const nnz= &h->non_zero_count_cache[0];
5915 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5916 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5917 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5918 h->last_qscale_diff = 0;
5921 s->current_picture.qscale_table[mb_xy]= s->qscale;
5922 write_back_non_zero_count(h);
/* Undo the MBAFF ref_count doubling from above. */
5925 h->ref_count[0] >>= 1;
5926 h->ref_count[1] >>= 1;
5933 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5934 const int index_a = qp + h->slice_alpha_c0_offset;
5935 const int alpha = (alpha_table+52)[index_a];
5936 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5940 tc[0] = (tc0_table+52)[index_a][bS[0]];
5941 tc[1] = (tc0_table+52)[index_a][bS[1]];
5942 tc[2] = (tc0_table+52)[index_a][bS[2]];
5943 tc[3] = (tc0_table+52)[index_a][bS[3]];
5944 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5946 h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
5949 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5950 const int index_a = qp + h->slice_alpha_c0_offset;
5951 const int alpha = (alpha_table+52)[index_a];
5952 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5956 tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
5957 tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
5958 tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
5959 tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
5960 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5962 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Scalar per-row vertical luma deblocking for MBAFF edges, where adjacent
 * rows may belong to different fields and so can carry different bS/qp.
 * NOTE(review): sampled listing — local declarations (i, qp_index, index_a,
 * alpha, beta, tc, i_delta), several braces and else-lines are absent. */
5966 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5968 for( i = 0; i < 16; i++, pix += stride) {
5974 int bS_index = (i >> 1);
5977 bS_index |= (i & 1);
5980 if( bS[bS_index] == 0 ) {
/* Per-row qp selection: field mode splits top/bottom, frame mode alternates. */
5984 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
5985 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5986 alpha = (alpha_table+52)[index_a];
5987 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* bS < 4: normal filter, clipped by tc0. */
5989 if( bS[bS_index] < 4 ) {
5990 const int tc0 = (tc0_table+52)[index_a][bS[bS_index]];
5991 const int p0 = pix[-1];
5992 const int p1 = pix[-2];
5993 const int p2 = pix[-3];
5994 const int q0 = pix[0];
5995 const int q1 = pix[1];
5996 const int q2 = pix[2];
5998 if( FFABS( p0 - q0 ) < alpha &&
5999 FFABS( p1 - p0 ) < beta &&
6000 FFABS( q1 - q0 ) < beta ) {
6004 if( FFABS( p2 - p0 ) < beta ) {
6005 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6008 if( FFABS( q2 - q0 ) < beta ) {
6009 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6013 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6014 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6015 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6016 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4: strong (intra) filter, optionally extending to p3/q3. */
6019 const int p0 = pix[-1];
6020 const int p1 = pix[-2];
6021 const int p2 = pix[-3];
6023 const int q0 = pix[0];
6024 const int q1 = pix[1];
6025 const int q2 = pix[2];
6027 if( FFABS( p0 - q0 ) < alpha &&
6028 FFABS( p1 - p0 ) < beta &&
6029 FFABS( q1 - q0 ) < beta ) {
6031 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6032 if( FFABS( p2 - p0 ) < beta)
6034 const int p3 = pix[-4];
6036 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6037 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6038 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6041 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6043 if( FFABS( q2 - q0 ) < beta)
6045 const int q3 = pix[3];
6047 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6048 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6049 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6052 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6056 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6057 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6059 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Scalar per-row vertical chroma deblocking for MBAFF edges (8 chroma rows;
 * chroma filters only p0/q0 and uses tc0 + 1).
 * NOTE(review): sampled listing — local declarations (i, bS_index, qp_index,
 * index_a, alpha, beta), braces and else-lines are absent here. */
6064 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6066 for( i = 0; i < 8; i++, pix += stride) {
6074 if( bS[bS_index] == 0 ) {
6078 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6079 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6080 alpha = (alpha_table+52)[index_a];
6081 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* bS < 4: normal chroma filter, delta clipped to +-tc (= tc0 + 1). */
6083 if( bS[bS_index] < 4 ) {
6084 const int tc = (tc0_table+52)[index_a][bS[bS_index]] + 1;
6085 const int p0 = pix[-1];
6086 const int p1 = pix[-2];
6087 const int q0 = pix[0];
6088 const int q1 = pix[1];
6090 if( FFABS( p0 - q0 ) < alpha &&
6091 FFABS( p1 - p0 ) < beta &&
6092 FFABS( q1 - q0 ) < beta ) {
6093 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6095 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6096 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6097 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4: strong chroma filter (no tc clipping). */
6100 const int p0 = pix[-1];
6101 const int p1 = pix[-2];
6102 const int q0 = pix[0];
6103 const int q1 = pix[1];
6105 if( FFABS( p0 - q0 ) < alpha &&
6106 FFABS( p1 - p0 ) < beta &&
6107 FFABS( q1 - q0 ) < beta ) {
6109 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6110 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6111 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6117 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6118 const int index_a = qp + h->slice_alpha_c0_offset;
6119 const int alpha = (alpha_table+52)[index_a];
6120 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6124 tc[0] = (tc0_table+52)[index_a][bS[0]];
6125 tc[1] = (tc0_table+52)[index_a][bS[1]];
6126 tc[2] = (tc0_table+52)[index_a][bS[2]];
6127 tc[3] = (tc0_table+52)[index_a][bS[3]];
6128 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6130 h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
6134 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6135 const int index_a = qp + h->slice_alpha_c0_offset;
6136 const int alpha = (alpha_table+52)[index_a];
6137 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6141 tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
6142 tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
6143 tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
6144 tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
6145 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6147 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast-path deblocking for one macroblock: falls back to the full filter_mb()
 * for cases it cannot handle, skips entirely when all edge qps are below
 * threshold, and otherwise computes boundary strengths via the DSP helper.
 * NOTE(review): sampled listing — declarations (mb_xy, mb_type, edges),
 * returns, braces and the tail of the function are absent from this view. */
6151 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6152 MpegEncContext * const s = &h->s;
6153 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6155 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
/* Unsupported configurations are delegated to the full (slow) filter. */
6159 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6160 !(s->flags2 & CODEC_FLAG2_FAST) || //FIXME filter_mb_fast is broken, thus hasto be, but should not under CODEC_FLAG2_FAST
6161 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6162 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6163 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6166 assert(!FRAME_MBAFF);
/* Average the qp with each neighbour for the shared edges. */
6168 mb_type = s->current_picture.mb_type[mb_xy];
6169 qp = s->current_picture.qscale_table[mb_xy];
6170 qp0 = s->current_picture.qscale_table[mb_xy-1];
6171 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6172 qpc = get_chroma_qp( h, 0, qp );
6173 qpc0 = get_chroma_qp( h, 0, qp0 );
6174 qpc1 = get_chroma_qp( h, 0, qp1 );
6175 qp0 = (qp + qp0 + 1) >> 1;
6176 qp1 = (qp + qp1 + 1) >> 1;
6177 qpc0 = (qpc + qpc0 + 1) >> 1;
6178 qpc1 = (qpc + qpc1 + 1) >> 1;
/* If every qp is at/below threshold, alpha/beta are 0 — nothing to filter. */
6179 qp_thresh = 15 - h->slice_alpha_c0_offset;
6180 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6181 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* Intra MB: fixed bS (4 on MB edges, 3 inside; 3 on horizontal MB edges of
 * field pictures), so filter directly with constant strength vectors. */
6184 if( IS_INTRA(mb_type) ) {
6185 int16_t bS4[4] = {4,4,4,4};
6186 int16_t bS3[4] = {3,3,3,3};
6187 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6188 if( IS_8x8DCT(mb_type) ) {
6189 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6190 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6191 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6192 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6194 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6195 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6196 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6197 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6198 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6199 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6200 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6201 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6203 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6204 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6205 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6206 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6207 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6208 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6209 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6210 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* Inter MB: compute bS per edge (DSP helper), then force bS on MB borders
 * adjacent to intra neighbours, and apply via the FILTER macro. */
6213 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6214 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6216 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6218 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6220 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6221 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6222 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6223 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6225 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6226 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6227 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6228 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
6230 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6231 bSv[0][0] = 0x0004000400040004ULL;
6232 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6233 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
6235 #define FILTER(hv,dir,edge)\
6236 if(bSv[dir][edge]) {\
6237 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6239 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6240 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6246 } else if( IS_8x8DCT(mb_type) ) {
/* Deblock one direction of a macroblock: dir==0 filters vertical edges,
 * dir==1 filters horizontal edges.  Computes the boundary strength bS for
 * each 4-sample edge segment and invokes the filter_mb_edge* helpers.
 * NOTE(review): this listing is fragmentary (several original lines are
 * elided and each line carries a stray pasted-in number); code left
 * byte-identical, comments only. */
6266 static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
6267 MpegEncContext * const s = &h->s;
/* mbm_xy: the neighbouring macroblock on the far side of edge 0
 * (left neighbour for vertical filtering, top neighbour for horizontal). */
6269 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6270 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* ref2frm maps reference indices to frame numbers so references from
 * different slices can be compared; ref2frmm is the neighbour's table. */
6271 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6272 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
/* 0xFFFF slice_table entry means the neighbour is outside the picture:
 * skip edge 0 in that case. */
6273 int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
6275 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6276 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6277 // how often to recheck mv-based bS when iterating between edges
6278 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6279 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6280 // how often to recheck mv-based bS when iterating along each edge
6281 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6283 if (first_vertical_edge_done) {
/* deblocking_filter==2: do not filter across slice boundaries. */
6287 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6290 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6291 && !IS_INTERLACED(mb_type)
6292 && IS_INTERLACED(mbm_type)
6294 // This is a special case in the norm where the filtering must
6295 // be done twice (one each of the field) even if we are in a
6296 // frame macroblock.
6298 static const int nnz_idx[4] = {4,5,6,3};
6299 unsigned int tmp_linesize = 2 * linesize;
6300 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6301 int mbn_xy = mb_xy - 2 * s->mb_stride;
/* filter the top edge against each field of the interlaced neighbour pair */
6306 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6307 if( IS_INTRA(mb_type) ||
6308 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6309 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6311 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6312 for( i = 0; i < 4; i++ ) {
6313 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6314 mbn_nnz[nnz_idx[i]] != 0 )
6320 // Do not use s->qscale as luma quantizer because it has not the same
6321 // value in IPCM macroblocks.
6322 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6323 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6324 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6325 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6326 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6327 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6328 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6329 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* main per-edge loop: edge 0 borders the neighbour macroblock, edges 1..3
 * are internal to the current macroblock. */
6336 for( edge = start; edge < edges; edge++ ) {
6337 /* mbn_xy: neighbor macroblock */
6338 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6339 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6340 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
/* 8x8 transform: odd internal edges are not filtered. */
6344 if( (edge&1) && IS_8x8DCT(mb_type) )
6347 if( IS_INTRA(mb_type) ||
6348 IS_INTRA(mbn_type) ) {
6351 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6352 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6361 bS[0] = bS[1] = bS[2] = bS[3] = value;
6366 if( edge & mask_edge ) {
6367 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6370 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6371 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6374 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6375 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6376 int bn_idx= b_idx - (dir ? 8:1);
/* bS from motion: differing references or mv delta >= 4 (quarter-pel)
 * horizontally / mvy_limit vertically triggers filtering. */
6379 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6380 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6381 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6382 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
/* B slices: also compare each list against the neighbour's other list. */
6385 if(h->slice_type_nos == FF_B_TYPE && v){
6387 for( l = 0; !v && l < 2; l++ ) {
6389 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6390 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6391 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6395 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* general case: per-4x4-segment bS computation */
6401 for( i = 0; i < 4; i++ ) {
6402 int x = dir == 0 ? edge : i;
6403 int y = dir == 0 ? i : edge;
6404 int b_idx= 8 + 4 + x + 8*y;
6405 int bn_idx= b_idx - (dir ? 8:1);
6407 if( h->non_zero_count_cache[b_idx] |
6408 h->non_zero_count_cache[bn_idx] ) {
6414 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6415 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6416 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6417 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6423 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6425 for( l = 0; l < 2; l++ ) {
6427 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6428 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6429 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
/* all four segments zero: nothing to filter on this edge */
6438 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6443 // Do not use s->qscale as luma quantizer because it has not the same
6444 // value in IPCM macroblocks.
6445 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6446 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6447 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6448 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
/* vertical edges: chroma is filtered only on even edges (half resolution) */
6450 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6451 if( (edge&1) == 0 ) {
6452 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6453 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6454 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6455 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* horizontal edges: same even-edge rule for chroma */
6458 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6459 if( (edge&1) == 0 ) {
6460 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6461 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6462 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6463 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Full (non-fast-path) deblocking of one macroblock: handles the low-qp
 * early-out, the CAVLC 8x8dct non-zero-count fixup, the MBAFF mixed
 * frame/field first vertical edge, then delegates both directions to
 * filter_mb_dir().  NOTE(review): fragmentary listing — several original
 * lines are elided; code left byte-identical, comments only. */
6469 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6470 MpegEncContext * const s = &h->s;
6471 const int mb_xy= mb_x + mb_y*s->mb_stride;
6472 const int mb_type = s->current_picture.mb_type[mb_xy];
/* interlaced MBs use a tighter vertical mv threshold (field mv units) */
6473 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6474 int first_vertical_edge_done = 0;
6477 //for sufficiently low qp, filtering wouldn't do anything
6478 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6480 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6481 int qp = s->current_picture.qscale_table[mb_xy];
6483 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6484 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6489 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6490 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6491 int top_type, left_type[2];
6492 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6493 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6494 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
/* rebuild the nnz cache border rows/columns from the cbp of 8x8dct
 * neighbours, since their stored nnz are not what the filter expects */
6496 if(IS_8x8DCT(top_type)){
6497 h->non_zero_count_cache[4+8*0]=
6498 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6499 h->non_zero_count_cache[6+8*0]=
6500 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6502 if(IS_8x8DCT(left_type[0])){
6503 h->non_zero_count_cache[3+8*1]=
6504 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6506 if(IS_8x8DCT(left_type[1])){
6507 h->non_zero_count_cache[3+8*3]=
6508 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
/* current MB, 8x8dct: each 8x8 quadrant's nnz comes from one cbp bit */
6511 if(IS_8x8DCT(mb_type)){
6512 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6513 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1;
6515 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6516 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
6518 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6519 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
6521 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6522 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
6527 // left mb is in picture
6528 && h->slice_table[mb_xy-1] != 0xFFFF
6529 // and current and left pair do not have the same interlaced type
6530 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6531 // and left mb is in the same slice if deblocking_filter == 2
6532 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6533 /* First vertical edge is different in MBAFF frames
6534 * There are 8 different bS to compute and 2 different Qp
6536 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6537 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6542 int mb_qp, mbn0_qp, mbn1_qp;
6544 first_vertical_edge_done = 1;
6546 if( IS_INTRA(mb_type) )
6547 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6549 for( i = 0; i < 8; i++ ) {
6550 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6552 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6554 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6555 ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
6556 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
6558 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
/* average the qp of the current MB with each left-pair neighbour, for
 * luma (qp) and both chroma planes (bqp = Cb, rqp = Cr) */
6565 mb_qp = s->current_picture.qscale_table[mb_xy];
6566 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6567 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6568 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6569 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6570 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6571 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6572 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6573 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6574 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6575 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6576 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6577 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6580 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6581 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6582 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6583 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6584 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
/* finally run the ordinary per-direction filter; the unrolled pair of
 * calls below is the alternative to the loop form above */
6588 for( dir = 0; dir < 2; dir++ )
6589 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
6591 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
6592 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
/* Decode one slice: dispatches to the CABAC, CAVLC, or partitioned-frame
 * macroblock loops, advancing s->mb_x/mb_y and reporting decoded/errored
 * regions to the error concealment via ff_er_add_slice().
 * FIX(review): line 6758 contained mis-encoded '?' characters
 * ("s->?gb" / "s->gb?.size_in_bits"); restored to match the identical
 * construct on lines 6746/6759.
 * NOTE(review): fragmentary listing — several original lines are elided;
 * other code left byte-identical. */
6596 static int decode_slice(struct AVCodecContext *avctx, void *arg){
6597 H264Context *h = *(void**)arg;
6598 MpegEncContext * const s = &h->s;
/* in partitioned frames only AC errors/end markers are meaningful */
6599 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6603 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
6604 (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
6606 if( h->pps.cabac ) {
/* CABAC path: byte-align, then init the arithmetic decoder on the rest
 * of the slice data */
6610 align_get_bits( &s->gb );
6613 ff_init_cabac_states( &h->cabac);
6614 ff_init_cabac_decoder( &h->cabac,
6615 s->gb.buffer + get_bits_count(&s->gb)/8,
6616 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6617 /* calculate pre-state */
6618 for( i= 0; i < 460; i++ ) {
6620 if( h->slice_type_nos == FF_I_TYPE )
6621 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6623 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6626 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6628 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6633 int ret = decode_mb_cabac(h);
6635 //STOP_TIMER("decode_mb_cabac")
6637 if(ret>=0) hl_decode_mb(h);
/* MBAFF: decode the bottom MB of the pair as well */
6639 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6642 ret = decode_mb_cabac(h);
6644 if(ret>=0) hl_decode_mb(h);
6647 eos = get_cabac_terminate( &h->cabac );
/* bytestream overrun beyond the 2-byte slack means corrupt data */
6649 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6650 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6651 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6655 if( ++s->mb_x >= s->mb_width ) {
6657 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6659 if(FIELD_OR_MBAFF_PICTURE) {
6664 if( eos || s->mb_y >= s->mb_height ) {
6665 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6666 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC path */
6673 int ret = decode_mb_cavlc(h);
6675 if(ret>=0) hl_decode_mb(h);
6677 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6679 ret = decode_mb_cavlc(h);
6681 if(ret>=0) hl_decode_mb(h);
6686 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6687 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6692 if(++s->mb_x >= s->mb_width){
6694 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6696 if(FIELD_OR_MBAFF_PICTURE) {
6699 if(s->mb_y >= s->mb_height){
6700 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* exact bit consumption distinguishes a clean slice end from an error */
6702 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6703 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6707 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6714 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6715 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6716 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6717 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6721 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* third (partitioned) path */
6730 for(;s->mb_y < s->mb_height; s->mb_y++){
6731 for(;s->mb_x < s->mb_width; s->mb_x++){
6732 int ret= decode_mb(h);
6737 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6738 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6743 if(++s->mb_x >= s->mb_width){
6745 if(++s->mb_y >= s->mb_height){
6746 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6747 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6751 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6758 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6759 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6760 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6764 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6771 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6774 return -1; //not reached
/* Parse a picture-timing SEI message: CPB/DPB delays when HRD parameters
 * are present, then pic_struct and the optional per-clock timestamps.
 * NOTE(review): fragmentary listing — some original lines are elided;
 * code left byte-identical, comments only. */
6777 static int decode_picture_timing(H264Context *h){
6778 MpegEncContext * const s = &h->s;
6779 if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
6780 h->sei_cpb_removal_delay = get_bits(&s->gb, h->sps.cpb_removal_delay_length);
6781 h->sei_dpb_output_delay = get_bits(&s->gb, h->sps.dpb_output_delay_length);
6783 if(h->sps.pic_struct_present_flag){
6784 unsigned int i, num_clock_ts;
6785 h->sei_pic_struct = get_bits(&s->gb, 4);
/* values above FRAME_TRIPLING are reserved */
6787 if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
6790 num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];
6792 for (i = 0 ; i < num_clock_ts ; i++){
6793 if(get_bits(&s->gb, 1)){ /* clock_timestamp_flag */
6794 unsigned int full_timestamp_flag;
6795 skip_bits(&s->gb, 2); /* ct_type */
6796 skip_bits(&s->gb, 1); /* nuit_field_based_flag */
6797 skip_bits(&s->gb, 5); /* counting_type */
6798 full_timestamp_flag = get_bits(&s->gb, 1);
6799 skip_bits(&s->gb, 1); /* discontinuity_flag */
6800 skip_bits(&s->gb, 1); /* cnt_dropped_flag */
6801 skip_bits(&s->gb, 8); /* n_frames */
6802 if(full_timestamp_flag){
6803 skip_bits(&s->gb, 6); /* seconds_value 0..59 */
6804 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6805 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6807 if(get_bits(&s->gb, 1)){ /* seconds_flag */
6808 skip_bits(&s->gb, 6); /* seconds_value range 0..59 */
6809 if(get_bits(&s->gb, 1)){ /* minutes_flag */
6810 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6811 if(get_bits(&s->gb, 1)) /* hours_flag */
6812 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6816 if(h->sps.time_offset_length > 0)
6817 skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
/* Parse an unregistered user-data SEI payload of `size` bytes; detects an
 * x264 version banner to record h->x264_build for bug workarounds.
 * NOTE(review): fragmentary listing — some original lines are elided;
 * code left byte-identical, comments only. */
6824 static int decode_unregistered_user_data(H264Context *h, int size){
6825 MpegEncContext * const s = &h->s;
/* 16 bytes of UUID followed by up to 256 bytes of payload text */
6826 uint8_t user_data[16+256];
6832 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6833 user_data[i]= get_bits(&s->gb, 8);
/* payload text starts after the 16-byte UUID */
6837 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6838 if(e==1 && build>=0)
6839 h->x264_build= build;
6841 if(s->avctx->debug & FF_DEBUG_BUGS)
6842 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* skip whatever part of the payload did not fit the buffer */
6845 skip_bits(&s->gb, 8);
/* Parse a recovery-point SEI: store the recovery frame count and skip the
 * remaining flags.  NOTE(review): fragmentary listing; code left
 * byte-identical, comments only. */
6850 static int decode_recovery_point(H264Context *h){
6851 MpegEncContext * const s = &h->s;
6853 h->sei_recovery_frame_cnt = get_ue_golomb(&s->gb);
6854 skip_bits(&s->gb, 4); /* 1b exact_match_flag, 1b broken_link_flag, 2b changing_slice_group_idc */
/* Parse a buffering-period SEI: looks up the referenced SPS and reads the
 * initial CPB removal delays for each schedule selection index.
 * NOTE(review): fragmentary listing — some original lines are elided;
 * code left byte-identical, comments only. */
6859 static int decode_buffering_period(H264Context *h){
6860 MpegEncContext * const s = &h->s;
6861 unsigned int sps_id;
6865 sps_id = get_ue_golomb_31(&s->gb);
6866 if(sps_id > 31 || !h->sps_buffers[sps_id]) {
6867 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %d referenced in buffering period\n", sps_id);
6870 sps = h->sps_buffers[sps_id];
6872 // NOTE: This is really so duplicated in the standard... See H.264, D.1.1
6873 if (sps->nal_hrd_parameters_present_flag) {
6874 for (sched_sel_idx = 0; sched_sel_idx < sps->cpb_cnt; sched_sel_idx++) {
6875 h->initial_cpb_removal_delay[sched_sel_idx] = get_bits(&s->gb, sps->initial_cpb_removal_delay_length);
6876 skip_bits(&s->gb, sps->initial_cpb_removal_delay_length); // initial_cpb_removal_delay_offset
6879 if (sps->vcl_hrd_parameters_present_flag) {
6880 for (sched_sel_idx = 0; sched_sel_idx < sps->cpb_cnt; sched_sel_idx++) {
6881 h->initial_cpb_removal_delay[sched_sel_idx] = get_bits(&s->gb, sps->initial_cpb_removal_delay_length);
6882 skip_bits(&s->gb, sps->initial_cpb_removal_delay_length); // initial_cpb_removal_delay_offset
6886 h->sei_buffering_period_present = 1;
/* Top-level SEI NAL parser: reads (type, size) pairs — each encoded as a
 * run of 0xFF bytes plus a final byte — and dispatches to the specific
 * SEI decoders, skipping unknown payload types.
 * NOTE(review): fragmentary listing — some original lines are elided;
 * code left byte-identical, comments only. */
6890 int ff_h264_decode_sei(H264Context *h){
6891 MpegEncContext * const s = &h->s;
6893 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* ff_byte-extension coding: accumulate while bytes read 255 */
6898 type+= show_bits(&s->gb, 8);
6899 }while(get_bits(&s->gb, 8) == 255);
6903 size+= show_bits(&s->gb, 8);
6904 }while(get_bits(&s->gb, 8) == 255);
6907 case SEI_TYPE_PIC_TIMING: // Picture timing SEI
6908 if(decode_picture_timing(h) < 0)
6911 case SEI_TYPE_USER_DATA_UNREGISTERED:
6912 if(decode_unregistered_user_data(h, size) < 0)
6915 case SEI_TYPE_RECOVERY_POINT:
6916 if(decode_recovery_point(h) < 0)
6919 case SEI_BUFFERING_PERIOD:
6920 if(decode_buffering_period(h) < 0)
6924 skip_bits(&s->gb, 8*size);
6927 //FIXME check bits here
6928 align_get_bits(&s->gb);
/* Parse HRD parameters inside the VUI: cpb_cnt, per-CPB rate/size fields
 * (discarded), and the delay field lengths stored into the SPS.
 * NOTE(review): fragmentary listing — some original lines are elided;
 * code left byte-identical, comments only. */
6934 static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
6935 MpegEncContext * const s = &h->s;
6937 cpb_count = get_ue_golomb_31(&s->gb) + 1;
/* spec upper bound: cpb_cnt_minus1 in [0,31] */
6939 if(cpb_count > 32U){
6940 av_log(h->s.avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count);
6944 get_bits(&s->gb, 4); /* bit_rate_scale */
6945 get_bits(&s->gb, 4); /* cpb_size_scale */
6946 for(i=0; i<cpb_count; i++){
6947 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6948 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6949 get_bits1(&s->gb); /* cbr_flag */
6951 sps->initial_cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
6952 sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
6953 sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1;
6954 sps->time_offset_length = get_bits(&s->gb, 5);
6955 sps->cpb_cnt = cpb_count;
/* Parse the VUI block of an SPS: sample aspect ratio, video signal type,
 * chroma sample location, timing info, HRD parameters and bitstream
 * restrictions.  Fields the decoder does not need are read and discarded.
 * NOTE(review): fragmentary listing — some original lines are elided;
 * code left byte-identical, comments only. */
6959 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6960 MpegEncContext * const s = &h->s;
6961 int aspect_ratio_info_present_flag;
6962 unsigned int aspect_ratio_idc;
6964 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6966 if( aspect_ratio_info_present_flag ) {
6967 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR: explicit 16-bit num/den follow; otherwise use the table */
6968 if( aspect_ratio_idc == EXTENDED_SAR ) {
6969 sps->sar.num= get_bits(&s->gb, 16);
6970 sps->sar.den= get_bits(&s->gb, 16);
6971 }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
6972 sps->sar= pixel_aspect[aspect_ratio_idc];
6974 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6981 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6983 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6984 get_bits1(&s->gb); /* overscan_appropriate_flag */
6987 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6988 get_bits(&s->gb, 3); /* video_format */
6989 get_bits1(&s->gb); /* video_full_range_flag */
6990 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6991 get_bits(&s->gb, 8); /* colour_primaries */
6992 get_bits(&s->gb, 8); /* transfer_characteristics */
6993 get_bits(&s->gb, 8); /* matrix_coefficients */
6997 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6998 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6999 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
7002 sps->timing_info_present_flag = get_bits1(&s->gb);
7003 if(sps->timing_info_present_flag){
7004 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7005 sps->time_scale = get_bits_long(&s->gb, 32);
7006 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
/* HRD parameters may appear twice: once for NAL, once for VCL */
7009 sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7010 if(sps->nal_hrd_parameters_present_flag)
7011 if(decode_hrd_parameters(h, sps) < 0)
7013 sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7014 if(sps->vcl_hrd_parameters_present_flag)
7015 if(decode_hrd_parameters(h, sps) < 0)
7017 if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
7018 get_bits1(&s->gb); /* low_delay_hrd_flag */
7019 sps->pic_struct_present_flag = get_bits1(&s->gb);
7021 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7022 if(sps->bitstream_restriction_flag){
7023 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7024 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7025 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7026 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7027 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7028 sps->num_reorder_frames= get_ue_golomb(&s->gb);
7029 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
7031 if(sps->num_reorder_frames > 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7032 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", sps->num_reorder_frames);
/* Parse one scaling list of `size` (16 or 64) entries in zigzag order.
 * If the present flag is 0 the predicted fallback list is used; if the
 * first delta yields 0 the JVT default list is used; deltas of 0 repeat
 * the previous value.  NOTE(review): fragmentary listing — some original
 * lines are elided; code left byte-identical, comments only. */
7040 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7041 const uint8_t *jvt_list, const uint8_t *fallback_list){
7042 MpegEncContext * const s = &h->s;
7043 int i, last = 8, next = 8;
/* 4x4 lists use the 4x4 zigzag, 8x8 lists the generic one */
7044 const uint8_t *scan = size == 16 ? zigzag_scan : ff_zigzag_direct;
7045 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7046 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7048 for(i=0;i<size;i++){
7050 next = (last + get_se_golomb(&s->gb)) & 0xff;
7051 if(!i && !next){ /* matrix not written, we use the preset one */
7052 memcpy(factors, jvt_list, size*sizeof(uint8_t));
7055 last = factors[scan[i]] = next ? next : last;
/* Parse the full set of scaling matrices for an SPS (is_sps=1) or PPS.
 * Fallback order follows the spec: PPS lists fall back to SPS lists when
 * the SPS carried matrices, otherwise to the JVT defaults; within a set,
 * each list falls back to the previously decoded one.
 * NOTE(review): fragmentary listing — some original lines are elided;
 * code left byte-identical, comments only. */
7059 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7060 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7061 MpegEncContext * const s = &h->s;
7062 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7063 const uint8_t *fallback[4] = {
7064 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7065 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7066 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7067 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7069 if(get_bits1(&s->gb)){
7070 sps->scaling_matrix_present |= is_sps;
7071 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7072 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7073 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7074 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7075 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7076 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
/* 8x8 lists exist only in the SPS or when the PPS enables 8x8 transform */
7077 if(is_sps || pps->transform_8x8_mode){
7078 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7079 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
/* Parse a sequence parameter set NAL and store it in h->sps_buffers.
 * Reads profile/level, optional high-profile extensions (chroma format,
 * bit depth, scaling matrices), POC parameters, frame geometry, cropping
 * and the optional VUI.  NOTE(review): fragmentary listing — some
 * original lines are elided; code left byte-identical, comments only. */
7084 int ff_h264_decode_seq_parameter_set(H264Context *h){
7085 MpegEncContext * const s = &h->s;
7086 int profile_idc, level_idc;
7087 unsigned int sps_id;
7091 profile_idc= get_bits(&s->gb, 8);
7092 get_bits1(&s->gb); //constraint_set0_flag
7093 get_bits1(&s->gb); //constraint_set1_flag
7094 get_bits1(&s->gb); //constraint_set2_flag
7095 get_bits1(&s->gb); //constraint_set3_flag
7096 get_bits(&s->gb, 4); // reserved
7097 level_idc= get_bits(&s->gb, 8);
7098 sps_id= get_ue_golomb_31(&s->gb);
7100 if(sps_id >= MAX_SPS_COUNT) {
7101 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
7104 sps= av_mallocz(sizeof(SPS));
7108 sps->profile_idc= profile_idc;
7109 sps->level_idc= level_idc;
/* default: flat scaling matrices (all 16) until/unless parsed below */
7111 memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
7112 memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
7113 sps->scaling_matrix_present = 0;
7115 if(sps->profile_idc >= 100){ //high profile
7116 sps->chroma_format_idc= get_ue_golomb_31(&s->gb);
7117 if(sps->chroma_format_idc == 3)
7118 sps->residual_color_transform_flag = get_bits1(&s->gb);
7119 sps->bit_depth_luma = get_ue_golomb(&s->gb) + 8;
7120 sps->bit_depth_chroma = get_ue_golomb(&s->gb) + 8;
7121 sps->transform_bypass = get_bits1(&s->gb);
7122 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
/* non-high profiles are always 4:2:0 */
7124 sps->chroma_format_idc= 1;
7127 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7128 sps->poc_type= get_ue_golomb_31(&s->gb);
7130 if(sps->poc_type == 0){ //FIXME #define
7131 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7132 } else if(sps->poc_type == 1){//FIXME #define
7133 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7134 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7135 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7136 sps->poc_cycle_length = get_ue_golomb(&s->gb);
7138 if((unsigned)sps->poc_cycle_length >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
7139 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", sps->poc_cycle_length);
7143 for(i=0; i<sps->poc_cycle_length; i++)
7144 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7145 }else if(sps->poc_type != 2){
7146 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7150 sps->ref_frame_count= get_ue_golomb_31(&s->gb);
7151 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){
7152 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7155 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7156 sps->mb_width = get_ue_golomb(&s->gb) + 1;
7157 sps->mb_height= get_ue_golomb(&s->gb) + 1;
7158 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7159 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)){
7160 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7164 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7165 if(!sps->frame_mbs_only_flag)
7166 sps->mb_aff= get_bits1(&s->gb);
7170 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7172 #ifndef ALLOW_INTERLACE
7174 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7176 sps->crop= get_bits1(&s->gb);
7178 sps->crop_left = get_ue_golomb(&s->gb);
7179 sps->crop_right = get_ue_golomb(&s->gb);
7180 sps->crop_top = get_ue_golomb(&s->gb);
7181 sps->crop_bottom= get_ue_golomb(&s->gb);
7182 if(sps->crop_left || sps->crop_top){
7183 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7185 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7186 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7192 sps->crop_bottom= 0;
7195 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7196 if( sps->vui_parameters_present_flag )
7197 decode_vui_parameters(h, sps);
7199 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7200 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7201 sps_id, sps->profile_idc, sps->level_idc,
7203 sps->ref_frame_count,
7204 sps->mb_width, sps->mb_height,
7205 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7206 sps->direct_8x8_inference_flag ? "8B8" : "",
7207 sps->crop_left, sps->crop_right,
7208 sps->crop_top, sps->crop_bottom,
7209 sps->vui_parameters_present_flag ? "VUI" : "",
7210 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
/* replace any previous SPS with the same id */
7214 av_free(h->sps_buffers[sps_id]);
7215 h->sps_buffers[sps_id]= sps;
/* Fill pps->chroma_qp_table[t] by applying the chroma_qp_index_offset
 * `index` to every luma qp 0..51, clipping the lookup into chroma_qp[].
 * NOTE(review): fragmentary listing (the return-type line above this one
 * is elided); code left byte-identical, comments only. */
7224 build_qp_table(PPS *pps, int t, int index)
7227 for(i = 0; i < 52; i++)
7228 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
/**
 * Decode a picture parameter set (PPS) NAL unit from s->gb and store it
 * in h->pps_buffers[pps_id], replacing any previous PPS with that id.
 *
 * @param h          decoder context; bitstream is read from h->s.gb
 * @param bit_length length of the RBSP in bits, used to detect the
 *                   optional trailing fields (transform_8x8_mode,
 *                   scaling matrices, second chroma QP offset)
 * NOTE(review): several lines (declarations of pps/i, error returns,
 * closing braces) are missing from this extraction.
 */
7231 int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length){
7232 MpegEncContext * const s = &h->s;
7233 unsigned int pps_id= get_ue_golomb(&s->gb);
7236 if(pps_id >= MAX_PPS_COUNT) {
7237 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
7241 pps= av_mallocz(sizeof(PPS));
/* The referenced SPS must already have been decoded. */
7244 pps->sps_id= get_ue_golomb_31(&s->gb);
7245 if((unsigned)pps->sps_id>=MAX_SPS_COUNT || h->sps_buffers[pps->sps_id] == NULL){
7246 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7250 pps->cabac= get_bits1(&s->gb);
7251 pps->pic_order_present= get_bits1(&s->gb);
7252 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
/* FMO (flexible macroblock ordering): the map type is parsed but the
 * per-type syntax below is only quoted from the spec, not implemented. */
7253 if(pps->slice_group_count > 1 ){
7254 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7255 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7256 switch(pps->mb_slice_group_map_type){
7259 |   for( i = 0; i <= num_slice_groups_minus1; i++ ) |   |           |
7260 |    run_length[ i ]                                |1  |ue(v)      |
7265 |   for( i = 0; i < num_slice_groups_minus1; i++ )  |   |           |
7267 |    top_left_mb[ i ]                               |1  |ue(v)      |
7268 |    bottom_right_mb[ i ]                           |1  |ue(v)      |
7276 |   slice_group_change_direction_flag               |1  |u(1)       |
7277 |   slice_group_change_rate_minus1                  |1  |ue(v)      |
7282 |   slice_group_id_cnt_minus1                       |1  |ue(v)      |
7283 |   for( i = 0; i <= slice_group_id_cnt_minus1; i++ |   |           |
7285 |    slice_group_id[ i ]                            |1  |u(v)       |
/* Default L0/L1 reference counts; at most 32 references are supported. */
7290 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7291 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7292 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7293 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7297 pps->weighted_pred= get_bits1(&s->gb);
7298 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7299 pps->init_qp= get_se_golomb(&s->gb) + 26;
7300 pps->init_qs= get_se_golomb(&s->gb) + 26;
7301 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7302 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7303 pps->constrained_intra_pred= get_bits1(&s->gb);
7304 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7306 pps->transform_8x8_mode= 0;
7307 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
/* Start from the SPS scaling matrices; the PPS may override them below. */
7308 memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
7309 memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
/* Optional High-profile extension fields: present only if bits remain. */
7311 if(get_bits_count(&s->gb) < bit_length){
7312 pps->transform_8x8_mode= get_bits1(&s->gb);
7313 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7314 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7316 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7319 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7320 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7321 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7322 h->pps.chroma_qp_diff= 1;
7324 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7325 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7326 pps_id, pps->sps_id,
7327 pps->cabac ? "CABAC" : "CAVLC",
7328 pps->slice_group_count,
7329 pps->ref_count[0], pps->ref_count[1],
7330 pps->weighted_pred ? "weighted" : "",
7331 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7332 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7333 pps->constrained_intra_pred ? "CONSTR" : "",
7334 pps->redundant_pic_cnt_present ? "REDU" : "",
7335 pps->transform_8x8_mode ? "8x8DCT" : ""
/* Replace any previously stored PPS with the same id. */
7339 av_free(h->pps_buffers[pps_id]);
7340 h->pps_buffers[pps_id]= pps;
7348  * Call decode_slice() for each context.
7350  * @param h h264 master context
7351  * @param context_count number of contexts to execute
7353 static void execute_decode_slices(H264Context *h, int context_count){
7354 MpegEncContext * const s = &h->s;
7355 AVCodecContext * const avctx= s->avctx;
/* Hardware-accelerated paths do nothing here; the actual decoding is
 * performed by the hwaccel / VDPAU backend (early-out bodies missing
 * from this extraction). */
7359 if (s->avctx->hwaccel)
7361 if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
/* Single-context: decode directly on the calling thread. */
7363 if(context_count == 1) {
7364 decode_slice(avctx, &h);
/* Multi-context: propagate error settings to the worker contexts, then
 * run decode_slice() on all of them via avctx->execute(). */
7366 for(i = 1; i < context_count; i++) {
7367 hx = h->thread_context[i];
7368 hx->s.error_recognition = avctx->error_recognition;
7369 hx->s.error_count = 0;
7372 avctx->execute(avctx, (void *)decode_slice,
7373 (void **)h->thread_context, NULL, context_count, sizeof(void*));
7375 /* pull back stuff from slices to master context */
7376 hx = h->thread_context[context_count - 1];
7377 s->mb_x = hx->s.mb_x;
7378 s->mb_y = hx->s.mb_y;
7379 s->dropable = hx->s.dropable;
7380 s->picture_structure = hx->s.picture_structure;
7381 for(i = 1; i < context_count; i++)
7382 h->s.error_count += h->thread_context[i]->s.error_count;
/**
 * Split the input buffer into NAL units (either length-prefixed AVC/MP4
 * style or Annex-B start-code style), unescape each one and dispatch it
 * by nal_unit_type (slice, DPA/DPB/DPC partitions, SEI, SPS, PPS, ...).
 * Complete slices are queued into thread contexts and flushed through
 * execute_decode_slices().
 *
 * @return presumably the number of bytes consumed (buf_index); the
 *         return statement is missing from this extraction — confirm.
 */
7387 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7388 MpegEncContext * const s = &h->s;
7389 AVCodecContext * const avctx= s->avctx;
7391 H264Context *hx; ///< thread context
7392 int context_count = 0;
7394 h->max_contexts = avctx->thread_count;
/* Debug dump of the first input bytes (enabling condition not visible). */
7397 for(i=0; i<50; i++){
7398 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
7401 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7402 h->current_slice = 0;
7403 if (!s->first_field)
7404 s->current_picture_ptr= NULL;
/* --- per-NAL loop: locate the next NAL unit ------------------------- */
7416 if(buf_index >= buf_size) break;
/* AVC/MP4 framing: big-endian length prefix of h->nal_length_size bytes. */
7418 for(i = 0; i < h->nal_length_size; i++)
7419 nalsize = (nalsize << 8) | buf[buf_index++];
7420 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7425 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7430 // start code prefix search
7431 for(; buf_index + 3 < buf_size; buf_index++){
7432 // This should always succeed in the first iteration.
7433 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7437 if(buf_index+3 >= buf_size) break;
7442 hx = h->thread_context[context_count];
/* Unescape the RBSP (remove emulation-prevention bytes). */
7444 ptr= ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7445 if (ptr==NULL || dst_length < 0){
/* Strip trailing zero bytes, then subtract the rbsp trailing bits to get
 * the exact payload length in bits. */
7448 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7450 bit_length= !dst_length ? 0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1));
7452 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7453 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
/* Only warn loudly if the unconsumed padding contains non-zero bytes. */
7456 if (h->is_avc && (nalsize != consumed)){
7457 int i, debug_level = AV_LOG_DEBUG;
7458 for (i = consumed; i < nalsize; i++)
7459 if (buf[buf_index+i])
7460 debug_level = AV_LOG_ERROR;
7461 av_log(h->s.avctx, debug_level, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7465 buf_index += consumed;
7467 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7468 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
/* --- dispatch by NAL type (case labels partly missing) -------------- */
7473 switch(hx->nal_unit_type){
7475 if (h->nal_unit_type != NAL_IDR_SLICE) {
7476 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7479 idr(h); //FIXME ensure we don't lose some frames if there is reordering
7481 init_get_bits(&hx->s.gb, ptr, bit_length);
7483 hx->inter_gb_ptr= &hx->s.gb;
7484 hx->s.data_partitioning = 0;
7486 if((err = decode_slice_header(hx, h)))
7489 s->current_picture_ptr->key_frame |=
7490 (hx->nal_unit_type == NAL_IDR_SLICE) ||
7491 (h->sei_recovery_frame_cnt >= 0);
/* Decode the slice only if it is not redundant and not skipped by the
 * user's skip_frame / hurry_up settings. */
7492 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7493 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7494 && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=FF_B_TYPE)
7495 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7496 && avctx->skip_frame < AVDISCARD_ALL){
7497 if(avctx->hwaccel) {
7498 if (avctx->hwaccel->decode_slice(avctx, &buf[buf_index - consumed], consumed) < 0)
/* VDPAU wants Annex-B data: re-prepend a start code before the payload. */
7501 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
7502 static const uint8_t start_code[] = {0x00, 0x00, 0x01};
7503 ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code));
7504 ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed );
/* Data partition A: carries the slice header; B/C are set up below. */
7510 init_get_bits(&hx->s.gb, ptr, bit_length);
7512 hx->inter_gb_ptr= NULL;
7513 hx->s.data_partitioning = 1;
7515 err = decode_slice_header(hx, h);
7518 init_get_bits(&hx->intra_gb, ptr, bit_length);
7519 hx->intra_gb_ptr= &hx->intra_gb;
7522 init_get_bits(&hx->inter_gb, ptr, bit_length);
7523 hx->inter_gb_ptr= &hx->inter_gb;
/* A partitioned slice is queued once all its partitions have arrived. */
7525 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7526 && s->context_initialized
7528 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7529 && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=FF_B_TYPE)
7530 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7531 && avctx->skip_frame < AVDISCARD_ALL)
7535 init_get_bits(&s->gb, ptr, bit_length);
7536 ff_h264_decode_sei(h);
7539 init_get_bits(&s->gb, ptr, bit_length);
7540 ff_h264_decode_seq_parameter_set(h);
7542 if(s->flags& CODEC_FLAG_LOW_DELAY)
7545 if(avctx->has_b_frames < 2)
7546 avctx->has_b_frames= !s->low_delay;
7549 init_get_bits(&s->gb, ptr, bit_length);
7551 ff_h264_decode_picture_parameter_set(h, bit_length);
7555 case NAL_END_SEQUENCE:
7556 case NAL_END_STREAM:
7557 case NAL_FILLER_DATA:
7559 case NAL_AUXILIARY_SLICE:
7562 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
/* Flush queued slices once all thread contexts are occupied. */
7565 if(context_count == h->max_contexts) {
7566 execute_decode_slices(h, context_count);
7571 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7573 /* Slice could not be decoded in parallel mode, copy down
7574 * NAL unit stuff to context 0 and restart. Note that
7575 * rbsp_buffer is not transferred, but since we no longer
7576 * run in parallel mode this should not be an issue. */
7577 h->nal_unit_type = hx->nal_unit_type;
7578 h->nal_ref_idc   = hx->nal_ref_idc;
/* Flush any slices still queued when the input is exhausted. */
7584 execute_decode_slices(h, context_count);
7589  * returns the number of bytes consumed for building the current frame
/* Clamp the consumed-byte count: never report 0 (would stall callers
 * that advance by the return value) and report the whole buffer when
 * within 10 bytes of its end. The return statement is missing from
 * this extraction. */
7591 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7592 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7593 if(pos+10>buf_size) pos=buf_size; // oops ;)
/**
 * libavcodec decode entry point for H.264.
 *
 * Handles: end-of-stream flushing of the delayed-picture queue,
 * one-time avcC extradata parsing (AVC/MP4 files), per-packet NAL
 * decoding via decode_nal_units(), SEI/POC-based interlacing signalling,
 * and reordering of decoded pictures into display order.
 *
 * @param data      output AVFrame
 * @param data_size set to sizeof(AVFrame) when a picture is returned,
 *                  left/set 0 otherwise
 * @return bytes consumed (via get_consumed_bytes) on success
 * NOTE(review): many lines (declarations, braces, error returns) are
 * missing from this extraction; comments are based on the visible code.
 */
7598 static int decode_frame(AVCodecContext *avctx,
7599 void *data, int *data_size,
7600 const uint8_t *buf, int buf_size)
7602 H264Context *h = avctx->priv_data;
7603 MpegEncContext *s = &h->s;
7604 AVFrame *pict = data;
7607 s->flags= avctx->flags;
7608 s->flags2= avctx->flags2;
7610    /* end of stream, output what is still in the buffers */
7611 if (buf_size == 0) {
7615 //FIXME factorize this with the output code below
/* Pick the delayed picture with the smallest POC, stopping at a
 * keyframe / zero-POC boundary. */
7616 out = h->delayed_pic[0];
7618 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7619 if(h->delayed_pic[i]->poc < out->poc){
7620 out = h->delayed_pic[i];
7624 for(i=out_idx; h->delayed_pic[i]; i++)
7625 h->delayed_pic[i] = h->delayed_pic[i+1];
7628 *data_size = sizeof(AVFrame);
7629 *pict= *(AVFrame*)out;
/* One-time parse of avcC extradata (AVC/MP4 container framing). */
7635 if(h->is_avc && !h->got_avcC) {
7636 int i, cnt, nalsize;
7637 unsigned char *p = avctx->extradata;
7638 if(avctx->extradata_size < 7) {
7639 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7643 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7646        /* sps and pps in the avcC always have length coded with 2 bytes,
7647           so put a fake nal_length_size = 2 while parsing them */
7648 h->nal_length_size = 2;
7649        // Decode sps from avcC
7650 cnt = *(p+5) & 0x1f; // Number of sps
7652 for (i = 0; i < cnt; i++) {
7653 nalsize = AV_RB16(p) + 2;
7654 if(decode_nal_units(h, p, nalsize) < 0) {
7655 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7660        // Decode pps from avcC
7661 cnt = *(p++); // Number of pps
7662 for (i = 0; i < cnt; i++) {
7663 nalsize = AV_RB16(p) + 2;
7664 if(decode_nal_units(h, p, nalsize) != nalsize) {
7665 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7670        // Now store right nal length size, that will be used to parse all other nals
7671 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7672        // Do not reparse avcC
/* Annex-B extradata (non-AVC) is decoded once as ordinary NAL units. */
7676 if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7677 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7682 buf_index=decode_nal_units(h, buf, buf_size);
7686 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7687 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7688 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
/* A picture is complete: finish it and decide what to output. */
7692 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7693 Picture *out = s->current_picture_ptr;
7694 Picture *cur = s->current_picture_ptr;
7695 int i, pics, cross_idr, out_of_order, out_idx;
7699 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7700 s->current_picture_ptr->pict_type= s->pict_type;
7702 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
7703 ff_vdpau_h264_set_reference_frames(s);
/* Apply memory-management control ops and roll POC/frame_num state. */
7706 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7707 h->prev_poc_msb= h->poc_msb;
7708 h->prev_poc_lsb= h->poc_lsb;
7710 h->prev_frame_num_offset= h->frame_num_offset;
7711 h->prev_frame_num= h->frame_num;
7713 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
7714 ff_vdpau_h264_picture_complete(s);
7717      * FIXME: Error handling code does not seem to support interlaced
7718      * when slices span multiple rows
7719      * The ff_er_add_slice calls don't work right for bottom
7720      * fields; they cause massive erroneous error concealing
7721      * Error marking covers both fields (top and bottom).
7722      * This causes a mismatched s->error_count
7723      * and a bad error table. Further, the error count goes to
7724      * INT_MAX when called for bottom field, because mb_y is
7725      * past end by one (callers fault) and resync_mb_y != 0
7726      * causes problems for the first MB line, too.
/* Reset per-picture SEI state for the next access unit. */
7732 h->sei_recovery_frame_cnt = -1;
7733 h->sei_dpb_output_delay = 0;
7734 h->sei_cpb_removal_delay = -1;
7735 h->sei_buffering_period_present = 0;
7737 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7738        /* Wait for second field. */
7742 cur->repeat_pict = 0;
7744    /* Signal interlacing information externally. */
7745    /* Prioritize picture timing SEI information over used decoding process if it exists. */
7746 if(h->sps.pic_struct_present_flag){
7747 switch (h->sei_pic_struct)
7749 case SEI_PIC_STRUCT_FRAME:
7750 cur->interlaced_frame = 0;
7752 case SEI_PIC_STRUCT_TOP_FIELD:
7753 case SEI_PIC_STRUCT_BOTTOM_FIELD:
7754 case SEI_PIC_STRUCT_TOP_BOTTOM:
7755 case SEI_PIC_STRUCT_BOTTOM_TOP:
7756 cur->interlaced_frame = 1;
7758 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
7759 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
7760            // Signal the possibility of telecined film externally (pic_struct 5,6)
7761            // From these hints, let the applications decide if they apply deinterlacing.
7762 cur->repeat_pict = 1;
7763 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7765 case SEI_PIC_STRUCT_FRAME_DOUBLING:
7766            // Force progressive here, as doubling interlaced frame is a bad idea.
7767 cur->interlaced_frame = 0;
7768 cur->repeat_pict = 2;
7770 case SEI_PIC_STRUCT_FRAME_TRIPLING:
7771 cur->interlaced_frame = 0;
7772 cur->repeat_pict = 4;
7776        /* Derive interlacing flag from used decoding process. */
7777 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7780 if (cur->field_poc[0] != cur->field_poc[1]){
7781        /* Derive top_field_first from field pocs. */
7782 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7784 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
7785            /* Use picture timing SEI information. Even if it is information from a past frame, better than nothing. */
7786 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
7787 || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
7788 cur->top_field_first = 1;
7790 cur->top_field_first = 0;
7792            /* Most likely progressive */
7793 cur->top_field_first = 0;
7797    //FIXME do something with unavailable reference frames
7799    /* Sort B-frames into display order */
7801 if(h->sps.bitstream_restriction_flag
7802 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7803 s->avctx->has_b_frames = h->sps.num_reorder_frames;
/* Without bitstream restrictions, strict compliance forces the maximum
 * reorder delay. */
7807 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7808 && !h->sps.bitstream_restriction_flag){
7809 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
7814 while(h->delayed_pic[pics]) pics++;
7816 assert(pics <= MAX_DELAYED_PIC_COUNT);
/* Keep the picture referenced while it waits in the output queue. */
7818 h->delayed_pic[pics++] = cur;
7819 if(cur->reference == 0)
7820 cur->reference = DELAYED_PIC_REF;
7822 out = h->delayed_pic[0];
7824 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7825 if(h->delayed_pic[i]->poc < out->poc){
7826 out = h->delayed_pic[i];
7829 cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;
7831 out_of_order = !cross_idr && out->poc < h->outputed_poc;
/* Grow has_b_frames adaptively when out-of-order output is detected. */
7833 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7835 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7837 ((!cross_idr && out->poc > h->outputed_poc + 2)
7838 || cur->pict_type == FF_B_TYPE)))
7841 s->avctx->has_b_frames++;
7844 if(out_of_order || pics > s->avctx->has_b_frames){
7845 out->reference &= ~DELAYED_PIC_REF;
7846 for(i=out_idx; h->delayed_pic[i]; i++)
7847 h->delayed_pic[i] = h->delayed_pic[i+1];
7849 if(!out_of_order && pics > s->avctx->has_b_frames){
7850 *data_size = sizeof(AVFrame);
7852 h->outputed_poc = out->poc;
7853 *pict= *(AVFrame*)out;
7855 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7860 assert(pict->data[0] || !*data_size);
7861 ff_print_debug_info(s, pict);
7862 //printf("out %d\n", (int)pict->data[0]);
7865    /* Return the Picture timestamp as the frame number */
7866    /* we subtract 1 because it is added on utils.c */
7867 avctx->frame_number = s->picture_number - 1;
7869 return get_consumed_bytes(s, buf_index, buf_size);
/* Fill h->mb_avail[] with the availability of the macroblocks
 * neighbouring the current one: a neighbour is "available" only if it
 * lies inside the frame AND belongs to the same slice as the current MB
 * (same h->slice_num in h->slice_table).
 * Indices: 0=top-left, 1=top, 2=top-right, 3=left; 4 and 5 are constant.
 * NOTE(review): the guard for the first MB row (mb_y check before the
 * top-neighbour lookups) is missing from this extraction. */
7872 static inline void fill_mb_avail(H264Context *h){
7873 MpegEncContext * const s = &h->s;
7874 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7877 h->mb_avail[0]= s->mb_x                  && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7878 h->mb_avail[1]=                             h->slice_table[mb_xy - s->mb_stride    ] == h->slice_num;
7879 h->mb_avail[2]= s->mb_x+1 < s->mb_width  && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7885 h->mb_avail[3]= s->mb_x                  && h->slice_table[mb_xy - 1] == h->slice_num;
7886 h->mb_avail[4]= 1; //FIXME move out
7887 h->mb_avail[5]= 0; //FIXME move out
/* Built-in self-test (compiled only under a TEST define; the enclosing
 * function header is missing from this extraction — presumably main()).
 * Exercises, in order: unsigned/signed Exp-Golomb round-trips, the 4x4
 * (I)DCT with a simulated quantize/dequantize step, the quantizer, and
 * NAL escaping/unescaping round-trips. */
7895 #define SIZE (COUNT*40)
7901 //    int int_temp[10000];
7903 AVCodecContext avctx;
7905 dsputil_init(&dsp, &avctx);
/* --- unsigned Exp-Golomb: write COUNT codes, read them back --------- */
7907 init_put_bits(&pb, temp, SIZE);
7908 printf("testing unsigned exp golomb\n");
7909 for(i=0; i<COUNT; i++){
7911 set_ue_golomb(&pb, i);
7912 STOP_TIMER("set_ue_golomb");
7914 flush_put_bits(&pb);
7916 init_get_bits(&gb, temp, 8*SIZE);
7917 for(i=0; i<COUNT; i++){
7920 s= show_bits(&gb, 24);
7923 j= get_ue_golomb(&gb);
7925 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7928 STOP_TIMER("get_ue_golomb");
/* --- signed Exp-Golomb, centred on zero ----------------------------- */
7932 init_put_bits(&pb, temp, SIZE);
7933 printf("testing signed exp golomb\n");
7934 for(i=0; i<COUNT; i++){
7936 set_se_golomb(&pb, i - COUNT/2);
7937 STOP_TIMER("set_se_golomb");
7939 flush_put_bits(&pb);
7941 init_get_bits(&gb, temp, 8*SIZE);
7942 for(i=0; i<COUNT; i++){
7945 s= show_bits(&gb, 24);
7948 j= get_se_golomb(&gb);
7949 if(j != i - COUNT/2){
7950 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7953 STOP_TIMER("get_se_golomb");
/* --- 4x4 DCT/IDCT round-trip on random blocks, measuring error ------ */
7957 printf("testing 4x4 (I)DCT\n");
7960 uint8_t src[16], ref[16];
7961 uint64_t error= 0, max_error=0;
7963 for(i=0; i<COUNT; i++){
7965 //        printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7966 for(j=0; j<16; j++){
7967 ref[j]= random()%255;
7968 src[j]= random()%255;
7971 h264_diff_dct_c(block, src, ref, 4);
/* Crude quantize/dequantize simulation (*4/5 per affected axis). */
7974 for(j=0; j<16; j++){
7975 //            printf("%d ", block[j]);
7976 block[j]= block[j]*4;
7977 if(j&1) block[j]= (block[j]*4 + 2)/5;
7978 if(j&4) block[j]= (block[j]*4 + 2)/5;
7982 s->dsp.h264_idct_add(ref, block, 4);
7983 /*        for(j=0; j<16; j++){
7984             printf("%d ", ref[j]);
7988 for(j=0; j<16; j++){
7989 int diff= FFABS(src[j] - ref[j]);
7992 max_error= FFMAX(max_error, diff);
7995 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
7996 printf("testing quantizer\n");
7997 for(qp=0; qp<52; qp++){
7999 src1_block[i]= src2_block[i]= random()%255;
/* --- NAL escape/unescape round-trip --------------------------------- */
8002 printf("Testing NAL layer\n");
8004 uint8_t bitstream[COUNT];
8005 uint8_t nal[COUNT*2];
8007 memset(&h, 0, sizeof(H264Context));
8009 for(i=0; i<COUNT; i++){
/* Random non-zero payload, then force some zero bytes in to exercise
 * the emulation-prevention escaping. */
8017 for(j=0; j<COUNT; j++){
8018 bitstream[j]= (random() % 255) + 1;
8021 for(j=0; j<zeros; j++){
8022 int pos= random() % COUNT;
8023 while(bitstream[pos] == 0){
8032 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8034 printf("encoding failed\n");
8038 out= ff_h264_decode_nal(&h, nal, &out_length, &consumed, nal_length);
8042 if(out_length != COUNT){
8043 printf("incorrect length %d %d\n", out_length, COUNT);
8047 if(consumed != nal_length){
8048 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8052 if(memcmp(bitstream, out, COUNT)){
8053 printf("mismatch\n");
8059 printf("Testing RBSP\n");
/* Codec close callback: free the per-context RBSP unescape buffers, the
 * decoder tables, and all stored SPS/PPS parameter sets.
 * (av_freep on already-NULL slots is a harmless no-op, so the full
 * MAX_SPS_COUNT/MAX_PPS_COUNT ranges are swept unconditionally.)
 * NOTE(review): the return statement and MPV cleanup lines are missing
 * from this extraction. */
8067 static av_cold int decode_end(AVCodecContext *avctx)
8069 H264Context *h = avctx->priv_data;
8070 MpegEncContext *s = &h->s;
8073 av_freep(&h->rbsp_buffer[0]);
8074 av_freep(&h->rbsp_buffer[1]);
8075 free_tables(h); //FIXME cleanup init stuff perhaps
8077 for(i = 0; i < MAX_SPS_COUNT; i++)
8078 av_freep(h->sps_buffers + i);
8080 for(i = 0; i < MAX_PPS_COUNT; i++)
8081 av_freep(h->pps_buffers + i);
8085 //    memset(h, 0, sizeof(H264Context));
/* Registration record for the software H.264 decoder. Several positional
 * fields (name, type, codec id, callbacks) are missing from this
 * extraction; the visible ones are priv_data_size, capabilities,
 * long_name and the supported pixel-format list. */
8091 AVCodec h264_decoder = {
8095 sizeof(H264Context),
8100 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
8102 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
8103 .pix_fmts= ff_pixfmt_list_420,
/* Registration record for the VDPAU-accelerated variant, compiled only
 * when CONFIG_H264_VDPAU_DECODER is set. Marked with
 * CODEC_CAP_HWACCEL_VDPAU so decode_nal_units()/execute_decode_slices()
 * take the VDPAU data-chunk path instead of software slice decoding.
 * The closing brace and #endif are missing from this extraction. */
8106 #if CONFIG_H264_VDPAU_DECODER
8107 AVCodec h264_vdpau_decoder = {
8111 sizeof(H264Context),
8116 CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
8118 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
8122 #if CONFIG_SVQ3_DECODER